public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] i386: Add PCLMUL dependency for VPCLMULQDQ
@ 2023-04-18  7:18 Haochen Jiang
  2023-04-19  1:54 ` Hongtao Liu
  0 siblings, 1 reply; 3+ messages in thread
From: Haochen Jiang @ 2023-04-18  7:18 UTC (permalink / raw)
  To: gcc-patches; +Cc: hongtao.liu, ubizjak

Hi all,

Currently in GCC, the 128 bit intrin for instruction vpclmulqdq is
under PCLMUL ISA. Because there is no dependency between ISA set PCLMUL
and VPCLMULQDQ, the 128 bit intrin is not available when we just use
compiler flag -mvpclmulqdq. But it should be, according to the Intel SDM.

Since VPCLMULQDQ is a VEX/EVEX promotion for PCLMUL, it is natural to
add a dependency between them.

Also, with -mvpclmulqdq, we can use ymm under VEX encoding, so
VPCLMULQDQ should imply AVX.

Tested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

	* common/config/i386/i386-common.cc
	(OPTION_MASK_ISA_VPCLMULQDQ_SET):
	Add OPTION_MASK_ISA_PCLMUL_SET and OPTION_MASK_ISA_AVX_SET.
	(OPTION_MASK_ISA_AVX_UNSET):
	Add OPTION_MASK_ISA_VPCLMULQDQ_UNSET.
	(OPTION_MASK_ISA_PCLMUL_UNSET): Ditto.
	* config/i386/i386.md (vpclmulqdqvl): New.
	* config/i386/sse.md (pclmulqdq): Add evex encoding.
	* config/i386/vpclmulqdqintrin.h: Remove redundant avx target
	push.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/vpclmulqdq.c: Add compile test for xmm.
---
 gcc/common/config/i386/i386-common.cc      |  9 ++++++---
 gcc/config/i386/i386.md                    |  4 +++-
 gcc/config/i386/sse.md                     | 11 ++++++-----
 gcc/config/i386/vpclmulqdqintrin.h         |  4 ++--
 gcc/testsuite/gcc.target/i386/vpclmulqdq.c |  3 +++
 5 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
index 315db854862..c7954da8e34 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -171,7 +171,9 @@ along with GCC; see the file COPYING3.  If not see
 #define OPTION_MASK_ISA_GFNI_SET OPTION_MASK_ISA_GFNI
 #define OPTION_MASK_ISA_SHSTK_SET OPTION_MASK_ISA_SHSTK
 #define OPTION_MASK_ISA2_VAES_SET OPTION_MASK_ISA2_VAES
-#define OPTION_MASK_ISA_VPCLMULQDQ_SET OPTION_MASK_ISA_VPCLMULQDQ
+#define OPTION_MASK_ISA_VPCLMULQDQ_SET \
+  (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_PCLMUL_SET \
+   | OPTION_MASK_ISA_AVX_SET)
 #define OPTION_MASK_ISA_MOVDIRI_SET OPTION_MASK_ISA_MOVDIRI
 #define OPTION_MASK_ISA2_MOVDIR64B_SET OPTION_MASK_ISA2_MOVDIR64B
 #define OPTION_MASK_ISA2_WAITPKG_SET OPTION_MASK_ISA2_WAITPKG
@@ -211,7 +213,7 @@ along with GCC; see the file COPYING3.  If not see
 #define OPTION_MASK_ISA_AVX_UNSET \
   (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
    | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET \
-   | OPTION_MASK_ISA_AVX2_UNSET )
+   | OPTION_MASK_ISA_AVX2_UNSET | OPTION_MASK_ISA_VPCLMULQDQ_UNSET)
 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
 #define OPTION_MASK_ISA_FXSR_UNSET OPTION_MASK_ISA_FXSR
 #define OPTION_MASK_ISA_XSAVE_UNSET \
@@ -314,7 +316,8 @@ along with GCC; see the file COPYING3.  If not see
 
 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
 #define OPTION_MASK_ISA_SHA_UNSET OPTION_MASK_ISA_SHA
-#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
+#define OPTION_MASK_ISA_PCLMUL_UNSET \
+  (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_VPCLMULQDQ_UNSET)
 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
 #define OPTION_MASK_ISA2_PCONFIG_UNSET OPTION_MASK_ISA2_PCONFIG
 #define OPTION_MASK_ISA2_WBNOINVD_UNSET OPTION_MASK_ISA2_WBNOINVD
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ed689b044c3..acc994226e7 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -841,7 +841,7 @@
 		    avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
 		    avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl,
 		    avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16,avxifma,
-		    avx512ifmavl,avxneconvert,avx512bf16vl"
+		    avx512ifmavl,avxneconvert,avx512bf16vl,vpclmulqdqvl"
   (const_string "base"))
 
 ;; Define instruction set of MMX instructions
@@ -903,6 +903,8 @@
 	 (eq_attr "isa" "avxneconvert") (symbol_ref "TARGET_AVXNECONVERT")
 	 (eq_attr "isa" "avx512bf16vl")
 	   (symbol_ref "TARGET_AVX512BF16 && TARGET_AVX512VL")
+	 (eq_attr "isa" "vpclmulqdqvl")
+	   (symbol_ref "TARGET_VPCLMULQDQ && TARGET_AVX512VL")
 
 	 (eq_attr "mmx_isa" "native")
 	   (symbol_ref "!TARGET_MMX_WITH_SSE")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 26812ab6106..33e281901cf 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -25195,20 +25195,21 @@
    (set_attr "mode" "TI")])
 
 (define_insn "pclmulqdq"
-  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
-	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
-		      (match_operand:V2DI 2 "vector_operand" "xBm,xm")
+  [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
+	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
+		      (match_operand:V2DI 2 "vector_operand" "xBm,xm,vm")
 		      (match_operand:SI 3 "const_0_to_255_operand")]
 		     UNSPEC_PCLMUL))]
   "TARGET_PCLMUL"
   "@
    pclmulqdq\t{%3, %2, %0|%0, %2, %3}
+   vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}
    vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "isa" "noavx,avx")
+  [(set_attr "isa" "noavx,avx,vpclmulqdqvl")
    (set_attr "type" "sselog1")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
-   (set_attr "prefix" "orig,vex")
+   (set_attr "prefix" "orig,vex,evex")
    (set_attr "mode" "TI")])
 
 (define_expand "avx_vzeroall"
diff --git a/gcc/config/i386/vpclmulqdqintrin.h b/gcc/config/i386/vpclmulqdqintrin.h
index ba93fc4ff9c..2c83b6037a0 100644
--- a/gcc/config/i386/vpclmulqdqintrin.h
+++ b/gcc/config/i386/vpclmulqdqintrin.h
@@ -53,9 +53,9 @@ _mm512_clmulepi64_epi128 (__m512i __A, __m512i __B, const int __C)
 #pragma GCC pop_options
 #endif /* __DISABLE_VPCLMULQDQF__ */
 
-#if !defined(__VPCLMULQDQ__) || !defined(__AVX__)
+#if !defined(__VPCLMULQDQ__)
 #pragma GCC push_options
-#pragma GCC target("vpclmulqdq,avx")
+#pragma GCC target("vpclmulqdq")
 #define __DISABLE_VPCLMULQDQ__
 #endif /* __VPCLMULQDQ__ */
 
diff --git a/gcc/testsuite/gcc.target/i386/vpclmulqdq.c b/gcc/testsuite/gcc.target/i386/vpclmulqdq.c
index d93f776803f..27b2fd71ea4 100644
--- a/gcc/testsuite/gcc.target/i386/vpclmulqdq.c
+++ b/gcc/testsuite/gcc.target/i386/vpclmulqdq.c
@@ -2,16 +2,19 @@
 /* { dg-options "-mvpclmulqdq -mavx512vl -mavx512f -O2" } */
 /* { dg-final { scan-assembler-times "vpclmulqdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vpclmulqdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpclmulqdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <x86intrin.h>
 
 volatile __m512i x1, x2;
 volatile __m256i x3, x4;
+volatile __m128i x5, x6;
 
 void extern
 avx512vl_test (void)
 {
     x1 = _mm512_clmulepi64_epi128(x1, x2, 3);
     x3 = _mm256_clmulepi64_epi128(x3, x4, 3);
+    x5 = _mm_clmulepi64_si128(x5, x6, 3);
 }
 
-- 
2.31.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] i386: Add PCLMUL dependency for VPCLMULQDQ
  2023-04-18  7:18 [PATCH] i386: Add PCLMUL dependency for VPCLMULQDQ Haochen Jiang
@ 2023-04-19  1:54 ` Hongtao Liu
  2023-04-19  2:00   ` Hongtao Liu
  0 siblings, 1 reply; 3+ messages in thread
From: Hongtao Liu @ 2023-04-19  1:54 UTC (permalink / raw)
  To: Haochen Jiang; +Cc: gcc-patches, hongtao.liu, ubizjak

On Tue, Apr 18, 2023 at 3:18 PM Haochen Jiang via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi all,
>
> Currently in GCC, the 128 bit intrin for instruction vpclmulqdq is
> under PCLMUL ISA. Because there is no dependency between ISA set PCLMUL
> and VPCLMULQDQ, The 128 bit intrin is not available when we just use
> compiler flag -mvpclmulqdq. But it should according to Intel SDM.
>
> Since VPCLMULQDQ is a VEX/EVEX promotion for PCLMUL, it is natural to
> add dependency between them.
>
> Also, with -mvpclmulqdq, we can use ymm under VEX encoding, so
> VPCLMULQDQ should imply AVX.
>
> Tested on x86_64-pc-linux-gnu. Ok for trunk?
>
> BRs,
> Haochen
>
> gcc/ChangeLog:
>
>         * common/config/i386/i386-common.cc
>         (OPTION_MASK_ISA_VPCLMULQDQ_SET):
>         Add OPTION_MASK_ISA_PCLMUL_SET and OPTION_MASK_ISA_AVX_SET.
>         (OPTION_MASK_ISA_AVX_UNSET):
>         Add OPTION_MASK_ISA_VPCLMULQDQ_UNSET.
>         (OPTION_MASK_ISA_PCLMUL_UNSET): Ditto.
>         * config/i386/i386.md (vpclmulqdqvl): New.
>         * config/i386/sse.md (pclmulqdq): Add evex encoding.
>         * config/i386/vpclmulqdqintrin.h: Remove redudant avx target
>         push.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/vpclmulqdq.c: Add compile test for xmm.
> ---
>  gcc/common/config/i386/i386-common.cc      |  9 ++++++---
>  gcc/config/i386/i386.md                    |  4 +++-
>  gcc/config/i386/sse.md                     | 11 ++++++-----
>  gcc/config/i386/vpclmulqdqintrin.h         |  4 ++--
>  gcc/testsuite/gcc.target/i386/vpclmulqdq.c |  3 +++
>  5 files changed, 20 insertions(+), 11 deletions(-)
>
> diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
> index 315db854862..c7954da8e34 100644
> --- a/gcc/common/config/i386/i386-common.cc
> +++ b/gcc/common/config/i386/i386-common.cc
> @@ -171,7 +171,9 @@ along with GCC; see the file COPYING3.  If not see
>  #define OPTION_MASK_ISA_GFNI_SET OPTION_MASK_ISA_GFNI
>  #define OPTION_MASK_ISA_SHSTK_SET OPTION_MASK_ISA_SHSTK
>  #define OPTION_MASK_ISA2_VAES_SET OPTION_MASK_ISA2_VAES
> -#define OPTION_MASK_ISA_VPCLMULQDQ_SET OPTION_MASK_ISA_VPCLMULQDQ
> +#define OPTION_MASK_ISA_VPCLMULQDQ_SET \
> +  (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_PCLMUL_SET \
> +   | OPTION_MASK_ISA_AVX_SET)
>  #define OPTION_MASK_ISA_MOVDIRI_SET OPTION_MASK_ISA_MOVDIRI
>  #define OPTION_MASK_ISA2_MOVDIR64B_SET OPTION_MASK_ISA2_MOVDIR64B
>  #define OPTION_MASK_ISA2_WAITPKG_SET OPTION_MASK_ISA2_WAITPKG
> @@ -211,7 +213,7 @@ along with GCC; see the file COPYING3.  If not see
>  #define OPTION_MASK_ISA_AVX_UNSET \
>    (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
>     | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET \
> -   | OPTION_MASK_ISA_AVX2_UNSET )
> +   | OPTION_MASK_ISA_AVX2_UNSET | OPTION_MASK_ISA_VPCLMULQDQ_UNSET)
>  #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
>  #define OPTION_MASK_ISA_FXSR_UNSET OPTION_MASK_ISA_FXSR
>  #define OPTION_MASK_ISA_XSAVE_UNSET \
> @@ -314,7 +316,8 @@ along with GCC; see the file COPYING3.  If not see
>
>  #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
>  #define OPTION_MASK_ISA_SHA_UNSET OPTION_MASK_ISA_SHA
> -#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
> +#define OPTION_MASK_ISA_PCLMUL_UNSET \
> +  (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_VPCLMULQDQ_UNSET)
>  #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
>  #define OPTION_MASK_ISA2_PCONFIG_UNSET OPTION_MASK_ISA2_PCONFIG
>  #define OPTION_MASK_ISA2_WBNOINVD_UNSET OPTION_MASK_ISA2_WBNOINVD
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index ed689b044c3..acc994226e7 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -841,7 +841,7 @@
>                     avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
>                     avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl,
>                     avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16,avxifma,
> -                   avx512ifmavl,avxneconvert,avx512bf16vl"
> +                   avx512ifmavl,avxneconvert,avx512bf16vl,vpclmulqdqvl"
>    (const_string "base"))
>
>  ;; Define instruction set of MMX instructions
> @@ -903,6 +903,8 @@
>          (eq_attr "isa" "avxneconvert") (symbol_ref "TARGET_AVXNECONVERT")
>          (eq_attr "isa" "avx512bf16vl")
>            (symbol_ref "TARGET_AVX512BF16 && TARGET_AVX512VL")
> +        (eq_attr "isa" "vpclmulqdqvl")
> +          (symbol_ref "TARGET_VPCLMULQDQ && TARGET_AVX512VL")
>
>          (eq_attr "mmx_isa" "native")
>            (symbol_ref "!TARGET_MMX_WITH_SSE")
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 26812ab6106..33e281901cf 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -25195,20 +25195,21 @@
>     (set_attr "mode" "TI")])
>
>  (define_insn "pclmulqdq"
> -  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
> -       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
> -                     (match_operand:V2DI 2 "vector_operand" "xBm,xm")
> +  [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
> +       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
> +                     (match_operand:V2DI 2 "vector_operand" "xBm,xm,vm")
Just change x to Yv instead of introducing a new alternative.
Others LGTM.
>                       (match_operand:SI 3 "const_0_to_255_operand")]
>                      UNSPEC_PCLMUL))]
>    "TARGET_PCLMUL"
>    "@
>     pclmulqdq\t{%3, %2, %0|%0, %2, %3}
> +   vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}
>     vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
> -  [(set_attr "isa" "noavx,avx")
> +  [(set_attr "isa" "noavx,avx,vpclmulqdqvl")
>     (set_attr "type" "sselog1")
>     (set_attr "prefix_extra" "1")
>     (set_attr "length_immediate" "1")
> -   (set_attr "prefix" "orig,vex")
> +   (set_attr "prefix" "orig,vex,evex")
>     (set_attr "mode" "TI")])
>
>  (define_expand "avx_vzeroall"
> diff --git a/gcc/config/i386/vpclmulqdqintrin.h b/gcc/config/i386/vpclmulqdqintrin.h
> index ba93fc4ff9c..2c83b6037a0 100644
> --- a/gcc/config/i386/vpclmulqdqintrin.h
> +++ b/gcc/config/i386/vpclmulqdqintrin.h
> @@ -53,9 +53,9 @@ _mm512_clmulepi64_epi128 (__m512i __A, __m512i __B, const int __C)
>  #pragma GCC pop_options
>  #endif /* __DISABLE_VPCLMULQDQF__ */
>
> -#if !defined(__VPCLMULQDQ__) || !defined(__AVX__)
> +#if !defined(__VPCLMULQDQ__)
>  #pragma GCC push_options
> -#pragma GCC target("vpclmulqdq,avx")
> +#pragma GCC target("vpclmulqdq")
>  #define __DISABLE_VPCLMULQDQ__
>  #endif /* __VPCLMULQDQ__ */
>
> diff --git a/gcc/testsuite/gcc.target/i386/vpclmulqdq.c b/gcc/testsuite/gcc.target/i386/vpclmulqdq.c
> index d93f776803f..27b2fd71ea4 100644
> --- a/gcc/testsuite/gcc.target/i386/vpclmulqdq.c
> +++ b/gcc/testsuite/gcc.target/i386/vpclmulqdq.c
> @@ -2,16 +2,19 @@
>  /* { dg-options "-mvpclmulqdq -mavx512vl -mavx512f -O2" } */
>  /* { dg-final { scan-assembler-times "vpclmulqdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
>  /* { dg-final { scan-assembler-times "vpclmulqdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vpclmulqdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
>
>  #include <x86intrin.h>
>
>  volatile __m512i x1, x2;
>  volatile __m256i x3, x4;
> +volatile __m128i x5, x6;
>
>  void extern
>  avx512vl_test (void)
>  {
>      x1 = _mm512_clmulepi64_epi128(x1, x2, 3);
>      x3 = _mm256_clmulepi64_epi128(x3, x4, 3);
> +    x5 = _mm_clmulepi64_si128(x5, x6, 3);
>  }
>
> --
> 2.31.1
>


-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] i386: Add PCLMUL dependency for VPCLMULQDQ
  2023-04-19  1:54 ` Hongtao Liu
@ 2023-04-19  2:00   ` Hongtao Liu
  0 siblings, 0 replies; 3+ messages in thread
From: Hongtao Liu @ 2023-04-19  2:00 UTC (permalink / raw)
  To: Haochen Jiang; +Cc: gcc-patches, hongtao.liu, ubizjak

On Wed, Apr 19, 2023 at 9:54 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Tue, Apr 18, 2023 at 3:18 PM Haochen Jiang via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Hi all,
> >
> > Currently in GCC, the 128 bit intrin for instruction vpclmulqdq is
> > under PCLMUL ISA. Because there is no dependency between ISA set PCLMUL
> > and VPCLMULQDQ, The 128 bit intrin is not available when we just use
> > compiler flag -mvpclmulqdq. But it should according to Intel SDM.
> >
> > Since VPCLMULQDQ is a VEX/EVEX promotion for PCLMUL, it is natural to
> > add dependency between them.
> >
> > Also, with -mvpclmulqdq, we can use ymm under VEX encoding, so
> > VPCLMULQDQ should imply AVX.
> >
> > Tested on x86_64-pc-linux-gnu. Ok for trunk?
> >
> > BRs,
> > Haochen
> >
> > gcc/ChangeLog:
> >
> >         * common/config/i386/i386-common.cc
> >         (OPTION_MASK_ISA_VPCLMULQDQ_SET):
> >         Add OPTION_MASK_ISA_PCLMUL_SET and OPTION_MASK_ISA_AVX_SET.
> >         (OPTION_MASK_ISA_AVX_UNSET):
> >         Add OPTION_MASK_ISA_VPCLMULQDQ_UNSET.
> >         (OPTION_MASK_ISA_PCLMUL_UNSET): Ditto.
> >         * config/i386/i386.md (vpclmulqdqvl): New.
> >         * config/i386/sse.md (pclmulqdq): Add evex encoding.
> >         * config/i386/vpclmulqdqintrin.h: Remove redudant avx target
> >         push.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/i386/vpclmulqdq.c: Add compile test for xmm.
> > ---
> >  gcc/common/config/i386/i386-common.cc      |  9 ++++++---
> >  gcc/config/i386/i386.md                    |  4 +++-
> >  gcc/config/i386/sse.md                     | 11 ++++++-----
> >  gcc/config/i386/vpclmulqdqintrin.h         |  4 ++--
> >  gcc/testsuite/gcc.target/i386/vpclmulqdq.c |  3 +++
> >  5 files changed, 20 insertions(+), 11 deletions(-)
> >
> > diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
> > index 315db854862..c7954da8e34 100644
> > --- a/gcc/common/config/i386/i386-common.cc
> > +++ b/gcc/common/config/i386/i386-common.cc
> > @@ -171,7 +171,9 @@ along with GCC; see the file COPYING3.  If not see
> >  #define OPTION_MASK_ISA_GFNI_SET OPTION_MASK_ISA_GFNI
> >  #define OPTION_MASK_ISA_SHSTK_SET OPTION_MASK_ISA_SHSTK
> >  #define OPTION_MASK_ISA2_VAES_SET OPTION_MASK_ISA2_VAES
> > -#define OPTION_MASK_ISA_VPCLMULQDQ_SET OPTION_MASK_ISA_VPCLMULQDQ
> > +#define OPTION_MASK_ISA_VPCLMULQDQ_SET \
> > +  (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_PCLMUL_SET \
> > +   | OPTION_MASK_ISA_AVX_SET)
> >  #define OPTION_MASK_ISA_MOVDIRI_SET OPTION_MASK_ISA_MOVDIRI
> >  #define OPTION_MASK_ISA2_MOVDIR64B_SET OPTION_MASK_ISA2_MOVDIR64B
> >  #define OPTION_MASK_ISA2_WAITPKG_SET OPTION_MASK_ISA2_WAITPKG
> > @@ -211,7 +213,7 @@ along with GCC; see the file COPYING3.  If not see
> >  #define OPTION_MASK_ISA_AVX_UNSET \
> >    (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
> >     | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET \
> > -   | OPTION_MASK_ISA_AVX2_UNSET )
> > +   | OPTION_MASK_ISA_AVX2_UNSET | OPTION_MASK_ISA_VPCLMULQDQ_UNSET)
> >  #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
> >  #define OPTION_MASK_ISA_FXSR_UNSET OPTION_MASK_ISA_FXSR
> >  #define OPTION_MASK_ISA_XSAVE_UNSET \
> > @@ -314,7 +316,8 @@ along with GCC; see the file COPYING3.  If not see
> >
> >  #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
> >  #define OPTION_MASK_ISA_SHA_UNSET OPTION_MASK_ISA_SHA
> > -#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
> > +#define OPTION_MASK_ISA_PCLMUL_UNSET \
> > +  (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_VPCLMULQDQ_UNSET)
> >  #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
> >  #define OPTION_MASK_ISA2_PCONFIG_UNSET OPTION_MASK_ISA2_PCONFIG
> >  #define OPTION_MASK_ISA2_WBNOINVD_UNSET OPTION_MASK_ISA2_WBNOINVD
> > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> > index ed689b044c3..acc994226e7 100644
> > --- a/gcc/config/i386/i386.md
> > +++ b/gcc/config/i386/i386.md
> > @@ -841,7 +841,7 @@
> >                     avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
> >                     avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl,
> >                     avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16,avxifma,
> > -                   avx512ifmavl,avxneconvert,avx512bf16vl"
> > +                   avx512ifmavl,avxneconvert,avx512bf16vl,vpclmulqdqvl"
> >    (const_string "base"))
> >
> >  ;; Define instruction set of MMX instructions
> > @@ -903,6 +903,8 @@
> >          (eq_attr "isa" "avxneconvert") (symbol_ref "TARGET_AVXNECONVERT")
> >          (eq_attr "isa" "avx512bf16vl")
> >            (symbol_ref "TARGET_AVX512BF16 && TARGET_AVX512VL")
> > +        (eq_attr "isa" "vpclmulqdqvl")
> > +          (symbol_ref "TARGET_VPCLMULQDQ && TARGET_AVX512VL")
> >
> >          (eq_attr "mmx_isa" "native")
> >            (symbol_ref "!TARGET_MMX_WITH_SSE")
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index 26812ab6106..33e281901cf 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -25195,20 +25195,21 @@
> >     (set_attr "mode" "TI")])
> >
> >  (define_insn "pclmulqdq"
> > -  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
> > -       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
> > -                     (match_operand:V2DI 2 "vector_operand" "xBm,xm")
> > +  [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
> > +       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
> > +                     (match_operand:V2DI 2 "vector_operand" "xBm,xm,vm")
> Just change x to Yv instead of introducing a new alternative.
Looks like the EVEX encoding needs both vpclmulqdq and avx512vl, so I
take back the comment above; the patch LGTM.
> Others LGTM.
> >                       (match_operand:SI 3 "const_0_to_255_operand")]
> >                      UNSPEC_PCLMUL))]
> >    "TARGET_PCLMUL"
> >    "@
> >     pclmulqdq\t{%3, %2, %0|%0, %2, %3}
> > +   vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}
> >     vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
> > -  [(set_attr "isa" "noavx,avx")
> > +  [(set_attr "isa" "noavx,avx,vpclmulqdqvl")
> >     (set_attr "type" "sselog1")
> >     (set_attr "prefix_extra" "1")
> >     (set_attr "length_immediate" "1")
> > -   (set_attr "prefix" "orig,vex")
> > +   (set_attr "prefix" "orig,vex,evex")
> >     (set_attr "mode" "TI")])
> >
> >  (define_expand "avx_vzeroall"
> > diff --git a/gcc/config/i386/vpclmulqdqintrin.h b/gcc/config/i386/vpclmulqdqintrin.h
> > index ba93fc4ff9c..2c83b6037a0 100644
> > --- a/gcc/config/i386/vpclmulqdqintrin.h
> > +++ b/gcc/config/i386/vpclmulqdqintrin.h
> > @@ -53,9 +53,9 @@ _mm512_clmulepi64_epi128 (__m512i __A, __m512i __B, const int __C)
> >  #pragma GCC pop_options
> >  #endif /* __DISABLE_VPCLMULQDQF__ */
> >
> > -#if !defined(__VPCLMULQDQ__) || !defined(__AVX__)
> > +#if !defined(__VPCLMULQDQ__)
> >  #pragma GCC push_options
> > -#pragma GCC target("vpclmulqdq,avx")
> > +#pragma GCC target("vpclmulqdq")
> >  #define __DISABLE_VPCLMULQDQ__
> >  #endif /* __VPCLMULQDQ__ */
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/vpclmulqdq.c b/gcc/testsuite/gcc.target/i386/vpclmulqdq.c
> > index d93f776803f..27b2fd71ea4 100644
> > --- a/gcc/testsuite/gcc.target/i386/vpclmulqdq.c
> > +++ b/gcc/testsuite/gcc.target/i386/vpclmulqdq.c
> > @@ -2,16 +2,19 @@
> >  /* { dg-options "-mvpclmulqdq -mavx512vl -mavx512f -O2" } */
> >  /* { dg-final { scan-assembler-times "vpclmulqdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> >  /* { dg-final { scan-assembler-times "vpclmulqdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> > +/* { dg-final { scan-assembler-times "vpclmulqdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
> >
> >  #include <x86intrin.h>
> >
> >  volatile __m512i x1, x2;
> >  volatile __m256i x3, x4;
> > +volatile __m128i x5, x6;
> >
> >  void extern
> >  avx512vl_test (void)
> >  {
> >      x1 = _mm512_clmulepi64_epi128(x1, x2, 3);
> >      x3 = _mm256_clmulepi64_epi128(x3, x4, 3);
> > +    x5 = _mm_clmulepi64_si128(x5, x6, 3);
> >  }
> >
> > --
> > 2.31.1
> >
>
>
> --
> BR,
> Hongtao



-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-04-19  2:00 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-18  7:18 [PATCH] i386: Add PCLMUL dependency for VPCLMULQDQ Haochen Jiang
2023-04-19  1:54 ` Hongtao Liu
2023-04-19  2:00   ` Hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).