From: "Hu, Lin1" <lin1.hu@intel.com>
To: gcc-patches@gcc.gnu.org
Cc: hongtao.liu@intel.com, ubizjak@gmail.com, rguenther@suse.de,
Tamar.Christina@arm.com
Subject: [PATCH 1/3 v4] vect: generate suitable convert insn for int -> int, float -> float and int <-> float.
Date: Tue, 25 Jun 2024 11:28:12 +0800 [thread overview]
Message-ID: <20240625032812.3857804-1-lin1.hu@intel.com> (raw)
In-Reply-To: <VI1PR08MB5325581136A38C1AAB959DAFFFD42@VI1PR08MB5325.eurprd08.prod.outlook.com>
Hi,
This is the current version.
I haven't made any major changes to the original code, so I think it will have less impact on your code. I also think the current API is sufficient to support the mode selection you mentioned; if you have any concerns, please mention them and I can tweak it further.
BRs,
Lin
gcc/ChangeLog:
PR target/107432
* tree-vect-generic.cc
(expand_vector_conversion): Support convert for int -> int,
float -> float and int <-> float.
* tree-vect-stmts.cc (vectorizable_conversion): Wrap the
indirect convert part.
(supportable_indirect_convert_operation): New function.
* tree-vectorizer.h (supportable_indirect_convert_operation):
Declare the new function.
gcc/testsuite/ChangeLog:
PR target/107432
* gcc.target/i386/pr107432-1.c: New test.
* gcc.target/i386/pr107432-2.c: Ditto.
* gcc.target/i386/pr107432-3.c: Ditto.
* gcc.target/i386/pr107432-4.c: Ditto.
* gcc.target/i386/pr107432-5.c: Ditto.
* gcc.target/i386/pr107432-6.c: Ditto.
* gcc.target/i386/pr107432-7.c: Ditto.
---
gcc/testsuite/gcc.target/i386/pr107432-1.c | 234 +++++++++++++++++++
gcc/testsuite/gcc.target/i386/pr107432-2.c | 105 +++++++++
gcc/testsuite/gcc.target/i386/pr107432-3.c | 55 +++++
gcc/testsuite/gcc.target/i386/pr107432-4.c | 56 +++++
gcc/testsuite/gcc.target/i386/pr107432-5.c | 72 ++++++
gcc/testsuite/gcc.target/i386/pr107432-6.c | 139 +++++++++++
gcc/testsuite/gcc.target/i386/pr107432-7.c | 150 ++++++++++++
gcc/tree-vect-generic.cc | 34 ++-
gcc/tree-vect-stmts.cc | 259 ++++++++++++++-------
gcc/tree-vectorizer.h | 4 +
10 files changed, 1013 insertions(+), 95 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-3.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-4.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-5.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-6.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-7.c
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c b/gcc/testsuite/gcc.target/i386/pr107432-1.c
new file mode 100644
index 00000000000..a4f37447eb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c
@@ -0,0 +1,234 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512bw -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vpmovqd" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovqw" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovqb" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovdw" 6 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
+
+#include <x86intrin.h>
+
+typedef short __v2hi __attribute__ ((__vector_size__ (4)));
+typedef char __v2qi __attribute__ ((__vector_size__ (2)));
+typedef char __v4qi __attribute__ ((__vector_size__ (4)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+
+typedef unsigned short __v2hu __attribute__ ((__vector_size__ (4)));
+typedef unsigned short __v4hu __attribute__ ((__vector_size__ (8)));
+typedef unsigned char __v2qu __attribute__ ((__vector_size__ (2)));
+typedef unsigned char __v4qu __attribute__ ((__vector_size__ (4)));
+typedef unsigned char __v8qu __attribute__ ((__vector_size__ (8)));
+typedef unsigned int __v2su __attribute__ ((__vector_size__ (8)));
+
+__v2si mm_cvtepi64_epi32_builtin_convertvector(__m128i a)
+{
+ return __builtin_convertvector((__v2di)a, __v2si);
+}
+
+__m128i mm256_cvtepi64_epi32_builtin_convertvector(__m256i a)
+{
+ return (__m128i)__builtin_convertvector((__v4di)a, __v4si);
+}
+
+__m256i mm512_cvtepi64_epi32_builtin_convertvector(__m512i a)
+{
+ return (__m256i)__builtin_convertvector((__v8di)a, __v8si);
+}
+
+__v2hi mm_cvtepi64_epi16_builtin_convertvector(__m128i a)
+{
+ return __builtin_convertvector((__v2di)a, __v2hi);
+}
+
+__v4hi mm256_cvtepi64_epi16_builtin_convertvector(__m256i a)
+{
+ return __builtin_convertvector((__v4di)a, __v4hi);
+}
+
+__m128i mm512_cvtepi64_epi16_builtin_convertvector(__m512i a)
+{
+ return (__m128i)__builtin_convertvector((__v8di)a, __v8hi);
+}
+
+__v2qi mm_cvtepi64_epi8_builtin_convertvector(__m128i a)
+{
+ return __builtin_convertvector((__v2di)a, __v2qi);
+}
+
+__v4qi mm256_cvtepi64_epi8_builtin_convertvector(__m256i a)
+{
+ return __builtin_convertvector((__v4di)a, __v4qi);
+}
+
+__v8qi mm512_cvtepi64_epi8_builtin_convertvector(__m512i a)
+{
+ return __builtin_convertvector((__v8di)a, __v8qi);
+}
+
+__v2hi mm64_cvtepi32_epi16_builtin_convertvector(__v2si a)
+{
+ return __builtin_convertvector((__v2si)a, __v2hi);
+}
+
+__v4hi mm_cvtepi32_epi16_builtin_convertvector(__m128i a)
+{
+ return __builtin_convertvector((__v4si)a, __v4hi);
+}
+
+__m128i mm256_cvtepi32_epi16_builtin_convertvector(__m256i a)
+{
+ return (__m128i)__builtin_convertvector((__v8si)a, __v8hi);
+}
+
+__m256i mm512_cvtepi32_epi16_builtin_convertvector(__m512i a)
+{
+ return (__m256i)__builtin_convertvector((__v16si)a, __v16hi);
+}
+
+__v2qi mm64_cvtepi32_epi8_builtin_convertvector(__v2si a)
+{
+ return __builtin_convertvector((__v2si)a, __v2qi);
+}
+
+__v4qi mm_cvtepi32_epi8_builtin_convertvector(__m128i a)
+{
+ return __builtin_convertvector((__v4si)a, __v4qi);
+}
+
+__v8qi mm256_cvtepi32_epi8_builtin_convertvector(__m256i a)
+{
+ return __builtin_convertvector((__v8si)a, __v8qi);
+}
+
+__m128i mm512_cvtepi32_epi8_builtin_convertvector(__m512i a)
+{
+ return (__m128i)__builtin_convertvector((__v16si)a, __v16qi);
+}
+
+__v2qi mm32_cvtepi16_epi8_builtin_convertvector(__v2hi a)
+{
+ return __builtin_convertvector((__v2hi)a, __v2qi);
+}
+
+__v8qi mm_cvtepi16_epi8_builtin_convertvector(__m128i a)
+{
+ return __builtin_convertvector((__v8hi)a, __v8qi);
+}
+
+__m128i mm256_cvtepi16_epi8_builtin_convertvector(__m256i a)
+{
+ return (__m128i)__builtin_convertvector((__v16hi)a, __v16qi);
+}
+
+__m256i mm512_cvtepi16_epi8_builtin_convertvector(__m512i a)
+{
+ return (__m256i)__builtin_convertvector((__v32hi)a, __v32qi);
+}
+
+__v2su mm_cvtepu64_epu32_builtin_convertvector(__m128i a)
+{
+ return __builtin_convertvector((__v2du)a, __v2su);
+}
+
+__m128i mm256_cvtepu64_epu32_builtin_convertvector(__m256i a)
+{
+ return (__m128i)__builtin_convertvector((__v4du)a, __v4su);
+}
+
+__m256i mm512_cvtepu64_epu32_builtin_convertvector(__m512i a)
+{
+ return (__m256i)__builtin_convertvector((__v8du)a, __v8su);
+}
+
+__v2hu mm_cvtepu64_epu16_builtin_convertvector(__m128i a)
+{
+ return __builtin_convertvector((__v2du)a, __v2hu);
+}
+
+__v4hu mm256_cvtepu64_epu16_builtin_convertvector(__m256i a)
+{
+ return __builtin_convertvector((__v4du)a, __v4hu);
+}
+
+__m128i mm512_cvtepu64_epu16_builtin_convertvector(__m512i a)
+{
+ return (__m128i)__builtin_convertvector((__v8du)a, __v8hu);
+}
+
+__v2qu mm_cvtepu64_epu8_builtin_convertvector(__m128i a)
+{
+ return __builtin_convertvector((__v2du)a, __v2qu);
+}
+
+__v4qu mm256_cvtepu64_epu8_builtin_convertvector(__m256i a)
+{
+ return __builtin_convertvector((__v4du)a, __v4qu);
+}
+
+__v8qu mm512_cvtepu64_epu8_builtin_convertvector(__m512i a)
+{
+ return __builtin_convertvector((__v8du)a, __v8qu);
+}
+
+__v2hu mm32_cvtepu32_epu16_builtin_convertvector(__v2su a)
+{
+ return __builtin_convertvector((__v2su)a, __v2hu);
+}
+
+__v4hu mm_cvtepu32_epu16_builtin_convertvector(__m128i a)
+{
+ return __builtin_convertvector((__v4su)a, __v4hu);
+}
+
+__m128i mm256_cvtepu32_epu16_builtin_convertvector(__m256i a)
+{
+ return (__m128i)__builtin_convertvector((__v8su)a, __v8hu);
+}
+
+__m256i mm512_cvtepu32_epu16_builtin_convertvector(__m512i a)
+{
+ return (__m256i)__builtin_convertvector((__v16su)a, __v16hu);
+}
+
+__v2qu mm32_cvtepu32_epu8_builtin_convertvector(__v2su a)
+{
+ return __builtin_convertvector((__v2su)a, __v2qu);
+}
+
+__v4qu mm_cvtepu2_epu8_builtin_convertvector(__m128i a)
+{
+ return __builtin_convertvector((__v4su)a, __v4qu);
+}
+
+__v8qu mm256_cvtepu32_epu8_builtin_convertvector(__m256i a)
+{
+ return __builtin_convertvector((__v8su)a, __v8qu);
+}
+
+__m128i mm512_cvtepu32_epu8_builtin_convertvector(__m512i a)
+{
+ return (__m128i)__builtin_convertvector((__v16su)a, __v16qu);
+}
+
+__v2qu mm32_cvtepu16_epu8_builtin_convertvector(__v2hu a)
+{
+ return __builtin_convertvector((__v2hu)a, __v2qu);
+}
+
+__v8qu mm_cvtepu16_epu8_builtin_convertvector(__m128i a)
+{
+ return __builtin_convertvector((__v8hu)a, __v8qu);
+}
+
+__m128i mm256_cvtepu16_epu8_builtin_convertvector(__m256i a)
+{
+ return (__m128i)__builtin_convertvector((__v16hu)a, __v16qu);
+}
+
+__m256i mm512_cvtepu16_epu8_builtin_convertvector(__m512i a)
+{
+ return (__m256i)__builtin_convertvector((__v32hu)a, __v32qu);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-2.c b/gcc/testsuite/gcc.target/i386/pr107432-2.c
new file mode 100644
index 00000000000..02ffd811cb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-2.c
@@ -0,0 +1,105 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512bw -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vpmovsxdq" 3 } } */
+/* { dg-final { scan-assembler-times "vpmovsxwq" 3 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbq" 3 } } */
+/* { dg-final { scan-assembler-times "vpmovsxwd" 3 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbd" 3 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbw" 3 } } */
+
+#include <x86intrin.h>
+
+typedef short __v2hi __attribute__ ((__vector_size__ (4)));
+typedef char __v2qi __attribute__ ((__vector_size__ (2)));
+typedef char __v4qi __attribute__ ((__vector_size__ (4)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+
+__m128i mm_cvtepi32_epi64_builtin_convertvector(__v2si a)
+{
+ return __builtin_convertvector(a, __v2di);
+}
+
+__m256i mm256_cvtepi32_epi64_builtin_convertvector(__v4si a)
+{
+ return (__m256i)__builtin_convertvector(a, __v4di);
+}
+
+__m512i mm512_cvtepi32_epi64_builtin_convertvector(__v8si a)
+{
+ return (__m512i)__builtin_convertvector(a, __v8di);
+}
+
+__m128i mm_cvtepi16_epi64_builtin_convertvector(__v2hi a)
+{
+ return __builtin_convertvector(a, __v2di);
+}
+
+__m256i mm256_cvtepi16_epi64_builtin_convertvector(__v4hi a)
+{
+ return (__m256i)__builtin_convertvector(a, __v4di);
+}
+
+__m512i mm512_cvtepi16_epi64_builtin_convertvector(__v8hi a)
+{
+ return (__m512i)__builtin_convertvector(a, __v8di);
+}
+
+__m128i mm_cvtepi8_epi64_builtin_convertvector(__v2qi a)
+{
+ return __builtin_convertvector(a, __v2di);
+}
+
+__m256i mm256_cvtepi8_epi64_builtin_convertvector(__v4qi a)
+{
+ return (__m256i)__builtin_convertvector(a, __v4di);
+}
+
+__m512i mm512_cvtepi8_epi64_builtin_convertvector(__v8qi a)
+{
+ return (__m512i)__builtin_convertvector(a, __v8di);
+}
+
+__m128i mm_cvtepi16_epi32_builtin_convertvector(__v4hi a)
+{
+ return (__m128i)__builtin_convertvector(a, __v4si);
+}
+
+__m256i mm256_cvtepi16_epi32_builtin_convertvector(__v8hi a)
+{
+ return (__m256i)__builtin_convertvector(a, __v8si);
+}
+
+__m512i mm512_cvtepi16_epi32_builtin_convertvector(__v16hi a)
+{
+ return (__m512i)__builtin_convertvector(a, __v16si);
+}
+
+__m128i mm_cvtepi8_epi32_builtin_convertvector(__v4qi a)
+{
+ return (__m128i)__builtin_convertvector(a, __v4si);
+}
+
+__m256i mm256_cvtepi8_epi32_builtin_convertvector(__v8qi a)
+{
+ return (__m256i)__builtin_convertvector(a, __v8si);
+}
+
+__m512i mm512_cvtepi8_epi32_builtin_convertvector(__v16qi a)
+{
+ return (__m512i)__builtin_convertvector(a, __v16si);
+}
+
+__m128i mm_cvtepi8_epi16_builtin_convertvector(__v8qi a)
+{
+ return (__m128i)__builtin_convertvector(a, __v8hi);
+}
+
+__m256i mm256_cvtepi8_epi16_builtin_convertvector(__v16qi a)
+{
+ return (__m256i)__builtin_convertvector(a, __v16hi);
+}
+
+__v32hi mm512_cvtepi8_epi16_builtin_convertvector(__v32qi a)
+{
+ return __builtin_convertvector(a, __v32hi);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-3.c b/gcc/testsuite/gcc.target/i386/pr107432-3.c
new file mode 100644
index 00000000000..30dc947b6dd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-3.c
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512fp16 -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vcvtpd2ps" 3 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2ph" 3 } } */
+/* { dg-final { scan-assembler-times "vcvtps2ph" 3 } } */
+
+#include <x86intrin.h>
+
+typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4)));
+typedef _Float16 __v4hf __attribute__ ((__vector_size__ (8)));
+
+__v2sf mm_cvtpd_ps_builtin_convertvector(__v2df a)
+{
+ return __builtin_convertvector(a, __v2sf);
+}
+
+__v4sf mm256_cvtpd_ps_builtin_convertvector(__v4df a)
+{
+ return __builtin_convertvector(a, __v4sf);
+}
+
+__v8sf mm512_cvtpd_ps_builtin_convertvector(__v8df a)
+{
+ return __builtin_convertvector(a, __v8sf);
+}
+
+__v2hf mm_cvtpd_ph_builtin_convertvector(__v2df a)
+{
+ return __builtin_convertvector(a, __v2hf);
+}
+
+__v4hf mm256_cvtpd_ph_builtin_convertvector(__v4df a)
+{
+ return __builtin_convertvector(a, __v4hf);
+}
+
+__v8hf mm512_cvtpd_ph_builtin_convertvector(__v8df a)
+{
+ return __builtin_convertvector(a, __v8hf);
+}
+
+__v4hf mm_cvtps_ph_builtin_convertvector(__v4sf a)
+{
+ return __builtin_convertvector(a, __v4hf);
+}
+
+__v8hf mm256_cvtps_ph_builtin_convertvector(__v8sf a)
+{
+ return __builtin_convertvector(a, __v8hf);
+}
+
+__v16hf mm512_cvtps_ph_builtin_convertvector(__v16sf a)
+{
+ return __builtin_convertvector(a, __v16hf);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-4.c b/gcc/testsuite/gcc.target/i386/pr107432-4.c
new file mode 100644
index 00000000000..e537e7349e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-4.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512fp16 -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vcvtps2pd" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvtps2pd" 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvtph2pd" 3 } } */
+/* { dg-final { scan-assembler-times "vcvtph2ps" 3 } } */
+
+#include <x86intrin.h>
+
+typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4)));
+typedef _Float16 __v4hf __attribute__ ((__vector_size__ (8)));
+
+__v2df mm_cvtps_pd_builtin_convertvector(__v2sf a)
+{
+ return __builtin_convertvector(a, __v2df);
+}
+
+__v4df mm256_cvtps_pd_builtin_convertvector(__v4sf a)
+{
+ return __builtin_convertvector(a, __v4df);
+}
+
+__v8df mm512_cvtps_pd_builtin_convertvector(__v8sf a)
+{
+ return __builtin_convertvector(a, __v8df);
+}
+
+__v2df mm_cvtph_pd_builtin_convertvector(__v2hf a)
+{
+ return __builtin_convertvector(a, __v2df);
+}
+
+__v4df mm256_cvtph_pd_builtin_convertvector(__v4hf a)
+{
+ return __builtin_convertvector(a, __v4df);
+}
+
+__v8df mm512_cvtph_pd_builtin_convertvector(__v8hf a)
+{
+ return __builtin_convertvector(a, __v8df);
+}
+
+__v4sf mm_cvtph_ps_builtin_convertvector(__v4hf a)
+{
+ return __builtin_convertvector(a, __v4sf);
+}
+
+__v8sf mm256_cvtph_ps_builtin_convertvector(__v8hf a)
+{
+ return __builtin_convertvector(a, __v8sf);
+}
+
+__v16sf mm512_cvtph_ps_builtin_convertvector(__v16hf a)
+{
+ return __builtin_convertvector(a, __v16sf);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-5.c b/gcc/testsuite/gcc.target/i386/pr107432-5.c
new file mode 100644
index 00000000000..5a44ef9f3b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-5.c
@@ -0,0 +1,72 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512dq -mavx512fp16 -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vcvttpd2dq" 3 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qq" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttps2qq" 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttph2dq" 3 } } */
+/* { dg-final { scan-assembler-times "vcvttph2qq" 3 } } */
+
+#include <x86intrin.h>
+
+typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4)));
+typedef _Float16 __v4hf __attribute__ ((__vector_size__ (8)));
+
+__v2si mm_cvtpd_epi32_builtin_convertvector(__v2df a)
+{
+ return __builtin_convertvector(a, __v2si);
+}
+
+__v4si mm256_cvtpd_epi32_builtin_convertvector(__v4df a)
+{
+ return __builtin_convertvector(a, __v4si);
+}
+
+__v8si mm512_cvtpd_epi32_builtin_convertvector(__v8df a)
+{
+ return __builtin_convertvector(a, __v8si);
+}
+
+__v2di mm_cvtps_epi64_builtin_convertvector(__v2sf a)
+{
+ return __builtin_convertvector(a, __v2di);
+}
+
+__v4di mm256_cvtps_epi64_builtin_convertvector(__v4sf a)
+{
+ return __builtin_convertvector(a, __v4di);
+}
+
+__v8di mm512_cvtps_epi64_builtin_convertvector(__v8sf a)
+{
+ return __builtin_convertvector(a, __v8di);
+}
+
+__v4si mm_cvtph_epi32_builtin_convertvector(__v4hf a)
+{
+ return __builtin_convertvector(a, __v4si);
+}
+
+__v8si mm256_cvtph_epi32_builtin_convertvector(__v8hf a)
+{
+ return __builtin_convertvector(a, __v8si);
+}
+
+__v16si mm512_cvtph_epi32_builtin_convertvector(__v16hf a)
+{
+ return __builtin_convertvector(a, __v16si);
+}
+
+__v2di mm_cvtph_epi64_builtin_convertvector(__v2hf a)
+{
+ return __builtin_convertvector(a, __v2di);
+}
+
+__v4di mm256_cvtph_epi64_builtin_convertvector(__v4hf a)
+{
+ return __builtin_convertvector(a, __v4di);
+}
+
+__v8di mm512_cvtph_epi64_builtin_convertvector(__v8hf a)
+{
+ return __builtin_convertvector(a, __v8di);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-6.c b/gcc/testsuite/gcc.target/i386/pr107432-6.c
new file mode 100644
index 00000000000..4a68a10b089
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-6.c
@@ -0,0 +1,139 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -mavx512vl -mavx512bw -O2 -mavx512dq -fno-trapping-math" } */
+/* { dg-final { scan-assembler-times "vcvttpd2dq" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dq" 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udq" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udq" 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttps2dq" 3 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttps2dq" 4 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttps2udq" 3 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttps2udq" 4 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttph2w" 4 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uw" 4 } } */
+/* { dg-final { scan-assembler-times "vpmovdb" 10 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovdb" 14 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
+
+#include <x86intrin.h>
+
+typedef char __v2qi __attribute__ ((__vector_size__ (2)));
+typedef char __v4qi __attribute__ ((__vector_size__ (4)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+typedef char __v16qi __attribute__ ((__vector_size__ (16)));
+typedef unsigned char __v2qu __attribute__ ((vector_size (2)));
+typedef unsigned char __v4qu __attribute__ ((vector_size (4)));
+typedef unsigned char __v8qu __attribute__ ((vector_size (8)));
+typedef unsigned char __v16qu __attribute__ ((vector_size (16)));
+typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4)));
+typedef _Float16 __v4hf __attribute__ ((__vector_size__ (8)));
+typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16)));
+
+__v2qi mm_cvtpd_epi8_builtin_convertvector(__v2df a)
+{
+ return __builtin_convertvector((__v2df)a, __v2qi);
+}
+
+__v4qi mm256_cvtpd_epi8_builtin_convertvector(__v4df a)
+{
+ return __builtin_convertvector((__v4df)a, __v4qi);
+}
+
+__v8qi mm512_cvtpd_epi8_builtin_convertvector(__v8df a)
+{
+ return __builtin_convertvector((__v8df)a, __v8qi);
+}
+
+__v2qu mm_cvtpd_epu8_builtin_convertvector(__v2df a)
+{
+ return __builtin_convertvector((__v2df)a, __v2qu);
+}
+
+__v4qu mm256_cvtpd_epu8_builtin_convertvector(__v4df a)
+{
+ return __builtin_convertvector((__v4df)a, __v4qu);
+}
+
+__v8qu mm512_cvtpd_epu8_builtin_convertvector(__v8df a)
+{
+ return __builtin_convertvector((__v8df)a, __v8qu);
+}
+
+__v2qi mm64_cvtps_epi8_builtin_convertvector(__v2sf a)
+{
+ return __builtin_convertvector((__v2sf)a, __v2qi);
+}
+
+__v4qi mm128_cvtps_epi8_builtin_convertvector(__v4sf a)
+{
+ return __builtin_convertvector((__v4sf)a, __v4qi);
+}
+
+__v8qi mm256_cvtps_epi8_builtin_convertvector(__v8sf a)
+{
+ return __builtin_convertvector((__v8sf)a, __v8qi);
+}
+
+__v16qi mm512_cvtps_epi8_builtin_convertvector(__v16sf a)
+{
+ return __builtin_convertvector((__v16sf)a, __v16qi);
+}
+
+__v2qu mm64_cvtps_epu8_builtin_convertvector(__v2sf a)
+{
+ return __builtin_convertvector((__v2sf)a, __v2qu);
+}
+
+__v4qu mm128_cvtps_epu8_builtin_convertvector(__v4sf a)
+{
+ return __builtin_convertvector((__v4sf)a, __v4qu);
+}
+
+__v8qu mm256_cvtps_epu8_builtin_convertvector(__v8sf a)
+{
+ return __builtin_convertvector((__v8sf)a, __v8qu);
+}
+
+__v16qu mm512_cvtps_epu8_builtin_convertvector(__v16sf a)
+{
+ return __builtin_convertvector((__v16sf)a, __v16qu);
+}
+
+__v2qi mm32_cvtph_epi8_builtin_convertvector(__v2hf a)
+{
+ return __builtin_convertvector((__v2hf)a, __v2qi);
+}
+
+__v8qi mm128_cvtph_epi8_builtin_convertvector(__v8hf a)
+{
+ return __builtin_convertvector((__v8hf)a, __v8qi);
+}
+
+__v16qi mm256_cvtph_epi8_builtin_convertvector(__v16hf a)
+{
+ return __builtin_convertvector((__v16hf)a, __v16qi);
+}
+
+__v32qi mm512_cvtph_epi8_builtin_convertvector(__v32hf a)
+{
+ return __builtin_convertvector((__v32hf)a, __v32qi);
+}
+
+__v2qu mm32_cvtph_epu8_builtin_convertvector(__v2hf a)
+{
+ return __builtin_convertvector((__v2hf)a, __v2qu);
+}
+
+__v8qu mm128_cvtph_epu8_builtin_convertvector(__v8hf a)
+{
+ return __builtin_convertvector((__v8hf)a, __v8qu);
+}
+
+__v16qu mm256_cvtph_epu8_builtin_convertvector(__v16hf a)
+{
+ return __builtin_convertvector((__v16hf)a, __v16qu);
+}
+
+__v32qu mm512_cvtph_epu8_builtin_convertvector(__v32hf a)
+{
+ return __builtin_convertvector((__v32hf)a, __v32qu);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-7.c b/gcc/testsuite/gcc.target/i386/pr107432-7.c
new file mode 100644
index 00000000000..1b33e9a9508
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-7.c
@@ -0,0 +1,150 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -mavx512vl -mavx512bw -O2 -mavx512dq -fno-trapping-math" } */
+/* { dg-final { scan-assembler-times "vcvtdq2pd" 4 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvtdq2pd" 6 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvtdq2ps" 6 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvtdq2ps" 8 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvtw2ph" 8 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvtw2ph" 10 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovsxbd" 5 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovsxbd" 7 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovzxbd" 5 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovzxbd" 7 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovsxbd" 5 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovsxbd" 7 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovzxbd" 5 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovzxbd" 7 { target { ! ia32 } } } } */
+
+#include <x86intrin.h>
+
+typedef char __v2qi __attribute__ ((__vector_size__ (2)));
+typedef char __v4qi __attribute__ ((__vector_size__ (4)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+typedef char __v16qi __attribute__ ((__vector_size__ (16)));
+typedef unsigned char __v2qu __attribute__ ((vector_size (2)));
+typedef unsigned char __v4qu __attribute__ ((vector_size (4)));
+typedef unsigned char __v8qu __attribute__ ((vector_size (8)));
+typedef unsigned char __v16qu __attribute__ ((vector_size (16)));
+typedef _Float16 __v2hf __attribute__ ((__vector_size__ (4)));
+typedef _Float16 __v4hf __attribute__ ((__vector_size__ (8)));
+typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16)));
+
+__v2df mm_cvtepi8_pd_builtin_convertvector(__v2qi a)
+{
+ return __builtin_convertvector((__v2qi)a, __v2df);
+}
+
+__v4df mm256_cvtepi8_pd_builtin_convertvector(__v4qi a)
+{
+ return __builtin_convertvector((__v4qi)a, __v4df);
+}
+
+__v8df mm512_cvtepi8_pd_builtin_convertvector(__v8qi a)
+{
+ return __builtin_convertvector((__v8qi)a, __v8df);
+}
+
+__v2df mm_cvtepu8_pd_builtin_convertvector(__v2qu a)
+{
+ return __builtin_convertvector((__v2qu)a, __v2df);
+}
+
+__v4df mm256_cvtepu8_pd_builtin_convertvector(__v4qu a)
+{
+ return __builtin_convertvector((__v4qu)a, __v4df);
+}
+
+__v8df mm512_cvtepu8_pd_builtin_convertvector(__v8qu a)
+{
+ return __builtin_convertvector((__v8qu)a, __v8df);
+}
+
+__v2sf mm64_cvtepi8_ps_builtin_convertvector(__v2qi a)
+{
+ return __builtin_convertvector((__v2qi)a, __v2sf);
+}
+
+__v4sf mm128_cvtepi8_ps_builtin_convertvector(__v4qi a)
+{
+ return __builtin_convertvector((__v4qi)a, __v4sf);
+}
+
+__v8sf mm256_cvtepi8_ps_builtin_convertvector(__v8qi a)
+{
+ return __builtin_convertvector((__v8qi)a, __v8sf);
+}
+
+__v16sf mm512_cvtepi8_ps_builtin_convertvector(__v16qi a)
+{
+ return __builtin_convertvector((__v16qi)a, __v16sf);
+}
+
+__v2sf mm64_cvtepu8_ps_builtin_convertvector(__v2qu a)
+{
+ return __builtin_convertvector((__v2qu)a, __v2sf);
+}
+
+__v4sf mm128_cvtepu8_ps_builtin_convertvector(__v4qu a)
+{
+ return __builtin_convertvector((__v4qu)a, __v4sf);
+}
+
+__v8sf mm256_cvtepu8_ps_builtin_convertvector(__v8qu a)
+{
+ return __builtin_convertvector((__v8qu)a, __v8sf);
+}
+
+__v16sf mm512_cvtepu8_ps_builtin_convertvector(__v16qu a)
+{
+ return __builtin_convertvector((__v16qu)a, __v16sf);
+}
+
+__v2hf mm32_cvtepi8_ph_builtin_convertvector(__v2qi a)
+{
+ return __builtin_convertvector((__v2qi)a, __v2hf);
+}
+
+__v4hf mm64_cvtepi8_ph_builtin_convertvector(__v4qi a)
+{
+ return __builtin_convertvector((__v4qi)a, __v4hf);
+}
+
+__v8hf mm128_cvtepi8_ph_builtin_convertvector(__v8qi a)
+{
+ return __builtin_convertvector((__v8qi)a, __v8hf);
+}
+
+__v16hf mm256_cvtepi8_ph_builtin_convertvector(__v16qi a)
+{
+ return __builtin_convertvector((__v16qi)a, __v16hf);
+}
+
+__v32hf mm512_cvtepi8_ph_builtin_convertvector(__v32qi a)
+{
+ return __builtin_convertvector((__v32qi)a, __v32hf);
+}
+
+__v2hf mm32_cvtepu8_ph_builtin_convertvector(__v2qu a)
+{
+ return __builtin_convertvector((__v2qu)a, __v2hf);
+}
+
+__v4hf mm64_cvtepu8_ph_builtin_convertvector(__v4qu a)
+{
+ return __builtin_convertvector((__v4qu)a, __v4hf);
+}
+
+__v8hf mm128_cvtepu8_ph_builtin_convertvector(__v8qu a)
+{
+ return __builtin_convertvector((__v8qu)a, __v8hf);
+}
+
+__v16hf mm256_cvtepu8_ph_builtin_convertvector(__v16qu a)
+{
+ return __builtin_convertvector((__v16qu)a, __v16hf);
+}
+
+__v32hf mm512_cvtepu8_ph_builtin_convertvector(__v32qu a)
+{
+ return __builtin_convertvector((__v32qu)a, __v32hf);
+}
diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
index ea0069f7a67..b5c87bb0e13 100644
--- a/gcc/tree-vect-generic.cc
+++ b/gcc/tree-vect-generic.cc
@@ -45,6 +45,8 @@ along with GCC; see the file COPYING3. If not see
#include "gimple-match.h"
#include "recog.h" /* FIXME: for insn_data */
#include "optabs-libfuncs.h"
+#include "cfgloop.h"
+#include "tree-vectorizer.h"
/* Build a ternary operation and gimplify it. Emit code before GSI.
@@ -1870,14 +1872,36 @@ expand_vector_conversion (gimple_stmt_iterator *gsi)
else if (ret_elt_bits > arg_elt_bits)
modifier = WIDEN;
- if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
+ /*
+ if (supportable_convert_operation (code, ret_type, arg_type, &code1))
{
- if (supportable_convert_operation (code, ret_type, arg_type, &code1))
+ g = gimple_build_assign (lhs, code1, arg);
+ gsi_replace (gsi, g, false);
+ return;
+ }
+ */
+
+ vec<std::pair<tree, tree_code> > converts = vNULL;
+ if (supportable_indirect_convert_operation (code,
+ ret_type, arg_type,
+ &converts,
+ arg))
+ {
+ if (TYPE_MODE (converts[0].first) == TYPE_MODE (ret_type))
+ g = gimple_build_assign (lhs, converts[0].second, arg);
+ else
{
- g = gimple_build_assign (lhs, code1, arg);
- gsi_replace (gsi, g, false);
- return;
+ new_rhs = make_ssa_name (converts[0].first);
+ g = gimple_build_assign (new_rhs, converts[0].second, arg);
+ gsi_insert_before (gsi, g, GSI_SAME_STMT);
+ g = gimple_build_assign (lhs, converts[1].second, new_rhs);
}
+ gsi_replace (gsi, g, false);
+ return;
+ }
+
+ if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
+ {
/* Can't use get_compute_type here, as supportable_convert_operation
doesn't necessarily use an optab and needs two arguments. */
tree vec_compute_type
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 05a169ecb2d..f4c829ff6af 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -5175,7 +5175,7 @@ vectorizable_conversion (vec_info *vinfo,
tree scalar_dest;
tree op0, op1 = NULL_TREE;
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
- tree_code tc1, tc2;
+ tree_code tc1;
code_helper code, code1, code2;
code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
tree new_temp;
@@ -5367,6 +5367,7 @@ vectorizable_conversion (vec_info *vinfo,
scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
opt_scalar_mode rhs_mode_iter;
+ vec<std::pair<tree, tree_code> > converts = vNULL;
/* Supportable by target? */
switch (modifier)
@@ -5377,99 +5378,25 @@ vectorizable_conversion (vec_info *vinfo,
&& !CONVERT_EXPR_CODE_P (code))
return false;
gcc_assert (code.is_tree_code ());
- if (supportable_convert_operation ((tree_code) code, vectype_out,
- vectype_in, &tc1))
- {
- code1 = tc1;
- break;
- }
-
- /* For conversions between float and integer types try whether
- we can use intermediate signed integer types to support the
- conversion. */
- if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
- && (code == FLOAT_EXPR ||
- (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
- {
- bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
- bool float_expr_p = code == FLOAT_EXPR;
- unsigned short target_size;
- scalar_mode intermediate_mode;
- if (demotion)
- {
- intermediate_mode = lhs_mode;
- target_size = GET_MODE_SIZE (rhs_mode);
- }
+ if (supportable_indirect_convert_operation (code,
+ vectype_out,
+ vectype_in,
+ &converts,
+ op0))
+ {
+ if (converts.length () == 1)
+ code1 = converts[0].second;
else
{
- target_size = GET_MODE_SIZE (lhs_mode);
- if (!int_mode_for_size
- (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
- goto unsupported;
- }
- code1 = float_expr_p ? code : NOP_EXPR;
- codecvt1 = float_expr_p ? NOP_EXPR : code;
- opt_scalar_mode mode_iter;
- FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
- {
- intermediate_mode = mode_iter.require ();
-
- if (GET_MODE_SIZE (intermediate_mode) > target_size)
- break;
-
- scalar_mode cvt_mode;
- if (!int_mode_for_size
- (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
- break;
-
- cvt_type = build_nonstandard_integer_type
- (GET_MODE_BITSIZE (cvt_mode), 0);
-
- /* Check if the intermediate type can hold OP0's range.
- When converting from float to integer this is not necessary
- because values that do not fit the (smaller) target type are
- unspecified anyway. */
- if (demotion && float_expr_p)
- {
- wide_int op_min_value, op_max_value;
- if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
- break;
-
- if (cvt_type == NULL_TREE
- || (wi::min_precision (op_max_value, SIGNED)
- > TYPE_PRECISION (cvt_type))
- || (wi::min_precision (op_min_value, SIGNED)
- > TYPE_PRECISION (cvt_type)))
- continue;
- }
-
- cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
- /* This should only happened for SLP as long as loop vectorizer
- only supports same-sized vector. */
- if (cvt_type == NULL_TREE
- || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
- || !supportable_convert_operation ((tree_code) code1,
- vectype_out,
- cvt_type, &tc1)
- || !supportable_convert_operation ((tree_code) codecvt1,
- cvt_type,
- vectype_in, &tc2))
- continue;
-
- found_mode = true;
- break;
- }
-
- if (found_mode)
- {
- multi_step_cvt++;
- interm_types.safe_push (cvt_type);
- cvt_type = NULL_TREE;
- code1 = tc1;
- codecvt1 = tc2;
- break;
+ gcc_assert (converts.length () == 2);
+ multi_step_cvt = converts.length () - 1;
+ codecvt1 = converts[0].second;
+ code1 = converts[1].second;
+ interm_types.safe_push (converts[0].first);
}
+ break;
}
+
/* FALLTHRU */
unsupported:
if (dump_enabled_p ())
@@ -14626,6 +14553,158 @@ supportable_narrowing_operation (code_helper code,
return false;
}
+/* Function supportable_indirect_convert_operation
+
+   Check whether the conversion CODE from VECTYPE_IN to VECTYPE_OUT is
+   supported by the target platform in vector form, either directly or
+   as two chained convert operations through an intermediate integer
+   vector type.
+
+   The direct form is tried first via supportable_convert_operation.
+   The two-step form is only tried for FLOAT_EXPR, and for FIX_TRUNC_EXPR
+   when !flag_trapping_math, and only if the element sizes of VECTYPE_IN
+   and VECTYPE_OUT differ.
+
+   OP0, when given, is the operand whose value range is checked for a
+   demoting FLOAT_EXPR.  If it is NULL_TREE, is not an SSA_NAME or has no
+   range info, the two-step form is not used for that case.
+
+   Output:
+   - CONVERTS receives one (result vector type, tree code) pair per step,
+     in execution order.  A direct conversion pushes the single pair
+     (VECTYPE_OUT, code).  A two-step conversion pushes the pair for
+     VECTYPE_IN -> intermediate type and then, unless the intermediate
+     type already has the mode of VECTYPE_OUT, the pair for
+     intermediate type -> VECTYPE_OUT.
+
+   Return true if a supported sequence was found.  Nothing is pushed to
+   CONVERTS when false is returned.  */
+bool
+supportable_indirect_convert_operation (code_helper code,
+                                        tree vectype_out,
+                                        tree vectype_in,
+                                        vec<std::pair<tree, tree_code> > *converts,
+                                        tree op0)
+{
+  scalar_mode lhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_out));
+  scalar_mode rhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_in));
+  poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype_in);
+  tree_code tc1, tc2;
+
+  /* A conversion the target supports directly needs a single step.  */
+  if (supportable_convert_operation ((tree_code) code,
+                                     vectype_out,
+                                     vectype_in,
+                                     &tc1))
+    {
+      converts->safe_push (std::make_pair (vectype_out, tc1));
+      return true;
+    }
+
+  /* For conversions between float and integer types try whether
+     we can use intermediate signed integer types to support the
+     conversion.  */
+  if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
+      && (code == FLOAT_EXPR
+          || (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
+    {
+      bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
+      bool float_expr_p = code == FLOAT_EXPR;
+      unsigned short target_size;
+      scalar_mode intermediate_mode;
+      if (demotion)
+        {
+          intermediate_mode = lhs_mode;
+          target_size = GET_MODE_SIZE (rhs_mode);
+        }
+      else
+        {
+          target_size = GET_MODE_SIZE (lhs_mode);
+          if (!int_mode_for_size
+              (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
+            return false;
+        }
+
+      /* CODE2 converts VECTYPE_IN to the intermediate type and CODE1
+         converts the intermediate type to VECTYPE_OUT.  The step that
+         stays within integer types is a NOP_EXPR.  */
+      tree_code code1 = float_expr_p ? (tree_code) code : NOP_EXPR;
+      tree_code code2 = float_expr_p ? NOP_EXPR : (tree_code) code;
+
+      opt_scalar_mode mode_iter;
+      FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
+        {
+          intermediate_mode = mode_iter.require ();
+
+          if (GET_MODE_SIZE (intermediate_mode) > target_size)
+            break;
+
+          scalar_mode cvt_mode;
+          if (!int_mode_for_size
+              (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
+            break;
+
+          tree cvt_type = build_nonstandard_integer_type
+            (GET_MODE_BITSIZE (cvt_mode), 0);
+
+          /* Check if the intermediate type can hold OP0's range.
+             When converting from float to integer this is not necessary
+             because values that do not fit the (smaller) target type are
+             unspecified anyway.  */
+          if (demotion && float_expr_p)
+            {
+              wide_int op_min_value, op_max_value;
+              /* OP0 defaults to NULL_TREE and need not be an SSA_NAME,
+                 so test for both before reading SSA_NAME_RANGE_INFO.
+                 For vector form, it looks like op0 doesn't have
+                 RANGE_INFO.  In the future, if it is supported, changes
+                 may need to be made to this part, such as checking the
+                 RANGE of each element in the vector.  */
+              if (op0 == NULL_TREE
+                  || TREE_CODE (op0) != SSA_NAME
+                  || !SSA_NAME_RANGE_INFO (op0)
+                  || !vect_get_range_info (op0, &op_min_value,
+                                           &op_max_value))
+                break;
+
+              if (cvt_type == NULL_TREE
+                  || (wi::min_precision (op_max_value, SIGNED)
+                      > TYPE_PRECISION (cvt_type))
+                  || (wi::min_precision (op_min_value, SIGNED)
+                      > TYPE_PRECISION (cvt_type)))
+                continue;
+            }
+
+          /* Ask for a vector of NELTS intermediate-type elements, using
+             VECTYPE_IN's mode as the reference mode.  */
+          cvt_type = get_related_vectype_for_scalar_type (TYPE_MODE (vectype_in),
+                                                          cvt_type,
+                                                          nelts);
+          /* This should only happen for SLP as long as loop vectorizer
+             only supports same-sized vector.  */
+          if (cvt_type == NULL_TREE
+              || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nelts)
+              || !supportable_convert_operation (code1,
+                                                 vectype_out,
+                                                 cvt_type, &tc1)
+              || !supportable_convert_operation (code2,
+                                                 cvt_type,
+                                                 vectype_in, &tc2))
+            continue;
+
+          /* Both steps are supported: record them in execution order.
+             The second step is omitted when the intermediate vector
+             type already has the mode of VECTYPE_OUT.  */
+          converts->safe_push (std::make_pair (cvt_type, tc2));
+          if (TYPE_MODE (cvt_type) != TYPE_MODE (vectype_out))
+            converts->safe_push (std::make_pair (vectype_out, tc1));
+          return true;
+        }
+    }
+
+  return false;
+}
+
/* Generate and return a vector mask of MASK_TYPE such that
mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
Add the statements to SEQ. */
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 97ec9c341e7..53b3f24cb2e 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2265,6 +2265,10 @@ extern bool supportable_widening_operation (vec_info*, code_helper,
extern bool supportable_narrowing_operation (code_helper, tree, tree,
code_helper *, int *,
vec<tree> *);
+extern bool supportable_indirect_convert_operation (code_helper,
+ tree, tree,
+ vec<std::pair<tree, tree_code> > *,
+ tree = NULL_TREE);
extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
enum vect_cost_for_stmt, stmt_vec_info,
--
2.31.1
next prev parent reply other threads:[~2024-06-25 3:28 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-05-08 1:38 [PATCH] " Hu, Lin1
2024-05-14 2:25 ` Hu, Lin1
2024-05-14 12:23 ` Richard Biener
2024-05-15 2:30 ` Hu, Lin1
2024-05-23 6:37 ` [PATCH 0/3] Optimize __builtin_convertvector for x86-64-v4 and Hu, Lin1
2024-05-23 6:37 ` [PATCH 1/3] vect: generate suitable convert insn for int -> int, float -> float and int <-> float Hu, Lin1
2024-05-29 9:40 ` Richard Biener
2024-05-31 8:54 ` Hu, Lin1
2024-05-31 12:41 ` Richard Biener
2024-06-03 8:23 ` Hu, Lin1
2024-06-03 9:02 ` Richard Biener
2024-06-03 9:26 ` Hu, Lin1
2024-06-03 9:30 ` Richard Biener
2024-06-11 6:49 ` [PATCH 1/3 v3] " Hu, Lin1
2024-06-17 1:48 ` Hu, Lin1
2024-06-18 11:44 ` Richard Biener
2024-06-20 11:26 ` Hu, Lin1
2024-06-24 12:33 ` Richard Biener
2024-06-24 14:12 ` Tamar Christina
2024-06-25 2:00 ` Hu, Lin1
2024-06-25 3:28 ` Hu, Lin1 [this message]
2024-06-25 13:30 ` [PATCH 1/3 v4] " Richard Biener
2024-06-26 10:54 ` [PATCH 1/3 v5] " Hu, Lin1
2024-05-23 6:37 ` [PATCH 2/3] vect: Support v4hi -> v4qi Hu, Lin1
2024-05-27 2:11 ` Hongtao Liu
2024-05-29 8:55 ` [PATCH 2/3 v2] " Hu, Lin1
2024-05-29 9:09 ` Hongtao Liu
2024-05-23 6:37 ` [PATCH 3/3] vect: support direct conversion under x86-64-v3 Hu, Lin1
2024-05-23 6:42 ` Hongtao Liu
2024-05-23 7:17 ` Hu, Lin1
2024-05-23 8:05 ` Hongtao Liu
2024-05-29 8:59 ` [PATCH 3/3 v2] " Hu, Lin1
2024-05-30 6:51 ` Hongtao Liu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240625032812.3857804-1-lin1.hu@intel.com \
--to=lin1.hu@intel.com \
--cc=Tamar.Christina@arm.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=hongtao.liu@intel.com \
--cc=rguenther@suse.de \
--cc=ubizjak@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).