From: Uros Bizjak <ubizjak@gmail.com>
To: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Subject: [PATCH] i386: Prevent spurious FP exceptions with _mm_cvt{, t}ps_pi32 [PR98522]
Date: Tue, 5 Jan 2021 15:31:17 +0100 [thread overview]
Message-ID: <CAFULd4bntJk=KdD21m-AM+yZ5LtzU3g1WrPmSj94ESe8FGhZpw@mail.gmail.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 589 bytes --]
Prevent spurious FP exceptions with _mm_cvt{,t}ps_pi32 for TARGET_MMX_WITH_SSE
by clearing the top 64 bits of the input XMM register.
2021-01-05 Uroš Bizjak <ubizjak@gmail.com>
gcc/
PR target/98522
* config/i386/sse.md (sse_cvtps2pi): Redefine as define_insn_and_split.
Clear the top 64 bits of the input XMM register.
(sse_cvttps2pi): Ditto.
gcc/testsuite
PR target/98522
* gcc.target/i386/pr98522.c: New test.
Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
Pushed to mainline, will be backported to gcc-10.
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 4135 bytes --]
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index d84103807ff..c8e771fd697 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -5103,31 +5103,65 @@
(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
-(define_insn "sse_cvtps2pi"
+(define_insn_and_split "sse_cvtps2pi"
[(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
(vec_select:V2SI
- (unspec:V4SI [(match_operand:V4SF 1 "register_mmxmem_operand" "xm,YvBm")]
+ (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm")]
UNSPEC_FIX_NOTRUNC)
(parallel [(const_int 0) (const_int 1)])))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
"@
cvtps2pi\t{%1, %0|%0, %q1}
- %vcvtps2dq\t{%1, %0|%0, %1}"
+ #"
+ "TARGET_SSE2 && reload_completed
+ && SSE_REG_P (operands[0])"
+ [(const_int 0)]
+{
+ rtx op1 = lowpart_subreg (V2SFmode, operands[1],
+ GET_MODE (operands[1]));
+ rtx tmp = lowpart_subreg (V4SFmode, operands[0],
+ GET_MODE (operands[0]));
+
+ op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode));
+ emit_insn (gen_rtx_SET (tmp, op1));
+
+ rtx dest = lowpart_subreg (V4SImode, operands[0],
+ GET_MODE (operands[0]));
+ emit_insn (gen_sse2_fix_notruncv4sfv4si (dest, tmp));
+ DONE;
+}
[(set_attr "isa" "*,sse2")
(set_attr "mmx_isa" "native,*")
(set_attr "type" "ssecvt")
(set_attr "unit" "mmx,*")
(set_attr "mode" "DI")])
-(define_insn "sse_cvttps2pi"
+(define_insn_and_split "sse_cvttps2pi"
[(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
(vec_select:V2SI
- (fix:V4SI (match_operand:V4SF 1 "register_mmxmem_operand" "xm,YvBm"))
+ (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm"))
(parallel [(const_int 0) (const_int 1)])))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
"@
cvttps2pi\t{%1, %0|%0, %q1}
- %vcvttps2dq\t{%1, %0|%0, %1}"
+ #"
+ "TARGET_SSE2 && reload_completed
+ && SSE_REG_P (operands[0])"
+ [(const_int 0)]
+{
+ rtx op1 = lowpart_subreg (V2SFmode, operands[1],
+ GET_MODE (operands[1]));
+ rtx tmp = lowpart_subreg (V4SFmode, operands[0],
+ GET_MODE (operands[0]));
+
+ op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode));
+ emit_insn (gen_rtx_SET (tmp, op1));
+
+ rtx dest = lowpart_subreg (V4SImode, operands[0],
+ GET_MODE (operands[0]));
+ emit_insn (gen_fix_truncv4sfv4si2 (dest, tmp));
+ DONE;
+}
[(set_attr "isa" "*,sse2")
(set_attr "mmx_isa" "native,*")
(set_attr "type" "ssecvt")
@@ -8026,7 +8060,7 @@
(define_insn "*vec_concatv4sf_0"
[(set (match_operand:V4SF 0 "register_operand" "=v")
(vec_concat:V4SF
- (match_operand:V2SF 1 "nonimmediate_operand" "xm")
+ (match_operand:V2SF 1 "nonimmediate_operand" "vm")
(match_operand:V2SF 2 "const0_operand" " C")))]
"TARGET_SSE2"
"%vmovq\t{%1, %0|%0, %1}"
@@ -10457,7 +10491,7 @@
[(set (match_operand:VF2_512_256 0 "register_operand" "=v")
(vec_merge:VF2_512_256
(vec_duplicate:VF2_512_256
- (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm"))
+ (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "vm"))
(match_operand:VF2_512_256 1 "const0_operand" "C")
(const_int 1)))]
"TARGET_AVX"
diff --git a/gcc/testsuite/gcc.target/i386/pr98522.c b/gcc/testsuite/gcc.target/i386/pr98522.c
new file mode 100644
index 00000000000..762f2eded50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98522.c
@@ -0,0 +1,39 @@
+/* PR target/98522 */
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target fenv_exceptions } */
+
+#include <emmintrin.h>
+#include <fenv.h>
+
+__m64
+__attribute__((noinline))
+test_cvt (__m128 a)
+{
+ return _mm_cvt_ps2pi (a);
+}
+
+__m64
+__attribute__((noinline))
+test_cvtt (__m128 a)
+{
+ return _mm_cvtt_ps2pi (a);
+}
+
+int
+main ()
+{
+ __m128 x = (__m128)(__m128i){0x0000000000000000LL, 0x7fffffffffffffffLL};
+ volatile __m64 y;
+
+ feclearexcept (FE_INVALID);
+
+ y = test_cvt(x);
+ y = test_cvtt (x);
+
+ if (fetestexcept (FE_INVALID))
+ __builtin_abort ();
+
+ return 0;
+}
+
reply other threads:[~2021-01-05 14:31 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAFULd4bntJk=KdD21m-AM+yZ5LtzU3g1WrPmSj94ESe8FGhZpw@mail.gmail.com' \
--to=ubizjak@gmail.com \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).