public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fix x86 bmi2 bzhi patterns (PR target/65368)
@ 2015-03-10 19:14 Jakub Jelinek
  2015-03-10 19:26 ` Uros Bizjak
  0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2015-03-10 19:14 UTC (permalink / raw)
  To: Uros Bizjak, Jan Hubicka; +Cc: gcc-patches

Hi!

The following testcase fails, because the RTL pattern of bmi2_bzhi_*3
didn't really model (not even close) what the instruction was doing,
so when the combiner attempted to simplify it for constant arguments, it
simplified it into something wrong.

Fixed thusly.  I verified that we don't regress on combining memory
for the middle operand, and bootstrapped/regtested on x86_64-linux and
i686-linux (both Haswell-E).  Ok for trunk (and eventually for backporting)?

2015-03-10  Jakub Jelinek  <jakub@redhat.com>

	PR target/65368
	* config/i386/i386.md (bmi2_bzhi_<mode>3): Removed define_insn,
	new define_expand.
	(*bmi2_bzhi_<mode>3, *bmi2_bzhi_<mode>3_1): New define_insns.

	* gcc.target/i386/bmi2-bzhi-2.c: New test.

--- gcc/config/i386/i386.md.jj	2015-03-09 08:05:13.000000000 +0100
+++ gcc/config/i386/i386.md	2015-03-10 09:37:56.615909734 +0100
@@ -12678,17 +12678,53 @@ (define_insn "*bmi_blsr_<mode>"
    (set_attr "mode" "<MODE>")])
 
 ;; BMI2 instructions.
-(define_insn "bmi2_bzhi_<mode>3"
+(define_expand "bmi2_bzhi_<mode>3"
+  [(parallel
+    [(set (match_operand:SWI48 0 "register_operand")
+	  (zero_extract:SWI48
+	    (match_operand:SWI48 1 "nonimmediate_operand")
+	    (umin:SWI48
+	      (and:SWI48 (match_operand:SWI48 2 "register_operand")
+			 (const_int 255))
+	      (match_dup 3))
+	    (const_int 0)))
+     (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_BMI2"
+{
+  operands[3] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
+})
+
+(define_insn "*bmi2_bzhi_<mode>3"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
-	(and:SWI48 (lshiftrt:SWI48 (const_int -1)
-				   (match_operand:SWI48 2 "register_operand" "r"))
-		   (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+	(zero_extract:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+	  (umin:SWI48
+	    (and:SWI48 (match_operand:SWI48 2 "register_operand" "r")
+		       (const_int 255))
+	    (match_operand:SWI48 3 "const_int_operand" "n"))
+	  (const_int 0)))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_BMI2"
+  "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
   "bzhi\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "bitmanip")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<MODE>")])
+
+(define_mode_attr k [(SI "k") (DI "q")])
+(define_insn "*bmi2_bzhi_<mode>3_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(zero_extract:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+	  (umin:SWI48
+	    (zero_extend:SWI48 (match_operand:QI 2 "register_operand" "r"))
+	    (match_operand:SWI48 3 "const_int_operand" "n"))
+	  (const_int 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
+  "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "bmi2_pdep_<mode>3"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
--- gcc/testsuite/gcc.target/i386/bmi2-bzhi-2.c.jj	2015-03-10 09:43:06.318843305 +0100
+++ gcc/testsuite/gcc.target/i386/bmi2-bzhi-2.c	2015-03-10 09:42:25.000000000 +0100
@@ -0,0 +1,67 @@
+/* PR target/65368 */
+/* { dg-do assemble { target bmi2 } } */
+/* { dg-options "-O2 -mbmi2" } */
+
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+unsigned int a;
+unsigned long long b;
+
+#define A __attribute__((noinline, noclone))
+
+A unsigned int f1 (void) { return _bzhi_u32 (a, 0); }
+A unsigned int f2 (unsigned int x) { return _bzhi_u32 (x, 0); }
+A unsigned int f3 (void) { return _bzhi_u32 (a, 5); }
+A unsigned int f4 (unsigned int x) { return _bzhi_u32 (x, 5); }
+A unsigned int f5 (void) { return _bzhi_u32 (a, 31); }
+A unsigned int f6 (unsigned int x) { return _bzhi_u32 (x, 31); }
+A unsigned int f7 (void) { return _bzhi_u32 (a, 32); }
+A unsigned int f8 (unsigned int x) { return _bzhi_u32 (x, 32); }
+A unsigned int f9 (void) { return _bzhi_u32 (a, 37); }
+A unsigned int f10 (unsigned int x) { return _bzhi_u32 (x, 37); }
+A unsigned int f11 (void) { return _bzhi_u32 (a, 257); }
+A unsigned int f12 (unsigned int x) { return _bzhi_u32 (x, 257); }
+A unsigned int f13 (void) { return _bzhi_u32 (a, 289); }
+A unsigned int f14 (unsigned int x) { return _bzhi_u32 (x, 289); }
+#ifdef __x86_64__
+A unsigned long long f21 (void) { return _bzhi_u64 (b, 0); }
+A unsigned long long f22 (unsigned long long x) { return _bzhi_u64 (x, 0); }
+A unsigned long long f23 (void) { return _bzhi_u64 (b, 5); }
+A unsigned long long f24 (unsigned long long x) { return _bzhi_u64 (x, 5); }
+A unsigned long long f25 (void) { return _bzhi_u64 (b, 63); }
+A unsigned long long f26 (unsigned long long x) { return _bzhi_u64 (x, 63); }
+A unsigned long long f27 (void) { return _bzhi_u64 (b, 64); }
+A unsigned long long f28 (unsigned long long x) { return _bzhi_u64 (x, 64); }
+A unsigned long long f29 (void) { return _bzhi_u64 (b, 69); }
+A unsigned long long f30 (unsigned long long x) { return _bzhi_u64 (x, 69); }
+A unsigned long long f31 (void) { return _bzhi_u64 (b, 257); }
+A unsigned long long f32 (unsigned long long x) { return _bzhi_u64 (x, 257); }
+A unsigned long long f33 (void) { return _bzhi_u64 (b, 321); }
+A unsigned long long f34 (unsigned long long x) { return _bzhi_u64 (x, 321); }
+#endif
+
+static void
+bmi2_test ()
+{
+  a = -1U;
+  b = -1ULL;
+  if (f1 () != 0 || f2 (-1U) != 0
+      || f3 () != 0x1f || f4 (-1U) != 0x1f
+      || f5 () != 0x7fffffffU || f6 (-1U) != 0x7fffffffU
+      || f7 () != -1U || f8 (-1U) != -1U
+      || f9 () != -1U || f10 (-1U) != -1U
+      || f11 () != 1 || f12 (-1U) != 1
+      || f13 () != -1U || f14 (-1U) != -1U)
+    abort ();
+#ifdef __x86_64__
+  if (f21 () != 0 || f22 (-1ULL) != 0
+      || f23 () != 0x1f || f24 (-1ULL) != 0x1f
+      || f25 () != 0x7fffffffffffffffULL || f26 (-1ULL) != 0x7fffffffffffffffULL
+      || f27 () != -1ULL || f28 (-1ULL) != -1ULL
+      || f29 () != -1ULL || f30 (-1ULL) != -1ULL
+      || f31 () != 1 || f32 (-1ULL) != 1
+      || f33 () != -1ULL || f34 (-1ULL) != -1ULL)
+    abort ();
+#endif
+}

	Jakub

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] Fix x86 bmi2 bzhi patterns (PR target/65368)
  2015-03-10 19:14 [PATCH] Fix x86 bmi2 bzhi patterns (PR target/65368) Jakub Jelinek
@ 2015-03-10 19:26 ` Uros Bizjak
  0 siblings, 0 replies; 2+ messages in thread
From: Uros Bizjak @ 2015-03-10 19:26 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Jan Hubicka, gcc-patches

On Tue, Mar 10, 2015 at 8:14 PM, Jakub Jelinek <jakub@redhat.com> wrote:

> The following testcase fails, because the RTL pattern of bmi2_bzhi_*3
> didn't really model (not even close) what the instruction was doing,
> so when combiner attempted to simplify it for constant arguments, it
> simplified it into something wrong.
>
> Fixed thusly, verified we don't regress on combining memory
> for the middle operand, bootstrapped/regtested on x86_64-linux and
> i686-linux (both Haswell-E), ok for trunk (and eventually backporting it)?
>
> 2015-03-10  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/65368
>         * config/i386/i386.md (bmi2_bzhi_<mode>3): Removed define_insn,
>         new define_expand.
>         (*bmi2_bzhi_<mode>3, *bmi2_bzhi_<mode>3_1): New define_insns.
>
>         * gcc.target/i386/bmi2-bzhi-2.c: New test.

OK for mainline and release branches.

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2015-03-10 19:26 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-10 19:14 [PATCH] Fix x86 bmi2 bzhi patterns (PR target/65368) Jakub Jelinek
2015-03-10 19:26 ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).