public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] i386: Improve vector [GL]E{,U} comparison against vector constants [PR107546]
@ 2022-11-08 10:41 Jakub Jelinek
  2022-11-08 10:50 ` Uros Bizjak
  0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2022-11-08 10:41 UTC (permalink / raw)
  To: Uros Bizjak, Hongtao Liu; +Cc: gcc-patches

Hi!

For integer vector comparisons on targets without XOP and before
AVX512{F,VL}, we are constrained by only GT and EQ being supported in HW.
For GTU we play tricks to implement it using GT or unsigned saturating
subtraction, for LT/LTU we swap the operands and thus turn it into
GT/GTU.  For LE/LEU we handle it by using GT/GTU and negating the
result and for GE/GEU by using GT/GTU on swapped operands and negating
the result.
If the second operand is a CONST_VECTOR, we can usually do better though
and avoid the negation: LE/LEU cst can be done as LT/LTU cst+1 (which,
after swapping the operands, is cst+1 GT/GTU x) and GE/GEU cst can be done
as GT/GTU cst-1, provided there is no wrap-around in cst+1 or cst-1.
GIMPLE canonicalizes x < cst to x <= cst-1 etc. (the rule is to prefer the
constant with the smaller absolute value), but only for scalars or uniform
vectors.  So in some cases this change undoes that canonicalization in order
to avoid the extra negation, and it also handles non-uniform constants.
E.g. with -mavx2 the testcase assembly difference is:
-	movl	$47, %eax
+	movl	$48, %eax
 	vmovdqa	%xmm0, %xmm1
 	vmovd	%eax, %xmm0
 	vpbroadcastb	%xmm0, %xmm0
-	vpminsb	%xmm0, %xmm1, %xmm0
-	vpcmpeqb	%xmm1, %xmm0, %xmm0
+	vpcmpgtb	%xmm1, %xmm0, %xmm0
and
-	vmovdqa	%xmm0, %xmm1
-	vmovdqa	.LC1(%rip), %xmm0
-	vpminsb	%xmm1, %xmm0, %xmm1
-	vpcmpeqb	%xmm1, %xmm0, %xmm0
+	vpcmpgtb	.LC1(%rip), %xmm0, %xmm0
while with just SSE2:
-	pcmpgtb	.LC0(%rip), %xmm0
-	pxor	%xmm1, %xmm1
-	pcmpeqb	%xmm1, %xmm0
+	movdqa	%xmm0, %xmm1
+	movdqa	.LC0(%rip), %xmm0
+	pcmpgtb	%xmm1, %xmm0
and
-	movdqa	%xmm0, %xmm1
-	movdqa	.LC1(%rip), %xmm0
-	pcmpgtb	%xmm1, %xmm0
-	pxor	%xmm1, %xmm1
-	pcmpeqb	%xmm1, %xmm0
+	pcmpgtb	.LC1(%rip), %xmm0

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-11-08  Jakub Jelinek  <jakub@redhat.com>

	PR target/107546
	* config/i386/predicates.md (vector_or_const_vector_operand): New
	predicate.
	* config/i386/sse.md (vec_cmp<mode><sseintvecmodelower>,
	vec_cmpv2div2di, vec_cmpu<mode><sseintvecmodelower>,
	vec_cmpuv2div2di): Use nonimmediate_or_const_vector_operand
	predicate instead of nonimmediate_operand and
	vector_or_const_vector_operand instead of vector_operand.
	* config/i386/i386-expand.cc (ix86_expand_int_sse_cmp): For
	LE/LEU or GE/GEU with CONST_VECTOR cop1 try to transform those
	into LT/LTU or GT/GTU with larger or smaller by one cop1 if
	there is no wrap-around.  Force CONST_VECTOR cop0 or cop1 into
	REG.  Formatting fix.

	* gcc.target/i386/pr107546.c: New test.

--- gcc/config/i386/predicates.md.jj	2022-11-07 10:30:42.739629999 +0100
+++ gcc/config/i386/predicates.md	2022-11-07 11:39:42.665065553 +0100
@@ -1235,6 +1235,13 @@ (define_predicate "vector_operand"
   (ior (match_operand 0 "register_operand")
        (match_operand 0 "vector_memory_operand")))
 
+; Return true when OP is register_operand, vector_memory_operand
+; or const_vector.
+(define_predicate "vector_or_const_vector_operand"
+  (ior (match_operand 0 "register_operand")
+       (match_operand 0 "vector_memory_operand")
+       (match_code "const_vector")))
+
 (define_predicate "bcst_mem_operand"
   (and (match_code "vec_duplicate")
        (and (match_test "TARGET_AVX512F")
--- gcc/config/i386/sse.md.jj	2022-11-01 13:33:17.557857756 +0100
+++ gcc/config/i386/sse.md	2022-11-07 11:43:45.703748212 +0100
@@ -4311,7 +4311,7 @@ (define_expand "vec_cmp<mode><sseintvecm
   [(set (match_operand:<sseintvecmode> 0 "register_operand")
 	(match_operator:<sseintvecmode> 1 ""
 	  [(match_operand:VI_256 2 "register_operand")
-	   (match_operand:VI_256 3 "nonimmediate_operand")]))]
+	   (match_operand:VI_256 3 "nonimmediate_or_const_vector_operand")]))]
   "TARGET_AVX2"
 {
   bool ok = ix86_expand_int_vec_cmp (operands);
@@ -4323,7 +4323,7 @@ (define_expand "vec_cmp<mode><sseintvecm
   [(set (match_operand:<sseintvecmode> 0 "register_operand")
 	(match_operator:<sseintvecmode> 1 ""
 	  [(match_operand:VI124_128 2 "register_operand")
-	   (match_operand:VI124_128 3 "vector_operand")]))]
+	   (match_operand:VI124_128 3 "vector_or_const_vector_operand")]))]
   "TARGET_SSE2"
 {
   bool ok = ix86_expand_int_vec_cmp (operands);
@@ -4335,7 +4335,7 @@ (define_expand "vec_cmpv2div2di"
   [(set (match_operand:V2DI 0 "register_operand")
 	(match_operator:V2DI 1 ""
 	  [(match_operand:V2DI 2 "register_operand")
-	   (match_operand:V2DI 3 "vector_operand")]))]
+	   (match_operand:V2DI 3 "vector_or_const_vector_operand")]))]
   "TARGET_SSE4_2"
 {
   bool ok = ix86_expand_int_vec_cmp (operands);
@@ -4397,7 +4397,7 @@ (define_expand "vec_cmpu<mode><sseintvec
   [(set (match_operand:<sseintvecmode> 0 "register_operand")
 	(match_operator:<sseintvecmode> 1 ""
 	  [(match_operand:VI_256 2 "register_operand")
-	   (match_operand:VI_256 3 "nonimmediate_operand")]))]
+	   (match_operand:VI_256 3 "nonimmediate_or_const_vector_operand")]))]
   "TARGET_AVX2"
 {
   bool ok = ix86_expand_int_vec_cmp (operands);
@@ -4409,7 +4409,7 @@ (define_expand "vec_cmpu<mode><sseintvec
   [(set (match_operand:<sseintvecmode> 0 "register_operand")
 	(match_operator:<sseintvecmode> 1 ""
 	  [(match_operand:VI124_128 2 "register_operand")
-	   (match_operand:VI124_128 3 "vector_operand")]))]
+	   (match_operand:VI124_128 3 "vector_or_const_vector_operand")]))]
   "TARGET_SSE2"
 {
   bool ok = ix86_expand_int_vec_cmp (operands);
@@ -4421,7 +4421,7 @@ (define_expand "vec_cmpuv2div2di"
   [(set (match_operand:V2DI 0 "register_operand")
 	(match_operator:V2DI 1 ""
 	  [(match_operand:V2DI 2 "register_operand")
-	   (match_operand:V2DI 3 "vector_operand")]))]
+	   (match_operand:V2DI 3 "vector_or_const_vector_operand")]))]
   "TARGET_SSE4_2"
 {
   bool ok = ix86_expand_int_vec_cmp (operands);
--- gcc/config/i386/i386-expand.cc.jj	2022-11-07 10:30:42.702630503 +0100
+++ gcc/config/i386/i386-expand.cc	2022-11-07 12:25:25.183638148 +0100
@@ -4510,15 +4510,86 @@ ix86_expand_int_sse_cmp (rtx dest, enum
 	case GTU:
 	  break;
 
-	case NE:
 	case LE:
 	case LEU:
+	  /* x <= cst can be handled as x < cst + 1 unless there is
+	     wrap around in cst + 1.  */
+	  if (GET_CODE (cop1) == CONST_VECTOR
+	      && GET_MODE_INNER (mode) != TImode)
+	    {
+	      unsigned int n_elts = GET_MODE_NUNITS (mode), i;
+	      machine_mode eltmode = GET_MODE_INNER (mode);
+	      for (i = 0; i < n_elts; ++i)
+		{
+		  rtx elt = CONST_VECTOR_ELT (cop1, i);
+		  if (!CONST_INT_P (elt))
+		    break;
+		  if (code == GE)
+		    {
+		      /* For LE punt if some element is signed maximum.  */
+		      if ((INTVAL (elt) & (GET_MODE_MASK (eltmode) >> 1))
+			  == (GET_MODE_MASK (eltmode) >> 1))
+			break;
+		    }
+		  /* For LEU punt if some element is unsigned maximum.  */
+		  else if (elt == constm1_rtx)
+		    break;
+		}
+	      if (i == n_elts)
+		{
+		  rtvec v = rtvec_alloc (n_elts);
+		  for (i = 0; i < n_elts; ++i)
+		    RTVEC_ELT (v, i)
+		      = GEN_INT (INTVAL (CONST_VECTOR_ELT (cop1, i)) + 1);
+		  cop1 = gen_rtx_CONST_VECTOR (mode, v);
+		  std::swap (cop0, cop1);
+		  code = code == LE ? GT : GTU;
+		  break;
+		}
+	    }
+	  /* FALLTHRU */
+	case NE:
 	  code = reverse_condition (code);
 	  *negate = true;
 	  break;
 
 	case GE:
 	case GEU:
+	  /* x >= cst can be handled as x > cst - 1 unless there is
+	     wrap around in cst - 1.  */
+	  if (GET_CODE (cop1) == CONST_VECTOR
+	      && GET_MODE_INNER (mode) != TImode)
+	    {
+	      unsigned int n_elts = GET_MODE_NUNITS (mode), i;
+	      machine_mode eltmode = GET_MODE_INNER (mode);
+	      for (i = 0; i < n_elts; ++i)
+		{
+		  rtx elt = CONST_VECTOR_ELT (cop1, i);
+		  if (!CONST_INT_P (elt))
+		    break;
+		  if (code == GE)
+		    {
+		      /* For GE punt if some element is signed minimum.  */
+		      if (INTVAL (elt) < 0
+			  && ((INTVAL (elt) & (GET_MODE_MASK (eltmode) >> 1))
+			      == 0))
+			break;
+		    }
+		  /* For GEU punt if some element is zero.  */
+		  else if (elt == const0_rtx)
+		    break;
+		}
+	      if (i == n_elts)
+		{
+		  rtvec v = rtvec_alloc (n_elts);
+		  for (i = 0; i < n_elts; ++i)
+		    RTVEC_ELT (v, i)
+		      = GEN_INT (INTVAL (CONST_VECTOR_ELT (cop1, i)) - 1);
+		  cop1 = gen_rtx_CONST_VECTOR (mode, v);
+		  code = code == GE ? GT : GTU;
+		  break;
+		}
+	    }
 	  code = reverse_condition (code);
 	  *negate = true;
 	  /* FALLTHRU */
@@ -4556,6 +4627,11 @@ ix86_expand_int_sse_cmp (rtx dest, enum
 	    }
 	}
 
+      if (GET_CODE (cop0) == CONST_VECTOR)
+	cop0 = force_reg (mode, cop0);
+      else if (GET_CODE (cop1) == CONST_VECTOR)
+	cop1 = force_reg (mode, cop1);
+
       rtx optrue = op_true ? op_true : CONSTM1_RTX (data_mode);
       rtx opfalse = op_false ? op_false : CONST0_RTX (data_mode);
       if (*negate)
@@ -4752,13 +4828,13 @@ ix86_expand_int_sse_cmp (rtx dest, enum
   if (*negate)
     std::swap (op_true, op_false);
 
+  if (GET_CODE (cop1) == CONST_VECTOR)
+    cop1 = force_reg (mode, cop1);
+
   /* Allow the comparison to be done in one mode, but the movcc to
      happen in another mode.  */
   if (data_mode == mode)
-    {
-      x = ix86_expand_sse_cmp (dest, code, cop0, cop1,
-			       op_true, op_false);
-    }
+    x = ix86_expand_sse_cmp (dest, code, cop0, cop1, op_true, op_false);
   else
     {
       gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
--- gcc/testsuite/gcc.target/i386/pr107546.c.jj	2022-11-07 12:40:47.348054087 +0100
+++ gcc/testsuite/gcc.target/i386/pr107546.c	2022-11-07 12:40:25.732349055 +0100
@@ -0,0 +1,19 @@
+/* PR target/107546 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-xop -mno-avx512f" } */
+/* { dg-final { scan-assembler-not "pcmpeqb\t" } } */
+/* { dg-final { scan-assembler-times "pcmpgtb\t" 2 } } */
+
+typedef signed char V __attribute__((vector_size(16)));
+
+V
+foo (V x)
+{
+  return x < 48;
+}
+
+V
+bar (V x)
+{
+  return x >= (V) { 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57 };
+}

	Jakub


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] i386: Improve vector [GL]E{,U} comparison against vector constants [PR107546]
  2022-11-08 10:41 [PATCH] i386: Improve vector [GL]E{,U} comparison against vector constants [PR107546] Jakub Jelinek
@ 2022-11-08 10:50 ` Uros Bizjak
  0 siblings, 0 replies; 2+ messages in thread
From: Uros Bizjak @ 2022-11-08 10:50 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Hongtao Liu, gcc-patches

On Tue, Nov 8, 2022 at 11:42 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> For integer vector comparisons without XOP before AVX512{F,VL} we are
> constrained by only GT and EQ being supported in HW.
> For GTU we play tricks to implement it using GT or unsigned saturating
> subtraction, for LT/LTU we swap the operands and thus turn it into
> GT/GTU.  For LE/LEU we handle it by using GT/GTU and negating the
> result and for GE/GEU by using GT/GTU on swapped operands and negating
> the result.
> If the second operand is a CONST_VECTOR, we can usually do better though,
> we can avoid the negation.  For LE/LEU cst by doing LT/LTU cst+1 (and
> then cst+1 GT/GTU x) and for GE/GEU cst by doing GT/GTU cst-1, provided
> there is no wrap-around on those cst+1 or cst-1.
> GIMPLE canonicalizes x < cst to x <= cst-1 etc. (the rule is smaller
> absolute value on constant), but only for scalars or uniform vectors,
> so in some cases this undoes that canonicalization in order to avoid
> the extra negation, but it handles also non-uniform constants.
> E.g. with -mavx2 the testcase assembly difference is:
> -       movl    $47, %eax
> +       movl    $48, %eax
>         vmovdqa %xmm0, %xmm1
>         vmovd   %eax, %xmm0
>         vpbroadcastb    %xmm0, %xmm0
> -       vpminsb %xmm0, %xmm1, %xmm0
> -       vpcmpeqb        %xmm1, %xmm0, %xmm0
> +       vpcmpgtb        %xmm1, %xmm0, %xmm0
> and
> -       vmovdqa %xmm0, %xmm1
> -       vmovdqa .LC1(%rip), %xmm0
> -       vpminsb %xmm1, %xmm0, %xmm1
> -       vpcmpeqb        %xmm1, %xmm0, %xmm0
> +       vpcmpgtb        .LC1(%rip), %xmm0, %xmm0
> while with just SSE2:
> -       pcmpgtb .LC0(%rip), %xmm0
> -       pxor    %xmm1, %xmm1
> -       pcmpeqb %xmm1, %xmm0
> +       movdqa  %xmm0, %xmm1
> +       movdqa  .LC0(%rip), %xmm0
> +       pcmpgtb %xmm1, %xmm0
> and
> -       movdqa  %xmm0, %xmm1
> -       movdqa  .LC1(%rip), %xmm0
> -       pcmpgtb %xmm1, %xmm0
> -       pxor    %xmm1, %xmm1
> -       pcmpeqb %xmm1, %xmm0
> +       pcmpgtb .LC1(%rip), %xmm0
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2022-11-08  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/107546
>         * config/i386/predicates.md (vector_or_const_vector_operand): New
>         predicate.
>         * config/i386/sse.md (vec_cmp<mode><sseintvecmodelower>,
>         vec_cmpv2div2di, vec_cmpu<mode><sseintvecmodelower>,
>         vec_cmpuv2div2di): Use nonimmediate_or_const_vector_operand
>         predicate instead of nonimmediate_operand and
>         vector_or_const_vector_operand instead of vector_operand.
>         * config/i386/i386-expand.cc (ix86_expand_int_sse_cmp): For
>         LE/LEU or GE/GEU with CONST_VECTOR cop1 try to transform those
>         into LT/LTU or GT/GTU with larger or smaller by one cop1 if
>         there is no wrap-around.  Force CONST_VECTOR cop0 or cop1 into
>         REG.  Formatting fix.
>
>         * gcc.target/i386/pr107546.c: New test.

OK.

Thanks,
Uros.

>
> --- gcc/config/i386/predicates.md.jj    2022-11-07 10:30:42.739629999 +0100
> +++ gcc/config/i386/predicates.md       2022-11-07 11:39:42.665065553 +0100
> @@ -1235,6 +1235,13 @@ (define_predicate "vector_operand"
>    (ior (match_operand 0 "register_operand")
>         (match_operand 0 "vector_memory_operand")))
>
> +; Return true when OP is register_operand, vector_memory_operand
> +; or const_vector.
> +(define_predicate "vector_or_const_vector_operand"
> +  (ior (match_operand 0 "register_operand")
> +       (match_operand 0 "vector_memory_operand")
> +       (match_code "const_vector")))
> +
>  (define_predicate "bcst_mem_operand"
>    (and (match_code "vec_duplicate")
>         (and (match_test "TARGET_AVX512F")
> --- gcc/config/i386/sse.md.jj   2022-11-01 13:33:17.557857756 +0100
> +++ gcc/config/i386/sse.md      2022-11-07 11:43:45.703748212 +0100
> @@ -4311,7 +4311,7 @@ (define_expand "vec_cmp<mode><sseintvecm
>    [(set (match_operand:<sseintvecmode> 0 "register_operand")
>         (match_operator:<sseintvecmode> 1 ""
>           [(match_operand:VI_256 2 "register_operand")
> -          (match_operand:VI_256 3 "nonimmediate_operand")]))]
> +          (match_operand:VI_256 3 "nonimmediate_or_const_vector_operand")]))]
>    "TARGET_AVX2"
>  {
>    bool ok = ix86_expand_int_vec_cmp (operands);
> @@ -4323,7 +4323,7 @@ (define_expand "vec_cmp<mode><sseintvecm
>    [(set (match_operand:<sseintvecmode> 0 "register_operand")
>         (match_operator:<sseintvecmode> 1 ""
>           [(match_operand:VI124_128 2 "register_operand")
> -          (match_operand:VI124_128 3 "vector_operand")]))]
> +          (match_operand:VI124_128 3 "vector_or_const_vector_operand")]))]
>    "TARGET_SSE2"
>  {
>    bool ok = ix86_expand_int_vec_cmp (operands);
> @@ -4335,7 +4335,7 @@ (define_expand "vec_cmpv2div2di"
>    [(set (match_operand:V2DI 0 "register_operand")
>         (match_operator:V2DI 1 ""
>           [(match_operand:V2DI 2 "register_operand")
> -          (match_operand:V2DI 3 "vector_operand")]))]
> +          (match_operand:V2DI 3 "vector_or_const_vector_operand")]))]
>    "TARGET_SSE4_2"
>  {
>    bool ok = ix86_expand_int_vec_cmp (operands);
> @@ -4397,7 +4397,7 @@ (define_expand "vec_cmpu<mode><sseintvec
>    [(set (match_operand:<sseintvecmode> 0 "register_operand")
>         (match_operator:<sseintvecmode> 1 ""
>           [(match_operand:VI_256 2 "register_operand")
> -          (match_operand:VI_256 3 "nonimmediate_operand")]))]
> +          (match_operand:VI_256 3 "nonimmediate_or_const_vector_operand")]))]
>    "TARGET_AVX2"
>  {
>    bool ok = ix86_expand_int_vec_cmp (operands);
> @@ -4409,7 +4409,7 @@ (define_expand "vec_cmpu<mode><sseintvec
>    [(set (match_operand:<sseintvecmode> 0 "register_operand")
>         (match_operator:<sseintvecmode> 1 ""
>           [(match_operand:VI124_128 2 "register_operand")
> -          (match_operand:VI124_128 3 "vector_operand")]))]
> +          (match_operand:VI124_128 3 "vector_or_const_vector_operand")]))]
>    "TARGET_SSE2"
>  {
>    bool ok = ix86_expand_int_vec_cmp (operands);
> @@ -4421,7 +4421,7 @@ (define_expand "vec_cmpuv2div2di"
>    [(set (match_operand:V2DI 0 "register_operand")
>         (match_operator:V2DI 1 ""
>           [(match_operand:V2DI 2 "register_operand")
> -          (match_operand:V2DI 3 "vector_operand")]))]
> +          (match_operand:V2DI 3 "vector_or_const_vector_operand")]))]
>    "TARGET_SSE4_2"
>  {
>    bool ok = ix86_expand_int_vec_cmp (operands);
> --- gcc/config/i386/i386-expand.cc.jj   2022-11-07 10:30:42.702630503 +0100
> +++ gcc/config/i386/i386-expand.cc      2022-11-07 12:25:25.183638148 +0100
> @@ -4510,15 +4510,86 @@ ix86_expand_int_sse_cmp (rtx dest, enum
>         case GTU:
>           break;
>
> -       case NE:
>         case LE:
>         case LEU:
> +         /* x <= cst can be handled as x < cst + 1 unless there is
> +            wrap around in cst + 1.  */
> +         if (GET_CODE (cop1) == CONST_VECTOR
> +             && GET_MODE_INNER (mode) != TImode)
> +           {
> +             unsigned int n_elts = GET_MODE_NUNITS (mode), i;
> +             machine_mode eltmode = GET_MODE_INNER (mode);
> +             for (i = 0; i < n_elts; ++i)
> +               {
> +                 rtx elt = CONST_VECTOR_ELT (cop1, i);
> +                 if (!CONST_INT_P (elt))
> +                   break;
> +                 if (code == GE)
> +                   {
> +                     /* For LE punt if some element is signed maximum.  */
> +                     if ((INTVAL (elt) & (GET_MODE_MASK (eltmode) >> 1))
> +                         == (GET_MODE_MASK (eltmode) >> 1))
> +                       break;
> +                   }
> +                 /* For LEU punt if some element is unsigned maximum.  */
> +                 else if (elt == constm1_rtx)
> +                   break;
> +               }
> +             if (i == n_elts)
> +               {
> +                 rtvec v = rtvec_alloc (n_elts);
> +                 for (i = 0; i < n_elts; ++i)
> +                   RTVEC_ELT (v, i)
> +                     = GEN_INT (INTVAL (CONST_VECTOR_ELT (cop1, i)) + 1);
> +                 cop1 = gen_rtx_CONST_VECTOR (mode, v);
> +                 std::swap (cop0, cop1);
> +                 code = code == LE ? GT : GTU;
> +                 break;
> +               }
> +           }
> +         /* FALLTHRU */
> +       case NE:
>           code = reverse_condition (code);
>           *negate = true;
>           break;
>
>         case GE:
>         case GEU:
> +         /* x >= cst can be handled as x > cst - 1 unless there is
> +            wrap around in cst - 1.  */
> +         if (GET_CODE (cop1) == CONST_VECTOR
> +             && GET_MODE_INNER (mode) != TImode)
> +           {
> +             unsigned int n_elts = GET_MODE_NUNITS (mode), i;
> +             machine_mode eltmode = GET_MODE_INNER (mode);
> +             for (i = 0; i < n_elts; ++i)
> +               {
> +                 rtx elt = CONST_VECTOR_ELT (cop1, i);
> +                 if (!CONST_INT_P (elt))
> +                   break;
> +                 if (code == GE)
> +                   {
> +                     /* For GE punt if some element is signed minimum.  */
> +                     if (INTVAL (elt) < 0
> +                         && ((INTVAL (elt) & (GET_MODE_MASK (eltmode) >> 1))
> +                             == 0))
> +                       break;
> +                   }
> +                 /* For GEU punt if some element is zero.  */
> +                 else if (elt == const0_rtx)
> +                   break;
> +               }
> +             if (i == n_elts)
> +               {
> +                 rtvec v = rtvec_alloc (n_elts);
> +                 for (i = 0; i < n_elts; ++i)
> +                   RTVEC_ELT (v, i)
> +                     = GEN_INT (INTVAL (CONST_VECTOR_ELT (cop1, i)) - 1);
> +                 cop1 = gen_rtx_CONST_VECTOR (mode, v);
> +                 code = code == GE ? GT : GTU;
> +                 break;
> +               }
> +           }
>           code = reverse_condition (code);
>           *negate = true;
>           /* FALLTHRU */
> @@ -4556,6 +4627,11 @@ ix86_expand_int_sse_cmp (rtx dest, enum
>             }
>         }
>
> +      if (GET_CODE (cop0) == CONST_VECTOR)
> +       cop0 = force_reg (mode, cop0);
> +      else if (GET_CODE (cop1) == CONST_VECTOR)
> +       cop1 = force_reg (mode, cop1);
> +
>        rtx optrue = op_true ? op_true : CONSTM1_RTX (data_mode);
>        rtx opfalse = op_false ? op_false : CONST0_RTX (data_mode);
>        if (*negate)
> @@ -4752,13 +4828,13 @@ ix86_expand_int_sse_cmp (rtx dest, enum
>    if (*negate)
>      std::swap (op_true, op_false);
>
> +  if (GET_CODE (cop1) == CONST_VECTOR)
> +    cop1 = force_reg (mode, cop1);
> +
>    /* Allow the comparison to be done in one mode, but the movcc to
>       happen in another mode.  */
>    if (data_mode == mode)
> -    {
> -      x = ix86_expand_sse_cmp (dest, code, cop0, cop1,
> -                              op_true, op_false);
> -    }
> +    x = ix86_expand_sse_cmp (dest, code, cop0, cop1, op_true, op_false);
>    else
>      {
>        gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
> --- gcc/testsuite/gcc.target/i386/pr107546.c.jj 2022-11-07 12:40:47.348054087 +0100
> +++ gcc/testsuite/gcc.target/i386/pr107546.c    2022-11-07 12:40:25.732349055 +0100
> @@ -0,0 +1,19 @@
> +/* PR target/107546 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse2 -mno-xop -mno-avx512f" } */
> +/* { dg-final { scan-assembler-not "pcmpeqb\t" } } */
> +/* { dg-final { scan-assembler-times "pcmpgtb\t" 2 } } */
> +
> +typedef signed char V __attribute__((vector_size(16)));
> +
> +V
> +foo (V x)
> +{
> +  return x < 48;
> +}
> +
> +V
> +bar (V x)
> +{
> +  return x >= (V) { 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57 };
> +}
>
>         Jakub
>

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-11-08 10:51 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-08 10:41 [PATCH] i386: Improve vector [GL]E{,U} comparison against vector constants [PR107546] Jakub Jelinek
2022-11-08 10:50 ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).