Fix PR48052: loop not vectorized if index is "unsigned int"

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* Fix PR48052: loop not vectorized if index is "unsigned int"
@ 2015-05-04 19:47 Abderrazek Zaafrani
  2015-05-06 11:02 ` Richard Biener
  0 siblings, 1 reply; 9+ messages in thread
From: Abderrazek Zaafrani @ 2015-05-04 19:47 UTC (permalink / raw)
  To: gcc-patches, sebpop

[-- Attachment #1: Type: text/plain, Size: 1139 bytes --]

This is an old thread and we are still running into similar issues:
Code is not being vectorized on 64-bit target due to scev not being
able to optimally analyze overflow condition.

While the original test case shown here seems to work now, it does not
work if the start value is not a constant and the loop index variable
is of unsigned type: Ex

void loop2( double const * __restrict__ x_in, double * __restrict__
x_out, double const * __restrict__ c, unsigned int N, unsigned int
start) {
 for(unsigned int i=start; i!=N; ++i)
   x_out[i] = c[i]*x_in[i];
}

Here is our unit test:

int foo(int* A, int* B, unsigned start, unsigned B)
{
  int s;
  for (unsigned k = start; k <start+B; k++)
    s += A[k] * B[k];
  return s;
}

Our unit test case is extracted from a matrix multiply of a
two-dimensional array and all loops are blocked by hand by a factor of
B. Even though a bit modified, above loop corresponds to the innermost
loop of the blocked matrix multiply.

We worked on patch to solve the problem (see attachment.)
The attached patch passed bootstrap and make check on x86_64-linux.
Ok for trunk?

Thanks,
Abderrazek Zaafrani

[-- Attachment #2: 0001-scev-for-vectorization.patch --]
[-- Type: text/plain, Size: 5307 bytes --]

From eedbcd1ef6a81bb9c000e0dba9ff2a6c524576ac Mon Sep 17 00:00:00 2001
From: Abderrazek Zaafrani <a.zaafrani@samsung.com>
Date: Mon, 4 May 2015 11:00:12 -0500
Subject: [PATCH] scev for vectorization

	PR optimization/48052
	* tree-ssa-loop-niter.c	(variable_appears_in_loop_exit_condition): New.
	(scev_probably_wraps_p): Handle unsigned convert expressions to a larger type
	than the basic induction variable.

	* gcc.dg/vect/pr48052.c: New.
---
 gcc/testsuite/gcc.dg/vect/pr48052.c | 27 ++++++++++++
 gcc/tree-ssa-loop-niter.c           | 84 +++++++++++++++++++++++++++++++++++++
 2 files changed, 111 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr48052.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr48052.c b/gcc/testsuite/gcc.dg/vect/pr48052.c
new file mode 100644
index 0000000..8e406d7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr48052.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
+int foo(int* A, int* B,  unsigned start, unsigned BS)
+{
+  int s;
+  for (unsigned k = start;  k < start + BS; k++)
+    {
+      s += A[k] * B[k];
+    }
+
+  return s;
+}
+
+int bar(int* A, int* B, unsigned BS)
+{
+  int s;
+  for (unsigned k = 0;  k < BS; k++)
+    {
+      s += A[k] * B[k];
+    }
+
+  return s;
+}
+
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 042f8df..345fb93 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -3773,6 +3773,30 @@ nowrap_type_p (tree type)
   return false;
 }
 
+/* Returns true when T appears in the exit condition of LOOP.  */
+
+static bool
+variable_appears_in_loop_exit_condition (tree t, struct loop *loop)
+{
+  struct nb_iter_bound *bound;
+
+  /* For now, we are only interested in loops with one exit condition.  */
+  if (loop->bounds == NULL || loop->bounds->next != NULL)
+      return false;
+
+  for (bound = loop->bounds; bound; bound = bound->next)
+    {
+      if (gimple_code (bound->stmt) != GIMPLE_COND)
+        return false;
+
+      if (t == gimple_cond_lhs(bound->stmt)
+	  || t == gimple_cond_rhs(bound->stmt))
+        return true;
+    }
+
+  return false;
+}
+
 /* Return false only when the induction variable BASE + STEP * I is
    known to not overflow: i.e. when the number of iterations is small
    enough with respect to the step and initial condition in order to
@@ -3879,6 +3903,66 @@ scev_probably_wraps_p (tree base, tree step,
 
   fold_undefer_and_ignore_overflow_warnings ();
 
+  /* At this point, we could not determine that the current scalar
+     evolution composed of base and step does not overflow.  In order
+     to improve this analysis, go back to the context of this scev,
+     i.e., statement and loop, and determine from there if we can
+     deduce that there is no overflow.
+
+     We are so far interested in convert statement of this form
+
+     _1 = (some cast) I;
+
+     where I is a basic induction variable.  This case is common when
+     computing addresses for 64-bit targets.  */
+  if (loop != NULL && loop->nb_iterations != NULL && loop->bounds != NULL
+      && at_stmt != NULL && integer_onep (step))
+    {
+      enum tree_code nbi_code = TREE_CODE (loop->nb_iterations);
+      enum gimple_code stmt_code = gimple_code (at_stmt);
+
+      if (nbi_code != SCEV_NOT_KNOWN && stmt_code == GIMPLE_ASSIGN)
+        {
+          tree rhs1 = gimple_assign_rhs1 (at_stmt);
+          enum tree_code tree_code = gimple_assign_rhs_code (at_stmt);
+          tree rhs2 = gimple_assign_rhs2 (at_stmt);
+
+          /* If at_stmt is a convert statement: _1 = (some cast) I;  */
+          if (rhs1 != NULL && rhs2 == NULL
+              && (tree_code == CONVERT_EXPR || tree_code == NOP_EXPR))
+            {
+              tree stmt_type = TREE_TYPE (gimple_assign_lhs (at_stmt));
+              int stmt_type_size = tree_to_uhwi (TYPE_SIZE(stmt_type));
+              int rhs1_type_size = tree_to_uhwi (TYPE_SIZE(TREE_TYPE(rhs1)));
+              gimple def_rhs1 = SSA_NAME_DEF_STMT (rhs1);
+
+              if (gimple_code (def_rhs1) == GIMPLE_PHI
+		  && gimple_phi_num_args (def_rhs1) == 2
+		  && stmt_type_size > rhs1_type_size)
+                {
+                  tree n1 = PHI_ARG_DEF (def_rhs1, 0);
+                  tree n2 = PHI_ARG_DEF (def_rhs1, 1);
+
+		  /* Induction variables with a constant initial value
+		     are already handled above.  */
+                  if (TREE_CODE (n1) != INTEGER_CST)
+                    {
+                      gimple n1_stmt = SSA_NAME_DEF_STMT (n1);
+                      if (n1_stmt != NULL
+                          && (n1_stmt->bb == NULL
+			      || n1_stmt->bb->loop_father != loop)
+                          && variable_appears_in_loop_exit_condition (n2, loop))
+			/* There is no overflow on _1 = (some cast) I;
+			   because the cast is to a larger type than
+			   the type of the basic induction variable
+			   "I" and the loop is countable.  */
+                        return false;
+                    }
+                }
+            }
+        }
+    }
+
   /* At this point we still don't have a proof that the iv does not
      overflow: give up.  */
   return true;
-- 
2.1.0.243.g30d45f7


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Fix PR48052: loop not vectorized if index is "unsigned int"
  2015-05-04 19:47 Fix PR48052: loop not vectorized if index is "unsigned int" Abderrazek Zaafrani
@ 2015-05-06 11:02 ` Richard Biener
  2015-05-07 20:23   ` Abderrazek Zaafrani
  2015-05-19 11:16   ` Bin.Cheng
  0 siblings, 2 replies; 9+ messages in thread
From: Richard Biener @ 2015-05-06 11:02 UTC (permalink / raw)
  To: Abderrazek Zaafrani; +Cc: GCC Patches, Sebastian Pop

On Mon, May 4, 2015 at 9:47 PM, Abderrazek Zaafrani
<az.zaafrani@gmail.com> wrote:
> This is an old thread and we are still running into similar issues:
> Code is not being vectorized on 64-bit target due to scev not being
> able to optimally analyze overflow condition.
>
> While the original test case shown here seems to work now, it does not
> work if the start value is not a constant and the loop index variable
> is of unsigned type: Ex
>
> void loop2( double const * __restrict__ x_in, double * __restrict__
> x_out, double const * __restrict__ c, unsigned int N, unsigned int
> start) {
>  for(unsigned int i=start; i!=N; ++i)
>    x_out[i] = c[i]*x_in[i];
> }
>
> Here is our unit test:
>
> int foo(int* A, int* B, unsigned start, unsigned B)
> {
>   int s;
>   for (unsigned k = start; k <start+B; k++)
>     s += A[k] * B[k];
>   return s;
> }
>
> Our unit test case is extracted from a matrix multiply of a
> two-dimensional array and all loops are blocked by hand by a factor of
> B. Even though a bit modified, above loop corresponds to the innermost
> loop of the blocked matrix multiply.
>
> We worked on patch to solve the problem (see attachment.)
> The attached patch passed bootstrap and make check on x86_64-linux.
> Ok for trunk?

Apart from coding style / API issues the case you handle is very special
(IVs with step 1 only?!) I believe it is also wrong - the assumption that
if there is a symbolic or constant expression for the number of iterations
a BIV will not wrap is not true.  niter analysis can very well compute
the number of iterations for a loop with wrapping IVs.  For your unit test
this only works because of the special-casing of step 1 IVs.

Technically it might be more interesting to compute wrapping of IVs
during niter analysis in some more generic way (we have iv->no_overflow
computed by simple_iv, but that is rather not useful here).

Richard.

> Thanks,
> Abderrazek Zaafrani

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Fix PR48052: loop not vectorized if index is "unsigned int"
  2015-05-06 11:02 ` Richard Biener
@ 2015-05-07 20:23   ` Abderrazek Zaafrani
  2015-05-19 11:16   ` Bin.Cheng
  1 sibling, 0 replies; 9+ messages in thread
From: Abderrazek Zaafrani @ 2015-05-07 20:23 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches, Sebastian Pop

Richard,

Agree that the code is handling a very special case but this special
case is common enough and is limiting the vectorizer in a significant
way. The special case is: loops with unsigned index, non-constant
start value, and step 1. We have a code for a matrix multiply – loops
blocked by hand -  taken from an industry benchmark that is not being
vectorized because of the overflow issue. Note that if we relax the
step 1 assumption, then we will probably not be able to prove
non-overflow and note also that the general case such as constant
start value is already working fine.

I am not too sure about the incorrectness mentioned below for the
cases we are handling. loop->nb_iterations holds a symbolic expression
for the number of iterations (our special case falls into the symbolic
expression). Based on several loops that I experimented with and that
fall under our limited scope, we have either this symbolic expression
holding the exact number of iterations for the loop and without
overflow or the scev_not_known flag is set to true. May be you can
share an example in case you have an example in mind.

The suggestion about improving niter analysis and improving
iv->no_overflow flag and moving what we are trying to do here into
that section with the possibility of using existing information is
good and we may look into it.

Abderrazek

On Wed, May 6, 2015 at 6:02 AM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Mon, May 4, 2015 at 9:47 PM, Abderrazek Zaafrani
> <az.zaafrani@gmail.com> wrote:
>> This is an old thread and we are still running into similar issues:
>> Code is not being vectorized on 64-bit target due to scev not being
>> able to optimally analyze overflow condition.
>>
>> While the original test case shown here seems to work now, it does not
>> work if the start value is not a constant and the loop index variable
>> is of unsigned type: Ex
>>
>> void loop2( double const * __restrict__ x_in, double * __restrict__
>> x_out, double const * __restrict__ c, unsigned int N, unsigned int
>> start) {
>>  for(unsigned int i=start; i!=N; ++i)
>>    x_out[i] = c[i]*x_in[i];
>> }
>>
>> Here is our unit test:
>>
>> int foo(int* A, int* B, unsigned start, unsigned B)
>> {
>>   int s;
>>   for (unsigned k = start; k <start+B; k++)
>>     s += A[k] * B[k];
>>   return s;
>> }
>>
>> Our unit test case is extracted from a matrix multiply of a
>> two-dimensional array and all loops are blocked by hand by a factor of
>> B. Even though a bit modified, above loop corresponds to the innermost
>> loop of the blocked matrix multiply.
>>
>> We worked on patch to solve the problem (see attachment.)
>> The attached patch passed bootstrap and make check on x86_64-linux.
>> Ok for trunk?
>
> Apart from coding style / API issues the case you handle is very special
> (IVs with step 1 only?!) I believe it is also wrong - the assumption that
> if there is a symbolic or constant expression for the number of iterations
> a BIV will not wrap is not true.  niter analysis can very well compute
> the number of iterations for a loop with wrapping IVs.  For your unit test
> this only works because of the special-casing of step 1 IVs.
>
> Technically it might be more interesting to compute wrapping of IVs
> during niter analysis in some more generic way (we have iv->no_overflow
> computed by simple_iv, but that is rather not useful here).
>
> Richard.
>
>> Thanks,
>> Abderrazek Zaafrani

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Fix PR48052: loop not vectorized if index is "unsigned int"
  2015-05-06 11:02 ` Richard Biener
  2015-05-07 20:23   ` Abderrazek Zaafrani
@ 2015-05-19 11:16   ` Bin.Cheng
  1 sibling, 0 replies; 9+ messages in thread
From: Bin.Cheng @ 2015-05-19 11:16 UTC (permalink / raw)
  To: Richard Biener; +Cc: Abderrazek Zaafrani, GCC Patches, Sebastian Pop

On Wed, May 6, 2015 at 7:02 PM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Mon, May 4, 2015 at 9:47 PM, Abderrazek Zaafrani
> <az.zaafrani@gmail.com> wrote:
>> This is an old thread and we are still running into similar issues:
>> Code is not being vectorized on 64-bit target due to scev not being
>> able to optimally analyze overflow condition.
>>
>> While the original test case shown here seems to work now, it does not
>> work if the start value is not a constant and the loop index variable
>> is of unsigned type: Ex
>>
>> void loop2( double const * __restrict__ x_in, double * __restrict__
>> x_out, double const * __restrict__ c, unsigned int N, unsigned int
>> start) {
>>  for(unsigned int i=start; i!=N; ++i)
>>    x_out[i] = c[i]*x_in[i];
>> }
>>
>> Here is our unit test:
>>
>> int foo(int* A, int* B, unsigned start, unsigned B)
>> {
>>   int s;
>>   for (unsigned k = start; k <start+B; k++)
>>     s += A[k] * B[k];
>>   return s;
>> }
>>
>> Our unit test case is extracted from a matrix multiply of a
>> two-dimensional array and all loops are blocked by hand by a factor of
>> B. Even though a bit modified, above loop corresponds to the innermost
>> loop of the blocked matrix multiply.
>>
>> We worked on patch to solve the problem (see attachment.)
>> The attached patch passed bootstrap and make check on x86_64-linux.
>> Ok for trunk?
>
> Apart from coding style / API issues the case you handle is very special
> (IVs with step 1 only?!) I believe it is also wrong - the assumption that
> if there is a symbolic or constant expression for the number of iterations
> a BIV will not wrap is not true.  niter analysis can very well compute
> the number of iterations for a loop with wrapping IVs.  For your unit test
> this only works because of the special-casing of step 1 IVs.
I happen to look into similar issue right now.  scev_probably_wraps_p
and thus chrec_convert_1 should be improved using niter information.
Actually all information (and the wrap behavior) has already been
computed in tree-ssa-loop-niter.c.  We just need to find a way to used
it.

>
> Technically it might be more interesting to compute wrapping of IVs
> during niter analysis in some more generic way (we have iv->no_overflow
> computed by simple_iv, but that is rather not useful here).

For it iv->no_overflow is computed in simple_iv as below:
      tmp = analyze_scalar_evolution (use_loop, ev);
      ev = resolve_mixers (use_loop, tmp);

      if (folded_casts && tmp != ev)
    *folded_casts = true;

It's inaccurate because calling resolve_mixers doesn't mean the result
scev will wrap.  resolve_mixers could have just done exact the same
transformation as instantiate_parameters.  Also
chrec_convert_aggressive is incomplete and need to revised too.

Thanks,
bin
>
> Richard.
>
>> Thanks,
>> Abderrazek Zaafrani

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Fix PR48052: loop not vectorized if index is "unsigned int"
  2015-06-01  8:00   ` Richard Biener
@ 2015-06-01  8:15     ` Bin.Cheng
  0 siblings, 0 replies; 9+ messages in thread
From: Bin.Cheng @ 2015-06-01  8:15 UTC (permalink / raw)
  To: Richard Biener
  Cc: Jeff Law, Bingfeng Mei, Aditya K, gcc-patches, a.zaafrani, sebpop

On Mon, Jun 1, 2015 at 4:00 PM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Sat, May 30, 2015 at 7:47 AM, Jeff Law <law@redhat.com> wrote:
>> On 05/19/2015 10:12 AM, Aditya K wrote:
>>>
>>> w.r.t. the PR48052, here is the patch which finds out if scev would wrap
>>> or not.
>>> The patch symbolically evaluates if valid_niter>= loop->nb_iterations is
>>> true. In that case the scev would not wrap (??).
>>> Currently, we only look for two special 'patterns', which are sufficient
>>> to analyze the simple test cases.
>>>
>>> valid_niter = ~s (= UNIT_MAX - s)
>>> We have to prove that valid_niter>= loop->nb_iterations
>>>
>>> Pattern1 loop->nb_iterations: s>= e ? s - e : 0
>>> Pattern2 loop->nb_iterations: (e - s) -1
>>>
>>> In the first case we prove that valid_niter>= loop->nb_iterations in both
>>> the cases i.e., when s>=e and when not.
>>> In the second case we prove valid_niter>= loop->nb_iterations, by simple
>>> analysis that  UINT_MAX>= e is true in all cases.
>>>
>>> I haven't tested this patch completely. I'm looking for feedback and any
>>> scope for improvement.
>>>
>>>
>>> hth,
>>> -Aditya
>>>
>>>
>>>
>>> Vectorize loops which has typecast.
>>>
>>> 2015-05-19  hiraditya  <hiraditya@msn.com>
>>>
>>>          * gcc.dg/vect/pr48052.c: New test.
>>>
>>> gcc/ChangeLog:
>>>
>>> 2015-05-19  hiraditya  <hiraditya@msn.com>
>>>
>>>          * tree-ssa-loop-niter.c (fold_binary_cond_p): Fold a conditional
>>> operation when additional constraints are
>>>          available.
>>>          (fold_binary_minus_p): Fold a subtraction operations of the form
>>> (A - B -1) when additional constraints are
>>>          available.
>>>          (scev_probably_wraps_p): Use the above two functions to find
>>> whether valid_niter>= loop->nb_iterations.
>>
>> Is any of this work still useful if Bin Cheng's work on improving overflow
>> detection for scev goes forward?  I certainly got the impression that Bin's
>> work would solve 48052 and others.
>
> Bin is probably the one to answer this.  His patches are still on my list
> of patches to review...
Richard, you have already approved the first one.  The second one is
at https://gcc.gnu.org/ml/gcc-patches/2015-05/msg02317.html . You may
need to decide if it is as expected.

Yes, PRs like 62173, 52563 and 48052 will be fixed with these two patches.

Thanks,
bin
>
> Richard.
>
>> Jeff
>>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Fix PR48052: loop not vectorized if index is "unsigned int"
  2015-05-30  9:11 ` Jeff Law
@ 2015-06-01  8:00   ` Richard Biener
  2015-06-01  8:15     ` Bin.Cheng
  0 siblings, 1 reply; 9+ messages in thread
From: Richard Biener @ 2015-06-01  8:00 UTC (permalink / raw)
  To: Jeff Law, Bingfeng Mei; +Cc: Aditya K, gcc-patches, a.zaafrani, sebpop

On Sat, May 30, 2015 at 7:47 AM, Jeff Law <law@redhat.com> wrote:
> On 05/19/2015 10:12 AM, Aditya K wrote:
>>
>> w.r.t. the PR48052, here is the patch which finds out if scev would wrap
>> or not.
>> The patch symbolically evaluates if valid_niter>= loop->nb_iterations is
>> true. In that case the scev would not wrap (??).
>> Currently, we only look for two special 'patterns', which are sufficient
>> to analyze the simple test cases.
>>
>> valid_niter = ~s (= UNIT_MAX - s)
>> We have to prove that valid_niter>= loop->nb_iterations
>>
>> Pattern1 loop->nb_iterations: s>= e ? s - e : 0
>> Pattern2 loop->nb_iterations: (e - s) -1
>>
>> In the first case we prove that valid_niter>= loop->nb_iterations in both
>> the cases i.e., when s>=e and when not.
>> In the second case we prove valid_niter>= loop->nb_iterations, by simple
>> analysis that  UINT_MAX>= e is true in all cases.
>>
>> I haven't tested this patch completely. I'm looking for feedback and any
>> scope for improvement.
>>
>>
>> hth,
>> -Aditya
>>
>>
>>
>> Vectorize loops which has typecast.
>>
>> 2015-05-19  hiraditya  <hiraditya@msn.com>
>>
>>          * gcc.dg/vect/pr48052.c: New test.
>>
>> gcc/ChangeLog:
>>
>> 2015-05-19  hiraditya  <hiraditya@msn.com>
>>
>>          * tree-ssa-loop-niter.c (fold_binary_cond_p): Fold a conditional
>> operation when additional constraints are
>>          available.
>>          (fold_binary_minus_p): Fold a subtraction operations of the form
>> (A - B -1) when additional constraints are
>>          available.
>>          (scev_probably_wraps_p): Use the above two functions to find
>> whether valid_niter>= loop->nb_iterations.
>
> Is any of this work still useful if Bin Cheng's work on improving overflow
> detection for scev goes forward?  I certainly got the impression that Bin's
> work would solve 48052 and others.

Bin is probably the one to answer this.  His patches are still on my list
of patches to review...

Richard.

> Jeff
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Fix PR48052: loop not vectorized if index is "unsigned int"
  2015-05-19 16:50 Aditya K
  2015-05-21 16:02 ` Aditya K
@ 2015-05-30  9:11 ` Jeff Law
  2015-06-01  8:00   ` Richard Biener
  1 sibling, 1 reply; 9+ messages in thread
From: Jeff Law @ 2015-05-30  9:11 UTC (permalink / raw)
  To: Aditya K, gcc-patches, a.zaafrani, sebpop, richard.guenther

On 05/19/2015 10:12 AM, Aditya K wrote:
> w.r.t. the PR48052, here is the patch which finds out if scev would wrap or not.
> The patch symbolically evaluates if valid_niter>= loop->nb_iterations is true. In that case the scev would not wrap (??).
> Currently, we only look for two special 'patterns', which are sufficient to analyze the simple test cases.
>
> valid_niter = ~s (= UNIT_MAX - s)
> We have to prove that valid_niter>= loop->nb_iterations
>
> Pattern1 loop->nb_iterations: s>= e ? s - e : 0
> Pattern2 loop->nb_iterations: (e - s) -1
>
> In the first case we prove that valid_niter>= loop->nb_iterations in both the cases i.e., when s>=e and when not.
> In the second case we prove valid_niter>= loop->nb_iterations, by simple analysis that  UINT_MAX>= e is true in all cases.
>
> I haven't tested this patch completely. I'm looking for feedback and any scope for improvement.
>
>
> hth,
> -Aditya
>
>
>
> Vectorize loops which has typecast.
>
> 2015-05-19  hiraditya  <hiraditya@msn.com>
>
>          * gcc.dg/vect/pr48052.c: New test.
>
> gcc/ChangeLog:
>
> 2015-05-19  hiraditya  <hiraditya@msn.com>
>
>          * tree-ssa-loop-niter.c (fold_binary_cond_p): Fold a conditional operation when additional constraints are
>          available.
>          (fold_binary_minus_p): Fold a subtraction operations of the form (A - B -1) when additional constraints are
>          available.
>          (scev_probably_wraps_p): Use the above two functions to find whether valid_niter>= loop->nb_iterations.
Is any of this work still useful if Bin Cheng's work on improving 
overflow detection for scev goes forward?  I certainly got the 
impression that Bin's work would solve 48052 and others.

Jeff

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: Fix PR48052: loop not vectorized if index is "unsigned int"
  2015-05-19 16:50 Aditya K
@ 2015-05-21 16:02 ` Aditya K
  2015-05-30  9:11 ` Jeff Law
  1 sibling, 0 replies; 9+ messages in thread
From: Aditya K @ 2015-05-21 16:02 UTC (permalink / raw)
  To: gcc-patches, a.zaafrani, sebpop, law, richard.guenther


I tested this patch and it passes bootstrap and no extra failures.

Thanks
-Aditya


Symbolically evaluate conditionals, and subtraction when additional constraints are provided.

Adding this evaluation mechanism helps vectorize some loops on 64bit machines because on 64bit, a typecast appears
which causes scev to bail out.

gcc/ChangeLog:

2015-05-21  hiraditya  <hiraditya@msn.com>
2015-05-21 Sebastian Pop  <s.pop@samsung.com>
2015-05-21 Abderrazek Zaafrani <a.zaafrani@samsung.com>

        * gcc.dg/vect/pr48052.c: New test.
        * tree-ssa-loop-niter.c (fold_binary_cond_p): Fold a conditional operation when additional constraints are
        available.
        (fold_binary_minus_p): Fold a subtraction operations of the form (A - B -1) when additional constraints are
        available.
        (scev_probably_wraps_p): Use the above two functions to find whether valid_niter>= loop->nb_iterations.


diff --git a/gcc/testsuite/gcc.dg/vect/pr48052.c b/gcc/testsuite/gcc.dg/vect/pr48052.c
new file mode 100644
index 0000000..8e406d7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr48052.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
+int foo(int* A, int* B,  unsigned start, unsigned BS)
+{
+  int s;
+  for (unsigned k = start;  k < start + BS; k++)
+    {
+      s += A[k] * B[k];
+    }
+
+  return s;
+}
+
+int bar(int* A, int* B, unsigned BS)
+{
+  int s;
+  for (unsigned k = 0;  k < BS; k++)
+    {
+      s += A[k] * B[k];
+    }
+
+  return s;
+}
+





























----------------------------------------
> From: hiraditya@msn.com
> To: gcc-patches@gcc.gnu.org; a.zaafrani@samsung.com; sebpop@gmail.com; law@redhat.com; richard.guenther@gmail.com
> Subject: Fix PR48052: loop not vectorized if index is "unsigned int"
> Date: Tue, 19 May 2015 16:12:26 +0000
>
> w.r.t. the PR48052, here is the patch which finds out if scev would wrap or not.
> The patch symbolically evaluates if valid_niter>= loop->nb_iterations is true. In that case the scev would not wrap (??).
> Currently, we only look for two special 'patterns', which are sufficient to analyze the simple test cases.
>
> valid_niter = ~s (= UNIT_MAX - s)
> We have to prove that valid_niter>= loop->nb_iterations
>
> Pattern1 loop->nb_iterations: s>= e ? s - e : 0
> Pattern2 loop->nb_iterations: (e - s) -1
>
> In the first case we prove that valid_niter>= loop->nb_iterations in both the cases i.e., when s>=e and when not.
> In the second case we prove valid_niter>= loop->nb_iterations, by simple analysis that  UINT_MAX>= e is true in all cases.
>
> I haven't tested this patch completely. I'm looking for feedback and any scope for improvement.
>
>
> hth,
> -Aditya
>
>
>
> Vectorize loops which has typecast.
>
> 2015-05-19  hiraditya  <hiraditya@msn.com>
> 2015-05-19 Sebastian Pop  <s.pop@samsung.com>
> 2015-05-19 Abderrazek Zaafrani <a.zaafrani@samsung.com>
>
>         * gcc.dg/vect/pr48052.c: New test.
>
> gcc/ChangeLog:
>
> 2015-05-19  hiraditya  <hiraditya@msn.com>
>
>         * tree-ssa-loop-niter.c (fold_binary_cond_p): Fold a conditional operation when additional constraints are
>         available.
>         (fold_binary_minus_p): Fold a subtraction operations of the form (A - B -1) when additional constraints are
>         available.
>         (scev_probably_wraps_p): Use the above two functions to find whether valid_niter>= loop->nb_iterations.
>
>
> diff --git a/gcc/testsuite/gcc.dg/vect/pr48052.c b/gcc/testsuite/gcc.dg/vect/pr48052.c
> new file mode 100644
> index 0000000..8e406d7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr48052.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O3" } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> +
> +int foo(int* A, int* B,  unsigned start, unsigned BS)
> +{
> +  int s;
> +  for (unsigned k = start;  k < start + BS; k++)
> +    {
> +      s += A[k] * B[k];
> +    }
> +
> +  return s;
> +}
> +
> +int bar(int* A, int* B, unsigned BS)
> +{
> +  int s;
> +  for (unsigned k = 0;  k < BS; k++)
> +    {
> +      s += A[k] * B[k];
> +    }
> +
> +  return s;
> +}
> +
> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> index 042f8df..ddc00cc 100644
> --- a/gcc/tree-ssa-loop-niter.c
> +++ b/gcc/tree-ssa-loop-niter.c
> @@ -3773,6 +3773,117 @@ nowrap_type_p (tree type)
>    return false;
>  }
>
> +/* Return true when op0>= op1.
> +   For example:
> +   Where, op0 = ~start_3(D);
> +   op1 = start_3(D) <= stop_6(D) ? stop_6(D) - start_3(D) : 0;
> +   In this case op0 = UINT_MAX - start_3(D);
> +   So, op0>= op1 in all cases because UINT_MAX>= stop_6(D),
> +   when TREE_TYPE(stop_6(D)) == unsigned int;  */
> +bool
> +fold_binary_cond_p (enum tree_code code, tree type, tree op0, tree op1)
> +{
> +  gcc_assert (type == boolean_type_node);
> +
> +  if (TREE_TYPE (op0) != TREE_TYPE (op1))
> +    return false;
> +
> +  /* TODO: Handle other operations.  */
> +  if (code != GE_EXPR)
> +    return false;
> +  // The type of op0 and op1 should be unsigned.
> +  if (!TYPE_UNSIGNED (TREE_TYPE(op0)))
> +    return false;
> +  if ((TREE_CODE (op0) != BIT_NOT_EXPR) || (TREE_CODE (op1) != COND_EXPR))
> +    return false;
> +
> +  /* We have to show that in both the cases,
> +     (when cond is true and when cond is false) op (op0, op1) is true.  */
> +   tree neg_op0 = TREE_OPERAND (op0, 0);
> +   tree cond_op1 = TREE_OPERAND (op1, 0);
> +   tree true_op1 = TREE_OPERAND (op1, 1);
> +   tree false_op1 = TREE_OPERAND (op1, 2);
> +   gcc_assert(neg_op0 && cond_op1 && true_op1 && false_op1);
> +
> +  /* When cond is false. Evaluate op (op0, false_op1).  */
> +  tree running_exp = fold_binary (code, boolean_type_node, op0, false_op1);
> +  if (running_exp == NULL || integer_zerop (running_exp))
> +    /* TODO: Handle more cases here. */
> +    return false;
> +
> +  /* When cond is true. Evaluate op (op0, true_op1).  */
> +  running_exp = fold_binary (code, boolean_type_node, op0, true_op1);
> +  if (running_exp != NULL && integer_nonzerop (running_exp))
> +    return true;
> +
> +  tree smaller, bigger;
> +  if (TREE_CODE (cond_op1) == LE_EXPR)
> +    {
> +      smaller = TREE_OPERAND (cond_op1, 0);
> +      bigger = TREE_OPERAND (cond_op1, 1);
> +    } else return false;
> +
> +  if (TREE_CODE (true_op1) == MINUS_EXPR)
> +    {
> +      tree minuend = TREE_OPERAND (true_op1, 0);
> +      tree subtrahend = TREE_OPERAND (true_op1, 1);
> +      if (subtrahend == neg_op0 && subtrahend == smaller && minuend == bigger)
> +        {
> +          tree extreme = upper_bound_in_type (TREE_TYPE (neg_op0),
> +                                              TREE_TYPE (neg_op0));
> +          running_exp = fold_binary (code, boolean_type_node, extreme, minuend);
> +          return running_exp != NULL && integer_nonzerop(running_exp);
> +        } else return false;
> +    } else return false;
> +}
> +
> +/* Return true when op0>= op1 and
> +   op0 is ~start3(D) or, UINT_MAX - start3(D)
> +   op1 is (_21 - start_3(D)) - 1; */
> +bool
> +fold_binary_minus_p (enum tree_code code, tree type, tree op0, tree op1)
> +{
> +  gcc_assert (type == boolean_type_node);
> +
> +  if (TREE_TYPE (op0) != TREE_TYPE (op1))
> +    return false;
> +  /* TODO: Handle other operations.  */
> +  if (code != GE_EXPR)
> +    return false;
> +
> +  // The type of op0 and op1 should be unsigned.
> +  if (!TYPE_UNSIGNED (TREE_TYPE(op0)))
> +    return false;
> +  if ((TREE_CODE (op0) != BIT_NOT_EXPR) || (TREE_CODE (op1) != MINUS_EXPR))
> +    return false;
> +
> +  /* We have to show that op (op0, op1) is true.  */
> +  tree neg_op0 = TREE_OPERAND (op0, 0);
> +  tree minuend_op1 = TREE_OPERAND (op1, 0);
> +  tree subtrahend_op1 = TREE_OPERAND (op1, 1);
> +  gcc_assert(neg_op0 && subtrahend_op1 && minuend_op1);
> +
> +  /* TODO: Also check that the integer_cst is positive.  */
> +  if (TREE_CODE (minuend_op1) != MINUS_EXPR ||
> +      TREE_CODE (subtrahend_op1) != INTEGER_CST)
> +    return false;
> +
> +  tree minuend_minuend_op1 = TREE_OPERAND (minuend_op1, 0);
> +  tree subtrahend_minuend_op1 = TREE_OPERAND (minuend_op1, 1);
> +
> +  /* TODO: Extend this to evaluate the subtrahends.
> +     i.e., when there are complicated operations in the subtrahend.  */
> +  if (subtrahend_minuend_op1 != neg_op0)
> +    return false;
> +
> +  tree extreme = upper_bound_in_type (TREE_TYPE (neg_op0), TREE_TYPE (neg_op0));
> +  tree compare_minuend = fold_binary (GE_EXPR, boolean_type_node,
> +                                      extreme, minuend_minuend_op1);
> +  if (compare_minuend != NULL && integer_nonzerop (compare_minuend))
> +    return true;
> +  return false;
> +}
> +
>  /* Return false only when the induction variable BASE + STEP * I is
>     known to not overflow: i.e. when the number of iterations is small
>     enough with respect to the step and initial condition in order to
> @@ -3867,6 +3978,17 @@ scev_probably_wraps_p (tree base, tree step,
>        fold_undefer_and_ignore_overflow_warnings ();
>        return false;
>      }
> +
> +  if (loop->nb_iterations && at_stmt
> +      && (fold_binary_cond_p (GE_EXPR, boolean_type_node,
> +                            valid_niter, loop->nb_iterations) ||
> +          fold_binary_minus_p (GE_EXPR, boolean_type_node,
> +                             valid_niter, loop->nb_iterations)))
> +    {
> +      fold_undefer_and_ignore_overflow_warnings ();
> +      return false;
> +    }
> +
>    if (at_stmt)
>      for (bound = loop->bounds; bound; bound = bound->next)
>        {
>
>diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 042f8df..ddc00cc 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -3773,6 +3773,117 @@ nowrap_type_p (tree type)
   return false;
 }
 
+/* Return true when op0>= op1.
+   For example:
+   Where, op0 = ~start_3(D);
+   op1 = start_3(D) <= stop_6(D) ? stop_6(D) - start_3(D) : 0;
+   In this case op0 = UINT_MAX - start_3(D);
+   So, op0>= op1 in all cases because UINT_MAX>= stop_6(D),
+   when TREE_TYPE(stop_6(D)) == unsigned int;  */
+bool
+fold_binary_cond_p (enum tree_code code, tree type, tree op0, tree op1)
+{
+  gcc_assert (type == boolean_type_node);
+
+  if (TREE_TYPE (op0) != TREE_TYPE (op1))
+    return false;
+
+  /* TODO: Handle other operations.  */
+  if (code != GE_EXPR)
+    return false;
+  // The type of op0 and op1 should be unsigned.
+  if (!TYPE_UNSIGNED (TREE_TYPE(op0)))
+    return false;
+  if ((TREE_CODE (op0) != BIT_NOT_EXPR) || (TREE_CODE (op1) != COND_EXPR))
+    return false;
+
+  /* We have to show that in both the cases,
+     (when cond is true and when cond is false) op (op0, op1) is true.  */
+   tree neg_op0 = TREE_OPERAND (op0, 0);
+   tree cond_op1 = TREE_OPERAND (op1, 0);
+   tree true_op1 = TREE_OPERAND (op1, 1);
+   tree false_op1 = TREE_OPERAND (op1, 2);
+   gcc_assert(neg_op0 && cond_op1 && true_op1 && false_op1);
+
+  /* When cond is false. Evaluate op (op0, false_op1).  */
+  tree running_exp = fold_binary (code, boolean_type_node, op0, false_op1);
+  if (running_exp == NULL || integer_zerop (running_exp))
+    /* TODO: Handle more cases here. */
+    return false;
+
+  /* When cond is true. Evaluate op (op0, true_op1).  */
+  running_exp = fold_binary (code, boolean_type_node, op0, true_op1);
+  if (running_exp != NULL && integer_nonzerop (running_exp))
+    return true;
+
+  tree smaller, bigger;
+  if (TREE_CODE (cond_op1) == LE_EXPR)
+    {
+      smaller = TREE_OPERAND (cond_op1, 0);
+      bigger = TREE_OPERAND (cond_op1, 1);
+    } else return false;
+
+  if (TREE_CODE (true_op1) == MINUS_EXPR)
+    {
+      tree minuend = TREE_OPERAND (true_op1, 0);
+      tree subtrahend = TREE_OPERAND (true_op1, 1);
+      if (subtrahend == neg_op0 && subtrahend == smaller && minuend == bigger)
+        {
+          tree extreme = upper_bound_in_type (TREE_TYPE (neg_op0),
+                                              TREE_TYPE (neg_op0));
+          running_exp = fold_binary (code, boolean_type_node, extreme, minuend);
+          return running_exp != NULL && integer_nonzerop(running_exp);
+        } else return false;
+    } else return false;
+}
+
+/* Return true when op0>= op1 and
+   op0 is ~start3(D) or, UINT_MAX - start3(D)
+   op1 is (_21 - start_3(D)) - 1; */
+bool
+fold_binary_minus_p (enum tree_code code, tree type, tree op0, tree op1)
+{
+  gcc_assert (type == boolean_type_node);
+
+  if (TREE_TYPE (op0) != TREE_TYPE (op1))
+    return false;
+  /* TODO: Handle other operations.  */
+  if (code != GE_EXPR)
+    return false;
+
+  // The type of op0 and op1 should be unsigned.
+  if (!TYPE_UNSIGNED (TREE_TYPE(op0)))
+    return false;
+  if ((TREE_CODE (op0) != BIT_NOT_EXPR) || (TREE_CODE (op1) != MINUS_EXPR))
+    return false;
+
+  /* We have to show that op (op0, op1) is true.  */
+  tree neg_op0 = TREE_OPERAND (op0, 0);
+  tree minuend_op1 = TREE_OPERAND (op1, 0);
+  tree subtrahend_op1 = TREE_OPERAND (op1, 1);
+  gcc_assert(neg_op0 && subtrahend_op1 && minuend_op1);
+
+  /* TODO: Also check that the integer_cst is positive.  */
+  if (TREE_CODE (minuend_op1) != MINUS_EXPR ||
+      TREE_CODE (subtrahend_op1) != INTEGER_CST)
+    return false;
+
+  tree minuend_minuend_op1 = TREE_OPERAND (minuend_op1, 0);
+  tree subtrahend_minuend_op1 = TREE_OPERAND (minuend_op1, 1);
+
+  /* TODO: Extend this to evaluate the subtrahends.
+     i.e., when there are complicated operations in the subtrahend.  */
+  if (subtrahend_minuend_op1 != neg_op0)
+    return false;
+
+  tree extreme = upper_bound_in_type (TREE_TYPE (neg_op0), TREE_TYPE (neg_op0));
+  tree compare_minuend = fold_binary (GE_EXPR, boolean_type_node,
+                                      extreme, minuend_minuend_op1);
+  if (compare_minuend != NULL && integer_nonzerop (compare_minuend))
+    return true;
+  return false;
+}
+
 /* Return false only when the induction variable BASE + STEP * I is
    known to not overflow: i.e. when the number of iterations is small
    enough with respect to the step and initial condition in order to
@@ -3867,6 +3978,17 @@ scev_probably_wraps_p (tree base, tree step,
       fold_undefer_and_ignore_overflow_warnings ();
       return false;
     }
+
+  if (loop->nb_iterations && at_stmt
+      && (fold_binary_cond_p (GE_EXPR, boolean_type_node,
+                            valid_niter, loop->nb_iterations) ||
+          fold_binary_minus_p (GE_EXPR, boolean_type_node,
+                             valid_niter, loop->nb_iterations)))
+    {
+      fold_undefer_and_ignore_overflow_warnings ();
+      return false;
+    }
+
   if (at_stmt)
     for (bound = loop->bounds; bound; bound = bound->next)
       {

 		 	   		  

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Fix PR48052: loop not vectorized if index is "unsigned int"
@ 2015-05-19 16:50 Aditya K
  2015-05-21 16:02 ` Aditya K
  2015-05-30  9:11 ` Jeff Law
  0 siblings, 2 replies; 9+ messages in thread
From: Aditya K @ 2015-05-19 16:50 UTC (permalink / raw)
  To: gcc-patches, a.zaafrani, sebpop, law, richard.guenther

w.r.t. the PR48052, here is the patch which finds out if scev would wrap or not.
The patch symbolically evaluates if valid_niter>= loop->nb_iterations is true. In that case the scev would not wrap (??).
Currently, we only look for two special 'patterns', which are sufficient to analyze the simple test cases.

valid_niter = ~s (= UNIT_MAX - s)
We have to prove that valid_niter>= loop->nb_iterations

Pattern1 loop->nb_iterations: s>= e ? s - e : 0
Pattern2 loop->nb_iterations: (e - s) -1

In the first case we prove that valid_niter>= loop->nb_iterations in both the cases i.e., when s>=e and when not.
In the second case we prove valid_niter>= loop->nb_iterations, by simple analysis that  UINT_MAX>= e is true in all cases.

I haven't tested this patch completely. I'm looking for feedback and any scope for improvement.


hth,
-Aditya



Vectorize loops which has typecast.

2015-05-19  hiraditya  <hiraditya@msn.com>

        * gcc.dg/vect/pr48052.c: New test.

gcc/ChangeLog:

2015-05-19  hiraditya  <hiraditya@msn.com>

        * tree-ssa-loop-niter.c (fold_binary_cond_p): Fold a conditional operation when additional constraints are
        available.
        (fold_binary_minus_p): Fold a subtraction operations of the form (A - B -1) when additional constraints are
        available.
        (scev_probably_wraps_p): Use the above two functions to find whether valid_niter>= loop->nb_iterations.


diff --git a/gcc/testsuite/gcc.dg/vect/pr48052.c b/gcc/testsuite/gcc.dg/vect/pr48052.c
new file mode 100644
index 0000000..8e406d7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr48052.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
+int foo(int* A, int* B,  unsigned start, unsigned BS)
+{
+  int s;
+  for (unsigned k = start;  k < start + BS; k++)
+    {
+      s += A[k] * B[k];
+    }
+
+  return s;
+}
+
+int bar(int* A, int* B, unsigned BS)
+{
+  int s;
+  for (unsigned k = 0;  k < BS; k++)
+    {
+      s += A[k] * B[k];
+    }
+
+  return s;
+}
+
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 042f8df..ddc00cc 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -3773,6 +3773,117 @@ nowrap_type_p (tree type)
   return false;
 }
 
+/* Return true when op0>= op1.
+   For example:
+   Where, op0 = ~start_3(D);
+   op1 = start_3(D) <= stop_6(D) ? stop_6(D) - start_3(D) : 0;
+   In this case op0 = UINT_MAX - start_3(D);
+   So, op0>= op1 in all cases because UINT_MAX>= stop_6(D),
+   when TREE_TYPE(stop_6(D)) == unsigned int;  */
+bool
+fold_binary_cond_p (enum tree_code code, tree type, tree op0, tree op1)
+{
+  gcc_assert (type == boolean_type_node);
+
+  if (TREE_TYPE (op0) != TREE_TYPE (op1))
+    return false;
+
+  /* TODO: Handle other operations.  */
+  if (code != GE_EXPR)
+    return false;
+  // The type of op0 and op1 should be unsigned.
+  if (!TYPE_UNSIGNED (TREE_TYPE(op0)))
+    return false;
+  if ((TREE_CODE (op0) != BIT_NOT_EXPR) || (TREE_CODE (op1) != COND_EXPR))
+    return false;
+
+  /* We have to show that in both the cases,
+     (when cond is true and when cond is false) op (op0, op1) is true.  */
+   tree neg_op0 = TREE_OPERAND (op0, 0);
+   tree cond_op1 = TREE_OPERAND (op1, 0);
+   tree true_op1 = TREE_OPERAND (op1, 1);
+   tree false_op1 = TREE_OPERAND (op1, 2);
+   gcc_assert(neg_op0 && cond_op1 && true_op1 && false_op1);
+
+  /* When cond is false. Evaluate op (op0, false_op1).  */
+  tree running_exp = fold_binary (code, boolean_type_node, op0, false_op1);
+  if (running_exp == NULL || integer_zerop (running_exp))
+    /* TODO: Handle more cases here. */
+    return false;
+
+  /* When cond is true. Evaluate op (op0, true_op1).  */
+  running_exp = fold_binary (code, boolean_type_node, op0, true_op1);
+  if (running_exp != NULL && integer_nonzerop (running_exp))
+    return true;
+
+  tree smaller, bigger;
+  if (TREE_CODE (cond_op1) == LE_EXPR)
+    {
+      smaller = TREE_OPERAND (cond_op1, 0);
+      bigger = TREE_OPERAND (cond_op1, 1);
+    } else return false;
+
+  if (TREE_CODE (true_op1) == MINUS_EXPR)
+    {
+      tree minuend = TREE_OPERAND (true_op1, 0);
+      tree subtrahend = TREE_OPERAND (true_op1, 1);
+      if (subtrahend == neg_op0 && subtrahend == smaller && minuend == bigger)
+        {
+          tree extreme = upper_bound_in_type (TREE_TYPE (neg_op0),
+                                              TREE_TYPE (neg_op0));
+          running_exp = fold_binary (code, boolean_type_node, extreme, minuend);
+          return running_exp != NULL && integer_nonzerop(running_exp);
+        } else return false;
+    } else return false;
+}
+
+/* Return true when op0>= op1 and
+   op0 is ~start3(D) or, UINT_MAX - start3(D)
+   op1 is (_21 - start_3(D)) - 1; */
+bool
+fold_binary_minus_p (enum tree_code code, tree type, tree op0, tree op1)
+{
+  gcc_assert (type == boolean_type_node);
+
+  if (TREE_TYPE (op0) != TREE_TYPE (op1))
+    return false;
+  /* TODO: Handle other operations.  */
+  if (code != GE_EXPR)
+    return false;
+
+  // The type of op0 and op1 should be unsigned.
+  if (!TYPE_UNSIGNED (TREE_TYPE(op0)))
+    return false;
+  if ((TREE_CODE (op0) != BIT_NOT_EXPR) || (TREE_CODE (op1) != MINUS_EXPR))
+    return false;
+
+  /* We have to show that op (op0, op1) is true.  */
+  tree neg_op0 = TREE_OPERAND (op0, 0);
+  tree minuend_op1 = TREE_OPERAND (op1, 0);
+  tree subtrahend_op1 = TREE_OPERAND (op1, 1);
+  gcc_assert(neg_op0 && subtrahend_op1 && minuend_op1);
+
+  /* TODO: Also check that the integer_cst is positive.  */
+  if (TREE_CODE (minuend_op1) != MINUS_EXPR ||
+      TREE_CODE (subtrahend_op1) != INTEGER_CST)
+    return false;
+
+  tree minuend_minuend_op1 = TREE_OPERAND (minuend_op1, 0);
+  tree subtrahend_minuend_op1 = TREE_OPERAND (minuend_op1, 1);
+
+  /* TODO: Extend this to evaluate the subtrahends.
+     i.e., when there are complicated operations in the subtrahend.  */
+  if (subtrahend_minuend_op1 != neg_op0)
+    return false;
+
+  tree extreme = upper_bound_in_type (TREE_TYPE (neg_op0), TREE_TYPE (neg_op0));
+  tree compare_minuend = fold_binary (GE_EXPR, boolean_type_node,
+                                      extreme, minuend_minuend_op1);
+  if (compare_minuend != NULL && integer_nonzerop (compare_minuend))
+    return true;
+  return false;
+}
+
 /* Return false only when the induction variable BASE + STEP * I is
    known to not overflow: i.e. when the number of iterations is small
    enough with respect to the step and initial condition in order to
@@ -3867,6 +3978,17 @@ scev_probably_wraps_p (tree base, tree step,
       fold_undefer_and_ignore_overflow_warnings ();
       return false;
     }
+
+  if (loop->nb_iterations && at_stmt
+      && (fold_binary_cond_p (GE_EXPR, boolean_type_node,
+                            valid_niter, loop->nb_iterations) ||
+          fold_binary_minus_p (GE_EXPR, boolean_type_node,
+                             valid_niter, loop->nb_iterations)))
+    {
+      fold_undefer_and_ignore_overflow_warnings ();
+      return false;
+    }
+
   if (at_stmt)
     for (bound = loop->bounds; bound; bound = bound->next)
       {

 		 	   		  

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2015-06-01  8:15 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-04 19:47 Fix PR48052: loop not vectorized if index is "unsigned int" Abderrazek Zaafrani
2015-05-06 11:02 ` Richard Biener
2015-05-07 20:23   ` Abderrazek Zaafrani
2015-05-19 11:16   ` Bin.Cheng
2015-05-19 16:50 Aditya K
2015-05-21 16:02 ` Aditya K
2015-05-30  9:11 ` Jeff Law
2015-06-01  8:00   ` Richard Biener
2015-06-01  8:15     ` Bin.Cheng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).