public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH PR71734] Add missed check that reference defined inside loop.
@ 2016-07-05 14:56 Yuri Rumyantsev
  2016-07-06  9:38 ` Richard Biener
  0 siblings, 1 reply; 24+ messages in thread
From: Yuri Rumyantsev @ 2016-07-05 14:56 UTC (permalink / raw)
  To: gcc-patches, Richard Biener, Jakub Jelinek,
	Илья
	Энкович,
	Igor Zamyatin

[-- Attachment #1: Type: text/plain, Size: 557 bytes --]

Hi All,

Here is a simple fix to cure regressions introduced by my fix for
70729. Patch also contains minor changes in test found by Jakub.

Bootstrapping and regression testing did not show any new failures.

Is it OK for trunk?

ChangeLog:
2016-07-05  Yuri Rumyantsev  <ysrumyan@gmail.com>

PR tree-optimization/71734
* tree-ssa-loop-im.c (ref_indep_loop_p_1): Consider REF defined in
LOOP as independent if at least two loop iterations are not dependent.
gcc/testsuite/ChangeLog:
        * g++.dg/vect/pr70729.cc: Delete redundant dg options, fix style.

[-- Attachment #2: 71734.patch --]
[-- Type: application/octet-stream, Size: 3316 bytes --]

diff --git a/gcc/testsuite/g++.dg/vect/pr70729.cc b/gcc/testsuite/g++.dg/vect/pr70729.cc
index 0d5d353..014de8c 100644
--- a/gcc/testsuite/g++.dg/vect/pr70729.cc
+++ b/gcc/testsuite/g++.dg/vect/pr70729.cc
@@ -1,14 +1,13 @@
 // { dg-do compile }
-// { dg-require-effective-target vect_simd_clones }
-// { dg-additional-options "-Ofast" }
-// { dg-additional-options "-mavx2 -fopenmp-simd" { target x86_64-*-* i?86-*-* } }
+// { dg-additional-options "-ffast-math -fopenmp-simd" }
+// { dg-additional-options "-msse2" { target x86_64-*-* i?86-*-* } }
 
 
 #include <string.h>
 #include <xmmintrin.h>
 
-inline void* my_alloc(size_t bytes) {return _mm_malloc(bytes, 128);}
-inline void my_free(void* memory) {_mm_free(memory);}
+inline void* my_alloc (size_t bytes) {return _mm_malloc (bytes, 128);}
+inline void my_free (void* memory) {_mm_free (memory);}
 
 template <typename T>
 class Vec
@@ -18,13 +17,13 @@ class Vec
 
 public:
 
-  Vec (int n) : isize(n) {data = (T*)my_alloc(isize*sizeof(T));}
+  Vec (int n) : isize (n) {data = (T*)my_alloc (isize*sizeof (T));}
   ~Vec () {my_free(data);}
 
   Vec& operator = (const Vec& other)	
     {
       if (this != &other)
-	memcpy(data, other.data, isize*sizeof(T));
+	memcpy (data, other.data, isize*sizeof (T));
       return *this;
     }
 
@@ -67,7 +66,7 @@ struct Ss
 void Ss::foo (float *in, float w)
 {
 #pragma omp simd
-  for (int i=0; i<S_n; i++)
+  for (int i = 0; i < S_n; i++)
     {
       float w1 = C2[S_n + i] * w;
       v1.v_i[i] += (int)w1;
@@ -75,4 +74,4 @@ void Ss::foo (float *in, float w)
     }
 }
  
-// { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } }
+// { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target x86_64-*-* i?86-*-* } } }
diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index ee04826..14caa66 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -2128,13 +2128,16 @@ ref_indep_loop_p_1 (struct loop *loop, im_mem_ref *ref, bool stored_p)
   if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
     return false;
 
-  if (loop->safelen > 0)
+  /* We consider REF defined in LOOP as independent if at least 2 loop
+     iterations are not dependent.  */
+  if (loop->safelen > 1
+      && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num], ref->id))
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	{
-	  fprintf(dump_file,"Consider REF independent in loop#%d\n", loop->num);
-	  print_generic_expr(dump_file, ref->mem.ref, TDF_SLIM);
-	  fprintf(dump_file, "\n");
+	  fprintf (dump_file,"REF is independent in loop#%d\n", loop->num);
+	  print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
+	  fprintf (dump_file, "\n");
 	}
       return true;
     }
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 2669813..9fbd183 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -204,6 +204,10 @@ adjust_simduid_builtins (hash_table<simduid_to_vf> *htab)
 	  gcc_assert (TREE_CODE (arg) == SSA_NAME);
 	  simduid_to_vf *p = NULL, data;
 	  data.simduid = DECL_UID (SSA_NAME_VAR (arg));
+	  /* Need to nullify loop safelen field since it's value is not
+	     valid after transformation.  */
+	  if (bb->loop_father && bb->loop_father->safelen > 0)
+	    bb->loop_father->safelen = 0;
 	  if (htab)
 	    {
 	      p = htab->find (&data);

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
  2016-07-05 14:56 [PATCH PR71734] Add missed check that reference defined inside loop Yuri Rumyantsev
@ 2016-07-06  9:38 ` Richard Biener
  2016-07-06  9:50   ` Yuri Rumyantsev
  0 siblings, 1 reply; 24+ messages in thread
From: Richard Biener @ 2016-07-06  9:38 UTC (permalink / raw)
  To: Yuri Rumyantsev
  Cc: gcc-patches, Jakub Jelinek,
	Илья
	Энкович,
	Igor Zamyatin

On Tue, Jul 5, 2016 at 4:56 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
> Hi All,
>
> Here is a simple fix to cure regressions introduced by my fix for
> 70729. Patch also contains minor changes in test found by Jakub.
>
> Bootstrapping and regression testing did not show any new failures.
>
> Is it OK for trunk?

+      && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num], ref->id))

So safelen does not apply to refs in nested loops?  The added comment only
explains the safelen check change but this also requires explanation.

Richard.

> ChangeLog:
> 2016-07-05  Yuri Rumyantsev  <ysrumyan@gmail.com>
>
> PR tree-optimization/71734
> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Consider REF defined in
> LOOP as independent if at least two loop iterations are not dependent.
> gcc/testsuite/ChangeLog:
>         * g++.dg/vect/pr70729.cc: Delete redundant dg options, fix style.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
  2016-07-06  9:38 ` Richard Biener
@ 2016-07-06  9:50   ` Yuri Rumyantsev
  2016-07-06 10:34     ` Richard Biener
  0 siblings, 1 reply; 24+ messages in thread
From: Yuri Rumyantsev @ 2016-07-06  9:50 UTC (permalink / raw)
  To: Richard Biener
  Cc: gcc-patches, Jakub Jelinek,
	Илья
	Энкович,
	Igor Zamyatin

Richard,

I pointed out in the commentary that REF is defined inside loop and
this check is related to this statement. Should I clarify it?

+  /* We consider REF defined in LOOP as independent if at least 2 loop
+     iterations are not dependent.  */


2016-07-06 12:38 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
> On Tue, Jul 5, 2016 at 4:56 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>> Hi All,
>>
>> Here is a simple fix to cure regressions introduced by my fix for
>> 70729. Patch also contains minor changes in test found by Jakub.
>>
>> Bootstrapping and regression testing did not show any new failures.
>>
>> Is it OK for trunk?
>
> +      && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num], ref->id))
>
> So safelen does not apply to refs in nested loops?  The added comment only
> explains the safelen check change but this also requires explanation.
>
> Richard.
>
>> ChangeLog:
>> 2016-07-05  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>
>> PR tree-optimization/71734
>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Consider REF defined in
>> LOOP as independent if at least two loop iterations are not dependent.
>> gcc/testsuite/ChangeLog:
>>         * g++.dg/vect/pr70729.cc: Delete redundant dg options, fix style.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
  2016-07-06  9:50   ` Yuri Rumyantsev
@ 2016-07-06 10:34     ` Richard Biener
       [not found]       ` <CAEoMCqRH6L4=aBDmkpW_oNLZBj79_mFZ51i8PC8Qc+gTuHqKAw@mail.gmail.com>
  0 siblings, 1 reply; 24+ messages in thread
From: Richard Biener @ 2016-07-06 10:34 UTC (permalink / raw)
  To: Yuri Rumyantsev
  Cc: gcc-patches, Jakub Jelinek,
	Илья
	Энкович,
	Igor Zamyatin

On Wed, Jul 6, 2016 at 11:50 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
> Richard,
>
> I pointed out in the commentary that REF is defined inside loop and
> this check is related to this statement. Should I clarify it?
>
> +  /* We consider REF defined in LOOP as independent if at least 2 loop
> +     iterations are not dependent.  */

Yes, I fail to see why x[0] should not be disambiguated against y[i] in

#pragma GCC loop ivdep
  for (i...)
    {
      y[i] = ...;
      for (j...)
        ... = x[0];
    }

REF is always inside the loop nest with LOOP being the outermost loop.

Richard.

>
> 2016-07-06 12:38 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>> On Tue, Jul 5, 2016 at 4:56 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>> Hi All,
>>>
>>> Here is a simple fix to cure regressions introduced by my fix for
>>> 70729. Patch also contains minor changes in test found by Jakub.
>>>
>>> Bootstrapping and regression testing did not show any new failures.
>>>
>>> Is it OK for trunk?
>>
>> +      && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num], ref->id))
>>
>> So safelen does not apply to refs in nested loops?  The added comment only
>> explains the safelen check change but this also requires explanation.
>>
>> Richard.
>>
>>> ChangeLog:
>>> 2016-07-05  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>
>>> PR tree-optimization/71734
>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Consider REF defined in
>>> LOOP as independent if at least two loop iterations are not dependent.
>>> gcc/testsuite/ChangeLog:
>>>         * g++.dg/vect/pr70729.cc: Delete redundant dg options, fix style.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
       [not found]                                 ` <CAFiYyc3UqfahbU6K-DEF6RpARKLmEYvMkabpWAS5qD+3oESk3w@mail.gmail.com>
@ 2016-07-08 14:07                                   ` Yuri Rumyantsev
  2016-07-15 14:06                                     ` Yuri Rumyantsev
                                                       ` (2 more replies)
  0 siblings, 3 replies; 24+ messages in thread
From: Yuri Rumyantsev @ 2016-07-08 14:07 UTC (permalink / raw)
  To: Richard Biener, gcc-patches,
	Илья
	Энкович,
	Igor Zamyatin
  Cc: Jakub Jelinek

[-- Attachment #1: Type: text/plain, Size: 12519 bytes --]

Hi Richard,

Thanks for your help - your patch looks much better.
Here is new patch in which additional argument was added to determine
source loop of reference.

Bootstrap and regression testing did not show any new failures.

Is it OK for trunk?
ChangeLog:
2016-07-08  Yuri Rumyantsev  <ysrumyan@gmail.com>

PR tree-optimization/71734
* tree-ssa-loop-im.c (ref_indep_loop_p_1): Add REF_LOOP argument which
contains REF, use it to check safelen, assume that safelen value
must be greater 1, fix style.
(ref_indep_loop_p_2): Add REF_LOOP argument.
(ref_indep_loop_p): Pass LOOP as additional argument to
ref_indep_loop_p_2.
gcc/testsuite/ChangeLog:
        * g++.dg/vect/pr70729.cc: Delete redundant dg options, fix style.

2016-07-08 11:18 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
> On Thu, Jul 7, 2016 at 5:38 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>> I checked simd3.f90 and found out that my additional check reject
>> independence of references
>>
>> REF is independent in loop#3
>> .istart0.19, .iend0.20
>> which are defined in loop#1 which is outer for loop#3.
>> Note that these references are defined by
>> _103 = __builtin_GOMP_loop_dynamic_next (&.istart0.19, &.iend0.20);
>> which is in loop#1.
>> It is clear that both these references can not be independent for loop#3.
>
> Ok, so we end up calling ref_indep_loop for ref in LOOP also for inner loops
> of LOOP to catch memory references in those as well.  So the issue is really
> that we look at the wrong loop for safelen and we _do_ want to apply safelen
> to inner loops as well.
>
> So better track the loop we are ultimately asking the question for, like in the
> attached patch (fixes the testcase for me).
>
> Richard.
>
>
>
>> 2016-07-07 17:11 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>> On Thu, Jul 7, 2016 at 4:04 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>> I Added this check because of new failures in libgomp.fortran suite.
>>>> Here is copy of Jakub message:
>>>> --- Comment #29 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
>>>> The #c27 r237844 change looks bogus to me.
>>>> First of all, IMNSHO you can argue this way only if ref is a reference seen in
>>>> loop LOOP,
>>>
>>> or inner loops of LOOP I guess.  I _think_ we never call ref_indep_loop_p_1 with
>>> a REF whose loop is not a sub-loop of LOOP or LOOP itself (as it would not make
>>> sense to do that, it would be a waste of time).
>>>
>>> So only if "or inner loops of LOOP" is not correct the check would be needed
>>> but then my issue with unrolling an inner loop and turning a ref that safelen
>>> does not apply to into a ref that it now applies to arises.
>>>
>>> I don't fully get what Jakub is hinting at.
>>>
>>> Can you install the safelen > 0 -> safelen > 1 fix please?  Jakub, can you
>>> explain that bitmap check with a simple testcase?
>>>
>>> Thanks,
>>> Richard.
>>>
>>>> which is the case of e.g. *.omp_data_i_23(D).a ref in simd3.f90 -O2
>>>> -fopenmp -msse2, but not the D.3815[0] case tested during can_sm_ref_p - the
>>>> D.3815[0] = 0; as well as something = D.3815[0]; stmt found in the outer loop
>>>> obviously can be dependent on many of the loads and/or stores in the loop, be
>>>> it "omp simd array" or not.
>>>> Say for
>>>> void
>>>> foo (int *p, int *q)
>>>> {
>>>>   #pragma omp simd
>>>>   for (int i = 0; i < 1024; i++)
>>>>     p[i] += q[0];
>>>> }
>>>> sure, q[0] can't alias p[0] ... p[1022], the earlier iterations could write
>>>> something that changes its value, and then it would behave differently from
>>>> using VF = 1024, where everything is performed in parallel.
>>>> Though, actually, it can alias, just it would have to write the same value as
>>>> was there.  So, if this is used to determine if it is safe to hoist the load
>>>> before the loop, it is fine, if it is used to determine if &q[0] >= &p[0] &&
>>>> &q[0] <= &p[1023], then it is not fine.
>>>>
>>>> For aliasing of q[0] and p[1023], I don't see why they couldn't alias in a
>>>> valid program.  #pragma omp simd I think guarantees that the last iteration is
>>>> executed last, it isn't necessarily executed last alone, it could be, or
>>>> together with one before last iteration, or (for simdlen INT_MAX) even all
>>>> iterations can be done concurrently, in hw or sw, so it is fine if it is
>>>> transformed into:
>>>>   int temp[1024], temp2[1024], temp3[1024];
>>>>   for (int i = 0; i < 1024; i++)
>>>>     temp[i] = p[i];
>>>>   for (int i = 0; i < 1024; i++)
>>>>     temp2[i] = q[0];
>>>>   /* The above two loops can be also swapped, or intermixed.  */
>>>>   for (int i = 0; i < 1024; i++)
>>>>     temp3[i] = temp[i] + temp2[i];
>>>>   for (int i = 0; i < 1024; i++)
>>>>     p[i] = temp3[i];
>>>>   /* Or the above loop reversed etc. */
>>>>
>>>> If you have:
>>>> int
>>>> bar (int *p, int *q)
>>>> {
>>>>   q[0] = 0;
>>>>   #pragma omp simd
>>>>   for (int i = 0; i < 1024; i++)
>>>>     p[i]++;
>>>>   return q[0];
>>>> }
>>>> i.e. something similar to what misbehaves in simd3.f90 with the change, then
>>>> the answer is that q[0] isn't guaranteed to be independent of any references in
>>>> the simd loop.
>>>>
>>>> 2016-07-07 16:57 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>> On Thu, Jul 7, 2016 at 3:24 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>> Richard,
>>>>>>
>>>>>> Did you meas the following check enhancement:
>>>>>>
>>>>>>   outer = loop_outer (loop);
>>>>>>   /* We consider REF defined in LOOP as independent if at least 2 loop
>>>>>>      iterations are not dependent.  */
>>>>>>   if ((loop->safelen > 1
>>>>>>        || (outer && outer->safelen > 1))
>>>>>>       && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num], ref->id))
>>>>>>     return true;
>>>>>> which allows us to consider reference x[0] as invariant for the test
>>>>>> #pragma omp simd
>>>>>>   for (i = 0; i< 100; i++)
>>>>>>     {
>>>>>>       y[i] = i * 2;
>>>>>>       for (j = 0; j < 100; j++)
>>>>>> z[j] += x[0];
>>>>>>     }
>>>>>>
>>>>>> Moving statement
>>>>>> _9 = *x_19(D);
>>>>>> (cost 20) out of loop 1.
>>>>>
>>>>> outer loops will be automatically considered by outermost_indep_loop
>>>>> which eventually
>>>>> calls the function you are patching.
>>>>>
>>>>> I was questioning the added test
>>>>>
>>>>>        && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num], ref->id))
>>>>>
>>>>> and still do not understand why you need that.  It makes us only consider the
>>>>> loop of ref itself when looking at safelen (but not refs that belong
>>>>> to inner loops
>>>>> of loop).
>>>>>
>>>>> Richard.
>>>>>
>>>>>
>>>>>> 2016-07-07 11:46 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>> On Wed, Jul 6, 2016 at 4:42 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>> Richard,
>>>>>>>>
>>>>>>>> I don't understand your question and how it is related to proposed patch.
>>>>>>>>
>>>>>>>> 2016-07-06 16:55 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>> On Wed, Jul 6, 2016 at 3:50 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>> See for example Jakub comments for 70729.
>>>>>>>>>
>>>>>>>>> But how can the check be valid given a
>>>>>>>>>
>>>>>>>>> #pragma omp simd
>>>>>>>>>   for (;;)
>>>>>>>>>     {
>>>>>>>>>        for (;;)
>>>>>>>>>          ...ref in question...
>>>>>>>>>     }
>>>>>>>>>
>>>>>>>>> can be transformed to
>>>>>>>>>
>>>>>>>>> #pragma omp simd
>>>>>>>>>    for (;;)
>>>>>>>>>      {
>>>>>>>>>         ...ref in question...
>>>>>>>>>         ...ref in question..
>>>>>>>>>         ...
>>>>>>>>>      }
>>>>>>>>>
>>>>>>>>> by means of unrolling?
>>>>>>>
>>>>>>> The bitmap check would return false for the not unrolled inner loop
>>>>>>> and true for the inner unrolled loop.
>>>>>>> So it cannot be correct.
>>>>>>>
>>>>>>> (not sure why this is all off-list btw)
>>>>>>>
>>>>>>> Richard.
>>>>>>>
>>>>>>>>> Richard.
>>>>>>>>>
>>>>>>>>>> 2016-07-06 16:25 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>>>> On Wed, Jul 6, 2016 at 1:43 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>> I did not learn LIM detailed but I see on the following example
>>>>>>>>>>>> #pragma omp simd
>>>>>>>>>>>>   for (i = 0; i< 100; i++)
>>>>>>>>>>>>     {
>>>>>>>>>>>>       y[i] = i * 2;
>>>>>>>>>>>>       for (j = 0; j < 100; j++)
>>>>>>>>>>>> z[j] += x[0];
>>>>>>>>>>>>     }
>>>>>>>>>>>> that only inner,ost loop is handled:
>>>>>>>>>>>>
>>>>>>>>>>>> Memory reference in loop#2 1: *_7
>>>>>>>>>>>> Memory reference in loop#2 2: *x_19(D)
>>>>>>>>>>>> Memory reference in loop#1 3: *_3
>>>>>>>>>>>> Basic block 3 (loop 1 -- depth 1):
>>>>>>>>>>>>
>>>>>>>>>>>> Basic block 4 (loop 2 -- depth 2):
>>>>>>>>>>>>
>>>>>>>>>>>> Loop#2 is innermost
>>>>>>>>>>>> Querying dependency of refs 2 and 1: dependent.
>>>>>>>>>>>> Querying dependencies of ref 2 in loop 2: dependent
>>>>>>>>>>>
>>>>>>>>>>> well, LIM doesn't bother to check against refs in an outer loop if
>>>>>>>>>>> refs in the inner loop are already dependent.
>>>>>>>>>>>
>>>>>>>>>>>> Loop unroll of inner loop does not invalidate 'safelen' of outer one
>>>>>>>>>>>> accordingly with safelen definition.
>>>>>>>>>>>
>>>>>>>>>>> So why do you need the bitmap_bit_p check then?
>>>>>>>>>>>
>>>>>>>>>>> Richard.
>>>>>>>>>>>
>>>>>>>>>>>> 2016-07-06 14:23 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 1:17 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> LIM phase considers only innermost loops and does not handle loop
>>>>>>>>>>>>>> nests, so in your case the innermost loop (j) does not have non-zero
>>>>>>>>>>>>>> safelen.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Huh?  LIM considers loop nests just fine.  And yes, the innermost loop
>>>>>>>>>>>>> does not have safelen but shouldn't it?  Consider the inner loop being unrolled
>>>>>>>>>>>>> by GCC so it is removed - should that then invalidate the outer loop safelen
>>>>>>>>>>>>> because innermost loop refs now appear in the outer loop?
>>>>>>>>>>>>>
>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>> 2016-07-06 13:34 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 11:50 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> I pointed out in the commentary that REF is defined inside loop and
>>>>>>>>>>>>>>>> this check is related to this statement. Should I clarify it?
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> +  /* We consider REF defined in LOOP as independent if at least 2 loop
>>>>>>>>>>>>>>>> +     iterations are not dependent.  */
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Yes, I fail to see why x[0] should not be disambiguated against y[i] in
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> #pragma GCC loop ivdep
>>>>>>>>>>>>>>>   for (i...)
>>>>>>>>>>>>>>>     {
>>>>>>>>>>>>>>>       y[i] = ...;
>>>>>>>>>>>>>>>       for (j...)
>>>>>>>>>>>>>>>         ... = x[0];
>>>>>>>>>>>>>>>     }
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> REF is always inside the loop nest with LOOP being the outermost loop.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 2016-07-06 12:38 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>>> On Tue, Jul 5, 2016 at 4:56 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>>> Hi All,
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Here is a simple fix to cure regressions introduced by my fix for
>>>>>>>>>>>>>>>>>> 70729. Patch also contains minor changes in test found by Jakub.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Bootstrapping and regression testing did not show any new failures.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Is it OK for trunk?
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> +      && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num], ref->id))
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> So safelen does not apply to refs in nested loops?  The added comment only
>>>>>>>>>>>>>>>>> explains the safelen check change but this also requires explanation.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> ChangeLog:
>>>>>>>>>>>>>>>>>> 2016-07-05  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> PR tree-optimization/71734
>>>>>>>>>>>>>>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Consider REF defined in
>>>>>>>>>>>>>>>>>> LOOP as independent if at least two loop iterations are not dependent.
>>>>>>>>>>>>>>>>>> gcc/testsuite/ChangeLog:
>>>>>>>>>>>>>>>>>>         * g++.dg/vect/pr70729.cc: Delete redundant dg options, fix style.

[-- Attachment #2: 71734.patch --]
[-- Type: application/octet-stream, Size: 4660 bytes --]

diff --git a/gcc/testsuite/g++.dg/vect/pr70729.cc b/gcc/testsuite/g++.dg/vect/pr70729.cc
index 0d5d353..014de8c 100644
--- a/gcc/testsuite/g++.dg/vect/pr70729.cc
+++ b/gcc/testsuite/g++.dg/vect/pr70729.cc
@@ -1,14 +1,13 @@
 // { dg-do compile }
-// { dg-require-effective-target vect_simd_clones }
-// { dg-additional-options "-Ofast" }
-// { dg-additional-options "-mavx2 -fopenmp-simd" { target x86_64-*-* i?86-*-* } }
+// { dg-additional-options "-ffast-math -fopenmp-simd" }
+// { dg-additional-options "-msse2" { target x86_64-*-* i?86-*-* } }
 
 
 #include <string.h>
 #include <xmmintrin.h>
 
-inline void* my_alloc(size_t bytes) {return _mm_malloc(bytes, 128);}
-inline void my_free(void* memory) {_mm_free(memory);}
+inline void* my_alloc (size_t bytes) {return _mm_malloc (bytes, 128);}
+inline void my_free (void* memory) {_mm_free (memory);}
 
 template <typename T>
 class Vec
@@ -18,13 +17,13 @@ class Vec
 
 public:
 
-  Vec (int n) : isize(n) {data = (T*)my_alloc(isize*sizeof(T));}
+  Vec (int n) : isize (n) {data = (T*)my_alloc (isize*sizeof (T));}
   ~Vec () {my_free(data);}
 
   Vec& operator = (const Vec& other)	
     {
       if (this != &other)
-	memcpy(data, other.data, isize*sizeof(T));
+	memcpy (data, other.data, isize*sizeof (T));
       return *this;
     }
 
@@ -67,7 +66,7 @@ struct Ss
 void Ss::foo (float *in, float w)
 {
 #pragma omp simd
-  for (int i=0; i<S_n; i++)
+  for (int i = 0; i < S_n; i++)
     {
       float w1 = C2[S_n + i] * w;
       v1.v_i[i] += (int)w1;
@@ -75,4 +74,4 @@ void Ss::foo (float *in, float w)
     }
 }
  
-// { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } }
+// { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target x86_64-*-* i?86-*-* } } }
diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index ee04826..278f60a 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -2109,11 +2109,12 @@ record_dep_loop (struct loop *loop, im_mem_ref *ref, bool stored_p)
     loop = loop_outer (loop);
 }
 
-/* Returns true if REF is independent on all other memory references in
-   LOOP.  */
+/* Returns true if REF in REF_LOOP is independent on all other memory
+   references in LOOP.  */
 
 static bool
-ref_indep_loop_p_1 (struct loop *loop, im_mem_ref *ref, bool stored_p)
+ref_indep_loop_p_1 (struct loop *ref_loop, struct loop *loop,
+		    im_mem_ref *ref, bool stored_p)
 {
   bitmap refs_to_check;
   unsigned i;
@@ -2128,13 +2129,14 @@ ref_indep_loop_p_1 (struct loop *loop, im_mem_ref *ref, bool stored_p)
   if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
     return false;
 
-  if (loop->safelen > 0)
+  if (ref_loop->safelen > 1)
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	{
-	  fprintf(dump_file,"Consider REF independent in loop#%d\n", loop->num);
-	  print_generic_expr(dump_file, ref->mem.ref, TDF_SLIM);
-	  fprintf(dump_file, "\n");
+	  fprintf (dump_file,"REF is independent in ref_loop#%d\n",
+		   ref_loop->num);
+	  print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
+	  fprintf (dump_file, "\n");
 	}
       return true;
     }
@@ -2149,11 +2151,13 @@ ref_indep_loop_p_1 (struct loop *loop, im_mem_ref *ref, bool stored_p)
   return true;
 }
 
-/* Returns true if REF is independent on all other memory references in
-   LOOP.  Wrapper over ref_indep_loop_p_1, caching its results.  */
+/* Returns true if REF in REF_LOOP is independent on all other memory
+   references in LOOP.  Wrapper over ref_indep_loop_p_1, caching its
+   results.  */
 
 static bool
-ref_indep_loop_p_2 (struct loop *loop, im_mem_ref *ref, bool stored_p)
+ref_indep_loop_p_2 (struct loop *ref_loop, struct loop *loop,
+		    im_mem_ref *ref, bool stored_p)
 {
   stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));
 
@@ -2165,12 +2169,12 @@ ref_indep_loop_p_2 (struct loop *loop, im_mem_ref *ref, bool stored_p)
   struct loop *inner = loop->inner;
   while (inner)
     {
-      if (!ref_indep_loop_p_2 (inner, ref, stored_p))
+      if (!ref_indep_loop_p_2 (ref_loop, inner, ref, stored_p))
 	return false;
       inner = inner->next;
     }
 
-  bool indep_p = ref_indep_loop_p_1 (loop, ref, stored_p);
+  bool indep_p = ref_indep_loop_p_1 (ref_loop, loop, ref, stored_p);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
@@ -2209,7 +2213,7 @@ ref_indep_loop_p (struct loop *loop, im_mem_ref *ref)
 {
   gcc_checking_assert (MEM_ANALYZABLE (ref));
 
-  return ref_indep_loop_p_2 (loop, ref, false);
+  return ref_indep_loop_p_2 (loop, loop, ref, false);
 }
 
 /* Returns true if we can perform store motion of REF from LOOP.  */

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
  2016-07-08 14:07                                   ` Yuri Rumyantsev
@ 2016-07-15 14:06                                     ` Yuri Rumyantsev
  2016-07-18 10:33                                     ` Richard Biener
  2016-07-19 15:29                                     ` Renlin Li
  2 siblings, 0 replies; 24+ messages in thread
From: Yuri Rumyantsev @ 2016-07-15 14:06 UTC (permalink / raw)
  To: Richard Biener, gcc-patches,
	Илья
	Энкович,
	Igor Zamyatin
  Cc: Jakub Jelinek

Richard!

Did you have a chance to look at this patch?

Thanks.
Yuri.

2016-07-08 17:07 GMT+03:00 Yuri Rumyantsev <ysrumyan@gmail.com>:
> Hi Richard,
>
> Thanks for your help - your patch looks much better.
> Here is new patch in which additional argument was added to determine
> source loop of reference.
>
> Bootstrap and regression testing did not show any new failures.
>
> Is it OK for trunk?
> ChangeLog:
> 2016-07-08  Yuri Rumyantsev  <ysrumyan@gmail.com>
>
> PR tree-optimization/71734
> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Add REF_LOOP argument which
> contains REF, use it to check safelen, assume that safelen value
> must be greater 1, fix style.
> (ref_indep_loop_p_2): Add REF_LOOP argument.
> (ref_indep_loop_p): Pass LOOP as additional argument to
> ref_indep_loop_p_2.
> gcc/testsuite/ChangeLog:
>         * g++.dg/vect/pr70729.cc: Delete redundant dg options, fix style.
>
> 2016-07-08 11:18 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>> On Thu, Jul 7, 2016 at 5:38 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>> I checked simd3.f90 and found out that my additional check reject
>>> independence of references
>>>
>>> REF is independent in loop#3
>>> .istart0.19, .iend0.20
>>> which are defined in loop#1 which is outer for loop#3.
>>> Note that these references are defined by
>>> _103 = __builtin_GOMP_loop_dynamic_next (&.istart0.19, &.iend0.20);
>>> which is in loop#1.
>>> It is clear that both these references can not be independent for loop#3.
>>
>> Ok, so we end up calling ref_indep_loop for ref in LOOP also for inner loops
>> of LOOP to catch memory references in those as well.  So the issue is really
>> that we look at the wrong loop for safelen and we _do_ want to apply safelen
>> to inner loops as well.
>>
>> So better track the loop we are ultimately asking the question for, like in the
>> attached patch (fixes the testcase for me).
>>
>> Richard.
>>
>>
>>
>>> 2016-07-07 17:11 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>> On Thu, Jul 7, 2016 at 4:04 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>> I Added this check because of new failures in libgomp.fortran suite.
>>>>> Here is copy of Jakub message:
>>>>> --- Comment #29 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
>>>>> The #c27 r237844 change looks bogus to me.
>>>>> First of all, IMNSHO you can argue this way only if ref is a reference seen in
>>>>> loop LOOP,
>>>>
>>>> or inner loops of LOOP I guess.  I _think_ we never call ref_indep_loop_p_1 with
>>>> a REF whose loop is not a sub-loop of LOOP or LOOP itself (as it would not make
>>>> sense to do that, it would be a waste of time).
>>>>
>>>> So only if "or inner loops of LOOP" is not correct the check would be needed
>>>> but then my issue with unrolling an inner loop and turning a ref that safelen
>>>> does not apply to into a ref that it now applies to arises.
>>>>
>>>> I don't fully get what Jakub is hinting at.
>>>>
>>>> Can you install the safelen > 0 -> safelen > 1 fix please?  Jakub, can you
>>>> explain that bitmap check with a simple testcase?
>>>>
>>>> Thanks,
>>>> Richard.
>>>>
>>>>> which is the case of e.g. *.omp_data_i_23(D).a ref in simd3.f90 -O2
>>>>> -fopenmp -msse2, but not the D.3815[0] case tested during can_sm_ref_p - the
>>>>> D.3815[0] = 0; as well as something = D.3815[0]; stmt found in the outer loop
>>>>> obviously can be dependent on many of the loads and/or stores in the loop, be
>>>>> it "omp simd array" or not.
>>>>> Say for
>>>>> void
>>>>> foo (int *p, int *q)
>>>>> {
>>>>>   #pragma omp simd
>>>>>   for (int i = 0; i < 1024; i++)
>>>>>     p[i] += q[0];
>>>>> }
>>>>> sure, q[0] can't alias p[0] ... p[1022], the earlier iterations could write
>>>>> something that changes its value, and then it would behave differently from
>>>>> using VF = 1024, where everything is performed in parallel.
>>>>> Though, actually, it can alias, just it would have to write the same value as
>>>>> was there.  So, if this is used to determine if it is safe to hoist the load
>>>>> before the loop, it is fine, if it is used to determine if &q[0] >= &p[0] &&
>>>>> &q[0] <= &p[1023], then it is not fine.
>>>>>
>>>>> For aliasing of q[0] and p[1023], I don't see why they couldn't alias in a
>>>>> valid program.  #pragma omp simd I think guarantees that the last iteration is
>>>>> executed last, it isn't necessarily executed last alone, it could be, or
>>>>> together with one before last iteration, or (for simdlen INT_MAX) even all
>>>>> iterations can be done concurrently, in hw or sw, so it is fine if it is
>>>>> transformed into:
>>>>>   int temp[1024], temp2[1024], temp3[1024];
>>>>>   for (int i = 0; i < 1024; i++)
>>>>>     temp[i] = p[i];
>>>>>   for (int i = 0; i < 1024; i++)
>>>>>     temp2[i] = q[0];
>>>>>   /* The above two loops can be also swapped, or intermixed.  */
>>>>>   for (int i = 0; i < 1024; i++)
>>>>>     temp3[i] = temp[i] + temp2[i];
>>>>>   for (int i = 0; i < 1024; i++)
>>>>>     p[i] = temp3[i];
>>>>>   /* Or the above loop reversed etc. */
>>>>>
>>>>> If you have:
>>>>> int
>>>>> bar (int *p, int *q)
>>>>> {
>>>>>   q[0] = 0;
>>>>>   #pragma omp simd
>>>>>   for (int i = 0; i < 1024; i++)
>>>>>     p[i]++;
>>>>>   return q[0];
>>>>> }
>>>>> i.e. something similar to what misbehaves in simd3.f90 with the change, then
>>>>> the answer is that q[0] isn't guaranteed to be independent of any references in
>>>>> the simd loop.
>>>>>
>>>>> 2016-07-07 16:57 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>> On Thu, Jul 7, 2016 at 3:24 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>> Richard,
>>>>>>>
>>>>>>> Did you meas the following check enhancement:
>>>>>>>
>>>>>>>   outer = loop_outer (loop);
>>>>>>>   /* We consider REF defined in LOOP as independent if at least 2 loop
>>>>>>>      iterations are not dependent.  */
>>>>>>>   if ((loop->safelen > 1
>>>>>>>        || (outer && outer->safelen > 1))
>>>>>>>       && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num], ref->id))
>>>>>>>     return true;
>>>>>>> which allows us to consider reference x[0] as invariant for the test
>>>>>>> #pragma omp simd
>>>>>>>   for (i = 0; i< 100; i++)
>>>>>>>     {
>>>>>>>       y[i] = i * 2;
>>>>>>>       for (j = 0; j < 100; j++)
>>>>>>> z[j] += x[0];
>>>>>>>     }
>>>>>>>
>>>>>>> Moving statement
>>>>>>> _9 = *x_19(D);
>>>>>>> (cost 20) out of loop 1.
>>>>>>
>>>>>> outer loops will be automatically considered by outermost_indep_loop
>>>>>> which eventually
>>>>>> calls the function you are patching.
>>>>>>
>>>>>> I was questioning the added test
>>>>>>
>>>>>>        && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num], ref->id))
>>>>>>
>>>>>> and still do not understand why you need that.  It makes us only consider the
>>>>>> loop of ref itself when looking at safelen (but not refs that belong
>>>>>> to inner loops
>>>>>> of loop).
>>>>>>
>>>>>> Richard.
>>>>>>
>>>>>>
>>>>>>> 2016-07-07 11:46 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>> On Wed, Jul 6, 2016 at 4:42 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>> Richard,
>>>>>>>>>
>>>>>>>>> I don't understand your question and how it is related to proposed patch.
>>>>>>>>>
>>>>>>>>> 2016-07-06 16:55 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>>> On Wed, Jul 6, 2016 at 3:50 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>> See for example Jakub comments for 70729.
>>>>>>>>>>
>>>>>>>>>> But how can the check be valid given a
>>>>>>>>>>
>>>>>>>>>> #pragma omp simd
>>>>>>>>>>   for (;;)
>>>>>>>>>>     {
>>>>>>>>>>        for (;;)
>>>>>>>>>>          ...ref in question...
>>>>>>>>>>     }
>>>>>>>>>>
>>>>>>>>>> can be transformed to
>>>>>>>>>>
>>>>>>>>>> #pragma omp simd
>>>>>>>>>>    for (;;)
>>>>>>>>>>      {
>>>>>>>>>>         ...ref in question...
>>>>>>>>>>         ...ref in question..
>>>>>>>>>>         ...
>>>>>>>>>>      }
>>>>>>>>>>
>>>>>>>>>> by means of unrolling?
>>>>>>>>
>>>>>>>> The bitmap check would return false for the not unrolled inner loop
>>>>>>>> and true for the inner unrolled loop.
>>>>>>>> So it cannot be correct.
>>>>>>>>
>>>>>>>> (not sure why this is all off-list btw)
>>>>>>>>
>>>>>>>> Richard.
>>>>>>>>
>>>>>>>>>> Richard.
>>>>>>>>>>
>>>>>>>>>>> 2016-07-06 16:25 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>>>>> On Wed, Jul 6, 2016 at 1:43 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>> I did not learn LIM detailed but I see on the following example
>>>>>>>>>>>>> #pragma omp simd
>>>>>>>>>>>>>   for (i = 0; i< 100; i++)
>>>>>>>>>>>>>     {
>>>>>>>>>>>>>       y[i] = i * 2;
>>>>>>>>>>>>>       for (j = 0; j < 100; j++)
>>>>>>>>>>>>> z[j] += x[0];
>>>>>>>>>>>>>     }
>>>>>>>>>>>>> that only inner,ost loop is handled:
>>>>>>>>>>>>>
>>>>>>>>>>>>> Memory reference in loop#2 1: *_7
>>>>>>>>>>>>> Memory reference in loop#2 2: *x_19(D)
>>>>>>>>>>>>> Memory reference in loop#1 3: *_3
>>>>>>>>>>>>> Basic block 3 (loop 1 -- depth 1):
>>>>>>>>>>>>>
>>>>>>>>>>>>> Basic block 4 (loop 2 -- depth 2):
>>>>>>>>>>>>>
>>>>>>>>>>>>> Loop#2 is innermost
>>>>>>>>>>>>> Querying dependency of refs 2 and 1: dependent.
>>>>>>>>>>>>> Querying dependencies of ref 2 in loop 2: dependent
>>>>>>>>>>>>
>>>>>>>>>>>> well, LIM doesn't bother to check against refs in an outer loop if
>>>>>>>>>>>> refs in the inner loop are already dependent.
>>>>>>>>>>>>
>>>>>>>>>>>>> Loop unroll of inner loop does not invalidate 'safelen' of outer one
>>>>>>>>>>>>> accordingly with safelen definition.
>>>>>>>>>>>>
>>>>>>>>>>>> So why do you need the bitmap_bit_p check then?
>>>>>>>>>>>>
>>>>>>>>>>>> Richard.
>>>>>>>>>>>>
>>>>>>>>>>>>> 2016-07-06 14:23 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 1:17 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> LIM phase considers only innermost loops and does not handle loop
>>>>>>>>>>>>>>> nests, so in your case the innermost loop (j) does not have non-zero
>>>>>>>>>>>>>>> safelen.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Huh?  LIM considers loop nests just fine.  And yes, the innermost loop
>>>>>>>>>>>>>> does not have safelen but shouldn't it?  Consider the inner loop being unrolled
>>>>>>>>>>>>>> by GCC so it is removed - should that then invalidate the outer loop safelen
>>>>>>>>>>>>>> because innermost loop refs now appear in the outer loop?
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> 2016-07-06 13:34 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 11:50 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> I pointed out in the commentary that REF is defined inside loop and
>>>>>>>>>>>>>>>>> this check is related to this statement. Should I clarify it?
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> +  /* We consider REF defined in LOOP as independent if at least 2 loop
>>>>>>>>>>>>>>>>> +     iterations are not dependent.  */
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Yes, I fail to see why x[0] should not be disambiguated against y[i] in
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> #pragma GCC loop ivdep
>>>>>>>>>>>>>>>>   for (i...)
>>>>>>>>>>>>>>>>     {
>>>>>>>>>>>>>>>>       y[i] = ...;
>>>>>>>>>>>>>>>>       for (j...)
>>>>>>>>>>>>>>>>         ... = x[0];
>>>>>>>>>>>>>>>>     }
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> REF is always inside the loop nest with LOOP being the outermost loop.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> 2016-07-06 12:38 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>>>> On Tue, Jul 5, 2016 at 4:56 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>>>> Hi All,
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Here is a simple fix to cure regressions introduced by my fix for
>>>>>>>>>>>>>>>>>>> 70729. Patch also contains minor changes in test found by Jakub.
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Bootstrapping and regression testing did not show any new failures.
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Is it OK for trunk?
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> +      && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num], ref->id))
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> So safelen does not apply to refs in nested loops?  The added comment only
>>>>>>>>>>>>>>>>>> explains the safelen check change but this also requires explanation.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> ChangeLog:
>>>>>>>>>>>>>>>>>>> 2016-07-05  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> PR tree-optimization/71734
>>>>>>>>>>>>>>>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Consider REF defined in
>>>>>>>>>>>>>>>>>>> LOOP as independent if at least two loop iterations are not dependent.
>>>>>>>>>>>>>>>>>>> gcc/testsuite/ChangeLog:
>>>>>>>>>>>>>>>>>>>         * g++.dg/vect/pr70729.cc: Delete redundant dg options, fix style.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
  2016-07-08 14:07                                   ` Yuri Rumyantsev
  2016-07-15 14:06                                     ` Yuri Rumyantsev
@ 2016-07-18 10:33                                     ` Richard Biener
  2016-07-19 15:29                                     ` Renlin Li
  2 siblings, 0 replies; 24+ messages in thread
From: Richard Biener @ 2016-07-18 10:33 UTC (permalink / raw)
  To: Yuri Rumyantsev
  Cc: gcc-patches,
	Илья
	Энкович,
	Igor Zamyatin, Jakub Jelinek

On Fri, Jul 8, 2016 at 4:07 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
> Hi Richard,
>
> Thanks for your help - your patch looks much better.
> Here is new patch in which additional argument was added to determine
> source loop of reference.
>
> Bootstrap and regression testing did not show any new failures.
>
> Is it OK for trunk?

Yes.

Thanks,
Richard.

> ChangeLog:
> 2016-07-08  Yuri Rumyantsev  <ysrumyan@gmail.com>
>
> PR tree-optimization/71734
> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Add REF_LOOP argument which
> contains REF, use it to check safelen, assume that safelen value
> must be greater 1, fix style.
> (ref_indep_loop_p_2): Add REF_LOOP argument.
> (ref_indep_loop_p): Pass LOOP as additional argument to
> ref_indep_loop_p_2.
> gcc/testsuite/ChangeLog:
>         * g++.dg/vect/pr70729.cc: Delete redundant dg options, fix style.
>
> 2016-07-08 11:18 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>> On Thu, Jul 7, 2016 at 5:38 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>> I checked simd3.f90 and found out that my additional check reject
>>> independence of references
>>>
>>> REF is independent in loop#3
>>> .istart0.19, .iend0.20
>>> which are defined in loop#1 which is outer for loop#3.
>>> Note that these references are defined by
>>> _103 = __builtin_GOMP_loop_dynamic_next (&.istart0.19, &.iend0.20);
>>> which is in loop#1.
>>> It is clear that both these references can not be independent for loop#3.
>>
>> Ok, so we end up calling ref_indep_loop for ref in LOOP also for inner loops
>> of LOOP to catch memory references in those as well.  So the issue is really
>> that we look at the wrong loop for safelen and we _do_ want to apply safelen
>> to inner loops as well.
>>
>> So better track the loop we are ultimately asking the question for, like in the
>> attached patch (fixes the testcase for me).
>>
>> Richard.
>>
>>
>>
>>> 2016-07-07 17:11 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>> On Thu, Jul 7, 2016 at 4:04 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>> I Added this check because of new failures in libgomp.fortran suite.
>>>>> Here is copy of Jakub message:
>>>>> --- Comment #29 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
>>>>> The #c27 r237844 change looks bogus to me.
>>>>> First of all, IMNSHO you can argue this way only if ref is a reference seen in
>>>>> loop LOOP,
>>>>
>>>> or inner loops of LOOP I guess.  I _think_ we never call ref_indep_loop_p_1 with
>>>> a REF whose loop is not a sub-loop of LOOP or LOOP itself (as it would not make
>>>> sense to do that, it would be a waste of time).
>>>>
>>>> So only if "or inner loops of LOOP" is not correct the check would be needed
>>>> but then my issue with unrolling an inner loop and turning a ref that safelen
>>>> does not apply to into a ref that it now applies to arises.
>>>>
>>>> I don't fully get what Jakub is hinting at.
>>>>
>>>> Can you install the safelen > 0 -> safelen > 1 fix please?  Jakub, can you
>>>> explain that bitmap check with a simple testcase?
>>>>
>>>> Thanks,
>>>> Richard.
>>>>
>>>>> which is the case of e.g. *.omp_data_i_23(D).a ref in simd3.f90 -O2
>>>>> -fopenmp -msse2, but not the D.3815[0] case tested during can_sm_ref_p - the
>>>>> D.3815[0] = 0; as well as something = D.3815[0]; stmt found in the outer loop
>>>>> obviously can be dependent on many of the loads and/or stores in the loop, be
>>>>> it "omp simd array" or not.
>>>>> Say for
>>>>> void
>>>>> foo (int *p, int *q)
>>>>> {
>>>>>   #pragma omp simd
>>>>>   for (int i = 0; i < 1024; i++)
>>>>>     p[i] += q[0];
>>>>> }
>>>>> sure, q[0] can't alias p[0] ... p[1022], the earlier iterations could write
>>>>> something that changes its value, and then it would behave differently from
>>>>> using VF = 1024, where everything is performed in parallel.
>>>>> Though, actually, it can alias, just it would have to write the same value as
>>>>> was there.  So, if this is used to determine if it is safe to hoist the load
>>>>> before the loop, it is fine, if it is used to determine if &q[0] >= &p[0] &&
>>>>> &q[0] <= &p[1023], then it is not fine.
>>>>>
>>>>> For aliasing of q[0] and p[1023], I don't see why they couldn't alias in a
>>>>> valid program.  #pragma omp simd I think guarantees that the last iteration is
>>>>> executed last, it isn't necessarily executed last alone, it could be, or
>>>>> together with one before last iteration, or (for simdlen INT_MAX) even all
>>>>> iterations can be done concurrently, in hw or sw, so it is fine if it is
>>>>> transformed into:
>>>>>   int temp[1024], temp2[1024], temp3[1024];
>>>>>   for (int i = 0; i < 1024; i++)
>>>>>     temp[i] = p[i];
>>>>>   for (int i = 0; i < 1024; i++)
>>>>>     temp2[i] = q[0];
>>>>>   /* The above two loops can be also swapped, or intermixed.  */
>>>>>   for (int i = 0; i < 1024; i++)
>>>>>     temp3[i] = temp[i] + temp2[i];
>>>>>   for (int i = 0; i < 1024; i++)
>>>>>     p[i] = temp3[i];
>>>>>   /* Or the above loop reversed etc. */
>>>>>
>>>>> If you have:
>>>>> int
>>>>> bar (int *p, int *q)
>>>>> {
>>>>>   q[0] = 0;
>>>>>   #pragma omp simd
>>>>>   for (int i = 0; i < 1024; i++)
>>>>>     p[i]++;
>>>>>   return q[0];
>>>>> }
>>>>> i.e. something similar to what misbehaves in simd3.f90 with the change, then
>>>>> the answer is that q[0] isn't guaranteed to be independent of any references in
>>>>> the simd loop.
>>>>>
>>>>> 2016-07-07 16:57 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>> On Thu, Jul 7, 2016 at 3:24 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>> Richard,
>>>>>>>
>>>>>>> Did you meas the following check enhancement:
>>>>>>>
>>>>>>>   outer = loop_outer (loop);
>>>>>>>   /* We consider REF defined in LOOP as independent if at least 2 loop
>>>>>>>      iterations are not dependent.  */
>>>>>>>   if ((loop->safelen > 1
>>>>>>>        || (outer && outer->safelen > 1))
>>>>>>>       && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num], ref->id))
>>>>>>>     return true;
>>>>>>> which allows us to consider reference x[0] as invariant for the test
>>>>>>> #pragma omp simd
>>>>>>>   for (i = 0; i< 100; i++)
>>>>>>>     {
>>>>>>>       y[i] = i * 2;
>>>>>>>       for (j = 0; j < 100; j++)
>>>>>>> z[j] += x[0];
>>>>>>>     }
>>>>>>>
>>>>>>> Moving statement
>>>>>>> _9 = *x_19(D);
>>>>>>> (cost 20) out of loop 1.
>>>>>>
>>>>>> outer loops will be automatically considered by outermost_indep_loop
>>>>>> which eventually
>>>>>> calls the function you are patching.
>>>>>>
>>>>>> I was questioning the added test
>>>>>>
>>>>>>        && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num], ref->id))
>>>>>>
>>>>>> and still do not understand why you need that.  It makes us only consider the
>>>>>> loop of ref itself when looking at safelen (but not refs that belong
>>>>>> to inner loops
>>>>>> of loop).
>>>>>>
>>>>>> Richard.
>>>>>>
>>>>>>
>>>>>>> 2016-07-07 11:46 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>> On Wed, Jul 6, 2016 at 4:42 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>> Richard,
>>>>>>>>>
>>>>>>>>> I don't understand your question and how it is related to proposed patch.
>>>>>>>>>
>>>>>>>>> 2016-07-06 16:55 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>>> On Wed, Jul 6, 2016 at 3:50 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>> See for example Jakub comments for 70729.
>>>>>>>>>>
>>>>>>>>>> But how can the check be valid given a
>>>>>>>>>>
>>>>>>>>>> #pragma omp simd
>>>>>>>>>>   for (;;)
>>>>>>>>>>     {
>>>>>>>>>>        for (;;)
>>>>>>>>>>          ...ref in question...
>>>>>>>>>>     }
>>>>>>>>>>
>>>>>>>>>> can be transformed to
>>>>>>>>>>
>>>>>>>>>> #pragma omp simd
>>>>>>>>>>    for (;;)
>>>>>>>>>>      {
>>>>>>>>>>         ...ref in question...
>>>>>>>>>>         ...ref in question..
>>>>>>>>>>         ...
>>>>>>>>>>      }
>>>>>>>>>>
>>>>>>>>>> by means of unrolling?
>>>>>>>>
>>>>>>>> The bitmap check would return false for the not unrolled inner loop
>>>>>>>> and true for the inner unrolled loop.
>>>>>>>> So it cannot be correct.
>>>>>>>>
>>>>>>>> (not sure why this is all off-list btw)
>>>>>>>>
>>>>>>>> Richard.
>>>>>>>>
>>>>>>>>>> Richard.
>>>>>>>>>>
>>>>>>>>>>> 2016-07-06 16:25 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>>>>> On Wed, Jul 6, 2016 at 1:43 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>> I did not learn LIM detailed but I see on the following example
>>>>>>>>>>>>> #pragma omp simd
>>>>>>>>>>>>>   for (i = 0; i< 100; i++)
>>>>>>>>>>>>>     {
>>>>>>>>>>>>>       y[i] = i * 2;
>>>>>>>>>>>>>       for (j = 0; j < 100; j++)
>>>>>>>>>>>>> z[j] += x[0];
>>>>>>>>>>>>>     }
>>>>>>>>>>>>> that only inner,ost loop is handled:
>>>>>>>>>>>>>
>>>>>>>>>>>>> Memory reference in loop#2 1: *_7
>>>>>>>>>>>>> Memory reference in loop#2 2: *x_19(D)
>>>>>>>>>>>>> Memory reference in loop#1 3: *_3
>>>>>>>>>>>>> Basic block 3 (loop 1 -- depth 1):
>>>>>>>>>>>>>
>>>>>>>>>>>>> Basic block 4 (loop 2 -- depth 2):
>>>>>>>>>>>>>
>>>>>>>>>>>>> Loop#2 is innermost
>>>>>>>>>>>>> Querying dependency of refs 2 and 1: dependent.
>>>>>>>>>>>>> Querying dependencies of ref 2 in loop 2: dependent
>>>>>>>>>>>>
>>>>>>>>>>>> well, LIM doesn't bother to check against refs in an outer loop if
>>>>>>>>>>>> refs in the inner loop are already dependent.
>>>>>>>>>>>>
>>>>>>>>>>>>> Loop unroll of inner loop does not invalidate 'safelen' of outer one
>>>>>>>>>>>>> accordingly with safelen definition.
>>>>>>>>>>>>
>>>>>>>>>>>> So why do you need the bitmap_bit_p check then?
>>>>>>>>>>>>
>>>>>>>>>>>> Richard.
>>>>>>>>>>>>
>>>>>>>>>>>>> 2016-07-06 14:23 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 1:17 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> LIM phase considers only innermost loops and does not handle loop
>>>>>>>>>>>>>>> nests, so in your case the innermost loop (j) does not have non-zero
>>>>>>>>>>>>>>> safelen.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Huh?  LIM considers loop nests just fine.  And yes, the innermost loop
>>>>>>>>>>>>>> does not have safelen but shouldn't it?  Consider the inner loop being unrolled
>>>>>>>>>>>>>> by GCC so it is removed - should that then invalidate the outer loop safelen
>>>>>>>>>>>>>> because innermost loop refs now appear in the outer loop?
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> 2016-07-06 13:34 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 11:50 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> I pointed out in the commentary that REF is defined inside loop and
>>>>>>>>>>>>>>>>> this check is related to this statement. Should I clarify it?
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> +  /* We consider REF defined in LOOP as independent if at least 2 loop
>>>>>>>>>>>>>>>>> +     iterations are not dependent.  */
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Yes, I fail to see why x[0] should not be disambiguated against y[i] in
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> #pragma GCC loop ivdep
>>>>>>>>>>>>>>>>   for (i...)
>>>>>>>>>>>>>>>>     {
>>>>>>>>>>>>>>>>       y[i] = ...;
>>>>>>>>>>>>>>>>       for (j...)
>>>>>>>>>>>>>>>>         ... = x[0];
>>>>>>>>>>>>>>>>     }
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> REF is always inside the loop nest with LOOP being the outermost loop.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> 2016-07-06 12:38 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>>>> On Tue, Jul 5, 2016 at 4:56 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>>>> Hi All,
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Here is a simple fix to cure regressions introduced by my fix for
>>>>>>>>>>>>>>>>>>> 70729. Patch also contains minor changes in test found by Jakub.
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Bootstrapping and regression testing did not show any new failures.
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Is it OK for trunk?
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> +      && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num], ref->id))
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> So safelen does not apply to refs in nested loops?  The added comment only
>>>>>>>>>>>>>>>>>> explains the safelen check change but this also requires explanation.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> ChangeLog:
>>>>>>>>>>>>>>>>>>> 2016-07-05  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> PR tree-optimization/71734
>>>>>>>>>>>>>>>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Consider REF defined in
>>>>>>>>>>>>>>>>>>> LOOP as independent if at least two loop iterations are not dependent.
>>>>>>>>>>>>>>>>>>> gcc/testsuite/ChangeLog:
>>>>>>>>>>>>>>>>>>>         * g++.dg/vect/pr70729.cc: Delete redundant dg options, fix style.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
  2016-07-08 14:07                                   ` Yuri Rumyantsev
  2016-07-15 14:06                                     ` Yuri Rumyantsev
  2016-07-18 10:33                                     ` Richard Biener
@ 2016-07-19 15:29                                     ` Renlin Li
  2016-07-20  9:35                                       ` Yuri Rumyantsev
  2 siblings, 1 reply; 24+ messages in thread
From: Renlin Li @ 2016-07-19 15:29 UTC (permalink / raw)
  To: Yuri Rumyantsev, Richard Biener, gcc-patches,
	Илья
	Энкович,
	Igor Zamyatin
  Cc: Jakub Jelinek

Hi Yuri,

I saw this test case runs on arm platforms, and maybe other platforms as 
well.

testsuite/g++.dg/vect/pr70729.cc:7:10: fatal error: xmmintrin.h: No such 
file or directory

Before the change here, it's gated by vect_simd_clones target selector, 
which limit it to i?86/x86_64 platform only.

Regards,
Renlin Li



On 08/07/16 15:07, Yuri Rumyantsev wrote:
> Hi Richard,
>
> Thanks for your help - your patch looks much better.
> Here is new patch in which additional argument was added to determine
> source loop of reference.
>
> Bootstrap and regression testing did not show any new failures.
>
> Is it OK for trunk?
> ChangeLog:
> 2016-07-08  Yuri Rumyantsev  <ysrumyan@gmail.com>
>
> PR tree-optimization/71734
> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Add REF_LOOP argument which
> contains REF, use it to check safelen, assume that safelen value
> must be greater 1, fix style.
> (ref_indep_loop_p_2): Add REF_LOOP argument.
> (ref_indep_loop_p): Pass LOOP as additional argument to
> ref_indep_loop_p_2.
> gcc/testsuite/ChangeLog:
>          * g++.dg/vect/pr70729.cc: Delete redundant dg options, fix style.
>
> 2016-07-08 11:18 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>> On Thu, Jul 7, 2016 at 5:38 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>> I checked simd3.f90 and found out that my additional check reject
>>> independence of references
>>>
>>> REF is independent in loop#3
>>> .istart0.19, .iend0.20
>>> which are defined in loop#1 which is outer for loop#3.
>>> Note that these references are defined by
>>> _103 = __builtin_GOMP_loop_dynamic_next (&.istart0.19, &.iend0.20);
>>> which is in loop#1.
>>> It is clear that both these references can not be independent for loop#3.
>>
>> Ok, so we end up calling ref_indep_loop for ref in LOOP also for inner loops
>> of LOOP to catch memory references in those as well.  So the issue is really
>> that we look at the wrong loop for safelen and we _do_ want to apply safelen
>> to inner loops as well.
>>
>> So better track the loop we are ultimately asking the question for, like in the
>> attached patch (fixes the testcase for me).
>>
>> Richard.
>>
>>
>>
>>> 2016-07-07 17:11 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>> On Thu, Jul 7, 2016 at 4:04 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>> I Added this check because of new failures in libgomp.fortran suite.
>>>>> Here is copy of Jakub message:
>>>>> --- Comment #29 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
>>>>> The #c27 r237844 change looks bogus to me.
>>>>> First of all, IMNSHO you can argue this way only if ref is a reference seen in
>>>>> loop LOOP,
>>>>
>>>> or inner loops of LOOP I guess.  I _think_ we never call ref_indep_loop_p_1 with
>>>> a REF whose loop is not a sub-loop of LOOP or LOOP itself (as it would not make
>>>> sense to do that, it would be a waste of time).
>>>>
>>>> So only if "or inner loops of LOOP" is not correct the check would be needed
>>>> but then my issue with unrolling an inner loop and turning a ref that safelen
>>>> does not apply to into a ref that it now applies to arises.
>>>>
>>>> I don't fully get what Jakub is hinting at.
>>>>
>>>> Can you install the safelen > 0 -> safelen > 1 fix please?  Jakub, can you
>>>> explain that bitmap check with a simple testcase?
>>>>
>>>> Thanks,
>>>> Richard.
>>>>
>>>>> which is the case of e.g. *.omp_data_i_23(D).a ref in simd3.f90 -O2
>>>>> -fopenmp -msse2, but not the D.3815[0] case tested during can_sm_ref_p - the
>>>>> D.3815[0] = 0; as well as something = D.3815[0]; stmt found in the outer loop
>>>>> obviously can be dependent on many of the loads and/or stores in the loop, be
>>>>> it "omp simd array" or not.
>>>>> Say for
>>>>> void
>>>>> foo (int *p, int *q)
>>>>> {
>>>>>    #pragma omp simd
>>>>>    for (int i = 0; i < 1024; i++)
>>>>>      p[i] += q[0];
>>>>> }
>>>>> sure, q[0] can't alias p[0] ... p[1022], the earlier iterations could write
>>>>> something that changes its value, and then it would behave differently from
>>>>> using VF = 1024, where everything is performed in parallel.
>>>>> Though, actually, it can alias, just it would have to write the same value as
>>>>> was there.  So, if this is used to determine if it is safe to hoist the load
>>>>> before the loop, it is fine, if it is used to determine if &q[0] >= &p[0] &&
>>>>> &q[0] <= &p[1023], then it is not fine.
>>>>>
>>>>> For aliasing of q[0] and p[1023], I don't see why they couldn't alias in a
>>>>> valid program.  #pragma omp simd I think guarantees that the last iteration is
>>>>> executed last, it isn't necessarily executed last alone, it could be, or
>>>>> together with one before last iteration, or (for simdlen INT_MAX) even all
>>>>> iterations can be done concurrently, in hw or sw, so it is fine if it is
>>>>> transformed into:
>>>>>    int temp[1024], temp2[1024], temp3[1024];
>>>>>    for (int i = 0; i < 1024; i++)
>>>>>      temp[i] = p[i];
>>>>>    for (int i = 0; i < 1024; i++)
>>>>>      temp2[i] = q[0];
>>>>>    /* The above two loops can be also swapped, or intermixed.  */
>>>>>    for (int i = 0; i < 1024; i++)
>>>>>      temp3[i] = temp[i] + temp2[i];
>>>>>    for (int i = 0; i < 1024; i++)
>>>>>      p[i] = temp3[i];
>>>>>    /* Or the above loop reversed etc. */
>>>>>
>>>>> If you have:
>>>>> int
>>>>> bar (int *p, int *q)
>>>>> {
>>>>>    q[0] = 0;
>>>>>    #pragma omp simd
>>>>>    for (int i = 0; i < 1024; i++)
>>>>>      p[i]++;
>>>>>    return q[0];
>>>>> }
>>>>> i.e. something similar to what misbehaves in simd3.f90 with the change, then
>>>>> the answer is that q[0] isn't guaranteed to be independent of any references in
>>>>> the simd loop.
>>>>>
>>>>> 2016-07-07 16:57 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>> On Thu, Jul 7, 2016 at 3:24 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>> Richard,
>>>>>>>
>>>>>>> Did you meas the following check enhancement:
>>>>>>>
>>>>>>>    outer = loop_outer (loop);
>>>>>>>    /* We consider REF defined in LOOP as independent if at least 2 loop
>>>>>>>       iterations are not dependent.  */
>>>>>>>    if ((loop->safelen > 1
>>>>>>>         || (outer && outer->safelen > 1))
>>>>>>>        && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num], ref->id))
>>>>>>>      return true;
>>>>>>> which allows us to consider reference x[0] as invariant for the test
>>>>>>> #pragma omp simd
>>>>>>>    for (i = 0; i< 100; i++)
>>>>>>>      {
>>>>>>>        y[i] = i * 2;
>>>>>>>        for (j = 0; j < 100; j++)
>>>>>>> z[j] += x[0];
>>>>>>>      }
>>>>>>>
>>>>>>> Moving statement
>>>>>>> _9 = *x_19(D);
>>>>>>> (cost 20) out of loop 1.
>>>>>>
>>>>>> outer loops will be automatically considered by outermost_indep_loop
>>>>>> which eventually
>>>>>> calls the function you are patching.
>>>>>>
>>>>>> I was questioning the added test
>>>>>>
>>>>>>         && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num], ref->id))
>>>>>>
>>>>>> and still do not understand why you need that.  It makes us only consider the
>>>>>> loop of ref itself when looking at safelen (but not refs that belong
>>>>>> to inner loops
>>>>>> of loop).
>>>>>>
>>>>>> Richard.
>>>>>>
>>>>>>
>>>>>>> 2016-07-07 11:46 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>> On Wed, Jul 6, 2016 at 4:42 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>> Richard,
>>>>>>>>>
>>>>>>>>> I don't understand your question and how it is related to proposed patch.
>>>>>>>>>
>>>>>>>>> 2016-07-06 16:55 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>>> On Wed, Jul 6, 2016 at 3:50 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>> See for example Jakub comments for 70729.
>>>>>>>>>>
>>>>>>>>>> But how can the check be valid given a
>>>>>>>>>>
>>>>>>>>>> #pragma omp simd
>>>>>>>>>>    for (;;)
>>>>>>>>>>      {
>>>>>>>>>>         for (;;)
>>>>>>>>>>           ...ref in question...
>>>>>>>>>>      }
>>>>>>>>>>
>>>>>>>>>> can be transformed to
>>>>>>>>>>
>>>>>>>>>> #pragma omp simd
>>>>>>>>>>     for (;;)
>>>>>>>>>>       {
>>>>>>>>>>          ...ref in question...
>>>>>>>>>>          ...ref in question..
>>>>>>>>>>          ...
>>>>>>>>>>       }
>>>>>>>>>>
>>>>>>>>>> by means of unrolling?
>>>>>>>>
>>>>>>>> The bitmap check would return false for the not unrolled inner loop
>>>>>>>> and true for the inner unrolled loop.
>>>>>>>> So it cannot be correct.
>>>>>>>>
>>>>>>>> (not sure why this is all off-list btw)
>>>>>>>>
>>>>>>>> Richard.
>>>>>>>>
>>>>>>>>>> Richard.
>>>>>>>>>>
>>>>>>>>>>> 2016-07-06 16:25 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>>>>> On Wed, Jul 6, 2016 at 1:43 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>> I did not learn LIM detailed but I see on the following example
>>>>>>>>>>>>> #pragma omp simd
>>>>>>>>>>>>>    for (i = 0; i< 100; i++)
>>>>>>>>>>>>>      {
>>>>>>>>>>>>>        y[i] = i * 2;
>>>>>>>>>>>>>        for (j = 0; j < 100; j++)
>>>>>>>>>>>>> z[j] += x[0];
>>>>>>>>>>>>>      }
>>>>>>>>>>>>> that only inner,ost loop is handled:
>>>>>>>>>>>>>
>>>>>>>>>>>>> Memory reference in loop#2 1: *_7
>>>>>>>>>>>>> Memory reference in loop#2 2: *x_19(D)
>>>>>>>>>>>>> Memory reference in loop#1 3: *_3
>>>>>>>>>>>>> Basic block 3 (loop 1 -- depth 1):
>>>>>>>>>>>>>
>>>>>>>>>>>>> Basic block 4 (loop 2 -- depth 2):
>>>>>>>>>>>>>
>>>>>>>>>>>>> Loop#2 is innermost
>>>>>>>>>>>>> Querying dependency of refs 2 and 1: dependent.
>>>>>>>>>>>>> Querying dependencies of ref 2 in loop 2: dependent
>>>>>>>>>>>>
>>>>>>>>>>>> well, LIM doesn't bother to check against refs in an outer loop if
>>>>>>>>>>>> refs in the inner loop are already dependent.
>>>>>>>>>>>>
>>>>>>>>>>>>> Loop unroll of inner loop does not invalidate 'safelen' of outer one
>>>>>>>>>>>>> accordingly with safelen definition.
>>>>>>>>>>>>
>>>>>>>>>>>> So why do you need the bitmap_bit_p check then?
>>>>>>>>>>>>
>>>>>>>>>>>> Richard.
>>>>>>>>>>>>
>>>>>>>>>>>>> 2016-07-06 14:23 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 1:17 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> LIM phase considers only innermost loops and does not handle loop
>>>>>>>>>>>>>>> nests, so in your case the innermost loop (j) does not have non-zero
>>>>>>>>>>>>>>> safelen.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Huh?  LIM considers loop nests just fine.  And yes, the innermost loop
>>>>>>>>>>>>>> does not have safelen but shouldn't it?  Consider the inner loop being unrolled
>>>>>>>>>>>>>> by GCC so it is removed - should that then invalidate the outer loop safelen
>>>>>>>>>>>>>> because innermost loop refs now appear in the outer loop?
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> 2016-07-06 13:34 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 11:50 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> I pointed out in the commentary that REF is defined inside loop and
>>>>>>>>>>>>>>>>> this check is related to this statement. Should I clarify it?
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> +  /* We consider REF defined in LOOP as independent if at least 2 loop
>>>>>>>>>>>>>>>>> +     iterations are not dependent.  */
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Yes, I fail to see why x[0] should not be disambiguated against y[i] in
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> #pragma GCC loop ivdep
>>>>>>>>>>>>>>>>    for (i...)
>>>>>>>>>>>>>>>>      {
>>>>>>>>>>>>>>>>        y[i] = ...;
>>>>>>>>>>>>>>>>        for (j...)
>>>>>>>>>>>>>>>>          ... = x[0];
>>>>>>>>>>>>>>>>      }
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> REF is always inside the loop nest with LOOP being the outermost loop.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> 2016-07-06 12:38 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>>>> On Tue, Jul 5, 2016 at 4:56 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>>>> Hi All,
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Here is a simple fix to cure regressions introduced by my fix for
>>>>>>>>>>>>>>>>>>> 70729. Patch also contains minor changes in test found by Jakub.
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Bootstrapping and regression testing did not show any new failures.
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Is it OK for trunk?
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> +      && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num], ref->id))
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> So safelen does not apply to refs in nested loops?  The added comment only
>>>>>>>>>>>>>>>>>> explains the safelen check change but this also requires explanation.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> ChangeLog:
>>>>>>>>>>>>>>>>>>> 2016-07-05  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> PR tree-optimization/71734
>>>>>>>>>>>>>>>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Consider REF defined in
>>>>>>>>>>>>>>>>>>> LOOP as independent if at least two loop iterations are not dependent.
>>>>>>>>>>>>>>>>>>> gcc/testsuite/ChangeLog:
>>>>>>>>>>>>>>>>>>>          * g++.dg/vect/pr70729.cc: Delete redundant dg options, fix style.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
  2016-07-19 15:29                                     ` Renlin Li
@ 2016-07-20  9:35                                       ` Yuri Rumyantsev
  2016-07-26 15:50                                         ` Yuri Rumyantsev
  0 siblings, 1 reply; 24+ messages in thread
From: Yuri Rumyantsev @ 2016-07-20  9:35 UTC (permalink / raw)
  To: Renlin Li
  Cc: Richard Biener, gcc-patches,
	Илья
	Энкович,
	Igor Zamyatin, Jakub Jelinek

Richard,

Jakub has already fixed it.
Sorry for troubles.
Yuri.

2016-07-19 18:29 GMT+03:00 Renlin Li <renlin.li@foss.arm.com>:
> Hi Yuri,
>
> I saw this test case runs on arm platforms, and maybe other platforms as
> well.
>
> testsuite/g++.dg/vect/pr70729.cc:7:10: fatal error: xmmintrin.h: No such
> file or directory
>
> Before the change here, it's gated by vect_simd_clones target selector,
> which limit it to i?86/x86_64 platform only.
>
> Regards,
> Renlin Li
>
>
>
>
> On 08/07/16 15:07, Yuri Rumyantsev wrote:
>>
>> Hi Richard,
>>
>> Thanks for your help - your patch looks much better.
>> Here is new patch in which additional argument was added to determine
>> source loop of reference.
>>
>> Bootstrap and regression testing did not show any new failures.
>>
>> Is it OK for trunk?
>> ChangeLog:
>> 2016-07-08  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>
>> PR tree-optimization/71734
>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Add REF_LOOP argument which
>> contains REF, use it to check safelen, assume that safelen value
>> must be greater 1, fix style.
>> (ref_indep_loop_p_2): Add REF_LOOP argument.
>> (ref_indep_loop_p): Pass LOOP as additional argument to
>> ref_indep_loop_p_2.
>> gcc/testsuite/ChangeLog:
>>          * g++.dg/vect/pr70729.cc: Delete redundant dg options, fix style.
>>
>> 2016-07-08 11:18 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>
>>> On Thu, Jul 7, 2016 at 5:38 PM, Yuri Rumyantsev <ysrumyan@gmail.com>
>>> wrote:
>>>>
>>>> I checked simd3.f90 and found out that my additional check reject
>>>> independence of references
>>>>
>>>> REF is independent in loop#3
>>>> .istart0.19, .iend0.20
>>>> which are defined in loop#1 which is outer for loop#3.
>>>> Note that these references are defined by
>>>> _103 = __builtin_GOMP_loop_dynamic_next (&.istart0.19, &.iend0.20);
>>>> which is in loop#1.
>>>> It is clear that both these references can not be independent for
>>>> loop#3.
>>>
>>>
>>> Ok, so we end up calling ref_indep_loop for ref in LOOP also for inner
>>> loops
>>> of LOOP to catch memory references in those as well.  So the issue is
>>> really
>>> that we look at the wrong loop for safelen and we _do_ want to apply
>>> safelen
>>> to inner loops as well.
>>>
>>> So better track the loop we are ultimately asking the question for, like
>>> in the
>>> attached patch (fixes the testcase for me).
>>>
>>> Richard.
>>>
>>>
>>>
>>>> 2016-07-07 17:11 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>
>>>>> On Thu, Jul 7, 2016 at 4:04 PM, Yuri Rumyantsev <ysrumyan@gmail.com>
>>>>> wrote:
>>>>>>
>>>>>> I Added this check because of new failures in libgomp.fortran suite.
>>>>>> Here is copy of Jakub message:
>>>>>> --- Comment #29 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
>>>>>> The #c27 r237844 change looks bogus to me.
>>>>>> First of all, IMNSHO you can argue this way only if ref is a reference
>>>>>> seen in
>>>>>> loop LOOP,
>>>>>
>>>>>
>>>>> or inner loops of LOOP I guess.  I _think_ we never call
>>>>> ref_indep_loop_p_1 with
>>>>> a REF whose loop is not a sub-loop of LOOP or LOOP itself (as it would
>>>>> not make
>>>>> sense to do that, it would be a waste of time).
>>>>>
>>>>> So only if "or inner loops of LOOP" is not correct the check would be
>>>>> needed
>>>>> but then my issue with unrolling an inner loop and turning a ref that
>>>>> safelen
>>>>> does not apply to into a ref that it now applies to arises.
>>>>>
>>>>> I don't fully get what Jakub is hinting at.
>>>>>
>>>>> Can you install the safelen > 0 -> safelen > 1 fix please?  Jakub, can
>>>>> you
>>>>> explain that bitmap check with a simple testcase?
>>>>>
>>>>> Thanks,
>>>>> Richard.
>>>>>
>>>>>> which is the case of e.g. *.omp_data_i_23(D).a ref in simd3.f90 -O2
>>>>>> -fopenmp -msse2, but not the D.3815[0] case tested during can_sm_ref_p
>>>>>> - the
>>>>>> D.3815[0] = 0; as well as something = D.3815[0]; stmt found in the
>>>>>> outer loop
>>>>>> obviously can be dependent on many of the loads and/or stores in the
>>>>>> loop, be
>>>>>> it "omp simd array" or not.
>>>>>> Say for
>>>>>> void
>>>>>> foo (int *p, int *q)
>>>>>> {
>>>>>>    #pragma omp simd
>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>      p[i] += q[0];
>>>>>> }
>>>>>> sure, q[0] can't alias p[0] ... p[1022], the earlier iterations could
>>>>>> write
>>>>>> something that changes its value, and then it would behave differently
>>>>>> from
>>>>>> using VF = 1024, where everything is performed in parallel.
>>>>>> Though, actually, it can alias, just it would have to write the same
>>>>>> value as
>>>>>> was there.  So, if this is used to determine if it is safe to hoist
>>>>>> the load
>>>>>> before the loop, it is fine, if it is used to determine if &q[0] >=
>>>>>> &p[0] &&
>>>>>> &q[0] <= &p[1023], then it is not fine.
>>>>>>
>>>>>> For aliasing of q[0] and p[1023], I don't see why they couldn't alias
>>>>>> in a
>>>>>> valid program.  #pragma omp simd I think guarantees that the last
>>>>>> iteration is
>>>>>> executed last, it isn't necessarily executed last alone, it could be,
>>>>>> or
>>>>>> together with one before last iteration, or (for simdlen INT_MAX) even
>>>>>> all
>>>>>> iterations can be done concurrently, in hw or sw, so it is fine if it
>>>>>> is
>>>>>> transformed into:
>>>>>>    int temp[1024], temp2[1024], temp3[1024];
>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>      temp[i] = p[i];
>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>      temp2[i] = q[0];
>>>>>>    /* The above two loops can be also swapped, or intermixed.  */
>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>      temp3[i] = temp[i] + temp2[i];
>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>      p[i] = temp3[i];
>>>>>>    /* Or the above loop reversed etc. */
>>>>>>
>>>>>> If you have:
>>>>>> int
>>>>>> bar (int *p, int *q)
>>>>>> {
>>>>>>    q[0] = 0;
>>>>>>    #pragma omp simd
>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>      p[i]++;
>>>>>>    return q[0];
>>>>>> }
>>>>>> i.e. something similar to what misbehaves in simd3.f90 with the
>>>>>> change, then
>>>>>> the answer is that q[0] isn't guaranteed to be independent of any
>>>>>> references in
>>>>>> the simd loop.
>>>>>>
>>>>>> 2016-07-07 16:57 GMT+03:00 Richard Biener
>>>>>> <richard.guenther@gmail.com>:
>>>>>>>
>>>>>>> On Thu, Jul 7, 2016 at 3:24 PM, Yuri Rumyantsev <ysrumyan@gmail.com>
>>>>>>> wrote:
>>>>>>>>
>>>>>>>> Richard,
>>>>>>>>
>>>>>>>> Did you meas the following check enhancement:
>>>>>>>>
>>>>>>>>    outer = loop_outer (loop);
>>>>>>>>    /* We consider REF defined in LOOP as independent if at least 2
>>>>>>>> loop
>>>>>>>>       iterations are not dependent.  */
>>>>>>>>    if ((loop->safelen > 1
>>>>>>>>         || (outer && outer->safelen > 1))
>>>>>>>>        && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num],
>>>>>>>> ref->id))
>>>>>>>>      return true;
>>>>>>>> which allows us to consider reference x[0] as invariant for the test
>>>>>>>> #pragma omp simd
>>>>>>>>    for (i = 0; i< 100; i++)
>>>>>>>>      {
>>>>>>>>        y[i] = i * 2;
>>>>>>>>        for (j = 0; j < 100; j++)
>>>>>>>> z[j] += x[0];
>>>>>>>>      }
>>>>>>>>
>>>>>>>> Moving statement
>>>>>>>> _9 = *x_19(D);
>>>>>>>> (cost 20) out of loop 1.
>>>>>>>
>>>>>>>
>>>>>>> outer loops will be automatically considered by outermost_indep_loop
>>>>>>> which eventually
>>>>>>> calls the function you are patching.
>>>>>>>
>>>>>>> I was questioning the added test
>>>>>>>
>>>>>>>         && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num],
>>>>>>> ref->id))
>>>>>>>
>>>>>>> and still do not understand why you need that.  It makes us only
>>>>>>> consider the
>>>>>>> loop of ref itself when looking at safelen (but not refs that belong
>>>>>>> to inner loops
>>>>>>> of loop).
>>>>>>>
>>>>>>> Richard.
>>>>>>>
>>>>>>>
>>>>>>>> 2016-07-07 11:46 GMT+03:00 Richard Biener
>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>
>>>>>>>>> On Wed, Jul 6, 2016 at 4:42 PM, Yuri Rumyantsev
>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>
>>>>>>>>>> Richard,
>>>>>>>>>>
>>>>>>>>>> I don't understand your question and how it is related to proposed
>>>>>>>>>> patch.
>>>>>>>>>>
>>>>>>>>>> 2016-07-06 16:55 GMT+03:00 Richard Biener
>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>
>>>>>>>>>>> On Wed, Jul 6, 2016 at 3:50 PM, Yuri Rumyantsev
>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>
>>>>>>>>>>>> See for example Jakub comments for 70729.
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> But how can the check be valid given a
>>>>>>>>>>>
>>>>>>>>>>> #pragma omp simd
>>>>>>>>>>>    for (;;)
>>>>>>>>>>>      {
>>>>>>>>>>>         for (;;)
>>>>>>>>>>>           ...ref in question...
>>>>>>>>>>>      }
>>>>>>>>>>>
>>>>>>>>>>> can be transformed to
>>>>>>>>>>>
>>>>>>>>>>> #pragma omp simd
>>>>>>>>>>>     for (;;)
>>>>>>>>>>>       {
>>>>>>>>>>>          ...ref in question...
>>>>>>>>>>>          ...ref in question..
>>>>>>>>>>>          ...
>>>>>>>>>>>       }
>>>>>>>>>>>
>>>>>>>>>>> by means of unrolling?
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> The bitmap check would return false for the not unrolled inner loop
>>>>>>>>> and true for the inner unrolled loop.
>>>>>>>>> So it cannot be correct.
>>>>>>>>>
>>>>>>>>> (not sure why this is all off-list btw)
>>>>>>>>>
>>>>>>>>> Richard.
>>>>>>>>>
>>>>>>>>>>> Richard.
>>>>>>>>>>>
>>>>>>>>>>>> 2016-07-06 16:25 GMT+03:00 Richard Biener
>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>
>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 1:43 PM, Yuri Rumyantsev
>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> I did not learn LIM detailed but I see on the following
>>>>>>>>>>>>>> example
>>>>>>>>>>>>>> #pragma omp simd
>>>>>>>>>>>>>>    for (i = 0; i< 100; i++)
>>>>>>>>>>>>>>      {
>>>>>>>>>>>>>>        y[i] = i * 2;
>>>>>>>>>>>>>>        for (j = 0; j < 100; j++)
>>>>>>>>>>>>>> z[j] += x[0];
>>>>>>>>>>>>>>      }
>>>>>>>>>>>>>> that only inner,ost loop is handled:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Memory reference in loop#2 1: *_7
>>>>>>>>>>>>>> Memory reference in loop#2 2: *x_19(D)
>>>>>>>>>>>>>> Memory reference in loop#1 3: *_3
>>>>>>>>>>>>>> Basic block 3 (loop 1 -- depth 1):
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Basic block 4 (loop 2 -- depth 2):
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Loop#2 is innermost
>>>>>>>>>>>>>> Querying dependency of refs 2 and 1: dependent.
>>>>>>>>>>>>>> Querying dependencies of ref 2 in loop 2: dependent
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> well, LIM doesn't bother to check against refs in an outer loop
>>>>>>>>>>>>> if
>>>>>>>>>>>>> refs in the inner loop are already dependent.
>>>>>>>>>>>>>
>>>>>>>>>>>>>> Loop unroll of inner loop does not invalidate 'safelen' of
>>>>>>>>>>>>>> outer one
>>>>>>>>>>>>>> accordingly with safelen definition.
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> So why do you need the bitmap_bit_p check then?
>>>>>>>>>>>>>
>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>
>>>>>>>>>>>>>> 2016-07-06 14:23 GMT+03:00 Richard Biener
>>>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 1:17 PM, Yuri Rumyantsev
>>>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> LIM phase considers only innermost loops and does not handle
>>>>>>>>>>>>>>>> loop
>>>>>>>>>>>>>>>> nests, so in your case the innermost loop (j) does not have
>>>>>>>>>>>>>>>> non-zero
>>>>>>>>>>>>>>>> safelen.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Huh?  LIM considers loop nests just fine.  And yes, the
>>>>>>>>>>>>>>> innermost loop
>>>>>>>>>>>>>>> does not have safelen but shouldn't it?  Consider the inner
>>>>>>>>>>>>>>> loop being unrolled
>>>>>>>>>>>>>>> by GCC so it is removed - should that then invalidate the
>>>>>>>>>>>>>>> outer loop safelen
>>>>>>>>>>>>>>> because innermost loop refs now appear in the outer loop?
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 2016-07-06 13:34 GMT+03:00 Richard Biener
>>>>>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 11:50 AM, Yuri Rumyantsev
>>>>>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> I pointed out in the commentary that REF is defined inside
>>>>>>>>>>>>>>>>>> loop and
>>>>>>>>>>>>>>>>>> this check is related to this statement. Should I clarify
>>>>>>>>>>>>>>>>>> it?
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> +  /* We consider REF defined in LOOP as independent if at
>>>>>>>>>>>>>>>>>> least 2 loop
>>>>>>>>>>>>>>>>>> +     iterations are not dependent.  */
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Yes, I fail to see why x[0] should not be disambiguated
>>>>>>>>>>>>>>>>> against y[i] in
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> #pragma GCC loop ivdep
>>>>>>>>>>>>>>>>>    for (i...)
>>>>>>>>>>>>>>>>>      {
>>>>>>>>>>>>>>>>>        y[i] = ...;
>>>>>>>>>>>>>>>>>        for (j...)
>>>>>>>>>>>>>>>>>          ... = x[0];
>>>>>>>>>>>>>>>>>      }
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> REF is always inside the loop nest with LOOP being the
>>>>>>>>>>>>>>>>> outermost loop.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> 2016-07-06 12:38 GMT+03:00 Richard Biener
>>>>>>>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> On Tue, Jul 5, 2016 at 4:56 PM, Yuri Rumyantsev
>>>>>>>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> Hi All,
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> Here is a simple fix to cure regressions introduced by
>>>>>>>>>>>>>>>>>>>> my fix for
>>>>>>>>>>>>>>>>>>>> 70729. Patch also contains minor changes in test found
>>>>>>>>>>>>>>>>>>>> by Jakub.
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> Bootstrapping and regression testing did not show any
>>>>>>>>>>>>>>>>>>>> new failures.
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> Is it OK for trunk?
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> +      && bitmap_bit_p
>>>>>>>>>>>>>>>>>>> (&memory_accesses.refs_in_loop[loop->num], ref->id))
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> So safelen does not apply to refs in nested loops?  The
>>>>>>>>>>>>>>>>>>> added comment only
>>>>>>>>>>>>>>>>>>> explains the safelen check change but this also requires
>>>>>>>>>>>>>>>>>>> explanation.
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> ChangeLog:
>>>>>>>>>>>>>>>>>>>> 2016-07-05  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> PR tree-optimization/71734
>>>>>>>>>>>>>>>>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Consider REF
>>>>>>>>>>>>>>>>>>>> defined in
>>>>>>>>>>>>>>>>>>>> LOOP as independent if at least two loop iterations are
>>>>>>>>>>>>>>>>>>>> not dependent.
>>>>>>>>>>>>>>>>>>>> gcc/testsuite/ChangeLog:
>>>>>>>>>>>>>>>>>>>>          * g++.dg/vect/pr70729.cc: Delete redundant dg
>>>>>>>>>>>>>>>>>>>> options, fix style.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
  2016-07-20  9:35                                       ` Yuri Rumyantsev
@ 2016-07-26 15:50                                         ` Yuri Rumyantsev
  2016-07-27 13:25                                           ` Richard Biener
  0 siblings, 1 reply; 24+ messages in thread
From: Yuri Rumyantsev @ 2016-07-26 15:50 UTC (permalink / raw)
  To: Renlin Li
  Cc: Richard Biener, gcc-patches,
	Илья
	Энкович,
	Igor Zamyatin, Jakub Jelinek

[-- Attachment #1: Type: text/plain, Size: 16079 bytes --]

Hi Richard,

It turned out that the patch proposed by you does not work properly
for nested loop. If we slightly change pr70729.cc to
(non-essential part is omitted
void inline Ss::foo (float w)
{
#pragma omp simd
  for (int i=0; i<S_n; i++)
    {
      float w1 = C2[S_n + i] * w;
      v1.v_i[i] += (int)w1;
      C1[S_n + i] += w1;
    }
}
void Ss::boo (int n)
{
  for (int i = 0; i<n; i++)
    foo(ww[i]);
}
loop in foo won't be vectorized since REF_LOOP is outer loop which is
not marked with omp simd pragma:
 t1.cc:73:25: note: not vectorized: not suitable for scatter store *_31 = _33;
t1.cc:73:25: note: bad data references.

Note that the check I proposed before works fine.

2016-07-20 12:35 GMT+03:00 Yuri Rumyantsev <ysrumyan@gmail.com>:
> Richard,
>
> Jakub has already fixed it.
> Sorry for troubles.
> Yuri.
>
> 2016-07-19 18:29 GMT+03:00 Renlin Li <renlin.li@foss.arm.com>:
>> Hi Yuri,
>>
>> I saw this test case runs on arm platforms, and maybe other platforms as
>> well.
>>
>> testsuite/g++.dg/vect/pr70729.cc:7:10: fatal error: xmmintrin.h: No such
>> file or directory
>>
>> Before the change here, it's gated by vect_simd_clones target selector,
>> which limit it to i?86/x86_64 platform only.
>>
>> Regards,
>> Renlin Li
>>
>>
>>
>>
>> On 08/07/16 15:07, Yuri Rumyantsev wrote:
>>>
>>> Hi Richard,
>>>
>>> Thanks for your help - your patch looks much better.
>>> Here is new patch in which additional argument was added to determine
>>> source loop of reference.
>>>
>>> Bootstrap and regression testing did not show any new failures.
>>>
>>> Is it OK for trunk?
>>> ChangeLog:
>>> 2016-07-08  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>
>>> PR tree-optimization/71734
>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Add REF_LOOP argument which
>>> contains REF, use it to check safelen, assume that safelen value
>>> must be greater 1, fix style.
>>> (ref_indep_loop_p_2): Add REF_LOOP argument.
>>> (ref_indep_loop_p): Pass LOOP as additional argument to
>>> ref_indep_loop_p_2.
>>> gcc/testsuite/ChangeLog:
>>>          * g++.dg/vect/pr70729.cc: Delete redundant dg options, fix style.
>>>
>>> 2016-07-08 11:18 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>
>>>> On Thu, Jul 7, 2016 at 5:38 PM, Yuri Rumyantsev <ysrumyan@gmail.com>
>>>> wrote:
>>>>>
>>>>> I checked simd3.f90 and found out that my additional check reject
>>>>> independence of references
>>>>>
>>>>> REF is independent in loop#3
>>>>> .istart0.19, .iend0.20
>>>>> which are defined in loop#1 which is outer for loop#3.
>>>>> Note that these references are defined by
>>>>> _103 = __builtin_GOMP_loop_dynamic_next (&.istart0.19, &.iend0.20);
>>>>> which is in loop#1.
>>>>> It is clear that both these references can not be independent for
>>>>> loop#3.
>>>>
>>>>
>>>> Ok, so we end up calling ref_indep_loop for ref in LOOP also for inner
>>>> loops
>>>> of LOOP to catch memory references in those as well.  So the issue is
>>>> really
>>>> that we look at the wrong loop for safelen and we _do_ want to apply
>>>> safelen
>>>> to inner loops as well.
>>>>
>>>> So better track the loop we are ultimately asking the question for, like
>>>> in the
>>>> attached patch (fixes the testcase for me).
>>>>
>>>> Richard.
>>>>
>>>>
>>>>
>>>>> 2016-07-07 17:11 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>
>>>>>> On Thu, Jul 7, 2016 at 4:04 PM, Yuri Rumyantsev <ysrumyan@gmail.com>
>>>>>> wrote:
>>>>>>>
>>>>>>> I Added this check because of new failures in libgomp.fortran suite.
>>>>>>> Here is copy of Jakub message:
>>>>>>> --- Comment #29 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
>>>>>>> The #c27 r237844 change looks bogus to me.
>>>>>>> First of all, IMNSHO you can argue this way only if ref is a reference
>>>>>>> seen in
>>>>>>> loop LOOP,
>>>>>>
>>>>>>
>>>>>> or inner loops of LOOP I guess.  I _think_ we never call
>>>>>> ref_indep_loop_p_1 with
>>>>>> a REF whose loop is not a sub-loop of LOOP or LOOP itself (as it would
>>>>>> not make
>>>>>> sense to do that, it would be a waste of time).
>>>>>>
>>>>>> So only if "or inner loops of LOOP" is not correct the check would be
>>>>>> needed
>>>>>> but then my issue with unrolling an inner loop and turning a ref that
>>>>>> safelen
>>>>>> does not apply to into a ref that it now applies to arises.
>>>>>>
>>>>>> I don't fully get what Jakub is hinting at.
>>>>>>
>>>>>> Can you install the safelen > 0 -> safelen > 1 fix please?  Jakub, can
>>>>>> you
>>>>>> explain that bitmap check with a simple testcase?
>>>>>>
>>>>>> Thanks,
>>>>>> Richard.
>>>>>>
>>>>>>> which is the case of e.g. *.omp_data_i_23(D).a ref in simd3.f90 -O2
>>>>>>> -fopenmp -msse2, but not the D.3815[0] case tested during can_sm_ref_p
>>>>>>> - the
>>>>>>> D.3815[0] = 0; as well as something = D.3815[0]; stmt found in the
>>>>>>> outer loop
>>>>>>> obviously can be dependent on many of the loads and/or stores in the
>>>>>>> loop, be
>>>>>>> it "omp simd array" or not.
>>>>>>> Say for
>>>>>>> void
>>>>>>> foo (int *p, int *q)
>>>>>>> {
>>>>>>>    #pragma omp simd
>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>      p[i] += q[0];
>>>>>>> }
>>>>>>> sure, q[0] can't alias p[0] ... p[1022], the earlier iterations could
>>>>>>> write
>>>>>>> something that changes its value, and then it would behave differently
>>>>>>> from
>>>>>>> using VF = 1024, where everything is performed in parallel.
>>>>>>> Though, actually, it can alias, just it would have to write the same
>>>>>>> value as
>>>>>>> was there.  So, if this is used to determine if it is safe to hoist
>>>>>>> the load
>>>>>>> before the loop, it is fine, if it is used to determine if &q[0] >=
>>>>>>> &p[0] &&
>>>>>>> &q[0] <= &p[1023], then it is not fine.
>>>>>>>
>>>>>>> For aliasing of q[0] and p[1023], I don't see why they couldn't alias
>>>>>>> in a
>>>>>>> valid program.  #pragma omp simd I think guarantees that the last
>>>>>>> iteration is
>>>>>>> executed last, it isn't necessarily executed last alone, it could be,
>>>>>>> or
>>>>>>> together with one before last iteration, or (for simdlen INT_MAX) even
>>>>>>> all
>>>>>>> iterations can be done concurrently, in hw or sw, so it is fine if it
>>>>>>> is
>>>>>>> transformed into:
>>>>>>>    int temp[1024], temp2[1024], temp3[1024];
>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>      temp[i] = p[i];
>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>      temp2[i] = q[0];
>>>>>>>    /* The above two loops can be also swapped, or intermixed.  */
>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>      temp3[i] = temp[i] + temp2[i];
>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>      p[i] = temp3[i];
>>>>>>>    /* Or the above loop reversed etc. */
>>>>>>>
>>>>>>> If you have:
>>>>>>> int
>>>>>>> bar (int *p, int *q)
>>>>>>> {
>>>>>>>    q[0] = 0;
>>>>>>>    #pragma omp simd
>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>      p[i]++;
>>>>>>>    return q[0];
>>>>>>> }
>>>>>>> i.e. something similar to what misbehaves in simd3.f90 with the
>>>>>>> change, then
>>>>>>> the answer is that q[0] isn't guaranteed to be independent of any
>>>>>>> references in
>>>>>>> the simd loop.
>>>>>>>
>>>>>>> 2016-07-07 16:57 GMT+03:00 Richard Biener
>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>
>>>>>>>> On Thu, Jul 7, 2016 at 3:24 PM, Yuri Rumyantsev <ysrumyan@gmail.com>
>>>>>>>> wrote:
>>>>>>>>>
>>>>>>>>> Richard,
>>>>>>>>>
>>>>>>>>> Did you meas the following check enhancement:
>>>>>>>>>
>>>>>>>>>    outer = loop_outer (loop);
>>>>>>>>>    /* We consider REF defined in LOOP as independent if at least 2
>>>>>>>>> loop
>>>>>>>>>       iterations are not dependent.  */
>>>>>>>>>    if ((loop->safelen > 1
>>>>>>>>>         || (outer && outer->safelen > 1))
>>>>>>>>>        && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num],
>>>>>>>>> ref->id))
>>>>>>>>>      return true;
>>>>>>>>> which allows us to consider reference x[0] as invariant for the test
>>>>>>>>> #pragma omp simd
>>>>>>>>>    for (i = 0; i< 100; i++)
>>>>>>>>>      {
>>>>>>>>>        y[i] = i * 2;
>>>>>>>>>        for (j = 0; j < 100; j++)
>>>>>>>>> z[j] += x[0];
>>>>>>>>>      }
>>>>>>>>>
>>>>>>>>> Moving statement
>>>>>>>>> _9 = *x_19(D);
>>>>>>>>> (cost 20) out of loop 1.
>>>>>>>>
>>>>>>>>
>>>>>>>> outer loops will be automatically considered by outermost_indep_loop
>>>>>>>> which eventually
>>>>>>>> calls the function you are patching.
>>>>>>>>
>>>>>>>> I was questioning the added test
>>>>>>>>
>>>>>>>>         && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num],
>>>>>>>> ref->id))
>>>>>>>>
>>>>>>>> and still do not understand why you need that.  It makes us only
>>>>>>>> consider the
>>>>>>>> loop of ref itself when looking at safelen (but not refs that belong
>>>>>>>> to inner loops
>>>>>>>> of loop).
>>>>>>>>
>>>>>>>> Richard.
>>>>>>>>
>>>>>>>>
>>>>>>>>> 2016-07-07 11:46 GMT+03:00 Richard Biener
>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>
>>>>>>>>>> On Wed, Jul 6, 2016 at 4:42 PM, Yuri Rumyantsev
>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>
>>>>>>>>>>> Richard,
>>>>>>>>>>>
>>>>>>>>>>> I don't understand your question and how it is related to proposed
>>>>>>>>>>> patch.
>>>>>>>>>>>
>>>>>>>>>>> 2016-07-06 16:55 GMT+03:00 Richard Biener
>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>
>>>>>>>>>>>> On Wed, Jul 6, 2016 at 3:50 PM, Yuri Rumyantsev
>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>
>>>>>>>>>>>>> See for example Jakub comments for 70729.
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> But how can the check be valid given a
>>>>>>>>>>>>
>>>>>>>>>>>> #pragma omp simd
>>>>>>>>>>>>    for (;;)
>>>>>>>>>>>>      {
>>>>>>>>>>>>         for (;;)
>>>>>>>>>>>>           ...ref in question...
>>>>>>>>>>>>      }
>>>>>>>>>>>>
>>>>>>>>>>>> can be transformed to
>>>>>>>>>>>>
>>>>>>>>>>>> #pragma omp simd
>>>>>>>>>>>>     for (;;)
>>>>>>>>>>>>       {
>>>>>>>>>>>>          ...ref in question...
>>>>>>>>>>>>          ...ref in question..
>>>>>>>>>>>>          ...
>>>>>>>>>>>>       }
>>>>>>>>>>>>
>>>>>>>>>>>> by means of unrolling?
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> The bitmap check would return false for the not unrolled inner loop
>>>>>>>>>> and true for the inner unrolled loop.
>>>>>>>>>> So it cannot be correct.
>>>>>>>>>>
>>>>>>>>>> (not sure why this is all off-list btw)
>>>>>>>>>>
>>>>>>>>>> Richard.
>>>>>>>>>>
>>>>>>>>>>>> Richard.
>>>>>>>>>>>>
>>>>>>>>>>>>> 2016-07-06 16:25 GMT+03:00 Richard Biener
>>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 1:43 PM, Yuri Rumyantsev
>>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> I did not learn LIM detailed but I see on the following
>>>>>>>>>>>>>>> example
>>>>>>>>>>>>>>> #pragma omp simd
>>>>>>>>>>>>>>>    for (i = 0; i< 100; i++)
>>>>>>>>>>>>>>>      {
>>>>>>>>>>>>>>>        y[i] = i * 2;
>>>>>>>>>>>>>>>        for (j = 0; j < 100; j++)
>>>>>>>>>>>>>>> z[j] += x[0];
>>>>>>>>>>>>>>>      }
>>>>>>>>>>>>>>> that only inner,ost loop is handled:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Memory reference in loop#2 1: *_7
>>>>>>>>>>>>>>> Memory reference in loop#2 2: *x_19(D)
>>>>>>>>>>>>>>> Memory reference in loop#1 3: *_3
>>>>>>>>>>>>>>> Basic block 3 (loop 1 -- depth 1):
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Basic block 4 (loop 2 -- depth 2):
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Loop#2 is innermost
>>>>>>>>>>>>>>> Querying dependency of refs 2 and 1: dependent.
>>>>>>>>>>>>>>> Querying dependencies of ref 2 in loop 2: dependent
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> well, LIM doesn't bother to check against refs in an outer loop
>>>>>>>>>>>>>> if
>>>>>>>>>>>>>> refs in the inner loop are already dependent.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Loop unroll of inner loop does not invalidate 'safelen' of
>>>>>>>>>>>>>>> outer one
>>>>>>>>>>>>>>> accordingly with safelen definition.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> So why do you need the bitmap_bit_p check then?
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> 2016-07-06 14:23 GMT+03:00 Richard Biener
>>>>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 1:17 PM, Yuri Rumyantsev
>>>>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> LIM phase considers only innermost loops and does not handle
>>>>>>>>>>>>>>>>> loop
>>>>>>>>>>>>>>>>> nests, so in your case the innermost loop (j) does not have
>>>>>>>>>>>>>>>>> non-zero
>>>>>>>>>>>>>>>>> safelen.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Huh?  LIM considers loop nests just fine.  And yes, the
>>>>>>>>>>>>>>>> innermost loop
>>>>>>>>>>>>>>>> does not have safelen but shouldn't it?  Consider the inner
>>>>>>>>>>>>>>>> loop being unrolled
>>>>>>>>>>>>>>>> by GCC so it is removed - should that then invalidate the
>>>>>>>>>>>>>>>> outer loop safelen
>>>>>>>>>>>>>>>> because innermost loop refs now appear in the outer loop?
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> 2016-07-06 13:34 GMT+03:00 Richard Biener
>>>>>>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 11:50 AM, Yuri Rumyantsev
>>>>>>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> I pointed out in the commentary that REF is defined inside
>>>>>>>>>>>>>>>>>>> loop and
>>>>>>>>>>>>>>>>>>> this check is related to this statement. Should I clarify
>>>>>>>>>>>>>>>>>>> it?
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> +  /* We consider REF defined in LOOP as independent if at
>>>>>>>>>>>>>>>>>>> least 2 loop
>>>>>>>>>>>>>>>>>>> +     iterations are not dependent.  */
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Yes, I fail to see why x[0] should not be disambiguated
>>>>>>>>>>>>>>>>>> against y[i] in
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> #pragma GCC loop ivdep
>>>>>>>>>>>>>>>>>>    for (i...)
>>>>>>>>>>>>>>>>>>      {
>>>>>>>>>>>>>>>>>>        y[i] = ...;
>>>>>>>>>>>>>>>>>>        for (j...)
>>>>>>>>>>>>>>>>>>          ... = x[0];
>>>>>>>>>>>>>>>>>>      }
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> REF is always inside the loop nest with LOOP being the
>>>>>>>>>>>>>>>>>> outermost loop.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> 2016-07-06 12:38 GMT+03:00 Richard Biener
>>>>>>>>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> On Tue, Jul 5, 2016 at 4:56 PM, Yuri Rumyantsev
>>>>>>>>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> Hi All,
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> Here is a simple fix to cure regressions introduced by
>>>>>>>>>>>>>>>>>>>>> my fix for
>>>>>>>>>>>>>>>>>>>>> 70729. Patch also contains minor changes in test found
>>>>>>>>>>>>>>>>>>>>> by Jakub.
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> Bootstrapping and regression testing did not show any
>>>>>>>>>>>>>>>>>>>>> new failures.
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> Is it OK for trunk?
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> +      && bitmap_bit_p
>>>>>>>>>>>>>>>>>>>> (&memory_accesses.refs_in_loop[loop->num], ref->id))
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> So safelen does not apply to refs in nested loops?  The
>>>>>>>>>>>>>>>>>>>> added comment only
>>>>>>>>>>>>>>>>>>>> explains the safelen check change but this also requires
>>>>>>>>>>>>>>>>>>>> explanation.
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> ChangeLog:
>>>>>>>>>>>>>>>>>>>>> 2016-07-05  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> PR tree-optimization/71734
>>>>>>>>>>>>>>>>>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Consider REF
>>>>>>>>>>>>>>>>>>>>> defined in
>>>>>>>>>>>>>>>>>>>>> LOOP as independent if at least two loop iterations are
>>>>>>>>>>>>>>>>>>>>> not dependent.
>>>>>>>>>>>>>>>>>>>>> gcc/testsuite/ChangeLog:
>>>>>>>>>>>>>>>>>>>>>          * g++.dg/vect/pr70729.cc: Delete redundant dg
>>>>>>>>>>>>>>>>>>>>> options, fix style.

[-- Attachment #2: t1.cc --]
[-- Type: application/octet-stream, Size: 1801 bytes --]

// { dg-do compile }
// { dg-require-effective-target vect_simd_clones }
// { dg-additional-options "-Ofast" }
// { dg-additional-options "-mavx2 -fopenmp-simd" { target x86_64-*-* i?86-*-* } }


#include <string.h>
#include <xmmintrin.h>

inline void* my_alloc(size_t bytes) {return _mm_malloc(bytes, 128);}
inline void my_free(void* memory) {_mm_free(memory);}
float ww[100];
template <typename T>
class Vec
{
  const int isize;
	T* data;

public:

  Vec (int n) : isize(n) {data = (T*)my_alloc(isize*sizeof(T));}
  ~Vec () {my_free(data);}

  Vec& operator = (const Vec& other)	
    {
      if (this != &other)
	memcpy(data, other.data, isize*sizeof(T));
      return *this;
    }

  T& operator [] (int i) {return data[i];}
  const T& operator [] (int i) const {return data[i];}
  T& at (int i)  {return data[i];}
  const T& at (int i) const {return data[i];}

  operator T* ()  {return data;}
  int size () const {return isize;}
};

template <typename T>                                  
class Cl
{
public:

  Cl (int n, int m);
  const int N, M;
  Vec<T> v_x, v_y;
  Vec<int> v_i;
  Vec<float> v_z;
};

struct Ss
{
    const int S_n, S_m;
    Cl<float> v1;
    float* C1;
    float* C2;
    Ss (int n1, int n2): S_n(n1), S_m(n2), v1(n1, n2)
      {
	C1 = new float[n1 * 3];
	C2 = new float[n2 * 4]; 
      }

    ~Ss () { delete C1; delete C2;}
   void inline foo (float w);
   void boo (int n);
};
void inline Ss::foo (float w)
{
#pragma omp simd
  for (int i=0; i<S_n; i++)
    {
      float w1 = C2[S_n + i] * w;
      v1.v_i[i] += (int)w1;
      C1[S_n + i] += w1;
    }
}
void Ss::boo (int n)
{
  for (int i = 0; i<n; i++)
    foo(ww[i]);
}

 
// { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } }

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
  2016-07-26 15:50                                         ` Yuri Rumyantsev
@ 2016-07-27 13:25                                           ` Richard Biener
  2016-07-28 13:49                                             ` Yuri Rumyantsev
  0 siblings, 1 reply; 24+ messages in thread
From: Richard Biener @ 2016-07-27 13:25 UTC (permalink / raw)
  To: Yuri Rumyantsev
  Cc: Renlin Li, gcc-patches,
	Илья
	Энкович,
	Igor Zamyatin, Jakub Jelinek

[-- Attachment #1: Type: text/plain, Size: 16862 bytes --]

On Tue, Jul 26, 2016 at 5:49 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
> Hi Richard,
>
> It turned out that the patch proposed by you does not work properly
> for nested loop. If we slightly change pr70729.cc to
> (non-essential part is omitted
> void inline Ss::foo (float w)
> {
> #pragma omp simd
>   for (int i=0; i<S_n; i++)
>     {
>       float w1 = C2[S_n + i] * w;
>       v1.v_i[i] += (int)w1;
>       C1[S_n + i] += w1;
>     }
> }
> void Ss::boo (int n)
> {
>   for (int i = 0; i<n; i++)
>     foo(ww[i]);
> }
> loop in foo won't be vectorized since REF_LOOP is outer loop which is
> not marked with omp simd pragma:
>  t1.cc:73:25: note: not vectorized: not suitable for scatter store *_31 = _33;
> t1.cc:73:25: note: bad data references.
>
> Note that the check I proposed before works fine.

The attached works for me (current trunk doesn't work because of caching
we do - I suppose we should try again to avoid the quadraticness in other
ways when ref_indep_loop_p is called from outermost_indep_loop).

Richard.

> 2016-07-20 12:35 GMT+03:00 Yuri Rumyantsev <ysrumyan@gmail.com>:
>> Richard,
>>
>> Jakub has already fixed it.
>> Sorry for troubles.
>> Yuri.
>>
>> 2016-07-19 18:29 GMT+03:00 Renlin Li <renlin.li@foss.arm.com>:
>>> Hi Yuri,
>>>
>>> I saw this test case runs on arm platforms, and maybe other platforms as
>>> well.
>>>
>>> testsuite/g++.dg/vect/pr70729.cc:7:10: fatal error: xmmintrin.h: No such
>>> file or directory
>>>
>>> Before the change here, it's gated by vect_simd_clones target selector,
>>> which limit it to i?86/x86_64 platform only.
>>>
>>> Regards,
>>> Renlin Li
>>>
>>>
>>>
>>>
>>> On 08/07/16 15:07, Yuri Rumyantsev wrote:
>>>>
>>>> Hi Richard,
>>>>
>>>> Thanks for your help - your patch looks much better.
>>>> Here is new patch in which additional argument was added to determine
>>>> source loop of reference.
>>>>
>>>> Bootstrap and regression testing did not show any new failures.
>>>>
>>>> Is it OK for trunk?
>>>> ChangeLog:
>>>> 2016-07-08  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>
>>>> PR tree-optimization/71734
>>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Add REF_LOOP argument which
>>>> contains REF, use it to check safelen, assume that safelen value
>>>> must be greater 1, fix style.
>>>> (ref_indep_loop_p_2): Add REF_LOOP argument.
>>>> (ref_indep_loop_p): Pass LOOP as additional argument to
>>>> ref_indep_loop_p_2.
>>>> gcc/testsuite/ChangeLog:
>>>>          * g++.dg/vect/pr70729.cc: Delete redundant dg options, fix style.
>>>>
>>>> 2016-07-08 11:18 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>
>>>>> On Thu, Jul 7, 2016 at 5:38 PM, Yuri Rumyantsev <ysrumyan@gmail.com>
>>>>> wrote:
>>>>>>
>>>>>> I checked simd3.f90 and found out that my additional check reject
>>>>>> independence of references
>>>>>>
>>>>>> REF is independent in loop#3
>>>>>> .istart0.19, .iend0.20
>>>>>> which are defined in loop#1 which is outer for loop#3.
>>>>>> Note that these references are defined by
>>>>>> _103 = __builtin_GOMP_loop_dynamic_next (&.istart0.19, &.iend0.20);
>>>>>> which is in loop#1.
>>>>>> It is clear that both these references can not be independent for
>>>>>> loop#3.
>>>>>
>>>>>
>>>>> Ok, so we end up calling ref_indep_loop for ref in LOOP also for inner
>>>>> loops
>>>>> of LOOP to catch memory references in those as well.  So the issue is
>>>>> really
>>>>> that we look at the wrong loop for safelen and we _do_ want to apply
>>>>> safelen
>>>>> to inner loops as well.
>>>>>
>>>>> So better track the loop we are ultimately asking the question for, like
>>>>> in the
>>>>> attached patch (fixes the testcase for me).
>>>>>
>>>>> Richard.
>>>>>
>>>>>
>>>>>
>>>>>> 2016-07-07 17:11 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>
>>>>>>> On Thu, Jul 7, 2016 at 4:04 PM, Yuri Rumyantsev <ysrumyan@gmail.com>
>>>>>>> wrote:
>>>>>>>>
>>>>>>>> I Added this check because of new failures in libgomp.fortran suite.
>>>>>>>> Here is copy of Jakub message:
>>>>>>>> --- Comment #29 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
>>>>>>>> The #c27 r237844 change looks bogus to me.
>>>>>>>> First of all, IMNSHO you can argue this way only if ref is a reference
>>>>>>>> seen in
>>>>>>>> loop LOOP,
>>>>>>>
>>>>>>>
>>>>>>> or inner loops of LOOP I guess.  I _think_ we never call
>>>>>>> ref_indep_loop_p_1 with
>>>>>>> a REF whose loop is not a sub-loop of LOOP or LOOP itself (as it would
>>>>>>> not make
>>>>>>> sense to do that, it would be a waste of time).
>>>>>>>
>>>>>>> So only if "or inner loops of LOOP" is not correct the check would be
>>>>>>> needed
>>>>>>> but then my issue with unrolling an inner loop and turning a ref that
>>>>>>> safelen
>>>>>>> does not apply to into a ref that it now applies to arises.
>>>>>>>
>>>>>>> I don't fully get what Jakub is hinting at.
>>>>>>>
>>>>>>> Can you install the safelen > 0 -> safelen > 1 fix please?  Jakub, can
>>>>>>> you
>>>>>>> explain that bitmap check with a simple testcase?
>>>>>>>
>>>>>>> Thanks,
>>>>>>> Richard.
>>>>>>>
>>>>>>>> which is the case of e.g. *.omp_data_i_23(D).a ref in simd3.f90 -O2
>>>>>>>> -fopenmp -msse2, but not the D.3815[0] case tested during can_sm_ref_p
>>>>>>>> - the
>>>>>>>> D.3815[0] = 0; as well as something = D.3815[0]; stmt found in the
>>>>>>>> outer loop
>>>>>>>> obviously can be dependent on many of the loads and/or stores in the
>>>>>>>> loop, be
>>>>>>>> it "omp simd array" or not.
>>>>>>>> Say for
>>>>>>>> void
>>>>>>>> foo (int *p, int *q)
>>>>>>>> {
>>>>>>>>    #pragma omp simd
>>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>>      p[i] += q[0];
>>>>>>>> }
>>>>>>>> sure, q[0] can't alias p[0] ... p[1022], the earlier iterations could
>>>>>>>> write
>>>>>>>> something that changes its value, and then it would behave differently
>>>>>>>> from
>>>>>>>> using VF = 1024, where everything is performed in parallel.
>>>>>>>> Though, actually, it can alias, just it would have to write the same
>>>>>>>> value as
>>>>>>>> was there.  So, if this is used to determine if it is safe to hoist
>>>>>>>> the load
>>>>>>>> before the loop, it is fine, if it is used to determine if &q[0] >=
>>>>>>>> &p[0] &&
>>>>>>>> &q[0] <= &p[1023], then it is not fine.
>>>>>>>>
>>>>>>>> For aliasing of q[0] and p[1023], I don't see why they couldn't alias
>>>>>>>> in a
>>>>>>>> valid program.  #pragma omp simd I think guarantees that the last
>>>>>>>> iteration is
>>>>>>>> executed last, it isn't necessarily executed last alone, it could be,
>>>>>>>> or
>>>>>>>> together with one before last iteration, or (for simdlen INT_MAX) even
>>>>>>>> all
>>>>>>>> iterations can be done concurrently, in hw or sw, so it is fine if it
>>>>>>>> is
>>>>>>>> transformed into:
>>>>>>>>    int temp[1024], temp2[1024], temp3[1024];
>>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>>      temp[i] = p[i];
>>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>>      temp2[i] = q[0];
>>>>>>>>    /* The above two loops can be also swapped, or intermixed.  */
>>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>>      temp3[i] = temp[i] + temp2[i];
>>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>>      p[i] = temp3[i];
>>>>>>>>    /* Or the above loop reversed etc. */
>>>>>>>>
>>>>>>>> If you have:
>>>>>>>> int
>>>>>>>> bar (int *p, int *q)
>>>>>>>> {
>>>>>>>>    q[0] = 0;
>>>>>>>>    #pragma omp simd
>>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>>      p[i]++;
>>>>>>>>    return q[0];
>>>>>>>> }
>>>>>>>> i.e. something similar to what misbehaves in simd3.f90 with the
>>>>>>>> change, then
>>>>>>>> the answer is that q[0] isn't guaranteed to be independent of any
>>>>>>>> references in
>>>>>>>> the simd loop.
>>>>>>>>
>>>>>>>> 2016-07-07 16:57 GMT+03:00 Richard Biener
>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>
>>>>>>>>> On Thu, Jul 7, 2016 at 3:24 PM, Yuri Rumyantsev <ysrumyan@gmail.com>
>>>>>>>>> wrote:
>>>>>>>>>>
>>>>>>>>>> Richard,
>>>>>>>>>>
>>>>>>>>>> Did you meas the following check enhancement:
>>>>>>>>>>
>>>>>>>>>>    outer = loop_outer (loop);
>>>>>>>>>>    /* We consider REF defined in LOOP as independent if at least 2
>>>>>>>>>> loop
>>>>>>>>>>       iterations are not dependent.  */
>>>>>>>>>>    if ((loop->safelen > 1
>>>>>>>>>>         || (outer && outer->safelen > 1))
>>>>>>>>>>        && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num],
>>>>>>>>>> ref->id))
>>>>>>>>>>      return true;
>>>>>>>>>> which allows us to consider reference x[0] as invariant for the test
>>>>>>>>>> #pragma omp simd
>>>>>>>>>>    for (i = 0; i< 100; i++)
>>>>>>>>>>      {
>>>>>>>>>>        y[i] = i * 2;
>>>>>>>>>>        for (j = 0; j < 100; j++)
>>>>>>>>>> z[j] += x[0];
>>>>>>>>>>      }
>>>>>>>>>>
>>>>>>>>>> Moving statement
>>>>>>>>>> _9 = *x_19(D);
>>>>>>>>>> (cost 20) out of loop 1.
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> outer loops will be automatically considered by outermost_indep_loop
>>>>>>>>> which eventually
>>>>>>>>> calls the function you are patching.
>>>>>>>>>
>>>>>>>>> I was questioning the added test
>>>>>>>>>
>>>>>>>>>         && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num],
>>>>>>>>> ref->id))
>>>>>>>>>
>>>>>>>>> and still do not understand why you need that.  It makes us only
>>>>>>>>> consider the
>>>>>>>>> loop of ref itself when looking at safelen (but not refs that belong
>>>>>>>>> to inner loops
>>>>>>>>> of loop).
>>>>>>>>>
>>>>>>>>> Richard.
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>> 2016-07-07 11:46 GMT+03:00 Richard Biener
>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>
>>>>>>>>>>> On Wed, Jul 6, 2016 at 4:42 PM, Yuri Rumyantsev
>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>
>>>>>>>>>>>> Richard,
>>>>>>>>>>>>
>>>>>>>>>>>> I don't understand your question and how it is related to proposed
>>>>>>>>>>>> patch.
>>>>>>>>>>>>
>>>>>>>>>>>> 2016-07-06 16:55 GMT+03:00 Richard Biener
>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>
>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 3:50 PM, Yuri Rumyantsev
>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> See for example Jakub comments for 70729.
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> But how can the check be valid given a
>>>>>>>>>>>>>
>>>>>>>>>>>>> #pragma omp simd
>>>>>>>>>>>>>    for (;;)
>>>>>>>>>>>>>      {
>>>>>>>>>>>>>         for (;;)
>>>>>>>>>>>>>           ...ref in question...
>>>>>>>>>>>>>      }
>>>>>>>>>>>>>
>>>>>>>>>>>>> can be transformed to
>>>>>>>>>>>>>
>>>>>>>>>>>>> #pragma omp simd
>>>>>>>>>>>>>     for (;;)
>>>>>>>>>>>>>       {
>>>>>>>>>>>>>          ...ref in question...
>>>>>>>>>>>>>          ...ref in question..
>>>>>>>>>>>>>          ...
>>>>>>>>>>>>>       }
>>>>>>>>>>>>>
>>>>>>>>>>>>> by means of unrolling?
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> The bitmap check would return false for the not unrolled inner loop
>>>>>>>>>>> and true for the inner unrolled loop.
>>>>>>>>>>> So it cannot be correct.
>>>>>>>>>>>
>>>>>>>>>>> (not sure why this is all off-list btw)
>>>>>>>>>>>
>>>>>>>>>>> Richard.
>>>>>>>>>>>
>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>
>>>>>>>>>>>>>> 2016-07-06 16:25 GMT+03:00 Richard Biener
>>>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 1:43 PM, Yuri Rumyantsev
>>>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> I did not learn LIM detailed but I see on the following
>>>>>>>>>>>>>>>> example
>>>>>>>>>>>>>>>> #pragma omp simd
>>>>>>>>>>>>>>>>    for (i = 0; i< 100; i++)
>>>>>>>>>>>>>>>>      {
>>>>>>>>>>>>>>>>        y[i] = i * 2;
>>>>>>>>>>>>>>>>        for (j = 0; j < 100; j++)
>>>>>>>>>>>>>>>> z[j] += x[0];
>>>>>>>>>>>>>>>>      }
>>>>>>>>>>>>>>>> that only inner,ost loop is handled:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Memory reference in loop#2 1: *_7
>>>>>>>>>>>>>>>> Memory reference in loop#2 2: *x_19(D)
>>>>>>>>>>>>>>>> Memory reference in loop#1 3: *_3
>>>>>>>>>>>>>>>> Basic block 3 (loop 1 -- depth 1):
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Basic block 4 (loop 2 -- depth 2):
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Loop#2 is innermost
>>>>>>>>>>>>>>>> Querying dependency of refs 2 and 1: dependent.
>>>>>>>>>>>>>>>> Querying dependencies of ref 2 in loop 2: dependent
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> well, LIM doesn't bother to check against refs in an outer loop
>>>>>>>>>>>>>>> if
>>>>>>>>>>>>>>> refs in the inner loop are already dependent.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Loop unroll of inner loop does not invalidate 'safelen' of
>>>>>>>>>>>>>>>> outer one
>>>>>>>>>>>>>>>> accordingly with safelen definition.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> So why do you need the bitmap_bit_p check then?
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 2016-07-06 14:23 GMT+03:00 Richard Biener
>>>>>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 1:17 PM, Yuri Rumyantsev
>>>>>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> LIM phase considers only innermost loops and does not handle
>>>>>>>>>>>>>>>>>> loop
>>>>>>>>>>>>>>>>>> nests, so in your case the innermost loop (j) does not have
>>>>>>>>>>>>>>>>>> non-zero
>>>>>>>>>>>>>>>>>> safelen.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Huh?  LIM considers loop nests just fine.  And yes, the
>>>>>>>>>>>>>>>>> innermost loop
>>>>>>>>>>>>>>>>> does not have safelen but shouldn't it?  Consider the inner
>>>>>>>>>>>>>>>>> loop being unrolled
>>>>>>>>>>>>>>>>> by GCC so it is removed - should that then invalidate the
>>>>>>>>>>>>>>>>> outer loop safelen
>>>>>>>>>>>>>>>>> because innermost loop refs now appear in the outer loop?
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> 2016-07-06 13:34 GMT+03:00 Richard Biener
>>>>>>>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 11:50 AM, Yuri Rumyantsev
>>>>>>>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> I pointed out in the commentary that REF is defined inside
>>>>>>>>>>>>>>>>>>>> loop and
>>>>>>>>>>>>>>>>>>>> this check is related to this statement. Should I clarify
>>>>>>>>>>>>>>>>>>>> it?
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> +  /* We consider REF defined in LOOP as independent if at
>>>>>>>>>>>>>>>>>>>> least 2 loop
>>>>>>>>>>>>>>>>>>>> +     iterations are not dependent.  */
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Yes, I fail to see why x[0] should not be disambiguated
>>>>>>>>>>>>>>>>>>> against y[i] in
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> #pragma GCC loop ivdep
>>>>>>>>>>>>>>>>>>>    for (i...)
>>>>>>>>>>>>>>>>>>>      {
>>>>>>>>>>>>>>>>>>>        y[i] = ...;
>>>>>>>>>>>>>>>>>>>        for (j...)
>>>>>>>>>>>>>>>>>>>          ... = x[0];
>>>>>>>>>>>>>>>>>>>      }
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> REF is always inside the loop nest with LOOP being the
>>>>>>>>>>>>>>>>>>> outermost loop.
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> 2016-07-06 12:38 GMT+03:00 Richard Biener
>>>>>>>>>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> On Tue, Jul 5, 2016 at 4:56 PM, Yuri Rumyantsev
>>>>>>>>>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> Hi All,
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> Here is a simple fix to cure regressions introduced by
>>>>>>>>>>>>>>>>>>>>>> my fix for
>>>>>>>>>>>>>>>>>>>>>> 70729. Patch also contains minor changes in test found
>>>>>>>>>>>>>>>>>>>>>> by Jakub.
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> Bootstrapping and regression testing did not show any
>>>>>>>>>>>>>>>>>>>>>> new failures.
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> Is it OK for trunk?
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> +      && bitmap_bit_p
>>>>>>>>>>>>>>>>>>>>> (&memory_accesses.refs_in_loop[loop->num], ref->id))
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> So safelen does not apply to refs in nested loops?  The
>>>>>>>>>>>>>>>>>>>>> added comment only
>>>>>>>>>>>>>>>>>>>>> explains the safelen check change but this also requires
>>>>>>>>>>>>>>>>>>>>> explanation.
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> ChangeLog:
>>>>>>>>>>>>>>>>>>>>>> 2016-07-05  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> PR tree-optimization/71734
>>>>>>>>>>>>>>>>>>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Consider REF
>>>>>>>>>>>>>>>>>>>>>> defined in
>>>>>>>>>>>>>>>>>>>>>> LOOP as independent if at least two loop iterations are
>>>>>>>>>>>>>>>>>>>>>> not dependent.
>>>>>>>>>>>>>>>>>>>>>> gcc/testsuite/ChangeLog:
>>>>>>>>>>>>>>>>>>>>>>          * g++.dg/vect/pr70729.cc: Delete redundant dg
>>>>>>>>>>>>>>>>>>>>>> options, fix style.

[-- Attachment #2: p --]
[-- Type: application/octet-stream, Size: 2345 bytes --]

Index: gcc/tree-ssa-loop-im.c
===================================================================
--- gcc/tree-ssa-loop-im.c	(revision 238783)
+++ gcc/tree-ssa-loop-im.c	(working copy)
@@ -2113,7 +2176,7 @@ record_dep_loop (struct loop *loop, im_m
    references in LOOP.  */
 
 static bool
-ref_indep_loop_p_1 (struct loop *ref_loop, struct loop *loop,
+ref_indep_loop_p_1 (int safelen, struct loop *loop,
 		    im_mem_ref *ref, bool stored_p)
 {
   bitmap refs_to_check;
@@ -2129,12 +2192,12 @@ ref_indep_loop_p_1 (struct loop *ref_loo
   if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
     return false;
 
-  if (ref_loop->safelen > 1)
+  if (safelen > 1)
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	{
-	  fprintf (dump_file,"REF is independent in ref_loop#%d\n",
-		   ref_loop->num);
+	  fprintf (dump_file,"REF is independent due to safelen %d\n",
+		   safelen);
 	  print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
 	  fprintf (dump_file, "\n");
 	}
@@ -2156,7 +2219,7 @@ ref_indep_loop_p_1 (struct loop *ref_loo
    results.  */
 
 static bool
-ref_indep_loop_p_2 (struct loop *ref_loop, struct loop *loop,
+ref_indep_loop_p_2 (int safelen, struct loop *loop,
 		    im_mem_ref *ref, bool stored_p)
 {
   stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));
@@ -2166,15 +2229,18 @@ ref_indep_loop_p_2 (struct loop *ref_loo
   if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
     return false;
 
+  if (loop->safelen > safelen)
+    safelen = loop->safelen;
+
   struct loop *inner = loop->inner;
   while (inner)
     {
-      if (!ref_indep_loop_p_2 (ref_loop, inner, ref, stored_p))
+      if (!ref_indep_loop_p_2 (safelen, inner, ref, stored_p))
 	return false;
       inner = inner->next;
     }
 
-  bool indep_p = ref_indep_loop_p_1 (ref_loop, loop, ref, stored_p);
+  bool indep_p = ref_indep_loop_p_1 (safelen, loop, ref, stored_p);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
@@ -2213,7 +2279,7 @@ ref_indep_loop_p (struct loop *loop, im_
 {
   gcc_checking_assert (MEM_ANALYZABLE (ref));
 
-  return ref_indep_loop_p_2 (loop, loop, ref, false);
+  return ref_indep_loop_p_2 (0, loop, ref, false);
 }
 
 /* Returns true if we can perform store motion of REF from LOOP.  */

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
  2016-07-27 13:25                                           ` Richard Biener
@ 2016-07-28 13:49                                             ` Yuri Rumyantsev
  2016-07-28 13:52                                               ` Richard Biener
  2016-07-28 21:33                                               ` H.J. Lu
  0 siblings, 2 replies; 24+ messages in thread
From: Yuri Rumyantsev @ 2016-07-28 13:49 UTC (permalink / raw)
  To: Richard Biener
  Cc: Renlin Li, gcc-patches,
	Илья
	Энкович,
	Igor Zamyatin, Jakub Jelinek

[-- Attachment #1: Type: text/plain, Size: 18032 bytes --]

Richard,

I prepare a patch which is based on yours. New test is also included.
Bootstrapping and regression testing did not show any new failures.
Is it OK for trunk?

Thanks.
ChangeLog:
2016-07-28  Yuri Rumyantsev  <ysrumyan@gmail.com>

PR tree-optimization/71734
* tree-ssa-loop-im.c (ref_indep_loop_p_1): Pass value of safelen
attribute instead of REF_LOOP and use it.
(ref_indep_loop_p_2): Use SAFELEN argument instead of REF_LOOP and
set it for Loops having non-zero safelen attribute.
(ref_indep_loop_p): Pass zero as initial value for safelen.
gcc/testsuite/ChangeLog:
        * g++.dg/vect/pr70729-nest.cc: New test.

2016-07-27 16:25 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
> On Tue, Jul 26, 2016 at 5:49 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>> Hi Richard,
>>
>> It turned out that the patch proposed by you does not work properly
>> for nested loop. If we slightly change pr70729.cc to
>> (non-essential part is omitted
>> void inline Ss::foo (float w)
>> {
>> #pragma omp simd
>>   for (int i=0; i<S_n; i++)
>>     {
>>       float w1 = C2[S_n + i] * w;
>>       v1.v_i[i] += (int)w1;
>>       C1[S_n + i] += w1;
>>     }
>> }
>> void Ss::boo (int n)
>> {
>>   for (int i = 0; i<n; i++)
>>     foo(ww[i]);
>> }
>> loop in foo won't be vectorized since REF_LOOP is outer loop which is
>> not marked with omp simd pragma:
>>  t1.cc:73:25: note: not vectorized: not suitable for scatter store *_31 = _33;
>> t1.cc:73:25: note: bad data references.
>>
>> Note that the check I proposed before works fine.
>
> The attached works for me (current trunk doesn't work because of caching
> we do - I suppose we should try again to avoid the quadraticness in other
> ways when ref_indep_loop_p is called from outermost_indep_loop).
>
> Richard.
>
>> 2016-07-20 12:35 GMT+03:00 Yuri Rumyantsev <ysrumyan@gmail.com>:
>>> Richard,
>>>
>>> Jakub has already fixed it.
>>> Sorry for troubles.
>>> Yuri.
>>>
>>> 2016-07-19 18:29 GMT+03:00 Renlin Li <renlin.li@foss.arm.com>:
>>>> Hi Yuri,
>>>>
>>>> I saw this test case runs on arm platforms, and maybe other platforms as
>>>> well.
>>>>
>>>> testsuite/g++.dg/vect/pr70729.cc:7:10: fatal error: xmmintrin.h: No such
>>>> file or directory
>>>>
>>>> Before the change here, it's gated by vect_simd_clones target selector,
>>>> which limit it to i?86/x86_64 platform only.
>>>>
>>>> Regards,
>>>> Renlin Li
>>>>
>>>>
>>>>
>>>>
>>>> On 08/07/16 15:07, Yuri Rumyantsev wrote:
>>>>>
>>>>> Hi Richard,
>>>>>
>>>>> Thanks for your help - your patch looks much better.
>>>>> Here is new patch in which additional argument was added to determine
>>>>> source loop of reference.
>>>>>
>>>>> Bootstrap and regression testing did not show any new failures.
>>>>>
>>>>> Is it OK for trunk?
>>>>> ChangeLog:
>>>>> 2016-07-08  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>
>>>>> PR tree-optimization/71734
>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Add REF_LOOP argument which
>>>>> contains REF, use it to check safelen, assume that safelen value
>>>>> must be greater 1, fix style.
>>>>> (ref_indep_loop_p_2): Add REF_LOOP argument.
>>>>> (ref_indep_loop_p): Pass LOOP as additional argument to
>>>>> ref_indep_loop_p_2.
>>>>> gcc/testsuite/ChangeLog:
>>>>>          * g++.dg/vect/pr70729.cc: Delete redundant dg options, fix style.
>>>>>
>>>>> 2016-07-08 11:18 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>
>>>>>> On Thu, Jul 7, 2016 at 5:38 PM, Yuri Rumyantsev <ysrumyan@gmail.com>
>>>>>> wrote:
>>>>>>>
>>>>>>> I checked simd3.f90 and found out that my additional check reject
>>>>>>> independence of references
>>>>>>>
>>>>>>> REF is independent in loop#3
>>>>>>> .istart0.19, .iend0.20
>>>>>>> which are defined in loop#1 which is outer for loop#3.
>>>>>>> Note that these references are defined by
>>>>>>> _103 = __builtin_GOMP_loop_dynamic_next (&.istart0.19, &.iend0.20);
>>>>>>> which is in loop#1.
>>>>>>> It is clear that both these references can not be independent for
>>>>>>> loop#3.
>>>>>>
>>>>>>
>>>>>> Ok, so we end up calling ref_indep_loop for ref in LOOP also for inner
>>>>>> loops
>>>>>> of LOOP to catch memory references in those as well.  So the issue is
>>>>>> really
>>>>>> that we look at the wrong loop for safelen and we _do_ want to apply
>>>>>> safelen
>>>>>> to inner loops as well.
>>>>>>
>>>>>> So better track the loop we are ultimately asking the question for, like
>>>>>> in the
>>>>>> attached patch (fixes the testcase for me).
>>>>>>
>>>>>> Richard.
>>>>>>
>>>>>>
>>>>>>
>>>>>>> 2016-07-07 17:11 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>
>>>>>>>> On Thu, Jul 7, 2016 at 4:04 PM, Yuri Rumyantsev <ysrumyan@gmail.com>
>>>>>>>> wrote:
>>>>>>>>>
>>>>>>>>> I Added this check because of new failures in libgomp.fortran suite.
>>>>>>>>> Here is copy of Jakub message:
>>>>>>>>> --- Comment #29 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
>>>>>>>>> The #c27 r237844 change looks bogus to me.
>>>>>>>>> First of all, IMNSHO you can argue this way only if ref is a reference
>>>>>>>>> seen in
>>>>>>>>> loop LOOP,
>>>>>>>>
>>>>>>>>
>>>>>>>> or inner loops of LOOP I guess.  I _think_ we never call
>>>>>>>> ref_indep_loop_p_1 with
>>>>>>>> a REF whose loop is not a sub-loop of LOOP or LOOP itself (as it would
>>>>>>>> not make
>>>>>>>> sense to do that, it would be a waste of time).
>>>>>>>>
>>>>>>>> So only if "or inner loops of LOOP" is not correct the check would be
>>>>>>>> needed
>>>>>>>> but then my issue with unrolling an inner loop and turning a ref that
>>>>>>>> safelen
>>>>>>>> does not apply to into a ref that it now applies to arises.
>>>>>>>>
>>>>>>>> I don't fully get what Jakub is hinting at.
>>>>>>>>
>>>>>>>> Can you install the safelen > 0 -> safelen > 1 fix please?  Jakub, can
>>>>>>>> you
>>>>>>>> explain that bitmap check with a simple testcase?
>>>>>>>>
>>>>>>>> Thanks,
>>>>>>>> Richard.
>>>>>>>>
>>>>>>>>> which is the case of e.g. *.omp_data_i_23(D).a ref in simd3.f90 -O2
>>>>>>>>> -fopenmp -msse2, but not the D.3815[0] case tested during can_sm_ref_p
>>>>>>>>> - the
>>>>>>>>> D.3815[0] = 0; as well as something = D.3815[0]; stmt found in the
>>>>>>>>> outer loop
>>>>>>>>> obviously can be dependent on many of the loads and/or stores in the
>>>>>>>>> loop, be
>>>>>>>>> it "omp simd array" or not.
>>>>>>>>> Say for
>>>>>>>>> void
>>>>>>>>> foo (int *p, int *q)
>>>>>>>>> {
>>>>>>>>>    #pragma omp simd
>>>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>>>      p[i] += q[0];
>>>>>>>>> }
>>>>>>>>> sure, q[0] can't alias p[0] ... p[1022], the earlier iterations could
>>>>>>>>> write
>>>>>>>>> something that changes its value, and then it would behave differently
>>>>>>>>> from
>>>>>>>>> using VF = 1024, where everything is performed in parallel.
>>>>>>>>> Though, actually, it can alias, just it would have to write the same
>>>>>>>>> value as
>>>>>>>>> was there.  So, if this is used to determine if it is safe to hoist
>>>>>>>>> the load
>>>>>>>>> before the loop, it is fine, if it is used to determine if &q[0] >=
>>>>>>>>> &p[0] &&
>>>>>>>>> &q[0] <= &p[1023], then it is not fine.
>>>>>>>>>
>>>>>>>>> For aliasing of q[0] and p[1023], I don't see why they couldn't alias
>>>>>>>>> in a
>>>>>>>>> valid program.  #pragma omp simd I think guarantees that the last
>>>>>>>>> iteration is
>>>>>>>>> executed last, it isn't necessarily executed last alone, it could be,
>>>>>>>>> or
>>>>>>>>> together with one before last iteration, or (for simdlen INT_MAX) even
>>>>>>>>> all
>>>>>>>>> iterations can be done concurrently, in hw or sw, so it is fine if it
>>>>>>>>> is
>>>>>>>>> transformed into:
>>>>>>>>>    int temp[1024], temp2[1024], temp3[1024];
>>>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>>>      temp[i] = p[i];
>>>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>>>      temp2[i] = q[0];
>>>>>>>>>    /* The above two loops can be also swapped, or intermixed.  */
>>>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>>>      temp3[i] = temp[i] + temp2[i];
>>>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>>>      p[i] = temp3[i];
>>>>>>>>>    /* Or the above loop reversed etc. */
>>>>>>>>>
>>>>>>>>> If you have:
>>>>>>>>> int
>>>>>>>>> bar (int *p, int *q)
>>>>>>>>> {
>>>>>>>>>    q[0] = 0;
>>>>>>>>>    #pragma omp simd
>>>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>>>      p[i]++;
>>>>>>>>>    return q[0];
>>>>>>>>> }
>>>>>>>>> i.e. something similar to what misbehaves in simd3.f90 with the
>>>>>>>>> change, then
>>>>>>>>> the answer is that q[0] isn't guaranteed to be independent of any
>>>>>>>>> references in
>>>>>>>>> the simd loop.
>>>>>>>>>
>>>>>>>>> 2016-07-07 16:57 GMT+03:00 Richard Biener
>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>
>>>>>>>>>> On Thu, Jul 7, 2016 at 3:24 PM, Yuri Rumyantsev <ysrumyan@gmail.com>
>>>>>>>>>> wrote:
>>>>>>>>>>>
>>>>>>>>>>> Richard,
>>>>>>>>>>>
>>>>>>>>>>> Did you meas the following check enhancement:
>>>>>>>>>>>
>>>>>>>>>>>    outer = loop_outer (loop);
>>>>>>>>>>>    /* We consider REF defined in LOOP as independent if at least 2
>>>>>>>>>>> loop
>>>>>>>>>>>       iterations are not dependent.  */
>>>>>>>>>>>    if ((loop->safelen > 1
>>>>>>>>>>>         || (outer && outer->safelen > 1))
>>>>>>>>>>>        && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num],
>>>>>>>>>>> ref->id))
>>>>>>>>>>>      return true;
>>>>>>>>>>> which allows us to consider reference x[0] as invariant for the test
>>>>>>>>>>> #pragma omp simd
>>>>>>>>>>>    for (i = 0; i< 100; i++)
>>>>>>>>>>>      {
>>>>>>>>>>>        y[i] = i * 2;
>>>>>>>>>>>        for (j = 0; j < 100; j++)
>>>>>>>>>>> z[j] += x[0];
>>>>>>>>>>>      }
>>>>>>>>>>>
>>>>>>>>>>> Moving statement
>>>>>>>>>>> _9 = *x_19(D);
>>>>>>>>>>> (cost 20) out of loop 1.
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> outer loops will be automatically considered by outermost_indep_loop
>>>>>>>>>> which eventually
>>>>>>>>>> calls the function you are patching.
>>>>>>>>>>
>>>>>>>>>> I was questioning the added test
>>>>>>>>>>
>>>>>>>>>>         && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num],
>>>>>>>>>> ref->id))
>>>>>>>>>>
>>>>>>>>>> and still do not understand why you need that.  It makes us only
>>>>>>>>>> consider the
>>>>>>>>>> loop of ref itself when looking at safelen (but not refs that belong
>>>>>>>>>> to inner loops
>>>>>>>>>> of loop).
>>>>>>>>>>
>>>>>>>>>> Richard.
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>> 2016-07-07 11:46 GMT+03:00 Richard Biener
>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>
>>>>>>>>>>>> On Wed, Jul 6, 2016 at 4:42 PM, Yuri Rumyantsev
>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>
>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>
>>>>>>>>>>>>> I don't understand your question and how it is related to proposed
>>>>>>>>>>>>> patch.
>>>>>>>>>>>>>
>>>>>>>>>>>>> 2016-07-06 16:55 GMT+03:00 Richard Biener
>>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 3:50 PM, Yuri Rumyantsev
>>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> See for example Jakub comments for 70729.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> But how can the check be valid given a
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> #pragma omp simd
>>>>>>>>>>>>>>    for (;;)
>>>>>>>>>>>>>>      {
>>>>>>>>>>>>>>         for (;;)
>>>>>>>>>>>>>>           ...ref in question...
>>>>>>>>>>>>>>      }
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> can be transformed to
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> #pragma omp simd
>>>>>>>>>>>>>>     for (;;)
>>>>>>>>>>>>>>       {
>>>>>>>>>>>>>>          ...ref in question...
>>>>>>>>>>>>>>          ...ref in question..
>>>>>>>>>>>>>>          ...
>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> by means of unrolling?
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> The bitmap check would return false for the not unrolled inner loop
>>>>>>>>>>>> and true for the inner unrolled loop.
>>>>>>>>>>>> So it cannot be correct.
>>>>>>>>>>>>
>>>>>>>>>>>> (not sure why this is all off-list btw)
>>>>>>>>>>>>
>>>>>>>>>>>> Richard.
>>>>>>>>>>>>
>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> 2016-07-06 16:25 GMT+03:00 Richard Biener
>>>>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 1:43 PM, Yuri Rumyantsev
>>>>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> I did not learn LIM detailed but I see on the following
>>>>>>>>>>>>>>>>> example
>>>>>>>>>>>>>>>>> #pragma omp simd
>>>>>>>>>>>>>>>>>    for (i = 0; i< 100; i++)
>>>>>>>>>>>>>>>>>      {
>>>>>>>>>>>>>>>>>        y[i] = i * 2;
>>>>>>>>>>>>>>>>>        for (j = 0; j < 100; j++)
>>>>>>>>>>>>>>>>> z[j] += x[0];
>>>>>>>>>>>>>>>>>      }
>>>>>>>>>>>>>>>>> that only inner,ost loop is handled:
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Memory reference in loop#2 1: *_7
>>>>>>>>>>>>>>>>> Memory reference in loop#2 2: *x_19(D)
>>>>>>>>>>>>>>>>> Memory reference in loop#1 3: *_3
>>>>>>>>>>>>>>>>> Basic block 3 (loop 1 -- depth 1):
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Basic block 4 (loop 2 -- depth 2):
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Loop#2 is innermost
>>>>>>>>>>>>>>>>> Querying dependency of refs 2 and 1: dependent.
>>>>>>>>>>>>>>>>> Querying dependencies of ref 2 in loop 2: dependent
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> well, LIM doesn't bother to check against refs in an outer loop
>>>>>>>>>>>>>>>> if
>>>>>>>>>>>>>>>> refs in the inner loop are already dependent.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Loop unroll of inner loop does not invalidate 'safelen' of
>>>>>>>>>>>>>>>>> outer one
>>>>>>>>>>>>>>>>> accordingly with safelen definition.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> So why do you need the bitmap_bit_p check then?
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> 2016-07-06 14:23 GMT+03:00 Richard Biener
>>>>>>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 1:17 PM, Yuri Rumyantsev
>>>>>>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> LIM phase considers only innermost loops and does not handle
>>>>>>>>>>>>>>>>>>> loop
>>>>>>>>>>>>>>>>>>> nests, so in your case the innermost loop (j) does not have
>>>>>>>>>>>>>>>>>>> non-zero
>>>>>>>>>>>>>>>>>>> safelen.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Huh?  LIM considers loop nests just fine.  And yes, the
>>>>>>>>>>>>>>>>>> innermost loop
>>>>>>>>>>>>>>>>>> does not have safelen but shouldn't it?  Consider the inner
>>>>>>>>>>>>>>>>>> loop being unrolled
>>>>>>>>>>>>>>>>>> by GCC so it is removed - should that then invalidate the
>>>>>>>>>>>>>>>>>> outer loop safelen
>>>>>>>>>>>>>>>>>> because innermost loop refs now appear in the outer loop?
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> 2016-07-06 13:34 GMT+03:00 Richard Biener
>>>>>>>>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 11:50 AM, Yuri Rumyantsev
>>>>>>>>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> I pointed out in the commentary that REF is defined inside
>>>>>>>>>>>>>>>>>>>>> loop and
>>>>>>>>>>>>>>>>>>>>> this check is related to this statement. Should I clarify
>>>>>>>>>>>>>>>>>>>>> it?
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> +  /* We consider REF defined in LOOP as independent if at
>>>>>>>>>>>>>>>>>>>>> least 2 loop
>>>>>>>>>>>>>>>>>>>>> +     iterations are not dependent.  */
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> Yes, I fail to see why x[0] should not be disambiguated
>>>>>>>>>>>>>>>>>>>> against y[i] in
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> #pragma GCC loop ivdep
>>>>>>>>>>>>>>>>>>>>    for (i...)
>>>>>>>>>>>>>>>>>>>>      {
>>>>>>>>>>>>>>>>>>>>        y[i] = ...;
>>>>>>>>>>>>>>>>>>>>        for (j...)
>>>>>>>>>>>>>>>>>>>>          ... = x[0];
>>>>>>>>>>>>>>>>>>>>      }
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> REF is always inside the loop nest with LOOP being the
>>>>>>>>>>>>>>>>>>>> outermost loop.
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> 2016-07-06 12:38 GMT+03:00 Richard Biener
>>>>>>>>>>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> On Tue, Jul 5, 2016 at 4:56 PM, Yuri Rumyantsev
>>>>>>>>>>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>> Hi All,
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>> Here is a simple fix to cure regressions introduced by
>>>>>>>>>>>>>>>>>>>>>>> my fix for
>>>>>>>>>>>>>>>>>>>>>>> 70729. Patch also contains minor changes in test found
>>>>>>>>>>>>>>>>>>>>>>> by Jakub.
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>> Bootstrapping and regression testing did not show any
>>>>>>>>>>>>>>>>>>>>>>> new failures.
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>> Is it OK for trunk?
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> +      && bitmap_bit_p
>>>>>>>>>>>>>>>>>>>>>> (&memory_accesses.refs_in_loop[loop->num], ref->id))
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> So safelen does not apply to refs in nested loops?  The
>>>>>>>>>>>>>>>>>>>>>> added comment only
>>>>>>>>>>>>>>>>>>>>>> explains the safelen check change but this also requires
>>>>>>>>>>>>>>>>>>>>>> explanation.
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>> ChangeLog:
>>>>>>>>>>>>>>>>>>>>>>> 2016-07-05  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>> PR tree-optimization/71734
>>>>>>>>>>>>>>>>>>>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Consider REF
>>>>>>>>>>>>>>>>>>>>>>> defined in
>>>>>>>>>>>>>>>>>>>>>>> LOOP as independent if at least two loop iterations are
>>>>>>>>>>>>>>>>>>>>>>> not dependent.
>>>>>>>>>>>>>>>>>>>>>>> gcc/testsuite/ChangeLog:
>>>>>>>>>>>>>>>>>>>>>>>          * g++.dg/vect/pr70729.cc: Delete redundant dg
>>>>>>>>>>>>>>>>>>>>>>> options, fix style.

[-- Attachment #2: 71734.new-patch --]
[-- Type: application/octet-stream, Size: 4377 bytes --]

diff --git a/gcc/testsuite/g++.dg/vect/pr70729-nest.cc b/gcc/testsuite/g++.dg/vect/pr70729-nest.cc
new file mode 100644
index 0000000..96171e5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/pr70729-nest.cc
@@ -0,0 +1,79 @@
+// { dg-do compile }
+// { dg-additional-options "-ffast-math -fopenmp-simd" }
+// { dg-additional-options "-msse2" { target x86_64-*-* i?86-*-* } }
+
+inline void* my_alloc (__SIZE_TYPE__ bytes) {void *ptr; __builtin_posix_memalign (&ptr, bytes, 128);}
+inline void my_free (void* memory) {__builtin_free (memory);}
+float W[100];
+
+template <typename T>
+class Vec
+{
+  const int isize;
+	T* data;
+
+public:
+
+  Vec (int n) : isize (n) {data = (T*)my_alloc (isize*sizeof (T));}
+  ~Vec () {my_free(data);}
+
+  Vec& operator = (const Vec& other)	
+    {
+      if (this != &other)
+	__builtin_memcpy (data, other.data, isize*sizeof (T));
+      return *this;
+    }
+
+  T& operator [] (int i) {return data[i];}
+  const T& operator [] (int i) const {return data[i];}
+  T& at (int i)  {return data[i];}
+  const T& at (int i) const {return data[i];}
+
+  operator T* ()  {return data;}
+  int size () const {return isize;}
+};
+
+template <typename T>                                  
+class Cl
+{
+public:
+
+  Cl (int n, int m);
+  const int N, M;
+  Vec<T> v_x, v_y;
+  Vec<int> v_i;
+  Vec<float> v_z;
+};
+
+struct Ss
+{
+    const int S_n, S_m;
+    Cl<float> v1;
+    float* C1;
+    float* C2;
+    Ss (int n1, int n2): S_n(n1), S_m(n2), v1(n1, n2)
+      {
+	C1 = new float[n1 * 3];
+	C2 = new float[n2 * 4]; 
+      }
+
+    ~Ss () { delete C1; delete C2;}
+   void foo (int n);
+};
+void Ss::foo (int n)
+{
+  float w;
+  for (int j = 0; j < n; j++)
+    {
+      w = W[j];
+#pragma omp simd
+      for (int i = 0; i < S_n; i++)
+	{
+	  float w1 = C2[S_n + i] * w;
+	  v1.v_i[i] += (int)w1;
+	  C1[S_n + i] += w1;
+	}
+    }
+}
+ 
+// { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target x86_64-*-* i?86-*-* } } }
diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index 278f60a..190c8c6 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -2113,7 +2113,7 @@ record_dep_loop (struct loop *loop, im_mem_ref *ref, bool stored_p)
    references in LOOP.  */
 
 static bool
-ref_indep_loop_p_1 (struct loop *ref_loop, struct loop *loop,
+ref_indep_loop_p_1 (int safelen, struct loop *loop,
 		    im_mem_ref *ref, bool stored_p)
 {
   bitmap refs_to_check;
@@ -2129,12 +2129,12 @@ ref_indep_loop_p_1 (struct loop *ref_loop, struct loop *loop,
   if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
     return false;
 
-  if (ref_loop->safelen > 1)
+  if (safelen > 1)
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	{
-	  fprintf (dump_file,"REF is independent in ref_loop#%d\n",
-		   ref_loop->num);
+	  fprintf (dump_file,"REF is independent due to safelen %d\n",
+		   safelen);
 	  print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
 	  fprintf (dump_file, "\n");
 	}
@@ -2156,7 +2156,7 @@ ref_indep_loop_p_1 (struct loop *ref_loop, struct loop *loop,
    results.  */
 
 static bool
-ref_indep_loop_p_2 (struct loop *ref_loop, struct loop *loop,
+ref_indep_loop_p_2 (int safelen, struct loop *loop,
 		    im_mem_ref *ref, bool stored_p)
 {
   stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));
@@ -2166,15 +2166,18 @@ ref_indep_loop_p_2 (struct loop *ref_loop, struct loop *loop,
   if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
     return false;
 
+  if (loop->safelen > safelen)
+    safelen = loop->safelen;
+
   struct loop *inner = loop->inner;
   while (inner)
     {
-      if (!ref_indep_loop_p_2 (ref_loop, inner, ref, stored_p))
+      if (!ref_indep_loop_p_2 (safelen, inner, ref, stored_p))
 	return false;
       inner = inner->next;
     }
 
-  bool indep_p = ref_indep_loop_p_1 (ref_loop, loop, ref, stored_p);
+  bool indep_p = ref_indep_loop_p_1 (safelen, loop, ref, stored_p);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
@@ -2213,7 +2216,7 @@ ref_indep_loop_p (struct loop *loop, im_mem_ref *ref)
 {
   gcc_checking_assert (MEM_ANALYZABLE (ref));
 
-  return ref_indep_loop_p_2 (loop, loop, ref, false);
+  return ref_indep_loop_p_2 (0, loop, ref, false);
 }
 
 /* Returns true if we can perform store motion of REF from LOOP.  */

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
  2016-07-28 13:49                                             ` Yuri Rumyantsev
@ 2016-07-28 13:52                                               ` Richard Biener
  2016-07-28 21:33                                               ` H.J. Lu
  1 sibling, 0 replies; 24+ messages in thread
From: Richard Biener @ 2016-07-28 13:52 UTC (permalink / raw)
  To: Yuri Rumyantsev
  Cc: Renlin Li, gcc-patches,
	Илья
	Энкович,
	Igor Zamyatin, Jakub Jelinek

On Thu, Jul 28, 2016 at 3:49 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
> Richard,
>
> I prepare a patch which is based on yours. New test is also included.
> Bootstrapping and regression testing did not show any new failures.
> Is it OK for trunk?

Ok.

Thanks,
Richard.

> Thanks.
> ChangeLog:
> 2016-07-28  Yuri Rumyantsev  <ysrumyan@gmail.com>
>
> PR tree-optimization/71734
> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Pass value of safelen
> attribute instead of REF_LOOP and use it.
> (ref_indep_loop_p_2): Use SAFELEN argument instead of REF_LOOP and
> set it for Loops having non-zero safelen attribute.
> (ref_indep_loop_p): Pass zero as initial value for safelen.
> gcc/testsuite/ChangeLog:
>         * g++.dg/vect/pr70729-nest.cc: New test.
>
> 2016-07-27 16:25 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>> On Tue, Jul 26, 2016 at 5:49 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>> Hi Richard,
>>>
>>> It turned out that the patch proposed by you does not work properly
>>> for nested loop. If we slightly change pr70729.cc to
>>> (non-essential part is omitted
>>> void inline Ss::foo (float w)
>>> {
>>> #pragma omp simd
>>>   for (int i=0; i<S_n; i++)
>>>     {
>>>       float w1 = C2[S_n + i] * w;
>>>       v1.v_i[i] += (int)w1;
>>>       C1[S_n + i] += w1;
>>>     }
>>> }
>>> void Ss::boo (int n)
>>> {
>>>   for (int i = 0; i<n; i++)
>>>     foo(ww[i]);
>>> }
>>> loop in foo won't be vectorized since REF_LOOP is outer loop which is
>>> not marked with omp simd pragma:
>>>  t1.cc:73:25: note: not vectorized: not suitable for scatter store *_31 = _33;
>>> t1.cc:73:25: note: bad data references.
>>>
>>> Note that the check I proposed before works fine.
>>
>> The attached works for me (current trunk doesn't work because of caching
>> we do - I suppose we should try again to avoid the quadraticness in other
>> ways when ref_indep_loop_p is called from outermost_indep_loop).
>>
>> Richard.
>>
>>> 2016-07-20 12:35 GMT+03:00 Yuri Rumyantsev <ysrumyan@gmail.com>:
>>>> Richard,
>>>>
>>>> Jakub has already fixed it.
>>>> Sorry for troubles.
>>>> Yuri.
>>>>
>>>> 2016-07-19 18:29 GMT+03:00 Renlin Li <renlin.li@foss.arm.com>:
>>>>> Hi Yuri,
>>>>>
>>>>> I saw this test case runs on arm platforms, and maybe other platforms as
>>>>> well.
>>>>>
>>>>> testsuite/g++.dg/vect/pr70729.cc:7:10: fatal error: xmmintrin.h: No such
>>>>> file or directory
>>>>>
>>>>> Before the change here, it's gated by vect_simd_clones target selector,
>>>>> which limit it to i?86/x86_64 platform only.
>>>>>
>>>>> Regards,
>>>>> Renlin Li
>>>>>
>>>>>
>>>>>
>>>>>
>>>>> On 08/07/16 15:07, Yuri Rumyantsev wrote:
>>>>>>
>>>>>> Hi Richard,
>>>>>>
>>>>>> Thanks for your help - your patch looks much better.
>>>>>> Here is new patch in which additional argument was added to determine
>>>>>> source loop of reference.
>>>>>>
>>>>>> Bootstrap and regression testing did not show any new failures.
>>>>>>
>>>>>> Is it OK for trunk?
>>>>>> ChangeLog:
>>>>>> 2016-07-08  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>
>>>>>> PR tree-optimization/71734
>>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Add REF_LOOP argument which
>>>>>> contains REF, use it to check safelen, assume that safelen value
>>>>>> must be greater 1, fix style.
>>>>>> (ref_indep_loop_p_2): Add REF_LOOP argument.
>>>>>> (ref_indep_loop_p): Pass LOOP as additional argument to
>>>>>> ref_indep_loop_p_2.
>>>>>> gcc/testsuite/ChangeLog:
>>>>>>          * g++.dg/vect/pr70729.cc: Delete redundant dg options, fix style.
>>>>>>
>>>>>> 2016-07-08 11:18 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>
>>>>>>> On Thu, Jul 7, 2016 at 5:38 PM, Yuri Rumyantsev <ysrumyan@gmail.com>
>>>>>>> wrote:
>>>>>>>>
>>>>>>>> I checked simd3.f90 and found out that my additional check reject
>>>>>>>> independence of references
>>>>>>>>
>>>>>>>> REF is independent in loop#3
>>>>>>>> .istart0.19, .iend0.20
>>>>>>>> which are defined in loop#1 which is outer for loop#3.
>>>>>>>> Note that these references are defined by
>>>>>>>> _103 = __builtin_GOMP_loop_dynamic_next (&.istart0.19, &.iend0.20);
>>>>>>>> which is in loop#1.
>>>>>>>> It is clear that both these references can not be independent for
>>>>>>>> loop#3.
>>>>>>>
>>>>>>>
>>>>>>> Ok, so we end up calling ref_indep_loop for ref in LOOP also for inner
>>>>>>> loops
>>>>>>> of LOOP to catch memory references in those as well.  So the issue is
>>>>>>> really
>>>>>>> that we look at the wrong loop for safelen and we _do_ want to apply
>>>>>>> safelen
>>>>>>> to inner loops as well.
>>>>>>>
>>>>>>> So better track the loop we are ultimately asking the question for, like
>>>>>>> in the
>>>>>>> attached patch (fixes the testcase for me).
>>>>>>>
>>>>>>> Richard.
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>> 2016-07-07 17:11 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>>
>>>>>>>>> On Thu, Jul 7, 2016 at 4:04 PM, Yuri Rumyantsev <ysrumyan@gmail.com>
>>>>>>>>> wrote:
>>>>>>>>>>
>>>>>>>>>> I Added this check because of new failures in libgomp.fortran suite.
>>>>>>>>>> Here is copy of Jakub message:
>>>>>>>>>> --- Comment #29 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
>>>>>>>>>> The #c27 r237844 change looks bogus to me.
>>>>>>>>>> First of all, IMNSHO you can argue this way only if ref is a reference
>>>>>>>>>> seen in
>>>>>>>>>> loop LOOP,
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> or inner loops of LOOP I guess.  I _think_ we never call
>>>>>>>>> ref_indep_loop_p_1 with
>>>>>>>>> a REF whose loop is not a sub-loop of LOOP or LOOP itself (as it would
>>>>>>>>> not make
>>>>>>>>> sense to do that, it would be a waste of time).
>>>>>>>>>
>>>>>>>>> So only if "or inner loops of LOOP" is not correct the check would be
>>>>>>>>> needed
>>>>>>>>> but then my issue with unrolling an inner loop and turning a ref that
>>>>>>>>> safelen
>>>>>>>>> does not apply to into a ref that it now applies to arises.
>>>>>>>>>
>>>>>>>>> I don't fully get what Jakub is hinting at.
>>>>>>>>>
>>>>>>>>> Can you install the safelen > 0 -> safelen > 1 fix please?  Jakub, can
>>>>>>>>> you
>>>>>>>>> explain that bitmap check with a simple testcase?
>>>>>>>>>
>>>>>>>>> Thanks,
>>>>>>>>> Richard.
>>>>>>>>>
>>>>>>>>>> which is the case of e.g. *.omp_data_i_23(D).a ref in simd3.f90 -O2
>>>>>>>>>> -fopenmp -msse2, but not the D.3815[0] case tested during can_sm_ref_p
>>>>>>>>>> - the
>>>>>>>>>> D.3815[0] = 0; as well as something = D.3815[0]; stmt found in the
>>>>>>>>>> outer loop
>>>>>>>>>> obviously can be dependent on many of the loads and/or stores in the
>>>>>>>>>> loop, be
>>>>>>>>>> it "omp simd array" or not.
>>>>>>>>>> Say for
>>>>>>>>>> void
>>>>>>>>>> foo (int *p, int *q)
>>>>>>>>>> {
>>>>>>>>>>    #pragma omp simd
>>>>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>>>>      p[i] += q[0];
>>>>>>>>>> }
>>>>>>>>>> sure, q[0] can't alias p[0] ... p[1022], the earlier iterations could
>>>>>>>>>> write
>>>>>>>>>> something that changes its value, and then it would behave differently
>>>>>>>>>> from
>>>>>>>>>> using VF = 1024, where everything is performed in parallel.
>>>>>>>>>> Though, actually, it can alias, just it would have to write the same
>>>>>>>>>> value as
>>>>>>>>>> was there.  So, if this is used to determine if it is safe to hoist
>>>>>>>>>> the load
>>>>>>>>>> before the loop, it is fine, if it is used to determine if &q[0] >=
>>>>>>>>>> &p[0] &&
>>>>>>>>>> &q[0] <= &p[1023], then it is not fine.
>>>>>>>>>>
>>>>>>>>>> For aliasing of q[0] and p[1023], I don't see why they couldn't alias
>>>>>>>>>> in a
>>>>>>>>>> valid program.  #pragma omp simd I think guarantees that the last
>>>>>>>>>> iteration is
>>>>>>>>>> executed last, it isn't necessarily executed last alone, it could be,
>>>>>>>>>> or
>>>>>>>>>> together with one before last iteration, or (for simdlen INT_MAX) even
>>>>>>>>>> all
>>>>>>>>>> iterations can be done concurrently, in hw or sw, so it is fine if it
>>>>>>>>>> is
>>>>>>>>>> transformed into:
>>>>>>>>>>    int temp[1024], temp2[1024], temp3[1024];
>>>>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>>>>      temp[i] = p[i];
>>>>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>>>>      temp2[i] = q[0];
>>>>>>>>>>    /* The above two loops can be also swapped, or intermixed.  */
>>>>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>>>>      temp3[i] = temp[i] + temp2[i];
>>>>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>>>>      p[i] = temp3[i];
>>>>>>>>>>    /* Or the above loop reversed etc. */
>>>>>>>>>>
>>>>>>>>>> If you have:
>>>>>>>>>> int
>>>>>>>>>> bar (int *p, int *q)
>>>>>>>>>> {
>>>>>>>>>>    q[0] = 0;
>>>>>>>>>>    #pragma omp simd
>>>>>>>>>>    for (int i = 0; i < 1024; i++)
>>>>>>>>>>      p[i]++;
>>>>>>>>>>    return q[0];
>>>>>>>>>> }
>>>>>>>>>> i.e. something similar to what misbehaves in simd3.f90 with the
>>>>>>>>>> change, then
>>>>>>>>>> the answer is that q[0] isn't guaranteed to be independent of any
>>>>>>>>>> references in
>>>>>>>>>> the simd loop.
>>>>>>>>>>
>>>>>>>>>> 2016-07-07 16:57 GMT+03:00 Richard Biener
>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>
>>>>>>>>>>> On Thu, Jul 7, 2016 at 3:24 PM, Yuri Rumyantsev <ysrumyan@gmail.com>
>>>>>>>>>>> wrote:
>>>>>>>>>>>>
>>>>>>>>>>>> Richard,
>>>>>>>>>>>>
>>>>>>>>>>>> Did you meas the following check enhancement:
>>>>>>>>>>>>
>>>>>>>>>>>>    outer = loop_outer (loop);
>>>>>>>>>>>>    /* We consider REF defined in LOOP as independent if at least 2
>>>>>>>>>>>> loop
>>>>>>>>>>>>       iterations are not dependent.  */
>>>>>>>>>>>>    if ((loop->safelen > 1
>>>>>>>>>>>>         || (outer && outer->safelen > 1))
>>>>>>>>>>>>        && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num],
>>>>>>>>>>>> ref->id))
>>>>>>>>>>>>      return true;
>>>>>>>>>>>> which allows us to consider reference x[0] as invariant for the test
>>>>>>>>>>>> #pragma omp simd
>>>>>>>>>>>>    for (i = 0; i< 100; i++)
>>>>>>>>>>>>      {
>>>>>>>>>>>>        y[i] = i * 2;
>>>>>>>>>>>>        for (j = 0; j < 100; j++)
>>>>>>>>>>>> z[j] += x[0];
>>>>>>>>>>>>      }
>>>>>>>>>>>>
>>>>>>>>>>>> Moving statement
>>>>>>>>>>>> _9 = *x_19(D);
>>>>>>>>>>>> (cost 20) out of loop 1.
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> outer loops will be automatically considered by outermost_indep_loop
>>>>>>>>>>> which eventually
>>>>>>>>>>> calls the function you are patching.
>>>>>>>>>>>
>>>>>>>>>>> I was questioning the added test
>>>>>>>>>>>
>>>>>>>>>>>         && bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num],
>>>>>>>>>>> ref->id))
>>>>>>>>>>>
>>>>>>>>>>> and still do not understand why you need that.  It makes us only
>>>>>>>>>>> consider the
>>>>>>>>>>> loop of ref itself when looking at safelen (but not refs that belong
>>>>>>>>>>> to inner loops
>>>>>>>>>>> of loop).
>>>>>>>>>>>
>>>>>>>>>>> Richard.
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>> 2016-07-07 11:46 GMT+03:00 Richard Biener
>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>
>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 4:42 PM, Yuri Rumyantsev
>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> I don't understand your question and how it is related to proposed
>>>>>>>>>>>>>> patch.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> 2016-07-06 16:55 GMT+03:00 Richard Biener
>>>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 3:50 PM, Yuri Rumyantsev
>>>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> See for example Jakub comments for 70729.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> But how can the check be valid given a
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> #pragma omp simd
>>>>>>>>>>>>>>>    for (;;)
>>>>>>>>>>>>>>>      {
>>>>>>>>>>>>>>>         for (;;)
>>>>>>>>>>>>>>>           ...ref in question...
>>>>>>>>>>>>>>>      }
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> can be transformed to
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> #pragma omp simd
>>>>>>>>>>>>>>>     for (;;)
>>>>>>>>>>>>>>>       {
>>>>>>>>>>>>>>>          ...ref in question...
>>>>>>>>>>>>>>>          ...ref in question..
>>>>>>>>>>>>>>>          ...
>>>>>>>>>>>>>>>       }
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> by means of unrolling?
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> The bitmap check would return false for the not unrolled inner loop
>>>>>>>>>>>>> and true for the inner unrolled loop.
>>>>>>>>>>>>> So it cannot be correct.
>>>>>>>>>>>>>
>>>>>>>>>>>>> (not sure why this is all off-list btw)
>>>>>>>>>>>>>
>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 2016-07-06 16:25 GMT+03:00 Richard Biener
>>>>>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 1:43 PM, Yuri Rumyantsev
>>>>>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> I did not learn LIM detailed but I see on the following
>>>>>>>>>>>>>>>>>> example
>>>>>>>>>>>>>>>>>> #pragma omp simd
>>>>>>>>>>>>>>>>>>    for (i = 0; i< 100; i++)
>>>>>>>>>>>>>>>>>>      {
>>>>>>>>>>>>>>>>>>        y[i] = i * 2;
>>>>>>>>>>>>>>>>>>        for (j = 0; j < 100; j++)
>>>>>>>>>>>>>>>>>> z[j] += x[0];
>>>>>>>>>>>>>>>>>>      }
>>>>>>>>>>>>>>>>>> that only inner,ost loop is handled:
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Memory reference in loop#2 1: *_7
>>>>>>>>>>>>>>>>>> Memory reference in loop#2 2: *x_19(D)
>>>>>>>>>>>>>>>>>> Memory reference in loop#1 3: *_3
>>>>>>>>>>>>>>>>>> Basic block 3 (loop 1 -- depth 1):
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Basic block 4 (loop 2 -- depth 2):
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Loop#2 is innermost
>>>>>>>>>>>>>>>>>> Querying dependency of refs 2 and 1: dependent.
>>>>>>>>>>>>>>>>>> Querying dependencies of ref 2 in loop 2: dependent
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> well, LIM doesn't bother to check against refs in an outer loop
>>>>>>>>>>>>>>>>> if
>>>>>>>>>>>>>>>>> refs in the inner loop are already dependent.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Loop unroll of inner loop does not invalidate 'safelen' of
>>>>>>>>>>>>>>>>>> outer one
>>>>>>>>>>>>>>>>>> accordingly with safelen definition.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> So why do you need the bitmap_bit_p check then?
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> 2016-07-06 14:23 GMT+03:00 Richard Biener
>>>>>>>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 1:17 PM, Yuri Rumyantsev
>>>>>>>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> LIM phase considers only innermost loops and does not handle
>>>>>>>>>>>>>>>>>>>> loop
>>>>>>>>>>>>>>>>>>>> nests, so in your case the innermost loop (j) does not have
>>>>>>>>>>>>>>>>>>>> non-zero
>>>>>>>>>>>>>>>>>>>> safelen.
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Huh?  LIM considers loop nests just fine.  And yes, the
>>>>>>>>>>>>>>>>>>> innermost loop
>>>>>>>>>>>>>>>>>>> does not have safelen but shouldn't it?  Consider the inner
>>>>>>>>>>>>>>>>>>> loop being unrolled
>>>>>>>>>>>>>>>>>>> by GCC so it is removed - should that then invalidate the
>>>>>>>>>>>>>>>>>>> outer loop safelen
>>>>>>>>>>>>>>>>>>> because innermost loop refs now appear in the outer loop?
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> 2016-07-06 13:34 GMT+03:00 Richard Biener
>>>>>>>>>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> On Wed, Jul 6, 2016 at 11:50 AM, Yuri Rumyantsev
>>>>>>>>>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> I pointed out in the commentary that REF is defined inside
>>>>>>>>>>>>>>>>>>>>>> loop and
>>>>>>>>>>>>>>>>>>>>>> this check is related to this statement. Should I clarify
>>>>>>>>>>>>>>>>>>>>>> it?
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> +  /* We consider REF defined in LOOP as independent if at
>>>>>>>>>>>>>>>>>>>>>> least 2 loop
>>>>>>>>>>>>>>>>>>>>>> +     iterations are not dependent.  */
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> Yes, I fail to see why x[0] should not be disambiguated
>>>>>>>>>>>>>>>>>>>>> against y[i] in
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> #pragma GCC loop ivdep
>>>>>>>>>>>>>>>>>>>>>    for (i...)
>>>>>>>>>>>>>>>>>>>>>      {
>>>>>>>>>>>>>>>>>>>>>        y[i] = ...;
>>>>>>>>>>>>>>>>>>>>>        for (j...)
>>>>>>>>>>>>>>>>>>>>>          ... = x[0];
>>>>>>>>>>>>>>>>>>>>>      }
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> REF is always inside the loop nest with LOOP being the
>>>>>>>>>>>>>>>>>>>>> outermost loop.
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> 2016-07-06 12:38 GMT+03:00 Richard Biener
>>>>>>>>>>>>>>>>>>>>>> <richard.guenther@gmail.com>:
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>> On Tue, Jul 5, 2016 at 4:56 PM, Yuri Rumyantsev
>>>>>>>>>>>>>>>>>>>>>>> <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>> Hi All,
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>> Here is a simple fix to cure regressions introduced by
>>>>>>>>>>>>>>>>>>>>>>>> my fix for
>>>>>>>>>>>>>>>>>>>>>>>> 70729. Patch also contains minor changes in test found
>>>>>>>>>>>>>>>>>>>>>>>> by Jakub.
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>> Bootstrapping and regression testing did not show any
>>>>>>>>>>>>>>>>>>>>>>>> new failures.
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>> Is it OK for trunk?
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>> +      && bitmap_bit_p
>>>>>>>>>>>>>>>>>>>>>>> (&memory_accesses.refs_in_loop[loop->num], ref->id))
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>> So safelen does not apply to refs in nested loops?  The
>>>>>>>>>>>>>>>>>>>>>>> added comment only
>>>>>>>>>>>>>>>>>>>>>>> explains the safelen check change but this also requires
>>>>>>>>>>>>>>>>>>>>>>> explanation.
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>> Richard.
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>> ChangeLog:
>>>>>>>>>>>>>>>>>>>>>>>> 2016-07-05  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>> PR tree-optimization/71734
>>>>>>>>>>>>>>>>>>>>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Consider REF
>>>>>>>>>>>>>>>>>>>>>>>> defined in
>>>>>>>>>>>>>>>>>>>>>>>> LOOP as independent if at least two loop iterations are
>>>>>>>>>>>>>>>>>>>>>>>> not dependent.
>>>>>>>>>>>>>>>>>>>>>>>> gcc/testsuite/ChangeLog:
>>>>>>>>>>>>>>>>>>>>>>>>          * g++.dg/vect/pr70729.cc: Delete redundant dg
>>>>>>>>>>>>>>>>>>>>>>>> options, fix style.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
  2016-07-28 13:49                                             ` Yuri Rumyantsev
  2016-07-28 13:52                                               ` Richard Biener
@ 2016-07-28 21:33                                               ` H.J. Lu
       [not found]                                                 ` <CAEoMCqTJCPTz_hv1bHxme9S151ErPxjcuxp1s+ZeSL_TF3FQUw@mail.gmail.com>
  1 sibling, 1 reply; 24+ messages in thread
From: H.J. Lu @ 2016-07-28 21:33 UTC (permalink / raw)
  To: Yuri Rumyantsev
  Cc: Richard Biener, Renlin Li, gcc-patches,
	Илья
	Энкович,
	Igor Zamyatin, Jakub Jelinek

On Thu, Jul 28, 2016 at 6:49 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
> Richard,
>
> I prepare a patch which is based on yours. New test is also included.
> Bootstrapping and regression testing did not show any new failures.
> Is it OK for trunk?
>
> Thanks.
> ChangeLog:
> 2016-07-28  Yuri Rumyantsev  <ysrumyan@gmail.com>
>
> PR tree-optimization/71734
> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Pass value of safelen
> attribute instead of REF_LOOP and use it.
> (ref_indep_loop_p_2): Use SAFELEN argument instead of REF_LOOP and
> set it for Loops having non-zero safelen attribute.
> (ref_indep_loop_p): Pass zero as initial value for safelen.
> gcc/testsuite/ChangeLog:
>         * g++.dg/vect/pr70729-nest.cc: New test.
>

Does this cause

FAIL: libgomp.fortran/pr71734-1.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
FAIL: libgomp.fortran/pr71734-1.f90   -O3 -g  execution test
FAIL: libgomp.fortran/pr71734-2.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
FAIL: libgomp.fortran/pr71734-2.f90   -O3 -g  execution test

on AVX machines and

FAIL: libgomp.fortran/simd3.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
FAIL: libgomp.fortran/simd3.f90   -O3 -g  execution test
FAIL: libgomp.fortran/simd4.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
FAIL: libgomp.fortran/simd4.f90   -O3 -g  execution test

on non-AVX machines?

-- 
H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
       [not found]                                                 ` <CAEoMCqTJCPTz_hv1bHxme9S151ErPxjcuxp1s+ZeSL_TF3FQUw@mail.gmail.com>
@ 2016-07-29 14:00                                                   ` Yuri Rumyantsev
  2016-08-02 14:59                                                     ` Yuri Rumyantsev
  2016-08-03 13:44                                                     ` Richard Biener
  0 siblings, 2 replies; 24+ messages in thread
From: Yuri Rumyantsev @ 2016-07-29 14:00 UTC (permalink / raw)
  To: H.J. Lu, Richard Biener, gcc-patches, Igor Zamyatin

[-- Attachment #1: Type: text/plain, Size: 2413 bytes --]

Hi Richard.

It turned out that the fix proposed by you does not work for liggomp
tests simd3 and simd4.
The reason is that we can't change safelen value for references not
defined inside loop. So I add missed check on it to patch.
Is it OK for trunk?
ChangeLog:
2016-07-29  Yuri Rumyantsev  <ysrumyan@gmail.com>

PR tree-optimization/71734
* tree-ssa-loop-im.c (ref_defined_in_loop_p): New function.
(ref_indep_loop_p_2): Change SAFELEN value for REF defined inside LOOP.

2016-07-29 13:08 GMT+03:00 Yuri Rumyantsev <ysrumyan@gmail.com>:
> Sorry H.J.
>
> I checked both these tests manually but forgot to pass "-fopenmp" option.
> I will fix the issue asap.
>
> 2016-07-29 0:33 GMT+03:00 H.J. Lu <hjl.tools@gmail.com>:
>> On Thu, Jul 28, 2016 at 6:49 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>> Richard,
>>>
>>> I prepare a patch which is based on yours. New test is also included.
>>> Bootstrapping and regression testing did not show any new failures.
>>> Is it OK for trunk?
>>>
>>> Thanks.
>>> ChangeLog:
>>> 2016-07-28  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>
>>> PR tree-optimization/71734
>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Pass value of safelen
>>> attribute instead of REF_LOOP and use it.
>>> (ref_indep_loop_p_2): Use SAFELEN argument instead of REF_LOOP and
>>> set it for Loops having non-zero safelen attribute.
>>> (ref_indep_loop_p): Pass zero as initial value for safelen.
>>> gcc/testsuite/ChangeLog:
>>>         * g++.dg/vect/pr70729-nest.cc: New test.
>>>
>>
>> Does this cause
>>
>> FAIL: libgomp.fortran/pr71734-1.f90   -O3 -fomit-frame-pointer
>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>> test
>> FAIL: libgomp.fortran/pr71734-1.f90   -O3 -g  execution test
>> FAIL: libgomp.fortran/pr71734-2.f90   -O3 -fomit-frame-pointer
>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>> test
>> FAIL: libgomp.fortran/pr71734-2.f90   -O3 -g  execution test
>>
>> on AVX machines and
>>
>> FAIL: libgomp.fortran/simd3.f90   -O3 -fomit-frame-pointer
>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>> test
>> FAIL: libgomp.fortran/simd3.f90   -O3 -g  execution test
>> FAIL: libgomp.fortran/simd4.f90   -O3 -fomit-frame-pointer
>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>> test
>> FAIL: libgomp.fortran/simd4.f90   -O3 -g  execution test
>>
>> on non-AVX machines?
>>
>> --
>> H.J.

[-- Attachment #2: 71734.patch.3 --]
[-- Type: application/octet-stream, Size: 1322 bytes --]

diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index 190c8c6..889607b 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -2151,7 +2151,27 @@ ref_indep_loop_p_1 (int safelen, struct loop *loop,
   return true;
 }
 
-/* Returns true if REF in REF_LOOP is independent on all other memory
+/* Returns true if REF is defined inside LOOP.  */
+
+static bool
+ref_defined_in_loop_p (im_mem_ref *ref, struct loop *loop)
+{
+  struct loop *inner;
+  if (bitmap_bit_p (&memory_accesses.refs_in_loop[loop->num], ref->id))
+    return true;
+
+  /* Check inner loops if any.  */
+  inner = loop->inner;
+  while (inner)
+    {
+      if (ref_defined_in_loop_p (ref, inner))
+	return true;
+      inner = inner->next;
+    }
+  return false;
+}
+
+/* Returns true if REF in LOOP is independent on all other memory
    references in LOOP.  Wrapper over ref_indep_loop_p_1, caching its
    results.  */
 
@@ -2166,7 +2186,8 @@ ref_indep_loop_p_2 (int safelen, struct loop *loop,
   if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
     return false;
 
-  if (loop->safelen > safelen)
+  /* Change SAFELEN value if REF is defined inside LOOP,  */
+  if (loop->safelen > safelen && ref_defined_in_loop_p (ref, loop))
     safelen = loop->safelen;
 
   struct loop *inner = loop->inner;

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
  2016-07-29 14:00                                                   ` Yuri Rumyantsev
@ 2016-08-02 14:59                                                     ` Yuri Rumyantsev
  2016-08-03 13:44                                                     ` Richard Biener
  1 sibling, 0 replies; 24+ messages in thread
From: Yuri Rumyantsev @ 2016-08-02 14:59 UTC (permalink / raw)
  To: H.J. Lu, Richard Biener, gcc-patches, Igor Zamyatin

Hi Richard,

Did you have a chance to look at this patch?

Thanks.

2016-07-29 17:00 GMT+03:00 Yuri Rumyantsev <ysrumyan@gmail.com>:
> Hi Richard.
>
> It turned out that the fix proposed by you does not work for liggomp
> tests simd3 and simd4.
> The reason is that we can't change safelen value for references not
> defined inside loop. So I add missed check on it to patch.
> Is it OK for trunk?
> ChangeLog:
> 2016-07-29  Yuri Rumyantsev  <ysrumyan@gmail.com>
>
> PR tree-optimization/71734
> * tree-ssa-loop-im.c (ref_defined_in_loop_p): New function.
> (ref_indep_loop_p_2): Change SAFELEN value for REF defined inside LOOP.
>
> 2016-07-29 13:08 GMT+03:00 Yuri Rumyantsev <ysrumyan@gmail.com>:
>> Sorry H.J.
>>
>> I checked both these tests manually but forgot to pass "-fopenmp" option.
>> I will fix the issue asap.
>>
>> 2016-07-29 0:33 GMT+03:00 H.J. Lu <hjl.tools@gmail.com>:
>>> On Thu, Jul 28, 2016 at 6:49 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>> Richard,
>>>>
>>>> I prepare a patch which is based on yours. New test is also included.
>>>> Bootstrapping and regression testing did not show any new failures.
>>>> Is it OK for trunk?
>>>>
>>>> Thanks.
>>>> ChangeLog:
>>>> 2016-07-28  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>
>>>> PR tree-optimization/71734
>>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Pass value of safelen
>>>> attribute instead of REF_LOOP and use it.
>>>> (ref_indep_loop_p_2): Use SAFELEN argument instead of REF_LOOP and
>>>> set it for Loops having non-zero safelen attribute.
>>>> (ref_indep_loop_p): Pass zero as initial value for safelen.
>>>> gcc/testsuite/ChangeLog:
>>>>         * g++.dg/vect/pr70729-nest.cc: New test.
>>>>
>>>
>>> Does this cause
>>>
>>> FAIL: libgomp.fortran/pr71734-1.f90   -O3 -fomit-frame-pointer
>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>> test
>>> FAIL: libgomp.fortran/pr71734-1.f90   -O3 -g  execution test
>>> FAIL: libgomp.fortran/pr71734-2.f90   -O3 -fomit-frame-pointer
>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>> test
>>> FAIL: libgomp.fortran/pr71734-2.f90   -O3 -g  execution test
>>>
>>> on AVX machines and
>>>
>>> FAIL: libgomp.fortran/simd3.f90   -O3 -fomit-frame-pointer
>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>> test
>>> FAIL: libgomp.fortran/simd3.f90   -O3 -g  execution test
>>> FAIL: libgomp.fortran/simd4.f90   -O3 -fomit-frame-pointer
>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>> test
>>> FAIL: libgomp.fortran/simd4.f90   -O3 -g  execution test
>>>
>>> on non-AVX machines?
>>>
>>> --
>>> H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
  2016-07-29 14:00                                                   ` Yuri Rumyantsev
  2016-08-02 14:59                                                     ` Yuri Rumyantsev
@ 2016-08-03 13:44                                                     ` Richard Biener
  2016-08-05 13:29                                                       ` Yuri Rumyantsev
  1 sibling, 1 reply; 24+ messages in thread
From: Richard Biener @ 2016-08-03 13:44 UTC (permalink / raw)
  To: Yuri Rumyantsev; +Cc: H.J. Lu, gcc-patches, Igor Zamyatin

On Fri, Jul 29, 2016 at 4:00 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
> Hi Richard.
>
> It turned out that the fix proposed by you does not work for liggomp
> tests simd3 and simd4.
> The reason is that we can't change safelen value for references not
> defined inside loop. So I add missed check on it to patch.
> Is it OK for trunk?

Hmm, I don't like the walk of all subloops in ref_defined_in_loop_p as
that operation can end up being quadratic in the loop depth/width.

But I also wonder about correctness given that LIM "commons"
references.  So we can have

  for (;;)
    .. = ref;  (1)
    for (;;) // safelen == 2  (2)
      ... = ref;

and when looking at the ref at (1) which according to you should not
have safelen applied your function will happily return that ref is defined
in the inner loop.

So it looks like to be able to apply safelen the caller of ref_indep_loop_p
needs to pass down a ref plus a location (a stmt).  In which case your
function can simply use flow_loop_nested_p (loop, gimple_bb
(stmt)->loop_father);

Richard.

> ChangeLog:
> 2016-07-29  Yuri Rumyantsev  <ysrumyan@gmail.com>
>
> PR tree-optimization/71734
> * tree-ssa-loop-im.c (ref_defined_in_loop_p): New function.
> (ref_indep_loop_p_2): Change SAFELEN value for REF defined inside LOOP.
>
> 2016-07-29 13:08 GMT+03:00 Yuri Rumyantsev <ysrumyan@gmail.com>:
>> Sorry H.J.
>>
>> I checked both these tests manually but forgot to pass "-fopenmp" option.
>> I will fix the issue asap.
>>
>> 2016-07-29 0:33 GMT+03:00 H.J. Lu <hjl.tools@gmail.com>:
>>> On Thu, Jul 28, 2016 at 6:49 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>> Richard,
>>>>
>>>> I prepare a patch which is based on yours. New test is also included.
>>>> Bootstrapping and regression testing did not show any new failures.
>>>> Is it OK for trunk?
>>>>
>>>> Thanks.
>>>> ChangeLog:
>>>> 2016-07-28  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>
>>>> PR tree-optimization/71734
>>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Pass value of safelen
>>>> attribute instead of REF_LOOP and use it.
>>>> (ref_indep_loop_p_2): Use SAFELEN argument instead of REF_LOOP and
>>>> set it for Loops having non-zero safelen attribute.
>>>> (ref_indep_loop_p): Pass zero as initial value for safelen.
>>>> gcc/testsuite/ChangeLog:
>>>>         * g++.dg/vect/pr70729-nest.cc: New test.
>>>>
>>>
>>> Does this cause
>>>
>>> FAIL: libgomp.fortran/pr71734-1.f90   -O3 -fomit-frame-pointer
>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>> test
>>> FAIL: libgomp.fortran/pr71734-1.f90   -O3 -g  execution test
>>> FAIL: libgomp.fortran/pr71734-2.f90   -O3 -fomit-frame-pointer
>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>> test
>>> FAIL: libgomp.fortran/pr71734-2.f90   -O3 -g  execution test
>>>
>>> on AVX machines and
>>>
>>> FAIL: libgomp.fortran/simd3.f90   -O3 -fomit-frame-pointer
>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>> test
>>> FAIL: libgomp.fortran/simd3.f90   -O3 -g  execution test
>>> FAIL: libgomp.fortran/simd4.f90   -O3 -fomit-frame-pointer
>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>> test
>>> FAIL: libgomp.fortran/simd4.f90   -O3 -g  execution test
>>>
>>> on non-AVX machines?
>>>
>>> --
>>> H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
  2016-08-03 13:44                                                     ` Richard Biener
@ 2016-08-05 13:29                                                       ` Yuri Rumyantsev
  2016-08-05 13:51                                                         ` Richard Biener
  0 siblings, 1 reply; 24+ messages in thread
From: Yuri Rumyantsev @ 2016-08-05 13:29 UTC (permalink / raw)
  To: Richard Biener; +Cc: H.J. Lu, gcc-patches, Igor Zamyatin

[-- Attachment #1: Type: text/plain, Size: 4486 bytes --]

Richard,

Here is updated patch which implements your proposal - I pass loop
instead of stmt to determine either REF is defined inside LOOP nest or
not. I checked that for pr70729-nest.cc the reference this->S_n  for
statements which are out of innermost loop are  not considered as
independent as you pointed out.

Regression testing did not show any new failures and both failed tests
from libgomp.fortran suite now passed.

Is it OK for trunk?
ChangeLog:
2016-08-05  Yuri Rumyantsev  <ysrumyan@gmail.com>

PR tree-optimization/71734
* tree-ssa-loop-im.c (ref_indep_loop_p): Add new argument REF_LOOP.
(outermost_indep_loop): Pass LOOP argumnet where REF was defined to
ref_indep_loop_p.
(ref_indep_loop_p_1): Fix commentary.
(ref_indep_loop_p_2): Add additional argument REF_LOOP, introduce new
variable NEW_SAFELEN which may have new value for SAFELEN, ignore
dependencde for loop having greater safelen value, pass REF_LOOP in
recursive call.
(can_sm_ref_p): Pass LOOP as additional argument to ref_indep_loop_p.

2016-08-03 16:44 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
> On Fri, Jul 29, 2016 at 4:00 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>> Hi Richard.
>>
>> It turned out that the fix proposed by you does not work for liggomp
>> tests simd3 and simd4.
>> The reason is that we can't change safelen value for references not
>> defined inside loop. So I add missed check on it to patch.
>> Is it OK for trunk?
>
> Hmm, I don't like the walk of all subloops in ref_defined_in_loop_p as
> that operation can end up being quadratic in the loop depth/width.
>
> But I also wonder about correctness given that LIM "commons"
> references.  So we can have
>
>   for (;;)
>     .. = ref;  (1)
>     for (;;) // safelen == 2  (2)
>       ... = ref;
>
> and when looking at the ref at (1) which according to you should not
> have safelen applied your function will happily return that ref is defined
> in the inner loop.
>
> So it looks like to be able to apply safelen the caller of ref_indep_loop_p
> needs to pass down a ref plus a location (a stmt).  In which case your
> function can simply use flow_loop_nested_p (loop, gimple_bb
> (stmt)->loop_father);
>
> Richard.
>
>> ChangeLog:
>> 2016-07-29  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>
>> PR tree-optimization/71734
>> * tree-ssa-loop-im.c (ref_defined_in_loop_p): New function.
>> (ref_indep_loop_p_2): Change SAFELEN value for REF defined inside LOOP.
>>
>> 2016-07-29 13:08 GMT+03:00 Yuri Rumyantsev <ysrumyan@gmail.com>:
>>> Sorry H.J.
>>>
>>> I checked both these tests manually but forgot to pass "-fopenmp" option.
>>> I will fix the issue asap.
>>>
>>> 2016-07-29 0:33 GMT+03:00 H.J. Lu <hjl.tools@gmail.com>:
>>>> On Thu, Jul 28, 2016 at 6:49 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>> Richard,
>>>>>
>>>>> I prepare a patch which is based on yours. New test is also included.
>>>>> Bootstrapping and regression testing did not show any new failures.
>>>>> Is it OK for trunk?
>>>>>
>>>>> Thanks.
>>>>> ChangeLog:
>>>>> 2016-07-28  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>
>>>>> PR tree-optimization/71734
>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Pass value of safelen
>>>>> attribute instead of REF_LOOP and use it.
>>>>> (ref_indep_loop_p_2): Use SAFELEN argument instead of REF_LOOP and
>>>>> set it for Loops having non-zero safelen attribute.
>>>>> (ref_indep_loop_p): Pass zero as initial value for safelen.
>>>>> gcc/testsuite/ChangeLog:
>>>>>         * g++.dg/vect/pr70729-nest.cc: New test.
>>>>>
>>>>
>>>> Does this cause
>>>>
>>>> FAIL: libgomp.fortran/pr71734-1.f90   -O3 -fomit-frame-pointer
>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>> test
>>>> FAIL: libgomp.fortran/pr71734-1.f90   -O3 -g  execution test
>>>> FAIL: libgomp.fortran/pr71734-2.f90   -O3 -fomit-frame-pointer
>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>> test
>>>> FAIL: libgomp.fortran/pr71734-2.f90   -O3 -g  execution test
>>>>
>>>> on AVX machines and
>>>>
>>>> FAIL: libgomp.fortran/simd3.f90   -O3 -fomit-frame-pointer
>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>> test
>>>> FAIL: libgomp.fortran/simd3.f90   -O3 -g  execution test
>>>> FAIL: libgomp.fortran/simd4.f90   -O3 -fomit-frame-pointer
>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>> test
>>>> FAIL: libgomp.fortran/simd4.f90   -O3 -g  execution test
>>>>
>>>> on non-AVX machines?
>>>>
>>>> --
>>>> H.J.

[-- Attachment #2: 71734.patch.4 --]
[-- Type: application/octet-stream, Size: 3434 bytes --]

diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index 947724b..bbd4b10 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -196,7 +196,7 @@ static struct
 static bitmap_obstack lim_bitmap_obstack;
 static obstack mem_ref_obstack;
 
-static bool ref_indep_loop_p (struct loop *, im_mem_ref *);
+static bool ref_indep_loop_p (struct loop *, im_mem_ref *, struct loop *);
 
 /* Minimum cost of an expensive expression.  */
 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
@@ -544,10 +544,10 @@ outermost_indep_loop (struct loop *outer, struct loop *loop, im_mem_ref *ref)
        aloop != loop;
        aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
     if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
-	&& ref_indep_loop_p (aloop, ref))
+	&& ref_indep_loop_p (aloop, ref, loop))
       return aloop;
 
-  if (ref_indep_loop_p (loop, ref))
+  if (ref_indep_loop_p (loop, ref, loop))
     return loop;
   else
     return NULL;
@@ -2109,7 +2109,7 @@ record_dep_loop (struct loop *loop, im_mem_ref *ref, bool stored_p)
     loop = loop_outer (loop);
 }
 
-/* Returns true if REF in REF_LOOP is independent on all other memory
+/* Returns true if REF is independent on all other memory
    references in LOOP.  */
 
 static bool
@@ -2157,22 +2157,31 @@ ref_indep_loop_p_1 (int safelen, struct loop *loop,
 
 static bool
 ref_indep_loop_p_2 (int safelen, struct loop *loop,
-		    im_mem_ref *ref, bool stored_p)
+		    im_mem_ref *ref, bool stored_p,
+		    struct loop *ref_loop)
 {
   stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));
 
   if (bitmap_bit_p (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
     return true;
-  if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
+
+  int new_safelen = safelen;
+  if (loop->safelen > safelen
+      /* Check that REF is defined inside LOOP.  */
+      && (loop == ref_loop || flow_loop_nested_p (loop, ref_loop)))
+    new_safelen = loop->safelen;
+
+  /* Ignore dependence for loops having greater safelen.  */
+  if (new_safelen == safelen
+      && bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
     return false;
 
-  if (loop->safelen > safelen)
-    safelen = loop->safelen;
+  safelen = new_safelen;
 
   struct loop *inner = loop->inner;
   while (inner)
     {
-      if (!ref_indep_loop_p_2 (safelen, inner, ref, stored_p))
+      if (!ref_indep_loop_p_2 (safelen, inner, ref, stored_p, ref_loop))
 	return false;
       inner = inner->next;
     }
@@ -2209,14 +2218,14 @@ ref_indep_loop_p_2 (int safelen, struct loop *loop,
 }
 
 /* Returns true if REF is independent on all other memory references in
-   LOOP.  */
+   LOOP.  REF_LOOP is loop where REF is defined.  */
 
 static bool
-ref_indep_loop_p (struct loop *loop, im_mem_ref *ref)
+ref_indep_loop_p (struct loop *loop, im_mem_ref *ref, struct loop *ref_loop)
 {
   gcc_checking_assert (MEM_ANALYZABLE (ref));
 
-  return ref_indep_loop_p_2 (0, loop, ref, false);
+  return ref_indep_loop_p_2 (0, loop, ref, false, ref_loop);
 }
 
 /* Returns true if we can perform store motion of REF from LOOP.  */
@@ -2252,7 +2261,7 @@ can_sm_ref_p (struct loop *loop, im_mem_ref *ref)
 
   /* And it must be independent on all other memory references
      in LOOP.  */
-  if (!ref_indep_loop_p (loop, ref))
+  if (!ref_indep_loop_p (loop, ref, loop))
     return false;
 
   return true;

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
  2016-08-05 13:29                                                       ` Yuri Rumyantsev
@ 2016-08-05 13:51                                                         ` Richard Biener
       [not found]                                                           ` <CAEoMCqRyCr804MQoRNNKYaajSjFE=guN8m1dvvV3jKUzpyNFMg@mail.gmail.com>
  0 siblings, 1 reply; 24+ messages in thread
From: Richard Biener @ 2016-08-05 13:51 UTC (permalink / raw)
  To: Yuri Rumyantsev; +Cc: H.J. Lu, gcc-patches, Igor Zamyatin

On Fri, Aug 5, 2016 at 3:28 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
> Richard,
>
> Here is updated patch which implements your proposal - I pass loop
> instead of stmt to determine either REF is defined inside LOOP nest or
> not. I checked that for pr70729-nest.cc the reference this->S_n  for
> statements which are out of innermost loop are  not considered as
> independent as you pointed out.
>
> Regression testing did not show any new failures and both failed tests
> from libgomp.fortran suite now passed.
>
> Is it OK for trunk?

I don't quite understand

+  /* Ignore dependence for loops having greater safelen.  */
+  if (new_safelen == safelen
+      && bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
     return false;

this seems to suggest (correctly I think) that we cannot rely on the caching
for safelen, neither for optimal results (you seem to address that) but also
not for correctness (we cache the no-dep result from a safelen run and
then happily re-use that info for a ref that is not safe for safelen).

It seems to me we need to avoid any caching if we made things independent
because of safelen and simply not do the dep test afterwards.  this means
inlining ref_indep_loop_p_1 partly into _2 (not sure if there's a great way
to do this w/o confusing the control flow).

Richard.

> ChangeLog:
> 2016-08-05  Yuri Rumyantsev  <ysrumyan@gmail.com>
>
> PR tree-optimization/71734
> * tree-ssa-loop-im.c (ref_indep_loop_p): Add new argument REF_LOOP.
> (outermost_indep_loop): Pass LOOP argumnet where REF was defined to
> ref_indep_loop_p.
> (ref_indep_loop_p_1): Fix commentary.
> (ref_indep_loop_p_2): Add additional argument REF_LOOP, introduce new
> variable NEW_SAFELEN which may have new value for SAFELEN, ignore
> dependencde for loop having greater safelen value, pass REF_LOOP in
> recursive call.
> (can_sm_ref_p): Pass LOOP as additional argument to ref_indep_loop_p.
>
> 2016-08-03 16:44 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>> On Fri, Jul 29, 2016 at 4:00 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>> Hi Richard.
>>>
>>> It turned out that the fix proposed by you does not work for liggomp
>>> tests simd3 and simd4.
>>> The reason is that we can't change safelen value for references not
>>> defined inside loop. So I add missed check on it to patch.
>>> Is it OK for trunk?
>>
>> Hmm, I don't like the walk of all subloops in ref_defined_in_loop_p as
>> that operation can end up being quadratic in the loop depth/width.
>>
>> But I also wonder about correctness given that LIM "commons"
>> references.  So we can have
>>
>>   for (;;)
>>     .. = ref;  (1)
>>     for (;;) // safelen == 2  (2)
>>       ... = ref;
>>
>> and when looking at the ref at (1) which according to you should not
>> have safelen applied your function will happily return that ref is defined
>> in the inner loop.
>>
>> So it looks like to be able to apply safelen the caller of ref_indep_loop_p
>> needs to pass down a ref plus a location (a stmt).  In which case your
>> function can simply use flow_loop_nested_p (loop, gimple_bb
>> (stmt)->loop_father);
>>
>> Richard.
>>
>>> ChangeLog:
>>> 2016-07-29  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>
>>> PR tree-optimization/71734
>>> * tree-ssa-loop-im.c (ref_defined_in_loop_p): New function.
>>> (ref_indep_loop_p_2): Change SAFELEN value for REF defined inside LOOP.
>>>
>>> 2016-07-29 13:08 GMT+03:00 Yuri Rumyantsev <ysrumyan@gmail.com>:
>>>> Sorry H.J.
>>>>
>>>> I checked both these tests manually but forgot to pass "-fopenmp" option.
>>>> I will fix the issue asap.
>>>>
>>>> 2016-07-29 0:33 GMT+03:00 H.J. Lu <hjl.tools@gmail.com>:
>>>>> On Thu, Jul 28, 2016 at 6:49 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>> Richard,
>>>>>>
>>>>>> I prepare a patch which is based on yours. New test is also included.
>>>>>> Bootstrapping and regression testing did not show any new failures.
>>>>>> Is it OK for trunk?
>>>>>>
>>>>>> Thanks.
>>>>>> ChangeLog:
>>>>>> 2016-07-28  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>
>>>>>> PR tree-optimization/71734
>>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Pass value of safelen
>>>>>> attribute instead of REF_LOOP and use it.
>>>>>> (ref_indep_loop_p_2): Use SAFELEN argument instead of REF_LOOP and
>>>>>> set it for Loops having non-zero safelen attribute.
>>>>>> (ref_indep_loop_p): Pass zero as initial value for safelen.
>>>>>> gcc/testsuite/ChangeLog:
>>>>>>         * g++.dg/vect/pr70729-nest.cc: New test.
>>>>>>
>>>>>
>>>>> Does this cause
>>>>>
>>>>> FAIL: libgomp.fortran/pr71734-1.f90   -O3 -fomit-frame-pointer
>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>> test
>>>>> FAIL: libgomp.fortran/pr71734-1.f90   -O3 -g  execution test
>>>>> FAIL: libgomp.fortran/pr71734-2.f90   -O3 -fomit-frame-pointer
>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>> test
>>>>> FAIL: libgomp.fortran/pr71734-2.f90   -O3 -g  execution test
>>>>>
>>>>> on AVX machines and
>>>>>
>>>>> FAIL: libgomp.fortran/simd3.f90   -O3 -fomit-frame-pointer
>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>> test
>>>>> FAIL: libgomp.fortran/simd3.f90   -O3 -g  execution test
>>>>> FAIL: libgomp.fortran/simd4.f90   -O3 -fomit-frame-pointer
>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>> test
>>>>> FAIL: libgomp.fortran/simd4.f90   -O3 -g  execution test
>>>>>
>>>>> on non-AVX machines?
>>>>>
>>>>> --
>>>>> H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
       [not found]                                                               ` <CAEoMCqRwWNVDwrmmLZhXHu_EjEbixMn15TWmTmGjHuwDCfNeUw@mail.gmail.com>
@ 2016-08-09 10:00                                                                 ` Richard Biener
  2016-08-09 11:26                                                                   ` Yuri Rumyantsev
  0 siblings, 1 reply; 24+ messages in thread
From: Richard Biener @ 2016-08-09 10:00 UTC (permalink / raw)
  To: Yuri Rumyantsev, GCC Patches

[-- Attachment #1: Type: text/plain, Size: 7623 bytes --]

On Tue, Aug 9, 2016 at 11:20 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
> Yes it is impossible since all basic blocks are handled from outer
> loops to innermost so we can not have the sequence with wrong
> dependence, i.e. we cached that reference is independent (due to
> safelen) but the same reference in outer loop must be evaluated as
> dependent. So we must re-evaluate only dependent references in loops
> having non-zero safelen attribute.

Hmm.  I don't like depending on this implementation detail.  Does the
attached patch work
which simply avoids any positive/negative caching on safelen affected
refs?  It also makes
the query cheaper by avoiding the dive into child loops.

Richard.

> 2016-08-09 11:44 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>> On Fri, Aug 5, 2016 at 4:57 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>> Richard,
>>>
>>> I added additional check before caching dependencies since (1) all
>>> statements in loop are handled in loop postorder order, i.e. form
>>> outer to inner; (2) we can change dependency for reference in subloops
>>> which have non-zero safelen attribute. So I propose to re-evaluate it
>>> in such cases. I don't see why we need to avoid dependence caching for
>>> all loop nests since pragma omp simd is used very rarely.
>>
>> You think it is impossible to construct a testcase which hits the
>> correctness issue?
>> "very rarely" is not a good argument to generate wrong code.
>>
>> Richard.
>>
>>> 2016-08-05 16:50 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>> On Fri, Aug 5, 2016 at 3:28 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>> Richard,
>>>>>
>>>>> Here is updated patch which implements your proposal - I pass loop
>>>>> instead of stmt to determine either REF is defined inside LOOP nest or
>>>>> not. I checked that for pr70729-nest.cc the reference this->S_n  for
>>>>> statements which are out of innermost loop are  not considered as
>>>>> independent as you pointed out.
>>>>>
>>>>> Regression testing did not show any new failures and both failed tests
>>>>> from libgomp.fortran suite now passed.
>>>>>
>>>>> Is it OK for trunk?
>>>>
>>>> I don't quite understand
>>>>
>>>> +  /* Ignore dependence for loops having greater safelen.  */
>>>> +  if (new_safelen == safelen
>>>> +      && bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
>>>>      return false;
>>>>
>>>> this seems to suggest (correctly I think) that we cannot rely on the caching
>>>> for safelen, neither for optimal results (you seem to address that) but also
>>>> not for correctness (we cache the no-dep result from a safelen run and
>>>> then happily re-use that info for a ref that is not safe for safelen).
>>>>
>>>> It seems to me we need to avoid any caching if we made things independent
>>>> because of safelen and simply not do the dep test afterwards.  this means
>>>> inlining ref_indep_loop_p_1 partly into _2 (not sure if there's a great way
>>>> to do this w/o confusing the control flow).
>>>>
>>>> Richard.
>>>>
>>>>> ChangeLog:
>>>>> 2016-08-05  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>
>>>>> PR tree-optimization/71734
>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p): Add new argument REF_LOOP.
>>>>> (outermost_indep_loop): Pass LOOP argumnet where REF was defined to
>>>>> ref_indep_loop_p.
>>>>> (ref_indep_loop_p_1): Fix commentary.
>>>>> (ref_indep_loop_p_2): Add additional argument REF_LOOP, introduce new
>>>>> variable NEW_SAFELEN which may have new value for SAFELEN, ignore
>>>>> dependencde for loop having greater safelen value, pass REF_LOOP in
>>>>> recursive call.
>>>>> (can_sm_ref_p): Pass LOOP as additional argument to ref_indep_loop_p.
>>>>>
>>>>> 2016-08-03 16:44 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>> On Fri, Jul 29, 2016 at 4:00 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>> Hi Richard.
>>>>>>>
>>>>>>> It turned out that the fix proposed by you does not work for liggomp
>>>>>>> tests simd3 and simd4.
>>>>>>> The reason is that we can't change safelen value for references not
>>>>>>> defined inside loop. So I add missed check on it to patch.
>>>>>>> Is it OK for trunk?
>>>>>>
>>>>>> Hmm, I don't like the walk of all subloops in ref_defined_in_loop_p as
>>>>>> that operation can end up being quadratic in the loop depth/width.
>>>>>>
>>>>>> But I also wonder about correctness given that LIM "commons"
>>>>>> references.  So we can have
>>>>>>
>>>>>>   for (;;)
>>>>>>     .. = ref;  (1)
>>>>>>     for (;;) // safelen == 2  (2)
>>>>>>       ... = ref;
>>>>>>
>>>>>> and when looking at the ref at (1) which according to you should not
>>>>>> have safelen applied your function will happily return that ref is defined
>>>>>> in the inner loop.
>>>>>>
>>>>>> So it looks like to be able to apply safelen the caller of ref_indep_loop_p
>>>>>> needs to pass down a ref plus a location (a stmt).  In which case your
>>>>>> function can simply use flow_loop_nested_p (loop, gimple_bb
>>>>>> (stmt)->loop_father);
>>>>>>
>>>>>> Richard.
>>>>>>
>>>>>>> ChangeLog:
>>>>>>> 2016-07-29  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>
>>>>>>> PR tree-optimization/71734
>>>>>>> * tree-ssa-loop-im.c (ref_defined_in_loop_p): New function.
>>>>>>> (ref_indep_loop_p_2): Change SAFELEN value for REF defined inside LOOP.
>>>>>>>
>>>>>>> 2016-07-29 13:08 GMT+03:00 Yuri Rumyantsev <ysrumyan@gmail.com>:
>>>>>>>> Sorry H.J.
>>>>>>>>
>>>>>>>> I checked both these tests manually but forgot to pass "-fopenmp" option.
>>>>>>>> I will fix the issue asap.
>>>>>>>>
>>>>>>>> 2016-07-29 0:33 GMT+03:00 H.J. Lu <hjl.tools@gmail.com>:
>>>>>>>>> On Thu, Jul 28, 2016 at 6:49 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>> Richard,
>>>>>>>>>>
>>>>>>>>>> I prepare a patch which is based on yours. New test is also included.
>>>>>>>>>> Bootstrapping and regression testing did not show any new failures.
>>>>>>>>>> Is it OK for trunk?
>>>>>>>>>>
>>>>>>>>>> Thanks.
>>>>>>>>>> ChangeLog:
>>>>>>>>>> 2016-07-28  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>>>>
>>>>>>>>>> PR tree-optimization/71734
>>>>>>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Pass value of safelen
>>>>>>>>>> attribute instead of REF_LOOP and use it.
>>>>>>>>>> (ref_indep_loop_p_2): Use SAFELEN argument instead of REF_LOOP and
>>>>>>>>>> set it for Loops having non-zero safelen attribute.
>>>>>>>>>> (ref_indep_loop_p): Pass zero as initial value for safelen.
>>>>>>>>>> gcc/testsuite/ChangeLog:
>>>>>>>>>>         * g++.dg/vect/pr70729-nest.cc: New test.
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Does this cause
>>>>>>>>>
>>>>>>>>> FAIL: libgomp.fortran/pr71734-1.f90   -O3 -fomit-frame-pointer
>>>>>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>>>>>> test
>>>>>>>>> FAIL: libgomp.fortran/pr71734-1.f90   -O3 -g  execution test
>>>>>>>>> FAIL: libgomp.fortran/pr71734-2.f90   -O3 -fomit-frame-pointer
>>>>>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>>>>>> test
>>>>>>>>> FAIL: libgomp.fortran/pr71734-2.f90   -O3 -g  execution test
>>>>>>>>>
>>>>>>>>> on AVX machines and
>>>>>>>>>
>>>>>>>>> FAIL: libgomp.fortran/simd3.f90   -O3 -fomit-frame-pointer
>>>>>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>>>>>> test
>>>>>>>>> FAIL: libgomp.fortran/simd3.f90   -O3 -g  execution test
>>>>>>>>> FAIL: libgomp.fortran/simd4.f90   -O3 -fomit-frame-pointer
>>>>>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>>>>>> test
>>>>>>>>> FAIL: libgomp.fortran/simd4.f90   -O3 -g  execution test
>>>>>>>>>
>>>>>>>>> on non-AVX machines?
>>>>>>>>>
>>>>>>>>> --
>>>>>>>>> H.J.

[-- Attachment #2: p --]
[-- Type: application/octet-stream, Size: 5493 bytes --]

Index: gcc/tree-ssa-loop-im.c
===================================================================
--- gcc/tree-ssa-loop-im.c	(revision 239276)
+++ gcc/tree-ssa-loop-im.c	(working copy)
@@ -196,7 +199,7 @@ static struct
 static bitmap_obstack lim_bitmap_obstack;
 static obstack mem_ref_obstack;
 
-static bool ref_indep_loop_p (struct loop *, im_mem_ref *);
+static bool ref_indep_loop_p (struct loop *, im_mem_ref *, struct loop *);
 
 /* Minimum cost of an expensive expression.  */
 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
@@ -544,10 +577,10 @@ outermost_indep_loop (struct loop *outer
        aloop != loop;
        aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
     if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
-	&& ref_indep_loop_p (aloop, ref))
+	&& ref_indep_loop_p (aloop, ref, loop))
       return aloop;
 
-  if (ref_indep_loop_p (loop, ref))
+  if (ref_indep_loop_p (loop, ref, loop))
     return loop;
   else
     return NULL;
@@ -2109,17 +2172,28 @@ record_dep_loop (struct loop *loop, im_m
     loop = loop_outer (loop);
 }
 
-/* Returns true if REF in REF_LOOP is independent on all other memory
-   references in LOOP.  */
+/* Returns true if REF is independent on all other memory
+   references in LOOP.  REF_LOOP is where REF is accessed, SAFELEN is the
+   safelen to apply.  */
 
 static bool
-ref_indep_loop_p_1 (int safelen, struct loop *loop,
-		    im_mem_ref *ref, bool stored_p)
+ref_indep_loop_p_1 (int safelen, struct loop *loop, im_mem_ref *ref,
+		    bool stored_p, struct loop *ref_loop)
 {
+  stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));
+
+  if (bitmap_bit_p (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
+    return true;
+  if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
+    return false;
+
+  if (loop->safelen > safelen
+      /* Check that REF is accessed inside LOOP.  */
+      && (loop == ref_loop || flow_loop_nested_p (loop, ref_loop)))
+    safelen = loop->safelen;
+
+  bool indep_p = true;
   bitmap refs_to_check;
-  unsigned i;
-  bitmap_iterator bi;
-  im_mem_ref *aref;
 
   if (stored_p)
     refs_to_check = &memory_accesses.refs_in_loop[loop->num];
@@ -2127,9 +2201,8 @@ ref_indep_loop_p_1 (int safelen, struct
     refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
 
   if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
-    return false;
-
-  if (safelen > 1)
+    indep_p = false;
+  else if (safelen > 1)
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	{
@@ -2138,47 +2211,40 @@ ref_indep_loop_p_1 (int safelen, struct
 	  print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
 	  fprintf (dump_file, "\n");
 	}
+
+      /* Avoid caching here as safelen depends on context and refs
+         are shared between different contexts.  */
       return true;
     }
-
-  EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
+  else
     {
-      aref = memory_accesses.refs_list[i];
-      if (!refs_independent_p (ref, aref))
-	return false;
-    }
-
-  return true;
-}
-
-/* Returns true if REF in REF_LOOP is independent on all other memory
-   references in LOOP.  Wrapper over ref_indep_loop_p_1, caching its
-   results.  */
-
-static bool
-ref_indep_loop_p_2 (int safelen, struct loop *loop,
-		    im_mem_ref *ref, bool stored_p)
-{
-  stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));
-
-  if (bitmap_bit_p (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
-    return true;
-  if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
-    return false;
-
-  if (loop->safelen > safelen)
-    safelen = loop->safelen;
+      struct loop *inner = loop->inner;
+      while (inner)
+	{
+	  if (!ref_indep_loop_p_1 (safelen, inner, ref, stored_p, ref_loop))
+	    {
+	      indep_p = false;
+	      break;
+	    }
+	  inner = inner->next;
+	}
 
-  struct loop *inner = loop->inner;
-  while (inner)
-    {
-      if (!ref_indep_loop_p_2 (safelen, inner, ref, stored_p))
-	return false;
-      inner = inner->next;
+      if (indep_p)
+	{
+	  unsigned i;
+	  bitmap_iterator bi;
+	  EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
+	    {
+	      im_mem_ref *aref = memory_accesses.refs_list[i];
+	      if (!refs_independent_p (ref, aref))
+		{
+		  indep_p = false;
+		  break;
+		}
+	    }
+	}
     }
 
-  bool indep_p = ref_indep_loop_p_1 (safelen, loop, ref, stored_p);
-
   if (dump_file && (dump_flags & TDF_DETAILS))
     fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
 	     ref->id, loop->num, indep_p ? "independent" : "dependent");
@@ -2209,14 +2275,14 @@ ref_indep_loop_p_2 (int safelen, struct
 }
 
 /* Returns true if REF is independent on all other memory references in
-   LOOP.  */
+   LOOP.  REF_LOOP is the loop where REF is accessed.  */
 
 static bool
-ref_indep_loop_p (struct loop *loop, im_mem_ref *ref)
+ref_indep_loop_p (struct loop *loop, im_mem_ref *ref, struct loop *ref_loop)
 {
   gcc_checking_assert (MEM_ANALYZABLE (ref));
 
-  return ref_indep_loop_p_2 (0, loop, ref, false);
+  return ref_indep_loop_p_1 (0, loop, ref, false, ref_loop);
 }
 
 /* Returns true if we can perform store motion of REF from LOOP.  */
@@ -2252,7 +2318,7 @@ can_sm_ref_p (struct loop *loop, im_mem_
 
   /* And it must be independent on all other memory references
      in LOOP.  */
-  if (!ref_indep_loop_p (loop, ref))
+  if (!ref_indep_loop_p (loop, ref, loop))
     return false;
 
   return true;

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
  2016-08-09 10:00                                                                 ` Richard Biener
@ 2016-08-09 11:26                                                                   ` Yuri Rumyantsev
  2016-08-09 11:33                                                                     ` Richard Biener
  0 siblings, 1 reply; 24+ messages in thread
From: Yuri Rumyantsev @ 2016-08-09 11:26 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches

Richard,

The patch proposed by you does not work properly for
g++.dg/vect/pr70729-nest.cc test since the reference for this->S_n has
been cached as dependent for outer loop and loop is not vectorized:

 g++ -Ofast -fopenmp -mavx2 pr70729-nest.cc -c
-fdump-tree-vect-details
grep 'LOOP VECTORIZED' pr70729-nest.cc.149t.vect
<not found>

You missed additional check I added before check on cached dependence.

2016-08-09 13:00 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
> On Tue, Aug 9, 2016 at 11:20 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>> Yes it is impossible since all basic blocks are handled from outer
>> loops to innermost so we can not have the sequence with wrong
>> dependence, i.e. we cached that reference is independent (due to
>> safelen) but the same reference in outer loop must be evaluated as
>> dependent. So we must re-evaluate only dependent references in loops
>> having non-zero safelen attribute.
>
> Hmm.  I don't like depending on this implementation detail.  Does the
> attached patch work
> which simply avoids any positive/negative caching on safelen affected
> refs?  It also makes
> the query cheaper by avoiding the dive into child loops.
>
> Richard.
>
>> 2016-08-09 11:44 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>> On Fri, Aug 5, 2016 at 4:57 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>> Richard,
>>>>
>>>> I added additional check before caching dependencies since (1) all
>>>> statements in loop are handled in loop postorder order, i.e. form
>>>> outer to inner; (2) we can change dependency for reference in subloops
>>>> which have non-zero safelen attribute. So I propose to re-evaluate it
>>>> in such cases. I don't see why we need to avoid dependence caching for
>>>> all loop nests since pragma omp simd is used very rarely.
>>>
>>> You think it is impossible to construct a testcase which hits the
>>> correctness issue?
>>> "very rarely" is not a good argument to generate wrong code.
>>>
>>> Richard.
>>>
>>>> 2016-08-05 16:50 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>> On Fri, Aug 5, 2016 at 3:28 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>> Richard,
>>>>>>
>>>>>> Here is updated patch which implements your proposal - I pass loop
>>>>>> instead of stmt to determine either REF is defined inside LOOP nest or
>>>>>> not. I checked that for pr70729-nest.cc the reference this->S_n  for
>>>>>> statements which are out of innermost loop are  not considered as
>>>>>> independent as you pointed out.
>>>>>>
>>>>>> Regression testing did not show any new failures and both failed tests
>>>>>> from libgomp.fortran suite now passed.
>>>>>>
>>>>>> Is it OK for trunk?
>>>>>
>>>>> I don't quite understand
>>>>>
>>>>> +  /* Ignore dependence for loops having greater safelen.  */
>>>>> +  if (new_safelen == safelen
>>>>> +      && bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
>>>>>      return false;
>>>>>
>>>>> this seems to suggest (correctly I think) that we cannot rely on the caching
>>>>> for safelen, neither for optimal results (you seem to address that) but also
>>>>> not for correctness (we cache the no-dep result from a safelen run and
>>>>> then happily re-use that info for a ref that is not safe for safelen).
>>>>>
>>>>> It seems to me we need to avoid any caching if we made things independent
>>>>> because of safelen and simply not do the dep test afterwards.  this means
>>>>> inlining ref_indep_loop_p_1 partly into _2 (not sure if there's a great way
>>>>> to do this w/o confusing the control flow).
>>>>>
>>>>> Richard.
>>>>>
>>>>>> ChangeLog:
>>>>>> 2016-08-05  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>
>>>>>> PR tree-optimization/71734
>>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p): Add new argument REF_LOOP.
>>>>>> (outermost_indep_loop): Pass LOOP argumnet where REF was defined to
>>>>>> ref_indep_loop_p.
>>>>>> (ref_indep_loop_p_1): Fix commentary.
>>>>>> (ref_indep_loop_p_2): Add additional argument REF_LOOP, introduce new
>>>>>> variable NEW_SAFELEN which may have new value for SAFELEN, ignore
>>>>>> dependencde for loop having greater safelen value, pass REF_LOOP in
>>>>>> recursive call.
>>>>>> (can_sm_ref_p): Pass LOOP as additional argument to ref_indep_loop_p.
>>>>>>
>>>>>> 2016-08-03 16:44 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>> On Fri, Jul 29, 2016 at 4:00 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>> Hi Richard.
>>>>>>>>
>>>>>>>> It turned out that the fix proposed by you does not work for liggomp
>>>>>>>> tests simd3 and simd4.
>>>>>>>> The reason is that we can't change safelen value for references not
>>>>>>>> defined inside loop. So I add missed check on it to patch.
>>>>>>>> Is it OK for trunk?
>>>>>>>
>>>>>>> Hmm, I don't like the walk of all subloops in ref_defined_in_loop_p as
>>>>>>> that operation can end up being quadratic in the loop depth/width.
>>>>>>>
>>>>>>> But I also wonder about correctness given that LIM "commons"
>>>>>>> references.  So we can have
>>>>>>>
>>>>>>>   for (;;)
>>>>>>>     .. = ref;  (1)
>>>>>>>     for (;;) // safelen == 2  (2)
>>>>>>>       ... = ref;
>>>>>>>
>>>>>>> and when looking at the ref at (1) which according to you should not
>>>>>>> have safelen applied your function will happily return that ref is defined
>>>>>>> in the inner loop.
>>>>>>>
>>>>>>> So it looks like to be able to apply safelen the caller of ref_indep_loop_p
>>>>>>> needs to pass down a ref plus a location (a stmt).  In which case your
>>>>>>> function can simply use flow_loop_nested_p (loop, gimple_bb
>>>>>>> (stmt)->loop_father);
>>>>>>>
>>>>>>> Richard.
>>>>>>>
>>>>>>>> ChangeLog:
>>>>>>>> 2016-07-29  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>>
>>>>>>>> PR tree-optimization/71734
>>>>>>>> * tree-ssa-loop-im.c (ref_defined_in_loop_p): New function.
>>>>>>>> (ref_indep_loop_p_2): Change SAFELEN value for REF defined inside LOOP.
>>>>>>>>
>>>>>>>> 2016-07-29 13:08 GMT+03:00 Yuri Rumyantsev <ysrumyan@gmail.com>:
>>>>>>>>> Sorry H.J.
>>>>>>>>>
>>>>>>>>> I checked both these tests manually but forgot to pass "-fopenmp" option.
>>>>>>>>> I will fix the issue asap.
>>>>>>>>>
>>>>>>>>> 2016-07-29 0:33 GMT+03:00 H.J. Lu <hjl.tools@gmail.com>:
>>>>>>>>>> On Thu, Jul 28, 2016 at 6:49 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>> Richard,
>>>>>>>>>>>
>>>>>>>>>>> I prepare a patch which is based on yours. New test is also included.
>>>>>>>>>>> Bootstrapping and regression testing did not show any new failures.
>>>>>>>>>>> Is it OK for trunk?
>>>>>>>>>>>
>>>>>>>>>>> Thanks.
>>>>>>>>>>> ChangeLog:
>>>>>>>>>>> 2016-07-28  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>>>>>
>>>>>>>>>>> PR tree-optimization/71734
>>>>>>>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Pass value of safelen
>>>>>>>>>>> attribute instead of REF_LOOP and use it.
>>>>>>>>>>> (ref_indep_loop_p_2): Use SAFELEN argument instead of REF_LOOP and
>>>>>>>>>>> set it for Loops having non-zero safelen attribute.
>>>>>>>>>>> (ref_indep_loop_p): Pass zero as initial value for safelen.
>>>>>>>>>>> gcc/testsuite/ChangeLog:
>>>>>>>>>>>         * g++.dg/vect/pr70729-nest.cc: New test.
>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Does this cause
>>>>>>>>>>
>>>>>>>>>> FAIL: libgomp.fortran/pr71734-1.f90   -O3 -fomit-frame-pointer
>>>>>>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>>>>>>> test
>>>>>>>>>> FAIL: libgomp.fortran/pr71734-1.f90   -O3 -g  execution test
>>>>>>>>>> FAIL: libgomp.fortran/pr71734-2.f90   -O3 -fomit-frame-pointer
>>>>>>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>>>>>>> test
>>>>>>>>>> FAIL: libgomp.fortran/pr71734-2.f90   -O3 -g  execution test
>>>>>>>>>>
>>>>>>>>>> on AVX machines and
>>>>>>>>>>
>>>>>>>>>> FAIL: libgomp.fortran/simd3.f90   -O3 -fomit-frame-pointer
>>>>>>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>>>>>>> test
>>>>>>>>>> FAIL: libgomp.fortran/simd3.f90   -O3 -g  execution test
>>>>>>>>>> FAIL: libgomp.fortran/simd4.f90   -O3 -fomit-frame-pointer
>>>>>>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>>>>>>> test
>>>>>>>>>> FAIL: libgomp.fortran/simd4.f90   -O3 -g  execution test
>>>>>>>>>>
>>>>>>>>>> on non-AVX machines?
>>>>>>>>>>
>>>>>>>>>> --
>>>>>>>>>> H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
  2016-08-09 11:26                                                                   ` Yuri Rumyantsev
@ 2016-08-09 11:33                                                                     ` Richard Biener
  2016-08-09 12:05                                                                       ` Yuri Rumyantsev
  0 siblings, 1 reply; 24+ messages in thread
From: Richard Biener @ 2016-08-09 11:33 UTC (permalink / raw)
  To: Yuri Rumyantsev; +Cc: GCC Patches

On Tue, Aug 9, 2016 at 1:26 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
> Richard,
>
> The patch proposed by you does not work properly for
> g++.dg/vect/pr70729-nest.cc test since the reference for this->S_n has
> been cached as dependent for outer loop and loop is not vectorized:
>
>  g++ -Ofast -fopenmp -mavx2 pr70729-nest.cc -c
> -fdump-tree-vect-details
> grep 'LOOP VECTORIZED' pr70729-nest.cc.149t.vect
> <not found>
>
> You missed additional check I added before check on cached dependence.

Ok, but it should get the correctness right?

I suppose that if you move the cache checks inside the else clause it
would work?
I'd be ok with that change.

Richard.

> 2016-08-09 13:00 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>> On Tue, Aug 9, 2016 at 11:20 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>> Yes it is impossible since all basic blocks are handled from outer
>>> loops to innermost so we can not have the sequence with wrong
>>> dependence, i.e. we cached that reference is independent (due to
>>> safelen) but the same reference in outer loop must be evaluated as
>>> dependent. So we must re-evaluate only dependent references in loops
>>> having non-zero safelen attribute.
>>
>> Hmm.  I don't like depending on this implementation detail.  Does the
>> attached patch work
>> which simply avoids any positive/negative caching on safelen affected
>> refs?  It also makes
>> the query cheaper by avoiding the dive into child loops.
>>
>> Richard.
>>
>>> 2016-08-09 11:44 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>> On Fri, Aug 5, 2016 at 4:57 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>> Richard,
>>>>>
>>>>> I added additional check before caching dependencies since (1) all
>>>>> statements in loop are handled in loop postorder order, i.e. form
>>>>> outer to inner; (2) we can change dependency for reference in subloops
>>>>> which have non-zero safelen attribute. So I propose to re-evaluate it
>>>>> in such cases. I don't see why we need to avoid dependence caching for
>>>>> all loop nests since pragma omp simd is used very rarely.
>>>>
>>>> You think it is impossible to construct a testcase which hits the
>>>> correctness issue?
>>>> "very rarely" is not a good argument to generate wrong code.
>>>>
>>>> Richard.
>>>>
>>>>> 2016-08-05 16:50 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>> On Fri, Aug 5, 2016 at 3:28 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>> Richard,
>>>>>>>
>>>>>>> Here is updated patch which implements your proposal - I pass loop
>>>>>>> instead of stmt to determine either REF is defined inside LOOP nest or
>>>>>>> not. I checked that for pr70729-nest.cc the reference this->S_n  for
>>>>>>> statements which are out of innermost loop are  not considered as
>>>>>>> independent as you pointed out.
>>>>>>>
>>>>>>> Regression testing did not show any new failures and both failed tests
>>>>>>> from libgomp.fortran suite now passed.
>>>>>>>
>>>>>>> Is it OK for trunk?
>>>>>>
>>>>>> I don't quite understand
>>>>>>
>>>>>> +  /* Ignore dependence for loops having greater safelen.  */
>>>>>> +  if (new_safelen == safelen
>>>>>> +      && bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
>>>>>>      return false;
>>>>>>
>>>>>> this seems to suggest (correctly I think) that we cannot rely on the caching
>>>>>> for safelen, neither for optimal results (you seem to address that) but also
>>>>>> not for correctness (we cache the no-dep result from a safelen run and
>>>>>> then happily re-use that info for a ref that is not safe for safelen).
>>>>>>
>>>>>> It seems to me we need to avoid any caching if we made things independent
>>>>>> because of safelen and simply not do the dep test afterwards.  this means
>>>>>> inlining ref_indep_loop_p_1 partly into _2 (not sure if there's a great way
>>>>>> to do this w/o confusing the control flow).
>>>>>>
>>>>>> Richard.
>>>>>>
>>>>>>> ChangeLog:
>>>>>>> 2016-08-05  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>
>>>>>>> PR tree-optimization/71734
>>>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p): Add new argument REF_LOOP.
>>>>>>> (outermost_indep_loop): Pass LOOP argumnet where REF was defined to
>>>>>>> ref_indep_loop_p.
>>>>>>> (ref_indep_loop_p_1): Fix commentary.
>>>>>>> (ref_indep_loop_p_2): Add additional argument REF_LOOP, introduce new
>>>>>>> variable NEW_SAFELEN which may have new value for SAFELEN, ignore
>>>>>>> dependencde for loop having greater safelen value, pass REF_LOOP in
>>>>>>> recursive call.
>>>>>>> (can_sm_ref_p): Pass LOOP as additional argument to ref_indep_loop_p.
>>>>>>>
>>>>>>> 2016-08-03 16:44 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>> On Fri, Jul 29, 2016 at 4:00 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>> Hi Richard.
>>>>>>>>>
>>>>>>>>> It turned out that the fix proposed by you does not work for liggomp
>>>>>>>>> tests simd3 and simd4.
>>>>>>>>> The reason is that we can't change safelen value for references not
>>>>>>>>> defined inside loop. So I add missed check on it to patch.
>>>>>>>>> Is it OK for trunk?
>>>>>>>>
>>>>>>>> Hmm, I don't like the walk of all subloops in ref_defined_in_loop_p as
>>>>>>>> that operation can end up being quadratic in the loop depth/width.
>>>>>>>>
>>>>>>>> But I also wonder about correctness given that LIM "commons"
>>>>>>>> references.  So we can have
>>>>>>>>
>>>>>>>>   for (;;)
>>>>>>>>     .. = ref;  (1)
>>>>>>>>     for (;;) // safelen == 2  (2)
>>>>>>>>       ... = ref;
>>>>>>>>
>>>>>>>> and when looking at the ref at (1) which according to you should not
>>>>>>>> have safelen applied your function will happily return that ref is defined
>>>>>>>> in the inner loop.
>>>>>>>>
>>>>>>>> So it looks like to be able to apply safelen the caller of ref_indep_loop_p
>>>>>>>> needs to pass down a ref plus a location (a stmt).  In which case your
>>>>>>>> function can simply use flow_loop_nested_p (loop, gimple_bb
>>>>>>>> (stmt)->loop_father);
>>>>>>>>
>>>>>>>> Richard.
>>>>>>>>
>>>>>>>>> ChangeLog:
>>>>>>>>> 2016-07-29  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>>>
>>>>>>>>> PR tree-optimization/71734
>>>>>>>>> * tree-ssa-loop-im.c (ref_defined_in_loop_p): New function.
>>>>>>>>> (ref_indep_loop_p_2): Change SAFELEN value for REF defined inside LOOP.
>>>>>>>>>
>>>>>>>>> 2016-07-29 13:08 GMT+03:00 Yuri Rumyantsev <ysrumyan@gmail.com>:
>>>>>>>>>> Sorry H.J.
>>>>>>>>>>
>>>>>>>>>> I checked both these tests manually but forgot to pass "-fopenmp" option.
>>>>>>>>>> I will fix the issue asap.
>>>>>>>>>>
>>>>>>>>>> 2016-07-29 0:33 GMT+03:00 H.J. Lu <hjl.tools@gmail.com>:
>>>>>>>>>>> On Thu, Jul 28, 2016 at 6:49 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>> Richard,
>>>>>>>>>>>>
>>>>>>>>>>>> I prepare a patch which is based on yours. New test is also included.
>>>>>>>>>>>> Bootstrapping and regression testing did not show any new failures.
>>>>>>>>>>>> Is it OK for trunk?
>>>>>>>>>>>>
>>>>>>>>>>>> Thanks.
>>>>>>>>>>>> ChangeLog:
>>>>>>>>>>>> 2016-07-28  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>>>>>>
>>>>>>>>>>>> PR tree-optimization/71734
>>>>>>>>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Pass value of safelen
>>>>>>>>>>>> attribute instead of REF_LOOP and use it.
>>>>>>>>>>>> (ref_indep_loop_p_2): Use SAFELEN argument instead of REF_LOOP and
>>>>>>>>>>>> set it for Loops having non-zero safelen attribute.
>>>>>>>>>>>> (ref_indep_loop_p): Pass zero as initial value for safelen.
>>>>>>>>>>>> gcc/testsuite/ChangeLog:
>>>>>>>>>>>>         * g++.dg/vect/pr70729-nest.cc: New test.
>>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Does this cause
>>>>>>>>>>>
>>>>>>>>>>> FAIL: libgomp.fortran/pr71734-1.f90   -O3 -fomit-frame-pointer
>>>>>>>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>>>>>>>> test
>>>>>>>>>>> FAIL: libgomp.fortran/pr71734-1.f90   -O3 -g  execution test
>>>>>>>>>>> FAIL: libgomp.fortran/pr71734-2.f90   -O3 -fomit-frame-pointer
>>>>>>>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>>>>>>>> test
>>>>>>>>>>> FAIL: libgomp.fortran/pr71734-2.f90   -O3 -g  execution test
>>>>>>>>>>>
>>>>>>>>>>> on AVX machines and
>>>>>>>>>>>
>>>>>>>>>>> FAIL: libgomp.fortran/simd3.f90   -O3 -fomit-frame-pointer
>>>>>>>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>>>>>>>> test
>>>>>>>>>>> FAIL: libgomp.fortran/simd3.f90   -O3 -g  execution test
>>>>>>>>>>> FAIL: libgomp.fortran/simd4.f90   -O3 -fomit-frame-pointer
>>>>>>>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>>>>>>>> test
>>>>>>>>>>> FAIL: libgomp.fortran/simd4.f90   -O3 -g  execution test
>>>>>>>>>>>
>>>>>>>>>>> on non-AVX machines?
>>>>>>>>>>>
>>>>>>>>>>> --
>>>>>>>>>>> H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
  2016-08-09 11:33                                                                     ` Richard Biener
@ 2016-08-09 12:05                                                                       ` Yuri Rumyantsev
  2016-08-09 12:32                                                                         ` Richard Biener
  0 siblings, 1 reply; 24+ messages in thread
From: Yuri Rumyantsev @ 2016-08-09 12:05 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches

Richard,

I checked that this move helps.
Does it mean that I've got approval to integrate it to trunk?

2016-08-09 14:33 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
> On Tue, Aug 9, 2016 at 1:26 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>> Richard,
>>
>> The patch proposed by you does not work properly for
>> g++.dg/vect/pr70729-nest.cc test since the reference for this->S_n has
>> been cached as dependent for outer loop and loop is not vectorized:
>>
>>  g++ -Ofast -fopenmp -mavx2 pr70729-nest.cc -c
>> -fdump-tree-vect-details
>> grep 'LOOP VECTORIZED' pr70729-nest.cc.149t.vect
>> <not found>
>>
>> You missed additional check I added before check on cached dependence.
>
> Ok, but it should get the correctness right?
>
> I suppose that if you move the cache checks inside the else clause it
> would work?
> I'd be ok with that change.
>
> Richard.
>
>> 2016-08-09 13:00 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>> On Tue, Aug 9, 2016 at 11:20 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>> Yes it is impossible since all basic blocks are handled from outer
>>>> loops to innermost so we can not have the sequence with wrong
>>>> dependence, i.e. we cached that reference is independent (due to
>>>> safelen) but the same reference in outer loop must be evaluated as
>>>> dependent. So we must re-evaluate only dependent references in loops
>>>> having non-zero safelen attribute.
>>>
>>> Hmm.  I don't like depending on this implementation detail.  Does the
>>> attached patch work
>>> which simply avoids any positive/negative caching on safelen affected
>>> refs?  It also makes
>>> the query cheaper by avoiding the dive into child loops.
>>>
>>> Richard.
>>>
>>>> 2016-08-09 11:44 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>> On Fri, Aug 5, 2016 at 4:57 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>> Richard,
>>>>>>
>>>>>> I added additional check before caching dependencies since (1) all
>>>>>> statements in loop are handled in loop postorder order, i.e. form
>>>>>> outer to inner; (2) we can change dependency for reference in subloops
>>>>>> which have non-zero safelen attribute. So I propose to re-evaluate it
>>>>>> in such cases. I don't see why we need to avoid dependence caching for
>>>>>> all loop nests since pragma omp simd is used very rarely.
>>>>>
>>>>> You think it is impossible to construct a testcase which hits the
>>>>> correctness issue?
>>>>> "very rarely" is not a good argument to generate wrong code.
>>>>>
>>>>> Richard.
>>>>>
>>>>>> 2016-08-05 16:50 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>> On Fri, Aug 5, 2016 at 3:28 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>> Richard,
>>>>>>>>
>>>>>>>> Here is updated patch which implements your proposal - I pass loop
>>>>>>>> instead of stmt to determine either REF is defined inside LOOP nest or
>>>>>>>> not. I checked that for pr70729-nest.cc the reference this->S_n  for
>>>>>>>> statements which are out of innermost loop are  not considered as
>>>>>>>> independent as you pointed out.
>>>>>>>>
>>>>>>>> Regression testing did not show any new failures and both failed tests
>>>>>>>> from libgomp.fortran suite now passed.
>>>>>>>>
>>>>>>>> Is it OK for trunk?
>>>>>>>
>>>>>>> I don't quite understand
>>>>>>>
>>>>>>> +  /* Ignore dependence for loops having greater safelen.  */
>>>>>>> +  if (new_safelen == safelen
>>>>>>> +      && bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
>>>>>>>      return false;
>>>>>>>
>>>>>>> this seems to suggest (correctly I think) that we cannot rely on the caching
>>>>>>> for safelen, neither for optimal results (you seem to address that) but also
>>>>>>> not for correctness (we cache the no-dep result from a safelen run and
>>>>>>> then happily re-use that info for a ref that is not safe for safelen).
>>>>>>>
>>>>>>> It seems to me we need to avoid any caching if we made things independent
>>>>>>> because of safelen and simply not do the dep test afterwards.  this means
>>>>>>> inlining ref_indep_loop_p_1 partly into _2 (not sure if there's a great way
>>>>>>> to do this w/o confusing the control flow).
>>>>>>>
>>>>>>> Richard.
>>>>>>>
>>>>>>>> ChangeLog:
>>>>>>>> 2016-08-05  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>>
>>>>>>>> PR tree-optimization/71734
>>>>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p): Add new argument REF_LOOP.
>>>>>>>> (outermost_indep_loop): Pass LOOP argumnet where REF was defined to
>>>>>>>> ref_indep_loop_p.
>>>>>>>> (ref_indep_loop_p_1): Fix commentary.
>>>>>>>> (ref_indep_loop_p_2): Add additional argument REF_LOOP, introduce new
>>>>>>>> variable NEW_SAFELEN which may have new value for SAFELEN, ignore
>>>>>>>> dependencde for loop having greater safelen value, pass REF_LOOP in
>>>>>>>> recursive call.
>>>>>>>> (can_sm_ref_p): Pass LOOP as additional argument to ref_indep_loop_p.
>>>>>>>>
>>>>>>>> 2016-08-03 16:44 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>> On Fri, Jul 29, 2016 at 4:00 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>> Hi Richard.
>>>>>>>>>>
>>>>>>>>>> It turned out that the fix proposed by you does not work for liggomp
>>>>>>>>>> tests simd3 and simd4.
>>>>>>>>>> The reason is that we can't change safelen value for references not
>>>>>>>>>> defined inside loop. So I add missed check on it to patch.
>>>>>>>>>> Is it OK for trunk?
>>>>>>>>>
>>>>>>>>> Hmm, I don't like the walk of all subloops in ref_defined_in_loop_p as
>>>>>>>>> that operation can end up being quadratic in the loop depth/width.
>>>>>>>>>
>>>>>>>>> But I also wonder about correctness given that LIM "commons"
>>>>>>>>> references.  So we can have
>>>>>>>>>
>>>>>>>>>   for (;;)
>>>>>>>>>     .. = ref;  (1)
>>>>>>>>>     for (;;) // safelen == 2  (2)
>>>>>>>>>       ... = ref;
>>>>>>>>>
>>>>>>>>> and when looking at the ref at (1) which according to you should not
>>>>>>>>> have safelen applied your function will happily return that ref is defined
>>>>>>>>> in the inner loop.
>>>>>>>>>
>>>>>>>>> So it looks like to be able to apply safelen the caller of ref_indep_loop_p
>>>>>>>>> needs to pass down a ref plus a location (a stmt).  In which case your
>>>>>>>>> function can simply use flow_loop_nested_p (loop, gimple_bb
>>>>>>>>> (stmt)->loop_father);
>>>>>>>>>
>>>>>>>>> Richard.
>>>>>>>>>
>>>>>>>>>> ChangeLog:
>>>>>>>>>> 2016-07-29  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>>>>
>>>>>>>>>> PR tree-optimization/71734
>>>>>>>>>> * tree-ssa-loop-im.c (ref_defined_in_loop_p): New function.
>>>>>>>>>> (ref_indep_loop_p_2): Change SAFELEN value for REF defined inside LOOP.
>>>>>>>>>>
>>>>>>>>>> 2016-07-29 13:08 GMT+03:00 Yuri Rumyantsev <ysrumyan@gmail.com>:
>>>>>>>>>>> Sorry H.J.
>>>>>>>>>>>
>>>>>>>>>>> I checked both these tests manually but forgot to pass "-fopenmp" option.
>>>>>>>>>>> I will fix the issue asap.
>>>>>>>>>>>
>>>>>>>>>>> 2016-07-29 0:33 GMT+03:00 H.J. Lu <hjl.tools@gmail.com>:
>>>>>>>>>>>> On Thu, Jul 28, 2016 at 6:49 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>
>>>>>>>>>>>>> I prepare a patch which is based on yours. New test is also included.
>>>>>>>>>>>>> Bootstrapping and regression testing did not show any new failures.
>>>>>>>>>>>>> Is it OK for trunk?
>>>>>>>>>>>>>
>>>>>>>>>>>>> Thanks.
>>>>>>>>>>>>> ChangeLog:
>>>>>>>>>>>>> 2016-07-28  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>>>>>>>
>>>>>>>>>>>>> PR tree-optimization/71734
>>>>>>>>>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Pass value of safelen
>>>>>>>>>>>>> attribute instead of REF_LOOP and use it.
>>>>>>>>>>>>> (ref_indep_loop_p_2): Use SAFELEN argument instead of REF_LOOP and
>>>>>>>>>>>>> set it for Loops having non-zero safelen attribute.
>>>>>>>>>>>>> (ref_indep_loop_p): Pass zero as initial value for safelen.
>>>>>>>>>>>>> gcc/testsuite/ChangeLog:
>>>>>>>>>>>>>         * g++.dg/vect/pr70729-nest.cc: New test.
>>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> Does this cause
>>>>>>>>>>>>
>>>>>>>>>>>> FAIL: libgomp.fortran/pr71734-1.f90   -O3 -fomit-frame-pointer
>>>>>>>>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>>>>>>>>> test
>>>>>>>>>>>> FAIL: libgomp.fortran/pr71734-1.f90   -O3 -g  execution test
>>>>>>>>>>>> FAIL: libgomp.fortran/pr71734-2.f90   -O3 -fomit-frame-pointer
>>>>>>>>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>>>>>>>>> test
>>>>>>>>>>>> FAIL: libgomp.fortran/pr71734-2.f90   -O3 -g  execution test
>>>>>>>>>>>>
>>>>>>>>>>>> on AVX machines and
>>>>>>>>>>>>
>>>>>>>>>>>> FAIL: libgomp.fortran/simd3.f90   -O3 -fomit-frame-pointer
>>>>>>>>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>>>>>>>>> test
>>>>>>>>>>>> FAIL: libgomp.fortran/simd3.f90   -O3 -g  execution test
>>>>>>>>>>>> FAIL: libgomp.fortran/simd4.f90   -O3 -fomit-frame-pointer
>>>>>>>>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>>>>>>>>> test
>>>>>>>>>>>> FAIL: libgomp.fortran/simd4.f90   -O3 -g  execution test
>>>>>>>>>>>>
>>>>>>>>>>>> on non-AVX machines?
>>>>>>>>>>>>
>>>>>>>>>>>> --
>>>>>>>>>>>> H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH PR71734] Add missed check that reference defined inside loop.
  2016-08-09 12:05                                                                       ` Yuri Rumyantsev
@ 2016-08-09 12:32                                                                         ` Richard Biener
  0 siblings, 0 replies; 24+ messages in thread
From: Richard Biener @ 2016-08-09 12:32 UTC (permalink / raw)
  To: Yuri Rumyantsev; +Cc: GCC Patches

On Tue, Aug 9, 2016 at 2:05 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
> Richard,
>
> I checked that this move helps.
> Does it mean that I've got approval to integrate it to trunk?

Yes, if it survives bootstrap & regtest.

Richard.

> 2016-08-09 14:33 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>> On Tue, Aug 9, 2016 at 1:26 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>> Richard,
>>>
>>> The patch proposed by you does not work properly for
>>> g++.dg/vect/pr70729-nest.cc test since the reference for this->S_n has
>>> been cached as dependent for outer loop and loop is not vectorized:
>>>
>>>  g++ -Ofast -fopenmp -mavx2 pr70729-nest.cc -c
>>> -fdump-tree-vect-details
>>> grep 'LOOP VECTORIZED' pr70729-nest.cc.149t.vect
>>> <not found>
>>>
>>> You missed additional check I added before check on cached dependence.
>>
>> Ok, but it should get the correctness right?
>>
>> I suppose that if you move the cache checks inside the else clause it
>> would work?
>> I'd be ok with that change.
>>
>> Richard.
>>
>>> 2016-08-09 13:00 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>> On Tue, Aug 9, 2016 at 11:20 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>> Yes it is impossible since all basic blocks are handled from outer
>>>>> loops to innermost so we can not have the sequence with wrong
>>>>> dependence, i.e. we cached that reference is independent (due to
>>>>> safelen) but the same reference in outer loop must be evaluated as
>>>>> dependent. So we must re-evaluate only dependent references in loops
>>>>> having non-zero safelen attribute.
>>>>
>>>> Hmm.  I don't like depending on this implementation detail.  Does the
>>>> attached patch work
>>>> which simply avoids any positive/negative caching on safelen affected
>>>> refs?  It also makes
>>>> the query cheaper by avoiding the dive into child loops.
>>>>
>>>> Richard.
>>>>
>>>>> 2016-08-09 11:44 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>> On Fri, Aug 5, 2016 at 4:57 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>> Richard,
>>>>>>>
>>>>>>> I added additional check before caching dependencies since (1) all
>>>>>>> statements in loop are handled in loop postorder order, i.e. form
>>>>>>> outer to inner; (2) we can change dependency for reference in subloops
>>>>>>> which have non-zero safelen attribute. So I propose to re-evaluate it
>>>>>>> in such cases. I don't see why we need to avoid dependence caching for
>>>>>>> all loop nests since pragma omp simd is used very rarely.
>>>>>>
>>>>>> You think it is impossible to construct a testcase which hits the
>>>>>> correctness issue?
>>>>>> "very rarely" is not a good argument to generate wrong code.
>>>>>>
>>>>>> Richard.
>>>>>>
>>>>>>> 2016-08-05 16:50 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>> On Fri, Aug 5, 2016 at 3:28 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>> Richard,
>>>>>>>>>
>>>>>>>>> Here is updated patch which implements your proposal - I pass loop
>>>>>>>>> instead of stmt to determine either REF is defined inside LOOP nest or
>>>>>>>>> not. I checked that for pr70729-nest.cc the reference this->S_n  for
>>>>>>>>> statements which are out of innermost loop are  not considered as
>>>>>>>>> independent as you pointed out.
>>>>>>>>>
>>>>>>>>> Regression testing did not show any new failures and both failed tests
>>>>>>>>> from libgomp.fortran suite now passed.
>>>>>>>>>
>>>>>>>>> Is it OK for trunk?
>>>>>>>>
>>>>>>>> I don't quite understand
>>>>>>>>
>>>>>>>> +  /* Ignore dependence for loops having greater safelen.  */
>>>>>>>> +  if (new_safelen == safelen
>>>>>>>> +      && bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
>>>>>>>>      return false;
>>>>>>>>
>>>>>>>> this seems to suggest (correctly I think) that we cannot rely on the caching
>>>>>>>> for safelen, neither for optimal results (you seem to address that) but also
>>>>>>>> not for correctness (we cache the no-dep result from a safelen run and
>>>>>>>> then happily re-use that info for a ref that is not safe for safelen).
>>>>>>>>
>>>>>>>> It seems to me we need to avoid any caching if we made things independent
>>>>>>>> because of safelen and simply not do the dep test afterwards.  this means
>>>>>>>> inlining ref_indep_loop_p_1 partly into _2 (not sure if there's a great way
>>>>>>>> to do this w/o confusing the control flow).
>>>>>>>>
>>>>>>>> Richard.
>>>>>>>>
>>>>>>>>> ChangeLog:
>>>>>>>>> 2016-08-05  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>>>
>>>>>>>>> PR tree-optimization/71734
>>>>>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p): Add new argument REF_LOOP.
>>>>>>>>> (outermost_indep_loop): Pass LOOP argumnet where REF was defined to
>>>>>>>>> ref_indep_loop_p.
>>>>>>>>> (ref_indep_loop_p_1): Fix commentary.
>>>>>>>>> (ref_indep_loop_p_2): Add additional argument REF_LOOP, introduce new
>>>>>>>>> variable NEW_SAFELEN which may have new value for SAFELEN, ignore
>>>>>>>>> dependencde for loop having greater safelen value, pass REF_LOOP in
>>>>>>>>> recursive call.
>>>>>>>>> (can_sm_ref_p): Pass LOOP as additional argument to ref_indep_loop_p.
>>>>>>>>>
>>>>>>>>> 2016-08-03 16:44 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>>>>>>>> On Fri, Jul 29, 2016 at 4:00 PM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>> Hi Richard.
>>>>>>>>>>>
>>>>>>>>>>> It turned out that the fix proposed by you does not work for liggomp
>>>>>>>>>>> tests simd3 and simd4.
>>>>>>>>>>> The reason is that we can't change safelen value for references not
>>>>>>>>>>> defined inside loop. So I add missed check on it to patch.
>>>>>>>>>>> Is it OK for trunk?
>>>>>>>>>>
>>>>>>>>>> Hmm, I don't like the walk of all subloops in ref_defined_in_loop_p as
>>>>>>>>>> that operation can end up being quadratic in the loop depth/width.
>>>>>>>>>>
>>>>>>>>>> But I also wonder about correctness given that LIM "commons"
>>>>>>>>>> references.  So we can have
>>>>>>>>>>
>>>>>>>>>>   for (;;)
>>>>>>>>>>     .. = ref;  (1)
>>>>>>>>>>     for (;;) // safelen == 2  (2)
>>>>>>>>>>       ... = ref;
>>>>>>>>>>
>>>>>>>>>> and when looking at the ref at (1) which according to you should not
>>>>>>>>>> have safelen applied your function will happily return that ref is defined
>>>>>>>>>> in the inner loop.
>>>>>>>>>>
>>>>>>>>>> So it looks like to be able to apply safelen the caller of ref_indep_loop_p
>>>>>>>>>> needs to pass down a ref plus a location (a stmt).  In which case your
>>>>>>>>>> function can simply use flow_loop_nested_p (loop, gimple_bb
>>>>>>>>>> (stmt)->loop_father);
>>>>>>>>>>
>>>>>>>>>> Richard.
>>>>>>>>>>
>>>>>>>>>>> ChangeLog:
>>>>>>>>>>> 2016-07-29  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>>>>>
>>>>>>>>>>> PR tree-optimization/71734
>>>>>>>>>>> * tree-ssa-loop-im.c (ref_defined_in_loop_p): New function.
>>>>>>>>>>> (ref_indep_loop_p_2): Change SAFELEN value for REF defined inside LOOP.
>>>>>>>>>>>
>>>>>>>>>>> 2016-07-29 13:08 GMT+03:00 Yuri Rumyantsev <ysrumyan@gmail.com>:
>>>>>>>>>>>> Sorry H.J.
>>>>>>>>>>>>
>>>>>>>>>>>> I checked both these tests manually but forgot to pass "-fopenmp" option.
>>>>>>>>>>>> I will fix the issue asap.
>>>>>>>>>>>>
>>>>>>>>>>>> 2016-07-29 0:33 GMT+03:00 H.J. Lu <hjl.tools@gmail.com>:
>>>>>>>>>>>>> On Thu, Jul 28, 2016 at 6:49 AM, Yuri Rumyantsev <ysrumyan@gmail.com> wrote:
>>>>>>>>>>>>>> Richard,
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> I prepare a patch which is based on yours. New test is also included.
>>>>>>>>>>>>>> Bootstrapping and regression testing did not show any new failures.
>>>>>>>>>>>>>> Is it OK for trunk?
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Thanks.
>>>>>>>>>>>>>> ChangeLog:
>>>>>>>>>>>>>> 2016-07-28  Yuri Rumyantsev  <ysrumyan@gmail.com>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> PR tree-optimization/71734
>>>>>>>>>>>>>> * tree-ssa-loop-im.c (ref_indep_loop_p_1): Pass value of safelen
>>>>>>>>>>>>>> attribute instead of REF_LOOP and use it.
>>>>>>>>>>>>>> (ref_indep_loop_p_2): Use SAFELEN argument instead of REF_LOOP and
>>>>>>>>>>>>>> set it for Loops having non-zero safelen attribute.
>>>>>>>>>>>>>> (ref_indep_loop_p): Pass zero as initial value for safelen.
>>>>>>>>>>>>>> gcc/testsuite/ChangeLog:
>>>>>>>>>>>>>>         * g++.dg/vect/pr70729-nest.cc: New test.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Does this cause
>>>>>>>>>>>>>
>>>>>>>>>>>>> FAIL: libgomp.fortran/pr71734-1.f90   -O3 -fomit-frame-pointer
>>>>>>>>>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>>>>>>>>>> test
>>>>>>>>>>>>> FAIL: libgomp.fortran/pr71734-1.f90   -O3 -g  execution test
>>>>>>>>>>>>> FAIL: libgomp.fortran/pr71734-2.f90   -O3 -fomit-frame-pointer
>>>>>>>>>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>>>>>>>>>> test
>>>>>>>>>>>>> FAIL: libgomp.fortran/pr71734-2.f90   -O3 -g  execution test
>>>>>>>>>>>>>
>>>>>>>>>>>>> on AVX machines and
>>>>>>>>>>>>>
>>>>>>>>>>>>> FAIL: libgomp.fortran/simd3.f90   -O3 -fomit-frame-pointer
>>>>>>>>>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>>>>>>>>>> test
>>>>>>>>>>>>> FAIL: libgomp.fortran/simd3.f90   -O3 -g  execution test
>>>>>>>>>>>>> FAIL: libgomp.fortran/simd4.f90   -O3 -fomit-frame-pointer
>>>>>>>>>>>>> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
>>>>>>>>>>>>> test
>>>>>>>>>>>>> FAIL: libgomp.fortran/simd4.f90   -O3 -g  execution test
>>>>>>>>>>>>>
>>>>>>>>>>>>> on non-AVX machines?
>>>>>>>>>>>>>
>>>>>>>>>>>>> --
>>>>>>>>>>>>> H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2016-08-09 12:32 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-07-05 14:56 [PATCH PR71734] Add missed check that reference defined inside loop Yuri Rumyantsev
2016-07-06  9:38 ` Richard Biener
2016-07-06  9:50   ` Yuri Rumyantsev
2016-07-06 10:34     ` Richard Biener
     [not found]       ` <CAEoMCqRH6L4=aBDmkpW_oNLZBj79_mFZ51i8PC8Qc+gTuHqKAw@mail.gmail.com>
     [not found]         ` <CAFiYyc0izmKn_T2g8qnhA1TGe1o+ZOF7jUTuHnt6AAhZj6Rtkw@mail.gmail.com>
     [not found]           ` <CAEoMCqSroBFn_PPu+7N-2+c_v4SpPzfe03xUS=wK0ibwJNAhAg@mail.gmail.com>
     [not found]             ` <CAFiYyc0VRd3rAR0S+-WWiA=0tfRp_DrgbjMwvCAo3tOfRTA-2g@mail.gmail.com>
     [not found]               ` <CAEoMCqTVc2xFUz-GqnVMP66HQZSAyQG8KsbUkWsA7kDYb=yypw@mail.gmail.com>
     [not found]                 ` <CAFiYyc02xah6YyojHeTcK2+QY8iT7qRGawtYeJzd8C3gCqdPoQ@mail.gmail.com>
     [not found]                   ` <CAEoMCqSN8uMc7=rfvtRwve=CcD39v39fYWWs2ZFjc-t7tZ+e-A@mail.gmail.com>
     [not found]                     ` <CAFiYyc2LWtdLXADo799LJSfAX3EPxaVEoaOrSN-D2ahSN=+v4g@mail.gmail.com>
     [not found]                       ` <CAEoMCqSgtuGOQSHCwD7mmTxg7ZR1yt39oxeJrfK74AsgYTmpaw@mail.gmail.com>
     [not found]                         ` <CAFiYyc2uqQFYvJZDU8Yy0ZuOMYrr3hhhN2VyvSbCyEa-2JZopw@mail.gmail.com>
     [not found]                           ` <CAEoMCqQ7nWK9CcGqMdu6PhnYGNwKXgDDho3A=phGzJN6MKgo9g@mail.gmail.com>
     [not found]                             ` <CAFiYyc3gS7W8vQRLiq+o2My3+e7oFAz+PsKhg2Xa+2PZSQT70w@mail.gmail.com>
     [not found]                               ` <CAEoMCqQYXtHgJtSmBnTMhKjRd9NjtCWR_rRiC+ndaDTTxzCJKA@mail.gmail.com>
     [not found]                                 ` <CAFiYyc3UqfahbU6K-DEF6RpARKLmEYvMkabpWAS5qD+3oESk3w@mail.gmail.com>
2016-07-08 14:07                                   ` Yuri Rumyantsev
2016-07-15 14:06                                     ` Yuri Rumyantsev
2016-07-18 10:33                                     ` Richard Biener
2016-07-19 15:29                                     ` Renlin Li
2016-07-20  9:35                                       ` Yuri Rumyantsev
2016-07-26 15:50                                         ` Yuri Rumyantsev
2016-07-27 13:25                                           ` Richard Biener
2016-07-28 13:49                                             ` Yuri Rumyantsev
2016-07-28 13:52                                               ` Richard Biener
2016-07-28 21:33                                               ` H.J. Lu
     [not found]                                                 ` <CAEoMCqTJCPTz_hv1bHxme9S151ErPxjcuxp1s+ZeSL_TF3FQUw@mail.gmail.com>
2016-07-29 14:00                                                   ` Yuri Rumyantsev
2016-08-02 14:59                                                     ` Yuri Rumyantsev
2016-08-03 13:44                                                     ` Richard Biener
2016-08-05 13:29                                                       ` Yuri Rumyantsev
2016-08-05 13:51                                                         ` Richard Biener
     [not found]                                                           ` <CAEoMCqRyCr804MQoRNNKYaajSjFE=guN8m1dvvV3jKUzpyNFMg@mail.gmail.com>
     [not found]                                                             ` <CAFiYyc3C7S18HejjE8BpGYCrPaujLT9K9AETZqejKjbbUW=xtQ@mail.gmail.com>
     [not found]                                                               ` <CAEoMCqRwWNVDwrmmLZhXHu_EjEbixMn15TWmTmGjHuwDCfNeUw@mail.gmail.com>
2016-08-09 10:00                                                                 ` Richard Biener
2016-08-09 11:26                                                                   ` Yuri Rumyantsev
2016-08-09 11:33                                                                     ` Richard Biener
2016-08-09 12:05                                                                       ` Yuri Rumyantsev
2016-08-09 12:32                                                                         ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).