public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/49616] New: REGRESSION vectorization fails in case of runtime dimensioned vector
@ 2011-07-03 10:00 vincenzo.innocente at cern dot ch
  2011-07-03 11:31 ` [Bug tree-optimization/49616] " dominiq at lps dot ens.fr
  2012-05-18  9:46 ` vincenzo.innocente at cern dot ch
  0 siblings, 2 replies; 3+ messages in thread
From: vincenzo.innocente at cern dot ch @ 2011-07-03 10:00 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49616

           Summary: REGRESSION vectorization fails in case of runtime
                    dimensioned vector
           Product: gcc
           Version: 4.7.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
        AssignedTo: unassigned@gcc.gnu.org
        ReportedBy: vincenzo.innocente@cern.ch


4.7 fails to vectorize while 4.6.1 succeeds.
The test case is at the end of the message; I was not able to reduce it further
(the call that fails is marked at the bottom).

gcc version 4.7.0 20110702 (experimental) (GCC) 
c++ -O3 -std=c++0x -ftree-vectorizer-verbose=2 -c vectBug.cc 

vectBug.cc:68: note: LOOP VECTORIZED.
vectBug.cc:68: note: LOOP VECTORIZED.
vectBug.cc:71: note: vectorized 2 loops in function.

vectBug.cc:35: note: not vectorized: data ref analysis failed *bs$__b_46[k_320]
= D.2547_242;

vectBug.cc:48: note: not vectorized: data ref analysis failed *bs$__b_46[k_187]
= D.2556_271;

vectBug.cc:45: note: not vectorized: data ref analysis failed *bs$__b_46[k_70]
= D.2553_262;

vectBug.cc:40: note: not vectorized: data ref analysis failed *bs$__b_46[k_317]
= D.2550_251;

vectBug.cc:35: note: LOOP VECTORIZED.
vectBug.cc:48: note: LOOP VECTORIZED.
vectBug.cc:45: note: LOOP VECTORIZED.
vectBug.cc:40: note: LOOP VECTORIZED.
vectBug.cc:16: note: LOOP VECTORIZED.
vectBug.cc:28: note: LOOP VECTORIZED.
vectBug.cc:25: note: LOOP VECTORIZED.
vectBug.cc:20: note: LOOP VECTORIZED.
vectBug.cc:16: note: LOOP VECTORIZED.
vectBug.cc:28: note: LOOP VECTORIZED.
vectBug.cc:25: note: LOOP VECTORIZED.
vectBug.cc:20: note: LOOP VECTORIZED.
vectBug.cc:94: note: vectorized 12 loops in function.

while
c++ -O3 -std=c++0x -ftree-vectorizer-verbose=2 -c vectBug.cc -DFIXED

vectBug.cc:68: note: LOOP VECTORIZED.
vectBug.cc:68: note: LOOP VECTORIZED.
vectBug.cc:71: note: vectorized 2 loops in function.

vectBug.cc:35: note: LOOP VECTORIZED.
vectBug.cc:48: note: LOOP VECTORIZED.
vectBug.cc:45: note: LOOP VECTORIZED.
vectBug.cc:40: note: LOOP VECTORIZED.
vectBug.cc:35: note: LOOP VECTORIZED.
vectBug.cc:48: note: LOOP VECTORIZED.
vectBug.cc:45: note: LOOP VECTORIZED.
vectBug.cc:40: note: LOOP VECTORIZED.
vectBug.cc:16: note: LOOP VECTORIZED.
vectBug.cc:28: note: LOOP VECTORIZED.
vectBug.cc:25: note: LOOP VECTORIZED.
vectBug.cc:20: note: LOOP VECTORIZED.
vectBug.cc:16: note: LOOP VECTORIZED.
vectBug.cc:28: note: LOOP VECTORIZED.
vectBug.cc:25: note: LOOP VECTORIZED.
vectBug.cc:20: note: LOOP VECTORIZED.
vectBug.cc:94: note: vectorized 16 loops in function.

and
gcc version 4.6.1 20110520 (prerelease) (GCC) 
c++ -O3 -std=c++0x -ftree-vectorizer-verbose=2 -c vectBug.cc

vectBug.cc:68: note: LOOP VECTORIZED.
vectBug.cc:68: note: LOOP VECTORIZED.
vectBug.cc:71: note: vectorized 2 loops in function.

vectBug.cc:35: note: LOOP VECTORIZED.
vectBug.cc:48: note: LOOP VECTORIZED.
vectBug.cc:45: note: LOOP VECTORIZED.
vectBug.cc:40: note: LOOP VECTORIZED.
vectBug.cc:35: note: LOOP VECTORIZED.
vectBug.cc:48: note: LOOP VECTORIZED.
vectBug.cc:45: note: LOOP VECTORIZED.
vectBug.cc:40: note: LOOP VECTORIZED.
vectBug.cc:16: note: LOOP VECTORIZED.
vectBug.cc:28: note: LOOP VECTORIZED.
vectBug.cc:25: note: LOOP VECTORIZED.
vectBug.cc:20: note: LOOP VECTORIZED.
vectBug.cc:16: note: LOOP VECTORIZED.
vectBug.cc:28: note: LOOP VECTORIZED.
vectBug.cc:25: note: LOOP VECTORIZED.
vectBug.cc:20: note: LOOP VECTORIZED.
vectBug.cc:94: note: vectorized 16 loops in function.

test case

cat vectBug.cc 

const int arraySize=512;  // number of ints in Bar::c; Bar's loops walk c in passes of at most this many elements

struct Bar {
  // A buffer of arraySize ints plus a read cursor; loop0()/loop() hand out its contents.
  int __attribute__ ((aligned(16))) c[arraySize];  // 16-byte-aligned data buffer
  int last;  // read cursor into c (next index to consume)
  // Starts with the cursor at 0 and calls refresh() once.
  Bar() : last(0) { refresh();}
  // Declared only; presumably refills c -- its definition is not part of this test case.
  void refresh();
  // Copies N values from c (converted to float) into f, calling refresh() before each further pass over c.
  void loop0(int N, float * f) {
    int k=0;  // write index into f
    int lead = arraySize-last;  // elements left between the cursor and the end of c
    if (N<=lead) {  // the request fits in what is left: copy and advance the cursor
      for (int i=0; i!=N; ++i) f[k++] = c[last++];
      return;
    }
    // Otherwise: drain the tail of c, then outLoop whole buffers, then a partial one.
    for (int i=last; i!=arraySize; ++i)  f[k++] = c[i];
    int outLoop = (N-lead)/arraySize;  // number of whole buffers still needed
    last = N -lead -  outLoop*arraySize;  // elements to take from the final buffer
    for (int j=0; j!=outLoop; ++j)  {
      refresh();
      for (int i=0; i!=arraySize; ++i) f[k++] = c[i];
    }
    refresh();
    for (int i=0; i!=last; ++i) f[k++] = c[i];
  }
  // Same traversal as loop0(), but each value is passed to the callable f instead of stored.
  template<typename F>
  void loop(int N, F f) {
    int lead = arraySize-last;  // elements left between the cursor and the end of c
    if (N<=lead) {  // the request fits in what is left of c
      for (int i=0; i!=N; ++i) f(c[last+i]);
      last +=N;  // advance the cursor once, after the loop
      return;
    }
    // Otherwise: drain the tail of c, then outLoop whole buffers, then a partial one.
    for (int i=last; i!=arraySize; ++i)  f(c[i]);
    int outLoop = (N-lead)/arraySize;  // number of whole buffers still needed
    last = N -lead -  outLoop*arraySize;  // elements to take from the final buffer
    for (int j=0; j!=outLoop; ++j)  {
      refresh();
      for (int i=0; i!=arraySize; ++i) f(c[i]);
    }
    refresh();
    for (int i=0; i!=last; ++i) f(c[i]);
  }

};


float __attribute__ ((aligned(16))) z[4096];  // 16-byte-aligned global input read by the free loop()
void refresh();  // declared only; the free loop() calls it when it rewinds j
int j=0;  // read cursor into z


void fun(float const *, float const *, int);  // declared only; callers pass the two filled arrays and N


template<typename F>  // Passes the next N elements of z to f, advancing the global cursor j.
inline void loop(int N, F f) {
  if (j+N>4096) {  // fewer than N elements left in z: rewind the cursor and call refresh()
    j=0;
    refresh();
  }
  for (int i=0; i!=N; ++i) f(z[j++]);
}

void foo(int N) {  // Fills two runtime-sized arrays from z through lambdas, then passes them to fun().
  float __attribute__ ((aligned(16))) x[N];  // variable-length array: size known only at run time
  float __attribute__ ((aligned(16))) y[N];
  int k=0;  // write index shared by both lambdas
  auto xs = [&x, &k](float r) { x[k++]= 1.5f*r;};
  auto ys = [&y, &k](float r) { y[k++]= r+1.f;};
  // k is captured by reference, so it is reset to 0 before each pass below.

  k=0;
  loop(N,xs);
  // for (int i=0; i!=N; ++i) xs(z[j++]);
    // x[k++] = z[j++];

  k=0;
  loop(N,ys);

  //  for (int i=0; i!=N; ++i) ys(z[j++]);
  //    y[k++] = z[j++];

  fun(x,y,N);
}


void load(int N) {
// a is always runtime-sized; b is runtime-sized unless FIXED is defined (then 1024 elements).
float __attribute__ ((aligned(16))) a[N];
#ifndef FIXED
float __attribute__ ((aligned(16))) b[N];
#else
float __attribute__ ((aligned(16))) b[1024];
#endif

  static Bar bar;

  // First pass: fill both arrays directly through Bar::loop0().
  bar.loop0(N,a);
  bar.loop0(N,b);
  fun(a,b,N);


  // Second pass: fill them through lambdas handed to the Bar::loop() template.
  int k=0;  // write index shared by both lambdas
  auto as = [&a, &k](float r) { a[k++]= 1.5f*r;};
  auto bs = [&b, &k](float r) { b[k++]= r+1.f;};

  k=0;
  bar.loop(N,as);
  k=0;
  bar.loop(N,bs);   // <=== this one fails to vectorize when b is runtime-sized (all others are OK)


  fun(a,b,N);

}


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [Bug tree-optimization/49616] REGRESSION vectorization fails in case of runtime dimensioned vector
  2011-07-03 10:00 [Bug tree-optimization/49616] New: REGRESSION vectorization fails in case of runtime dimensioned vector vincenzo.innocente at cern dot ch
@ 2011-07-03 11:31 ` dominiq at lps dot ens.fr
  2012-05-18  9:46 ` vincenzo.innocente at cern dot ch
  1 sibling, 0 replies; 3+ messages in thread
From: dominiq at lps dot ens.fr @ 2011-07-03 11:31 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49616

Dominique d'Humieres <dominiq at lps dot ens.fr> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |NEW
   Last reconfirmed|                            |2011.07.03 11:30:06
                 CC|                            |hubicka at gcc dot gnu.org,
                   |                            |irar at il dot ibm.com
     Ever Confirmed|0                           |1

--- Comment #1 from Dominique d'Humieres <dominiq at lps dot ens.fr> 2011-07-03 11:30:06 UTC ---
This seems to be due to revision 172430:

Author:    hubicka
Date:    Thu Apr 14 13:26:44 2011 UTC (2 months, 2 weeks ago)
Changed paths:    16
Log Message:    
    * cgraph.c (dump_cgraph_node): Do not dump inline summaries.
    * cgraph.h (struct inline_summary): Move to ipa-inline.h
    (cgraph_local_info): Remove inline_summary.
    * ipa-cp.c: Include ipa-inline.h.
    (ipcp_cloning_candidate_p, ipcp_estimate_growth,
    ipcp_estimate_cloning_cost, ipcp_insert_stage): Use inline_summary
    accessor.
    * lto-cgraph.c (lto_output_node): Do not stream inline summary.
    (input_overwrite_node): Do not set inline summary.
    (input_node): Do not stream inline summary.
    * ipa-inline.c (cgraph_decide_inlining): Dump inline summaries.
    (cgraph_decide_inlining_incrementally): Do not try to estimate overall
    growth; we do not have inline parameters computed for that anyway.
    (cgraph_early_inlining): After inlining compute call_stmt_sizes.
    * ipa-inline.h (struct inline_summary): Move here from cgraph.h
    (inline_summary_t): New type and VECtor.
    (debug_inline_summary, dump_inline_summaries): Declare.
    (inline_summary): Use VECtor.
    (estimate_edge_growth): Kill hack computing call stmt size directly.
    * lto-section-in.c (lto_section_name): Add inline section.
    * ipa-inline-analysis.c: Include lto-streamer.h
    (node_removal_hook_holder, node_duplication_hook_holder): New holders
    (inline_node_removal_hook, inline_node_duplication_hook): New functions.
    (inline_summary_vec): Define.
    (inline_summary_alloc, dump_inline_summary, debug_inline_summary,
    dump_inline_summaries): New functions.
    (estimate_function_body_sizes): Properly compute size/time of outgoing
calls.
    (compute_inline_parameters): Alloc inline_summary; do not compute size/time
    of incoming calls.
    (estimate_edge_time): Avoid missing time summary hack.
    (inline_read_summary): Read inline summary info.
    (inline_write_summary): Write inline summary info.
    (inline_free_summary): Free all hooks and inline summary vector.
    * lto-streamer.h: Add LTO_section_inline_summary section.
    * Makefile.in (ipa-cp.o, ipa-inline-analysis.o): Update dependencies.
    * ipa.c (cgraph_remove_unreachable_nodes): Fix dump file formatting.

    * lto.c: Include ipa-inline.h
    (add_cgraph_node_to_partition, undo_partition): Use inline_summary
accessor.
    (ipa_node_duplication_hook): Fix declaration.
    * Make-lang.in (lto.o): Update dependencies.

With revision 172429 I get

....
pr49616.cc:94: note: vectorized 16 loops in function.

but

...
pr49616.cc:94: note: vectorized 12 loops in function.

with revision 172430.


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [Bug tree-optimization/49616] REGRESSION vectorization fails in case of runtime dimensioned vector
  2011-07-03 10:00 [Bug tree-optimization/49616] New: REGRESSION vectorization fails in case of runtime dimensioned vector vincenzo.innocente at cern dot ch
  2011-07-03 11:31 ` [Bug tree-optimization/49616] " dominiq at lps dot ens.fr
@ 2012-05-18  9:46 ` vincenzo.innocente at cern dot ch
  1 sibling, 0 replies; 3+ messages in thread
From: vincenzo.innocente at cern dot ch @ 2012-05-18  9:46 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49616

vincenzo Innocente <vincenzo.innocente at cern dot ch> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |RESOLVED
         Resolution|                            |FIXED

--- Comment #2 from vincenzo Innocente <vincenzo.innocente at cern dot ch> 2012-05-18 09:39:24 UTC ---
now ok in
gcc version 4.7.1 20120517 (prerelease) [gcc-4_7-branch revision 187624] (GCC) 
and
gcc version 4.8.0 20120509 (experimental) [trunk revision 187326] (GCC)


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2012-05-18  9:40 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-07-03 10:00 [Bug tree-optimization/49616] New: REGRESSION vectorization fails in case of runtime dimensioned vector vincenzo.innocente at cern dot ch
2011-07-03 11:31 ` [Bug tree-optimization/49616] " dominiq at lps dot ens.fr
2012-05-18  9:46 ` vincenzo.innocente at cern dot ch

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).