public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 4/8] Outline lst_niter_for_loop.
  2010-09-09 19:31 [PATCH 1/8] Use FOR_EACH_VEC_ELT Sebastian Pop
                   ` (2 preceding siblings ...)
  2010-09-09 19:31 ` [PATCH 8/8] New pass: loop flattening Sebastian Pop
@ 2010-09-09 19:31 ` Sebastian Pop
  2010-09-09 19:36 ` [PATCH 3/8] Call fatal_error when the transform read from file is not legal Sebastian Pop
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 11+ messages in thread
From: Sebastian Pop @ 2010-09-09 19:31 UTC (permalink / raw)
  To: gcc-patches; +Cc: gcc-graphite, Sebastian Pop

2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>

	* graphite-blocking.c (pbb_strip_mine_profitable_p): Renamed
	lst_strip_mine_profitable_p.  Call lst_niter_for_loop.
	* graphite-poly.h (lst_niter_for_loop): New.
---
 gcc/ChangeLog.graphite  |    6 ++++++
 gcc/graphite-blocking.c |   17 ++++++++---------
 gcc/graphite-poly.h     |   13 +++++++++++++
 3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/gcc/ChangeLog.graphite b/gcc/ChangeLog.graphite
index 46dd03b..831c2fe 100644
--- a/gcc/ChangeLog.graphite
+++ b/gcc/ChangeLog.graphite
@@ -1,5 +1,11 @@
 2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>
 
+	* graphite-blocking.c (pbb_strip_mine_profitable_p): Renamed
+	lst_strip_mine_profitable_p.  Call lst_niter_for_loop.
+	* graphite-poly.h (lst_niter_for_loop): New.
+
+2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>
+
 	* graphite-poly.c (apply_poly_transforms): Do not abort when the
 	transform read from disk is not legal.  Call fatal_error instead.
 
diff --git a/gcc/graphite-blocking.c b/gcc/graphite-blocking.c
index 6e4334a..3951b60 100644
--- a/gcc/graphite-blocking.c
+++ b/gcc/graphite-blocking.c
@@ -172,25 +172,25 @@ pbb_strip_mine_time_depth (poly_bb_p pbb, int time_depth, int stride)
   return true;
 }
 
-/* Returns true when strip mining with STRIDE of the loop around PBB
-   at DEPTH is profitable.  */
+/* Returns true when strip mining with STRIDE of the loop LST is
+   profitable.  */
 
 static bool
-pbb_strip_mine_profitable_p (poly_bb_p pbb,
-			     graphite_dim_t depth,
-			     int stride)
+lst_strip_mine_profitable_p (lst_p lst, int stride)
 {
   mpz_t niter, strip_stride;
   bool res;
 
+  gcc_assert (LST_LOOP_P (lst));
   mpz_init (strip_stride);
   mpz_init (niter);
+
   mpz_set_si (strip_stride, stride);
-  pbb_number_of_iterations_at_time (pbb, psct_dynamic_dim (pbb, depth), niter);
+  lst_niter_for_loop (lst, niter);
   res = (mpz_cmp (niter, strip_stride) > 0);
+
   mpz_clear (strip_stride);
   mpz_clear (niter);
-
   return res;
 }
 
@@ -244,8 +244,7 @@ lst_do_strip_mine (lst_p lst)
 
   depth = lst_depth (lst);
   if (depth >= 0
-      && pbb_strip_mine_profitable_p (LST_PBB (lst_find_first_pbb (lst)),
-				      depth, stride))
+      && lst_strip_mine_profitable_p (lst, stride))
     {
       res |= lst_do_strip_mine_loop (lst, lst_depth (lst));
       lst_add_loop_under_loop (lst);
diff --git a/gcc/graphite-poly.h b/gcc/graphite-poly.h
index 5ed1b04..5f536a8 100644
--- a/gcc/graphite-poly.h
+++ b/gcc/graphite-poly.h
@@ -1062,6 +1062,19 @@ lst_remove_from_sequence (lst_p lst)
   LST_LOOP_FATHER (lst) = NULL;
 }
 
+/* Sets NITER to the upper bound approximation of the number of
+   iterations of loop LST.  */
+
+static inline void
+lst_niter_for_loop (lst_p lst, mpz_t niter)
+{
+  int depth = lst_depth (lst);
+  poly_bb_p pbb = LST_PBB (lst_find_first_pbb (lst));
+
+  gcc_assert (LST_LOOP_P (lst));
+  pbb_number_of_iterations_at_time (pbb, psct_dynamic_dim (pbb, depth), niter);
+}
+
 /* Updates the scattering of PBB to be at the DEWEY number in the loop
    at depth LEVEL.  */
 
-- 
1.7.0.4

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 8/8] New pass: loop flattening.
  2010-09-09 19:31 [PATCH 1/8] Use FOR_EACH_VEC_ELT Sebastian Pop
  2010-09-09 19:31 ` [PATCH 5/8] Fix lst_update_scattering Sebastian Pop
  2010-09-09 19:31 ` [PATCH 7/8] Add cloog_checksum Sebastian Pop
@ 2010-09-09 19:31 ` Sebastian Pop
  2010-09-09 19:45   ` Nathan Froyd
  2010-09-09 19:31 ` [PATCH 4/8] Outline lst_niter_for_loop Sebastian Pop
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 11+ messages in thread
From: Sebastian Pop @ 2010-09-09 19:31 UTC (permalink / raw)
  To: gcc-patches; +Cc: gcc-graphite, Sebastian Pop

Hi,

This patch implements a loop flattening pass that has been described
in a very Fortran specific way in the 1992 paper by Reinhard von
Hanxleden and Ken Kennedy: "Relaxing SIMD Control Flow Constraints
using Loop Transformations":
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.5033

The pass does not contain an analysis of the profitability yet, and so
it is not enabled by default even in the graphite branch.

As loop flattening is a transformation that is supposed to preserve
the execution trace, I tested the pass by looking at the md5sum of
several execution traces before and after loop flattening.

I committed this patch to the graphite branch.

2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>

	* Makefile.in (OBJS-common): Add graphite-flattening.o.
	(graphite-flattening.o): New rule.
	* common.opt (floop-flatten): New flag.
	* doc/invoke.texi (-floop-flatten): Documented.
	* graphite-flattening.c: New.
	* graphite-poly.c (apply_poly_transforms): Call flatten_all_loops.
	* graphite-poly.h (flatten_all_loops): Declared.
	(lst_remove_loop_and_inline_stmts_in_loop_father): New.
	* tree-ssa-loop.c (gate_graphite_transforms): When flag_loop_flatten
	is set, also set flag_graphite.
---
 gcc/ChangeLog.graphite    |   13 ++
 gcc/Makefile.in           |    7 +
 gcc/common.opt            |    4 +
 gcc/doc/invoke.texi       |   12 +-
 gcc/graphite-flattening.c |  442 +++++++++++++++++++++++++++++++++++++++++++++
 gcc/graphite-poly.c       |    6 +-
 gcc/graphite-poly.h       |   27 +++-
 gcc/tree-ssa-loop.c       |    8 +-
 8 files changed, 512 insertions(+), 7 deletions(-)
 create mode 100644 gcc/graphite-flattening.c

diff --git a/gcc/ChangeLog.graphite b/gcc/ChangeLog.graphite
index f2c4127..2512ea6 100644
--- a/gcc/ChangeLog.graphite
+++ b/gcc/ChangeLog.graphite
@@ -1,5 +1,18 @@
 2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>
 
+	* Makefile.in (OBJS-common): Add graphite-flattening.o.
+	(graphite-flattening.o): New rule.
+	* common.opt (floop-flatten): New flag.
+	* doc/invoke.texi (-floop-flatten): Documented.
+	* graphite-flattening.c: New.
+	* graphite-poly.c (apply_poly_transforms): Call flatten_all_loops.
+	* graphite-poly.h (flatten_all_loops): Declared.
+	(lst_remove_loop_and_inline_stmts_in_loop_father): New.
+	* tree-ssa-loop.c (gate_graphite_transforms): When flag_loop_flatten
+	is set, also set flag_graphite.
+
+2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>
+
 	* graphite-poly.c (cloog_checksum): New.
 	* graphite-poly.h (cloog_checksum): Declared.
 
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index d2bea4e..957b31e 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1241,6 +1241,7 @@ OBJS-common = \
 	graphite-clast-to-gimple.o \
 	graphite-cloog-util.o \
 	graphite-dependences.o \
+	graphite-flattening.o \
 	graphite-interchange.o \
 	graphite-poly.o \
 	graphite-ppl.o \
@@ -2694,6 +2695,12 @@ graphite-dependences.o: graphite-dependences.c $(CONFIG_H) $(SYSTEM_H) \
    $(TOPLEV_H) $(DIAGNOSTIC_CORE_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
    $(GIMPLE_H) $(TREE_DATA_REF_H) tree-pass.h domwalk.h \
    graphite.h graphite-poly.h graphite-ppl.h graphite-dependences.h
+graphite-flattening.o: graphite-flattening.c $(CONFIG_H) $(SYSTEM_H)	\
+   coretypes.h $(TM_H) $(GGC_H) $(TREE_H) $(RTL_H) output.h		\
+   $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(TOPLEV_H) $(TREE_FLOW_H)		\
+   $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) $(GIMPLE_H)			\
+   $(TREE_DATA_REF_H) tree-pass.h domwalk.h value-prof.h graphite.h	\
+   graphite-poly.h graphite-ppl.h
 graphite-interchange.o: graphite-interchange.c $(CONFIG_H) $(SYSTEM_H) \
    coretypes.h \
    $(TM_H) $(GGC_H) $(TREE_H) $(RTL_H) output.h $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) \
diff --git a/gcc/common.opt b/gcc/common.opt
index 9c53606..60de165 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -700,6 +700,10 @@ floop-block
 Common Report Var(flag_loop_block) Optimization
 Enable Loop Blocking transformation
 
+floop-flatten
+Common Report Var(flag_loop_flatten) Optimization
+Enable Loop Flattening transformation
+
 fstrict-volatile-bitfields
 Common Report Var(flag_strict_volatile_bitfields) Init(-1)
 Force bitfield accesses to match their type width
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 47e4db3..61b1d45 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -353,7 +353,7 @@ Objective-C and Objective-C++ Dialects}.
 -fira-loop-pressure -fno-ira-share-save-slots @gol
 -fno-ira-share-spill-slots -fira-verbose=@var{n} @gol
 -fivopts -fkeep-inline-functions -fkeep-static-consts @gol
--floop-block -floop-interchange -floop-strip-mine @gol
+-floop-block -floop-flatten -floop-interchange -floop-strip-mine @gol
 -floop-parallelize-all -flto -flto-compression-level -flto-report -fltrans @gol
 -fltrans-output-list -fmerge-all-constants -fmerge-constants -fmodulo-sched @gol
 -fmodulo-sched-allow-regmoves -fmove-loop-invariants -fmudflap @gol
@@ -6820,6 +6820,7 @@ Perform linear loop transformations on tree.  This flag can improve cache
 performance and allow further loop optimizations to take place.
 
 @item -floop-interchange
+@opindex floop-interchange
 Perform loop interchange transformations on loops.  Interchanging two
 nested loops switches the inner and outer loops.  For example, given a
 loop like:
@@ -6849,6 +6850,7 @@ Graphite loop transformation infrastructure.  This option is disabled
 when @option{-fgraphite-read} is used.
 
 @item -floop-strip-mine
+@opindex floop-strip-mine
 Perform loop strip mining transformations on loops.  Strip mining
 splits a loop into two nested loops.  The outer loop has strides
 equal to the strip size and the inner loop has strides of the
@@ -6875,6 +6877,7 @@ enable the Graphite loop transformation infrastructure.  This option is
 disabled when @option{-fgraphite-read} is used.
 
 @item -floop-block
+@opindex floop-block
 Perform loop blocking transformations on loops.  Blocking strip mines
 each loop in the loop nest such that the memory accesses of the
 element loops fit inside caches.  The strip length can be changed
@@ -6931,7 +6934,14 @@ program is described in a separate file.  For a program @code{file.c}
 that has 3 scops, the dumped files are named @code{file.0.graphite},
 @code{file.1.graphite}, and @code{file.2.graphite}.
 
+@item -floop-flatten
+@opindex floop-flatten
+Removes the loop nesting structure: transforms the loop nest into a
+single loop.  This transformation can be useful to vectorize all the
+levels of the loop nest.
+
 @item -floop-parallelize-all
+@opindex floop-parallelize-all
 Use the Graphite data dependence analysis to identify loops that can
 be parallelized.  Parallelize all the loops that can be analyzed to
 not contain loop carried dependences without checking that it is
diff --git a/gcc/graphite-flattening.c b/gcc/graphite-flattening.c
new file mode 100644
index 0000000..0f98337
--- /dev/null
+++ b/gcc/graphite-flattening.c
@@ -0,0 +1,442 @@
+/* Loop flattening for Graphite.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   Contributed by Sebastian Pop <sebastian.pop@amd.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "ggc.h"
+#include "tree.h"
+#include "rtl.h"
+#include "output.h"
+#include "basic-block.h"
+#include "diagnostic.h"
+#include "tree-flow.h"
+#include "toplev.h"
+#include "tree-dump.h"
+#include "timevar.h"
+#include "cfgloop.h"
+#include "tree-chrec.h"
+#include "tree-data-ref.h"
+#include "tree-scalar-evolution.h"
+#include "tree-pass.h"
+#include "domwalk.h"
+#include "value-prof.h"
+#include "pointer-set.h"
+#include "gimple.h"
+#include "params.h"
+
+#ifdef HAVE_cloog
+#include "ppl_c.h"
+#include "sese.h"
+#include "graphite-ppl.h"
+#include "graphite.h"
+#include "graphite-poly.h"
+
+/* The loop flattening pass transforms loop nests into a single loop,
+   removing the loop nesting structure.  The auto-vectorization can
+   then apply on the full loop body, without needing the outer-loop
+   vectorization.
+
+   The canonical example is as follows: suppose that we have a loop
+   nest with known iteration counts
+
+   | for (i = 1; i <= 6; i++)
+   |   for (j = 1; j <= 6; j++)
+   |     S1(i,j);
+
+   The loop flattening is performed by linearizing the iteration space
+   using the function "f (x) = 6 * i + j".  In this case, CLooG would
+   produce this code:
+
+   | for (c1=7;c1<=42;c1++) {
+   |   i = floord(c1-1,6);
+   |   S1(i,c1-6*i);
+   | }
+
+   There are several limitations for loop flattening that are linked
+   to the expressivity of the polyhedral model.  One has to take an
+   upper bound approximation to deal with the parametric case of loop
+   flattening.  For example, in the loop nest:
+
+   | for (i = 1; i <= N; i++)
+   |   for (j = 1; j <= M; j++)
+   |     S1(i,j);
+
+   One would like to flatten this loop using a linearization function
+   like this "f (x) = M * i + j".  However CLooG's schedules are not
+   expressive enough to deal with this case, and so the parameter M
+   has to be replaced by an integer upper bound approximation.  If we
+   further know in the context of the scop that "M <= 6", then it is
+   possible to linearize the loop with "f (x) = 6 * i + j".  In this
+   case, CLooG would produce this code:
+
+   | for (c1=7;c1<=6*M+N;c1++) {
+   |   i = ceild(c1-N,6);
+   |   if (i <= floord(c1-1,6)) {
+   |     S1(i,c1-6*i);
+   |   }
+   | }
+
+   For an arbitrarily complex loop nests the algorithm proceeds in two
+   steps.  First, the LST is flattened by removing the loops structure
+   and by inserting the statements in the order they appear in
+   depth-first order.  Then, the scattering of each statement is
+   transformed such that it
+
+   Supposing that the original program is represented by the following
+   LST:
+
+   | (loop_1
+   |  stmt_1
+   |  (loop_2 stmt_3
+   |   (loop_3 stmt_4)
+   |   (loop_4 stmt_5 stmt_6)
+   |   stmt_7
+   |  )
+   |  stmt_2
+   | )
+
+   Loop flattening traverses the LST in depth-first order, and
+   flattens pairs of loops successively by projecting the inner loops
+   in the iteration domain of the outer loops:
+
+   lst_project_loop (loop_2, loop_3, stride)
+
+   | (loop_1
+   |  stmt_1
+   |  (loop_2 stmt_3 stmt_4
+   |   (loop_4 stmt_5 stmt_6)
+   |   stmt_7
+   |  )
+   |  stmt_2
+   | )
+
+   lst_project_loop (loop_2, loop_4, stride)
+
+   | (loop_1
+   |  stmt_1
+   |  (loop_2 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7)
+   |  stmt_2
+   | )
+
+   lst_project_loop (loop_1, loop_2, stride)
+
+   | (loop_1
+   |  stmt_1 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7 stmt_2
+   | )
+
+   At each step, the iteration domain of the outer loop is enlarged to
+   contain enough points to iterate over the inner loop domain.  */
+
+/* Initializes RES to the number of iterations of the linearized loop
+   LST.  RES is the cardinal of the iteration domain of LST.  */
+
+static void
+lst_linearized_niter (lst_p lst, mpz_t res)
+{
+  int i;
+  lst_p l;
+  mpz_t n;
+
+  mpz_init (n);
+  mpz_set_si (res, 0);
+
+  FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
+    if (LST_LOOP_P (l))
+      {
+	lst_linearized_niter (l, n);
+	mpz_add (res, res, n);
+      }
+
+  if (LST_LOOP_P (lst))
+    {
+      lst_niter_for_loop (lst, n);
+
+      if (mpz_cmp_si (res, 0) != 0)
+	mpz_mul (res, res, n);
+      else
+	mpz_set (res, n);
+    }
+
+  mpz_clear (n);
+}
+
+/* Applies the translation "f (x) = x + OFFSET" to the loop containing
+   STMT.  */
+
+static void
+lst_offset (lst_p stmt, mpz_t offset)
+{
+  lst_p inner = LST_LOOP_FATHER (stmt);
+  poly_bb_p pbb = LST_PBB (stmt);
+  ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
+  int inner_depth = lst_depth (inner);
+  ppl_dimension_type inner_dim = psct_dynamic_dim (pbb, inner_depth);
+  ppl_Linear_Expression_t expr;
+  ppl_dimension_type dim;
+  ppl_Coefficient_t one;
+  mpz_t x;
+
+  mpz_init (x);
+  mpz_set_si (x, 1);
+  ppl_new_Coefficient (&one);
+  ppl_assign_Coefficient_from_mpz_t (one, x);
+
+  ppl_Polyhedron_space_dimension (poly, &dim);
+  ppl_new_Linear_Expression_with_dimension (&expr, dim);
+
+  ppl_set_coef (expr, inner_dim, 1);
+  ppl_set_inhomogeneous_gmp (expr, offset);
+  ppl_Polyhedron_affine_image (poly, inner_dim, expr, one);
+  ppl_delete_Linear_Expression (expr);
+  ppl_delete_Coefficient (one);
+}
+
+/* Scale by FACTOR the loop LST containing STMT.  */
+
+static void
+lst_scale (lst_p lst, lst_p stmt, mpz_t factor)
+{
+  mpz_t x;
+  ppl_Coefficient_t one;
+  int outer_depth = lst_depth (lst);
+  poly_bb_p pbb = LST_PBB (stmt);
+  ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
+  ppl_dimension_type outer_dim = psct_dynamic_dim (pbb, outer_depth);
+  ppl_Linear_Expression_t expr;
+  ppl_dimension_type dim;
+
+  mpz_init (x);
+  mpz_set_si (x, 1);
+  ppl_new_Coefficient (&one);
+  ppl_assign_Coefficient_from_mpz_t (one, x);
+
+  ppl_Polyhedron_space_dimension (poly, &dim);
+  ppl_new_Linear_Expression_with_dimension (&expr, dim);
+
+  /* outer_dim = factor * outer_dim.  */
+  ppl_set_coef_gmp (expr, outer_dim, factor);
+  ppl_Polyhedron_affine_image (poly, outer_dim, expr, one);
+  ppl_delete_Linear_Expression (expr);
+
+  mpz_clear (x);
+  ppl_delete_Coefficient (one);
+}
+
+/* Project the INNER loop into the iteration domain of the OUTER loop.
+   STRIDE is the number of iterations between two iterations of the
+   outer loop.  */
+
+static void
+lst_project_loop (lst_p outer, lst_p inner, mpz_t stride)
+{
+  int i;
+  lst_p stmt;
+  mpz_t x;
+  ppl_Coefficient_t one;
+  int outer_depth = lst_depth (outer);
+  int inner_depth = lst_depth (inner);
+
+  mpz_init (x);
+  mpz_set_si (x, 1);
+  ppl_new_Coefficient (&one);
+  ppl_assign_Coefficient_from_mpz_t (one, x);
+
+  FOR_EACH_VEC_ELT (lst_p, LST_SEQ (inner), i, stmt)
+    {
+      poly_bb_p pbb = LST_PBB (stmt);
+      ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
+      ppl_dimension_type outer_dim = psct_dynamic_dim (pbb, outer_depth);
+      ppl_dimension_type inner_dim = psct_dynamic_dim (pbb, inner_depth);
+      ppl_Linear_Expression_t expr;
+      ppl_dimension_type dim;
+      ppl_dimension_type *ds;
+
+      /* There should be no loops under INNER.  */
+      gcc_assert (!LST_LOOP_P (stmt));
+      ppl_Polyhedron_space_dimension (poly, &dim);
+      ppl_new_Linear_Expression_with_dimension (&expr, dim);
+
+      /* outer_dim = outer_dim * stride + inner_dim.  */
+      ppl_set_coef (expr, inner_dim, 1);
+      ppl_set_coef_gmp (expr, outer_dim, stride);
+      ppl_Polyhedron_affine_image (poly, outer_dim, expr, one);
+      ppl_delete_Linear_Expression (expr);
+
+      /* Project on inner_dim.  */
+      ppl_new_Linear_Expression_with_dimension (&expr, dim - 1);
+      ppl_Polyhedron_affine_image (poly, inner_dim, expr, one);
+      ppl_delete_Linear_Expression (expr);
+
+      /* Remove inner loop and the static schedule of its body.  */
+      ds = XNEWVEC (ppl_dimension_type, 2);
+      ds[0] = inner_dim;
+      ds[1] = inner_dim + 1;
+      ppl_Polyhedron_remove_space_dimensions (poly, ds, 2);
+      PBB_NB_SCATTERING_TRANSFORM (pbb) -= 2;
+      free (ds);
+    }
+
+  mpz_clear (x);
+  ppl_delete_Coefficient (one);
+}
+
+/* Flattens the loop nest LST.  Return true when something changed.
+   OFFSET is used to compute the number of iterations of the outermost
+   loop before the current LST is executed.  */
+
+static bool
+lst_flatten_loop (lst_p lst, mpz_t init_offset)
+{
+  int i;
+  lst_p l;
+  bool res = false;
+  mpz_t n, one, offset, stride;
+
+  mpz_init (n);
+  mpz_init (one);
+  mpz_init (offset);
+  mpz_init (stride);
+  mpz_set (offset, init_offset);
+  mpz_set_si (one, 1);
+
+  lst_linearized_niter (lst, stride);
+  lst_niter_for_loop (lst, n);
+  mpz_tdiv_q (stride, stride, n);
+
+  FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
+    if (LST_LOOP_P (l))
+      {
+	res = true;
+
+	lst_flatten_loop (l, offset);
+	lst_niter_for_loop (l, n);
+
+	lst_project_loop (lst, l, stride);
+
+	/* The offset is the number of iterations minus 1, as we want
+	   to execute the next statements at the same iteration as the
+	   last iteration of the loop.  */
+	mpz_sub (n, n, one);
+	mpz_add (offset, offset, n);
+      }
+    else
+      {
+	lst_scale (lst, l, stride);
+	if (mpz_cmp_si (offset, 0) != 0)
+	  lst_offset (l, offset);
+      }
+
+  FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
+    if (LST_LOOP_P (l))
+      lst_remove_loop_and_inline_stmts_in_loop_father (l);
+
+  mpz_clear (n);
+  mpz_clear (one);
+  mpz_clear (offset);
+  mpz_clear (stride);
+  return res;
+}
+
+/* Remove all but the first 3 dimensions of the scattering:
+   - dim0: the static schedule for the loop
+   - dim1: the dynamic schedule of the loop
+   - dim2: the static schedule for the loop body.  */
+
+static void
+remove_unused_scattering_dimensions (lst_p lst)
+{
+  int i;
+  lst_p stmt;
+  mpz_t x;
+  ppl_Coefficient_t one;
+
+  mpz_init (x);
+  mpz_set_si (x, 1);
+  ppl_new_Coefficient (&one);
+  ppl_assign_Coefficient_from_mpz_t (one, x);
+
+  FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, stmt)
+    {
+      poly_bb_p pbb = LST_PBB (stmt);
+      ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
+      int j, nb_dims_to_remove = PBB_NB_SCATTERING_TRANSFORM (pbb) - 3;
+      ppl_dimension_type *ds;
+
+      /* There should be no loops inside LST after flattening.  */
+      gcc_assert (!LST_LOOP_P (stmt));
+
+      if (!nb_dims_to_remove)
+	continue;
+
+      ds = XNEWVEC (ppl_dimension_type, nb_dims_to_remove);
+      for (j = 0; j < nb_dims_to_remove; j++)
+	ds[j] = j + 3;
+
+      ppl_Polyhedron_remove_space_dimensions (poly, ds, nb_dims_to_remove);
+      PBB_NB_SCATTERING_TRANSFORM (pbb) -= nb_dims_to_remove;
+      free (ds);
+    }
+
+  mpz_clear (x);
+  ppl_delete_Coefficient (one);
+}
+
+/* Flattens all the loop nests of LST.  Return true when something
+   changed.  */
+
+static bool
+lst_do_flatten (lst_p lst)
+{
+  int i;
+  lst_p l;
+  bool res = false;
+  mpz_t zero;
+
+  if (!lst
+      || !LST_LOOP_P (lst))
+    return false;
+
+  mpz_init (zero);
+  mpz_set_si (zero, 0);
+
+  FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
+    if (LST_LOOP_P (l))
+      {
+	res |= lst_flatten_loop (l, zero);
+	remove_unused_scattering_dimensions (l);
+      }
+
+  lst_update_scattering (lst);
+  mpz_clear (zero);
+  return res;
+}
+
+/* Flatten all the loop nests in SCOP.  Returns true when something
+   changed.  */
+
+bool
+flatten_all_loops (scop_p scop)
+{
+  return lst_do_flatten (SCOP_TRANSFORMED_SCHEDULE (scop));
+}
+
+#endif
diff --git a/gcc/graphite-poly.c b/gcc/graphite-poly.c
index 2fc7124..4a1c00f 100644
--- a/gcc/graphite-poly.c
+++ b/gcc/graphite-poly.c
@@ -782,6 +782,9 @@ apply_poly_transforms (scop_p scop)
 	transform_done |= scop_do_interchange (scop);
     }
 
+  if (flag_loop_flatten)
+    transform_done |= flatten_all_loops (scop);
+
   if (flag_graphite_write)
     {
       graphite_file = init_graphite_out_file (file_scop_number);
@@ -1686,7 +1689,8 @@ pbb_number_of_iterations_at_time (poly_bb_p pbb,
       ppl_delete_Constraint_System (cs);
     }
 
-  /* Compute the lower bound on the original iteration domain.  */
+  /* Compute the lower bound on the original iteration domain and add
+     it to the scattering.  */
   ppl_new_Pointset_Powerset_C_Polyhedron_from_C_Polyhedron
     (&sctr_lb, PBB_TRANSFORMED_SCATTERING (pbb));
   for (i = 0; i < (int) domain_dim; i++)
diff --git a/gcc/graphite-poly.h b/gcc/graphite-poly.h
index b9bf1ed..8b950a4 100644
--- a/gcc/graphite-poly.h
+++ b/gcc/graphite-poly.h
@@ -414,6 +414,7 @@ extern void debug_iteration_domains (scop_p, int);
 extern bool scop_do_interchange (scop_p);
 extern bool scop_do_strip_mine (scop_p);
 extern bool scop_do_block (scop_p);
+extern bool flatten_all_loops (scop_p);
 extern void pbb_number_of_iterations_at_time (poly_bb_p, graphite_dim_t, mpz_t);
 extern void pbb_remove_duplicate_pdrs (poly_bb_p);
 
@@ -944,7 +945,7 @@ find_lst_loop (lst_p stmt, int loop_depth)
   return loop;
 }
 
-/* Return the first lst representing a PBB statement in LST.  */
+/* Return the first LST representing a PBB statement in LST.  */
 
 static inline lst_p
 lst_find_first_pbb (lst_p lst)
@@ -968,7 +969,7 @@ lst_find_first_pbb (lst_p lst)
   return NULL;
 }
 
-/* Returns true when LST is a loop that does not contains
+/* Returns true when LST is a loop that does not contain
    statements.  */
 
 static inline bool
@@ -977,7 +978,7 @@ lst_empty_p (lst_p lst)
   return !lst_find_first_pbb (lst);
 }
 
-/* Return the last lst representing a PBB statement in LST.  */
+/* Return the last LST representing a PBB statement in LST.  */
 
 static inline lst_p
 lst_find_last_pbb (lst_p lst)
@@ -1061,6 +1062,26 @@ lst_remove_from_sequence (lst_p lst)
   LST_LOOP_FATHER (lst) = NULL;
 }
 
+/* Removes the loop LST and inline its body in the father loop.  */
+
+static inline void
+lst_remove_loop_and_inline_stmts_in_loop_father (lst_p lst)
+{
+  lst_p l, father = LST_LOOP_FATHER (lst);
+  int i, dewey = lst_dewey_number (lst);
+
+  gcc_assert (lst && father && dewey >= 0);
+
+  VEC_ordered_remove (lst_p, LST_SEQ (father), dewey);
+  LST_LOOP_FATHER (lst) = NULL;
+
+  FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
+    {
+      VEC_safe_insert (lst_p, heap, LST_SEQ (father), dewey + i, l);
+      LST_LOOP_FATHER (l) = father;
+    }
+}
+
 /* Sets NITER to the upper bound approximation of the number of
    iterations of loop LST.  */
 
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index 9523dab..c653d27 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -303,8 +303,12 @@ gate_graphite_transforms (void)
 {
   /* Enable -fgraphite pass if any one of the graphite optimization flags
      is turned on.  */
-  if (flag_loop_block || flag_loop_interchange || flag_loop_strip_mine
-      || flag_graphite_identity || flag_loop_parallelize_all)
+  if (flag_loop_block
+      || flag_loop_interchange
+      || flag_loop_strip_mine
+      || flag_graphite_identity
+      || flag_loop_parallelize_all
+      || flag_loop_flatten)
     flag_graphite = 1;
 
   return flag_graphite != 0;
-- 
1.7.0.4

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 7/8] Add cloog_checksum.
  2010-09-09 19:31 [PATCH 1/8] Use FOR_EACH_VEC_ELT Sebastian Pop
  2010-09-09 19:31 ` [PATCH 5/8] Fix lst_update_scattering Sebastian Pop
@ 2010-09-09 19:31 ` Sebastian Pop
  2010-09-09 19:31 ` [PATCH 8/8] New pass: loop flattening Sebastian Pop
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 11+ messages in thread
From: Sebastian Pop @ 2010-09-09 19:31 UTC (permalink / raw)
  To: gcc-patches; +Cc: gcc-graphite, Sebastian Pop

2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>

	* graphite-poly.c (cloog_checksum): New.
	* graphite-poly.h (cloog_checksum): Declared.
---
 gcc/ChangeLog.graphite |    5 +++++
 gcc/graphite-poly.c    |   18 ++++++++++++++++++
 gcc/graphite-poly.h    |    1 +
 3 files changed, 24 insertions(+), 0 deletions(-)

diff --git a/gcc/ChangeLog.graphite b/gcc/ChangeLog.graphite
index ea2c115..f2c4127 100644
--- a/gcc/ChangeLog.graphite
+++ b/gcc/ChangeLog.graphite
@@ -1,5 +1,10 @@
 2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>
 
+	* graphite-poly.c (cloog_checksum): New.
+	* graphite-poly.h (cloog_checksum): Declared.
+
+2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>
+
 	* graphite-poly.c (pbb_number_of_iterations): Removed.
 	(pbb_number_of_iterations_at_time): Correctly compute the number
 	of iterations in the transformed loop.
diff --git a/gcc/graphite-poly.c b/gcc/graphite-poly.c
index 0a3cc1f..2fc7124 100644
--- a/gcc/graphite-poly.c
+++ b/gcc/graphite-poly.c
@@ -1911,5 +1911,23 @@ dot_lst (lst_p lst)
 #endif
 }
 
+/* Computes a checksum for the code generated by CLooG for SCOP.  */
+
+DEBUG_FUNCTION void
+cloog_checksum (scop_p scop ATTRIBUTE_UNUSED)
+{
+  /* When debugging, enable the following code.  This cannot be used
+     in production compilers because it calls "system".  */
+#if 1
+  FILE *stream = fopen ("/tmp/scop.cloog", "w");
+  gcc_assert (stream);
+  print_cloog (stream, scop, 0);
+  fclose (stream);
+
+  fputs ("\n", stdout);
+  system ("cloog -compilable 1 /tmp/scop.cloog > /tmp/scop.c ; gcc -O0 -g /tmp/scop.c -lm -o /tmp/scop; /tmp/scop | md5sum ");
+#endif
+}
+
 #endif
 
diff --git a/gcc/graphite-poly.h b/gcc/graphite-poly.h
index 98ce124..b9bf1ed 100644
--- a/gcc/graphite-poly.h
+++ b/gcc/graphite-poly.h
@@ -1398,6 +1398,7 @@ extern int scop_max_loop_depth (scop_p);
 extern int unify_scattering_dimensions (scop_p);
 extern bool apply_poly_transforms (scop_p);
 extern bool graphite_legal_transform (scop_p);
+extern void cloog_checksum (scop_p);
 
 /* Set the region of SCOP to REGION.  */
 
-- 
1.7.0.4

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/8] Use FOR_EACH_VEC_ELT.
@ 2010-09-09 19:31 Sebastian Pop
  2010-09-09 19:31 ` [PATCH 5/8] Fix lst_update_scattering Sebastian Pop
                   ` (6 more replies)
  0 siblings, 7 replies; 11+ messages in thread
From: Sebastian Pop @ 2010-09-09 19:31 UTC (permalink / raw)
  To: gcc-patches; +Cc: gcc-graphite, Sebastian Pop

2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>

	* graphite-poly.h (lst_dewey_number): Use FOR_EACH_VEC_ELT.
---
 gcc/ChangeLog.graphite |    4 ++++
 gcc/graphite-poly.h    |    2 +-
 2 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/gcc/ChangeLog.graphite b/gcc/ChangeLog.graphite
index ed502f2..028da12 100644
--- a/gcc/ChangeLog.graphite
+++ b/gcc/ChangeLog.graphite
@@ -1,3 +1,7 @@
+2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>
+
+	* graphite-poly.h (lst_dewey_number): Use FOR_EACH_VEC_ELT.
+
 2010-09-02  Vladimir Kargov  <kargov@gmail.com>
 
 	* graphite-scop-detection.c (is_valid_expr_p, is_valid_loop_p): New.
diff --git a/gcc/graphite-poly.h b/gcc/graphite-poly.h
index 0e1aa1f..5ed1b04 100644
--- a/gcc/graphite-poly.h
+++ b/gcc/graphite-poly.h
@@ -843,7 +843,7 @@ lst_dewey_number (lst_p lst)
   if (!LST_LOOP_FATHER (lst))
     return 0;
 
-  for (i = 0; VEC_iterate (lst_p, LST_SEQ (LST_LOOP_FATHER (lst)), i, l); i++)
+  FOR_EACH_VEC_ELT (lst_p, LST_SEQ (LST_LOOP_FATHER (lst)), i, l)
     if (l == lst)
       return i;
 
-- 
1.7.0.4

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 5/8] Fix lst_update_scattering.
  2010-09-09 19:31 [PATCH 1/8] Use FOR_EACH_VEC_ELT Sebastian Pop
@ 2010-09-09 19:31 ` Sebastian Pop
  2010-09-09 19:31 ` [PATCH 7/8] Add cloog_checksum Sebastian Pop
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 11+ messages in thread
From: Sebastian Pop @ 2010-09-09 19:31 UTC (permalink / raw)
  To: gcc-patches; +Cc: gcc-graphite, Sebastian Pop

2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>

	* graphite-poly.h (lst_update_scattering_seq): Removed.
	(lst_update_scattering): Correctly handle outermost loop dewey
	renumbering.
---
 gcc/ChangeLog.graphite |    6 ++++++
 gcc/graphite-poly.h    |   36 ++++++++++++++----------------------
 2 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/gcc/ChangeLog.graphite b/gcc/ChangeLog.graphite
index 831c2fe..7d37d41 100644
--- a/gcc/ChangeLog.graphite
+++ b/gcc/ChangeLog.graphite
@@ -1,5 +1,11 @@
 2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>
 
+	* graphite-poly.h (lst_update_scattering_seq): Removed.
+	(lst_update_scattering): Correctly handle outermost loop dewey
+	renumbering.
+
+2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>
+
 	* graphite-blocking.c (pbb_strip_mine_profitable_p): Renamed
 	lst_strip_mine_profitable_p.  Call lst_niter_for_loop.
 	* graphite-poly.h (lst_niter_for_loop): New.
diff --git a/gcc/graphite-poly.h b/gcc/graphite-poly.h
index 5f536a8..e93c2ad 100644
--- a/gcc/graphite-poly.h
+++ b/gcc/graphite-poly.h
@@ -1120,24 +1120,6 @@ lst_update_scattering_under (lst_p lst, int level, int dewey)
     pbb_update_scattering (LST_PBB (lst), level, dewey);
 }
 
-/* Updates the scattering of all the PBBs under LST and in sequence
-   with LST.  */
-
-static inline void
-lst_update_scattering_seq (lst_p lst)
-{
-  int i;
-  lst_p l;
-  lst_p father = LST_LOOP_FATHER (lst);
-  int dewey = lst_dewey_number (lst);
-  int level = lst_depth (lst);
-
-  gcc_assert (lst && father && dewey >= 0 && level >= 0);
-
-  for (i = dewey; VEC_iterate (lst_p, LST_SEQ (father), i, l); i++)
-    lst_update_scattering_under (l, level, i);
-}
-
 /* Updates the all the scattering levels of all the PBBs under
    LST.  */
 
@@ -1147,14 +1129,24 @@ lst_update_scattering (lst_p lst)
   int i;
   lst_p l;
 
-  if (!lst || !LST_LOOP_P (lst))
+  if (!lst)
     return;
 
   if (LST_LOOP_FATHER (lst))
-    lst_update_scattering_seq (lst);
+    {
+      lst_p father = LST_LOOP_FATHER (lst);
+      int dewey = lst_dewey_number (lst);
+      int level = lst_depth (lst);
 
-  for (i = 0; VEC_iterate (lst_p, LST_SEQ (lst), i, l); i++)
-    lst_update_scattering (l);
+      gcc_assert (lst && father && dewey >= 0 && level >= 0);
+
+      for (i = dewey; VEC_iterate (lst_p, LST_SEQ (father), i, l); i++)
+	lst_update_scattering_under (l, level, i);
+    }
+
+  if (LST_LOOP_P (lst))
+    for (i = 0; VEC_iterate (lst_p, LST_SEQ (lst), i, l); i++)
+      lst_update_scattering (l);
 }
 
 /* Inserts LST1 before LST2 if BEFORE is true; inserts LST1 after LST2
-- 
1.7.0.4

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 3/8] Call fatal_error when the transform read from file is not legal.
  2010-09-09 19:31 [PATCH 1/8] Use FOR_EACH_VEC_ELT Sebastian Pop
                   ` (3 preceding siblings ...)
  2010-09-09 19:31 ` [PATCH 4/8] Outline lst_niter_for_loop Sebastian Pop
@ 2010-09-09 19:36 ` Sebastian Pop
  2010-09-09 19:40 ` [PATCH 6/8] Fix pbb_number_of_iterations_at_time Sebastian Pop
  2010-09-09 19:41 ` [PATCH 2/8] Fix pretty printers Sebastian Pop
  6 siblings, 0 replies; 11+ messages in thread
From: Sebastian Pop @ 2010-09-09 19:36 UTC (permalink / raw)
  To: gcc-patches; +Cc: gcc-graphite, Sebastian Pop

2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>

	* graphite-poly.c (apply_poly_transforms): Do not abort when the
	transform read from disk is not legal.  Call fatal_error instead.
---
 gcc/ChangeLog.graphite |    5 +++++
 gcc/graphite-poly.c    |    6 +++++-
 2 files changed, 10 insertions(+), 1 deletions(-)

diff --git a/gcc/ChangeLog.graphite b/gcc/ChangeLog.graphite
index 3cc99df..46dd03b 100644
--- a/gcc/ChangeLog.graphite
+++ b/gcc/ChangeLog.graphite
@@ -1,5 +1,10 @@
 2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>
 
+	* graphite-poly.c (apply_poly_transforms): Do not abort when the
+	transform read from disk is not legal.  Call fatal_error instead.
+
+2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>
+
 	* graphite-poly.c (print_pbb_body): Add missing closing parenthesis.
 	(print_scop_header): Removed.  Inlined in the only call place...
 	(print_scop): ... here.
diff --git a/gcc/graphite-poly.c b/gcc/graphite-poly.c
index a648b45..4220c05 100644
--- a/gcc/graphite-poly.c
+++ b/gcc/graphite-poly.c
@@ -752,7 +752,11 @@ apply_poly_transforms (scop_p scop)
     {
       graphite_file = init_graphite_in_file (file_scop_number);
       transform_done |= graphite_read_scop_file (graphite_file, scop);
-      gcc_assert (graphite_legal_transform (scop));
+
+      if (!graphite_legal_transform (scop))
+	fatal_error ("the graphite file read for scop %d does not contain a legal transform",
+		     (int) file_scop_number);
+
       file_scop_number++;
     }
 
-- 
1.7.0.4

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 6/8] Fix pbb_number_of_iterations_at_time.
  2010-09-09 19:31 [PATCH 1/8] Use FOR_EACH_VEC_ELT Sebastian Pop
                   ` (4 preceding siblings ...)
  2010-09-09 19:36 ` [PATCH 3/8] Call fatal_error when the transform read from file is not legal Sebastian Pop
@ 2010-09-09 19:40 ` Sebastian Pop
  2010-09-09 19:41 ` [PATCH 2/8] Fix pretty printers Sebastian Pop
  6 siblings, 0 replies; 11+ messages in thread
From: Sebastian Pop @ 2010-09-09 19:40 UTC (permalink / raw)
  To: gcc-patches; +Cc: gcc-graphite, Sebastian Pop

2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>

	* graphite-poly.c (pbb_number_of_iterations): Removed.
	(pbb_number_of_iterations_at_time): Correctly compute the number
	of iterations in the transformed loop.
	* graphite-poly.h (pbb_number_of_iterations): Removed.
---
 gcc/ChangeLog.graphite     |    7 ++
 gcc/graphite-interchange.c |    2 +-
 gcc/graphite-poly.c        |  143 ++++++++++++++++++++++++++++---------------
 gcc/graphite-poly.h        |    1 -
 4 files changed, 101 insertions(+), 52 deletions(-)

diff --git a/gcc/ChangeLog.graphite b/gcc/ChangeLog.graphite
index 7d37d41..ea2c115 100644
--- a/gcc/ChangeLog.graphite
+++ b/gcc/ChangeLog.graphite
@@ -1,5 +1,12 @@
 2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>
 
+	* graphite-poly.c (pbb_number_of_iterations): Removed.
+	(pbb_number_of_iterations_at_time): Correctly compute the number
+	of iterations in the transformed loop.
+	* graphite-poly.h (pbb_number_of_iterations): Removed.
+
+2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>
+
 	* graphite-poly.h (lst_update_scattering_seq): Removed.
 	(lst_update_scattering): Correctly handle outermost loop dewey
 	renumbering.
diff --git a/gcc/graphite-interchange.c b/gcc/graphite-interchange.c
index 83027d3..e4cab49 100644
--- a/gcc/graphite-interchange.c
+++ b/gcc/graphite-interchange.c
@@ -320,7 +320,7 @@ pdr_stride_in_loop (mpz_t stride, graphite_dim_t depth, poly_dr_p pdr)
     {
       char *str;
       void (*gmp_free) (void *, size_t);
-      
+
       fprintf (dump_file, "\nStride in BB_%d, DR_%d, depth %d:",
 	       pbb_index (pbb), PDR_ID (pdr), (int) depth);
       str = mpz_get_str (0, 10, stride);
diff --git a/gcc/graphite-poly.c b/gcc/graphite-poly.c
index 4220c05..0a3cc1f 100644
--- a/gcc/graphite-poly.c
+++ b/gcc/graphite-poly.c
@@ -1623,72 +1623,115 @@ psct_scattering_dim_for_loop_depth (poly_bb_p pbb, graphite_dim_t loop_depth)
   gcc_unreachable ();
 }
 
-/* Returns the number of iterations NITER of the loop around PBB at
-   depth LOOP_DEPTH.  */
-
-void
-pbb_number_of_iterations (poly_bb_p pbb,
-			  graphite_dim_t loop_depth,
-			  mpz_t niter)
-{
-  ppl_Linear_Expression_t le;
-  ppl_dimension_type dim;
-
-  ppl_Pointset_Powerset_C_Polyhedron_space_dimension (PBB_DOMAIN (pbb), &dim);
-  ppl_new_Linear_Expression_with_dimension (&le, dim);
-  ppl_set_coef (le, pbb_iterator_dim (pbb, loop_depth), 1);
-  mpz_set_si (niter, -1);
-  ppl_max_for_le_pointset (PBB_DOMAIN (pbb), le, niter);
-  ppl_delete_Linear_Expression (le);
-}
-
-/* Returns the number of iterations NITER of the loop around PBB at
+/* Returns the number of iterations RES of the loop around PBB at
    time(scattering) dimension TIME_DEPTH.  */
 
 void
 pbb_number_of_iterations_at_time (poly_bb_p pbb,
 				  graphite_dim_t time_depth,
-				  mpz_t niter)
+				  mpz_t res)
 {
-  ppl_Pointset_Powerset_C_Polyhedron_t ext_domain, sctr;
+  ppl_Pointset_Powerset_C_Polyhedron_t domain, sctr_lb, sctr_ub;
+  ppl_dimension_type domain_dim, sctr_dim;
   ppl_Linear_Expression_t le;
-  ppl_dimension_type dim;
-
-  /* Takes together domain and scattering polyhedrons, and composes
-     them into the bigger polyhedron that has the following format:
+  mpz_t lb, ub, diff, one;
+  int i;
 
-     t0..t_{n-1} | l0..l_{nlcl-1} | i0..i_{niter-1} | g0..g_{nparm-1}
+  ppl_Polyhedron_space_dimension (PBB_TRANSFORMED_SCATTERING (pbb), &sctr_dim);
 
-     where
-     | t0..t_{n-1} are time dimensions (scattering dimensions)
-     | l0..l_{nclc-1} are local variables in scattering function
-     | i0..i_{niter-1} are original iteration variables
-     | g0..g_{nparam-1} are global parameters.  */
+  ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron
+    (&domain, PBB_DOMAIN (pbb));
 
-  ppl_new_Pointset_Powerset_C_Polyhedron_from_C_Polyhedron (&sctr,
-      PBB_TRANSFORMED_SCATTERING (pbb));
+  ppl_Pointset_Powerset_C_Polyhedron_space_dimension (domain, &domain_dim);
+  mpz_init (diff);
+  mpz_init (lb);
+  mpz_init (ub);
+  mpz_init (one);
+  mpz_set_si (one, 1);
 
-  /* Extend the iteration domain with the scattering dimensions:
-     0..0 | 0..0 | i0..i_{niter-1} | g0..g_{nparm-1}.   */
-  ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron
-    (&ext_domain, PBB_DOMAIN (pbb));
-  ppl_insert_dimensions_pointset (ext_domain, 0,
-                                  pbb_nb_scattering_transform (pbb)
-                                  + pbb_nb_local_vars (pbb));
+  /* Compute the upper bound on the original iteration domain and add
+     that upper bound to the scattering.  */
+  ppl_new_Pointset_Powerset_C_Polyhedron_from_C_Polyhedron
+    (&sctr_ub, PBB_TRANSFORMED_SCATTERING (pbb));
+  for (i = 0; i < (int) domain_dim; i++)
+    {
+      ppl_Linear_Expression_t eq;
+      ppl_Constraint_t pc;
+      ppl_Constraint_System_t cs;
+      ppl_Polyhedron_t ph;
+      ppl_Pointset_Powerset_C_Polyhedron_t pph;
+
+      ppl_new_Linear_Expression_with_dimension (&le, domain_dim);
+      ppl_set_coef (le, i, 1);
+      ppl_min_for_le_pointset (domain, le, lb);
+      ppl_max_for_le_pointset (domain, le, ub);
+      mpz_sub (diff, ub, lb);
+      mpz_add (diff, diff, one);
+
+      ppl_new_Linear_Expression_with_dimension (&eq, sctr_dim);
+      ppl_set_coef (eq, psct_iterator_dim (pbb, i), -1);
+      ppl_set_inhomogeneous_gmp (eq, diff);
+
+      ppl_new_Constraint (&pc, eq, PPL_CONSTRAINT_TYPE_EQUAL);
+      ppl_new_Constraint_System_from_Constraint (&cs, pc);
+      ppl_new_C_Polyhedron_from_Constraint_System (&ph, cs);
+      ppl_new_Pointset_Powerset_C_Polyhedron_from_C_Polyhedron (&pph, ph);
+      ppl_Pointset_Powerset_C_Polyhedron_intersection_assign (sctr_ub, pph);
+
+      ppl_delete_Linear_Expression (le);
+      ppl_delete_Linear_Expression (eq);
+      ppl_delete_Polyhedron (ph);
+      ppl_delete_Pointset_Powerset_C_Polyhedron (pph);
+      ppl_delete_Constraint (pc);
+      ppl_delete_Constraint_System (cs);
+    }
 
-  /* Add to sctr the extended domain.  */
-  ppl_Pointset_Powerset_C_Polyhedron_intersection_assign (sctr, ext_domain);
+  /* Compute the lower bound on the original iteration domain.  */
+  ppl_new_Pointset_Powerset_C_Polyhedron_from_C_Polyhedron
+    (&sctr_lb, PBB_TRANSFORMED_SCATTERING (pbb));
+  for (i = 0; i < (int) domain_dim; i++)
+    {
+      ppl_Linear_Expression_t eq;
+      ppl_Constraint_t pc;
+      ppl_Constraint_System_t cs;
+      ppl_Polyhedron_t ph;
+      ppl_Pointset_Powerset_C_Polyhedron_t pph;
+
+      ppl_new_Linear_Expression_with_dimension (&le, domain_dim);
+      ppl_set_coef (le, i, 1);
+      ppl_min_for_le_pointset (domain, le, lb);
+
+      ppl_new_Linear_Expression_with_dimension (&eq, sctr_dim);
+      ppl_set_coef (eq, psct_iterator_dim (pbb, i), -1);
+      ppl_set_inhomogeneous_gmp (eq, lb);
+
+      ppl_new_Constraint (&pc, eq, PPL_CONSTRAINT_TYPE_EQUAL);
+      ppl_new_Constraint_System_from_Constraint (&cs, pc);
+      ppl_new_C_Polyhedron_from_Constraint_System (&ph, cs);
+      ppl_new_Pointset_Powerset_C_Polyhedron_from_C_Polyhedron (&pph, ph);
+      ppl_Pointset_Powerset_C_Polyhedron_intersection_assign (sctr_lb, pph);
+
+      ppl_delete_Linear_Expression (le);
+      ppl_delete_Linear_Expression (eq);
+      ppl_delete_Polyhedron (ph);
+      ppl_delete_Pointset_Powerset_C_Polyhedron (pph);
+      ppl_delete_Constraint (pc);
+      ppl_delete_Constraint_System (cs);
+    }
 
   /* Extract the number of iterations.  */
-  ppl_Pointset_Powerset_C_Polyhedron_space_dimension (sctr, &dim);
-  ppl_new_Linear_Expression_with_dimension (&le, dim);
+  ppl_new_Linear_Expression_with_dimension (&le, sctr_dim);
   ppl_set_coef (le, time_depth, 1);
-  mpz_set_si (niter, -1);
-  ppl_max_for_le_pointset (sctr, le, niter);
+  ppl_min_for_le_pointset (sctr_lb, le, lb);
+  ppl_max_for_le_pointset (sctr_ub, le, ub);
+  mpz_sub (res, ub, lb);
 
-  ppl_delete_Linear_Expression (le);
-  ppl_delete_Pointset_Powerset_C_Polyhedron (sctr);
-  ppl_delete_Pointset_Powerset_C_Polyhedron (ext_domain);
+  mpz_clear (one);
+  mpz_clear (diff);
+  mpz_clear (lb);
+  mpz_clear (ub);
+  ppl_delete_Pointset_Powerset_C_Polyhedron (sctr_ub);
+  ppl_delete_Pointset_Powerset_C_Polyhedron (sctr_lb);
 }
 
 /* Translates LOOP to LST.  */
diff --git a/gcc/graphite-poly.h b/gcc/graphite-poly.h
index e93c2ad..98ce124 100644
--- a/gcc/graphite-poly.h
+++ b/gcc/graphite-poly.h
@@ -414,7 +414,6 @@ extern void debug_iteration_domains (scop_p, int);
 extern bool scop_do_interchange (scop_p);
 extern bool scop_do_strip_mine (scop_p);
 extern bool scop_do_block (scop_p);
-extern void pbb_number_of_iterations (poly_bb_p, graphite_dim_t, mpz_t);
 extern void pbb_number_of_iterations_at_time (poly_bb_p, graphite_dim_t, mpz_t);
 extern void pbb_remove_duplicate_pdrs (poly_bb_p);
 
-- 
1.7.0.4

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 2/8] Fix pretty printers.
  2010-09-09 19:31 [PATCH 1/8] Use FOR_EACH_VEC_ELT Sebastian Pop
                   ` (5 preceding siblings ...)
  2010-09-09 19:40 ` [PATCH 6/8] Fix pbb_number_of_iterations_at_time Sebastian Pop
@ 2010-09-09 19:41 ` Sebastian Pop
  6 siblings, 0 replies; 11+ messages in thread
From: Sebastian Pop @ 2010-09-09 19:41 UTC (permalink / raw)
  To: gcc-patches; +Cc: gcc-graphite, Sebastian Pop

2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>

	* graphite-poly.c (print_pbb_body): Add missing closing parenthesis.
	(print_scop_header): Removed.  Inlined in the only call place...
	(print_scop): ... here.
---
 gcc/ChangeLog.graphite |    6 ++++++
 gcc/graphite-poly.c    |   36 +++++++++++++++---------------------
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/gcc/ChangeLog.graphite b/gcc/ChangeLog.graphite
index 028da12..3cc99df 100644
--- a/gcc/ChangeLog.graphite
+++ b/gcc/ChangeLog.graphite
@@ -1,5 +1,11 @@
 2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>
 
+	* graphite-poly.c (print_pbb_body): Add missing closing parenthesis.
+	(print_scop_header): Removed.  Inlined in the only call place...
+	(print_scop): ... here.
+
+2010-09-09  Sebastian Pop  <sebastian.pop@amd.com>
+
 	* graphite-poly.h (lst_dewey_number): Use FOR_EACH_VEC_ELT.
 
 2010-09-02  Vladimir Kargov  <kargov@gmail.com>
diff --git a/gcc/graphite-poly.c b/gcc/graphite-poly.c
index 2bc0023..a648b45 100644
--- a/gcc/graphite-poly.c
+++ b/gcc/graphite-poly.c
@@ -1247,13 +1247,16 @@ print_pbb_body (FILE *file, poly_bb_p pbb, int verbosity,
     fprintf (file, "# Body (\n");
 
   if (!statement_body_provided)
-  {
-    if (verbosity > 0)
-      fprintf (file, "# Statement body is not provided\n");
+    {
+      if (verbosity > 0)
+	fprintf (file, "# Statement body is not provided\n");
 
-   fprintf (file, "0\n");
-   return;
-  }
+      fprintf (file, "0\n");
+
+      if (verbosity > 1)
+	fprintf (file, "#)\n");
+      return;
+    }
 
   if (verbosity > 0)
     fprintf (file, "# Statement body is provided\n");
@@ -1392,12 +1395,14 @@ print_scop_context (FILE *file, scop_p scop, int verbosity)
     fprintf (file, "# )\n");
 }
 
-/* Print to FILE the SCOP header: context, parameters, and statements
-   number.  */
+/* Print to FILE the SCOP, at some VERBOSITY level.  */
 
-static void
-print_scop_header (FILE *file, scop_p scop, int verbosity)
+void
+print_scop (FILE *file, scop_p scop, int verbosity)
 {
+  int i;
+  poly_bb_p pbb;
+
   fprintf (file, "SCoP 1\n#(\n");
   fprintf (file, "# Language\nGimple\n");
   openscop_print_scop_context (file, scop, verbosity);
@@ -1407,17 +1412,6 @@ print_scop_header (FILE *file, scop_p scop, int verbosity)
     fprintf (file, "# Number of statements\n");
 
   fprintf (file, "%d\n",VEC_length (poly_bb_p, SCOP_BBS (scop)));
-}
-
-/* Print to FILE the SCOP, at some VERBOSITY level.  */
-
-void
-print_scop (FILE *file, scop_p scop, int verbosity)
-{
-  int i;
-  poly_bb_p pbb;
-
-  print_scop_header (file, scop, verbosity);
 
   FOR_EACH_VEC_ELT (poly_bb_p, SCOP_BBS (scop), i, pbb)
     print_pbb (file, pbb, verbosity);
-- 
1.7.0.4

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 8/8] New pass: loop flattening.
  2010-09-09 19:31 ` [PATCH 8/8] New pass: loop flattening Sebastian Pop
@ 2010-09-09 19:45   ` Nathan Froyd
  2010-09-09 19:46     ` Sebastian Pop
  0 siblings, 1 reply; 11+ messages in thread
From: Nathan Froyd @ 2010-09-09 19:45 UTC (permalink / raw)
  To: Sebastian Pop; +Cc: gcc-patches, gcc-graphite

On Thu, Sep 09, 2010 at 02:30:06PM -0500, Sebastian Pop wrote:
> This patch implements a loop flattening pass that has been described
> in a very Fortran specific way in the 1992 paper by Reinhard von
> Hanxleden and Ken Kennedy: "Relaxing SIMD Control Flow Constraints
> using Loop Transformations":
> http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.5033

Could you please insert a reference or short blurb about the paper in
the file itself?  It's always helpful to have some idea of where an
optimization is coming from.

-Nathan

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 8/8] New pass: loop flattening.
  2010-09-09 19:45   ` Nathan Froyd
@ 2010-09-09 19:46     ` Sebastian Pop
  2010-09-09 20:20       ` Sebastian Pop
  0 siblings, 1 reply; 11+ messages in thread
From: Sebastian Pop @ 2010-09-09 19:46 UTC (permalink / raw)
  To: Nathan Froyd; +Cc: gcc-patches, gcc-graphite

On Thu, Sep 9, 2010 at 14:36, Nathan Froyd <froydnj@codesourcery.com> wrote:
> On Thu, Sep 09, 2010 at 02:30:06PM -0500, Sebastian Pop wrote:
>> This patch implements a loop flattening pass that has been described
>> in a very Fortran specific way in the 1992 paper by Reinhard von
>> Hanxleden and Ken Kennedy: "Relaxing SIMD Control Flow Constraints
>> using Loop Transformations":
>> http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.5033
>
> Could you please insert a reference or short blurb about the paper in
> the file itself?  It's always helpful to have some idea of where an
> optimization is coming from.

Thanks for the suggestion, I will do that right now.

Sebastian

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 8/8] New pass: loop flattening.
  2010-09-09 19:46     ` Sebastian Pop
@ 2010-09-09 20:20       ` Sebastian Pop
  0 siblings, 0 replies; 11+ messages in thread
From: Sebastian Pop @ 2010-09-09 20:20 UTC (permalink / raw)
  To: Nathan Froyd; +Cc: gcc-patches, gcc-graphite

[-- Attachment #1: Type: text/plain, Size: 459 bytes --]

Oh, by the way, here is an example of a flattened matrix multiplication:

#define N 11

int A[N][N], B[N][N], C[N][N];

static void
matmult (void)
{
  int i, j, k, l;

  for (i = 0; i < N; i++)
    {
      for (j = 0; j < 1; j++)
	{
	  A[i][j] = 0;
	  for (k = 0; k < N; k++)
	    A[i][j] += B[i][k] * C[k][j];
	}
    }
}

See the attached files for the cloog output for the non transformed kernel
matmult.c and the flattened kernel matmult-flat.c

Sebastian

[-- Attachment #2: matmult.c --]
[-- Type: text/x-c++, Size: 1091 bytes --]

/* Generated from foo1.cloog by CLooG 0.14.0-309-g4a19d7b gmp bits in 0.02s. */
/* DON'T FORGET TO USE -lm OPTION TO COMPILE. */

/* Useful headers. */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

/* Parameter value. */
/* Useful macros. */
#define floord(n,d) (((n)<0) ? -((-(n)+(d)-1)/(d)) : (n)/(d))
#define ceild(n,d)  (((n)<0) ? -((-(n))/(d)) : ((n)+(d)-1)/(d))
#define max(x,y)    ((x) > (y) ? (x) : (y))
#define min(x,y)    ((x) < (y) ? (x) : (y))

/* Statement macros (please set). */
#define S1(i,j) {total++; printf("S1 %d %d\n",i,j);}
#define S2(i,j,k) {total++; printf("S2 %d %d %d\n",i,j,k);}
#define S3(i,j) {total++; printf("S3 %d %d\n",i,j);}
#define S4(i,j) {total++; printf("S4 %d %d\n",i,j);}

int main() {
  /* Scattering iterators. */
  int c2, c4, c6;
  /* Original iterators. */
  int i, j, k;
  int total=0;

  for (c2=0;c2<=10;c2++) {
    for (c4=0;c4<=10;c4++) {
      S1(c2,c4);
      for (c6=0;c6<=10;c6++) {
        S2(c2,c4,c6);
      }
      S3(c2,c4);
      S4(c2,c4);
    }
  }

  printf("Number of integral points: %d.\n",total);
  return 0;
}

[-- Attachment #3: matmult-flat.c --]
[-- Type: text/x-c++, Size: 1444 bytes --]

/* Generated from foo.cloog by CLooG 0.14.0-309-g4a19d7b gmp bits in 0.02s. */
/* DON'T FORGET TO USE -lm OPTION TO COMPILE. */

/* Useful headers. */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

/* Parameter value. */
/* Useful macros. */
#define floord(n,d) (((n)<0) ? -((-(n)+(d)-1)/(d)) : (n)/(d))
#define ceild(n,d)  (((n)<0) ? -((-(n))/(d)) : ((n)+(d)-1)/(d))
#define max(x,y)    ((x) > (y) ? (x) : (y))
#define min(x,y)    ((x) < (y) ? (x) : (y))

/* Statement macros (please set). */
#define S1(i,j) {total++; printf("S1 %d %d\n",i,j);}
#define S2(i,j,k) {total++; printf("S2 %d %d %d\n",i,j,k);}
#define S3(i,j) {total++; printf("S3 %d %d\n",i,j);}
#define S4(i,j) {total++; printf("S4 %d %d\n",i,j);}

int main() {
  /* Scattering iterators. */
  int c2;
  /* Original iterators. */
  int i, j, k;
  int total=0;

  S1(0,0);
  for (c2=0;c2<=9;c2++) {
    S2(0,0,c2);
  }
  for (c2=10;c2<=1320;c2++) {
    if (c2%121 <= 110) {
      i = floord(c2,121);
      if (c2%11 == 0) {
        S1(i,(c2-121*i)/11);
      }
    }
    i = floord(c2,121);
    j = floord(c2-121*i,11);
    S2(i,j,c2-121*i-11*j);
    if ((c2+111)%121 <= 110) {
      i = floord(c2-10,121);
      if ((c2+1)%11 == 0) {
        S3(i,(c2-121*i-10)/11);
        S4(i,(c2-121*i-10)/11);
      }
    }
  }
  for (c2=1321;c2<=1330;c2++) {
    S2(10,10,c2-1320);
  }
  S3(10,10);
  S4(10,10);

  printf("Number of integral points: %d.\n",total);
  return 0;
}

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2010-09-09 19:45 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-09-09 19:31 [PATCH 1/8] Use FOR_EACH_VEC_ELT Sebastian Pop
2010-09-09 19:31 ` [PATCH 5/8] Fix lst_update_scattering Sebastian Pop
2010-09-09 19:31 ` [PATCH 7/8] Add cloog_checksum Sebastian Pop
2010-09-09 19:31 ` [PATCH 8/8] New pass: loop flattening Sebastian Pop
2010-09-09 19:45   ` Nathan Froyd
2010-09-09 19:46     ` Sebastian Pop
2010-09-09 20:20       ` Sebastian Pop
2010-09-09 19:31 ` [PATCH 4/8] Outline lst_niter_for_loop Sebastian Pop
2010-09-09 19:36 ` [PATCH 3/8] Call fatal_error when the transform read from file is not legal Sebastian Pop
2010-09-09 19:40 ` [PATCH 6/8] Fix pbb_number_of_iterations_at_time Sebastian Pop
2010-09-09 19:41 ` [PATCH 2/8] Fix pretty printers Sebastian Pop

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).