public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [RFC 4/5] Handle constant-pool entries
  2015-08-25 11:06 [PATCH 0/5][tree-sra.c] PR/63679 Make SRA replace constant pool loads Alan Lawrence
@ 2015-08-25 11:06 ` Alan Lawrence
  2015-08-25 20:19   ` Jeff Law
  2015-08-26 14:08   ` Martin Jambor
  2015-08-25 11:06 ` [RFC 5/5] Always completely replace constant pool entries Alan Lawrence
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 45+ messages in thread
From: Alan Lawrence @ 2015-08-25 11:06 UTC (permalink / raw)
  To: gcc-patches; +Cc: rguenther, mjambor, Alan Lawrence

This makes SRA replace loads of records/arrays from constant pool entries,
with elementwise assignments of the constant values, hence, overcoming the
fundamental problem in PR/63679.

As a first pass, the approach I took was to look for constant-pool loads as
we scanned through other accesses, and add them as candidates there; to build a
constant replacement_decl for any such accesses in completely_scalarize; and to
use any existing replacement_decl rather than creating a variable in
create_access_replacement. (I did try using CONSTANT_CLASS_P in the latter, but
that does not allow addresses of labels, which can still end up in the constant
pool.)

Feedback as to the approach or how it might be better structured / fitted into
SRA, is solicited ;).

Bootstrapped + check-gcc on x86-none-linux-gnu, aarch64-none-linux-gnu and
arm-none-linux-gnueabihf, including with the next patch (rfc), which greatly increases the number of testcases in which this code is exercised!

Have also verified that the ssa-dom-cse-2.c scan-tree-dump test passes (using a stage 1 compiler only, without execution) on alpha, hppa, powerpc, sparc, avr, and sh.

gcc/ChangeLog:

	* tree-sra.c (create_access): Scan for uses of constant pool and add
	to candidates.
	(subst_initial): New.
	(scalarize_elem): Build replacement_decl using subst_initial.
	(create_access_replacement): Use replacement_decl if set.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/ssa-dom-cse-2.c: Remove xfail, add --param
	sra-max-scalarization-size-Ospeed.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c |  7 +---
 gcc/tree-sra.c                                | 56 +++++++++++++++++++++++++--
 2 files changed, 55 insertions(+), 8 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
index 9eccdc9..b13d583 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -fno-tree-fre -fno-tree-pre -fdump-tree-optimized" } */
+/* { dg-options "-O3 -fno-tree-fre -fno-tree-pre -fdump-tree-optimized --param sra-max-scalarization-size-Ospeed=32" } */
 
 int
 foo ()
@@ -17,7 +17,4 @@ foo ()
 /* After late unrolling the above loop completely DOM should be
    able to optimize this to return 28.  */
 
-/* See PR63679 and PR64159, if the target forces the initializer to memory then
-   DOM is not able to perform this optimization.  */
-
-/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail aarch64*-*-* alpha*-*-* hppa*-*-* powerpc*-*-* sparc*-*-* s390*-*-* } } } */
+/* { dg-final { scan-tree-dump "return 28;" "optimized" } } */
diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index af35fcc..a3ff2df 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -865,6 +865,17 @@ create_access (tree expr, gimple stmt, bool write)
   else
     ptr = false;
 
+  /* FORNOW: scan for uses of constant pool as we go along.  */
+  if (TREE_CODE (base) == VAR_DECL && DECL_IN_CONSTANT_POOL (base)
+      && !bitmap_bit_p (candidate_bitmap, DECL_UID (base)))
+    {
+      gcc_assert (!write);
+      bitmap_set_bit (candidate_bitmap, DECL_UID (base));
+      tree_node **slot = candidates->find_slot_with_hash (base, DECL_UID (base),
+							  INSERT);
+      *slot = base;
+    }
+
   if (!DECL_P (base) || !bitmap_bit_p (candidate_bitmap, DECL_UID (base)))
     return NULL;
 
@@ -1025,6 +1036,37 @@ completely_scalarize (tree base, tree decl_type, HOST_WIDE_INT offset, tree ref)
     }
 }
 
+static tree
+subst_initial (tree expr, tree var)
+{
+  if (TREE_CODE (expr) == VAR_DECL)
+    {
+      gcc_assert (DECL_IN_CONSTANT_POOL (expr));
+      gcc_assert (expr == var);
+      return DECL_INITIAL (expr);
+    }
+  if (TREE_CODE (expr) == COMPONENT_REF)
+    {
+      gcc_assert (TREE_CODE (TREE_OPERAND (expr, 1)) == FIELD_DECL);
+      gcc_assert (TREE_OPERAND (expr, 2) == NULL_TREE);
+      return fold_build3 (COMPONENT_REF, TREE_TYPE (expr),
+			  subst_initial (TREE_OPERAND (expr, 0), var),
+			  TREE_OPERAND (expr, 1),
+			  NULL_TREE);
+    }
+  if (TREE_CODE (expr) == ARRAY_REF)
+    {
+      gcc_assert (TREE_OPERAND (expr, 2) == NULL_TREE);
+      gcc_assert (TREE_OPERAND (expr, 3) == NULL_TREE);
+      return fold (build4 (ARRAY_REF, TREE_TYPE (expr),
+			   subst_initial (TREE_OPERAND (expr, 0), var),
+			   TREE_OPERAND (expr, 1),
+			   NULL_TREE,
+			   NULL_TREE));
+    }
+  gcc_unreachable ();
+}
+
 static void
 scalarize_elem (tree base, HOST_WIDE_INT pos, HOST_WIDE_INT size,
 		 tree ref, tree type)
@@ -1033,6 +1075,9 @@ scalarize_elem (tree base, HOST_WIDE_INT pos, HOST_WIDE_INT size,
   {
     struct access *access = create_access_1 (base, pos, size);
     access->expr = ref;
+    if (TREE_CODE (base) == VAR_DECL
+	&& DECL_IN_CONSTANT_POOL (base))
+      access->replacement_decl = subst_initial (ref, base);
     access->type = type;
     access->grp_total_scalarization = 1;
     /* Accesses for intraprocedural SRA can have their stmt NULL.  */
@@ -2038,14 +2083,19 @@ sort_and_splice_var_accesses (tree var)
 }
 
 /* Create a variable for the given ACCESS which determines the type, name and a
-   few other properties.  Return the variable declaration and store it also to
-   ACCESS->replacement.  */
+   few other properties.  Return the variable declaration.  */
 
 static tree
 create_access_replacement (struct access *access)
 {
-  tree repl;
+  if (access->replacement_decl)
+    {
+      gcc_assert (!access->write);
+      gcc_assert (is_gimple_reg_type (TREE_TYPE (access->expr)));
+      return access->replacement_decl;
+    }
 
+  tree repl;
   if (access->grp_to_be_debug_replaced)
     {
       repl = create_tmp_var_raw (access->type);
-- 
1.8.3

^ permalink raw reply	[flat|nested] 45+ messages in thread

* [PATCH 0/5][tree-sra.c] PR/63679 Make SRA replace constant pool loads
@ 2015-08-25 11:06 Alan Lawrence
  2015-08-25 11:06 ` [RFC 4/5] Handle constant-pool entries Alan Lawrence
                   ` (4 more replies)
  0 siblings, 5 replies; 45+ messages in thread
From: Alan Lawrence @ 2015-08-25 11:06 UTC (permalink / raw)
  To: gcc-patches; +Cc: rguenther, mjambor

ssa-dom-cse-2.c fails on a number of platforms because the input array is pushed
out to the constant pool, preventing later stages from folding away the entire
computation. This patch series fixes the failure by extending SRA to pull the
constants back in.

This is my first patch(set) to SRA and as such I'd appreciate suggestions about
the approach. I think the first two patches, which essentially just extend SRA
to deal with ARRAY_TYPE as well as RECORD_TYPE, are fairly straightforward and
may stand alone. Later patches, in particular, may be better done in a different
way and I'd welcome feedback as to what a patch (series) should look like.

Also the heuristic for controlling SRA, when dealing with constant-pool loads,
may want something better/other than the default
--param sra-max-scalarization-size-O{speed,size}, or else platforms where the
initializer is forced to memory, will still suffer in terms of constant
propagation.

^ permalink raw reply	[flat|nested] 45+ messages in thread

* [RFC 5/5] Always completely replace constant pool entries
  2015-08-25 11:06 [PATCH 0/5][tree-sra.c] PR/63679 Make SRA replace constant pool loads Alan Lawrence
  2015-08-25 11:06 ` [RFC 4/5] Handle constant-pool entries Alan Lawrence
@ 2015-08-25 11:06 ` Alan Lawrence
  2015-08-25 20:09   ` Jeff Law
  2015-08-25 11:21 ` [PATCH 2/5] completely_scalarize arrays as well as records Alan Lawrence
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 45+ messages in thread
From: Alan Lawrence @ 2015-08-25 11:06 UTC (permalink / raw)
  To: gcc-patches; +Cc: rguenther, mjambor, Alan Lawrence

I used this as a means of better-testing the previous changes, as it exercises
the constant replacement code a whole lot more. Indeed, quite a few tests are
now optimized away to nothing on AArch64...

Always pulling in constants, is almost certainly not what we want, but we may
nonetheless want something more aggressive than the usual --param, e.g. for the
ssa-dom-cse-2.c test. Thoughts welcomed?

Thanks, Alan

gcc/ChangeLog:

	* tree-sra.c (analyze_all_variable_accesses): Bypass size limit for
	constant-pool accesses.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/ssa-dom-cse-2.c: Remove --param
	sra-max-scalarization-size-Ospeed.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c | 2 +-
 gcc/tree-sra.c                                | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
index b13d583..370b785 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -fno-tree-fre -fno-tree-pre -fdump-tree-optimized --param sra-max-scalarization-size-Ospeed=32" } */
+/* { dg-options "-O3 -fno-tree-fre -fno-tree-pre -fdump-tree-optimized" } */
 
 int
 foo ()
diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index a3ff2df..2a741b8 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -2651,7 +2651,8 @@ analyze_all_variable_accesses (void)
 	    && scalarizable_type_p (TREE_TYPE (var)))
 	  {
 	    if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (var)))
-		<= max_scalarization_size)
+		  <= max_scalarization_size
+		|| DECL_IN_CONSTANT_POOL (var))
 	      {
 		create_total_scalarization_access (var);
 		completely_scalarize (var, TREE_TYPE (var), 0, var);
-- 
1.8.3

^ permalink raw reply	[flat|nested] 45+ messages in thread

* [PATCH 2/5] completely_scalarize arrays as well as records
  2015-08-25 11:06 [PATCH 0/5][tree-sra.c] PR/63679 Make SRA replace constant pool loads Alan Lawrence
  2015-08-25 11:06 ` [RFC 4/5] Handle constant-pool entries Alan Lawrence
  2015-08-25 11:06 ` [RFC 5/5] Always completely replace constant pool entries Alan Lawrence
@ 2015-08-25 11:21 ` Alan Lawrence
  2015-08-25 19:40   ` Jeff Law
  2015-08-25 21:44   ` [PATCH 2/5] completely_scalarize arrays as well as records Martin Jambor
  2015-08-25 11:30 ` [PATCH 1/5] Refactor completely_scalarize_var Alan Lawrence
  2015-08-25 12:30 ` [PATCH 3/5] Build ARRAY_REFs when the base is of ARRAY_TYPE Alan Lawrence
  4 siblings, 2 replies; 45+ messages in thread
From: Alan Lawrence @ 2015-08-25 11:21 UTC (permalink / raw)
  To: gcc-patches; +Cc: rguenther, mjambor, Alan Lawrence

This changes the completely_scalarize_record path to also work on arrays (thus
allowing records containing arrays, etc.). This just required extending the
existing type_consists_of_records_p and completely_scalarize_record methods
to handle things of ARRAY_TYPE as well as RECORD_TYPE. Hence, I renamed both
methods so as not to mention 'record'.

Bootstrapped + check-gcc on aarch64-none-linux-gnu, arm-none-linux-gnueabihf and x86_64-none-linux-gnu.

Have also verified the scan-tree-dump check in the new sra-15.c passes (using a stage 1 compiler only, no execution test) on alpha, hppa, powerpc, sparc, avr and sh.

gcc/ChangeLog:

	* tree-sra.c (type_consists_of_records_p): Rename to...
	(scalarizable_type_p): ...this, add case for ARRAY_TYPE.

	(completely_scalarize_record): Rename to...
	(completely_scalarize): ...this, add ARRAY_TYPE case, move some code to:
	(scalarize_elem): New.

gcc/testsuite/ChangeLog:
	* gcc.dg/tree-ssa/sra-15.c: New.
---
 gcc/testsuite/gcc.dg/tree-ssa/sra-15.c |  38 +++++++++
 gcc/tree-sra.c                         | 146 ++++++++++++++++++++++-----------
 2 files changed, 135 insertions(+), 49 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sra-15.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
new file mode 100644
index 0000000..e251058
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
@@ -0,0 +1,38 @@
+/* Verify that SRA total scalarization works on records containing arrays.  */
+/* Test skipped for targets with small (often default) MOVE_RATIO.  */
+/* { dg-do run } */
+/* { dg-options "-O1 -fdump-tree-release_ssa --param sra-max-scalarization-size-Ospeed=32" } */
+
+extern void abort (void);
+
+struct S
+{
+  char c;
+  unsigned short f[2][2];
+  int i;
+  unsigned short f3, f4;
+};
+
+
+int __attribute__ ((noinline))
+foo (struct S *p)
+{
+  struct S l;
+
+  l = *p;
+  l.i++;
+  l.f[1][0] += 3;
+  *p = l;
+}
+
+int
+main (int argc, char **argv)
+{
+  struct S a = {0, { {5, 7}, {9, 11} }, 4, 0, 0};
+  foo (&a);
+  if (a.i != 5 || a.f[1][0] != 12)
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" } } */
diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index a0c92b0..08fa8dc 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -915,74 +915,122 @@ create_access (tree expr, gimple stmt, bool write)
 }
 
 
-/* Return true iff TYPE is a RECORD_TYPE with fields that are either of gimple
-   register types or (recursively) records with only these two kinds of fields.
-   It also returns false if any of these records contains a bit-field.  */
+/* Return true iff TYPE is scalarizable - i.e. a RECORD_TYPE or ARRAY_TYPE with
+   fields that are either of gimple register types (excluding bit-fields)
+   or (recursively) scalarizable types.  */
 
 static bool
-type_consists_of_records_p (tree type)
+scalarizable_type_p (tree type)
 {
-  tree fld;
+  gcc_assert (!is_gimple_reg_type (type));
 
-  if (TREE_CODE (type) != RECORD_TYPE)
-    return false;
+  switch (TREE_CODE (type))
+  {
+  case RECORD_TYPE:
+    for (tree fld = TYPE_FIELDS (type); fld; fld = DECL_CHAIN (fld))
+      if (TREE_CODE (fld) == FIELD_DECL)
+	{
+	  tree ft = TREE_TYPE (fld);
 
-  for (fld = TYPE_FIELDS (type); fld; fld = DECL_CHAIN (fld))
-    if (TREE_CODE (fld) == FIELD_DECL)
-      {
-	tree ft = TREE_TYPE (fld);
+	  if (DECL_BIT_FIELD (fld))
+	    return false;
 
-	if (DECL_BIT_FIELD (fld))
-	  return false;
+	  if (!is_gimple_reg_type (ft)
+	      && !scalarizable_type_p (ft))
+	    return false;
+	}
 
-	if (!is_gimple_reg_type (ft)
-	    && !type_consists_of_records_p (ft))
-	  return false;
-      }
+    return true;
 
-  return true;
+  case ARRAY_TYPE:
+    {
+      tree elem = TREE_TYPE (type);
+      if (DECL_P (elem) && DECL_BIT_FIELD (elem))
+	return false;
+      if (!is_gimple_reg_type (elem)
+	 && !scalarizable_type_p (elem))
+	return false;
+      return true;
+    }
+  default:
+    return false;
+  }
 }
 
-/* Create total_scalarization accesses for all scalar type fields in DECL that
-   must be of a RECORD_TYPE conforming to type_consists_of_records_p.  BASE
-   must be the top-most VAR_DECL representing the variable, OFFSET must be the
-   offset of DECL within BASE.  REF must be the memory reference expression for
-   the given decl.  */
+static void scalarize_elem (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree, tree);
+
+/* Create total_scalarization accesses for all scalar fields of a member
+   of type DECL_TYPE conforming to scalarizable_type_p.  BASE
+   must be the top-most VAR_DECL representing the variable; within that,
+   OFFSET locates the member and REF must be the memory reference expression for
+   the member.  */
 
 static void
-completely_scalarize_record (tree base, tree decl, HOST_WIDE_INT offset,
-			     tree ref)
+completely_scalarize (tree base, tree decl_type, HOST_WIDE_INT offset, tree ref)
 {
-  tree fld, decl_type = TREE_TYPE (decl);
+  switch (TREE_CODE (decl_type))
+    {
+    case RECORD_TYPE:
+      for (tree fld = TYPE_FIELDS (decl_type); fld; fld = DECL_CHAIN (fld))
+	if (TREE_CODE (fld) == FIELD_DECL)
+	  {
+	    HOST_WIDE_INT pos = offset + int_bit_position (fld);
+	    tree ft = TREE_TYPE (fld);
+	    tree nref = build3 (COMPONENT_REF, ft, ref, fld, NULL_TREE);
 
-  for (fld = TYPE_FIELDS (decl_type); fld; fld = DECL_CHAIN (fld))
-    if (TREE_CODE (fld) == FIELD_DECL)
+	    scalarize_elem (base, pos, tree_to_uhwi (DECL_SIZE (fld)), nref,
+			    ft);
+	  }
+      break;
+    case ARRAY_TYPE:
       {
-	HOST_WIDE_INT pos = offset + int_bit_position (fld);
-	tree ft = TREE_TYPE (fld);
-	tree nref = build3 (COMPONENT_REF, TREE_TYPE (fld), ref, fld,
-			    NULL_TREE);
-
-	if (is_gimple_reg_type (ft))
+	tree elemtype = TREE_TYPE (decl_type);
+	tree elem_size = TYPE_SIZE (elemtype);
+	gcc_assert (elem_size && tree_fits_uhwi_p (elem_size));
+	int el_size = tree_to_uhwi (elem_size);
+	gcc_assert (el_size);
+
+	tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (decl_type));
+	tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (decl_type));
+	gcc_assert (TREE_CODE (minidx) == INTEGER_CST
+		    && TREE_CODE (maxidx) == INTEGER_CST);
+	unsigned HOST_WIDE_INT len = tree_to_uhwi (maxidx)
+				     + 1 - tree_to_uhwi (minidx);
+	/* 4th operand to ARRAY_REF is size in units of the type alignment.  */
+	for (unsigned HOST_WIDE_INT idx = 0; idx < len; idx++)
 	  {
-	    struct access *access;
-	    HOST_WIDE_INT size;
-
-	    size = tree_to_uhwi (DECL_SIZE (fld));
-	    access = create_access_1 (base, pos, size);
-	    access->expr = nref;
-	    access->type = ft;
-	    access->grp_total_scalarization = 1;
-	    /* Accesses for intraprocedural SRA can have their stmt NULL.  */
+	    tree t_idx = build_int_cst (TYPE_DOMAIN (decl_type), idx);
+	    tree nref = build4 (ARRAY_REF, elemtype, ref, t_idx, NULL_TREE,
+				NULL_TREE);
+	    int el_off = offset + idx * el_size;
+	    scalarize_elem (base, el_off, el_size, nref, elemtype);
 	  }
-	else
-	  completely_scalarize_record (base, fld, pos, nref);
       }
+      break;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+static void
+scalarize_elem (tree base, HOST_WIDE_INT pos, HOST_WIDE_INT size,
+		 tree ref, tree type)
+{
+  if (is_gimple_reg_type (type))
+  {
+    struct access *access = create_access_1 (base, pos, size);
+    access->expr = ref;
+    access->type = type;
+    access->grp_total_scalarization = 1;
+    /* Accesses for intraprocedural SRA can have their stmt NULL.  */
+  }
+  else
+    completely_scalarize (base, type, pos, ref);
 }
 
 /* Create total_scalarization accesses for all scalar type fields in VAR and
-   for VAR as a whole.  VAR must be of a RECORD_TYPE conforming to
-   type_consists_of_records_p.   */
+   for VAR as a whole.  VAR must be of a RECORD_TYPE or ARRAY_TYPE conforming to
+   scalarizable_type_p.  */
 
 static void
 create_total_scalarization_access (tree var)
@@ -2522,13 +2570,13 @@ analyze_all_variable_accesses (void)
 	tree var = candidate (i);
 
 	if (TREE_CODE (var) == VAR_DECL
-	    && type_consists_of_records_p (TREE_TYPE (var)))
+	    && scalarizable_type_p (TREE_TYPE (var)))
 	  {
 	    if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (var)))
 		<= max_scalarization_size)
 	      {
 		create_total_scalarization_access (var);
-		completely_scalarize_record (var, var, 0, var);
+		completely_scalarize (var, TREE_TYPE (var), 0, var);
 		if (dump_file && (dump_flags & TDF_DETAILS))
 		  {
 		    fprintf (dump_file, "Will attempt to totally scalarize ");
-- 
1.8.3

^ permalink raw reply	[flat|nested] 45+ messages in thread

* [PATCH 1/5] Refactor completely_scalarize_var
  2015-08-25 11:06 [PATCH 0/5][tree-sra.c] PR/63679 Make SRA replace constant pool loads Alan Lawrence
                   ` (2 preceding siblings ...)
  2015-08-25 11:21 ` [PATCH 2/5] completely_scalarize arrays as well as records Alan Lawrence
@ 2015-08-25 11:30 ` Alan Lawrence
  2015-08-25 19:36   ` Jeff Law
  2015-08-25 21:42   ` Martin Jambor
  2015-08-25 12:30 ` [PATCH 3/5] Build ARRAY_REFs when the base is of ARRAY_TYPE Alan Lawrence
  4 siblings, 2 replies; 45+ messages in thread
From: Alan Lawrence @ 2015-08-25 11:30 UTC (permalink / raw)
  To: gcc-patches; +Cc: rguenther, mjambor, Alan Lawrence

This is a small refactoring/renaming patch, it just moves the call to
"completely_scalarize_record" out from completely_scalarize_var, and renames
the latter to create_total_scalarization_access.

This is because the next patch needs to drop the "_record" suffix and I felt
it would be confusing to have both completely_scalarize and
completely_scalarize_var. However, it also makes the new function name
(create_total_scalarization_access) consistent with the existing code & comment.

Bootstrapped + check-gcc on x86_64.

gcc/ChangeLog:

	* tree-sra.c (completely_scalarize_var): Rename to...
	(create_total_scalarization_access): ... Here. Drop call to
	completely_scalarize_record.

	(analyze_all_variable_accesses): Replace completely_scalarize_var
	with create_total_scalarization_access and completely_scalarize_record.
---
 gcc/tree-sra.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index 818c290..a0c92b0 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -985,7 +985,7 @@ completely_scalarize_record (tree base, tree decl, HOST_WIDE_INT offset,
    type_consists_of_records_p.   */
 
 static void
-completely_scalarize_var (tree var)
+create_total_scalarization_access (tree var)
 {
   HOST_WIDE_INT size = tree_to_uhwi (DECL_SIZE (var));
   struct access *access;
@@ -994,8 +994,6 @@ completely_scalarize_var (tree var)
   access->expr = var;
   access->type = TREE_TYPE (var);
   access->grp_total_scalarization = 1;
-
-  completely_scalarize_record (var, var, 0, var);
 }
 
 /* Return true if REF has an VIEW_CONVERT_EXPR somewhere in it.  */
@@ -2529,7 +2527,8 @@ analyze_all_variable_accesses (void)
 	    if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (var)))
 		<= max_scalarization_size)
 	      {
-		completely_scalarize_var (var);
+		create_total_scalarization_access (var);
+		completely_scalarize_record (var, var, 0, var);
 		if (dump_file && (dump_flags & TDF_DETAILS))
 		  {
 		    fprintf (dump_file, "Will attempt to totally scalarize ");
-- 
1.8.3

^ permalink raw reply	[flat|nested] 45+ messages in thread

* [PATCH 3/5] Build ARRAY_REFs when the base is of ARRAY_TYPE.
  2015-08-25 11:06 [PATCH 0/5][tree-sra.c] PR/63679 Make SRA replace constant pool loads Alan Lawrence
                   ` (3 preceding siblings ...)
  2015-08-25 11:30 ` [PATCH 1/5] Refactor completely_scalarize_var Alan Lawrence
@ 2015-08-25 12:30 ` Alan Lawrence
  2015-08-25 19:54   ` Jeff Law
  2015-08-25 22:51   ` Martin Jambor
  4 siblings, 2 replies; 45+ messages in thread
From: Alan Lawrence @ 2015-08-25 12:30 UTC (permalink / raw)
  To: gcc-patches; +Cc: rguenther, mjambor, Alan Lawrence

When SRA completely scalarizes an array, this patch changes the generated accesses from e.g.

   MEM[(int[8] *)&a + 4B] = 1;

to

   a[1] = 1;

This overcomes a limitation in dom2, that accesses to equivalent chunks of e.g. MEM[(int[8] *)&a] are not hashable_expr_equal_p with accesses to e.g. MEM[(int[8] *)&a]. This is necessary for constant propagation in the ssa-dom-cse-2.c testcase (after the next patch that makes SRA handle constant-pool loads).

I tried to work around this by making dom2's hashable_expr_equal_p less conservative, but found that on platforms without AArch64's vectorized reductions (specifically Alpha, hppa, PowerPC, and SPARC, mentioned in ssa-dom-cse-2.c), I also needed to make MEM[(int[8] *)&a] equivalent to a[0], etc.; a complete overhaul of hashable_expr_equal_p seems like a larger task than this patch series.

I can't see how to write a testcase for this in C though as direct assignment to an array is not possible; such assignments occur only with constant pool data, which is dealt with in the next patch.

Bootstrap + check-gcc on x86-none-linux-gnu, arm-none-linux-gnueabihf, aarch64-none-linux-gnu.

gcc/ChangeLog:

	* tree-sra.c (completely_scalarize): Move some code into:
	(get_elem_size): New.
	(build_ref_for_offset): Build ARRAY_REF if base is aligned array.
---
 gcc/tree-sra.c | 110 ++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 69 insertions(+), 41 deletions(-)

diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index 08fa8dc..af35fcc 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -957,6 +957,20 @@ scalarizable_type_p (tree type)
   }
 }
 
+static bool
+get_elem_size (const_tree type, unsigned HOST_WIDE_INT *sz_out)
+{
+  gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
+  tree t_size = TYPE_SIZE (TREE_TYPE (type));
+  if (!t_size || !tree_fits_uhwi_p (t_size))
+    return false;
+  unsigned HOST_WIDE_INT sz = tree_to_uhwi (t_size);
+  if (!sz)
+    return false;
+  *sz_out = sz;
+  return true;
+}
+
 static void scalarize_elem (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree, tree);
 
 /* Create total_scalarization accesses for all scalar fields of a member
@@ -985,10 +999,9 @@ completely_scalarize (tree base, tree decl_type, HOST_WIDE_INT offset, tree ref)
     case ARRAY_TYPE:
       {
 	tree elemtype = TREE_TYPE (decl_type);
-	tree elem_size = TYPE_SIZE (elemtype);
-	gcc_assert (elem_size && tree_fits_uhwi_p (elem_size));
-	int el_size = tree_to_uhwi (elem_size);
-	gcc_assert (el_size);
+	unsigned HOST_WIDE_INT el_size;
+	if (!get_elem_size (decl_type, &el_size))
+	  gcc_assert (false);
 
 	tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (decl_type));
 	tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (decl_type));
@@ -1563,7 +1576,7 @@ build_ref_for_offset (location_t loc, tree base, HOST_WIDE_INT offset,
   tree off;
   tree mem_ref;
   HOST_WIDE_INT base_offset;
-  unsigned HOST_WIDE_INT misalign;
+  unsigned HOST_WIDE_INT misalign, el_sz;
   unsigned int align;
 
   gcc_checking_assert (offset % BITS_PER_UNIT == 0);
@@ -1572,47 +1585,62 @@ build_ref_for_offset (location_t loc, tree base, HOST_WIDE_INT offset,
 
   /* get_addr_base_and_unit_offset returns NULL for references with a variable
      offset such as array[var_index].  */
-  if (!base)
-    {
-      gassign *stmt;
-      tree tmp, addr;
-
-      gcc_checking_assert (gsi);
-      tmp = make_ssa_name (build_pointer_type (TREE_TYPE (prev_base)));
-      addr = build_fold_addr_expr (unshare_expr (prev_base));
-      STRIP_USELESS_TYPE_CONVERSION (addr);
-      stmt = gimple_build_assign (tmp, addr);
-      gimple_set_location (stmt, loc);
-      if (insert_after)
-	gsi_insert_after (gsi, stmt, GSI_NEW_STMT);
-      else
-	gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
-
-      off = build_int_cst (reference_alias_ptr_type (prev_base),
-			   offset / BITS_PER_UNIT);
-      base = tmp;
-    }
-  else if (TREE_CODE (base) == MEM_REF)
-    {
-      off = build_int_cst (TREE_TYPE (TREE_OPERAND (base, 1)),
-			   base_offset + offset / BITS_PER_UNIT);
-      off = int_const_binop (PLUS_EXPR, TREE_OPERAND (base, 1), off);
-      base = unshare_expr (TREE_OPERAND (base, 0));
+  if (base
+      && TREE_CODE (TREE_TYPE (base)) == ARRAY_TYPE
+      && misalign == 0
+      && get_elem_size (TREE_TYPE (base), &el_sz)
+      && ((offset % el_sz) == 0)
+      && useless_type_conversion_p (exp_type, TREE_TYPE (TREE_TYPE (base)))
+      && (align >= TYPE_ALIGN (exp_type)))
+    {
+      tree idx = build_int_cst (TYPE_DOMAIN (TREE_TYPE (base)), offset / el_sz);
+      base = unshare_expr (base);
+      mem_ref = build4 (ARRAY_REF, exp_type, base, idx, NULL_TREE, NULL_TREE);
     }
   else
     {
-      off = build_int_cst (reference_alias_ptr_type (base),
-			   base_offset + offset / BITS_PER_UNIT);
-      base = build_fold_addr_expr (unshare_expr (base));
-    }
+      if (!base)
+	{
+	  gassign *stmt;
+	  tree tmp, addr;
+
+	  gcc_checking_assert (gsi);
+	  tmp = make_ssa_name (build_pointer_type (TREE_TYPE (prev_base)));
+	  addr = build_fold_addr_expr (unshare_expr (prev_base));
+	  STRIP_USELESS_TYPE_CONVERSION (addr);
+	  stmt = gimple_build_assign (tmp, addr);
+	  gimple_set_location (stmt, loc);
+	  if (insert_after)
+	    gsi_insert_after (gsi, stmt, GSI_NEW_STMT);
+	  else
+	    gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
 
-  misalign = (misalign + offset) & (align - 1);
-  if (misalign != 0)
-    align = (misalign & -misalign);
-  if (align != TYPE_ALIGN (exp_type))
-    exp_type = build_aligned_type (exp_type, align);
+	  off = build_int_cst (reference_alias_ptr_type (prev_base),
+			       offset / BITS_PER_UNIT);
+	  base = tmp;
+	}
+      else if (TREE_CODE (base) == MEM_REF)
+	{
+	  off = build_int_cst (TREE_TYPE (TREE_OPERAND (base, 1)),
+			       base_offset + offset / BITS_PER_UNIT);
+	  off = int_const_binop (PLUS_EXPR, TREE_OPERAND (base, 1), off);
+	  base = unshare_expr (TREE_OPERAND (base, 0));
+	}
+      else
+	{
+	  off = build_int_cst (reference_alias_ptr_type (base),
+			       base_offset + offset / BITS_PER_UNIT);
+	  base = build_fold_addr_expr (unshare_expr (base));
+	}
 
-  mem_ref = fold_build2_loc (loc, MEM_REF, exp_type, base, off);
+      misalign = (misalign + offset) & (align - 1);
+      if (misalign != 0)
+	align = (misalign & -misalign);
+      if (align != TYPE_ALIGN (exp_type))
+	exp_type = build_aligned_type (exp_type, align);
+
+      mem_ref = fold_build2_loc (loc, MEM_REF, exp_type, base, off);
+    }
   if (TREE_THIS_VOLATILE (prev_base))
     TREE_THIS_VOLATILE (mem_ref) = 1;
   if (TREE_SIDE_EFFECTS (prev_base))
-- 
1.8.3

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 1/5] Refactor completely_scalarize_var
  2015-08-25 11:30 ` [PATCH 1/5] Refactor completely_scalarize_var Alan Lawrence
@ 2015-08-25 19:36   ` Jeff Law
  2015-08-25 21:42   ` Martin Jambor
  1 sibling, 0 replies; 45+ messages in thread
From: Jeff Law @ 2015-08-25 19:36 UTC (permalink / raw)
  To: Alan Lawrence, gcc-patches; +Cc: rguenther, mjambor

On 08/25/2015 05:06 AM, Alan Lawrence wrote:
> This is a small refactoring/renaming patch, it just moves the call to
> "completely_scalarize_record" out from completely_scalarize_var, and renames
> the latter to create_total_scalarization_access.
>
> This is because the next patch needs to drop the "_record" suffix and I felt
> it would be confusing to have both completely_scalarize and
> completely_scalarize_var. However, it also makes the new function name
> (create_total_scalarization_access) consistent with the existing code & comment.
>
> Bootstrapped + check-gcc on x86_64.
>
> gcc/ChangeLog:
>
> 	* tree-sra.c (completely_scalarize_var): Rename to...
> 	(create_total_scalarization_access): ... Here. Drop call to
> 	completely_scalarize_record.
>
> 	(analyze_all_variable_accesses): Replace completely_scalarize_var
> 	with create_total_scalarization_access and completely_scalarize_record.
OK.
Jeff

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records
  2015-08-25 11:21 ` [PATCH 2/5] completely_scalarize arrays as well as records Alan Lawrence
@ 2015-08-25 19:40   ` Jeff Law
  2015-08-27 16:54     ` Fixing sra-12.c (was: Re: [PATCH 2/5] completely_scalarize arrays as well as records) Alan Lawrence
  2015-08-25 21:44   ` [PATCH 2/5] completely_scalarize arrays as well as records Martin Jambor
  1 sibling, 1 reply; 45+ messages in thread
From: Jeff Law @ 2015-08-25 19:40 UTC (permalink / raw)
  To: Alan Lawrence, gcc-patches; +Cc: rguenther, mjambor

On 08/25/2015 05:06 AM, Alan Lawrence wrote:
> This changes the completely_scalarize_record path to also work on arrays (thus
> allowing records containing arrays, etc.). This just required extending the
> existing type_consists_of_records_p and completely_scalarize_record methods
> to handle things of ARRAY_TYPE as well as RECORD_TYPE. Hence, I renamed both
> methods so as not to mention 'record'.
>
> Bootstrapped + check-gcc on aarch64-none-linux-gnu, arm-none-linux-gnueabihf and x86_64-none-linux-gnu.
>
> Have also verified the scan-tree-dump check in the new sra-15.c passes (using a stage 1 compiler only, no execution test) on alpha, hppa, powerpc, sparc, avr and sh.
>
> gcc/ChangeLog:
>
> 	* tree-sra.c (type_consists_of_records_p): Rename to...
> 	(scalarizable_type_p): ...this, add case for ARRAY_TYPE.
>
> 	(completely_scalarize_record): Rename to...
> 	(completely_scalarize): ...this, add ARRAY_TYPE case, move some code to:
> 	(scalarize_elem): New.
>
> gcc/testsuite/ChangeLog:
> 	* gcc.dg/tree-ssa/sra-15.c: New.
> ---
>   gcc/testsuite/gcc.dg/tree-ssa/sra-15.c |  38 +++++++++
>   gcc/tree-sra.c                         | 146 ++++++++++++++++++++++-----------
>   2 files changed, 135 insertions(+), 49 deletions(-)
>   create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
> new file mode 100644
> index 0000000..e251058
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
> @@ -0,0 +1,38 @@
> +/* Verify that SRA total scalarization works on records containing arrays.  */
> +/* Test skipped for targets with small (often default) MOVE_RATIO.  */
?!?  I don't see anything that skips this test for any targets. 
Presumably this was copied from sra-12.c.  I suspect this comment should 
just be removed.

With that comment removed from the testcase, this is OK.

jeff


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 3/5] Build ARRAY_REFs when the base is of ARRAY_TYPE.
  2015-08-25 12:30 ` [PATCH 3/5] Build ARRAY_REFs when the base is of ARRAY_TYPE Alan Lawrence
@ 2015-08-25 19:54   ` Jeff Law
  2015-08-26  6:34     ` Bin.Cheng
  2015-08-26  7:20     ` Richard Biener
  2015-08-25 22:51   ` Martin Jambor
  1 sibling, 2 replies; 45+ messages in thread
From: Jeff Law @ 2015-08-25 19:54 UTC (permalink / raw)
  To: Alan Lawrence, gcc-patches; +Cc: rguenther, mjambor

On 08/25/2015 05:06 AM, Alan Lawrence wrote:
> When SRA completely scalarizes an array, this patch changes the
> generated accesses from e.g.
>
> MEM[(int[8] *)&a + 4B] = 1;
>
> to
>
> a[1] = 1;
>
> This overcomes a limitation in dom2, that accesses to equivalent
> chunks of e.g. MEM[(int[8] *)&a] are not hashable_expr_equal_p with
> accesses to e.g. MEM[(int[8] *)&a]. This is necessary for constant
> propagation in the ssa-dom-cse-2.c testcase (after the next patch
> that makes SRA handle constant-pool loads).
>
> I tried to work around this by making dom2's hashable_expr_equal_p
> less conservative, but found that on platforms without AArch64's
> vectorized reductions (specifically Alpha, hppa, PowerPC, and SPARC,
> mentioned in ssa-dom-cse-2.c), I also needed to make MEM[(int[8]
> *)&a] equivalent to a[0], etc.; a complete overhaul of
> hashable_expr_equal_p seems like a larger task than this patch
> series.
>
> I can't see how to write a testcase for this in C though as direct
> assignment to an array is not possible; such assignments occur only
> with constant pool data, which is dealt with in the next patch.
It's a general issue that if there's > 1 common way to represent an
expression, then DOM will often miss discovery of the CSE opportunity
because of the way it hashes expressions.

Ideally we'd be moving to a canonical form, but I also realize that in
the case of memory references like this, that may not be feasible.

It does make me wonder how many CSEs we're really missing due to the two
ways to represent array accesses.


> Bootstrap + check-gcc on x86-none-linux-gnu,
> arm-none-linux-gnueabihf, aarch64-none-linux-gnu.
>
> gcc/ChangeLog:
>
> * tree-sra.c (completely_scalarize): Move some code into:
> (get_elem_size): New. (build_ref_for_offset): Build ARRAY_REF if base
> is aligned array. --- gcc/tree-sra.c | 110
> ++++++++++++++++++++++++++++++++++++--------------------- 1 file
> changed, 69 insertions(+), 41 deletions(-)
>
> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index 08fa8dc..af35fcc
> 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -957,6 +957,20 @@
> scalarizable_type_p (tree type) } }
>
> +static bool +get_elem_size (const_tree type, unsigned HOST_WIDE_INT
> *sz_out)
Function comment needed.

I may have missed it in the earlier patches, but can you please make
sure any new functions you created have comments in those as well.  Such 
patches are pre-approved.

With the added function comment, this patch is fine.

jeff


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [RFC 5/5] Always completely replace constant pool entries
  2015-08-25 11:06 ` [RFC 5/5] Always completely replace constant pool entries Alan Lawrence
@ 2015-08-25 20:09   ` Jeff Law
  2015-08-26  7:29     ` Richard Biener
  0 siblings, 1 reply; 45+ messages in thread
From: Jeff Law @ 2015-08-25 20:09 UTC (permalink / raw)
  To: Alan Lawrence, gcc-patches; +Cc: rguenther, mjambor

On 08/25/2015 05:06 AM, Alan Lawrence wrote:
> I used this as a means of better-testing the previous changes, as it exercises
> the constant replacement code a whole lot more. Indeed, quite a few tests are
> now optimized away to nothing on AArch64...
>
> Always pulling in constants, is almost certainly not what we want, but we may
> nonetheless want something more aggressive than the usual --param, e.g. for the
> ssa-dom-cse-2.c test. Thoughts welcomed?
I'm of the opinion that we have too many knobs already.  So I'd perhaps 
ask whether or not this option is likely to be useful to end users?

As for the patch itself, any thoughts on reasonable heuristics for when 
to pull in the constants?  Clearly we don't want the patch as-is, but 
are there cases we can identify when we want to be more aggressive?

jeff


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [RFC 4/5] Handle constant-pool entries
  2015-08-25 11:06 ` [RFC 4/5] Handle constant-pool entries Alan Lawrence
@ 2015-08-25 20:19   ` Jeff Law
  2015-08-26  7:24     ` Richard Biener
  2015-08-26 15:51     ` Alan Lawrence
  2015-08-26 14:08   ` Martin Jambor
  1 sibling, 2 replies; 45+ messages in thread
From: Jeff Law @ 2015-08-25 20:19 UTC (permalink / raw)
  To: Alan Lawrence, gcc-patches; +Cc: rguenther, mjambor

On 08/25/2015 05:06 AM, Alan Lawrence wrote:
> This makes SRA replace loads of records/arrays from constant pool entries,
> with elementwise assignments of the constant values, hence, overcoming the
> fundamental problem in PR/63679.
>
> As a first pass, the approach I took was to look for constant-pool loads as
> we scanned through other accesses, and add them as candidates there; to build a
> constant replacement_decl for any such accesses in completely_scalarize; and to
> use any existing replacement_decl rather than creating a variable in
> create_access_replacement. (I did try using CONSTANT_CLASS_P in the latter, but
> that does not allow addresses of labels, which can still end up in the constant
> pool.)
>
> Feedback as to the approach or how it might be better structured / fitted into
> SRA, is solicited ;).
>
> Bootstrapped + check-gcc on x86-none-linux-gnu, aarch64-none-linux-gnu and
> arm-none-linux-gnueabihf, including with the next patch (rfc), which greatly increases the number of testcases in which this code is exercised!
>
> Have also verified that the ssa-dom-cse-2.c scan-tree-dump test passes (using a stage 1 compiler only, without execution) on alpha, hppa, powerpc, sparc, avr, and sh.
>
> gcc/ChangeLog:
>
> 	* tree-sra.c (create_access): Scan for uses of constant pool and add
> 	to candidates.
> 	(subst_initial): New.
> 	(scalarize_elem): Build replacement_decl using subst_initial.
> 	(create_access_replacement): Use replacement_decl if set.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.dg/tree-ssa/ssa-dom-cse-2.c: Remove xfail, add --param
> 	sra-max-scalarization-size-Ospeed.
> ---
>   gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c |  7 +---
>   gcc/tree-sra.c                                | 56 +++++++++++++++++++++++++--
>   2 files changed, 55 insertions(+), 8 deletions(-)
>
> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
> index af35fcc..a3ff2df 100644
> --- a/gcc/tree-sra.c
> +++ b/gcc/tree-sra.c
> @@ -865,6 +865,17 @@ create_access (tree expr, gimple stmt, bool write)
>     else
>       ptr = false;
>
> +  /* FORNOW: scan for uses of constant pool as we go along.  */
I'm not sure why you have this marked as FORNOW.  If I'm reading all 
this code correctly, you're lazily adding items from the constant pool 
into the candidates table when you find they're used.  That seems better 
than walking the entire constant pool adding them all to the candidates.

I don't see this as fundamentally wrong or unclean.

The question I have is why this differs from the effects of patch #5. 
That would seem to indicate that there's things we're not getting into 
the candidate tables with this approach?!?



> @@ -1025,6 +1036,37 @@ completely_scalarize (tree base, tree decl_type, HOST_WIDE_INT offset, tree ref)
>       }
>   }
>
> +static tree
> +subst_initial (tree expr, tree var)
Function comment.

I think this patch is fine with the function comment added and removing 
the FORNOW part of the comment in create_access.  It may be worth noting 
in create_access's comment that it can add new items to the candidates 
tables for constant pool entries.


Jeff

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 1/5] Refactor completely_scalarize_var
  2015-08-25 11:30 ` [PATCH 1/5] Refactor completely_scalarize_var Alan Lawrence
  2015-08-25 19:36   ` Jeff Law
@ 2015-08-25 21:42   ` Martin Jambor
  1 sibling, 0 replies; 45+ messages in thread
From: Martin Jambor @ 2015-08-25 21:42 UTC (permalink / raw)
  To: Alan Lawrence; +Cc: gcc-patches, rguenther

Hi,

On Tue, Aug 25, 2015 at 12:06:13PM +0100, Alan Lawrence wrote:
> This is a small refactoring/renaming patch, it just moves the call to
> "completely_scalarize_record" out from completely_scalarize_var, and renames
> the latter to create_total_scalarization_access.
> 
> This is because the next patch needs to drop the "_record" suffix and I felt
> it would be confusing to have both completely_scalarize and
> completely_scalarize_var. However, it also makes the new function name
> (create_total_scalarization_access) consistent with the existing code & comment.
> 
> Bootstrapped + check-gcc on x86_64.
> 
> gcc/ChangeLog:
> 
> 	* tree-sra.c (completely_scalarize_var): Rename to...
> 	(create_total_scalarization_access): ... Here. Drop call to
> 	completely_scalarize_record.
> 
> 	(analyze_all_variable_accesses): Replace completely_scalarize_var
> 	with create_total_scalarization_access and completely_scalarize_record.
> ---
>  gcc/tree-sra.c | 7 +++----
>  1 file changed, 3 insertions(+), 4 deletions(-)
> 
> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
> index 818c290..a0c92b0 100644
> --- a/gcc/tree-sra.c
> +++ b/gcc/tree-sra.c
> @@ -985,7 +985,7 @@ completely_scalarize_record (tree base, tree decl, HOST_WIDE_INT offset,
>     type_consists_of_records_p.   */
>  
>  static void
> -completely_scalarize_var (tree var)
> +create_total_scalarization_access (tree var)

If you change what the function does, you have to change the comment
too.  If I am not mistaken, even with the whole patch set applied, the
first sentence would still be: "Create total_scalarization accesses
for all scalar type fields in VAR and for VAR as a whole." And with
this change, only the part after "and" will remain true.

Thanks,

Martin

>  {
>    HOST_WIDE_INT size = tree_to_uhwi (DECL_SIZE (var));
>    struct access *access;
> @@ -994,8 +994,6 @@ completely_scalarize_var (tree var)
>    access->expr = var;
>    access->type = TREE_TYPE (var);
>    access->grp_total_scalarization = 1;
> -
> -  completely_scalarize_record (var, var, 0, var);
>  }
>  
>  /* Return true if REF has an VIEW_CONVERT_EXPR somewhere in it.  */
> @@ -2529,7 +2527,8 @@ analyze_all_variable_accesses (void)
>  	    if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (var)))
>  		<= max_scalarization_size)
>  	      {
> -		completely_scalarize_var (var);
> +		create_total_scalarization_access (var);
> +		completely_scalarize_record (var, var, 0, var);
>  		if (dump_file && (dump_flags & TDF_DETAILS))
>  		  {
>  		    fprintf (dump_file, "Will attempt to totally scalarize ");
> -- 
> 1.8.3
> 

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records
  2015-08-25 11:21 ` [PATCH 2/5] completely_scalarize arrays as well as records Alan Lawrence
  2015-08-25 19:40   ` Jeff Law
@ 2015-08-25 21:44   ` Martin Jambor
  2015-08-25 21:55     ` Jeff Law
  2015-08-27 16:00     ` Alan Lawrence
  1 sibling, 2 replies; 45+ messages in thread
From: Martin Jambor @ 2015-08-25 21:44 UTC (permalink / raw)
  To: Alan Lawrence; +Cc: gcc-patches, rguenther

Hi,

On Tue, Aug 25, 2015 at 12:06:14PM +0100, Alan Lawrence wrote:
> This changes the completely_scalarize_record path to also work on arrays (thus
> allowing records containing arrays, etc.). This just required extending the
> existing type_consists_of_records_p and completely_scalarize_record methods
> to handle things of ARRAY_TYPE as well as RECORD_TYPE. Hence, I renamed both
> methods so as not to mention 'record'.

thanks for working on this.  I see Jeff has already approved the
patch, but I have two comments nevertheless.  First, I would be much
happier if you added a proper comment to scalarize_elem function which
you forgot completely.  The name is not very descriptive and it has
quite few parameters too.

Second, this patch should also fix PR 67283.  It would be great if you
could verify that and add it to the changelog when committing if that
is indeed the case.

Thanks,

Martin


> 
> Bootstrapped + check-gcc on aarch64-none-linux-gnu, arm-none-linux-gnueabihf and x86_64-none-linux-gnu.
> 
> Have also verified the scan-tree-dump check in the new sra-15.c passes (using a stage 1 compiler only, no execution test) on alpha, hppa, powerpc, sparc, avr and sh.
> 
> gcc/ChangeLog:
> 
> 	* tree-sra.c (type_consists_of_records_p): Rename to...
> 	(scalarizable_type_p): ...this, add case for ARRAY_TYPE.
> 
> 	(completely_scalarize_record): Rename to...
> 	(completely_scalarize): ...this, add ARRAY_TYPE case, move some code to:
> 	(scalarize_elem): New.
> 
> gcc/testsuite/ChangeLog:
> 	* gcc.dg/tree-ssa/sra-15.c: New.
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/sra-15.c |  38 +++++++++
>  gcc/tree-sra.c                         | 146 ++++++++++++++++++++++-----------
>  2 files changed, 135 insertions(+), 49 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
> 
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
> new file mode 100644
> index 0000000..e251058
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
> @@ -0,0 +1,38 @@
> +/* Verify that SRA total scalarization works on records containing arrays.  */
> +/* Test skipped for targets with small (often default) MOVE_RATIO.  */
> +/* { dg-do run } */
> +/* { dg-options "-O1 -fdump-tree-release_ssa --param sra-max-scalarization-size-Ospeed=32" } */
> +
> +extern void abort (void);
> +
> +struct S
> +{
> +  char c;
> +  unsigned short f[2][2];
> +  int i;
> +  unsigned short f3, f4;
> +};
> +
> +
> +int __attribute__ ((noinline))
> +foo (struct S *p)
> +{
> +  struct S l;
> +
> +  l = *p;
> +  l.i++;
> +  l.f[1][0] += 3;
> +  *p = l;
> +}
> +
> +int
> +main (int argc, char **argv)
> +{
> +  struct S a = {0, { {5, 7}, {9, 11} }, 4, 0, 0};
> +  foo (&a);
> +  if (a.i != 5 || a.f[1][0] != 12)
> +    abort ();
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" } } */
> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
> index a0c92b0..08fa8dc 100644
> --- a/gcc/tree-sra.c
> +++ b/gcc/tree-sra.c
> @@ -915,74 +915,122 @@ create_access (tree expr, gimple stmt, bool write)
>  }
>  
>  
> -/* Return true iff TYPE is a RECORD_TYPE with fields that are either of gimple
> -   register types or (recursively) records with only these two kinds of fields.
> -   It also returns false if any of these records contains a bit-field.  */
> +/* Return true iff TYPE is scalarizable - i.e. a RECORD_TYPE or ARRAY_TYPE with
> +   fields that are either of gimple register types (excluding bit-fields)
> +   or (recursively) scalarizable types.  */
>  
>  static bool
> -type_consists_of_records_p (tree type)
> +scalarizable_type_p (tree type)
>  {
> -  tree fld;
> +  gcc_assert (!is_gimple_reg_type (type));
>  
> -  if (TREE_CODE (type) != RECORD_TYPE)
> -    return false;
> +  switch (TREE_CODE (type))
> +  {
> +  case RECORD_TYPE:
> +    for (tree fld = TYPE_FIELDS (type); fld; fld = DECL_CHAIN (fld))
> +      if (TREE_CODE (fld) == FIELD_DECL)
> +	{
> +	  tree ft = TREE_TYPE (fld);
>  
> -  for (fld = TYPE_FIELDS (type); fld; fld = DECL_CHAIN (fld))
> -    if (TREE_CODE (fld) == FIELD_DECL)
> -      {
> -	tree ft = TREE_TYPE (fld);
> +	  if (DECL_BIT_FIELD (fld))
> +	    return false;
>  
> -	if (DECL_BIT_FIELD (fld))
> -	  return false;
> +	  if (!is_gimple_reg_type (ft)
> +	      && !scalarizable_type_p (ft))
> +	    return false;
> +	}
>  
> -	if (!is_gimple_reg_type (ft)
> -	    && !type_consists_of_records_p (ft))
> -	  return false;
> -      }
> +    return true;
>  
> -  return true;
> +  case ARRAY_TYPE:
> +    {
> +      tree elem = TREE_TYPE (type);
> +      if (DECL_P (elem) && DECL_BIT_FIELD (elem))
> +	return false;
> +      if (!is_gimple_reg_type (elem)
> +	 && !scalarizable_type_p (elem))
> +	return false;
> +      return true;
> +    }
> +  default:
> +    return false;
> +  }
>  }
>  
> -/* Create total_scalarization accesses for all scalar type fields in DECL that
> -   must be of a RECORD_TYPE conforming to type_consists_of_records_p.  BASE
> -   must be the top-most VAR_DECL representing the variable, OFFSET must be the
> -   offset of DECL within BASE.  REF must be the memory reference expression for
> -   the given decl.  */
> +static void scalarize_elem (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree, tree);
> +
> +/* Create total_scalarization accesses for all scalar fields of a member
> +   of type DECL_TYPE conforming to scalarizable_type_p.  BASE
> +   must be the top-most VAR_DECL representing the variable; within that,
> +   OFFSET locates the member and REF must be the memory reference expression for
> +   the member.  */
>  
>  static void
> -completely_scalarize_record (tree base, tree decl, HOST_WIDE_INT offset,
> -			     tree ref)
> +completely_scalarize (tree base, tree decl_type, HOST_WIDE_INT offset, tree ref)
>  {
> -  tree fld, decl_type = TREE_TYPE (decl);
> +  switch (TREE_CODE (decl_type))
> +    {
> +    case RECORD_TYPE:
> +      for (tree fld = TYPE_FIELDS (decl_type); fld; fld = DECL_CHAIN (fld))
> +	if (TREE_CODE (fld) == FIELD_DECL)
> +	  {
> +	    HOST_WIDE_INT pos = offset + int_bit_position (fld);
> +	    tree ft = TREE_TYPE (fld);
> +	    tree nref = build3 (COMPONENT_REF, ft, ref, fld, NULL_TREE);
>  
> -  for (fld = TYPE_FIELDS (decl_type); fld; fld = DECL_CHAIN (fld))
> -    if (TREE_CODE (fld) == FIELD_DECL)
> +	    scalarize_elem (base, pos, tree_to_uhwi (DECL_SIZE (fld)), nref,
> +			    ft);
> +	  }
> +      break;
> +    case ARRAY_TYPE:
>        {
> -	HOST_WIDE_INT pos = offset + int_bit_position (fld);
> -	tree ft = TREE_TYPE (fld);
> -	tree nref = build3 (COMPONENT_REF, TREE_TYPE (fld), ref, fld,
> -			    NULL_TREE);
> -
> -	if (is_gimple_reg_type (ft))
> +	tree elemtype = TREE_TYPE (decl_type);
> +	tree elem_size = TYPE_SIZE (elemtype);
> +	gcc_assert (elem_size && tree_fits_uhwi_p (elem_size));
> +	int el_size = tree_to_uhwi (elem_size);
> +	gcc_assert (el_size);
> +
> +	tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (decl_type));
> +	tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (decl_type));
> +	gcc_assert (TREE_CODE (minidx) == INTEGER_CST
> +		    && TREE_CODE (maxidx) == INTEGER_CST);
> +	unsigned HOST_WIDE_INT len = tree_to_uhwi (maxidx)
> +				     + 1 - tree_to_uhwi (minidx);
> +	/* 4th operand to ARRAY_REF is size in units of the type alignment.  */
> +	for (unsigned HOST_WIDE_INT idx = 0; idx < len; idx++)
>  	  {
> -	    struct access *access;
> -	    HOST_WIDE_INT size;
> -
> -	    size = tree_to_uhwi (DECL_SIZE (fld));
> -	    access = create_access_1 (base, pos, size);
> -	    access->expr = nref;
> -	    access->type = ft;
> -	    access->grp_total_scalarization = 1;
> -	    /* Accesses for intraprocedural SRA can have their stmt NULL.  */
> +	    tree t_idx = build_int_cst (TYPE_DOMAIN (decl_type), idx);
> +	    tree nref = build4 (ARRAY_REF, elemtype, ref, t_idx, NULL_TREE,
> +				NULL_TREE);
> +	    int el_off = offset + idx * el_size;
> +	    scalarize_elem (base, el_off, el_size, nref, elemtype);
>  	  }
> -	else
> -	  completely_scalarize_record (base, fld, pos, nref);
>        }
> +      break;
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
> +
> +static void
> +scalarize_elem (tree base, HOST_WIDE_INT pos, HOST_WIDE_INT size,
> +		 tree ref, tree type)
> +{
> +  if (is_gimple_reg_type (type))
> +  {
> +    struct access *access = create_access_1 (base, pos, size);
> +    access->expr = ref;
> +    access->type = type;
> +    access->grp_total_scalarization = 1;
> +    /* Accesses for intraprocedural SRA can have their stmt NULL.  */
> +  }
> +  else
> +    completely_scalarize (base, type, pos, ref);
>  }
>  
>  /* Create total_scalarization accesses for all scalar type fields in VAR and
> -   for VAR as a whole.  VAR must be of a RECORD_TYPE conforming to
> -   type_consists_of_records_p.   */
> +   for VAR as a whole.  VAR must be of a RECORD_TYPE or ARRAY_TYPE conforming to
> +   scalarizable_type_p.  */
>  
>  static void
>  create_total_scalarization_access (tree var)
> @@ -2522,13 +2570,13 @@ analyze_all_variable_accesses (void)
>  	tree var = candidate (i);
>  
>  	if (TREE_CODE (var) == VAR_DECL
> -	    && type_consists_of_records_p (TREE_TYPE (var)))
> +	    && scalarizable_type_p (TREE_TYPE (var)))
>  	  {
>  	    if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (var)))
>  		<= max_scalarization_size)
>  	      {
>  		create_total_scalarization_access (var);
> -		completely_scalarize_record (var, var, 0, var);
> +		completely_scalarize (var, TREE_TYPE (var), 0, var);
>  		if (dump_file && (dump_flags & TDF_DETAILS))
>  		  {
>  		    fprintf (dump_file, "Will attempt to totally scalarize ");
> -- 
> 1.8.3
> 

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records
  2015-08-25 21:44   ` [PATCH 2/5] completely_scalarize arrays as well as records Martin Jambor
@ 2015-08-25 21:55     ` Jeff Law
  2015-08-26  7:11       ` Richard Biener
  2015-08-27 16:00     ` Alan Lawrence
  1 sibling, 1 reply; 45+ messages in thread
From: Jeff Law @ 2015-08-25 21:55 UTC (permalink / raw)
  To: Alan Lawrence, gcc-patches, rguenther

On 08/25/2015 03:42 PM, Martin Jambor wrote:
> Hi,
>
> On Tue, Aug 25, 2015 at 12:06:14PM +0100, Alan Lawrence wrote:
>> This changes the completely_scalarize_record path to also work on arrays (thus
>> allowing records containing arrays, etc.). This just required extending the
>> existing type_consists_of_records_p and completely_scalarize_record methods
>> to handle things of ARRAY_TYPE as well as RECORD_TYPE. Hence, I renamed both
>> methods so as not to mention 'record'.
>
> thanks for working on this.  I see Jeff has already approved the
> patch, but I have two comments nevertheless.  First, I would be much
> happier if you added a proper comment to scalarize_elem function which
> you forgot completely.  The name is not very descriptive and it has
> quite few parameters too.
Right.  I mentioned that I missed the lack of function comments when 
looking at #3 and asked Alan to go back and fix them in #1 and #2.

>
> Second, this patch should also fix PR 67283.  It would be great if you
> could verify that and add it to the changelog when committing if that
> is indeed the case.
Excellent.  Yes, definitely mention the BZ.

jeff

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 3/5] Build ARRAY_REFs when the base is of ARRAY_TYPE.
  2015-08-25 12:30 ` [PATCH 3/5] Build ARRAY_REFs when the base is of ARRAY_TYPE Alan Lawrence
  2015-08-25 19:54   ` Jeff Law
@ 2015-08-25 22:51   ` Martin Jambor
  1 sibling, 0 replies; 45+ messages in thread
From: Martin Jambor @ 2015-08-25 22:51 UTC (permalink / raw)
  To: Alan Lawrence; +Cc: gcc-patches, rguenther

Hi,

On Tue, Aug 25, 2015 at 12:06:15PM +0100, Alan Lawrence wrote:
> When SRA completely scalarizes an array, this patch changes the
> generated accesses from e.g.
> 
>    MEM[(int[8] *)&a + 4B] = 1;
> 
> to
> 
>    a[1] = 1;
> 
> This overcomes a limitation in dom2, that accesses to equivalent
> chunks of e.g. MEM[(int[8] *)&a] are not hashable_expr_equal_p with
> accesses to e.g. MEM[(int[8] *)&a]. This is necessary for constant
> propagation in the ssa-dom-cse-2.c testcase (after the next patch
> that makes SRA handle constant-pool loads).
> 
> I tried to work around this by making dom2's hashable_expr_equal_p
> less conservative, but found that on platforms without AArch64's
> vectorized reductions (specifically Alpha, hppa, PowerPC, and SPARC,
> mentioned in ssa-dom-cse-2.c), I also needed to make MEM[(int[8]
> *)&a] equivalent to a[0], etc.; a complete overhaul of
> hashable_expr_equal_p seems like a larger task than this patch
> series.

Uff.  Well, while I am obviously not excited about such workaround
landing in SRA, if maintainers agree that it is reasonable, I suppose
I'll manage to live with it.

I also have more specific comments:

> 
> I can't see how to write a testcase for this in C though as direct assignment to an array is not possible; such assignments occur only with constant pool data, which is dealt with in the next patch.
> 
> Bootstrap + check-gcc on x86-none-linux-gnu, arm-none-linux-gnueabihf, aarch64-none-linux-gnu.
> 
> gcc/ChangeLog:
> 
> 	* tree-sra.c (completely_scalarize): Move some code into:
> 	(get_elem_size): New.
> 	(build_ref_for_offset): Build ARRAY_REF if base is aligned array.
> ---
>  gcc/tree-sra.c | 110 ++++++++++++++++++++++++++++++++++++---------------------
>  1 file changed, 69 insertions(+), 41 deletions(-)
> 
> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
> index 08fa8dc..af35fcc 100644
> --- a/gcc/tree-sra.c
> +++ b/gcc/tree-sra.c
> @@ -957,6 +957,20 @@ scalarizable_type_p (tree type)
>    }
>  }
>  
> +static bool
> +get_elem_size (const_tree type, unsigned HOST_WIDE_INT *sz_out)

As Jeff already pointed out, this function needs a comment.

> +{
> +  gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
> +  tree t_size = TYPE_SIZE (TREE_TYPE (type));
> +  if (!t_size || !tree_fits_uhwi_p (t_size))
> +    return false;
> +  unsigned HOST_WIDE_INT sz = tree_to_uhwi (t_size);
> +  if (!sz)
> +    return false;
> +  *sz_out = sz;
> +  return true;
> +}
> +
>  static void scalarize_elem (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree, tree);
>  
>  /* Create total_scalarization accesses for all scalar fields of a member
> @@ -985,10 +999,9 @@ completely_scalarize (tree base, tree decl_type, HOST_WIDE_INT offset, tree ref)
>      case ARRAY_TYPE:
>        {
>  	tree elemtype = TREE_TYPE (decl_type);
> -	tree elem_size = TYPE_SIZE (elemtype);
> -	gcc_assert (elem_size && tree_fits_uhwi_p (elem_size));
> -	int el_size = tree_to_uhwi (elem_size);
> -	gcc_assert (el_size);
> +	unsigned HOST_WIDE_INT el_size;
> +	if (!get_elem_size (decl_type, &el_size))
> +	  gcc_assert (false);

This is usually written as gcc_unreachable ()

>  
>  	tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (decl_type));
>  	tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (decl_type));
> @@ -1563,7 +1576,7 @@ build_ref_for_offset (location_t loc, tree base, HOST_WIDE_INT offset,
>    tree off;
>    tree mem_ref;
>    HOST_WIDE_INT base_offset;
> -  unsigned HOST_WIDE_INT misalign;
> +  unsigned HOST_WIDE_INT misalign, el_sz;
>    unsigned int align;
>  
>    gcc_checking_assert (offset % BITS_PER_UNIT == 0);
> @@ -1572,47 +1585,62 @@ build_ref_for_offset (location_t loc, tree base, HOST_WIDE_INT offset,
>  
>    /* get_addr_base_and_unit_offset returns NULL for references with a variable
>       offset such as array[var_index].  */
> -  if (!base)
> -    {
> -      gassign *stmt;
> -      tree tmp, addr;
> -
> -      gcc_checking_assert (gsi);
> -      tmp = make_ssa_name (build_pointer_type (TREE_TYPE (prev_base)));
> -      addr = build_fold_addr_expr (unshare_expr (prev_base));
> -      STRIP_USELESS_TYPE_CONVERSION (addr);
> -      stmt = gimple_build_assign (tmp, addr);
> -      gimple_set_location (stmt, loc);
> -      if (insert_after)
> -	gsi_insert_after (gsi, stmt, GSI_NEW_STMT);
> -      else
> -	gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
> -
> -      off = build_int_cst (reference_alias_ptr_type (prev_base),
> -			   offset / BITS_PER_UNIT);
> -      base = tmp;
> -    }
> -  else if (TREE_CODE (base) == MEM_REF)
> -    {
> -      off = build_int_cst (TREE_TYPE (TREE_OPERAND (base, 1)),
> -			   base_offset + offset / BITS_PER_UNIT);
> -      off = int_const_binop (PLUS_EXPR, TREE_OPERAND (base, 1), off);
> -      base = unshare_expr (TREE_OPERAND (base, 0));
> +  if (base
> +      && TREE_CODE (TREE_TYPE (base)) == ARRAY_TYPE
> +      && misalign == 0
> +      && get_elem_size (TREE_TYPE (base), &el_sz)
> +      && ((offset % el_sz) == 0)
> +      && useless_type_conversion_p (exp_type, TREE_TYPE (TREE_TYPE (base)))
> +      && (align >= TYPE_ALIGN (exp_type)))

If this goes through, please add a comment explaining why we are doing
this, especially if you do not have a test-case.

> +    {
> +      tree idx = build_int_cst (TYPE_DOMAIN (TREE_TYPE (base)), offset / el_sz);
> +      base = unshare_expr (base);
> +      mem_ref = build4 (ARRAY_REF, exp_type, base, idx, NULL_TREE, NULL_TREE);

Since you are changing the function not to always produce a MEM_REF,
you need to adjust its comment again ;-)

Thanks,

Martin

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 3/5] Build ARRAY_REFs when the base is of ARRAY_TYPE.
  2015-08-25 19:54   ` Jeff Law
@ 2015-08-26  6:34     ` Bin.Cheng
  2015-08-26  7:40       ` Richard Biener
  2015-08-26  7:20     ` Richard Biener
  1 sibling, 1 reply; 45+ messages in thread
From: Bin.Cheng @ 2015-08-26  6:34 UTC (permalink / raw)
  To: Jeff Law; +Cc: Alan Lawrence, gcc-patches List, Richard Biener, mjambor

On Wed, Aug 26, 2015 at 3:50 AM, Jeff Law <law@redhat.com> wrote:
> On 08/25/2015 05:06 AM, Alan Lawrence wrote:
>>
>> When SRA completely scalarizes an array, this patch changes the
>> generated accesses from e.g.
>>
>> MEM[(int[8] *)&a + 4B] = 1;
>>
>> to
>>
>> a[1] = 1;
>>
>> This overcomes a limitation in dom2, that accesses to equivalent
>> chunks of e.g. MEM[(int[8] *)&a] are not hashable_expr_equal_p with
>> accesses to e.g. MEM[(int[8] *)&a]. This is necessary for constant
>> propagation in the ssa-dom-cse-2.c testcase (after the next patch
>> that makes SRA handle constant-pool loads).
>>
>> I tried to work around this by making dom2's hashable_expr_equal_p
>> less conservative, but found that on platforms without AArch64's
>> vectorized reductions (specifically Alpha, hppa, PowerPC, and SPARC,
>> mentioned in ssa-dom-cse-2.c), I also needed to make MEM[(int[8]
>> *)&a] equivalent to a[0], etc.; a complete overhaul of
>> hashable_expr_equal_p seems like a larger task than this patch
>> series.
>>
>> I can't see how to write a testcase for this in C though as direct
>> assignment to an array is not possible; such assignments occur only
>> with constant pool data, which is dealt with in the next patch.
>
> It's a general issue that if there's > 1 common way to represent an
> expression, then DOM will often miss discovery of the CSE opportunity
> because of the way it hashes expressions.
>
> Ideally we'd be moving to a canonical form, but I also realize that in
> the case of memory references like this, that may not be feasible.
IIRC, there were talks about lowering all memory reference on GIMPLE?
Which is the reverse approach.  Since SRA is in quite early
compilation stage, don't know if lowered memory reference has impact
on other optimizers.

Thanks,
bin
>
> It does make me wonder how many CSEs we're really missing due to the two
> ways to represent array accesses.
>
>
>> Bootstrap + check-gcc on x86-none-linux-gnu,
>> arm-none-linux-gnueabihf, aarch64-none-linux-gnu.
>>
>> gcc/ChangeLog:
>>
>> * tree-sra.c (completely_scalarize): Move some code into:
>> (get_elem_size): New. (build_ref_for_offset): Build ARRAY_REF if base
>> is aligned array. --- gcc/tree-sra.c | 110
>> ++++++++++++++++++++++++++++++++++++--------------------- 1 file
>> changed, 69 insertions(+), 41 deletions(-)
>>
>> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index 08fa8dc..af35fcc
>> 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -957,6 +957,20 @@
>> scalarizable_type_p (tree type) } }
>>
>> +static bool +get_elem_size (const_tree type, unsigned HOST_WIDE_INT
>> *sz_out)
>
> Function comment needed.
>
> I may have missed it in the earlier patches, but can you please make
> sure any new functions you created have comments in those as well.  Such
> patches are pre-approved.
>
> With the added function comment, this patch is fine.
>
> jeff
>
>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records
  2015-08-25 21:55     ` Jeff Law
@ 2015-08-26  7:11       ` Richard Biener
  2015-08-26  9:39         ` Martin Jambor
  0 siblings, 1 reply; 45+ messages in thread
From: Richard Biener @ 2015-08-26  7:11 UTC (permalink / raw)
  To: Jeff Law; +Cc: Alan Lawrence, GCC Patches, Richard Biener

On Tue, Aug 25, 2015 at 11:44 PM, Jeff Law <law@redhat.com> wrote:
> On 08/25/2015 03:42 PM, Martin Jambor wrote:
>>
>> Hi,
>>
>> On Tue, Aug 25, 2015 at 12:06:14PM +0100, Alan Lawrence wrote:
>>>
>>> This changes the completely_scalarize_record path to also work on arrays
>>> (thus
>>> allowing records containing arrays, etc.). This just required extending
>>> the
>>> existing type_consists_of_records_p and completely_scalarize_record
>>> methods
>>> to handle things of ARRAY_TYPE as well as RECORD_TYPE. Hence, I renamed
>>> both
>>> methods so as not to mention 'record'.
>>
>>
>> thanks for working on this.  I see Jeff has already approved the
>> patch, but I have two comments nevertheless.  First, I would be much
>> happier if you added a proper comment to scalarize_elem function which
>> you forgot completely.  The name is not very descriptive and it has
>> quite few parameters too.
>
> Right.  I mentioned that I missed the lack of function comments when looking
> at #3 and asked Alan to go back and fix them in #1 and #2.
>
>>
>> Second, this patch should also fix PR 67283.  It would be great if you
>> could verify that and add it to the changelog when committing if that
>> is indeed the case.
>
> Excellent.  Yes, definitely mention the BZ.

One extra question is does the way we limit total scalarization work well
for arrays?  I suppose we have either sth like the maximum size of an
aggregate we scalarize or the maximum number of component accesses
we create?

Thanks,
Richard.

> jeff
>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 3/5] Build ARRAY_REFs when the base is of ARRAY_TYPE.
  2015-08-25 19:54   ` Jeff Law
  2015-08-26  6:34     ` Bin.Cheng
@ 2015-08-26  7:20     ` Richard Biener
  1 sibling, 0 replies; 45+ messages in thread
From: Richard Biener @ 2015-08-26  7:20 UTC (permalink / raw)
  To: Jeff Law; +Cc: Alan Lawrence, GCC Patches, Richard Biener, Martin Jambor

On Tue, Aug 25, 2015 at 9:50 PM, Jeff Law <law@redhat.com> wrote:
> On 08/25/2015 05:06 AM, Alan Lawrence wrote:
>>
>> When SRA completely scalarizes an array, this patch changes the
>> generated accesses from e.g.
>>
>> MEM[(int[8] *)&a + 4B] = 1;
>>
>> to
>>
>> a[1] = 1;
>>
>> This overcomes a limitation in dom2, that accesses to equivalent
>> chunks of e.g. MEM[(int[8] *)&a] are not hashable_expr_equal_p with
>> accesses to e.g. MEM[(int[8] *)&a]. This is necessary for constant
>> propagation in the ssa-dom-cse-2.c testcase (after the next patch
>> that makes SRA handle constant-pool loads).
>>
>> I tried to work around this by making dom2's hashable_expr_equal_p
>> less conservative, but found that on platforms without AArch64's
>> vectorized reductions (specifically Alpha, hppa, PowerPC, and SPARC,
>> mentioned in ssa-dom-cse-2.c), I also needed to make MEM[(int[8]
>> *)&a] equivalent to a[0], etc.; a complete overhaul of
>> hashable_expr_equal_p seems like a larger task than this patch
>> series.
>>
>> I can't see how to write a testcase for this in C though as direct
>> assignment to an array is not possible; such assignments occur only
>> with constant pool data, which is dealt with in the next patch.
>
> It's a general issue that if there's > 1 common way to represent an
> expression, then DOM will often miss discovery of the CSE opportunity
> because of the way it hashes expressions.
>
> Ideally we'd be moving to a canonical form, but I also realize that in
> the case of memory references like this, that may not be feasible.
>
> It does make me wonder how many CSEs we're really missing due to the two
> ways to represent array accesses.
>
>
>> Bootstrap + check-gcc on x86-none-linux-gnu,
>> arm-none-linux-gnueabihf, aarch64-none-linux-gnu.
>>
>> gcc/ChangeLog:
>>
>> * tree-sra.c (completely_scalarize): Move some code into:
>> (get_elem_size): New. (build_ref_for_offset): Build ARRAY_REF if base
>> is aligned array. --- gcc/tree-sra.c | 110
>> ++++++++++++++++++++++++++++++++++++--------------------- 1 file
>> changed, 69 insertions(+), 41 deletions(-)
>>
>> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index 08fa8dc..af35fcc
>> 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -957,6 +957,20 @@
>> scalarizable_type_p (tree type) } }
>>
>> +static bool +get_elem_size (const_tree type, unsigned HOST_WIDE_INT
>> *sz_out)
>
> Function comment needed.
>
> I may have missed it in the earlier patches, but can you please make
> sure any new functions you created have comments in those as well.  Such
> patches are pre-approved.
>
> With the added function comment, this patch is fine.

Err ... you generally _cannot_ create ARRAY_REFs out of thin air because
of correctness issues with data-ref and data dependence analysis.  You can
of course keep ARRAY_REFs if the original access was an ARRAY_REF.

But I'm not convinced this is what the pass does.

We've went through great lengths removing all the code from gimplification
and folding that tried to be clever in producing array refs from accesses to
sth with an ARRAY_TYPE - this all eventually lead to wrong-code issues
later.

So I'd rather _not_ have this patch.  (as always I'm too slow responding
and Jeff is too fast ;))

Thanks,
Richard.

> jeff
>
>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [RFC 4/5] Handle constant-pool entries
  2015-08-25 20:19   ` Jeff Law
@ 2015-08-26  7:24     ` Richard Biener
  2015-08-26 15:51     ` Alan Lawrence
  1 sibling, 0 replies; 45+ messages in thread
From: Richard Biener @ 2015-08-26  7:24 UTC (permalink / raw)
  To: Jeff Law; +Cc: Alan Lawrence, GCC Patches, Richard Biener, Martin Jambor

On Tue, Aug 25, 2015 at 10:13 PM, Jeff Law <law@redhat.com> wrote:
> On 08/25/2015 05:06 AM, Alan Lawrence wrote:
>>
>> This makes SRA replace loads of records/arrays from constant pool entries,
>> with elementwise assignments of the constant values, hence, overcoming the
>> fundamental problem in PR/63679.
>>
>> As a first pass, the approach I took was to look for constant-pool loads
>> as
>> we scanned through other accesses, and add them as candidates there; to
>> build a
>> constant replacement_decl for any such accesses in completely_scalarize;
>> and to
>> use any existing replacement_decl rather than creating a variable in
>> create_access_replacement. (I did try using CONSTANT_CLASS_P in the
>> latter, but
>> that does not allow addresses of labels, which can still end up in the
>> constant
>> pool.)
>>
>> Feedback as to the approach or how it might be better structured / fitted
>> into
>> SRA, is solicited ;).
>>
>> Bootstrapped + check-gcc on x86-none-linux-gnu, aarch64-none-linux-gnu and
>> arm-none-linux-gnueabihf, including with the next patch (rfc), which
>> greatly increases the number of testcases in which this code is exercised!
>>
>> Have also verified that the ssa-dom-cse-2.c scan-tree-dump test passes
>> (using a stage 1 compiler only, without execution) on alpha, hppa, powerpc,
>> sparc, avr, and sh.
>>
>> gcc/ChangeLog:
>>
>>         * tree-sra.c (create_access): Scan for uses of constant pool and
>> add
>>         to candidates.
>>         (subst_initial): New.
>>         (scalarize_elem): Build replacement_decl using subst_initial.
>>         (create_access_replacement): Use replacement_decl if set.
>>
>> gcc/testsuite/ChangeLog:
>>
>>         * gcc.dg/tree-ssa/ssa-dom-cse-2.c: Remove xfail, add --param
>>         sra-max-scalarization-size-Ospeed.
>> ---
>>   gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c |  7 +---
>>   gcc/tree-sra.c                                | 56
>> +++++++++++++++++++++++++--
>>   2 files changed, 55 insertions(+), 8 deletions(-)
>>
>> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
>> index af35fcc..a3ff2df 100644
>> --- a/gcc/tree-sra.c
>> +++ b/gcc/tree-sra.c
>> @@ -865,6 +865,17 @@ create_access (tree expr, gimple stmt, bool write)
>>     else
>>       ptr = false;
>>
>> +  /* FORNOW: scan for uses of constant pool as we go along.  */
>
> I'm not sure why you have this marked as FORNOW.  If I'm reading all this
> code correctly, you're lazily adding items from the constant pool into the
> candidates table when you find they're used.  That seems better than walking
> the entire constant pool adding them all to the candidates.
>
> I don't see this as fundamentally wrong or unclean.
>
> The question I have is why this differs from the effects of patch #5. That
> would seem to indicate that there's things we're not getting into the
> candidate tables with this approach?!?
>
>
>
>> @@ -1025,6 +1036,37 @@ completely_scalarize (tree base, tree decl_type,
>> HOST_WIDE_INT offset, tree ref)
>>       }
>>   }
>>
>> +static tree
>> +subst_initial (tree expr, tree var)
>
> Function comment.
>
> I think this patch is fine with the function comment added and removing the
> FORNOW part of the comment in create_access.  It may be worth noting in
> create_access's comment that it can add new items to the candidates tables
> for constant pool entries.

I'm happy seeing this code in SRA as I never liked that we already decide
at gimplification time which initializers to expand and which to init from
a constant pool entry.  So ... can we now "remove" gimplify_init_constructor
by _always_ emitting a constant pool entry and an assignment from it
(obviously only if the constructor can be put into the constant pool)?  Defering
the expansion decision to SRA makes it possible to better estimate whether
the code is hot/cold or whether the initialized variable can be replaced by
the constant pool entry completely (variable ends up readonly).

Oh, and we'd no longer create the awful split code at -O0 ...

So can you explore that a bit once this series is settled?  This is probably
also related to 5/5 as this makes all the target dependent decisions in SRA
now and thus the initial IL from gimplification should be the same for all
targets (that's always a nice thing to have IMHO).

Thanks,
Richard.

>
> Jeff

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [RFC 5/5] Always completely replace constant pool entries
  2015-08-25 20:09   ` Jeff Law
@ 2015-08-26  7:29     ` Richard Biener
  0 siblings, 0 replies; 45+ messages in thread
From: Richard Biener @ 2015-08-26  7:29 UTC (permalink / raw)
  To: Jeff Law; +Cc: Alan Lawrence, GCC Patches, Richard Biener, Martin Jambor

On Tue, Aug 25, 2015 at 9:54 PM, Jeff Law <law@redhat.com> wrote:
> On 08/25/2015 05:06 AM, Alan Lawrence wrote:
>>
>> I used this as a means of better-testing the previous changes, as it
>> exercises
>> the constant replacement code a whole lot more. Indeed, quite a few tests
>> are
>> now optimized away to nothing on AArch64...
>>
>> Always pulling in constants, is almost certainly not what we want, but we
>> may
>> nonetheless want something more aggressive than the usual --param, e.g.
>> for the
>> ssa-dom-cse-2.c test. Thoughts welcomed?
>
> I'm of the opinion that we have too many knobs already.  So I'd perhaps ask
> whether or not this option is likely to be useful to end users?
>
> As for the patch itself, any thoughts on reasonable heuristics for when to
> pull in the constants?  Clearly we don't want the patch as-is, but are there
> cases we can identify when we want to be more aggressive?

Well - I still think that we need to enhance those followup passes to directly
handle the constant pool entry.  Expanding the assignment piecewise for
arbitrary large initializers is certainly a no-go.  IIRC I enhanced FRE to do
this at some point.  For DOM it's much harder due to the way it is structured
and I'd like to keep DOM simple.

Note that we still want SRA to partly scalarize the initializer if
only few elements
remain accessed (so we can optimize the initializer away).  Of course
that requires
catching most followup optimization opportunities before the 2nd SRA run.

Richard.

> jeff
>
>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 3/5] Build ARRAY_REFs when the base is of ARRAY_TYPE.
  2015-08-26  6:34     ` Bin.Cheng
@ 2015-08-26  7:40       ` Richard Biener
  2015-08-26  7:41         ` Bin.Cheng
  0 siblings, 1 reply; 45+ messages in thread
From: Richard Biener @ 2015-08-26  7:40 UTC (permalink / raw)
  To: Bin.Cheng; +Cc: Jeff Law, Alan Lawrence, gcc-patches List, mjambor

On Wed, 26 Aug 2015, Bin.Cheng wrote:

> On Wed, Aug 26, 2015 at 3:50 AM, Jeff Law <law@redhat.com> wrote:
> > On 08/25/2015 05:06 AM, Alan Lawrence wrote:
> >>
> >> When SRA completely scalarizes an array, this patch changes the
> >> generated accesses from e.g.
> >>
> >> MEM[(int[8] *)&a + 4B] = 1;
> >>
> >> to
> >>
> >> a[1] = 1;
> >>
> >> This overcomes a limitation in dom2, that accesses to equivalent
> >> chunks of e.g. MEM[(int[8] *)&a] are not hashable_expr_equal_p with
> >> accesses to e.g. MEM[(int[8] *)&a]. This is necessary for constant
> >> propagation in the ssa-dom-cse-2.c testcase (after the next patch
> >> that makes SRA handle constant-pool loads).
> >>
> >> I tried to work around this by making dom2's hashable_expr_equal_p
> >> less conservative, but found that on platforms without AArch64's
> >> vectorized reductions (specifically Alpha, hppa, PowerPC, and SPARC,
> >> mentioned in ssa-dom-cse-2.c), I also needed to make MEM[(int[8]
> >> *)&a] equivalent to a[0], etc.; a complete overhaul of
> >> hashable_expr_equal_p seems like a larger task than this patch
> >> series.
> >>
> >> I can't see how to write a testcase for this in C though as direct
> >> assignment to an array is not possible; such assignments occur only
> >> with constant pool data, which is dealt with in the next patch.
> >
> > It's a general issue that if there's > 1 common way to represent an
> > expression, then DOM will often miss discovery of the CSE opportunity
> > because of the way it hashes expressions.
> >
> > Ideally we'd be moving to a canonical form, but I also realize that in
> > the case of memory references like this, that may not be feasible.
> IIRC, there were talks about lowering all memory reference on GIMPLE?
> Which is the reverse approach.  Since SRA is in quite early
> compilation stage, don't know if lowered memory reference has impact
> on other optimizers.

Yeah, I'd only do the lowering after loop opts.  Which also may make
the DOM issue moot as the array refs would be lowered as well and thus
DOM would see a consistent set of references again.  The lowering should
also simplify SLSR and expose address computation redundancies to DOM.

I'd place such lowering before the late reassoc (any takers?  I suppose
you can pick up one of the bitfield lowering passes posted in the
previous years as this should also handle bitfield accesses correctly).

Thanks,
Richard.

> Thanks,
> bin
> >
> > It does make me wonder how many CSEs we're really missing due to the two
> > ways to represent array accesses.
> >
> >
> >> Bootstrap + check-gcc on x86-none-linux-gnu,
> >> arm-none-linux-gnueabihf, aarch64-none-linux-gnu.
> >>
> >> gcc/ChangeLog:
> >>
> >> * tree-sra.c (completely_scalarize): Move some code into:
> >> (get_elem_size): New. (build_ref_for_offset): Build ARRAY_REF if base
> >> is aligned array. --- gcc/tree-sra.c | 110
> >> ++++++++++++++++++++++++++++++++++++--------------------- 1 file
> >> changed, 69 insertions(+), 41 deletions(-)
> >>
> >> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index 08fa8dc..af35fcc
> >> 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -957,6 +957,20 @@
> >> scalarizable_type_p (tree type) } }
> >>
> >> +static bool +get_elem_size (const_tree type, unsigned HOST_WIDE_INT
> >> *sz_out)
> >
> > Function comment needed.
> >
> > I may have missed it in the earlier patches, but can you please make
> > sure any new functions you created have comments in those as well.  Such
> > patches are pre-approved.
> >
> > With the added function comment, this patch is fine.
> >
> > jeff
> >
> >
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 3/5] Build ARRAY_REFs when the base is of ARRAY_TYPE.
  2015-08-26  7:40       ` Richard Biener
@ 2015-08-26  7:41         ` Bin.Cheng
  0 siblings, 0 replies; 45+ messages in thread
From: Bin.Cheng @ 2015-08-26  7:41 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jeff Law, Alan Lawrence, gcc-patches List, mjambor

On Wed, Aug 26, 2015 at 3:29 PM, Richard Biener <rguenther@suse.de> wrote:
> On Wed, 26 Aug 2015, Bin.Cheng wrote:
>
>> On Wed, Aug 26, 2015 at 3:50 AM, Jeff Law <law@redhat.com> wrote:
>> > On 08/25/2015 05:06 AM, Alan Lawrence wrote:
>> >>
>> >> When SRA completely scalarizes an array, this patch changes the
>> >> generated accesses from e.g.
>> >>
>> >> MEM[(int[8] *)&a + 4B] = 1;
>> >>
>> >> to
>> >>
>> >> a[1] = 1;
>> >>
>> >> This overcomes a limitation in dom2, that accesses to equivalent
>> >> chunks of e.g. MEM[(int[8] *)&a] are not hashable_expr_equal_p with
>> >> accesses to e.g. MEM[(int[8] *)&a]. This is necessary for constant
>> >> propagation in the ssa-dom-cse-2.c testcase (after the next patch
>> >> that makes SRA handle constant-pool loads).
>> >>
>> >> I tried to work around this by making dom2's hashable_expr_equal_p
>> >> less conservative, but found that on platforms without AArch64's
>> >> vectorized reductions (specifically Alpha, hppa, PowerPC, and SPARC,
>> >> mentioned in ssa-dom-cse-2.c), I also needed to make MEM[(int[8]
>> >> *)&a] equivalent to a[0], etc.; a complete overhaul of
>> >> hashable_expr_equal_p seems like a larger task than this patch
>> >> series.
>> >>
>> >> I can't see how to write a testcase for this in C though as direct
>> >> assignment to an array is not possible; such assignments occur only
>> >> with constant pool data, which is dealt with in the next patch.
>> >
>> > It's a general issue that if there's > 1 common way to represent an
>> > expression, then DOM will often miss discovery of the CSE opportunity
>> > because of the way it hashes expressions.
>> >
>> > Ideally we'd be moving to a canonical form, but I also realize that in
>> > the case of memory references like this, that may not be feasible.
>> IIRC, there were talks about lowering all memory reference on GIMPLE?
>> Which is the reverse approach.  Since SRA is in quite early
>> compilation stage, don't know if lowered memory reference has impact
>> on other optimizers.
>
> Yeah, I'd only do the lowering after loop opts.  Which also may make
> the DOM issue moot as the array refs would be lowered as well and thus
> DOM would see a consistent set of references again.  The lowering should
> also simplify SLSR and expose address computation redundancies to DOM.
>
> I'd place such lowering before the late reassoc (any takers?  I suppose
> you can pick up one of the bitfield lowering passes posted in the
> previous years as this should also handle bitfield accesses correctly).
I ran into several issues related to lowered memory references (some
of them are about slsr), and want to have a look at this.  But only
after finishing major issues in IVO...

As for slsr, I think the problem is more about we need to prove
equality of expressions by diving into definition chain of ssa_var,
just like tree_to_affine_expand.  I think this has already been
discussed too.  Anyway, lowering memory reference provides a canonical
form and should benefit other optimizers.

Thanks,
bin
>
> Thanks,
> Richard.
>
>> Thanks,
>> bin
>> >
>> > It does make me wonder how many CSEs we're really missing due to the two
>> > ways to represent array accesses.
>> >
>> >
>> >> Bootstrap + check-gcc on x86-none-linux-gnu,
>> >> arm-none-linux-gnueabihf, aarch64-none-linux-gnu.
>> >>
>> >> gcc/ChangeLog:
>> >>
>> >> * tree-sra.c (completely_scalarize): Move some code into:
>> >> (get_elem_size): New. (build_ref_for_offset): Build ARRAY_REF if base
>> >> is aligned array. --- gcc/tree-sra.c | 110
>> >> ++++++++++++++++++++++++++++++++++++--------------------- 1 file
>> >> changed, 69 insertions(+), 41 deletions(-)
>> >>
>> >> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index 08fa8dc..af35fcc
>> >> 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -957,6 +957,20 @@
>> >> scalarizable_type_p (tree type) } }
>> >>
>> >> +static bool +get_elem_size (const_tree type, unsigned HOST_WIDE_INT
>> >> *sz_out)
>> >
>> > Function comment needed.
>> >
>> > I may have missed it in the earlier patches, but can you please make
>> > sure any new functions you created have comments in those as well.  Such
>> > patches are pre-approved.
>> >
>> > With the added function comment, this patch is fine.
>> >
>> > jeff
>> >
>> >
>>
>>
>
> --
> Richard Biener <rguenther@suse.de>
> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records
  2015-08-26  7:11       ` Richard Biener
@ 2015-08-26  9:39         ` Martin Jambor
  2015-08-26 10:12           ` Richard Biener
  0 siblings, 1 reply; 45+ messages in thread
From: Martin Jambor @ 2015-08-26  9:39 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jeff Law, Alan Lawrence, GCC Patches, Richard Biener

Hi,

On Wed, Aug 26, 2015 at 09:07:33AM +0200, Richard Biener wrote:
> On Tue, Aug 25, 2015 at 11:44 PM, Jeff Law <law@redhat.com> wrote:
> > On 08/25/2015 03:42 PM, Martin Jambor wrote:
> >>
> >> Hi,
> >>
> >> On Tue, Aug 25, 2015 at 12:06:14PM +0100, Alan Lawrence wrote:
> >>>
> >>> This changes the completely_scalarize_record path to also work on arrays
> >>> (thus
> >>> allowing records containing arrays, etc.). This just required extending
> >>> the
> >>> existing type_consists_of_records_p and completely_scalarize_record
> >>> methods
> >>> to handle things of ARRAY_TYPE as well as RECORD_TYPE. Hence, I renamed
> >>> both
> >>> methods so as not to mention 'record'.
> >>
> >>
> >> thanks for working on this.  I see Jeff has already approved the
> >> patch, but I have two comments nevertheless.  First, I would be much
> >> happier if you added a proper comment to scalarize_elem function which
> >> you forgot completely.  The name is not very descriptive and it has
> >> quite few parameters too.
> >
> > Right.  I mentioned that I missed the lack of function comments when looking
> > at #3 and asked Alan to go back and fix them in #1 and #2.
> >
> >>
> >> Second, this patch should also fix PR 67283.  It would be great if you
> >> could verify that and add it to the changelog when committing if that
> >> is indeed the case.
> >
> > Excellent.  Yes, definitely mention the BZ.
> 
> One extra question is does the way we limit total scalarization work well
> for arrays?  I suppose we have either sth like the maximum size of an
> aggregate we scalarize or the maximum number of component accesses
> we create?
> 

Only the former and that would be kept intact.  It is in fact visible
in the context of the last hunk of the patch.

Martin

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records
  2015-08-26  9:39         ` Martin Jambor
@ 2015-08-26 10:12           ` Richard Biener
  2015-08-26 16:30             ` Alan Lawrence
  0 siblings, 1 reply; 45+ messages in thread
From: Richard Biener @ 2015-08-26 10:12 UTC (permalink / raw)
  To: Martin Jambor; +Cc: Jeff Law, Alan Lawrence, GCC Patches, Richard Biener

On August 26, 2015 11:30:26 AM GMT+02:00, Martin Jambor <mjambor@suse.cz> wrote:
>Hi,
>
>On Wed, Aug 26, 2015 at 09:07:33AM +0200, Richard Biener wrote:
>> On Tue, Aug 25, 2015 at 11:44 PM, Jeff Law <law@redhat.com> wrote:
>> > On 08/25/2015 03:42 PM, Martin Jambor wrote:
>> >>
>> >> Hi,
>> >>
>> >> On Tue, Aug 25, 2015 at 12:06:14PM +0100, Alan Lawrence wrote:
>> >>>
>> >>> This changes the completely_scalarize_record path to also work on
>arrays
>> >>> (thus
>> >>> allowing records containing arrays, etc.). This just required
>extending
>> >>> the
>> >>> existing type_consists_of_records_p and
>completely_scalarize_record
>> >>> methods
>> >>> to handle things of ARRAY_TYPE as well as RECORD_TYPE. Hence, I
>renamed
>> >>> both
>> >>> methods so as not to mention 'record'.
>> >>
>> >>
>> >> thanks for working on this.  I see Jeff has already approved the
>> >> patch, but I have two comments nevertheless.  First, I would be
>much
>> >> happier if you added a proper comment to scalarize_elem function
>which
>> >> you forgot completely.  The name is not very descriptive and it
>has
>> >> quite few parameters too.
>> >
>> > Right.  I mentioned that I missed the lack of function comments
>when looking
>> > at #3 and asked Alan to go back and fix them in #1 and #2.
>> >
>> >>
>> >> Second, this patch should also fix PR 67283.  It would be great if
>you
>> >> could verify that and add it to the changelog when committing if
>that
>> >> is indeed the case.
>> >
>> > Excellent.  Yes, definitely mention the BZ.
>> 
>> One extra question is does the way we limit total scalarization work
>well
>> for arrays?  I suppose we have either sth like the maximum size of an
>> aggregate we scalarize or the maximum number of component accesses
>> we create?
>> 
>
>Only the former and that would be kept intact.  It is in fact visible
>in the context of the last hunk of the patch.

OK.  IIRC the gimplification code also has the latter and also considers zeroing the whole aggregate before initializing non-zero fields.  IMHO it makes sense to reuse some of the analysis and classification routines it has.

Richard.

>Martin


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [RFC 4/5] Handle constant-pool entries
  2015-08-25 11:06 ` [RFC 4/5] Handle constant-pool entries Alan Lawrence
  2015-08-25 20:19   ` Jeff Law
@ 2015-08-26 14:08   ` Martin Jambor
  1 sibling, 0 replies; 45+ messages in thread
From: Martin Jambor @ 2015-08-26 14:08 UTC (permalink / raw)
  To: Alan Lawrence; +Cc: gcc-patches, rguenther

Hi,

On Tue, Aug 25, 2015 at 12:06:16PM +0100, Alan Lawrence wrote:
> This makes SRA replace loads of records/arrays from constant pool entries,
> with elementwise assignments of the constant values, hence, overcoming the
> fundamental problem in PR/63679.
> 
> As a first pass, the approach I took was to look for constant-pool loads as
> we scanned through other accesses, and add them as candidates there; to build a
> constant replacement_decl for any such accesses in completely_scalarize; and to
> use any existing replacement_decl rather than creating a variable in
> create_access_replacement. (I did try using CONSTANT_CLASS_P in the latter, but
> that does not allow addresses of labels, which can still end up in the constant
> pool.)
> 
> Feedback as to the approach or how it might be better structured / fitted into
> SRA, is solicited ;).

I'm not familiar with constant pools very much, but I'll try:

> 
> Bootstrapped + check-gcc on x86-none-linux-gnu, aarch64-none-linux-gnu and
> arm-none-linux-gnueabihf, including with the next patch (rfc), which greatly increases the number of testcases in which this code is exercised!
> 
> Have also verified that the ssa-dom-cse-2.c scan-tree-dump test passes (using a stage 1 compiler only, without execution) on alpha, hppa, powerpc, sparc, avr, and sh.
> 
> gcc/ChangeLog:
> 
> 	* tree-sra.c (create_access): Scan for uses of constant pool and add
> 	to candidates.
> 	(subst_initial): New.
> 	(scalarize_elem): Build replacement_decl using subst_initial.
> 	(create_access_replacement): Use replacement_decl if set.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.dg/tree-ssa/ssa-dom-cse-2.c: Remove xfail, add --param
> 	sra-max-scalarization-size-Ospeed.
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c |  7 +---
>  gcc/tree-sra.c                                | 56 +++++++++++++++++++++++++--
>  2 files changed, 55 insertions(+), 8 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
> index 9eccdc9..b13d583 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O3 -fno-tree-fre -fno-tree-pre -fdump-tree-optimized" } */
> +/* { dg-options "-O3 -fno-tree-fre -fno-tree-pre -fdump-tree-optimized --param sra-max-scalarization-size-Ospeed=32" } */
>  
>  int
>  foo ()
> @@ -17,7 +17,4 @@ foo ()
>  /* After late unrolling the above loop completely DOM should be
>     able to optimize this to return 28.  */
>  
> -/* See PR63679 and PR64159, if the target forces the initializer to memory then
> -   DOM is not able to perform this optimization.  */
> -
> -/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail aarch64*-*-* alpha*-*-* hppa*-*-* powerpc*-*-* sparc*-*-* s390*-*-* } } } */
> +/* { dg-final { scan-tree-dump "return 28;" "optimized" } } */
> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
> index af35fcc..a3ff2df 100644
> --- a/gcc/tree-sra.c
> +++ b/gcc/tree-sra.c
> @@ -865,6 +865,17 @@ create_access (tree expr, gimple stmt, bool write)
>    else
>      ptr = false;
>  
> +  /* FORNOW: scan for uses of constant pool as we go along.  */
> +  if (TREE_CODE (base) == VAR_DECL && DECL_IN_CONSTANT_POOL (base)
> +      && !bitmap_bit_p (candidate_bitmap, DECL_UID (base)))
> +    {
> +      gcc_assert (!write);
> +      bitmap_set_bit (candidate_bitmap, DECL_UID (base));
> +      tree_node **slot = candidates->find_slot_with_hash (base, DECL_UID (base),
> +							  INSERT);
> +      *slot = base;
> +    }
> +

I believe you only want to do this if (sra_mode ==
SRA_MODE_EARLY_INTRA || sra_mode == SRA_MODE_INTRA).

The idea of candidates is that we gather them in find_var_candidates
and ten we only eliminate them, this has the benefit of not worrying
about disqualifying a candidate and then erroneously re-adding it
later.  So if you could find a way to structure your code this way, I'd
much happier.  If it is impossible without traversing the whole
function just for that purpose, we may need some mechanism to prevent
us from making a disqualified decl a candidate again.  Or, if we come
to the conclusion that constant pool decls do not ever get
disqualified, a gcc_assert making sure it actually does not happen in
disqualify_candidate.

And of course at find_var_candidates time we check that all candidates
pass simple checks in maybe_add_sra_candidate.  I suppose many of them
do not make sense for constant pool decls but at least please have a
look whether that is the case for all of them or not.

>    if (!DECL_P (base) || !bitmap_bit_p (candidate_bitmap, DECL_UID (base)))
>      return NULL;
>  
> @@ -1025,6 +1036,37 @@ completely_scalarize (tree base, tree decl_type, HOST_WIDE_INT offset, tree ref)
>      }
>  }
>  
> +static tree
> +subst_initial (tree expr, tree var)

This needs a comment and a better name.  A name that would make it
clear this is for constant pool stuff only.

> +{
> +  if (TREE_CODE (expr) == VAR_DECL)
> +    {
> +      gcc_assert (DECL_IN_CONSTANT_POOL (expr));
> +      gcc_assert (expr == var);
> +      return DECL_INITIAL (expr);
> +    }
> +  if (TREE_CODE (expr) == COMPONENT_REF)
> +    {
> +      gcc_assert (TREE_CODE (TREE_OPERAND (expr, 1)) == FIELD_DECL);
> +      gcc_assert (TREE_OPERAND (expr, 2) == NULL_TREE);
> +      return fold_build3 (COMPONENT_REF, TREE_TYPE (expr),
> +			  subst_initial (TREE_OPERAND (expr, 0), var),
> +			  TREE_OPERAND (expr, 1),
> +			  NULL_TREE);
> +    }
> +  if (TREE_CODE (expr) == ARRAY_REF)
> +    {
> +      gcc_assert (TREE_OPERAND (expr, 2) == NULL_TREE);
> +      gcc_assert (TREE_OPERAND (expr, 3) == NULL_TREE);
> +      return fold (build4 (ARRAY_REF, TREE_TYPE (expr),
> +			   subst_initial (TREE_OPERAND (expr, 0), var),
> +			   TREE_OPERAND (expr, 1),
> +			   NULL_TREE,
> +			   NULL_TREE));
> +    }
> +  gcc_unreachable ();
> +}
> +
>  static void
>  scalarize_elem (tree base, HOST_WIDE_INT pos, HOST_WIDE_INT size,
>  		 tree ref, tree type)
> @@ -1033,6 +1075,9 @@ scalarize_elem (tree base, HOST_WIDE_INT pos, HOST_WIDE_INT size,
>    {
>      struct access *access = create_access_1 (base, pos, size);
>      access->expr = ref;
> +    if (TREE_CODE (base) == VAR_DECL
> +	&& DECL_IN_CONSTANT_POOL (base))
> +      access->replacement_decl = subst_initial (ref, base);

So replacement_decl ceases to be a decl and should be renamed to
replacement_expr.  (Such cleanups can be done as a followup, of
course).

Thanks,

Martin

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [RFC 4/5] Handle constant-pool entries
  2015-08-25 20:19   ` Jeff Law
  2015-08-26  7:24     ` Richard Biener
@ 2015-08-26 15:51     ` Alan Lawrence
  1 sibling, 0 replies; 45+ messages in thread
From: Alan Lawrence @ 2015-08-26 15:51 UTC (permalink / raw)
  To: Jeff Law; +Cc: gcc-patches, rguenther, mjambor

Jeff Law wrote:
>
> The question I have is why this differs from the effects of patch #5. 
> That would seem to indicate that there's things we're not getting into 
> the candidate tables with this approach?!?

I'll answer this first, as I think (Richard and) Martin have identified enough 
other issues with this patch that will take longer to address.... but if you 
look at the context to the hunk in patch 5, it is iterating through the 
candidates (from patch 4), and then filtering out any candidates bigger than 
max-scalarization-size, which filtering patch 5 removes.

--Alan

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records
  2015-08-26 10:12           ` Richard Biener
@ 2015-08-26 16:30             ` Alan Lawrence
  2015-08-26 19:18               ` Richard Biener
  0 siblings, 1 reply; 45+ messages in thread
From: Alan Lawrence @ 2015-08-26 16:30 UTC (permalink / raw)
  To: Richard Biener; +Cc: Martin Jambor, Jeff Law, GCC Patches, Richard Biener

Richard Biener wrote:

>>> One extra question is does the way we limit total scalarization work
>> well
>>> for arrays?  I suppose we have either sth like the maximum size of an
>>> aggregate we scalarize or the maximum number of component accesses
>>> we create?
>>>
>> Only the former and that would be kept intact.  It is in fact visible
>> in the context of the last hunk of the patch.
> 
> OK.  IIRC the gimplification code also has the latter and also considers zeroing the whole aggregate before initializing non-zero fields.  IMHO it makes sense to reuse some of the analysis and classification routines it has.

Do you mean gimplify_init_constructor? Yes, there's quite a lot of logic there 
;). That feels like a separate patch - and belonging to the constant-handling 
subseries of this series - as gimplify_init_constructor already deals with both 
record and array types, and I don't see anything there that's specifically good 
for total-scalarization of arrays....?

IOW, do you mean that to block this patch, or can it be separate (I can address 
Martin + Jeff's comments fairly quickly and independently) ?

Cheers, Alan

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records
  2015-08-26 16:30             ` Alan Lawrence
@ 2015-08-26 19:18               ` Richard Biener
  0 siblings, 0 replies; 45+ messages in thread
From: Richard Biener @ 2015-08-26 19:18 UTC (permalink / raw)
  To: Alan Lawrence, Richard Biener; +Cc: Martin Jambor, Jeff Law, GCC Patches

On August 26, 2015 6:08:55 PM GMT+02:00, Alan Lawrence <alan.lawrence@arm.com> wrote:
>Richard Biener wrote:
>
>>>> One extra question is does the way we limit total scalarization
>work
>>> well
>>>> for arrays?  I suppose we have either sth like the maximum size of
>an
>>>> aggregate we scalarize or the maximum number of component accesses
>>>> we create?
>>>>
>>> Only the former and that would be kept intact.  It is in fact
>visible
>>> in the context of the last hunk of the patch.
>> 
>> OK.  IIRC the gimplification code also has the latter and also
>considers zeroing the whole aggregate before initializing non-zero
>fields.  IMHO it makes sense to reuse some of the analysis and
>classification routines it has.
>
>Do you mean gimplify_init_constructor? Yes, there's quite a lot of
>logic there 
>;). That feels like a separate patch - and belonging to the
>constant-handling 
>subseries of this series

Yes.

 - as gimplify_init_constructor already deals
>with both 
>record and array types, and I don't see anything there that's
>specifically good 
>for total-scalarization of arrays....?
>
>IOW, do you mean that to block this patch, or can it be separate (I can
>address 
>Martin + Jeff's comments fairly quickly and independently) ?

No, but I'd like this being explores with the init sub series.  We don't want two places doing total scalarization of initualizers , gimplification and SRA and with different/conflicting heuristics.  IMHO the gimplification total scalarization happens too early.

Richard.

>Cheers, Alan


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records
  2015-08-25 21:44   ` [PATCH 2/5] completely_scalarize arrays as well as records Martin Jambor
  2015-08-25 21:55     ` Jeff Law
@ 2015-08-27 16:00     ` Alan Lawrence
  2015-08-28  7:19       ` Christophe Lyon
  1 sibling, 1 reply; 45+ messages in thread
From: Alan Lawrence @ 2015-08-27 16:00 UTC (permalink / raw)
  To: martin.jambor; +Cc: gcc-patches, rguenther, law

Martin Jambor wrote:
>
> First, I would be much
> happier if you added a proper comment to scalarize_elem function which
> you forgot completely.  The name is not very descriptive and it has
> quite few parameters too.
>
> Second, this patch should also fix PR 67283.  It would be great if you
> could verify that and add it to the changelog when committing if that
> is indeed the case.

Thanks for pointing both of those out. I've added a comment to scalarize_elem,
deleted the bogus comment in the new test, and yes I can confirm that the patch
fixes PR 67283 on x86_64, and also AArch64 if
 --param sra-max-scalarization-size-Ospeed is passed. (I've not added any
testcase specifically taken from that PR, however.)

Pushed as r277265.
---
 gcc/testsuite/gcc.dg/tree-ssa/sra-15.c |  37 ++++++++
 gcc/tree-sra.c                         | 149 ++++++++++++++++++++++-----------
 2 files changed, 138 insertions(+), 48 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sra-15.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
new file mode 100644
index 0000000..a22062e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
@@ -0,0 +1,37 @@
+/* Verify that SRA total scalarization works on records containing arrays.  */
+/* { dg-do run } */
+/* { dg-options "-O1 -fdump-tree-release_ssa --param sra-max-scalarization-size-Ospeed=32" } */
+
+extern void abort (void);
+
+struct S
+{
+  char c;
+  unsigned short f[2][2];
+  int i;
+  unsigned short f3, f4;
+};
+
+
+int __attribute__ ((noinline))
+foo (struct S *p)
+{
+  struct S l;
+
+  l = *p;
+  l.i++;
+  l.f[1][0] += 3;
+  *p = l;
+}
+
+int
+main (int argc, char **argv)
+{
+  struct S a = {0, { {5, 7}, {9, 11} }, 4, 0, 0};
+  foo (&a);
+  if (a.i != 5 || a.f[1][0] != 12)
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" } } */
diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index 8b3a0ad..3caf84a 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -915,73 +915,126 @@ create_access (tree expr, gimple stmt, bool write)
 }
 
 
-/* Return true iff TYPE is a RECORD_TYPE with fields that are either of gimple
-   register types or (recursively) records with only these two kinds of fields.
-   It also returns false if any of these records contains a bit-field.  */
+/* Return true iff TYPE is scalarizable - i.e. a RECORD_TYPE or ARRAY_TYPE with
+   fields that are either of gimple register types (excluding bit-fields)
+   or (recursively) scalarizable types.  */
 
 static bool
-type_consists_of_records_p (tree type)
+scalarizable_type_p (tree type)
 {
-  tree fld;
+  gcc_assert (!is_gimple_reg_type (type));
 
-  if (TREE_CODE (type) != RECORD_TYPE)
-    return false;
+  switch (TREE_CODE (type))
+  {
+  case RECORD_TYPE:
+    for (tree fld = TYPE_FIELDS (type); fld; fld = DECL_CHAIN (fld))
+      if (TREE_CODE (fld) == FIELD_DECL)
+	{
+	  tree ft = TREE_TYPE (fld);
 
-  for (fld = TYPE_FIELDS (type); fld; fld = DECL_CHAIN (fld))
-    if (TREE_CODE (fld) == FIELD_DECL)
-      {
-	tree ft = TREE_TYPE (fld);
+	  if (DECL_BIT_FIELD (fld))
+	    return false;
 
-	if (DECL_BIT_FIELD (fld))
-	  return false;
+	  if (!is_gimple_reg_type (ft)
+	      && !scalarizable_type_p (ft))
+	    return false;
+	}
 
-	if (!is_gimple_reg_type (ft)
-	    && !type_consists_of_records_p (ft))
-	  return false;
-      }
+    return true;
 
-  return true;
+  case ARRAY_TYPE:
+    {
+      tree elem = TREE_TYPE (type);
+      if (DECL_P (elem) && DECL_BIT_FIELD (elem))
+	return false;
+      if (!is_gimple_reg_type (elem)
+	 && !scalarizable_type_p (elem))
+	return false;
+      return true;
+    }
+  default:
+    return false;
+  }
 }
 
-/* Create total_scalarization accesses for all scalar type fields in DECL that
-   must be of a RECORD_TYPE conforming to type_consists_of_records_p.  BASE
-   must be the top-most VAR_DECL representing the variable, OFFSET must be the
-   offset of DECL within BASE.  REF must be the memory reference expression for
-   the given decl.  */
+static void scalarize_elem (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree, tree);
+
+/* Create total_scalarization accesses for all scalar fields of a member
+   of type DECL_TYPE conforming to scalarizable_type_p.  BASE
+   must be the top-most VAR_DECL representing the variable; within that,
+   OFFSET locates the member and REF must be the memory reference expression for
+   the member.  */
 
 static void
-completely_scalarize_record (tree base, tree decl, HOST_WIDE_INT offset,
-			     tree ref)
+completely_scalarize (tree base, tree decl_type, HOST_WIDE_INT offset, tree ref)
 {
-  tree fld, decl_type = TREE_TYPE (decl);
+  switch (TREE_CODE (decl_type))
+    {
+    case RECORD_TYPE:
+      for (tree fld = TYPE_FIELDS (decl_type); fld; fld = DECL_CHAIN (fld))
+	if (TREE_CODE (fld) == FIELD_DECL)
+	  {
+	    HOST_WIDE_INT pos = offset + int_bit_position (fld);
+	    tree ft = TREE_TYPE (fld);
+	    tree nref = build3 (COMPONENT_REF, ft, ref, fld, NULL_TREE);
 
-  for (fld = TYPE_FIELDS (decl_type); fld; fld = DECL_CHAIN (fld))
-    if (TREE_CODE (fld) == FIELD_DECL)
+	    scalarize_elem (base, pos, tree_to_uhwi (DECL_SIZE (fld)), nref,
+			    ft);
+	  }
+      break;
+    case ARRAY_TYPE:
       {
-	HOST_WIDE_INT pos = offset + int_bit_position (fld);
-	tree ft = TREE_TYPE (fld);
-	tree nref = build3 (COMPONENT_REF, TREE_TYPE (fld), ref, fld,
-			    NULL_TREE);
-
-	if (is_gimple_reg_type (ft))
+	tree elemtype = TREE_TYPE (decl_type);
+	tree elem_size = TYPE_SIZE (elemtype);
+	gcc_assert (elem_size && tree_fits_uhwi_p (elem_size));
+	int el_size = tree_to_uhwi (elem_size);
+	gcc_assert (el_size);
+
+	tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (decl_type));
+	tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (decl_type));
+	gcc_assert (TREE_CODE (minidx) == INTEGER_CST
+		    && TREE_CODE (maxidx) == INTEGER_CST);
+	unsigned HOST_WIDE_INT len = tree_to_uhwi (maxidx)
+				     + 1 - tree_to_uhwi (minidx);
+	/* 4th operand to ARRAY_REF is size in units of the type alignment.  */
+	for (unsigned HOST_WIDE_INT idx = 0; idx < len; idx++)
 	  {
-	    struct access *access;
-	    HOST_WIDE_INT size;
-
-	    size = tree_to_uhwi (DECL_SIZE (fld));
-	    access = create_access_1 (base, pos, size);
-	    access->expr = nref;
-	    access->type = ft;
-	    access->grp_total_scalarization = 1;
-	    /* Accesses for intraprocedural SRA can have their stmt NULL.  */
+	    tree t_idx = build_int_cst (TYPE_DOMAIN (decl_type), idx);
+	    tree nref = build4 (ARRAY_REF, elemtype, ref, t_idx, NULL_TREE,
+				NULL_TREE);
+	    int el_off = offset + idx * el_size;
+	    scalarize_elem (base, el_off, el_size, nref, elemtype);
 	  }
-	else
-	  completely_scalarize_record (base, fld, pos, nref);
       }
+      break;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Create total_scalarization accesses for a member of type TYPE, which must
+   satisfy either is_gimple_reg_type or scalarizable_type_p.  BASE must be the
+   top-most VAR_DECL representing the variable; within that, POS and SIZE locate
+   the member and REF must be the reference expression for it.  */
+
+static void
+scalarize_elem (tree base, HOST_WIDE_INT pos, HOST_WIDE_INT size,
+		 tree ref, tree type)
+{
+  if (is_gimple_reg_type (type))
+  {
+    struct access *access = create_access_1 (base, pos, size);
+    access->expr = ref;
+    access->type = type;
+    access->grp_total_scalarization = 1;
+    /* Accesses for intraprocedural SRA can have their stmt NULL.  */
+  }
+  else
+    completely_scalarize (base, type, pos, ref);
 }
 
 /* Create a total_scalarization access for VAR as a whole.  VAR must be of a
-   RECORD_TYPE conforming to type_consists_of_records_p.  */
+   RECORD_TYPE or ARRAY_TYPE conforming to scalarizable_type_p.  */
 
 static void
 create_total_scalarization_access (tree var)
@@ -2521,13 +2574,13 @@ analyze_all_variable_accesses (void)
 	tree var = candidate (i);
 
 	if (TREE_CODE (var) == VAR_DECL
-	    && type_consists_of_records_p (TREE_TYPE (var)))
+	    && scalarizable_type_p (TREE_TYPE (var)))
 	  {
 	    if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (var)))
 		<= max_scalarization_size)
 	      {
 		create_total_scalarization_access (var);
-		completely_scalarize_record (var, var, 0, var);
+		completely_scalarize (var, TREE_TYPE (var), 0, var);
 		if (dump_file && (dump_flags & TDF_DETAILS))
 		  {
 		    fprintf (dump_file, "Will attempt to totally scalarize ");
-- 
1.8.3

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Fixing sra-12.c (was: Re: [PATCH 2/5] completely_scalarize arrays as well as records)
  2015-08-25 19:40   ` Jeff Law
@ 2015-08-27 16:54     ` Alan Lawrence
  2015-08-27 20:58       ` Fixing sra-12.c Jeff Law
  0 siblings, 1 reply; 45+ messages in thread
From: Alan Lawrence @ 2015-08-27 16:54 UTC (permalink / raw)
  To: Jeff Law; +Cc: gcc-patches, jasonwucj, shiva0217

Jeff Law wrote:
>
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
>> new file mode 100644
>> index 0000000..e251058
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
>> @@ -0,0 +1,38 @@
>> +/* Verify that SRA total scalarization works on records containing arrays.  */
>> +/* Test skipped for targets with small (often default) MOVE_RATIO.  */
> ?!?  I don't see anything that skips this test for any targets. 
> Presumably this was copied from sra-12.c.  I suspect this comment should 
> just be removed.

You are right, thank you. Copied from sra-12.c, was I that obvious? ;). Indeed I 
notice that the same trick (of passing --param 
sra-max-scalarization-size-Ospeed=32) enables sra-12.c to pass on 
avr-unknown-linux-gnu and sh-unknown-linux-gnu, too; I haven't managed to build 
a compiler for nds32 yet...

--Alan

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: Fixing sra-12.c
  2015-08-27 16:54     ` Fixing sra-12.c (was: Re: [PATCH 2/5] completely_scalarize arrays as well as records) Alan Lawrence
@ 2015-08-27 20:58       ` Jeff Law
  0 siblings, 0 replies; 45+ messages in thread
From: Jeff Law @ 2015-08-27 20:58 UTC (permalink / raw)
  To: Alan Lawrence; +Cc: gcc-patches, jasonwucj, shiva0217

On 08/27/2015 10:48 AM, Alan Lawrence wrote:
> Jeff Law wrote:
>>
>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
>>> b/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
>>> new file mode 100644
>>> index 0000000..e251058
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
>>> @@ -0,0 +1,38 @@
>>> +/* Verify that SRA total scalarization works on records containing
>>> arrays.  */
>>> +/* Test skipped for targets with small (often default) MOVE_RATIO.  */
>> ?!?  I don't see anything that skips this test for any targets.
>> Presumably this was copied from sra-12.c.  I suspect this comment
>> should just be removed.
>
> You are right, thank you. Copied from sra-12.c, was I that obvious? ;).
> Indeed I notice that the same trick (of passing --param
> sra-max-scalarization-size-Ospeed=32) enables sra-12.c to pass on
> avr-unknown-linux-gnu and sh-unknown-linux-gnu, too; I haven't managed
> to build a compiler for nds32 yet...
The comment just caught my eye as I didn't see any target bits in the 
test.  A quick search for MOVE_RATIO in the testsuite popped up sra-12.

Jeff

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records
  2015-08-27 16:00     ` Alan Lawrence
@ 2015-08-28  7:19       ` Christophe Lyon
  2015-08-28  8:06         ` Richard Biener
  0 siblings, 1 reply; 45+ messages in thread
From: Christophe Lyon @ 2015-08-28  7:19 UTC (permalink / raw)
  To: Alan Lawrence; +Cc: martin.jambor, gcc-patches, Richard Biener, Jeff Law

On 27 August 2015 at 17:43, Alan Lawrence <alan.lawrence@arm.com> wrote:
> Martin Jambor wrote:
>>
>> First, I would be much
>> happier if you added a proper comment to scalarize_elem function which
>> you forgot completely.  The name is not very descriptive and it has
>> quite few parameters too.
>>
>> Second, this patch should also fix PR 67283.  It would be great if you
>> could verify that and add it to the changelog when committing if that
>> is indeed the case.
>
> Thanks for pointing both of those out. I've added a comment to scalarize_elem,
> deleted the bogus comment in the new test, and yes I can confirm that the patch
> fixes PR 67283 on x86_64, and also AArch64 if
>  --param sra-max-scalarization-size-Ospeed is passed. (I've not added any
> testcase specifically taken from that PR, however.)
>
> Pushed as r277265.

Actually, is r227265.

Since since commit I've noticed that
g++.dg/torture/pr64312.C
fails at -O1 in my config, saying "virtual memory exhaustion" (arm* targets)
I run my validations under ulimit -v 10GB, which seems already large enough.

Do we consider this a bug?

Christophe.

> ---
>  gcc/testsuite/gcc.dg/tree-ssa/sra-15.c |  37 ++++++++
>  gcc/tree-sra.c                         | 149 ++++++++++++++++++++++-----------
>  2 files changed, 138 insertions(+), 48 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
> new file mode 100644
> index 0000000..a22062e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
> @@ -0,0 +1,37 @@
> +/* Verify that SRA total scalarization works on records containing arrays.  */
> +/* { dg-do run } */
> +/* { dg-options "-O1 -fdump-tree-release_ssa --param sra-max-scalarization-size-Ospeed=32" } */
> +
> +extern void abort (void);
> +
> +struct S
> +{
> +  char c;
> +  unsigned short f[2][2];
> +  int i;
> +  unsigned short f3, f4;
> +};
> +
> +
> +int __attribute__ ((noinline))
> +foo (struct S *p)
> +{
> +  struct S l;
> +
> +  l = *p;
> +  l.i++;
> +  l.f[1][0] += 3;
> +  *p = l;
> +}
> +
> +int
> +main (int argc, char **argv)
> +{
> +  struct S a = {0, { {5, 7}, {9, 11} }, 4, 0, 0};
> +  foo (&a);
> +  if (a.i != 5 || a.f[1][0] != 12)
> +    abort ();
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" } } */
> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
> index 8b3a0ad..3caf84a 100644
> --- a/gcc/tree-sra.c
> +++ b/gcc/tree-sra.c
> @@ -915,73 +915,126 @@ create_access (tree expr, gimple stmt, bool write)
>  }
>
>
> -/* Return true iff TYPE is a RECORD_TYPE with fields that are either of gimple
> -   register types or (recursively) records with only these two kinds of fields.
> -   It also returns false if any of these records contains a bit-field.  */
> +/* Return true iff TYPE is scalarizable - i.e. a RECORD_TYPE or ARRAY_TYPE with
> +   fields that are either of gimple register types (excluding bit-fields)
> +   or (recursively) scalarizable types.  */
>
>  static bool
> -type_consists_of_records_p (tree type)
> +scalarizable_type_p (tree type)
>  {
> -  tree fld;
> +  gcc_assert (!is_gimple_reg_type (type));
>
> -  if (TREE_CODE (type) != RECORD_TYPE)
> -    return false;
> +  switch (TREE_CODE (type))
> +  {
> +  case RECORD_TYPE:
> +    for (tree fld = TYPE_FIELDS (type); fld; fld = DECL_CHAIN (fld))
> +      if (TREE_CODE (fld) == FIELD_DECL)
> +       {
> +         tree ft = TREE_TYPE (fld);
>
> -  for (fld = TYPE_FIELDS (type); fld; fld = DECL_CHAIN (fld))
> -    if (TREE_CODE (fld) == FIELD_DECL)
> -      {
> -       tree ft = TREE_TYPE (fld);
> +         if (DECL_BIT_FIELD (fld))
> +           return false;
>
> -       if (DECL_BIT_FIELD (fld))
> -         return false;
> +         if (!is_gimple_reg_type (ft)
> +             && !scalarizable_type_p (ft))
> +           return false;
> +       }
>
> -       if (!is_gimple_reg_type (ft)
> -           && !type_consists_of_records_p (ft))
> -         return false;
> -      }
> +    return true;
>
> -  return true;
> +  case ARRAY_TYPE:
> +    {
> +      tree elem = TREE_TYPE (type);
> +      if (DECL_P (elem) && DECL_BIT_FIELD (elem))
> +       return false;
> +      if (!is_gimple_reg_type (elem)
> +        && !scalarizable_type_p (elem))
> +       return false;
> +      return true;
> +    }
> +  default:
> +    return false;
> +  }
>  }
>
> -/* Create total_scalarization accesses for all scalar type fields in DECL that
> -   must be of a RECORD_TYPE conforming to type_consists_of_records_p.  BASE
> -   must be the top-most VAR_DECL representing the variable, OFFSET must be the
> -   offset of DECL within BASE.  REF must be the memory reference expression for
> -   the given decl.  */
> +static void scalarize_elem (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree, tree);
> +
> +/* Create total_scalarization accesses for all scalar fields of a member
> +   of type DECL_TYPE conforming to scalarizable_type_p.  BASE
> +   must be the top-most VAR_DECL representing the variable; within that,
> +   OFFSET locates the member and REF must be the memory reference expression for
> +   the member.  */
>
>  static void
> -completely_scalarize_record (tree base, tree decl, HOST_WIDE_INT offset,
> -                            tree ref)
> +completely_scalarize (tree base, tree decl_type, HOST_WIDE_INT offset, tree ref)
>  {
> -  tree fld, decl_type = TREE_TYPE (decl);
> +  switch (TREE_CODE (decl_type))
> +    {
> +    case RECORD_TYPE:
> +      for (tree fld = TYPE_FIELDS (decl_type); fld; fld = DECL_CHAIN (fld))
> +       if (TREE_CODE (fld) == FIELD_DECL)
> +         {
> +           HOST_WIDE_INT pos = offset + int_bit_position (fld);
> +           tree ft = TREE_TYPE (fld);
> +           tree nref = build3 (COMPONENT_REF, ft, ref, fld, NULL_TREE);
>
> -  for (fld = TYPE_FIELDS (decl_type); fld; fld = DECL_CHAIN (fld))
> -    if (TREE_CODE (fld) == FIELD_DECL)
> +           scalarize_elem (base, pos, tree_to_uhwi (DECL_SIZE (fld)), nref,
> +                           ft);
> +         }
> +      break;
> +    case ARRAY_TYPE:
>        {
> -       HOST_WIDE_INT pos = offset + int_bit_position (fld);
> -       tree ft = TREE_TYPE (fld);
> -       tree nref = build3 (COMPONENT_REF, TREE_TYPE (fld), ref, fld,
> -                           NULL_TREE);
> -
> -       if (is_gimple_reg_type (ft))
> +       tree elemtype = TREE_TYPE (decl_type);
> +       tree elem_size = TYPE_SIZE (elemtype);
> +       gcc_assert (elem_size && tree_fits_uhwi_p (elem_size));
> +       int el_size = tree_to_uhwi (elem_size);
> +       gcc_assert (el_size);
> +
> +       tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (decl_type));
> +       tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (decl_type));
> +       gcc_assert (TREE_CODE (minidx) == INTEGER_CST
> +                   && TREE_CODE (maxidx) == INTEGER_CST);
> +       unsigned HOST_WIDE_INT len = tree_to_uhwi (maxidx)
> +                                    + 1 - tree_to_uhwi (minidx);
> +       /* 4th operand to ARRAY_REF is size in units of the type alignment.  */
> +       for (unsigned HOST_WIDE_INT idx = 0; idx < len; idx++)
>           {
> -           struct access *access;
> -           HOST_WIDE_INT size;
> -
> -           size = tree_to_uhwi (DECL_SIZE (fld));
> -           access = create_access_1 (base, pos, size);
> -           access->expr = nref;
> -           access->type = ft;
> -           access->grp_total_scalarization = 1;
> -           /* Accesses for intraprocedural SRA can have their stmt NULL.  */
> +           tree t_idx = build_int_cst (TYPE_DOMAIN (decl_type), idx);
> +           tree nref = build4 (ARRAY_REF, elemtype, ref, t_idx, NULL_TREE,
> +                               NULL_TREE);
> +           int el_off = offset + idx * el_size;
> +           scalarize_elem (base, el_off, el_size, nref, elemtype);
>           }
> -       else
> -         completely_scalarize_record (base, fld, pos, nref);
>        }
> +      break;
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
> +
> +/* Create total_scalarization accesses for a member of type TYPE, which must
> +   satisfy either is_gimple_reg_type or scalarizable_type_p.  BASE must be the
> +   top-most VAR_DECL representing the variable; within that, POS and SIZE locate
> +   the member and REF must be the reference expression for it.  */
> +
> +static void
> +scalarize_elem (tree base, HOST_WIDE_INT pos, HOST_WIDE_INT size,
> +                tree ref, tree type)
> +{
> +  if (is_gimple_reg_type (type))
> +  {
> +    struct access *access = create_access_1 (base, pos, size);
> +    access->expr = ref;
> +    access->type = type;
> +    access->grp_total_scalarization = 1;
> +    /* Accesses for intraprocedural SRA can have their stmt NULL.  */
> +  }
> +  else
> +    completely_scalarize (base, type, pos, ref);
>  }
>
>  /* Create a total_scalarization access for VAR as a whole.  VAR must be of a
> -   RECORD_TYPE conforming to type_consists_of_records_p.  */
> +   RECORD_TYPE or ARRAY_TYPE conforming to scalarizable_type_p.  */
>
>  static void
>  create_total_scalarization_access (tree var)
> @@ -2521,13 +2574,13 @@ analyze_all_variable_accesses (void)
>         tree var = candidate (i);
>
>         if (TREE_CODE (var) == VAR_DECL
> -           && type_consists_of_records_p (TREE_TYPE (var)))
> +           && scalarizable_type_p (TREE_TYPE (var)))
>           {
>             if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (var)))
>                 <= max_scalarization_size)
>               {
>                 create_total_scalarization_access (var);
> -               completely_scalarize_record (var, var, 0, var);
> +               completely_scalarize (var, TREE_TYPE (var), 0, var);
>                 if (dump_file && (dump_flags & TDF_DETAILS))
>                   {
>                     fprintf (dump_file, "Will attempt to totally scalarize ");
> --
> 1.8.3
>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records
  2015-08-28  7:19       ` Christophe Lyon
@ 2015-08-28  8:06         ` Richard Biener
  2015-08-28  8:16           ` Christophe Lyon
  0 siblings, 1 reply; 45+ messages in thread
From: Richard Biener @ 2015-08-28  8:06 UTC (permalink / raw)
  To: Christophe Lyon; +Cc: Alan Lawrence, martin.jambor, gcc-patches, Jeff Law

On Fri, 28 Aug 2015, Christophe Lyon wrote:

> On 27 August 2015 at 17:43, Alan Lawrence <alan.lawrence@arm.com> wrote:
> > Martin Jambor wrote:
> >>
> >> First, I would be much
> >> happier if you added a proper comment to scalarize_elem function which
> >> you forgot completely.  The name is not very descriptive and it has
> >> quite few parameters too.
> >>
> >> Second, this patch should also fix PR 67283.  It would be great if you
> >> could verify that and add it to the changelog when committing if that
> >> is indeed the case.
> >
> > Thanks for pointing both of those out. I've added a comment to scalarize_elem,
> > deleted the bogus comment in the new test, and yes I can confirm that the patch
> > fixes PR 67283 on x86_64, and also AArch64 if
> >  --param sra-max-scalarization-size-Ospeed is passed. (I've not added any
> > testcase specifically taken from that PR, however.)
> >
> > Pushed as r277265.
> 
> Actually, is r227265.
> 
> Since since commit I've noticed that
> g++.dg/torture/pr64312.C
> fails at -O1 in my config, saying "virtual memory exhaustion" (arm* targets)
> I run my validations under ulimit -v 10GB, which seems already large enough.
> 
> Do we consider this a bug?

Sure we do.  You have to investigate this (I guess we run into some
endless looping/recursing that eats memory somewhere).

Richard.

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records
  2015-08-28  8:06         ` Richard Biener
@ 2015-08-28  8:16           ` Christophe Lyon
  2015-08-28  8:31             ` Richard Biener
  2015-08-28 10:09             ` Alan Lawrence
  0 siblings, 2 replies; 45+ messages in thread
From: Christophe Lyon @ 2015-08-28  8:16 UTC (permalink / raw)
  To: Richard Biener; +Cc: Alan Lawrence, martin.jambor, gcc-patches, Jeff Law

On 28 August 2015 at 09:48, Richard Biener <rguenther@suse.de> wrote:
> On Fri, 28 Aug 2015, Christophe Lyon wrote:
>
>> On 27 August 2015 at 17:43, Alan Lawrence <alan.lawrence@arm.com> wrote:
>> > Martin Jambor wrote:
>> >>
>> >> First, I would be much
>> >> happier if you added a proper comment to scalarize_elem function which
>> >> you forgot completely.  The name is not very descriptive and it has
>> >> quite few parameters too.
>> >>
>> >> Second, this patch should also fix PR 67283.  It would be great if you
>> >> could verify that and add it to the changelog when committing if that
>> >> is indeed the case.
>> >
>> > Thanks for pointing both of those out. I've added a comment to scalarize_elem,
>> > deleted the bogus comment in the new test, and yes I can confirm that the patch
>> > fixes PR 67283 on x86_64, and also AArch64 if
>> >  --param sra-max-scalarization-size-Ospeed is passed. (I've not added any
>> > testcase specifically taken from that PR, however.)
>> >
>> > Pushed as r277265.
>>
>> Actually, is r227265.
>>
>> Since since commit I've noticed that
>> g++.dg/torture/pr64312.C
>> fails at -O1 in my config, saying "virtual memory exhaustion" (arm* targets)
>> I run my validations under ulimit -v 10GB, which seems already large enough.
>>
>> Do we consider this a bug?
>
> Sure we do.  You have to investigate this (I guess we run into some
> endless looping/recursing that eats memory somewhere).
>

I asked because I assumed that Alan saw it pass in his configuration.

> Richard.

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records
  2015-08-28  8:16           ` Christophe Lyon
@ 2015-08-28  8:31             ` Richard Biener
  2015-08-28 10:09             ` Alan Lawrence
  1 sibling, 0 replies; 45+ messages in thread
From: Richard Biener @ 2015-08-28  8:31 UTC (permalink / raw)
  To: Christophe Lyon; +Cc: Alan Lawrence, martin.jambor, gcc-patches, Jeff Law

On Fri, 28 Aug 2015, Christophe Lyon wrote:

> On 28 August 2015 at 09:48, Richard Biener <rguenther@suse.de> wrote:
> > On Fri, 28 Aug 2015, Christophe Lyon wrote:
> >
> >> On 27 August 2015 at 17:43, Alan Lawrence <alan.lawrence@arm.com> wrote:
> >> > Martin Jambor wrote:
> >> >>
> >> >> First, I would be much
> >> >> happier if you added a proper comment to scalarize_elem function which
> >> >> you forgot completely.  The name is not very descriptive and it has
> >> >> quite few parameters too.
> >> >>
> >> >> Second, this patch should also fix PR 67283.  It would be great if you
> >> >> could verify that and add it to the changelog when committing if that
> >> >> is indeed the case.
> >> >
> >> > Thanks for pointing both of those out. I've added a comment to scalarize_elem,
> >> > deleted the bogus comment in the new test, and yes I can confirm that the patch
> >> > fixes PR 67283 on x86_64, and also AArch64 if
> >> >  --param sra-max-scalarization-size-Ospeed is passed. (I've not added any
> >> > testcase specifically taken from that PR, however.)
> >> >
> >> > Pushed as r277265.
> >>
> >> Actually, is r227265.
> >>
> >> Since since commit I've noticed that
> >> g++.dg/torture/pr64312.C
> >> fails at -O1 in my config, saying "virtual memory exhaustion" (arm* targets)
> >> I run my validations under ulimit -v 10GB, which seems already large enough.
> >>
> >> Do we consider this a bug?
> >
> > Sure we do.  You have to investigate this (I guess we run into some
> > endless looping/recursing that eats memory somewhere).
> >
> 
> I asked because I assumed that Alan saw it pass in his configuration.

Well, it should still be investigated - whether you caused it or not ;)
It's a bug.

Richard.

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records
  2015-08-28  8:16           ` Christophe Lyon
  2015-08-28  8:31             ` Richard Biener
@ 2015-08-28 10:09             ` Alan Lawrence
  2015-08-28 13:35               ` Richard Biener
  1 sibling, 1 reply; 45+ messages in thread
From: Alan Lawrence @ 2015-08-28 10:09 UTC (permalink / raw)
  To: Christophe Lyon; +Cc: Richard Biener, martin.jambor, gcc-patches, Jeff Law

Christophe Lyon wrote:
> 
> I asked because I assumed that Alan saw it pass in his configuration.


Bah. No - I now discover a problem in my C++ testsuite setup that was causing a 
large number of tests to not be executed. I see the problem too now, 
investigating....

--Alan

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records
  2015-08-28 10:09             ` Alan Lawrence
@ 2015-08-28 13:35               ` Richard Biener
  2015-08-28 14:05                 ` Alan Lawrence
  0 siblings, 1 reply; 45+ messages in thread
From: Richard Biener @ 2015-08-28 13:35 UTC (permalink / raw)
  To: Alan Lawrence; +Cc: Christophe Lyon, martin.jambor, gcc-patches, Jeff Law

On Fri, 28 Aug 2015, Alan Lawrence wrote:

> Christophe Lyon wrote:
> > 
> > I asked because I assumed that Alan saw it pass in his configuration.
> 
> 
> Bah. No - I now discover a problem in my C++ testsuite setup that was causing
> a large number of tests to not be executed. I see the problem too now,
> investigating....

Btw, your patch broke Ada:

+===========================GNAT BUG 
DETECTED==============================+
| 6.0.0 20150828 (experimental) (x86_64-pc-linux-gnu) GCC error:           
|
| in completely_scalarize, at tree-sra.c:996                               
|
| Error detected around ../rts/a-coorse.ads:46:24                          
|

    case ARRAY_TYPE:
      {
        tree elemtype = TREE_TYPE (decl_type);
        tree elem_size = TYPE_SIZE (elemtype);
        gcc_assert (elem_size && tree_fits_uhwi_p (elem_size));
        int el_size = tree_to_uhwi (elem_size);
        gcc_assert (el_size);

        tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (decl_type));
        tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (decl_type));
        gcc_assert (TREE_CODE (minidx) == INTEGER_CST
                    && TREE_CODE (maxidx) == INTEGER_CST);

obviously you missed VLAs.  min/max value can also be NULL.

Richard.

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records
  2015-08-28 13:35               ` Richard Biener
@ 2015-08-28 14:05                 ` Alan Lawrence
  2015-08-28 15:17                   ` Alan Lawrence
  0 siblings, 1 reply; 45+ messages in thread
From: Alan Lawrence @ 2015-08-28 14:05 UTC (permalink / raw)
  To: Richard Biener; +Cc: Christophe Lyon, martin.jambor, gcc-patches, Jeff Law

Richard Biener wrote:
> On Fri, 28 Aug 2015, Alan Lawrence wrote:
> 
>> Christophe Lyon wrote:
>>> I asked because I assumed that Alan saw it pass in his configuration.
>>
>> Bah. No - I now discover a problem in my C++ testsuite setup that was causing
>> a large number of tests to not be executed. I see the problem too now,
>> investigating....
> 
> Btw, your patch broke Ada:
> 
> +===========================GNAT BUG 
> DETECTED==============================+
> | 6.0.0 20150828 (experimental) (x86_64-pc-linux-gnu) GCC error:           
> |
> | in completely_scalarize, at tree-sra.c:996                               
> |
> | Error detected around ../rts/a-coorse.ads:46:24                          
> |
> 
>     case ARRAY_TYPE:
>       {
>         tree elemtype = TREE_TYPE (decl_type);
>         tree elem_size = TYPE_SIZE (elemtype);
>         gcc_assert (elem_size && tree_fits_uhwi_p (elem_size));
>         int el_size = tree_to_uhwi (elem_size);
>         gcc_assert (el_size);
> 
>         tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (decl_type));
>         tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (decl_type));
>         gcc_assert (TREE_CODE (minidx) == INTEGER_CST
>                     && TREE_CODE (maxidx) == INTEGER_CST);
> 
> obviously you missed VLAs.  min/max value can also be NULL.
> 
> Richard.
> 

Right. I think VLA's are the problem with pr64312.C also. I'm testing a fix 
(that declares arrays with any of these properties as unscalarizable).

Monday is a bank holiday in UK and so I expect to get back to you on Tuesday.

--Alan

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records
  2015-08-28 14:05                 ` Alan Lawrence
@ 2015-08-28 15:17                   ` Alan Lawrence
  2015-09-07 13:20                     ` Alan Lawrence
  0 siblings, 1 reply; 45+ messages in thread
From: Alan Lawrence @ 2015-08-28 15:17 UTC (permalink / raw)
  To: Alan Lawrence
  Cc: Richard Biener, Christophe Lyon, martin.jambor, gcc-patches, Jeff Law

Alan Lawrence wrote:
> 
> Right. I think VLA's are the problem with pr64312.C also. I'm testing a fix 
> (that declares arrays with any of these properties as unscalarizable).
> 
> Monday is a bank holiday in UK and so I expect to get back to you on Tuesday.
> 
> --Alan

In the meantime I've reverted the patch pending further testing on x86, aarch64 
and arm.

--Alan

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records.
  2015-08-28 15:17                   ` Alan Lawrence
@ 2015-09-07 13:20                     ` Alan Lawrence
  2015-09-08 12:47                       ` Martin Jambor
  0 siblings, 1 reply; 45+ messages in thread
From: Alan Lawrence @ 2015-09-07 13:20 UTC (permalink / raw)
  To: gcc-patches; +Cc: rguenther, christophe.lyon, martin.jambor, law

In-Reply-To: <55E0697D.2010008@arm.com>

On 28/08/15 16:08, Alan Lawrence wrote:
> Alan Lawrence wrote:
>>
>> Right. I think VLA's are the problem with pr64312.C also. I'm testing a fix
>> (that declares arrays with any of these properties as unscalarizable).
> ... 
> In the meantime I've reverted the patch pending further testing on x86, aarch64
> and arm.

I've now tested g++ and fortran (+ bootstrap + check-gcc) on x86, AArch64 and
ARM, and Ada on x86 and ARM.

So far the list of failures from the original patch seems to be:

* g++.dg/torture/pr64312.C on ARM and m68k-linux
* Building Ada on x86
* Ada ACATS c87b31a on ARM (where the Ada frontend builds fine)

Here's a new version, that fixes all the above, by adding a dose of paranoia in
scalarizable_type_p... (I wonder about adding a comment in completely_scalarize
that such cases have already been ruled out?)

OK to install?

Cheers, Alan
---
 gcc/testsuite/gcc.dg/tree-ssa/sra-15.c |  37 ++++++++
 gcc/tree-sra.c                         | 155 +++++++++++++++++++++++----------
 2 files changed, 144 insertions(+), 48 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sra-15.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
new file mode 100644
index 0000000..a22062e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
@@ -0,0 +1,37 @@
+/* Verify that SRA total scalarization works on records containing arrays.  */
+/* { dg-do run } */
+/* { dg-options "-O1 -fdump-tree-release_ssa --param sra-max-scalarization-size-Ospeed=32" } */
+
+extern void abort (void);
+
+struct S
+{
+  char c;
+  unsigned short f[2][2];
+  int i;
+  unsigned short f3, f4;
+};
+
+
+int __attribute__ ((noinline))
+foo (struct S *p)
+{
+  struct S l;
+
+  l = *p;
+  l.i++;
+  l.f[1][0] += 3;
+  *p = l;
+}
+
+int
+main (int argc, char **argv)
+{
+  struct S a = {0, { {5, 7}, {9, 11} }, 4, 0, 0};
+  foo (&a);
+  if (a.i != 5 || a.f[1][0] != 12)
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" } } */
diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index 8b3a0ad..d9fe058 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -915,73 +915,132 @@ create_access (tree expr, gimple stmt, bool write)
 }
 
 
-/* Return true iff TYPE is a RECORD_TYPE with fields that are either of gimple
-   register types or (recursively) records with only these two kinds of fields.
-   It also returns false if any of these records contains a bit-field.  */
+/* Return true iff TYPE is scalarizable - i.e. a RECORD_TYPE or fixed-length
+   ARRAY_TYPE with fields that are either of gimple register types (excluding
+   bit-fields) or (recursively) scalarizable types.  */
 
 static bool
-type_consists_of_records_p (tree type)
+scalarizable_type_p (tree type)
 {
-  tree fld;
+  gcc_assert (!is_gimple_reg_type (type));
 
-  if (TREE_CODE (type) != RECORD_TYPE)
-    return false;
+  switch (TREE_CODE (type))
+  {
+  case RECORD_TYPE:
+    for (tree fld = TYPE_FIELDS (type); fld; fld = DECL_CHAIN (fld))
+      if (TREE_CODE (fld) == FIELD_DECL)
+	{
+	  tree ft = TREE_TYPE (fld);
 
-  for (fld = TYPE_FIELDS (type); fld; fld = DECL_CHAIN (fld))
-    if (TREE_CODE (fld) == FIELD_DECL)
-      {
-	tree ft = TREE_TYPE (fld);
+	  if (DECL_BIT_FIELD (fld))
+	    return false;
 
-	if (DECL_BIT_FIELD (fld))
-	  return false;
+	  if (!is_gimple_reg_type (ft)
+	      && !scalarizable_type_p (ft))
+	    return false;
+	}
 
-	if (!is_gimple_reg_type (ft)
-	    && !type_consists_of_records_p (ft))
-	  return false;
-      }
+    return true;
 
-  return true;
+  case ARRAY_TYPE:
+    {
+      if (TYPE_DOMAIN (type) == NULL_TREE
+	  || !TREE_CONSTANT (TYPE_MIN_VALUE (TYPE_DOMAIN (type)))
+	  || !TREE_CONSTANT (TYPE_MAX_VALUE (TYPE_DOMAIN (type)))
+	  || !TREE_CONSTANT (TYPE_SIZE (type))
+	  || (tree_to_shwi (TYPE_SIZE (type)) <= 0))
+	return false;
+      tree elem = TREE_TYPE (type);
+      if (DECL_P (elem) && DECL_BIT_FIELD (elem))
+	return false;
+      if (!is_gimple_reg_type (elem)
+	 && !scalarizable_type_p (elem))
+	return false;
+      return true;
+    }
+  default:
+    return false;
+  }
 }
 
-/* Create total_scalarization accesses for all scalar type fields in DECL that
-   must be of a RECORD_TYPE conforming to type_consists_of_records_p.  BASE
-   must be the top-most VAR_DECL representing the variable, OFFSET must be the
-   offset of DECL within BASE.  REF must be the memory reference expression for
-   the given decl.  */
+static void scalarize_elem (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree, tree);
+
+/* Create total_scalarization accesses for all scalar fields of a member
+   of type DECL_TYPE conforming to scalarizable_type_p.  BASE
+   must be the top-most VAR_DECL representing the variable; within that,
+   OFFSET locates the member and REF must be the memory reference expression for
+   the member.  */
 
 static void
-completely_scalarize_record (tree base, tree decl, HOST_WIDE_INT offset,
-			     tree ref)
+completely_scalarize (tree base, tree decl_type, HOST_WIDE_INT offset, tree ref)
 {
-  tree fld, decl_type = TREE_TYPE (decl);
+  switch (TREE_CODE (decl_type))
+    {
+    case RECORD_TYPE:
+      for (tree fld = TYPE_FIELDS (decl_type); fld; fld = DECL_CHAIN (fld))
+	if (TREE_CODE (fld) == FIELD_DECL)
+	  {
+	    HOST_WIDE_INT pos = offset + int_bit_position (fld);
+	    tree ft = TREE_TYPE (fld);
+	    tree nref = build3 (COMPONENT_REF, ft, ref, fld, NULL_TREE);
 
-  for (fld = TYPE_FIELDS (decl_type); fld; fld = DECL_CHAIN (fld))
-    if (TREE_CODE (fld) == FIELD_DECL)
+	    scalarize_elem (base, pos, tree_to_uhwi (DECL_SIZE (fld)), nref,
+			    ft);
+	  }
+      break;
+    case ARRAY_TYPE:
       {
-	HOST_WIDE_INT pos = offset + int_bit_position (fld);
-	tree ft = TREE_TYPE (fld);
-	tree nref = build3 (COMPONENT_REF, TREE_TYPE (fld), ref, fld,
-			    NULL_TREE);
-
-	if (is_gimple_reg_type (ft))
+	tree elemtype = TREE_TYPE (decl_type);
+	tree elem_size = TYPE_SIZE (elemtype);
+	gcc_assert (elem_size && tree_fits_uhwi_p (elem_size));
+	int el_size = tree_to_uhwi (elem_size);
+	gcc_assert (el_size);
+
+	tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (decl_type));
+	tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (decl_type));
+	gcc_assert (TREE_CODE (minidx) == INTEGER_CST
+		    && TREE_CODE (maxidx) == INTEGER_CST);
+	unsigned HOST_WIDE_INT len = tree_to_uhwi (maxidx)
+				     + 1 - tree_to_uhwi (minidx);
+	/* 4th operand to ARRAY_REF is size in units of the type alignment.  */
+	for (unsigned HOST_WIDE_INT idx = 0; idx < len; idx++)
 	  {
-	    struct access *access;
-	    HOST_WIDE_INT size;
-
-	    size = tree_to_uhwi (DECL_SIZE (fld));
-	    access = create_access_1 (base, pos, size);
-	    access->expr = nref;
-	    access->type = ft;
-	    access->grp_total_scalarization = 1;
-	    /* Accesses for intraprocedural SRA can have their stmt NULL.  */
+	    tree t_idx = build_int_cst (TYPE_DOMAIN (decl_type), idx);
+	    tree nref = build4 (ARRAY_REF, elemtype, ref, t_idx, NULL_TREE,
+				NULL_TREE);
+	    int el_off = offset + idx * el_size;
+	    scalarize_elem (base, el_off, el_size, nref, elemtype);
 	  }
-	else
-	  completely_scalarize_record (base, fld, pos, nref);
       }
+      break;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Create total_scalarization accesses for a member of type TYPE, which must
+   satisfy either is_gimple_reg_type or scalarizable_type_p.  BASE must be the
+   top-most VAR_DECL representing the variable; within that, POS and SIZE locate
+   the member and REF must be the reference expression for it.  */
+
+static void
+scalarize_elem (tree base, HOST_WIDE_INT pos, HOST_WIDE_INT size,
+		 tree ref, tree type)
+{
+  if (is_gimple_reg_type (type))
+  {
+    struct access *access = create_access_1 (base, pos, size);
+    access->expr = ref;
+    access->type = type;
+    access->grp_total_scalarization = 1;
+    /* Accesses for intraprocedural SRA can have their stmt NULL.  */
+  }
+  else
+    completely_scalarize (base, type, pos, ref);
 }
 
 /* Create a total_scalarization access for VAR as a whole.  VAR must be of a
-   RECORD_TYPE conforming to type_consists_of_records_p.  */
+   RECORD_TYPE or ARRAY_TYPE conforming to scalarizable_type_p.  */
 
 static void
 create_total_scalarization_access (tree var)
@@ -2521,13 +2580,13 @@ analyze_all_variable_accesses (void)
 	tree var = candidate (i);
 
 	if (TREE_CODE (var) == VAR_DECL
-	    && type_consists_of_records_p (TREE_TYPE (var)))
+	    && scalarizable_type_p (TREE_TYPE (var)))
 	  {
 	    if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (var)))
 		<= max_scalarization_size)
 	      {
 		create_total_scalarization_access (var);
-		completely_scalarize_record (var, var, 0, var);
+		completely_scalarize (var, TREE_TYPE (var), 0, var);
 		if (dump_file && (dump_flags & TDF_DETAILS))
 		  {
 		    fprintf (dump_file, "Will attempt to totally scalarize ");
-- 
1.9.1

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records.
  2015-09-07 13:20                     ` Alan Lawrence
@ 2015-09-08 12:47                       ` Martin Jambor
  2015-09-14 17:41                         ` Alan Lawrence
  0 siblings, 1 reply; 45+ messages in thread
From: Martin Jambor @ 2015-09-08 12:47 UTC (permalink / raw)
  To: Alan Lawrence; +Cc: gcc-patches, rguenther, christophe.lyon, law

Hi,

On Mon, Sep 07, 2015 at 02:15:45PM +0100, Alan Lawrence wrote:
> In-Reply-To: <55E0697D.2010008@arm.com>
> 
> On 28/08/15 16:08, Alan Lawrence wrote:
> > Alan Lawrence wrote:
> >>
> >> Right. I think VLA's are the problem with pr64312.C also. I'm testing a fix
> >> (that declares arrays with any of these properties as unscalarizable).
> > ... 
> > In the meantime I've reverted the patch pending further testing on x86, aarch64
> > and arm.
> 
> I've now tested g++ and fortran (+ bootstrap + check-gcc) on x86, AArch64 and
> ARM, and Ada on x86 and ARM.
> 
> So far the list of failures from the original patch seems to be:
> 
> * g++.dg/torture/pr64312.C on ARM and m68k-linux
> * Building Ada on x86
> * Ada ACATS c87b31a on ARM (where the Ada frontend builds fine)
> 
> Here's a new version, that fixes all the above, by adding a dose of
> paranoia in scalarizable_type_p...

I have only had a bref look at scalarizable_type_p then, considering
all of the rest unchanged, and the tests there seem natural to me.
(Note that I do not have the authority to approve the patch.)

> (I wonder about adding a comment
> in completely_scalarize that such cases have already been ruled
> out?)

The comment already references scalarizable_type_p which is enough at
least for me.

Thanks,

Martin

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records.
  2015-09-08 12:47                       ` Martin Jambor
@ 2015-09-14 17:41                         ` Alan Lawrence
  2015-09-15  7:49                           ` Richard Biener
  0 siblings, 1 reply; 45+ messages in thread
From: Alan Lawrence @ 2015-09-14 17:41 UTC (permalink / raw)
  To: gcc-patches, rguenther, christophe.lyon, law

Ping. (Rerevert with 5 lines extra paranoia in scalarizable_type_p).

Thanks, Alan

On 08/09/15 13:43, Martin Jambor wrote:
> Hi,
>
> On Mon, Sep 07, 2015 at 02:15:45PM +0100, Alan Lawrence wrote:
>> In-Reply-To: <55E0697D.2010008@arm.com>
>>
>> On 28/08/15 16:08, Alan Lawrence wrote:
>>> Alan Lawrence wrote:
>>>>
>>>> Right. I think VLA's are the problem with pr64312.C also. I'm testing a fix
>>>> (that declares arrays with any of these properties as unscalarizable).
>>> ...
>>> In the meantime I've reverted the patch pending further testing on x86, aarch64
>>> and arm.
>>
>> I've now tested g++ and fortran (+ bootstrap + check-gcc) on x86, AArch64 and
>> ARM, and Ada on x86 and ARM.
>>
>> So far the list of failures from the original patch seems to be:
>>
>> * g++.dg/torture/pr64312.C on ARM and m68k-linux
>> * Building Ada on x86
>> * Ada ACATS c87b31a on ARM (where the Ada frontend builds fine)
>>
>> Here's a new version, that fixes all the above, by adding a dose of
>> paranoia in scalarizable_type_p...
>
> I have only had a bref look at scalarizable_type_p then, considering
> all of the rest unchanged, and the tests there seem natural to me.
> (Note that I do not have the authority to approve the patch.)
>
>> (I wonder about adding a comment
>> in completely_scalarize that such cases have already been ruled
>> out?)
>
> The comment already references scalarizable_type_p which is enough at
> least for me.
>
> Thanks,
>
> Martin
>

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records.
  2015-09-14 17:41                         ` Alan Lawrence
@ 2015-09-15  7:49                           ` Richard Biener
  2015-09-17 17:12                             ` Alan Lawrence
  0 siblings, 1 reply; 45+ messages in thread
From: Richard Biener @ 2015-09-15  7:49 UTC (permalink / raw)
  To: Alan Lawrence; +Cc: gcc-patches, christophe.lyon, law

On Mon, 14 Sep 2015, Alan Lawrence wrote:

> Ping. (Rerevert with 5 lines extra paranoia in scalarizable_type_p).

Sorry for chiming in so late...

+      if (TYPE_DOMAIN (type) == NULL_TREE
+         || !TREE_CONSTANT (TYPE_MIN_VALUE (TYPE_DOMAIN (type)))
+         || !TREE_CONSTANT (TYPE_MAX_VALUE (TYPE_DOMAIN (type)))
+         || !TREE_CONSTANT (TYPE_SIZE (type))
+         || (tree_to_shwi (TYPE_SIZE (type)) <= 0))
+       return false;

TREE_CONSTANT isn't the correct thing to test.  You should use
TREE_CODE () == INTEGER_CST instead.  Also you need to handle
NULL_TREE TYPE_MIN/MAX_VALUE as that can happen as well.

+      if (DECL_P (elem) && DECL_BIT_FIELD (elem))
+       return false;

that can't happen (TREE_TYPE (array-type) is never a DECL).

+       int el_size = tree_to_uhwi (elem_size);
+       gcc_assert (el_size);

so you assert on el_size being > 0 later but above you test
only array size ...

+           tree t_idx = build_int_cst (TYPE_DOMAIN (decl_type), idx);

use t_idx = size_int (idx);

+           tree nref = build4 (ARRAY_REF, elemtype, ref, t_idx, 
NULL_TREE,
+                               NULL_TREE);


Otherwise looks ok to me.

Thanks,
Richard.

> Thanks, Alan
> 
> On 08/09/15 13:43, Martin Jambor wrote:
> > Hi,
> > 
> > On Mon, Sep 07, 2015 at 02:15:45PM +0100, Alan Lawrence wrote:
> > > In-Reply-To: <55E0697D.2010008@arm.com>
> > > 
> > > On 28/08/15 16:08, Alan Lawrence wrote:
> > > > Alan Lawrence wrote:
> > > > > 
> > > > > Right. I think VLA's are the problem with pr64312.C also. I'm testing
> > > > > a fix
> > > > > (that declares arrays with any of these properties as unscalarizable).
> > > > ...
> > > > In the meantime I've reverted the patch pending further testing on x86,
> > > > aarch64
> > > > and arm.
> > > 
> > > I've now tested g++ and fortran (+ bootstrap + check-gcc) on x86, AArch64
> > > and
> > > ARM, and Ada on x86 and ARM.
> > > 
> > > So far the list of failures from the original patch seems to be:
> > > 
> > > * g++.dg/torture/pr64312.C on ARM and m68k-linux
> > > * Building Ada on x86
> > > * Ada ACATS c87b31a on ARM (where the Ada frontend builds fine)
> > > 
> > > Here's a new version, that fixes all the above, by adding a dose of
> > > paranoia in scalarizable_type_p...
> > 
> > I have only had a bref look at scalarizable_type_p then, considering
> > all of the rest unchanged, and the tests there seem natural to me.
> > (Note that I do not have the authority to approve the patch.)
> > 
> > > (I wonder about adding a comment
> > > in completely_scalarize that such cases have already been ruled
> > > out?)
> > 
> > The comment already references scalarizable_type_p which is enough at
> > least for me.
> > 
> > Thanks,
> > 
> > Martin
> > 
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records.
  2015-09-15  7:49                           ` Richard Biener
@ 2015-09-17 17:12                             ` Alan Lawrence
  2015-09-18  8:36                               ` Richard Biener
  0 siblings, 1 reply; 45+ messages in thread
From: Alan Lawrence @ 2015-09-17 17:12 UTC (permalink / raw)
  To: gcc-patches; +Cc: rguenther, mjambor, christophe.lyon, law

On 15/09/15 08:43, Richard Biener wrote:
>
> Sorry for chiming in so late...

Not at all, TYVM for your help!

> TREE_CONSTANT isn't the correct thing to test.  You should use
> TREE_CODE () == INTEGER_CST instead.

Done (in some cases, via tree_fits_shwi_p).

> Also you need to handle
> NULL_TREE TYPE_MIN/MAX_VALUE as that can happen as well.

I've not found any documentation as to what these mean, but from experiment,
it seems that a zero-length array has MIN_VALUE 0 and MAX_VALUE null (as well
as zero TYPE_SIZE) - so I allow that. In contrast a variable-length array also
has zero TYPE_SIZE, but a large range of MIN-MAX, and I think I want to rule
those out.

Another awkward case is Ada arrays with large offset (e.g. array(2^32...2^32+1)
which has only two elements); I don't see either of tree_to_shwi or tree_to_uhwi
as necessarily being "better" here, each will handle (some) (rare) cases the
other will not, so I've tried to use tree_to_shwi throughout for consistency.

Summary: taken advantage of tree_fits_shwi_p, as this includes a check against
NULL_TREE and that TREE_CODE () == INTEGER_CST.

> +      if (DECL_P (elem) && DECL_BIT_FIELD (elem))
> +       return false;
>
> that can't happen (TREE_TYPE (array-type) is never a DECL).

Removed.

> +       int el_size = tree_to_uhwi (elem_size);
> +       gcc_assert (el_size);
>
> so you assert on el_size being > 0 later but above you test
> only array size ...

Good point, thanks.

> +           tree t_idx = build_int_cst (TYPE_DOMAIN (decl_type), idx);
>
> use t_idx = size_int (idx);

Done.

I've also added another test, of scalarizing a structure containing a
zero-length array, as earlier attempts accidentally prevented this.

Bootstrapped + check-gcc,g++,ada,fortran on ARM and x86_64;
Bootstrapped + check-gcc,g++,fortran on AArch64.

OK for trunk?

Thanks,
Alan

gcc/ChangeLog:

	PR tree-optimization/67283
	* tree-sra.c (type_consists_of_records_p): Rename to...
	(scalarizable_type_p): ...this, add case for ARRAY_TYPE.
	(completely_scalarize_record): Rename to...
	(completely_scalarize): ...this, add ARRAY_TYPE case, move some code to:
	(scalarize_elem): New.
	(analyze_all_variable_accesses): Follow renamings.

gcc/testsuite/ChangeLog:

	PR tree-optimization/67283
	* gcc.dg/tree-ssa/sra-15.c: New.
	* gcc.dg/tree-ssa/sra-16.c: New.
---
 gcc/testsuite/gcc.dg/tree-ssa/sra-15.c |  37 ++++++++
 gcc/testsuite/gcc.dg/tree-ssa/sra-16.c |  37 ++++++++
 gcc/tree-sra.c                         | 165 +++++++++++++++++++++++----------
 3 files changed, 191 insertions(+), 48 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sra-16.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
new file mode 100644
index 0000000..a22062e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
@@ -0,0 +1,37 @@
+/* Verify that SRA total scalarization works on records containing arrays.  */
+/* { dg-do run } */
+/* { dg-options "-O1 -fdump-tree-release_ssa --param sra-max-scalarization-size-Ospeed=32" } */
+
+extern void abort (void);
+
+struct S
+{
+  char c;
+  unsigned short f[2][2];
+  int i;
+  unsigned short f3, f4;
+};
+
+
+int __attribute__ ((noinline))
+foo (struct S *p)
+{
+  struct S l;
+
+  l = *p;
+  l.i++;
+  l.f[1][0] += 3;
+  *p = l;
+}
+
+int
+main (int argc, char **argv)
+{
+  struct S a = {0, { {5, 7}, {9, 11} }, 4, 0, 0};
+  foo (&a);
+  if (a.i != 5 || a.f[1][0] != 12)
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-16.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-16.c
new file mode 100644
index 0000000..fef34c0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-16.c
@@ -0,0 +1,37 @@
+/* Verify that SRA total scalarization works on records containing arrays.  */
+/* { dg-do run } */
+/* { dg-options "-O1 -fdump-tree-release_ssa --param sra-max-scalarization-size-Ospeed=16" } */
+
+extern void abort (void);
+
+struct S
+{
+  long zilch[0];
+  char c;
+  int i;
+  unsigned short f3, f4;
+};
+
+
+int __attribute__ ((noinline))
+foo (struct S *p)
+{
+  struct S l;
+
+  l = *p;
+  l.i++;
+  l.f3++;
+  *p = l;
+}
+
+int
+main (int argc, char **argv)
+{
+  struct S a = { { }, 0, 4, 0, 0};
+  foo (&a);
+  if (a.i != 5 || a.f3 != 1)
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" } } */
diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index 8b3a0ad..8247554 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -915,73 +915,142 @@ create_access (tree expr, gimple stmt, bool write)
 }
 
 
-/* Return true iff TYPE is a RECORD_TYPE with fields that are either of gimple
-   register types or (recursively) records with only these two kinds of fields.
-   It also returns false if any of these records contains a bit-field.  */
+/* Return true iff TYPE is scalarizable - i.e. a RECORD_TYPE or fixed-length
+   ARRAY_TYPE with fields that are either of gimple register types (excluding
+   bit-fields) or (recursively) scalarizable types.  */
 
 static bool
-type_consists_of_records_p (tree type)
+scalarizable_type_p (tree type)
 {
-  tree fld;
+  gcc_assert (!is_gimple_reg_type (type));
 
-  if (TREE_CODE (type) != RECORD_TYPE)
-    return false;
+  switch (TREE_CODE (type))
+  {
+  case RECORD_TYPE:
+    for (tree fld = TYPE_FIELDS (type); fld; fld = DECL_CHAIN (fld))
+      if (TREE_CODE (fld) == FIELD_DECL)
+	{
+	  tree ft = TREE_TYPE (fld);
 
-  for (fld = TYPE_FIELDS (type); fld; fld = DECL_CHAIN (fld))
-    if (TREE_CODE (fld) == FIELD_DECL)
-      {
-	tree ft = TREE_TYPE (fld);
+	  if (DECL_BIT_FIELD (fld))
+	    return false;
 
-	if (DECL_BIT_FIELD (fld))
-	  return false;
+	  if (!is_gimple_reg_type (ft)
+	      && !scalarizable_type_p (ft))
+	    return false;
+	}
 
-	if (!is_gimple_reg_type (ft)
-	    && !type_consists_of_records_p (ft))
-	  return false;
-      }
+    return true;
 
-  return true;
+  case ARRAY_TYPE:
+    {
+      if (TYPE_DOMAIN (type) == NULL_TREE
+	  || !tree_fits_shwi_p (TYPE_SIZE (type))
+	  || !tree_fits_shwi_p (TYPE_SIZE (TREE_TYPE (type)))
+	  || (tree_to_shwi (TYPE_SIZE (TREE_TYPE (type))) <= 0)
+	  || !tree_fits_shwi_p (TYPE_MIN_VALUE (TYPE_DOMAIN (type))))
+	return false;
+      if (tree_to_shwi (TYPE_SIZE (type)) == 0
+	  && TYPE_MAX_VALUE (TYPE_DOMAIN (type)) == NULL_TREE)
+	/* Zero-element array, should not prevent scalarization.  */
+	;
+      else if ((tree_to_shwi (TYPE_SIZE (type)) <= 0)
+	       || !tree_fits_shwi_p (TYPE_MAX_VALUE (TYPE_DOMAIN (type))))
+	return false;
+
+      tree elem = TREE_TYPE (type);
+      if (!is_gimple_reg_type (elem)
+	 && !scalarizable_type_p (elem))
+	return false;
+      return true;
+    }
+  default:
+    return false;
+  }
 }
 
-/* Create total_scalarization accesses for all scalar type fields in DECL that
-   must be of a RECORD_TYPE conforming to type_consists_of_records_p.  BASE
-   must be the top-most VAR_DECL representing the variable, OFFSET must be the
-   offset of DECL within BASE.  REF must be the memory reference expression for
-   the given decl.  */
+static void scalarize_elem (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree, tree);
+
+/* Create total_scalarization accesses for all scalar fields of a member
+   of type DECL_TYPE conforming to scalarizable_type_p.  BASE
+   must be the top-most VAR_DECL representing the variable; within that,
+   OFFSET locates the member and REF must be the memory reference expression for
+   the member.  */
 
 static void
-completely_scalarize_record (tree base, tree decl, HOST_WIDE_INT offset,
-			     tree ref)
+completely_scalarize (tree base, tree decl_type, HOST_WIDE_INT offset, tree ref)
 {
-  tree fld, decl_type = TREE_TYPE (decl);
+  switch (TREE_CODE (decl_type))
+    {
+    case RECORD_TYPE:
+      for (tree fld = TYPE_FIELDS (decl_type); fld; fld = DECL_CHAIN (fld))
+	if (TREE_CODE (fld) == FIELD_DECL)
+	  {
+	    HOST_WIDE_INT pos = offset + int_bit_position (fld);
+	    tree ft = TREE_TYPE (fld);
+	    tree nref = build3 (COMPONENT_REF, ft, ref, fld, NULL_TREE);
 
-  for (fld = TYPE_FIELDS (decl_type); fld; fld = DECL_CHAIN (fld))
-    if (TREE_CODE (fld) == FIELD_DECL)
+	    scalarize_elem (base, pos, tree_to_uhwi (DECL_SIZE (fld)), nref,
+			    ft);
+	  }
+      break;
+    case ARRAY_TYPE:
       {
-	HOST_WIDE_INT pos = offset + int_bit_position (fld);
-	tree ft = TREE_TYPE (fld);
-	tree nref = build3 (COMPONENT_REF, TREE_TYPE (fld), ref, fld,
-			    NULL_TREE);
-
-	if (is_gimple_reg_type (ft))
+	tree elemtype = TREE_TYPE (decl_type);
+	tree elem_size = TYPE_SIZE (elemtype);
+	gcc_assert (elem_size && tree_fits_shwi_p (elem_size));
+	HOST_WIDE_INT el_size = tree_to_shwi (elem_size);
+	gcc_assert (el_size > 0);
+
+	tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (decl_type));
+	gcc_assert (TREE_CODE (minidx) == INTEGER_CST);
+	tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (decl_type));
+	if (maxidx)
 	  {
-	    struct access *access;
-	    HOST_WIDE_INT size;
-
-	    size = tree_to_uhwi (DECL_SIZE (fld));
-	    access = create_access_1 (base, pos, size);
-	    access->expr = nref;
-	    access->type = ft;
-	    access->grp_total_scalarization = 1;
-	    /* Accesses for intraprocedural SRA can have their stmt NULL.  */
+	    gcc_assert (TREE_CODE (maxidx) == INTEGER_CST);
+	    /* MINIDX and MAXIDX are inclusive.  Try to avoid overflow.  */
+	    unsigned HOST_WIDE_INT lenp1 = tree_to_shwi (maxidx)
+					- tree_to_shwi (minidx);
+	    unsigned HOST_WIDE_INT idx = 0;
+	    do
+	      {
+		tree nref = build4 (ARRAY_REF, elemtype, ref, size_int (idx),
+				    NULL_TREE, NULL_TREE);
+		int el_off = offset + idx * el_size;
+		scalarize_elem (base, el_off, el_size, nref, elemtype);
+	      }
+	    while (++idx <= lenp1);
 	  }
-	else
-	  completely_scalarize_record (base, fld, pos, nref);
       }
+      break;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Create total_scalarization accesses for a member of type TYPE, which must
+   satisfy either is_gimple_reg_type or scalarizable_type_p.  BASE must be the
+   top-most VAR_DECL representing the variable; within that, POS and SIZE locate
+   the member and REF must be the reference expression for it.  */
+
+static void
+scalarize_elem (tree base, HOST_WIDE_INT pos, HOST_WIDE_INT size,
+		 tree ref, tree type)
+{
+  if (is_gimple_reg_type (type))
+  {
+    struct access *access = create_access_1 (base, pos, size);
+    access->expr = ref;
+    access->type = type;
+    access->grp_total_scalarization = 1;
+    /* Accesses for intraprocedural SRA can have their stmt NULL.  */
+  }
+  else
+    completely_scalarize (base, type, pos, ref);
 }
 
 /* Create a total_scalarization access for VAR as a whole.  VAR must be of a
-   RECORD_TYPE conforming to type_consists_of_records_p.  */
+   RECORD_TYPE or ARRAY_TYPE conforming to scalarizable_type_p.  */
 
 static void
 create_total_scalarization_access (tree var)
@@ -2521,13 +2590,13 @@ analyze_all_variable_accesses (void)
 	tree var = candidate (i);
 
 	if (TREE_CODE (var) == VAR_DECL
-	    && type_consists_of_records_p (TREE_TYPE (var)))
+	    && scalarizable_type_p (TREE_TYPE (var)))
 	  {
 	    if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (var)))
 		<= max_scalarization_size)
 	      {
 		create_total_scalarization_access (var);
-		completely_scalarize_record (var, var, 0, var);
+		completely_scalarize (var, TREE_TYPE (var), 0, var);
 		if (dump_file && (dump_flags & TDF_DETAILS))
 		  {
 		    fprintf (dump_file, "Will attempt to totally scalarize ");
-- 
1.9.1

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 2/5] completely_scalarize arrays as well as records.
  2015-09-17 17:12                             ` Alan Lawrence
@ 2015-09-18  8:36                               ` Richard Biener
  0 siblings, 0 replies; 45+ messages in thread
From: Richard Biener @ 2015-09-18  8:36 UTC (permalink / raw)
  To: Alan Lawrence; +Cc: gcc-patches, mjambor, christophe.lyon, law

On Thu, 17 Sep 2015, Alan Lawrence wrote:

> On 15/09/15 08:43, Richard Biener wrote:
> >
> > Sorry for chiming in so late...
> 
> Not at all, TYVM for your help!
> 
> > TREE_CONSTANT isn't the correct thing to test.  You should use
> > TREE_CODE () == INTEGER_CST instead.
> 
> Done (in some cases, via tree_fits_shwi_p).
> 
> > Also you need to handle
> > NULL_TREE TYPE_MIN/MAX_VALUE as that can happen as well.
> 
> I've not found any documentation as to what these mean, but from experiment,
> it seems that a zero-length array has MIN_VALUE 0 and MAX_VALUE null (as well
> as zero TYPE_SIZE) - so I allow that. In contrast a variable-length array also
> has zero TYPE_SIZE, but a large range of MIN-MAX, and I think I want to rule
> those out.
> 
> Another awkward case is Ada arrays with large offset (e.g. array(2^32...2^32+1)
> which has only two elements); I don't see either of tree_to_shwi or tree_to_uhwi
> as necessarily being "better" here, each will handle (some) (rare) cases the
> other will not, so I've tried to use tree_to_shwi throughout for consistency.
> 
> Summary: taken advantage of tree_fits_shwi_p, as this includes a check against
> NULL_TREE and that TREE_CODE () == INTEGER_CST.
> 
> > +      if (DECL_P (elem) && DECL_BIT_FIELD (elem))
> > +       return false;
> >
> > that can't happen (TREE_TYPE (array-type) is never a DECL).
> 
> Removed.
> 
> > +       int el_size = tree_to_uhwi (elem_size);
> > +       gcc_assert (el_size);
> >
> > so you assert on el_size being > 0 later but above you test
> > only array size ...
> 
> Good point, thanks.
> 
> > +           tree t_idx = build_int_cst (TYPE_DOMAIN (decl_type), idx);
> >
> > use t_idx = size_int (idx);
> 
> Done.
> 
> I've also added another test, of scalarizing a structure containing a
> zero-length array, as earlier attempts accidentally prevented this.
> 
> Bootstrapped + check-gcc,g++,ada,fortran on ARM and x86_64;
> Bootstrapped + check-gcc,g++,fortran on AArch64.
> 
> OK for trunk?

Ok.

Thanks,
Richard.

> Thanks,
> Alan
> 
> gcc/ChangeLog:
> 
> 	PR tree-optimization/67283
> 	* tree-sra.c (type_consists_of_records_p): Rename to...
> 	(scalarizable_type_p): ...this, add case for ARRAY_TYPE.
> 	(completely_scalarize_record): Rename to...
> 	(completely_scalarize): ...this, add ARRAY_TYPE case, move some code to:
> 	(scalarize_elem): New.
> 	(analyze_all_variable_accesses): Follow renamings.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR tree-optimization/67283
> 	* gcc.dg/tree-ssa/sra-15.c: New.
> 	* gcc.dg/tree-ssa/sra-16.c: New.
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/sra-15.c |  37 ++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/sra-16.c |  37 ++++++++
>  gcc/tree-sra.c                         | 165 +++++++++++++++++++++++----------
>  3 files changed, 191 insertions(+), 48 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sra-16.c
> 
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
> new file mode 100644
> index 0000000..a22062e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-15.c
> @@ -0,0 +1,37 @@
> +/* Verify that SRA total scalarization works on records containing arrays.  */
> +/* { dg-do run } */
> +/* { dg-options "-O1 -fdump-tree-release_ssa --param sra-max-scalarization-size-Ospeed=32" } */
> +
> +extern void abort (void);
> +
> +struct S
> +{
> +  char c;
> +  unsigned short f[2][2];
> +  int i;
> +  unsigned short f3, f4;
> +};
> +
> +
> +int __attribute__ ((noinline))
> +foo (struct S *p)
> +{
> +  struct S l;
> +
> +  l = *p;
> +  l.i++;
> +  l.f[1][0] += 3;
> +  *p = l;
> +}
> +
> +int
> +main (int argc, char **argv)
> +{
> +  struct S a = {0, { {5, 7}, {9, 11} }, 4, 0, 0};
> +  foo (&a);
> +  if (a.i != 5 || a.f[1][0] != 12)
> +    abort ();
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-16.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-16.c
> new file mode 100644
> index 0000000..fef34c0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-16.c
> @@ -0,0 +1,37 @@
> +/* Verify that SRA total scalarization works on records containing arrays.  */
> +/* { dg-do run } */
> +/* { dg-options "-O1 -fdump-tree-release_ssa --param sra-max-scalarization-size-Ospeed=16" } */
> +
> +extern void abort (void);
> +
> +struct S
> +{
> +  long zilch[0];
> +  char c;
> +  int i;
> +  unsigned short f3, f4;
> +};
> +
> +
> +int __attribute__ ((noinline))
> +foo (struct S *p)
> +{
> +  struct S l;
> +
> +  l = *p;
> +  l.i++;
> +  l.f3++;
> +  *p = l;
> +}
> +
> +int
> +main (int argc, char **argv)
> +{
> +  struct S a = { { }, 0, 4, 0, 0};
> +  foo (&a);
> +  if (a.i != 5 || a.f3 != 1)
> +    abort ();
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" } } */
> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
> index 8b3a0ad..8247554 100644
> --- a/gcc/tree-sra.c
> +++ b/gcc/tree-sra.c
> @@ -915,73 +915,142 @@ create_access (tree expr, gimple stmt, bool write)
>  }
>  
>  
> -/* Return true iff TYPE is a RECORD_TYPE with fields that are either of gimple
> -   register types or (recursively) records with only these two kinds of fields.
> -   It also returns false if any of these records contains a bit-field.  */
> +/* Return true iff TYPE is scalarizable - i.e. a RECORD_TYPE or fixed-length
> +   ARRAY_TYPE with fields that are either of gimple register types (excluding
> +   bit-fields) or (recursively) scalarizable types.  */
>  
>  static bool
> -type_consists_of_records_p (tree type)
> +scalarizable_type_p (tree type)
>  {
> -  tree fld;
> +  gcc_assert (!is_gimple_reg_type (type));
>  
> -  if (TREE_CODE (type) != RECORD_TYPE)
> -    return false;
> +  switch (TREE_CODE (type))
> +  {
> +  case RECORD_TYPE:
> +    for (tree fld = TYPE_FIELDS (type); fld; fld = DECL_CHAIN (fld))
> +      if (TREE_CODE (fld) == FIELD_DECL)
> +	{
> +	  tree ft = TREE_TYPE (fld);
>  
> -  for (fld = TYPE_FIELDS (type); fld; fld = DECL_CHAIN (fld))
> -    if (TREE_CODE (fld) == FIELD_DECL)
> -      {
> -	tree ft = TREE_TYPE (fld);
> +	  if (DECL_BIT_FIELD (fld))
> +	    return false;
>  
> -	if (DECL_BIT_FIELD (fld))
> -	  return false;
> +	  if (!is_gimple_reg_type (ft)
> +	      && !scalarizable_type_p (ft))
> +	    return false;
> +	}
>  
> -	if (!is_gimple_reg_type (ft)
> -	    && !type_consists_of_records_p (ft))
> -	  return false;
> -      }
> +    return true;
>  
> -  return true;
> +  case ARRAY_TYPE:
> +    {
> +      if (TYPE_DOMAIN (type) == NULL_TREE
> +	  || !tree_fits_shwi_p (TYPE_SIZE (type))
> +	  || !tree_fits_shwi_p (TYPE_SIZE (TREE_TYPE (type)))
> +	  || (tree_to_shwi (TYPE_SIZE (TREE_TYPE (type))) <= 0)
> +	  || !tree_fits_shwi_p (TYPE_MIN_VALUE (TYPE_DOMAIN (type))))
> +	return false;
> +      if (tree_to_shwi (TYPE_SIZE (type)) == 0
> +	  && TYPE_MAX_VALUE (TYPE_DOMAIN (type)) == NULL_TREE)
> +	/* Zero-element array, should not prevent scalarization.  */
> +	;
> +      else if ((tree_to_shwi (TYPE_SIZE (type)) <= 0)
> +	       || !tree_fits_shwi_p (TYPE_MAX_VALUE (TYPE_DOMAIN (type))))
> +	return false;
> +
> +      tree elem = TREE_TYPE (type);
> +      if (!is_gimple_reg_type (elem)
> +	 && !scalarizable_type_p (elem))
> +	return false;
> +      return true;
> +    }
> +  default:
> +    return false;
> +  }
>  }
>  
> -/* Create total_scalarization accesses for all scalar type fields in DECL that
> -   must be of a RECORD_TYPE conforming to type_consists_of_records_p.  BASE
> -   must be the top-most VAR_DECL representing the variable, OFFSET must be the
> -   offset of DECL within BASE.  REF must be the memory reference expression for
> -   the given decl.  */
> +static void scalarize_elem (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree, tree);
> +
> +/* Create total_scalarization accesses for all scalar fields of a member
> +   of type DECL_TYPE conforming to scalarizable_type_p.  BASE
> +   must be the top-most VAR_DECL representing the variable; within that,
> +   OFFSET locates the member and REF must be the memory reference expression for
> +   the member.  */
>  
>  static void
> -completely_scalarize_record (tree base, tree decl, HOST_WIDE_INT offset,
> -			     tree ref)
> +completely_scalarize (tree base, tree decl_type, HOST_WIDE_INT offset, tree ref)
>  {
> -  tree fld, decl_type = TREE_TYPE (decl);
> +  switch (TREE_CODE (decl_type))
> +    {
> +    case RECORD_TYPE:
> +      for (tree fld = TYPE_FIELDS (decl_type); fld; fld = DECL_CHAIN (fld))
> +	if (TREE_CODE (fld) == FIELD_DECL)
> +	  {
> +	    HOST_WIDE_INT pos = offset + int_bit_position (fld);
> +	    tree ft = TREE_TYPE (fld);
> +	    tree nref = build3 (COMPONENT_REF, ft, ref, fld, NULL_TREE);
>  
> -  for (fld = TYPE_FIELDS (decl_type); fld; fld = DECL_CHAIN (fld))
> -    if (TREE_CODE (fld) == FIELD_DECL)
> +	    scalarize_elem (base, pos, tree_to_uhwi (DECL_SIZE (fld)), nref,
> +			    ft);
> +	  }
> +      break;
> +    case ARRAY_TYPE:
>        {
> -	HOST_WIDE_INT pos = offset + int_bit_position (fld);
> -	tree ft = TREE_TYPE (fld);
> -	tree nref = build3 (COMPONENT_REF, TREE_TYPE (fld), ref, fld,
> -			    NULL_TREE);
> -
> -	if (is_gimple_reg_type (ft))
> +	tree elemtype = TREE_TYPE (decl_type);
> +	tree elem_size = TYPE_SIZE (elemtype);
> +	gcc_assert (elem_size && tree_fits_shwi_p (elem_size));
> +	HOST_WIDE_INT el_size = tree_to_shwi (elem_size);
> +	gcc_assert (el_size > 0);
> +
> +	tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (decl_type));
> +	gcc_assert (TREE_CODE (minidx) == INTEGER_CST);
> +	tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (decl_type));
> +	if (maxidx)
>  	  {
> -	    struct access *access;
> -	    HOST_WIDE_INT size;
> -
> -	    size = tree_to_uhwi (DECL_SIZE (fld));
> -	    access = create_access_1 (base, pos, size);
> -	    access->expr = nref;
> -	    access->type = ft;
> -	    access->grp_total_scalarization = 1;
> -	    /* Accesses for intraprocedural SRA can have their stmt NULL.  */
> +	    gcc_assert (TREE_CODE (maxidx) == INTEGER_CST);
> +	    /* MINIDX and MAXIDX are inclusive.  Try to avoid overflow.  */
> +	    unsigned HOST_WIDE_INT lenp1 = tree_to_shwi (maxidx)
> +					- tree_to_shwi (minidx);
> +	    unsigned HOST_WIDE_INT idx = 0;
> +	    do
> +	      {
> +		tree nref = build4 (ARRAY_REF, elemtype, ref, size_int (idx),
> +				    NULL_TREE, NULL_TREE);
> +		int el_off = offset + idx * el_size;
> +		scalarize_elem (base, el_off, el_size, nref, elemtype);
> +	      }
> +	    while (++idx <= lenp1);
>  	  }
> -	else
> -	  completely_scalarize_record (base, fld, pos, nref);
>        }
> +      break;
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
> +
> +/* Create total_scalarization accesses for a member of type TYPE, which must
> +   satisfy either is_gimple_reg_type or scalarizable_type_p.  BASE must be the
> +   top-most VAR_DECL representing the variable; within that, POS and SIZE locate
> +   the member and REF must be the reference expression for it.  */
> +
> +static void
> +scalarize_elem (tree base, HOST_WIDE_INT pos, HOST_WIDE_INT size,
> +		 tree ref, tree type)
> +{
> +  if (is_gimple_reg_type (type))
> +  {
> +    struct access *access = create_access_1 (base, pos, size);
> +    access->expr = ref;
> +    access->type = type;
> +    access->grp_total_scalarization = 1;
> +    /* Accesses for intraprocedural SRA can have their stmt NULL.  */
> +  }
> +  else
> +    completely_scalarize (base, type, pos, ref);
>  }
>  
>  /* Create a total_scalarization access for VAR as a whole.  VAR must be of a
> -   RECORD_TYPE conforming to type_consists_of_records_p.  */
> +   RECORD_TYPE or ARRAY_TYPE conforming to scalarizable_type_p.  */
>  
>  static void
>  create_total_scalarization_access (tree var)
> @@ -2521,13 +2590,13 @@ analyze_all_variable_accesses (void)
>  	tree var = candidate (i);
>  
>  	if (TREE_CODE (var) == VAR_DECL
> -	    && type_consists_of_records_p (TREE_TYPE (var)))
> +	    && scalarizable_type_p (TREE_TYPE (var)))
>  	  {
>  	    if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (var)))
>  		<= max_scalarization_size)
>  	      {
>  		create_total_scalarization_access (var);
> -		completely_scalarize_record (var, var, 0, var);
> +		completely_scalarize (var, TREE_TYPE (var), 0, var);
>  		if (dump_file && (dump_flags & TDF_DETAILS))
>  		  {
>  		    fprintf (dump_file, "Will attempt to totally scalarize ");
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 45+ messages in thread

end of thread, other threads:[~2015-09-18  8:36 UTC | newest]

Thread overview: 45+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-08-25 11:06 [PATCH 0/5][tree-sra.c] PR/63679 Make SRA replace constant pool loads Alan Lawrence
2015-08-25 11:06 ` [RFC 4/5] Handle constant-pool entries Alan Lawrence
2015-08-25 20:19   ` Jeff Law
2015-08-26  7:24     ` Richard Biener
2015-08-26 15:51     ` Alan Lawrence
2015-08-26 14:08   ` Martin Jambor
2015-08-25 11:06 ` [RFC 5/5] Always completely replace constant pool entries Alan Lawrence
2015-08-25 20:09   ` Jeff Law
2015-08-26  7:29     ` Richard Biener
2015-08-25 11:21 ` [PATCH 2/5] completely_scalarize arrays as well as records Alan Lawrence
2015-08-25 19:40   ` Jeff Law
2015-08-27 16:54     ` Fixing sra-12.c (was: Re: [PATCH 2/5] completely_scalarize arrays as well as records) Alan Lawrence
2015-08-27 20:58       ` Fixing sra-12.c Jeff Law
2015-08-25 21:44   ` [PATCH 2/5] completely_scalarize arrays as well as records Martin Jambor
2015-08-25 21:55     ` Jeff Law
2015-08-26  7:11       ` Richard Biener
2015-08-26  9:39         ` Martin Jambor
2015-08-26 10:12           ` Richard Biener
2015-08-26 16:30             ` Alan Lawrence
2015-08-26 19:18               ` Richard Biener
2015-08-27 16:00     ` Alan Lawrence
2015-08-28  7:19       ` Christophe Lyon
2015-08-28  8:06         ` Richard Biener
2015-08-28  8:16           ` Christophe Lyon
2015-08-28  8:31             ` Richard Biener
2015-08-28 10:09             ` Alan Lawrence
2015-08-28 13:35               ` Richard Biener
2015-08-28 14:05                 ` Alan Lawrence
2015-08-28 15:17                   ` Alan Lawrence
2015-09-07 13:20                     ` Alan Lawrence
2015-09-08 12:47                       ` Martin Jambor
2015-09-14 17:41                         ` Alan Lawrence
2015-09-15  7:49                           ` Richard Biener
2015-09-17 17:12                             ` Alan Lawrence
2015-09-18  8:36                               ` Richard Biener
2015-08-25 11:30 ` [PATCH 1/5] Refactor completely_scalarize_var Alan Lawrence
2015-08-25 19:36   ` Jeff Law
2015-08-25 21:42   ` Martin Jambor
2015-08-25 12:30 ` [PATCH 3/5] Build ARRAY_REFs when the base is of ARRAY_TYPE Alan Lawrence
2015-08-25 19:54   ` Jeff Law
2015-08-26  6:34     ` Bin.Cheng
2015-08-26  7:40       ` Richard Biener
2015-08-26  7:41         ` Bin.Cheng
2015-08-26  7:20     ` Richard Biener
2015-08-25 22:51   ` Martin Jambor

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).