public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [patch, fortran] Fix PR 85544
@ 2018-12-16 22:00 Thomas Koenig
  2018-12-22 13:41 ` *ping* " Thomas Koenig
  2018-12-22 15:09 ` Paul Richard Thomas
  0 siblings, 2 replies; 3+ messages in thread
From: Thomas Koenig @ 2018-12-16 22:00 UTC (permalink / raw)
  To: fortran, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1309 bytes --]

Hello world,

the PR pointed out an old regression because the front-end optimization
pass was substituting 2**n with ishift(1,n), where n was an array.

Simply removing the optimization for that case would have been easy,
but would also have introduced a performance regression.

So, for this, I moved the optimization to trans-*, where it makes more
sense.

Regression-tested.  This turned up that one of our tests, mvbits_1.f90,
depends on the behavior that 2**32 is zero.  This is certainly not
guaranteed by the standard, but I chose to keep the behavior so as not
to introduce any changes in behavior.

This fixes a regression, so I would like to backport to all active
branches if this is possible.

Oh yes, if anybody feels strongly that we should also optimize 32**n
and powers of other powers of two, now is the time to speak up :-)

OK for affected branches?

Regards

	Thomas

2018-12-16  Thomas Koenig  <tkoenig@gcc.gnu.org>

         PR fortran/85544
         * frontend-passes.c (optimize_power): Remove.
         (optimize_op): Remove call to optimize_power.
         * trans-expr.c (gfc_conv_power_op): Handle cases of 1**integer,
         (2|4|8|16) ** integer and (-1) ** integer.

2018-12-16  Thomas Koenig  <tkoenig@gcc.gnu.org>

         PR fortran/85544
         * gfortran.dg/power_7.f90: New test.

[-- Attachment #2: p3.diff --]
[-- Type: text/x-patch, Size: 5244 bytes --]

Index: frontend-passes.c
===================================================================
--- frontend-passes.c	(Revision 267172)
+++ frontend-passes.c	(Arbeitskopie)
@@ -1863,84 +1863,6 @@ combine_array_constructor (gfc_expr *e)
   return true;
 }
 
-/* Change (-1)**k into 1-ishift(iand(k,1),1) and
- 2**k into ishift(1,k) */
-
-static bool
-optimize_power (gfc_expr *e)
-{
-  gfc_expr *op1, *op2;
-  gfc_expr *iand, *ishft;
-
-  if (e->ts.type != BT_INTEGER)
-    return false;
-
-  op1 = e->value.op.op1;
-
-  if (op1 == NULL || op1->expr_type != EXPR_CONSTANT)
-    return false;
-
-  if (mpz_cmp_si (op1->value.integer, -1L) == 0)
-    {
-      gfc_free_expr (op1);
-
-      op2 = e->value.op.op2;
-
-      if (op2 == NULL)
-	return false;
-
-      iand = gfc_build_intrinsic_call (current_ns, GFC_ISYM_IAND,
-				       "_internal_iand", e->where, 2, op2,
-				       gfc_get_int_expr (e->ts.kind,
-							 &e->where, 1));
-
-      ishft = gfc_build_intrinsic_call (current_ns, GFC_ISYM_ISHFT,
-					"_internal_ishft", e->where, 2, iand,
-					gfc_get_int_expr (e->ts.kind,
-							  &e->where, 1));
-
-      e->value.op.op = INTRINSIC_MINUS;
-      e->value.op.op1 = gfc_get_int_expr (e->ts.kind, &e->where, 1);
-      e->value.op.op2 = ishft;
-      return true;
-    }
-  else if (mpz_cmp_si (op1->value.integer, 2L) == 0)
-    {
-      gfc_free_expr (op1);
-
-      op2 = e->value.op.op2;
-      if (op2 == NULL)
-	return false;
-
-      ishft = gfc_build_intrinsic_call (current_ns, GFC_ISYM_ISHFT,
-					"_internal_ishft", e->where, 2,
-					gfc_get_int_expr (e->ts.kind,
-							  &e->where, 1),
-					op2);
-      *e = *ishft;
-      return true;
-    }
-
-  else if (mpz_cmp_si (op1->value.integer, 1L) == 0)
-    {
-      op2 = e->value.op.op2;
-      if (op2 == NULL)
-	return false;
-
-      gfc_free_expr (op1);
-      gfc_free_expr (op2);
-
-      e->expr_type = EXPR_CONSTANT;
-      e->value.op.op1 = NULL;
-      e->value.op.op2 = NULL;
-      mpz_init_set_si (e->value.integer, 1);
-      /* Typespec and location are still OK.  */
-      return true;
-    }
-
-  return false;
-}
-
 /* Recursive optimization of operators.  */
 
 static bool
@@ -2001,9 +1923,6 @@ optimize_op (gfc_expr *e)
     case INTRINSIC_DIVIDE:
       return combine_array_constructor (e) || changed;
 
-    case INTRINSIC_POWER:
-      return optimize_power (e);
-
     default:
       break;
     }
Index: trans-expr.c
===================================================================
--- trans-expr.c	(Revision 267187)
+++ trans-expr.c	(Arbeitskopie)
@@ -3056,6 +3056,83 @@ gfc_conv_power_op (gfc_se * se, gfc_expr * expr)
     if (gfc_conv_cst_int_power (se, lse.expr, rse.expr))
       return;
 
+  if (INTEGER_CST_P (lse.expr)
+      && TREE_CODE (TREE_TYPE (rse.expr)) == INTEGER_TYPE)
+    {
+      wi::tree_to_wide_ref wlhs = wi::to_wide (lse.expr);
+      HOST_WIDE_INT v;
+      v = wlhs.to_shwi ();
+      if (v == 1)
+	{
+	  /* 1**something is always 1.  */
+	  se->expr = build_int_cst (TREE_TYPE (lse.expr), 1);
+	  return;
+	}
+      else if (v == 2 || v == 4 || v == 8 || v == 16)
+	{
+	  /* 2**n = 1<<n, 4**n = 1<<(n+n), 8**n = 1 <<(3*n), 16**n =
+	   1<<(4*n), but we have to make sure to return zero if the
+	   number of bits is too large. */
+	  tree lshift;
+	  tree type;
+	  tree shift;
+	  tree ge;
+	  tree cond;
+	  tree num_bits;
+	  tree cond2;
+
+	  type = TREE_TYPE (lse.expr);
+
+	  if (v == 2)
+	    shift = rse.expr;
+	  else if (v == 4)
+	    shift = fold_build2_loc (input_location, PLUS_EXPR,
+				     TREE_TYPE (rse.expr),
+				       rse.expr, rse.expr);
+	  else if (v == 8)
+	    shift = fold_build2_loc (input_location, MULT_EXPR,
+				     TREE_TYPE (rse.expr),
+				     build_int_cst (TREE_TYPE (rse.expr), 3),
+				     rse.expr);
+	  else if (v == 16)
+	    shift = fold_build2_loc (input_location, MULT_EXPR,
+				     TREE_TYPE (rse.expr),
+				     build_int_cst (TREE_TYPE (rse.expr), 4),
+				     rse.expr);
+	  else
+	    gcc_unreachable ();
+
+	  lshift = fold_build2_loc (input_location, LSHIFT_EXPR, type,
+				    build_int_cst (type, 1), shift);
+	  ge = fold_build2_loc (input_location, GE_EXPR, logical_type_node,
+				rse.expr, build_int_cst (type, 0));
+	  cond = fold_build3_loc (input_location, COND_EXPR, type, ge, lshift,
+				 build_int_cst (type, 0));
+	  num_bits = build_int_cst (TREE_TYPE (rse.expr), TYPE_PRECISION (type));
+	  cond2 = fold_build2_loc (input_location, GE_EXPR, logical_type_node,
+				   rse.expr, num_bits);
+	  se->expr = fold_build3_loc (input_location, COND_EXPR, type, cond2,
+				      build_int_cst (type, 0), cond);
+	  return;
+	}
+      else if (v == -1)
+	{
+	  /* (-1)**n is 1 - ((n & 1) << 1) */
+	  tree type;
+	  tree tmp;
+
+	  type = TREE_TYPE (lse.expr);
+	  tmp = fold_build2_loc (input_location, BIT_AND_EXPR, type,
+				 rse.expr, build_int_cst (type, 1));
+	  tmp = fold_build2_loc (input_location, LSHIFT_EXPR, type,
+				 tmp, build_int_cst (type, 1));
+	  tmp = fold_build2_loc (input_location, MINUS_EXPR, type,
+				 build_int_cst (type, 1), tmp);
+	  se->expr = tmp;
+	  return;
+	}
+    }
+
   gfc_int4_type_node = gfc_get_int_type (4);
 
   /* In case of integer operands with kinds 1 or 2, we call the integer kind 4

[-- Attachment #3: power_7.f90 --]
[-- Type: text/x-fortran, Size: 815 bytes --]

! { dg-do run }
! { dg-additional-options "-fdump-tree-original" }
! PR 85544 - this used to ICE.
! Run-time check of integer exponentiation with a constant base and an
! array-valued (or scalar variable) exponent.  Each failing comparison
! stops with a distinct code (1..6) so the failing case can be identified.
program p
   ! Exponent range: na..ne = -3..10, i.e. 14 elements including
   ! negative exponents and zero.
   integer, parameter :: na = -3, ne = 10
   integer :: i, a(na:ne), b(na:ne)
   integer :: v
   ! Fill a with the consecutive exponents -3, -2, ..., 10.
   a = [(i, i=na, ne)]
   ! Base 2: the three negative exponents are expected to give 0,
   ! then 2**0 .. 2**10.
   b = [2**a]
   if (any (b /= [0,0,0,1,2,4,8,16,32,64,128,256,512,1024])) stop 1
   ! Base 1: every exponent, negative ones included, is expected to give 1.
   b = [1**a]
   if (any (b /= 1)) stop 2
   ! Base -1: expected result alternates, -1 for odd exponents and 1 for
   ! even ones (the first element corresponds to exponent -3).
   b = [(-1)**a]
   if (any (b /= [-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1]) )stop 3
   ! Base 8: 0 for the negative exponents, then 8**0 .. 8**10.
   b = [8**a]
   if (any (b /= [0,0,0,1,8,64,512,4096,32768,262144,2097152,16777216,&
        134217728,1073741824])) stop 4
   ! Base 4: 0 for the negative exponents, then 4**0 .. 4**10.
   b = [4**a]
   if (any (b /= [0,0,0,1,4,16,64,256,1024,4096,16384,65536,262144,1048576])) stop 5
   
   ! Base 16 with a scalar variable exponent: compare 16**i against a
   ! reference value built up by repeated multiplication, for i = 1..6.
   v = 1
   do i=1,6
      v = v * 16
      if (v /= 16**i) stop 6 
   end do
 end program p
! { dg-final { scan-tree-dump-not "_gfortran_pow" "original" } }

^ permalink raw reply	[flat|nested] 3+ messages in thread

* *ping* [patch, fortran] Fix PR 85544
  2018-12-16 22:00 [patch, fortran] Fix PR 85544 Thomas Koenig
@ 2018-12-22 13:41 ` Thomas Koenig
  2018-12-22 15:09 ` Paul Richard Thomas
  1 sibling, 0 replies; 3+ messages in thread
From: Thomas Koenig @ 2018-12-22 13:41 UTC (permalink / raw)
  To: fortran, gcc-patches

Ping?

> the PR pointed out an old regression because the front-end optimization
> pass was substituting 2**n with ishift(1,n), where n was an array.
> 
> Simply removing the optimization for that case would have been easy,
> but also introduced a performance regression.
> 
> So, for this, I moved the optimization to trans-*, where it makes more
> sense.
> 
> Regression-tested.  This turned up that one of our tests, mvbits_1.f90,
> depends on the behavior that 2**32 is zero.  This is certainly not
> guaranteed by the standard, but I chose to keep the behavior as not
> to introduce any changes in behavior.
> 
> This fixes a regression, so I would like to backport to all active
> branches if this if possible.
> 
> Oh yes, if anybody feels strongly that we should also optimize 32**n
> and powers of other powers to two, now is the time to speak up :-)
> 
> OK for affected branches?

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [patch, fortran] Fix PR 85544
  2018-12-16 22:00 [patch, fortran] Fix PR 85544 Thomas Koenig
  2018-12-22 13:41 ` *ping* " Thomas Koenig
@ 2018-12-22 15:09 ` Paul Richard Thomas
  1 sibling, 0 replies; 3+ messages in thread
From: Paul Richard Thomas @ 2018-12-22 15:09 UTC (permalink / raw)
  To: Thomas Koenig; +Cc: fortran, gcc-patches

Hi Thomas,

That's OK for 7-  through 9-branches.

Thanks for the fix.

Paul

On Sun, 16 Dec 2018 at 22:01, Thomas Koenig <tkoenig@netcologne.de> wrote:
>
> Hello world,
>
> the PR pointed out an old regression because the front-end optimization
> pass was substituting 2**n with ishift(1,n), where n was an array.
>
> Simply removing the optimization for that case would have been easy,
> but also introduced a performance regression.
>
> So, for this, I moved the optimization to trans-*, where it makes more
> sense.
>
> Regression-tested.  This turned up that one of our tests, mvbits_1.f90,
> depends on the behavior that 2**32 is zero.  This is certainly not
> guaranteed by the standard, but I chose to keep the behavior as not
> to introduce any changes in behavior.
>
> This fixes a regression, so I would like to backport to all active
> branches if this if possible.
>
> Oh yes, if anybody feels strongly that we should also optimize 32**n
> and powers of other powers to two, now is the time to speak up :-)
>
> OK for affected branches?
>
> Regards
>
>         Thomas
>
> 2018-12-16  Thomas Koenig  <tkoenig@gcc.gnu.org>
>
>          PR fortran/85544
>          * frontend-passes.c (optimize_power): Remove.
>          (optimize_op): Remove call to optimize_power.
>          * trans-expr.c (gfc_conv_power_op): Handle cases of 1**integer,
>          (2|4|8|16) ** integer and (-1) ** integer.
>
> 2018-12-16  Thomas Koenig  <tkoenig@gcc.gnu.org>
>
>          PR fortran/85544
>          * gfortran.dg/power_7.f90: New test.



-- 
"If you can't explain it simply, you don't understand it well enough"
- Albert Einstein

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2018-12-22 13:46 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-16 22:00 [patch, fortran] Fix PR 85544 Thomas Koenig
2018-12-22 13:41 ` *ping* " Thomas Koenig
2018-12-22 15:09 ` Paul Richard Thomas

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).