From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gcc-patches-return-305782-listarch-gcc-patches=gcc.gnu.org@gcc.gnu.org>
Received: (qmail 2266 invoked by alias); 27 Oct 2011 23:36:12 -0000
Received: (qmail 3978 invoked by uid 22791); 27 Oct 2011 23:33:58 -0000
X-SWARE-Spam-Status: No, hits=0.6 required=5.0	tests=AWL,BAYES_50,KAM_STOCKGEN,RP_MATCHES_RCVD,TW_TM
X-Spam-Check-By: sourceware.org
Received: from smtp25.services.sfr.fr (HELO smtp25.services.sfr.fr) (93.17.128.120)    by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Thu, 27 Oct 2011 23:33:41 +0000
Received: from filter.sfr.fr (localhost [127.0.0.1])	by msfrf2512.sfr.fr (SMTP Server) with ESMTP id 879957000044;	Fri, 28 Oct 2011 01:33:40 +0200 (CEST)
Received: from gimli.local (145.15.72.86.rev.sfr.net [86.72.15.145])	by msfrf2512.sfr.fr (SMTP Server) with ESMTP id E1BD97000040;	Fri, 28 Oct 2011 01:33:39 +0200 (CEST)
X-SFR-UUID: 20111027233339924.E1BD97000040@msfrf2512.sfr.fr
Content-Type: multipart/mixed; boundary="===============6664296882618565386=="
MIME-Version: 1.0
From: Mikael Morin <mikael.morin@sfr.fr>
To: gfortran <fortran@gcc.gnu.org> , GCC patches <gcc-patches@gcc.gnu.org>
Message-ID: <20111027233339.18581.86093@gimli.local>
In-Reply-To: <20111027232818.18581.901@gimli.local>
References: <20111027232818.18581.901@gimli.local>
Subject: [Patch, fortran] [62..66/66] inline sum and product: Inline sum.
Date: Thu, 27 Oct 2011 23:36:00 -0000
X-IsSubscribed: yes
Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm
Precedence: bulk
List-Id: <gcc-patches.gcc.gnu.org>
List-Archive: <http://gcc.gnu.org/ml/gcc-patches/>
List-Post: <mailto:gcc-patches@gcc.gnu.org>
List-Help: <mailto:gcc-patches-help@gcc.gnu.org>
Sender: gcc-patches-owner@gcc.gnu.org
X-SW-Source: 2011-10/txt/msg02566.txt.bz2


--===============6664296882618565386==
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-length: 442

This is the purpose of all the series of patches: inline sum and product.
Patches 62..65 possibly don't deserve a patch of their own, they just make
the changes in gfc_conv_intrinsic_arith easier to read.

The combined diff (context, ignoring indent changes) is also attached here.

Patch 62: Change conditions.
Patch 63: Update argument handling.
Patch 64: Change loop usage.
Patch 65: Change gfc_se structs initializers.
Patch 66: Inline sum.

--===============6664296882618565386==
Content-Type: text/x-diff; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="pr43829-62..66.diff"
Content-length: 13917

diff --git a/trans-array.c b/trans-array.c
index acd9aec18fefc6631ad443c5dff2d3014a9d6565..262743d0d3779b4f02a63d604bd9a621401ae84e 100644
*** a/trans-array.c
--- b/trans-array.c
*************** gfc_conv_expr_descriptor (gfc_se * se, g
*** 6187,6193 ****
  	    gcc_assert ((expr->value.function.esym != NULL
  			 && expr->value.function.esym->attr.elemental)
  			|| (expr->value.function.isym != NULL
! 			    && expr->value.function.isym->elemental));
  	  else
  	    gcc_assert (ss_type == GFC_SS_INTRINSIC);
  
--- 6187,6194 ----
  	    gcc_assert ((expr->value.function.esym != NULL
  			 && expr->value.function.esym->attr.elemental)
  			|| (expr->value.function.isym != NULL
! 			    && expr->value.function.isym->elemental)
! 			|| gfc_inline_intrinsic_function_p (expr));
  	  else
  	    gcc_assert (ss_type == GFC_SS_INTRINSIC);
  
diff --git a/trans-intrinsic.c b/trans-intrinsic.c
index 3cdc1e0970aa4f787a60c96e50572a0bb1cb2a6e..973f912a624bdd442b3851471432c358118438d6 100644
*** a/trans-intrinsic.c
--- b/trans-intrinsic.c
*************** gfc_conv_intrinsic_count (gfc_se * se, g
*** 2557,2562 ****
--- 2557,2576 ----
    se->expr = resvar;
  }
  
+ 
+ /* Update given gfc_se to have ss component pointing to the nested gfc_ss
+    struct and return the corresponding loopinfo.  */
+ 
+ static gfc_loopinfo *
+ enter_nested_loop (gfc_se *se)
+ {
+   se->ss = se->ss->nested_ss;
+   gcc_assert (se->ss == se->ss->loop->ss);
+ 
+   return se->ss->loop;
+ }
+ 
+ 
  /* Inline implementation of the sum and product intrinsics.  */
  static void
  gfc_conv_intrinsic_arith (gfc_se * se, gfc_expr * expr, enum tree_code op,
*************** gfc_conv_intrinsic_arith (gfc_se * se, g
*** 2568,2587 ****
    stmtblock_t body;
    stmtblock_t block;
    tree tmp;
!   gfc_loopinfo loop;
!   gfc_actual_arglist *actual;
!   gfc_ss *arrayss;
!   gfc_ss *maskss;
    gfc_se arrayse;
    gfc_se maskse;
    gfc_expr *arrayexpr;
    gfc_expr *maskexpr;
  
!   if (se->ss)
      {
!       gfc_conv_intrinsic_funcall (se, expr);
!       return;
      }
  
    type = gfc_typenode_for_spec (&expr->ts);
    /* Initialize the result.  */
--- 2582,2604 ----
    stmtblock_t body;
    stmtblock_t block;
    tree tmp;
!   gfc_loopinfo loop, *ploop;
!   gfc_actual_arglist *arg_array, *arg_mask;
!   gfc_ss *arrayss = NULL;
!   gfc_ss *maskss = NULL;
    gfc_se arrayse;
    gfc_se maskse;
+   gfc_se *parent_se;
    gfc_expr *arrayexpr;
    gfc_expr *maskexpr;
  
!   if (expr->rank > 0)
      {
!       gcc_assert (gfc_inline_intrinsic_function_p (expr));
!       parent_se = se;
      }
+   else
+     parent_se = NULL;
  
    type = gfc_typenode_for_spec (&expr->ts);
    /* Initialize the result.  */
*************** gfc_conv_intrinsic_arith (gfc_se * se, g
*** 2608,2630 ****
  
    gfc_add_modify (&se->pre, resvar, tmp);
  
!   /* Walk the arguments.  */
!   actual = expr->value.function.actual;
!   arrayexpr = actual->expr;
!   arrayss = gfc_walk_expr (arrayexpr);
!   gcc_assert (arrayss != gfc_ss_terminator);
  
    if (op == NE_EXPR || norm2)
      /* PARITY and NORM2.  */
      maskexpr = NULL;
    else
      {
!       actual = actual->next->next;
!       gcc_assert (actual);
!       maskexpr = actual->expr;
      }
  
!   if (maskexpr && maskexpr->rank != 0)
      {
        maskss = gfc_walk_expr (maskexpr);
        gcc_assert (maskss != gfc_ss_terminator);
--- 2625,2651 ----
  
    gfc_add_modify (&se->pre, resvar, tmp);
  
!   arg_array = expr->value.function.actual;
! 
!   arrayexpr = arg_array->expr;
  
    if (op == NE_EXPR || norm2)
      /* PARITY and NORM2.  */
      maskexpr = NULL;
    else
      {
!       arg_mask  = arg_array->next->next;
!       gcc_assert (arg_mask != NULL);
!       maskexpr = arg_mask->expr;
      }
  
!   if (expr->rank == 0)
!     {
!       /* Walk the arguments.  */
!       arrayss = gfc_walk_expr (arrayexpr);
!       gcc_assert (arrayss != gfc_ss_terminator);
! 
!       if (maskexpr && maskexpr->rank > 0)
  	{
  	  maskss = gfc_walk_expr (maskexpr);
  	  gcc_assert (maskss != gfc_ss_terminator);
*************** gfc_conv_intrinsic_arith (gfc_se * se, g
*** 2635,2641 ****
    /* Initialize the scalarizer.  */
    gfc_init_loopinfo (&loop);
    gfc_add_ss_to_loop (&loop, arrayss);
!   if (maskss)
      gfc_add_ss_to_loop (&loop, maskss);
  
    /* Initialize the loop.  */
--- 2656,2662 ----
        /* Initialize the scalarizer.  */
        gfc_init_loopinfo (&loop);
        gfc_add_ss_to_loop (&loop, arrayss);
!       if (maskexpr && maskexpr->rank > 0)
  	gfc_add_ss_to_loop (&loop, maskss);
  
        /* Initialize the loop.  */
*************** gfc_conv_intrinsic_arith (gfc_se * se, g
*** 2643,2658 ****
    gfc_conv_loop_setup (&loop, &expr->where);
  
    gfc_mark_ss_chain_used (arrayss, 1);
!   if (maskss)
      gfc_mark_ss_chain_used (maskss, 1);
    /* Generate the loop body.  */
!   gfc_start_scalarized_body (&loop, &body);
  
    /* If we have a mask, only add this element if the mask is set.  */
!   if (maskss)
      {
!       gfc_init_se (&maskse, NULL);
!       gfc_copy_loopinfo_to_se (&maskse, &loop);
        maskse.ss = maskss;
        gfc_conv_expr_val (&maskse, maskexpr);
        gfc_add_block_to_block (&body, &maskse.pre);
--- 2664,2689 ----
        gfc_conv_loop_setup (&loop, &expr->where);
  
        gfc_mark_ss_chain_used (arrayss, 1);
!       if (maskexpr && maskexpr->rank > 0)
  	gfc_mark_ss_chain_used (maskss, 1);
+ 
+       ploop = &loop;
+     }
+   else
+     /* All the work has been done in the parent loops.  */
+     ploop = enter_nested_loop (se);
+ 
+   gcc_assert (ploop);
+ 
    /* Generate the loop body.  */
!   gfc_start_scalarized_body (ploop, &body);
  
    /* If we have a mask, only add this element if the mask is set.  */
!   if (maskexpr && maskexpr->rank > 0)
      {
!       gfc_init_se (&maskse, parent_se);
!       gfc_copy_loopinfo_to_se (&maskse, ploop);
!       if (expr->rank == 0)
  	maskse.ss = maskss;
        gfc_conv_expr_val (&maskse, maskexpr);
        gfc_add_block_to_block (&body, &maskse.pre);
*************** gfc_conv_intrinsic_arith (gfc_se * se, g
*** 2663,2670 ****
      gfc_init_block (&block);
  
    /* Do the actual summation/product.  */
!   gfc_init_se (&arrayse, NULL);
!   gfc_copy_loopinfo_to_se (&arrayse, &loop);
    arrayse.ss = arrayss;
    gfc_conv_expr_val (&arrayse, arrayexpr);
    gfc_add_block_to_block (&block, &arrayse.pre);
--- 2694,2702 ----
      gfc_init_block (&block);
  
    /* Do the actual summation/product.  */
!   gfc_init_se (&arrayse, parent_se);
!   gfc_copy_loopinfo_to_se (&arrayse, ploop);
!   if (expr->rank == 0)
      arrayse.ss = arrayss;
    gfc_conv_expr_val (&arrayse, arrayexpr);
    gfc_add_block_to_block (&block, &arrayse.pre);
*************** gfc_conv_intrinsic_arith (gfc_se * se, g
*** 2740,2746 ****
  
    gfc_add_block_to_block (&block, &arrayse.post);
  
!   if (maskss)
      {
        /* We enclose the above in if (mask) {...} .  */
  
--- 2772,2778 ----
  
    gfc_add_block_to_block (&block, &arrayse.post);
  
!   if (maskexpr && maskexpr->rank > 0)
      {
        /* We enclose the above in if (mask) {...} .  */
  
*************** gfc_conv_intrinsic_arith (gfc_se * se, g
*** 2752,2781 ****
      tmp = gfc_finish_block (&block);
    gfc_add_expr_to_block (&body, tmp);
  
!   gfc_trans_scalarizing_loops (&loop, &body);
  
    /* For a scalar mask, enclose the loop in an if statement.  */
!   if (maskexpr && maskss == NULL)
      {
-       gfc_init_se (&maskse, NULL);
-       gfc_conv_expr_val (&maskse, maskexpr);
        gfc_init_block (&block);
!       gfc_add_block_to_block (&block, &loop.pre);
!       gfc_add_block_to_block (&block, &loop.post);
        tmp = gfc_finish_block (&block);
  
        tmp = build3_v (COND_EXPR, maskse.expr, tmp,
  		      build_empty_stmt (input_location));
        gfc_add_expr_to_block (&block, tmp);
        gfc_add_block_to_block (&se->pre, &block);
      }
    else
      {
!       gfc_add_block_to_block (&se->pre, &loop.pre);
!       gfc_add_block_to_block (&se->pre, &loop.post);
      }
  
!   gfc_cleanup_loop (&loop);
  
    if (norm2)
      {
--- 2784,2826 ----
      tmp = gfc_finish_block (&block);
    gfc_add_expr_to_block (&body, tmp);
  
!   gfc_trans_scalarizing_loops (ploop, &body);
  
    /* For a scalar mask, enclose the loop in an if statement.  */
!   if (maskexpr && maskexpr->rank == 0)
      {
        gfc_init_block (&block);
!       gfc_add_block_to_block (&block, &ploop->pre);
!       gfc_add_block_to_block (&block, &ploop->post);
        tmp = gfc_finish_block (&block);
  
+       if (expr->rank > 0)
+ 	{
+ 	  tmp = build3_v (COND_EXPR, se->ss->info->data.scalar.value, tmp,
+ 			  build_empty_stmt (input_location));
+ 	  gfc_advance_se_ss_chain (se);
+ 	}
+       else
+ 	{
+ 	  gcc_assert (expr->rank == 0);
+ 	  gfc_init_se (&maskse, NULL);
+ 	  gfc_conv_expr_val (&maskse, maskexpr);
  	  tmp = build3_v (COND_EXPR, maskse.expr, tmp,
  			  build_empty_stmt (input_location));
+ 	}
+ 
        gfc_add_expr_to_block (&block, tmp);
        gfc_add_block_to_block (&se->pre, &block);
+       gcc_assert (se->post.head == NULL);
      }
    else
      {
!       gfc_add_block_to_block (&se->pre, &ploop->pre);
!       gfc_add_block_to_block (&se->pre, &ploop->post);
      }
  
!   if (expr->rank == 0)
!     gfc_cleanup_loop (ploop);
  
    if (norm2)
      {
*************** walk_inline_intrinsic_transpose (gfc_ss 
*** 6795,6806 ****
--- 6840,6966 ----
  }
  
  
+ /* Move the given dimension of the given gfc_ss list to a nested gfc_ss list.
+    This has the side effect of reversing the nested list, so there is no
+    need to call gfc_reverse_ss on it (the given list is assumed not to be
+    reversed yet).   */
+ 
+ static gfc_ss *
+ nest_loop_dimension (gfc_ss *ss, int dim)
+ {
+   int ss_dim, i;
+   gfc_ss *new_ss, *prev_ss = gfc_ss_terminator;
+   gfc_loopinfo *new_loop;
+ 
+   gcc_assert (ss != gfc_ss_terminator);
+ 
+   for (; ss != gfc_ss_terminator; ss = ss->next)
+     {
+       new_ss = gfc_get_ss ();
+       new_ss->next = prev_ss;
+       new_ss->parent = ss;
+       new_ss->info = ss->info;
+       new_ss->info->refcount++;
+       if (ss->dimen != 0)
+ 	{
+ 	  gcc_assert (ss->info->type != GFC_SS_SCALAR
+ 		      && ss->info->type != GFC_SS_REFERENCE);
+ 
+ 	  new_ss->dimen = 1;
+ 	  new_ss->dim[0] = ss->dim[dim];
+ 
+ 	  gcc_assert (dim < ss->dimen);
+ 
+ 	  ss_dim = --ss->dimen;
+ 	  for (i = dim; i < ss_dim; i++)
+ 	    ss->dim[i] = ss->dim[i + 1];
+ 
+ 	  ss->dim[ss_dim] = 0;
+ 	}
+       prev_ss = new_ss;
+ 
+       if (ss->nested_ss)
+ 	{
+ 	  ss->nested_ss->parent = new_ss;
+ 	  new_ss->nested_ss = ss->nested_ss;
+ 	}
+       ss->nested_ss = new_ss;
+     }
+ 
+   new_loop = gfc_get_loopinfo ();
+   gfc_init_loopinfo (new_loop);
+ 
+   gcc_assert (prev_ss != NULL);
+   gcc_assert (prev_ss != gfc_ss_terminator);
+   gfc_add_ss_to_loop (new_loop, prev_ss);
+   return new_ss->parent;
+ }
+ 
+ 
+ /* Create the gfc_ss list for the SUM/PRODUCT arguments when the function
+    is to be inlined.  */
+ 
+ static gfc_ss *
+ walk_inline_intrinsic_arith (gfc_ss *ss, gfc_expr *expr)
+ {
+   gfc_ss *tmp_ss, *tail, *array_ss;
+   gfc_actual_arglist *arg1, *arg2, *arg3;
+   int sum_dim;
+   bool scalar_mask = false;
+ 
+   /* The rank of the result will be determined later.  */
+   arg1 = expr->value.function.actual;
+   arg2 = arg1->next;
+   arg3 = arg2->next;
+   gcc_assert (arg3 != NULL);
+ 
+   if (expr->rank == 0)
+     return ss;
+ 
+   tmp_ss = gfc_ss_terminator;
+ 
+   if (arg3->expr)
+     {
+       gfc_ss *mask_ss;
+ 
+       mask_ss = gfc_walk_subexpr (tmp_ss, arg3->expr);
+       if (mask_ss == tmp_ss)
+ 	scalar_mask = 1;
+ 
+       tmp_ss = mask_ss;
+     }
+ 
+   array_ss = gfc_walk_subexpr (tmp_ss, arg1->expr);
+   gcc_assert (array_ss != tmp_ss);
+ 
+   /* Odd thing: If the mask is scalar, it is used by the frontend after
+      the array (to make an if around the nested loop). Thus it shall
+      be after array_ss once the gfc_ss list is reversed.  */
+   if (scalar_mask)
+     tmp_ss = gfc_get_scalar_ss (array_ss, arg3->expr);
+   else
+     tmp_ss = array_ss;
+ 
+   /* "Hide" the dimension on which we will sum in the first arg's scalarization
+      chain.  */
+   sum_dim = mpz_get_si (arg2->expr->value.integer) - 1;
+   tail = nest_loop_dimension (tmp_ss, sum_dim);
+   tail->next = ss;
+ 
+   return tmp_ss;
+ }
+ 
+ 
  static gfc_ss *
  walk_inline_intrinsic_function (gfc_ss * ss, gfc_expr * expr)
  {
  
    switch (expr->value.function.isym->id)
      {
+       case GFC_ISYM_PRODUCT:
+       case GFC_ISYM_SUM:
+ 	return walk_inline_intrinsic_arith (ss, expr);
+ 
        case GFC_ISYM_TRANSPOSE:
  	return walk_inline_intrinsic_transpose (ss, expr);
  
*************** gfc_walk_intrinsic_libfunc (gfc_ss * ss,
*** 6862,6872 ****
--- 7022,7047 ----
  bool
  gfc_inline_intrinsic_function_p (gfc_expr *expr)
  {
+   gfc_actual_arglist *args;
+ 
    if (!expr->value.function.isym)
      return false;
  
    switch (expr->value.function.isym->id)
      {
+     case GFC_ISYM_PRODUCT:
+     case GFC_ISYM_SUM:
+       /* Disable inline expansion if code size matters.  */
+       if (optimize_size)
+ 	return false;
+ 
+       args = expr->value.function.actual;
+       /* We need to be able to subset the SUM argument at compile-time.  */
+       if (args->next->expr && args->next->expr->expr_type != EXPR_CONSTANT)
+ 	return false;
+ 
+       return true;
+ 
      case GFC_ISYM_TRANSPOSE:
        return true;
  
diff --git a/trans.h b/trans.h
index 5757865b3a180a32e5baa320c56d235924df68dc..22033d38d157f5c85eba6fcb8ee92ab28dc22535 100644
*** a/trans.h
--- b/trans.h
*************** typedef struct gfc_loopinfo
*** 310,315 ****
--- 310,316 ----
  }
  gfc_loopinfo;
  
+ #define gfc_get_loopinfo() XCNEW (gfc_loopinfo)
  
  /* Information about a symbol that has been shadowed by a temporary.  */
  typedef struct

--===============6664296882618565386==--