diff --git a/trans-array.c b/trans-array.c index acd9aec18fefc6631ad443c5dff2d3014a9d6565..262743d0d3779b4f02a63d604bd9a621401ae84e 100644 *** a/trans-array.c --- b/trans-array.c *************** gfc_conv_expr_descriptor (gfc_se * se, g *** 6187,6193 **** gcc_assert ((expr->value.function.esym != NULL && expr->value.function.esym->attr.elemental) || (expr->value.function.isym != NULL ! && expr->value.function.isym->elemental)); else gcc_assert (ss_type == GFC_SS_INTRINSIC); --- 6187,6194 ---- gcc_assert ((expr->value.function.esym != NULL && expr->value.function.esym->attr.elemental) || (expr->value.function.isym != NULL ! && expr->value.function.isym->elemental) ! || gfc_inline_intrinsic_function_p (expr)); else gcc_assert (ss_type == GFC_SS_INTRINSIC); diff --git a/trans-intrinsic.c b/trans-intrinsic.c index 25c54fb6db9cbc7e80e3b3adca77b3bb13b15304..973f912a624bdd442b3851471432c358118438d6 100644 *** a/trans-intrinsic.c --- b/trans-intrinsic.c *************** gfc_conv_intrinsic_count (gfc_se * se, g *** 2557,2562 **** --- 2557,2576 ---- se->expr = resvar; } + + /* Update given gfc_se to have ss component pointing to the nested gfc_ss + struct and return the corresponding loopinfo. */ + + static gfc_loopinfo * + enter_nested_loop (gfc_se *se) + { + se->ss = se->ss->nested_ss; + gcc_assert (se->ss == se->ss->loop->ss); + + return se->ss->loop; + } + + /* Inline implementation of the sum and product intrinsics. */ static void gfc_conv_intrinsic_arith (gfc_se * se, gfc_expr * expr, enum tree_code op, *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2570,2587 **** tree tmp; gfc_loopinfo loop, *ploop; gfc_actual_arglist *arg_array, *arg_mask; ! gfc_ss *arrayss; ! gfc_ss *maskss; gfc_se arrayse; gfc_se maskse; gfc_se *parent_se; gfc_expr *arrayexpr; gfc_expr *maskexpr; ! if (se->ss) { ! gfc_conv_intrinsic_funcall (se, expr); ! return; } else parent_se = NULL; --- 2584,2601 ---- tree tmp; gfc_loopinfo loop, *ploop; gfc_actual_arglist *arg_array, *arg_mask; ! 
gfc_ss *arrayss = NULL; ! gfc_ss *maskss = NULL; gfc_se arrayse; gfc_se maskse; gfc_se *parent_se; gfc_expr *arrayexpr; gfc_expr *maskexpr; ! if (expr->rank > 0) { ! gcc_assert (gfc_inline_intrinsic_function_p (expr)); ! parent_se = se; } else parent_se = NULL; *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2613,2622 **** arg_array = expr->value.function.actual; - /* Walk the arguments. */ arrayexpr = arg_array->expr; - arrayss = gfc_walk_expr (arrayexpr); - gcc_assert (arrayss != gfc_ss_terminator); if (op == NE_EXPR || norm2) /* PARITY and NORM2. */ --- 2627,2633 ---- *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2628,2633 **** --- 2639,2650 ---- maskexpr = arg_mask->expr; } + if (expr->rank == 0) + { + /* Walk the arguments. */ + arrayss = gfc_walk_expr (arrayexpr); + gcc_assert (arrayss != gfc_ss_terminator); + if (maskexpr && maskexpr->rank > 0) { maskss = gfc_walk_expr (maskexpr); *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2651,2656 **** --- 2668,2680 ---- gfc_mark_ss_chain_used (maskss, 1); ploop = &loop; + } + else + /* All the work has been done in the parent loops. */ + ploop = enter_nested_loop (se); + + gcc_assert (ploop); + /* Generate the loop body. */ gfc_start_scalarized_body (ploop, &body); *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2659,2664 **** --- 2683,2689 ---- { gfc_init_se (&maskse, parent_se); gfc_copy_loopinfo_to_se (&maskse, ploop); + if (expr->rank == 0) maskse.ss = maskss; gfc_conv_expr_val (&maskse, maskexpr); gfc_add_block_to_block (&body, &maskse.pre); *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2671,2676 **** --- 2696,2702 ---- /* Do the actual summation/product. 
*/ gfc_init_se (&arrayse, parent_se); gfc_copy_loopinfo_to_se (&arrayse, ploop); + if (expr->rank == 0) arrayse.ss = arrayss; gfc_conv_expr_val (&arrayse, arrayexpr); gfc_add_block_to_block (&block, &arrayse.pre); *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2763,2779 **** /* For a scalar mask, enclose the loop in an if statement. */ if (maskexpr && maskexpr->rank == 0) { - gfc_init_se (&maskse, NULL); - gfc_conv_expr_val (&maskse, maskexpr); gfc_init_block (&block); gfc_add_block_to_block (&block, &ploop->pre); gfc_add_block_to_block (&block, &ploop->post); tmp = gfc_finish_block (&block); tmp = build3_v (COND_EXPR, maskse.expr, tmp, build_empty_stmt (input_location)); gfc_add_expr_to_block (&block, tmp); gfc_add_block_to_block (&se->pre, &block); } else { --- 2789,2817 ---- /* For a scalar mask, enclose the loop in an if statement. */ if (maskexpr && maskexpr->rank == 0) { gfc_init_block (&block); gfc_add_block_to_block (&block, &ploop->pre); gfc_add_block_to_block (&block, &ploop->post); tmp = gfc_finish_block (&block); + if (expr->rank > 0) + { + tmp = build3_v (COND_EXPR, se->ss->info->data.scalar.value, tmp, + build_empty_stmt (input_location)); + gfc_advance_se_ss_chain (se); + } + else + { + gcc_assert (expr->rank == 0); + gfc_init_se (&maskse, NULL); + gfc_conv_expr_val (&maskse, maskexpr); tmp = build3_v (COND_EXPR, maskse.expr, tmp, build_empty_stmt (input_location)); + } + gfc_add_expr_to_block (&block, tmp); gfc_add_block_to_block (&se->pre, &block); + gcc_assert (se->post.head == NULL); } else { *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2781,2786 **** --- 2819,2825 ---- gfc_add_block_to_block (&se->pre, &ploop->post); } + if (expr->rank == 0) gfc_cleanup_loop (ploop); if (norm2) *************** walk_inline_intrinsic_transpose (gfc_ss *** 6801,6812 **** --- 6840,6966 ---- } + /* Move the given dimension of the given gfc_ss list to a nested gfc_ss list. 
+ This has the side effect of reversing the nested list, so there is no + need to call gfc_reverse_ss on it (the given list is assumed not to be + reversed yet). */ + + static gfc_ss * + nest_loop_dimension (gfc_ss *ss, int dim) + { + int ss_dim, i; + gfc_ss *new_ss, *prev_ss = gfc_ss_terminator; + gfc_loopinfo *new_loop; + + gcc_assert (ss != gfc_ss_terminator); + + for (; ss != gfc_ss_terminator; ss = ss->next) + { + new_ss = gfc_get_ss (); + new_ss->next = prev_ss; + new_ss->parent = ss; + new_ss->info = ss->info; + new_ss->info->refcount++; + if (ss->dimen != 0) + { + gcc_assert (ss->info->type != GFC_SS_SCALAR + && ss->info->type != GFC_SS_REFERENCE); + + new_ss->dimen = 1; + new_ss->dim[0] = ss->dim[dim]; + + gcc_assert (dim < ss->dimen); + + ss_dim = --ss->dimen; + for (i = dim; i < ss_dim; i++) + ss->dim[i] = ss->dim[i + 1]; + + ss->dim[ss_dim] = 0; + } + prev_ss = new_ss; + + if (ss->nested_ss) + { + ss->nested_ss->parent = new_ss; + new_ss->nested_ss = ss->nested_ss; + } + ss->nested_ss = new_ss; + } + + new_loop = gfc_get_loopinfo (); + gfc_init_loopinfo (new_loop); + + gcc_assert (prev_ss != NULL); + gcc_assert (prev_ss != gfc_ss_terminator); + gfc_add_ss_to_loop (new_loop, prev_ss); + return new_ss->parent; + } + + + /* Create the gfc_ss list for the SUM/PRODUCT arguments when the function + is to be inlined. */ + + static gfc_ss * + walk_inline_intrinsic_arith (gfc_ss *ss, gfc_expr *expr) + { + gfc_ss *tmp_ss, *tail, *array_ss; + gfc_actual_arglist *arg1, *arg2, *arg3; + int sum_dim; + bool scalar_mask = false; + + /* The rank of the result will be determined later. 
*/ + arg1 = expr->value.function.actual; + arg2 = arg1->next; + arg3 = arg2->next; + gcc_assert (arg3 != NULL); + + if (expr->rank == 0) + return ss; + + tmp_ss = gfc_ss_terminator; + + if (arg3->expr) + { + gfc_ss *mask_ss; + + mask_ss = gfc_walk_subexpr (tmp_ss, arg3->expr); + if (mask_ss == tmp_ss) + scalar_mask = 1; + + tmp_ss = mask_ss; + } + + array_ss = gfc_walk_subexpr (tmp_ss, arg1->expr); + gcc_assert (array_ss != tmp_ss); + + /* Odd thing: If the mask is scalar, it is used by the frontend after + the array (to make an if around the nested loop). Thus it shall + be after array_ss once the gfc_ss list is reversed. */ + if (scalar_mask) + tmp_ss = gfc_get_scalar_ss (array_ss, arg3->expr); + else + tmp_ss = array_ss; + + /* "Hide" the dimension on which we will sum in the first arg's scalarization + chain. */ + sum_dim = mpz_get_si (arg2->expr->value.integer) - 1; + tail = nest_loop_dimension (tmp_ss, sum_dim); + tail->next = ss; + + return tmp_ss; + } + + + static gfc_ss * walk_inline_intrinsic_function (gfc_ss * ss, gfc_expr * expr) { switch (expr->value.function.isym->id) { + case GFC_ISYM_PRODUCT: + case GFC_ISYM_SUM: + return walk_inline_intrinsic_arith (ss, expr); + case GFC_ISYM_TRANSPOSE: return walk_inline_intrinsic_transpose (ss, expr); *************** gfc_walk_intrinsic_libfunc (gfc_ss * ss, *** 6868,6878 **** --- 7022,7047 ---- bool gfc_inline_intrinsic_function_p (gfc_expr *expr) { + gfc_actual_arglist *args; + if (!expr->value.function.isym) return false; switch (expr->value.function.isym->id) { + case GFC_ISYM_PRODUCT: + case GFC_ISYM_SUM: + /* Disable inline expansion if code size matters. */ + if (optimize_size) + return false; + + args = expr->value.function.actual; + /* We need to be able to subset the SUM argument at compile-time. 
*/ + if (args->next->expr && args->next->expr->expr_type != EXPR_CONSTANT) + return false; + + return true; + case GFC_ISYM_TRANSPOSE: return true; diff --git a/trans.h b/trans.h index 5757865b3a180a32e5baa320c56d235924df68dc..22033d38d157f5c85eba6fcb8ee92ab28dc22535 100644 *** a/trans.h --- b/trans.h *************** typedef struct gfc_loopinfo *** 310,315 **** --- 310,316 ---- } gfc_loopinfo; + #define gfc_get_loopinfo() XCNEW (gfc_loopinfo) /* Information about a symbol that has been shadowed by a temporary. */ typedef struct