Re: Help needed with inline expansion of MVBITS

public inbox for fortran@gcc.gnu.org
 help / color / mirror / Atom feed

* Re: Help needed with inline expansion of MVBITS
@ 2020-09-07 21:30 Harald Anlauf
  0 siblings, 0 replies; 8+ messages in thread
From: Harald Anlauf @ 2020-09-07 21:30 UTC (permalink / raw)
  To: Paul Richard Thomas; +Cc: fortran

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: mvbits_v6.patch --]
[-- Type: text/x-patch, Size: 12217 bytes --]

diff --git a/gcc/fortran/dependency.h b/gcc/fortran/dependency.h
index 2c8fcb0b8b4..217029f81ac 100644
--- a/gcc/fortran/dependency.h
+++ b/gcc/fortran/dependency.h
@@ -18,6 +18,9 @@ You should have received a copy of the GNU General Public License
 along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */

+#ifndef GFC_DEPENDENCY_H
+#define GFC_DEPENDENCY_H
+
 /****************************** Enums *********************************/
 enum gfc_dep_check
 {
@@ -42,3 +45,5 @@ int gfc_dep_resolver (gfc_ref *, gfc_ref *, gfc_reverse *,
 int gfc_are_equivalenced_arrays (gfc_expr *, gfc_expr *);

 gfc_expr * gfc_discard_nops (gfc_expr *);
+
+#endif /* GFC_DEPENDENCY_H */
diff --git a/gcc/fortran/iresolve.c b/gcc/fortran/iresolve.c
index 73769615c20..c2a4865f28f 100644
--- a/gcc/fortran/iresolve.c
+++ b/gcc/fortran/iresolve.c
@@ -3311,21 +3311,7 @@ gfc_resolve_mvbits (gfc_code *c)
 {
   static const sym_intent INTENTS[] = {INTENT_IN, INTENT_IN, INTENT_IN,
 				       INTENT_INOUT, INTENT_IN};
-
   const char *name;
-  gfc_typespec ts;
-  gfc_clear_ts (&ts);
-
-  /* FROMPOS, LEN and TOPOS are restricted to small values.  As such,
-     they will be converted so that they fit into a C int.  */
-  ts.type = BT_INTEGER;
-  ts.kind = gfc_c_int_kind;
-  if (c->ext.actual->next->expr->ts.kind != gfc_c_int_kind)
-    gfc_convert_type (c->ext.actual->next->expr, &ts, 2);
-  if (c->ext.actual->next->next->expr->ts.kind != gfc_c_int_kind)
-    gfc_convert_type (c->ext.actual->next->next->expr, &ts, 2);
-  if (c->ext.actual->next->next->next->next->expr->ts.kind != gfc_c_int_kind)
-    gfc_convert_type (c->ext.actual->next->next->next->next->expr, &ts, 2);

   /* TO and FROM are guaranteed to have the same kind parameter.  */
   name = gfc_get_string (PREFIX ("mvbits_i%d"),
diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c
index 32fe9886c57..ed70dc55a4f 100644
--- a/gcc/fortran/trans-intrinsic.c
+++ b/gcc/fortran/trans-intrinsic.c
@@ -11790,6 +11790,224 @@ conv_intrinsic_event_query (gfc_code *code)
   return gfc_finish_block (&se.pre);
 }

+
+static tree
+conv_intrinsic_mvbits (gfc_code *code)
+{
+  gfc_se se;
+  gfc_ss *ss, *lss;
+  gfc_loopinfo loop;
+  stmtblock_t body;
+  gfc_se loopse;
+
+  gfc_se argse[5];
+  gfc_expr *arg[5];
+  gfc_actual_arglist *actual;
+  int n;
+
+  tree from, frompos, len, to, topos;
+  tree lenmask, oldbits, newbits, bitsize;
+  tree type, above, mask1, mask2;
+  tree tmp;
+
+  gcc_assert (code->resolved_sym);
+
+  lss = gfc_ss_terminator;
+  ss = gfc_ss_terminator;
+  if (code->resolved_sym->attr.elemental)
+    ss = gfc_walk_elemental_function_args (ss, code->ext.actual,
+					   code->resolved_sym,
+					   GFC_SS_REFERENCE);
+  if (ss != gfc_ss_terminator)
+    {
+      /* An elemental subroutine call with array valued arguments has
+	 to be scalarized.  */
+      gfc_se depse;
+
+      /* gfc_walk_elemental_function_args renders the ss chain in the
+	 reverse order to the actual argument order.  */
+      ss = gfc_reverse_ss (ss);
+
+      /* Initialize the loop.  */
+      gfc_init_se (&loopse, NULL);
+      gfc_init_loopinfo (&loop);
+      gfc_add_ss_to_loop (&loop, ss);
+
+      gfc_conv_ss_startstride (&loop);
+      gfc_conv_loop_setup (&loop, &code->loc);
+      gfc_mark_ss_chain_used (ss, 1);
+
+      /* Convert the arguments, checking for dependencies.  */
+      gfc_copy_loopinfo_to_se (&loopse, &loop);
+      loopse.ss = ss;
+
+      gfc_init_se (&depse, NULL);
+      gfc_conv_elemental_dependencies (&depse, &loopse, code->resolved_sym,
+				       code->ext.actual, ELEM_CHECK_VARIABLE);
+
+      gfc_add_block_to_block (&loop.pre,  &depse.pre);
+      gfc_add_block_to_block (&loop.post, &depse.post);
+
+      /* Generate the loop body.  */
+      gfc_start_scalarized_body (&loop, &body);
+      lss = ss;
+    }
+  else
+    gfc_init_block (&body);
+
+  actual = code->ext.actual;
+  for (n = 0; n < 5; n++, actual = actual->next)
+    {
+      arg[n] = actual->expr;
+      gfc_init_se (&argse[n], NULL);
+
+      if (lss != gfc_ss_terminator)
+	{
+	  gfc_copy_loopinfo_to_se (&argse[n], &loop);
+	  /* Find the ss for the expression if it is there.  */
+	  argse[n].ss = lss;
+	  gfc_mark_ss_chain_used (lss, 1);
+	}
+
+      gfc_conv_expr (&argse[n], arg[n]);
+
+      /* Retain the present position in the ss list.  */
+      if (lss != gfc_ss_terminator)
+	lss = argse[n].ss;
+    }
+
+  from    = argse[0].expr;
+  frompos = argse[1].expr;
+  len     = argse[2].expr;
+  to      = argse[3].expr;
+  topos   = argse[4].expr;
+
+  /* The type of the result (TO).  */
+  type    = TREE_TYPE (to);
+  bitsize = build_int_cst (integer_type_node, TYPE_PRECISION (type));
+
+  /* Optionally generate code for runtime argument check.  */
+  if (gfc_option.rtcheck & GFC_RTCHECK_BITS)
+    {
+      tree nbits, below, ccond;
+      tree fp = fold_convert (long_integer_type_node, frompos);
+      tree ln = fold_convert (long_integer_type_node, len);
+      tree tp = fold_convert (long_integer_type_node, topos);
+      below = fold_build2_loc (input_location, LT_EXPR,
+			       logical_type_node, frompos,
+			       build_int_cst (TREE_TYPE (frompos), 0));
+      above = fold_build2_loc (input_location, GT_EXPR,
+			       logical_type_node, frompos,
+			       fold_convert (TREE_TYPE (frompos), bitsize));
+      ccond = fold_build2_loc (input_location, TRUTH_ORIF_EXPR,
+			       logical_type_node, below, above);
+      gfc_trans_runtime_check (true, false, ccond, &argse[1].pre,
+			       &arg[1]->where,
+			       "FROMPOS argument (%ld) out of range 0:%d "
+			       "in intrinsic MVBITS", fp, bitsize);
+      below = fold_build2_loc (input_location, LT_EXPR,
+			       logical_type_node, len,
+			       build_int_cst (TREE_TYPE (len), 0));
+      above = fold_build2_loc (input_location, GT_EXPR,
+			       logical_type_node, len,
+			       fold_convert (TREE_TYPE (len), bitsize));
+      ccond = fold_build2_loc (input_location, TRUTH_ORIF_EXPR,
+			       logical_type_node, below, above);
+      gfc_trans_runtime_check (true, false, ccond, &argse[2].pre,
+			       &arg[2]->where,
+			       "LEN argument (%ld) out of range 0:%d "
+			       "in intrinsic MVBITS", ln, bitsize);
+      below = fold_build2_loc (input_location, LT_EXPR,
+			       logical_type_node, topos,
+			       build_int_cst (TREE_TYPE (topos), 0));
+      above = fold_build2_loc (input_location, GT_EXPR,
+			       logical_type_node, topos,
+			       fold_convert (TREE_TYPE (topos), bitsize));
+      ccond = fold_build2_loc (input_location, TRUTH_ORIF_EXPR,
+			       logical_type_node, below, above);
+      gfc_trans_runtime_check (true, false, ccond, &argse[4].pre,
+			       &arg[4]->where,
+			       "TOPOS argument (%ld) out of range 0:%d "
+			       "in intrinsic MVBITS", tp, bitsize);
+
+      /* The tests above ensure that FROMPOS, LEN and TOPOS fit into short
+	 integers.  Additions below cannot overflow.  */
+      nbits = fold_convert (long_integer_type_node, bitsize);
+      above = fold_build2_loc (input_location, PLUS_EXPR,
+			       long_integer_type_node, fp, ln);
+      ccond = fold_build2_loc (input_location, GT_EXPR,
+			       logical_type_node, above, nbits);
+      gfc_trans_runtime_check (true, false, ccond, &argse[1].pre,
+			       &arg[1]->where,
+			       "FROMPOS(%ld)+LEN(%ld)>BIT_SIZE(%d) "
+			       "in intrinsic MVBITS", fp, ln, bitsize);
+      above = fold_build2_loc (input_location, PLUS_EXPR,
+			       long_integer_type_node, tp, ln);
+      ccond = fold_build2_loc (input_location, GT_EXPR,
+			       logical_type_node, above, nbits);
+      gfc_trans_runtime_check (true, false, ccond, &argse[4].pre,
+			       &arg[4]->where,
+			       "TOPOS(%ld)+LEN(%ld)>BIT_SIZE(%d) "
+			       "in intrinsic MVBITS", tp, ln, bitsize);
+    }
+
+  for (n = 0; n < 5; n++)
+    gfc_add_block_to_block (&body, &argse[n].pre);
+
+  /* lenmask = (LEN >= bit_size (TYPE)) ? ~(TYPE)0 : ((TYPE)1 << LEN) - 1  */
+  above = fold_build2_loc (input_location, GE_EXPR, logical_type_node,
+			   len, fold_convert (TREE_TYPE (len), bitsize));
+  mask1 = build_int_cst (type, -1);
+  mask2 = fold_build2_loc (input_location, LSHIFT_EXPR, type,
+			   build_int_cst (type, 1), len);
+  mask2 = fold_build2_loc (input_location, MINUS_EXPR, type,
+			   mask2, build_int_cst (type, 1));
+  lenmask = fold_build3_loc (input_location, COND_EXPR, type,
+			     above, mask1, mask2);
+
+  /* newbits = (((UTYPE)(FROM) >> FROMPOS) & lenmask) << TOPOS  */
+  /* For valid frompos+len <= bit_size(FROM) the conversion to unsigned is
+     not necessary; artificial bits from rshift will be masked.  */
+  newbits = fold_build2_loc (input_location, RSHIFT_EXPR, type,
+			     fold_convert (type, from), frompos);
+  newbits = fold_build2_loc (input_location, BIT_AND_EXPR, type,
+			     newbits, lenmask);
+  newbits = fold_build2_loc (input_location, LSHIFT_EXPR, type,
+			     newbits, topos);
+
+  /* oldbits = TO & (~(lenmask << TOPOS))  */
+  oldbits = fold_build2_loc (input_location, LSHIFT_EXPR, type,
+			     lenmask, topos);
+  oldbits = fold_build1_loc (input_location, BIT_NOT_EXPR, type, oldbits);
+  oldbits = fold_build2_loc (input_location, BIT_AND_EXPR, type, oldbits, to);
+
+  /* TO = newbits | oldbits  */
+  tmp = fold_build2_loc (input_location, BIT_IOR_EXPR, type, oldbits, newbits);
+
+  gfc_add_modify (&body, to, tmp);
+
+  if (ss != gfc_ss_terminator)
+    {
+      gfc_init_se (&se, NULL);
+      gfc_add_block_to_block (&se.pre, &loopse.pre);
+
+      /* Finish up the loop block and the loop.  */
+      gfc_trans_scalarizing_loops (&loop, &body);
+      gfc_add_block_to_block (&se.pre, &loop.pre);
+      gfc_add_block_to_block (&se.pre, &loop.post);
+      gfc_cleanup_loop (&loop);
+      for (n = 0; n < 5; n++)
+	gfc_add_block_to_block (&se.pre, &argse[n].post);
+      return gfc_finish_block (&se.pre);
+    }
+
+  for (n = 0; n < 5; n++)
+    gfc_add_block_to_block (&body, &argse[n].post);
+
+  return gfc_finish_block (&body);
+}
+
+
 static tree
 conv_intrinsic_move_alloc (gfc_code *code)
 {
@@ -12119,6 +12337,11 @@ gfc_conv_intrinsic_subroutine (gfc_code *code)
       res = conv_intrinsic_kill_sub (code);
       break;

+    case GFC_ISYM_MVBITS:
+      res = conv_intrinsic_mvbits (code);
+//      res = NULL_TREE;
+      break;
+
     case GFC_ISYM_SYSTEM_CLOCK:
       res = conv_intrinsic_system_clock (code);
       break;
diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c
index 1f183b9dcd0..9b875f031ec 100644
--- a/gcc/fortran/trans-stmt.c
+++ b/gcc/fortran/trans-stmt.c
@@ -217,7 +217,7 @@ replace_ss (gfc_se *se, gfc_ss *old_ss, gfc_ss *new_ss)
    elemental subroutines.  Make temporaries for output arguments if any such
    dependencies are found.  Output arguments are chosen because internal_unpack
    can be used, as is, to copy the result back to the variable.  */
-static void
+void
 gfc_conv_elemental_dependencies (gfc_se * se, gfc_se * loopse,
 				 gfc_symbol * sym, gfc_actual_arglist * arg,
 				 gfc_dep_check check_variable)
diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h
index e126fe92782..95119c68104 100644
--- a/gcc/fortran/trans.h
+++ b/gcc/fortran/trans.h
@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3.  If not see
 #define GFC_TRANS_H

 #include "predict.h"  /* For enum br_predictor and PRED_*.  */
+#include "dependency.h"	/* For gfc_dep_check.  */

 /* Mangled symbols take the form __module__name or __module.submodule__name.  */
 #define GFC_MAX_MANGLED_SYMBOL_LEN  (GFC_MAX_SYMBOL_LEN*3+5)
@@ -1169,4 +1170,7 @@ extern const char gfc_msg_wrong_return[];
 #define OMPWS_NOWAIT		16	/* Use NOWAIT on OMP_FOR.  */
 extern int ompws_flags;

+void gfc_conv_elemental_dependencies (gfc_se *, gfc_se *, gfc_symbol *,
+				      gfc_actual_arglist *, gfc_dep_check);
+
 #endif /* GFC_TRANS_H */

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Help needed with inline expansion of MVBITS
  2020-09-06 18:01   ` Paul Richard Thomas
@ 2020-09-07 21:26     ` Harald Anlauf
  0 siblings, 0 replies; 8+ messages in thread
From: Harald Anlauf @ 2020-09-07 21:26 UTC (permalink / raw)
  To: Paul Richard Thomas; +Cc: fortran

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: mvbits_v6.patch --]
[-- Type: text/x-patch, Size: 12217 bytes --]

diff --git a/gcc/fortran/dependency.h b/gcc/fortran/dependency.h
index 2c8fcb0b8b4..217029f81ac 100644
--- a/gcc/fortran/dependency.h
+++ b/gcc/fortran/dependency.h
@@ -18,6 +18,9 @@ You should have received a copy of the GNU General Public License
 along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */

+#ifndef GFC_DEPENDENCY_H
+#define GFC_DEPENDENCY_H
+
 /****************************** Enums *********************************/
 enum gfc_dep_check
 {
@@ -42,3 +45,5 @@ int gfc_dep_resolver (gfc_ref *, gfc_ref *, gfc_reverse *,
 int gfc_are_equivalenced_arrays (gfc_expr *, gfc_expr *);

 gfc_expr * gfc_discard_nops (gfc_expr *);
+
+#endif /* GFC_DEPENDENCY_H */
diff --git a/gcc/fortran/iresolve.c b/gcc/fortran/iresolve.c
index 73769615c20..c2a4865f28f 100644
--- a/gcc/fortran/iresolve.c
+++ b/gcc/fortran/iresolve.c
@@ -3311,21 +3311,7 @@ gfc_resolve_mvbits (gfc_code *c)
 {
   static const sym_intent INTENTS[] = {INTENT_IN, INTENT_IN, INTENT_IN,
 				       INTENT_INOUT, INTENT_IN};
-
   const char *name;
-  gfc_typespec ts;
-  gfc_clear_ts (&ts);
-
-  /* FROMPOS, LEN and TOPOS are restricted to small values.  As such,
-     they will be converted so that they fit into a C int.  */
-  ts.type = BT_INTEGER;
-  ts.kind = gfc_c_int_kind;
-  if (c->ext.actual->next->expr->ts.kind != gfc_c_int_kind)
-    gfc_convert_type (c->ext.actual->next->expr, &ts, 2);
-  if (c->ext.actual->next->next->expr->ts.kind != gfc_c_int_kind)
-    gfc_convert_type (c->ext.actual->next->next->expr, &ts, 2);
-  if (c->ext.actual->next->next->next->next->expr->ts.kind != gfc_c_int_kind)
-    gfc_convert_type (c->ext.actual->next->next->next->next->expr, &ts, 2);

   /* TO and FROM are guaranteed to have the same kind parameter.  */
   name = gfc_get_string (PREFIX ("mvbits_i%d"),
diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c
index 32fe9886c57..ed70dc55a4f 100644
--- a/gcc/fortran/trans-intrinsic.c
+++ b/gcc/fortran/trans-intrinsic.c
@@ -11790,6 +11790,224 @@ conv_intrinsic_event_query (gfc_code *code)
   return gfc_finish_block (&se.pre);
 }

+
+static tree
+conv_intrinsic_mvbits (gfc_code *code)
+{
+  gfc_se se;
+  gfc_ss *ss, *lss;
+  gfc_loopinfo loop;
+  stmtblock_t body;
+  gfc_se loopse;
+
+  gfc_se argse[5];
+  gfc_expr *arg[5];
+  gfc_actual_arglist *actual;
+  int n;
+
+  tree from, frompos, len, to, topos;
+  tree lenmask, oldbits, newbits, bitsize;
+  tree type, above, mask1, mask2;
+  tree tmp;
+
+  gcc_assert (code->resolved_sym);
+
+  lss = gfc_ss_terminator;
+  ss = gfc_ss_terminator;
+  if (code->resolved_sym->attr.elemental)
+    ss = gfc_walk_elemental_function_args (ss, code->ext.actual,
+					   code->resolved_sym,
+					   GFC_SS_REFERENCE);
+  if (ss != gfc_ss_terminator)
+    {
+      /* An elemental subroutine call with array valued arguments has
+	 to be scalarized.  */
+      gfc_se depse;
+
+      /* gfc_walk_elemental_function_args renders the ss chain in the
+	 reverse order to the actual argument order.  */
+      ss = gfc_reverse_ss (ss);
+
+      /* Initialize the loop.  */
+      gfc_init_se (&loopse, NULL);
+      gfc_init_loopinfo (&loop);
+      gfc_add_ss_to_loop (&loop, ss);
+
+      gfc_conv_ss_startstride (&loop);
+      gfc_conv_loop_setup (&loop, &code->loc);
+      gfc_mark_ss_chain_used (ss, 1);
+
+      /* Convert the arguments, checking for dependencies.  */
+      gfc_copy_loopinfo_to_se (&loopse, &loop);
+      loopse.ss = ss;
+
+      gfc_init_se (&depse, NULL);
+      gfc_conv_elemental_dependencies (&depse, &loopse, code->resolved_sym,
+				       code->ext.actual, ELEM_CHECK_VARIABLE);
+
+      gfc_add_block_to_block (&loop.pre,  &depse.pre);
+      gfc_add_block_to_block (&loop.post, &depse.post);
+
+      /* Generate the loop body.  */
+      gfc_start_scalarized_body (&loop, &body);
+      lss = ss;
+    }
+  else
+    gfc_init_block (&body);
+
+  actual = code->ext.actual;
+  for (n = 0; n < 5; n++, actual = actual->next)
+    {
+      arg[n] = actual->expr;
+      gfc_init_se (&argse[n], NULL);
+
+      if (lss != gfc_ss_terminator)
+	{
+	  gfc_copy_loopinfo_to_se (&argse[n], &loop);
+	  /* Find the ss for the expression if it is there.  */
+	  argse[n].ss = lss;
+	  gfc_mark_ss_chain_used (lss, 1);
+	}
+
+      gfc_conv_expr (&argse[n], arg[n]);
+
+      /* Retain the present position in the ss list.  */
+      if (lss != gfc_ss_terminator)
+	lss = argse[n].ss;
+    }
+
+  from    = argse[0].expr;
+  frompos = argse[1].expr;
+  len     = argse[2].expr;
+  to      = argse[3].expr;
+  topos   = argse[4].expr;
+
+  /* The type of the result (TO).  */
+  type    = TREE_TYPE (to);
+  bitsize = build_int_cst (integer_type_node, TYPE_PRECISION (type));
+
+  /* Optionally generate code for runtime argument check.  */
+  if (gfc_option.rtcheck & GFC_RTCHECK_BITS)
+    {
+      tree nbits, below, ccond;
+      tree fp = fold_convert (long_integer_type_node, frompos);
+      tree ln = fold_convert (long_integer_type_node, len);
+      tree tp = fold_convert (long_integer_type_node, topos);
+      below = fold_build2_loc (input_location, LT_EXPR,
+			       logical_type_node, frompos,
+			       build_int_cst (TREE_TYPE (frompos), 0));
+      above = fold_build2_loc (input_location, GT_EXPR,
+			       logical_type_node, frompos,
+			       fold_convert (TREE_TYPE (frompos), bitsize));
+      ccond = fold_build2_loc (input_location, TRUTH_ORIF_EXPR,
+			       logical_type_node, below, above);
+      gfc_trans_runtime_check (true, false, ccond, &argse[1].pre,
+			       &arg[1]->where,
+			       "FROMPOS argument (%ld) out of range 0:%d "
+			       "in intrinsic MVBITS", fp, bitsize);
+      below = fold_build2_loc (input_location, LT_EXPR,
+			       logical_type_node, len,
+			       build_int_cst (TREE_TYPE (len), 0));
+      above = fold_build2_loc (input_location, GT_EXPR,
+			       logical_type_node, len,
+			       fold_convert (TREE_TYPE (len), bitsize));
+      ccond = fold_build2_loc (input_location, TRUTH_ORIF_EXPR,
+			       logical_type_node, below, above);
+      gfc_trans_runtime_check (true, false, ccond, &argse[2].pre,
+			       &arg[2]->where,
+			       "LEN argument (%ld) out of range 0:%d "
+			       "in intrinsic MVBITS", ln, bitsize);
+      below = fold_build2_loc (input_location, LT_EXPR,
+			       logical_type_node, topos,
+			       build_int_cst (TREE_TYPE (topos), 0));
+      above = fold_build2_loc (input_location, GT_EXPR,
+			       logical_type_node, topos,
+			       fold_convert (TREE_TYPE (topos), bitsize));
+      ccond = fold_build2_loc (input_location, TRUTH_ORIF_EXPR,
+			       logical_type_node, below, above);
+      gfc_trans_runtime_check (true, false, ccond, &argse[4].pre,
+			       &arg[4]->where,
+			       "TOPOS argument (%ld) out of range 0:%d "
+			       "in intrinsic MVBITS", tp, bitsize);
+
+      /* The tests above ensure that FROMPOS, LEN and TOPOS fit into short
+	 integers.  Additions below cannot overflow.  */
+      nbits = fold_convert (long_integer_type_node, bitsize);
+      above = fold_build2_loc (input_location, PLUS_EXPR,
+			       long_integer_type_node, fp, ln);
+      ccond = fold_build2_loc (input_location, GT_EXPR,
+			       logical_type_node, above, nbits);
+      gfc_trans_runtime_check (true, false, ccond, &argse[1].pre,
+			       &arg[1]->where,
+			       "FROMPOS(%ld)+LEN(%ld)>BIT_SIZE(%d) "
+			       "in intrinsic MVBITS", fp, ln, bitsize);
+      above = fold_build2_loc (input_location, PLUS_EXPR,
+			       long_integer_type_node, tp, ln);
+      ccond = fold_build2_loc (input_location, GT_EXPR,
+			       logical_type_node, above, nbits);
+      gfc_trans_runtime_check (true, false, ccond, &argse[4].pre,
+			       &arg[4]->where,
+			       "TOPOS(%ld)+LEN(%ld)>BIT_SIZE(%d) "
+			       "in intrinsic MVBITS", tp, ln, bitsize);
+    }
+
+  for (n = 0; n < 5; n++)
+    gfc_add_block_to_block (&body, &argse[n].pre);
+
+  /* lenmask = (LEN >= bit_size (TYPE)) ? ~(TYPE)0 : ((TYPE)1 << LEN) - 1  */
+  above = fold_build2_loc (input_location, GE_EXPR, logical_type_node,
+			   len, fold_convert (TREE_TYPE (len), bitsize));
+  mask1 = build_int_cst (type, -1);
+  mask2 = fold_build2_loc (input_location, LSHIFT_EXPR, type,
+			   build_int_cst (type, 1), len);
+  mask2 = fold_build2_loc (input_location, MINUS_EXPR, type,
+			   mask2, build_int_cst (type, 1));
+  lenmask = fold_build3_loc (input_location, COND_EXPR, type,
+			     above, mask1, mask2);
+
+  /* newbits = (((UTYPE)(FROM) >> FROMPOS) & lenmask) << TOPOS  */
+  /* For valid frompos+len <= bit_size(FROM) the conversion to unsigned is
+     not necessary; artificial bits from rshift will be masked.  */
+  newbits = fold_build2_loc (input_location, RSHIFT_EXPR, type,
+			     fold_convert (type, from), frompos);
+  newbits = fold_build2_loc (input_location, BIT_AND_EXPR, type,
+			     newbits, lenmask);
+  newbits = fold_build2_loc (input_location, LSHIFT_EXPR, type,
+			     newbits, topos);
+
+  /* oldbits = TO & (~(lenmask << TOPOS))  */
+  oldbits = fold_build2_loc (input_location, LSHIFT_EXPR, type,
+			     lenmask, topos);
+  oldbits = fold_build1_loc (input_location, BIT_NOT_EXPR, type, oldbits);
+  oldbits = fold_build2_loc (input_location, BIT_AND_EXPR, type, oldbits, to);
+
+  /* TO = newbits | oldbits  */
+  tmp = fold_build2_loc (input_location, BIT_IOR_EXPR, type, oldbits, newbits);
+
+  gfc_add_modify (&body, to, tmp);
+
+  if (ss != gfc_ss_terminator)
+    {
+      gfc_init_se (&se, NULL);
+      gfc_add_block_to_block (&se.pre, &loopse.pre);
+
+      /* Finish up the loop block and the loop.  */
+      gfc_trans_scalarizing_loops (&loop, &body);
+      gfc_add_block_to_block (&se.pre, &loop.pre);
+      gfc_add_block_to_block (&se.pre, &loop.post);
+      gfc_cleanup_loop (&loop);
+      for (n = 0; n < 5; n++)
+	gfc_add_block_to_block (&se.pre, &argse[n].post);
+      return gfc_finish_block (&se.pre);
+    }
+
+  for (n = 0; n < 5; n++)
+    gfc_add_block_to_block (&body, &argse[n].post);
+
+  return gfc_finish_block (&body);
+}
+
+
 static tree
 conv_intrinsic_move_alloc (gfc_code *code)
 {
@@ -12119,6 +12337,11 @@ gfc_conv_intrinsic_subroutine (gfc_code *code)
       res = conv_intrinsic_kill_sub (code);
       break;

+    case GFC_ISYM_MVBITS:
+      res = conv_intrinsic_mvbits (code);
+//      res = NULL_TREE;
+      break;
+
     case GFC_ISYM_SYSTEM_CLOCK:
       res = conv_intrinsic_system_clock (code);
       break;
diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c
index 1f183b9dcd0..9b875f031ec 100644
--- a/gcc/fortran/trans-stmt.c
+++ b/gcc/fortran/trans-stmt.c
@@ -217,7 +217,7 @@ replace_ss (gfc_se *se, gfc_ss *old_ss, gfc_ss *new_ss)
    elemental subroutines.  Make temporaries for output arguments if any such
    dependencies are found.  Output arguments are chosen because internal_unpack
    can be used, as is, to copy the result back to the variable.  */
-static void
+void
 gfc_conv_elemental_dependencies (gfc_se * se, gfc_se * loopse,
 				 gfc_symbol * sym, gfc_actual_arglist * arg,
 				 gfc_dep_check check_variable)
diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h
index e126fe92782..95119c68104 100644
--- a/gcc/fortran/trans.h
+++ b/gcc/fortran/trans.h
@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3.  If not see
 #define GFC_TRANS_H

 #include "predict.h"  /* For enum br_predictor and PRED_*.  */
+#include "dependency.h"	/* For gfc_dep_check.  */

 /* Mangled symbols take the form __module__name or __module.submodule__name.  */
 #define GFC_MAX_MANGLED_SYMBOL_LEN  (GFC_MAX_SYMBOL_LEN*3+5)
@@ -1169,4 +1170,7 @@ extern const char gfc_msg_wrong_return[];
 #define OMPWS_NOWAIT		16	/* Use NOWAIT on OMP_FOR.  */
 extern int ompws_flags;

+void gfc_conv_elemental_dependencies (gfc_se *, gfc_se *, gfc_symbol *,
+				      gfc_actual_arglist *, gfc_dep_check);
+
 #endif /* GFC_TRANS_H */

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Help needed with inline expansion of MVBITS
  2020-09-03 21:23 ` Harald Anlauf
  2020-09-04  7:16   ` Paul Richard Thomas
@ 2020-09-06 18:01   ` Paul Richard Thomas
  2020-09-07 21:26     ` Harald Anlauf
  1 sibling, 1 reply; 8+ messages in thread
From: Paul Richard Thomas @ 2020-09-06 18:01 UTC (permalink / raw)
  To: Harald Anlauf; +Cc: fortran

Hi Harald,

With reference to the last version of the patch that I sent you, the gimple
error was caused by the gfc_start_block (&body); messing up the scope of
the temporary used for the limit in the DO statements. Gimple was correctly
reporting that this was the offending statement and so I was greatly
mystified that the error disappeared, when the call to mvbits was commented
out. This is fixed with gfc_init_block (&body);, which gives the block its
own scope and prevents this error. Also,  gfc_init_se (&se, NULL); does
everything that is needed for se.pre so remove any separate initialization
of that block.

mvbits_1.f90 still generates runtime errors for kind = 4 and kind = 8 at n
= 32 and 64 respectively. In both cases the sign is wrong. I think that
this is associated with the neglect of the cast to the unsigned type that
appears in the library version.

Best regards

Paul

On Thu, 3 Sep 2020 at 22:23, Harald Anlauf <anlauf@gmx.de> wrote:

> Dear all,
>
> with the help of Paul we could modify my first draft so that it handles the
> expansion of MVBITS as an elemental subroutine, and also the generation of
> the runtime checks work now.  The current version of this patch is attached
> for those interested.
>
> BUT: there's now a regression in the testsuite (mvbits_1.f90) that produces
> gimple errors.  I'm at a loss now, but maybe somebody else who does not
> have
> potatoes on his eyes can tell me what is wrong.
>
> A reduced version of mvbits_1.f90 that ICEs in gimplify_var_or_parm_decl
> with the patch is:
>
>
> implicit none
> integer(1) :: i1,j1
> integer(2) :: i2,j2
> integer(4) :: i4,j4
> integer(8) :: i8,j8
> integer    :: n
>
>  integer            :: ibits = bit_size(1_1) ! ICEs
> !integer, parameter :: ibits = bit_size(1_1) ! works
>
>  do n=1,ibits
>     i1=-1
>     call mvbits (1_1, 0, n, i1, 0)
>  end do
>
>  do n=1,bit_size(i2)
>     i2=-1
>     call mvbits (1_2, 0, n, i2, 0)
>  end do
>
>  do n=1,bit_size(i4)
>     i4=-1
>     call mvbits (1_4, 0, n, i4, 0)
>  end do
>
>  do n=1,bit_size(i8)
>     i8=-1
>     call mvbits (1_8, 0, n, i8, 0)
>  end do
> end
>
>
> Thanks,
> Harald
>

-- 
"If you can't explain it simply, you don't understand it well enough" -
Albert Einstein

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Help needed with inline expansion of MVBITS
  2020-09-03 21:23 ` Harald Anlauf
@ 2020-09-04  7:16   ` Paul Richard Thomas
  2020-09-06 18:01   ` Paul Richard Thomas
  1 sibling, 0 replies; 8+ messages in thread
From: Paul Richard Thomas @ 2020-09-04  7:16 UTC (permalink / raw)
  To: Harald Anlauf; +Cc: fortran

Hi Harald,

I'll be taking another look at the gimple problem later on today.

Cheers

Paul


On Thu, 3 Sep 2020 at 22:23, Harald Anlauf <anlauf@gmx.de> wrote:

> Dear all,
>
> with the help of Paul we could modify my first draft so that it handles the
> expansion of MVBITS as an elemental subroutine, and also the generation of
> the runtime checks work now.  The current version of this patch is attached
> for those interested.
>
> BUT: there's now a regression in the testsuite (mvbits_1.f90) that produces
> gimple errors.  I'm at a loss now, but maybe somebody else who does not
> have
> potatoes on his eyes can tell me what is wrong.
>
> A reduced version of mvbits_1.f90 that ICEs in gimplify_var_or_parm_decl
> with the patch is:
>
>
> implicit none
> integer(1) :: i1,j1
> integer(2) :: i2,j2
> integer(4) :: i4,j4
> integer(8) :: i8,j8
> integer    :: n
>
>  integer            :: ibits = bit_size(1_1) ! ICEs
> !integer, parameter :: ibits = bit_size(1_1) ! works
>
>  do n=1,ibits
>     i1=-1
>     call mvbits (1_1, 0, n, i1, 0)
>  end do
>
>  do n=1,bit_size(i2)
>     i2=-1
>     call mvbits (1_2, 0, n, i2, 0)
>  end do
>
>  do n=1,bit_size(i4)
>     i4=-1
>     call mvbits (1_4, 0, n, i4, 0)
>  end do
>
>  do n=1,bit_size(i8)
>     i8=-1
>     call mvbits (1_8, 0, n, i8, 0)
>  end do
> end
>
>
> Thanks,
> Harald
>


-- 
"If you can't explain it simply, you don't understand it well enough" -
Albert Einstein

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Help needed with inline expansion of MVBITS
  2020-08-29 15:34 Harald Anlauf
  2020-08-29 17:38 ` Paul Richard Thomas
@ 2020-09-03 21:23 ` Harald Anlauf
  2020-09-04  7:16   ` Paul Richard Thomas
  2020-09-06 18:01   ` Paul Richard Thomas
  1 sibling, 2 replies; 8+ messages in thread
From: Harald Anlauf @ 2020-09-03 21:23 UTC (permalink / raw)
  To: fortran

[-- Attachment #1: Type: text/plain, Size: 1087 bytes --]

Dear all,

with the help of Paul we could modify my first draft so that it handles the
expansion of MVBITS as an elemental subroutine, and also the generation of
the runtime checks work now.  The current version of this patch is attached
for those interested.

BUT: there's now a regression in the testsuite (mvbits_1.f90) that produces
gimple errors.  I'm at a loss now, but maybe somebody else who does not have
potatoes on his eyes can tell me what is wrong.

A reduced version of mvbits_1.f90 that ICEs in gimplify_var_or_parm_decl
with the patch is:


implicit none
integer(1) :: i1,j1
integer(2) :: i2,j2
integer(4) :: i4,j4
integer(8) :: i8,j8
integer    :: n

 integer            :: ibits = bit_size(1_1) ! ICEs
!integer, parameter :: ibits = bit_size(1_1) ! works

 do n=1,ibits
    i1=-1
    call mvbits (1_1, 0, n, i1, 0)
 end do

 do n=1,bit_size(i2)
    i2=-1
    call mvbits (1_2, 0, n, i2, 0)
 end do

 do n=1,bit_size(i4)
    i4=-1
    call mvbits (1_4, 0, n, i4, 0)
 end do

 do n=1,bit_size(i8)
    i8=-1
    call mvbits (1_8, 0, n, i8, 0)
 end do
end


Thanks,
Harald

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: mvbits_v5.patch --]
[-- Type: text/x-patch, Size: 12101 bytes --]

diff --git a/gcc/fortran/dependency.h b/gcc/fortran/dependency.h
index 2c8fcb0b8b4..217029f81ac 100644
--- a/gcc/fortran/dependency.h
+++ b/gcc/fortran/dependency.h
@@ -18,6 +18,9 @@ You should have received a copy of the GNU General Public License
 along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */

+#ifndef GFC_DEPENDENCY_H
+#define GFC_DEPENDENCY_H
+
 /****************************** Enums *********************************/
 enum gfc_dep_check
 {
@@ -42,3 +45,5 @@ int gfc_dep_resolver (gfc_ref *, gfc_ref *, gfc_reverse *,
 int gfc_are_equivalenced_arrays (gfc_expr *, gfc_expr *);

 gfc_expr * gfc_discard_nops (gfc_expr *);
+
+#endif /* GFC_DEPENDENCY_H */
diff --git a/gcc/fortran/iresolve.c b/gcc/fortran/iresolve.c
index 73769615c20..c2a4865f28f 100644
--- a/gcc/fortran/iresolve.c
+++ b/gcc/fortran/iresolve.c
@@ -3311,21 +3311,7 @@ gfc_resolve_mvbits (gfc_code *c)
 {
   static const sym_intent INTENTS[] = {INTENT_IN, INTENT_IN, INTENT_IN,
 				       INTENT_INOUT, INTENT_IN};
-
   const char *name;
-  gfc_typespec ts;
-  gfc_clear_ts (&ts);
-
-  /* FROMPOS, LEN and TOPOS are restricted to small values.  As such,
-     they will be converted so that they fit into a C int.  */
-  ts.type = BT_INTEGER;
-  ts.kind = gfc_c_int_kind;
-  if (c->ext.actual->next->expr->ts.kind != gfc_c_int_kind)
-    gfc_convert_type (c->ext.actual->next->expr, &ts, 2);
-  if (c->ext.actual->next->next->expr->ts.kind != gfc_c_int_kind)
-    gfc_convert_type (c->ext.actual->next->next->expr, &ts, 2);
-  if (c->ext.actual->next->next->next->next->expr->ts.kind != gfc_c_int_kind)
-    gfc_convert_type (c->ext.actual->next->next->next->next->expr, &ts, 2);

   /* TO and FROM are guaranteed to have the same kind parameter.  */
   name = gfc_get_string (PREFIX ("mvbits_i%d"),
diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c
index 2483f016d8e..e01fafaccb6 100644
--- a/gcc/fortran/trans-intrinsic.c
+++ b/gcc/fortran/trans-intrinsic.c
@@ -11777,6 +11790,224 @@ conv_intrinsic_event_query (gfc_code *code)
   return gfc_finish_block (&se.pre);
 }

+
+static tree
+conv_intrinsic_mvbits (gfc_code *code)
+{
+  gfc_se se;
+  gfc_ss *ss, *lss;
+  gfc_loopinfo loop;
+  stmtblock_t body;
+  gfc_se loopse;
+
+  gfc_se argse[5];
+  gfc_expr *arg[5];
+  gfc_actual_arglist *actual;
+  int n;
+
+  tree from, frompos, len, to, topos;
+  tree lenmask, oldbits, newbits, bitsize;
+  tree type, above, mask1, mask2;
+  tree tmp;
+
+  gfc_init_se (&se, NULL);
+  gfc_start_block (&se.pre);
+
+  gcc_assert (code->resolved_sym);
+
+  lss = gfc_ss_terminator;
+  ss = gfc_ss_terminator;
+  if (code->resolved_sym->attr.elemental)
+    ss = gfc_walk_elemental_function_args (ss, code->ext.actual,
+					   code->resolved_sym,
+					   GFC_SS_REFERENCE);
+  if (ss != gfc_ss_terminator)
+    {
+      /* An elemental subroutine call with array valued arguments has
+	 to be scalarized.  */
+      gfc_se depse;
+
+      /* gfc_walk_elemental_function_args renders the ss chain in the
+	 reverse order to the actual argument order.  */
+      ss = gfc_reverse_ss (ss);
+
+      /* Initialize the loop.  */
+      gfc_init_se (&loopse, NULL);
+      gfc_init_loopinfo (&loop);
+      gfc_add_ss_to_loop (&loop, ss);
+
+      gfc_conv_ss_startstride (&loop);
+      gfc_conv_loop_setup (&loop, &code->loc);
+      gfc_mark_ss_chain_used (ss, 1);
+
+      /* Convert the arguments, checking for dependencies.  */
+      gfc_copy_loopinfo_to_se (&loopse, &loop);
+      loopse.ss = ss;
+
+      gfc_init_se (&depse, NULL);
+      gfc_conv_elemental_dependencies (&depse, &loopse, code->resolved_sym,
+				       code->ext.actual, ELEM_CHECK_VARIABLE);
+
+      gfc_add_block_to_block (&loop.pre,  &depse.pre);
+      gfc_add_block_to_block (&loop.post, &depse.post);
+
+      /* Generate the loop body.  */
+      gfc_start_scalarized_body (&loop, &body);
+      lss = ss;
+    }
+  else
+    gfc_start_block (&body);
+
+  actual = code->ext.actual;
+  for (n = 0; n < 5; n++, actual = actual->next)
+    {
+      arg[n] = actual->expr;
+      gfc_init_se (&argse[n], NULL);
+
+      if (lss != gfc_ss_terminator)
+	{
+	  gfc_copy_loopinfo_to_se (&argse[n], &loop);
+	  /* Find the ss for the expression if it is there.  */
+	  argse[n].ss = lss;
+	  gfc_mark_ss_chain_used (lss, 1);
+	}
+
+      gfc_conv_expr (&argse[n], arg[n]);
+
+      /* Retain the present position in the ss list.  */
+      if (lss != gfc_ss_terminator)
+	lss = argse[n].ss;
+    }
+
+  from    = argse[0].expr;
+  frompos = argse[1].expr;
+  len     = argse[2].expr;
+  to      = argse[3].expr;
+  topos   = argse[4].expr;
+
+  /* The type of the result (TO).  */
+  type    = TREE_TYPE (to);
+  bitsize = build_int_cst (integer_type_node, TYPE_PRECISION (type));
+
+  /* Optionally generate code for runtime argument check.  */
+  if (gfc_option.rtcheck & GFC_RTCHECK_BITS)
+    {
+      tree nbits, below, ccond;
+      tree fp = fold_convert (long_integer_type_node, frompos);
+      tree ln = fold_convert (long_integer_type_node, len);
+      tree tp = fold_convert (long_integer_type_node, topos);
+      below = fold_build2_loc (input_location, LT_EXPR,
+			       logical_type_node, frompos,
+			       build_int_cst (TREE_TYPE (frompos), 0));
+      above = fold_build2_loc (input_location, GT_EXPR,
+			       logical_type_node, frompos,
+			       fold_convert (TREE_TYPE (frompos), bitsize));
+      ccond = fold_build2_loc (input_location, TRUTH_ORIF_EXPR,
+			       logical_type_node, below, above);
+      gfc_trans_runtime_check (true, false, ccond, &argse[1].pre,
+			       &arg[1]->where,
+			       "FROMPOS argument (%ld) out of range 0:%d "
+			       "in intrinsic MVBITS", fp, bitsize);
+      below = fold_build2_loc (input_location, LT_EXPR,
+			       logical_type_node, len,
+			       build_int_cst (TREE_TYPE (len), 0));
+      above = fold_build2_loc (input_location, GT_EXPR,
+			       logical_type_node, len,
+			       fold_convert (TREE_TYPE (len), bitsize));
+      ccond = fold_build2_loc (input_location, TRUTH_ORIF_EXPR,
+			       logical_type_node, below, above);
+      gfc_trans_runtime_check (true, false, ccond, &argse[2].pre,
+			       &arg[2]->where,
+			       "LEN argument (%ld) out of range 0:%d "
+			       "in intrinsic MVBITS", ln, bitsize);
+      below = fold_build2_loc (input_location, LT_EXPR,
+			       logical_type_node, topos,
+			       build_int_cst (TREE_TYPE (topos), 0));
+      above = fold_build2_loc (input_location, GT_EXPR,
+			       logical_type_node, topos,
+			       fold_convert (TREE_TYPE (topos), bitsize));
+      ccond = fold_build2_loc (input_location, TRUTH_ORIF_EXPR,
+			       logical_type_node, below, above);
+      gfc_trans_runtime_check (true, false, ccond, &argse[4].pre,
+			       &arg[4]->where,
+			       "TOPOS argument (%ld) out of range 0:%d "
+			       "in intrinsic MVBITS", tp, bitsize);
+
+      /* The tests above ensure that FROMPOS, LEN and TOPOS fit into short
+	 integers.  Additions below cannot overflow.  */
+      nbits = fold_convert (long_integer_type_node, bitsize);
+      above = fold_build2_loc (input_location, PLUS_EXPR,
+			       long_integer_type_node, fp, ln);
+      ccond = fold_build2_loc (input_location, GT_EXPR,
+			       logical_type_node, above, nbits);
+      gfc_trans_runtime_check (true, false, ccond, &argse[1].pre,
+			       &arg[1]->where,
+			       "FROMPOS(%ld)+LEN(%ld)>BIT_SIZE(%d) "
+			       "in intrinsic MVBITS", fp, ln, bitsize);
+      above = fold_build2_loc (input_location, PLUS_EXPR,
+			       long_integer_type_node, tp, ln);
+      ccond = fold_build2_loc (input_location, GT_EXPR,
+			       logical_type_node, above, nbits);
+      gfc_trans_runtime_check (true, false, ccond, &argse[4].pre,
+			       &arg[4]->where,
+			       "TOPOS(%ld)+LEN(%ld)>BIT_SIZE(%d) "
+			       "in intrinsic MVBITS", tp, ln, bitsize);
+    }
+
+  for (n = 0; n < 5; n++)
+    gfc_add_block_to_block (&body, &argse[n].pre);
+
+  /* lenmask = (LEN >= bit_size (TYPE)) ? ~(TYPE)0 : ((TYPE)1 << LEN) - 1  */
+  above = fold_build2_loc (input_location, GE_EXPR, logical_type_node,
+			   len, fold_convert (TREE_TYPE (len), bitsize));
+  mask1 = build_int_cst (type, -1);
+  mask2 = fold_build2_loc (input_location, LSHIFT_EXPR, type,
+			   build_int_cst (type, 1), len);
+  mask2 = fold_build2_loc (input_location, MINUS_EXPR, type,
+			   mask2, build_int_cst (type, 1));
+  lenmask = fold_build3_loc (input_location, COND_EXPR, type,
+			     above, mask1, mask2);
+
+  /* newbits = (((UTYPE)(FROM) >> FROMPOS) & lenmask) << TOPOS  */
+  newbits = fold_build2_loc (input_location, RSHIFT_EXPR, type,
+			     fold_convert (type, from), frompos);
+  newbits = fold_build2_loc (input_location, BIT_AND_EXPR, type,
+			     newbits, lenmask);
+  newbits = fold_build2_loc (input_location, LSHIFT_EXPR, type,
+			     newbits, topos);
+
+  /* oldbits = TO & (~(lenmask << TOPOS))  */
+  oldbits = fold_build2_loc (input_location, LSHIFT_EXPR, type,
+			     lenmask, topos);
+  oldbits = fold_build1_loc (input_location, BIT_NOT_EXPR, type, oldbits);
+  oldbits = fold_build2_loc (input_location, BIT_AND_EXPR, type, oldbits, to);
+
+  /* TO = newbits | oldbits  */
+  tmp = fold_build2_loc (input_location, BIT_IOR_EXPR, type, oldbits, newbits);
+
+  gfc_add_modify (&body, to, tmp);
+
+  if (ss != gfc_ss_terminator)
+    {
+      gfc_add_block_to_block (&se.pre, &loopse.pre);
+
+      /* Finish up the loop block and the loop.  */
+      gfc_trans_scalarizing_loops (&loop, &body);
+      gfc_add_block_to_block (&se.pre, &loop.pre);
+      gfc_add_block_to_block (&se.pre, &loop.post);
+      gfc_cleanup_loop (&loop);
+      for (n = 0; n < 5; n++)
+	gfc_add_block_to_block (&se.pre, &argse[n].post);
+      return gfc_finish_block (&se.pre);
+    }
+
+  for (n = 0; n < 5; n++)
+    gfc_add_block_to_block (&body, &argse[n].post);
+
+  return gfc_finish_block (&body);
+}
+
+
 static tree
 conv_intrinsic_move_alloc (gfc_code *code)
 {
@@ -12106,6 +12337,11 @@ gfc_conv_intrinsic_subroutine (gfc_code *code)
       res = conv_intrinsic_kill_sub (code);
       break;

+    case GFC_ISYM_MVBITS:
+      res = conv_intrinsic_mvbits (code);
+//      res = NULL_TREE;
+      break;
+
     case GFC_ISYM_SYSTEM_CLOCK:
       res = conv_intrinsic_system_clock (code);
       break;
diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c
index 54b56c4f01d..20c9f744067 100644
--- a/gcc/fortran/trans-stmt.c
+++ b/gcc/fortran/trans-stmt.c
@@ -217,7 +217,7 @@ replace_ss (gfc_se *se, gfc_ss *old_ss, gfc_ss *new_ss)
    elemental subroutines.  Make temporaries for output arguments if any such
    dependencies are found.  Output arguments are chosen because internal_unpack
    can be used, as is, to copy the result back to the variable.  */
-static void
+void
 gfc_conv_elemental_dependencies (gfc_se * se, gfc_se * loopse,
 				 gfc_symbol * sym, gfc_actual_arglist * arg,
 				 gfc_dep_check check_variable)
diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h
index e126fe92782..95119c68104 100644
--- a/gcc/fortran/trans.h
+++ b/gcc/fortran/trans.h
@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3.  If not see
 #define GFC_TRANS_H

 #include "predict.h"  /* For enum br_predictor and PRED_*.  */
+#include "dependency.h"	/* For gfc_dep_check.  */

 /* Mangled symbols take the form __module__name or __module.submodule__name.  */
 #define GFC_MAX_MANGLED_SYMBOL_LEN  (GFC_MAX_SYMBOL_LEN*3+5)
@@ -1169,4 +1170,7 @@ extern const char gfc_msg_wrong_return[];
 #define OMPWS_NOWAIT		16	/* Use NOWAIT on OMP_FOR.  */
 extern int ompws_flags;

+void gfc_conv_elemental_dependencies (gfc_se *, gfc_se *, gfc_symbol *,
+				      gfc_actual_arglist *, gfc_dep_check);
+
 #endif /* GFC_TRANS_H */

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Help needed with inline expansion of MVBITS
       [not found]   ` <trinity-ba1eb65c-e247-49b4-aebd-9ce9fc59654d-1598816986556@3c-app-gmx-bs68>
@ 2020-08-31  7:03     ` Paul Richard Thomas
  0 siblings, 0 replies; 8+ messages in thread
From: Paul Richard Thomas @ 2020-08-31  7:03 UTC (permalink / raw)
  To: Harald Anlauf; +Cc: fortran

Hi Harald,

Yes, gfc_trans_call is indeed a good place to start - especially since I
contributed the scalarized part of it :-) I was just on the point of
suggesting that you use it as a template.

What might be a very good idea would be to add a boolean argument to
gfc_trans_call, that causes it to split to a stripped down equivalent in
trans-intrinsic.c, which then translates the specific code for the inline
elemental intrinsic. That way, we avoid code repetition.

Cheers

Paul


On Sun, 30 Aug 2020 at 20:49, Harald Anlauf <anlauf@gmx.de> wrote:

> Hi Paul,
>
> > If either the 'to' or 'from' are arrays, the scalarizing loop has to be
> set up and the scalarized array references provided.
>
> actually the other argument could be array of compatible shape.
>
> > A good example can be found in gfc_conv_intrinsic_arith.
>
> > Mikael Morin wrote an excellent explanation of the scalarizer some while
> since: https://gcc.gnu.org/wiki/GFortranScalarizer
>
> I looked at both and have to admit that I'll need to play a little bit
> before I understand them well enough.
>
> For the case at hand, it would be good to see the explicit implementation
> of an elemental subroutine
>
>   elemental subroutine axpy (a,x,y)  ! almost the BLAS version
>     real, intent(in)    :: a,x
>     real, intent(inout) :: y
>      y = y  + a*x
>   end
>
> > I will update your patch in parallel to your efforts and we can compare
> notes, when I have done. It would be really nice to have somebody else
> around, who is even vaguely comfortable with the scalarizer.
>
> Thanks for the offer!
>
> > You might also take a look at trans-array.c(structure_alloc_comps),
> where we implemented the simplest possible scalarized loop, albeit for full
> arrays.
>
> Again, this is above my current level of understanding.
>
> After some browsing I encountered gfc_trans_call, which is supposed to
> translate a CALL statement.
> This seems to be a better candidate to start with.  Or am I missing
> something?
>
> Cheers,
> Harald
>


-- 
"If you can't explain it simply, you don't understand it well enough" -
Albert Einstein

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Help needed with inline expansion of MVBITS
  2020-08-29 15:34 Harald Anlauf
@ 2020-08-29 17:38 ` Paul Richard Thomas
       [not found]   ` <trinity-ba1eb65c-e247-49b4-aebd-9ce9fc59654d-1598816986556@3c-app-gmx-bs68>
  2020-09-03 21:23 ` Harald Anlauf
  1 sibling, 1 reply; 8+ messages in thread
From: Paul Richard Thomas @ 2020-08-29 17:38 UTC (permalink / raw)
  To: Harald Anlauf; +Cc: fortran

Hi Harald,

If either the 'to' or 'from' are arrays, the scalarizing loop has to be set
up and the scalarized array references provided. A good example can be
found in gfc_conv_intrinsic_arith.

Mikael Morin wrote an excellent explanation of the scalarizer some while
since: https://gcc.gnu.org/wiki/GFortranScalarizer

I will update your patch in parallel to your efforts and we can compare
notes, when I have done. It would be really nice to have somebody else
around, who is even vaguely comfortable with the scalarizer.

You might also take a look at trans-array.c(structure_alloc_comps), where
we implemented the simplest possible scalarized loop, albeit for full
arrays.

Cheers

Paul


On Sat, 29 Aug 2020 at 16:34, Harald Anlauf <anlauf@gmx.de> wrote:

> Hi all,
>
> while looking for a way to resolve the missing final piece for PR90903,
> i.e. runtime
> checks for MVBITS, I decided to attack this along with an inline expansion
> of MVBITS,
> which is also suggested in libgfortran's mvbits.c.  I encountered two
> problems
> which I am likely to stupid to solve.  Maybe somebody can hit my head with
> the
> right piece of wood.
>
> 1) For an unknown reason, the runtime check code is not emitted.  I've
> never
>    had this issue when adding similar code to an intrinsic function.
>
> 2) MVBITS is an elemental subroutine.  This does work with the current
> gfortran,
>    but will ICE with my patch.  How do I solve that?
>
> I've attached my work-in-progress patch.
>
> Thanks in advance for helpful pointers!
>
> Harald
>
>

-- 
"If you can't explain it simply, you don't understand it well enough" -
Albert Einstein

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Help needed with inline expansion of MVBITS
@ 2020-08-29 15:34 Harald Anlauf
  2020-08-29 17:38 ` Paul Richard Thomas
  2020-09-03 21:23 ` Harald Anlauf
  0 siblings, 2 replies; 8+ messages in thread
From: Harald Anlauf @ 2020-08-29 15:34 UTC (permalink / raw)
  To: fortran

[-- Attachment #1: Type: text/plain, Size: 733 bytes --]

Hi all,

while looking for a way to resolve the missing final piece for PR90903, i.e. runtime
checks for MVBITS, I decided to attack this along with an inline expansion of MVBITS,
which is also suggested in libgfortran's mvbits.c.  I encountered two problems
which I am likely to stupid to solve.  Maybe somebody can hit my head with the
right piece of wood.

1) For an unknown reason, the runtime check code is not emitted.  I've never
   had this issue when adding similar code to an intrinsic function.

2) MVBITS is an elemental subroutine.  This does work with the current gfortran,
   but will ICE with my patch.  How do I solve that?

I've attached my work-in-progress patch.

Thanks in advance for helpful pointers!

Harald


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: mvbits.patch --]
[-- Type: text/x-patch, Size: 6795 bytes --]

diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c
index 32fe9886c57..0485182da0a 100644
--- a/gcc/fortran/trans-intrinsic.c
+++ b/gcc/fortran/trans-intrinsic.c
@@ -11790,6 +11790,150 @@ conv_intrinsic_event_query (gfc_code *code)
   return gfc_finish_block (&se.pre);
 }

+static tree
+conv_intrinsic_mvbits (gfc_code *code)
+{
+  stmtblock_t block;
+  gfc_se from_se, fpos_se, len_se, to_se, tpos_se;
+  tree from, frompos, len, to, topos;
+  tree lenmask, oldbits, newbits, bitsize;
+  tree type, above, mask1, mask2, nbits;
+
+  gfc_expr *from_expr    = code->ext.actual->expr;
+  gfc_expr *frompos_expr = code->ext.actual->next->expr;
+  gfc_expr *len_expr     = code->ext.actual->next->next->expr;
+  gfc_expr *to_expr      = code->ext.actual->next->next->next->expr;
+  gfc_expr *topos_expr   = code->ext.actual->next->next->next->next->expr;
+
+  gfc_start_block (&block);
+
+  gfc_init_se (&from_se, NULL);
+  gfc_init_se (&fpos_se, NULL);
+  gfc_init_se (&len_se, NULL);
+  gfc_init_se (&to_se, NULL);
+  gfc_init_se (&tpos_se, NULL);
+
+  gfc_conv_expr (&from_se, from_expr);
+  gfc_conv_expr (&fpos_se, frompos_expr);
+  gfc_conv_expr (&len_se, len_expr);
+  gfc_conv_expr (&to_se, to_expr);
+  gfc_conv_expr (&tpos_se, topos_expr);
+
+  from    = from_se.expr;
+  frompos = fpos_se.expr;
+  len     = len_se.expr;
+  to      = to_se.expr;
+  topos   = tpos_se.expr;
+
+  gfc_add_block_to_block (&block, &fpos_se.pre);
+  gfc_add_block_to_block (&block, &len_se.pre);
+  gfc_add_block_to_block (&block, &tpos_se.pre);
+
+  type    = TREE_TYPE (from);
+  bitsize = build_int_cst (TREE_TYPE (frompos), TYPE_PRECISION (type));
+
+  /* Optionally generate code for runtime argument check.  */
+  if (gfc_option.rtcheck & GFC_RTCHECK_BITS)
+    {
+      tree below, ccond;
+      tree fp = fold_convert (long_integer_type_node, frompos);
+      tree ln = fold_convert (long_integer_type_node, len);
+      tree tp = fold_convert (long_integer_type_node, topos);
+      nbits = build_int_cst (long_integer_type_node, TYPE_PRECISION (type));
+      below = fold_build2_loc (input_location, LT_EXPR,
+			       logical_type_node, frompos,
+			       build_int_cst (TREE_TYPE (frompos), 0));
+      above = fold_build2_loc (input_location, GT_EXPR,
+			       logical_type_node, frompos,
+			       fold_convert (TREE_TYPE (frompos), bitsize));
+      ccond = fold_build2_loc (input_location, TRUTH_ORIF_EXPR,
+			       logical_type_node, below, above);
+      gfc_trans_runtime_check (true, false, ccond, &fpos_se.pre,
+			       &frompos_expr->where,
+			       "FROMPOS argument (%ld) out of range 0:%ld "
+			       "in intrinsic MVBITS", fp, nbits);
+      below = fold_build2_loc (input_location, LT_EXPR,
+			       logical_type_node, len,
+			       build_int_cst (TREE_TYPE (len), 0));
+      above = fold_build2_loc (input_location, GT_EXPR,
+			       logical_type_node, len,
+			       fold_convert (TREE_TYPE (len), bitsize));
+      ccond = fold_build2_loc (input_location, TRUTH_ORIF_EXPR,
+			       logical_type_node, below, above);
+      gfc_trans_runtime_check (true, false, ccond, &len_se.pre,
+			       &len_expr->where,
+			       "LEN argument (%ld) out of range 0:%ld "
+			       "in intrinsic MVBITS", ln, nbits);
+      above = fold_build2_loc (input_location, GT_EXPR,
+			       logical_type_node, topos,
+			       fold_convert (TREE_TYPE (topos), bitsize));
+      ccond = fold_build2_loc (input_location, TRUTH_ORIF_EXPR,
+			       logical_type_node, below, above);
+      gfc_trans_runtime_check (true, false, ccond, &tpos_se.pre,
+			       &topos_expr->where,
+			       "TOPOS argument (%ld) out of range 0:%ld "
+			       "in intrinsic MVBITS", tp, nbits);
+      above = fold_build2_loc (input_location, PLUS_EXPR,
+			       long_integer_type_node, fp, ln);
+      ccond = fold_build2_loc (input_location, GT_EXPR,
+			       logical_type_node, above, nbits);
+      gfc_trans_runtime_check (true, false, ccond, &fpos_se.pre,
+			       &len_expr->where,
+			       "FROMPOS(%ld)+LEN(%ld)>BIT_SIZE(%ld) "
+			       "in intrinsic MVBITS", fp, ln, nbits);
+      above = fold_build2_loc (input_location, PLUS_EXPR,
+			       long_integer_type_node, tp, ln);
+      ccond = fold_build2_loc (input_location, GT_EXPR,
+			       logical_type_node, above, nbits);
+      gfc_trans_runtime_check (true, false, ccond, &tpos_se.pre,
+			       &topos_expr->where,
+			       "TOPOS(%ld)+LEN(%ld)>BIT_SIZE(%ld) "
+			       "in intrinsic MVBITS", tp, ln, nbits);
+    }
+
+  /* lenmask = (LEN >= bit_size (TYPE)) ? ~(TYPE)0 : ((TYPE)1 << LEN) - 1  */
+  mask1   = build_int_cst (type, -1);
+  mask2   = fold_build2_loc (input_location, LSHIFT_EXPR,
+			     type, build_int_cst (type, 1), len);
+  mask2   = fold_build2_loc (input_location, MINUS_EXPR,
+			     type, mask2, build_int_cst (type, 1));
+  above   = fold_build2_loc (input_location, GE_EXPR,
+			     logical_type_node, frompos, bitsize);
+  lenmask = fold_build3_loc (input_location, COND_EXPR,
+			     type, above, mask1, mask2);
+
+  /* gfc_add_expr_to_block (&block, lenmask); */
+  /* gfc_add_modify (&block, gfc_create_var (type, "lenmask"), lenmask); */
+
+  /* newbits = (((UTYPE)(FROM) >> FROMPOS) & lenmask) << TOPOS  */
+  newbits = fold_build2_loc (input_location, RSHIFT_EXPR,
+			     type, from, frompos);
+  newbits = fold_build2_loc (input_location, BIT_AND_EXPR,
+			     type, newbits, lenmask);
+  newbits = fold_build2_loc (input_location, LSHIFT_EXPR,
+			     type, newbits, topos);
+
+  /* oldbits = TO & (~(lenmask << TOPOS))  */
+  oldbits = fold_build2_loc (input_location, LSHIFT_EXPR,
+			     type, lenmask, topos);
+  oldbits = fold_build2_loc (input_location, BIT_XOR_EXPR,
+			     type, oldbits, mask1);
+  oldbits = fold_build2_loc (input_location, BIT_AND_EXPR,
+			     type, oldbits, to);
+
+  /* TO = newbits | oldbits  */
+  to      = fold_build2_loc (input_location, BIT_IOR_EXPR,
+			     type, oldbits, newbits);
+
+  gfc_add_modify (&block, to_se.expr, to);
+
+  gfc_add_block_to_block (&block, &fpos_se.post);
+  gfc_add_block_to_block (&block, &len_se.post);
+  gfc_add_block_to_block (&block, &tpos_se.post);
+
+  return gfc_finish_block (&block);
+}
+
 static tree
 conv_intrinsic_move_alloc (gfc_code *code)
 {
@@ -12119,6 +12263,11 @@ gfc_conv_intrinsic_subroutine (gfc_code *code)
       res = conv_intrinsic_kill_sub (code);
       break;

+    case GFC_ISYM_MVBITS:
+      res = conv_intrinsic_mvbits (code);
+//      res = NULL_TREE;
+      break;
+
     case GFC_ISYM_SYSTEM_CLOCK:
       res = conv_intrinsic_system_clock (code);
       break;

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2020-09-07 21:30 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-07 21:30 Help needed with inline expansion of MVBITS Harald Anlauf
  -- strict thread matches above, loose matches on Subject: below --
2020-08-29 15:34 Harald Anlauf
2020-08-29 17:38 ` Paul Richard Thomas
     [not found]   ` <trinity-ba1eb65c-e247-49b4-aebd-9ce9fc59654d-1598816986556@3c-app-gmx-bs68>
2020-08-31  7:03     ` Paul Richard Thomas
2020-09-03 21:23 ` Harald Anlauf
2020-09-04  7:16   ` Paul Richard Thomas
2020-09-06 18:01   ` Paul Richard Thomas
2020-09-07 21:26     ` Harald Anlauf

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).