public inbox for gcc@gcc.gnu.org
 help / color / mirror / Atom feed
* [PR43721] Failure to optimise (a/b) and (a%b) into single  call
@ 2013-06-16 15:33 Kugan
  2013-06-17  9:37 ` Richard Biener
  0 siblings, 1 reply; 5+ messages in thread
From: Kugan @ 2013-06-16 15:33 UTC (permalink / raw)
  To: gcc; +Cc: Richard Biener, Richard Earnshaw, Ramana Radhakrishnan

[-- Attachment #1: Type: text/plain, Size: 1193 bytes --]

Hi,

I am attempting to fix Bug 43721 - Failure to optimise (a/b) and (a%b) 
into single __aeabi_idivmod call 
(http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43721)

execute_cse_sincos tree level pass does similar cse so I attempted to 
use similar approach here. Div/mod cse is not really using built-in 
functions though at this level.

For the case of div and mod operations, after CSE is performed, there 
isnt a way to represent the resulting stament in gimple. We will endup 
with divmod  taking two arguments and returning double the size of one 
arguments in the three address format (divmod will return reminder and 
quotient so the return type is double the size of argument type).

Since GIMPLE_ASSIGN will result in type checking failure in this case, I 
atempted use built-in functions (GIMPLE_CALL to represent the runtime 
library call). Name for the function here  is target specific and can be 
obtained from sdivmod_optab so the builtin function name defined in tree 
level is not used. I am not entirelt sure this is the right approach so 
I am attaching the first cut of the patch to get your feedback and 
understand the right approach to this problem.



Thank you,
Kugan


[-- Attachment #2: divmod.diff --]
[-- Type: text/x-patch, Size: 12289 bytes --]

diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def
index 2634ecc..21c483a 100644
--- a/gcc/builtin-types.def
+++ b/gcc/builtin-types.def
@@ -250,6 +250,10 @@ DEF_FUNCTION_TYPE_2 (BT_FN_INT_CONST_STRING_FILEPTR,
 		     BT_INT, BT_CONST_STRING, BT_FILEPTR)
 DEF_FUNCTION_TYPE_2 (BT_FN_INT_INT_FILEPTR,
 		     BT_INT, BT_INT, BT_FILEPTR)
+DEF_FUNCTION_TYPE_2 (BT_FN_LONGLONG_INT_INT,
+		     BT_LONGLONG, BT_INT, BT_INT)
+DEF_FUNCTION_TYPE_2 (BT_FN_ULONGLONG_UINT_UINT,
+		     BT_ULONGLONG, BT_UINT, BT_UINT)
 DEF_FUNCTION_TYPE_2 (BT_FN_VOID_PTRMODE_PTR,
 		     BT_VOID, BT_PTRMODE, BT_PTR)
 DEF_FUNCTION_TYPE_2 (BT_FN_VOID_PTR_PTRMODE,
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 402bb1f..1cae2bb 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -1876,7 +1876,9 @@ mathfn_built_in_1 (tree type, enum built_in_function fn, bool implicit_p)
       CASE_MATHFN (BUILT_IN_Y0)
       CASE_MATHFN (BUILT_IN_Y1)
       CASE_MATHFN (BUILT_IN_YN)
-
+      case BUILT_IN_DIVMOD:
+      case BUILT_IN_UDIVMOD:
+        return builtin_decl_explicit (fn);
       default:
 	return NULL_TREE;
       }
@@ -2449,6 +2451,57 @@ expand_builtin_interclass_mathfn (tree exp, rtx target)
   return NULL_RTX;
 }
 
+/* Expand a call to the builtin divmod function to
+   library call.  */
+static rtx
+expand_builtin_divmod (tree exp, rtx target)
+{
+  rtx op0, op1;
+  enum machine_mode mode;
+  tree arg0, arg1;
+  rtx libval;
+  rtx libfunc;
+  rtx insns;
+  bool is_unsigned;
+
+  arg0 = CALL_EXPR_ARG (exp, 0);
+  arg1 = CALL_EXPR_ARG (exp, 1);
+
+  mode = TYPE_MODE (TREE_TYPE (arg0));
+  is_unsigned = TYPE_UNSIGNED (TREE_TYPE (arg0));
+
+  /* Get the libcall.  */
+  libfunc = optab_libfunc (is_unsigned ? udivmod_optab : sdivmod_optab, mode);
+  gcc_assert (libfunc);
+
+  op0 = expand_normal (arg0);
+  op1 = expand_normal (arg1);
+
+  if (MEM_P (op0))
+    op0 = force_reg (mode, op0);
+  if (MEM_P (op1))
+    op1 = force_reg (mode, op1);
+
+  /* The value returned by the library function will have twice as
+     many bits as the nominal MODE.  */
+  machine_mode libval_mode
+    = smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode),
+                              MODE_INT);
+  start_sequence ();
+  libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
+                                    libval_mode, 2,
+                                    op0, mode,
+                                    op1, mode);
+  insns = get_insns ();
+  end_sequence ();
+  /* Move into the desired location.  */
+  if (target != const0_rtx)
+    emit_libcall_block (insns, target, libval,
+                        gen_rtx_fmt_ee (is_unsigned ?  UMOD : MOD, mode, op0, op1));
+
+  return target;
+}
+
 /* Expand a call to the builtin sincos math function.
    Return NULL_RTX if a normal call should be emitted rather than expanding the
    function in-line.  EXP is the expression that is a call to the builtin
@@ -5977,6 +6030,13 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode,
 	return target;
       break;
 
+    case BUILT_IN_DIVMOD:
+    case BUILT_IN_UDIVMOD:
+      target = expand_builtin_divmod (exp, target);
+      if (target)
+        return target;
+      break;
+
     CASE_FLT_FN (BUILT_IN_SINCOS):
       if (! flag_unsafe_math_optimizations)
 	break;
diff --git a/gcc/builtins.def b/gcc/builtins.def
index 91879a6..7664700 100644
--- a/gcc/builtins.def
+++ b/gcc/builtins.def
@@ -599,6 +599,8 @@ DEF_C99_BUILTIN        (BUILT_IN_VSCANF, "vscanf", BT_FN_INT_CONST_STRING_VALIST
 DEF_C99_BUILTIN        (BUILT_IN_VSNPRINTF, "vsnprintf", BT_FN_INT_STRING_SIZE_CONST_STRING_VALIST_ARG, ATTR_FORMAT_PRINTF_NOTHROW_3_0)
 DEF_LIB_BUILTIN        (BUILT_IN_VSPRINTF, "vsprintf", BT_FN_INT_STRING_CONST_STRING_VALIST_ARG, ATTR_FORMAT_PRINTF_NOTHROW_2_0)
 DEF_C99_BUILTIN        (BUILT_IN_VSSCANF, "vsscanf", BT_FN_INT_CONST_STRING_CONST_STRING_VALIST_ARG, ATTR_FORMAT_SCANF_NOTHROW_2_0)
+DEF_EXT_LIB_BUILTIN    (BUILT_IN_DIVMOD, "__$_idivmod", BT_FN_LONGLONG_INT_INT, ATTR_NULL)
+DEF_EXT_LIB_BUILTIN    (BUILT_IN_UDIVMOD, "__$_uidivmod", BT_FN_ULONGLONG_UINT_UINT, ATTR_NULL)
 
 /* Category: ctype builtins.  */
 DEF_LIB_BUILTIN        (BUILT_IN_ISALNUM, "isalnum", BT_FN_INT_INT, ATTR_PURE_NOTHROW_LEAF_LIST)
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index b4de411..3af01ac 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -664,7 +664,7 @@ struct gimple_opt_pass pass_cse_reciprocals =
    statements in the vector.  */
 
 static bool
-maybe_record_sincos (vec<gimple> *stmts,
+maybe_record_stmt (vec<gimple> *stmts,
 		     basic_block *top_bb, gimple use_stmt)
 {
   basic_block use_bb = gimple_bb (use_stmt);
@@ -684,6 +684,176 @@ maybe_record_sincos (vec<gimple> *stmts,
   return true;
 }
 
+/* Look for div and mod statements with the same operands and
+   create a library call if needed.  We first walk over all
+   immediate uses of one of the operand looking for matched statement
+   that we can combine.  In a second pass replace the statement with
+   a library calls and statements with the results from library call.  */
+
+static bool
+execute_cse_div_mod_1 (gimple s)
+{
+  gimple_stmt_iterator gsi;
+  imm_use_iterator use_iter;
+  gimple use_stmt;
+  bool found = false;
+  vec<gimple> stmts = vNULL;
+  basic_block top_bb = NULL;
+  bool cfg_changed = false;
+  int i;
+  tree type, op1, op2;
+
+  op1 = gimple_assign_rhs1 (s);
+  op2 = gimple_assign_rhs2 (s);
+  type = TREE_TYPE (op1);
+
+  /* if op2 is constant, It will be optimized without lib calls.  */
+  if (TREE_CODE (op2) != SSA_NAME)
+    return false;
+ 
+  if (HAVE_divsi3
+       || (GET_MODE_BITSIZE (TYPE_MODE (type)) != 32)
+       || !optab_libfunc (TYPE_UNSIGNED (type)? udivmod_optab : sdivmod_optab,
+                            TYPE_MODE (type)))
+    return false;
+
+  stmts.safe_push (s);
+  top_bb = gimple_bb (s);
+
+  /* look fpr a TRUNC_MOD_EXPR coresponding to stmt
+     TRUNC_DIV_EXPR s to combine.  */
+  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op2)
+    {
+      if (is_gimple_assign (use_stmt)
+          && (gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR))
+        {
+          tree rhs1 = gimple_assign_rhs1 (use_stmt);
+          tree rhs2 = gimple_assign_rhs2 (use_stmt);
+
+          if ((rhs1 == op1 && rhs2 == op2)
+              ||(rhs1 == op1 && rhs2 == op2))
+            found |= maybe_record_stmt (&stmts, &top_bb, use_stmt);
+        }
+    }
+
+  /* If we have matched instructions to combine, try creatiing Library
+     call and use the results.  */
+  if (found)
+    {
+      gimple def_stmt1 = NULL, def_stmt2;
+      bool insert_after_op1_def, insert_after_op2_def;
+      tree fndecl, res, rhs, temp;
+      gimple stmt, shift_stmt, call_stmt;
+
+      /* See if the library call is needed and available.  */
+      fndecl = mathfn_built_in (type, BUILT_IN_DIVMOD);
+      if (!fndecl)
+      {
+        stmts.release ();
+        return false;
+      }
+
+      if (TREE_CODE (op1) == SSA_NAME)
+        def_stmt1 = SSA_NAME_DEF_STMT (op1);
+      def_stmt2 = SSA_NAME_DEF_STMT (op2);
+
+      /* Is the call to be insterted adter op1 define stmt.  */
+      insert_after_op1_def = TREE_CODE (op1) == SSA_NAME
+                            && !SSA_NAME_IS_DEFAULT_DEF (op1)
+                            && gimple_code (def_stmt1) != GIMPLE_PHI
+                            && gimple_bb (def_stmt1) == top_bb;
+
+      /* Is the call to be insterted adter op2 define stmt.  */
+      insert_after_op2_def = !SSA_NAME_IS_DEFAULT_DEF (op2)
+                            && gimple_code (def_stmt2) != GIMPLE_PHI
+                            && gimple_bb (def_stmt2) == top_bb;
+
+      /* Create a library call and instert it at the place idenified.  */
+      call_stmt = gimple_build_call (fndecl, 2, op1, op2);
+      res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)),
+                                call_stmt, "divmod");
+      gimple_call_set_lhs (call_stmt, res);
+
+      /* Insert call at the beginning of top_bb but not earlier
+         than the name def statement.  */
+      if (insert_after_op1_def || insert_after_op2_def)
+        {
+          if (insert_after_op1_def && insert_after_op2_def)
+            {
+              for (gsi = gsi_start_bb (top_bb);
+                   !gsi_end_p (gsi); gsi_next (&gsi))
+                {
+                  gimple g = gsi_stmt (gsi);
+                  if (g == def_stmt1)
+                    {
+                      gsi = gsi_for_stmt (def_stmt2);
+                      break;
+                    }
+                  else if (g == def_stmt2)
+                    {
+                      gsi = gsi_for_stmt (def_stmt1);
+                      break;
+                    }
+                }
+            }
+
+          /* Only one of the definition is in the basic block, place after
+             that.  */
+          else if (insert_after_op1_def)
+            gsi = gsi_for_stmt (def_stmt1);
+          else
+            gsi = gsi_for_stmt (def_stmt2);
+
+          gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
+        }
+      else
+        {
+          /* op1 and op2 are defined before the top_bb.  Insert as first
+             non label instruction.  */
+          gsi = gsi_after_labels (top_bb);
+          gsi_insert_before (&gsi, call_stmt, GSI_SAME_STMT);
+        }
+
+      /* Adjust the recorded old statements to use the results.  */
+      for (i = 0; stmts.iterate (i, &use_stmt); ++i)
+        {
+          switch (gimple_assign_rhs_code (use_stmt))
+            {
+            case TRUNC_MOD_EXPR:
+              /* Get the top 32 bit from result and assign it.  */
+              gsi = gsi_for_stmt (use_stmt);
+              shift_stmt = gimple_build_assign_with_ops (RSHIFT_EXPR,
+                          NULL_TREE, res, build_int_cst (TREE_TYPE (res), 32));
+              temp = make_temp_ssa_name (long_long_integer_type_node,
+                                         shift_stmt, "divmod");
+              gimple_assign_set_lhs (shift_stmt, temp);
+              gsi_insert_before (&gsi, shift_stmt, GSI_SAME_STMT);
+              rhs = fold_build1 (CONVERT_EXPR, type, temp);
+              break;
+
+            case TRUNC_DIV_EXPR:
+              /* Get the bottom 32 bit from result and assign it.  */
+              rhs = fold_build1 (CONVERT_EXPR, type, res);
+              gsi = gsi_for_stmt (use_stmt);
+              break;
+
+            default:
+              gcc_unreachable ();
+            }
+
+          /* Replace the statement with a copy.  */
+          stmt = gimple_build_assign (gimple_assign_lhs (use_stmt), rhs);
+          gsi_replace (&gsi, stmt, true);
+
+          if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
+            cfg_changed = true;
+        }
+    }
+
+  stmts.release ();
+  return cfg_changed;
+}
+
 /* Look for sin, cos and cexpi calls with the same argument NAME and
    create a single call to cexpi CSEing the result in this case.
    We first walk over all immediate uses of the argument collecting
@@ -717,15 +887,15 @@ execute_cse_sincos_1 (tree name)
       switch (DECL_FUNCTION_CODE (fndecl))
 	{
 	CASE_FLT_FN (BUILT_IN_COS):
-	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
+	  seen_cos |= maybe_record_stmt (&stmts, &top_bb, use_stmt) ? 1 : 0;
 	  break;
 
 	CASE_FLT_FN (BUILT_IN_SIN):
-	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
+	  seen_sin |= maybe_record_stmt (&stmts, &top_bb, use_stmt) ? 1 : 0;
 	  break;
 
 	CASE_FLT_FN (BUILT_IN_CEXPI):
-	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
+	  seen_cexpi |= maybe_record_stmt (&stmts, &top_bb, use_stmt) ? 1 : 0;
 	  break;
 
 	default:;
@@ -1398,6 +1568,12 @@ execute_cse_sincos (void)
 	     gimple_purge_dead_eh_edges if we change something in the middle
 	     of a basic block.  */
 	  cleanup_eh = false;
+          if (stmt && is_gimple_assign (stmt)
+              && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_binary)
+              && (gimple_assign_rhs_code (stmt) ==  TRUNC_DIV_EXPR))
+            {
+	      cfg_changed |= execute_cse_div_mod_1 (stmt);
+            }
 
 	  if (is_gimple_call (stmt)
 	      && gimple_call_lhs (stmt)


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2013-09-03 15:53 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-06-16 15:33 [PR43721] Failure to optimise (a/b) and (a%b) into single call Kugan
2013-06-17  9:37 ` Richard Biener
2013-06-21  2:13   ` Kugan
2013-09-03 10:57     ` Richard Biener
2013-09-03 15:53       ` David Malcolm

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).