public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [RS6000] Fix PR61098, Poor code setting count register
@ 2014-05-08  1:49 Alan Modra
  2014-05-08 13:48 ` David Edelsohn
  0 siblings, 1 reply; 12+ messages in thread
From: Alan Modra @ 2014-05-08  1:49 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Edelsohn

On powerpc64, to set a large loop count we have code like the
following after split1:

(insn 67 14 68 4 (set (reg:DI 160)
        (const_int 99942400 [0x5f50000])) /home/amodra/unaligned_load.c:14 -1
     (nil))
(insn 68 67 42 4 (set (reg:DI 160)
        (ior:DI (reg:DI 160)
            (const_int 57600 [0xe100]))) /home/amodra/unaligned_load.c:14 -1
     (expr_list:REG_EQUAL (const_int 100000000 [0x5f5e100])
        (nil)))

and then test for loop exit with:

(jump_insn 65 31 45 5 (parallel [
            (set (pc)
                (if_then_else (ne (reg:DI 160)
                        (const_int 1 [0x1]))
                    (label_ref:DI 42)
                    (pc)))
            (set (reg:DI 160)
                (plus:DI (reg:DI 160)
                    (const_int -1 [0xffffffffffffffff])))
            (clobber (scratch:CC))
            (clobber (scratch:DI))
        ]) /home/amodra/unaligned_load.c:15 800 {*ctrdi_internal1}
     (int_list:REG_BR_PROB 9899 (nil))
 -> 42)

The jump_insn of course is meant for use with bdnz, which implies a
strong preference for reg 160 to live in the count register.  Trouble
is, the count register doesn't do arithmetic.

So, use a new psuedo for intermediate results.  On looking at this,
I noticed the !TARGET_POWERPC64 code in rs6000_emit_set_long_const was
broken, apparently expecting c1 and c2 to be the high and low 32 bits
of the constant.  That's no longer true, so I've fixed that as well.
Bootstrapped and regression tested powerpc64-linux.  OK for mainline
and branches?

	PR target/61098
	* config/rs6000/rs6000.c (rs6000_emit_set_const): Remove unneeded
	params and return value.  Simplify.  Update comment.
	(rs6000_emit_set_long_const): Remove unneeded param and return
	value.  Correct !TARGET_POWERPC64 handling of constants > 2G.
	If we can, use a new pseudo for intermediate calculations.

Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 209926)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -1068,7 +1069,7 @@ static tree rs6000_handle_longcall_attribute (tree
 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
 static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
-static rtx rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
+static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
 static int rs6000_memory_move_cost (enum machine_mode, reg_class_t, bool);
 static bool rs6000_debug_rtx_costs (rtx, int, int, int, int *, bool);
 static int rs6000_debug_address_cost (rtx, enum machine_mode, addr_space_t,
@@ -7826,53 +7811,36 @@ rs6000_conditional_register_usage (void)
 }
 
 \f
-/* Try to output insns to set TARGET equal to the constant C if it can
-   be done in less than N insns.  Do all computations in MODE.
-   Returns the place where the output has been placed if it can be
-   done and the insns have been emitted.  If it would take more than N
-   insns, zero is returned and no insns and emitted.  */
+/* Output insns to set DEST equal to the constant SOURCE.  */
 
-rtx
-rs6000_emit_set_const (rtx dest, enum machine_mode mode,
-		       rtx source, int n ATTRIBUTE_UNUSED)
+void
+rs6000_emit_set_const (rtx dest, rtx source)
 {
-  rtx result, insn, set;
-  HOST_WIDE_INT c0, c1;
+  enum machine_mode mode = GET_MODE (dest);
+  rtx temp, insn, set;
+  HOST_WIDE_INT c;
 
+  gcc_checking_assert (CONST_INT_P (source));
+  c = INTVAL (source);
   switch (mode)
     {
-    case  QImode:
+    case QImode:
     case HImode:
-      if (dest == NULL)
-	dest = gen_reg_rtx (mode);
       emit_insn (gen_rtx_SET (VOIDmode, dest, source));
-      return dest;
+      return;
 
     case SImode:
-      result = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
+      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
 
-      emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (result),
-			      GEN_INT (INTVAL (source)
-				       & (~ (HOST_WIDE_INT) 0xffff))));
+      emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (temp),
+			      GEN_INT (c & (~ (HOST_WIDE_INT) 0xffff))));
       emit_insn (gen_rtx_SET (VOIDmode, dest,
-			      gen_rtx_IOR (SImode, copy_rtx (result),
-					   GEN_INT (INTVAL (source) & 0xffff))));
-      result = dest;
+			      gen_rtx_IOR (SImode, copy_rtx (temp),
+					   GEN_INT (c & 0xffff))));
       break;
 
     case DImode:
-      switch (GET_CODE (source))
-	{
-	case CONST_INT:
-	  c0 = INTVAL (source);
-	  c1 = -(c0 < 0);
-	  break;
-
-	default:
-	  gcc_unreachable ();
-	}
-
-      result = rs6000_emit_set_long_const (dest, c0, c1);
+      rs6000_emit_set_long_const (dest, c);
       break;
 
     default:
@@ -7883,37 +7851,38 @@ rs6000_conditional_register_usage (void)
   set = single_set (insn);
   if (! CONSTANT_P (SET_SRC (set)))
     set_unique_reg_note (insn, REG_EQUAL, source);
-
-  return result;
 }
 
-/* Having failed to find a 3 insn sequence in rs6000_emit_set_const,
-   fall back to a straight forward decomposition.  We do this to avoid
-   exponential run times encountered when looking for longer sequences
-   with rs6000_emit_set_const.  */
-static rtx
-rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
+/* Output insns to set a DImode DEST equal to the constant C.  */
+
+static void
+rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
 {
+  rtx temp;
+
   if (!TARGET_POWERPC64)
     {
-      rtx operand1, operand2;
+      rtx hi, lo;
 
-      operand1 = operand_subword_force (dest, WORDS_BIG_ENDIAN == 0,
-					DImode);
-      operand2 = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN != 0,
-					DImode);
-      emit_move_insn (operand1, GEN_INT (c1));
-      emit_move_insn (operand2, GEN_INT (c2));
+      hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
+				  DImode);
+      lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
+				  DImode);
+      emit_move_insn (hi, GEN_INT (c >> 32));
+      c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
+      emit_move_insn (lo, GEN_INT (c));
     }
   else
     {
       HOST_WIDE_INT ud1, ud2, ud3, ud4;
 
-      ud1 = c1 & 0xffff;
-      ud2 = (c1 & 0xffff0000) >> 16;
-      c2 = c1 >> 32;
-      ud3 = c2 & 0xffff;
-      ud4 = (c2 & 0xffff0000) >> 16;
+      ud1 = c & 0xffff;
+      c = c >> 16;
+      ud2 = c & 0xffff;
+      c = c >> 16;
+      ud3 = c & 0xffff;
+      c = c >> 16;
+      ud4 = c & 0xffff;
 
       if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
 	  || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
@@ -7922,67 +7891,74 @@ rs6000_conditional_register_usage (void)
       else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
 	       || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
 	{
-	  emit_move_insn (dest, GEN_INT (((ud2 << 16) ^ 0x80000000)
-					 - 0x80000000));
+	  temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+
+	  emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
+			  GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
 	  if (ud1 != 0)
-	    emit_move_insn (copy_rtx (dest),
-			    gen_rtx_IOR (DImode, copy_rtx (dest),
+	    emit_move_insn (dest,
+			    gen_rtx_IOR (DImode, copy_rtx (temp),
 					 GEN_INT (ud1)));
 	}
       else if (ud3 == 0 && ud4 == 0)
 	{
+	  temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+
 	  gcc_assert (ud2 & 0x8000);
-	  emit_move_insn (dest, GEN_INT (((ud2 << 16) ^ 0x80000000)
-					 - 0x80000000));
+	  emit_move_insn (copy_rtx (temp),
+			  GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
 	  if (ud1 != 0)
-	    emit_move_insn (copy_rtx (dest),
-			    gen_rtx_IOR (DImode, copy_rtx (dest),
+	    emit_move_insn (copy_rtx (temp),
+			    gen_rtx_IOR (DImode, copy_rtx (temp),
 					 GEN_INT (ud1)));
-	  emit_move_insn (copy_rtx (dest),
+	  emit_move_insn (dest,
 			  gen_rtx_ZERO_EXTEND (DImode,
 					       gen_lowpart (SImode,
-							    copy_rtx (dest))));
+							    copy_rtx (temp))));
 	}
       else if ((ud4 == 0xffff && (ud3 & 0x8000))
 	       || (ud4 == 0 && ! (ud3 & 0x8000)))
 	{
-	  emit_move_insn (dest, GEN_INT (((ud3 << 16) ^ 0x80000000)
-					 - 0x80000000));
+	  temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+
+	  emit_move_insn (copy_rtx (temp),
+			  GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
 	  if (ud2 != 0)
-	    emit_move_insn (copy_rtx (dest),
-			    gen_rtx_IOR (DImode, copy_rtx (dest),
+	    emit_move_insn (copy_rtx (temp),
+			    gen_rtx_IOR (DImode, copy_rtx (temp),
 					 GEN_INT (ud2)));
-	  emit_move_insn (copy_rtx (dest),
-			  gen_rtx_ASHIFT (DImode, copy_rtx (dest),
+	  emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
+			  gen_rtx_ASHIFT (DImode, copy_rtx (temp),
 					  GEN_INT (16)));
 	  if (ud1 != 0)
-	    emit_move_insn (copy_rtx (dest),
-			    gen_rtx_IOR (DImode, copy_rtx (dest),
+	    emit_move_insn (dest,
+			    gen_rtx_IOR (DImode, copy_rtx (temp),
 					 GEN_INT (ud1)));
 	}
       else
 	{
-	  emit_move_insn (dest, GEN_INT (((ud4 << 16) ^ 0x80000000)
-					 - 0x80000000));
+	  temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+
+	  emit_move_insn (copy_rtx (temp),
+			  GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
 	  if (ud3 != 0)
-	    emit_move_insn (copy_rtx (dest),
-			    gen_rtx_IOR (DImode, copy_rtx (dest),
+	    emit_move_insn (copy_rtx (temp),
+			    gen_rtx_IOR (DImode, copy_rtx (temp),
 					 GEN_INT (ud3)));
 
-	  emit_move_insn (copy_rtx (dest),
-			  gen_rtx_ASHIFT (DImode, copy_rtx (dest),
+	  emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
+			  gen_rtx_ASHIFT (DImode, copy_rtx (temp),
 					  GEN_INT (32)));
 	  if (ud2 != 0)
-	    emit_move_insn (copy_rtx (dest),
-			    gen_rtx_IOR (DImode, copy_rtx (dest),
+	    emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
+			    gen_rtx_IOR (DImode, copy_rtx (temp),
 					 GEN_INT (ud2 << 16)));
 	  if (ud1 != 0)
-	    emit_move_insn (copy_rtx (dest),
-			    gen_rtx_IOR (DImode, copy_rtx (dest),
+	    emit_move_insn (dest,
+			    gen_rtx_IOR (DImode, copy_rtx (temp),
 					 GEN_INT (ud1)));
 	}
     }
-  return dest;
 }
 
 /* Helper for the following.  Get rid of [r+r] memory refs
Index: gcc/config/rs6000/rs6000-protos.h
===================================================================
--- gcc/config/rs6000/rs6000-protos.h	(revision 209926)
+++ gcc/config/rs6000/rs6000-protos.h	(working copy)
@@ -114,7 +114,7 @@ extern void rs6000_emit_cbranch (enum machine_mode
 extern char * output_cbranch (rtx, const char *, int, rtx);
 extern char * output_e500_flip_gt_bit (rtx, rtx);
 extern const char * output_probe_stack_range (rtx, rtx);
-extern rtx rs6000_emit_set_const (rtx, enum machine_mode, rtx, int);
+extern void rs6000_emit_set_const (rtx, rtx);
 extern int rs6000_emit_cmove (rtx, rtx, rtx, rtx);
 extern int rs6000_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx);
 extern void rs6000_emit_minmax (rtx, enum rtx_code, rtx, rtx);
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md	(revision 209926)
+++ gcc/config/rs6000/rs6000.md	(working copy)
@@ -8978,12 +8978,8 @@
 	(ior:SI (match_dup 0)
 		(match_dup 3)))]
   "
-{ rtx tem = rs6000_emit_set_const (operands[0], SImode, operands[1], 2);
-
-  if (tem == operands[0])
-    DONE;
-  else
-    FAIL;
+{ rs6000_emit_set_const (operands[0], operands[1]);
+  DONE;
 }")
 
 (define_insn "*mov<mode>_internal2"
@@ -10326,12 +10322,8 @@
   [(set (match_dup 0) (match_dup 2))
    (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 3)))]
   "
-{ rtx tem = rs6000_emit_set_const (operands[0], DImode, operands[1], 5);
-
-  if (tem == operands[0])
-    DONE;
-  else
-    FAIL;
+{ rs6000_emit_set_const (operands[0], operands[1]);
+  DONE;
 }")
 
 (define_split
@@ -10341,12 +10333,8 @@
   [(set (match_dup 0) (match_dup 2))
    (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 3)))]
   "
-{ rtx tem = rs6000_emit_set_const (operands[0], DImode, operands[1], 5);
-
-  if (tem == operands[0])
-    DONE;
-  else
-    FAIL;
+{ rs6000_emit_set_const (operands[0], operands[1]);
+  DONE;
 }")
 \f
 ;; TImode/PTImode is similar, except that we usually want to compute the

-- 
Alan Modra
Australia Development Lab, IBM

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2014-05-24 16:26 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-05-08  1:49 [RS6000] Fix PR61098, Poor code setting count register Alan Modra
2014-05-08 13:48 ` David Edelsohn
2014-05-09  2:41   ` Alan Modra
2014-05-11  2:24     ` David Edelsohn
2014-05-11 22:53       ` Alan Modra
2014-05-11 23:39         ` Alan Modra
2014-05-14  3:05       ` Alan Modra
2014-05-14  3:46         ` David Edelsohn
2014-05-14  9:56           ` Alan Modra
2014-05-14 21:27             ` David Edelsohn
2014-05-23 15:23             ` Alan Modra
2014-05-24 16:26               ` David Edelsohn

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).