public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] PR target/34077
@ 2007-11-13 21:14 Michael Meissner
  2007-11-27  4:04 ` [Committed] " Michael Meissner
  0 siblings, 1 reply; 2+ messages in thread
From: Michael Meissner @ 2007-11-13 21:14 UTC (permalink / raw)
  To: gcc-patches, christophe.harle

The following patch fixes PR target/34077, where the bzip2, gobmk, and h264ref
SPEC 2006 benchmarks fail to compile with the -minline-all-stringops and
-minline-stringops-dynamically options.  Is this ok to apply?

[gcc patches]
2007-11-13  Michael Meissner  <michael.meissner@amd.com>

	PR target/34077
	* config/i386/i386.c (ix86_expand_movmem): If the copy size is a
	constant, avoid calling emit_cmp_and_jump_insns.  Use counter_mode
	to get the mode for loading a pseudo register with a count rather
	than duplicating code.

[gcc/testsuite patches]
2007-11-13  Michael Meissner  <michael.meissner@amd.com>

	PR target/34077
	* gcc.target/i386/pr34077.c: New testcase.

*** gcc/config/i386/i386.c.~1~	2007-11-01 11:59:45.000000000 -0400
--- gcc/config/i386/i386.c	2007-11-13 12:40:39.828792000 -0500
*************** ix86_expand_movmem (rtx dst, rtx src, rt
*** 15342,15353 ****
  
    /* Alignment code needs count to be in register.  */
    if (CONST_INT_P (count_exp) && desired_align > align)
!     {
!       enum machine_mode mode = SImode;
!       if (TARGET_64BIT && (count & ~0xffffffff))
! 	mode = DImode;
!       count_exp = force_reg (mode, count_exp);
!     }
    gcc_assert (desired_align >= 1 && align >= 1);
  
    /* Ensure that alignment prologue won't copy past end of block.  */
--- 15342,15348 ----
  
    /* Alignment code needs count to be in register.  */
    if (CONST_INT_P (count_exp) && desired_align > align)
!     count_exp = force_reg (counter_mode (count_exp), count_exp);
    gcc_assert (desired_align >= 1 && align >= 1);
  
    /* Ensure that alignment prologue won't copy past end of block.  */
*************** ix86_expand_movmem (rtx dst, rtx src, rt
*** 15358,15386 ****
  	 Make sure it is power of 2.  */
        epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
  
!       label = gen_label_rtx ();
!       emit_cmp_and_jump_insns (count_exp,
! 			       GEN_INT (epilogue_size_needed),
! 			       LTU, 0, counter_mode (count_exp), 1, label);
!       if (GET_CODE (count_exp) == CONST_INT)
! 	;
!       else if (expected_size == -1 || expected_size < epilogue_size_needed)
! 	predict_jump (REG_BR_PROB_BASE * 60 / 100);
        else
! 	predict_jump (REG_BR_PROB_BASE * 20 / 100);
      }
    /* Emit code to decide on runtime whether library call or inline should be
       used.  */
    if (dynamic_check != -1)
      {
!       rtx hot_label = gen_label_rtx ();
!       jump_around_label = gen_label_rtx ();
!       emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
! 			       LEU, 0, GET_MODE (count_exp), 1, hot_label);
!       predict_jump (REG_BR_PROB_BASE * 90 / 100);
!       emit_block_move_via_libcall (dst, src, count_exp, false);
!       emit_jump (jump_around_label);
!       emit_label (hot_label);
      }
  
    /* Step 2: Alignment prologue.  */
--- 15353,15400 ----
  	 Make sure it is power of 2.  */
        epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
  
!       if (CONST_INT_P (count_exp))
! 	{
! 	  if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
! 	    goto epilogue;
! 	}
        else
! 	{
! 	  label = gen_label_rtx ();
! 	  emit_cmp_and_jump_insns (count_exp,
! 				   GEN_INT (epilogue_size_needed),
! 				   LTU, 0, counter_mode (count_exp), 1, label);
! 	  if (expected_size == -1 || expected_size < epilogue_size_needed)
! 	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
! 	  else
! 	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
! 	}
      }
+ 
    /* Emit code to decide on runtime whether library call or inline should be
       used.  */
    if (dynamic_check != -1)
      {
!       if (CONST_INT_P (count_exp))
! 	{
! 	  if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
! 	    {
! 	      emit_block_move_via_libcall (dst, src, count_exp, false);
! 	      count_exp = const0_rtx;
! 	      goto epilogue;
! 	    }
! 	}
!       else
! 	{
! 	  rtx hot_label = gen_label_rtx ();
! 	  jump_around_label = gen_label_rtx ();
! 	  emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
! 				   LEU, 0, GET_MODE (count_exp), 1, hot_label);
! 	  predict_jump (REG_BR_PROB_BASE * 90 / 100);
! 	  emit_block_move_via_libcall (dst, src, count_exp, false);
! 	  emit_jump (jump_around_label);
! 	  emit_label (hot_label);
! 	}
      }
  
    /* Step 2: Alignment prologue.  */
*************** ix86_expand_movmem (rtx dst, rtx src, rt
*** 15453,15459 ****
      }
  
    /* Step 4: Epilogue to copy the remaining bytes.  */
! 
    if (label)
      {
        /* When the main loop is done, COUNT_EXP might hold original count,
--- 15467,15473 ----
      }
  
    /* Step 4: Epilogue to copy the remaining bytes.  */
!  epilogue:
    if (label)
      {
        /* When the main loop is done, COUNT_EXP might hold original count,
*** gcc/testsuite/gcc.target/i386/pr34077.c.~1~	2007-11-13 14:14:55.111703000 -0500
--- gcc/testsuite/gcc.target/i386/pr34077.c	2007-11-13 14:20:23.613145000 -0500
***************
*** 0 ****
--- 1,30 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O1 -minline-all-stringops -minline-stringops-dynamically" } */
+ 
+ #include <string.h>
+ 
+ extern double ran(void);
+ 
+ struct spec_fd_t {
+   int limit;
+   int len;
+   int pos;
+   unsigned char *buf;
+ } spec_fd[3];
+ 
+ int spec_random_load (int fd) {
+   int i, j;
+   char random_text[(32)][(128*1024)];
+ 
+   for (j = 0; j < (128*1024); j++) {
+     random_text[i][j] = (int)(ran()*256);
+   }
+ 
+   for (i = 0 ; i < spec_fd[fd].limit; i+= (128*1024)) {
+     memcpy(spec_fd[fd].buf + i, random_text[(int)(ran()*(32))],
+ 	   (128*1024));
+   }
+ 
+   spec_fd[fd].len = 1024*1024;
+   return 0;
+ }

-- 
Michael Meissner, AMD
90 Central Street, MS 83-29, Boxborough, MA, 01719, USA
michael.meissner@amd.com


^ permalink raw reply	[flat|nested] 2+ messages in thread

* [Committed] PR target/34077
  2007-11-13 21:14 [PATCH] PR target/34077 Michael Meissner
@ 2007-11-27  4:04 ` Michael Meissner
  0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2007-11-27  4:04 UTC (permalink / raw)
  To: gcc-patches; +Cc: christophe.harle

I committed the patch.

-- 
Michael Meissner, AMD
90 Central Street, MS 83-29, Boxborough, MA, 01719, USA
michael.meissner@amd.com


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2007-11-26 23:26 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-11-13 21:14 [PATCH] PR target/34077 Michael Meissner
2007-11-27  4:04 ` [Committed] " Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).