public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] lower-subreg: Decompose multiword shifts
@ 2007-08-02 15:35 Andreas Krebbel
  2007-08-02 16:56 ` Paolo Bonzini
                   ` (3 more replies)
  0 siblings, 4 replies; 17+ messages in thread
From: Andreas Krebbel @ 2007-08-02 15:35 UTC (permalink / raw)
  To: gcc-patches

Hello,

the attached patch enhances the lower subreg pass to be able to
decompose multiword shift instructions.

Currently GCC generates quite ugly code (on s390 31bit) for a function
like the first of the attached testcase:

u64
foo (u32 high, u32 low)
{
  return ((u64)high << 32) | low;
}

Although that should be a nop on s390 GCC comes up with:

foo:
.LFB2:
        lr      %r5,%r2
        lhi     %r4,0
        sldl    %r4,32
        or      %r5,%r3
        lr      %r2,%r4
        lr      %r3,%r5
        br      %r14

With the attached patch the function is optimized to a single
"br %r14".

Besides the fact that a shift decomposed into 2 simple move
instructions can often be merged into other INSNs also the rest of
lower subreg benefits since more multiword regs can be marked as
decomposable.

In spite of the 2 new functions the cc1 executable shrinks by
9345 bytes.

Bootstrapped on s390, s390x and i686. No testsuite regressions.

Ok for mainline?

Bye,

-Andreas-


2007-08-02  Andreas Krebbel  <krebbel1@de.ibm.com>

	* lower-subreg.c (resolve_subreg_use): Remove assertion.
	(find_decomposable_shift, resolve_multiword_shift): New functions.
	(decompose_multiword_subregs): Use the functions above to decompose
	multiword shifts.


2007-08-02  Andreas Krebbel  <krebbel1@de.ibm.com>

	* gcc.dg/multiword-shifts.c: New testcase.


Index: gcc/lower-subreg.c
===================================================================
*** gcc/lower-subreg.c.orig	2007-08-02 11:46:48.000000000 +0200
--- gcc/lower-subreg.c	2007-08-02 11:48:02.000000000 +0200
*************** resolve_subreg_use (rtx *px, void *data)
*** 525,532 ****
      {
        /* Return 1 to the caller to indicate that we found a direct
  	 reference to a register which is being decomposed.  This can
! 	 happen inside notes.  */
!       gcc_assert (!insn);
        return 1;
      }
  
--- 525,531 ----
      {
        /* Return 1 to the caller to indicate that we found a direct
  	 reference to a register which is being decomposed.  This can
! 	 happen inside notes or multiword shift instructions.  */
        return 1;
      }
  
*************** resolve_use (rtx pat, rtx insn)
*** 944,949 ****
--- 943,1069 ----
    return false;
  }
  
+ /* Checks if INSN is a decomposable multiword-shift and sets the
+    decomposable_context bitmap accordingly.  A non-zero value is
+    returned if a decomposable shift was found.  */
+ 
+ static int
+ find_decomposable_shift (rtx insn)
+ {
+   rtx set;
+   rtx shift;
+   rtx shift_operand;
+   rtx shift_count;
+ 
+   set = single_set (insn);
+   if (!set)
+     return 0;
+ 
+   shift = SET_SRC (set);
+   if (GET_CODE (shift) != ASHIFT && GET_CODE (shift) != LSHIFTRT)
+     return 0;
+ 
+   shift_operand = XEXP (shift, 0);
+   shift_count = XEXP (shift, 1);
+   if (!REG_P (SET_DEST (set)) || !REG_P (shift_operand)
+       || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
+       || HARD_REGISTER_NUM_P (REGNO (shift_operand))
+       || GET_CODE (shift_count) != CONST_INT
+       || !SCALAR_INT_MODE_P (GET_MODE (shift)))
+     return 0;
+ 
+   if (INTVAL (shift_count) != BITS_PER_WORD
+       || GET_MODE_BITSIZE (GET_MODE (shift_operand)) != 2 * BITS_PER_WORD)
+     return 0;
+ 
+   bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
+   bitmap_set_bit (decomposable_context, REGNO (shift_operand));
+ 
+   return 1;
+ }
+ 
+ /* Decompose the word wide shift (in INSN) of a multiword pseudo into
+    a move and 'set to zero' insn.  Return a pointer to the new insn
+    when a replacement was done.  */
+ 
+ static rtx
+ resolve_multiword_shift (rtx insn)
+ {
+   rtx set;
+   rtx shift;
+   rtx shift_operand;
+   rtx insns;
+   rtx src_reg, dest_reg, dest_zero;
+   int src_reg_num, offset1, offset2;
+ 
+   set = single_set (insn);
+   if (!set)
+     return NULL_RTX;
+ 
+   shift = SET_SRC (set);
+   if (GET_CODE (shift) != ASHIFT && GET_CODE (shift) != LSHIFTRT)
+     return NULL_RTX;
+ 
+   shift_operand = XEXP (shift, 0);
+ 
+   if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (shift_operand))
+     return NULL_RTX;
+ 
+   /* src_reg_num is the number of the word register to be shifted.  */
+   src_reg_num = GET_CODE (shift) == LSHIFTRT ? 1 : 0;
+ 
+   if (WORDS_BIG_ENDIAN)
+     src_reg_num = 1 - src_reg_num;
+ 
+   offset1 = UNITS_PER_WORD * (1 - src_reg_num);
+   offset2 = UNITS_PER_WORD * src_reg_num;
+ 
+   if (WORDS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+     {
+       offset1 += UNITS_PER_WORD - 1;
+       offset2 += UNITS_PER_WORD - 1;
+     }
+ 
+   start_sequence ();
+ 
+   if (resolve_reg_p (SET_DEST (set)))
+     {
+       gcc_assert (GET_CODE (SET_DEST (set)) == CONCATN);
+ 
+       dest_reg = XVECEXP (SET_DEST (set), 0, 1 - src_reg_num);
+       dest_zero = XVECEXP (SET_DEST (set), 0, src_reg_num);
+     }
+   else
+     {
+       dest_reg = gen_rtx_SUBREG (word_mode, SET_DEST (set), offset1);
+       dest_zero = gen_rtx_SUBREG (word_mode, SET_DEST (set), offset2);
+     }
+ 
+   if (resolve_reg_p (shift_operand))
+     {
+       gcc_assert (GET_CODE (shift_operand) == CONCATN);
+ 
+       src_reg = XVECEXP (shift_operand, 0, src_reg_num);
+     }
+   else
+     src_reg = gen_rtx_SUBREG (word_mode, shift_operand, offset2);
+ 
+   emit_move_insn (dest_reg, src_reg);
+   emit_move_insn (dest_zero, CONST0_RTX (word_mode));
+   insns = get_insns ();
+ 
+   end_sequence ();
+ 
+   emit_insn_before (insns, insn);
+ 
+   if (dump_file)
+     fprintf (dump_file, "; Replacing SHIFT insn: %d with insns: %d and %d\n",
+ 	     INSN_UID (insn), INSN_UID (insns), INSN_UID (NEXT_INSN (insns)));
+ 
+   delete_insn (insn);
+   return insns;
+ }
+ 
  /* Look for registers which are always accessed via word-sized SUBREGs
     or via copies.  Decompose these registers into several word-sized
     pseudo-registers.  */
*************** decompose_multiword_subregs (void)
*** 1003,1008 ****
--- 1123,1131 ----
  	      || GET_CODE (PATTERN (insn)) == USE)
  	    continue;
  
+ 	  if (find_decomposable_shift (insn))
+ 	    continue;
+ 
  	  recog_memoized (insn);
  	  extract_insn (insn);
  
*************** decompose_multiword_subregs (void)
*** 1152,1157 ****
--- 1275,1293 ----
  			    SET_BIT (sub_blocks, bb->index);
  			}
  		    }
+ 		  else
+ 		    {
+ 		      rtx decomposed_shift;
+ 
+ 		      decomposed_shift = resolve_multiword_shift (insn);
+ 		      if (decomposed_shift != NULL_RTX)
+ 			{
+ 			  changed = true;
+ 			  insn = decomposed_shift;
+ 			  recog_memoized (insn);
+ 			  extract_insn (insn);
+ 			}
+ 		    }
  
  		  for (i = recog_data.n_operands - 1; i >= 0; --i)
  		    for_each_rtx (recog_data.operand_loc[i],
Index: gcc/testsuite/gcc.dg/multiword-shifts.c
===================================================================
*** /dev/null	1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/multiword-shifts.c	2007-08-02 11:48:02.000000000 +0200
***************
*** 0 ****
--- 1,40 ----
+ /* { dg-do run } */
+ /* { dg-options "-O3" } */
+ 
+ typedef unsigned int u32;
+ typedef unsigned long long u64;
+ 
+ u64 __attribute__((noinline))
+ foo (u32 high, u32 low)
+ {
+   return ((u64)high << 32) | low;
+ }
+ 
+ u32 __attribute__((noinline))
+ right (u64 t)
+ {
+   return (u32)(t >> 32);
+ }
+ 
+ u64 __attribute__((noinline))
+ left (u32 t)
+ {
+   return (u64)t << 32;
+ }
+ 
+ extern void abort ();
+ 
+ int
+ main ()
+ {
+   if (foo (13000, 12000) != 55834574860000ULL)
+     abort ();
+ 
+   if (right (55834574860000ULL) != 13000)
+     abort ();
+ 
+   if (left (13000) != 55834574848000ULL)
+     abort ();
+ 
+   return 0;
+ }

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] lower-subreg: Decompose multiword shifts
  2007-08-02 15:35 [PATCH] lower-subreg: Decompose multiword shifts Andreas Krebbel
@ 2007-08-02 16:56 ` Paolo Bonzini
  2007-08-03 16:41   ` Andreas Krebbel
  2007-08-02 18:52 ` Rask Ingemann Lambertsen
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 17+ messages in thread
From: Paolo Bonzini @ 2007-08-02 16:56 UTC (permalink / raw)
  To: Andreas Krebbel; +Cc: gcc-patches


> Besides the fact that a shift decomposed into 2 simple move
> instructions can often be merged into other INSNs also the rest of
> lower subreg benefits since more multiword regs can be marked as
> decomposable.

Great! You could investigate changing the code so that the condition 
becomes

+   if (INTVAL (shift_count) < BITS_PER_WORD
+       || GET_MODE_BITSIZE (GET_MODE (shift_operand)) != 2 * BITS_PER_WORD)
+     return 0;

(your code has != in the first branch).  You can turn the code into a 
move+a shift, which simplifies to two moves if INTVAL (shift_count) == 
BITS_PER_WORD, in this case.

Paolo

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] lower-subreg: Decompose multiword shifts
  2007-08-02 15:35 [PATCH] lower-subreg: Decompose multiword shifts Andreas Krebbel
  2007-08-02 16:56 ` Paolo Bonzini
@ 2007-08-02 18:52 ` Rask Ingemann Lambertsen
  2007-08-02 19:21   ` Andreas Krebbel
  2007-08-02 23:16 ` Rask Ingemann Lambertsen
  2007-08-07  1:27 ` Ian Lance Taylor
  3 siblings, 1 reply; 17+ messages in thread
From: Rask Ingemann Lambertsen @ 2007-08-02 18:52 UTC (permalink / raw)
  To: Andreas Krebbel; +Cc: gcc-patches

On Thu, Aug 02, 2007 at 05:34:34PM +0200, Andreas Krebbel wrote:
> 
> u64
> foo (u32 high, u32 low)
> {
>   return ((u64)high << 32) | low;
> }
> 
> Although that should be a nop on s390 GCC comes up with:
> 
> foo:
> .LFB2:
>         lr      %r5,%r2
>         lhi     %r4,0
>         sldl    %r4,32
>         or      %r5,%r3
>         lr      %r2,%r4
>         lr      %r3,%r5
>         br      %r14

   Odd, because on ia16, the equivalent multiword shift

u32
foo2 (u16 high, u16 low)
{
  return ((u32)high << 16) | low;
}

produces

foo2:
	pushw	%bp		;# 45	*pushhi1_nonimm
	movw	%sp,	%bp	;# 44	*movhi/1
	movw	6(%bp),	%ax	;# 26	*movhi/1
	movw	4(%bp),	%dx	;# 27	*movhi/1
	popw	%bp		;# 48	*pophi1
	ret			;# 49	*return

which is nearly perfect.

   After the first subreg pass, there is still a shift and two iors:

(note 4 3 9 2 NOTE_INSN_FUNCTION_BEG)

(insn 9 4 37 2 shifttest-lower.c:13 (clobber (reg:HI 32)) -1 (expr_list:REG_LIBCALL_ID (const_int 3 [0x3])
        (nil)))

(insn 37 9 7 2 shifttest-lower.c:13 (clobber (reg:HI 33 [+2 ])) -1 (nil))

(insn 7 37 8 2 shifttest-lower.c:13 (set (reg:HI 32)
        (reg/v:HI 22 [ low ])) 6 {*movhi} (expr_list:REG_LIBCALL_ID (const_int 3 [0x3])
        (nil)))

(insn 8 7 12 2 shifttest-lower.c:13 (set (reg:HI 33 [+2 ])
        (const_int 0 [0x0])) 6 {*movhi} (expr_list:REG_LIBCALL_ID (const_int 3 [0x3])
        (expr_list:REG_NO_CONFLICT (reg/v:HI 22 [ low ])
            (nil))))

(insn 12 8 38 2 shifttest-lower.c:13 (clobber (reg:HI 34)) -1 (expr_list:REG_LIBCALL_ID (const_int 4 [0x4])
        (nil)))

(insn 38 12 10 2 shifttest-lower.c:13 (clobber (reg:HI 35 [+2 ])) -1 (nil))

(insn 10 38 11 2 shifttest-lower.c:13 (set (reg:HI 34)
        (reg/v:HI 21 [ high ])) 6 {*movhi} (expr_list:REG_LIBCALL_ID (const_int 4 [0x4])
        (nil)))

(insn 11 10 13 2 shifttest-lower.c:13 (set (reg:HI 35 [+2 ])
        (const_int 0 [0x0])) 6 {*movhi} (expr_list:REG_LIBCALL_ID (const_int 4 [0x4])
        (expr_list:REG_NO_CONFLICT (reg/v:HI 21 [ high ])
            (nil))))

(insn 13 11 16 2 shifttest-lower.c:13 (set (reg:QI 27)
        (const_int 0 [0x0])) 8 {*movqi} (nil))

(insn 16 13 39 2 shifttest-lower.c:13 (clobber (reg:HI 36)) -1 (expr_list:REG_LIBCALL_ID (const_int 5 [0x5])
        (nil)))

(insn 39 16 14 2 shifttest-lower.c:13 (clobber (reg:HI 37 [+2 ])) -1 (nil))

(insn 14 39 15 2 shifttest-lower.c:13 (parallel [
            (set (reg:HI 37 [+2 ])
                (ashift:HI (reg:HI 34)
                    (reg:QI 27)))
            (clobber (reg:CC 13 cc))
        ]) 331 {*ashlhi3} (expr_list:REG_LIBCALL_ID (const_int 5 [0x5])
        (expr_list:REG_EQUAL (ashift:HI (reg:HI 34)
                (const_int 0 [0x0]))
            (nil))))

(insn 15 14 19 2 shifttest-lower.c:13 (set (reg:HI 36)
        (const_int 0 [0x0])) 6 {*movhi} (expr_list:REG_LIBCALL_ID (const_int 5 [0x5])
        (nil)))

(insn 19 15 40 2 shifttest-lower.c:13 (clobber (reg:HI 30)) -1 (expr_list:REG_LIBCALL_ID (const_int 6 [0x6])
        (nil)))

(insn 40 19 17 2 shifttest-lower.c:13 (clobber (reg:HI 31 [+2 ])) -1 (nil))

(insn 17 40 18 2 shifttest-lower.c:13 (parallel [
            (set (reg:HI 30)
                (ior:HI (reg:HI 32)
                    (reg:HI 36)))
            (clobber (reg:CC 13 cc))
        ]) 75 {*iorhi3} (expr_list:REG_LIBCALL_ID (const_int 6 [0x6])
        (nil)))

(insn 18 17 22 2 shifttest-lower.c:13 (parallel [
            (set (reg:HI 31 [+2 ])
                (ior:HI (reg:HI 33 [+2 ])
                    (reg:HI 37 [+2 ])))
            (clobber (reg:CC 13 cc))
        ]) 75 {*iorhi3} (expr_list:REG_LIBCALL_ID (const_int 6 [0x6])
        (nil)))

(insn 22 18 41 2 shifttest-lower.c:13 (clobber (reg:HI 28 [ <result> ])) -1 (nil))

(insn 41 22 20 2 shifttest-lower.c:13 (clobber (reg:HI 29 [ <result>+2 ])) -1 (nil))

(insn 20 41 21 2 shifttest-lower.c:13 (set (reg:HI 28 [ <result> ])
        (reg:HI 30)) 6 {*movhi} (nil))

(insn 21 20 26 2 shifttest-lower.c:13 (set (reg:HI 29 [ <result>+2 ])
        (reg:HI 31 [+2 ])) 6 {*movhi} (nil))

(insn 26 21 27 2 shifttest-lower.c:15 (set (reg:HI 2 a)
        (reg:HI 28 [ <result> ])) 6 {*movhi} (nil))

(insn 27 26 33 2 shifttest-lower.c:15 (set (reg:HI 4 d [+2 ])
        (reg:HI 29 [ <result>+2 ])) 6 {*movhi} (nil))

(insn 33 27 0 2 shifttest-lower.c:15 (use (reg/i:SI 2 a)) -1 (nil))


   The shift and iors are gone already at cse1:

(note 4 3 9 2 NOTE_INSN_FUNCTION_BEG)

(insn 9 4 7 2 shifttest-lower.c:13 (clobber (reg:HI 32 [ low ])) -1 (expr_list:REG_LIBCALL_ID (const_int 3 [0x3])
        (nil)))

(insn 7 9 8 2 shifttest-lower.c:13 (set (reg:HI 32 [ low ])
        (reg/v:HI 22 [ low ])) 6 {*movhi} (expr_list:REG_LIBCALL_ID (const_int 3 [0x3])
        (nil)))

(insn 8 7 12 2 shifttest-lower.c:13 (set (reg:HI 33 [+2 ])
        (const_int 0 [0x0])) 6 {*movhi} (expr_list:REG_LIBCALL_ID (const_int 3 [0x3])
        (expr_list:REG_NO_CONFLICT (reg/v:HI 22 [ low ])
            (nil))))

(insn 12 8 10 2 shifttest-lower.c:13 (clobber (reg:HI 34 [ high ])) -1 (expr_list:REG_LIBCALL_ID (const_int 4 [0x4])
        (nil)))

(insn 10 12 11 2 shifttest-lower.c:13 (set (reg:HI 34 [ high ])
        (reg/v:HI 21 [ high ])) 6 {*movhi} (expr_list:REG_LIBCALL_ID (const_int 4 [0x4])
        (nil)))

(insn 11 10 13 2 shifttest-lower.c:13 (set (reg:HI 35 [+2 ])
        (reg:HI 33 [+2 ])) 6 {*movhi} (expr_list:REG_EQUAL (const_int 0 [0x0])
        (expr_list:REG_LIBCALL_ID (const_int 4 [0x4])
            (expr_list:REG_NO_CONFLICT (reg/v:HI 21 [ high ])
                (nil)))))

(insn 13 11 16 2 shifttest-lower.c:13 (set (reg:QI 27)
        (subreg:QI (reg:HI 33 [+2 ]) 0)) 8 {*movqi} (expr_list:REG_EQUAL (const_int 0 [0x0])
        (nil)))

(insn 16 13 14 2 shifttest-lower.c:13 (clobber (reg:HI 36)) -1 (expr_list:REG_LIBCALL_ID (const_int 5 [0x5])
        (nil)))

(insn 14 16 15 2 shifttest-lower.c:13 (set (reg:HI 37 [+2 ])
        (reg/v:HI 21 [ high ])) 6 {*movhi} (expr_list:REG_LIBCALL_ID (const_int 5 [0x5])
        (expr_list:REG_EQUAL (reg/v:HI 21 [ high ])
            (nil))))

(insn 15 14 19 2 shifttest-lower.c:13 (set (reg:HI 36)
        (reg:HI 33 [+2 ])) 6 {*movhi} (expr_list:REG_EQUAL (const_int 0 [0x0])
        (expr_list:REG_LIBCALL_ID (const_int 5 [0x5])
            (nil))))

(insn 19 15 17 2 shifttest-lower.c:13 (clobber (reg:HI 30)) -1 (expr_list:REG_LIBCALL_ID (const_int 6 [0x6])
        (nil)))

(insn 17 19 18 2 shifttest-lower.c:13 (set (reg:HI 30)
        (reg/v:HI 22 [ low ])) 6 {*movhi} (expr_list:REG_LIBCALL_ID (const_int 6 [0x6])
        (nil)))

(insn 18 17 20 2 shifttest-lower.c:13 (set (reg:HI 31 [+2 ])
        (reg/v:HI 21 [ high ])) 6 {*movhi} (expr_list:REG_LIBCALL_ID (const_int 6 [0x6])
        (nil)))

(insn 20 18 21 2 shifttest-lower.c:13 (set (reg:HI 28 [ <result> ])
        (reg/v:HI 22 [ low ])) 6 {*movhi} (nil))

(insn 21 20 26 2 shifttest-lower.c:13 (set (reg:HI 29 [ <result>+2 ])
        (reg/v:HI 21 [ high ])) 6 {*movhi} (nil))

(insn 26 21 27 2 shifttest-lower.c:15 (set (reg:HI 2 a [ <result> ])
        (reg/v:HI 22 [ low ])) 6 {*movhi} (nil))

(insn 27 26 33 2 shifttest-lower.c:15 (set (reg:HI 4 d [+2 ])
        (reg/v:HI 21 [ high ])) 6 {*movhi} (nil))

(insn 33 27 0 2 shifttest-lower.c:15 (use (reg/i:SI 2 a)) -1 (nil))


   After fwprop1, only this is left:

(note 4 3 9 2 NOTE_INSN_FUNCTION_BEG)

(insn 9 4 12 2 shifttest-lower.c:13 (clobber (reg:HI 32 [ low ])) -1 (expr_list:REG_LIBCALL_ID (const_int 3 [0x3])
        (nil)))

(insn 12 9 16 2 shifttest-lower.c:13 (clobber (reg:HI 34 [ high ])) -1 (expr_list:REG_LIBCALL_ID (const_int 4 [0x4])
        (nil)))

(insn 16 12 19 2 shifttest-lower.c:13 (clobber (reg:HI 36)) -1 (expr_list:REG_LIBCALL_ID (const_int 5 [0x5])
        (nil)))

(insn 19 16 26 2 shifttest-lower.c:13 (clobber (reg:HI 30)) -1 (expr_list:REG_LIBCALL_ID (const_int 6 [0x6])
        (nil)))

(insn 26 19 27 2 shifttest-lower.c:15 (set (reg:HI 2 a [ <result> ])
        (reg/v:HI 22 [ low ])) 6 {*movhi} (nil))

(insn 27 26 33 2 shifttest-lower.c:15 (set (reg:HI 4 d [+2 ])
        (reg/v:HI 21 [ high ])) 6 {*movhi} (nil))

(insn 33 27 0 2 shifttest-lower.c:15 (use (reg/i:SI 2 a)) -1 (nil))


   Why isn't the same magic happening on s390?

> Index: gcc/testsuite/gcc.dg/multiword-shifts.c
> ===================================================================
> *** /dev/null	1970-01-01 00:00:00.000000000 +0000
> --- gcc/testsuite/gcc.dg/multiword-shifts.c	2007-08-02 11:48:02.000000000 +0200
> ***************
> *** 0 ****
> --- 1,40 ----
> + /* { dg-do run } */
> + /* { dg-options "-O3" } */
> + 
> + typedef unsigned int u32;
> + typedef unsigned long long u64;

   Please use uint32_t and uint64_t from stdint.h (or some other reliable
means of getting types of the sizes you want).

-- 
Rask Ingemann Lambertsen

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] lower-subreg: Decompose multiword shifts
  2007-08-02 18:52 ` Rask Ingemann Lambertsen
@ 2007-08-02 19:21   ` Andreas Krebbel
  2007-08-07  1:41     ` Rask Ingemann Lambertsen
  0 siblings, 1 reply; 17+ messages in thread
From: Andreas Krebbel @ 2007-08-02 19:21 UTC (permalink / raw)
  To: Rask Ingemann Lambertsen; +Cc: gcc-patches

Hello,

> Why isn't the same magic happening on s390?

Your target does not seem to provide shift instructions dealing with
register pairs. So I would guess that optab already decomposes such
shifts for you - right?!  At least I don't see an SI mode shift in the
RTL pieces you've sent. Certainly that optimization only makes sense if
your back end provides such an instruction.

> > + typedef unsigned int u32;
> > + typedef unsigned long long u64;
> 
>    Please use uint32_t and uint64_t from stdint.h (or some other reliable
> means of getting types of the sizes you want).

Yes the names I've choosen aren't correct for any platform but I think the types are nevertheless a good choice considering that I need something as wide as register and something else with twice the size.

Looks like an ideal place for using __attribute__((mode(word))) ;)

Bye,

-Andreas-

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] lower-subreg: Decompose multiword shifts
  2007-08-02 15:35 [PATCH] lower-subreg: Decompose multiword shifts Andreas Krebbel
  2007-08-02 16:56 ` Paolo Bonzini
  2007-08-02 18:52 ` Rask Ingemann Lambertsen
@ 2007-08-02 23:16 ` Rask Ingemann Lambertsen
  2007-08-03 12:03   ` Michael Matz
  2007-08-03 14:34   ` Andreas Krebbel
  2007-08-07  1:27 ` Ian Lance Taylor
  3 siblings, 2 replies; 17+ messages in thread
From: Rask Ingemann Lambertsen @ 2007-08-02 23:16 UTC (permalink / raw)
  To: Andreas Krebbel; +Cc: gcc-patches

On Thu, Aug 02, 2007 at 05:34:34PM +0200, Andreas Krebbel wrote:
> Hello,
> 
> the attached patch enhances the lower subreg pass to be able to
> decompose multiword shift instructions.
> 
> Currently GCC generates quite ugly code (on s390 31bit) for a function
> like the first of the attached testcase:
> 
> u64
> foo (u32 high, u32 low)
> {
>   return ((u64)high << 32) | low;
> }

   On x86_64 with -O2 -march=k6 -m32, I get:

foo:
	pushl	%ebp		# 39	*pushsi2	[length = 1]
	movl	%esp, %ebp	# 40	*movsi_1/1	[length = 2]
	movl	8(%ebp), %edx	# 37	*movsi_1/1	[length = 3]
	movl	12(%ebp), %eax	# 10	*movsi_1/1	[length = 3]
	leave			# 43	leave	[length = 1]
	movl	%edx, %ecx	# 46	*movsi_1/1	[length = 2]
	movl	%ecx, %edx	# 11	*movsi_1/1	[length = 2]
	ret			# 44	return_internal	[length = 1]

   This appears sort of fine, i.e. no shifts, with just two useless "movl"
instructions. But the shift remains until the split4 pass, leaving the two
"movl" instructions behind. So I tried your patch:

foo:
	pushl	%ebp		# 43	*pushsi2	[length = 1]
	movl	%esp, %ebp	# 44	*movsi_1/1	[length = 2]
	pushl	%esi		# 45	*pushsi2	[length = 1]
	pushl	%ebx		# 46	*pushsi2	[length = 1]
	movl	12(%ebp), %eax	# 39	*movsi_1/1	[length = 3]
	popl	%ebx		# 49	popsi1	[length = 1]
	movl	8(%ebp), %ecx	# 41	*movsi_1/1	[length = 3]
	popl	%esi		# 50	popsi1	[length = 1]
	leave			# 51	leave	[length = 1]
	movl	%ecx, %edx	# 36	*movsi_1/1	[length = 2]
	ret			# 52	return_internal	[length = 1]

I don't know if it's your fault, but it's worse!

-- 
Rask Ingemann Lambertsen

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] lower-subreg: Decompose multiword shifts
  2007-08-02 23:16 ` Rask Ingemann Lambertsen
@ 2007-08-03 12:03   ` Michael Matz
  2007-08-03 14:34   ` Andreas Krebbel
  1 sibling, 0 replies; 17+ messages in thread
From: Michael Matz @ 2007-08-03 12:03 UTC (permalink / raw)
  To: Rask Ingemann Lambertsen; +Cc: Andreas Krebbel, gcc-patches

Hi,

On Fri, 3 Aug 2007, Rask Ingemann Lambertsen wrote:

> foo:
> 	pushl	%ebp		# 43	*pushsi2	[length = 1]
> 	movl	%esp, %ebp	# 44	*movsi_1/1	[length = 2]
> 	pushl	%esi		# 45	*pushsi2	[length = 1]
> 	pushl	%ebx		# 46	*pushsi2	[length = 1]
> 	movl	12(%ebp), %eax	# 39	*movsi_1/1	[length = 3]
> 	popl	%ebx		# 49	popsi1	[length = 1]
> 	movl	8(%ebp), %ecx	# 41	*movsi_1/1	[length = 3]
> 	popl	%esi		# 50	popsi1	[length = 1]
> 	leave			# 51	leave	[length = 1]
> 	movl	%ecx, %edx	# 36	*movsi_1/1	[length = 2]
> 	ret			# 52	return_internal	[length = 1]
> 
> I don't know if it's your fault, but it's worse!

Can't be Andreas' fault, except that he obviously exposes another problem.  
The function now saves %esi and %ebx, although they aren't used at all.  
Without those the function would look saner:

> foo:
>       pushl   %ebp            # 43    *pushsi2        [length = 1]
>       movl    %esp, %ebp      # 44    *movsi_1/1      [length = 2]
>       movl    12(%ebp), %eax  # 39    *movsi_1/1      [length = 3]
>       movl    8(%ebp), %ecx   # 41    *movsi_1/1      [length = 3]
>       leave                   # 51    leave   [length = 1]
>       movl    %ecx, %edx      # 36    *movsi_1/1      [length = 2]
>       ret                     # 52    return_internal [length = 1]

Leaving only one useless move in there, one superflous move left, but 
better.


Ciao,
Michael.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] lower-subreg: Decompose multiword shifts
  2007-08-02 23:16 ` Rask Ingemann Lambertsen
  2007-08-03 12:03   ` Michael Matz
@ 2007-08-03 14:34   ` Andreas Krebbel
  2007-08-03 15:37     ` Michael Matz
  1 sibling, 1 reply; 17+ messages in thread
From: Andreas Krebbel @ 2007-08-03 14:34 UTC (permalink / raw)
  To: Rask Ingemann Lambertsen; +Cc: gcc-patches, matz

Hello,

as Michael already stated there are two superfluous register
save/restore pairs. Both seem to have completely different causes.

foo:
	pushl	%ebp		# 43	*pushsi2	[length = 1]
	movl	%esp, %ebp	# 44	*movsi_1/1	[length = 2]
	pushl	%esi		# 45	*pushsi2	[length = 1]
	pushl	%ebx		# 46	*pushsi2	[length = 1]
	movl	12(%ebp), %eax	# 39	*movsi_1/1	[length = 3]
	popl	%ebx		# 49	popsi1	[length = 1]
	movl	8(%ebp), %ecx	# 41	*movsi_1/1	[length = 3]
	popl	%esi		# 50	popsi1	[length = 1]
	leave			# 51	leave	[length = 1]
	movl	%ecx, %edx	# 36	*movsi_1/1	[length = 2]
	ret			# 52	return_internal	[length = 1]

%esi:

With and without my patch lower subreg decomposes the following
clobber insn:

(insn 12 9 10 2 t.c:6 (clobber (reg:DI 61)) -1 (expr_list:REG_LIBCALL_ID (const_int 0 [0x0])
        (insn_list:REG_LIBCALL 13 (nil))))

into:

(insn 12 29 30 2 t.c:6 (clobber (reg:SI 67)) -1 (expr_list:REG_LIBCALL_ID (const_int 0 [0x0])
        (nil)))

(insn 30 12 10 2 t.c:6 (clobber (reg:SI 68 [+4 ])) -1 (nil))

Also the REG_LIBCALL note and the respective REG_RETVAL notes are
removed. But only one of the resulting INSN has a REG_LIBCALL_ID note
afterwards. DCE seem to care about these notes when deleting
insns. After cse1 only the second clobber is deleted (insn 30).

The difference with my patch is that *all* other insns of that libcall
can be deleted by fwprop. So only insn 12 is left from the libcall and
unfortunately stays until reload which assigns si to pseudo 67.


I've tried to copy the REG_LIBCALL_ID note to the second clobber in
lower subreg and was hoping that DCE then would be able to remove the
libcall block as a whole but this didn't work.

But since this clobber is probably just issued to make it explicit
that the whole multiword register is written by the libcall I think
after decomposing that multiword register we can safely remove the
clobber insn anyway - right?! The following patch does this what makes
the pushl %esi/popl %esi pair to disappear in the example above.


Index: gcc/lower-subreg.c
===================================================================
*** gcc/lower-subreg.c.orig     2007-08-03 12:38:10.000000000 +0200
--- gcc/lower-subreg.c  2007-08-03 14:05:16.000000000 +0200
*************** resolve_clobber (rtx pat, rtx insn)
*** 900,910 ****
--- 900,918 ----
    enum machine_mode orig_mode;
    unsigned int words, i;
    int ret;
+   rtx reg_libcall_id_note;

    reg = XEXP (pat, 0);
    if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
      return false;

+   reg_libcall_id_note = find_reg_note (insn, REG_LIBCALL_ID, NULL_RTX);
+   if (resolve_reg_p (reg) && reg_libcall_id_note != NULL_RTX)
+     {
+       delete_insn (insn);
+       return true;
+     }
+
    orig_mode = GET_MODE (reg);
    words = GET_MODE_SIZE (orig_mode);
    words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;


%ebx:

After reload the following two insns exist:

(insn:HI 7 4 8 2 t.c:6 (parallel [
            (set (reg:DI 0 ax [orig:62 low ] [62])
                (zero_extend:DI (mem/c/i:SI (plus:SI (reg/f:SI 6 bp)
                            (const_int 12 [0xc])) [2 low+0 S4 A32])))
            (clobber (reg:CC 17 flags))
        ]) 112 {zero_extendsidi2_32} (nil))

(insn:HI 8 7 35 2 t.c:6 (parallel [
            (set (reg:DI 2 cx [orig:63 high ] [63])
                (zero_extend:DI (mem/c/i:SI (plus:SI (reg/f:SI 6 bp)
                            (const_int 8 [0x8])) [2 high+0 S4 A32])))
            (clobber (reg:CC 17 flags))
        ]) 112 {zero_extendsidi2_32} (nil))


Both gets split:

(insn 39 4 40 2 t.c:6 (set (reg:SI 0 ax [orig:62 low ] [62])
        (mem/c/i:SI (plus:SI (reg/f:SI 6 bp)
                (const_int 12 [0xc])) [2 low+0 S4 A32])) 40 {*movsi_1} (nil))

(insn 40 39 41 2 t.c:6 (set (reg:SI 1 dx [ low+4 ])
        (const_int 0 [0x0])) 40 {*movsi_1} (nil))

(insn 41 40 42 2 t.c:6 (set (reg:SI 2 cx [orig:63 high ] [63])
        (mem/c/i:SI (plus:SI (reg/f:SI 6 bp)
                (const_int 8 [0x8])) [2 high+0 S4 A32])) 40 {*movsi_1} (nil))

(insn 42 41 36 2 t.c:6 (set (reg:SI 3 bx [ high+4 ])
        (const_int 0 [0x0])) 40 {*movsi_1} (nil))

And *directly* afterwards the prologue and epilogue generation pass
runs. The problem seems to be that we are running the pass generating
the register save/restore instructions directly after a pass which
might (and in this case does) generate dead code. I think this problem
would disappear with an intermediate DCE step in between the split
pass and prologue/epilogue generation.

187 dce finally removes insns 40 and 42 but the pushl/popl insns are
already generated:

(insn 39 4 41 2 t.c:6 (set (reg:SI 0 ax [orig:62 low ] [62])
        (mem/c/i:SI (plus:SI (reg/f:SI 6 bp)
                (const_int 12 [0xc])) [2 low+0 S4 A32])) 40 {*movsi_1} (nil))

(insn 41 39 36 2 t.c:6 (set (reg:SI 2 cx [orig:63 high ] [63])
        (mem/c/i:SI (plus:SI (reg/f:SI 6 bp)
                (const_int 8 [0x8])) [2 high+0 S4 A32])) 40 {*movsi_1} (nil))


Bye,

-Andreas-

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] lower-subreg: Decompose multiword shifts
  2007-08-03 14:34   ` Andreas Krebbel
@ 2007-08-03 15:37     ` Michael Matz
  2007-08-07  1:08       ` Ian Lance Taylor
  0 siblings, 1 reply; 17+ messages in thread
From: Michael Matz @ 2007-08-03 15:37 UTC (permalink / raw)
  To: Andreas Krebbel; +Cc: Rask Ingemann Lambertsen, gcc-patches

Hi,

On Fri, 3 Aug 2007, Andreas Krebbel wrote:

> With and without my patch lower subreg decomposes the following
> clobber insn:
> 
> (insn 12 9 10 2 t.c:6 (clobber (reg:DI 61)) -1 (expr_list:REG_LIBCALL_ID (const_int 0 [0x0])
>         (insn_list:REG_LIBCALL 13 (nil))))

That's the required introducing insn of a libcall sequence dealing with 
subregs of multi-word regs, so ...

> into:
> 
> (insn 12 29 30 2 t.c:6 (clobber (reg:SI 67)) -1 (expr_list:REG_LIBCALL_ID (const_int 0 [0x0])
>         (nil)))
> 
> (insn 30 12 10 2 t.c:6 (clobber (reg:SI 68 [+4 ])) -1 (nil))

... this would be wrong of lower-subreg.  I thought it handled libcall 
sequences correctly, Ian?  If lower-subreg manages to decompose _all_ 
writing subreg references in the libcall sequence then the whole libcall 
structure can and should be removed (which includes the introducing 
clobber and the finishing self-move (with the RETVAL note)).  Removing 
only part of that structure is going to cause confusion later on (as 
here).

> I've tried to copy the REG_LIBCALL_ID note to the second clobber in 
> lower subreg and was hoping that DCE then would be able to remove the 
> libcall block as a whole but this didn't work.

As the clobber misses the REG_LIBCALL note (pointing to the end insn of 
the whole libcall sequence) I don't think the REG_LIBCALL_ID notes 
establish anything interesting.  Or well, they do make DCE note that all 
insns with the same ID are required, when one of such is required.  But 
from the looks of it they won't be deleted as a libcall sequence if the 
REG_LIBCALL or REG_RETVAL notes are missing (delete_unmarked_insns).

> But since this clobber is probably just issued to make it explicit
> that the whole multiword register is written by the libcall

That's correct.

> I think after decomposing that multiword register we can safely remove 
> the clobber insn anyway - right?!

If lower-subreg was able to decompose all writes into subregs of the 
clobbered reg in that libcall sequence, then yes.  I don't know if 
lower-subreg ever is not able to do that once it determined that it can 
start transforming at all.


Ciao,
Michael.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] lower-subreg: Decompose multiword shifts
  2007-08-02 16:56 ` Paolo Bonzini
@ 2007-08-03 16:41   ` Andreas Krebbel
  2007-08-03 19:02     ` Paolo Bonzini
  0 siblings, 1 reply; 17+ messages in thread
From: Andreas Krebbel @ 2007-08-03 16:41 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gcc-patches

> +   if (INTVAL (shift_count) < BITS_PER_WORD
> +       || GET_MODE_BITSIZE (GET_MODE (shift_operand)) != 2 * BITS_PER_WORD)
> +     return 0;
> 
> (your code has != in the first branch).  You can turn the code into a 
> move+a shift, which simplifies to two moves if INTVAL (shift_count) == 
> BITS_PER_WORD, in this case.

Sounds like a good idea. Although compared to emitting just move
instructions shifts would be more complex since I probably would need to
use the optab machinery - right?!

Ulrich already proposed to also optimize multiword zeroextends that
way. I'll try to enhance my patch.

Bye,

-Andreas-

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] lower-subreg: Decompose multiword shifts
  2007-08-03 16:41   ` Andreas Krebbel
@ 2007-08-03 19:02     ` Paolo Bonzini
  0 siblings, 0 replies; 17+ messages in thread
From: Paolo Bonzini @ 2007-08-03 19:02 UTC (permalink / raw)
  To: Andreas Krebbel; +Cc: gcc-patches


> Sounds like a good idea. Although compared to emitting just move
> instructions shifts would be more complex since I probably would need to
> use the optab machinery - right?!

Yes, but Kenny just committed to dse.c a patch to use it, so you can 
look at it for an example.

Paolo

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] lower-subreg: Decompose multiword shifts
  2007-08-03 15:37     ` Michael Matz
@ 2007-08-07  1:08       ` Ian Lance Taylor
  0 siblings, 0 replies; 17+ messages in thread
From: Ian Lance Taylor @ 2007-08-07  1:08 UTC (permalink / raw)
  To: Michael Matz; +Cc: Andreas Krebbel, Rask Ingemann Lambertsen, gcc-patches

Michael Matz <matz@suse.de> writes:

> ... this would be wrong of lower-subreg.  I thought it handled libcall 
> sequences correctly, Ian?  If lower-subreg manages to decompose _all_ 
> writing subreg references in the libcall sequence then the whole libcall 
> structure can and should be removed (which includes the introducing 
> clobber and the finishing self-move (with the RETVAL note)).  Removing 
> only part of that structure is going to cause confusion later on (as 
> here).

Well, I think it handles libcalls correctly, but I see now that the
sequences emitted by emit_no_conflict_block get a bit strange.
lower-subreg is going to tend to throw away the work done by
emit_no_conflict_block, in that it will decompose the registers making
the REG_NO_CONFLICT notes useless.  I think the result is still
correct, except that the initial clobbers are now useless.

I'm testing a patch to remove that initial clobber.

Ian

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] lower-subreg: Decompose multiword shifts
  2007-08-02 15:35 [PATCH] lower-subreg: Decompose multiword shifts Andreas Krebbel
                   ` (2 preceding siblings ...)
  2007-08-02 23:16 ` Rask Ingemann Lambertsen
@ 2007-08-07  1:27 ` Ian Lance Taylor
  2007-08-07 10:47   ` Andreas Krebbel
  3 siblings, 1 reply; 17+ messages in thread
From: Ian Lance Taylor @ 2007-08-07  1:27 UTC (permalink / raw)
  To: Andreas Krebbel; +Cc: gcc-patches

Andreas Krebbel <Andreas.Krebbel@de.ibm.com> writes:

> 2007-08-02  Andreas Krebbel  <krebbel1@de.ibm.com>
> 
> 	* lower-subreg.c (resolve_subreg_use): Remove assertion.
> 	(find_decomposable_shift, resolve_multiword_shift): New functions.
> 	(decompose_multiword_subregs): Use the functions above to decompose
> 	multiword shifts.
> 
> 
> 2007-08-02  Andreas Krebbel  <krebbel1@de.ibm.com>
> 
> 	* gcc.dg/multiword-shifts.c: New testcase.

> + /* Decompose the word wide shift (in INSN) of a multiword pseudo into
> +   if (resolve_reg_p (SET_DEST (set)))
> +     {
> +       gcc_assert (GET_CODE (SET_DEST (set)) == CONCATN);
> + 
> +       dest_reg = XVECEXP (SET_DEST (set), 0, 1 - src_reg_num);
> +       dest_zero = XVECEXP (SET_DEST (set), 0, src_reg_num);
> +     }
> +   else
> +     {
> +       dest_reg = gen_rtx_SUBREG (word_mode, SET_DEST (set), offset1);
> +       dest_zero = gen_rtx_SUBREG (word_mode, SET_DEST (set), offset2);
> +     }
> + 
> +   if (resolve_reg_p (shift_operand))
> +     {
> +       gcc_assert (GET_CODE (shift_operand) == CONCATN);
> + 
> +       src_reg = XVECEXP (shift_operand, 0, src_reg_num);
> +     }
> +   else
> +     src_reg = gen_rtx_SUBREG (word_mode, shift_operand, offset2);


I think this should just be

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                          GET_MODE (SET_DEST (dest)),
                                          offset1);
  dest_zero = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                           GET_MODE (SET_DEST (dest)),
                                           offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, shift_operand,
                                         GET_MODE (shift_operand),
                                         offset2);

The patch is OK with that change, assuming it still passes bootstrap
and testsuite.

Thanks.

Ian

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] lower-subreg: Decompose multiword shifts
  2007-08-02 19:21   ` Andreas Krebbel
@ 2007-08-07  1:41     ` Rask Ingemann Lambertsen
  2007-08-07  8:29       ` Andreas Krebbel
  0 siblings, 1 reply; 17+ messages in thread
From: Rask Ingemann Lambertsen @ 2007-08-07  1:41 UTC (permalink / raw)
  To: Andreas Krebbel; +Cc: gcc-patches

On Thu, Aug 02, 2007 at 09:21:40PM +0200, Andreas Krebbel wrote:
> Hello,
> 
> > > + typedef unsigned int u32;
> > > + typedef unsigned long long u64;
> > 
> >    Please use uint32_t and uint64_t from stdint.h (or some other reliable
> > means of getting types of the sizes you want).
> 
> Yes the names I've choosen aren't correct for any platform but I think the types are nevertheless a good choice

   For starters, some targets have 16 bits wide int so your testcase will
fail there with excess errors. Since your shift counts and constants in the
testcase assume 32 bits for u32 and 64 bits for u64, you should use the
stdint.h types because they work on all targets.

> considering that I need something as wide as register and something else with twice the size.

   Somewhat surpricingly, a fairly common 64-bit target such as x86_64 has a
long long with a width of only 64 bits rather than the expected 128 bits. If
you scanned the RTL dumps to check that the decomposition happened, x86_64
would fail the testcase.

-- 
Rask Ingemann Lambertsen

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] lower-subreg: Decompose multiword shifts
  2007-08-07  1:41     ` Rask Ingemann Lambertsen
@ 2007-08-07  8:29       ` Andreas Krebbel
  0 siblings, 0 replies; 17+ messages in thread
From: Andreas Krebbel @ 2007-08-07  8:29 UTC (permalink / raw)
  To: Rask Ingemann Lambertsen; +Cc: gcc-patches

Hello,

>    For starters, some targets have 16 bits wide int so your testcase will
> fail there with excess errors. Since your shift counts and constants in the
> testcase assume 32 bits for u32 and 64 bits for u64, you should use the
> stdint.h types because they work on all targets.

But I don't want to test 32 shifts on *all* targets. I just wanted it
on targets where a register is 32 bits wide. And yes I completely
forgot to limit the testcase to those. I'll restrict the testcase to
ilp32 targets. Although this doesn't mean that a register is always 32
bit it at least disables the testcase for targets where it would fail.

> > considering that I need something as wide as register and something else with twice the size.
> 
>    Somewhat surpricingly, a fairly common 64-bit target such as x86_64 has a
> long long with a width of only 64 bits rather than the expected 128 bits. If
> you scanned the RTL dumps to check that the decomposition happened, x86_64
> would fail the testcase.

Yes I know and since it isn't easy to express a 128 bit integer data
type the testcase should be disabled for 64bit targets as well.

Bye,

-Andreas-

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] lower-subreg: Decompose multiword shifts
  2007-08-07  1:27 ` Ian Lance Taylor
@ 2007-08-07 10:47   ` Andreas Krebbel
  2007-08-08 18:40     ` Andreas Tobler
  0 siblings, 1 reply; 17+ messages in thread
From: Andreas Krebbel @ 2007-08-07 10:47 UTC (permalink / raw)
  To: Ian Lance Taylor; +Cc: gcc-patches

Hello,

> The patch is OK with that change, assuming it still passes bootstrap
> and testsuite.

Thanks for reviewing. I've applied the attached patch after retesting
on s390, s390x, i686 and x86_64.

2007-08-07  Andreas Krebbel  <krebbel1@de.ibm.com>

	* lower-subreg.c (resolve_subreg_use): Remove assertion.
	(find_decomposable_shift_zext, resolve_shift_zext): New functions.
	(decompose_multiword_subregs): Use the functions above to decompose
	multiword shifts and zero-extends.

2007-08-07  Andreas Krebbel  <krebbel1@de.ibm.com>

	* gcc.dg/multiword-1.c: New testcase.


Index: gcc/lower-subreg.c
===================================================================
*** gcc/lower-subreg.c.orig	2007-08-06 15:19:12.000000000 +0200
--- gcc/lower-subreg.c	2007-08-07 12:30:53.000000000 +0200
*************** resolve_subreg_use (rtx *px, void *data)
*** 525,532 ****
      {
        /* Return 1 to the caller to indicate that we found a direct
  	 reference to a register which is being decomposed.  This can
! 	 happen inside notes.  */
!       gcc_assert (!insn);
        return 1;
      }
  
--- 525,532 ----
      {
        /* Return 1 to the caller to indicate that we found a direct
  	 reference to a register which is being decomposed.  This can
! 	 happen inside notes, multiword shift or zero-extend
! 	 instructions.  */
        return 1;
      }
  
*************** resolve_use (rtx pat, rtx insn)
*** 944,949 ****
--- 944,1098 ----
    return false;
  }
  
+ /* Checks if INSN is a decomposable multiword-shift or zero-extend and
+    sets the decomposable_context bitmap accordingly.  A non-zero value
+    is returned if a decomposable insn has been found.  */
+ 
+ static int
+ find_decomposable_shift_zext (rtx insn)
+ {
+   rtx set;
+   rtx op;
+   rtx op_operand;
+ 
+   set = single_set (insn);
+   if (!set)
+     return 0;
+ 
+   op = SET_SRC (set);
+   if (GET_CODE (op) != ASHIFT
+       && GET_CODE (op) != LSHIFTRT
+       && GET_CODE (op) != ZERO_EXTEND)
+     return 0;
+ 
+   op_operand = XEXP (op, 0);
+   if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
+       || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
+       || HARD_REGISTER_NUM_P (REGNO (op_operand))
+       || !SCALAR_INT_MODE_P (GET_MODE (op)))
+     return 0;
+ 
+   if (GET_CODE (op) == ZERO_EXTEND)
+     {
+       if (GET_MODE (op_operand) != word_mode
+ 	  || GET_MODE_BITSIZE (GET_MODE (op)) != 2 * BITS_PER_WORD)
+ 	return 0;
+     }
+   else /* left or right shift */
+     {
+       if (GET_CODE (XEXP (op, 1)) != CONST_INT
+ 	  || INTVAL (XEXP (op, 1)) < BITS_PER_WORD
+ 	  || GET_MODE_BITSIZE (GET_MODE (op_operand)) != 2 * BITS_PER_WORD)
+ 	return 0;
+     }
+ 
+   bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
+ 
+   if (GET_CODE (op) != ZERO_EXTEND)
+     bitmap_set_bit (decomposable_context, REGNO (op_operand));
+ 
+   return 1;
+ }
+ 
+ /* Decompose a more than word wide shift (in INSN) of a multiword
+    pseudo or a multiword zero-extend of a wordmode pseudo into a move
+    and 'set to zero' insn.  Return a pointer to the new insn when a
+    replacement was done.  */
+ 
+ static rtx
+ resolve_shift_zext (rtx insn)
+ {
+   rtx set;
+   rtx op;
+   rtx op_operand;
+   rtx insns;
+   rtx src_reg, dest_reg, dest_zero;
+   int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
+ 
+   set = single_set (insn);
+   if (!set)
+     return NULL_RTX;
+ 
+   op = SET_SRC (set);
+   if (GET_CODE (op) != ASHIFT
+       && GET_CODE (op) != LSHIFTRT
+       && GET_CODE (op) != ZERO_EXTEND)
+     return NULL_RTX;
+ 
+   op_operand = XEXP (op, 0);
+ 
+   if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
+     return NULL_RTX;
+ 
+   /* src_reg_num is the number of the word mode register which we
+      are operating on.  For a left shift and a zero_extend on little
+      endian machines this is register 0.  */
+   src_reg_num = GET_CODE (op) == LSHIFTRT ? 1 : 0;
+ 
+   if (WORDS_BIG_ENDIAN)
+     src_reg_num = 1 - src_reg_num;
+ 
+   if (GET_CODE (op) == ZERO_EXTEND)
+     dest_reg_num = src_reg_num;
+   else
+     dest_reg_num = 1 - src_reg_num;
+ 
+   offset1 = UNITS_PER_WORD * dest_reg_num;
+   offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
+   src_offset = UNITS_PER_WORD * src_reg_num;
+ 
+   if (WORDS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+     {
+       offset1 += UNITS_PER_WORD - 1;
+       offset2 += UNITS_PER_WORD - 1;
+       src_offset += UNITS_PER_WORD - 1;
+     }
+ 
+   start_sequence ();
+ 
+   dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
+                                           GET_MODE (SET_DEST (set)),
+                                           offset1);
+   dest_zero = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
+                                            GET_MODE (SET_DEST (set)),
+                                            offset2);
+   src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
+                                          GET_MODE (op_operand),
+                                          src_offset);
+   if (GET_CODE (op) != ZERO_EXTEND)
+     {
+       int shift_count = INTVAL (XEXP (op, 1));
+       if (shift_count > BITS_PER_WORD)
+ 	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
+ 				LSHIFT_EXPR : RSHIFT_EXPR,
+ 				word_mode, src_reg,
+ 				build_int_cst (NULL_TREE,
+ 					       shift_count - BITS_PER_WORD),
+ 				dest_reg, 1);
+     }
+ 
+   if (dest_reg != src_reg)
+     emit_move_insn (dest_reg, src_reg);
+   emit_move_insn (dest_zero, CONST0_RTX (word_mode));
+   insns = get_insns ();
+ 
+   end_sequence ();
+ 
+   emit_insn_before (insns, insn);
+ 
+   if (dump_file)
+     {
+       rtx in;
+       fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
+       for (in = insns; in != insn; in = NEXT_INSN (in))
+ 	fprintf (dump_file, "%d ", INSN_UID (in));
+       fprintf (dump_file, "\n");
+     }
+ 
+   delete_insn (insn);
+   return insns;
+ }
+ 
  /* Look for registers which are always accessed via word-sized SUBREGs
     or via copies.  Decompose these registers into several word-sized
     pseudo-registers.  */
*************** decompose_multiword_subregs (void)
*** 1003,1008 ****
--- 1152,1160 ----
  	      || GET_CODE (PATTERN (insn)) == USE)
  	    continue;
  
+ 	  if (find_decomposable_shift_zext (insn))
+ 	    continue;
+ 
  	  recog_memoized (insn);
  	  extract_insn (insn);
  
*************** decompose_multiword_subregs (void)
*** 1152,1157 ****
--- 1304,1322 ----
  			    SET_BIT (sub_blocks, bb->index);
  			}
  		    }
+ 		  else
+ 		    {
+ 		      rtx decomposed_shift;
+ 
+ 		      decomposed_shift = resolve_shift_zext (insn);
+ 		      if (decomposed_shift != NULL_RTX)
+ 			{
+ 			  changed = true;
+ 			  insn = decomposed_shift;
+ 			  recog_memoized (insn);
+ 			  extract_insn (insn);
+ 			}
+ 		    }
  
  		  for (i = recog_data.n_operands - 1; i >= 0; --i)
  		    for_each_rtx (recog_data.operand_loc[i],
Index: gcc/testsuite/gcc.dg/multiword-1.c
===================================================================
*** /dev/null	1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/multiword-1.c	2007-08-07 10:15:23.000000000 +0200
***************
*** 0 ****
--- 1,68 ----
+ /* { dg-do run } */
+ /* { dg-options "-O3" } */
+ /* { dg-require-effective-target ilp32 } */
+ 
+ typedef unsigned int u32;
+ typedef unsigned long long u64;
+ 
+ u64 __attribute__((noinline))
+ foo (u32 high, u32 low)
+ {
+   return ((u64)high << 32) | low;
+ }
+ 
+ u32 __attribute__((noinline))
+ right (u64 t)
+ {
+   return (u32)(t >> 32);
+ }
+ 
+ u64 __attribute__((noinline))
+ left (u32 t)
+ {
+   return (u64)t << 32;
+ }
+ 
+ u32 __attribute__((noinline))
+ right2 (u64 t)
+ {
+   return (u32)(t >> 40);
+ }
+ 
+ u64 __attribute__((noinline))
+ left2 (u32 t)
+ {
+   return (u64)t << 40;
+ }
+ 
+ u64 __attribute__((noinline))
+ zeroextend (u32 t)
+ {
+   return (u64)t;
+ }
+ 
+ extern void abort ();
+ 
+ int
+ main ()
+ {
+   if (foo (13000, 12000) != 55834574860000ULL)
+     abort ();
+ 
+   if (right (55834574860000ULL) != 13000)
+     abort ();
+ 
+   if (left (13000) != 55834574848000ULL)
+     abort ();
+ 
+   if (right2 (55834574860000ULL) != 50)
+     abort ();
+ 
+   if (left2 (13000) != 14293651161088000ULL)
+     abort ();
+ 
+   if (zeroextend (13000) != 13000ULL)
+     abort ();
+ 
+   return 0;
+ }

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] lower-subreg: Decompose multiword shifts
  2007-08-07 10:47   ` Andreas Krebbel
@ 2007-08-08 18:40     ` Andreas Tobler
  2007-08-09  7:45       ` Andreas Krebbel
  0 siblings, 1 reply; 17+ messages in thread
From: Andreas Tobler @ 2007-08-08 18:40 UTC (permalink / raw)
  To: Andreas Krebbel; +Cc: Ian Lance Taylor, gcc-patches

Andreas Krebbel wrote:
> Hello,
> 
>> The patch is OK with that change, assuming it still passes bootstrap
>> and testsuite.
> 
> Thanks for reviewing. I've applied the attached patch after retesting
> on s390, s390x, i686 and x86_64.
> 
> 2007-08-07  Andreas Krebbel  <krebbel1@de.ibm.com>
> 
> 	* lower-subreg.c (resolve_subreg_use): Remove assertion.
> 	(find_decomposable_shift_zext, resolve_shift_zext): New functions.
> 	(decompose_multiword_subregs): Use the functions above to decompose
> 	multiword shifts and zero-extends.

I get a bootstrap failure on sparc-solaris just after this commit 
(127270), before it was ok.

Starting program: /export/data/devel-test/gcc-svn/objdir/gcc/cc1 
-fpreprocessed libgcc2.i -quiet -dumpbase libgcc2.c -mcpu=v9 
-auxbase-strip _fixunssfdi.o -g -g -g -O2 -O2 -O2 -W -Wall 
-Wwrite-strings -Wstrict-prototypes -Wmissing-prototypes 
-Wold-style-definition -version -fkeep-inline-functions -fPIC -o libgcc2.s
GNU C version 4.3.0 20070807 (experimental) (sparc-sun-solaris2.8)
         compiled by GNU C version 3.4.3, GMP version 4.2.1, MPFR 
version 2.2.1.
GGC heuristics: --param ggc-min-expand=30 --param ggc-min-heapsize=4096
Compiler executable checksum: c8bf21b328ed0b35dd163e9c65e923cd
/export/data/devel-test/gcc-svn/trunk/libgcc/../gcc/libgcc2.c: In 
function '__fixunssfdi':
/export/data/devel-test/gcc-svn/trunk/libgcc/../gcc/libgcc2.c:1348: 
internal compiler error: in simplify_subreg, at simplify-rtx.c:4679
Please submit a full bug report,
with preprocessed source if appropriate.
See <URL:http://gcc.gnu.org/bugs.html> for instructions.

Program exited with code 04.

Luckily the above code compiles with O0 :(

Shall I attach the .i ?

Andreas

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] lower-subreg: Decompose multiword shifts
  2007-08-08 18:40     ` Andreas Tobler
@ 2007-08-09  7:45       ` Andreas Krebbel
  0 siblings, 0 replies; 17+ messages in thread
From: Andreas Krebbel @ 2007-08-09  7:45 UTC (permalink / raw)
  To: Andreas Tobler; +Cc: gcc-patches

Hello,

> Shall I attach the .i ?

Yes please that would be helpful. I'll try to build a cross gcc.

Bye,

-Andreas-

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2007-08-09  7:45 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-08-02 15:35 [PATCH] lower-subreg: Decompose multiword shifts Andreas Krebbel
2007-08-02 16:56 ` Paolo Bonzini
2007-08-03 16:41   ` Andreas Krebbel
2007-08-03 19:02     ` Paolo Bonzini
2007-08-02 18:52 ` Rask Ingemann Lambertsen
2007-08-02 19:21   ` Andreas Krebbel
2007-08-07  1:41     ` Rask Ingemann Lambertsen
2007-08-07  8:29       ` Andreas Krebbel
2007-08-02 23:16 ` Rask Ingemann Lambertsen
2007-08-03 12:03   ` Michael Matz
2007-08-03 14:34   ` Andreas Krebbel
2007-08-03 15:37     ` Michael Matz
2007-08-07  1:08       ` Ian Lance Taylor
2007-08-07  1:27 ` Ian Lance Taylor
2007-08-07 10:47   ` Andreas Krebbel
2007-08-08 18:40     ` Andreas Tobler
2007-08-09  7:45       ` Andreas Krebbel

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).