* [PATCH] lower-subreg: Decompose multiword shifts and zero-extends
@ 2007-08-06 14:03 Andreas Krebbel
2007-08-06 15:01 ` Paolo Bonzini
0 siblings, 1 reply; 2+ messages in thread
From: Andreas Krebbel @ 2007-08-06 14:03 UTC (permalink / raw)
To: gcc-patches
Hello,
here is an enhanced version of my patch to decompose multiword shifts.
This version also deals with multiword zero-extends of wordmode
pseudos, as suggested by Ulrich, and with shifts by more than a word
width, as suggested by Paolo.
Handling zero-extends as well also fixes the problem Rask was seeing
on i386:
With the new patch and -O2 -march=k6 -m32 I get (for the first
function of the attached testcase):
foo:
pushl %ebp
movl %esp, %ebp
movl 12(%ebp), %eax
movl 8(%ebp), %edx
leave
ret
instead of (without any patch):
foo:
pushl %ebp # 39 *pushsi2 [length = 1]
movl %esp, %ebp # 40 *movsi_1/1 [length = 2]
movl 8(%ebp), %edx # 37 *movsi_1/1 [length = 3]
movl 12(%ebp), %eax # 10 *movsi_1/1 [length = 3]
leave # 43 leave [length = 1]
movl %edx, %ecx # 46 *movsi_1/1 [length = 2]
movl %ecx, %edx # 11 *movsi_1/1 [length = 2]
ret # 44 return_internal [length = 1]
So there is also a small improvement here.
Bootstrapped on s390, s390x, i686 and x86_64.
No testsuite regressions.
Ok for mainline?
Bye,
-Andreas-
2007-08-06 Andreas Krebbel <krebbel1@de.ibm.com>
* lower-subreg.c (resolve_subreg_use): Remove assertion.
(find_decomposable_shift_zeroextend,
resolve_shift_zeroextend): New functions.
(decompose_multiword_subregs): Use the functions above to decompose
multiword shifts and zero-extends.
Index: gcc/lower-subreg.c
===================================================================
*** gcc/lower-subreg.c.orig 2007-08-06 10:56:55.000000000 +0200
--- gcc/lower-subreg.c 2007-08-06 14:34:01.000000000 +0200
*************** resolve_subreg_use (rtx *px, void *data)
*** 525,532 ****
{
/* Return 1 to the caller to indicate that we found a direct
reference to a register which is being decomposed. This can
! happen inside notes. */
! gcc_assert (!insn);
return 1;
}
--- 525,531 ----
{
/* Return 1 to the caller to indicate that we found a direct
reference to a register which is being decomposed. This can
! happen inside notes or multiword shift instructions. */
return 1;
}
*************** resolve_use (rtx pat, rtx insn)
*** 944,949 ****
--- 943,1105 ----
return false;
}
+ /* Checks if INSN is a decomposable multiword-shift or zero-extend and
+ sets the decomposable_context bitmap accordingly. A non-zero value
+ is returned if a decomposable insn has been found. */
+
+ static int
+ find_decomposable_shift_zeroextend (rtx insn)
+ {
+ rtx set;
+ rtx op;
+ rtx op_operand;
+
+ set = single_set (insn);
+ if (!set)
+ return 0;
+
+ op = SET_SRC (set);
+ if (GET_CODE (op) != ASHIFT
+ && GET_CODE (op) != LSHIFTRT
+ && GET_CODE (op) != ZERO_EXTEND)
+ return 0;
+
+ op_operand = XEXP (op, 0);
+ if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
+ || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
+ || HARD_REGISTER_NUM_P (REGNO (op_operand))
+ || !SCALAR_INT_MODE_P (GET_MODE (op)))
+ return 0;
+
+ if (GET_CODE (op) == ZERO_EXTEND)
+ {
+ if (GET_MODE (op_operand) != word_mode
+ || GET_MODE_BITSIZE (GET_MODE (op)) != 2 * BITS_PER_WORD)
+ return 0;
+ }
+ else /* left or right shift */
+ {
+ if (GET_CODE (XEXP (op, 1)) != CONST_INT
+ || INTVAL (XEXP (op, 1)) < BITS_PER_WORD
+ || GET_MODE_BITSIZE (GET_MODE (op_operand)) != 2 * BITS_PER_WORD)
+ return 0;
+ }
+
+ bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
+
+ if (GET_CODE (op) != ZERO_EXTEND)
+ bitmap_set_bit (decomposable_context, REGNO (op_operand));
+
+ return 1;
+ }
+
+ /* Decompose a shift by at least BITS_PER_WORD bits (in INSN) of a multiword
+ pseudo, or a multiword zero-extend of a wordmode pseudo, into a word move
+ (plus a word-mode shift for larger counts) and a 'set to zero' insn. Return
+ the first new insn if a replacement was done, NULL_RTX otherwise. */
+
+ static rtx
+ resolve_shift_zeroextend (rtx insn)
+ {
+ rtx set;
+ rtx op;
+ rtx op_operand;
+ rtx insns;
+ rtx src_reg, dest_reg, dest_zero;
+ int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
+
+ set = single_set (insn);
+ if (!set)
+ return NULL_RTX;
+
+ op = SET_SRC (set);
+ if (GET_CODE (op) != ASHIFT
+ && GET_CODE (op) != LSHIFTRT
+ && GET_CODE (op) != ZERO_EXTEND)
+ return NULL_RTX;
+
+ op_operand = XEXP (op, 0);
+
+ if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
+ return NULL_RTX;
+
+ /* src_reg_num is the number of the word mode register which we
+ are operating on. For a left shift and a zero_extend on little
+ endian machines this is register 0. */
+ src_reg_num = GET_CODE (op) == LSHIFTRT ? 1 : 0;
+
+ if (WORDS_BIG_ENDIAN)
+ src_reg_num = 1 - src_reg_num;
+
+ if (GET_CODE (op) == ZERO_EXTEND)
+ dest_reg_num = src_reg_num;
+ else
+ dest_reg_num = 1 - src_reg_num;
+
+ offset1 = UNITS_PER_WORD * dest_reg_num;
+ offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
+ src_offset = UNITS_PER_WORD * src_reg_num;
+
+ if (WORDS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+ {
+ offset1 += UNITS_PER_WORD - 1;
+ offset2 += UNITS_PER_WORD - 1;
+ src_offset += UNITS_PER_WORD - 1;
+ }
+
+ start_sequence ();
+
+ if (resolve_reg_p (SET_DEST (set)))
+ {
+ gcc_assert (GET_CODE (SET_DEST (set)) == CONCATN);
+
+ dest_reg = XVECEXP (SET_DEST (set), 0, dest_reg_num);
+ dest_zero = XVECEXP (SET_DEST (set), 0, 1 - dest_reg_num);
+ }
+ else
+ {
+ dest_reg = gen_rtx_SUBREG (word_mode, SET_DEST (set), offset1);
+ dest_zero = gen_rtx_SUBREG (word_mode, SET_DEST (set), offset2);
+ }
+
+ if (resolve_reg_p (op_operand))
+ {
+ gcc_assert (GET_CODE (op_operand) == CONCATN);
+
+ src_reg = XVECEXP (op_operand, 0, src_reg_num);
+ }
+ else
+ src_reg = gen_rtx_SUBREG (word_mode, op_operand, src_offset);
+
+ if (GET_CODE (op) != ZERO_EXTEND)
+ {
+ int shift_count = INTVAL (XEXP (op, 1));
+ if (shift_count > BITS_PER_WORD)
+ src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
+ LSHIFT_EXPR : RSHIFT_EXPR,
+ word_mode, src_reg,
+ build_int_cst (NULL_TREE,
+ shift_count - BITS_PER_WORD),
+ dest_reg, 1);
+ }
+
+ if (dest_reg != src_reg)
+ emit_move_insn (dest_reg, src_reg);
+ emit_move_insn (dest_zero, CONST0_RTX (word_mode));
+ insns = get_insns ();
+
+ end_sequence ();
+
+ emit_insn_before (insns, insn);
+
+ if (dump_file)
+ fprintf (dump_file, "; Replacing insn: %d with insns: %d and %d\n",
+ INSN_UID (insn), INSN_UID (insns), INSN_UID (NEXT_INSN (insns)));
+
+ delete_insn (insn);
+ return insns;
+ }
+
/* Look for registers which are always accessed via word-sized SUBREGs
or via copies. Decompose these registers into several word-sized
pseudo-registers. */
*************** decompose_multiword_subregs (void)
*** 1003,1008 ****
--- 1159,1167 ----
|| GET_CODE (PATTERN (insn)) == USE)
continue;
+ if (find_decomposable_shift_zeroextend (insn))
+ continue;
+
recog_memoized (insn);
extract_insn (insn);
*************** decompose_multiword_subregs (void)
*** 1152,1157 ****
--- 1311,1329 ----
SET_BIT (sub_blocks, bb->index);
}
}
+ else
+ {
+ rtx decomposed_shift;
+
+ decomposed_shift = resolve_shift_zeroextend (insn);
+ if (decomposed_shift != NULL_RTX)
+ {
+ changed = true;
+ insn = decomposed_shift;
+ recog_memoized (insn);
+ extract_insn (insn);
+ }
+ }
for (i = recog_data.n_operands - 1; i >= 0; --i)
for_each_rtx (recog_data.operand_loc[i],
Index: gcc/testsuite/gcc.dg/multiword-1.c
===================================================================
*** /dev/null 1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/multiword-1.c 2007-08-06 10:57:17.000000000 +0200
***************
*** 0 ****
--- 1,67 ----
+ /* { dg-do run } */
+ /* { dg-options "-O3" } */
+
+ typedef unsigned int u32;
+ typedef unsigned long long u64;
+
+ u64 __attribute__((noinline))
+ foo (u32 high, u32 low)
+ {
+ return ((u64)high << 32) | low;
+ }
+
+ u32 __attribute__((noinline))
+ right (u64 t)
+ {
+ return (u32)(t >> 32);
+ }
+
+ u64 __attribute__((noinline))
+ left (u32 t)
+ {
+ return (u64)t << 32;
+ }
+
+ u32 __attribute__((noinline))
+ right2 (u64 t)
+ {
+ return (u32)(t >> 40);
+ }
+
+ u64 __attribute__((noinline))
+ left2 (u32 t)
+ {
+ return (u64)t << 40;
+ }
+
+ u64 __attribute__((noinline))
+ zeroextend (u32 t)
+ {
+ return (u64)t;
+ }
+
+ extern void abort ();
+
+ int
+ main ()
+ {
+ if (foo (13000, 12000) != 55834574860000ULL)
+ abort ();
+
+ if (right (55834574860000ULL) != 13000)
+ abort ();
+
+ if (left (13000) != 55834574848000ULL)
+ abort ();
+
+ if (right2 (55834574860000ULL) != 50)
+ abort ();
+
+ if (left2 (13000) != 14293651161088000ULL)
+ abort ();
+
+ if (zeroextend (13000) != 13000ULL)
+ abort ();
+
+ return 0;
+ }
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] lower-subreg: Decompose multiword shifts and zero-extends
2007-08-06 14:03 [PATCH] lower-subreg: Decompose multiword shifts and zero-extends Andreas Krebbel
@ 2007-08-06 15:01 ` Paolo Bonzini
0 siblings, 0 replies; 2+ messages in thread
From: Paolo Bonzini @ 2007-08-06 15:01 UTC (permalink / raw)
To: Andreas Krebbel; +Cc: gcc-patches
> * lower-subreg.c (resolve_subreg_use): Remove assertion.
> (find_decomposable_shift_zeroextend,
> resolve_shift_zeroextend): New functions.
Simple naming issue: I would call the functions
find_decomposable_shift_or_zext and resolve_shift_or_zext (see current
uses in double_int_zext and zext_optab).
Paolo
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2007-08-06 15:01 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-08-06 14:03 [PATCH] lower-subreg: Decompose multiword shifts and zero-extends Andreas Krebbel
2007-08-06 15:01 ` Paolo Bonzini
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).