public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [SH] PR 49468 - Integer SI abs
@ 2011-09-26  3:57 Oleg Endo
  2011-09-27 14:46 ` Kaz Kojima
  0 siblings, 1 reply; 4+ messages in thread
From: Oleg Endo @ 2011-09-26  3:57 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1028 bytes --]

Hello, 

The attached patch improves the generated code for integer abs
operations on SH, in particular SH4. There was already some code that
was supposed to utilize SH's conditional execution it but it was never
triggered, because the standard branch-free abs code was generated at a
very early stage.
Since the DI mode part of the original patch is causing problems I've
stripped it out for now.

Tested with make -k check RUNTESTFLAGS="--target_board=sh-sim
\{-m2,-m2a-single,-m4-single,-m4a-single\}\{-mb,-ml\}" with no new
failures.

CSiBE code size tests show small decreases in a couple of places, with
the highest drop being in libpng-1.2.5,pngrutil (15800 -> 15712) for
-m4-single. 

Cheers,
Oleg


2011-09-26  Oleg Endo  <oleg.endo@t-online.de>
       
	* config/sh/sh.md (negdi2): Moved expansion into split to
	allow more combination options.  Added T_REG clobber.
	(*negdi2, abssi2, *abssi2, *negabssi2): Added.
	(cneg): Changed from insn to insn_and_split.  Renamed to
	negsi_cond.  Added alternative for non-SH4.

[-- Attachment #2: sh_abs_si.patch --]
[-- Type: text/x-patch, Size: 4830 bytes --]

Index: gcc/config/sh/sh.md
===================================================================
--- gcc/config/sh/sh.md	(revision 179143)
+++ gcc/config/sh/sh.md	(working copy)
@@ -4282,28 +4282,39 @@
   "sub	r63, %1, %0"
   [(set_attr "type" "arith_media")])
 
+
+
+;; Don't expand immediately because otherwise neg:DI (abs:DI) will not be
+;; combined.
 (define_expand "negdi2"
-  [(set (match_operand:DI 0 "arith_reg_operand" "")
-	(neg:DI (match_operand:DI 1 "arith_reg_operand" "")))]
+  [(set (match_operand:DI 0 "arith_reg_dest" "")
+	(neg:DI (match_operand:DI 1 "arith_reg_operand" "")))
+   (clobber (reg:SI T_REG))]
   ""
+  "")
+
+(define_insn_and_split "*negdi2"
+  [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+	(neg:DI (match_operand:DI 1 "arith_reg_operand" "r")))]
+  "TARGET_SH1"
+  "#"
+  "TARGET_SH1"
+  [(const_int 0)]
   "
 {
-  if (TARGET_SH1)
-    {
-      int low_word = (TARGET_LITTLE_ENDIAN ? 0 : 1);
-      int high_word = (TARGET_LITTLE_ENDIAN ? 1 : 0);
+  int low_word = (TARGET_LITTLE_ENDIAN ? 0 : 1);
+  int high_word = (TARGET_LITTLE_ENDIAN ? 1 : 0);
 
-      rtx low_src = operand_subword (operands[1], low_word, 0, DImode);
-      rtx high_src = operand_subword (operands[1], high_word, 0, DImode);
+  rtx low_src = operand_subword (operands[1], low_word, 0, DImode);
+  rtx high_src = operand_subword (operands[1], high_word, 0, DImode);
 
-      rtx low_dst = operand_subword (operands[0], low_word, 1, DImode);
-      rtx high_dst = operand_subword (operands[0], high_word, 1, DImode);
+  rtx low_dst = operand_subword (operands[0], low_word, 1, DImode);
+  rtx high_dst = operand_subword (operands[0], high_word, 1, DImode);
 
-      emit_insn (gen_clrt ());
-      emit_insn (gen_negc (low_dst, low_src));
-      emit_insn (gen_negc (high_dst, high_src));
-      DONE;
-    }
+  emit_insn (gen_clrt ());
+  emit_insn (gen_negc (low_dst, low_src));
+  emit_insn (gen_negc (high_dst, high_src));
+  DONE;
 }")
 
 (define_insn "negsi2"
@@ -4326,27 +4337,77 @@
 		(const_int -1)))]
   "TARGET_SHMEDIA" "")
 
-/* The SH4 202 can do zero-offset branches without pipeline stalls.
-   This can be used as some kind of conditional execution, which is useful
-   for abs.  */
-(define_split
+(define_expand "abssi2"
   [(set (match_operand:SI 0 "arith_reg_dest" "")
-	(plus:SI (xor:SI (neg:SI (reg:SI T_REG))
-			 (match_operand:SI 1 "arith_reg_operand" ""))
-		 (reg:SI T_REG)))]
-  "TARGET_HARD_SH4"
+  	(abs:SI (match_operand:SI 1 "arith_reg_operand" "")))
+   (clobber (reg:SI T_REG))]
+  ""
+  "")
+
+(define_insn_and_split "*abssi2"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+  	(abs:SI (match_operand:SI 1 "arith_reg_operand" "r")))]
+  "TARGET_SH1"
+  "#"
+  "TARGET_SH1"
   [(const_int 0)]
-  "emit_insn (gen_movsi_i (operands[0], operands[1]));
-   emit_insn (gen_cneg (operands[0], operands[0], operands[0]));
-   DONE;")
+  "
+{
+  emit_insn (gen_cmpgesi_t (operands[1], const0_rtx));
+  emit_insn (gen_negsi_cond (operands[0], operands[1], operands[1],
+				 const1_rtx));
+  DONE;
+}")
 
-(define_insn "cneg"
+(define_insn_and_split "*negabssi2"
   [(set (match_operand:SI 0 "arith_reg_dest" "=r")
-	(if_then_else:SI (eq:SI (reg:SI T_REG) (const_int 0))
-		      (match_operand:SI 1 "arith_reg_operand" "0")
-		      (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
+  	(neg:SI (abs:SI (match_operand:SI 1 "arith_reg_operand" "r"))))]
+  "TARGET_SH1"
+  "#"
+  "TARGET_SH1"
+  [(const_int 0)]
+  "
+{
+  emit_insn (gen_cmpgesi_t (operands[1], const0_rtx));
+  emit_insn (gen_negsi_cond (operands[0], operands[1], operands[1],
+				 const0_rtx));
+  DONE;
+}")
+
+
+;; The SH4 202 can do zero-offset branches without pipeline stalls.
+;; This can be used as some kind of conditional execution, which is useful
+;; for abs.
+;; Actually the instruction scheduling should decide whether to use a
+;; zero-offset branch or not for any generic case involving a single
+;; instruction on SH4 202.
+
+(define_insn_and_split "negsi_cond"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+	(if_then_else:SI (eq:SI (reg:SI T_REG)
+					  (match_operand:SI 3 "const_int_operand" "M,N"))
+	 (match_operand:SI 1 "arith_reg_operand" "0,0")
+	 (neg:SI (match_operand:SI 2 "arith_reg_operand" "r,r"))))]
   "TARGET_HARD_SH4"
-  "bf 0f\;neg %2,%0\\n0:"
+  "@
+	bt 0f\;neg %2,%0\\n0:
+	bf 0f\;neg %2,%0\\n0:"
+  "!TARGET_HARD_SH4"
+  [(const_int 0)]
+  "
+{
+  rtx skip_neg_label = gen_label_rtx ();
+
+  emit_insn (gen_movsi (operands[0], operands[1]));
+
+  emit_jump_insn (INTVAL (operands[3])
+				  ? gen_branch_true (skip_neg_label)
+				  : gen_branch_false (skip_neg_label));
+
+  emit_label_after (skip_neg_label,
+						emit_insn (gen_negsi2 (operands[0], operands[1])));
+  DONE;
+}"
   [(set_attr "type" "arith") ;; poor approximation
    (set_attr "length" "4")])
 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [SH] PR 49468 - Integer SI abs
  2011-09-26  3:57 [SH] PR 49468 - Integer SI abs Oleg Endo
@ 2011-09-27 14:46 ` Kaz Kojima
  2011-09-28  2:26   ` Oleg Endo
  0 siblings, 1 reply; 4+ messages in thread
From: Kaz Kojima @ 2011-09-27 14:46 UTC (permalink / raw)
  To: oleg.endo; +Cc: gcc-patches

Oleg Endo <oleg.endo@t-online.de> wrote:
> The attached patch improves the generated code for integer abs
> operations on SH, in particular SH4. There was already some code that
> was supposed to utilize SH's conditional execution it but it was never
> triggered, because the standard branch-free abs code was generated at a
> very early stage.
> Since the DI mode part of the original patch is causing problems I've
> stripped it out for now.
> 
> Tested with make -k check RUNTESTFLAGS="--target_board=sh-sim
> \{-m2,-m2a-single,-m4-single,-m4a-single\}\{-mb,-ml\}" with no new
> failures.
> 
> CSiBE code size tests show small decreases in a couple of places, with
> the highest drop being in libpng-1.2.5,pngrutil (15800 -> 15712) for
> -m4-single. 

Thanks for this work!  A few minor style issues:

>	* config/sh/sh.md (negdi2): Moved expansion into split to
>	allow more combination options.  Added T_REG clobber.
>	(*negdi2, abssi2, *abssi2, *negabssi2): Added.
>	(cneg): Changed from insn to insn_and_split.  Renamed to
>	negsi_cond.  Added alternative for non-SH4.

Please add PR target/49468 at just before this entry like as
the other entries which are tied with the PR.  The usual form
for ChangeLog entries is as a log of changes in imperative form.
For new insns, "New insns." is a usual way of gcc ChangeLog.
See other similar entries as examples.

>+(define_insn_and_split "negsi_cond"
>+  [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
>+	(if_then_else:SI (eq:SI (reg:SI T_REG)
>+					  (match_operand:SI 3 "const_int_operand" "M,N"))
[snip]
>+  emit_jump_insn (INTVAL (operands[3])
>+				  ? gen_branch_true (skip_neg_label)
>+				  : gen_branch_false (skip_neg_label));
>+
>+  emit_label_after (skip_neg_label,
>+						emit_insn (gen_negsi2 (operands[0], operands[1])));

Some lines of the patch look too long.  Use 8 spaces wide tabs
for GCC patches as GNU/GCC coding style says.

OK with those style changes.

Regards,
	kaz

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [SH] PR 49468 - Integer SI abs
  2011-09-27 14:46 ` Kaz Kojima
@ 2011-09-28  2:26   ` Oleg Endo
  2011-09-28 22:43     ` Kaz Kojima
  0 siblings, 1 reply; 4+ messages in thread
From: Oleg Endo @ 2011-09-28  2:26 UTC (permalink / raw)
  To: gcc-patches; +Cc: Kaz Kojima

[-- Attachment #1: Type: text/plain, Size: 756 bytes --]

On Tue, 2011-09-27 at 22:36 +0900, Kaz Kojima wrote:

> Thanks for this work!  A few minor style issues:
> 

Thanks for checking and sorry for the trouble. 
The attached patch and ChangeLog below should fix it.
I have also added a test case for SI mode abs.

Cheers,
Oleg


ChangeLog:

2011-09-28  Oleg Endo  <oleg.endo@t-online.de>

	PR target/49486
	* config/sh/sh.md (negdi2): Move expansion into split to
	allow more combination options.  Add T_REG clobber.
	(abssi2): New expander.
	(*negdi2, *abssi2, *negabssi2): New insns.
	(cneg): Change from insn to insn_and_split.  Rename to
	negsi_cond.  Add alternative for non-SH4.


testsuite/ChangeLog:

2011-09-28  Oleg Endo  <oleg.endo@t-online.de>

	PR target/49486
	* gcc.target/sh/pr49468-si.c: New.


[-- Attachment #2: sh_abs_si.patch --]
[-- Type: text/x-patch, Size: 5628 bytes --]

Index: gcc/testsuite/gcc.target/sh/pr49468-si.c
===================================================================
--- gcc/testsuite/gcc.target/sh/pr49468-si.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr49468-si.c	(revision 0)
@@ -0,0 +1,22 @@
+/* Check that 32 bit integer abs is generated as neg instruction and
+   conditional branch instead of default branch-free code.  */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O1" } */
+/* { dg-final { scan-assembler-times "neg" 2 } } */
+
+
+/* Normal integer absolute value.  */
+int
+abs_0 (int i)
+{
+  return (i < 0) ? -i : i;
+}
+
+/*  Negated integer absolute value.
+    The generated code should be the same, except that the branch 
+    condition is inverted.  */
+int
+abs_1 (int i)
+{
+  return (i > 0) ? -i : i;
+}
Index: gcc/config/sh/sh.md
===================================================================
--- gcc/config/sh/sh.md	(revision 179143)
+++ gcc/config/sh/sh.md	(working copy)
@@ -4282,28 +4282,39 @@
   "sub	r63, %1, %0"
   [(set_attr "type" "arith_media")])
 
+
+
+;; Don't expand immediately because otherwise neg:DI (abs:DI) will not be
+;; combined.
 (define_expand "negdi2"
-  [(set (match_operand:DI 0 "arith_reg_operand" "")
-	(neg:DI (match_operand:DI 1 "arith_reg_operand" "")))]
+  [(set (match_operand:DI 0 "arith_reg_dest" "")
+	(neg:DI (match_operand:DI 1 "arith_reg_operand" "")))
+   (clobber (reg:SI T_REG))]
   ""
+  "")
+
+(define_insn_and_split "*negdi2"
+  [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+	(neg:DI (match_operand:DI 1 "arith_reg_operand" "r")))]
+  "TARGET_SH1"
+  "#"
+  "TARGET_SH1"
+  [(const_int 0)]
   "
 {
-  if (TARGET_SH1)
-    {
-      int low_word = (TARGET_LITTLE_ENDIAN ? 0 : 1);
-      int high_word = (TARGET_LITTLE_ENDIAN ? 1 : 0);
+  int low_word = (TARGET_LITTLE_ENDIAN ? 0 : 1);
+  int high_word = (TARGET_LITTLE_ENDIAN ? 1 : 0);
 
-      rtx low_src = operand_subword (operands[1], low_word, 0, DImode);
-      rtx high_src = operand_subword (operands[1], high_word, 0, DImode);
+  rtx low_src = operand_subword (operands[1], low_word, 0, DImode);
+  rtx high_src = operand_subword (operands[1], high_word, 0, DImode);
 
-      rtx low_dst = operand_subword (operands[0], low_word, 1, DImode);
-      rtx high_dst = operand_subword (operands[0], high_word, 1, DImode);
+  rtx low_dst = operand_subword (operands[0], low_word, 1, DImode);
+  rtx high_dst = operand_subword (operands[0], high_word, 1, DImode);
 
-      emit_insn (gen_clrt ());
-      emit_insn (gen_negc (low_dst, low_src));
-      emit_insn (gen_negc (high_dst, high_src));
-      DONE;
-    }
+  emit_insn (gen_clrt ());
+  emit_insn (gen_negc (low_dst, low_src));
+  emit_insn (gen_negc (high_dst, high_src));
+  DONE;
 }")
 
 (define_insn "negsi2"
@@ -4326,27 +4337,77 @@
 		(const_int -1)))]
   "TARGET_SHMEDIA" "")
 
-/* The SH4 202 can do zero-offset branches without pipeline stalls.
-   This can be used as some kind of conditional execution, which is useful
-   for abs.  */
-(define_split
+(define_expand "abssi2"
   [(set (match_operand:SI 0 "arith_reg_dest" "")
-	(plus:SI (xor:SI (neg:SI (reg:SI T_REG))
-			 (match_operand:SI 1 "arith_reg_operand" ""))
-		 (reg:SI T_REG)))]
-  "TARGET_HARD_SH4"
+  	(abs:SI (match_operand:SI 1 "arith_reg_operand" "")))
+   (clobber (reg:SI T_REG))]
+  ""
+  "")
+
+(define_insn_and_split "*abssi2"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+  	(abs:SI (match_operand:SI 1 "arith_reg_operand" "r")))]
+  "TARGET_SH1"
+  "#"
+  "TARGET_SH1"
   [(const_int 0)]
-  "emit_insn (gen_movsi_i (operands[0], operands[1]));
-   emit_insn (gen_cneg (operands[0], operands[0], operands[0]));
-   DONE;")
+  "
+{
+  emit_insn (gen_cmpgesi_t (operands[1], const0_rtx));
+  emit_insn (gen_negsi_cond (operands[0], operands[1], operands[1],
+		 const1_rtx));
+  DONE;
+}")
 
-(define_insn "cneg"
+(define_insn_and_split "*negabssi2"
   [(set (match_operand:SI 0 "arith_reg_dest" "=r")
-	(if_then_else:SI (eq:SI (reg:SI T_REG) (const_int 0))
-		      (match_operand:SI 1 "arith_reg_operand" "0")
-		      (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
+  	(neg:SI (abs:SI (match_operand:SI 1 "arith_reg_operand" "r"))))]
+  "TARGET_SH1"
+  "#"
+  "TARGET_SH1"
+  [(const_int 0)]
+  "
+{
+  emit_insn (gen_cmpgesi_t (operands[1], const0_rtx));
+  emit_insn (gen_negsi_cond (operands[0], operands[1], operands[1],
+		 const0_rtx));
+  DONE;
+}")
+
+
+;; The SH4 202 can do zero-offset branches without pipeline stalls.
+;; This can be used as some kind of conditional execution, which is useful
+;; for abs.
+;; Actually the instruction scheduling should decide whether to use a
+;; zero-offset branch or not for any generic case involving a single
+;; instruction on SH4 202.
+
+(define_insn_and_split "negsi_cond"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+	(if_then_else:SI (eq:SI (reg:SI T_REG)
+			  (match_operand:SI 3 "const_int_operand" "M,N"))
+	 (match_operand:SI 1 "arith_reg_operand" "0,0")
+	 (neg:SI (match_operand:SI 2 "arith_reg_operand" "r,r"))))]
   "TARGET_HARD_SH4"
-  "bf 0f\;neg %2,%0\\n0:"
+  "@
+	bt\\t0f\;neg\\t%2,%0\\n0:
+	bf\\t0f\;neg\\t%2,%0\\n0:"
+  "!TARGET_HARD_SH4"
+  [(const_int 0)]
+  "
+{
+  rtx skip_neg_label = gen_label_rtx ();
+
+  emit_insn (gen_movsi (operands[0], operands[1]));
+
+  emit_jump_insn (INTVAL (operands[3])
+		  ? gen_branch_true (skip_neg_label)
+		  : gen_branch_false (skip_neg_label));
+
+  emit_label_after (skip_neg_label,
+		    emit_insn (gen_negsi2 (operands[0], operands[1])));
+  DONE;
+}"
   [(set_attr "type" "arith") ;; poor approximation
    (set_attr "length" "4")])
 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [SH] PR 49468 - Integer SI abs
  2011-09-28  2:26   ` Oleg Endo
@ 2011-09-28 22:43     ` Kaz Kojima
  0 siblings, 0 replies; 4+ messages in thread
From: Kaz Kojima @ 2011-09-28 22:43 UTC (permalink / raw)
  To: oleg.endo; +Cc: gcc-patches

Oleg Endo <oleg.endo@t-online.de> wrote:
> The attached patch and ChangeLog below should fix it.
> I have also added a test case for SI mode abs.

Thanks!  I've applied your patch as revision 179320.

Regards,
	kaz

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2011-09-28 21:45 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-09-26  3:57 [SH] PR 49468 - Integer SI abs Oleg Endo
2011-09-27 14:46 ` Kaz Kojima
2011-09-28  2:26   ` Oleg Endo
2011-09-28 22:43     ` Kaz Kojima

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).