public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, GCC/THUMB1] New define_insn_and_split pattern to optimize out unnecessary uxtb instructions
@ 2017-06-05  8:20 Prakhar Bahuguna
  0 siblings, 0 replies; only message in thread
From: Prakhar Bahuguna @ 2017-06-05  8:20 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, Richard.Earnshaw, Ramana.Radhakrishnan, Kyrylo.Tkachov

[-- Attachment #1: Type: text/plain, Size: 2868 bytes --]

GCC currently generates unnecessary uxtb instructions when compiling
gcc.target/arm/unsigned-extend-1.c for Thumb1 targets such as the Cortex-M0.
This test case is as follows:

unsigned char foo (unsigned char c)
{
  return (c >= '0') && (c <= '9');
}

The assembly output generated with "-mcpu=cortex-m0 -O2":

foo:
	mov	r3, r0
	mov	r2, #9
	mov	r0, #0
	sub	r3, r3, #48
	uxtb	r3, r3
	cmp	r2, r3
	adc	r0, r0, r0
	@ sp needed
	uxtb	r0, r0
	bx	lr

The uxtb instructions are supposed to be optimized out at the rtl combine pass,
but the current thumb1_addsi3_addgeu insn pattern is not friendly to the
combine pass. Upon entering the combine pass, we have this rtl:

(insn 6 3 7 2 (set (reg:SI 116)
        (plus:SI (reg/v:SI 114 [ c ])
            (const_int -48 [0xffffffffffffffd0])))

(insn 7 6 8 2 (set (reg:SI 117)
        (zero_extend:SI (subreg:QI (reg:SI 116) 0)))

(insn 8 7 9 2 (set (reg:SI 119)
        (const_int 9 [0x9]))

(insn 9 8 10 2 (set (reg:SI 120)
        (const_int 0 [0]))

(insn 10 9 12 2 (set (reg:SI 118)
        (plus:SI (plus:SI (reg:SI 120)
                (reg:SI 120))
            (geu:SI (reg:SI 119)
                (reg:SI 117))))

During the rtl combine pass, operand 0 of the first plus operator is
(plus:SI (reg:SI 120) (reg:SI 120)) and this will be optimized to (const_int 0)
as reg 120 is zero after insn 9. The first plus operator will be subsequently
optimized out as it performs addition with zero. Finally, insn 10 is turned
into:

(insn 10 9 12 2 (set (reg:SI 118)
                     (leu:SI (reg:SI 116) (const_int 9))))

The optimization to the leu comparator matches our expectation that reg 116
will be used directly instead of reg 117 to eliminate uxtb insn 7.
Unfortunately, this new insn 10 is an illegal rtl for Thumb1. Hence, the
optimization done in the combine pass is undone and the unnecessary uxtb
instruction is kept.

This patch implements a veneer for the insn pattern thumb1_addsi3_addgeu. This
veneer insn pattern looks like the new insn 10 in the above example and can
facilitate the combine pass to optimize out the unnecessary uxtb instructions.
During the rtl expand stage, GIMPLE trees are expanded to this veneer insn
pattern rather than the normal thumb1_addsi3_addgeu. Later on in the split1
stage, this veneer insn pattern will be turned into normal thumb1_addsi3_addgeu
pattern for future code generation.

gcc/ChangeLog:

2017-06-05  Prakhar Bahuguna  <prakhar.bahuguna@arm.com>

	* config/arm/arm.md (cstoresi_leu_thumb1): New define_insn_and_split
	pattern.
	(cstoresi4): Use above new pattern.

testsuite/ChangeLog:

2017-06-05  Prakhar Bahuguna  <prakhar.bahuguna@arm.com>

	* gcc.target/arm/thumb1-unsigned-extend-1.c: New file.

Testing done: Ran regression tests, added new test to confirm uxtb instructions
are optimized out for Thumb1 targets.

Okay for stage1?

-- 

Prakhar Bahuguna

[-- Attachment #2: 0001-New-define_insn_and_split-pattern-to-optimize-out-un.patch --]
[-- Type: text/plain, Size: 2753 bytes --]

From c0c5d410536238bf57967cbc50bdc8e7f7f0ab59 Mon Sep 17 00:00:00 2001
From: Prakhar Bahuguna <prakhar.bahuguna@arm.com>
Date: Fri, 24 Mar 2017 10:48:03 +0000
Subject: [PATCH] New define_insn_and_split pattern to optimize out unnecessary
 uxtb instructions

---
 gcc/config/arm/arm.md                              | 36 ++++++++++++++++++----
 .../gcc.target/arm/thumb1-unsigned-extend-1.c      | 10 ++++++
 2 files changed, 40 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/thumb1-unsigned-extend-1.c

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index e6e1ac54a85..656ede7c2f5 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -7691,18 +7691,14 @@
 
     case LEU:
       op3 = force_reg (SImode, operands[3]);
-      scratch = force_reg (SImode, const0_rtx);
-      emit_insn (gen_thumb1_addsi3_addgeu (operands[0], scratch, scratch,
-					  op3, operands[2]));
+      emit_insn (gen_cstoresi_leu_thumb1 (operands[0], operands[2], op3));
       break;
 
     case GEU:
       op3 = operands[3];
       if (!thumb1_cmp_operand (op3, SImode))
         op3 = force_reg (SImode, op3);
-      scratch = force_reg (SImode, const0_rtx);
-      emit_insn (gen_thumb1_addsi3_addgeu (operands[0], scratch, scratch,
-					  operands[2], op3));
+      emit_insn (gen_cstoresi_leu_thumb1 (operands[0], op3, operands[2]));
       break;
 
     case LTU:
@@ -7781,6 +7777,34 @@
    }"
 )
 
+(define_insn_and_split "cstoresi_leu_thumb1"
+  [(set (match_operand:SI 0 "s_register_operand" "=l")
+	(leu:SI (match_operand:SI 1 "s_register_operand" "l")
+		(match_operand:SI 2 "thumb1_cmp_operand" "lI")))]
+  "TARGET_THUMB1"
+  "#"
+  "TARGET_THUMB1 && !reload_completed"
+  [(set (match_dup 3) (const_int 0))
+   (set (match_dup 0)
+	(plus:SI (plus:SI (match_dup 3)
+			  (match_dup 3))
+		 (geu:SI (match_dup 4)
+			 (match_dup 1))))]
+  "
+    operands[3] = gen_reg_rtx (SImode);
+
+    if (CONST_INT_P (operands[2]))
+      {
+	operands[4] = gen_reg_rtx (SImode);
+	emit_move_insn (operands[4], operands[2]);
+      }
+    else
+      operands[4] = operands[2];
+  "
+  [(set_attr "length" "4")
+   (set_attr "type" "multiple")]
+)
+
 \f
 ;; Conditional move insns
 
diff --git a/gcc/testsuite/gcc.target/arm/thumb1-unsigned-extend-1.c b/gcc/testsuite/gcc.target/arm/thumb1-unsigned-extend-1.c
new file mode 100644
index 00000000000..e6066b65d60
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/thumb1-unsigned-extend-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_thumb1_ok } */
+/* { dg-options "-mcpu=cortex-m0 -O2" } */
+
+unsigned char foo (unsigned char c)
+{
+  return (c >= '0') && (c <= '9');
+}
+
+/* { dg-final { scan-assembler-not "uxtb" } } */
-- 
2.13.0


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2017-06-05  8:20 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-05  8:20 [PATCH, GCC/THUMB1] New define_insn_and_split pattern to optimize out unnecessary uxtb instructions Prakhar Bahuguna

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).