public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [Patch,AVR]: Fix PR50447
@ 2011-09-22 18:34 Georg-Johann Lay
  2011-09-23  7:53 ` Denis Chertykov
  2011-09-23  8:20 ` Paolo Bonzini
  0 siblings, 2 replies; 12+ messages in thread
From: Georg-Johann Lay @ 2011-09-22 18:34 UTC (permalink / raw)
  To: gcc-patches; +Cc: Denis Chertykov, Eric Weddington, Anatoly Sokolov

[-- Attachment #1: Type: text/plain, Size: 1069 bytes --]

This patch adds the PLUS part to fix the PR.

addsi3 has a 8-bit scratch register now so that constants that are not covered
by the constraints won't force a reload of the constant.

The output routine tries adding the constant and subtracting the negated
constant and then chooses the shortest sequence.  Moreover, if the lower bytes
of the constant are zero, there is no need to add them.

Besides that, the patch adds some add-and-zero-extend patterns.

Passed without regressions.

Ok to commit?

Johann

	PR target/50447
	* config/avr/avr.md: (adjust_len): Add alternative "out_plus".
	(addsi3): Rewrite using QI scratch register.  Adjust text
	peephole using plus:SI.
	(*addsi3_zero_extend.hi): New insn.
	(*subsi3_zero_extend.hi): New insn.
	(*subhi3_zero_extend1): Set attribute "cc" to "set_czn".
	(*subsi3_zero_extend): Ditto.
	(subsi3): Change predicate #2 to register_operand.
	* config/avr/avr-protos.h (avr_out_plus): New prototype.
	(avr_out_plus_1): New static function.
	(avr_out_plus): New function.
	(adjust_insn_length): Handle ADJUST_LEN_OUT_PLUS.

[-- Attachment #2: pr50447-2.diff --]
[-- Type: text/x-patch, Size: 14793 bytes --]

Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md	(revision 179081)
+++ config/avr/avr.md	(working copy)
@@ -136,7 +136,7 @@ (define_attr "length" ""
 ;; Otherwise do special processing depending on the attribute.
 
 (define_attr "adjust_len"
-  "yes,no,reload_in32,out_bitop"
+  "yes,no,reload_in32,out_bitop,out_plus"
   (const_string "yes"))
 
 ;; Define mode iterators
@@ -909,26 +909,35 @@ (define_insn "*addhi3"
    (set_attr "cc" "set_n,set_czn,set_czn,set_czn,set_n,set_n")])
 
 (define_insn "addsi3"
-  [(set (match_operand:SI 0 "register_operand" "=r,!w,!w,d,r,r")
-	  (plus:SI
-	   (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0")
-	   (match_operand:SI 2 "nonmemory_operand" "r,I,J,i,P,N")))]
+  [(set (match_operand:SI 0 "register_operand"          "=r,!w,!w,d,l,l ,d,r")
+        (plus:SI (match_operand:SI 1 "register_operand" "%0,0 ,0 ,0,0,0 ,0,0")
+                 (match_operand:SI 2 "nonmemory_operand" "r,I ,J ,s,P,N ,n,n")))
+   (clobber (match_scratch:QI 3                         "=X,X ,X ,X,X,X ,X,&d"))]
   ""
-  "@
-	add %A0,%A2\;adc %B0,%B2\;adc %C0,%C2\;adc %D0,%D2
-	adiw %0,%2\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__
-	sbiw %0,%n2\;sbc %C0,__zero_reg__\;sbc %D0,__zero_reg__
-	subi %0,lo8(-(%2))\;sbci %B0,hi8(-(%2))\;sbci %C0,hlo8(-(%2))\;sbci %D0,hhi8(-(%2))
-	sec\;adc %A0,__zero_reg__\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__
-	sec\;sbc %A0,__zero_reg__\;sbc %B0,__zero_reg__\;sbc %C0,__zero_reg__\;sbc %D0,__zero_reg__"
-  [(set_attr "length" "4,3,3,4,5,5")
-   (set_attr "cc" "set_n,set_n,set_czn,set_czn,set_n,set_n")])
+  {
+    static const char * const asm_code[] =
+      {
+        "add %A0,%A2\;adc %B0,%B2\;adc %C0,%C2\;adc %D0,%D2",
+        "adiw %0,%2\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__",
+        "sbiw %0,%n2\;sbc %C0,__zero_reg__\;sbc %D0,__zero_reg__",
+        "subi %0,lo8(-(%2))\;sbci %B0,hi8(-(%2))\;sbci %C0,hlo8(-(%2))\;sbci %D0,hhi8(-(%2))",
+        "sec\;adc %A0,__zero_reg__\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__",
+        "sec\;sbc %A0,__zero_reg__\;sbc %B0,__zero_reg__\;sbc %C0,__zero_reg__\;sbc %D0,__zero_reg__"
+      };
+
+    if (which_alternative >= (signed) (sizeof (asm_code) / sizeof (*asm_code)))
+      return avr_out_plus (operands, NULL);
+
+    return asm_code [which_alternative];
+  }
+  [(set_attr "length" "4,3,3,4,5,5,8,8")
+   (set_attr "adjust_len" "no,no,no,no,no,no,out_plus,out_plus")
+   (set_attr "cc" "set_n,set_n,set_czn,set_czn,set_n,set_n,clobber,clobber")])
 
 (define_insn "*addsi3_zero_extend"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-	(plus:SI (zero_extend:SI
-		  (match_operand:QI 1 "register_operand" "r"))
-		 (match_operand:SI 2 "register_operand" "0")))]
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+        (plus:SI (zero_extend:SI (match_operand:QI 1 "register_operand" "r"))
+                 (match_operand:SI 2 "register_operand"                 "0")))]
   ""
   "add %A0,%1
 	adc %B0,__zero_reg__
@@ -937,6 +946,18 @@ (define_insn "*addsi3_zero_extend"
   [(set_attr "length" "4")
    (set_attr "cc" "set_n")])
 
+(define_insn "*addsi3_zero_extend.hi"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+        (plus:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "r"))
+                 (match_operand:SI 2 "register_operand"                 "0")))]
+  ""
+  "add %A0,%1
+	adc %B0,%B1
+	adc %C0,__zero_reg__
+	adc %D0,__zero_reg__"
+  [(set_attr "length" "4")
+   (set_attr "cc" "set_n")])
+
 ;-----------------------------------------------------------------------------
 ; sub bytes
 (define_insn "subqi3"
@@ -962,39 +983,47 @@ (define_insn "subhi3"
    (set_attr "cc" "set_czn,set_czn")])
 
 (define_insn "*subhi3_zero_extend1"
-  [(set (match_operand:HI 0 "register_operand" "=r")
-	(minus:HI (match_operand:HI 1 "register_operand" "0")
-		  (zero_extend:HI
-		   (match_operand:QI 2 "register_operand" "r"))))]
+  [(set (match_operand:HI 0 "register_operand"                          "=r")
+        (minus:HI (match_operand:HI 1 "register_operand"                 "0")
+                  (zero_extend:HI (match_operand:QI 2 "register_operand" "r"))))]
   ""
   "sub %A0,%2
 	sbc %B0,__zero_reg__"
   [(set_attr "length" "2")
-   (set_attr "cc" "set_n")])
+   (set_attr "cc" "set_czn")])
 
 (define_insn "subsi3"
-  [(set (match_operand:SI 0 "register_operand" "=r,d")
-        (minus:SI (match_operand:SI 1 "register_operand" "0,0")
-                 (match_operand:SI 2 "nonmemory_operand" "r,i")))]
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+        (minus:SI (match_operand:SI 1 "register_operand" "0")
+                  (match_operand:SI 2 "register_operand" "r")))]
   ""
-  "@
-	sub %0,%2\;sbc %B0,%B2\;sbc %C0,%C2\;sbc %D0,%D2
-	subi %A0,lo8(%2)\;sbci %B0,hi8(%2)\;sbci %C0,hlo8(%2)\;sbci %D0,hhi8(%2)"
-  [(set_attr "length" "4,4")
-   (set_attr "cc" "set_czn,set_czn")])
+  "sub %0,%2\;sbc %B0,%B2\;sbc %C0,%C2\;sbc %D0,%D2"
+  [(set_attr "length" "4")
+   (set_attr "cc" "set_czn")])
 
 (define_insn "*subsi3_zero_extend"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-	(minus:SI (match_operand:SI 1 "register_operand" "0")
-		  (zero_extend:SI
-		   (match_operand:QI 2 "register_operand" "r"))))]
+  [(set (match_operand:SI 0 "register_operand"                          "=r")
+        (minus:SI (match_operand:SI 1 "register_operand"                 "0")
+                  (zero_extend:SI (match_operand:QI 2 "register_operand" "r"))))]
   ""
   "sub %A0,%2
 	sbc %B0,__zero_reg__
 	sbc %C0,__zero_reg__
 	sbc %D0,__zero_reg__"
   [(set_attr "length" "4")
-   (set_attr "cc" "set_n")])
+   (set_attr "cc" "set_czn")])
+
+(define_insn "*subsi3_zero_extend.hi"
+  [(set (match_operand:SI 0 "register_operand"                          "=r")
+        (minus:SI (match_operand:SI 1 "register_operand"                 "0")
+                  (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
+  ""
+  "sub %A0,%2
+	sbc %B0,%B2
+	sbc %C0,__zero_reg__
+	sbc %D0,__zero_reg__"
+  [(set_attr "length" "4")
+   (set_attr "cc" "set_czn")])
 
 ;******************************************************************************
 ; mul
@@ -4016,42 +4045,44 @@ (define_insn "*sbix_branch_tmp_bit7"
 ;; ************************* Peepholes ********************************
 
 (define_peephole
-  [(set (match_operand:SI 0 "d_register_operand" "")
-        (plus:SI (match_dup 0)
-                 (const_int -1)))
-   (parallel
-    [(set (cc0)
-          (compare (match_dup 0)
-		   (const_int -1)))
-     (clobber (match_operand:QI 1 "d_register_operand" ""))])
+  [(parallel [(set (match_operand:SI 0 "d_register_operand" "")
+                   (plus:SI (match_dup 0)
+                            (const_int -1)))
+              (clobber (scratch:QI))])
+   (parallel [(set (cc0)
+                   (compare (match_dup 0)
+                            (const_int -1)))
+              (clobber (match_operand:QI 1 "d_register_operand" ""))])
    (set (pc)
-	(if_then_else (ne (cc0) (const_int 0))
-		      (label_ref (match_operand 2 "" ""))
-		      (pc)))]
+        (if_then_else (ne (cc0)
+                          (const_int 0))
+                      (label_ref (match_operand 2 "" ""))
+                      (pc)))]
   ""
-  "*
-{
-  CC_STATUS_INIT;
-  if (test_hard_reg_class (ADDW_REGS, operands[0]))
-    output_asm_insn (AS2 (sbiw,%0,1) CR_TAB
-		     AS2 (sbc,%C0,__zero_reg__) CR_TAB
-		     AS2 (sbc,%D0,__zero_reg__) \"\\n\", operands);
-  else
-    output_asm_insn (AS2 (subi,%A0,1) CR_TAB
-		     AS2 (sbc,%B0,__zero_reg__) CR_TAB
-		     AS2 (sbc,%C0,__zero_reg__) CR_TAB
-		     AS2 (sbc,%D0,__zero_reg__) \"\\n\", operands);
-  switch (avr_jump_mode (operands[2],insn))
   {
-    case 1:
-      return AS1 (brcc,%2);
-    case 2:
-      return (AS1 (brcs,.+2) CR_TAB
-              AS1 (rjmp,%2));
-  }
-  return (AS1 (brcs,.+4) CR_TAB
-          AS1 (jmp,%2));
-}")
+    CC_STATUS_INIT;
+    if (test_hard_reg_class (ADDW_REGS, operands[0]))
+      output_asm_insn (AS2 (sbiw,%0,1) CR_TAB
+                       AS2 (sbc,%C0,__zero_reg__) CR_TAB
+                       AS2 (sbc,%D0,__zero_reg__) "\n", operands);
+    else
+      output_asm_insn (AS2 (subi,%A0,1) CR_TAB
+                       AS2 (sbc,%B0,__zero_reg__) CR_TAB
+                       AS2 (sbc,%C0,__zero_reg__) CR_TAB
+                       AS2 (sbc,%D0,__zero_reg__) "\n", operands);
+
+    switch (avr_jump_mode (operands[2], insn))
+      {
+      case 1:
+        return AS1 (brcc,%2);
+      case 2:
+        return (AS1 (brcs,.+2) CR_TAB
+                AS1 (rjmp,%2));
+      }
+
+    return (AS1 (brcs,.+4) CR_TAB
+            AS1 (jmp,%2));
+  })
 
 (define_peephole
   [(set (match_operand:HI 0 "d_register_operand" "")
Index: config/avr/avr-protos.h
===================================================================
--- config/avr/avr-protos.h	(revision 179081)
+++ config/avr/avr-protos.h	(working copy)
@@ -82,6 +82,7 @@ extern void avr_output_bld (rtx operands
 extern void avr_output_addr_vec_elt (FILE *stream, int value);
 extern const char *avr_out_sbxx_branch (rtx insn, rtx operands[]);
 extern const char* avr_out_bitop (rtx, rtx*, int*);
+extern const char* avr_out_plus (rtx*, int*);
 extern bool avr_popcount_each_byte (rtx, int, int);
 
 extern int extra_constraint_Q (rtx x);
Index: config/avr/avr.c
===================================================================
--- config/avr/avr.c	(revision 179081)
+++ config/avr/avr.c	(working copy)
@@ -4503,6 +4503,167 @@ lshrsi3_out (rtx insn, rtx operands[], i
 }
 
 
+/* Output addition of register XOP[0] and compile time constant XOP[2]:
+
+      XOP[0] = XOP[0] + XOP[2]
+
+   and return "".  If PLEN == NULL, print assembler instructions to perform the
+   addition; otherwise, set *PLEN to the length of the instruction sequence (in
+   words) printed with PLEN == NULL.  XOP[3] is an 8-bit scratch register.
+   CODE == PLUS:  perform addition by using ADD instructions.
+   CODE == MINUS: perform addition by using SUB instructions.  */
+
+static void
+avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code)
+{
+  /* MODE of the operation.  */
+  enum machine_mode mode = GET_MODE (xop[0]);
+
+  /* Number of bytes to operate on.  */
+  int i, n_bytes = GET_MODE_SIZE (mode);
+
+  /* Value (0..0xff) held in clobber register op[3] or -1 if unknown.  */
+  int clobber_val = -1;
+
+  /* op[0]: 8-bit destination register
+     op[1]: 8-bit const int
+     op[2]: 8-bit scratch register */
+  rtx op[3];
+
+  /* Started the operation?  Before starting the operation we may skip
+     adding 0.  This is no more true after the operation started because
+     carry must be taken into account.  */
+  bool started = false;
+
+  /* Value to add.  There are two ways to add VAL: R += VAL and R -= -VAL.  */
+  rtx xval = xop[2];
+
+  if (MINUS == code)
+    xval = gen_int_mode (-UINTVAL (xval), mode);
+
+  op[2] = xop[3];
+
+  if (plen)
+    *plen = 0;
+
+  for (i = 0; i < n_bytes; i++)
+    {
+      /* We operate byte-wise on the destination.  */
+      rtx reg8 = simplify_gen_subreg (QImode, xop[0], mode, i);
+      rtx xval8 = simplify_gen_subreg (QImode, xval, mode, i);
+
+      /* 8-bit value to operate with this byte. */
+      unsigned int val8 = UINTVAL (xval8) & GET_MODE_MASK (QImode);
+
+      /* Registers R16..R31 can operate with immediate.  */
+      bool ld_reg_p = test_hard_reg_class (LD_REGS, reg8);
+
+      op[0] = reg8;
+      op[1] = GEN_INT (val8);
+      
+      if (!started && i % 2 == 0
+          && test_hard_reg_class (ADDW_REGS, reg8))
+        {
+          rtx xval16 = simplify_gen_subreg (HImode, xval, mode, i);
+          unsigned int val16 = UINTVAL (xval16) & GET_MODE_MASK (HImode);
+
+          /* Registers R24, X, Y, Z can use ADIW/SBIW with constants < 64
+             i.e. operate word-wise.  */
+
+          if (val16 < 64)
+            {
+              if (val16 != 0)
+                {
+                  started = true;
+                  avr_asm_len (code == PLUS ? "adiw %0,%1" : "sbiw %0,%1",
+                               op, plen, 1);
+                }
+
+              i++;
+              continue;
+            }
+        }
+
+      if (val8 == 0)
+        {
+          if (started)
+            avr_asm_len (code == PLUS
+                         ? "adc %0,__zero_reg__" : "sbc %0,__zero_reg__",
+                         op, plen, 1);
+          continue;
+        }
+
+      switch (code)
+        {
+        case PLUS:
+
+          gcc_assert (plen != NULL || REG_P (op[2]));
+
+          if (clobber_val != (int) val8)
+            avr_asm_len ("ldi %2,%1", op, plen, 1);
+          clobber_val = (int) val8;
+              
+          avr_asm_len (started ? "adc %0,%2" : "add %0,%2", op, plen, 1);
+
+          break; /* PLUS */
+
+        case MINUS:
+
+          if (ld_reg_p)
+            avr_asm_len (started ? "sbci %0,%1" : "subi %0,%1", op, plen, 1);
+          else
+            {
+              gcc_assert (plen != NULL || REG_P (op[2]));
+
+              if (clobber_val != (int) val8)
+                avr_asm_len ("ldi %2,%1", op, plen, 1);
+              clobber_val = (int) val8;
+              
+              avr_asm_len (started ? "sbc %0,%2" : "sub %0,%2", op, plen, 1);
+            }
+
+          break; /* MINUS */
+          
+        default:
+          /* Unknown code */
+          gcc_unreachable();
+        }
+
+      started = true;
+
+    } /* for all sub-bytes */
+}
+
+
+/* Output addition of register XOP[0] and compile time constant XOP[2]:
+
+      XOP[0] = XOP[0] + XOP[2]
+
+   and return "".  If PLEN == NULL, print assembler instructions to perform the
+   addition; otherwise, set *PLEN to the length of the instruction sequence (in
+   words) printed with PLEN == NULL.  */
+
+const char*
+avr_out_plus (rtx *xop, int *plen)
+{
+  int len_plus, len_minus;
+
+  /* Work out if  XOP[0] += XOP[2]  is better or  XOP[0] -= -XOP[2].  */
+  
+  avr_out_plus_1 (xop, &len_plus, PLUS);
+  avr_out_plus_1 (xop, &len_minus, MINUS);
+
+  if (plen)
+    *plen = (len_minus <= len_plus) ? len_minus : len_plus;
+  else if (len_minus <= len_plus)
+    avr_out_plus_1 (xop, NULL, MINUS);
+  else
+    avr_out_plus_1 (xop, NULL, PLUS);
+
+  return "";
+}
+
+
 /* Output bit operation (IOR, AND, XOR) with register XOP[0] and compile
    time constant XOP[2]:
 
@@ -4851,6 +5012,10 @@ adjust_insn_length (rtx insn, int len)
           avr_out_bitop (insn, op, &len);
           break;
 
+        case ADJUST_LEN_OUT_PLUS:
+          avr_out_plus (op, &len);
+          break;
+
         default:
           gcc_unreachable();
         }

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch,AVR]: Fix PR50447
  2011-09-22 18:34 [Patch,AVR]: Fix PR50447 Georg-Johann Lay
@ 2011-09-23  7:53 ` Denis Chertykov
  2011-09-23  9:23   ` Georg-Johann Lay
  2011-09-23  8:20 ` Paolo Bonzini
  1 sibling, 1 reply; 12+ messages in thread
From: Denis Chertykov @ 2011-09-23  7:53 UTC (permalink / raw)
  To: Georg-Johann Lay; +Cc: gcc-patches, Eric Weddington, Anatoly Sokolov

2011/9/22 Georg-Johann Lay <avr@gjlay.de>:
> This patch adds the PLUS part to fix the PR.
>
> addsi3 has a 8-bit scratch register now so that constants that are not covered
> by the constraints won't force a reload of the constant.
>
> The output routine tries adding the constant and subtracting the negated
> constant and then chooses the shortest sequence.  Moreover, if the lower bytes
> of the constant are zero, there is no need to add them.
>
> Besides that, the patch adds some add-and-zero-extend patterns.
>
> Passed without regressions.


Why you removed immediate operand from subsi3:

 (define_insn "subsi3"
-  [(set (match_operand:SI 0 "register_operand" "=r,d")
-        (minus:SI (match_operand:SI 1 "register_operand" "0,0")
-                 (match_operand:SI 2 "nonmemory_operand" "r,i")))]
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+        (minus:SI (match_operand:SI 1 "register_operand" "0")
+                  (match_operand:SI 2 "register_operand" "r")))]
   ""
-  "@
-	sub %0,%2\;sbc %B0,%B2\;sbc %C0,%C2\;sbc %D0,%D2
-	subi %A0,lo8(%2)\;sbci %B0,hi8(%2)\;sbci %C0,hlo8(%2)\;sbci %D0,hhi8(%2)"
-  [(set_attr "length" "4,4")
-   (set_attr "cc" "set_czn,set_czn")])
+  "sub %0,%2\;sbc %B0,%B2\;sbc %C0,%C2\;sbc %D0,%D2"
+  [(set_attr "length" "4")
+   (set_attr "cc" "set_czn")])



Denis.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch,AVR]: Fix PR50447
  2011-09-22 18:34 [Patch,AVR]: Fix PR50447 Georg-Johann Lay
  2011-09-23  7:53 ` Denis Chertykov
@ 2011-09-23  8:20 ` Paolo Bonzini
  2011-09-23  8:39   ` Paolo Bonzini
  2011-09-23  9:23   ` [Patch,AVR]: Fix PR50447 (2/n) Georg-Johann Lay
  1 sibling, 2 replies; 12+ messages in thread
From: Paolo Bonzini @ 2011-09-23  8:20 UTC (permalink / raw)
  To: Georg-Johann Lay
  Cc: gcc-patches, Denis Chertykov, Eric Weddington, Anatoly Sokolov

On 09/22/2011 07:43 PM, Georg-Johann Lay wrote:
> This patch adds the PLUS part to fix the PR.
>
> addsi3 has a 8-bit scratch register now so that constants that are not covered
> by the constraints won't force a reload of the constant.
>
> The output routine tries adding the constant and subtracting the negated
> constant and then chooses the shortest sequence.  Moreover, if the lower bytes
> of the constant are zero, there is no need to add them.
>
> Besides that, the patch adds some add-and-zero-extend patterns.
>
> Passed without regressions.
>
> Ok to commit?

The same can be done for cmp, no?

Paolo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch,AVR]: Fix PR50447
  2011-09-23  8:20 ` Paolo Bonzini
@ 2011-09-23  8:39   ` Paolo Bonzini
  2011-09-23  9:23   ` [Patch,AVR]: Fix PR50447 (2/n) Georg-Johann Lay
  1 sibling, 0 replies; 12+ messages in thread
From: Paolo Bonzini @ 2011-09-23  8:39 UTC (permalink / raw)
  To: gcc-patches
  Cc: gcc-patches, Denis Chertykov, Eric Weddington, Anatoly Sokolov

On 09/22/2011 07:43 PM, Georg-Johann Lay wrote:
> This patch adds the PLUS part to fix the PR.
>
> addsi3 has a 8-bit scratch register now so that constants that are not covered
> by the constraints won't force a reload of the constant.
>
> The output routine tries adding the constant and subtracting the negated
> constant and then chooses the shortest sequence.  Moreover, if the lower bytes
> of the constant are zero, there is no need to add them.
>
> Besides that, the patch adds some add-and-zero-extend patterns.
>
> Passed without regressions.
>
> Ok to commit?

The same can be done for cmp, no?

Paolo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch,AVR]: Fix PR50447 (2/n)
  2011-09-23  8:20 ` Paolo Bonzini
  2011-09-23  8:39   ` Paolo Bonzini
@ 2011-09-23  9:23   ` Georg-Johann Lay
  2011-09-23  9:37     ` Paolo Bonzini
  1 sibling, 1 reply; 12+ messages in thread
From: Georg-Johann Lay @ 2011-09-23  9:23 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: gcc-patches, Denis Chertykov, Eric Weddington, Anatoly Sokolov

Paolo Bonzini schrieb:
> On 09/22/2011 07:43 PM, Georg-Johann Lay wrote:
>> This patch adds the PLUS part to fix the PR.
>>
>> addsi3 has a 8-bit scratch register now so that constants that are not
>> covered
>> by the constraints won't force a reload of the constant.
>>
>> The output routine tries adding the constant and subtracting the negated
>> constant and then chooses the shortest sequence.  Moreover, if the
>> lower bytes
>> of the constant are zero, there is no need to add them.
>>
>> Besides that, the patch adds some add-and-zero-extend patterns.
>>
>> Passed without regressions.
>>
>> Ok to commit?
> 
> The same can be done for cmp, no?

Yes, can be done for comparisons, too.  I'd prefer a separate patch for it. The
subject is bit misleading; should be (2/n).

> Paolo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch,AVR]: Fix PR50447
  2011-09-23  7:53 ` Denis Chertykov
@ 2011-09-23  9:23   ` Georg-Johann Lay
  0 siblings, 0 replies; 12+ messages in thread
From: Georg-Johann Lay @ 2011-09-23  9:23 UTC (permalink / raw)
  To: Denis Chertykov; +Cc: gcc-patches, Eric Weddington, Anatoly Sokolov

Denis Chertykov schrieb:
> 2011/9/22 Georg-Johann Lay <avr@gjlay.de>:
>> This patch adds the PLUS part to fix the PR.
>>
>> addsi3 has a 8-bit scratch register now so that constants that are not covered
>> by the constraints won't force a reload of the constant.
>>
>> The output routine tries adding the constant and subtracting the negated
>> constant and then chooses the shortest sequence.  Moreover, if the lower bytes
>> of the constant are zero, there is no need to add them.
>>
>> Besides that, the patch adds some add-and-zero-extend patterns.
>>
>> Passed without regressions.
> 
> 
> Why you removed immediate operand from subsi3:
> 
>  (define_insn "subsi3"
> -  [(set (match_operand:SI 0 "register_operand" "=r,d")
> -        (minus:SI (match_operand:SI 1 "register_operand" "0,0")
> -                 (match_operand:SI 2 "nonmemory_operand" "r,i")))]
> +  [(set (match_operand:SI 0 "register_operand"          "=r")
> +        (minus:SI (match_operand:SI 1 "register_operand" "0")
> +                  (match_operand:SI 2 "register_operand" "r")))]
>    ""
> -  "@
> -	sub %0,%2\;sbc %B0,%B2\;sbc %C0,%C2\;sbc %D0,%D2
> -	subi %A0,lo8(%2)\;sbci %B0,hi8(%2)\;sbci %C0,hlo8(%2)\;sbci %D0,hhi8(%2)"
> -  [(set_attr "length" "4,4")
> -   (set_attr "cc" "set_czn,set_czn")])
> +  "sub %0,%2\;sbc %B0,%B2\;sbc %C0,%C2\;sbc %D0,%D2"
> +  [(set_attr "length" "4")
> +   (set_attr "cc" "set_czn")])

A = B - const

is canonizalized to

A = B + (-const)

For compile time constants. And for SImode there are no symbols so that the
immediate part of memory_operand is dead code and I cleaned it up.

> Denis.

Johann


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch,AVR]: Fix PR50447 (2/n)
  2011-09-23  9:23   ` [Patch,AVR]: Fix PR50447 (2/n) Georg-Johann Lay
@ 2011-09-23  9:37     ` Paolo Bonzini
  2011-09-23  9:39       ` Paolo Bonzini
  2011-09-23  9:51       ` Paolo Bonzini
  0 siblings, 2 replies; 12+ messages in thread
From: Paolo Bonzini @ 2011-09-23  9:37 UTC (permalink / raw)
  To: Georg-Johann Lay
  Cc: gcc-patches, Denis Chertykov, Eric Weddington, Anatoly Sokolov

On 09/23/2011 10:41 AM, Georg-Johann Lay wrote:
> Yes, can be done for comparisons, too.  I'd prefer a separate patch for it. The
> subject is bit misleading; should be (2/n).

Also, I am curious about one thing: while this is of course a very 
pragmatic solution, you could also convert AVR to get rid of CC0, do 
this at expansion time, and get split-wide-types to work as intended.

compare-elim.c makes it relatively easy to remove CC0 nowadays.

Paolo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch,AVR]: Fix PR50447 (2/n)
  2011-09-23  9:37     ` Paolo Bonzini
@ 2011-09-23  9:39       ` Paolo Bonzini
  2011-09-23  9:51       ` Paolo Bonzini
  1 sibling, 0 replies; 12+ messages in thread
From: Paolo Bonzini @ 2011-09-23  9:39 UTC (permalink / raw)
  To: gcc-patches
  Cc: gcc-patches, Denis Chertykov, Eric Weddington, Anatoly Sokolov

On 09/23/2011 10:41 AM, Georg-Johann Lay wrote:
> Yes, can be done for comparisons, too.  I'd prefer a separate patch for it. The
> subject is bit misleading; should be (2/n).

Also, I am curious about one thing: while this is of course a very 
pragmatic solution, you could also convert AVR to get rid of CC0, do 
this at expansion time, and get split-wide-types to work as intended.

compare-elim.c makes it relatively easy to remove CC0 nowadays.

Paolo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch,AVR]: Fix PR50447 (2/n)
  2011-09-23  9:37     ` Paolo Bonzini
  2011-09-23  9:39       ` Paolo Bonzini
@ 2011-09-23  9:51       ` Paolo Bonzini
  2011-09-23 10:12         ` Georg-Johann Lay
  2011-09-27 19:27         ` Richard Henderson
  1 sibling, 2 replies; 12+ messages in thread
From: Paolo Bonzini @ 2011-09-23  9:51 UTC (permalink / raw)
  To: gcc-patches

On 09/23/2011 10:56 AM, Paolo Bonzini wrote:
> Also, I am curious about one thing: while this is of course a very
> pragmatic solution, you could also convert AVR to get rid of CC0, do
> this at expansion time, and get split-wide-types to work as intended.
>
> compare-elim.c makes it relatively easy to remove CC0 nowadays.

For example, a two-byte add can be written as

    (set (reg:QI L1) (plus:QI (reg:QI L2) (reg:QI L3)))
    (set (reg:QI H1) (plus:QI
                     (plus:QI (req:QI H2) (reg:QI H3))
                     (ltu:QI  (reg:QI L1) (reg:QI L3))))

After reload the second instruction can be split to

   (set (reg:CC_C CC) (compare:QI (reg:QI L1) (reg:QI L3)))
   (set (reg:QI   H1) (plus:QI
                      (plus:QI    (req:QI   H2) (reg:QI H3))
                      (lt:QI      (reg:CC_C CC) (const_int 0))))

(i.e. cp+adc).  compare-elim will then be able to turn this into add+adc:

   [parallel
     (set (reg:QI   L1) (plus:QI    (reg:QI L2) (reg:QI L3)))
     (set (reg:CC_C CC) (compare:QI
                        (plus:QI    (reg:QI L2) (reg:QI L3))
                                    (reg:QI L3))))]
   (set (reg:QI H1) (plus:QI
                    (plus:QI (req:QI   H2) (reg:QI H3))
                    (lt:QI   (reg:CC_C CC) (const_int 0))))

Paolo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch,AVR]: Fix PR50447 (2/n)
  2011-09-23  9:51       ` Paolo Bonzini
@ 2011-09-23 10:12         ` Georg-Johann Lay
  2011-09-23 13:46           ` Paolo Bonzini
  2011-09-27 19:27         ` Richard Henderson
  1 sibling, 1 reply; 12+ messages in thread
From: Georg-Johann Lay @ 2011-09-23 10:12 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gcc-patches

Paolo Bonzini schrieb:
> On 09/23/2011 10:56 AM, Paolo Bonzini wrote:
>> Also, I am curious about one thing: while this is of course a very
>> pragmatic solution, you could also convert AVR to get rid of CC0, do
>> this at expansion time, and get split-wide-types to work as intended.

My changes are just micro-optimizations to print things in a smarter way.  I
don't know enough of CCmode to do that transition.

>> compare-elim.c makes it relatively easy to remove CC0 nowadays.
> 
> For example, a two-byte add can be written as
> 
>    (set (reg:QI L1) (plus:QI (reg:QI L2) (reg:QI L3)))
>    (set (reg:QI H1) (plus:QI
>                     (plus:QI (req:QI H2) (reg:QI H3))
>                     (ltu:QI  (reg:QI L1) (reg:QI L3))))
> 
> After reload the second instruction can be split to
> 
>   (set (reg:CC_C CC) (compare:QI (reg:QI L1) (reg:QI L3)))
>   (set (reg:QI   H1) (plus:QI
>                      (plus:QI    (req:QI   H2) (reg:QI H3))
>                      (lt:QI      (reg:CC_C CC) (const_int 0))))

What happens if L1=L3?  AVR just has two-operand instructions so there are many
insns with "0" constraint.

How goes the LTU -> LT transition?

Ans reloading from memory might change Carry. How is that handled? Read
somewhere about that problem in some PR-chat and that this blocked the cc0 ->
CCmode transition (don't know if that's actually a restriction).

> (i.e. cp+adc).  compare-elim will then be able to turn this into add+adc:
> 
>   [parallel
>     (set (reg:QI   L1) (plus:QI    (reg:QI L2) (reg:QI L3)))
>     (set (reg:CC_C CC) (compare:QI
>                        (plus:QI    (reg:QI L2) (reg:QI L3))
>                                    (reg:QI L3))))]
>   (set (reg:QI H1) (plus:QI
>                    (plus:QI (req:QI   H2) (reg:QI H3))
>                    (lt:QI   (reg:CC_C CC) (const_int 0))))

What about the other flags like Z (zero) and N (negative)? They are not as
important as Carry but are still usable to avoid comparisons.  Describing all
that explicitly in RTL will be quite tedious...

> Paolo

Johann

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch,AVR]: Fix PR50447 (2/n)
  2011-09-23 10:12         ` Georg-Johann Lay
@ 2011-09-23 13:46           ` Paolo Bonzini
  0 siblings, 0 replies; 12+ messages in thread
From: Paolo Bonzini @ 2011-09-23 13:46 UTC (permalink / raw)
  To: Georg-Johann Lay; +Cc: gcc-patches

On 09/23/2011 11:36 AM, Georg-Johann Lay wrote:
> Paolo Bonzini schrieb:
>> On 09/23/2011 10:56 AM, Paolo Bonzini wrote:
>>> Also, I am curious about one thing: while this is of course a very
>>> pragmatic solution, you could also convert AVR to get rid of CC0, do
>>> this at expansion time, and get split-wide-types to work as intended.
>
> My changes are just micro-optimizations to print things in a smarter way.

Yes, understood.  There are two relatively large changes that we're 
talking about.

One is changing cc0 to CCmode.  By itself it does not have large 
rewards.  It is just an enabler.  With CCmode, most of your algorithms 
would be usable anyway, so very little code would go away.

The second would be a large change instead: since AVR is not 
schedulable, it got by very well with complex patterns that produce 
multiple instructions.  Changing that and doing the expansion early 
would be harder than introducing CCmode, but would have a relatively big 
reward: split-wide-types would actually provide improvements rather than 
getting in the way most of the time; 8-bit registers could be allocated 
independently.  (There is one notable pessimization, 16-bit add/sub 
would be used quite rarely).  Also, fwprop would be able to perform all 
the simplifications you're doing in these patches.

>>> compare-elim.c makes it relatively easy to remove CC0 nowadays.
>>
>> For example, a two-byte add can be written as
>>
>>     (set (reg:QI L1) (plus:QI (reg:QI L2) (reg:QI L3)))
>>     (set (reg:QI H1) (plus:QI
>>                      (plus:QI (req:QI H2) (reg:QI H3))
>>                      (ltu:QI  (reg:QI L1) (reg:QI L3))))
>>
>> After reload the second instruction can be split to
>>
>>    (set (reg:CC_C CC) (compare:QI (reg:QI L1) (reg:QI L3)))
>>    (set (reg:QI   H1) (plus:QI
>>                       (plus:QI    (req:QI   H2) (reg:QI H3))
>>                       (lt:QI      (reg:CC_C CC) (const_int 0))))
>
> What happens if L1=L3?  AVR just has two-operand instructions so there are many
> insns with "0" constraint.

You can use L2 instead of L3, it is the same.  If L1=L2=L3, you have a 
problem.  If L1=L2=L3 it's actually an in-place left-shift; you could 
probably represent it as a PARALLEL and split it after reload to insns 
for lsl and rol.

You could also represent the 16-bit and 32-bit operations as PARALLELs 
and split them after reload, but I think it's worse.  Using PARALLELs 
has the advantage that you have access to values of the registers before 
the instruction; however, it prevents fwprop from doing your 
simplifications (because fwprop would not find a matching insn for "set 
the low byte to ~0 and OR the other three bytes").

> How goes the LTU ->  LT transition?

Typo.

> Ans reloading from memory might change Carry. How is that handled? Read
> somewhere about that problem in some PR-chat and that this blocked the cc0 ->
> CCmode transition (don't know if that's actually a restriction).

You represent everything without CC registers until after reload, so 
that reload can insert move instructions freely.  See the head comment 
in compare-elim.c.

>> (i.e. cp+adc).  compare-elim will then be able to turn this into add+adc:
>>
>>    [parallel
>>      (set (reg:QI   L1) (plus:QI    (reg:QI L2) (reg:QI L3)))
>>      (set (reg:CC_C CC) (compare:QI
>>                         (plus:QI    (reg:QI L2) (reg:QI L3))
>>                                     (reg:QI L3))))]
>>    (set (reg:QI H1) (plus:QI
>>                     (plus:QI (req:QI   H2) (reg:QI H3))
>>                     (lt:QI   (reg:CC_C CC) (const_int 0))))
>
> What about the other flags like Z (zero) and N (negative)? They are not as
> important as Carry but are still usable to avoid comparisons.  Describing all
> that explicitly in RTL will be quite tedious...

If add sets other flags correctly, it would just use a different mode. 
I wasn't sure, so I was conservative in my example.  You may want to 
look at the mn10300 port, which in fact only has three modes: CCmode 
requires the arithmetic instruction to set all of N/Z/V/C, while 
CCZNCmode and CCZNmode are for subsets.

Paolo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch,AVR]: Fix PR50447 (2/n)
  2011-09-23  9:51       ` Paolo Bonzini
  2011-09-23 10:12         ` Georg-Johann Lay
@ 2011-09-27 19:27         ` Richard Henderson
  1 sibling, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2011-09-27 19:27 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gcc-patches

On 09/23/2011 02:10 AM, Paolo Bonzini wrote:
> On 09/23/2011 10:56 AM, Paolo Bonzini wrote:
>> Also, I am curious about one thing: while this is of course a very
>> pragmatic solution, you could also convert AVR to get rid of CC0, do
>> this at expansion time, and get split-wide-types to work as intended.
>>
>> compare-elim.c makes it relatively easy to remove CC0 nowadays.
> 
> For example, a two-byte add can be written as
> 
>    (set (reg:QI L1) (plus:QI (reg:QI L2) (reg:QI L3)))
>    (set (reg:QI H1) (plus:QI
>                     (plus:QI (req:QI H2) (reg:QI H3))
>                     (ltu:QI  (reg:QI L1) (reg:QI L3))))
> 
> After reload the second instruction can be split to
> 
>   (set (reg:CC_C CC) (compare:QI (reg:QI L1) (reg:QI L3)))
>   (set (reg:QI   H1) (plus:QI
>                      (plus:QI    (req:QI   H2) (reg:QI H3))
>                      (lt:QI      (reg:CC_C CC) (const_int 0))))
> 
> (i.e. cp+adc).  compare-elim will then be able to turn this into add+adc:
> 
>   [parallel
>     (set (reg:QI   L1) (plus:QI    (reg:QI L2) (reg:QI L3)))
>     (set (reg:CC_C CC) (compare:QI
>                        (plus:QI    (reg:QI L2) (reg:QI L3))
>                                    (reg:QI L3))))]
>   (set (reg:QI H1) (plus:QI
>                    (plus:QI (req:QI   H2) (reg:QI H3))
>                    (lt:QI   (reg:CC_C CC) (const_int 0))))

It's very hard to get this right for AVR.  Certainly split-wide-types
won't be able to do it.  Because (annoyingly) AVR does have a set of
HImode arithmetic insns that only operate on the "d" register class.
So we can't do all the mode splitting that one would like split-wide-
types to handle until after reload.

As for cc0, to me to looks like having AVR expose the flags register
is not too useful.  If we get to the point where AVR is the only
remaining cc0 target, we can convert it to use only cbranch.  After
all, it already implements its own compare-elim-alike pass in md_reorg.


r~

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2011-09-27 18:19 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-09-22 18:34 [Patch,AVR]: Fix PR50447 Georg-Johann Lay
2011-09-23  7:53 ` Denis Chertykov
2011-09-23  9:23   ` Georg-Johann Lay
2011-09-23  8:20 ` Paolo Bonzini
2011-09-23  8:39   ` Paolo Bonzini
2011-09-23  9:23   ` [Patch,AVR]: Fix PR50447 (2/n) Georg-Johann Lay
2011-09-23  9:37     ` Paolo Bonzini
2011-09-23  9:39       ` Paolo Bonzini
2011-09-23  9:51       ` Paolo Bonzini
2011-09-23 10:12         ` Georg-Johann Lay
2011-09-23 13:46           ` Paolo Bonzini
2011-09-27 19:27         ` Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).