public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* PATCH: PR target/44588: Very inefficient 8bit mod/div
@ 2010-06-21 20:38 H.J. Lu
  2010-06-22 17:08 ` H.J. Lu
  2010-06-22 18:28 ` Uros Bizjak
  0 siblings, 2 replies; 24+ messages in thread
From: H.J. Lu @ 2010-06-21 20:38 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Hi,

This patch adds 8bit divmov pattern for x86. X86 8bit divide
instructions return result in AX with

AL <- Quotient
AH <- Remainder

This patch models it and properly extends quotient. Tested
on Intel64 with -m64 and -m32.  There are no regressions.
OK for trunk?

BTW, there is only one divb used in subreg_get_info in
gcc compilers. The old code is

        movzbl  mode_size(%r13), %edi
        movzbl  mode_size(%r14), %esi
        xorl    %edx, %edx
        movl    %edi, %eax
        divw    %si
        testw   %dx, %dx
        jne     .L1194

The new one is

        movzbl  mode_size(%r13), %edi
        movl    %edi, %eax
        divb    mode_size(%r14)
        movzbl  %ah, %eax
        testb   %al, %al
        jne     .L1194

Thanks.


H.J.
---
gcc/

2010-06-21  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* config/i386/i386.md (UNSPEC_MOVQI_EXTZV): New.
	(any_div_code): Likewise.
	(extend_code): Likewise.
	(extract_code): Likewise.
	(*movqi_extzv_3): Likewise.
	(*movqi_extzv_3_rex64): Likewise.
	(*movqi_extzv): Likewise.
	(<u>divmodqi4): Likewise.
	(*<u>divqi3): Likewise.
	(<u>divqi3): Remvoved.

gcc/testsuite/

2010-06-21  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* gcc.target/i386/umod-1.c: New.
	* gcc.target/i386/umod-2.c: Likewise.
	* gcc.target/i386/umod-3.c: Likewise.

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 777f8e7..2b5cdc0 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -104,6 +104,7 @@
   UNSPEC_REP
   UNSPEC_LD_MPIC	; load_macho_picbase
   UNSPEC_TRUNC_NOOP
+  UNSPEC_MOVQI_EXTZV
 
   ;; For SSE/MMX support:
   UNSPEC_FIX_NOTRUNC
@@ -760,6 +761,11 @@
 
 ;; Used in signed and unsigned divisions.
 (define_code_iterator any_div [div udiv])
+(define_code_attr any_div_code [(div "DIV") (udiv "UDIV")])
+(define_code_attr extend_code
+  [(div "SIGN_EXTEND") (udiv "ZERO_EXTEND")])
+(define_code_attr extract_code
+  [(div "SIGN_EXTRACT") (udiv "ZERO_EXTRACT")])
 
 ;; Instruction prefix for signed and unsigned operations.
 (define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
@@ -2295,6 +2301,86 @@
 	(const_string "SI")
 	(const_string "QI")))])
 
+(define_insn "*movqi_extzv_3"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R")
+	(zero_extract:QI (match_operand 1 "ext_register_operand" "Q,Q")
+			 (const_int 8)
+			 (const_int 8)))]
+  "!TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (and (match_operand:QI 0 "register_operand" "")
+			(ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			     (ne (symbol_ref "TARGET_MOVX")
+				 (const_int 0))))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
+(define_insn "*movqi_extzv_3_rex64"
+  [(set (match_operand:QI 0 "register_operand" "=Q,?R")
+	(zero_extract:QI (match_operand 1 "ext_register_operand" "Q,Q")
+			 (const_int 8)
+			 (const_int 8)))]
+  "TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			(ne (symbol_ref "TARGET_MOVX")
+			    (const_int 0)))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
+;; Use unspec so that it won't be removed unless the result is unused.
+(define_insn "*movqi_extzv"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R")
+	(unspec:QI [(match_operand 1 "ext_register_operand" "Q,Q")]
+	  UNSPEC_MOVQI_EXTZV))]
+  ""
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%b1, %k0|%k0, %b1}";
+    default:
+      return "mov{b}\t{%b1, %0|%0, %b1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (and (match_operand:QI 0 "register_operand" "")
+			(ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			     (ne (symbol_ref "TARGET_MOVX")
+				 (const_int 0))))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
 (define_insn "movsi_insv_1"
   [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
 			 (const_int 8)
@@ -7556,17 +7642,6 @@
 \f
 ;; Divide instructions
 
-(define_insn "<u>divqi3"
-  [(set (match_operand:QI 0 "register_operand" "=a")
-	(any_div:QI
-	  (match_operand:HI 1 "register_operand" "0")
-	  (match_operand:QI 2 "nonimmediate_operand" "qm")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_QIMODE_MATH"
-  "<sgnprefix>div{b}\t%2"
-  [(set_attr "type" "idiv")
-   (set_attr "mode" "QI")])
-
 ;; The patterns that match these are at the end of this file.
 
 (define_expand "divxf3"
@@ -7603,6 +7678,63 @@
 \f
 ;; Divmod instructions.
 
+(define_expand "<u>divmodqi4"
+  [(parallel [(set (match_operand:QI 0 "register_operand" "")
+		   (any_div:QI
+		     (match_operand:QI 1 "register_operand" "")
+		     (match_operand:QI 2 "nonimmediate_operand" "")))
+	      (set (match_operand:QI 3 "register_operand" "")
+		   (mod:QI (match_dup 1) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_QIMODE_MATH"
+{
+  rtx div, clobber, set;
+  rtx operand0, operand1;
+  
+  operand0 = gen_reg_rtx (HImode);
+  operand1 = gen_reg_rtx (HImode);
+  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+
+  /* Properly extend operands[1] to HImode.  */
+  set = gen_rtx_SET (VOIDmode, operand1,
+		     gen_rtx_<extend_code> (HImode, operands[1]));
+  if (<extend_code> == SIGN_EXTEND)
+    emit_insn (set);
+  else
+    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
+
+  /* Generate 8bit divide.  Result is in AX.  */
+  div = gen_rtx_SET (VOIDmode, operand0,
+		     gen_rtx_<any_div_code> (HImode, operand1,
+					     operands[2]));
+  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, div, clobber)));
+
+  /* Extract remainder from AH.  */
+  operand1 = gen_rtx_<extract_code> (QImode, operand0,
+				     GEN_INT (8), GEN_INT (8));
+  emit_move_insn (operands[3], operand1);
+
+  /* Extract quotient from AL.  It can't be accessed as AX.  */
+  operand0 = gen_rtx_UNSPEC (QImode, gen_rtvec (1, operand0), 
+			     UNSPEC_MOVQI_EXTZV);
+  emit_move_insn (operands[0], operand0);
+  DONE;
+})
+
+;; Divide AX by r/m8, with result stored in
+;; AL <- Quotient
+;; AH <- Remainder
+(define_insn "*<u>divqi3"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(any_div:HI
+	  (match_operand:HI 1 "register_operand" "0")
+	  (match_operand:QI 2 "nonimmediate_operand" "qm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH"
+  "<sgnprefix>div{b}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "QI")])
+
 (define_expand "divmod<mode>4"
   [(parallel [(set (match_operand:SWIM248 0 "register_operand" "")
 		   (div:SWIM248
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-1.c	2010-06-21 12:01:25.705950180 -0700
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+unsigned char
+foo (unsigned char x, unsigned char y)
+{
+  return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-2.c	2010-06-21 12:01:17.857932744 -0700
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+extern unsigned char z;
+
+unsigned char
+foo (unsigned char x, unsigned char y)
+{
+  z = x/y;
+  return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-3.c	2010-06-21 12:02:36.809962702 -0700
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+extern void abort (void);
+extern void exit (int);
+
+unsigned char cx = 7;
+
+int
+main ()
+{
+  unsigned char cy;
+  
+  cy = cx / 6; if (cy != 1) abort ();
+  cy = cx % 6; if (cy != 1) abort ();
+
+  exit(0);
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-21 20:38 PATCH: PR target/44588: Very inefficient 8bit mod/div H.J. Lu
@ 2010-06-22 17:08 ` H.J. Lu
  2010-06-22 18:28 ` Uros Bizjak
  1 sibling, 0 replies; 24+ messages in thread
From: H.J. Lu @ 2010-06-22 17:08 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 2884 bytes --]

On Mon, Jun 21, 2010 at 12:33 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
> Hi,
>
> This patch adds 8bit divmov pattern for x86. X86 8bit divide
> instructions return result in AX with
>
> AL <- Quotient
> AH <- Remainder
>
> This patch models it and properly extends quotient. Tested
> on Intel64 with -m64 and -m32.  There are no regressions.
> OK for trunk?
>
> BTW, there is only one divb used in subreg_get_info in
> gcc compilers. The old code is
>
>        movzbl  mode_size(%r13), %edi
>        movzbl  mode_size(%r14), %esi
>        xorl    %edx, %edx
>        movl    %edi, %eax
>        divw    %si
>        testw   %dx, %dx
>        jne     .L1194
>
> The new one is
>
>        movzbl  mode_size(%r13), %edi
>        movl    %edi, %eax
>        divb    mode_size(%r14)
>        movzbl  %ah, %eax
>        testb   %al, %al
>        jne     .L1194
>
> Thanks.
>
>
> H.J.
> ---
> gcc/
>
> 2010-06-21  H.J. Lu  <hongjiu.lu@intel.com>
>
>        PR target/44588
>        * config/i386/i386.md (UNSPEC_MOVQI_EXTZV): New.
>        (any_div_code): Likewise.
>        (extend_code): Likewise.
>        (extract_code): Likewise.
>        (*movqi_extzv_3): Likewise.
>        (*movqi_extzv_3_rex64): Likewise.
>        (*movqi_extzv): Likewise.
>        (<u>divmodqi4): Likewise.
>        (*<u>divqi3): Likewise.
>        (<u>divqi3): Remvoved.
>
> gcc/testsuite/
>
> 2010-06-21  H.J. Lu  <hongjiu.lu@intel.com>
>
>        PR target/44588
>        * gcc.target/i386/umod-1.c: New.
>        * gcc.target/i386/umod-2.c: Likewise.
>        * gcc.target/i386/umod-3.c: Likewise.
>

Here is the updated patch.  It uses UNSPEC to extract
remainder in AH from AL. If we only need quotient, it
will be in AL.  Now we generate:

[hjl@gnu-6 divb]$ cat umod-2.c
extern unsigned char z;

unsigned char
foo (unsigned char x, unsigned char y)
{
  z = x/y;
   return x % y;
}
[hjl@gnu-6 divb]$ make umod-2.s
/export/build/gnu/gcc/build-x86_64-linux/gcc/xgcc
-B/export/build/gnu/gcc/build-x86_64-linux/gcc/ -Os -S umod-2.c
[hjl@gnu-6 divb]$ cat umod-2.s
	.file	"umod-2.c"
	.text
.globl foo
	.type	foo, @function
foo:
.LFB0:
	.cfi_startproc
	movzbl	%dil, %eax
	divb	%sil
	movb	%al, z(%rip)
	movzbl	%ah, %eax
	ret

OK for trunk?

Thanks.


-- 
H.J.
---
gcc/

2010-06-22  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* config/i386/i386.md (UNSPEC_MOVQI_EXTZH): New.
	(any_div_code): Likewise.
	(extend_code): Likewise.
	(extract_code): Likewise.
	(*movqi_extzh): Likewise.
	(*movqi_extzh_rex64): Likewise.
	(<u>divmodqi4): Likewise.
	(*<u>divqi3): Likewise.
	(<u>divqi3): Remvoved.

gcc/testsuite/

2010-06-22  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* gcc.target/i386/umod-1.c: New.
	* gcc.target/i386/umod-2.c: Likewise.
	* gcc.target/i386/umod-3.c: Likewise.

[-- Attachment #2: gcc-pr44588-6.patch --]
[-- Type: text/plain, Size: 6844 bytes --]

gcc/

2010-06-22  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* config/i386/i386.md (UNSPEC_MOVQI_EXTZH): New.
	(any_div_code): Likewise.
	(extend_code): Likewise.
	(extract_code): Likewise.
	(*movqi_extzh): Likewise.
	(*movqi_extzh_rex64): Likewise.
	(<u>divmodqi4): Likewise.
	(*<u>divqi3): Likewise.
	(<u>divqi3): Remvoved.

gcc/testsuite/

2010-06-22  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* gcc.target/i386/umod-1.c: New.
	* gcc.target/i386/umod-2.c: Likewise.
	* gcc.target/i386/umod-3.c: Likewise.

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 777f8e7..2960324 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -104,6 +104,7 @@
   UNSPEC_REP
   UNSPEC_LD_MPIC	; load_macho_picbase
   UNSPEC_TRUNC_NOOP
+  UNSPEC_MOVQI_EXTZH
 
   ;; For SSE/MMX support:
   UNSPEC_FIX_NOTRUNC
@@ -760,6 +761,11 @@
 
 ;; Used in signed and unsigned divisions.
 (define_code_iterator any_div [div udiv])
+(define_code_attr any_div_code [(div "DIV") (udiv "UDIV")])
+(define_code_attr extend_code
+  [(div "SIGN_EXTEND") (udiv "ZERO_EXTEND")])
+(define_code_attr extract_code
+  [(div "SIGN_EXTRACT") (udiv "ZERO_EXTRACT")])
 
 ;; Instruction prefix for signed and unsigned operations.
 (define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
@@ -2295,6 +2301,59 @@
 	(const_string "SI")
 	(const_string "QI")))])
 
+;; Used to extract remainder from AH by 8bit divmod.  Use unspec so
+;; that we can extract it from AL.
+(define_insn "*movqi_extzh"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R")
+	(unspec:QI [(match_operand 1 "register_operand" "Q,Q")]
+		   UNSPEC_MOVQI_EXTZH))]
+  "!TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (and (match_operand:QI 0 "register_operand" "")
+			(ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			     (ne (symbol_ref "TARGET_MOVX")
+				 (const_int 0))))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
+(define_insn "*movqi_extzh_rex64"
+  [(set (match_operand:QI 0 "register_operand" "=Q,?R")
+	(unspec:QI [(match_operand 1 "register_operand" "Q,Q")]
+		   UNSPEC_MOVQI_EXTZH))]
+  "TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			(ne (symbol_ref "TARGET_MOVX")
+			    (const_int 0)))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
 (define_insn "movsi_insv_1"
   [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
 			 (const_int 8)
@@ -7556,17 +7615,6 @@
 \f
 ;; Divide instructions
 
-(define_insn "<u>divqi3"
-  [(set (match_operand:QI 0 "register_operand" "=a")
-	(any_div:QI
-	  (match_operand:HI 1 "register_operand" "0")
-	  (match_operand:QI 2 "nonimmediate_operand" "qm")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_QIMODE_MATH"
-  "<sgnprefix>div{b}\t%2"
-  [(set_attr "type" "idiv")
-   (set_attr "mode" "QI")])
-
 ;; The patterns that match these are at the end of this file.
 
 (define_expand "divxf3"
@@ -7603,6 +7651,58 @@
 \f
 ;; Divmod instructions.
 
+(define_expand "<u>divmodqi4"
+  [(parallel [(set (match_operand:QI 0 "register_operand" "")
+		   (any_div:QI
+		     (match_operand:QI 1 "register_operand" "")
+		     (match_operand:QI 2 "nonimmediate_operand" "")))
+	      (set (match_operand:QI 3 "register_operand" "")
+		   (mod:QI (match_dup 1) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_QIMODE_MATH"
+{
+  rtx div, clobber, set;
+  rtx operand1;
+  
+  operand1 = gen_reg_rtx (HImode);
+  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+
+  /* Properly extend operands[1] to HImode.  */
+  set = gen_rtx_SET (VOIDmode, operand1,
+		     gen_rtx_<extend_code> (HImode, operands[1]));
+  if (<extend_code> == SIGN_EXTEND)
+    emit_insn (set);
+  else
+    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
+
+  /* Generate 8bit divide.  Result is in AX.  */
+  div = gen_rtx_SET (VOIDmode, operands[0],
+		     gen_rtx_<any_div_code> (QImode, operand1,
+					     operands[2]));
+  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, div, clobber)));
+
+  /* Extract remainder from AH.  Use UNSPEC_MOVQI_EXTZH to extract it
+     from AL.  */
+  operand1 = gen_rtx_UNSPEC (QImode, gen_rtvec (1, operands[0]), 
+			     UNSPEC_MOVQI_EXTZH);
+  emit_move_insn (operands[3], operand1);
+  DONE;
+})
+
+;; Divide AX by r/m8, with result stored in
+;; AL <- Quotient
+;; AH <- Remainder
+(define_insn "*<u>divqi3"
+  [(set (match_operand:QI 0 "register_operand" "=a")
+	(any_div:QI
+	  (match_operand:HI 1 "register_operand" "0")
+	  (match_operand:QI 2 "nonimmediate_operand" "qm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH"
+  "<sgnprefix>div{b}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "QI")])
+
 (define_expand "divmod<mode>4"
   [(parallel [(set (match_operand:SWIM248 0 "register_operand" "")
 		   (div:SWIM248
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-1.c	2010-06-21 12:01:25.705950180 -0700
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+unsigned char
+foo (unsigned char x, unsigned char y)
+{
+  return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-2.c	2010-06-21 12:01:17.857932744 -0700
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+extern unsigned char z;
+
+unsigned char
+foo (unsigned char x, unsigned char y)
+{
+  z = x/y;
+  return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-3.c	2010-06-21 12:02:36.809962702 -0700
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+extern void abort (void);
+extern void exit (int);
+
+unsigned char cx = 7;
+
+int
+main ()
+{
+  unsigned char cy;
+  
+  cy = cx / 6; if (cy != 1) abort ();
+  cy = cx % 6; if (cy != 1) abort ();
+
+  exit(0);
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-21 20:38 PATCH: PR target/44588: Very inefficient 8bit mod/div H.J. Lu
  2010-06-22 17:08 ` H.J. Lu
@ 2010-06-22 18:28 ` Uros Bizjak
  2010-06-22 18:36   ` H.J. Lu
  1 sibling, 1 reply; 24+ messages in thread
From: Uros Bizjak @ 2010-06-22 18:28 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches

On Mon, 2010-06-21 at 12:33 -0700, H.J. Lu wrote:
> Hi,
> 
> This patch adds 8bit divmov pattern for x86. X86 8bit divide
> instructions return result in AX with
> 
> AL <- Quotient
> AH <- Remainder
> 
> This patch models it and properly extends quotient. Tested
> on Intel64 with -m64 and -m32.  There are no regressions.
> OK for trunk?
> 
> BTW, there is only one divb used in subreg_get_info in
> gcc compilers. The old code is
> 
>         movzbl  mode_size(%r13), %edi
>         movzbl  mode_size(%r14), %esi
>         xorl    %edx, %edx
>         movl    %edi, %eax
>         divw    %si
>         testw   %dx, %dx
>         jne     .L1194
> 
> The new one is
> 
>         movzbl  mode_size(%r13), %edi
>         movl    %edi, %eax
>         divb    mode_size(%r14)
>         movzbl  %ah, %eax
>         testb   %al, %al
>         jne     .L1194
> 

Hm, something is not combined correctly, I'd say "testb %ah, %ah" is
optimal in the second case.

Uros.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-22 18:28 ` Uros Bizjak
@ 2010-06-22 18:36   ` H.J. Lu
  2010-06-22 19:12     ` Uros Bizjak
  0 siblings, 1 reply; 24+ messages in thread
From: H.J. Lu @ 2010-06-22 18:36 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 2234 bytes --]

On Tue, Jun 22, 2010 at 11:05 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Mon, 2010-06-21 at 12:33 -0700, H.J. Lu wrote:
>> Hi,
>>
>> This patch adds 8bit divmov pattern for x86. X86 8bit divide
>> instructions return result in AX with
>>
>> AL <- Quotient
>> AH <- Remainder
>>
>> This patch models it and properly extends quotient. Tested
>> on Intel64 with -m64 and -m32.  There are no regressions.
>> OK for trunk?
>>
>> BTW, there is only one divb used in subreg_get_info in
>> gcc compilers. The old code is
>>
>>         movzbl  mode_size(%r13), %edi
>>         movzbl  mode_size(%r14), %esi
>>         xorl    %edx, %edx
>>         movl    %edi, %eax
>>         divw    %si
>>         testw   %dx, %dx
>>         jne     .L1194
>>
>> The new one is
>>
>>         movzbl  mode_size(%r13), %edi
>>         movl    %edi, %eax
>>         divb    mode_size(%r14)
>>         movzbl  %ah, %eax
>>         testb   %al, %al
>>         jne     .L1194
>>
>
> Hm, something is not combined correctly, I'd say "testb %ah, %ah" is
> optimal in the second case.
>

Here is another update adjusted for mov pattern changes in i386.md.

8bit result is stored in

AL <- Quotient
AH <- Remainder

If we use AX for quotient in 8bit divmod pattern, we have to make
sure that AX is valid for quotient.  We have to extend AL with UNSPEC
since AH isn't the part of quotient,.  Instead, I use AL for quotient and
use UNSPEC_MOVQI_EXTZH to extract remainder from AL. Quotient
access can be optimized very nicely. If remainder is used, we may have
an extract move for UNSPEC_MOVQI_EXTZH. I think this is a reasonable
comprise.


-- 
H.J.
--
gcc/

2010-06-22  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* config/i386/i386.md (UNSPEC_MOVQI_EXTZH): New.
	(any_div_code): Likewise.
	(extend_code): Likewise.
	(extract_code): Likewise.
	(*movqi_extzh): Likewise.
	(*movqi_extzh_rex64): Likewise.
	(<u>divmodqi4): Likewise.
	(*<u>divqi3): Likewise.
	(<u>divqi3): Remvoved.

gcc/testsuite/

2010-06-22  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* gcc.target/i386/umod-1.c: New.
	* gcc.target/i386/umod-2.c: Likewise.
	* gcc.target/i386/umod-3.c: Likewise.
-

[-- Attachment #2: gcc-pr44588-7.patch --]
[-- Type: text/plain, Size: 6963 bytes --]

gcc/

2010-06-22  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* config/i386/i386.md (UNSPEC_MOVQI_EXTZH): New.
	(any_div_code): Likewise.
	(extend_code): Likewise.
	(extract_code): Likewise.
	(*movqi_extzh): Likewise.
	(*movqi_extzh_rex64): Likewise.
	(<u>divmodqi4): Likewise.
	(*<u>divqi3): Likewise.
	(<u>divqi3): Remvoved.

gcc/testsuite/

2010-06-22  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* gcc.target/i386/umod-1.c: New.
	* gcc.target/i386/umod-2.c: Likewise.
	* gcc.target/i386/umod-3.c: Likewise.

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ab90d73..f268e90 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -104,6 +104,7 @@
   UNSPEC_REP
   UNSPEC_LD_MPIC	; load_macho_picbase
   UNSPEC_TRUNC_NOOP
+  UNSPEC_MOVQI_EXTZH
 
   ;; For SSE/MMX support:
   UNSPEC_FIX_NOTRUNC
@@ -760,6 +761,11 @@
 
 ;; Used in signed and unsigned divisions.
 (define_code_iterator any_div [div udiv])
+(define_code_attr any_div_code [(div "DIV") (udiv "UDIV")])
+(define_code_attr extend_code
+  [(div "SIGN_EXTEND") (udiv "ZERO_EXTEND")])
+(define_code_attr extract_code
+  [(div "SIGN_EXTRACT") (udiv "ZERO_EXTRACT")])
 
 ;; Instruction prefix for signed and unsigned operations.
 (define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
@@ -2052,6 +2058,59 @@
        (const_string "orig")))
    (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI,DI,DI")])
 
+;; Used to extract remainder from AH by 8bit divmod.  Use unspec so
+;; that we can extract it from AL.
+(define_insn "*movqi_extzh"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R")
+	(unspec:QI [(match_operand 1 "register_operand" "Q,Q")]
+		   UNSPEC_MOVQI_EXTZH))]
+  "!TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (and (match_operand:QI 0 "register_operand" "")
+			(ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			     (ne (symbol_ref "TARGET_MOVX")
+				 (const_int 0))))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
+(define_insn "*movqi_extzh_rex64"
+  [(set (match_operand:QI 0 "register_operand" "=Q,?R")
+	(unspec:QI [(match_operand 1 "register_operand" "Q,Q")]
+		   UNSPEC_MOVQI_EXTZH))]
+  "TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			(ne (symbol_ref "TARGET_MOVX")
+			    (const_int 0)))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
 ;; Convert impossible stores of immediate to existing instructions.
 ;; First try to get scratch register and go through it.  In case this
 ;; fails, move by 32bit parts.
@@ -7305,17 +7364,6 @@
 \f
 ;; Divide instructions
 
-(define_insn "<u>divqi3"
-  [(set (match_operand:QI 0 "register_operand" "=a")
-	(any_div:QI
-	  (match_operand:HI 1 "register_operand" "0")
-	  (match_operand:QI 2 "nonimmediate_operand" "qm")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_QIMODE_MATH"
-  "<sgnprefix>div{b}\t%2"
-  [(set_attr "type" "idiv")
-   (set_attr "mode" "QI")])
-
 ;; The patterns that match these are at the end of this file.
 
 (define_expand "divxf3"
@@ -7352,6 +7400,58 @@
 \f
 ;; Divmod instructions.
 
+(define_expand "<u>divmodqi4"
+  [(parallel [(set (match_operand:QI 0 "register_operand" "")
+		   (any_div:QI
+		     (match_operand:QI 1 "register_operand" "")
+		     (match_operand:QI 2 "nonimmediate_operand" "")))
+	      (set (match_operand:QI 3 "register_operand" "")
+		   (mod:QI (match_dup 1) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_QIMODE_MATH"
+{
+  rtx div, clobber, set;
+  rtx operand1;
+  
+  operand1 = gen_reg_rtx (HImode);
+  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+
+  /* Properly extend operands[1] to HImode.  */
+  set = gen_rtx_SET (VOIDmode, operand1,
+		     gen_rtx_<extend_code> (HImode, operands[1]));
+  if (<extend_code> == SIGN_EXTEND)
+    emit_insn (set);
+  else
+    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
+
+  /* Generate 8bit divide.  Result is in AX.  */
+  div = gen_rtx_SET (VOIDmode, operands[0],
+		     gen_rtx_<any_div_code> (QImode, operand1,
+					     operands[2]));
+  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, div, clobber)));
+
+  /* Extract remainder from AH.  Use UNSPEC_MOVQI_EXTZH to extract it
+     from AL.  */
+  operand1 = gen_rtx_UNSPEC (QImode, gen_rtvec (1, operands[0]), 
+			     UNSPEC_MOVQI_EXTZH);
+  emit_move_insn (operands[3], operand1);
+  DONE;
+})
+
+;; Divide AX by r/m8, with result stored in
+;; AL <- Quotient
+;; AH <- Remainder
+(define_insn "*<u>divqi3"
+  [(set (match_operand:QI 0 "register_operand" "=a")
+	(any_div:QI
+	  (match_operand:HI 1 "register_operand" "0")
+	  (match_operand:QI 2 "nonimmediate_operand" "qm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH"
+  "<sgnprefix>div{b}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "QI")])
+
 (define_expand "divmod<mode>4"
   [(parallel [(set (match_operand:SWIM248 0 "register_operand" "")
 		   (div:SWIM248
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-1.c	2010-06-21 12:01:25.705950180 -0700
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+unsigned char
+foo (unsigned char x, unsigned char y)
+{
+  return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-2.c	2010-06-21 12:01:17.857932744 -0700
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+extern unsigned char z;
+
+unsigned char
+foo (unsigned char x, unsigned char y)
+{
+  z = x/y;
+  return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-3.c	2010-06-21 12:02:36.809962702 -0700
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+extern void abort (void);
+extern void exit (int);
+
+unsigned char cx = 7;
+
+int
+main ()
+{
+  unsigned char cy;
+  
+  cy = cx / 6; if (cy != 1) abort ();
+  cy = cx % 6; if (cy != 1) abort ();
+
+  exit(0);
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-22 18:36   ` H.J. Lu
@ 2010-06-22 19:12     ` Uros Bizjak
  2010-06-22 20:08       ` H.J. Lu
  0 siblings, 1 reply; 24+ messages in thread
From: Uros Bizjak @ 2010-06-22 19:12 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches

On Tue, 2010-06-22 at 11:27 -0700, H.J. Lu wrote:

> >> This patch adds 8bit divmov pattern for x86. X86 8bit divide
> >> instructions return result in AX with
> >>
> >> AL <- Quotient
> >> AH <- Remainder
> >>
> >> This patch models it and properly extends quotient. Tested
> >> on Intel64 with -m64 and -m32.  There are no regressions.
> >> OK for trunk?
> >>
> >> BTW, there is only one divb used in subreg_get_info in
> >> gcc compilers. The old code is
> >>
> >>         movzbl  mode_size(%r13), %edi
> >>         movzbl  mode_size(%r14), %esi
> >>         xorl    %edx, %edx
> >>         movl    %edi, %eax
> >>         divw    %si
> >>         testw   %dx, %dx
> >>         jne     .L1194
> >>
> >> The new one is
> >>
> >>         movzbl  mode_size(%r13), %edi
> >>         movl    %edi, %eax
> >>         divb    mode_size(%r14)
> >>         movzbl  %ah, %eax
> >>         testb   %al, %al
> >>         jne     .L1194
> >>
> >
> > Hm, something is not combined correctly, I'd say "testb %ah, %ah" is
> > optimal in the second case.
> >
> 
> Here is another update adjusted for mov pattern changes in i386.md.
> 
> 8bit result is stored in
> 
> AL <- Quotient
> AH <- Remainder
> 
> If we use AX for quotient in 8bit divmod pattern, we have to make
> sure that AX is valid for quotient.  We have to extend AL with UNSPEC
> since AH isn't the part of quotient,.  Instead, I use AL for quotient and
> use UNSPEC_MOVQI_EXTZH to extract remainder from AL. Quotient
> access can be optimized very nicely. If remainder is used, we may have
> an extract move for UNSPEC_MOVQI_EXTZH. I think this is a reasonable
> comprise.

Why we need to reinvent movqi_extzv_2 ?

I guess that <u>divqi3 has to be implemented as multiple-set divmod
pattern using strict_low_part subregs to exactly describe in which
subreg quotient and remainder go.

Uros.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-22 19:12     ` Uros Bizjak
@ 2010-06-22 20:08       ` H.J. Lu
  2010-06-23  6:26         ` Uros Bizjak
  0 siblings, 1 reply; 24+ messages in thread
From: H.J. Lu @ 2010-06-22 20:08 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

On Tue, Jun 22, 2010 at 11:44 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>
>> If we use AX for quotient in 8bit divmod pattern, we have to make
>> sure that AX is valid for quotient.  We have to extend AL with UNSPEC
>> since AH isn't the part of quotient,.  Instead, I use AL for quotient and
>> use UNSPEC_MOVQI_EXTZH to extract remainder from AL. Quotient
>> access can be optimized very nicely. If remainder is used, we may have
>> an extract move for UNSPEC_MOVQI_EXTZH. I think this is a reasonable
>> comprise.
>
> Why we need to reinvent movqi_extzv_2 ?

Because UNSPEC_MOVQI_EXTZH extracts AH from AL, not from
AX, EAX, RAX.

> I guess that <u>divqi3 has to be implemented as multiple-set divmod
> pattern using strict_low_part subregs to exactly describe in which
> subreg quotient and remainder go.
>

I tried it and couldn't get it to work without adding a new
constraint for the upper 8bit registers.

-- 
H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-22 20:08       ` H.J. Lu
@ 2010-06-23  6:26         ` Uros Bizjak
  2010-06-23 16:54           ` H.J. Lu
  0 siblings, 1 reply; 24+ messages in thread
From: Uros Bizjak @ 2010-06-23  6:26 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches

On Tue, Jun 22, 2010 at 8:57 PM, H.J. Lu <hjl.tools@gmail.com> wrote:

>>> If we use AX for quotient in 8bit divmod pattern, we have to make
>>> sure that AX is valid for quotient.  We have to extend AL with UNSPEC
>>> since AH isn't the part of quotient,.  Instead, I use AL for quotient and
>>> use UNSPEC_MOVQI_EXTZH to extract remainder from AL. Quotient
>>> access can be optimized very nicely. If remainder is used, we may have
>>> an extract move for UNSPEC_MOVQI_EXTZH. I think this is a reasonable
>>> comprise.
>>
>> Why we need to reinvent movqi_extzv_2 ?
>
> Because UNSPEC_MOVQI_EXTZH extracts AH from AL, not from
> AX, EAX, RAX.

This is wrong, you can't extract AH out from AL...
>
>> I guess that <u>divqi3 has to be implemented as multiple-set divmod
>> pattern using strict_low_part subregs to exactly describe in which
>> subreg quotient and remainder go.
>>
>
> I tried it and couldn't get it to work without adding a new
> constraint for the upper 8bit registers.

"Q" with %h modifier ?

Uros.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-23  6:26         ` Uros Bizjak
@ 2010-06-23 16:54           ` H.J. Lu
  2010-06-23 18:37             ` Paolo Bonzini
  0 siblings, 1 reply; 24+ messages in thread
From: H.J. Lu @ 2010-06-23 16:54 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

On Tue, Jun 22, 2010 at 11:11 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Tue, Jun 22, 2010 at 8:57 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>
>>>> If we use AX for quotient in 8bit divmod pattern, we have to make
>>>> sure that AX is valid for quotient.  We have to extend AL with UNSPEC
>>>> since AH isn't the part of quotient,.  Instead, I use AL for quotient and
>>>> use UNSPEC_MOVQI_EXTZH to extract remainder from AL. Quotient
>>>> access can be optimized very nicely. If remainder is used, we may have
>>>> an extract move for UNSPEC_MOVQI_EXTZH. I think this is a reasonable
>>>> comprise.
>>>
>>> Why we need to reinvent movqi_extzv_2 ?
>>
>> Because UNSPEC_MOVQI_EXTZH extracts AH from AL, not from
>> AX, EAX, RAX.
>
> This is wrong, you can't extract AH out from AL...

UNSPEC_MOVQI_EXTZH  is only used on AL from
8bit div where AH has remainder. Should I give it a different
name, like UNSPEC_MOVQI_EXTZH_FROM_8BITDIV?

>>
>>> I guess that <u>divqi3 has to be implemented as multiple-set divmod
>>> pattern using strict_low_part subregs to exactly describe in which
>>> subreg quotient and remainder go.
>>>
>>
>> I tried it and couldn't get it to work without adding a new
>> constraint for the upper 8bit registers.
>
> "Q" with %h modifier ?
>

Although there are AX, AL, AH, ......, the rest of compiler only
knows one hard register, AX_REG, which is register 0. We can't
really access upper 8bit register,  like AH,  as a real register. We
can only access them via sign_extract and zero_extract.  As far
as the rest of gcc is concerned, there are no upper 8bit registers.
I don't think we can describe where 8bit div remainder is to the
rest of compiler.


-- 
H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-23 16:54           ` H.J. Lu
@ 2010-06-23 18:37             ` Paolo Bonzini
  2010-06-23 18:46               ` H.J. Lu
  0 siblings, 1 reply; 24+ messages in thread
From: Paolo Bonzini @ 2010-06-23 18:37 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Uros Bizjak, gcc-patches

On 06/23/2010 04:26 PM, H.J. Lu wrote:
> On Tue, Jun 22, 2010 at 11:11 PM, Uros Bizjak<ubizjak@gmail.com>  wrote:
>> On Tue, Jun 22, 2010 at 8:57 PM, H.J. Lu<hjl.tools@gmail.com>  wrote:
>>
>>>>> If we use AX for quotient in 8bit divmod pattern, we have to make
>>>>> sure that AX is valid for quotient.  We have to extend AL with UNSPEC
>>>>> since AH isn't the part of quotient,.  Instead, I use AL for quotient and
>>>>> use UNSPEC_MOVQI_EXTZH to extract remainder from AL. Quotient
>>>>> access can be optimized very nicely. If remainder is used, we may have
>>>>> an extract move for UNSPEC_MOVQI_EXTZH. I think this is a reasonable
>>>>> comprise.
>>>>
>>>> Why we need to reinvent movqi_extzv_2 ?
>>>
>>> Because UNSPEC_MOVQI_EXTZH extracts AH from AL, not from
>>> AX, EAX, RAX.
>>
>> This is wrong, you can't extract AH out from AL...
>
> UNSPEC_MOVQI_EXTZH  is only used on AL from
> 8bit div where AH has remainder. Should I give it a different
> name, like UNSPEC_MOVQI_EXTZH_FROM_8BITDIV?
>
>>>
>>>> I guess that<u>divqi3 has to be implemented as multiple-set divmod
>>>> pattern using strict_low_part subregs to exactly describe in which
>>>> subreg quotient and remainder go.
>>>>
>>>
>>> I tried it and couldn't get it to work without adding a new
>>> constraint for the upper 8bit registers.
>>
>> "Q" with %h modifier ?
>>
>
> Although there are AX, AL, AH, ......, the rest of compiler only
> knows one hard register, AX_REG, which is register 0. We can't
> really access upper 8bit register,  like AH,  as a real register. We
> can only access them via sign_extract and zero_extract.  As far
> as the rest of gcc is concerned, there are no upper 8bit registers.
> I don't think we can describe where 8bit div remainder is to the
> rest of compiler.

Well, sure you can:

(set (reg:HI r)
    (ior:HI (lshift:HI
             (zero_extend:HI (umod:QI ...) (const_int 8)))
            (zero_extend:HI (udiv:QI ...) (const_int 8))))

(set (reg:QI s) ;; remainder
    (zero_extract (reg:HI r) (const_int 8) (const_int 8)))

(set (reg:QI t) ;; quotient
    (subreg (reg:HI r) 0))

The question is only whether fwprop, combine and regalloc are smart 
enough to reason about it.

Paolo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-23 18:37             ` Paolo Bonzini
@ 2010-06-23 18:46               ` H.J. Lu
  2010-06-23 19:00                 ` Paolo Bonzini
  0 siblings, 1 reply; 24+ messages in thread
From: H.J. Lu @ 2010-06-23 18:46 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Uros Bizjak, gcc-patches

On Wed, Jun 23, 2010 at 10:34 AM, Paolo Bonzini <bonzini@gnu.org> wrote:
> On 06/23/2010 04:26 PM, H.J. Lu wrote:
>>
>> On Tue, Jun 22, 2010 at 11:11 PM, Uros Bizjak<ubizjak@gmail.com>  wrote:
>>>
>>> On Tue, Jun 22, 2010 at 8:57 PM, H.J. Lu<hjl.tools@gmail.com>  wrote:
>>>
>>>>>> If we use AX for quotient in 8bit divmod pattern, we have to make
>>>>>> sure that AX is valid for quotient.  We have to extend AL with UNSPEC
>>>>>> since AH isn't the part of quotient,.  Instead, I use AL for quotient
>>>>>> and
>>>>>> use UNSPEC_MOVQI_EXTZH to extract remainder from AL. Quotient
>>>>>> access can be optimized very nicely. If remainder is used, we may have
>>>>>> an extract move for UNSPEC_MOVQI_EXTZH. I think this is a reasonable
>>>>>> comprise.
>>>>>
>>>>> Why we need to reinvent movqi_extzv_2 ?
>>>>
>>>> Because UNSPEC_MOVQI_EXTZH extracts AH from AL, not from
>>>> AX, EAX, RAX.
>>>
>>> This is wrong, you can't extract AH out from AL...
>>
>> UNSPEC_MOVQI_EXTZH  is only used on AL from
>> 8bit div where AH has remainder. Should I give it a different
>> name, like UNSPEC_MOVQI_EXTZH_FROM_8BITDIV?
>>
>>>>
>>>>> I guess that<u>divqi3 has to be implemented as multiple-set divmod
>>>>> pattern using strict_low_part subregs to exactly describe in which
>>>>> subreg quotient and remainder go.
>>>>>
>>>>
>>>> I tried it and couldn't get it to work without adding a new
>>>> constraint for the upper 8bit registers.
>>>
>>> "Q" with %h modifier ?
>>>
>>
>> Although there are AX, AL, AH, ......, the rest of compiler only
>> knows one hard register, AX_REG, which is register 0. We can't
>> really access upper 8bit register,  like AH,  as a real register. We
>> can only access them via sign_extract and zero_extract.  As far
>> as the rest of gcc is concerned, there are no upper 8bit registers.
>> I don't think we can describe where 8bit div remainder is to the
>> rest of compiler.
>
> Well, sure you can:
>
> (set (reg:HI r)
>   (ior:HI (lshift:HI
>            (zero_extend:HI (umod:QI ...) (const_int 8)))
>           (zero_extend:HI (udiv:QI ...) (const_int 8))))
>
> (set (reg:QI s) ;; remainder
>   (zero_extract (reg:HI r) (const_int 8) (const_int 8)))
>
> (set (reg:QI t) ;; quotient
>   (subreg (reg:HI r) 0))
>
> The question is only whether fwprop, combine and regalloc are smart enough
> to reason about it.
>

That is what I meant by "We can only access them via sign_extract
and zero_extract."  The rest of gcc will see the same hard register
number for both lower and upper 8bit registers. They aren't prepared
to deal with it.


-- 
H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-23 18:46               ` H.J. Lu
@ 2010-06-23 19:00                 ` Paolo Bonzini
  2010-06-23 19:08                   ` H.J. Lu
  0 siblings, 1 reply; 24+ messages in thread
From: Paolo Bonzini @ 2010-06-23 19:00 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Uros Bizjak, gcc-patches

On 06/23/2010 07:48 PM, H.J. Lu wrote:
>> Well, sure you can:
>>
>> (set (reg:HI r)
>>    (ior:HI (lshift:HI
>>             (zero_extend:HI (umod:QI ...) (const_int 8)))
>>            (zero_extend:HI (udiv:QI ...) (const_int 8))))
>>
>> (set (reg:QI s) ;; remainder
>>    (zero_extract (reg:HI r) (const_int 8) (const_int 8)))
>>
>> (set (reg:QI t) ;; quotient
>>    (subreg (reg:HI r) 0))
>>
>> The question is only whether fwprop, combine and regalloc are smart enough
>> to reason about it.
>>
>
> That is what I meant by "We can only access them via sign_extract
> and zero_extract."  The rest of gcc will see the same hard register
> number for both lower and upper 8bit registers. They aren't prepared
> to deal with it.

Do you mean that GCC will miscompile or pessimize a "mov %ah, %al" for 
the zero-extract in the second set above?  Otherwise, it seems like a 
false problem.

Paolo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-23 19:00                 ` Paolo Bonzini
@ 2010-06-23 19:08                   ` H.J. Lu
  2010-06-23 19:53                     ` Paolo Bonzini
  0 siblings, 1 reply; 24+ messages in thread
From: H.J. Lu @ 2010-06-23 19:08 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Uros Bizjak, gcc-patches

On Wed, Jun 23, 2010 at 11:13 AM, Paolo Bonzini <bonzini@gnu.org> wrote:
> On 06/23/2010 07:48 PM, H.J. Lu wrote:
>>>
>>> Well, sure you can:
>>>
>>> (set (reg:HI r)
>>>   (ior:HI (lshift:HI
>>>            (zero_extend:HI (umod:QI ...) (const_int 8)))
>>>           (zero_extend:HI (udiv:QI ...) (const_int 8))))
>>>
>>> (set (reg:QI s) ;; remainder
>>>   (zero_extract (reg:HI r) (const_int 8) (const_int 8)))
>>>
>>> (set (reg:QI t) ;; quotient
>>>   (subreg (reg:HI r) 0))
>>>
>>> The question is only whether fwprop, combine and regalloc are smart
>>> enough
>>> to reason about it.
>>>
>>
>> That is what I meant by "We can only access them via sign_extract
>> and zero_extract."  The rest of gcc will see the same hard register
>> number for both lower and upper 8bit registers. They aren't prepared
>> to deal with it.
>
> Do you mean that GCC will miscompile or pessimize a "mov %ah, %al" for the
> zero-extract in the second set above?  Otherwise, it seems like a false
> problem.

I tried

 [(set (match_operand:HI 0 "register_operand" "=a")
        (div:HI
          (match_operand:HI 1 "register_operand" "0")
          (match_operand:QI 2 "nonimmediate_operand" "qm")))
   (clobber (reg:CC FLAGS_REG))]

for 8bit div and it doesn't work since AH has remainder.
How can I write 8bit div pattern which works with RA and
other parts of gcc?


-- 
H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-23 19:08                   ` H.J. Lu
@ 2010-06-23 19:53                     ` Paolo Bonzini
  2010-06-23 20:22                       ` H.J. Lu
  0 siblings, 1 reply; 24+ messages in thread
From: Paolo Bonzini @ 2010-06-23 19:53 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Uros Bizjak, gcc-patches

On 06/23/2010 08:24 PM, H.J. Lu wrote:
>   [(set (match_operand:HI 0 "register_operand" "=a")
>          (div:HI
>            (match_operand:HI 1 "register_operand" "0")
>            (match_operand:QI 2 "nonimmediate_operand" "qm")))
>     (clobber (reg:CC FLAGS_REG))]

Maybe this:

[(set (match_operand:QI 0 "register_operand" "=a")
    (subreg:QI
     (div:HI
      (match_operand:HI 1 "register_operand" "0")
      (match_operand:QI 2 "nonimmediate_operand" "qm")) 0))
  (clobber (reg:CC FLAGS_REG))]

Paolo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-23 19:53                     ` Paolo Bonzini
@ 2010-06-23 20:22                       ` H.J. Lu
  2010-06-23 20:22                         ` Paolo Bonzini
  0 siblings, 1 reply; 24+ messages in thread
From: H.J. Lu @ 2010-06-23 20:22 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Uros Bizjak, gcc-patches

On Wed, Jun 23, 2010 at 12:16 PM, Paolo Bonzini <bonzini@gnu.org> wrote:
> On 06/23/2010 08:24 PM, H.J. Lu wrote:
>>
>>  [(set (match_operand:HI 0 "register_operand" "=a")
>>         (div:HI
>>           (match_operand:HI 1 "register_operand" "0")
>>           (match_operand:QI 2 "nonimmediate_operand" "qm")))
>>    (clobber (reg:CC FLAGS_REG))]
>
> Maybe this:
>
> [(set (match_operand:QI 0 "register_operand" "=a")
>   (subreg:QI
>    (div:HI
>     (match_operand:HI 1 "register_operand" "0")
>     (match_operand:QI 2 "nonimmediate_operand" "qm")) 0))
>  (clobber (reg:CC FLAGS_REG))]
>

It doesn't make a big difference since only lower 8bit
is set by it.


-- 
H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-23 20:22                       ` H.J. Lu
@ 2010-06-23 20:22                         ` Paolo Bonzini
  2010-06-23 20:27                           ` H.J. Lu
  0 siblings, 1 reply; 24+ messages in thread
From: Paolo Bonzini @ 2010-06-23 20:22 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Uros Bizjak, gcc-patches

On 06/23/2010 09:35 PM, H.J. Lu wrote:
> On Wed, Jun 23, 2010 at 12:16 PM, Paolo Bonzini<bonzini@gnu.org>  wrote:
>> On 06/23/2010 08:24 PM, H.J. Lu wrote:
>>>
>>>   [(set (match_operand:HI 0 "register_operand" "=a")
>>>          (div:HI
>>>            (match_operand:HI 1 "register_operand" "0")
>>>            (match_operand:QI 2 "nonimmediate_operand" "qm")))
>>>     (clobber (reg:CC FLAGS_REG))]
>>
>> Maybe this:
>>
>> [(set (match_operand:QI 0 "register_operand" "=a")
>>    (subreg:QI
>>     (div:HI
>>      (match_operand:HI 1 "register_operand" "0")
>>      (match_operand:QI 2 "nonimmediate_operand" "qm")) 0))
>>   (clobber (reg:CC FLAGS_REG))]
>>
>
> It doesn't make a big difference since only lower 8bit
> is set by it.

For full divmod you have to shift mod and ior of course.  I understood 
that you were talking of *<u>divqi3.

Paolo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-23 20:22                         ` Paolo Bonzini
@ 2010-06-23 20:27                           ` H.J. Lu
  2010-06-23 21:50                             ` H.J. Lu
  0 siblings, 1 reply; 24+ messages in thread
From: H.J. Lu @ 2010-06-23 20:27 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Uros Bizjak, gcc-patches

On Wed, Jun 23, 2010 at 12:36 PM, Paolo Bonzini <bonzini@gnu.org> wrote:
> On 06/23/2010 09:35 PM, H.J. Lu wrote:
>>
>> On Wed, Jun 23, 2010 at 12:16 PM, Paolo Bonzini<bonzini@gnu.org>  wrote:
>>>
>>> On 06/23/2010 08:24 PM, H.J. Lu wrote:
>>>>
>>>>  [(set (match_operand:HI 0 "register_operand" "=a")
>>>>         (div:HI
>>>>           (match_operand:HI 1 "register_operand" "0")
>>>>           (match_operand:QI 2 "nonimmediate_operand" "qm")))
>>>>    (clobber (reg:CC FLAGS_REG))]
>>>
>>> Maybe this:
>>>
>>> [(set (match_operand:QI 0 "register_operand" "=a")
>>>   (subreg:QI
>>>    (div:HI
>>>     (match_operand:HI 1 "register_operand" "0")
>>>     (match_operand:QI 2 "nonimmediate_operand" "qm")) 0))
>>>  (clobber (reg:CC FLAGS_REG))]
>>>
>>
>> It doesn't make a big difference since only lower 8bit
>> is set by it.
>
> For full divmod you have to shift mod and ior of course.  I understood that
> you were talking of *<u>divqi3.
>

I can't do shift/ior on QI output from *<u>divqi3. I have

;; Divide AX by r/m8, with result stored in
;; AL <- Quotient
;; AH <- Remainder
(define_insn "*<u>divqi3"
  [(set (match_operand:QI 0 "register_operand" "=a")
        (any_div:QI
          (match_operand:HI 1 "register_operand" "0")
          (match_operand:QI 2 "nonimmediate_operand" "qm")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_QIMODE_MATH"
  "<sgnprefix>div{b}\t%2"
  [(set_attr "type" "idiv")
   (set_attr "mode" "QI")])

and use

;; Used to extract remainder from AH by 8bit divmod.  Use unspec so
;; that we can extract it from AL.
(define_insn "*movqi_extzh"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R")
        (unspec:QI [(match_operand 1 "register_operand" "Q,Q")]
                   UNSPEC_MOVQI_EXTZH))]

to extract it from AL



-- 
H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-23 20:27                           ` H.J. Lu
@ 2010-06-23 21:50                             ` H.J. Lu
  2010-06-23 22:44                               ` H.J. Lu
  0 siblings, 1 reply; 24+ messages in thread
From: H.J. Lu @ 2010-06-23 21:50 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Uros Bizjak, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 2652 bytes --]

On Wed, Jun 23, 2010 at 12:50 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Wed, Jun 23, 2010 at 12:36 PM, Paolo Bonzini <bonzini@gnu.org> wrote:
>> On 06/23/2010 09:35 PM, H.J. Lu wrote:
>>>
>>> On Wed, Jun 23, 2010 at 12:16 PM, Paolo Bonzini<bonzini@gnu.org>  wrote:
>>>>
>>>> On 06/23/2010 08:24 PM, H.J. Lu wrote:
>>>>>
>>>>>  [(set (match_operand:HI 0 "register_operand" "=a")
>>>>>         (div:HI
>>>>>           (match_operand:HI 1 "register_operand" "0")
>>>>>           (match_operand:QI 2 "nonimmediate_operand" "qm")))
>>>>>    (clobber (reg:CC FLAGS_REG))]
>>>>
>>>> Maybe this:
>>>>
>>>> [(set (match_operand:QI 0 "register_operand" "=a")
>>>>   (subreg:QI
>>>>    (div:HI
>>>>     (match_operand:HI 1 "register_operand" "0")
>>>>     (match_operand:QI 2 "nonimmediate_operand" "qm")) 0))
>>>>  (clobber (reg:CC FLAGS_REG))]
>>>>
>>>
>>> It doesn't make a big difference since only lower 8bit
>>> is set by it.
>>
>> For full divmod you have to shift mod and ior of course.  I understood that
>> you were talking of *<u>divqi3.
>>
>
> I can't do shift/ior on QI output from *<u>divqi3. I have
>
> ;; Divide AX by r/m8, with result stored in
> ;; AL <- Quotient
> ;; AH <- Remainder
> (define_insn "*<u>divqi3"
>  [(set (match_operand:QI 0 "register_operand" "=a")
>        (any_div:QI
>          (match_operand:HI 1 "register_operand" "0")
>          (match_operand:QI 2 "nonimmediate_operand" "qm")))
>   (clobber (reg:CC FLAGS_REG))]
>  "TARGET_QIMODE_MATH"
>  "<sgnprefix>div{b}\t%2"
>  [(set_attr "type" "idiv")
>   (set_attr "mode" "QI")])
>
> and use
>
> ;; Used to extract remainder from AH by 8bit divmod.  Use unspec so
> ;; that we can extract it from AL.
> (define_insn "*movqi_extzh"
>  [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R")
>        (unspec:QI [(match_operand 1 "register_operand" "Q,Q")]
>                   UNSPEC_MOVQI_EXTZH))]
>
> to extract it from AL
>

Here is a different approach.  It uses UNSPEC for 8bit divmod.
The generated code is the same.


-- 
H.J.
---
gcc/

2010-06-22  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* config/i386/i386.md (UNSPEC_DIVQI): New.
	(UNSPEC_UDIVQI): Likewise.
	(extend_code): Likewise.
	(extract_code): Likewise.
	(*movqi_extzv_3): Likewise.
	(*movqi_extzv_3_rex64): Likewise.
	(<u>divmodqi4): Likewise.
	(*divqi3): Likewise.
	(*udivqi3): Likewise.
	(<u>divqi3): Remvoved.

gcc/testsuite/

2010-06-22  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* gcc.target/i386/umod-1.c: New.
	* gcc.target/i386/umod-2.c: Likewise.
	* gcc.target/i386/umod-3.c: Likewise.

[-- Attachment #2: gcc-pr44588-8.patch --]
[-- Type: text/plain, Size: 7326 bytes --]

gcc/

2010-06-22  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* config/i386/i386.md (UNSPEC_DIVQI): New.
	(UNSPEC_UDIVQI): Likewise.
	(extend_code): Likewise.
	(extract_code): Likewise.
	(*movqi_extzv_3): Likewise.
	(*movqi_extzv_3_rex64): Likewise.
	(<u>divmodqi4): Likewise.
	(*divqi3): Likewise.
	(*udivqi3): Likewise.
	(<u>divqi3): Remvoved.

gcc/testsuite/

2010-06-22  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* gcc.target/i386/umod-1.c: New.
	* gcc.target/i386/umod-2.c: Likewise.
	* gcc.target/i386/umod-3.c: Likewise.

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ab90d73..1a31109 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -104,6 +104,8 @@
   UNSPEC_REP
   UNSPEC_LD_MPIC	; load_macho_picbase
   UNSPEC_TRUNC_NOOP
+  UNSPEC_DIVQI
+  UNSPEC_UDIVQI
 
   ;; For SSE/MMX support:
   UNSPEC_FIX_NOTRUNC
@@ -760,6 +762,10 @@
 
 ;; Used in signed and unsigned divisions.
 (define_code_iterator any_div [div udiv])
+(define_code_attr extend_code
+  [(div "SIGN_EXTEND") (udiv "ZERO_EXTEND")])
+(define_code_attr extract_code
+  [(div "SIGN_EXTRACT") (udiv "ZERO_EXTRACT")])
 
 ;; Instruction prefix for signed and unsigned operations.
 (define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
@@ -2563,6 +2569,59 @@
 	(const_string "SI")
 	(const_string "QI")))])
 
+(define_insn "*movqi_extzv_3"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R")
+	(zero_extract:QI (match_operand 1 "ext_register_operand" "Q,Q")
+			 (const_int 8)
+			 (const_int 8)))]
+  "!TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (and (match_operand:QI 0 "register_operand" "")
+			(ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			     (ne (symbol_ref "TARGET_MOVX")
+				 (const_int 0))))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
+(define_insn "*movqi_extzv_3_rex64"
+  [(set (match_operand:QI 0 "register_operand" "=Q,?R")
+	(zero_extract:QI (match_operand 1 "ext_register_operand" "Q,Q")
+			 (const_int 8)
+			 (const_int 8)))]
+  "TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			(ne (symbol_ref "TARGET_MOVX")
+			    (const_int 0)))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
 (define_expand "mov<mode>_insv_1"
   [(set (zero_extract:SWI48 (match_operand 0 "ext_register_operand" "")
 			    (const_int 8)
@@ -7305,17 +7364,6 @@
 \f
 ;; Divide instructions
 
-(define_insn "<u>divqi3"
-  [(set (match_operand:QI 0 "register_operand" "=a")
-	(any_div:QI
-	  (match_operand:HI 1 "register_operand" "0")
-	  (match_operand:QI 2 "nonimmediate_operand" "qm")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_QIMODE_MATH"
-  "<sgnprefix>div{b}\t%2"
-  [(set_attr "type" "idiv")
-   (set_attr "mode" "QI")])
-
 ;; The patterns that match these are at the end of this file.
 
 (define_expand "divxf3"
@@ -7352,6 +7400,76 @@
 \f
 ;; Divmod instructions.
 
+(define_expand "<u>divmodqi4"
+  [(parallel [(set (match_operand:QI 0 "register_operand" "")
+		   (any_div:QI
+		     (match_operand:QI 1 "register_operand" "")
+		     (match_operand:QI 2 "nonimmediate_operand" "")))
+	      (set (match_operand:QI 3 "register_operand" "")
+		   (mod:QI (match_dup 1) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_QIMODE_MATH"
+{
+  rtx div, clobber, set;
+  rtx operand0, operand1;
+  
+  operand0 = gen_reg_rtx (HImode);
+  operand1 = gen_reg_rtx (HImode);
+  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+
+  /* Properly extend operands[1] to HImode.  */
+  set = gen_rtx_SET (VOIDmode, operand1,
+		     gen_rtx_<extend_code> (HImode, operands[1]));
+  if (<extend_code> == SIGN_EXTEND)
+    emit_insn (set);
+  else
+    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
+
+  /* Generate 8bit divide.  Result is in AX.  */
+  div = gen_rtx_UNSPEC (HImode, gen_rtvec (2, operand1, operands[2]),
+			(<extend_code> == SIGN_EXTEND
+			 ? UNSPEC_DIVQI : UNSPEC_UDIVQI));
+  div = gen_rtx_SET (VOIDmode, operand0, div);
+  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, div, clobber)));
+
+  /* Extract remainder from AH.  */
+  operand1 = gen_rtx_<extract_code> (QImode, operand0,
+				     GEN_INT (8), GEN_INT (8));
+  emit_move_insn (operands[3], operand1);
+
+  /* Extract quotient from AL.  */
+  operand1 = simplify_gen_subreg (QImode, operand0, HImode, 0);
+  emit_move_insn (operands[0], operand1);
+  DONE;
+})
+
+;; Divide AX by r/m8, with result stored in
+;; AL <- Quotient
+;; AH <- Remainder
+(define_insn "*divqi3"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(unspec:HI
+	  [(match_operand:HI 1 "register_operand" "0")
+	   (match_operand:QI 2 "nonimmediate_operand" "qm")]
+	  UNSPEC_DIVQI))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH"
+  "idiv{b}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "QI")])
+
+(define_insn "*udivqi3"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(unspec:HI
+	  [(match_operand:HI 1 "register_operand" "0")
+	   (match_operand:QI 2 "nonimmediate_operand" "qm")]
+	  UNSPEC_UDIVQI))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH"
+  "div{b}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "QI")])
+
 (define_expand "divmod<mode>4"
   [(parallel [(set (match_operand:SWIM248 0 "register_operand" "")
 		   (div:SWIM248
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-1.c	2010-06-21 12:01:25.705950180 -0700
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+unsigned char
+foo (unsigned char x, unsigned char y)
+{
+  return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-2.c	2010-06-21 12:01:17.857932744 -0700
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+extern unsigned char z;
+
+unsigned char
+foo (unsigned char x, unsigned char y)
+{
+  z = x/y;
+  return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-3.c	2010-06-21 12:02:36.809962702 -0700
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+extern void abort (void);
+extern void exit (int);
+
+unsigned char cx = 7;
+
+int
+main ()
+{
+  unsigned char cy;
+  
+  cy = cx / 6; if (cy != 1) abort ();
+  cy = cx % 6; if (cy != 1) abort ();
+
+  exit(0);
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-23 21:50                             ` H.J. Lu
@ 2010-06-23 22:44                               ` H.J. Lu
  2010-06-24  0:05                                 ` Paolo Bonzini
  0 siblings, 1 reply; 24+ messages in thread
From: H.J. Lu @ 2010-06-23 22:44 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Uros Bizjak, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 2803 bytes --]

On Wed, Jun 23, 2010 at 2:22 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Wed, Jun 23, 2010 at 12:50 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Wed, Jun 23, 2010 at 12:36 PM, Paolo Bonzini <bonzini@gnu.org> wrote:
>>> On 06/23/2010 09:35 PM, H.J. Lu wrote:
>>>>
>>>> On Wed, Jun 23, 2010 at 12:16 PM, Paolo Bonzini<bonzini@gnu.org>  wrote:
>>>>>
>>>>> On 06/23/2010 08:24 PM, H.J. Lu wrote:
>>>>>>
>>>>>>  [(set (match_operand:HI 0 "register_operand" "=a")
>>>>>>         (div:HI
>>>>>>           (match_operand:HI 1 "register_operand" "0")
>>>>>>           (match_operand:QI 2 "nonimmediate_operand" "qm")))
>>>>>>    (clobber (reg:CC FLAGS_REG))]
>>>>>
>>>>> Maybe this:
>>>>>
>>>>> [(set (match_operand:QI 0 "register_operand" "=a")
>>>>>   (subreg:QI
>>>>>    (div:HI
>>>>>     (match_operand:HI 1 "register_operand" "0")
>>>>>     (match_operand:QI 2 "nonimmediate_operand" "qm")) 0))
>>>>>  (clobber (reg:CC FLAGS_REG))]
>>>>>
>>>>
>>>> It doesn't make a big difference since only lower 8bit
>>>> is set by it.
>>>
>>> For full divmod you have to shift mod and ior of course.  I understood that
>>> you were talking of *<u>divqi3.
>>>
>>
>> I can't do shift/ior on QI output from *<u>divqi3. I have
>>
>> ;; Divide AX by r/m8, with result stored in
>> ;; AL <- Quotient
>> ;; AH <- Remainder
>> (define_insn "*<u>divqi3"
>>  [(set (match_operand:QI 0 "register_operand" "=a")
>>        (any_div:QI
>>          (match_operand:HI 1 "register_operand" "0")
>>          (match_operand:QI 2 "nonimmediate_operand" "qm")))
>>   (clobber (reg:CC FLAGS_REG))]
>>  "TARGET_QIMODE_MATH"
>>  "<sgnprefix>div{b}\t%2"
>>  [(set_attr "type" "idiv")
>>   (set_attr "mode" "QI")])
>>
>> and use
>>
>> ;; Used to extract remainder from AH by 8bit divmod.  Use unspec so
>> ;; that we can extract it from AL.
>> (define_insn "*movqi_extzh"
>>  [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R")
>>        (unspec:QI [(match_operand 1 "register_operand" "Q,Q")]
>>                   UNSPEC_MOVQI_EXTZH))]
>>
>> to extract it from AL
>>
>
> Here is a different approach.  It uses UNSPEC for 8bit divmod.
> The generated code is the same.
>
>

An updated patch.  No need for *movqi_extzv_3 and
*movqi_extzv_3_rex64.


-- 
H.J.
---
gcc/

2010-06-22  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* config/i386/i386.md (UNSPEC_DIVQI): New.
	(UNSPEC_UDIVQI): Likewise.
	(extend_code): Likewise.
	(extract_code): Likewise.
	(<u>divmodqi4): Likewise.
	(*divqi3): Likewise.
	(*udivqi3): Likewise.
	(<u>divqi3): Remvoved.

gcc/testsuite/

2010-06-22  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* gcc.target/i386/umod-1.c: New.
	* gcc.target/i386/umod-2.c: Likewise.
	* gcc.target/i386/umod-3.c: Likewise.

[-- Attachment #2: gcc-pr44588-9.patch --]
[-- Type: text/plain, Size: 5783 bytes --]

gcc/

2010-06-22  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* config/i386/i386.md (UNSPEC_DIVQI): New.
	(UNSPEC_UDIVQI): Likewise.
	(extend_code): Likewise.
	(extract_code): Likewise.
	(<u>divmodqi4): Likewise.
	(*divqi3): Likewise.
	(*udivqi3): Likewise.
	(<u>divqi3): Remvoved.

gcc/testsuite/

2010-06-22  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* gcc.target/i386/umod-1.c: New.
	* gcc.target/i386/umod-2.c: Likewise.
	* gcc.target/i386/umod-3.c: Likewise.

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ab90d73..68c9e47 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -104,6 +104,8 @@
   UNSPEC_REP
   UNSPEC_LD_MPIC	; load_macho_picbase
   UNSPEC_TRUNC_NOOP
+  UNSPEC_DIVQI
+  UNSPEC_UDIVQI
 
   ;; For SSE/MMX support:
   UNSPEC_FIX_NOTRUNC
@@ -760,6 +762,10 @@
 
 ;; Used in signed and unsigned divisions.
 (define_code_iterator any_div [div udiv])
+(define_code_attr extend_code
+  [(div "SIGN_EXTEND") (udiv "ZERO_EXTEND")])
+(define_code_attr extract_code
+  [(div "SIGN_EXTRACT") (udiv "ZERO_EXTRACT")])
 
 ;; Instruction prefix for signed and unsigned operations.
 (define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
@@ -7305,17 +7311,6 @@
 \f
 ;; Divide instructions
 
-(define_insn "<u>divqi3"
-  [(set (match_operand:QI 0 "register_operand" "=a")
-	(any_div:QI
-	  (match_operand:HI 1 "register_operand" "0")
-	  (match_operand:QI 2 "nonimmediate_operand" "qm")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_QIMODE_MATH"
-  "<sgnprefix>div{b}\t%2"
-  [(set_attr "type" "idiv")
-   (set_attr "mode" "QI")])
-
 ;; The patterns that match these are at the end of this file.
 
 (define_expand "divxf3"
@@ -7352,6 +7347,83 @@
 \f
 ;; Divmod instructions.
 
+(define_expand "<u>divmodqi4"
+  [(parallel [(set (match_operand:QI 0 "register_operand" "")
+		   (any_div:QI
+		     (match_operand:QI 1 "register_operand" "")
+		     (match_operand:QI 2 "nonimmediate_operand" "")))
+	      (set (match_operand:QI 3 "register_operand" "")
+		   (mod:QI (match_dup 1) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_QIMODE_MATH"
+{
+  rtx div, clobber, set;
+  rtx operand0, operand1;
+  
+  operand0 = gen_reg_rtx (HImode);
+  operand1 = gen_reg_rtx (HImode);
+  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+
+  /* Properly extend operands[1] to HImode.  */
+  set = gen_rtx_SET (VOIDmode, operand1,
+		     gen_rtx_<extend_code> (HImode, operands[1]));
+  if (<extend_code> == SIGN_EXTEND)
+    emit_insn (set);
+  else
+    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
+
+  /* Generate 8bit divide.  Result is in AX.  */
+  div = gen_rtx_UNSPEC (HImode, gen_rtvec (2, operand1, operands[2]),
+			(<extend_code> == SIGN_EXTEND
+			 ? UNSPEC_DIVQI : UNSPEC_UDIVQI));
+  div = gen_rtx_SET (VOIDmode, operand0, div);
+  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, div, clobber)));
+
+  /* Extract remainder from AH.  */
+  if (<extend_code> == SIGN_EXTEND)
+    operand1 = gen_rtx_<extract_code> (QImode, operand0,
+				       GEN_INT (8), GEN_INT (8));
+  else
+    {
+      operand1 = gen_rtx_<extract_code> (SImode, operand0,
+					 GEN_INT (8), GEN_INT (8));
+      operand1 = simplify_gen_subreg (QImode, operand1, SImode, 0);
+    }
+  emit_move_insn (operands[3], operand1);
+
+  /* Extract quotient from AL.  */
+  operand1 = simplify_gen_subreg (QImode, operand0, HImode, 0);
+  emit_move_insn (operands[0], operand1);
+  DONE;
+})
+
+;; Divide AX by r/m8, with result stored in
+;; AL <- Quotient
+;; AH <- Remainder
+(define_insn "*divqi3"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(unspec:HI
+	  [(match_operand:HI 1 "register_operand" "0")
+	   (match_operand:QI 2 "nonimmediate_operand" "qm")]
+	  UNSPEC_DIVQI))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH"
+  "idiv{b}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "QI")])
+
+(define_insn "*udivqi3"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(unspec:HI
+	  [(match_operand:HI 1 "register_operand" "0")
+	   (match_operand:QI 2 "nonimmediate_operand" "qm")]
+	  UNSPEC_UDIVQI))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH"
+  "div{b}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "QI")])
+
 (define_expand "divmod<mode>4"
   [(parallel [(set (match_operand:SWIM248 0 "register_operand" "")
 		   (div:SWIM248
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-1.c	2010-06-21 12:01:25.705950180 -0700
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+unsigned char
+foo (unsigned char x, unsigned char y)
+{
+  return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-2.c	2010-06-21 12:01:17.857932744 -0700
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+extern unsigned char z;
+
+unsigned char
+foo (unsigned char x, unsigned char y)
+{
+  z = x/y;
+  return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-3.c	2010-06-21 12:02:36.809962702 -0700
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+extern void abort (void);
+extern void exit (int);
+
+unsigned char cx = 7;
+
+int
+main ()
+{
+  unsigned char cy;
+  
+  cy = cx / 6; if (cy != 1) abort ();
+  cy = cx % 6; if (cy != 1) abort ();
+
+  exit(0);
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-23 22:44                               ` H.J. Lu
@ 2010-06-24  0:05                                 ` Paolo Bonzini
  2010-06-24  0:54                                   ` H.J. Lu
  0 siblings, 1 reply; 24+ messages in thread
From: Paolo Bonzini @ 2010-06-24  0:05 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Uros Bizjak, gcc-patches

On 06/24/2010 12:01 AM, H.J. Lu wrote:
> On Wed, Jun 23, 2010 at 2:22 PM, H.J. Lu<hjl.tools@gmail.com>  wrote:
>> On Wed, Jun 23, 2010 at 12:50 PM, H.J. Lu<hjl.tools@gmail.com>  wrote:
>>> On Wed, Jun 23, 2010 at 12:36 PM, Paolo Bonzini<bonzini@gnu.org>  wrote:
>>>> On 06/23/2010 09:35 PM, H.J. Lu wrote:
>>>>>
>>>>> On Wed, Jun 23, 2010 at 12:16 PM, Paolo Bonzini<bonzini@gnu.org>    wrote:
>>>>>>
>>>>>> On 06/23/2010 08:24 PM, H.J. Lu wrote:
>>>>>>>
>>>>>>>   [(set (match_operand:HI 0 "register_operand" "=a")
>>>>>>>          (div:HI
>>>>>>>            (match_operand:HI 1 "register_operand" "0")
>>>>>>>            (match_operand:QI 2 "nonimmediate_operand" "qm")))
>>>>>>>     (clobber (reg:CC FLAGS_REG))]
>>>>>>
>>>>>> Maybe this:
>>>>>>
>>>>>> [(set (match_operand:QI 0 "register_operand" "=a")
>>>>>>    (subreg:QI
>>>>>>     (div:HI
>>>>>>      (match_operand:HI 1 "register_operand" "0")
>>>>>>      (match_operand:QI 2 "nonimmediate_operand" "qm")) 0))
>>>>>>   (clobber (reg:CC FLAGS_REG))]
>>>>>>
>>>>>
>>>>> It doesn't make a big difference since only lower 8bit
>>>>> is set by it.
>>>>
>>>> For full divmod you have to shift mod and ior of course.  I understood that
>>>> you were talking of *<u>divqi3.
>>>>
>>>
>>> I can't do shift/ior on QI output from *<u>divqi3. I have
>>>
>> Here is a different approach.  It uses UNSPEC for 8bit divmod.
>> The generated code is the same.
>
> An updated patch.  No need for *movqi_extzv_3 and
> *movqi_extzv_3_rex64.

I don't understand exactly what the problem was; clearly it couldn't 
work as long as this pattern was cheating blatantly:

(define_insn "*<u>divqi3"
   [(set (match_operand:QI 0 "register_operand" "=a")
         (any_div:QI
           (match_operand:HI 1 "register_operand" "0")
           (match_operand:QI 2 "nonimmediate_operand" "qm")))

Anyway this patch is IMO much nicer than the first ones, so if Uros is 
okay I don't think it's useful to pursue a more accurate representation. 
  Just make sure that REG_EQUAL notes are added to the two extractions.

Paolo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-24  0:05                                 ` Paolo Bonzini
@ 2010-06-24  0:54                                   ` H.J. Lu
  2010-06-24  3:47                                     ` Paolo Bonzini
  0 siblings, 1 reply; 24+ messages in thread
From: H.J. Lu @ 2010-06-24  0:54 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Uros Bizjak, gcc-patches

On Wed, Jun 23, 2010 at 3:39 PM, Paolo Bonzini <bonzini@gnu.org> wrote:
> On 06/24/2010 12:01 AM, H.J. Lu wrote:
>>
>> On Wed, Jun 23, 2010 at 2:22 PM, H.J. Lu<hjl.tools@gmail.com>  wrote:
>>>
>>> On Wed, Jun 23, 2010 at 12:50 PM, H.J. Lu<hjl.tools@gmail.com>  wrote:
>>>>
>>>> On Wed, Jun 23, 2010 at 12:36 PM, Paolo Bonzini<bonzini@gnu.org>  wrote:
>>>>>
>>>>> On 06/23/2010 09:35 PM, H.J. Lu wrote:
>>>>>>
>>>>>> On Wed, Jun 23, 2010 at 12:16 PM, Paolo Bonzini<bonzini@gnu.org>
>>>>>>  wrote:
>>>>>>>
>>>>>>> On 06/23/2010 08:24 PM, H.J. Lu wrote:
>>>>>>>>
>>>>>>>>  [(set (match_operand:HI 0 "register_operand" "=a")
>>>>>>>>         (div:HI
>>>>>>>>           (match_operand:HI 1 "register_operand" "0")
>>>>>>>>           (match_operand:QI 2 "nonimmediate_operand" "qm")))
>>>>>>>>    (clobber (reg:CC FLAGS_REG))]
>>>>>>>
>>>>>>> Maybe this:
>>>>>>>
>>>>>>> [(set (match_operand:QI 0 "register_operand" "=a")
>>>>>>>   (subreg:QI
>>>>>>>    (div:HI
>>>>>>>     (match_operand:HI 1 "register_operand" "0")
>>>>>>>     (match_operand:QI 2 "nonimmediate_operand" "qm")) 0))
>>>>>>>  (clobber (reg:CC FLAGS_REG))]
>>>>>>>
>>>>>>
>>>>>> It doesn't make a big difference since only lower 8bit
>>>>>> is set by it.
>>>>>
>>>>> For full divmod you have to shift mod and ior of course.  I understood
>>>>> that
>>>>> you were talking of *<u>divqi3.
>>>>>
>>>>
>>>> I can't do shift/ior on QI output from *<u>divqi3. I have
>>>>
>>> Here is a different approach.  It uses UNSPEC for 8bit divmod.
>>> The generated code is the same.
>>
>> An updated patch.  No need for *movqi_extzv_3 and
>> *movqi_extzv_3_rex64.
>
> I don't understand exactly what the problem was; clearly it couldn't work as
> long as this pattern was cheating blatantly:

Upper 8bit registers aren't real registers. You can't
do RA with them.

> (define_insn "*<u>divqi3"
>  [(set (match_operand:QI 0 "register_operand" "=a")
>        (any_div:QI
>          (match_operand:HI 1 "register_operand" "0")
>          (match_operand:QI 2 "nonimmediate_operand" "qm")))
>
> Anyway this patch is IMO much nicer than the first ones, so if Uros is okay
> I don't think it's useful to pursue a more accurate representation.  Just
> make sure that REG_EQUAL notes are added to the two extractions.
>

What are REG_EQUAL notes used for? As far as the
rest of gcc is concerned, there are no upper 8bit
registers. But you can access bits 8-15 of HI, SI,
DI registers via XXX_extract.

-- 
H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-24  0:54                                   ` H.J. Lu
@ 2010-06-24  3:47                                     ` Paolo Bonzini
  2010-06-24 17:38                                       ` H.J. Lu
  0 siblings, 1 reply; 24+ messages in thread
From: Paolo Bonzini @ 2010-06-24  3:47 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Uros Bizjak, gcc-patches

On 06/24/2010 01:13 AM, H.J. Lu wrote:
>> I don't understand exactly what the problem was; clearly it couldn't work as
>> long as this pattern was cheating blatantly:
>
> Upper 8bit registers aren't real registers. You can't
> do RA with them.

Please read what other people write.  I'm saying that you should have 
used a HI destination just like you did with your new UNSPEC, which is 
acceptable.

The pattern I quoted below used a QI destination; then magically you 
attempted to extract bit 8..15 of it with an unspec, or something like 
that.  For anything like that the optimizers are going to bite back 
sooner or later.  And you'd deserve it.

>> (define_insn "*<u>divqi3"
>>   [(set (match_operand:QI 0 "register_operand" "=a")
>>         (any_div:QI
>>           (match_operand:HI 1 "register_operand" "0")
>>           (match_operand:QI 2 "nonimmediate_operand" "qm")))
>>
>> Anyway this patch is IMO much nicer than the first ones, so if Uros is okay
>> I don't think it's useful to pursue a more accurate representation.  Just
>> make sure that REG_EQUAL notes are added to the two extractions.
>>
>
> What are REG_EQUAL notes used for?
> As far as the rest of gcc is concerned, there are no upper 8bit
> registers. But you can access bits 8-15 of HI, SI,
> DI registers via XXX_extract.

Understood.  Using an unspec is fine for me, even though it's not an 
approval.  But that's completely orthogonal to putting a REG_EQUAL note 
_on the two regs that you extract out of AX_.  The notes' value should 
be one of

    (subreg:QI (udiv:HI (...) (zero_extend (...))))
    (subreg:QI (div:HI (...) (sign_extend (...))))
    (subreg:QI (umod:HI (...) (zero_extend (...))))
    (subreg:QI (mod:HI (...) (sign_extend (...))))

But I think that with over ten years of GCC experience you do not need 
anyone to tell you this.  In fact, regarding "what are REG_EQUAL notes 
used for?" my first thought was RTFM, but anyway: they are used by CSE, 
fwprop, GCSE and combine to simplify and eliminate redundant expressions.

Paolo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-24  3:47                                     ` Paolo Bonzini
@ 2010-06-24 17:38                                       ` H.J. Lu
  2010-06-24 17:58                                         ` Paolo Bonzini
  2010-06-24 19:01                                         ` Uros Bizjak
  0 siblings, 2 replies; 24+ messages in thread
From: H.J. Lu @ 2010-06-24 17:38 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Uros Bizjak, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 2812 bytes --]

On Wed, Jun 23, 2010 at 4:50 PM, Paolo Bonzini <bonzini@gnu.org> wrote:
> On 06/24/2010 01:13 AM, H.J. Lu wrote:
>>>
>>> I don't understand exactly what the problem was; clearly it couldn't work
>>> as
>>> long as this pattern was cheating blatantly:
>>
>> Upper 8bit registers aren't real registers. You can't
>> do RA with them.
>
> Please read what other people write.  I'm saying that you should have used a
> HI destination just like you did with your new UNSPEC, which is acceptable.
>
> The pattern I quoted below used a QI destination; then magically you
> attempted to extract bit 8..15 of it with an unspec, or something like that.
>  For anything like that the optimizers are going to bite back sooner or
> later.  And you'd deserve it.
>
>>> (define_insn "*<u>divqi3"
>>>  [(set (match_operand:QI 0 "register_operand" "=a")
>>>        (any_div:QI
>>>          (match_operand:HI 1 "register_operand" "0")
>>>          (match_operand:QI 2 "nonimmediate_operand" "qm")))
>>>
>>> Anyway this patch is IMO much nicer than the first ones, so if Uros is
>>> okay
>>> I don't think it's useful to pursue a more accurate representation.  Just
>>> make sure that REG_EQUAL notes are added to the two extractions.
>>>
>>
>> What are REG_EQUAL notes used for?
>> As far as the rest of gcc is concerned, there are no upper 8bit
>> registers. But you can access bits 8-15 of HI, SI,
>> DI registers via XXX_extract.
>
> Understood.  Using an unspec is fine for me, even though it's not an
> approval.  But that's completely orthogonal to putting a REG_EQUAL note _on
> the two regs that you extract out of AX_.  The notes' value should be one of
>
>   (subreg:QI (udiv:HI (...) (zero_extend (...))))
>   (subreg:QI (div:HI (...) (sign_extend (...))))
>   (subreg:QI (umod:HI (...) (zero_extend (...))))
>   (subreg:QI (mod:HI (...) (sign_extend (...))))
>
> But I think that with over ten years of GCC experience you do not need
> anyone to tell you this.  In fact, regarding "what are REG_EQUAL notes used
> for?" my first thought was RTFM, but anyway: they are used by CSE, fwprop,
> GCSE and combine to simplify and eliminate redundant expressions.

I am learning new things all the time. Thanks for the pointer.

Here is a new patch without UNSPEC.  OK for trunk?

Thanks.

-- 
H.J.
---
gcc/

2010-06-24  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* config/i386/i386.md (extract_code): New.
	(<u>divmodqi4): Likewise.
	(divmodhiqi3): Likewise.
	(udivmodhiqi3): Likewise.
	(<u>divqi3): Remvoved.

gcc/testsuite/

2010-06-24  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* gcc.target/i386/mod-1.c: New.
	* gcc.target/i386/umod-1.c: Likewise.
	* gcc.target/i386/umod-2.c: Likewise.
	* gcc.target/i386/umod-3.c: Likewise.

[-- Attachment #2: gcc-pr44588-10.patch --]
[-- Type: text/plain, Size: 6496 bytes --]

gcc/

2010-06-24  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* config/i386/i386.md (extract_code): New.
	(<u>divmodqi4): Likewise.
	(divmodhiqi3): Likewise.
	(udivmodhiqi3): Likewise.
	(<u>divqi3): Remvoved.

gcc/testsuite/

2010-06-24  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* gcc.target/i386/mod-1.c: New.
	* gcc.target/i386/umod-1.c: Likewise.
	* gcc.target/i386/umod-2.c: Likewise.
	* gcc.target/i386/umod-3.c: Likewise.

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 1f7369b..6514ee4 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -760,6 +760,8 @@
 
 ;; Used in signed and unsigned divisions.
 (define_code_iterator any_div [div udiv])
+(define_code_attr extract_code
+  [(div "SIGN_EXTRACT") (udiv "ZERO_EXTRACT")])
 
 ;; Instruction prefix for signed and unsigned operations.
 (define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
@@ -7269,17 +7271,6 @@
 \f
 ;; Divide instructions
 
-(define_insn "<u>divqi3"
-  [(set (match_operand:QI 0 "register_operand" "=a")
-	(any_div:QI
-	  (match_operand:HI 1 "register_operand" "0")
-	  (match_operand:QI 2 "nonimmediate_operand" "qm")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_QIMODE_MATH"
-  "<sgnprefix>div{b}\t%2"
-  [(set_attr "type" "idiv")
-   (set_attr "mode" "QI")])
-
 ;; The patterns that match these are at the end of this file.
 
 (define_expand "divxf3"
@@ -7316,6 +7307,92 @@
 \f
 ;; Divmod instructions.
 
+(define_expand "<u>divmodqi4"
+  [(parallel [(set (match_operand:QI 0 "register_operand" "")
+		   (any_div:QI
+		     (match_operand:QI 1 "register_operand" "")
+		     (match_operand:QI 2 "nonimmediate_operand" "")))
+	      (set (match_operand:QI 3 "register_operand" "")
+		   (mod:QI (match_dup 1) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_QIMODE_MATH"
+{
+  rtx div, mod, insn;
+  rtx operand0, operand1;
+  
+  operand0 = gen_reg_rtx (HImode);
+  operand1 = gen_reg_rtx (HImode);
+
+  /* Extend operands[1] to HImode.  Generate 8bit divide.  Result is
+     in AX.  */
+  if (<extract_code> == SIGN_EXTRACT)
+    {
+      emit_insn (gen_extendqihi2 (operand1, operands[1]));
+      emit_insn (gen_divmodhiqi3 (operand0, operand1, operands[2]));
+
+      div = gen_rtx_DIV (QImode, operands[1], operands[2]);
+      mod = gen_rtx_MOD (QImode, operands[1], operands[2]);
+
+      operand1 = gen_rtx_<extract_code> (QImode, operand0,
+					 GEN_INT (8), GEN_INT (8));
+    }
+  else
+    {
+      emit_insn (gen_zero_extendqihi2 (operand1, operands[1]));
+      emit_insn (gen_udivmodhiqi3 (operand0, operand1, operands[2]));
+
+      div = gen_rtx_UDIV (QImode, operands[1], operands[2]);
+      mod = gen_rtx_UMOD (QImode, operands[1], operands[2]);
+
+      operand1 = gen_rtx_<extract_code> (SImode, operand0,
+					 GEN_INT (8), GEN_INT (8));
+      operand1 = simplify_gen_subreg (QImode, operand1, SImode, 0);
+    }
+
+  /* Extract remainder from AH.  */
+  insn = emit_move_insn (operands[3], operand1);
+  set_unique_reg_note (insn, REG_EQUAL, mod);
+
+  /* Extract quotient from AL.  */
+  insn = emit_move_insn (operands[0], gen_lowpart (QImode, operand0));
+  set_unique_reg_note (insn, REG_EQUAL, div);
+
+  DONE;
+})
+
+;; Divide AX by r/m8, with result stored in
+;; AL <- Quotient
+;; AH <- Remainder
+(define_insn "divmodhiqi3"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(ior:HI
+	  (ashift:HI
+	    (zero_extend:HI
+	      (mod:QI (match_operand:HI 1 "register_operand" "0")
+		      (match_operand:QI 2 "nonimmediate_operand" "qm")))
+	    (const_int 8))
+	  (zero_extend:HI (div:QI (match_dup 1) (match_dup 2)))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH"
+  "idiv{b}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "QI")])
+
+(define_insn "udivmodhiqi3"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(ior:HI
+	  (ashift:HI
+	    (zero_extend:HI
+	      (umod:QI (match_operand:HI 1 "register_operand" "0")
+		       (match_operand:QI 2 "nonimmediate_operand" "qm")))
+	    (const_int 8))
+	  (zero_extend:HI (udiv:QI (match_dup 1) (match_dup 2)))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH"
+  "div{b}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "QI")])
+
 (define_expand "divmod<mode>4"
   [(parallel [(set (match_operand:SWIM248 0 "register_operand" "")
 		   (div:SWIM248
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/mod-1.c	2010-06-24 09:41:47.401923084 -0700
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -mtune=atom" } */
+
+typedef struct {
+  int a;
+} VCR;
+
+typedef struct {
+  VCR vcr[8];
+} VCRC;
+
+typedef struct {
+  char vcr;
+} OWN;
+
+OWN Own[16];
+
+void
+f (VCRC *x, OWN *own)
+{
+  x[own->vcr / 8].vcr[own->vcr % 8].a--;
+  x[own->vcr / 8].vcr[own->vcr % 8].a = x[own->vcr / 8].vcr[own->vcr % 8].a;
+}
+
+/* { dg-final { scan-assembler-times "idivb" 1 } } */
+/* { dg-final { scan-assembler-not "incl" } } */
+/* { dg-final { scan-assembler-not "orl" } } */
+/* { dg-final { scan-assembler-not "andb" } } */
+/* { dg-final { scan-assembler-not "jns" } } */
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-1.c	2010-06-21 12:01:25.705950180 -0700
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+unsigned char
+foo (unsigned char x, unsigned char y)
+{
+  return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-2.c	2010-06-21 12:01:17.857932744 -0700
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+extern unsigned char z;
+
+unsigned char
+foo (unsigned char x, unsigned char y)
+{
+  z = x/y;
+  return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-3.c	2010-06-21 12:02:36.809962702 -0700
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+extern void abort (void);
+extern void exit (int);
+
+unsigned char cx = 7;
+
+int
+main ()
+{
+  unsigned char cy;
+  
+  cy = cx / 6; if (cy != 1) abort ();
+  cy = cx % 6; if (cy != 1) abort ();
+
+  exit(0);
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-24 17:38                                       ` H.J. Lu
@ 2010-06-24 17:58                                         ` Paolo Bonzini
  2010-06-24 19:01                                         ` Uros Bizjak
  1 sibling, 0 replies; 24+ messages in thread
From: Paolo Bonzini @ 2010-06-24 17:58 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Uros Bizjak, gcc-patches

On 06/24/2010 06:47 PM, H.J. Lu wrote:
> I am learning new things all the time. Thanks for the pointer.
>
> Here is a new patch without UNSPEC.  OK for trunk?

Cannot approve it, but thanks very much!

Paolo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
  2010-06-24 17:38                                       ` H.J. Lu
  2010-06-24 17:58                                         ` Paolo Bonzini
@ 2010-06-24 19:01                                         ` Uros Bizjak
  1 sibling, 0 replies; 24+ messages in thread
From: Uros Bizjak @ 2010-06-24 19:01 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Paolo Bonzini, gcc-patches

On Thu, 2010-06-24 at 09:47 -0700, H.J. Lu wrote:
> On Wed, Jun 23, 2010 at 4:50 PM, Paolo Bonzini <bonzini@gnu.org> wrote:
> > On 06/24/2010 01:13 AM, H.J. Lu wrote:
> >>>
> >>> I don't understand exactly what the problem was; clearly it couldn't work
> >>> as
> >>> long as this pattern was cheating blatantly:
> >>
> >> Upper 8bit registers aren't real registers. You can't
> >> do RA with them.
> >
> > Please read what other people write.  I'm saying that you should have used a
> > HI destination just like you did with your new UNSPEC, which is acceptable.
> >
> > The pattern I quoted below used a QI destination; then magically you
> > attempted to extract bit 8..15 of it with an unspec, or something like that.
> >  For anything like that the optimizers are going to bite back sooner or
> > later.  And you'd deserve it.
> >
> >>> (define_insn "*<u>divqi3"
> >>>  [(set (match_operand:QI 0 "register_operand" "=a")
> >>>        (any_div:QI
> >>>          (match_operand:HI 1 "register_operand" "0")
> >>>          (match_operand:QI 2 "nonimmediate_operand" "qm")))
> >>>
> >>> Anyway this patch is IMO much nicer than the first ones, so if Uros is
> >>> okay
> >>> I don't think it's useful to pursue a more accurate representation.  Just
> >>> make sure that REG_EQUAL notes are added to the two extractions.
> >>>
> >>
> >> What are REG_EQUAL notes used for?
> >> As far as the rest of gcc is concerned, there are no upper 8bit
> >> registers. But you can access bits 8-15 of HI, SI,
> >> DI registers via XXX_extract.
> >
> > Understood.  Using an unspec is fine for me, even though it's not an
> > approval.  But that's completely orthogonal to putting a REG_EQUAL note _on
> > the two regs that you extract out of AX_.  The notes' value should be one of
> >
> >   (subreg:QI (udiv:HI (...) (zero_extend (...))))
> >   (subreg:QI (div:HI (...) (sign_extend (...))))
> >   (subreg:QI (umod:HI (...) (zero_extend (...))))
> >   (subreg:QI (mod:HI (...) (sign_extend (...))))
> >
> > But I think that with over ten years of GCC experience you do not need
> > anyone to tell you this.  In fact, regarding "what are REG_EQUAL notes used
> > for?" my first thought was RTFM, but anyway: they are used by CSE, fwprop,
> > GCSE and combine to simplify and eliminate redundant expressions.
> 
> I am learning new things all the time. Thanks for the pointer.
> 
> Here is a new patch without UNSPEC.  OK for trunk?

OK for mainline, the approach in this patch is Really Good (TM).

Please just rename operand0 and operand1 internal variables to perhaps
tmp0 and tmp1, so they will not be confused with operand[0] and
operand[1].

Thanks,
Uros.


^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2010-06-24 18:03 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-06-21 20:38 PATCH: PR target/44588: Very inefficient 8bit mod/div H.J. Lu
2010-06-22 17:08 ` H.J. Lu
2010-06-22 18:28 ` Uros Bizjak
2010-06-22 18:36   ` H.J. Lu
2010-06-22 19:12     ` Uros Bizjak
2010-06-22 20:08       ` H.J. Lu
2010-06-23  6:26         ` Uros Bizjak
2010-06-23 16:54           ` H.J. Lu
2010-06-23 18:37             ` Paolo Bonzini
2010-06-23 18:46               ` H.J. Lu
2010-06-23 19:00                 ` Paolo Bonzini
2010-06-23 19:08                   ` H.J. Lu
2010-06-23 19:53                     ` Paolo Bonzini
2010-06-23 20:22                       ` H.J. Lu
2010-06-23 20:22                         ` Paolo Bonzini
2010-06-23 20:27                           ` H.J. Lu
2010-06-23 21:50                             ` H.J. Lu
2010-06-23 22:44                               ` H.J. Lu
2010-06-24  0:05                                 ` Paolo Bonzini
2010-06-24  0:54                                   ` H.J. Lu
2010-06-24  3:47                                     ` Paolo Bonzini
2010-06-24 17:38                                       ` H.J. Lu
2010-06-24 17:58                                         ` Paolo Bonzini
2010-06-24 19:01                                         ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).