public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: "H.J. Lu" <hjl.tools@gmail.com>
To: Uros Bizjak <ubizjak@gmail.com>
Cc: gcc-patches@gcc.gnu.org
Subject: Re: PATCH: PR target/44588: Very inefficient 8bit mod/div
Date: Tue, 22 Jun 2010 18:36:00 -0000	[thread overview]
Message-ID: <AANLkTinvsAQe57kp-irwq_eD3cmUlMmayXHsfnMZNaMN@mail.gmail.com> (raw)
In-Reply-To: <1277229955.2613.1.camel@localhost>

[-- Attachment #1: Type: text/plain, Size: 2234 bytes --]

On Tue, Jun 22, 2010 at 11:05 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Mon, 2010-06-21 at 12:33 -0700, H.J. Lu wrote:
>> Hi,
>>
>> This patch adds 8bit divmov pattern for x86. X86 8bit divide
>> instructions return result in AX with
>>
>> AL <- Quotient
>> AH <- Remainder
>>
>> This patch models it and properly extends quotient. Tested
>> on Intel64 with -m64 and -m32.  There are no regressions.
>> OK for trunk?
>>
>> BTW, there is only one divb used in subreg_get_info in
>> gcc compilers. The old code is
>>
>>         movzbl  mode_size(%r13), %edi
>>         movzbl  mode_size(%r14), %esi
>>         xorl    %edx, %edx
>>         movl    %edi, %eax
>>         divw    %si
>>         testw   %dx, %dx
>>         jne     .L1194
>>
>> The new one is
>>
>>         movzbl  mode_size(%r13), %edi
>>         movl    %edi, %eax
>>         divb    mode_size(%r14)
>>         movzbl  %ah, %eax
>>         testb   %al, %al
>>         jne     .L1194
>>
>
> Hm, something is not combined correctly, I'd say "testb %ah, %ah" is
> optimal in the second case.
>

Here is another update adjusted for mov pattern changes in i386.md.

8bit result is stored in

AL <- Quotient
AH <- Remainder

If we use AX for quotient in 8bit divmod pattern, we have to make
sure that AX is valid for quotient.  We have to extend AL with UNSPEC
since AH isn't the part of quotient,.  Instead, I use AL for quotient and
use UNSPEC_MOVQI_EXTZH to extract remainder from AL. Quotient
access can be optimized very nicely. If remainder is used, we may have
an extract move for UNSPEC_MOVQI_EXTZH. I think this is a reasonable
comprise.


-- 
H.J.
--
gcc/

2010-06-22  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* config/i386/i386.md (UNSPEC_MOVQI_EXTZH): New.
	(any_div_code): Likewise.
	(extend_code): Likewise.
	(extract_code): Likewise.
	(*movqi_extzh): Likewise.
	(*movqi_extzh_rex64): Likewise.
	(<u>divmodqi4): Likewise.
	(*<u>divqi3): Likewise.
	(<u>divqi3): Remvoved.

gcc/testsuite/

2010-06-22  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* gcc.target/i386/umod-1.c: New.
	* gcc.target/i386/umod-2.c: Likewise.
	* gcc.target/i386/umod-3.c: Likewise.
-

[-- Attachment #2: gcc-pr44588-7.patch --]
[-- Type: text/plain, Size: 6963 bytes --]

gcc/

2010-06-22  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* config/i386/i386.md (UNSPEC_MOVQI_EXTZH): New.
	(any_div_code): Likewise.
	(extend_code): Likewise.
	(extract_code): Likewise.
	(*movqi_extzh): Likewise.
	(*movqi_extzh_rex64): Likewise.
	(<u>divmodqi4): Likewise.
	(*<u>divqi3): Likewise.
	(<u>divqi3): Remvoved.

gcc/testsuite/

2010-06-22  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* gcc.target/i386/umod-1.c: New.
	* gcc.target/i386/umod-2.c: Likewise.
	* gcc.target/i386/umod-3.c: Likewise.

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ab90d73..f268e90 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -104,6 +104,7 @@
   UNSPEC_REP
   UNSPEC_LD_MPIC	; load_macho_picbase
   UNSPEC_TRUNC_NOOP
+  UNSPEC_MOVQI_EXTZH
 
   ;; For SSE/MMX support:
   UNSPEC_FIX_NOTRUNC
@@ -760,6 +761,11 @@
 
 ;; Used in signed and unsigned divisions.
 (define_code_iterator any_div [div udiv])
+(define_code_attr any_div_code [(div "DIV") (udiv "UDIV")])
+(define_code_attr extend_code
+  [(div "SIGN_EXTEND") (udiv "ZERO_EXTEND")])
+(define_code_attr extract_code
+  [(div "SIGN_EXTRACT") (udiv "ZERO_EXTRACT")])
 
 ;; Instruction prefix for signed and unsigned operations.
 (define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
@@ -2052,6 +2058,59 @@
        (const_string "orig")))
    (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI,DI,DI")])
 
+;; Used to extract remainder from AH by 8bit divmod.  Use unspec so
+;; that we can extract it from AL.
+(define_insn "*movqi_extzh"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R")
+	(unspec:QI [(match_operand 1 "register_operand" "Q,Q")]
+		   UNSPEC_MOVQI_EXTZH))]
+  "!TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (and (match_operand:QI 0 "register_operand" "")
+			(ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			     (ne (symbol_ref "TARGET_MOVX")
+				 (const_int 0))))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
+(define_insn "*movqi_extzh_rex64"
+  [(set (match_operand:QI 0 "register_operand" "=Q,?R")
+	(unspec:QI [(match_operand 1 "register_operand" "Q,Q")]
+		   UNSPEC_MOVQI_EXTZH))]
+  "TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			(ne (symbol_ref "TARGET_MOVX")
+			    (const_int 0)))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
 ;; Convert impossible stores of immediate to existing instructions.
 ;; First try to get scratch register and go through it.  In case this
 ;; fails, move by 32bit parts.
@@ -7305,17 +7364,6 @@
 \f
 ;; Divide instructions
 
-(define_insn "<u>divqi3"
-  [(set (match_operand:QI 0 "register_operand" "=a")
-	(any_div:QI
-	  (match_operand:HI 1 "register_operand" "0")
-	  (match_operand:QI 2 "nonimmediate_operand" "qm")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_QIMODE_MATH"
-  "<sgnprefix>div{b}\t%2"
-  [(set_attr "type" "idiv")
-   (set_attr "mode" "QI")])
-
 ;; The patterns that match these are at the end of this file.
 
 (define_expand "divxf3"
@@ -7352,6 +7400,58 @@
 \f
 ;; Divmod instructions.
 
+(define_expand "<u>divmodqi4"
+  [(parallel [(set (match_operand:QI 0 "register_operand" "")
+		   (any_div:QI
+		     (match_operand:QI 1 "register_operand" "")
+		     (match_operand:QI 2 "nonimmediate_operand" "")))
+	      (set (match_operand:QI 3 "register_operand" "")
+		   (mod:QI (match_dup 1) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_QIMODE_MATH"
+{
+  rtx div, clobber, set;
+  rtx operand1;
+  
+  operand1 = gen_reg_rtx (HImode);
+  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+
+  /* Properly extend operands[1] to HImode.  */
+  set = gen_rtx_SET (VOIDmode, operand1,
+		     gen_rtx_<extend_code> (HImode, operands[1]));
+  if (<extend_code> == SIGN_EXTEND)
+    emit_insn (set);
+  else
+    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
+
+  /* Generate 8bit divide.  Result is in AX.  */
+  div = gen_rtx_SET (VOIDmode, operands[0],
+		     gen_rtx_<any_div_code> (QImode, operand1,
+					     operands[2]));
+  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, div, clobber)));
+
+  /* Extract remainder from AH.  Use UNSPEC_MOVQI_EXTZH to extract it
+     from AL.  */
+  operand1 = gen_rtx_UNSPEC (QImode, gen_rtvec (1, operands[0]), 
+			     UNSPEC_MOVQI_EXTZH);
+  emit_move_insn (operands[3], operand1);
+  DONE;
+})
+
+;; Divide AX by r/m8, with result stored in
+;; AL <- Quotient
+;; AH <- Remainder
+(define_insn "*<u>divqi3"
+  [(set (match_operand:QI 0 "register_operand" "=a")
+	(any_div:QI
+	  (match_operand:HI 1 "register_operand" "0")
+	  (match_operand:QI 2 "nonimmediate_operand" "qm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH"
+  "<sgnprefix>div{b}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "QI")])
+
 (define_expand "divmod<mode>4"
   [(parallel [(set (match_operand:SWIM248 0 "register_operand" "")
 		   (div:SWIM248
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-1.c	2010-06-21 12:01:25.705950180 -0700
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+unsigned char
+foo (unsigned char x, unsigned char y)
+{
+  return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-2.c	2010-06-21 12:01:17.857932744 -0700
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+extern unsigned char z;
+
+unsigned char
+foo (unsigned char x, unsigned char y)
+{
+  z = x/y;
+  return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-3.c	2010-06-21 12:02:36.809962702 -0700
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+extern void abort (void);
+extern void exit (int);
+
+unsigned char cx = 7;
+
+int
+main ()
+{
+  unsigned char cy;
+  
+  cy = cx / 6; if (cy != 1) abort ();
+  cy = cx % 6; if (cy != 1) abort ();
+
+  exit(0);
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */

  reply	other threads:[~2010-06-22 18:28 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-06-21 20:38 H.J. Lu
2010-06-22 17:08 ` H.J. Lu
2010-06-22 18:28 ` Uros Bizjak
2010-06-22 18:36   ` H.J. Lu [this message]
2010-06-22 19:12     ` Uros Bizjak
2010-06-22 20:08       ` H.J. Lu
2010-06-23  6:26         ` Uros Bizjak
2010-06-23 16:54           ` H.J. Lu
2010-06-23 18:37             ` Paolo Bonzini
2010-06-23 18:46               ` H.J. Lu
2010-06-23 19:00                 ` Paolo Bonzini
2010-06-23 19:08                   ` H.J. Lu
2010-06-23 19:53                     ` Paolo Bonzini
2010-06-23 20:22                       ` H.J. Lu
2010-06-23 20:22                         ` Paolo Bonzini
2010-06-23 20:27                           ` H.J. Lu
2010-06-23 21:50                             ` H.J. Lu
2010-06-23 22:44                               ` H.J. Lu
2010-06-24  0:05                                 ` Paolo Bonzini
2010-06-24  0:54                                   ` H.J. Lu
2010-06-24  3:47                                     ` Paolo Bonzini
2010-06-24 17:38                                       ` H.J. Lu
2010-06-24 17:58                                         ` Paolo Bonzini
2010-06-24 19:01                                         ` Uros Bizjak

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=AANLkTinvsAQe57kp-irwq_eD3cmUlMmayXHsfnMZNaMN@mail.gmail.com \
    --to=hjl.tools@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=ubizjak@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).