public inbox for gcc-patches@gcc.gnu.org
* [PATCH] [RISCV] Add Pattern for builtin overflow
@ 2021-04-29  5:42 Levy Hsu
  2021-04-30  0:59 ` Jim Wilson
  0 siblings, 1 reply; 8+ messages in thread
From: Levy Hsu @ 2021-04-29  5:42 UTC (permalink / raw)
  To: gcc-patches, kito.cheng, jimw, andrew; +Cc: LevyHsu

From: LevyHsu <admin@levyhsu.com>

Added an implementation of builtin overflow detection; the new patterns are listed below.

---------------------------------------------------------------
Addition:

signed addition (SImode in RV32 || DImode in RV64):
        add     t0, t1, t2
        slti    t3, t2, 0
        slt     t4, t0, t1
        bne     t3, t4, overflow

signed addition (SImode in RV64):
        add     t0, t1, t2
        addw    t3, t1, t2
        bne     t0, t3, overflow

unsigned addition (SImode in RV32 || DImode in RV64):
        add     t0, t1, t2
        bltu    t0, t1, overflow

unsigned addition (SImode in RV64):
        sext.w  t3, t1
        addw    t0, t1, t2
        bltu    t0, t3, overflow
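
These expanders back GCC's generic __builtin_*_overflow interface.  A
minimal C caller (illustrative only, not part of the patch) that
exercises the signed and unsigned addition patterns:

        #include <stdio.h>

        int main (void)
        {
          int s;
          unsigned u;

          /* Branch form expands through addv<mode>4.  */
          if (__builtin_add_overflow (0x7fffffff, 1, &s))
            puts ("signed add overflowed");

          /* Branch form expands through uaddv<mode>4.  */
          if (__builtin_add_overflow (0xffffffffu, 1u, &u))
            puts ("unsigned add overflowed");
          return 0;
        }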
---------------------------------------------------------------
Subtraction:

signed subtraction (SImode in RV32 || DImode in RV64):
        sub     t0, t1, t2
        slti    t3, t2, 0
        slt     t4, t1, t0
        bne     t3, t4, overflow

signed subtraction (SImode in RV64):
        sub     t0, t1, t2
        subw    t3, t1, t2
        bne     t0, t3, overflow

unsigned subtraction (SImode in RV32 || DImode in RV64):
        sub     t0, t1, t2
        bltu    t1, t0, overflow

unsigned subtraction (SImode in RV64):
        sext.w  t3, t1
        subw    t0, t1, t2
        bltu    t0, t3, overflow
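
In C terms, the branch conditions above implement the usual wrap-around
tests; a sketch of the logic (not the emitted code):

        #include <stdbool.h>
        #include <stdint.h>

        /* Signed: overflow iff the subtrahend's sign test disagrees
           with the minuend < result test (slti/slt/bne above).  */
        static bool
        subv_check (int32_t a, int32_t b)
        {
          int32_t r = (int32_t) ((uint32_t) a - (uint32_t) b);
          return (b < 0) != (a < r);
        }

        /* Unsigned: a borrow occurred iff minuend < result,
           which is the bltu t1, t0 test above.  */
        static bool
        usubv_check (uint32_t a, uint32_t b)
        {
          return a < a - b;
        }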
---------------------------------------------------------------
Multiplication:

signed multiplication (SImode in RV32 || DImode in RV64):
        mulh    t3, t1, t2
        mul     t0, t1, t2
        srai    t4, t0, 31/63 (RV32/64)
        bne     t3, t4, overflow

signed multiplication (SImode in RV64):
        mul     t0, t1, t2
        sext.w  t3, t0
        bne     t0, t3, overflow

unsigned multiplication (SImode in RV32 || DImode in RV64):
        mulhu   t3, t1, t2
        mul     t0, t1, t2
        bnez    t3, overflow

unsigned multiplication (SImode in RV64):
        slli    t0, t0, 32
        slli    t1, t1, 32
        mulhu   t2, t0, t1
        srli    t3, t2, 32
        bnez    t3, overflow
        sext.w  t2, t2
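
The multiplication checks compare the true high half of the product with
what a non-overflowing result would leave there; a C model of the DImode
cases (a sketch using GCC's __int128, not the emitted code):

        #include <stdbool.h>
        #include <stdint.h>

        /* Signed: overflow iff the high word (mulh) is not simply the
           sign extension of the low word (mul + srai by 63).  */
        static bool
        mulv_check (int64_t a, int64_t b)
        {
          __int128 p = (__int128) a * b;
          int64_t lo = (int64_t) p;             /* mul  t0, t1, t2 */
          int64_t hi = (int64_t) (p >> 64);     /* mulh t3, t1, t2 */
          return hi != (lo >> 63);              /* srai + bne      */
        }

        /* Unsigned: overflow iff the high word (mulhu) is nonzero.  */
        static bool
        umulv_check (uint64_t a, uint64_t b)
        {
          unsigned __int128 p = (unsigned __int128) a * b;
          return (uint64_t) (p >> 64) != 0;     /* mulhu + bnez    */
        }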

Special thanks to:
        Jim Wilson, for the thorough help and advice on GCC and GDB.
        Craig Topper, for pointing out that the SImode operands need sext.w for unsigned add/sub on RV64.
        Andrew Waterman, for the better SImode signed add/sub and unsigned mul patterns on RV64.
        Kito Cheng, for the patch submission.
---
 gcc/config/riscv/riscv.c  |   8 ++
 gcc/config/riscv/riscv.h  |   4 +
 gcc/config/riscv/riscv.md | 243 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 255 insertions(+)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index d489717b2a5..cf94f5c9658 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -351,6 +351,14 @@ static const struct riscv_tune_info riscv_tune_info_table[] = {
   { "size", generic, &optimize_size_tune_info },
 };
 
+/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  */
+
+static unsigned int
+riscv_min_arithmetic_precision (void)
+{
+  return 32;
+}
+
 /* Return the riscv_tune_info entry for the given name string.  */
 
 static const struct riscv_tune_info *
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 172c7ca7c98..0521c8881ae 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -121,6 +121,10 @@ extern const char *riscv_default_mtune (int argc, const char **argv);
 #define MIN_UNITS_PER_WORD 4
 #endif
 
+/* Allows SImode op in builtin overflow pattern, see internal-fn.c.  */
+#undef TARGET_MIN_ARITHMETIC_PRECISION
+#define TARGET_MIN_ARITHMETIC_PRECISION riscv_min_arithmetic_precision
+
 /* The `Q' extension is not yet supported.  */
 #define UNITS_PER_FP_REG (TARGET_DOUBLE_FLOAT ? 8 : 4)
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 36012ad1f77..a6e14fdc24d 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -462,6 +462,80 @@
   [(set_attr "type" "arith")
    (set_attr "mode" "DI")])
 
+(define_expand "addv<mode>4"
+  [(set (match_operand:GPR         0 "register_operand" "=r,r")
+        (plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
+                (match_operand:GPR 2 "arith_operand"    " r,I")))
+                        (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+  {
+    rtx t3 = gen_reg_rtx (DImode);
+    rtx t4 = gen_reg_rtx (DImode);
+    rtx t5 = gen_reg_rtx (DImode);
+    rtx t6 = gen_reg_rtx (DImode);
+
+    emit_insn (gen_addsi3 (operands[0], operands[1], operands[2]));
+    if (GET_CODE (operands[1]) != CONST_INT)
+      emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
+    else
+      t4 = operands[1];
+    if (GET_CODE (operands[2]) != CONST_INT)
+      emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
+    else
+      t5 = operands[2];
+    emit_insn (gen_adddi3 (t3, t4, t5));
+    emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+
+    riscv_expand_conditional_branch (operands[3], NE, t6, t3);
+  }
+  else
+  {
+    rtx t3 = gen_reg_rtx (<MODE>mode);
+    rtx t4 = gen_reg_rtx (<MODE>mode);
+
+    emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
+    rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
+    emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
+    rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[0], operands[1]);
+
+    emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[0], operands[1]));
+    riscv_expand_conditional_branch (operands[3], NE, t3, t4);
+  }
+  DONE;
+})
+
+(define_expand "uaddv<mode>4"
+  [(set (match_operand:GPR           0 "register_operand" "=r,r")
+        (plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
+                  (match_operand:GPR 2 "arith_operand"    " r,I")))
+                        (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+  {
+    rtx t3 = gen_reg_rtx (DImode);
+    rtx t4 = gen_reg_rtx (DImode);
+
+    if (GET_CODE (operands[1]) != CONST_INT)
+      emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
+    else
+      t3 = operands[1];
+    emit_insn (gen_addsi3 (operands[0], operands[1], operands[2]));
+    emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
+
+    riscv_expand_conditional_branch (operands[3], LTU, t4, t3);
+  }
+  else
+  {
+    emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
+    riscv_expand_conditional_branch (operands[3], LTU, operands[0], operands[1]);
+  }
+
+  DONE;
+})
+
 (define_insn "*addsi3_extended"
   [(set (match_operand:DI               0 "register_operand" "=r,r")
 	(sign_extend:DI
@@ -518,6 +592,85 @@
   [(set_attr "type" "arith")
    (set_attr "mode" "SI")])
 
+(define_expand "subv<mode>4"
+  [(set (match_operand:GPR          0 "register_operand" "= r")
+        (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
+                 (match_operand:GPR 2 "register_operand" "  r")))
+                        (label_ref (match_operand 3 "" ""))]
+  ""
+{
+
+  if (TARGET_64BIT && <MODE>mode == SImode)
+  {
+    rtx t3 = gen_reg_rtx (DImode);
+    rtx t4 = gen_reg_rtx (DImode);
+    rtx t5 = gen_reg_rtx (DImode);
+    rtx t6 = gen_reg_rtx (DImode);
+
+    emit_insn (gen_subsi3 (operands[0], operands[1], operands[2]));
+    if (GET_CODE (operands[1]) != CONST_INT)
+      emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
+    else
+      t4 = operands[1];
+    if (GET_CODE (operands[2]) != CONST_INT)
+      emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
+    else
+      t5 = operands[2];
+    emit_insn (gen_subdi3 (t3, t4, t5));
+    emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+
+    riscv_expand_conditional_branch (operands[3], NE, t6, t3);
+  }
+  else
+  {
+    rtx t3 = gen_reg_rtx (<MODE>mode);
+    rtx t4 = gen_reg_rtx (<MODE>mode);
+
+    emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
+
+    rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
+    emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
+
+    rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[1], operands[0]);
+    emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[1], operands[0]));
+
+    riscv_expand_conditional_branch (operands[3], NE, t3, t4);
+  }
+  
+  DONE;
+})
+
+(define_expand "usubv<mode>4"
+  [(set (match_operand:GPR            0 "register_operand" "= r")
+        (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
+                   (match_operand:GPR 2 "register_operand" "  r")))
+                        (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+  {
+    rtx t3 = gen_reg_rtx (DImode);
+    rtx t4 = gen_reg_rtx (DImode);
+
+    if (GET_CODE (operands[1]) != CONST_INT)
+      emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
+    else
+      t3 = operands[1];
+    emit_insn (gen_subsi3 (operands[0], operands[1], operands[2]));
+    emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
+
+    riscv_expand_conditional_branch (operands[3], LTU, t3, t4);
+  }
+  else
+  {
+    emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
+    riscv_expand_conditional_branch (operands[3], LTU, operands[1], operands[0]);
+  }
+
+  DONE;
+})
+
+
 (define_insn "*subsi3_extended"
   [(set (match_operand:DI               0 "register_operand" "= r")
 	(sign_extend:DI
@@ -609,6 +762,96 @@
   [(set_attr "type" "imul")
    (set_attr "mode" "DI")])
 
+(define_expand "mulv<mode>4"
+  [(set (match_operand:GPR         0 "register_operand" "=r")
+        (mult:GPR (match_operand:GPR 1 "register_operand" " r")
+                (match_operand:GPR 2 "register_operand" " r")))
+                        (label_ref (match_operand 3 "" ""))]
+  "TARGET_MUL"
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+  {
+    rtx t3 = gen_reg_rtx (DImode);
+    rtx t4 = gen_reg_rtx (DImode);
+    rtx t5 = gen_reg_rtx (DImode);
+    rtx t6 = gen_reg_rtx (DImode);
+
+    if (GET_CODE (operands[1]) != CONST_INT)
+      emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
+    else
+      t4 = operands[1];
+    if (GET_CODE (operands[2]) != CONST_INT)
+      emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
+    else
+      t5 = operands[2];
+    emit_insn (gen_muldi3 (t3, t4, t5));
+
+    emit_move_insn (operands[0], gen_lowpart (SImode, t3));
+    emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+
+    riscv_expand_conditional_branch (operands[3], NE, t6, t3);
+  }
+  else
+  {
+    rtx hp = gen_reg_rtx (<MODE>mode);
+    rtx lp = gen_reg_rtx (<MODE>mode);
+
+    emit_insn (gen_mul<mode>3_highpart (hp, operands[1], operands[2]));
+    emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
+    emit_insn (gen_ashr<mode>3 (lp, operands[0], GEN_INT (BITS_PER_WORD - 1)));
+
+    riscv_expand_conditional_branch (operands[3], NE, hp, lp);
+  }
+
+  DONE;
+})
+
+(define_expand "umulv<mode>4"
+  [(set (match_operand:GPR         0 "register_operand" "=r")
+        (mult:GPR (match_operand:GPR 1 "register_operand" " r")
+                (match_operand:GPR 2 "register_operand" " r")))
+                        (label_ref (match_operand 3 "" ""))]
+  "TARGET_MUL"
+{
+if (TARGET_64BIT && <MODE>mode == SImode)
+  {
+    rtx t3 = gen_reg_rtx (DImode);
+    rtx t4 = gen_reg_rtx (DImode);
+    rtx t5 = gen_reg_rtx (DImode);
+    rtx t6 = gen_reg_rtx (DImode);
+    rtx t7 = gen_reg_rtx (DImode);
+    rtx t8 = gen_reg_rtx (DImode);
+
+    if (GET_CODE (operands[1]) != CONST_INT)
+        emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
+    else
+        t3 = operands[1];
+    if (GET_CODE (operands[2]) != CONST_INT)
+        emit_insn (gen_extend_insn (t4, operands[2], DImode, SImode, 0));
+    else
+        t4 = operands[2];
+
+    emit_insn (gen_ashldi3 (t5, t3, GEN_INT (32)));
+    emit_insn (gen_ashldi3 (t6, t4, GEN_INT (32)));
+    emit_insn (gen_umuldi3_highpart (t7, t5, t6));
+    emit_move_insn (operands[0], gen_lowpart (SImode, t7));
+    emit_insn (gen_lshrdi3 (t8, t7, GEN_INT (32)));
+
+    riscv_expand_conditional_branch (operands[3], NE, t8, const0_rtx);
+  }
+  else
+  {
+    rtx hp = gen_reg_rtx (<MODE>mode);
+
+    emit_insn (gen_umul<mode>3_highpart (hp, operands[1], operands[2]));
+    emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
+  
+    riscv_expand_conditional_branch (operands[3], NE, hp, const0_rtx);
+  }
+
+  DONE;
+})
+
 (define_insn "*mulsi3_extended"
   [(set (match_operand:DI              0 "register_operand" "=r")
 	(sign_extend:DI
-- 
2.31.1



* Re: [PATCH] [RISCV] Add Pattern for builtin overflow
  2021-04-29  5:42 [PATCH] [RISCV] Add Pattern for builtin overflow Levy Hsu
@ 2021-04-30  0:59 ` Jim Wilson
  0 siblings, 0 replies; 8+ messages in thread
From: Jim Wilson @ 2021-04-30  0:59 UTC (permalink / raw)
  To: Levy Hsu; +Cc: GCC Patches, Kito Cheng, Andrew Waterman

On Wed, Apr 28, 2021 at 10:43 PM Levy Hsu <admin@levyhsu.com> wrote:

> From: LevyHsu <admin@levyhsu.com>
>
> Added implementation for builtin overflow detection, new patterns are
> listed below.
>

This looks OK.  You are missing a ChangeLog entry.  I added one.  I had to
fix some whitespace and formatting issues.  Open parens should line up in
the RTL patterns.  There should be no lines that start with 8 spaces; use a
tab instead.  There should be no lines with only whitespace on them.  You
didn't indent the open curly braces in some places.  You missed indenting
the first line in a pattern.  You had a blank line at the start of an
output template.  All simple stuff that I fixed before committing the patch.

Jim


* Re: [PATCH] [RISCV] Add Pattern for builtin overflow
  2021-04-29 22:02       ` Jim Wilson
@ 2021-05-02  0:06         ` Andrew Waterman
  0 siblings, 0 replies; 8+ messages in thread
From: Andrew Waterman @ 2021-05-02  0:06 UTC (permalink / raw)
  To: Jim Wilson; +Cc: Levy Hsu, GCC Patches, Kito Cheng

On Thu, Apr 29, 2021 at 3:02 PM Jim Wilson <jimw@sifive.com> wrote:
>
> On Wed, Apr 28, 2021 at 4:04 PM Andrew Waterman <andrew@sifive.com> wrote:
>>
>> > This is a good suggestion, but in the interests of making forward progress here, I'd like to accept the patch and then file these as bugzillas as ways to further improve the patch.
>>
>> Agreed, these potential improvements are definitely not blockers.
>
>
> Turns out Levy had time to work on the patch after all, and submitted a fourth version with your improvements.

Cool.  Thank you, Levy!

>
> Jim


* Re: [PATCH] [RISCV] Add Pattern for builtin overflow
  2021-04-28 23:04     ` Andrew Waterman
@ 2021-04-29 22:02       ` Jim Wilson
  2021-05-02  0:06         ` Andrew Waterman
  0 siblings, 1 reply; 8+ messages in thread
From: Jim Wilson @ 2021-04-29 22:02 UTC (permalink / raw)
  To: Andrew Waterman; +Cc: Levy Hsu, GCC Patches, Kito Cheng

On Wed, Apr 28, 2021 at 4:04 PM Andrew Waterman <andrew@sifive.com> wrote:

> > This is a good suggestion, but in the interests of making forward
> progress here, I'd like to accept the patch and then file these as
> bugzillas as ways to further improve the patch.
>
> Agreed, these potential improvements are definitely not blockers.
>

Turns out Levy had time to work on the patch after all, and submitted a
fourth version with your improvements.

Jim


* Re: [PATCH] [RISCV] Add Pattern for builtin overflow
  2021-04-28 20:18   ` Jim Wilson
@ 2021-04-28 23:04     ` Andrew Waterman
  2021-04-29 22:02       ` Jim Wilson
  0 siblings, 1 reply; 8+ messages in thread
From: Andrew Waterman @ 2021-04-28 23:04 UTC (permalink / raw)
  To: Jim Wilson; +Cc: Levy Hsu, GCC Patches, Kito Cheng

On Wed, Apr 28, 2021 at 1:18 PM Jim Wilson <jimw@sifive.com> wrote:
>
> On Tue, Apr 27, 2021 at 12:45 AM Andrew Waterman <andrew@sifive.com> wrote:
>>
>> > signed addition (SImode with RV64):
>> >         add     t0, t1, t2
>> >     sext.w  t3, t0
>> >     bne     t0, t3, overflow
>>
>> The following version has the same instruction count but offers more ILP:
>>
>>   add t0, t1, t2
>>   addw t3, t1, t2
>>   bne t0, t3, overflow
>
>
> This is a good suggestion, but in the interests of making forward progress here, I'd like to accept the patch and then file these as bugzillas as ways to further improve the patch.

Agreed, these potential improvements are definitely not blockers.

>>
>> > unsigned addition (SImode with RV64):
>> >     sext.w  t3, t1
>> >     addw        t0, t1, t2
>> >     bltu        t0, t3, overflow
>>
>> I think you can do this in two instructions, similar to the previous pattern:
>>
>>   addw t0, t1, t2
>>   bltu t0, t1, overflow
>
>
> Likewise.
>>
>> > signed subtraction (SImode with RV64):
>> >         sub     t0, t1, t2
>> >     sext.w  t3, t0
>> >     bne     t0, t3, overflow
>>
>> See analogous addition comment.
>
>
> Likewise.
>>
>>
>> > unsigned subtraction (SImode with RV64):
>> >     sext.w  t3, t1
>> >     subw        t0, t1, t2
>> >     bltu    t0, t3, overflow
>>
>> See analogous addition comment.
>
>
> Likewise.
>>
>> > unsigned multiplication (SImode with RV64):
>> >     slli    t0,t0,32
>> >         slli    t1,t1,32
>> >         srli    t0,t0,32
>> >         srli    t1,t1,32
>> >         mul         t0,t0,t1
>> >         srai    t5,t0,32
>> >         bne         t5, 0, overflow
>>
>> I think you can eliminate the first two right shifts by replacing mul
>> with mulhu... something like:
>>
>>   slli rx, rx, 32
>>   slli ry, ry, 32
>>   mulhu rz, rx, ry
>>   srli rt, rz, 32
>>   bnez rt, overflow
>
>
> Likewise, except this should be a separate bugzilla.
>
> Jim


* Re: [PATCH] [RISCV] Add Pattern for builtin overflow
  2021-04-27  7:45 ` Andrew Waterman
@ 2021-04-28 20:18   ` Jim Wilson
  2021-04-28 23:04     ` Andrew Waterman
  0 siblings, 1 reply; 8+ messages in thread
From: Jim Wilson @ 2021-04-28 20:18 UTC (permalink / raw)
  To: Andrew Waterman; +Cc: Levy Hsu, GCC Patches, Kito Cheng

On Tue, Apr 27, 2021 at 12:45 AM Andrew Waterman <andrew@sifive.com> wrote:

> > signed addition (SImode with RV64):
> >         add     t0, t1, t2
> >     sext.w  t3, t0
> >     bne     t0, t3, overflow
>
> The following version has the same instruction count but offers more ILP:
>
>   add t0, t1, t2
>   addw t3, t1, t2
>   bne t0, t3, overflow
>

This is a good suggestion, but in the interests of making forward progress
here, I'd like to accept the patch and then file these as bugzillas as ways
to further improve the patch.

> > unsigned addition (SImode with RV64):
> >     sext.w  t3, t1
> >     addw        t0, t1, t2
> >     bltu        t0, t3, overflow
>
> I think you can do this in two instructions, similar to the previous
> pattern:
>
>   addw t0, t1, t2
>   bltu t0, t1, overflow
>

Likewise.

> > signed subtraction (SImode with RV64):
> >         sub     t0, t1, t2
> >     sext.w  t3, t0
> >     bne     t0, t3, overflow
>
> See analogous addition comment.
>

Likewise.

>
> > unsigned subtraction (SImode with RV64):
> >     sext.w  t3, t1
> >     subw        t0, t1, t2
> >     bltu    t0, t3, overflow
>
> See analogous addition comment.
>

Likewise.

> > unsigned multiplication (SImode with RV64):
> >     slli    t0,t0,32
> >         slli    t1,t1,32
> >         srli    t0,t0,32
> >         srli    t1,t1,32
> >         mul         t0,t0,t1
> >         srai    t5,t0,32
> >         bne         t5, 0, overflow
>
> I think you can eliminate the first two right shifts by replacing mul
> with mulhu... something like:
>
>   slli rx, rx, 32
>   slli ry, ry, 32
>   mulhu rz, rx, ry
>   srli rt, rz, 32
>   bnez rt, overflow
>

Likewise, except this should be a separate bugzilla.

Jim


* Re: [PATCH] [RISCV] Add Pattern for builtin overflow
  2021-04-27  6:08 Levy Hsu
@ 2021-04-27  7:45 ` Andrew Waterman
  2021-04-28 20:18   ` Jim Wilson
  0 siblings, 1 reply; 8+ messages in thread
From: Andrew Waterman @ 2021-04-27  7:45 UTC (permalink / raw)
  To: Levy Hsu; +Cc: GCC Patches, Kito Cheng, Jim Wilson

On Tue, Apr 27, 2021 at 12:18 AM Levy Hsu <admin@levyhsu.com> wrote:
>
> From: LevyHsu <admin@levyhsu.com>
>
> Added implementation for builtin overflow detection, new patterns are listed below.
>
> ---------------------------------------------------------------
> Addition:
>
> signed addition (SImode with RV32 || DImode with RV64):
>         add     t0, t1, t2
>         slti    t3, t2, 0
>         slt     t4, t0, t1
>         bne     t3, t4, overflow
>
> signed addition (SImode with RV64):
>         add     t0, t1, t2
>     sext.w  t3, t0
>     bne     t0, t3, overflow

The following version has the same instruction count but offers more ILP,
since the add and addw are independent and can issue in parallel:

  add t0, t1, t2
  addw t3, t1, t2
  bne t0, t3, overflow

>
> unsigned addition (SImode with RV32 || DImode with RV64):
>     add     t0, t1, t2
>     bltu    t0, t1, overflow
>
> unsigned addition (SImode with RV64):
>     sext.w  t3, t1
>     addw        t0, t1, t2
>     bltu        t0, t3, overflow

I think you can do this in two instructions, similar to the previous pattern:

  addw t0, t1, t2
  bltu t0, t1, overflow

> ---------------------------------------------------------------
> Subtraction:
>
> signed subtraction (SImode with RV32 || DImode with RV64):
>     sub     t0, t1, t2
>     slti    t3, t2, 0
>     slt     t4, t1, t0
>     bne     t3, t4, overflow
>
> signed subtraction (SImode with RV64):
>         sub     t0, t1, t2
>     sext.w  t3, t0
>     bne     t0, t3, overflow

See analogous addition comment.

>
> unsigned subtraction (SImode with RV32 || DImode with RV64):
> >     sub     t0, t1, t2
>     bltu    t1, t0, overflow
>
> unsigned subtraction (SImode with RV64):
>     sext.w  t3, t1
>     subw        t0, t1, t2
>     bltu    t0, t3, overflow

See analogous addition comment.

> ---------------------------------------------------------------
> Multiplication:
>
> signed multiplication (SImode with RV32 || DImode with RV64):
>     mulh    t4, t1, t2
>     mul         t0, t1, t2
>     srai        t5, t0, 31/63 (RV32/64)
>     bne     t4, t5, overflow
>
> signed multiplication (SImode with RV64):
>         mul     t0, t1, t2
>     sext.w  t3, t0
>     bne     t0, t3, overflow
>
> unsigned multiplication (SImode with RV32 || DImode with RV64 ):
>     mulhu   t4, t1, t2
>     mul     t0, t1, t2
>     bne     t4, 0,  overflow
>
> unsigned multiplication (SImode with RV64):
>     slli    t0,t0,32
>         slli    t1,t1,32
>         srli    t0,t0,32
>         srli    t1,t1,32
>         mul         t0,t0,t1
>         srai    t5,t0,32
>         bne         t5, 0, overflow

I think you can eliminate the first two right shifts by replacing mul
with mulhu... something like:

  slli rx, rx, 32
  slli ry, ry, 32
  mulhu rz, rx, ry
  srli rt, rz, 32
  bnez rt, overflow
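
The shifts work because they move each 32-bit input into the upper half
of a 64-bit register, so mulhu returns the full 64-bit product of the
original values and the srli isolates its upper 32 bits.  A quick C
model of that identity (illustrative only, using GCC's __int128):

  #include <assert.h>
  #include <stdint.h>

  int main (void)
  {
    uint64_t x = 0x9abcdef0u, y = 0x12345678u;  /* 32-bit inputs */

    /* (x << 32) * (y << 32) == (x * y) << 64, so the high 64 bits
       of the 128-bit product are exactly x * y.  */
    unsigned __int128 p = (unsigned __int128) (x << 32) * (y << 32);
    uint64_t rz = (uint64_t) (p >> 64);         /* mulhu rz, rx, ry */
    assert (rz == x * y);

    /* The SImode multiply overflows iff the upper half is nonzero.  */
    assert (((rz >> 32) != 0) == (x * y > 0xffffffffu));
    return 0;
  }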

>
> ---------------------------------------------------------------
> ---
>  gcc/config/riscv/riscv.c  |   8 ++
>  gcc/config/riscv/riscv.h  |   5 +
>  gcc/config/riscv/riscv.md | 240 ++++++++++++++++++++++++++++++++++++++
>  3 files changed, 253 insertions(+)
>
> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> index d489717b2a5..cf94f5c9658 100644
> --- a/gcc/config/riscv/riscv.c
> +++ b/gcc/config/riscv/riscv.c
> @@ -351,6 +351,14 @@ static const struct riscv_tune_info riscv_tune_info_table[] = {
>    { "size", generic, &optimize_size_tune_info },
>  };
>
> +/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  */
> +
> +static unsigned int
> +riscv_min_arithmetic_precision (void)
> +{
> +  return 32;
> +}
> +
>  /* Return the riscv_tune_info entry for the given name string.  */
>
>  static const struct riscv_tune_info *
> diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
> index 172c7ca7c98..a6f451b97e3 100644
> --- a/gcc/config/riscv/riscv.h
> +++ b/gcc/config/riscv/riscv.h
> @@ -121,6 +121,11 @@ extern const char *riscv_default_mtune (int argc, const char **argv);
>  #define MIN_UNITS_PER_WORD 4
>  #endif
>
> +/* Allows SImode op in builtin overflow pattern, see internal-fn.c.  */
> +
> +#undef TARGET_MIN_ARITHMETIC_PRECISION
> +#define TARGET_MIN_ARITHMETIC_PRECISION riscv_min_arithmetic_precision
> +
>  /* The `Q' extension is not yet supported.  */
>  #define UNITS_PER_FP_REG (TARGET_DOUBLE_FLOAT ? 8 : 4)
>
> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> index 36012ad1f77..c82017a4bce 100644
> --- a/gcc/config/riscv/riscv.md
> +++ b/gcc/config/riscv/riscv.md
> @@ -462,6 +462,81 @@
>    [(set_attr "type" "arith")
>     (set_attr "mode" "DI")])
>
> +(define_expand "addv<mode>4"
> +  [(set (match_operand:GPR         0 "register_operand" "=r,r")
> +        (plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
> +                (match_operand:GPR 2 "arith_operand"    " r,I")))
> +                        (label_ref (match_operand 3 "" ""))]
> +  ""
> +{
> +  if (TARGET_64BIT && <MODE>mode == SImode)
> +  {
> +    rtx t3 = gen_reg_rtx (DImode);
> +    rtx t4 = gen_reg_rtx (DImode);
> +    rtx t5 = gen_reg_rtx (DImode);
> +    rtx t6 = gen_reg_rtx (DImode);
> +
> +    if (GET_CODE (operands[1]) != CONST_INT)
> +      emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
> +    else
> +      t4 = operands[1];
> +    if (GET_CODE (operands[2]) != CONST_INT)
> +      emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
> +    else
> +      t5 = operands[2];
> +    emit_insn (gen_adddi3 (t3, t4, t5));
> +
> +    emit_move_insn (operands[0], gen_lowpart (SImode, t3));
> +    emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
> +
> +    riscv_expand_conditional_branch (operands[3], NE, t6, t3);
> +  }
> +  else
> +  {
> +    rtx t3 = gen_reg_rtx (<MODE>mode);
> +    rtx t4 = gen_reg_rtx (<MODE>mode);
> +
> +    emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
> +    rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
> +    emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
> +    rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[0], operands[1]);
> +
> +    emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[0], operands[1]));
> +    riscv_expand_conditional_branch (operands[3], NE, t3, t4);
> +  }
> +  DONE;
> +})
> +
> +(define_expand "uaddv<mode>4"
> +  [(set (match_operand:GPR           0 "register_operand" "=r,r")
> +        (plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
> +                  (match_operand:GPR 2 "arith_operand"    " r,I")))
> +                        (label_ref (match_operand 3 "" ""))]
> +  ""
> +{
> +  if (TARGET_64BIT && <MODE>mode == SImode)
> +  {
> +    rtx t3 = gen_reg_rtx (DImode);
> +    rtx t4 = gen_reg_rtx (DImode);
> +
> +    if (GET_CODE (operands[1]) != CONST_INT)
> +      emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
> +    else
> +      t3 = operands[1];
> +    emit_insn (gen_addsi3 (operands[0], operands[1], operands[2]));
> +    emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
> +
> +    riscv_expand_conditional_branch (operands[3], LTU, t4, t3);
> +  }
> +  else
> +  {
> +    emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
> +    riscv_expand_conditional_branch (operands[3], LTU, operands[0], operands[1]);
> +  }
> +
> +  DONE;
> +})
> +
>  (define_insn "*addsi3_extended"
>    [(set (match_operand:DI               0 "register_operand" "=r,r")
>         (sign_extend:DI
> @@ -518,6 +593,85 @@
>    [(set_attr "type" "arith")
>     (set_attr "mode" "SI")])
>
> +(define_expand "subv<mode>4"
> +  [(set (match_operand:GPR          0 "register_operand" "= r")
> +        (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
> +                 (match_operand:GPR 2 "register_operand" "  r")))
> +                        (label_ref (match_operand 3 "" ""))]
> +  ""
> +{
> +
> +  if (TARGET_64BIT && <MODE>mode == SImode)
> +  {
> +    rtx t3 = gen_reg_rtx (DImode);
> +    rtx t4 = gen_reg_rtx (DImode);
> +    rtx t5 = gen_reg_rtx (DImode);
> +    rtx t6 = gen_reg_rtx (DImode);
> +
> +    if (GET_CODE (operands[1]) != CONST_INT)
> +      emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
> +    else
> +      t4 = operands[1];
> +    if (GET_CODE (operands[2]) != CONST_INT)
> +      emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
> +    else
> +      t5 = operands[2];
> +    emit_insn (gen_subdi3 (t3, t4, t5));
> +
> +    emit_move_insn (operands[0], gen_lowpart (SImode, t3));
> +    emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
> +
> +    riscv_expand_conditional_branch (operands[3], NE, t6, t3);
> +  }
> +  else
> +  {
> +    rtx t3 = gen_reg_rtx (<MODE>mode);
> +    rtx t4 = gen_reg_rtx (<MODE>mode);
> +
> +    emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
> +
> +    rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
> +    emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
> +
> +    rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[1], operands[0]);
> +    emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[1], operands[0]));
> +
> +    riscv_expand_conditional_branch (operands[3], NE, t3, t4);
> +  }
> +
> +  DONE;
> +})
> +
> +(define_expand "usubv<mode>4"
> +  [(set (match_operand:GPR            0 "register_operand" "= r")
> +        (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
> +                   (match_operand:GPR 2 "register_operand" "  r")))
> +                        (label_ref (match_operand 3 "" ""))]
> +  ""
> +{
> +  if (TARGET_64BIT && <MODE>mode == SImode)
> +  {
> +    rtx t3 = gen_reg_rtx (DImode);
> +    rtx t4 = gen_reg_rtx (DImode);
> +
> +    if (GET_CODE (operands[1]) != CONST_INT)
> +      emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
> +    else
> +      t3 = operands[1];
> +    emit_insn (gen_subsi3 (operands[0], operands[1], operands[2]));
> +    emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
> +
> +    riscv_expand_conditional_branch (operands[3], LTU, t3, t4);
> +  }
> +  else
> +  {
> +    emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
> +    riscv_expand_conditional_branch (operands[3], LTU, operands[1], operands[0]);
> +  }
> +
> +  DONE;
> +})
> +
>  (define_insn "*subsi3_extended"
>    [(set (match_operand:DI               0 "register_operand" "= r")
>         (sign_extend:DI
> @@ -609,6 +763,92 @@
>    [(set_attr "type" "imul")
>     (set_attr "mode" "DI")])
>
> +(define_expand "mulv<mode>4"
> +  [(set (match_operand:GPR         0 "register_operand" "=r")
> +        (mult:GPR (match_operand:GPR 1 "register_operand" " r")
> +                (match_operand:GPR 2 "register_operand" " r")))
> +                        (label_ref (match_operand 3 "" ""))]
> +  "TARGET_MUL"
> +{
> +  if (TARGET_64BIT && <MODE>mode == SImode)
> +  {
> +    rtx t3 = gen_reg_rtx (DImode);
> +    rtx t4 = gen_reg_rtx (DImode);
> +    rtx t5 = gen_reg_rtx (DImode);
> +    rtx t6 = gen_reg_rtx (DImode);
> +
> +    if (GET_CODE (operands[1]) != CONST_INT)
> +      emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
> +    else
> +      t4 = operands[1];
> +    if (GET_CODE (operands[2]) != CONST_INT)
> +      emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
> +    else
> +      t5 = operands[2];
> +    emit_insn (gen_muldi3 (t3, t4, t5));
> +
> +    emit_move_insn (operands[0], gen_lowpart (SImode, t3));
> +    emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
> +
> +    riscv_expand_conditional_branch (operands[3], NE, t6, t3);
> +  }
> +  else
> +  {
> +    rtx hp = gen_reg_rtx (<MODE>mode);
> +    rtx lp = gen_reg_rtx (<MODE>mode);
> +
> +    emit_insn (gen_mul<mode>3_highpart (hp, operands[1], operands[2]));
> +    emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
> +    emit_insn (gen_ashr<mode>3 (lp, operands[0], GEN_INT (BITS_PER_WORD - 1)));
> +
> +    riscv_expand_conditional_branch (operands[3], NE, hp, lp);
> +  }
> +
> +  DONE;
> +})
> +
> +(define_expand "umulv<mode>4"
> +  [(set (match_operand:GPR         0 "register_operand" "=r")
> +        (mult:GPR (match_operand:GPR 1 "register_operand" " r")
> +                (match_operand:GPR 2 "register_operand" " r")))
> +                        (label_ref (match_operand 3 "" ""))]
> +  "TARGET_MUL"
> +{
> +  if (TARGET_64BIT && <MODE>mode == SImode)
> +  {
> +    rtx t3 = gen_reg_rtx (DImode);
> +    rtx t4 = gen_reg_rtx (DImode);
> +    rtx t5 = gen_reg_rtx (DImode);
> +    rtx t6 = gen_reg_rtx (DImode);
> +
> +    if (GET_CODE (operands[1]) != CONST_INT)
> +      emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 1));
> +    else
> +      t4 = operands[1];
> +    if (GET_CODE (operands[2]) != CONST_INT)
> +      emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 1));
> +    else
> +      t5 = operands[2];
> +    emit_insn (gen_muldi3 (t3, t4, t5));
> +
> +    emit_move_insn (operands[0], gen_lowpart (SImode, t3));
> +    emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 1));
> +
> +    riscv_expand_conditional_branch (operands[3], NE, t6, t3);
> +  }
> +  else
> +  {
> +    rtx hp = gen_reg_rtx (<MODE>mode);
> +
> +    emit_insn (gen_umul<mode>3_highpart (hp, operands[1], operands[2]));
> +    emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
> +
> +    riscv_expand_conditional_branch (operands[3], NE, hp, const0_rtx);
> +  }
> +
> +  DONE;
> +})
> +
>  (define_insn "*mulsi3_extended"
>    [(set (match_operand:DI              0 "register_operand" "=r")
>         (sign_extend:DI
> --
> 2.30.1
>


* [PATCH] [RISCV] Add Pattern for builtin overflow
@ 2021-04-27  6:08 Levy Hsu
  2021-04-27  7:45 ` Andrew Waterman
  0 siblings, 1 reply; 8+ messages in thread
From: Levy Hsu @ 2021-04-27  6:08 UTC (permalink / raw)
  To: gcc-patches, kito.cheng, jimw; +Cc: LevyHsu

From: LevyHsu <admin@levyhsu.com>

Added an implementation of builtin overflow detection; the new patterns are listed below.

---------------------------------------------------------------
Addition:

signed addition (SImode with RV32 || DImode with RV64):
        add     t0, t1, t2
        slti    t3, t2, 0
        slt     t4, t0, t1
        bne     t3, t4, overflow

signed addition (SImode with RV64):
        add     t0, t1, t2
        sext.w  t3, t0
        bne     t0, t3, overflow

unsigned addition (SImode with RV32 || DImode with RV64):
        add     t0, t1, t2
        bltu    t0, t1, overflow

unsigned addition (SImode with RV64):
        sext.w  t3, t1
        addw    t0, t1, t2
        bltu    t0, t3, overflow
---------------------------------------------------------------
Subtraction:

signed subtraction (SImode with RV32 || DImode with RV64):
        sub     t0, t1, t2
        slti    t3, t2, 0
        slt     t4, t1, t0
        bne     t3, t4, overflow

signed subtraction (SImode with RV64):
        sub     t0, t1, t2
        sext.w  t3, t0
        bne     t0, t3, overflow

unsigned subtraction (SImode with RV32 || DImode with RV64):
        sub     t0, t1, t2
        bltu    t1, t0, overflow

unsigned subtraction (SImode with RV64):
        sext.w  t3, t1
        subw    t0, t1, t2
        bltu    t0, t3, overflow
---------------------------------------------------------------
Multiplication:

signed multiplication (SImode with RV32 || DImode with RV64):
        mulh    t4, t1, t2
        mul     t0, t1, t2
        srai    t5, t0, 31/63 (RV32/64)
        bne     t4, t5, overflow

signed multiplication (SImode with RV64):
        mul     t0, t1, t2
        sext.w  t3, t0
        bne     t0, t3, overflow

unsigned multiplication (SImode with RV32 || DImode with RV64):
        mulhu   t4, t1, t2
        mul     t0, t1, t2
        bnez    t4, overflow

unsigned multiplication (SImode with RV64):
        slli    t0, t0, 32
        slli    t1, t1, 32
        srli    t0, t0, 32
        srli    t1, t1, 32
        mul     t0, t0, t1
        srai    t5, t0, 32
        bnez    t5, overflow

---------------------------------------------------------------
---
 gcc/config/riscv/riscv.c  |   8 ++
 gcc/config/riscv/riscv.h  |   5 +
 gcc/config/riscv/riscv.md | 240 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 253 insertions(+)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index d489717b2a5..cf94f5c9658 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -351,6 +351,14 @@ static const struct riscv_tune_info riscv_tune_info_table[] = {
   { "size", generic, &optimize_size_tune_info },
 };
 
+/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  */
+
+static unsigned int
+riscv_min_arithmetic_precision (void)
+{
+  return 32;
+}
+
 /* Return the riscv_tune_info entry for the given name string.  */
 
 static const struct riscv_tune_info *
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 172c7ca7c98..a6f451b97e3 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -121,6 +121,11 @@ extern const char *riscv_default_mtune (int argc, const char **argv);
 #define MIN_UNITS_PER_WORD 4
 #endif
 
+/* Allows SImode op in builtin overflow pattern, see internal-fn.c.  */
+
+#undef TARGET_MIN_ARITHMETIC_PRECISION
+#define TARGET_MIN_ARITHMETIC_PRECISION riscv_min_arithmetic_precision
+
 /* The `Q' extension is not yet supported.  */
 #define UNITS_PER_FP_REG (TARGET_DOUBLE_FLOAT ? 8 : 4)
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 36012ad1f77..c82017a4bce 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -462,6 +462,81 @@
   [(set_attr "type" "arith")
    (set_attr "mode" "DI")])
 
+(define_expand "addv<mode>4"
+  [(set (match_operand:GPR         0 "register_operand" "=r,r")
+        (plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
+                (match_operand:GPR 2 "arith_operand"    " r,I")))
+                        (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+  {
+    rtx t3 = gen_reg_rtx (DImode);
+    rtx t4 = gen_reg_rtx (DImode);
+    rtx t5 = gen_reg_rtx (DImode);
+    rtx t6 = gen_reg_rtx (DImode);
+
+    if (GET_CODE (operands[1]) != CONST_INT)
+      emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
+    else
+      t4 = operands[1];
+    if (GET_CODE (operands[2]) != CONST_INT)
+      emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
+    else
+      t5 = operands[2];
+    emit_insn (gen_adddi3 (t3, t4, t5));
+
+    emit_move_insn (operands[0], gen_lowpart (SImode, t3));
+    emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+
+    riscv_expand_conditional_branch (operands[3], NE, t6, t3);
+  }
+  else
+  {
+    rtx t3 = gen_reg_rtx (<MODE>mode);
+    rtx t4 = gen_reg_rtx (<MODE>mode);
+
+    emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
+    rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
+    emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
+    rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[0], operands[1]);
+
+    emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[0], operands[1]));
+    riscv_expand_conditional_branch (operands[3], NE, t3, t4);
+  }
+  DONE;
+})
+
+(define_expand "uaddv<mode>4"
+  [(set (match_operand:GPR           0 "register_operand" "=r,r")
+        (plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
+                  (match_operand:GPR 2 "arith_operand"    " r,I")))
+                        (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+  {
+    rtx t3 = gen_reg_rtx (DImode);
+    rtx t4 = gen_reg_rtx (DImode);
+
+    if (GET_CODE (operands[1]) != CONST_INT)
+      emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
+    else
+      t3 = operands[1];
+    emit_insn (gen_addsi3 (operands[0], operands[1], operands[2]));
+    emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
+
+    riscv_expand_conditional_branch (operands[3], LTU, t4, t3);
+  }
+  else
+  {
+    emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
+    riscv_expand_conditional_branch (operands[3], LTU, operands[0], operands[1]);
+  }
+
+  DONE;
+})
+
 (define_insn "*addsi3_extended"
   [(set (match_operand:DI               0 "register_operand" "=r,r")
 	(sign_extend:DI
@@ -518,6 +593,85 @@
   [(set_attr "type" "arith")
    (set_attr "mode" "SI")])
 
+(define_expand "subv<mode>4"
+  [(set (match_operand:GPR          0 "register_operand" "= r")
+        (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
+                 (match_operand:GPR 2 "register_operand" "  r")))
+                        (label_ref (match_operand 3 "" ""))]
+  ""
+{
+
+  if (TARGET_64BIT && <MODE>mode == SImode)
+  {
+    rtx t3 = gen_reg_rtx (DImode);
+    rtx t4 = gen_reg_rtx (DImode);
+    rtx t5 = gen_reg_rtx (DImode);
+    rtx t6 = gen_reg_rtx (DImode);
+
+    if (GET_CODE (operands[1]) != CONST_INT)
+      emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
+    else
+      t4 = operands[1];
+    if (GET_CODE (operands[2]) != CONST_INT)
+      emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
+    else
+      t5 = operands[2];
+    emit_insn (gen_subdi3 (t3, t4, t5));
+
+    emit_move_insn (operands[0], gen_lowpart (SImode, t3));
+    emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+
+    riscv_expand_conditional_branch (operands[3], NE, t6, t3);
+  }
+  else
+  {
+    rtx t3 = gen_reg_rtx (<MODE>mode);
+    rtx t4 = gen_reg_rtx (<MODE>mode);
+
+    emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
+
+    rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
+    emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
+
+    rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[1], operands[0]);
+    emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[1], operands[0]));
+
+    riscv_expand_conditional_branch (operands[3], NE, t3, t4);
+  }
+  
+  DONE;
+})
+
+(define_expand "usubv<mode>4"
+  [(set (match_operand:GPR            0 "register_operand" "= r")
+        (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
+                   (match_operand:GPR 2 "register_operand" "  r")))
+                        (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+  {
+    rtx t3 = gen_reg_rtx (DImode);
+    rtx t4 = gen_reg_rtx (DImode);
+
+    if (GET_CODE (operands[1]) != CONST_INT)
+      emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
+    else
+      t3 = operands[1];
+    emit_insn (gen_subsi3 (operands[0], operands[1], operands[2]));
+    emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
+
+    riscv_expand_conditional_branch (operands[3], LTU, t3, t4);
+  }
+  else
+  {
+    emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
+    riscv_expand_conditional_branch (operands[3], LTU, operands[1], operands[0]);
+  }
+
+  DONE;
+})
+
 (define_insn "*subsi3_extended"
   [(set (match_operand:DI               0 "register_operand" "= r")
 	(sign_extend:DI
@@ -609,6 +763,92 @@
   [(set_attr "type" "imul")
    (set_attr "mode" "DI")])
 
+(define_expand "mulv<mode>4"
+  [(set (match_operand:GPR         0 "register_operand" "=r")
+        (mult:GPR (match_operand:GPR 1 "register_operand" " r")
+                (match_operand:GPR 2 "register_operand" " r")))
+                        (label_ref (match_operand 3 "" ""))]
+  "TARGET_MUL"
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+  {
+    rtx t3 = gen_reg_rtx (DImode);
+    rtx t4 = gen_reg_rtx (DImode);
+    rtx t5 = gen_reg_rtx (DImode);
+    rtx t6 = gen_reg_rtx (DImode);
+
+    if (GET_CODE (operands[1]) != CONST_INT)
+      emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
+    else
+      t4 = operands[1];
+    if (GET_CODE (operands[2]) != CONST_INT)
+      emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
+    else
+      t5 = operands[2];
+    emit_insn (gen_muldi3 (t3, t4, t5));
+
+    emit_move_insn (operands[0], gen_lowpart (SImode, t3));
+    emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+
+    riscv_expand_conditional_branch (operands[3], NE, t6, t3);
+  }
+  else
+  {
+    rtx hp = gen_reg_rtx (<MODE>mode);
+    rtx lp = gen_reg_rtx (<MODE>mode);
+
+    emit_insn (gen_mul<mode>3_highpart (hp, operands[1], operands[2]));
+    emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
+    emit_insn (gen_ashr<mode>3 (lp, operands[0], GEN_INT (BITS_PER_WORD - 1)));
+
+    riscv_expand_conditional_branch (operands[3], NE, hp, lp);
+  }
+
+  DONE;
+})
+
+(define_expand "umulv<mode>4"
+  [(set (match_operand:GPR         0 "register_operand" "=r")
+        (mult:GPR (match_operand:GPR 1 "register_operand" " r")
+                (match_operand:GPR 2 "register_operand" " r")))
+                        (label_ref (match_operand 3 "" ""))]
+  "TARGET_MUL"
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+  {
+    rtx t3 = gen_reg_rtx (DImode);
+    rtx t4 = gen_reg_rtx (DImode);
+    rtx t5 = gen_reg_rtx (DImode);
+    rtx t6 = gen_reg_rtx (DImode);
+
+    if (GET_CODE (operands[1]) != CONST_INT)
+      emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 1));
+    else
+      t4 = operands[1];
+    if (GET_CODE (operands[2]) != CONST_INT)
+      emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 1));
+    else
+      t5 = operands[2];
+    emit_insn (gen_muldi3 (t3, t4, t5));
+
+    emit_move_insn (operands[0], gen_lowpart (SImode, t3));
+    emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 1));
+
+    riscv_expand_conditional_branch (operands[3], NE, t6, t3);
+  }
+  else
+  {
+    rtx hp = gen_reg_rtx (<MODE>mode);
+
+    emit_insn (gen_umul<mode>3_highpart (hp, operands[1], operands[2]));
+    emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
+  
+    riscv_expand_conditional_branch (operands[3], NE, hp, const0_rtx);
+  }
+
+  DONE;
+})
+
 (define_insn "*mulsi3_extended"
   [(set (match_operand:DI              0 "register_operand" "=r")
 	(sign_extend:DI
-- 
2.30.1


