public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] arc: Improve/add instruction patterns to better use MAC instructions.
@ 2020-10-09 14:24 Claudiu Zissulescu
  2020-10-23 10:44 ` Claudiu Zissulescu Ianculescu
  2020-11-06 17:27 ` Jeff Law
  0 siblings, 2 replies; 3+ messages in thread
From: Claudiu Zissulescu @ 2020-10-09 14:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: fbedard, andrew.burgess, law, claziss, Claudiu Zissulescu

From: Claudiu Zissulescu <claziss@gmail.com>

ARC MPY7+ instructions add MAC instructions for vector and scalar data
types. This patch adds a madd pattern for 16bit datum that is using the
32bit MAC instruction, and dot_prod patterns for v4hi vector
types. The 64bit moves are also upgraded by using vadd2 instruction.

gcc/
xxxx-xx-xx  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc.c (arc_split_move): Recognize vadd2 instructions.
	* config/arc/arc.md (movdi_insn): Update pattern to use vadd2
	instructions.
	(movdf_insn): Likewise.
	(maddhisi4): New pattern.
	(umaddhisi4): Likewise.
	* config/arc/simdext.md (mov<mode>_insn): Update pattern to use
	vadd2.
	(sdot_prodv4hi): New pattern.
	(udot_prodv4hi): Likewise.
	(arc_vec_<V_US>mac_hi_v4hi): Update/renamed to
	arc_vec_<V_US>mac_v2hiv2si.
	(arc_vec_<V_US>mac_v2hiv2si_zero): New pattern.

Signed-off-by: Claudiu Zissulescu <claziss@gmail.com>
---
 gcc/config/arc/arc.c          |  8 ++++
 gcc/config/arc/arc.md         | 71 ++++++++++++++++++++++++---
 gcc/config/arc/constraints.md |  5 ++
 gcc/config/arc/simdext.md     | 90 +++++++++++++++++++++++++++--------
 4 files changed, 147 insertions(+), 27 deletions(-)

diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index ec55cfde87a9..d5b521e75e67 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -10202,6 +10202,14 @@ arc_split_move (rtx *operands)
       return;
     }
 
+  if (TARGET_PLUS_QMACW
+      && even_register_operand (operands[0], mode)
+      && even_register_operand (operands[1], mode))
+    {
+      emit_move_insn (operands[0], operands[1]);
+      return;
+    }
+
   if (TARGET_PLUS_QMACW
       && GET_CODE (operands[1]) == CONST_VECTOR)
     {
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index f9fc11e51a85..1720e8cd2f6f 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -1345,8 +1345,8 @@ archs4x, archs4xd"
   ")
 
 (define_insn_and_split "*movdi_insn"
-  [(set (match_operand:DI 0 "move_dest_operand"      "=w, w,r,   m")
-	(match_operand:DI 1 "move_double_src_operand" "c,Hi,m,cCm3"))]
+  [(set (match_operand:DI 0 "move_dest_operand"      "=r, r,r,   m")
+	(match_operand:DI 1 "move_double_src_operand" "r,Hi,m,rCm3"))]
   "register_operand (operands[0], DImode)
    || register_operand (operands[1], DImode)
    || (satisfies_constraint_Cm3 (operands[1])
@@ -1358,6 +1358,13 @@ archs4x, archs4xd"
     default:
       return \"#\";
 
+    case 0:
+    if (TARGET_PLUS_QMACW
+	&& even_register_operand (operands[0], DImode)
+	&& even_register_operand (operands[1], DImode))
+      return \"vadd2\\t%0,%1,0\";
+    return \"#\";
+
     case 2:
     if (TARGET_LL64
         && memory_operand (operands[1], DImode)
@@ -1374,7 +1381,7 @@ archs4x, archs4xd"
     return \"#\";
     }
 }"
-  "reload_completed"
+  "&& reload_completed"
   [(const_int 0)]
   {
    arc_split_move (operands);
@@ -1420,15 +1427,24 @@ archs4x, archs4xd"
   "if (prepare_move_operands (operands, DFmode)) DONE;")
 
 (define_insn_and_split "*movdf_insn"
-  [(set (match_operand:DF 0 "move_dest_operand"      "=D,r,c,c,r,m")
-	(match_operand:DF 1 "move_double_src_operand" "r,D,c,E,m,c"))]
-  "register_operand (operands[0], DFmode) || register_operand (operands[1], DFmode)"
+  [(set (match_operand:DF 0 "move_dest_operand"      "=D,r,r,r,r,m")
+	(match_operand:DF 1 "move_double_src_operand" "r,D,r,E,m,r"))]
+  "register_operand (operands[0], DFmode)
+   || register_operand (operands[1], DFmode)"
   "*
 {
  switch (which_alternative)
    {
     default:
       return \"#\";
+
+    case 2:
+    if (TARGET_PLUS_QMACW
+	&& even_register_operand (operands[0], DFmode)
+	&& even_register_operand (operands[1], DFmode))
+      return \"vadd2\\t%0,%1,0\";
+    return \"#\";
+
     case 4:
     if (TARGET_LL64
 	&& ((even_register_operand (operands[0], DFmode)
@@ -6177,6 +6193,49 @@ archs4x, archs4xd"
   [(set_attr "length" "0")])
 
 ;; MAC and DMPY instructions
+
+; Use MAC instruction to emulate 16bit mac.
+(define_expand "maddhisi4"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:HI 1 "register_operand" "")
+   (match_operand:HI 2 "extend_operand"   "")
+   (match_operand:SI 3 "register_operand" "")]
+  "TARGET_PLUS_DMPY"
+  "{
+   rtx acc_reg = gen_rtx_REG (DImode, ACC_REG_FIRST);
+   rtx tmp1 = gen_reg_rtx (SImode);
+   rtx tmp2 = gen_reg_rtx (SImode);
+   rtx accl = gen_lowpart (SImode, acc_reg);
+
+   emit_move_insn (accl, operands[3]);
+   emit_insn (gen_rtx_SET (tmp1, gen_rtx_SIGN_EXTEND (SImode, operands[1])));
+   emit_insn (gen_rtx_SET (tmp2, gen_rtx_SIGN_EXTEND (SImode, operands[2])));
+   emit_insn (gen_mac (tmp1, tmp2));
+   emit_move_insn (operands[0], accl);
+   DONE;
+  }")
+
+; The same for the unsigned variant, but using MACU instruction.
+(define_expand "umaddhisi4"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:HI 1 "register_operand" "")
+   (match_operand:HI 2 "extend_operand"   "")
+   (match_operand:SI 3 "register_operand" "")]
+  "TARGET_PLUS_DMPY"
+  "{
+   rtx acc_reg = gen_rtx_REG (DImode, ACC_REG_FIRST);
+   rtx tmp1 = gen_reg_rtx (SImode);
+   rtx tmp2 = gen_reg_rtx (SImode);
+   rtx accl = gen_lowpart (SImode, acc_reg);
+
+   emit_move_insn (accl, operands[3]);
+   emit_insn (gen_rtx_SET (tmp1, gen_rtx_ZERO_EXTEND (SImode, operands[1])));
+   emit_insn (gen_rtx_SET (tmp2, gen_rtx_ZERO_EXTEND (SImode, operands[2])));
+   emit_insn (gen_macu (tmp1, tmp2));
+   emit_move_insn (operands[0], accl);
+   DONE;
+  }")
+
 (define_expand "maddsidi4"
   [(match_operand:DI 0 "register_operand" "")
    (match_operand:SI 1 "register_operand" "")
diff --git a/gcc/config/arc/constraints.md b/gcc/config/arc/constraints.md
index b7a563a72ada..a2a8e84ac45f 100644
--- a/gcc/config/arc/constraints.md
+++ b/gcc/config/arc/constraints.md
@@ -493,6 +493,11 @@
   Condition Codes"
   (and (match_code "reg") (match_test "cc_register (op, VOIDmode)")))
 
+(define_constraint "Ral"
+  "@internal
+   Accumulator register @code{ACCL} - do not reload into its class"
+  (and (match_code "reg")
+       (match_test "REGNO (op) == ACCL_REGNO")))
 
 (define_constraint "Q"
   "@internal
diff --git a/gcc/config/arc/simdext.md b/gcc/config/arc/simdext.md
index 0e88b3dd815b..d2fc309ea876 100644
--- a/gcc/config/arc/simdext.md
+++ b/gcc/config/arc/simdext.md
@@ -1400,8 +1400,7 @@
 (define_insn_and_split "*mov<mode>_insn"
   [(set (match_operand:VWH 0 "move_dest_operand" "=r,r,r,m")
 	(match_operand:VWH 1 "general_operand"    "i,r,m,r"))]
-  "TARGET_PLUS_QMACW
-   && (register_operand (operands[0], <MODE>mode)
+  "(register_operand (operands[0], <MODE>mode)
        || register_operand (operands[1], <MODE>mode))"
   "*
 {
@@ -1411,7 +1410,11 @@
        return \"#\";
 
      case 1:
-       return \"vadd2 %0, %1, 0\";
+       if (TARGET_PLUS_QMACW
+           && even_register_operand (operands[0], <MODE>mode)
+	   && even_register_operand (operands[1], <MODE>mode))
+         return \"vadd2\\t%0,%1,0\";
+       return \"#\";
 
      case 2:
        if (TARGET_LL64)
@@ -1430,7 +1433,7 @@
    arc_split_move (operands);
    DONE;
   }
-  [(set_attr "type" "move,move,load,store")
+  [(set_attr "type" "move,multi,load,store")
    (set_attr "predicable" "yes,no,no,no")
    (set_attr "iscompact"  "false,false,false,false")
    ])
@@ -1612,6 +1615,44 @@
  DONE;
 })
 
+(define_expand "sdot_prodv4hi"
+  [(match_operand:V2SI 0 "register_operand" "")
+   (match_operand:V4HI 1 "register_operand" "")
+   (match_operand:V4HI 2 "register_operand" "")
+   (match_operand:V2SI 3 "register_operand" "")]
+  "TARGET_PLUS_MACD"
+{
+ rtx acc_reg  = gen_rtx_REG  (V2SImode, ACC_REG_FIRST);
+ rtx op1_low  = gen_lowpart  (V2HImode, operands[1]);
+ rtx op1_high = gen_highpart (V2HImode, operands[1]);
+ rtx op2_low  = gen_lowpart  (V2HImode, operands[2]);
+ rtx op2_high = gen_highpart (V2HImode, operands[2]);
+
+ emit_move_insn (acc_reg, operands[3]);
+ emit_insn (gen_arc_vec_smac_v2hiv2si_zero (op1_low, op2_low));
+ emit_insn (gen_arc_vec_smac_v2hiv2si (operands[0], op1_high, op2_high));
+ DONE;
+})
+
+(define_expand "udot_prodv4hi"
+  [(match_operand:V2SI 0 "register_operand" "")
+   (match_operand:V4HI 1 "register_operand" "")
+   (match_operand:V4HI 2 "register_operand" "")
+   (match_operand:V2SI 3 "register_operand" "")]
+  "TARGET_PLUS_MACD"
+{
+ rtx acc_reg  = gen_rtx_REG  (V2SImode, ACC_REG_FIRST);
+ rtx op1_low  = gen_lowpart  (V2HImode, operands[1]);
+ rtx op1_high = gen_highpart (V2HImode, operands[1]);
+ rtx op2_low  = gen_lowpart  (V2HImode, operands[2]);
+ rtx op2_high = gen_highpart (V2HImode, operands[2]);
+
+ emit_move_insn (acc_reg, operands[3]);
+ emit_insn (gen_arc_vec_umac_v2hiv2si_zero (op1_low, op2_low));
+ emit_insn (gen_arc_vec_umac_v2hiv2si (operands[0], op1_high, op2_high));
+ DONE;
+})
+
 (define_insn "arc_vec_<V_US>mult_lo_v4hi"
  [(set (match_operand:V2SI 0 "even_register_operand"                     "=r,r")
        (mult:V2SI (SE:V2SI (vec_select:V2HI
@@ -1704,30 +1745,37 @@
   }
 )
 
-(define_insn "arc_vec_<V_US>mac_hi_v4hi"
- [(set (match_operand:V2SI 0 "even_register_operand"                     "=r,r")
+(define_insn "arc_vec_<V_US>mac_v2hiv2si"
+ [(set (match_operand:V2SI 0 "even_register_operand"                "=r,Ral,r")
        (plus:V2SI
-	(reg:V2SI ARCV2_ACC)
-	(mult:V2SI (SE:V2SI (vec_select:V2HI
-			     (match_operand:V4HI 1 "even_register_operand" "0,r")
-			     (parallel [(const_int 2) (const_int 3)])))
-		   (SE:V2SI (vec_select:V2HI
-			     (match_operand:V4HI 2 "even_register_operand" "r,r")
-			     (parallel [(const_int 2) (const_int 3)]))))))
+	(mult:V2SI (SE:V2SI (match_operand:V2HI 1 "register_operand" "0,  r,r"))
+		   (SE:V2SI (match_operand:V2HI 2 "register_operand" "r,  r,r")))
+	(reg:V2SI ARCV2_ACC)))
   (set (reg:V2SI ARCV2_ACC)
        (plus:V2SI
-	(reg:V2SI ARCV2_ACC)
-	(mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1)
-					     (parallel [(const_int 2) (const_int 3)])))
-		   (SE:V2SI (vec_select:V2HI (match_dup 2)
-					     (parallel [(const_int 2) (const_int 3)]))))))
+	(mult:V2SI (SE:V2SI (match_dup 1))
+		   (SE:V2SI (match_dup 2)))
+	(reg:V2SI ARCV2_ACC)))
   ]
   "TARGET_PLUS_MACD"
-  "vmac2h<V_US_suffix>%? %0, %R1, %R2"
+  "@
+   vmac2h<V_US_suffix>%?\\t%0,%1,%2
+   vmac2h<V_US_suffix>%?\\t0,%1,%2
+   vmac2h<V_US_suffix>%?\\t%0,%1,%2"
   [(set_attr "length" "4")
    (set_attr "type" "multi")
-   (set_attr "predicable" "yes,no")
-   (set_attr "cond" "canuse,nocond")])
+   (set_attr "predicable" "yes,no,no")])
+
+(define_insn "arc_vec_<V_US>mac_v2hiv2si_zero"
+ [(set (reg:V2SI ARCV2_ACC)
+       (plus:V2SI
+	(mult:V2SI (SE:V2SI (match_operand:V2HI 0 "register_operand" "r"))
+		   (SE:V2SI (match_operand:V2HI 1 "register_operand" "r")))
+	(reg:V2SI ARCV2_ACC)))]
+  "TARGET_PLUS_MACD"
+  "vmac2h<V_US_suffix>%?\\t0,%0,%1"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")])
 
 ;; Builtins
 (define_insn "dmach"
-- 
2.26.2


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] arc: Improve/add instruction patterns to better use MAC instructions.
  2020-10-09 14:24 [PATCH] arc: Improve/add instruction patterns to better use MAC instructions Claudiu Zissulescu
@ 2020-10-23 10:44 ` Claudiu Zissulescu Ianculescu
  2020-11-06 17:27 ` Jeff Law
  1 sibling, 0 replies; 3+ messages in thread
From: Claudiu Zissulescu Ianculescu @ 2020-10-23 10:44 UTC (permalink / raw)
  To: gcc-patches; +Cc: Francois Bedard, Andrew Burgess, Jeff Law, claziss@gmail.com

Gentle PING.

On Fri, Oct 9, 2020 at 5:24 PM Claudiu Zissulescu <claziss@gmail.com> wrote:
>
> From: Claudiu Zissulescu <claziss@gmail.com>
>
> ARC MPY7+ instructions add MAC instructions for vector and scalar data
> types. This patch adds a madd pattern for 16bit datum that is using the
> 32bit MAC instruction, and dot_prod patterns for v4hi vector
> types. The 64bit moves are also upgraded by using vadd2 instruction.
>
> gcc/
> xxxx-xx-xx  Claudiu Zissulescu  <claziss@synopsys.com>
>
>         * config/arc/arc.c (arc_split_move): Recognize vadd2 instructions.
>         * config/arc/arc.md (movdi_insn): Update pattern to use vadd2
>         instructions.
>         (movdf_insn): Likewise.
>         (maddhisi4): New pattern.
>         (umaddhisi4): Likewise.
>         * config/arc/simdext.md (mov<mode>_insn): Update pattern to use
>         vadd2.
>         (sdot_prodv4hi): New pattern.
>         (udot_prodv4hi): Likewise.
>         (arc_vec_<V_US>mac_hi_v4hi): Update/renamed to
>         arc_vec_<V_US>mac_v2hiv2si.
>         (arc_vec_<V_US>mac_v2hiv2si_zero): New pattern.
>
> Signed-off-by: Claudiu Zissulescu <claziss@gmail.com>
> ---
>  gcc/config/arc/arc.c          |  8 ++++
>  gcc/config/arc/arc.md         | 71 ++++++++++++++++++++++++---
>  gcc/config/arc/constraints.md |  5 ++
>  gcc/config/arc/simdext.md     | 90 +++++++++++++++++++++++++++--------
>  4 files changed, 147 insertions(+), 27 deletions(-)
>
> diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
> index ec55cfde87a9..d5b521e75e67 100644
> --- a/gcc/config/arc/arc.c
> +++ b/gcc/config/arc/arc.c
> @@ -10202,6 +10202,14 @@ arc_split_move (rtx *operands)
>        return;
>      }
>
> +  if (TARGET_PLUS_QMACW
> +      && even_register_operand (operands[0], mode)
> +      && even_register_operand (operands[1], mode))
> +    {
> +      emit_move_insn (operands[0], operands[1]);
> +      return;
> +    }
> +
>    if (TARGET_PLUS_QMACW
>        && GET_CODE (operands[1]) == CONST_VECTOR)
>      {
> diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
> index f9fc11e51a85..1720e8cd2f6f 100644
> --- a/gcc/config/arc/arc.md
> +++ b/gcc/config/arc/arc.md
> @@ -1345,8 +1345,8 @@ archs4x, archs4xd"
>    ")
>
>  (define_insn_and_split "*movdi_insn"
> -  [(set (match_operand:DI 0 "move_dest_operand"      "=w, w,r,   m")
> -       (match_operand:DI 1 "move_double_src_operand" "c,Hi,m,cCm3"))]
> +  [(set (match_operand:DI 0 "move_dest_operand"      "=r, r,r,   m")
> +       (match_operand:DI 1 "move_double_src_operand" "r,Hi,m,rCm3"))]
>    "register_operand (operands[0], DImode)
>     || register_operand (operands[1], DImode)
>     || (satisfies_constraint_Cm3 (operands[1])
> @@ -1358,6 +1358,13 @@ archs4x, archs4xd"
>      default:
>        return \"#\";
>
> +    case 0:
> +    if (TARGET_PLUS_QMACW
> +       && even_register_operand (operands[0], DImode)
> +       && even_register_operand (operands[1], DImode))
> +      return \"vadd2\\t%0,%1,0\";
> +    return \"#\";
> +
>      case 2:
>      if (TARGET_LL64
>          && memory_operand (operands[1], DImode)
> @@ -1374,7 +1381,7 @@ archs4x, archs4xd"
>      return \"#\";
>      }
>  }"
> -  "reload_completed"
> +  "&& reload_completed"
>    [(const_int 0)]
>    {
>     arc_split_move (operands);
> @@ -1420,15 +1427,24 @@ archs4x, archs4xd"
>    "if (prepare_move_operands (operands, DFmode)) DONE;")
>
>  (define_insn_and_split "*movdf_insn"
> -  [(set (match_operand:DF 0 "move_dest_operand"      "=D,r,c,c,r,m")
> -       (match_operand:DF 1 "move_double_src_operand" "r,D,c,E,m,c"))]
> -  "register_operand (operands[0], DFmode) || register_operand (operands[1], DFmode)"
> +  [(set (match_operand:DF 0 "move_dest_operand"      "=D,r,r,r,r,m")
> +       (match_operand:DF 1 "move_double_src_operand" "r,D,r,E,m,r"))]
> +  "register_operand (operands[0], DFmode)
> +   || register_operand (operands[1], DFmode)"
>    "*
>  {
>   switch (which_alternative)
>     {
>      default:
>        return \"#\";
> +
> +    case 2:
> +    if (TARGET_PLUS_QMACW
> +       && even_register_operand (operands[0], DFmode)
> +       && even_register_operand (operands[1], DFmode))
> +      return \"vadd2\\t%0,%1,0\";
> +    return \"#\";
> +
>      case 4:
>      if (TARGET_LL64
>         && ((even_register_operand (operands[0], DFmode)
> @@ -6177,6 +6193,49 @@ archs4x, archs4xd"
>    [(set_attr "length" "0")])
>
>  ;; MAC and DMPY instructions
> +
> +; Use MAC instruction to emulate 16bit mac.
> +(define_expand "maddhisi4"
> +  [(match_operand:SI 0 "register_operand" "")
> +   (match_operand:HI 1 "register_operand" "")
> +   (match_operand:HI 2 "extend_operand"   "")
> +   (match_operand:SI 3 "register_operand" "")]
> +  "TARGET_PLUS_DMPY"
> +  "{
> +   rtx acc_reg = gen_rtx_REG (DImode, ACC_REG_FIRST);
> +   rtx tmp1 = gen_reg_rtx (SImode);
> +   rtx tmp2 = gen_reg_rtx (SImode);
> +   rtx accl = gen_lowpart (SImode, acc_reg);
> +
> +   emit_move_insn (accl, operands[3]);
> +   emit_insn (gen_rtx_SET (tmp1, gen_rtx_SIGN_EXTEND (SImode, operands[1])));
> +   emit_insn (gen_rtx_SET (tmp2, gen_rtx_SIGN_EXTEND (SImode, operands[2])));
> +   emit_insn (gen_mac (tmp1, tmp2));
> +   emit_move_insn (operands[0], accl);
> +   DONE;
> +  }")
> +
> +; The same for the unsigned variant, but using MACU instruction.
> +(define_expand "umaddhisi4"
> +  [(match_operand:SI 0 "register_operand" "")
> +   (match_operand:HI 1 "register_operand" "")
> +   (match_operand:HI 2 "extend_operand"   "")
> +   (match_operand:SI 3 "register_operand" "")]
> +  "TARGET_PLUS_DMPY"
> +  "{
> +   rtx acc_reg = gen_rtx_REG (DImode, ACC_REG_FIRST);
> +   rtx tmp1 = gen_reg_rtx (SImode);
> +   rtx tmp2 = gen_reg_rtx (SImode);
> +   rtx accl = gen_lowpart (SImode, acc_reg);
> +
> +   emit_move_insn (accl, operands[3]);
> +   emit_insn (gen_rtx_SET (tmp1, gen_rtx_ZERO_EXTEND (SImode, operands[1])));
> +   emit_insn (gen_rtx_SET (tmp2, gen_rtx_ZERO_EXTEND (SImode, operands[2])));
> +   emit_insn (gen_macu (tmp1, tmp2));
> +   emit_move_insn (operands[0], accl);
> +   DONE;
> +  }")
> +
>  (define_expand "maddsidi4"
>    [(match_operand:DI 0 "register_operand" "")
>     (match_operand:SI 1 "register_operand" "")
> diff --git a/gcc/config/arc/constraints.md b/gcc/config/arc/constraints.md
> index b7a563a72ada..a2a8e84ac45f 100644
> --- a/gcc/config/arc/constraints.md
> +++ b/gcc/config/arc/constraints.md
> @@ -493,6 +493,11 @@
>    Condition Codes"
>    (and (match_code "reg") (match_test "cc_register (op, VOIDmode)")))
>
> +(define_constraint "Ral"
> +  "@internal
> +   Accumulator register @code{ACCL} - do not reload into its class"
> +  (and (match_code "reg")
> +       (match_test "REGNO (op) == ACCL_REGNO")))
>
>  (define_constraint "Q"
>    "@internal
> diff --git a/gcc/config/arc/simdext.md b/gcc/config/arc/simdext.md
> index 0e88b3dd815b..d2fc309ea876 100644
> --- a/gcc/config/arc/simdext.md
> +++ b/gcc/config/arc/simdext.md
> @@ -1400,8 +1400,7 @@
>  (define_insn_and_split "*mov<mode>_insn"
>    [(set (match_operand:VWH 0 "move_dest_operand" "=r,r,r,m")
>         (match_operand:VWH 1 "general_operand"    "i,r,m,r"))]
> -  "TARGET_PLUS_QMACW
> -   && (register_operand (operands[0], <MODE>mode)
> +  "(register_operand (operands[0], <MODE>mode)
>         || register_operand (operands[1], <MODE>mode))"
>    "*
>  {
> @@ -1411,7 +1410,11 @@
>         return \"#\";
>
>       case 1:
> -       return \"vadd2 %0, %1, 0\";
> +       if (TARGET_PLUS_QMACW
> +           && even_register_operand (operands[0], <MODE>mode)
> +          && even_register_operand (operands[1], <MODE>mode))
> +         return \"vadd2\\t%0,%1,0\";
> +       return \"#\";
>
>       case 2:
>         if (TARGET_LL64)
> @@ -1430,7 +1433,7 @@
>     arc_split_move (operands);
>     DONE;
>    }
> -  [(set_attr "type" "move,move,load,store")
> +  [(set_attr "type" "move,multi,load,store")
>     (set_attr "predicable" "yes,no,no,no")
>     (set_attr "iscompact"  "false,false,false,false")
>     ])
> @@ -1612,6 +1615,44 @@
>   DONE;
>  })
>
> +(define_expand "sdot_prodv4hi"
> +  [(match_operand:V2SI 0 "register_operand" "")
> +   (match_operand:V4HI 1 "register_operand" "")
> +   (match_operand:V4HI 2 "register_operand" "")
> +   (match_operand:V2SI 3 "register_operand" "")]
> +  "TARGET_PLUS_MACD"
> +{
> + rtx acc_reg  = gen_rtx_REG  (V2SImode, ACC_REG_FIRST);
> + rtx op1_low  = gen_lowpart  (V2HImode, operands[1]);
> + rtx op1_high = gen_highpart (V2HImode, operands[1]);
> + rtx op2_low  = gen_lowpart  (V2HImode, operands[2]);
> + rtx op2_high = gen_highpart (V2HImode, operands[2]);
> +
> + emit_move_insn (acc_reg, operands[3]);
> + emit_insn (gen_arc_vec_smac_v2hiv2si_zero (op1_low, op2_low));
> + emit_insn (gen_arc_vec_smac_v2hiv2si (operands[0], op1_high, op2_high));
> + DONE;
> +})
> +
> +(define_expand "udot_prodv4hi"
> +  [(match_operand:V2SI 0 "register_operand" "")
> +   (match_operand:V4HI 1 "register_operand" "")
> +   (match_operand:V4HI 2 "register_operand" "")
> +   (match_operand:V2SI 3 "register_operand" "")]
> +  "TARGET_PLUS_MACD"
> +{
> + rtx acc_reg  = gen_rtx_REG  (V2SImode, ACC_REG_FIRST);
> + rtx op1_low  = gen_lowpart  (V2HImode, operands[1]);
> + rtx op1_high = gen_highpart (V2HImode, operands[1]);
> + rtx op2_low  = gen_lowpart  (V2HImode, operands[2]);
> + rtx op2_high = gen_highpart (V2HImode, operands[2]);
> +
> + emit_move_insn (acc_reg, operands[3]);
> + emit_insn (gen_arc_vec_umac_v2hiv2si_zero (op1_low, op2_low));
> + emit_insn (gen_arc_vec_umac_v2hiv2si (operands[0], op1_high, op2_high));
> + DONE;
> +})
> +
>  (define_insn "arc_vec_<V_US>mult_lo_v4hi"
>   [(set (match_operand:V2SI 0 "even_register_operand"                     "=r,r")
>         (mult:V2SI (SE:V2SI (vec_select:V2HI
> @@ -1704,30 +1745,37 @@
>    }
>  )
>
> -(define_insn "arc_vec_<V_US>mac_hi_v4hi"
> - [(set (match_operand:V2SI 0 "even_register_operand"                     "=r,r")
> +(define_insn "arc_vec_<V_US>mac_v2hiv2si"
> + [(set (match_operand:V2SI 0 "even_register_operand"                "=r,Ral,r")
>         (plus:V2SI
> -       (reg:V2SI ARCV2_ACC)
> -       (mult:V2SI (SE:V2SI (vec_select:V2HI
> -                            (match_operand:V4HI 1 "even_register_operand" "0,r")
> -                            (parallel [(const_int 2) (const_int 3)])))
> -                  (SE:V2SI (vec_select:V2HI
> -                            (match_operand:V4HI 2 "even_register_operand" "r,r")
> -                            (parallel [(const_int 2) (const_int 3)]))))))
> +       (mult:V2SI (SE:V2SI (match_operand:V2HI 1 "register_operand" "0,  r,r"))
> +                  (SE:V2SI (match_operand:V2HI 2 "register_operand" "r,  r,r")))
> +       (reg:V2SI ARCV2_ACC)))
>    (set (reg:V2SI ARCV2_ACC)
>         (plus:V2SI
> -       (reg:V2SI ARCV2_ACC)
> -       (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1)
> -                                            (parallel [(const_int 2) (const_int 3)])))
> -                  (SE:V2SI (vec_select:V2HI (match_dup 2)
> -                                            (parallel [(const_int 2) (const_int 3)]))))))
> +       (mult:V2SI (SE:V2SI (match_dup 1))
> +                  (SE:V2SI (match_dup 2)))
> +       (reg:V2SI ARCV2_ACC)))
>    ]
>    "TARGET_PLUS_MACD"
> -  "vmac2h<V_US_suffix>%? %0, %R1, %R2"
> +  "@
> +   vmac2h<V_US_suffix>%?\\t%0,%1,%2
> +   vmac2h<V_US_suffix>%?\\t0,%1,%2
> +   vmac2h<V_US_suffix>%?\\t%0,%1,%2"
>    [(set_attr "length" "4")
>     (set_attr "type" "multi")
> -   (set_attr "predicable" "yes,no")
> -   (set_attr "cond" "canuse,nocond")])
> +   (set_attr "predicable" "yes,no,no")])
> +
> +(define_insn "arc_vec_<V_US>mac_v2hiv2si_zero"
> + [(set (reg:V2SI ARCV2_ACC)
> +       (plus:V2SI
> +       (mult:V2SI (SE:V2SI (match_operand:V2HI 0 "register_operand" "r"))
> +                  (SE:V2SI (match_operand:V2HI 1 "register_operand" "r")))
> +       (reg:V2SI ARCV2_ACC)))]
> +  "TARGET_PLUS_MACD"
> +  "vmac2h<V_US_suffix>%?\\t0,%0,%1"
> +  [(set_attr "length" "4")
> +   (set_attr "type" "multi")])
>
>  ;; Builtins
>  (define_insn "dmach"
> --
> 2.26.2
>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] arc: Improve/add instruction patterns to better use MAC instructions.
  2020-10-09 14:24 [PATCH] arc: Improve/add instruction patterns to better use MAC instructions Claudiu Zissulescu
  2020-10-23 10:44 ` Claudiu Zissulescu Ianculescu
@ 2020-11-06 17:27 ` Jeff Law
  1 sibling, 0 replies; 3+ messages in thread
From: Jeff Law @ 2020-11-06 17:27 UTC (permalink / raw)
  To: Claudiu Zissulescu, gcc-patches; +Cc: fbedard, andrew.burgess, claziss


On 10/9/20 8:24 AM, Claudiu Zissulescu wrote:
> From: Claudiu Zissulescu <claziss@gmail.com>
>
> ARC MPY7+ instructions add MAC instructions for vector and scalar data
> types. This patch adds a madd pattern for 16bit datum that is using the
> 32bit MAC instruction, and dot_prod patterns for v4hi vector
> types. The 64bit moves are also upgraded by using vadd2 instruction.
>
> gcc/
> xxxx-xx-xx  Claudiu Zissulescu  <claziss@synopsys.com>
>
> 	* config/arc/arc.c (arc_split_move): Recognize vadd2 instructions.
> 	* config/arc/arc.md (movdi_insn): Update pattern to use vadd2
> 	instructions.
> 	(movdf_insn): Likewise.
> 	(maddhisi4): New pattern.
> 	(umaddhisi4): Likewise.
> 	* config/arc/simdext.md (mov<mode>_insn): Update pattern to use
> 	vadd2.
> 	(sdot_prodv4hi): New pattern.
> 	(udot_prodv4hi): Likewise.
> 	(arc_vec_<V_US>mac_hi_v4hi): Update/renamed to
> 	arc_vec_<V_US>mac_v2hiv2si.
> 	(arc_vec_<V_US>mac_v2hiv2si_zero): New pattern.

OK for the trunk.  Sorry for the delay.

jeff



^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-11-06 17:27 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-09 14:24 [PATCH] arc: Improve/add instruction patterns to better use MAC instructions Claudiu Zissulescu
2020-10-23 10:44 ` Claudiu Zissulescu Ianculescu
2020-11-06 17:27 ` Jeff Law

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).