public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v2 14/16]Arm: Add NEON RTL patterns for Complex Addition, Multiply and FMA.
@ 2020-09-25 14:31 Tamar Christina
  2020-11-14 15:11 ` Tamar Christina
  2020-11-16  9:20 ` Kyrylo Tkachov
  0 siblings, 2 replies; 3+ messages in thread
From: Tamar Christina @ 2020-09-25 14:31 UTC (permalink / raw)
  To: gcc-patches
  Cc: nd, Ramana.Radhakrishnan, Richard.Earnshaw, nickc, Kyrylo.Tkachov

[-- Attachment #1: Type: text/plain, Size: 1182 bytes --]

Hi All,

This adds an implementation of the optabs for complex addition, multiplication
and FMA.  With this, the following C code:

  void f90 (float complex a[restrict N], float complex b[restrict N],
	    float complex c[restrict N])
  {
    for (int i=0; i < N; i++)
      c[i] = a[i] + (b[i] * I);
  }

generates

  f90:
	  add     r3, r2, #1600
  .L2:
	  vld1.32 {q8}, [r0]!
	  vld1.32 {q9}, [r1]!
	  vcadd.f32       q8, q8, q9, #90
	  vst1.32 {q8}, [r2]!
	  cmp     r3, r2
	  bne     .L2
	  bx      lr


instead of

  f90:
	  add     r3, r2, #1600
  .L2:
	  vld2.32 {d24-d27}, [r0]!
	  vld2.32 {d20-d23}, [r1]!
	  vsub.f32	q8, q12, q11
	  vadd.f32	q9, q13, q10
	  vst2.32 {d16-d19}, [r2]!
	  cmp     r3, r2
	  bne     .L2
	  bx      lr


Bootstrapped and regtested on arm-none-linux-gnueabihf with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* config/arm/iterators.md (rot): Add UNSPEC_VCMLS, UNSPEC_VCMUL and
	UNSPEC_VCMUL180.
	(rot_op, rotsplit1, rotsplit2, fcmac1, VCMLA_OP, VCMUL_OP): New.
	* config/arm/neon.md (cadd<rot><mode>3, cml<fcmac1><rot_op><mode>4,
	cmul<rot_op><mode>3): New.
	* config/arm/unspecs.md (UNSPEC_VCMUL, UNSPEC_VCMUL180, UNSPEC_VCMLS,
	UNSPEC_VCMLS180): New.

-- 

[-- Attachment #2: rb13518.patch --]
[-- Type: text/x-diff, Size: 4936 bytes --]

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 0bc9eba0722689aff4c1a143e952f6eb91c0cd86..f5693c0524274da1eb1c767713574c01ec6d544c 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -1146,10 +1146,38 @@ (define_int_attr crypto_mode [(UNSPEC_SHA1H "V4SI") (UNSPEC_AESMC "V16QI")
 
 (define_int_attr rot [(UNSPEC_VCADD90 "90")
 		      (UNSPEC_VCADD270 "270")
+		      (UNSPEC_VCMLS "0")
 		      (UNSPEC_VCMLA "0")
 		      (UNSPEC_VCMLA90 "90")
 		      (UNSPEC_VCMLA180 "180")
-		      (UNSPEC_VCMLA270 "270")])
+		      (UNSPEC_VCMLA270 "270")
+		      (UNSPEC_VCMUL "0")
+		      (UNSPEC_VCMUL180 "180")])
+
+;; A conjugate negates the imaginary part; the 180 unspecs name the _conj forms.
+(define_int_attr rot_op [(UNSPEC_VCMLS "")
+			 (UNSPEC_VCMLS180 "_conj")
+			 (UNSPEC_VCMLA "")
+			 (UNSPEC_VCMLA180 "_conj")
+			 (UNSPEC_VCMUL "")
+			 (UNSPEC_VCMUL180 "_conj")])
+
+(define_int_attr rotsplit1 [(UNSPEC_VCMLA "0")
+			    (UNSPEC_VCMLA180 "0")
+			    (UNSPEC_VCMUL "0")
+			    (UNSPEC_VCMUL180 "0")
+			    (UNSPEC_VCMLS "270")
+			    (UNSPEC_VCMLS180 "90")])
+
+(define_int_attr rotsplit2 [(UNSPEC_VCMLA "90")
+			    (UNSPEC_VCMLA180 "270")
+			    (UNSPEC_VCMUL "90")
+			    (UNSPEC_VCMUL180 "270")
+			    (UNSPEC_VCMLS "180")
+			    (UNSPEC_VCMLS180 "180")])
+
+(define_int_attr fcmac1 [(UNSPEC_VCMLA "a") (UNSPEC_VCMLA180 "a")
+			 (UNSPEC_VCMLS "s") (UNSPEC_VCMLS180 "s")])
 
 (define_int_attr simd32_op [(UNSPEC_QADD8 "qadd8") (UNSPEC_QSUB8 "qsub8")
 			    (UNSPEC_SHADD8 "shadd8") (UNSPEC_SHSUB8 "shsub8")
@@ -1256,3 +1284,12 @@ (define_int_attr bt [(UNSPEC_BFMAB "b") (UNSPEC_BFMAT "t")])
 
 ;; An iterator for CDE MVE accumulator/non-accumulator versions.
 (define_int_attr a [(UNSPEC_VCDE "") (UNSPEC_VCDEA "a")])
+
+;; Define iterators for VCMLA operations
+(define_int_iterator VCMLA_OP [UNSPEC_VCMLA
+			       UNSPEC_VCMLA180
+			       UNSPEC_VCMLS])
+
+;; Define iterators for VCMLA operations as MUL
+(define_int_iterator VCMUL_OP [UNSPEC_VCMUL
+			       UNSPEC_VCMUL180])
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 3e7b51d8ab60007901392df0ca1cb09fead4d0e9..1611bcea1ba8cb416d27368e4dc39ce15b3a4cd8 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -3217,6 +3217,14 @@ (define_insn "neon_vcadd<rot><mode>"
   [(set_attr "type" "neon_fcadd")]
 )
 
+(define_expand "cadd<rot><mode>3"
+  [(set (match_operand:VF 0 "register_operand")
+	(unspec:VF [(match_operand:VF 1 "register_operand")
+		    (match_operand:VF 2 "register_operand")]
+		    VCADD))]
+  "TARGET_COMPLEX"
+)
+
 (define_insn "neon_vcmla<rot><mode>"
   [(set (match_operand:VF 0 "register_operand" "=w")
 	(plus:VF (match_operand:VF 1 "register_operand" "0")
@@ -3274,6 +3282,43 @@ (define_insn "neon_vcmlaq_lane<rot><mode>"
 )
 
 
+;; The complex mla/mls operations always need to expand to two instructions.
+;; The first operation does half the computation and the second does the
+;; remainder.  Because of this, expand early.
+(define_expand "cml<fcmac1><rot_op><mode>4"
+  [(set (match_operand:VF 0 "register_operand")
+	(plus:VF (match_operand:VF 1 "register_operand")
+		 (unspec:VF [(match_operand:VF 2 "register_operand")
+			     (match_operand:VF 3 "register_operand")]
+			     VCMLA_OP)))]
+  "TARGET_COMPLEX"
+{
+  emit_insn (gen_neon_vcmla<rotsplit1><mode> (operands[0], operands[1],
+					      operands[2], operands[3]));
+  emit_insn (gen_neon_vcmla<rotsplit2><mode> (operands[0], operands[0],
+					      operands[2], operands[3]));
+  DONE;
+})
+
+;; The complex mul operations always need to expand to two instructions.
+;; The first operation does half the computation and the second does the
+;; remainder.  Because of this, expand early.
+(define_expand "cmul<rot_op><mode>3"
+  [(set (match_operand:VF 0 "register_operand")
+	(unspec:VF [(match_operand:VF 1 "register_operand")
+		    (match_operand:VF 2 "register_operand")]
+		    VCMUL_OP))]
+  "TARGET_COMPLEX"
+{
+  rtx tmp = gen_reg_rtx (<MODE>mode);
+  emit_move_insn (tmp, CONST0_RTX (<MODE>mode));
+  emit_insn (gen_neon_vcmla<rotsplit1><mode> (operands[0], tmp,
+					      operands[1], operands[2]));
+  emit_insn (gen_neon_vcmla<rotsplit2><mode> (operands[0], operands[0],
+					      operands[1], operands[2]));
+  DONE;
+})
+
 ;; These instructions map to the __builtins for the Dot Product operations.
 (define_insn "neon_<sup>dot<vsi2qi>"
   [(set (match_operand:VCVTI 0 "register_operand" "=w")
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index 0a2399d4fb7bdef6c9ff2b31a743cf357fd271d5..d1b2824a0fe76f62d69c18dcec2f47dfb75b586e 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -510,6 +510,10 @@ (define_c_enum "unspec" [
   UNSPEC_VCMLA90
   UNSPEC_VCMLA180
   UNSPEC_VCMLA270
+  UNSPEC_VCMUL
+  UNSPEC_VCMUL180
+  UNSPEC_VCMLS
+  UNSPEC_VCMLS180
   UNSPEC_MATMUL_S
   UNSPEC_MATMUL_U
   UNSPEC_MATMUL_US


^ permalink raw reply	[flat|nested] 3+ messages in thread

* RE: [PATCH v2 14/16]Arm: Add NEON RTL patterns for Complex Addition,  Multiply and FMA.
  2020-09-25 14:31 [PATCH v2 14/16]Arm: Add NEON RTL patterns for Complex Addition, Multiply and FMA Tamar Christina
@ 2020-11-14 15:11 ` Tamar Christina
  2020-11-16  9:20 ` Kyrylo Tkachov
  1 sibling, 0 replies; 3+ messages in thread
From: Tamar Christina @ 2020-11-14 15:11 UTC (permalink / raw)
  To: Tamar Christina, gcc-patches
  Cc: Richard Earnshaw, nd, Ramana Radhakrishnan, Kyrylo Tkachov

ping

> -----Original Message-----
> From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf Of Tamar
> Christina
> Sent: Friday, September 25, 2020 3:31 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Richard Earnshaw <Richard.Earnshaw@arm.com>; nd <nd@arm.com>;
> Ramana Radhakrishnan <Ramana.Radhakrishnan@arm.com>
> Subject: [PATCH v2 14/16]Arm: Add NEON RTL patterns for Complex Addition,
> Multiply and FMA.
> 
> Hi All,
> 
> This adds implementation for the optabs for complex additions.  With this the
> following C code:
> 
>   void f90 (float complex a[restrict N], float complex b[restrict N],
> 	    float complex c[restrict N])
>   {
>     for (int i=0; i < N; i++)
>       c[i] = a[i] + (b[i] * I);
>   }
> 
> generates
> 
>   f90:
> 	  add     r3, r2, #1600
>   .L2:
> 	  vld1.32 {q8}, [r0]!
> 	  vld1.32 {q9}, [r1]!
> 	  vcadd.f32       q8, q8, q9, #90
> 	  vst1.32 {q8}, [r2]!
> 	  cmp     r3, r2
> 	  bne     .L2
> 	  bx      lr
> 
> 
> instead of
> 
>   f90:
> 	  add     r3, r2, #1600
>   .L2:
> 	  vld2.32 {d24-d27}, [r0]!
> 	  vld2.32 {d20-d23}, [r1]!
> 	  vsub.f32	q8, q12, q11
> 	  vadd.f32	q9, q13, q10
> 	  vst2.32 {d16-d19}, [r2]!
> 	  cmp     r3, r2
> 	  bne     .L2
> 	  bx      lr
> 
> 
> Bootstrapped Regtested on arm-none-linux-gnueabihf and no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	* config/arm/iterators.md (rot): Add UNSPEC_VCMLS,
> UNSPEC_VCMUL and
> 	UNSPEC_VCMUL180.
> 	(rot_op, rotsplit1, rotsplit2, fcmac1, VCMLA_OP, VCMUL_OP): New.
> 	* config/arm/neon.md (cadd<rot><mode>3,
> cml<fcmac1><rot_op><mode>4,
> 	cmul<rot_op><mode>3): New.
> 	* config/arm/unspecs.md (UNSPEC_VCMUL, UNSPEC_VCMUL180,
> UNSPEC_VCMLS,
> 	UNSPEC_VCMLS180): New.
> 
> --

^ permalink raw reply	[flat|nested] 3+ messages in thread

* RE: [PATCH v2 14/16]Arm: Add NEON RTL patterns for Complex Addition,  Multiply and FMA.
  2020-09-25 14:31 [PATCH v2 14/16]Arm: Add NEON RTL patterns for Complex Addition, Multiply and FMA Tamar Christina
  2020-11-14 15:11 ` Tamar Christina
@ 2020-11-16  9:20 ` Kyrylo Tkachov
  1 sibling, 0 replies; 3+ messages in thread
From: Kyrylo Tkachov @ 2020-11-16  9:20 UTC (permalink / raw)
  To: Tamar Christina, gcc-patches
  Cc: nd, Ramana Radhakrishnan, Richard Earnshaw, nickc



> -----Original Message-----
> From: Tamar Christina <Tamar.Christina@arm.com>
> Sent: 25 September 2020 15:31
> To: gcc-patches@gcc.gnu.org
> Cc: nd <nd@arm.com>; Ramana Radhakrishnan
> <Ramana.Radhakrishnan@arm.com>; Richard Earnshaw
> <Richard.Earnshaw@arm.com>; nickc@redhat.com; Kyrylo Tkachov
> <Kyrylo.Tkachov@arm.com>
> Subject: [PATCH v2 14/16]Arm: Add NEON RTL patterns for Complex Addition,
> Multiply and FMA.
> 
> Hi All,
> 
> This adds implementation for the optabs for complex additions.  With this
> the
> following C code:
> 
>   void f90 (float complex a[restrict N], float complex b[restrict N],
> 	    float complex c[restrict N])
>   {
>     for (int i=0; i < N; i++)
>       c[i] = a[i] + (b[i] * I);
>   }
> 
> generates
> 
>   f90:
> 	  add     r3, r2, #1600
>   .L2:
> 	  vld1.32 {q8}, [r0]!
> 	  vld1.32 {q9}, [r1]!
> 	  vcadd.f32       q8, q8, q9, #90
> 	  vst1.32 {q8}, [r2]!
> 	  cmp     r3, r2
> 	  bne     .L2
> 	  bx      lr
> 
> 
> instead of
> 
>   f90:
> 	  add     r3, r2, #1600
>   .L2:
> 	  vld2.32 {d24-d27}, [r0]!
> 	  vld2.32 {d20-d23}, [r1]!
> 	  vsub.f32	q8, q12, q11
> 	  vadd.f32	q9, q13, q10
> 	  vst2.32 {d16-d19}, [r2]!
> 	  cmp     r3, r2
> 	  bne     .L2
> 	  bx      lr
> 
> 
> Bootstrapped Regtested on arm-none-linux-gnueabihf and no issues.
> 
> Ok for master?
> 

Ok.
Thanks,
Kyrill

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	* config/arm/iterators.md (rot): Add UNSPEC_VCMLS,
> UNSPEC_VCMUL and
> 	UNSPEC_VCMUL180.
> 	(rot_op, rotsplit1, rotsplit2, fcmac1, VCMLA_OP, VCMUL_OP): New.
> 	* config/arm/neon.md (cadd<rot><mode>3,
> cml<fcmac1><rot_op><mode>4,
> 	cmul<rot_op><mode>3): New.
> 	* config/arm/unspecs.md (UNSPEC_VCMUL, UNSPEC_VCMUL180,
> UNSPEC_VCMLS,
> 	UNSPEC_VCMLS180): New.
> 
> --

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-11-16  9:21 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-25 14:31 [PATCH v2 14/16]Arm: Add NEON RTL patterns for Complex Addition, Multiply and FMA Tamar Christina
2020-11-14 15:11 ` Tamar Christina
2020-11-16  9:20 ` Kyrylo Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).