public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [AArch64, Patch] Generate MLA when multiply + add vector by scalar
@ 2017-07-21 11:41 Jackson Woodruff
  2017-07-24 11:40 ` James Greenhalgh
  0 siblings, 1 reply; 2+ messages in thread
From: Jackson Woodruff @ 2017-07-21 11:41 UTC (permalink / raw)
  To: gcc-patches, James Greenhalgh, richard.earnshaw

[-- Attachment #1: Type: text/plain, Size: 1576 bytes --]

Hi all,

This merges vector multiplies and adds into a single mla instruction 
when the multiplication is done by a scalar.

Currently, for the following:

     typedef int __attribute__((vector_size(16))) vec;

     vec
     mla0(vec v0, vec v1, vec v2)
     {
       return v0 + v1 * v2[0];
     }

     vec
     mla1(vec v0, vec v1, int v2)
     {
       return v0 + v1 * v2;
     }

The function `mla0` outputs a multiply accumulate by element 
instruction. `mla1` outputs two vector operations (multiply followed by 
add). That is, we currently have:

     mla0:
         mla    v0.4s, v1.4s, v2.s[0]
         ret

     mla1:
         fmov   s2, w0
         mul    v1.4s, v1.4s, v2.s[0]
         add    v0.4s, v1.4s, v0.4s
         ret

This patch replaces this with something similar to `mla0`:

     mla1:
         fmov   s2, w0
         mla    v0.4s, v1.4s, v2.s[0]
         ret

The same is done for the analogous case of a vector multiply followed by
a vector subtract, where the multiply has a scalar integer operand. This
patch also adds testcases for both cases.

Bootstrap and testsuite run on aarch64. OK for trunk?

Jackson

Changelog entry:

gcc/

2017-06-06  Jackson Woodruff <jackson.woodruff@arm.com>

	* config/aarch64/aarch64-simd.md (aarch64_mla_elt_merge<mode>,

	aarch64_mls_elt_merge<mode>, aarch64_fma4_elt_merge<mode>,

	aarch64_fnma_elt_merge<mode>): New define_insns to generate

	multiply accumulate instructions for unmerged

	multiply add vector instructions.


gcc/testsuite/

2017-06-06  Jackson Woodruff <jackson.woodruff@arm.com>

	* gcc.target/aarch64/simd/vmla_elem_1.c: New.


[-- Attachment #2: patchfile --]
[-- Type: text/plain, Size: 3680 bytes --]

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 1cb6eeb318716aadacb84a44aa2062d486e0186b..ab1aa5ab84577b3cbddd1eb0e40d29e9b2aa4b42 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1033,6 +1033,18 @@
   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
 )
 
+(define_insn "*aarch64_mla_elt_merge<mode>"
+  [(set (match_operand:VDQHS 0 "register_operand" "=w")
+	(plus:VDQHS
+	  (mult:VDQHS (vec_duplicate:VDQHS
+		  (match_operand:<VEL> 1 "register_operand" "w"))
+		(match_operand:VDQHS 2 "register_operand" "w"))
+	  (match_operand:VDQHS 3 "register_operand" "0")))]
+ "TARGET_SIMD"
+ "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
+  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
+)
+
 (define_insn "aarch64_mls<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
@@ -1080,6 +1092,18 @@
   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
 )
 
+(define_insn "*aarch64_mls_elt_merge<mode>"
+  [(set (match_operand:VDQHS 0 "register_operand" "=w")
+	(minus:VDQHS
+	  (match_operand:VDQHS 1 "register_operand" "0")
+	  (mult:VDQHS (vec_duplicate:VDQHS
+		  (match_operand:<VEL> 2 "register_operand" "w"))
+		(match_operand:VDQHS 3 "register_operand" "w"))))]
+  "TARGET_SIMD"
+  "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
+  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
+)
+
 ;; Max/Min operations.
 (define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmla_elem_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmla_elem_1.c
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..df777581ab43b9b9e20b61f3f8d46193bdfda5fb 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/vmla_elem_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmla_elem_1.c
@@ -0,0 +1,67 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+typedef short int __attribute__ ((vector_size (16))) v8hi;
+
+v8hi
+mla8hi (v8hi v0, v8hi v1, short int v2)
+{
+  /* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.8h, v\[0-9\]\+\\.8h, v\[0-9\]\+\\.h\\\[0\\\]" } } */
+  return v0 + v1 * v2;
+}
+
+
+v8hi
+mls8hi (v8hi v0, v8hi v1, short int v2)
+{
+  /* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.8h, v\[0-9\]\+\\.8h, v\[0-9\]\+\\.h\\\[0\\\]" } } */
+  return v0 - v1 * v2;
+}
+
+typedef short int __attribute__ ((vector_size (8))) v4hi;
+
+v4hi
+mla4hi (v4hi v0, v4hi v1, short int v2)
+{
+  /* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.4h, v\[0-9\]\+\\.4h, v\[0-9\]\+\\.h\\\[0\\\]" } } */
+  return v0 + v1 * v2;
+}
+
+v4hi
+mls4hi (v4hi v0, v4hi v1, short int v2)
+{
+  /* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.4h, v\[0-9\]\+\\.4h, v\[0-9\]\+\\.h\\\[0\\\]" } } */
+  return v0 - v1 * v2;
+}
+
+typedef int __attribute__ ((vector_size (16))) v4si;
+
+v4si
+mla4si (v4si v0, v4si v1, int v2)
+{
+  /* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.4s, v\[0-9\]\+\\.4s, v\[0-9\]\+\\.s\\\[0\\\]" } } */
+  return v0 + v1 * v2;
+}
+
+v4si
+mls4si (v4si v0, v4si v1, int v2)
+{
+  /* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.4s, v\[0-9\]\+\\.4s, v\[0-9\]\+\\.s\\\[0\\\]" } } */
+  return v0 - v1 * v2;
+}
+
+typedef int __attribute__((vector_size (8))) v2si;
+
+v2si
+mla2si (v2si v0, v2si v1, int v2)
+{
+  /* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.2s, v\[0-9\]\+\\.2s, v\[0-9\]\+\\.s\\\[0\\\]" } } */
+  return v0 + v1 * v2;
+}
+
+v2si
+mls2si (v2si v0, v2si v1, int v2)
+{
+  /* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.2s, v\[0-9\]\+\\.2s, v\[0-9\]\+\\.s\\\[0\\\]" } } */
+  return v0 - v1 * v2;
+}

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [AArch64, Patch] Generate MLA when multiply + add vector by scalar
  2017-07-21 11:41 [AArch64, Patch] Generate MLA when multiply + add vector by scalar Jackson Woodruff
@ 2017-07-24 11:40 ` James Greenhalgh
  0 siblings, 0 replies; 2+ messages in thread
From: James Greenhalgh @ 2017-07-24 11:40 UTC (permalink / raw)
  To: Jackson Woodruff; +Cc: gcc-patches, richard.earnshaw, nd

On Fri, Jul 21, 2017 at 12:41:41PM +0100, Jackson Woodruff wrote:
> Hi all,
> 
> This merges vector multiplies and adds into a single mla instruction
> when the multiplication is done by a scalar.
> 
> Currently, for the following:
> 
>     typedef int __attribute__((vector_size(16))) vec;
> 
>     vec
>     mla0(vec v0, vec v1, vec v2)
>     {
>       return v0 + v1 * v2[0];
>     }
> 
>     vec
>     mla1(vec v0, vec v1, int v2)
>     {
>       return v0 + v1 * c;
>     }
> 
> The function `mla0` outputs a multiply accumulate by element
> instruction. `mla1` outputs two vector operations (multiply followed
> by add). That is, we currently have:
> 
>     mla0:
>         mla    v0.4s, v1.4s, v2.s[0]
>         ret
> 
>     mla1:
>         fmov   s2, w0
>         mul    v1.4s, v1.4s, v2.s[0]
>         add    v0.4s, v1.4s, v0.4s
>         ret
> 
> This patch replaces this with something similar to `mla0`:
> 
>     mla1:
>         fmov   s2, w0
>         mla    v0.4s, v1.4s, v2.s[0]
> 
> This is also done for the identical case for a multiply followed by
> a subtract of vectors with an integer operand on the multiply. Also
> add testcases for this.
> 
> Bootstrap and testsuite run on aarch64. OK for trunk?

OK. I've committed this on your behalf as r250475.

There were two issues with your ChangeLog; there should be two spaces
between your name and your email address, and you had pattern names
in the ChangeLog which did not appear in the patch. In the end, I
committed the patch with these ChangeLogs:

gcc/

2017-07-24  Jackson Woodruff  <jackson.woodruff@arm.com>

	* config/aarch64/aarch64-simd.md (aarch64_mla_elt_merge<mode>): New.
	(aarch64_mls_elt_merge<mode>): Likewise.

gcc/testsuite/

2017-07-24  Jackson Woodruff  <jackson.woodruff@arm.com>

	* gcc.target/aarch64/simd/vmla_elem_1.c: New.

Thanks,
James


> 
> Jackson
> 
> Changelog entry:
> 
> gcc/
> 
> 2017-06-06  Jackson Woodruff <jackson.woodruff@arm.com>
> 
> 	* config/aarch64/aarch64-simd.md (aarch64_mla_elt_merge<mode>,
> 
> 	aarch64_mls_elt_merge<mode>, aarch64_fma4_elt_merge<mode>,
> 
> 	aarch64_fnma_elt_merge<mode>): New define_insns to generate
> 
> 	multiply accumulate instructions for unmerged
> 
> 	multiply add vector instructions.
> 
> 
> gcc/testsuite/
> 
> 2017-06-06  Jackson Woodruff <jackson.woodruff@arm.com>
> 
> 	* gcc.target/aarch64/simd/vmla_elem_1.c: New.
> 


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2017-07-24 11:40 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-21 11:41 [AArch64, Patch] Generate MLA when multiply + add vector by scalar Jackson Woodruff
2017-07-24 11:40 ` James Greenhalgh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).