* [AArch64] implement vmlsq_laneq_*
@ 2012-08-17 15:18 Sofiane Naci
0 siblings, 0 replies; only message in thread
From: Sofiane Naci @ 2012-08-17 15:18 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 517 bytes --]
Hi,
I've just committed the attached patch on the AArch64 branch to implement
vmlsq_laneq_*.
Thanks
Sofiane
-----
r190484 | sofiane | 2012-08-17 16:15:49 +0100 (Fri, 17 Aug 2012) | 11 lines
Tejas Belagod <tejas.belagod@arm.com>
[AArch64] Implement vmlsq_laneq_*.
gcc/
* config/aarch64/arm_neon.h (vmlsq_laneq_f32, vmlsq_laneq_s16,
vmlsq_laneq_u16, vmlsq_laneq_s32, vmlsq_laneq_u32): New.
testsuite/
* gcc.target/aarch64/vmlsq_laneq.c: New.
[-- Attachment #2: aarch64-vmlsq-laneq.patch --]
[-- Type: application/octet-stream, Size: 7744 bytes --]
Index: gcc/testsuite/gcc.target/aarch64/vmlsq_laneq.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/vmlsq_laneq.c (revision 0)
+++ gcc/testsuite/gcc.target/aarch64/vmlsq_laneq.c (revision 0)
@@ -0,0 +1,158 @@
+
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+void
+test1 ()
+{
+ int16x8_t val1, val2, val3;
+ int16x8_t result;
+ uint64_t act, exp;
+
+ val1 = vcombine_s16 (vcreate_s16 (UINT64_C (0xffff9ab680000000)),
+ vcreate_s16 (UINT64_C (0x00000000ffff0000)));
+ val2 = vcombine_s16 (vcreate_s16 (UINT64_C (0x32b77fffffff7fff)),
+ vcreate_s16 (UINT64_C (0x0000ffff00007fff)));
+ val3 = vcombine_s16 (vcreate_s16 (UINT64_C (0x7fff00007fff0000)),
+ vcreate_s16 (UINT64_C (0x80007fff00000000)));
+ result = vmlsq_laneq_s16 (val1, val2, val3, 6);
+
+ act = vgetq_lane_u64 (vreinterpretq_u64_s16 (result), 0);
+ exp = UINT64_C (0xb2b69ab5ffffffff);
+ if (act != exp)
+ abort ();
+
+ act = vgetq_lane_u64 (vreinterpretq_u64_s16 (result), 1);
+ exp = UINT64_C (0x00007fffffffffff);
+ if (act != exp)
+ abort ();
+}
+
+void
+test2 ()
+{
+ int32x4_t val1, val2, val3;
+ int32x4_t result;
+ uint64_t exp, act;
+
+ val1 = vcombine_s32 (vcreate_s32 (UINT64_C (0x00008000f46f7fff)),
+ vcreate_s32 (UINT64_C (0x7fffffffffff8000)));
+ val2 = vcombine_s32 (vcreate_s32 (UINT64_C (0x7fff7fff0e700000)),
+ vcreate_s32 (UINT64_C (0xffff000080000000)));
+ val3 = vcombine_s32 (vcreate_s32 (UINT64_C (0x00000000ffff0000)),
+ vcreate_s32 (UINT64_C (0xd9edea1a8000fb28)));
+ result = vmlsq_laneq_s32 (val1, val2, val3, 3);
+
+ act = vgetq_lane_u64 (vreinterpretq_u64_s32 (result), 0);
+ exp = UINT64_C (0xcefb6a1a1d0f7fff);
+ if (act != exp)
+ abort ();
+
+ act = vgetq_lane_u64 (vreinterpretq_u64_s32 (result), 1);
+ exp = UINT64_C (0x6a19ffffffff8000);
+ if (act != exp)
+ abort ();
+}
+
+void
+test3 ()
+{
+ uint16x8_t val1, val2, val3;
+ uint16x8_t result;
+ uint64_t act, exp;
+
+ val1 = vcombine_u16 (vcreate_u16 (UINT64_C (0x000080008000802a)),
+ vcreate_u16 (UINT64_C (0x7fffffff00007fff)));
+ val2 = vcombine_u16 (vcreate_u16 (UINT64_C (0x7fffcdf1ffff0000)),
+ vcreate_u16 (UINT64_C (0xe2550000ffffffff)));
+ val3 = vcombine_u16 (vcreate_u16 (UINT64_C (0x80007fff80000000)),
+ vcreate_u16 (UINT64_C (0xbe2100007fffffff)));
+
+ result = vmlsq_laneq_u16 (val1, val2, val3, 7);
+
+ act = vgetq_lane_u64 (vreinterpretq_u64_u16 (result), 0);
+ exp = UINT64_C (0x3e2115ef3e21802a);
+ if (act != exp)
+ abort ();
+
+ act = vgetq_lane_u64 (vreinterpretq_u64_u16 (result), 1);
+ exp = UINT64_C (0x3d0affffbe213e20);
+ if (act != exp)
+ abort ();
+}
+
+void
+test4 ()
+{
+ uint32x4_t val1, val2, val3;
+ uint32x4_t result;
+ uint64_t act, exp;
+
+ val1 = vcombine_u32 (vcreate_u32 (UINT64_C (0x3295fe3d7fff7fff)),
+ vcreate_u32 (UINT64_C (0x7fff00007fff7fff)));
+ val2 = vcombine_u32 (vcreate_u32 (UINT64_C (0xffff7fff7fff8000)),
+ vcreate_u32 (UINT64_C (0x7fff80008000ffff)));
+ val3 = vcombine_u32 (vcreate_u32 (UINT64_C (0x7fff7fff80008000)),
+ vcreate_u32 (UINT64_C (0x0000800053ab7fff)));
+
+ result = vmlsq_laneq_u32 (val1, val2, val3, 2);
+
+ act = vgetq_lane_u64 (vreinterpretq_u64_u32 (result), 0);
+ exp = UINT64_C (0x4640fe3cbffeffff);
+ if (act != exp)
+ abort ();
+
+ act = vgetq_lane_u64 (vreinterpretq_u64_u32 (result), 1);
+ exp = UINT64_C (0xbffe8000d3abfffe);
+ if (act != exp)
+ abort ();
+}
+
+void
+test5 ()
+{
+ float32x4_t val1, val2, val3;
+ float32x4_t result;
+ float32_t act;
+
+ val1 = vcombine_f32 (vcreate_f32 (UINT64_C (0x3f49daf03ef3dc73)),
+ vcreate_f32 (UINT64_C (0x3f5d467a3ef3dc73)));
+ val2 = vcombine_f32 (vcreate_f32 (UINT64_C (0x3d2064c83d10cd28)),
+ vcreate_f32 (UINT64_C (0x3ea7d1a23d10cd28)));
+ val3 = vcombine_f32 (vcreate_f32 (UINT64_C (0x3f6131993edb1e04)),
+ vcreate_f32 (UINT64_C (0x3f37f4bf3edb1e04)));
+
+ result = vmlsq_laneq_f32 (val1, val2, val3, 0);
+
+ act = vgetq_lane_f32 (result, 0);
+ if (act != 0.46116194128990173f)
+ abort ();
+
+ act = vgetq_lane_f32 (result, 1);
+ if (act != 0.7717385292053223f)
+ abort ();
+
+ act = vgetq_lane_f32 (result, 2);
+ if (act != 0.46116194128990173f)
+ abort ();
+
+ act = vgetq_lane_f32 (result, 3);
+ if (act != 0.7240825295448303f)
+ abort ();
+}
+
+int
+main (void)
+{
+ test1 ();
+ test2 ();
+ test3 ();
+ test4 ();
+ test5 ();
+
+ return 0;
+}
Property changes on: gcc/testsuite/gcc.target/aarch64/vmlsq_laneq.c
___________________________________________________________________
Added: svn:keywords
+ Rev Date Author URL Id
Added: svn:eol-style
+ native
Index: gcc/config/aarch64/arm_neon.h
===================================================================
--- gcc/config/aarch64/arm_neon.h (revision 190482)
+++ gcc/config/aarch64/arm_neon.h (working copy)
@@ -11151,6 +11151,77 @@
result; \
})
+#define vmlsq_laneq_f32(__a, __b, __c, __d) \
+ __extension__ \
+ ({ \
+ float32x4_t __c_ = (__c); \
+ float32x4_t __b_ = (__b); \
+ float32x4_t __a_ = (__a); \
+ float32x4_t __result; \
+ float32x4_t __t1; \
+ __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fsub %0.4s, %0.4s, %1.4s" \
+ : "=w"(__result), "=w"(__t1) \
+ : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
+ : /* No clobbers */); \
+ __result; \
+ })
+
+#define vmlsq_laneq_s16(__a, __b, __c, __d) \
+ __extension__ \
+ ({ \
+ int16x8_t __c_ = (__c); \
+ int16x8_t __b_ = (__b); \
+ int16x8_t __a_ = (__a); \
+ int16x8_t __result; \
+ __asm__ ("mls %0.8h, %2.8h, %3.h[%4]" \
+ : "=w"(__result) \
+ : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
+ : /* No clobbers */); \
+ __result; \
+ })
+
+#define vmlsq_laneq_s32(__a, __b, __c, __d) \
+ __extension__ \
+ ({ \
+ int32x4_t __c_ = (__c); \
+ int32x4_t __b_ = (__b); \
+ int32x4_t __a_ = (__a); \
+ int32x4_t __result; \
+ __asm__ ("mls %0.4s, %2.4s, %3.s[%4]" \
+ : "=w"(__result) \
+ : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
+ : /* No clobbers */); \
+ __result; \
+ })
+
+#define vmlsq_laneq_u16(__a, __b, __c, __d) \
+ __extension__ \
+ ({ \
+ uint16x8_t __c_ = (__c); \
+ uint16x8_t __b_ = (__b); \
+ uint16x8_t __a_ = (__a); \
+ uint16x8_t __result; \
+ __asm__ ("mls %0.8h, %2.8h, %3.h[%4]" \
+ : "=w"(__result) \
+ : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
+ : /* No clobbers */); \
+ __result; \
+ })
+
+#define vmlsq_laneq_u32(__a, __b, __c, __d) \
+ __extension__ \
+ ({ \
+ uint32x4_t __c_ = (__c); \
+ uint32x4_t __b_ = (__b); \
+ uint32x4_t __a_ = (__a); \
+ uint32x4_t __result; \
+ __asm__ ("mls %0.4s, %2.4s, %3.s[%4]" \
+ : "=w"(__result) \
+ : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
+ : /* No clobbers */); \
+ __result; \
+ })
+
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
{
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2012-08-17 15:18 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-08-17 15:18 [AArch64] implement vmlsq_laneq_* Sofiane Naci
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).