Re: [AArch64] Fix vcvt_high_f64_f32 and vcvt_figh_f32_f64 intrinsics.

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

From: James Greenhalgh <james.greenhalgh@arm.com>
To: gcc-patches@gcc.gnu.org
Cc: alalaw01@arm.com,	marsha01@arm.com,	tbelagod@arm.com,
	christophe.lyon@linaro.org
Subject: Re: [AArch64] Fix vcvt_high_f64_f32 and vcvt_figh_f32_f64 intrinsics.
Date: Mon, 21 Sep 2015 14:43:00 -0000	[thread overview]
Message-ID: <1442846305-39006-1-git-send-email-james.greenhalgh@arm.com> (raw)
In-Reply-To: <CAOckXuM+Yaavs0PW644ZssW5yy6e+T0dSF4_8xsOt-8TxPqNpw@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1502 bytes --]


On Mon, Sep 21, 2015 at 10:44:32AM +0100, Alan Lawrence wrote:
> [Resending in plain text] This makes sense to me now, although I find
> your comment slightly confusing:
>
> [....] in that
> +;; the meaning of HI and LO is always taken with a little-endian view of
> +;; the vector
>
> You mean vec_unpacks_{hi,lo} (which seems to go against the
> *architectural* bit after this), or hi/lo in cases other than
> vec_unpack (=> not "always"), or something else?
>
> maybe s/always/usually/ or s/always/otherwise/ ?
>

What I was aiming for is a description that our implementation of these
standard pattern names looks wrong, because "hi" always extracts the
architectural high lanes, in other big-endian patterns we make the
adjustment that higher numbered lanes map to the low architectural lanes.

I've tried to reword the comment to make it clearer, but I'm assuming some
familiarity with our overall big-endian vector model.

I've also updated the testcase to skip it if we are targetting AArch32,
which does not provide these intrinsics.

OK?

Thanks,
James

---
gcc/

2015-09-21  James Greenhalgh  <james.greenhalgh@arm.com>

	* config/aarch64/aarch64-simd.md

	(aarch64_float_truncate_hi_v4sf): Rewrite as an expand.
	(aarch64_float_truncate_hi_v4sf_le): New.
	(aarch64_float_truncate_hi_v4sf_be): Likewise.

gcc/testsuite/

2015-09-21  James Greenhalgh  <james.greenhalgh@arm.com>

	* gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c: New.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Re-AArch64-Fix-vcvt_high_f64_f32-and-vcvt_figh_f32_f.patch --]
[-- Type: text/x-patch;  name=0001-Re-AArch64-Fix-vcvt_high_f64_f32-and-vcvt_figh_f32_f.patch, Size: 5611 bytes --]

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index dbe5259..5ab2f2b 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1703,6 +1703,15 @@
   [(set_attr "type" "neon_fp_cvt_widen_s")]
 )
 
+;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
+;; is inconsistent with vector ordering elsewhere in the compiler, in that
+;; the meaning of HI and LO changes depending on the target endianness.
+;; While elsewhere we map the higher numbered elements of a vector to
+;; the lower architectural lanes of the vector, for these patterns we want
+;; to always treat "hi" as referring to the higher architectural lanes.
+;; Consequently, while the patterns below look inconsistent with our
+;; other big-endian patterns their behaviour is as required.
+
 (define_expand "vec_unpacks_lo_<mode>"
   [(match_operand:<VWIDE> 0 "register_operand" "")
    (match_operand:VQ_HSF 1 "register_operand" "")]
@@ -1757,17 +1766,42 @@
   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
 )
 
-(define_insn "aarch64_float_truncate_hi_<Vdbl>"
+(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
     (vec_concat:<VDBL>
       (match_operand:VDF 1 "register_operand" "0")
       (float_truncate:VDF
 	(match_operand:<VWIDE> 2 "register_operand" "w"))))]
-  "TARGET_SIMD"
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
 )
 
+(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
+  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
+    (vec_concat:<VDBL>
+      (float_truncate:VDF
+	(match_operand:<VWIDE> 2 "register_operand" "w"))
+      (match_operand:VDF 1 "register_operand" "0")))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN"
+  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
+  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
+)
+
+(define_expand "aarch64_float_truncate_hi_<Vdbl>"
+  [(match_operand:<VDBL> 0 "register_operand" "=w")
+   (match_operand:VDF 1 "register_operand" "0")
+   (match_operand:<VWIDE> 2 "register_operand" "w")]
+  "TARGET_SIMD"
+{
+  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
+			     ? gen_aarch64_float_truncate_hi_<Vdbl>_be
+			     : gen_aarch64_float_truncate_hi_<Vdbl>_le;
+  emit_insn (gen (operands[0], operands[1], operands[2]));
+  DONE;
+}
+)
+
 (define_expand "vec_pack_trunc_v2df"
   [(set (match_operand:V4SF 0 "register_operand")
       (vec_concat:V4SF
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c
new file mode 100644
index 0000000..4691da3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c
@@ -0,0 +1,99 @@
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include "arm_neon.h"
+#include <inttypes.h>
+
+void abort (void);
+
+void
+foo (void)
+{
+  /* Test vcvt_high_f32_f64.  */
+  float32x2_t arg1;
+  float64x2_t arg2;
+  float32x4_t result;
+  arg1 = vcreate_f32 (UINT64_C (0x3f0db5793f6e1892));
+  arg2 = vcombine_f64 (vcreate_f64 (UINT64_C (0x3fe8e49d23fb575d)),
+		       vcreate_f64 (UINT64_C (0x3fd921291b3df73e)));
+  //  Expect: "result" = 3ec909483f4724e93f0db5793f6e1892
+  result = vcvt_high_f32_f64 (arg1, arg2);
+  float32_t got;
+  float32_t exp;
+
+  /* Lane 0.  */
+  got = vgetq_lane_f32 (result, 0);
+  exp = ((float32_t) 0.9300624132156372);
+  if (((((exp / got) < ((float32_t) 0.999))
+	 || ((exp / got) > ((float32_t) 1.001)))
+     && (((exp - got) < ((float32_t) -1.0e-4))
+	 || ((exp - got) > ((float32_t) 1.0e-4)))))
+    abort ();
+
+  /* Lane 1.  */
+  got = vgetq_lane_f32 (result, 1);
+  exp = ((float32_t) 0.5535503029823303);
+  if (((((exp / got) < ((float32_t) 0.999))
+	  || ((exp / got) > ((float32_t) 1.001)))
+     && (((exp - got) < ((float32_t) -1.0e-4))
+	   || ((exp - got) > ((float32_t) 1.0e-4)))))
+    abort ();
+
+  /* Lane 2.  */
+  got = vgetq_lane_f32 (result, 2);
+  exp = ((float32_t) 0.7779069617051665);
+  if (((((exp / got) < ((float32_t) 0.999))
+	  || ((exp / got) > ((float32_t) 1.001)))
+      && (((exp - got) < ((float32_t) -1.0e-4))
+	  || ((exp - got) > ((float32_t) 1.0e-4)))))
+    abort ();
+
+  /* Lane 3.  */
+  got = vgetq_lane_f32 (result, 3);
+  exp = ((float32_t) 0.3926489606891329);
+  if (((((exp / got) < ((float32_t) 0.999))
+	  || ((exp / got) > ((float32_t) 1.001)))
+      && (((exp - got) < ((float32_t) -1.0e-4))
+	  || ((exp - got) > ((float32_t) 1.0e-4)))))
+    abort ();
+}
+
+void
+bar (void)
+{
+  /* Test vcvt_high_f64_f32.  */
+  float32x4_t arg1;
+  float64x2_t result;
+  arg1 = vcombine_f32 (vcreate_f32 (UINT64_C (0x3f7c5cf13f261f74)),
+		       vcreate_f32 (UINT64_C (0x3e3a7bc03f6ccc1d)));
+  //  Expect: "result" = 3fc74f78000000003fed9983a0000000
+  result = vcvt_high_f64_f32 (arg1);
+
+  float64_t got;
+  float64_t exp;
+
+  /* Lane 0.  */
+  got = vgetq_lane_f64 (result, 0);
+  exp = 0.9249895215034485;
+  if (((((exp / got) < 0.999)
+	 || ((exp / got) > 1.001))
+     && (((exp - got) < -1.0e-4)
+	 || ((exp - got) > 1.0e-4))))
+    abort ();
+
+  /* Lane 1.  */
+  got = vgetq_lane_f64 (result, 1);
+  exp = 0.1821126937866211;
+  if (((((exp / got) < 0.999)
+	  || ((exp / got) > 1.001))
+      && (((exp - got) < -1.0e-4)
+	  || ((exp - got) > 1.0e-4))))
+    abort ();
+}
+
+int
+main (int argc, char **argv)
+{
+  foo ();
+  bar ();
+  return 0;
+}

next prev parent reply	other threads:[~2015-09-21 14:38 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-09-09  8:53 James Greenhalgh
2015-09-09 10:31 ` Christophe Lyon
2015-09-10 14:16   ` James Greenhalgh
2015-09-20 20:51     ` Christophe Lyon
2015-09-21  9:59     ` Alan Lawrence
2015-09-21 14:43       ` James Greenhalgh [this message]
2015-09-21 15:12         ` Alan Lawrence
2015-09-22 16:03         ` Marcus Shawcroft
2015-09-09 11:43 ` Alan Lawrence
2015-09-10 11:21   ` Alan Lawrence

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1442846305-39006-1-git-send-email-james.greenhalgh@arm.com \
    --to=james.greenhalgh@arm.com \
    --cc=alalaw01@arm.com \
    --cc=christophe.lyon@linaro.org \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=marsha01@arm.com \
    --cc=tbelagod@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).