public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Kirill Yukhin <kirill.yukhin@gmail.com>
To: Uros Bizjak <ubizjak@gmail.com>
Cc: Jakub Jelinek <jakub@redhat.com>, GCC Patches <gcc-patches@gcc.gnu.org>
Subject: Re: [PATCH i386 13/8] [AVX-512] Fix argument order for perm and recp intrinsics.
Date: Mon, 17 Feb 2014 12:27:00 -0000	[thread overview]
Message-ID: <20140217122656.GB58805@msticlxl57.ims.intel.com> (raw)
In-Reply-To: <CAFULd4YsyhC4D5CWcz6Z+kSZxFXBWs+HC31o=Sf2D-8P=zcmiA@mail.gmail.com>

Hello Uroš,
On 13 Feb 18:25, Uros Bizjak wrote:
> On Thu, Feb 13, 2014 at 1:55 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> 
> >>
> >> Please don't change srcp pattern, it should be defined similar to
> >> vrcpss (aka sse_vmrcpv4sf). You need to switch operand order
> >> elsewhere.
> >
> > No, you are correct. Operands should be swapped as in your patch.
> 
> Eh, sorry that after some more thinking, I have to again revert this decision.
> 
> The srcp pattern should remain as is, and you should swap operands in
> avx512fintrin.h instead:

In the bottom there's updated patch.

Added "sse" type. mem operand made second.
Built-ins & tests fixed.

Testing in progress.

Is it ok for mainline if pass?

--
Thanks, K

---
 gcc/config/i386/sse.md                                | 19 ++++++++++++-------
 gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c   | 11 ++++++-----
 gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c   | 11 ++++++-----
 gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c | 11 ++++++-----
 gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c | 11 ++++++-----
 gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c    |  4 ++--
 gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c    |  8 ++++----
 7 files changed, 42 insertions(+), 33 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5595767..3d360a0 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1456,12 +1456,12 @@
   [(set (match_operand:VF_128 0 "register_operand" "=v")
 	(vec_merge:VF_128
 	  (unspec:VF_128
-	    [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
+	    [(match_operand:VF_128 2 "nonimmediate_operand" "vm")]
 	    UNSPEC_RCP14)
-	  (match_operand:VF_128 2 "register_operand" "v")
+	  (match_operand:VF_128 1 "register_operand" "v")
 	  (const_int 1)))]
   "TARGET_AVX512F"
-  "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
+  "vrcp14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sse")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
@@ -12804,6 +12804,7 @@
   "TARGET_AVX512ER"
   "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
   [(set_attr "prefix" "evex")
+   (set_attr "type" "sse")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
@@ -12814,20 +12815,22 @@
   "TARGET_AVX512ER"
   "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
   [(set_attr "prefix" "evex")
+   (set_attr "type" "sse")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v")
 	(vec_merge:VF_128
 	  (unspec:VF_128
-	    [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+	    [(match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
 	    UNSPEC_RCP28)
-	  (match_operand:VF_128 2 "register_operand" "v")
+	  (match_operand:VF_128 1 "register_operand" "v")
 	  (const_int 1)))]
   "TARGET_AVX512ER"
   "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}"
   [(set_attr "length_immediate" "1")
    (set_attr "prefix" "evex")
+   (set_attr "type" "sse")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
@@ -12838,19 +12841,21 @@
   "TARGET_AVX512ER"
   "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
   [(set_attr "prefix" "evex")
+   (set_attr "type" "sse")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v")
 	(vec_merge:VF_128
 	  (unspec:VF_128
-	    [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+	    [(match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
 	    UNSPEC_RSQRT28)
-	  (match_operand:VF_128 2 "register_operand" "v")
+	  (match_operand:VF_128 1 "register_operand" "v")
 	  (const_int 1)))]
   "TARGET_AVX512ER"
   "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}"
   [(set_attr "length_immediate" "1")
+   (set_attr "type" "sse")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c
index d30f088..889f990 100644
--- a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c
@@ -10,19 +10,20 @@
 void static
 avx512er_test (void)
 {
-  union128d src, res;
+  union128d src1, src2, res;
   double res_ref[2];
   int i;
   
   for (i = 0; i < 2; i++)
     {
-      src.a[i] = 179.345 - 6.5645 * i;
-      res_ref[i] = src.a[i];
+      src1.a[i] = 179.345 - 6.5645 * i;
+      src2.a[i] = 204179.345 + 6.5645 * i;
+      res_ref[i] = src1.a[i];
     }
 
-  res_ref[0] = 1.0 / src.a[0];
+  res_ref[0] = 1.0 / src2.a[0];
 
-  res.x = _mm_rcp28_round_sd (src.x, src.x, _MM_FROUND_NO_EXC);
+  res.x = _mm_rcp28_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC);
 
   if (checkVd (res.a, res_ref, 2))
     abort ();
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c
index 499a977..3280879 100644
--- a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c
@@ -10,19 +10,20 @@
 void static
 avx512er_test (void)
 {
-  union128 src, res;
+  union128 src1, src2, res;
   float res_ref[4];
   int i;
   
   for (i = 0; i < 4; i++)
     {
-      src.a[i] = 179.345 - 6.5645 * i;
-      res_ref[i] = src.a[i];
+      src1.a[i] = 179.345 - 6.5645 * i;
+      src2.a[i] = 179345.006 + 6.5645 * i;
+      res_ref[i] = src1.a[i];
     }
 
-  res_ref[0] = 1.0 / src.a[0];
+  res_ref[0] = 1.0 / src2.a[0];
 
-  res.x = _mm_rsqrt28_round_ss (src.x, src.x, _MM_FROUND_NO_EXC);
+  res.x = _mm_rcp28_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC);
 
   if (checkVf (res.a, res_ref, 4))
     abort ();
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c
index 1537a59..bd217e8 100644
--- a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c
@@ -10,19 +10,20 @@
 void static
 avx512er_test (void)
 {
-  union128d src, res;
+  union128d src1, src2, res;
   double res_ref[2];
   int i;
   
   for (i = 0; i < 2; i++)
     {
-      src.a[i] = 179.345 - 6.5645 * i;
-      res_ref[i] = src.a[i];
+      src1.a[i] = 179.345 - 6.5645 * i;
+      src2.a[i] = 45 - 6.5645 * i;
+      res_ref[i] = src1.a[i];
     }
 
-  res_ref[0] = 1.0 / sqrt (src.a[0]);
+  res_ref[0] = 1.0 / sqrt (src2.a[0]);
 
-  res.x = _mm_rsqrt28_round_sd (src.x, src.x, _MM_FROUND_NO_EXC);
+  res.x = _mm_rsqrt28_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC);
 
   if (checkVd (res.a, res_ref, 2))
     abort ();
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c
index f88422e..f7bfff5 100644
--- a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c
@@ -10,19 +10,20 @@
 void static
 avx512er_test (void)
 {
-  union128 src, res;
+  union128 src1, src2, res;
   float res_ref[4];
   int i;
   
   for (i = 0; i < 4; i++)
     {
-      src.a[i] = 179.345 - 6.5645 * i;
-      res_ref[i] = src.a[i];
+      src1.a[i] = 179.345 - 6.5645 * i;
+      src2.a[i] = 179221345 + 6.5645 * i;
+      res_ref[i] = src1.a[i];
     }
 
-  res_ref[0] = 1.0 / sqrt (src.a[0]);
+  res_ref[0] = 1.0 / sqrt (src2.a[0]);
 
-  res.x = _mm_rsqrt28_round_ss (src.x, src.x, _MM_FROUND_NO_EXC);
+  res.x = _mm_rsqrt28_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC);
 
   if (checkVf (res.a, res_ref, 4))
     abort ();
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c
index 0c9211a..f944600 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c
@@ -8,8 +8,8 @@
 static void
 compute_vrcp14sd (double *s1, double *s2, double *r)
 {
-  r[0] = 1.0 / s1[0];
-  r[1] = s2[1];
+  r[0] = 1.0 / s2[0];
+  r[1] = s1[1];
 }
 
 static void
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c
index 3344dad..7aca591 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c
@@ -8,10 +8,10 @@
 static void
 compute_vrcp14ss (float *s1, float *s2, float *r)
 {
-  r[0] = 1.0 / s1[0];
-  r[1] = s2[1];
-  r[2] = s2[2];
-  r[3] = s2[3];
+  r[0] = 1.0 / s2[0];
+  r[1] = s1[1];
+  r[2] = s1[2];
+  r[3] = s1[3];
 }
 
 static void

  reply	other threads:[~2014-02-17 12:27 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-02-13 10:45 Kirill Yukhin
2014-02-13 12:37 ` Uros Bizjak
2014-02-13 12:55   ` Uros Bizjak
2014-02-13 17:25     ` Uros Bizjak
2014-02-17 12:27       ` Kirill Yukhin [this message]
2014-02-17 12:42         ` Uros Bizjak
2014-02-18 10:07           ` Kirill Yukhin
2014-02-18 10:34             ` Uros Bizjak

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140217122656.GB58805@msticlxl57.ims.intel.com \
    --to=kirill.yukhin@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=jakub@redhat.com \
    --cc=ubizjak@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).