From: Ilya Tocar <tocarip.intel@gmail.com>
To: Uros Bizjak <ubizjak@gmail.com>
Cc: gcc-patches@gcc.gnu.org
Subject: Re: [PATCH, i386, testsuite] FMA intrinsics
Date: Wed, 24 Aug 2011 10:06:00 -0000 [thread overview]
Message-ID: <CAFFGCRDywDPXcC5t=5T2-1ot0JFDJN59_iezcpi7ehYNmKD2Qg@mail.gmail.com> (raw)
In-Reply-To: <CAFULd4bd=FDBJhk4pE7texv91OJXf453c87RgopA3b=3bGe5YQ@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 492 bytes --]
Removed extra blank lines and pass tests through "indent".
2011/8/23 Uros Bizjak <ubizjak@gmail.com>:
> On Tue, Aug 23, 2011 at 4:19 PM, Ilya Tocar <tocarip.intel@gmail.com> wrote:
>> I removed unnecessary expands/builtins and tests are now compiled with -O2.
>> Is this version ok?
>
> OK with minor comments:
>
> - Please remove extra blank lines you introduced in sse.md
> - Also, I'd recomend you to pass new testcases through "indent"
> command to fix formatting.
>
> Thanks,
> Uros.
>
[-- Attachment #2: patch --]
[-- Type: application/octet-stream, Size: 42603 bytes --]
diff --git a/gcc/config.gcc b/gcc/config.gcc
index ec13d93..3879a2a 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -352,7 +352,7 @@ i[34567]86-*-*)
nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
immintrin.h x86intrin.h avxintrin.h xopintrin.h
ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
- lzcntintrin.h bmiintrin.h tbmintrin.h"
+ lzcntintrin.h bmiintrin.h tbmintrin.h fmaintrin.h"
;;
x86_64-*-*)
cpu_type=i386
@@ -364,7 +364,7 @@ x86_64-*-*)
nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
immintrin.h x86intrin.h avxintrin.h xopintrin.h
ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
- lzcntintrin.h bmiintrin.h tbmintrin.h"
+ lzcntintrin.h bmiintrin.h tbmintrin.h fmaintrin.h"
need_64bit_hwint=yes
;;
ia64-*-*)
diff --git a/gcc/config/i386/fmaintrin.h b/gcc/config/i386/fmaintrin.h
new file mode 100644
index 0000000..2cde564
--- /dev/null
+++ b/gcc/config/i386/fmaintrin.h
@@ -0,0 +1,233 @@
+/* Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <fmaintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _FMAINTRIN_H_INCLUDED
+#define _FMAINTRIN_H_INCLUDED
+
+#ifndef __FMA__
+# error "FMA instruction set not enabled"
+#else
+
+#include <immintrin.h>
+
+
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmadd_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+ return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+ return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+ return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+ return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d)__builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+ return (__m256d)__builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, (__v4df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128)__builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+ return (__m256)__builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d)__builtin_ia32_vfmaddsd3 (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128)__builtin_ia32_vfmaddss3 (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d)__builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+ return (__m256d)__builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128)__builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+ return (__m256)__builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d)__builtin_ia32_vfmaddsd3 (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128)__builtin_ia32_vfmaddss3 (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+ return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+ return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+ return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+ return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
+}
+
+#endif
+
+#endif
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index fe6ccbe..07461e3 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -23888,7 +23888,7 @@ enum ix86_builtins
IX86_BUILTIN_VEC_PERM_V4DF,
IX86_BUILTIN_VEC_PERM_V8SF,
- /* FMA4 and XOP instructions. */
+ /* FMA4 instructions. */
IX86_BUILTIN_VFMADDSS,
IX86_BUILTIN_VFMADDSD,
IX86_BUILTIN_VFMADDPS,
@@ -23900,6 +23900,11 @@ enum ix86_builtins
IX86_BUILTIN_VFMADDSUBPS256,
IX86_BUILTIN_VFMADDSUBPD256,
+ /* FMA3 instructions. */
+ IX86_BUILTIN_VFMADDSS3,
+ IX86_BUILTIN_VFMADDSD3,
+
+ /* XOP instructions. */
IX86_BUILTIN_VPCMOV,
IX86_BUILTIN_VPCMOV_V2DI,
IX86_BUILTIN_VPCMOV_V4SI,
@@ -25100,6 +25105,13 @@ static const struct builtin_description bdesc_multi_arg[] =
"__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
UNKNOWN, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
+ "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
+ UNKNOWN, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
+ "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
+ UNKNOWN, (int)MULTI_ARG_3_DF },
+
{ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
"__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
UNKNOWN, (int)MULTI_ARG_3_SF },
@@ -25113,6 +25125,9 @@ static const struct builtin_description bdesc_multi_arg[] =
"__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
UNKNOWN, (int)MULTI_ARG_3_DF2 },
+
+
+
{ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
"__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
UNKNOWN, (int)MULTI_ARG_3_SF },
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index e9f6c3d..0b455a5 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1593,6 +1593,89 @@
operands[4] = CONST0_RTX (<MODE>mode);
})
+(define_expand "fmai_vmfmadd_<mode>"
+ [(set (match_operand:VF_128 0 "register_operand")
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand")
+ (match_operand:VF_128 2 "nonimmediate_operand")
+ (match_operand:VF_128 3 "nonimmediate_operand"))
+ (match_dup 0)
+ (const_int 1)))]
+ "TARGET_FMA")
+
+(define_insn "*fmai_fmadd_<mode>"
+ [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
+ (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
+ (match_dup 0)
+ (const_int 1)))]
+ "TARGET_FMA"
+ "@
+ vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fmsub_<mode>"
+ [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
+ (neg:VF_128
+ (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
+ (match_dup 0)
+ (const_int 1)))]
+ "TARGET_FMA"
+ "@
+ vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fnmadd_<mode>"
+ [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+ (vec_merge:VF_128
+ (fma:VF_128
+ (neg:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
+ (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
+ (match_dup 0)
+ (const_int 1)))]
+ "TARGET_FMA"
+ "@
+ vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fnmsub_<mode>"
+ [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+ (vec_merge:VF_128
+ (fma:VF_128
+ (neg:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
+ (neg:VF_128
+ (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
+ (match_dup 0)
+ (const_int 1)))]
+ "TARGET_FMA"
+ "@
+ vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*fma4i_vmfmadd_<mode>"
[(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h
index 88456f9..546b8a8 100644
--- a/gcc/config/i386/x86intrin.h
+++ b/gcc/config/i386/x86intrin.h
@@ -93,4 +93,8 @@
#include <popcntintrin.h>
#endif
+#ifdef __FMA__
+#include <fmaintrin.h>
+#endif
+
#endif /* _X86INTRIN_H_INCLUDED */
diff --git a/gcc/testsuite/gcc.target/i386/fma-256-fmaddXX.c b/gcc/testsuite/gcc.target/i386/fma-256-fmaddXX.c
new file mode 100644
index 0000000..7e73402
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-256-fmaddXX.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+void
+check_mm256_fmadd_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+ union256d a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ double d[4];
+ int i;
+ e.x = _mm256_fmadd_pd (__A, __B, __C);
+ for (i = 0; i < 4; i++)
+ {
+ d[i] = a.a[i] * b.a[i] + c.a[i];
+ }
+ if (check_union256d (e, d))
+ abort ();
+}
+
+void
+check_mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+ union256 a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ float d[8];
+ int i;
+ e.x = _mm256_fmadd_ps (__A, __B, __C);
+ for (i = 0; i < 8; i++)
+ {
+ d[i] = a.a[i] * b.a[i] + c.a[i];
+ }
+ if (check_union256 (e, d))
+ abort ();
+}
+
+static void
+fma_test (void)
+{
+ union256 c[3];
+ union256d d[3];
+ int i, j;
+ for (i = 0; i < 3; i++)
+ {
+ for (j = 0; j < 8; j++)
+ c[i].a[j] = i * j + 3.5;
+ for (j = 0; j < 4; j++)
+ d[i].a[j] = i * j + 3.5;
+ }
+ check_mm256_fmadd_pd (d[0].x, d[1].x, d[2].x);
+ check_mm256_fmadd_ps (c[0].x, c[1].x, c[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-256-fmaddsubXX.c b/gcc/testsuite/gcc.target/i386/fma-256-fmaddsubXX.c
new file mode 100644
index 0000000..4b61ad5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-256-fmaddsubXX.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+void
+check_mm256_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+ union256 a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ float d[8];
+ int i;
+ e.x = _mm256_fmaddsub_ps (__A, __B, __C);
+ for (i = 0; i < 8; i++)
+ {
+ d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? c.a[i] : -c.a[i]);
+ }
+ if (check_union256 (e, d))
+ abort ();
+}
+
+void
+check_mm256_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+ union256d a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ double d[4];
+ int i;
+ e.x = _mm256_fmaddsub_pd (__A, __B, __C);
+ for (i = 0; i < 4; i++)
+ {
+ d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? c.a[i] : -c.a[i]);
+ }
+ if (check_union256d (e, d))
+ abort ();
+}
+
+static void
+fma_test (void)
+{
+ union256 c[3];
+ union256d d[3];
+ int i, j;
+ for (i = 0; i < 3; i++)
+ {
+ for (j = 0; j < 8; j++)
+ c[i].a[j] = i * j + 3.5;
+ for (j = 0; j < 4; j++)
+ d[i].a[j] = i * j + 3.5;
+ }
+ check_mm256_fmaddsub_pd (d[0].x, d[1].x, d[2].x);
+ check_mm256_fmaddsub_ps (c[0].x, c[1].x, c[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-256-fmsubXX.c b/gcc/testsuite/gcc.target/i386/fma-256-fmsubXX.c
new file mode 100644
index 0000000..d92aec0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-256-fmsubXX.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+
+void
+check_mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+ union256d a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ double d[4];
+ int i;
+ e.x = _mm256_fmsub_pd (__A, __B, __C);
+ for (i = 0; i < 4; i++)
+ {
+ d[i] = a.a[i] * b.a[i] - c.a[i];
+ }
+ if (check_union256d (e, d))
+ abort ();
+}
+
+void
+check_mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+ union256 a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ float d[8];
+ int i;
+ e.x = _mm256_fmsub_ps (__A, __B, __C);
+ for (i = 0; i < 8; i++)
+ {
+ d[i] = a.a[i] * b.a[i] - c.a[i];
+ }
+ if (check_union256 (e, d))
+ abort ();
+}
+
+static void
+fma_test (void)
+{
+ union256 c[3];
+ union256d d[3];
+ int i, j;
+ for (i = 0; i < 3; i++)
+ {
+ for (j = 0; j < 8; j++)
+ c[i].a[j] = i * j + 3.5;
+ for (j = 0; j < 4; j++)
+ d[i].a[j] = i * j + 3.5;
+ }
+ check_mm256_fmsub_pd (d[0].x, d[1].x, d[2].x);
+ check_mm256_fmsub_ps (c[0].x, c[1].x, c[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-256-fmsubaddXX.c b/gcc/testsuite/gcc.target/i386/fma-256-fmsubaddXX.c
new file mode 100644
index 0000000..84a41c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-256-fmsubaddXX.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+void
+check_mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+ union256 a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ float d[8];
+ int i;
+ e.x = _mm256_fmsubadd_ps (__A, __B, __C);
+ for (i = 0; i < 8; i++)
+ {
+ d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? -c.a[i] : c.a[i]);
+ }
+ if (check_union256 (e, d))
+ abort ();
+}
+
+void
+check_mm256_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+ union256d a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ double d[4];
+ int i;
+ e.x = _mm256_fmsubadd_pd (__A, __B, __C);
+ for (i = 0; i < 4; i++)
+ {
+ d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? -c.a[i] : c.a[i]);
+ }
+ if (check_union256d (e, d))
+ abort ();
+}
+
+static void
+fma_test (void)
+{
+ union256 c[3];
+ union256d d[3];
+ int i, j;
+ for (i = 0; i < 3; i++)
+ {
+ for (j = 0; j < 8; j++)
+ c[i].a[j] = i * j + 3.5;
+ for (j = 0; j < 4; j++)
+ d[i].a[j] = i * j + 3.5;
+ }
+ check_mm256_fmsubadd_pd (d[0].x, d[1].x, d[2].x);
+ check_mm256_fmsubadd_ps (c[0].x, c[1].x, c[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-256-fnmaddXX.c b/gcc/testsuite/gcc.target/i386/fma-256-fnmaddXX.c
new file mode 100644
index 0000000..c0dfa69
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-256-fnmaddXX.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+void
+check_mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+ union256d a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ double d[4];
+ int i;
+ e.x = _mm256_fnmadd_pd (__A, __B, __C);
+ for (i = 0; i < 4; i++)
+ {
+ d[i] = -a.a[i] * b.a[i] + c.a[i];
+ }
+ if (check_union256d (e, d))
+ abort ();
+}
+
+void
+check_mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+ union256 a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ float d[8];
+ int i;
+ e.x = _mm256_fnmadd_ps (__A, __B, __C);
+ for (i = 0; i < 8; i++)
+ {
+ d[i] = -a.a[i] * b.a[i] + c.a[i];
+ }
+ if (check_union256 (e, d))
+ abort ();
+}
+
+static void
+fma_test (void)
+{
+ union256 c[3];
+ union256d d[3];
+ int i, j;
+ for (i = 0; i < 3; i++)
+ {
+ for (j = 0; j < 8; j++)
+ c[i].a[j] = i * j + 3.5;
+ for (j = 0; j < 4; j++)
+ d[i].a[j] = i * j + 3.5;
+ }
+ check_mm256_fnmadd_pd (d[0].x, d[1].x, d[2].x);
+ check_mm256_fnmadd_ps (c[0].x, c[1].x, c[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-256-fnmsubXX.c b/gcc/testsuite/gcc.target/i386/fma-256-fnmsubXX.c
new file mode 100644
index 0000000..ac4705e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-256-fnmsubXX.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+
+void
+check_mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+ union256d a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ double d[4];
+ int i;
+ e.x = _mm256_fnmsub_pd (__A, __B, __C);
+ for (i = 0; i < 4; i++)
+ {
+ d[i] = -a.a[i] * b.a[i] - c.a[i];
+ }
+ if (check_union256d (e, d))
+ abort ();
+}
+
+void
+check_mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+ union256 a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ float d[8];
+ int i;
+ e.x = _mm256_fnmsub_ps (__A, __B, __C);
+ for (i = 0; i < 8; i++)
+ {
+ d[i] = -a.a[i] * b.a[i] - c.a[i];
+ }
+ if (check_union256 (e, d))
+ abort ();
+}
+
+static void
+fma_test (void)
+{
+ union256 c[3];
+ union256d d[3];
+ int i, j;
+ for (i = 0; i < 3; i++)
+ {
+ for (j = 0; j < 8; j++)
+ c[i].a[j] = i * j + 3.5;
+ for (j = 0; j < 4; j++)
+ d[i].a[j] = i * j + 3.5;
+ }
+ check_mm256_fnmsub_pd (d[0].x, d[1].x, d[2].x);
+ check_mm256_fnmsub_ps (c[0].x, c[1].x, c[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-check.h b/gcc/testsuite/gcc.target/i386/fma-check.h
new file mode 100644
index 0000000..696c4a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-check.h
@@ -0,0 +1,25 @@
+#include <stdlib.h>
+
+#include "cpuid.h"
+
+static void fma_test (void);
+
+static void __attribute__ ((noinline)) do_test (void)
+{
+ fma_test ();
+}
+
+int
+main ()
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+ return 0;
+
+ /* Run FMA test only if host has FMA support. */
+ if (ecx & bit_FMA)
+ do_test ();
+
+ exit (0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-compile.c b/gcc/testsuite/gcc.target/i386/fma-compile.c
new file mode 100644
index 0000000..6d5daa5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-compile.c
@@ -0,0 +1,221 @@
+/* Test that the compiler properly generates floating point multiply
+ and add instructions FMA systems. */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfma" } */
+
+#include <x86intrin.h>
+
+__m128d
+check_mm_fmadd_pd (__m128d a, __m128d b, __m128d c)
+{
+ return _mm_fmadd_pd (a, b, c);
+}
+
+__m256d
+check_mm256_fmadd_pd (__m256d a, __m256d b, __m256d c)
+{
+ return _mm256_fmadd_pd (a, b, c);
+}
+
+__m128
+check_mm_fmadd_ps (__m128 a, __m128 b, __m128 c)
+{
+ return _mm_fmadd_ps (a, b, c);
+}
+
+__m256
+check_mm256_fmadd_ps (__m256 a, __m256 b, __m256 c)
+{
+ return _mm256_fmadd_ps (a, b, c);
+}
+
+__m128d
+check_mm_fmadd_sd (__m128d a, __m128d b, __m128d c)
+{
+ return _mm_fmadd_sd (a, b, c);
+}
+
+__m128
+check_mm_fmadd_ss (__m128 a, __m128 b, __m128 c)
+{
+ return _mm_fmadd_ss (a, b, c);
+}
+
+__m128d
+check_mm_fmsub_pd (__m128d a, __m128d b, __m128d c)
+{
+ return _mm_fmsub_pd (a, b, c);
+}
+
+__m256d
+check_mm256_fmsub_pd (__m256d a, __m256d b, __m256d c)
+{
+ return _mm256_fmsub_pd (a, b, c);
+}
+
+__m128
+check_mm_fmsub_ps (__m128 a, __m128 b, __m128 c)
+{
+ return _mm_fmsub_ps (a, b, c);
+}
+
+__m256
+check_mm256_fmsub_ps (__m256 a, __m256 b, __m256 c)
+{
+ return _mm256_fmsub_ps (a, b, c);
+}
+
+__m128d
+check_mm_fmsub_sd (__m128d a, __m128d b, __m128d c)
+{
+ return _mm_fmsub_sd (a, b, c);
+}
+
+__m128
+check_mm_fmsub_ss (__m128 a, __m128 b, __m128 c)
+{
+ return _mm_fmsub_ss (a, b, c);
+}
+
+__m128d
+check_mm_fnmadd_pd (__m128d a, __m128d b, __m128d c)
+{
+ return _mm_fnmadd_pd (a, b, c);
+}
+
+__m256d
+check_mm256_fnmadd_pd (__m256d a, __m256d b, __m256d c)
+{
+ return _mm256_fnmadd_pd (a, b, c);
+}
+
+__m128
+check_mm_fnmadd_ps (__m128 a, __m128 b, __m128 c)
+{
+ return _mm_fnmadd_ps (a, b, c);
+}
+
+__m256
+check_mm256_fnmadd_ps (__m256 a, __m256 b, __m256 c)
+{
+ return _mm256_fnmadd_ps (a, b, c);
+}
+
+__m128d
+check_mm_fnmadd_sd (__m128d a, __m128d b, __m128d c)
+{
+ return _mm_fnmadd_sd (a, b, c);
+}
+
+__m128
+check_mm_fnmadd_ss (__m128 a, __m128 b, __m128 c)
+{
+ return _mm_fnmadd_ss (a, b, c);
+}
+
+__m128d
+check_mm_fnmsub_pd (__m128d a, __m128d b, __m128d c)
+{
+ return _mm_fnmsub_pd (a, b, c);
+}
+
+__m256d
+check_mm256_fnmsub_pd (__m256d a, __m256d b, __m256d c)
+{
+ return _mm256_fnmsub_pd (a, b, c);
+}
+
+__m128
+check_mm_fnmsub_ps (__m128 a, __m128 b, __m128 c)
+{
+ return _mm_fnmsub_ps (a, b, c);
+}
+
+__m256
+check_mm256_fnmsub_ps (__m256 a, __m256 b, __m256 c)
+{
+ return _mm256_fnmsub_ps (a, b, c);
+}
+
+__m128d
+check_mm_fnmsub_sd (__m128d a, __m128d b, __m128d c)
+{
+ return _mm_fnmsub_sd (a, b, c);
+}
+
+__m128
+check_mm_fnmsub_ss (__m128 a, __m128 b, __m128 c)
+{
+ return _mm_fnmsub_ss (a, b, c);
+}
+
+__m128d
+check_mm_fmaddsub_pd (__m128d a, __m128d b, __m128d c)
+{
+ return _mm_fmaddsub_pd (a, b, c);
+}
+
+__m256d
+check_mm256_fmaddsub_pd (__m256d a, __m256d b, __m256d c)
+{
+ return _mm256_fmaddsub_pd (a, b, c);
+}
+
+__m128
+check_mm_fmaddsub_ps (__m128 a, __m128 b, __m128 c)
+{
+ return _mm_fmaddsub_ps (a, b, c);
+}
+
+__m256
+check_mm256_fmaddsub_ps (__m256 a, __m256 b, __m256 c)
+{
+ return _mm256_fmaddsub_ps (a, b, c);
+}
+
+__m128d
+check_mm_fmsubadd_pd (__m128d a, __m128d b, __m128d c)
+{
+ return _mm_fmsubadd_pd (a, b, c);
+}
+
+__m256d
+check_mm256_fmsubadd_pd (__m256d a, __m256d b, __m256d c)
+{
+ return _mm256_fmsubadd_pd (a, b, c);
+}
+
+__m128
+check_mm_fmsubadd_ps (__m128 a, __m128 b, __m128 c)
+{
+ return _mm_fmsubadd_ps (a, b, c);
+}
+
+__m256
+check_mm256_fmsubadd_ps (__m256 a, __m256 b, __m256 c)
+{
+ return _mm256_fmsubadd_ps (a, b, c);
+}
+
+
+/* { dg-final { scan-assembler-times "vfmadd[^s]..ps" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsub[^s]..ps" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...ps" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps" 2 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub...ps" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd...ps" 2 } } */
+/* { dg-final { scan-assembler-times "vfmadd[^s]..pd" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsub[^s]..pd" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...pd" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...pd" 2 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub...pd" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd...pd" 2 } } */
+/* { dg-final { scan-assembler-times "vfmadd[^s]..ss" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub[^s]..ss" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...ss" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ss" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd[^s]..sd" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub[^s]..sd" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...sd" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...sd" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/fma-fmaddXX.c b/gcc/testsuite/gcc.target/i386/fma-fmaddXX.c
new file mode 100644
index 0000000..43ef9e8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-fmaddXX.c
@@ -0,0 +1,102 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+void
+check_mm_fmadd_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+ union128d a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ double d[2];
+ int i;
+ e.x = _mm_fmadd_pd (__A, __B, __C);
+ for (i = 0; i < 2; i++)
+ {
+ d[i] = a.a[i] * b.a[i] + c.a[i];
+ }
+
+ if (check_union128d (e, d))
+ abort ();
+}
+
+void
+check_mm_fmadd_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+ union128 a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ float d[4];
+ int i;
+ e.x = _mm_fmadd_ps (__A, __B, __C);
+ for (i = 0; i < 4; i++)
+ {
+ d[i] = a.a[i] * b.a[i] + c.a[i];
+ }
+ if (check_union128 (e, d))
+ abort ();
+}
+
+void
+check_mm_fmadd_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+ union128d a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ double d[2];
+ int i;
+ e.x = _mm_fmadd_sd (__A, __B, __C);
+ for (i = 1; i < 2; i++)
+ {
+ d[i] = a.a[i];
+ }
+ d[0] = a.a[0] * b.a[0] + c.a[0];
+ if (check_union128d (e, d))
+ abort ();
+}
+
+void
+check_mm_fmadd_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+ union128 a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ float d[4];
+ int i;
+ e.x = _mm_fmadd_ss (__A, __B, __C);
+ for (i = 1; i < 4; i++)
+ {
+ d[i] = a.a[i];
+ }
+ d[0] = a.a[0] * b.a[0] + c.a[0];
+ if (check_union128 (e, d))
+ abort ();
+}
+
+static void
+fma_test (void)
+{
+ union128 a[3];
+ union128d b[3];
+ int i, j;
+ for (i = 0; i < 3; i++)
+ {
+ for (j = 0; j < 4; j++)
+ a[i].a[j] = i * j + 3.5;
+ for (j = 0; j < 2; j++)
+ b[i].a[j] = i * j + 3.5;
+ }
+ check_mm_fmadd_pd (b[0].x, b[1].x, b[2].x);
+ check_mm_fmadd_sd (b[0].x, b[1].x, b[2].x);
+ check_mm_fmadd_ps (a[0].x, a[1].x, a[2].x);
+ check_mm_fmadd_ss (a[0].x, a[1].x, a[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-fmaddsubXX.c b/gcc/testsuite/gcc.target/i386/fma-fmaddsubXX.c
new file mode 100644
index 0000000..89c8163
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-fmaddsubXX.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+void
+check_mm_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+ union128 a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ float d[4];
+ int i;
+ e.x = _mm_fmaddsub_ps (__A, __B, __C);
+ for (i = 0; i < 4; i++)
+ {
+ d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? c.a[i] : -c.a[i]);
+ }
+ if (check_union128 (e, d))
+ abort ();
+}
+
+void
+check_mm_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+ union128d a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ double d[2];
+ int i;
+ e.x = _mm_fmaddsub_pd (__A, __B, __C);
+ for (i = 0; i < 2; i++)
+ {
+ d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? c.a[i] : -c.a[i]);
+ }
+ if (check_union128d (e, d))
+ abort ();
+}
+
+static void
+fma_test (void)
+{
+ union128 a[3];
+ union128d b[3];
+ int i, j;
+ for (i = 0; i < 3; i++)
+ {
+ for (j = 0; j < 4; j++)
+ a[i].a[j] = i * j + 3.5;
+ for (j = 0; j < 2; j++)
+ b[i].a[j] = i * j + 3.5;
+ }
+ check_mm_fmaddsub_pd (b[0].x, b[1].x, b[2].x);
+ check_mm_fmaddsub_ps (a[0].x, a[1].x, a[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-fmsubXX.c b/gcc/testsuite/gcc.target/i386/fma-fmsubXX.c
new file mode 100644
index 0000000..3d92d4b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-fmsubXX.c
@@ -0,0 +1,101 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+void
+check_mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+ union128d a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ double d[2];
+ int i;
+ e.x = _mm_fmsub_pd (__A, __B, __C);
+ for (i = 0; i < 2; i++)
+ {
+ d[i] = a.a[i] * b.a[i] - c.a[i];
+ }
+ if (check_union128d (e, d))
+ abort ();
+}
+
+void
+check_mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+ union128 a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ float d[4];
+ int i;
+ e.x = _mm_fmsub_ps (__A, __B, __C);
+ for (i = 0; i < 4; i++)
+ {
+ d[i] = a.a[i] * b.a[i] - c.a[i];
+ }
+ if (check_union128 (e, d))
+ abort ();
+}
+
+void
+check_mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+ union128d a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ double d[2];
+ int i;
+ e.x = _mm_fmsub_sd (__A, __B, __C);
+ for (i = 1; i < 2; i++)
+ {
+ d[i] = a.a[i];
+ }
+ d[0] = a.a[0] * b.a[0] - c.a[0];
+ if (check_union128d (e, d))
+ abort ();
+}
+
+void
+check_mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+ union128 a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ float d[4];
+ int i;
+ e.x = _mm_fmsub_ss (__A, __B, __C);
+ for (i = 1; i < 4; i++)
+ {
+ d[i] = a.a[i];
+ }
+ d[0] = a.a[0] * b.a[0] - c.a[0];
+ if (check_union128 (e, d))
+ abort ();
+}
+
+static void
+fma_test (void)
+{
+ union128 a[3];
+ union128d b[3];
+ int i, j;
+ for (i = 0; i < 3; i++)
+ {
+ for (j = 0; j < 4; j++)
+ a[i].a[j] = i * j + 3.5;
+ for (j = 0; j < 2; j++)
+ b[i].a[j] = i * j + 3.5;
+ }
+ check_mm_fmsub_pd (b[0].x, b[1].x, b[2].x);
+ check_mm_fmsub_sd (b[0].x, b[1].x, b[2].x);
+ check_mm_fmsub_ps (a[0].x, a[1].x, a[2].x);
+ check_mm_fmsub_ss (a[0].x, a[1].x, a[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-fmsubaddXX.c b/gcc/testsuite/gcc.target/i386/fma-fmsubaddXX.c
new file mode 100644
index 0000000..b03f875
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-fmsubaddXX.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+void
+check_mm_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+ union128 a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ float d[4];
+ int i;
+ e.x = _mm_fmsubadd_ps (__A, __B, __C);
+ for (i = 0; i < 4; i++)
+ {
+ d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? -c.a[i] : c.a[i]);
+ }
+ if (check_union128 (e, d))
+ abort ();
+}
+
+void
+check_mm_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+ union128d a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ double d[2];
+ int i;
+ e.x = _mm_fmsubadd_pd (__A, __B, __C);
+ for (i = 0; i < 2; i++)
+ {
+ d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? -c.a[i] : c.a[i]);
+ }
+ if (check_union128d (e, d))
+ abort ();
+}
+
+static void
+fma_test (void)
+{
+ union128 a[3];
+ union128d b[3];
+ int i, j;
+ for (i = 0; i < 3; i++)
+ {
+ for (j = 0; j < 4; j++)
+ a[i].a[j] = i * j + 3.5;
+ for (j = 0; j < 2; j++)
+ b[i].a[j] = i * j + 3.5;
+ }
+ check_mm_fmsubadd_pd (b[0].x, b[1].x, b[2].x);
+ check_mm_fmsubadd_ps (a[0].x, a[1].x, a[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-fnmaddXX.c b/gcc/testsuite/gcc.target/i386/fma-fnmaddXX.c
new file mode 100644
index 0000000..f23a6c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-fnmaddXX.c
@@ -0,0 +1,101 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+void
+check_mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+ union128 a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ float d[4];
+ int i;
+ e.x = _mm_fnmadd_ps (__A, __B, __C);
+ for (i = 0; i < 4; i++)
+ {
+ d[i] = -a.a[i] * b.a[i] + c.a[i];
+ }
+ if (check_union128 (e, d))
+ abort ();
+}
+
+void
+check_mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+ union128d a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ double d[2];
+ int i;
+ e.x = _mm_fnmadd_pd (__A, __B, __C);
+ for (i = 0; i < 2; i++)
+ {
+ d[i] = -a.a[i] * b.a[i] + c.a[i];
+ }
+ if (check_union128d (e, d))
+ abort ();
+}
+
+void
+check_mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+ union128d a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ double d[2];
+ int i;
+ e.x = _mm_fnmadd_sd (__A, __B, __C);
+ for (i = 1; i < 2; i++)
+ {
+ d[i] = a.a[i];
+ }
+ d[0] = -a.a[0] * b.a[0] + c.a[0];
+ if (check_union128d (e, d))
+ abort ();
+}
+
+void
+check_mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+ union128 a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ float d[4];
+ int i;
+ e.x = _mm_fnmadd_ss (__A, __B, __C);
+ for (i = 1; i < 4; i++)
+ {
+ d[i] = a.a[i];
+ }
+ d[0] = -a.a[0] * b.a[0] + c.a[0];
+ if (check_union128 (e, d))
+ abort ();
+}
+
+static void
+fma_test (void)
+{
+ union128 a[3];
+ union128d b[3];
+ int i, j;
+ for (i = 0; i < 3; i++)
+ {
+ for (j = 0; j < 4; j++)
+ a[i].a[j] = i * j + 3.5;
+ for (j = 0; j < 2; j++)
+ b[i].a[j] = i * j + 3.5;
+ }
+ check_mm_fnmadd_pd (b[0].x, b[1].x, b[2].x);
+ check_mm_fnmadd_sd (b[0].x, b[1].x, b[2].x);
+ check_mm_fnmadd_ps (a[0].x, a[1].x, a[2].x);
+ check_mm_fnmadd_ss (a[0].x, a[1].x, a[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-fnmsubXX.c b/gcc/testsuite/gcc.target/i386/fma-fnmsubXX.c
new file mode 100644
index 0000000..d17c7f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-fnmsubXX.c
@@ -0,0 +1,101 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+void
+check_mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+ union128d a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ double d[2];
+ int i;
+ e.x = _mm_fnmsub_sd (__A, __B, __C);
+ for (i = 1; i < 2; i++)
+ {
+ d[i] = a.a[i];
+ }
+ d[0] = -a.a[0] * b.a[0] - c.a[0];
+ if (check_union128d (e, d))
+ abort ();
+}
+
+void
+check_mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+ union128 a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ float d[4];
+ int i;
+ e.x = _mm_fnmsub_ss (__A, __B, __C);
+ for (i = 1; i < 4; i++)
+ {
+ d[i] = a.a[i];
+ }
+ d[0] = -a.a[0] * b.a[0] - c.a[0];
+ if (check_union128 (e, d))
+ abort ();
+}
+
+void
+check_mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+ union128 a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ float d[4];
+ int i;
+ e.x = _mm_fnmsub_ps (__A, __B, __C);
+ for (i = 0; i < 4; i++)
+ {
+ d[i] = -a.a[i] * b.a[i] - c.a[i];
+ }
+ if (check_union128 (e, d))
+ abort ();
+}
+
+void
+check_mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+ union128d a, b, c, e;
+ a.x = __A;
+ b.x = __B;
+ c.x = __C;
+ double d[2];
+ int i;
+ e.x = _mm_fnmsub_pd (__A, __B, __C);
+ for (i = 0; i < 2; i++)
+ {
+ d[i] = -a.a[i] * b.a[i] - c.a[i];
+ }
+ if (check_union128d (e, d))
+ abort ();
+}
+
+static void
+fma_test (void)
+{
+ union128 a[3];
+ union128d b[3];
+ int i, j;
+ for (i = 0; i < 3; i++)
+ {
+ for (j = 0; j < 4; j++)
+ a[i].a[j] = i * j + 3.5;
+ for (j = 0; j < 2; j++)
+ b[i].a[j] = i * j + 3.5;
+ }
+ check_mm_fnmsub_pd (b[0].x, b[1].x, b[2].x);
+ check_mm_fnmsub_sd (b[0].x, b[1].x, b[2].x);
+ check_mm_fnmsub_ps (a[0].x, a[1].x, a[2].x);
+ check_mm_fnmsub_ss (a[0].x, a[1].x, a[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/i386.exp b/gcc/testsuite/gcc.target/i386/i386.exp
index 167b79b..43ed841 100644
--- a/gcc/testsuite/gcc.target/i386/i386.exp
+++ b/gcc/testsuite/gcc.target/i386/i386.exp
@@ -172,6 +172,20 @@ proc check_effective_target_fma4 { } {
} "-O2 -mfma4" ]
}
+# Return 1 if fma instructions can be compiled.
+proc check_effective_target_fma { } {
+ return [check_no_compiler_messages fma object {
+ typedef float __m128 __attribute__ ((__vector_size__ (16)));
+ typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+ __m128 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
+ {
+ return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A,
+ (__v4sf)__B,
+ (__v4sf)__C);
+ }
+ } "-O2 -mfma" ]
+}
+
# Return 1 if xop instructions can be compiled.
proc check_effective_target_xop { } {
return [check_no_compiler_messages xop object {
next prev parent reply other threads:[~2011-08-24 8:48 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-08-20 11:23 Uros Bizjak
2011-08-22 17:32 ` Ilya Tocar
2011-08-22 20:41 ` Uros Bizjak
2011-08-23 14:55 ` Ilya Tocar
2011-08-23 15:33 ` Uros Bizjak
2011-08-24 10:06 ` Ilya Tocar [this message]
2011-08-24 10:12 ` Jakub Jelinek
2011-08-24 11:12 ` Ilya Tocar
2011-08-24 13:31 ` Uros Bizjak
2011-08-24 13:52 ` Ilya Tocar
2011-08-24 22:00 ` Uros Bizjak
2011-08-25 10:15 ` Ilya Tocar
2011-08-25 10:46 ` Uros Bizjak
2011-08-25 11:46 ` Ilya Tocar
2011-08-25 11:46 ` Jakub Jelinek
2011-08-25 11:49 ` Ilya Tocar
2011-08-26 11:03 ` Ilya Tocar
2011-08-26 14:07 ` H.J. Lu
2011-08-26 15:47 ` Ilya Tocar
2011-08-26 17:02 ` H.J. Lu
2011-08-26 17:06 ` H.J. Lu
2011-08-30 12:05 ` Ilya Tocar
2011-08-30 14:02 ` H.J. Lu
2011-08-30 14:10 ` Ilya Tocar
2011-08-30 14:32 ` Ilya Tocar
2011-08-30 15:25 ` H.J. Lu
-- strict thread matches above, loose matches on Subject: below --
2011-08-17 14:08 [PATCH, i386, testsuite]FMA intrinsics Ilya Tocar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAFFGCRDywDPXcC5t=5T2-1ot0JFDJN59_iezcpi7ehYNmKD2Qg@mail.gmail.com' \
--to=tocarip.intel@gmail.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=ubizjak@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).