From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <kcy@sourceware.org>
Received: by sourceware.org (Postfix, from userid 1729)
	id B6F25385842A; Wed,  2 Nov 2022 00:37:44 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org B6F25385842A
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org;
	s=default; t=1667349464;
	bh=HYoioiiDZ0iRJJJ/BwHCT1cmN/qMba7Z/TDo20cd6T4=;
	h=From:To:Subject:Date:From;
	b=YhSMzZhkqmatx2kyKuntYKtIGPnpUldLJoNhf8K4g2P5bvJ4q/B6EJHO0yDd3e332
	 OVeU4rCMCqZckMD0tAwRnxa5oMQT/kKXeWWJ9ZktVQuazqZUl/2gWugOhRmOs8Z8EH
	 yveM+MpFwR5XgRaQx2P+6TEf+OuKdJk4D9gXghe0=
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
From: Kwok Yeung <kcy@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc/devel/omp/gcc-12] amdgcn: Enable SIMD vectorization of math
 functions
X-Act-Checkin: gcc
X-Git-Author: Kwok Cheung Yeung <kcy@codesourcery.com>
X-Git-Refname: refs/heads/devel/omp/gcc-12
X-Git-Oldrev: bd9a6106b95907fb76a410e293401443a45b76f2
X-Git-Newrev: 863579c4e301224b33c14821506894ac44d50def
Message-Id: <20221102003744.B6F25385842A@sourceware.org>
Date: Wed,  2 Nov 2022 00:37:44 +0000 (GMT)
List-Id: <gcc-cvs.sourceware.org>

https://gcc.gnu.org/g:863579c4e301224b33c14821506894ac44d50def

commit 863579c4e301224b33c14821506894ac44d50def
Author: Kwok Cheung Yeung <kcy@codesourcery.com>
Date:   Tue Nov 1 22:50:30 2022 +0000

    amdgcn: Enable SIMD vectorization of math functions
    
    Calls to vectorized versions of routines in the math library will now
    be inserted when vectorizing code containing supported math functions.
    
    2022-11-01  Kwok Cheung Yeung  <kcy@codesourcery.com>
                Paul-Antoine Arras  <pa@codesourcery.com>
    
            gcc/
            * builtins.cc (mathfn_built_in_explicit): New.
            * config/gcn/gcn.cc: Include case-cfn-macros.h.
            (mathfn_built_in_explicit): Add prototype.
            (gcn_vectorize_builtin_vectorized_function): New.
            (gcn_libc_has_function): New.
            (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Define.
            (TARGET_LIBC_HAS_FUNCTION): Define.
    
            gcc/testsuite/
            * gcc.target/gcn/simd-math-1.c: New testcase.
    
            libgomp/
            * testsuite/libgomp.c/simd-math-1.c: New testcase.

Diff:
---
 gcc/ChangeLog.omp                          |  11 ++
 gcc/builtins.cc                            |   8 ++
 gcc/config/gcn/gcn.cc                      | 110 +++++++++++++++
 gcc/testsuite/ChangeLog.omp                |   4 +
 gcc/testsuite/gcc.target/gcn/simd-math-1.c | 206 +++++++++++++++++++++++++++
 libgomp/ChangeLog.omp                      |   4 +
 libgomp/testsuite/libgomp.c/simd-math-1.c  | 217 +++++++++++++++++++++++++++++
 7 files changed, 560 insertions(+)

diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index e9ee9d732df..7551b505bc0 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,3 +1,14 @@
+2022-11-01  Kwok Cheung Yeung  <kcy@codesourcery.com>
+	    Paul-Antoine Arras  <pa@codesourcery.com>
+
+	* builtins.cc (mathfn_built_in_explicit): New.
+	* config/gcn/gcn.cc: Include case-cfn-macros.h.
+	(mathfn_built_in_explicit): Add prototype.
+	(gcn_vectorize_builtin_vectorized_function): New.
+	(gcn_libc_has_function): New.
+	(TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Define.
+	(TARGET_LIBC_HAS_FUNCTION): Define.
+
 2022-11-01  Kwok Cheung Yeung  <kcy@codesourcery.com>
 
 	* config/gcn/gcn-builtins.def (FLOORVF): New builtin.
diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index 57929a42bc4..b8cd75dcb5f 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -2083,6 +2083,14 @@ mathfn_built_in (tree type, combined_fn fn)
   return mathfn_built_in_1 (type, fn, /*implicit=*/ 1);
 }
 
+/* Like mathfn_built_in, but pass implicit=0 so the explicit array is used.  */
+
+tree
+mathfn_built_in_explicit (tree type, combined_fn fn)
+{
+  return mathfn_built_in_1 (type, fn, /*implicit=*/ 0);
+}
+
 /* Like mathfn_built_in_1, but take a built_in_function and
    always use the implicit array.  */
 
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index bfc91e04f81..f0c78510880 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -53,6 +53,7 @@
 #include "dwarf2.h"
 #include "gimple.h"
 #include "cgraph.h"
+#include "case-cfn-macros.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -5143,6 +5144,110 @@ gcn_simd_clone_usable (struct cgraph_node *ARG_UNUSED (node))
   return 0;
 }
 
+tree mathfn_built_in_explicit (tree, combined_fn);
+
+/* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION.
+   Return a declaration for the math-library routine that vectorizes builtin
+   FN from TYPE_IN to TYPE_OUT, or NULL_TREE if there is none.  */
+
+tree
+gcn_vectorize_builtin_vectorized_function (unsigned int fn, tree type_out,
+					   tree type_in)
+{
+  if (TREE_CODE (type_out) != VECTOR_TYPE
+      || TREE_CODE (type_in) != VECTOR_TYPE)
+    return NULL_TREE;
+
+  machine_mode out_mode = TYPE_MODE (TREE_TYPE (type_out));
+  int out_n = TYPE_VECTOR_SUBPARTS (type_out);
+  combined_fn cfn = combined_fn (fn);
+
+  /* Keep this consistent with the list of vectorized math routines.  */
+  int implicit_p;
+  switch (fn)
+    {
+    CASE_CFN_ACOS:
+    CASE_CFN_ACOSH:
+    CASE_CFN_ASIN:
+    CASE_CFN_ASINH:
+    CASE_CFN_ATAN:
+    CASE_CFN_ATAN2:
+    CASE_CFN_ATANH:
+    CASE_CFN_COPYSIGN:
+    CASE_CFN_COS:
+    CASE_CFN_COSH:
+    CASE_CFN_ERF:
+    CASE_CFN_EXP:
+    CASE_CFN_EXP2:
+    CASE_CFN_FINITE:
+    CASE_CFN_FMOD:
+    CASE_CFN_GAMMA:
+    CASE_CFN_HYPOT:
+    CASE_CFN_ISNAN:
+    CASE_CFN_LGAMMA:
+    CASE_CFN_LOG:
+    CASE_CFN_LOG10:
+    CASE_CFN_LOG2:
+    CASE_CFN_POW:
+    CASE_CFN_REMAINDER:
+    CASE_CFN_RINT:
+    CASE_CFN_SIN:
+    CASE_CFN_SINH:
+    CASE_CFN_SQRT:
+    CASE_CFN_TAN:
+    CASE_CFN_TANH:
+    CASE_CFN_TGAMMA:
+      implicit_p = 1;
+      break;
+
+    CASE_CFN_SCALB:
+    CASE_CFN_SIGNIFICAND:
+      implicit_p = 0;
+      break;
+
+    default:
+      return NULL_TREE;
+    }
+
+  tree out_t_node = (out_mode == DFmode) ? double_type_node : float_type_node;
+  tree fndecl = implicit_p ? mathfn_built_in (out_t_node, cfn)
+			   : mathfn_built_in_explicit (out_t_node, cfn);
+  if (fndecl == NULL_TREE)	/* No scalar builtin decl to derive a name from.  */
+    return NULL_TREE;
+
+  const char *bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
+  char name[20];	/* Room for e.g. "v64sf_significandf".  */
+  snprintf (name, sizeof name, out_mode == DFmode ? "v%ddf_%s" : "v%dsf_%s",
+	    out_n, bname + strlen ("__builtin_"));
+
+  unsigned arity = 0;
+  for (tree args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
+    arity++;
+
+  tree fntype = (arity == 1)
+		? build_function_type_list (type_out, type_in, NULL)
+		: build_function_type_list (type_out, type_in, type_in, NULL);
+
+  /* Build an external, public declaration named v<N>{sf,df}_<function>.  */
+  tree new_fndecl = build_decl (BUILTINS_LOCATION,
+				FUNCTION_DECL, get_identifier (name), fntype);
+  TREE_PUBLIC (new_fndecl) = 1;
+  DECL_EXTERNAL (new_fndecl) = 1;
+  DECL_IS_NOVOPS (new_fndecl) = 1;
+  TREE_READONLY (new_fndecl) = 1;
+
+  return new_fndecl;
+}
+
+/* Implement TARGET_LIBC_HAS_FUNCTION by deferring to bsd_libc_has_function.  */
+
+bool
+gcn_libc_has_function (enum function_class fn_class,
+		       tree type)
+{
+  return bsd_libc_has_function (fn_class, type);
+}
+
 /* }}}  */
 /* {{{ md_reorg pass.  */
 
@@ -7232,6 +7337,11 @@ gcn_dwarf_register_span (rtx rtl)
   gcn_simd_clone_compute_vecsize_and_simdlen
 #undef  TARGET_SIMD_CLONE_USABLE
 #define TARGET_SIMD_CLONE_USABLE gcn_simd_clone_usable
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
+  gcn_vectorize_builtin_vectorized_function
+#undef TARGET_LIBC_HAS_FUNCTION
+#define TARGET_LIBC_HAS_FUNCTION gcn_libc_has_function
 #undef  TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
 #define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \
   gcn_small_register_classes_for_mode_p
diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp
index 3ae28694624..9b26a6ad7eb 100644
--- a/gcc/testsuite/ChangeLog.omp
+++ b/gcc/testsuite/ChangeLog.omp
@@ -1,3 +1,7 @@
+2022-11-01  Kwok Cheung Yeung  <kcy@codesourcery.com>
+
+	* gcc.target/gcn/simd-math-1.c: New testcase.
+
 2022-11-01  Marcel Vollweiler  <marcel@codesourcery.com>
 
 	* c-c++-common/gomp/metadirective-8.c: New test.
diff --git a/gcc/testsuite/gcc.target/gcn/simd-math-1.c b/gcc/testsuite/gcc.target/gcn/simd-math-1.c
new file mode 100644
index 00000000000..4709af36b40
--- /dev/null
+++ b/gcc/testsuite/gcc.target/gcn/simd-math-1.c
@@ -0,0 +1,206 @@
+/* Check that the SIMD versions of math routines give the same (or
+   sufficiently close) results as their scalar equivalents.  */
+
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -fno-math-errno -mstack-size=3000000 -fdump-tree-vect" } */
+
+#undef PRINT_RESULT
+#define VERBOSE 0
+#define EARLY_EXIT 1
+
+#include <math.h>
+#include <stdlib.h>
+
+#ifdef PRINT_RESULT
+  #include <stdio.h>
+  #define PRINTF printf
+#else
+  static void null_printf (const char *f, ...) { }
+
+  #define PRINTF null_printf
+#endif
+
+#define N 512
+#define EPSILON_float 1e-5
+#define EPSILON_double 1e-10
+
+static int failed = 0;
+
+int deviation_float (float x, float y)
+{
+  union {
+    float f;
+    unsigned u;
+  } u, v;
+
+  u.f = x;
+  v.f = y;
+
+  unsigned mask = 0x80000000U; 
+  int i;
+
+  for (i = 32; i > 0; i--)
+    if ((u.u ^ v.u) & mask)
+      break;
+    else
+      mask >>= 1;
+
+  return i;
+}
+
+int deviation_double (double x, double y)
+{
+  union {
+    double d;
+    unsigned long long u;
+  } u, v;
+
+  u.d = x;
+  v.d = y;
+
+  unsigned long long mask = 0x8000000000000000ULL;
+  int i;
+
+  for (i = 64; i > 0; i--)
+    if ((u.u ^ v.u) & mask)
+      break;
+    else
+      mask >>= 1;
+
+  return i;
+}
+
+#define TEST_FUN(TFLOAT, LOW, HIGH, FUN) \
+__attribute__((optimize("no-tree-vectorize"))) \
+__attribute__((optimize("no-unsafe-math-optimizations"))) \
+void check_##FUN (TFLOAT res[N], TFLOAT a[N]) \
+{ \
+  /* Accumulate into the file-scope 'failed' so main's exit status sees it.  */ \
+  for (int i = 0; i < N; i++) { \
+    TFLOAT expected = FUN (a[i]); \
+    TFLOAT diff = __builtin_fabs (expected - res[i]); \
+    int deviation = deviation_##TFLOAT (expected, res[i]); \
+    int fail = isnan (res[i]) != isnan (expected) \
+               || isinf (res[i]) != isinf (expected) \
+               || (diff > EPSILON_##TFLOAT && deviation > 10); \
+    failed |= fail; \
+    if (VERBOSE || fail) \
+      PRINTF (#FUN "(%f) = %f, expected = %f, diff = %f, deviation = %d %s\n", \
+              a[i], res[i], expected, diff, deviation, fail ? "(!)" : ""); \
+    if (EARLY_EXIT && fail) \
+      exit (1); \
+  } \
+} \
+void test_##FUN (void) \
+{ \
+  TFLOAT res[N], a[N]; \
+  for (int i = 0; i < N; i++) \
+    a[i] = LOW + ((HIGH - LOW) / N) * i; \
+  for (int i = 0; i < N; i++) \
+    res[i] = FUN (a[i]); \
+  check_##FUN (res, a); \
+}\
+test_##FUN ();
+
+#define TEST_FUN2(TFLOAT, LOW1, HIGH1, LOW2, HIGH2, FUN) \
+__attribute__((optimize("no-tree-vectorize"))) \
+__attribute__((optimize("no-unsafe-math-optimizations"))) \
+void check_##FUN (TFLOAT res[N], TFLOAT a[N], TFLOAT b[N]) \
+{ \
+  /* Accumulate into the file-scope 'failed' so main's exit status sees it.  */ \
+  for (int i = 0; i < N; i++) { \
+    TFLOAT expected = FUN (a[i], b[i]); \
+    TFLOAT diff = __builtin_fabs (expected - res[i]); \
+    int deviation = deviation_##TFLOAT (expected, res[i]); \
+    int fail = isnan (res[i]) != isnan (expected) \
+               || isinf (res[i]) != isinf (expected) \
+               || (diff > EPSILON_##TFLOAT && deviation > 10); \
+    failed |= fail; \
+    if (VERBOSE || fail) \
+      PRINTF (#FUN "(%f,%f) = %f, expected = %f, diff = %f, deviation = %d %s\n", \
+              a[i], b[i], res[i], expected, diff, deviation, fail ? "(!)" : ""); \
+    if (EARLY_EXIT && fail) \
+      exit (1); \
+  } \
+} \
+void test_##FUN (void) \
+{ \
+  TFLOAT res[N], a[N], b[N]; \
+  for (int i = 0; i < N; i++) { \
+    a[i] = LOW1 + ((HIGH1 - LOW1) / N) * i; \
+    b[i] = LOW2 + ((HIGH2 - LOW2) / N) * i; \
+  } \
+  for (int i = 0; i < N; i++) \
+    res[i] = FUN (a[i], b[i]); \
+  check_##FUN (res, a, b); \
+}\
+test_##FUN ();
+
+int main (void)
+{
+  TEST_FUN (float, -1.1, 1.1, acosf); /* { dg-final { scan-tree-dump "v64sf_acosf" "vect" } }*/
+  TEST_FUN (float, -10, 10, acoshf); /* { dg-final { scan-tree-dump "v64sf_acoshf" "vect" } }*/
+  TEST_FUN (float, -1.1, 1.1, asinf); /* { dg-final { scan-tree-dump "v64sf_asinf" "vect" } }*/
+  TEST_FUN (float, -10, 10, asinhf); /* { dg-final { scan-tree-dump "v64sf_asinhf" "vect" } }*/
+  TEST_FUN (float, -1.1, 1.1, atanf); /* { dg-final { scan-tree-dump "v64sf_atanf" "vect" } }*/
+  TEST_FUN2 (float, -2.0, 2.0, 2.0, -2.0, atan2f); /* { dg-final { scan-tree-dump "v64sf_atan2f" "vect" } }*/
+  TEST_FUN (float, -2.0, 2.0, atanhf); /* { dg-final { scan-tree-dump "v64sf_atanhf" "vect" } }*/
+  TEST_FUN2 (float, -10.0, 10.0, 5.0, -15.0, copysignf); /* { dg-final { scan-tree-dump "v64sf_copysignf" "vect" } }*/
+  TEST_FUN (float, -3.14159265359, 3.14159265359, cosf); /* { dg-final { scan-tree-dump "v64sf_cosf" "vect" } }*/
+  TEST_FUN (float, -3.14159265359, 3.14159265359, coshf); /* { dg-final { scan-tree-dump "v64sf_coshf" "vect" } }*/
+  TEST_FUN (float, -10.0, 10.0, erff);  /* { dg-final { scan-tree-dump "v64sf_erff" "vect" } }*/
+  TEST_FUN (float, -10.0, 10.0, expf); /* { dg-final { scan-tree-dump "v64sf_expf" "vect" } }*/
+  TEST_FUN (float, -10.0, 10.0, exp2f); /* { dg-final { scan-tree-dump "v64sf_exp2f" "vect" } }*/
+  TEST_FUN2 (float, -10.0, 10.0, 100.0, -25.0, fmodf); /* { dg-final { scan-tree-dump "v64sf_fmodf" "vect" } }*/
+  TEST_FUN (float, -10.0, 10.0, gammaf); /* { dg-final { scan-tree-dump "v64sf_gammaf" "vect" { xfail *-*-*} } }*/
+  TEST_FUN2 (float, -10.0, 10.0, 15.0, -5.0,hypotf); /* { dg-final { scan-tree-dump "v64sf_hypotf" "vect" } }*/
+  TEST_FUN (float, -10.0, 10.0, lgammaf); /* { dg-final { scan-tree-dump "v64sf_lgammaf" "vect" { xfail *-*-*} } }*/
+  TEST_FUN (float, -1.0, 50.0, logf); /* { dg-final { scan-tree-dump "v64sf_logf" "vect" } }*/
+  TEST_FUN (float, -1.0, 500.0, log10f); /* { dg-final { scan-tree-dump "v64sf_log10f" "vect" } }*/
+  TEST_FUN (float, -1.0, 64.0, log2f); /* { dg-final { scan-tree-dump "v64sf_log2f" "vect" } }*/
+  TEST_FUN2 (float, -100.0, 100.0, 100.0, -100.0, powf); /* { dg-final { scan-tree-dump "v64sf_powf" "vect" } }*/
+  TEST_FUN2 (float, -50.0, 100.0, -2.0, 40.0, remainderf); /* { dg-final { scan-tree-dump "v64sf_remainderf" "vect" } }*/
+  TEST_FUN (float, -50.0, 50.0, rintf);  /* { dg-final { scan-tree-dump "v64sf_rintf" "vect" } }*/
+  TEST_FUN2 (float, -50.0, 50.0, -10.0, 32.0, __builtin_scalbf); /* { dg-final { scan-tree-dump "v64sf_scalbf" "vect" } }*/
+  TEST_FUN (float, -10.0, 10.0, __builtin_significandf); /* { dg-final { scan-tree-dump "v64sf_significandf" "vect" } }*/
+  TEST_FUN (float, -3.14159265359, 3.14159265359, sinf); /* { dg-final { scan-tree-dump "v64sf_sinf" "vect" } }*/
+  TEST_FUN (float, -3.14159265359, 3.14159265359, sinhf); /* { dg-final { scan-tree-dump "v64sf_sinhf" "vect" } }*/
+  TEST_FUN (float, -0.1, 10000.0, sqrtf); /* { dg-final { scan-tree-dump "v64sf_sqrtf" "vect" } }*/
+  TEST_FUN (float, -5.0, 5.0, tanf); /* { dg-final { scan-tree-dump "v64sf_tanf" "vect" } }*/
+  TEST_FUN (float, -3.14159265359, 3.14159265359, tanhf); /* { dg-final { scan-tree-dump "v64sf_tanhf" "vect" } }*/
+  TEST_FUN (float, -10.0, 10.0, tgammaf); /* { dg-final { scan-tree-dump "v64sf_tgammaf" "vect" } }*/
+
+  TEST_FUN (double, -1.1, 1.1, acos); /* { dg-final { scan-tree-dump "v64df_acos" "vect" } }*/
+  TEST_FUN (double, -10, 10, acosh); /* { dg-final { scan-tree-dump "v64df_acosh" "vect" } }*/
+  TEST_FUN (double, -1.1, 1.1, asin); /* { dg-final { scan-tree-dump "v64df_asin" "vect" } }*/
+  TEST_FUN (double, -10, 10, asinh); /* { dg-final { scan-tree-dump "v64df_asinh" "vect" } }*/
+  TEST_FUN (double, -1.1, 1.1, atan); /* { dg-final { scan-tree-dump "v64df_atan" "vect" } }*/
+  TEST_FUN2 (double, -2.0, 2.0, 2.0, -2.0, atan2); /* { dg-final { scan-tree-dump "v64df_atan2" "vect" } }*/
+  TEST_FUN (double, -2.0, 2.0, atanh); /* { dg-final { scan-tree-dump "v64df_atanh" "vect" } }*/
+  TEST_FUN2 (double, -10.0, 10.0, 5.0, -15.0, copysign); /* { dg-final { scan-tree-dump "v64df_copysign" "vect" } }*/
+  TEST_FUN (double, -3.14159265359, 3.14159265359, cos); /* { dg-final { scan-tree-dump "v64df_cos" "vect" } }*/
+  TEST_FUN (double, -3.14159265359, 3.14159265359, cosh); /* { dg-final { scan-tree-dump "v64df_cosh" "vect" } }*/
+  TEST_FUN (double, -10.0, 10.0, erf); /* { dg-final { scan-tree-dump "v64df_erf" "vect" } }*/
+  TEST_FUN (double, -10.0, 10.0, exp); /* { dg-final { scan-tree-dump "v64df_exp" "vect" } }*/
+  TEST_FUN (double, -10.0, 10.0, exp2); /* { dg-final { scan-tree-dump "v64df_exp2" "vect" } }*/
+  TEST_FUN2 (double, -10.0, 10.0, 100.0, -25.0, fmod); /* { dg-final { scan-tree-dump "v64df_fmod" "vect" } }*/
+  TEST_FUN (double, -10.0, 10.0, gamma); /* { dg-final { scan-tree-dump "v64df_gamma" "vect" { xfail *-*-*} } }*/
+  TEST_FUN2 (double, -10.0, 10.0, 15.0, -5.0, hypot); /* { dg-final { scan-tree-dump "v64df_hypot" "vect" } }*/
+  TEST_FUN (double, -10.0, 10.0, lgamma); /* { dg-final { scan-tree-dump "v64df_lgamma" "vect" { xfail *-*-*} } }*/
+  TEST_FUN (double, -1.0, 50.0, log); /* { dg-final { scan-tree-dump "v64df_log" "vect" } }*/
+  TEST_FUN (double, -1.0, 500.0, log10); /* { dg-final { scan-tree-dump "v64df_log10" "vect" } }*/
+  TEST_FUN (double, -1.0, 64.0, log2); /* { dg-final { scan-tree-dump "v64df_log2" "vect" { xfail *-*-*} } }*/
+  TEST_FUN2 (double, -100.0, 100.0, 100.0, -100.0, pow); /* { dg-final { scan-tree-dump "v64df_pow" "vect" } }*/
+  TEST_FUN2 (double, -50.0, 100.0, -2.0, 40.0, remainder); /* { dg-final { scan-tree-dump "v64df_remainder" "vect" } }*/
+  TEST_FUN (double, -50.0, 50.0, rint); /* { dg-final { scan-tree-dump "v64df_rint" "vect" } }*/
+  TEST_FUN2 (double, -50.0, 50.0, -10.0, 32.0, __builtin_scalb); /* { dg-final { scan-tree-dump "v64df_scalb" "vect" } }*/
+  TEST_FUN (double, -10.0, 10.0, __builtin_significand); /* { dg-final { scan-tree-dump "v64df_significand" "vect" } }*/
+  TEST_FUN (double, -3.14159265359, 3.14159265359, sin); /* { dg-final { scan-tree-dump "v64df_sin" "vect" } }*/
+  TEST_FUN (double, -3.14159265359, 3.14159265359, sinh); /* { dg-final { scan-tree-dump "v64df_sinh" "vect" } }*/
+  TEST_FUN (double, -0.1, 10000.0, sqrt); /* { dg-final { scan-tree-dump "v64df_sqrt" "vect" } }*/
+  TEST_FUN (double, -5.0, 5.0, tan); /* { dg-final { scan-tree-dump "v64df_tan" "vect" } }*/
+  TEST_FUN (double, -3.14159265359, 3.14159265359, tanh); /* { dg-final { scan-tree-dump "v64df_tanh" "vect" } }*/
+  TEST_FUN (double, -10.0, 10.0, tgamma); /* { dg-final { scan-tree-dump "v64df_tgamma" "vect" } }*/
+
+  return failed;
+}
\ No newline at end of file
diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index 396f8e7e4b4..ddee1ad194a 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,3 +1,7 @@
+2022-11-01  Kwok Cheung Yeung  <kcy@codesourcery.com>
+
+	* testsuite/libgomp.c/simd-math-1.c: New testcase.
+
 2022-10-28  Thomas Schwinge  <thomas@codesourcery.com>
 
 	* testsuite/libgomp.oacc-fortran/privatized-ref-2.f90: Adjust.
diff --git a/libgomp/testsuite/libgomp.c/simd-math-1.c b/libgomp/testsuite/libgomp.c/simd-math-1.c
new file mode 100644
index 00000000000..caf032a77ae
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/simd-math-1.c
@@ -0,0 +1,217 @@
+/* Check that the SIMD versions of math routines give the same (or
+   sufficiently close) results as their scalar equivalents.  */
+
+/* { dg-do run } */
+/* { dg-skip-if "AMD GCN only" { ! amdgcn_offloading_enabled } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-math-errno" } */
+/* { dg-additional-options "-foffload=-mstack-size=3000000 -foffload=-lm" } */
+
+#undef PRINT_RESULT
+#define VERBOSE 0
+#define EARLY_EXIT 1
+
+#include <math.h>
+#include <stdlib.h>
+
+#ifdef PRINT_RESULT
+  #include <stdio.h>
+  #define PRINTF printf
+#else
+  static void null_printf (const char *f, ...) { }
+
+  #define PRINTF null_printf
+#endif
+
+#define N 512
+#define EPSILON_float 1e-5
+#define EPSILON_double 1e-10
+
+static int xfail = 0;
+static int failed = 0;
+
+int deviation_float (float x, float y)
+{
+  union {
+    float f;
+    unsigned u;
+  } u, v;
+
+  u.f = x;
+  v.f = y;
+
+  unsigned mask = 0x80000000U; 
+  int i;
+
+  for (i = 32; i > 0; i--)
+    if ((u.u ^ v.u) & mask)
+      break;
+    else
+      mask >>= 1;
+
+  return i;
+}
+
+int deviation_double (double x, double y)
+{
+  union {
+    double d;
+    unsigned long long u;
+  } u, v;
+
+  u.d = x;
+  v.d = y;
+
+  unsigned long long mask = 0x8000000000000000ULL;
+  int i;
+
+  for (i = 64; i > 0; i--)
+    if ((u.u ^ v.u) & mask)
+      break;
+    else
+      mask >>= 1;
+
+  return i;
+}
+
+#define TEST_FUN_XFAIL(TFLOAT, LOW, HIGH, FUN) \
+  xfail = 1; \
+  TEST_FUN (TFLOAT, LOW, HIGH, FUN); \
+  xfail = 0;
+
+#define TEST_FUN(TFLOAT, LOW, HIGH, FUN) \
+__attribute__((optimize("no-tree-vectorize"))) \
+__attribute__((optimize("no-unsafe-math-optimizations"))) \
+void check_##FUN (TFLOAT res[N], TFLOAT a[N]) \
+{ \
+  for (int i = 0; i < N; i++) { \
+    TFLOAT expected = FUN (a[i]); \
+    TFLOAT diff = __builtin_fabs (expected - res[i]); \
+    int deviation = deviation_##TFLOAT (expected, res[i]); \
+    int fail = isnan (res[i]) != isnan (expected) \
+               || isinf (res[i]) != isinf (expected) \
+               || (diff > EPSILON_##TFLOAT && deviation > 10); \
+    if (VERBOSE || fail) \
+      PRINTF (#FUN "(%f) = %f, expected = %f, diff = %f, deviation = %d %s\n", \
+              a[i], res[i], expected, diff, deviation, fail ? "(!)" : ""); \
+    failed |= (fail && !xfail); \
+    if (EARLY_EXIT && failed) \
+      exit (1); \
+  } \
+} \
+void test_##FUN (void) \
+{ \
+  TFLOAT res[N], a[N]; \
+  for (int i = 0; i < N; i++) \
+    a[i] = LOW + ((HIGH - LOW) / N) * i; \
+  _Pragma ("omp target parallel for simd map(to:a) map(from:res)") \
+    for (int i = 0; i < N; i++) \
+      res[i] = FUN (a[i]); \
+  check_##FUN (res, a); \
+}\
+test_##FUN ();
+
+#define TEST_FUN2(TFLOAT, LOW1, HIGH1, LOW2, HIGH2, FUN) \
+__attribute__((optimize("no-tree-vectorize"))) \
+__attribute__((optimize("no-unsafe-math-optimizations"))) \
+void check_##FUN (TFLOAT res[N], TFLOAT a[N], TFLOAT b[N]) \
+{ \
+  /* Accumulate into the file-scope 'failed', honouring 'xfail' like TEST_FUN.  */ \
+  for (int i = 0; i < N; i++) { \
+    TFLOAT expected = FUN (a[i], b[i]); \
+    TFLOAT diff = __builtin_fabs (expected - res[i]); \
+    int deviation = deviation_##TFLOAT (expected, res[i]); \
+    int fail = isnan (res[i]) != isnan (expected) \
+               || isinf (res[i]) != isinf (expected) \
+               || (diff > EPSILON_##TFLOAT && deviation > 10); \
+    failed |= (fail && !xfail); \
+    if (VERBOSE || fail) \
+      PRINTF (#FUN "(%f,%f) = %f, expected = %f, diff = %f, deviation = %d %s\n", \
+              a[i], b[i], res[i], expected, diff, deviation, fail ? "(!)" : ""); \
+    if (EARLY_EXIT && failed) \
+      exit (1); \
+  } \
+} \
+void test_##FUN (void) \
+{ \
+  TFLOAT res[N], a[N], b[N]; \
+  for (int i = 0; i < N; i++) { \
+    a[i] = LOW1 + ((HIGH1 - LOW1) / N) * i; \
+    b[i] = LOW2 + ((HIGH2 - LOW2) / N) * i; \
+  } \
+  _Pragma ("omp target parallel for simd map(to:a, b) map(from:res)") \
+    for (int i = 0; i < N; i++) \
+      res[i] = FUN (a[i], b[i]); \
+  check_##FUN (res, a, b); \
+}\
+test_##FUN ();
+
+int main (void)
+{
+  TEST_FUN (float, -1.1, 1.1, acosf);
+  TEST_FUN (float, -10, 10, acoshf);
+  TEST_FUN (float, -1.1, 1.1, asinf);
+  TEST_FUN (float, -10, 10, asinhf);
+  TEST_FUN (float, -1.1, 1.1, atanf);
+  TEST_FUN2 (float, -2.0, 2.0, 2.0, -2.0, atan2f);
+  TEST_FUN (float, -2.0, 2.0, atanhf);
+  TEST_FUN2 (float, -10.0, 10.0, 5.0, -15.0, copysignf);
+  TEST_FUN (float, -3.14159265359, 3.14159265359, cosf);
+  TEST_FUN (float, -3.14159265359, 3.14159265359, coshf);
+  TEST_FUN (float, -10.0, 10.0, erff);
+  TEST_FUN (float, -10.0, 10.0, expf);
+  TEST_FUN (float, -10.0, 10.0, exp2f);
+  TEST_FUN2 (float, -10.0, 10.0, 100.0, -25.0, fmodf);
+  TEST_FUN (float, -10.0, 10.0, gammaf);
+  TEST_FUN2 (float, -10.0, 10.0, 15.0, -5.0,hypotf);
+  TEST_FUN (float, -10.0, 10.0, lgammaf);
+  TEST_FUN (float, -1.0, 50.0, logf);
+  TEST_FUN (float, -1.0, 500.0, log10f);
+  TEST_FUN (float, -1.0, 64.0, log2f);
+  TEST_FUN2 (float, -100.0, 100.0, 100.0, -100.0, powf);
+  TEST_FUN2 (float, -50.0, 100.0, -2.0, 40.0, remainderf);
+  TEST_FUN (float, -50.0, 50.0, rintf);
+  TEST_FUN2 (float, -50.0, 50.0, -10.0, 32.0, __builtin_scalbf);
+  TEST_FUN (float, -10.0, 10.0, __builtin_significandf);
+  TEST_FUN (float, -3.14159265359, 3.14159265359, sinf);
+  TEST_FUN (float, -3.14159265359, 3.14159265359, sinhf);
+  TEST_FUN (float, -0.1, 10000.0, sqrtf);
+  TEST_FUN (float, -5.0, 5.0, tanf);
+  TEST_FUN (float, -3.14159265359, 3.14159265359, tanhf);
+  /* Newlib's version of tgammaf is known to have poor accuracy.  */
+  TEST_FUN_XFAIL (float, -10.0, 10.0, tgammaf);
+
+  TEST_FUN (double, -1.1, 1.1, acos);
+  TEST_FUN (double, -10, 10, acosh);
+  TEST_FUN (double, -1.1, 1.1, asin);
+  TEST_FUN (double, -10, 10, asinh);
+  TEST_FUN (double, -1.1, 1.1, atan);
+  TEST_FUN2 (double, -2.0, 2.0, 2.0, -2.0, atan2);
+  TEST_FUN (double, -2.0, 2.0, atanh);
+  TEST_FUN2 (double, -10.0, 10.0, 5.0, -15.0, copysign);
+  TEST_FUN (double, -3.14159265359, 3.14159265359, cos);
+  TEST_FUN (double, -3.14159265359, 3.14159265359, cosh);
+  TEST_FUN (double, -10.0, 10.0, erf);
+  TEST_FUN (double, -10.0, 10.0, exp);
+  TEST_FUN (double, -10.0, 10.0, exp2);
+  TEST_FUN2 (double, -10.0, 10.0, 100.0, -25.0, fmod);
+  TEST_FUN (double, -10.0, 10.0, gamma);
+  TEST_FUN2 (double, -10.0, 10.0, 15.0, -5.0, hypot);
+  TEST_FUN (double, -10.0, 10.0, lgamma);
+  TEST_FUN (double, -1.0, 50.0, log);
+  TEST_FUN (double, -1.0, 500.0, log10);
+  TEST_FUN (double, -1.0, 64.0, log2);
+  TEST_FUN2 (double, -100.0, 100.0, 100.0, -100.0, pow);
+  TEST_FUN2 (double, -50.0, 100.0, -2.0, 40.0, remainder);
+  TEST_FUN (double, -50.0, 50.0, rint);
+  TEST_FUN2 (double, -50.0, 50.0, -10.0, 32.0, __builtin_scalb);
+  TEST_FUN (double, -10.0, 10.0, __builtin_significand);
+  TEST_FUN (double, -3.14159265359, 3.14159265359, sin);
+  TEST_FUN (double, -3.14159265359, 3.14159265359, sinh);
+  TEST_FUN (double, -0.1, 10000.0, sqrt);
+  TEST_FUN (double, -5.0, 5.0, tan);
+  TEST_FUN (double, -3.14159265359, 3.14159265359, tanh);
+  /* Newlib's version of tgamma is known to have poor accuracy.  */
+  TEST_FUN_XFAIL (double, -10.0, 10.0, tgamma);
+
+  return failed;
+}