public inbox for libstdc++@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Improve -Ofast vectorization of std::sin etc. (PR libstdc++/81706)
@ 2017-08-07  9:08 Jakub Jelinek
  2017-08-07 14:54 ` Jason Merrill
  0 siblings, 1 reply; 16+ messages in thread
From: Jakub Jelinek @ 2017-08-07  9:08 UTC (permalink / raw)
  To: Jason Merrill, Joseph S. Myers, Marek Polacek, Jonathan Wakely
  Cc: gcc-patches, libstdc++

[-- Attachment #1: Type: text/plain, Size: 1059 bytes --]

Hi!

For -ffast-math, glibc annotates a couple of math functions with the simd
attribute, so that vectorized versions with a vectorization factor of
4, 8 or 16 can be used.

If one uses ::cos or ::cosf or std::cos(double), this works just fine, but not
when using std::cos(float).  This is because the libstdc++ headers call
__builtin_cosf, but the builtin function doesn't have the simd attribute,
only ::cosf does.

Attached are 2 patches to improve this.

The first one is a C/C++ FE change, which arranges that if the simd
attribute is added to, say, ::cosf, then calls to __builtin_cosf act as if
__builtin_cosf also had the attribute.  While other attributes aren't
handled this way, there is a small precedent for such a change: if
somebody uses typeof (cosf) cosf __asm ("foobar"); then calls to
__builtin_cosf that expand into a library call will call foobar, not
cosf.

The other patch is instead a libstdc++ change, which calls ::cosf etc.
rather than __builtin_cosf etc.

Both patches have been (separately) bootstrapped/regtested on x86_64-linux
and i686-linux.

	Jakub

[-- Attachment #2: U674 --]
[-- Type: text/plain, Size: 4848 bytes --]

2017-08-07  Jakub Jelinek  <jakub@redhat.com>

	PR libstdc++/81706
	* tree.c (attribute_value_equal): Use omp_declare_simd_clauses_equal
	for comparison of OMP_CLAUSEs regardless of flag_openmp{,_simd}.

	* c-decl.c (merge_decls): Copy "omp declare simd" attributes from
	newdecl to corresponding __builtin_ if any.

	* decl.c (duplicate_decls): Copy "omp declare simd" attributes from
	newdecl to corresponding __builtin_ if any.

	* gcc.target/i386/pr81706.c: New test.
	* g++.dg/ext/pr81706.C: New test.

--- gcc/tree.c.jj	2017-07-29 09:48:40.000000000 +0200
+++ gcc/tree.c	2017-08-04 12:06:35.636072718 +0200
@@ -5022,8 +5022,8 @@ attribute_value_equal (const_tree attr1,
 				     TREE_VALUE (attr2)) == 1);
     }
 
-  if ((flag_openmp || flag_openmp_simd)
-      && TREE_VALUE (attr1) && TREE_VALUE (attr2)
+  if (TREE_VALUE (attr1)
+      && TREE_VALUE (attr2)
       && TREE_CODE (TREE_VALUE (attr1)) == OMP_CLAUSE
       && TREE_CODE (TREE_VALUE (attr2)) == OMP_CLAUSE)
     return omp_declare_simd_clauses_equal (TREE_VALUE (attr1),
--- gcc/c/c-decl.c.jj	2017-07-31 11:31:15.000000000 +0200
+++ gcc/c/c-decl.c	2017-08-04 12:39:48.113226134 +0200
@@ -2566,6 +2566,36 @@ merge_decls (tree newdecl, tree olddecl,
 			set_builtin_decl_declared_p (fncode, true);
 		      break;
 		    }
+
+		  tree s = lookup_attribute ("omp declare simd",
+					     DECL_ATTRIBUTES (newdecl));
+		  if (s)
+		    {
+		      tree b
+			= builtin_decl_explicit (DECL_FUNCTION_CODE (newdecl));
+		      if (b)
+			{
+			  tree s2 = lookup_attribute ("omp declare simd",
+						      DECL_ATTRIBUTES (b));
+			  while (s)
+			    {
+			      tree s3;
+			      for (s3 = s2; s3;
+				   s3 = lookup_attribute ("omp declare simd",
+							  TREE_CHAIN (s3)))
+				if (attribute_value_equal (s, s3))
+				  break;
+			      if (!s3)
+				{
+				  s3 = copy_node (s);
+				  TREE_CHAIN (s3) = DECL_ATTRIBUTES (b);
+				  DECL_ATTRIBUTES (b) = s3;
+				}
+			      s = lookup_attribute ("omp declare simd",
+						    TREE_CHAIN (s));
+			    }
+			}
+		    }
 		}
 	    }
 	  else
--- gcc/cp/decl.c.jj	2017-08-01 19:23:10.000000000 +0200
+++ gcc/cp/decl.c	2017-08-04 12:44:44.773780568 +0200
@@ -2456,6 +2456,35 @@ next_arg:;
 		  break;
 		}
 	    }
+
+	  tree s = lookup_attribute ("omp declare simd",
+				     DECL_ATTRIBUTES (newdecl));
+	  if (s)
+	    {
+	      tree b = builtin_decl_explicit (DECL_FUNCTION_CODE (newdecl));
+	      if (b)
+		{
+		  tree s2 = lookup_attribute ("omp declare simd",
+					      DECL_ATTRIBUTES (b));
+		  while (s)
+		    {
+		      tree s3;
+		      for (s3 = s2; s3;
+			   s3 = lookup_attribute ("omp declare simd",
+						  TREE_CHAIN (s3)))
+			if (attribute_value_equal (s, s3))
+			  break;
+		      if (!s3)
+			{
+			  s3 = copy_node (s);
+			  TREE_CHAIN (s3) = DECL_ATTRIBUTES (b);
+			  DECL_ATTRIBUTES (b) = s3;
+			}
+		      s = lookup_attribute ("omp declare simd",
+					    TREE_CHAIN (s));
+		    }
+		}
+	    }
 	}
       if (new_defines_function)
 	/* If defining a function declared with other language
--- gcc/testsuite/gcc.target/i386/pr81706.c.jj	2017-08-06 23:50:46.511337565 +0200
+++ gcc/testsuite/gcc.target/i386/pr81706.c	2017-08-06 23:50:35.000000000 +0200
@@ -0,0 +1,32 @@
+/* PR libstdc++/81706 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx2 -mno-avx512f" } */
+/* { dg-final { scan-assembler "call\[^\n\r]_ZGVdN4v_cos" } } */
+/* { dg-final { scan-assembler "call\[^\n\r]_ZGVdN4v_sin" } } */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern double cos (double) __attribute__ ((nothrow, leaf, simd ("notinbranch")));
+extern double sin (double) __attribute__ ((nothrow, leaf, simd ("notinbranch")));
+#ifdef __cplusplus
+}
+#endif
+double p[1024] = { 1.0 };
+double q[1024] = { 1.0 };
+
+void
+foo (void)
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    p[i] = cos (q[i]);
+}
+
+void
+bar (void)
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    p[i] = __builtin_sin (q[i]);
+}
--- gcc/testsuite/g++.dg/ext/pr81706.C.jj	2017-08-06 23:51:09.318065575 +0200
+++ gcc/testsuite/g++.dg/ext/pr81706.C	2017-08-06 23:51:38.577716630 +0200
@@ -0,0 +1,32 @@
+// PR libstdc++/81706
+// { dg-do compile { target i?86-*-* x86_64-*-* } }
+// { dg-options "-O3 -mavx2 -mno-avx512f" }
+// { dg-final { scan-assembler "call\[^\n\r]_ZGVdN4v_cos" } }
+// { dg-final { scan-assembler "call\[^\n\r]_ZGVdN4v_sin" } }
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern double cos (double) __attribute__ ((nothrow, leaf, simd ("notinbranch")));
+extern double sin (double) __attribute__ ((nothrow, leaf, simd ("notinbranch")));
+#ifdef __cplusplus
+}
+#endif
+double p[1024] = { 1.0 };
+double q[1024] = { 1.0 };
+
+void
+foo (void)
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    p[i] = cos (q[i]);
+}
+
+void
+bar (void)
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    p[i] = __builtin_sin (q[i]);
+}

[-- Attachment #3: U675 --]
[-- Type: text/plain, Size: 1772 bytes --]

2017-08-07  Jakub Jelinek  <jakub@redhat.com>

	PR libstdc++/81706
	* include/c_global/cmath (std::cos, std::exp, std::log,
	std::pow, std::sin): Call ::FNf instead of __builtin_FNf in
	float overloads.

--- libstdc++-v3/include/c_global/cmath.jj	2017-07-24 10:57:58.000000000 +0200
+++ libstdc++-v3/include/c_global/cmath	2017-08-04 10:16:59.484637951 +0200
@@ -182,7 +182,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #ifndef __CORRECT_ISO_CPP_MATH_H_PROTO
   inline _GLIBCXX_CONSTEXPR float
   cos(float __x)
-  { return __builtin_cosf(__x); }
+  { return ::cosf(__x); }
 
   inline _GLIBCXX_CONSTEXPR long double
   cos(long double __x)
@@ -220,7 +220,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #ifndef __CORRECT_ISO_CPP_MATH_H_PROTO
   inline _GLIBCXX_CONSTEXPR float
   exp(float __x)
-  { return __builtin_expf(__x); }
+  { return ::expf(__x); }
 
   inline _GLIBCXX_CONSTEXPR long double
   exp(long double __x)
@@ -336,7 +336,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #ifndef __CORRECT_ISO_CPP_MATH_H_PROTO
   inline _GLIBCXX_CONSTEXPR float
   log(float __x)
-  { return __builtin_logf(__x); }
+  { return ::logf(__x); }
 
   inline _GLIBCXX_CONSTEXPR long double
   log(long double __x)
@@ -386,7 +386,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #ifndef __CORRECT_ISO_CPP_MATH_H_PROTO
   inline _GLIBCXX_CONSTEXPR float
   pow(float __x, float __y)
-  { return __builtin_powf(__x, __y); }
+  { return ::powf(__x, __y); }
 
   inline _GLIBCXX_CONSTEXPR long double
   pow(long double __x, long double __y)
@@ -423,7 +423,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #ifndef __CORRECT_ISO_CPP_MATH_H_PROTO
   inline _GLIBCXX_CONSTEXPR float
   sin(float __x)
-  { return __builtin_sinf(__x); }
+  { return ::sinf(__x); }
 
   inline _GLIBCXX_CONSTEXPR long double
   sin(long double __x)

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2017-10-24 20:26 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-07  9:08 [PATCH] Improve -Ofast vectorization of std::sin etc. (PR libstdc++/81706) Jakub Jelinek
2017-08-07 14:54 ` Jason Merrill
2017-08-07 15:28   ` Jakub Jelinek
2017-08-07 20:59     ` Jonathan Wakely
2017-08-07 21:02       ` Jakub Jelinek
2017-08-07 21:58         ` Jonathan Wakely
2017-09-01 11:13     ` Jakub Jelinek
2017-09-09 13:43       ` Jason Merrill
2017-09-12  7:49         ` Jakub Jelinek
2017-09-29 12:32           ` Jakub Jelinek
2017-09-29 20:17             ` Joseph Myers
2017-10-24 15:06             ` Jason Merrill
2017-10-24 15:34               ` Jakub Jelinek
2017-10-24 17:56                 ` Jason Merrill
2017-10-24 19:36                   ` Jakub Jelinek
2017-10-24 20:26                     ` Jason Merrill

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).