public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed
From: H.J.Lu <hjl@sourceware.org>
To: glibc-cvs@sourceware.org
Subject: [glibc] x86-64: Compile branred.c with -mprefer-vector-width=128 [BZ #24603]
Date: Wed, 24 Jul 2019 21:49:00 -0000	[thread overview]
Message-ID: <20190724214916.99248.qmail@sourceware.org> (raw)

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=7e681561a3aea7aa8f21fb031a7c778147dfdf5b

commit 7e681561a3aea7aa8f21fb031a7c778147dfdf5b
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Wed Jul 24 14:48:33 2019 -0700

    x86-64: Compile branred.c with -mprefer-vector-width=128 [BZ #24603]
    
    When compiled with -O3 and AVX, GCC 8 and 9 optimize some loops in
    sysdeps/ieee754/dbl-64/branred.c with 256-bit vector instructions,
    which leads to store-forwarding stalls:
    
    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90579
    
    There is no easy fix in the compiler.  This patch limits vector width to
    128 bits to work around this issue.  It improves performance of sin
    and cos by more than 40% on Skylake compiled with -O3 -march=skylake.
    
    Tested with GCC 7/8/9 on x86-64.
    
    	[BZ #24603]
    	* sysdeps/x86_64/configure.ac: Check if -mprefer-vector-width=128
    	works.
    	* sysdeps/x86_64/configure: Regenerated.
    	* sysdeps/x86_64/fpu/Makefile (CFLAGS-branred.c): New.  Set
    	to -mprefer-vector-width=128 if supported.

Diff:
---
 ChangeLog                   |  9 +++++++++
 sysdeps/x86_64/configure    | 22 ++++++++++++++++++++++
 sysdeps/x86_64/configure.ac |  9 +++++++++
 sysdeps/x86_64/fpu/Makefile | 12 ++++++++++++
 4 files changed, 52 insertions(+)

diff --git a/ChangeLog b/ChangeLog
index 88108d1..31a6b38 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2019-07-24  H.J. Lu  <hongjiu.lu@intel.com>
+
+	[BZ #24603]
+	* sysdeps/x86_64/configure.ac: Check if -mprefer-vector-width=128
+	works.
+	* sysdeps/x86_64/configure: Regenerated.
+	* sysdeps/x86_64/fpu/Makefile (CFLAGS-branred.c): New.  Set
+	to -mprefer-vector-width=128 if supported.
+
 2019-07-24  Florian Weimer  <fweimer@redhat.com>
 
 	* scripts/build-many-glibcs.py (Context.checkout): Default to
diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
index 8674d14..84f82c2 100644
--- a/sysdeps/x86_64/configure
+++ b/sysdeps/x86_64/configure
@@ -54,6 +54,28 @@ fi
 config_vars="$config_vars
 config-cflags-avx512 = $libc_cv_cc_avx512"
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking -mprefer-vector-width=128" >&5
+$as_echo_n "checking -mprefer-vector-width=128... " >&6; }
+if ${libc_cv_cc_mprefer_vector_width+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if { ac_try='${CC-cc} -mprefer-vector-width=128 -xc /dev/null -S -o /dev/null'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :
+  libc_cv_cc_mprefer_vector_width=yes
+else
+  libc_cv_cc_mprefer_vector_width=no
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_mprefer_vector_width" >&5
+$as_echo "$libc_cv_cc_mprefer_vector_width" >&6; }
+config_vars="$config_vars
+config-cflags-mprefer-vector-width = $libc_cv_cc_mprefer_vector_width"
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for Intel MPX support" >&5
 $as_echo_n "checking for Intel MPX support... " >&6; }
 if ${libc_cv_asm_mpx+:} false; then :
diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
index b7d2c01..cdaba0c 100644
--- a/sysdeps/x86_64/configure.ac
+++ b/sysdeps/x86_64/configure.ac
@@ -25,6 +25,15 @@ if test $libc_cv_cc_avx512 = yes; then
 fi
 LIBC_CONFIG_VAR([config-cflags-avx512], [$libc_cv_cc_avx512])
 
+dnl Check if -mprefer-vector-width=128 works.
+AC_CACHE_CHECK(-mprefer-vector-width=128, libc_cv_cc_mprefer_vector_width, [dnl
+LIBC_TRY_CC_OPTION([-mprefer-vector-width=128],
+		   [libc_cv_cc_mprefer_vector_width=yes],
+		   [libc_cv_cc_mprefer_vector_width=no])
+])
+LIBC_CONFIG_VAR([config-cflags-mprefer-vector-width],
+		[$libc_cv_cc_mprefer_vector_width])
+
 dnl Check whether asm supports Intel MPX
 AC_CACHE_CHECK(for Intel MPX support, libc_cv_asm_mpx, [dnl
 cat > conftest.s <<\EOF
diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile
index 2b7d69b..74b14ba 100644
--- a/sysdeps/x86_64/fpu/Makefile
+++ b/sysdeps/x86_64/fpu/Makefile
@@ -237,3 +237,15 @@ CFLAGS-test-float-libmvec-sincosf-avx512.c = -DREQUIRE_AVX512F
 CFLAGS-test-float-libmvec-sincosf-avx512-main.c = $(libmvec-sincos-cflags) $(float-vlen16-arch-ext-cflags)
 endif
 endif
+
+ifeq ($(subdir)$(config-cflags-mprefer-vector-width),mathyes)
+# When compiled with -O3 -march=skylake, GCC 8 and 9 optimize some loops
+# in branred.c with 256-bit vector instructions, which leads to store
+# forward stall:
+#
+# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90579
+#
+# Limit vector width to 128 bits to work around this issue.  It improves
+# performance of sin and cos by more than 40% on Skylake.
+CFLAGS-branred.c = -mprefer-vector-width=128
+endif


                 reply	other threads:[~2019-07-24 21:49 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190724214916.99248.qmail@sourceware.org \
    --to=hjl@sourceware.org \
    --cc=glibc-cvs@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).