public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, rs6000] Use new __builtin_pack_longdouble within libgcc's ibm-ldouble.c
@ 2014-05-05 18:33 Peter Bergner
  2014-05-05 20:57 ` Peter Bergner
  0 siblings, 1 reply; 7+ messages in thread
From: Peter Bergner @ 2014-05-05 18:33 UTC (permalink / raw)
  To: David Edelsohn; +Cc: GCC Patches, Michael Meissner

Currently, the IBM long double routines in libgcc use a union to construct
a long double from two double values.  This causes horrific code generation
that copies the two doubles from the FP registers over to GPRs and back
again, giving us two loads and two stores, which leads to two load-hit-store
hazards.  The following patch makes use of the new __builtin_pack_longdouble
builtin to construct the long double, giving us at worst one or two fmrs.

Is this ok for mainline once my bootstrap and regtesting are complete?

Peter


libgcc/
	* config/rs6000/ibm-ldouble.c (typedef union longDblUnion): Delete.
	(pack_ldouble): New function.
	(__gcc_qadd): Use it.
	(__gcc_qmul): Likewise.
	(__gcc_qdiv): Likewise.
	(__gcc_qneg): Likewise.
	(__gcc_stoq): Likewise.
	(__gcc_dtoq): Likewise.


Index: libgcc/config/rs6000/ibm-ldouble.c
===================================================================
--- libgcc/config/rs6000/ibm-ldouble.c	(revision 210073)
+++ libgcc/config/rs6000/ibm-ldouble.c	(working copy)
@@ -87,18 +87,29 @@ __asm__ (".symver __gcc_qadd,_xlqadd@GCC
 	 ".symver .__gcc_qdiv,._xlqdiv@GCC_3.4");
 #endif
 
-typedef union
-{
-  long double ldval;
-  double dval[2];
-} longDblUnion;
+/* Combine two 'double' values into one 'long double' and return the result.  */
+static inline long double
+pack_ldouble (double dh, double dl)
+{
+#if defined (_SOFT_FLOAT) || defined (__NO_FPRS__)
+  union
+  {
+    long double ldval;
+    double dval[2];
+  } x;
+  x.dval[0] = dh;
+  x.dval[1] = dl;
+  return x.ldval;
+#else
+  return __builtin_pack_longdouble (dh, dl);
+#endif
+}
 
 /* Add two 'long double' values and return the result.	*/
 long double
 __gcc_qadd (double a, double aa, double c, double cc)
 {
-  longDblUnion x;
-  double z, q, zz, xh;
+  double xh, xl, z, q, zz;
 
   z = a + c;
 
@@ -109,12 +120,12 @@ __gcc_qadd (double a, double aa, double
       z = cc + aa + c + a;
       if (nonfinite (z))
 	return z;
-      x.dval[0] = z;  /* Will always be DBL_MAX.  */
+      xh = z;  /* Will always be DBL_MAX.  */
       zz = aa + cc;
       if (fabs(a) > fabs(c))
-	x.dval[1] = a - z + c + zz;
+	xl = a - z + c + zz;
       else
-	x.dval[1] = c - z + a + zz;
+	xl = c - z + a + zz;
     }
   else
     {
@@ -129,10 +140,9 @@ __gcc_qadd (double a, double aa, double
       if (nonfinite (xh))
 	return xh;
 
-      x.dval[0] = xh;
-      x.dval[1] = z - xh + zz;
+      xl = z - xh + zz;
     }
-  return x.ldval;
+  return pack_ldouble (xh, xl);
 }
 
 long double
@@ -148,8 +158,7 @@ static double fmsub (double, double, dou
 long double
 __gcc_qmul (double a, double b, double c, double d)
 {
-  longDblUnion z;
-  double t, tau, u, v, w;
+  double xh, xl, t, tau, u, v, w;
   
   t = a * c;			/* Highest order double term.  */
 
@@ -173,16 +182,15 @@ __gcc_qmul (double a, double b, double c
   /* Construct long double result.  */
   if (nonfinite (u))
     return u;
-  z.dval[0] = u;
-  z.dval[1] = (t - u) + tau;
-  return z.ldval;
+  xh = u;
+  xl = (t - u) + tau;
+  return pack_ldouble (xh, xl);
 }
 
 long double
 __gcc_qdiv (double a, double b, double c, double d)
 {
-  longDblUnion z;
-  double s, sigma, t, tau, u, v, w;
+  double xh, xl, s, sigma, t, tau, u, v, w;
   
   t = a / c;                    /* highest order double term */
   
@@ -219,9 +227,9 @@ __gcc_qdiv (double a, double b, double c
   /* Construct long double result.  */
   if (nonfinite (u))
     return u;
-  z.dval[0] = u;
-  z.dval[1] = (t - u) + tau;
-  return z.ldval;
+  xh = u;
+  xl = (t - u) + tau;
+  return pack_ldouble (xh, xl);
 }
 
 #if defined (_SOFT_DOUBLE) && defined (__LONG_DOUBLE_128__)
@@ -248,11 +256,7 @@ extern int __gedf2 (double, double);
 long double
 __gcc_qneg (double a, double aa)
 {
-  longDblUnion x;
-
-  x.dval[0] = -a;
-  x.dval[1] = -aa;
-  return x.ldval;
+  return pack_ldouble (-a, -aa);
 }
 
 /* Compare two 'long double' values for equality.  */
@@ -292,24 +296,14 @@ strong_alias (__gcc_qge, __gcc_qgt);
 long double
 __gcc_stoq (float a)
 {
-  longDblUnion x;
-
-  x.dval[0] = (double) a;
-  x.dval[1] = 0.0;
-
-  return x.ldval;
+  return pack_ldouble ((double) a, 0.0);
 }
 
 /* Convert double to long double.  */
 long double
 __gcc_dtoq (double a)
 {
-  longDblUnion x;
-
-  x.dval[0] = a;
-  x.dval[1] = 0.0;
-
-  return x.ldval;
+  return pack_ldouble (a, 0.0);
 }
 
 /* Convert long double to single.  */


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH, rs6000] Use new __builtin_pack_longdouble within libgcc's ibm-ldouble.c
  2014-05-05 18:33 [PATCH, rs6000] Use new __builtin_pack_longdouble within libgcc's ibm-ldouble.c Peter Bergner
@ 2014-05-05 20:57 ` Peter Bergner
  0 siblings, 0 replies; 7+ messages in thread
From: Peter Bergner @ 2014-05-05 20:57 UTC (permalink / raw)
  To: David Edelsohn; +Cc: GCC Patches, Michael Meissner

On Mon, 2014-05-05 at 13:33 -0500, Peter Bergner wrote:
> Currently, the IBM long double routines in libgcc use a union to construct
> a long double from two double values.  This causes horrific code generation
> that copies the two double from the FP registers over to GPRs and back
> again, giving us two loads and two stores, which leads to two load-hit-store
> hazzards.  The following patch makes use of the new __builtin_pack_longdouble
> builtin to construct the long double giving us at worse, one or two fmrs.
> 
> Is this ok for mainline once my bootstrap and regtesting are complete?

Ok, bootstrapping and regtesting have completed with no regressions.

Peter


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH, rs6000] Use new __builtin_pack_longdouble within libgcc's ibm-ldouble.c
  2014-07-29 17:16 ` Mike Stump
@ 2014-07-31 16:34   ` Peter Bergner
  0 siblings, 0 replies; 7+ messages in thread
From: Peter Bergner @ 2014-07-31 16:34 UTC (permalink / raw)
  To: Mike Stump; +Cc: David Edelsohn, GCC Patches, Alan Modra

On Tue, 2014-07-29 at 10:11 -0700, Mike Stump wrote:
> On Jul 29, 2014, at 7:56 AM, Peter Bergner <bergner@vnet.ibm.com> wrote:
> > Currently, the IBM long double routines in libgcc use a union to construct
> > a long double from two double values.  This causes horrific code generation
> > that copies the two double from the FP registers over to GPRs and back
> > again, giving us two loads and two stores, which leads to two load-hit-store
> > hazzards.
> 
> Gosh, it’s too bad we don’t have any sort of technology to optimize moving data around.

Well, the problem is we're trying to move it around, when we'd really like
the data to stay in the FP registers the entire time.  The problem is that
unions and structs that are the same size as a TImode/TFmode/TDmode are
always converted to TImode and that is what ends up causing the whole
fp -> int -> fp shuffle which leads to crappy code.  On power8 where we
have int <-> fp reg copy instructions, it's better than the copy through
the stack frame, but even that is unnecessary.

Peter



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH, rs6000] Use new __builtin_pack_longdouble within libgcc's ibm-ldouble.c
  2014-07-31  2:22 ` David Edelsohn
@ 2014-07-31 16:30   ` Peter Bergner
  0 siblings, 0 replies; 7+ messages in thread
From: Peter Bergner @ 2014-07-31 16:30 UTC (permalink / raw)
  To: David Edelsohn; +Cc: GCC Patches, Alan Modra

On Wed, 2014-07-30 at 22:13 -0400, David Edelsohn wrote:
> On Tue, Jul 29, 2014 at 10:56 AM, Peter Bergner <bergner@vnet.ibm.com> wrote:
> > libgcc/
> >         * config/rs6000/ibm-ldouble.c (typedef union longDblUnion): Delete.
> >         (pack_ldouble): New function.
> >         (__gcc_qadd): Use it.
> >         (__gcc_qmul): Likewise.
> >         (__gcc_qdiv): Likewise.
> >         (__gcc_qneg): Likewise.
> >         (__gcc_stoq): Likewise.
> >         (__gcc_dtoq): Likewise.
> 
> Okay.

Committed as revision 213380.  Thanks.

Peter



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH, rs6000] Use new __builtin_pack_longdouble within libgcc's ibm-ldouble.c
  2014-07-29 15:17 Peter Bergner
  2014-07-29 17:16 ` Mike Stump
@ 2014-07-31  2:22 ` David Edelsohn
  2014-07-31 16:30   ` Peter Bergner
  1 sibling, 1 reply; 7+ messages in thread
From: David Edelsohn @ 2014-07-31  2:22 UTC (permalink / raw)
  To: Peter Bergner; +Cc: GCC Patches, Alan Modra

On Tue, Jul 29, 2014 at 10:56 AM, Peter Bergner <bergner@vnet.ibm.com> wrote:
> ..after cleaning up the conflicting long double builtins and fixing a
> few bugs in the test cases, I'd like to resubmit the following:
>
> Currently, the IBM long double routines in libgcc use a union to construct
> a long double from two double values.  This causes horrific code generation
> that copies the two double from the FP registers over to GPRs and back
> again, giving us two loads and two stores, which leads to two load-hit-store
> hazzards.  The following patch makes use of the new __builtin_pack_longdouble
> builtin to construct the long double giving us at worse, one or two fmrs.
>
> This passed bootstrap and regtesting on powerpc64-linux.  Ok for trunk?
>
> Peter
>
>
> libgcc/
>         * config/rs6000/ibm-ldouble.c (typedef union longDblUnion): Delete.
>         (pack_ldouble): New function.
>         (__gcc_qadd): Use it.
>         (__gcc_qmul): Likewise.
>         (__gcc_qdiv): Likewise.
>         (__gcc_qneg): Likewise.
>         (__gcc_stoq): Likewise.
>         (__gcc_dtoq): Likewise.

Okay.

Thanks, David

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH, rs6000] Use new __builtin_pack_longdouble within libgcc's ibm-ldouble.c
  2014-07-29 15:17 Peter Bergner
@ 2014-07-29 17:16 ` Mike Stump
  2014-07-31 16:34   ` Peter Bergner
  2014-07-31  2:22 ` David Edelsohn
  1 sibling, 1 reply; 7+ messages in thread
From: Mike Stump @ 2014-07-29 17:16 UTC (permalink / raw)
  To: Peter Bergner; +Cc: David Edelsohn, GCC Patches, Alan Modra

On Jul 29, 2014, at 7:56 AM, Peter Bergner <bergner@vnet.ibm.com> wrote:
> Currently, the IBM long double routines in libgcc use a union to construct
> a long double from two double values.  This causes horrific code generation
> that copies the two double from the FP registers over to GPRs and back
> again, giving us two loads and two stores, which leads to two load-hit-store
> hazzards.

Gosh, it’s too bad we don’t have any sort of technology to optimize moving data around.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH, rs6000] Use new __builtin_pack_longdouble within libgcc's ibm-ldouble.c
@ 2014-07-29 15:17 Peter Bergner
  2014-07-29 17:16 ` Mike Stump
  2014-07-31  2:22 ` David Edelsohn
  0 siblings, 2 replies; 7+ messages in thread
From: Peter Bergner @ 2014-07-29 15:17 UTC (permalink / raw)
  To: David Edelsohn; +Cc: GCC Patches, Alan Modra

..after cleaning up the conflicting long double builtins and fixing a
few bugs in the test cases, I'd like to resubmit the following:

Currently, the IBM long double routines in libgcc use a union to construct
a long double from two double values.  This causes horrific code generation
that copies the two doubles from the FP registers over to GPRs and back
again, giving us two loads and two stores, which leads to two load-hit-store
hazards.  The following patch makes use of the new __builtin_pack_longdouble
builtin to construct the long double, giving us at worst one or two fmrs.

This passed bootstrap and regtesting on powerpc64-linux.  Ok for trunk?

Peter


libgcc/
	* config/rs6000/ibm-ldouble.c (typedef union longDblUnion): Delete.
	(pack_ldouble): New function.
	(__gcc_qadd): Use it.
	(__gcc_qmul): Likewise.
	(__gcc_qdiv): Likewise.
	(__gcc_qneg): Likewise.
	(__gcc_stoq): Likewise.
	(__gcc_dtoq): Likewise.

Index: libgcc/config/rs6000/ibm-ldouble.c
===================================================================
--- libgcc/config/rs6000/ibm-ldouble.c	(revision 213141)
+++ libgcc/config/rs6000/ibm-ldouble.c	(working copy)
@@ -87,18 +87,30 @@ __asm__ (".symver __gcc_qadd,_xlqadd@GCC
 	 ".symver .__gcc_qdiv,._xlqdiv@GCC_3.4");
 #endif
 
-typedef union
-{
-  long double ldval;
-  double dval[2];
-} longDblUnion;
+/* Combine two 'double' values into one 'long double' and return the result.  */
+static inline long double
+pack_ldouble (double dh, double dl)
+{
+#if defined (__LONG_DOUBLE_128__) \
+    && !(defined (_SOFT_FLOAT) || defined (__NO_FPRS__))
+  return __builtin_pack_longdouble (dh, dl);
+#else
+  union
+  {
+    long double ldval;
+    double dval[2];
+  } x;
+  x.dval[0] = dh;
+  x.dval[1] = dl;
+  return x.ldval;
+#endif
+}
 
 /* Add two 'long double' values and return the result.	*/
 long double
 __gcc_qadd (double a, double aa, double c, double cc)
 {
-  longDblUnion x;
-  double z, q, zz, xh;
+  double xh, xl, z, q, zz;
 
   z = a + c;
 
@@ -109,12 +121,12 @@ __gcc_qadd (double a, double aa, double
       z = cc + aa + c + a;
       if (nonfinite (z))
 	return z;
-      x.dval[0] = z;  /* Will always be DBL_MAX.  */
+      xh = z;  /* Will always be DBL_MAX.  */
       zz = aa + cc;
       if (fabs(a) > fabs(c))
-	x.dval[1] = a - z + c + zz;
+	xl = a - z + c + zz;
       else
-	x.dval[1] = c - z + a + zz;
+	xl = c - z + a + zz;
     }
   else
     {
@@ -129,10 +141,9 @@ __gcc_qadd (double a, double aa, double
       if (nonfinite (xh))
 	return xh;
 
-      x.dval[0] = xh;
-      x.dval[1] = z - xh + zz;
+      xl = z - xh + zz;
     }
-  return x.ldval;
+  return pack_ldouble (xh, xl);
 }
 
 long double
@@ -148,8 +159,7 @@ static double fmsub (double, double, dou
 long double
 __gcc_qmul (double a, double b, double c, double d)
 {
-  longDblUnion z;
-  double t, tau, u, v, w;
+  double xh, xl, t, tau, u, v, w;
   
   t = a * c;			/* Highest order double term.  */
 
@@ -173,16 +183,15 @@ __gcc_qmul (double a, double b, double c
   /* Construct long double result.  */
   if (nonfinite (u))
     return u;
-  z.dval[0] = u;
-  z.dval[1] = (t - u) + tau;
-  return z.ldval;
+  xh = u;
+  xl = (t - u) + tau;
+  return pack_ldouble (xh, xl);
 }
 
 long double
 __gcc_qdiv (double a, double b, double c, double d)
 {
-  longDblUnion z;
-  double s, sigma, t, tau, u, v, w;
+  double xh, xl, s, sigma, t, tau, u, v, w;
   
   t = a / c;                    /* highest order double term */
   
@@ -219,9 +228,9 @@ __gcc_qdiv (double a, double b, double c
   /* Construct long double result.  */
   if (nonfinite (u))
     return u;
-  z.dval[0] = u;
-  z.dval[1] = (t - u) + tau;
-  return z.ldval;
+  xh = u;
+  xl = (t - u) + tau;
+  return pack_ldouble (xh, xl);
 }
 
 #if defined (_SOFT_DOUBLE) && defined (__LONG_DOUBLE_128__)
@@ -248,11 +257,7 @@ extern int __gedf2 (double, double);
 long double
 __gcc_qneg (double a, double aa)
 {
-  longDblUnion x;
-
-  x.dval[0] = -a;
-  x.dval[1] = -aa;
-  return x.ldval;
+  return pack_ldouble (-a, -aa);
 }
 
 /* Compare two 'long double' values for equality.  */
@@ -292,24 +297,14 @@ strong_alias (__gcc_qge, __gcc_qgt);
 long double
 __gcc_stoq (float a)
 {
-  longDblUnion x;
-
-  x.dval[0] = (double) a;
-  x.dval[1] = 0.0;
-
-  return x.ldval;
+  return pack_ldouble ((double) a, 0.0);
 }
 
 /* Convert double to long double.  */
 long double
 __gcc_dtoq (double a)
 {
-  longDblUnion x;
-
-  x.dval[0] = a;
-  x.dval[1] = 0.0;
-
-  return x.ldval;
+  return pack_ldouble (a, 0.0);
 }
 
 /* Convert long double to single.  */


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2014-07-31 16:30 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-05-05 18:33 [PATCH, rs6000] Use new __builtin_pack_longdouble within libgcc's ibm-ldouble.c Peter Bergner
2014-05-05 20:57 ` Peter Bergner
2014-07-29 15:17 Peter Bergner
2014-07-29 17:16 ` Mike Stump
2014-07-31 16:34   ` Peter Bergner
2014-07-31  2:22 ` David Edelsohn
2014-07-31 16:30   ` Peter Bergner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).