[PATCH] IBM Z: Try to make use of load-and-test instructions

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH] IBM Z: Try to make use of load-and-test instructions
@ 2020-09-18 11:10 Stefan Schulze Frielinghaus
  2020-09-21 16:51 ` Andreas Krebbel
  0 siblings, 1 reply; 3+ messages in thread
From: Stefan Schulze Frielinghaus @ 2020-09-18 11:10 UTC (permalink / raw)
  To: krebbel, gcc-patches

This patch enables a peephole2 optimization which replaces a load of
constant zero into a temporary register, followed by a comparison of a
floating-point register of interest against that temporary, with a single
load-and-test instruction.  However, the optimization is only applied if
both registers are dead afterwards and if we test for (in)equality only.
These restrictions are relaxed in case of fast math.

This is a follow up to PR88856.

Bootstrapped and regtested on IBM Z.

gcc/ChangeLog:

	* config/s390/s390.md ("*cmp<mode>_ccs_0", "*cmp<mode>_ccz_0",
	"*cmp<mode>_ccs_0_fastmath"): Basically change "*cmp<mode>_ccs_0" into
	"*cmp<mode>_ccz_0" and for fast math add "*cmp<mode>_ccs_0_fastmath".

gcc/testsuite/ChangeLog:

	* gcc.target/s390/load-and-test-fp-1.c: Change test to include all
	possible combinations of dead/live registers and comparisons (equality,
	relational).
	* gcc.target/s390/load-and-test-fp-2.c: Same as load-and-test-fp-1.c
	but for fast math.
	* gcc.target/s390/load-and-test-fp.h: New test included by
	load-and-test-fp-{1,2}.c.
---
 gcc/config/s390/s390.md                       | 54 +++++++++++++++----
 .../gcc.target/s390/load-and-test-fp-1.c      | 19 +++----
 .../gcc.target/s390/load-and-test-fp-2.c      | 17 ++----
 .../gcc.target/s390/load-and-test-fp.h        | 12 +++++
 4 files changed, 67 insertions(+), 35 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/load-and-test-fp.h

diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 4c3e5400a2b..e591aa7c324 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -1391,23 +1391,55 @@
 ; (TF|DF|SF|TD|DD|SD) instructions
 
 
-; FIXME: load and test instructions turn SNaN into QNaN what is not
-; acceptable if the target will be used afterwards.  On the other hand
-; they are quite convenient for implementing comparisons with 0.0. So
-; try to enable them via splitter/peephole if the value isn't needed anymore.
-; See testcases: load-and-test-fp-1.c and load-and-test-fp-2.c
+; load and test instructions turn a signaling NaN into a quiet NaN.  Thus they
+; may only be used if the target register is dead afterwards or if fast math
+; is enabled.  The former is done via a peephole optimization.  Note, load and
+; test instructions may only be used for (in)equality comparisons because
+; relational comparisons must treat a quiet NaN like a signaling NaN which is
+; not the case for load and test instructions.  For fast math insn
+; "cmp<mode>_ccs_0_fastmath" applies.
+; See testcases load-and-test-fp-{1,2}.c
+
+(define_peephole2
+  [(set (match_operand:FP 0 "register_operand")
+	(match_operand:FP 1 "const0_operand"))
+   (set (reg:CCZ CC_REGNUM)
+	(compare:CCZ (match_operand:FP 2 "register_operand")
+		     (match_operand:FP 3 "register_operand")))]
+  "TARGET_HARD_FLOAT
+   && FP_REG_P (operands[2])
+   && REGNO (operands[0]) == REGNO (operands[3])
+   && peep2_reg_dead_p (2, operands[0])
+   && peep2_reg_dead_p (2, operands[2])"
+  [(parallel
+    [(set (reg:CCZ CC_REGNUM)
+	  (match_op_dup 4 [(match_dup 2) (match_dup 1)]))
+     (clobber (match_dup 2))])]
+  "operands[4] = gen_rtx_COMPARE (CCZmode, operands[2], operands[1]);")
 
 ; ltxbr, ltdbr, ltebr, ltxtr, ltdtr
-(define_insn "*cmp<mode>_ccs_0"
-  [(set (reg CC_REGNUM)
-	(compare (match_operand:FP 0 "register_operand"  "f")
-		 (match_operand:FP 1 "const0_operand"    "")))
-   (clobber (match_operand:FP      2 "register_operand" "=0"))]
-  "s390_match_ccmode(insn, CCSmode) && TARGET_HARD_FLOAT"
+(define_insn "*cmp<mode>_ccz_0"
+  [(set (reg:CCZ CC_REGNUM)
+	(compare:CCZ (match_operand:FP 0 "register_operand" "f")
+		     (match_operand:FP 1 "const0_operand")))
+   (clobber (match_operand:FP 2 "register_operand" "=0"))]
+  "TARGET_HARD_FLOAT"
   "lt<xde><bt>r\t%0,%0"
    [(set_attr "op_type" "RRE")
     (set_attr "type"  "fsimp<mode>")])
 
+(define_insn "*cmp<mode>_ccs_0_fastmath"
+  [(set (reg CC_REGNUM)
+	(compare (match_operand:FP 0 "register_operand" "f")
+		 (match_operand:FP 1 "const0_operand")))]
+  "s390_match_ccmode (insn, CCSmode)
+   && TARGET_HARD_FLOAT
+   && !flag_trapping_math
+   && !flag_signaling_nans"
+  "lt<xde><bt>r\t%0,%0"
+  [(set_attr "op_type" "RRE")
+   (set_attr "type" "fsimp<mode>")])
+
 ; VX: TFmode in FPR pairs: use cxbr instead of wfcxb
 ; cxtr, cdtr, cxbr, cdbr, cebr, cdb, ceb, wfcsb, wfcdb
 (define_insn "*cmp<mode>_ccs"
diff --git a/gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c b/gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c
index 2a7e88c0f1b..ebb8a88c574 100644
--- a/gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c
+++ b/gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c
@@ -1,17 +1,12 @@
 /* { dg-do compile } */
 /* { dg-options "-O3 -mzarch" } */
 
-/* a is used after the comparison.  We cannot use load and test here
-   since it would turn SNaNs into QNaNs.  */
+/* Use load-and-test instructions if compared for (in)equality and if variable
+   `a` is dead after the comparison.  For all other cases use
+   compare-and-signal instructions.  */
 
-double gl;
+#include "load-and-test-fp.h"
 
-double
-foo (double dummy, double a)
-{
-  if (a == 0.0)
-    gl = 1;
-  return a;
-}
-
-/* { dg-final { scan-assembler {\tcdbr?\t} } } */
+/* { dg-final { scan-assembler-times "ltdbr\t" 2 } } */
+/* { dg-final { scan-assembler-times "cdbr\t" 2 } } */
+/* { dg-final { scan-assembler-times "kdbr\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c b/gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c
index 7646fdd5def..53dab3c4424 100644
--- a/gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c
+++ b/gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c
@@ -1,16 +1,9 @@
 /* { dg-do compile } */
-/* { dg-options "-O3" } */
+/* { dg-options "-O3 -mzarch -ffast-math" } */
 
-/* a is not used after the comparison.  So we should use load and test
-   here.  */
+/* Fast-math implies -fno-trapping-math -fno-signaling-nans which imply
+   that no user visible trap will happen.  */
 
-double gl;
+#include "load-and-test-fp.h"
 
-void
-bar (double a)
-{
-  if (a == 0.0)
-    gl = 1;
-}
-
-/* { dg-final { scan-assembler "ltdbr\t" } } */
+/* { dg-final { scan-assembler-times "ltdbr\t" 12 } } */
diff --git a/gcc/testsuite/gcc.target/s390/load-and-test-fp.h b/gcc/testsuite/gcc.target/s390/load-and-test-fp.h
new file mode 100644
index 00000000000..f153d96698d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/load-and-test-fp.h
@@ -0,0 +1,12 @@
+double gl;
+
+#define test(N, CMP) \
+  void   N ## _dead(double a) { if (a CMP 0.0) gl = 1; } \
+  double N ## _live(double a) { if (a CMP 0.0) gl = 1; return a; }
+
+test(eq, ==)
+test(ne, !=)
+test(ge, >=)
+test(gt, >)
+test(le, <=)
+test(lt, <)
-- 
2.25.3


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] IBM Z: Try to make use of load-and-test instructions
  2020-09-18 11:10 [PATCH] IBM Z: Try to make use of load-and-test instructions Stefan Schulze Frielinghaus
@ 2020-09-21 16:51 ` Andreas Krebbel
  2020-09-22 12:03   ` Stefan Schulze Frielinghaus
  0 siblings, 1 reply; 3+ messages in thread
From: Andreas Krebbel @ 2020-09-21 16:51 UTC (permalink / raw)
  To: Stefan Schulze Frielinghaus, gcc-patches

On 18.09.20 13:10, Stefan Schulze Frielinghaus wrote:
> This patch enables a peephole2 optimization which transforms a load of
> constant zero into a temporary register which is then finally used to
> compare against a floating-point register of interest into a single load
> and test instruction.  However, the optimization is only applied if both
> registers are dead afterwards and if we test for (in)equality only.
> This is relaxed in case of fast math.
> 
> This is a follow up to PR88856.
> 
> Bootstrapped and regtested on IBM Z.
> 
> gcc/ChangeLog:
> 
> 	* config/s390/s390.md ("*cmp<mode>_ccs_0", "*cmp<mode>_ccz_0",
> 	"*cmp<mode>_ccs_0_fastmath"): Basically change "*cmp<mode>_ccs_0" into
> 	"*cmp<mode>_ccz_0" and for fast math add "*cmp<mode>_ccs_0_fastmath".
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/s390/load-and-test-fp-1.c: Change test to include all
> 	possible combinations of dead/live registers and comparisons (equality,
> 	relational).
> 	* gcc.target/s390/load-and-test-fp-2.c: Same as load-and-test-fp-1.c
> 	but for fast math.
> 	* gcc.target/s390/load-and-test-fp.h: New test included by
> 	load-and-test-fp-{1,2}.c.

Ok for mainline. Please see below for some comments.

Thanks!

Andreas

> ---
>  gcc/config/s390/s390.md                       | 54 +++++++++++++++----
>  .../gcc.target/s390/load-and-test-fp-1.c      | 19 +++----
>  .../gcc.target/s390/load-and-test-fp-2.c      | 17 ++----
>  .../gcc.target/s390/load-and-test-fp.h        | 12 +++++
>  4 files changed, 67 insertions(+), 35 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/s390/load-and-test-fp.h
> 
> diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> index 4c3e5400a2b..e591aa7c324 100644
> --- a/gcc/config/s390/s390.md
> +++ b/gcc/config/s390/s390.md
> @@ -1391,23 +1391,55 @@
>  ; (TF|DF|SF|TD|DD|SD) instructions
>  
>  
> -; FIXME: load and test instructions turn SNaN into QNaN what is not
> -; acceptable if the target will be used afterwards.  On the other hand
> -; they are quite convenient for implementing comparisons with 0.0. So
> -; try to enable them via splitter/peephole if the value isn't needed anymore.
> -; See testcases: load-and-test-fp-1.c and load-and-test-fp-2.c
> +; load and test instructions turn a signaling NaN into a quiet NaN.  Thus they
> +; may only be used if the target register is dead afterwards or if fast math
> +; is enabled.  The former is done via a peephole optimization.  Note, load and
> +; test instructions may only be used for (in)equality comparisons because
> +; relational comparisons must treat a quiet NaN like a signaling NaN which is
> +; not the case for load and test instructions.  For fast math insn
> +; "cmp<mode>_ccs_0_fastmath" applies.
> +; See testcases load-and-test-fp-{1,2}.c
> +
> +(define_peephole2
> +  [(set (match_operand:FP 0 "register_operand")
> +	(match_operand:FP 1 "const0_operand"))
> +   (set (reg:CCZ CC_REGNUM)
> +	(compare:CCZ (match_operand:FP 2 "register_operand")
> +		     (match_operand:FP 3 "register_operand")))]
> +  "TARGET_HARD_FLOAT
> +   && FP_REG_P (operands[2])
> +   && REGNO (operands[0]) == REGNO (operands[3])
> +   && peep2_reg_dead_p (2, operands[0])
> +   && peep2_reg_dead_p (2, operands[2])"
> +  [(parallel
> +    [(set (reg:CCZ CC_REGNUM)
> +	  (match_op_dup 4 [(match_dup 2) (match_dup 1)]))
> +     (clobber (match_dup 2))])]
> +  "operands[4] = gen_rtx_COMPARE (CCZmode, operands[2], operands[1]);")

Couldn't this be written as:

 [(parallel
    [(set (reg:CCZ CC_REGNUM)
	  (compare:CCZ (match_dup 2) (match_dup 1)))
     (clobber (match_dup 2))])])

>  
>  ; ltxbr, ltdbr, ltebr, ltxtr, ltdtr
> -(define_insn "*cmp<mode>_ccs_0"
> -  [(set (reg CC_REGNUM)
> -	(compare (match_operand:FP 0 "register_operand"  "f")
> -		 (match_operand:FP 1 "const0_operand"    "")))
> -   (clobber (match_operand:FP      2 "register_operand" "=0"))]
> -  "s390_match_ccmode(insn, CCSmode) && TARGET_HARD_FLOAT"
> +(define_insn "*cmp<mode>_ccz_0"
> +  [(set (reg:CCZ CC_REGNUM)
> +	(compare:CCZ (match_operand:FP 0 "register_operand" "f")
> +		     (match_operand:FP 1 "const0_operand")))
> +   (clobber (match_operand:FP 2 "register_operand" "=0"))]
> +  "TARGET_HARD_FLOAT"
>    "lt<xde><bt>r\t%0,%0"
>     [(set_attr "op_type" "RRE")
>      (set_attr "type"  "fsimp<mode>")])
>  
> +(define_insn "*cmp<mode>_ccs_0_fastmath"
> +  [(set (reg CC_REGNUM)
> +	(compare (match_operand:FP 0 "register_operand" "f")
> +		 (match_operand:FP 1 "const0_operand")))]
> +  "s390_match_ccmode (insn, CCSmode)
> +   && TARGET_HARD_FLOAT
> +   && !flag_trapping_math
> +   && !flag_signaling_nans"
> +  "lt<xde><bt>r\t%0,%0"
> +  [(set_attr "op_type" "RRE")
> +   (set_attr "type" "fsimp<mode>")])
> +
>  ; VX: TFmode in FPR pairs: use cxbr instead of wfcxb
>  ; cxtr, cdtr, cxbr, cdbr, cebr, cdb, ceb, wfcsb, wfcdb
>  (define_insn "*cmp<mode>_ccs"
> diff --git a/gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c b/gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c
> index 2a7e88c0f1b..ebb8a88c574 100644
> --- a/gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c
> +++ b/gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c
> @@ -1,17 +1,12 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O3 -mzarch" } */

I think -march=z196 would be needed here as well. Otherwise the 0.0 floating-point constant won't
survive until the peephole2 pass. We don't accept FP zeroes in reload for machines earlier than z196. See
legitimate_reload_fp_constant_p.

It should be ok as is for load-and-test-fp-2.c. There the comparison pattern supporting a FP zero is
matched right from the start.

>  
> -/* a is used after the comparison.  We cannot use load and test here
> -   since it would turn SNaNs into QNaNs.  */
> +/* Use load-and-test instructions if compared for (in)equality and if variable
> +   `a` is dead after the comparison.  For all other cases use
> +   compare-and-signal instructions.  */
>  
> -double gl;
> +#include "load-and-test-fp.h"
>  
> -double
> -foo (double dummy, double a)
> -{
> -  if (a == 0.0)
> -    gl = 1;
> -  return a;
> -}
> -
> -/* { dg-final { scan-assembler {\tcdbr?\t} } } */
> +/* { dg-final { scan-assembler-times "ltdbr\t" 2 } } */
> +/* { dg-final { scan-assembler-times "cdbr\t" 2 } } */
> +/* { dg-final { scan-assembler-times "kdbr\t" 8 } } */
> diff --git a/gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c b/gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c
> index 7646fdd5def..53dab3c4424 100644
> --- a/gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c
> +++ b/gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c
> @@ -1,16 +1,9 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O3" } */
> +/* { dg-options "-O3 -mzarch -ffast-math" } */
>  
> -/* a is not used after the comparison.  So we should use load and test
> -   here.  */
> +/* Fast-math implies -fno-trapping-math -fno-signaling-nans which imply
> +   that no user visible trap will happen.  */
>  
> -double gl;
> +#include "load-and-test-fp.h"
>  
> -void
> -bar (double a)
> -{
> -  if (a == 0.0)
> -    gl = 1;
> -}
> -
> -/* { dg-final { scan-assembler "ltdbr\t" } } */
> +/* { dg-final { scan-assembler-times "ltdbr\t" 12 } } */
> diff --git a/gcc/testsuite/gcc.target/s390/load-and-test-fp.h b/gcc/testsuite/gcc.target/s390/load-and-test-fp.h
> new file mode 100644
> index 00000000000..f153d96698d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/load-and-test-fp.h
> @@ -0,0 +1,12 @@
> +double gl;
> +
> +#define test(N, CMP) \
> +  void   N ## _dead(double a) { if (a CMP 0.0) gl = 1; } \
> +  double N ## _live(double a) { if (a CMP 0.0) gl = 1; return a; }
> +
> +test(eq, ==)
> +test(ne, !=)
> +test(ge, >=)
> +test(gt, >)
> +test(le, <=)
> +test(lt, <)
> 


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] IBM Z: Try to make use of load-and-test instructions
  2020-09-21 16:51 ` Andreas Krebbel
@ 2020-09-22 12:03   ` Stefan Schulze Frielinghaus
  0 siblings, 0 replies; 3+ messages in thread
From: Stefan Schulze Frielinghaus @ 2020-09-22 12:03 UTC (permalink / raw)
  To: Andreas Krebbel; +Cc: gcc-patches

On Mon, Sep 21, 2020 at 06:51:00PM +0200, Andreas Krebbel wrote:
> On 18.09.20 13:10, Stefan Schulze Frielinghaus wrote:
> > This patch enables a peephole2 optimization which transforms a load of
> > constant zero into a temporary register which is then finally used to
> > compare against a floating-point register of interest into a single load
> > and test instruction.  However, the optimization is only applied if both
> > registers are dead afterwards and if we test for (in)equality only.
> > This is relaxed in case of fast math.
> > 
> > This is a follow up to PR88856.
> > 
> > Bootstrapped and regtested on IBM Z.
> > 
> > gcc/ChangeLog:
> > 
> > 	* config/s390/s390.md ("*cmp<mode>_ccs_0", "*cmp<mode>_ccz_0",
> > 	"*cmp<mode>_ccs_0_fastmath"): Basically change "*cmp<mode>_ccs_0" into
> > 	"*cmp<mode>_ccz_0" and for fast math add "*cmp<mode>_ccs_0_fastmath".
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > 	* gcc.target/s390/load-and-test-fp-1.c: Change test to include all
> > 	possible combinations of dead/live registers and comparisons (equality,
> > 	relational).
> > 	* gcc.target/s390/load-and-test-fp-2.c: Same as load-and-test-fp-1.c
> > 	but for fast math.
> > 	* gcc.target/s390/load-and-test-fp.h: New test included by
> > 	load-and-test-fp-{1,2}.c.
> 
> Ok for mainline. Please see below for some comments.

Pushed with the mentioned changes in commit 1a84651d164.

Thanks for the review!

Cheers,
Stefan

> 
> Thanks!
> 
> Andreas
> 
> > ---
> >  gcc/config/s390/s390.md                       | 54 +++++++++++++++----
> >  .../gcc.target/s390/load-and-test-fp-1.c      | 19 +++----
> >  .../gcc.target/s390/load-and-test-fp-2.c      | 17 ++----
> >  .../gcc.target/s390/load-and-test-fp.h        | 12 +++++
> >  4 files changed, 67 insertions(+), 35 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/s390/load-and-test-fp.h
> > 
> > diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> > index 4c3e5400a2b..e591aa7c324 100644
> > --- a/gcc/config/s390/s390.md
> > +++ b/gcc/config/s390/s390.md
> > @@ -1391,23 +1391,55 @@
> >  ; (TF|DF|SF|TD|DD|SD) instructions
> >  
> >  
> > -; FIXME: load and test instructions turn SNaN into QNaN what is not
> > -; acceptable if the target will be used afterwards.  On the other hand
> > -; they are quite convenient for implementing comparisons with 0.0. So
> > -; try to enable them via splitter/peephole if the value isn't needed anymore.
> > -; See testcases: load-and-test-fp-1.c and load-and-test-fp-2.c
> > +; load and test instructions turn a signaling NaN into a quiet NaN.  Thus they
> > +; may only be used if the target register is dead afterwards or if fast math
> > +; is enabled.  The former is done via a peephole optimization.  Note, load and
> > +; test instructions may only be used for (in)equality comparisons because
> > +; relational comparisons must treat a quiet NaN like a signaling NaN which is
> > +; not the case for load and test instructions.  For fast math insn
> > +; "cmp<mode>_ccs_0_fastmath" applies.
> > +; See testcases load-and-test-fp-{1,2}.c
> > +
> > +(define_peephole2
> > +  [(set (match_operand:FP 0 "register_operand")
> > +	(match_operand:FP 1 "const0_operand"))
> > +   (set (reg:CCZ CC_REGNUM)
> > +	(compare:CCZ (match_operand:FP 2 "register_operand")
> > +		     (match_operand:FP 3 "register_operand")))]
> > +  "TARGET_HARD_FLOAT
> > +   && FP_REG_P (operands[2])
> > +   && REGNO (operands[0]) == REGNO (operands[3])
> > +   && peep2_reg_dead_p (2, operands[0])
> > +   && peep2_reg_dead_p (2, operands[2])"
> > +  [(parallel
> > +    [(set (reg:CCZ CC_REGNUM)
> > +	  (match_op_dup 4 [(match_dup 2) (match_dup 1)]))
> > +     (clobber (match_dup 2))])]
> > +  "operands[4] = gen_rtx_COMPARE (CCZmode, operands[2], operands[1]);")
> 
> Couldn't this be written as:
> 
>  [(parallel
>     [(set (reg:CCZ CC_REGNUM)
> 	  (compare:CCZ (match_dup 2) (match_dup 1)))
>      (clobber (match_dup 2))])])
> 
> >  
> >  ; ltxbr, ltdbr, ltebr, ltxtr, ltdtr
> > -(define_insn "*cmp<mode>_ccs_0"
> > -  [(set (reg CC_REGNUM)
> > -	(compare (match_operand:FP 0 "register_operand"  "f")
> > -		 (match_operand:FP 1 "const0_operand"    "")))
> > -   (clobber (match_operand:FP      2 "register_operand" "=0"))]
> > -  "s390_match_ccmode(insn, CCSmode) && TARGET_HARD_FLOAT"
> > +(define_insn "*cmp<mode>_ccz_0"
> > +  [(set (reg:CCZ CC_REGNUM)
> > +	(compare:CCZ (match_operand:FP 0 "register_operand" "f")
> > +		     (match_operand:FP 1 "const0_operand")))
> > +   (clobber (match_operand:FP 2 "register_operand" "=0"))]
> > +  "TARGET_HARD_FLOAT"
> >    "lt<xde><bt>r\t%0,%0"
> >     [(set_attr "op_type" "RRE")
> >      (set_attr "type"  "fsimp<mode>")])
> >  
> > +(define_insn "*cmp<mode>_ccs_0_fastmath"
> > +  [(set (reg CC_REGNUM)
> > +	(compare (match_operand:FP 0 "register_operand" "f")
> > +		 (match_operand:FP 1 "const0_operand")))]
> > +  "s390_match_ccmode (insn, CCSmode)
> > +   && TARGET_HARD_FLOAT
> > +   && !flag_trapping_math
> > +   && !flag_signaling_nans"
> > +  "lt<xde><bt>r\t%0,%0"
> > +  [(set_attr "op_type" "RRE")
> > +   (set_attr "type" "fsimp<mode>")])
> > +
> >  ; VX: TFmode in FPR pairs: use cxbr instead of wfcxb
> >  ; cxtr, cdtr, cxbr, cdbr, cebr, cdb, ceb, wfcsb, wfcdb
> >  (define_insn "*cmp<mode>_ccs"
> > diff --git a/gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c b/gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c
> > index 2a7e88c0f1b..ebb8a88c574 100644
> > --- a/gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c
> > +++ b/gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c
> > @@ -1,17 +1,12 @@
> >  /* { dg-do compile } */
> >  /* { dg-options "-O3 -mzarch" } */
> 
> I think -march=z196 would be needed here as well. Otherwise the 0.0 floating point constant won't
> survive until peephole pass. We don't accept FP zeroes in reload for machines earlier than z196. See
> legitimate_reload_fp_constant_p.
> 
> It should be ok as is for load-and-test-fp-2.c. There the comparison pattern supporting a FP zero is
> matched right from the start.
> 
> >  
> > -/* a is used after the comparison.  We cannot use load and test here
> > -   since it would turn SNaNs into QNaNs.  */
> > +/* Use load-and-test instructions if compared for (in)equality and if variable
> > +   `a` is dead after the comparison.  For all other cases use
> > +   compare-and-signal instructions.  */
> >  
> > -double gl;
> > +#include "load-and-test-fp.h"
> >  
> > -double
> > -foo (double dummy, double a)
> > -{
> > -  if (a == 0.0)
> > -    gl = 1;
> > -  return a;
> > -}
> > -
> > -/* { dg-final { scan-assembler {\tcdbr?\t} } } */
> > +/* { dg-final { scan-assembler-times "ltdbr\t" 2 } } */
> > +/* { dg-final { scan-assembler-times "cdbr\t" 2 } } */
> > +/* { dg-final { scan-assembler-times "kdbr\t" 8 } } */
> > diff --git a/gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c b/gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c
> > index 7646fdd5def..53dab3c4424 100644
> > --- a/gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c
> > +++ b/gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c
> > @@ -1,16 +1,9 @@
> >  /* { dg-do compile } */
> > -/* { dg-options "-O3" } */
> > +/* { dg-options "-O3 -mzarch -ffast-math" } */
> >  
> > -/* a is not used after the comparison.  So we should use load and test
> > -   here.  */
> > +/* Fast-math implies -fno-trapping-math -fno-signaling-nans which imply
> > +   that no user visible trap will happen.  */
> >  
> > -double gl;
> > +#include "load-and-test-fp.h"
> >  
> > -void
> > -bar (double a)
> > -{
> > -  if (a == 0.0)
> > -    gl = 1;
> > -}
> > -
> > -/* { dg-final { scan-assembler "ltdbr\t" } } */
> > +/* { dg-final { scan-assembler-times "ltdbr\t" 12 } } */
> > diff --git a/gcc/testsuite/gcc.target/s390/load-and-test-fp.h b/gcc/testsuite/gcc.target/s390/load-and-test-fp.h
> > new file mode 100644
> > index 00000000000..f153d96698d
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/s390/load-and-test-fp.h
> > @@ -0,0 +1,12 @@
> > +double gl;
> > +
> > +#define test(N, CMP) \
> > +  void   N ## _dead(double a) { if (a CMP 0.0) gl = 1; } \
> > +  double N ## _live(double a) { if (a CMP 0.0) gl = 1; return a; }
> > +
> > +test(eq, ==)
> > +test(ne, !=)
> > +test(ge, >=)
> > +test(gt, >)
> > +test(le, <=)
> > +test(lt, <)
> > 
> 

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-09-22 12:03 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-18 11:10 [PATCH] IBM Z: Try to make use of load-and-test instructions Stefan Schulze Frielinghaus
2020-09-21 16:51 ` Andreas Krebbel
2020-09-22 12:03   ` Stefan Schulze Frielinghaus

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).