public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] rs6000: Split movsf_from_si from high word before reload[PR89310]
@ 2020-07-06  2:17 Xionghu Luo
  2020-07-07  0:18 ` Segher Boessenkool
  0 siblings, 1 reply; 16+ messages in thread
From: Xionghu Luo @ 2020-07-06  2:17 UTC (permalink / raw)
  To: gcc-patches; +Cc: segher, dje.gcc, wschmidt, guojiufu, linkw, Xionghu Luo

For extracting high part element from DImode register like:

{%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

split it before reload with "and mask" to avoid generating shift right
32 bit then shift left 32 bit.

srdi 3,3,32
sldi 9,3,32
mtvsrd 1,9
xscvspdpn 1,1

=>

rldicr 3,3,0,31
mtvsrd 1,3
xscvspdpn 1,1

gcc/ChangeLog:

2020-07-03  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* config/rs6000/rs6000.md (movsf_from_si2): New
	define_insn_and_split.

gcc/testsuite/ChangeLog:

2020-07-03  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* gcc.target/powerpc/pr89310.c: New test.
---
 gcc/config/rs6000/rs6000.md                | 63 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr89310.c | 17 ++++++
 2 files changed, 80 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr89310.c

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4fcd6a94022..8d51de07594 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7593,6 +7593,69 @@ (define_insn_and_split "movsf_from_si"
 	    "*,          *,         p9v,       p8v,       *,         *,
 	     p8v,        p8v,       p8v,       *")])
 
+;; For extracting high part element from DImode register like:
+;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
+;; split it before reload with "and mask" to avoid generating shift right
+;; 32 bit then shift left 32 bit.
+(define_insn_and_split "movsf_from_si2"
+  [(set (match_operand:SF 0 "nonimmediate_operand"
+	    "=!r,       f,         v,         wa,        m,         Z,
+	     Z,         wa,        ?r,        !r")
+	    (unspec:SF [
+	     (subreg:SI (ashiftrt:DI
+	       (match_operand:DI 1 "input_operand"
+	   "m,         m,         wY,        Z,         r,         f,
+	   wa,        r,         wa,        r")
+	  (const_int 32)) 0)]
+		   UNSPEC_SF_FROM_SI))
+  (clobber (match_scratch:DI 2
+	    "=X,        X,         X,         X,         X,         X,
+	    X,         r,         X,         X"))]
+  "TARGET_NO_SF_SUBREG
+   && (register_operand (operands[0], SFmode)
+       && register_operand (operands[1], DImode))"
+  "@
+   lwz%U1%X1 %0,%1
+   lfs%U1%X1 %0,%1
+   lxssp %0,%1
+   lxsspx %x0,%y1
+   stw%U0%X0 %1,%0
+   stfiwx %1,%y0
+   stxsiwx %x1,%y0
+   #
+   mfvsrwz %0,%x1
+   mr %0,%1"
+
+  "&& !reload_completed
+   && vsx_reg_sfsubreg_ok (operands[0], SFmode)"
+  [(const_int 0)]
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx tmp = gen_reg_rtx (DImode);
+
+  /* Avoid split {r155:SI#0=unspec[r133:DI>>0x20#0] 86;clobber scratch;} from PR42745.  */
+  if (!SUBREG_P (operands[0]))
+    {
+      rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
+      emit_insn (gen_anddi3 (tmp, op1, mask));
+      emit_insn (gen_p8_mtvsrd_sf (op0, tmp));
+      emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+      DONE;
+    }
+  else
+    FAIL;
+}
+  [(set_attr "length"
+	    "*,          *,         *,         *,         *,         *,
+	     *,          12,        *,         *")
+   (set_attr "type"
+	    "load,       fpload,    fpload,    fpload,    store,     fpstore,
+	     fpstore,    vecfloat,  mffgpr,    *")
+   (set_attr "isa"
+	    "*,          *,         p9v,       p8v,       *,         *,
+	     p8v,        p8v,       p8v,       *")])
+
 \f
 ;; Move 64-bit binary/decimal floating point
 (define_expand "mov<mode>"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr89310.c b/gcc/testsuite/gcc.target/powerpc/pr89310.c
new file mode 100644
index 00000000000..15e78509246
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct s {
+  int i;
+  float f;
+};
+
+float
+foo (struct s arg)
+{
+  return arg.f;
+}
+
+/* { dg-final { scan-assembler-not {\msrdi\M} } } */
+/* { dg-final { scan-assembler-not {\msldi\M} } } */
+/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */
-- 
2.21.0.777.g83232e3864


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] rs6000: Split movsf_from_si from high word before reload[PR89310]
  2020-07-06  2:17 [PATCH] rs6000: Split movsf_from_si from high word before reload[PR89310] Xionghu Luo
@ 2020-07-07  0:18 ` Segher Boessenkool
  2020-07-07  8:39   ` luoxhu
  0 siblings, 1 reply; 16+ messages in thread
From: Segher Boessenkool @ 2020-07-07  0:18 UTC (permalink / raw)
  To: Xionghu Luo; +Cc: gcc-patches, dje.gcc, wschmidt, guojiufu, linkw

Hi!

On Sun, Jul 05, 2020 at 09:17:57PM -0500, Xionghu Luo wrote:
> For extracting high part element from DImode register like:
> 
> {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
> 
> split it before reload with "and mask" to avoid generating shift right
> 32 bit then shift left 32 bit.
> 
> srdi 3,3,32
> sldi 9,3,32
> mtvsrd 1,9
> xscvspdpn 1,1
> 
> =>
> 
> rldicr 3,3,0,31
> mtvsrd 1,3
> xscvspdpn 1,1

Great :-)

> +;; For extracting high part element from DImode register like:
> +;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
> +;; split it before reload with "and mask" to avoid generating shift right
> +;; 32 bit then shift left 32 bit.
> +(define_insn_and_split "movsf_from_si2"
> +  [(set (match_operand:SF 0 "nonimmediate_operand"
> +	    "=!r,       f,         v,         wa,        m,         Z,
> +	     Z,         wa,        ?r,        !r")
> +	    (unspec:SF [
> +	     (subreg:SI (ashiftrt:DI
> +	       (match_operand:DI 1 "input_operand"
> +	   "m,         m,         wY,        Z,         r,         f,
> +	   wa,        r,         wa,        r")
> +	  (const_int 32)) 0)]
> +		   UNSPEC_SF_FROM_SI))
> +  (clobber (match_scratch:DI 2
> +	    "=X,        X,         X,         X,         X,         X,
> +	    X,         r,         X,         X"))]
> +  "TARGET_NO_SF_SUBREG
> +   && (register_operand (operands[0], SFmode)
> +       && register_operand (operands[1], DImode))"

If the insn condition requires operands 0 and 1 to be register_operands,
it can ask for that in the predicates, instead: not nonimmediate_operand
and input_operand, but just gpc_reg_operand instead.  You can leave out
the impossible alternatives as well (0, 1, 2, 3, 4, 5, 6), leaving just

(define_insn_and_split "movsf_from_si2"
  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r,!r")
	(unspec:SF
	  [(subreg:SI (ashiftrt:DI
			(match_operand:DI 1 "input_operand" "r,wa,r")
			(const_int 32))
		      0)]
	  UNSPEC_SF_FROM_SI)))]
  "TARGET_NO_SF_SUBREG"
  "@
   #
   mfvsrwz %0,%x1
   mr %0,%1"

  "&& !reload_completed
   && vsx_reg_sfsubreg_ok (operands[0], SFmode)"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp = gen_reg_rtx (DImode);

You cannot call gen_reg_rtx too late in the pass pipeline.  What we
usually do for such cases is put it as a match_scratch in the pattern,
and then do code like

  if (GET_CODE (operands[2]) == SCRATCH)
    operands[2] = gen_reg_rtx (DImode);

so that it will work both before and after reload.

  /* Avoid split {r155:SI#0=unspec[r133:DI>>0x20#0] 86;clobber scratch;} from PR42745.  */

(This line is too long, btw.)

  if (!SUBREG_P (operands[0]))
    {
      rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
      emit_insn (gen_anddi3 (tmp, op1, mask));
      emit_insn (gen_p8_mtvsrd_sf (op0, tmp));
      emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
      DONE;
    }
  else
    FAIL;
}
  [(set_attr "length" "12,*,*")
   (set_attr "type" "vecfloat,mffgpr,*")
   (set_attr "isa" "p8v,p8v,*")])

I wonder what will happen if you actually do FAIL here...  There then is
no insn alternative that can match, so we ICE?  In that case, just leave
out the whole FAIL thing, it is useless?  You can do a gcc_assert if you
want to check something.

Oh, and maybe you only want to handle GPRs here, not VSRs?  So just the
"r", not the "wa" at all?  What code would it generate for vector regs?

Lots of questions, sorry!


Segher

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] rs6000: Split movsf_from_si from high word before reload[PR89310]
  2020-07-07  0:18 ` Segher Boessenkool
@ 2020-07-07  8:39   ` luoxhu
  2020-07-07 21:31     ` Segher Boessenkool
  0 siblings, 1 reply; 16+ messages in thread
From: luoxhu @ 2020-07-07  8:39 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: gcc-patches, dje.gcc, wschmidt, guojiufu, linkw



On 2020/7/7 08:18, Segher Boessenkool wrote:
> Hi!
> 
> On Sun, Jul 05, 2020 at 09:17:57PM -0500, Xionghu Luo wrote:
>> For extracting high part element from DImode register like:
>>
>> {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
>>
>> split it before reload with "and mask" to avoid generating shift right
>> 32 bit then shift left 32 bit.
>>
>> srdi 3,3,32
>> sldi 9,3,32
>> mtvsrd 1,9
>> xscvspdpn 1,1
>>
>> =>
>>
>> rldicr 3,3,0,31
>> mtvsrd 1,3
>> xscvspdpn 1,1
> 
> Great :-)
> 
>> +;; For extracting high part element from DImode register like:
>> +;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
>> +;; split it before reload with "and mask" to avoid generating shift right
>> +;; 32 bit then shift left 32 bit.
>> +(define_insn_and_split "movsf_from_si2"
>> +  [(set (match_operand:SF 0 "nonimmediate_operand"
>> +	    "=!r,       f,         v,         wa,        m,         Z,
>> +	     Z,         wa,        ?r,        !r")
>> +	    (unspec:SF [
>> +	     (subreg:SI (ashiftrt:DI
>> +	       (match_operand:DI 1 "input_operand"
>> +	   "m,         m,         wY,        Z,         r,         f,
>> +	   wa,        r,         wa,        r")
>> +	  (const_int 32)) 0)]
>> +		   UNSPEC_SF_FROM_SI))
>> +  (clobber (match_scratch:DI 2
>> +	    "=X,        X,         X,         X,         X,         X,
>> +	    X,         r,         X,         X"))]
>> +  "TARGET_NO_SF_SUBREG
>> +   && (register_operand (operands[0], SFmode)
>> +       && register_operand (operands[1], DImode))"
> 
> If the insn condition requires operands 0 and 1 to be register_operands,
> it can ask for that in the predicates, instead: not nonimmediate_operand
> and input_operand, but just gpc_reg_operand instead.  You can leave out
> the impossible alternatives as well (0, 1, 2, 3, 4, 5, 6), leaving just
> 
> (define_insn_and_split "movsf_from_si2"
>    [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r,!r")
> 	(unspec:SF
> 	  [(subreg:SI (ashiftrt:DI
> 			(match_operand:DI 1 "input_operand" "r,wa,r")
> 			(const_int 32))
> 		      0)]
> 	  UNSPEC_SF_FROM_SI)))]
>    "TARGET_NO_SF_SUBREG"
>    "@
>     #
>     mfvsrwz %0,%x1
>     mr %0,%1"
> 
>    "&& !reload_completed
>     && vsx_reg_sfsubreg_ok (operands[0], SFmode)"
>    [(const_int 0)]
> {
>    rtx op0 = operands[0];
>    rtx op1 = operands[1];
>    rtx tmp = gen_reg_rtx (DImode);
> 
> You cannot call gen_reg_rtx too late in the pass pipeline.  What we
> usually do for such cases is put it as a match_scratch in the pattern,
> and then do code like
> 
>    if (GET_CODE (operands[2]) == SCRATCH)
>      operands[2] = gen_reg_rtx (DImode);
> 
> so that it will work both before and after reload.
> 
>    /* Avoid split {r155:SI#0=unspec[r133:DI>>0x20#0] 86;clobber scratch;} from PR42745.  */
> 
> (This line is too long, btw.)
> 
>    if (!SUBREG_P (operands[0]))
>      {
>        rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
>        emit_insn (gen_anddi3 (tmp, op1, mask));
>        emit_insn (gen_p8_mtvsrd_sf (op0, tmp));
>        emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
>        DONE;
>      }
>    else
>      FAIL;
> }
>    [(set_attr "length" "12,*,*")
>     (set_attr "type" "vecfloat,mffgpr,*")
>     (set_attr "isa" "p8v,p8v,*")])
> 
> I wonder what will happen if you actually do FAIL here...  There then is
> no insn alternative that can match, so we ICE?  In that case, just leave
> out the whole FAIL thing, it is useless?  You can do a gcc_assert if you
> want to check something.
> 
> Oh, and maybe you only want to handle GPRs here, not VSRs?  So just the
> "r", not the "wa" at all?  What code would it generate for vector regs?
> 
> Lots of questions, sorry!

Thanks for the nice suggestions of the initial patch contains many issues:),

For this case, %1:SF matches with "=wa"?  And how to construct cases to
match("=?r", "wa") and ("=!r", "r") combinations, please?

Removed lots of copy-paste from "movsf_from_si" and update the patch with 
your comments:


For extracting high part element from DImode register like:

{%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

split it before reload with "and mask" to avoid generating shift right
32 bit then shift left 32 bit.  This pattern also exists in PR42475 and
PR67741, etc.

srdi 3,3,32
sldi 9,3,32
mtvsrd 1,9
xscvspdpn 1,1

=>

rldicr 3,3,0,31
mtvsrd 1,3
xscvspdpn 1,1

Bootstrap and regression tested pass on Power8-LE.

gcc/ChangeLog:

2020-07-07  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* config/rs6000/rs6000.md (movsf_from_si2): New
	define_insn_and_split.

gcc/testsuite/ChangeLog:

2020-07-07  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* gcc.target/powerpc/pr89310.c: New test.
---
 gcc/config/rs6000/rs6000.md                | 40 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr89310.c | 17 +++++++++
 2 files changed, 57 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr89310.c

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4fcd6a94022..d4af19375aa 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7593,6 +7593,46 @@ (define_insn_and_split "movsf_from_si"
 	    "*,          *,         p9v,       p8v,       *,         *,
 	     p8v,        p8v,       p8v,       *")])
 
+;; For extracting high part element from DImode register like:
+;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
+;; split it before reload with "and mask" to avoid generating shift right
+;; 32 bit then shift left 32 bit.
+(define_insn_and_split "movsf_from_si2"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r,!r")
+	    (unspec:SF [
+	     (subreg:SI (ashiftrt:DI
+	       (match_operand:DI 1 "input_operand" "r,wa,r")
+	       (const_int 32))
+	      0)]
+	     UNSPEC_SF_FROM_SI))
+	    (clobber (match_scratch:DI 2 "=r,X,X"))]
+  "TARGET_NO_SF_SUBREG"
+  "@
+  #
+  mfvsrwz %0,%x1
+  mr %0,%1"
+
+  "&& !reload_completed
+   && vsx_reg_sfsubreg_ok (operands[0], SFmode)"
+  [(const_int 0)]
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+
+  if (GET_CODE (operands[2]) == SCRATCH)
+    op2 = gen_reg_rtx (DImode);
+
+  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
+  emit_insn (gen_anddi3 (op2, op1, mask));
+  emit_insn (gen_p8_mtvsrd_sf (op0, op2));
+  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+  DONE;
+}
+  [(set_attr "length" "12,*,*")
+  (set_attr "type" "vecfloat,mffgpr,*")
+  (set_attr "isa" "p8v,p8v,*")])
+
 
 ;; Move 64-bit binary/decimal floating point
 (define_expand "mov<mode>"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr89310.c b/gcc/testsuite/gcc.target/powerpc/pr89310.c
new file mode 100644
index 00000000000..15e78509246
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct s {
+  int i;
+  float f;
+};
+
+float
+foo (struct s arg)
+{
+  return arg.f;
+}
+
+/* { dg-final { scan-assembler-not {\msrdi\M} } } */
+/* { dg-final { scan-assembler-not {\msldi\M} } } */
+/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */
-- 
2.21.0.777.g83232e3864

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] rs6000: Split movsf_from_si from high word before reload[PR89310]
  2020-07-07  8:39   ` luoxhu
@ 2020-07-07 21:31     ` Segher Boessenkool
  2020-07-08  3:19       ` luoxhu
  0 siblings, 1 reply; 16+ messages in thread
From: Segher Boessenkool @ 2020-07-07 21:31 UTC (permalink / raw)
  To: luoxhu; +Cc: gcc-patches, dje.gcc, wschmidt, guojiufu, linkw

Hi!

On Tue, Jul 07, 2020 at 04:39:58PM +0800, luoxhu wrote:
> > Lots of questions, sorry!
> 
> Thanks for the nice suggestions of the initial patch contains many issues:),

Pretty much all of it should *work*, it just can be improved and
simplified quite a bit :-)

> For this case, %1:SF matches with "=wa"?  And how to construct cases to
> match("=?r", "wa") and ("=!r", "r") combinations, please?

operands[0], not operands[1]?

Simple testcases will not put the output into a GPR, unless you force
the compiler to do that, because of the ? and !.

Often you can just do

  asm("#" : "+r"(x));

to force "x" into a GPR at that point of the program.  But there is
nothing stopping the compiler from copying it back to a VSR where it
thinks that is cheaper ;-)

So maybe this pattern should just have the GPR-to-VSR alternative?  It
does not look like the GPR destination variants are useful?

> +  rtx op0 = operands[0];
> +  rtx op1 = operands[1];
> +  rtx op2 = operands[2];

(Please just write out operands[N] everywhere).

> +  if (GET_CODE (operands[2]) == SCRATCH)
> +    op2 = gen_reg_rtx (DImode);
> +
> +  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
> +  emit_insn (gen_anddi3 (op2, op1, mask));

Groovy :-)

So, it looks like you can remove the ? and ! alternatives, leaving just
the first alternative?


Segher

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] rs6000: Split movsf_from_si from high word before reload[PR89310]
  2020-07-07 21:31     ` Segher Boessenkool
@ 2020-07-08  3:19       ` luoxhu
  2020-07-08 22:43         ` Segher Boessenkool
  0 siblings, 1 reply; 16+ messages in thread
From: luoxhu @ 2020-07-08  3:19 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: gcc-patches, dje.gcc, wschmidt, guojiufu, linkw



On 2020/7/8 05:31, Segher Boessenkool wrote:
> Hi!
> 
> On Tue, Jul 07, 2020 at 04:39:58PM +0800, luoxhu wrote:
>>> Lots of questions, sorry!
>>
>> Thanks for the nice suggestions of the initial patch contains many issues:),
> 
> Pretty much all of it should *work*, it just can be improved and
> simplified quite a bit :-)
> 
>> For this case, %1:SF matches with "=wa"?  And how to construct cases to
>> match("=?r", "wa") and ("=!r", "r") combinations, please?
> 
> operands[0], not operands[1]?
> 
> Simple testcases will not put the output into a GPR, unless you force
> the compiler to do that, because of the ? and !.
> 
> Often you can just do
> 
>    asm("#" : "+r"(x));
> 
> to force "x" into a GPR at that point of the program.  But there is
> nothing stopping the compiler from copying it back to a VSR where it
> thinks that is cheaper ;-)
> 
> So maybe this pattern should just have the GPR-to-VSR alternative?  It
> does not look like the GPR destination variants are useful?
> 
>> +  rtx op0 = operands[0];
>> +  rtx op1 = operands[1];
>> +  rtx op2 = operands[2];
> 
> (Please just write out operands[N] everywhere).
> 
>> +  if (GET_CODE (operands[2]) == SCRATCH)
>> +    op2 = gen_reg_rtx (DImode);
>> +
>> +  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
>> +  emit_insn (gen_anddi3 (op2, op1, mask));
> 
> Groovy :-)
> 
> So, it looks like you can remove the ? and ! alternatives, leaving just
> the first alternative?
> 

Thanks.

V3 Update: Leave only GPR-to-VSR alternative and use operands[N].
Bootstrap and regression tested pass on Power8-LE.


For extracting high part element from DImode register like:

{%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

split it before reload with "and mask" to avoid generating shift right
32 bit then shift left 32 bit.  This pattern also exists in PR42475 and
PR67741, etc.

srdi 3,3,32
sldi 9,3,32
mtvsrd 1,9
xscvspdpn 1,1

=>

rldicr 3,3,0,31
mtvsrd 1,3
xscvspdpn 1,1

gcc/ChangeLog:

2020-07-08  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* config/rs6000/rs6000.md (movsf_from_si2): New
	define_insn_and_split.

gcc/testsuite/ChangeLog:

2020-07-08  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* gcc.target/powerpc/pr89310.c: New test.
---
 gcc/config/rs6000/rs6000.md                | 34 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr89310.c | 17 +++++++++++
 2 files changed, 51 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr89310.c

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4fcd6a94022..02c5171378c 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7593,6 +7593,40 @@ (define_insn_and_split "movsf_from_si"
 	    "*,          *,         p9v,       p8v,       *,         *,
 	     p8v,        p8v,       p8v,       *")])
 
+;; For extracting high part element from DImode register like:
+;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
+;; split it before reload with "and mask" to avoid generating shift right
+;; 32 bit then shift left 32 bit.
+(define_insn_and_split "movsf_from_si2"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
+	    (unspec:SF [
+	     (subreg:SI (ashiftrt:DI
+	       (match_operand:DI 1 "input_operand" "r")
+	       (const_int 32))
+	      0)]
+	     UNSPEC_SF_FROM_SI))
+	    (clobber (match_scratch:DI 2 "=r"))]
+  "TARGET_NO_SF_SUBREG"
+  "@
+  #"
+
+  "&& !reload_completed
+   && vsx_reg_sfsubreg_ok (operands[0], SFmode)"
+  [(const_int 0)]
+{
+  if (GET_CODE (operands[2]) == SCRATCH)
+    operands[2] = gen_reg_rtx (DImode);
+
+  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
+  emit_insn (gen_anddi3 (operands[2], operands[1], mask));
+  emit_insn (gen_p8_mtvsrd_sf (operands[0], operands[2]));
+  emit_insn (gen_vsx_xscvspdpn_directmove (operands[0], operands[0]));
+  DONE;
+}
+  [(set_attr "length" "12")
+  (set_attr "type" "vecfloat")
+  (set_attr "isa" "p8v")])
+
 
 ;; Move 64-bit binary/decimal floating point
 (define_expand "mov<mode>"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr89310.c b/gcc/testsuite/gcc.target/powerpc/pr89310.c
new file mode 100644
index 00000000000..15e78509246
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct s {
+  int i;
+  float f;
+};
+
+float
+foo (struct s arg)
+{
+  return arg.f;
+}
+
+/* { dg-final { scan-assembler-not {\msrdi\M} } } */
+/* { dg-final { scan-assembler-not {\msldi\M} } } */
+/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */
-- 
2.21.0.777.g83232e3864


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] rs6000: Split movsf_from_si from high word before reload[PR89310]
  2020-07-08  3:19       ` luoxhu
@ 2020-07-08 22:43         ` Segher Boessenkool
  2020-07-09  3:09           ` luoxhu
  0 siblings, 1 reply; 16+ messages in thread
From: Segher Boessenkool @ 2020-07-08 22:43 UTC (permalink / raw)
  To: luoxhu; +Cc: gcc-patches, dje.gcc, wschmidt, guojiufu, linkw

Hi!

On Wed, Jul 08, 2020 at 11:19:21AM +0800, luoxhu wrote:
> For extracting high part element from DImode register like:
> 
> {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
> 
> split it before reload with "and mask" to avoid generating shift right
> 32 bit then shift left 32 bit.  This pattern also exists in PR42475 and
> PR67741, etc.

> 2020-07-08  Xionghu Luo  <luoxhu@linux.ibm.com>
> 
> 	PR rtl-optimization/89310
> 	* config/rs6000/rs6000.md (movsf_from_si2): New
> 	define_insn_and_split.
> 
> gcc/testsuite/ChangeLog:
> 
> 2020-07-08  Xionghu Luo  <luoxhu@linux.ibm.com>
> 
> 	PR rtl-optimization/89310
> 	* gcc.target/powerpc/pr89310.c: New test.

> +;; For extracting high part element from DImode register like:
> +;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
> +;; split it before reload with "and mask" to avoid generating shift right
> +;; 32 bit then shift left 32 bit.
> +(define_insn_and_split "movsf_from_si2"
> +  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
> +	    (unspec:SF [
> +	     (subreg:SI (ashiftrt:DI

Put the  (ashiftrt:DI  on a separate line as well?  With indent changes,
etc.

> +	       (match_operand:DI 1 "input_operand" "r")
> +	       (const_int 32))
> +	      0)]
> +	     UNSPEC_SF_FROM_SI))
> +	    (clobber (match_scratch:DI 2 "=r"))]
> +  "TARGET_NO_SF_SUBREG"
> +  "@
> +  #"

@ with only one alternative doesn't do anything; so just write
  "#"
please.

> +

And no empty line here.

> +  "&& !reload_completed

Why this?  Why will this not work after reload?  In the very few cases
where you do need this, you usually also need to check for
lra_in_progress.

> +   && vsx_reg_sfsubreg_ok (operands[0], SFmode)"
> +  [(const_int 0)]
> +{
> +  if (GET_CODE (operands[2]) == SCRATCH)
> +    operands[2] = gen_reg_rtx (DImode);
> +
> +  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);

The mask should be different for QI and HI.

> +  emit_insn (gen_anddi3 (operands[2], operands[1], mask));
> +  emit_insn (gen_p8_mtvsrd_sf (operands[0], operands[2]));
> +  emit_insn (gen_vsx_xscvspdpn_directmove (operands[0], operands[0]));
> +  DONE;
> +}
> +  [(set_attr "length" "12")
> +  (set_attr "type" "vecfloat")
> +  (set_attr "isa" "p8v")])
> +

No extra whiteline please.


Maybe change it back to just SI?  It won't match often at all for QI or
HI anyway, it seems.  Sorry for that detour.  Should be good with the
above nits fixed :-)


Segher

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] rs6000: Split movsf_from_si from high word before reload[PR89310]
  2020-07-08 22:43         ` Segher Boessenkool
@ 2020-07-09  3:09           ` luoxhu
  2020-07-09 19:25             ` Segher Boessenkool
  0 siblings, 1 reply; 16+ messages in thread
From: luoxhu @ 2020-07-09  3:09 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: gcc-patches, dje.gcc, wschmidt, guojiufu, linkw



On 2020/7/9 06:43, Segher Boessenkool wrote:
> Hi!
> 
> On Wed, Jul 08, 2020 at 11:19:21AM +0800, luoxhu wrote:
>> For extracting high part element from DImode register like:
>>
>> {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
>>
>> split it before reload with "and mask" to avoid generating shift right
>> 32 bit then shift left 32 bit.  This pattern also exists in PR42475 and
>> PR67741, etc.
> 
>> 2020-07-08  Xionghu Luo  <luoxhu@linux.ibm.com>
>>
>> 	PR rtl-optimization/89310
>> 	* config/rs6000/rs6000.md (movsf_from_si2): New
>> 	define_insn_and_split.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2020-07-08  Xionghu Luo  <luoxhu@linux.ibm.com>
>>
>> 	PR rtl-optimization/89310
>> 	* gcc.target/powerpc/pr89310.c: New test.
> 
>> +;; For extracting high part element from DImode register like:
>> +;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
>> +;; split it before reload with "and mask" to avoid generating shift right
>> +;; 32 bit then shift left 32 bit.
>> +(define_insn_and_split "movsf_from_si2"
>> +  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
>> +	    (unspec:SF [
>> +	     (subreg:SI (ashiftrt:DI
> 
> Put the  (ashiftrt:DI  on a separate line as well?  With indent changes,
> etc.
> 
>> +	       (match_operand:DI 1 "input_operand" "r")
>> +	       (const_int 32))
>> +	      0)]
>> +	     UNSPEC_SF_FROM_SI))
>> +	    (clobber (match_scratch:DI 2 "=r"))]
>> +  "TARGET_NO_SF_SUBREG"
>> +  "@
>> +  #"
> 
> @ with only one alternative doesn't do anything; so just write
>    "#"
> please.
> 
>> +
> 
> And no empty line here.
> 
>> +  "&& !reload_completed
> 
> Why this?  Why will this not work after reload?  In the very few cases
> where you do need this, you usually also need to check for
> lra_in_progress.
> 
>> +   && vsx_reg_sfsubreg_ok (operands[0], SFmode)"
>> +  [(const_int 0)]
>> +{
>> +  if (GET_CODE (operands[2]) == SCRATCH)
>> +    operands[2] = gen_reg_rtx (DImode);
>> +
>> +  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
> 
> The mask should be different for QI and HI.
> 
>> +  emit_insn (gen_anddi3 (operands[2], operands[1], mask));
>> +  emit_insn (gen_p8_mtvsrd_sf (operands[0], operands[2]));
>> +  emit_insn (gen_vsx_xscvspdpn_directmove (operands[0], operands[0]));
>> +  DONE;
>> +}
>> +  [(set_attr "length" "12")
>> +  (set_attr "type" "vecfloat")
>> +  (set_attr "isa" "p8v")])
>> +
> 
> No extra whiteline please.
> 
> 
> Maybe change it back to just SI?  It won't match often at all for QI or
> HI anyway, it seems.  Sorry for that detour.  Should be good with the
> above nits fixed :-)

OK, if I see correctly, subreg of DImode should be SImode and I used
subreg:SI to match only SI, so no need to consider mask for QI and HI? :)

Others are updated: removed reload_completed and adjust format. 


For extracting high part element from DImode register like:

{%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

split it before reload with "and mask" to avoid generating shift right
32 bit then shift left 32 bit.  This pattern also exists in PR42475 and
PR67741, etc.

srdi 3,3,32
sldi 9,3,32
mtvsrd 1,9
xscvspdpn 1,1

=>

rldicr 3,3,0,31
mtvsrd 1,3
xscvspdpn 1,1

Bootstrap and regression tested pass on Power8-LE.

gcc/ChangeLog:

2020-07-08  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* config/rs6000/rs6000.md (movsf_from_si2): New
	define_insn_and_split.

gcc/testsuite/ChangeLog:

2020-07-08  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* gcc.target/powerpc/pr89310.c: New test.
---
 gcc/config/rs6000/rs6000.md                | 31 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr89310.c | 17 ++++++++++++
 2 files changed, 48 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr89310.c

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4fcd6a94022..a493dfd4596 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7593,6 +7593,37 @@ (define_insn_and_split "movsf_from_si"
 	    "*,          *,         p9v,       p8v,       *,         *,
 	     p8v,        p8v,       p8v,       *")])
 
+;; For extracting high part element from DImode register like:
+;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
+;; split it before reload with "and mask" to avoid generating shift right
+;; 32 bit then shift left 32 bit.
+(define_insn_and_split "movsf_from_si2"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
+	    (unspec:SF [
+	     (subreg:SI (
+		 ashiftrt:DI (
+		   match_operand:DI 1 "input_operand" "r")
+		 (const_int 32))
+	      0)]
+	     UNSPEC_SF_FROM_SI))
+	    (clobber (match_scratch:DI 2 "=r"))]
+  "TARGET_NO_SF_SUBREG"
+  "#"
+  "&& vsx_reg_sfsubreg_ok (operands[0], SFmode)"
+  [(const_int 0)]
+{
+  if (GET_CODE (operands[2]) == SCRATCH)
+    operands[2] = gen_reg_rtx (DImode);
+
+  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
+  emit_insn (gen_anddi3 (operands[2], operands[1], mask));
+  emit_insn (gen_p8_mtvsrd_sf (operands[0], operands[2]));
+  emit_insn (gen_vsx_xscvspdpn_directmove (operands[0], operands[0]));
+  DONE;
+}
+  [(set_attr "length" "12")
+  (set_attr "type" "vecfloat")
+  (set_attr "isa" "p8v")])
 
 ;; Move 64-bit binary/decimal floating point
 (define_expand "mov<mode>"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr89310.c b/gcc/testsuite/gcc.target/powerpc/pr89310.c
new file mode 100644
index 00000000000..15e78509246
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct s {
+  int i;
+  float f;
+};
+
+float
+foo (struct s arg)
+{
+  return arg.f;
+}
+
+/* { dg-final { scan-assembler-not {\msrdi\M} } } */
+/* { dg-final { scan-assembler-not {\msldi\M} } } */
+/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */
-- 
2.21.0.777.g83232e3864


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] rs6000: Split movsf_from_si from high word before reload[PR89310]
  2020-07-09  3:09           ` luoxhu
@ 2020-07-09 19:25             ` Segher Boessenkool
  2020-07-10  1:39               ` [PATCH] rs6000: Define movsf_from_si2 to extract high part SF element from DImode[PR89310] luoxhu
  2020-07-10  7:41               ` [PATCH] rs6000: Split movsf_from_si from high word before reload[PR89310] luoxhu
  0 siblings, 2 replies; 16+ messages in thread
From: Segher Boessenkool @ 2020-07-09 19:25 UTC (permalink / raw)
  To: luoxhu; +Cc: gcc-patches, dje.gcc, wschmidt, guojiufu, linkw

Hi!

On Thu, Jul 09, 2020 at 11:09:42AM +0800, luoxhu wrote:
> > Maybe change it back to just SI?  It won't match often at all for QI or
> > HI anyway, it seems.  Sorry for that detour.  Should be good with the
> > above nits fixed :-)
> 
> OK, if I see correctly, subreg of DImode should be SImode and I used
> subreg:SI to match only SI, so no need to consider mask for QI and HI? :)

My problem with it was that the shift amounts won't be 32 for QI etc.?

> +;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
> +;; split it before reload with "and mask" to avoid generating shift right
> +;; 32 bit then shift left 32 bit.
> +(define_insn_and_split "movsf_from_si2"
> +  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
> +	    (unspec:SF [
> +	     (subreg:SI (
> +		 ashiftrt:DI (

Opening parens *start* a line, they never end it.  So:

(define_insn_and_split "movsf_from_si2"
  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
	  (unspec:SF
	    [(subreg:SI
	       (ashiftrt:DI
		 (match_operand:DI 1 "input_operand" "r")
		 (const_int 32))
	      0)]
	    UNSPEC_SF_FROM_SI))
   (clobber (match_scratch:DI 2 "=r"))]

or something like that.  There aren't really any real rules...  The
important points are that nested things should be indented, and things
at the same level should have the same indent (like the outer set and
clobber).  The [ for an unspec is sometimes put at the end of a line,
that reads a little better perhaps.

> +  "TARGET_NO_SF_SUBREG"
> +  "#"
> +  "&& vsx_reg_sfsubreg_ok (operands[0], SFmode)"

Put this in the insn condition?  And since this is just a predicate,
you can just use it instead of gpc_reg_operand.

(The split condition becomes "&& 1" then, not "").


Segher

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] rs6000: Define movsf_from_si2 to extract high part SF element from DImode[PR89310]
  2020-07-09 19:25             ` Segher Boessenkool
@ 2020-07-10  1:39               ` luoxhu
  2020-07-11  0:54                 ` Segher Boessenkool
  2020-07-10  7:41               ` [PATCH] rs6000: Split movsf_from_si from high word before reload[PR89310] luoxhu
  1 sibling, 1 reply; 16+ messages in thread
From: luoxhu @ 2020-07-10  1:39 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: gcc-patches, dje.gcc, wschmidt, guojiufu, linkw

Hi,

On 2020/7/10 03:25, Segher Boessenkool wrote:
> Hi!
> 
> On Thu, Jul 09, 2020 at 11:09:42AM +0800, luoxhu wrote:
>>> Maybe change it back to just SI?  It won't match often at all for QI or
>>> HI anyway, it seems.  Sorry for that detour.  Should be good with the
>>> above nits fixed :-)
>>
>> OK, if I see correctly, subreg of DImode should be SImode and I used
>> subreg:SI to match only SI, so no need to consider mask for QI and HI? :)
> 
> My problem with it was that the shift amounts won't be 32 for QI etc.?

But if the subreg:SI only accepts SImode comes in, why do we need handle the
shift amounts for QI/HI? It not using QHSI here...

> 
>> +;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
>> +;; split it before reload with "and mask" to avoid generating shift right
>> +;; 32 bit then shift left 32 bit.
>> +(define_insn_and_split "movsf_from_si2"
>> +  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
>> +	    (unspec:SF [
>> +	     (subreg:SI (
>> +		 ashiftrt:DI (
> 
> Opening parens *start* a line, they never end it.  So:
> 
> (define_insn_and_split "movsf_from_si2"
>    [(set (match_operand:SF 0 "vsx_reg_sfsubreg_ok" "=wa")
> 	  (unspec:SF
> 	    [(subreg:SI
> 	       (ashiftrt:DI
> 		 (match_operand:DI 1 "input_operand" "r")
> 		 (const_int 32))
> 	      0)]
> 	    UNSPEC_SF_FROM_SI))
>     (clobber (match_scratch:DI 2 "=r"))]
> 
> or something like that.  There aren't really any real rules...  The
> important points are that nested things should be indented, and things
> at the same level should have the same indent (like the outer set and
> clobber).  The [ for an unspec is sometimes put at the end of a line,
> that reads a little better perhaps.

OK, seems the md file needs a format tool too...

> 
>> +  "TARGET_NO_SF_SUBREG"
>> +  "#"
>> +  "&& vsx_reg_sfsubreg_ok (operands[0], SFmode)"
> 
> Put this in the insn condition?  And since this is just a predicate,
> you can just use it instead of gpc_reg_operand.
> 
> (The split condition becomes "&& 1" then, not "").

OK, this seems a bit strange as movsi_from_sf, movsf_from_si and
movdi_from_sf_zero_ext all use it as condition...

And why vsx_reg_sfsubreg_ok allows "SF SUBREGS" and TARGET_NO_SF_SUBREG
"avoid (SUBREG:SF (REG:SI)", I guess they are not the same meaning? (The 
TARGET_NO_SF_SUBREG is also copied from other similar defines.)  Thanks.


Updated patch and correct the title:


Define movsf_from_si2 to extract high part SF element from DImode[PR89310]


For extracting high part element from DImode register like:

{%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

split it before reload with "and mask" to avoid generating shift right
32 bit then shift left 32 bit.  This pattern also exists in PR42475 and
PR67741, etc.

srdi 3,3,32
sldi 9,3,32
mtvsrd 1,9
xscvspdpn 1,1

=>

rldicr 3,3,0,31
mtvsrd 1,3
xscvspdpn 1,1

Bootstrap and regression tested pass on Power8-LE.

gcc/ChangeLog:

2020-07-08  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* config/rs6000/rs6000.md (movsf_from_si2): New
	define_insn_and_split.

gcc/testsuite/ChangeLog:

2020-07-08  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* gcc.target/powerpc/pr89310.c: New test.
---
 gcc/config/rs6000/rs6000.md                | 32 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr89310.c | 17 ++++++++++++
 2 files changed, 49 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr89310.c

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4fcd6a94022..2331c84dcbd 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7593,6 +7593,38 @@ (define_insn_and_split "movsf_from_si"
 	    "*,          *,         p9v,       p8v,       *,         *,
 	     p8v,        p8v,       p8v,       *")])
 
+;; For extracting high part element from DImode register like:
+;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
+;; split it before reload with "and mask" to avoid generating shift right
+;; 32 bit then shift left 32 bit.
+(define_insn_and_split "movsf_from_si2"
+  [(set (match_operand:SF 0 "vsx_reg_sfsubreg_ok" "=wa")
+	    (unspec:SF
+	     [(subreg:SI
+	       (ashiftrt:DI
+		(match_operand:DI 1 "input_operand" "r")
+		(const_int 32))
+	       0)]
+	     UNSPEC_SF_FROM_SI))
+  (clobber (match_scratch:DI 2 "=r"))]
+  "TARGET_NO_SF_SUBREG"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (GET_CODE (operands[2]) == SCRATCH)
+    operands[2] = gen_reg_rtx (DImode);
+
+  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
+  emit_insn (gen_anddi3 (operands[2], operands[1], mask));
+  emit_insn (gen_p8_mtvsrd_sf (operands[0], operands[2]));
+  emit_insn (gen_vsx_xscvspdpn_directmove (operands[0], operands[0]));
+  DONE;
+}
+  [(set_attr "length" "12")
+  (set_attr "type" "vecfloat")
+  (set_attr "isa" "p8v")])
 
 ;; Move 64-bit binary/decimal floating point
 (define_expand "mov<mode>"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr89310.c b/gcc/testsuite/gcc.target/powerpc/pr89310.c
new file mode 100644
index 00000000000..15e78509246
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct s {
+  int i;
+  float f;
+};
+
+float
+foo (struct s arg)
+{
+  return arg.f;
+}
+
+/* { dg-final { scan-assembler-not {\msrdi\M} } } */
+/* { dg-final { scan-assembler-not {\msldi\M} } } */
+/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */
-- 
2.21.0.777.g83232e3864


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] rs6000: Split movsf_from_si from high word before reload[PR89310]
  2020-07-09 19:25             ` Segher Boessenkool
  2020-07-10  1:39               ` [PATCH] rs6000: Define movsf_from_si2 to extract high part SF element from DImode[PR89310] luoxhu
@ 2020-07-10  7:41               ` luoxhu
  1 sibling, 0 replies; 16+ messages in thread
From: luoxhu @ 2020-07-10  7:41 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: gcc-patches, dje.gcc, wschmidt, guojiufu, linkw



On 2020/7/10 03:25, Segher Boessenkool wrote:
> 
>> +  "TARGET_NO_SF_SUBREG"
>> +  "#"
>> +  "&& vsx_reg_sfsubreg_ok (operands[0], SFmode)"
> 
> Put this in the insn condition?  And since this is just a predicate,
> you can just use it instead of gpc_reg_operand.
> 
> (The split condition becomes "&& 1" then, not "").
> 

+(define_insn_and_split "movsf_from_si2"
+  [(set (match_operand:SF 0 "vsx_reg_sfsubreg_ok" "=wa")
+	    (unspec:SF
+	     [(subreg:SI
+	       (ashiftrt:DI
+		(match_operand:DI 1 "input_operand" "r")
+		(const_int 32))
+	       0)]
+	     UNSPEC_SF_FROM_SI))
+  (clobber (match_scratch:DI 2 "=r"))]
+  "TARGET_NO_SF_SUBREG"
+  "#"
+  "&& 1"
+  [(const_int 0)]

This change is invalid as it will cause an ICE in PR42475.c, reason is:
insn #29 will be combined to insn #40, though split1 is success, but it 
will cause ICE in sched1 as the op[0] is not SFmode.  Without this, #29
won't be combined to #40 as the gpc_reg_operand (operands[0], E_SFmode)
will cause the match fail for subreg:SF (reg:SI 155 [ _4 ]) 0).


pr42475.c.268r.combine:
Trying 29 -> 40:
   29: {r120:SF=unspec[r133:DI>>0x20#0] 86;clobber scratch;}
   40: r155:SI#0=r120:SF
      REG_DEAD r120:SF
Successfully matched this instruction:
(set (subreg:SF (reg:SI 155 [ _4 ]) 0)
    (unspec:SF [
            (subreg:SI (ashiftrt:DI (reg:DI 133)
                    (const_int 32 [0x20])) 0)
        ] UNSPEC_SF_FROM_SI))
allowing combination of insns 29 and 40
original costs 12 + 4 = 16
replacement cost 12
deferring deletion of insn with uid = 29.
modifying insn i3    40: {r155:SI#0=unspec[r133:DI>>0x20#0] 86;clobber scratch;}
      REG_DEAD r133:DI
deferring rescan insn with uid = 40.


pr42475.c.273r.split1:
69: r172:DI=r133:DI&0xffffffff00000000
70: r155:SI#0=unspec[r172:DI] 62
71: r155:SI#0=unspec[r155:SI#0] 103
41: NOTE_INSN_DELETED
42: r157:DI=r155:SI#0<<0x20


pr42475.c.280r.sched1:
pr42475.c: In function ‘bar’:
pr42475.c:20:1: error: unrecognizable insn:
   20 | }
      | ^
(insn 71 70 41 2 (set (subreg:SF (reg:SI 155 [ _4 ]) 0)
        (unspec:SF [
                (subreg:SF (reg:SI 155 [ _4 ]) 0)
            ] UNSPEC_VSX_CVSPDPN)) -1
     (nil))
during RTL pass: sched1
dump file: pr42475.c.280r.sched1
pr42475.c:20:1: internal compiler error: in extract_insn, at recog.c:2294
Please submit a full bug report,
with preprocessed source if appropriate.
See <https://gcc.gnu.org/bugs/> for instructions.


VS not using vsx_reg_sfsubreg_ok as condition:


pr42475.c.268r.combine:
Trying 29 -> 40:
   29: {r120:SF=unspec[r133:DI>>0x20#0] 86;clobber scratch;}
   40: r155:SI#0=r120:SF
      REG_DEAD r120:SF
Failed to match this instruction:
(set (subreg:SF (reg:SI 155 [ _4 ]) 0)
    (unspec:SF [
            (subreg:SI (ashiftrt:DI (reg:DI 133)
                    (const_int 32 [0x20])) 0)
        ] UNSPEC_SF_FROM_SI))


273r.split1:
69: r172:DI=r133:DI&0xffffffff00000000
70: r120:SF=unspec[r172:DI] 62
71: r120:SF=unspec[r120:SF] 103
40: r155:SI#0=r120:SF
	REG_DEAD r120:SF

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] rs6000: Define movsf_from_si2 to extract high part SF element from DImode[PR89310]
  2020-07-10  1:39               ` [PATCH] rs6000: Define movsf_from_si2 to extract high part SF element from DImode[PR89310] luoxhu
@ 2020-07-11  0:54                 ` Segher Boessenkool
  2020-07-13  6:30                   ` luoxhu
  0 siblings, 1 reply; 16+ messages in thread
From: Segher Boessenkool @ 2020-07-11  0:54 UTC (permalink / raw)
  To: luoxhu; +Cc: gcc-patches, dje.gcc, wschmidt, guojiufu, linkw

Hi!

On Fri, Jul 10, 2020 at 09:39:40AM +0800, luoxhu wrote:
> OK, seems the md file needs a format tool too...

Heh.  Just make sure it looks good (that is, does what it looks like),
looks like the rest, etc.  It's hard to do anything nice with unspecs,
[ ] lists do not format well.

> >> +  "TARGET_NO_SF_SUBREG"
> >> +  "#"
> >> +  "&& vsx_reg_sfsubreg_ok (operands[0], SFmode)"
> > 
> > Put this in the insn condition?  And since this is just a predicate,
> > you can just use it instead of gpc_reg_operand.
> > 
> > (The split condition becomes "&& 1" then, not "").
> 
> OK, this seems a bit strange as movsi_from_sf, movsf_from_si and
> movdi_from_sf_zero_ext all use it as condition...

Since in your case you *always* split, the split condition should be
"always".  There are no alternatives that do not split here.

> And why vsx_reg_sfsubreg_ok allows "SF SUBREGS" and TARGET_NO_SF_SUBREG
> "avoid (SUBREG:SF (REG:SI)", I guess they are not the same meaning? (The 
> TARGET_NO_SF_SUBREG is also copied from other similar defines.)  Thanks.

Good question.  I do not know.

Well...  Since this define_insn* requires p8 *anyway*, we do not need
any of these sf_subreg things?  We always know for each one if it should
be true or false.

> +  "TARGET_NO_SF_SUBREG"

But here we should require p8 some other way, then.

> +  (set_attr "isa" "p8v")])

(This isn't enough, unfortunately).


Segher

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] rs6000: Define movsf_from_si2 to extract high part SF element from DImode[PR89310]
  2020-07-11  0:54                 ` Segher Boessenkool
@ 2020-07-13  6:30                   ` luoxhu
  2020-07-14 14:17                     ` David Edelsohn
  2020-07-20 15:31                     ` Segher Boessenkool
  0 siblings, 2 replies; 16+ messages in thread
From: luoxhu @ 2020-07-13  6:30 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: gcc-patches, dje.gcc, wschmidt, guojiufu, linkw

Hi,

On 2020/7/11 08:54, Segher Boessenkool wrote:
> Hi!
> 
> On Fri, Jul 10, 2020 at 09:39:40AM +0800, luoxhu wrote:
>> OK, seems the md file needs a format tool too...
> 
> Heh.  Just make sure it looks good (that is, does what it looks like),
> looks like the rest, etc.  It's hard to do anything nice with unspecs,
> [ ] lists do not format well.
> 
>>>> +  "TARGET_NO_SF_SUBREG"
>>>> +  "#"
>>>> +  "&& vsx_reg_sfsubreg_ok (operands[0], SFmode)"
>>>
>>> Put this in the insn condition?  And since this is just a predicate,
>>> you can just use it instead of gpc_reg_operand.
>>>
>>> (The split condition becomes "&& 1" then, not "").
>>
>> OK, this seems a bit strange as movsi_from_sf, movsf_from_si and
>> movdi_from_sf_zero_ext all use it as condition...
> 
> Since in your case you *always* split, the split condition should be
> "always".  There are no alternatives that do not split here.
> 
>> And why vsx_reg_sfsubreg_ok allows "SF SUBREGS" and TARGET_NO_SF_SUBREG
>> "avoid (SUBREG:SF (REG:SI)", I guess they are not the same meaning? (The
>> TARGET_NO_SF_SUBREG is also copied from other similar defines.)  Thanks.
> 
> Good question.  I do not know.
> 
> Well...  Since this define_insn* requires p8 *anyway*, we do not need
> any of these sf_subreg things?  We always know for each one if it should
> be true or false.

Yes, removed the vsx_reg_sfsubreg_ok check.

> 
>> +  "TARGET_NO_SF_SUBREG"
> 
> But here we should require p8 some other way, then.

TARGET_NO_SF_SUBREG is defined to TARGET_DIRECT_MOVE_64BIT, and
TARGET_DIRECT_MOVE_64BIT is TARGET_DIRECT_MOVE && TARGET_P8_VECTOR && TARGET_POWERPC64
which means TARGET_P8_VECTOR must be true for TARGET_NO_SF_SUBREG.

> 
>> +  (set_attr "isa" "p8v")])
> 
> (This isn't enough, unfortunately).
> 


Updated patch to removed the vsx_reg_sfsubreg_ok and ICE fix:


For extracting high part element from DImode register like:

{%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

split it before reload with "and mask" to avoid generating shift right
32 bit then shift left 32 bit.  This pattern also exists in PR42475 and
PR67741, etc.

srdi 3,3,32
sldi 9,3,32
mtvsrd 1,9
xscvspdpn 1,1

=>

rldicr 3,3,0,31
mtvsrd 1,3
xscvspdpn 1,1

Bootstrap and regression tested pass on Power8-LE.

gcc/ChangeLog:

2020-07-13  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* config/rs6000/rs6000.md (movsf_from_si2): New
	define_insn_and_split.

gcc/testsuite/ChangeLog:

2020-07-13  Xionghu Luo  <luoxhu@linux.ibm.com>

	PR rtl-optimization/89310
	* gcc.target/powerpc/pr89310.c: New test.
---
 gcc/config/rs6000/rs6000.md                | 31 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr89310.c | 17 ++++++++++++
 2 files changed, 48 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr89310.c

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4fcd6a94022..480385ed4d2 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7593,6 +7593,37 @@ (define_insn_and_split "movsf_from_si"
 	    "*,          *,         p9v,       p8v,       *,         *,
 	     p8v,        p8v,       p8v,       *")])
 
+;; For extracting high part element from DImode register like:
+;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
+;; split it before reload with "and mask" to avoid generating shift right
+;; 32 bit then shift left 32 bit.
+(define_insn_and_split "movsf_from_si2"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
+	    (unspec:SF
+	     [(subreg:SI
+	       (ashiftrt:DI
+		(match_operand:DI 1 "input_operand" "r")
+		(const_int 32))
+	       0)]
+	     UNSPEC_SF_FROM_SI))
+  (clobber (match_scratch:DI 2 "=r"))]
+  "TARGET_NO_SF_SUBREG"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (GET_CODE (operands[2]) == SCRATCH)
+    operands[2] = gen_reg_rtx (DImode);
+
+  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
+  emit_insn (gen_anddi3 (operands[2], operands[1], mask));
+  emit_insn (gen_p8_mtvsrd_sf (operands[0], operands[2]));
+  emit_insn (gen_vsx_xscvspdpn_directmove (operands[0], operands[0]));
+  DONE;
+}
+  [(set_attr "length" "12")
+  (set_attr "type" "vecfloat")
+  (set_attr "isa" "p8v")])
 
 ;; Move 64-bit binary/decimal floating point
 (define_expand "mov<mode>"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr89310.c b/gcc/testsuite/gcc.target/powerpc/pr89310.c
new file mode 100644
index 00000000000..15e78509246
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct s {
+  int i;
+  float f;
+};
+
+float
+foo (struct s arg)
+{
+  return arg.f;
+}
+
+/* { dg-final { scan-assembler-not {\msrdi\M} } } */
+/* { dg-final { scan-assembler-not {\msldi\M} } } */
+/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */
-- 
2.21.0.777.g83232e3864



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] rs6000: Define movsf_from_si2 to extract high part SF element from DImode[PR89310]
  2020-07-13  6:30                   ` luoxhu
@ 2020-07-14 14:17                     ` David Edelsohn
  2020-07-15  3:47                       ` luoxhu
  2020-07-20 15:31                     ` Segher Boessenkool
  1 sibling, 1 reply; 16+ messages in thread
From: David Edelsohn @ 2020-07-14 14:17 UTC (permalink / raw)
  To: luoxhu; +Cc: Segher Boessenkool, GCC Patches, Bill Schmidt, guojiufu, linkw

Unfortunately this patch is eliciting a number of new testsuite
failures, all like

error: unrecognizable insn:
(insn 44 43 45 5 (parallel [
            (set (reg:SI 199)
                (unspec:SI [
                        (reg:SF 202)
                    ] UNSPEC_SI_FROM_SF))
            (clobber (scratch:V4SF))
        ]) "/nasfarm/edelsohn/src/src/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c":70:1
-1
     (nil))
during RTL pass: vregs

for

gcc.dg/vect/vect-alias-check-11.c
gcc.dg/vect/vect-alias-check-12.c
gcc.dg/vect/pr57741-2.c
gcc.dg/vect/pr57741-3.c
gcc.dg/vect/pr89440.c
gcc.target/powerpc/sse-movss-1.c

Thanks, David

On Mon, Jul 13, 2020 at 2:30 AM luoxhu <luoxhu@linux.ibm.com> wrote:
>
> Hi,
>
> On 2020/7/11 08:54, Segher Boessenkool wrote:
> > Hi!
> >
> > On Fri, Jul 10, 2020 at 09:39:40AM +0800, luoxhu wrote:
> >> OK, seems the md file needs a format tool too...
> >
> > Heh.  Just make sure it looks good (that is, does what it looks like),
> > looks like the rest, etc.  It's hard to do anything nice with unspecs,
> > [ ] lists do not format well.
> >
> >>>> +  "TARGET_NO_SF_SUBREG"
> >>>> +  "#"
> >>>> +  "&& vsx_reg_sfsubreg_ok (operands[0], SFmode)"
> >>>
> >>> Put this in the insn condition?  And since this is just a predicate,
> >>> you can just use it instead of gpc_reg_operand.
> >>>
> >>> (The split condition becomes "&& 1" then, not "").
> >>
> >> OK, this seems a bit strange as movsi_from_sf, movsf_from_si and
> >> movdi_from_sf_zero_ext all use it as condition...
> >
> > Since in your case you *always* split, the split condition should be
> > "always".  There are no alternatives that do not split here.
> >
> >> And why vsx_reg_sfsubreg_ok allows "SF SUBREGS" and TARGET_NO_SF_SUBREG
> >> "avoid (SUBREG:SF (REG:SI)", I guess they are not the same meaning? (The
> >> TARGET_NO_SF_SUBREG is also copied from other similar defines.)  Thanks.
> >
> > Good question.  I do not know.
> >
> > Well...  Since this define_insn* requires p8 *anyway*, we do not need
> > any of these sf_subreg things?  We always know for each one if it should
> > be true or false.
>
> Yes, removed the vsx_reg_sfsubreg_ok check.
>
> >
> >> +  "TARGET_NO_SF_SUBREG"
> >
> > But here we should require p8 some other way, then.
>
> TARGET_NO_SF_SUBREG is defined to TARGET_DIRECT_MOVE_64BIT, and
> TARGET_DIRECT_MOVE_64BIT is TARGET_DIRECT_MOVE && TARGET_P8_VECTOR && TARGET_POWERPC64
> which means TARGET_P8_VECTOR must be true for TARGET_NO_SF_SUBREG.
>
> >
> >> +  (set_attr "isa" "p8v")])
> >
> > (This isn't enough, unfortunately).
> >
>
>
> Updated patch to removed the vsx_reg_sfsubreg_ok and ICE fix:
>
>
> For extracting high part element from DImode register like:
>
> {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
>
> split it before reload with "and mask" to avoid generating shift right
> 32 bit then shift left 32 bit.  This pattern also exists in PR42475 and
> PR67741, etc.
>
> srdi 3,3,32
> sldi 9,3,32
> mtvsrd 1,9
> xscvspdpn 1,1
>
> =>
>
> rldicr 3,3,0,31
> mtvsrd 1,3
> xscvspdpn 1,1
>
> Bootstrap and regression tested pass on Power8-LE.
>
> gcc/ChangeLog:
>
> 2020-07-13  Xionghu Luo  <luoxhu@linux.ibm.com>
>
>         PR rtl-optimization/89310
>         * config/rs6000/rs6000.md (movsf_from_si2): New
>         define_insn_and_split.
>
> gcc/testsuite/ChangeLog:
>
> 2020-07-13  Xionghu Luo  <luoxhu@linux.ibm.com>
>
>         PR rtl-optimization/89310
>         * gcc.target/powerpc/pr89310.c: New test.
> ---
>  gcc/config/rs6000/rs6000.md                | 31 ++++++++++++++++++++++
>  gcc/testsuite/gcc.target/powerpc/pr89310.c | 17 ++++++++++++
>  2 files changed, 48 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr89310.c
>
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 4fcd6a94022..480385ed4d2 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -7593,6 +7593,37 @@ (define_insn_and_split "movsf_from_si"
>             "*,          *,         p9v,       p8v,       *,         *,
>              p8v,        p8v,       p8v,       *")])
>
> +;; For extracting high part element from DImode register like:
> +;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
> +;; split it before reload with "and mask" to avoid generating shift right
> +;; 32 bit then shift left 32 bit.
> +(define_insn_and_split "movsf_from_si2"
> +  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
> +           (unspec:SF
> +            [(subreg:SI
> +              (ashiftrt:DI
> +               (match_operand:DI 1 "input_operand" "r")
> +               (const_int 32))
> +              0)]
> +            UNSPEC_SF_FROM_SI))
> +  (clobber (match_scratch:DI 2 "=r"))]
> +  "TARGET_NO_SF_SUBREG"
> +  "#"
> +  "&& 1"
> +  [(const_int 0)]
> +{
> +  if (GET_CODE (operands[2]) == SCRATCH)
> +    operands[2] = gen_reg_rtx (DImode);
> +
> +  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
> +  emit_insn (gen_anddi3 (operands[2], operands[1], mask));
> +  emit_insn (gen_p8_mtvsrd_sf (operands[0], operands[2]));
> +  emit_insn (gen_vsx_xscvspdpn_directmove (operands[0], operands[0]));
> +  DONE;
> +}
> +  [(set_attr "length" "12")
> +  (set_attr "type" "vecfloat")
> +  (set_attr "isa" "p8v")])
>
>  ;; Move 64-bit binary/decimal floating point
>  (define_expand "mov<mode>"
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr89310.c b/gcc/testsuite/gcc.target/powerpc/pr89310.c
> new file mode 100644
> index 00000000000..15e78509246
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +struct s {
> +  int i;
> +  float f;
> +};
> +
> +float
> +foo (struct s arg)
> +{
> +  return arg.f;
> +}
> +
> +/* { dg-final { scan-assembler-not {\msrdi\M} } } */
> +/* { dg-final { scan-assembler-not {\msldi\M} } } */
> +/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */
> --
> 2.21.0.777.g83232e3864
>
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] rs6000: Define movsf_from_si2 to extract high part SF element from DImode[PR89310]
  2020-07-14 14:17                     ` David Edelsohn
@ 2020-07-15  3:47                       ` luoxhu
  0 siblings, 0 replies; 16+ messages in thread
From: luoxhu @ 2020-07-15  3:47 UTC (permalink / raw)
  To: David Edelsohn
  Cc: Segher Boessenkool, GCC Patches, Bill Schmidt, guojiufu, linkw

Hi David,

On 2020/7/14 22:17, David Edelsohn wrote:
> Unfortunately this patch is eliciting a number of new testsuite
> failures, all like
> 
> error: unrecognizable insn:
> (insn 44 43 45 5 (parallel [
>              (set (reg:SI 199)
>                  (unspec:SI [
>                          (reg:SF 202)
>                      ] UNSPEC_SI_FROM_SF))
>              (clobber (scratch:V4SF))
>          ]) "/nasfarm/edelsohn/src/src/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c":70:1
> -1
>       (nil))
> during RTL pass: vregs
> 
> for
> 
> gcc.dg/vect/vect-alias-check-11.c
> gcc.dg/vect/vect-alias-check-12.c
> gcc.dg/vect/pr57741-2.c
> gcc.dg/vect/pr57741-3.c
> gcc.dg/vect/pr89440.c
> gcc.target/powerpc/sse-movss-1.c

This patch won't match the instruction with a "clobber (scratch:V4SF)",
it only matches "(clobber (match_scratch:DI 2 "=r"))", I guess you are
replying to the other patch?

"[PATCH 2/2] rs6000: Define define_insn_and_split to split unspec sldi+or to rldimi"

Thanks for your fix patch! :)

This patch's regression tested pass on Power8-LE, I re-run these cases on
Power8-LE, and confirmed these could pass, what is your platform please?
BTW, TARGET_NO_SF_SUBREG ensured TARGET_POWERPC64 for this define_insn_and_split.
Thanks.

Xionghu

> 
> Thanks, David
> 
> On Mon, Jul 13, 2020 at 2:30 AM luoxhu <luoxhu@linux.ibm.com> wrote:
>>
>> Hi,
>>
>> On 2020/7/11 08:54, Segher Boessenkool wrote:
>>> Hi!
>>>
>>> On Fri, Jul 10, 2020 at 09:39:40AM +0800, luoxhu wrote:
>>>> OK, seems the md file needs a format tool too...
>>>
>>> Heh.  Just make sure it looks good (that is, does what it looks like),
>>> looks like the rest, etc.  It's hard to do anything nice with unspecs,
>>> [ ] lists do not format well.
>>>
>>>>>> +  "TARGET_NO_SF_SUBREG"
>>>>>> +  "#"
>>>>>> +  "&& vsx_reg_sfsubreg_ok (operands[0], SFmode)"
>>>>>
>>>>> Put this in the insn condition?  And since this is just a predicate,
>>>>> you can just use it instead of gpc_reg_operand.
>>>>>
>>>>> (The split condition becomes "&& 1" then, not "").
>>>>
>>>> OK, this seems a bit strange as movsi_from_sf, movsf_from_si and
>>>> movdi_from_sf_zero_ext all use it as condition...
>>>
>>> Since in your case you *always* split, the split condition should be
>>> "always".  There are no alternatives that do not split here.
>>>
>>>> And why vsx_reg_sfsubreg_ok allows "SF SUBREGS" and TARGET_NO_SF_SUBREG
>>>> "avoid (SUBREG:SF (REG:SI)", I guess they are not the same meaning? (The
>>>> TARGET_NO_SF_SUBREG is also copied from other similar defines.)  Thanks.
>>>
>>> Good question.  I do not know.
>>>
>>> Well...  Since this define_insn* requires p8 *anyway*, we do not need
>>> any of these sf_subreg things?  We always know for each one if it should
>>> be true or false.
>>
>> Yes, removed the vsx_reg_sfsubreg_ok check.
>>
>>>
>>>> +  "TARGET_NO_SF_SUBREG"
>>>
>>> But here we should require p8 some other way, then.
>>
>> TARGET_NO_SF_SUBREG is defined to TARGET_DIRECT_MOVE_64BIT, and
>> TARGET_DIRECT_MOVE_64BIT is TARGET_DIRECT_MOVE && TARGET_P8_VECTOR && TARGET_POWERPC64
>> which means TARGET_P8_VECTOR must be true for TARGET_NO_SF_SUBREG.
>>
>>>
>>>> +  (set_attr "isa" "p8v")])
>>>
>>> (This isn't enough, unfortunately).
>>>
>>
>>
>> Updated patch to removed the vsx_reg_sfsubreg_ok and ICE fix:
>>
>>
>> For extracting high part element from DImode register like:
>>
>> {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
>>
>> split it before reload with "and mask" to avoid generating shift right
>> 32 bit then shift left 32 bit.  This pattern also exists in PR42475 and
>> PR67741, etc.
>>
>> srdi 3,3,32
>> sldi 9,3,32
>> mtvsrd 1,9
>> xscvspdpn 1,1
>>
>> =>
>>
>> rldicr 3,3,0,31
>> mtvsrd 1,3
>> xscvspdpn 1,1
>>
>> Bootstrap and regression tested pass on Power8-LE.
>>
>> gcc/ChangeLog:
>>
>> 2020-07-13  Xionghu Luo  <luoxhu@linux.ibm.com>
>>
>>          PR rtl-optimization/89310
>>          * config/rs6000/rs6000.md (movsf_from_si2): New
>>          define_insn_and_split.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2020-07-13  Xionghu Luo  <luoxhu@linux.ibm.com>
>>
>>          PR rtl-optimization/89310
>>          * gcc.target/powerpc/pr89310.c: New test.
>> ---
>>   gcc/config/rs6000/rs6000.md                | 31 ++++++++++++++++++++++
>>   gcc/testsuite/gcc.target/powerpc/pr89310.c | 17 ++++++++++++
>>   2 files changed, 48 insertions(+)
>>   create mode 100644 gcc/testsuite/gcc.target/powerpc/pr89310.c
>>
>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
>> index 4fcd6a94022..480385ed4d2 100644
>> --- a/gcc/config/rs6000/rs6000.md
>> +++ b/gcc/config/rs6000/rs6000.md
>> @@ -7593,6 +7593,37 @@ (define_insn_and_split "movsf_from_si"
>>              "*,          *,         p9v,       p8v,       *,         *,
>>               p8v,        p8v,       p8v,       *")])
>>
>> +;; For extracting high part element from DImode register like:
>> +;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
>> +;; split it before reload with "and mask" to avoid generating shift right
>> +;; 32 bit then shift left 32 bit.
>> +(define_insn_and_split "movsf_from_si2"
>> +  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
>> +           (unspec:SF
>> +            [(subreg:SI
>> +              (ashiftrt:DI
>> +               (match_operand:DI 1 "input_operand" "r")
>> +               (const_int 32))
>> +              0)]
>> +            UNSPEC_SF_FROM_SI))
>> +  (clobber (match_scratch:DI 2 "=r"))]
>> +  "TARGET_NO_SF_SUBREG"
>> +  "#"
>> +  "&& 1"
>> +  [(const_int 0)]
>> +{
>> +  if (GET_CODE (operands[2]) == SCRATCH)
>> +    operands[2] = gen_reg_rtx (DImode);
>> +
>> +  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
>> +  emit_insn (gen_anddi3 (operands[2], operands[1], mask));
>> +  emit_insn (gen_p8_mtvsrd_sf (operands[0], operands[2]));
>> +  emit_insn (gen_vsx_xscvspdpn_directmove (operands[0], operands[0]));
>> +  DONE;
>> +}
>> +  [(set_attr "length" "12")
>> +  (set_attr "type" "vecfloat")
>> +  (set_attr "isa" "p8v")])
>>
>>   ;; Move 64-bit binary/decimal floating point
>>   (define_expand "mov<mode>"
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr89310.c b/gcc/testsuite/gcc.target/powerpc/pr89310.c
>> new file mode 100644
>> index 00000000000..15e78509246
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c
>> @@ -0,0 +1,17 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2" } */
>> +
>> +struct s {
>> +  int i;
>> +  float f;
>> +};
>> +
>> +float
>> +foo (struct s arg)
>> +{
>> +  return arg.f;
>> +}
>> +
>> +/* { dg-final { scan-assembler-not {\msrdi\M} } } */
>> +/* { dg-final { scan-assembler-not {\msldi\M} } } */
>> +/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */
>> --
>> 2.21.0.777.g83232e3864
>>
>>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] rs6000: Define movsf_from_si2 to extract high part SF element from DImode[PR89310]
  2020-07-13  6:30                   ` luoxhu
  2020-07-14 14:17                     ` David Edelsohn
@ 2020-07-20 15:31                     ` Segher Boessenkool
  2020-07-21  3:43                       ` luoxhu
  1 sibling, 1 reply; 16+ messages in thread
From: Segher Boessenkool @ 2020-07-20 15:31 UTC (permalink / raw)
  To: luoxhu; +Cc: gcc-patches, dje.gcc, wschmidt, guojiufu, linkw

On Mon, Jul 13, 2020 at 02:30:28PM +0800, luoxhu wrote:
> For extracting high part element from DImode register like:
> 
> {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
> 
> split it before reload with "and mask" to avoid generating shift right
> 32 bit then shift left 32 bit.  This pattern also exists in PR42475 and
> PR67741, etc.
> 
> srdi 3,3,32
> sldi 9,3,32
> mtvsrd 1,9
> xscvspdpn 1,1
> 
> =>
> 
> rldicr 3,3,0,31
> mtvsrd 1,3
> xscvspdpn 1,1

> 	* config/rs6000/rs6000.md (movsf_from_si2): New
> 	define_insn_and_split.

(That fits on one line).

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */

> +/* { dg-final { scan-assembler-not {\msrdi\M} } } */
> +/* { dg-final { scan-assembler-not {\msldi\M} } } */
> +/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */

I'm not sure that works on older cpus?  Please test there, and add
-mdejagnu-cpu=power8 to the dg-options if needed.  Also test on BE please.

Okay for trunk with those last details taking care of.  Thank you!


Segher

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] rs6000: Define movsf_from_si2 to extract high part SF element from DImode[PR89310]
  2020-07-20 15:31                     ` Segher Boessenkool
@ 2020-07-21  3:43                       ` luoxhu
  0 siblings, 0 replies; 16+ messages in thread
From: luoxhu @ 2020-07-21  3:43 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: gcc-patches, dje.gcc, wschmidt, guojiufu, linkw

On 2020/7/20 23:31, Segher Boessenkool wrote:
> On Mon, Jul 13, 2020 at 02:30:28PM +0800, luoxhu wrote:
>> For extracting high part element from DImode register like:
>>
>> {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
>>
>> split it before reload with "and mask" to avoid generating shift right
>> 32 bit then shift left 32 bit.  This pattern also exists in PR42475 and
>> PR67741, etc.
>>
>> srdi 3,3,32
>> sldi 9,3,32
>> mtvsrd 1,9
>> xscvspdpn 1,1
>>
>> =>
>>
>> rldicr 3,3,0,31
>> mtvsrd 1,3
>> xscvspdpn 1,1
> 
>> 	* config/rs6000/rs6000.md (movsf_from_si2): New
>> 	define_insn_and_split.
> 
> (That fits on one line).
> 
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c
>> @@ -0,0 +1,17 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2" } */
> 
>> +/* { dg-final { scan-assembler-not {\msrdi\M} } } */
>> +/* { dg-final { scan-assembler-not {\msldi\M} } } */
>> +/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */
> 
> I'm not sure that works on older cpus?  Please test there, and add
> -mdejagnu-cpu=power8 to the dg-options if needed.  Also test on BE please.
> 
> Okay for trunk with those last details taking care of.  Thank you!

Thanks for the remind.  Addressed the comments and committed in r11-2245.

Xionghu

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2020-07-21  3:43 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-06  2:17 [PATCH] rs6000: Split movsf_from_si from high word before reload[PR89310] Xionghu Luo
2020-07-07  0:18 ` Segher Boessenkool
2020-07-07  8:39   ` luoxhu
2020-07-07 21:31     ` Segher Boessenkool
2020-07-08  3:19       ` luoxhu
2020-07-08 22:43         ` Segher Boessenkool
2020-07-09  3:09           ` luoxhu
2020-07-09 19:25             ` Segher Boessenkool
2020-07-10  1:39               ` [PATCH] rs6000: Define movsf_from_si2 to extract high part SF element from DImode[PR89310] luoxhu
2020-07-11  0:54                 ` Segher Boessenkool
2020-07-13  6:30                   ` luoxhu
2020-07-14 14:17                     ` David Edelsohn
2020-07-15  3:47                       ` luoxhu
2020-07-20 15:31                     ` Segher Boessenkool
2020-07-21  3:43                       ` luoxhu
2020-07-10  7:41               ` [PATCH] rs6000: Split movsf_from_si from high word before reload[PR89310] luoxhu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).