public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] rs6000: load high and low part of 128bit vector independently [PR110040]
       [not found] <a18cccd5-9465-4cc3-85ed-156a72a7236d@linux.vnet.ibm.com>
@ 2024-02-26  5:43 ` jeevitha
  2024-03-07 15:24   ` [PING^1][PATCH] " jeevitha
                     ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: jeevitha @ 2024-02-26  5:43 UTC (permalink / raw)
  To: GCC Patches, Kewen.Lin, Segher Boessenkool; +Cc: Peter Bergner

Hi All,

The following patch has been bootstrapped and regtested on powerpc64le-linux.

PR110040 exposes an issue concerning moves from vector registers to GPRs.
There are two moves, one for upper 64 bits and the other for the lower
64 bits.  In the problematic test case, we are only interested in storing
the lower 64 bits.  However, the instruction for copying the upper 64 bits
is still emitted and is dead code.  This patch adds a splitter that splits
apart the two move instructions so that DCE can remove the dead code after
splitting.

2024-02-26  Jeevitha Palanisamy  <jeevitha@linux.ibm.com>

gcc/
	PR target/110040
	* config/rs6000/vsx.md (split pattern for V1TI to DI move): Defined.

gcc/testsuite/
	PR target/110040
	* gcc.target/powerpc/pr110040-1.c: New testcase.
	* gcc.target/powerpc/pr110040-2.c: New testcase.


diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 6111cc90eb7..78457f8fb14 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6706,3 +6706,19 @@
   "vmsumcud %0,%1,%2,%3"
   [(set_attr "type" "veccomplex")]
 )
+
+(define_split
+  [(set (match_operand:V1TI 0 "int_reg_operand")
+       (match_operand:V1TI 1 "vsx_register_operand"))]
+  "reload_completed
+   && TARGET_DIRECT_MOVE_64BIT"
+   [(pc)]
+{
+  rtx op0 = gen_rtx_REG (DImode, REGNO (operands[0]));
+  rtx op1 = gen_rtx_REG (V2DImode, REGNO (operands[1]));
+  rtx op2 = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
+  rtx op3 = gen_rtx_REG (V2DImode, REGNO (operands[1]));
+  emit_insn (gen_vsx_extract_v2di (op0, op1, GEN_INT (0)));
+  emit_insn (gen_vsx_extract_v2di (op2, op3, GEN_INT (1)));
+  DONE;
+})
diff --git a/gcc/testsuite/gcc.target/powerpc/pr110040-1.c b/gcc/testsuite/gcc.target/powerpc/pr110040-1.c
new file mode 100644
index 00000000000..fb3bd254636
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr110040-1.c
@@ -0,0 +1,14 @@
+/* PR target/110040 */
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
+/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
+
+#include <altivec.h>
+
+void
+foo (signed long *dst, vector signed __int128 src)
+{
+  *dst = (signed long) src[0];
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/pr110040-2.c b/gcc/testsuite/gcc.target/powerpc/pr110040-2.c
new file mode 100644
index 00000000000..f3aa22be4e8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr110040-2.c
@@ -0,0 +1,13 @@
+/* PR target/110040 */
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
+
+#include <altivec.h>
+
+void
+foo (signed int *dst, vector signed __int128 src)
+{
+  __builtin_vec_xst_trunc (src, 0, dst);
+}



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PING^1][PATCH] rs6000: load high and low part of 128bit vector independently [PR110040]
  2024-02-26  5:43 ` [PATCH] rs6000: load high and low part of 128bit vector independently [PR110040] jeevitha
@ 2024-03-07 15:24   ` jeevitha
  2024-03-26  4:53   ` [PING^2][PATCH] " jeevitha
  2024-05-22  3:20   ` [PATCH] " Kewen.Lin
  2 siblings, 0 replies; 6+ messages in thread
From: jeevitha @ 2024-03-07 15:24 UTC (permalink / raw)
  To: GCC Patches, Kewen.Lin, Segher Boessenkool; +Cc: Peter Bergner

Ping!

please review.

Thanks & Regards
Jeevitha

On 26/02/24 11:13 am, jeevitha wrote:
> Hi All,
> 
> The following patch has been bootstrapped and regtested on powerpc64le-linux.
> 
> PR110040 exposes an issue concerning moves from vector registers to GPRs.
> There are two moves, one for upper 64 bits and the other for the lower
> 64 bits.  In the problematic test case, we are only interested in storing
> the lower 64 bits.  However, the instruction for copying the upper 64 bits
> is still emitted and is dead code.  This patch adds a splitter that splits
> apart the two move instructions so that DCE can remove the dead code after
> splitting.
> 
> 2024-02-26  Jeevitha Palanisamy  <jeevitha@linux.ibm.com>
> 
> gcc/
> 	PR target/110040
> 	* config/rs6000/vsx.md (split pattern for V1TI to DI move): Defined.
> 
> gcc/testsuite/
> 	PR target/110040
> 	* gcc.target/powerpc/pr110040-1.c: New testcase.
> 	* gcc.target/powerpc/pr110040-2.c: New testcase.
> 
> 
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index 6111cc90eb7..78457f8fb14 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -6706,3 +6706,19 @@
>    "vmsumcud %0,%1,%2,%3"
>    [(set_attr "type" "veccomplex")]
>  )
> +
> +(define_split
> +  [(set (match_operand:V1TI 0 "int_reg_operand")
> +       (match_operand:V1TI 1 "vsx_register_operand"))]
> +  "reload_completed
> +   && TARGET_DIRECT_MOVE_64BIT"
> +   [(pc)]
> +{
> +  rtx op0 = gen_rtx_REG (DImode, REGNO (operands[0]));
> +  rtx op1 = gen_rtx_REG (V2DImode, REGNO (operands[1]));
> +  rtx op2 = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
> +  rtx op3 = gen_rtx_REG (V2DImode, REGNO (operands[1]));
> +  emit_insn (gen_vsx_extract_v2di (op0, op1, GEN_INT (0)));
> +  emit_insn (gen_vsx_extract_v2di (op2, op3, GEN_INT (1)));
> +  DONE;
> +})
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr110040-1.c b/gcc/testsuite/gcc.target/powerpc/pr110040-1.c
> new file mode 100644
> index 00000000000..fb3bd254636
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr110040-1.c
> @@ -0,0 +1,14 @@
> +/* PR target/110040 */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_p9vector_ok } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
> +/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
> +
> +#include <altivec.h>
> +
> +void
> +foo (signed long *dst, vector signed __int128 src)
> +{
> +  *dst = (signed long) src[0];
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr110040-2.c b/gcc/testsuite/gcc.target/powerpc/pr110040-2.c
> new file mode 100644
> index 00000000000..f3aa22be4e8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr110040-2.c
> @@ -0,0 +1,13 @@
> +/* PR target/110040 */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
> +
> +#include <altivec.h>
> +
> +void
> +foo (signed int *dst, vector signed __int128 src)
> +{
> +  __builtin_vec_xst_trunc (src, 0, dst);
> +}
> 
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PING^2][PATCH] rs6000: load high and low part of 128bit vector independently [PR110040]
  2024-02-26  5:43 ` [PATCH] rs6000: load high and low part of 128bit vector independently [PR110040] jeevitha
  2024-03-07 15:24   ` [PING^1][PATCH] " jeevitha
@ 2024-03-26  4:53   ` jeevitha
  2024-04-17  9:16     ` [PING^3][PATCH] " jeevitha
  2024-05-22  3:20   ` [PATCH] " Kewen.Lin
  2 siblings, 1 reply; 6+ messages in thread
From: jeevitha @ 2024-03-26  4:53 UTC (permalink / raw)
  To: jeevitha, GCC Patches, Kewen.Lin, Segher Boessenkool; +Cc: Peter Bergner

Ping!

please review.

Thanks & Regards
Jeevitha


On 26/02/24 11:13 am, jeevitha wrote:
> Hi All,
> 
> The following patch has been bootstrapped and regtested on powerpc64le-linux.
> 
> PR110040 exposes an issue concerning moves from vector registers to GPRs.
> There are two moves, one for upper 64 bits and the other for the lower
> 64 bits.  In the problematic test case, we are only interested in storing
> the lower 64 bits.  However, the instruction for copying the upper 64 bits
> is still emitted and is dead code.  This patch adds a splitter that splits
> apart the two move instructions so that DCE can remove the dead code after
> splitting.
> 
> 2024-02-26  Jeevitha Palanisamy  <jeevitha@linux.ibm.com>
> 
> gcc/
> 	PR target/110040
> 	* config/rs6000/vsx.md (split pattern for V1TI to DI move): Defined.
> 
> gcc/testsuite/
> 	PR target/110040
> 	* gcc.target/powerpc/pr110040-1.c: New testcase.
> 	* gcc.target/powerpc/pr110040-2.c: New testcase.
> 
> 
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index 6111cc90eb7..78457f8fb14 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -6706,3 +6706,19 @@
>    "vmsumcud %0,%1,%2,%3"
>    [(set_attr "type" "veccomplex")]
>  )
> +
> +(define_split
> +  [(set (match_operand:V1TI 0 "int_reg_operand")
> +       (match_operand:V1TI 1 "vsx_register_operand"))]
> +  "reload_completed
> +   && TARGET_DIRECT_MOVE_64BIT"
> +   [(pc)]
> +{
> +  rtx op0 = gen_rtx_REG (DImode, REGNO (operands[0]));
> +  rtx op1 = gen_rtx_REG (V2DImode, REGNO (operands[1]));
> +  rtx op2 = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
> +  rtx op3 = gen_rtx_REG (V2DImode, REGNO (operands[1]));
> +  emit_insn (gen_vsx_extract_v2di (op0, op1, GEN_INT (0)));
> +  emit_insn (gen_vsx_extract_v2di (op2, op3, GEN_INT (1)));
> +  DONE;
> +})
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr110040-1.c b/gcc/testsuite/gcc.target/powerpc/pr110040-1.c
> new file mode 100644
> index 00000000000..fb3bd254636
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr110040-1.c
> @@ -0,0 +1,14 @@
> +/* PR target/110040 */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_p9vector_ok } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
> +/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
> +
> +#include <altivec.h>
> +
> +void
> +foo (signed long *dst, vector signed __int128 src)
> +{
> +  *dst = (signed long) src[0];
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr110040-2.c b/gcc/testsuite/gcc.target/powerpc/pr110040-2.c
> new file mode 100644
> index 00000000000..f3aa22be4e8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr110040-2.c
> @@ -0,0 +1,13 @@
> +/* PR target/110040 */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
> +
> +#include <altivec.h>
> +
> +void
> +foo (signed int *dst, vector signed __int128 src)
> +{
> +  __builtin_vec_xst_trunc (src, 0, dst);
> +}
> 
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PING^3][PATCH] rs6000: load high and low part of 128bit vector independently [PR110040]
  2024-03-26  4:53   ` [PING^2][PATCH] " jeevitha
@ 2024-04-17  9:16     ` jeevitha
  2024-05-21  4:57       ` [PING^4][PATCH] " jeevitha
  0 siblings, 1 reply; 6+ messages in thread
From: jeevitha @ 2024-04-17  9:16 UTC (permalink / raw)
  To: jeevitha, GCC Patches, Kewen.Lin, Segher Boessenkool, Michael Meissner
  Cc: Peter Bergner

Ping!

please review.

Thanks & Regards
Jeevitha

On 26/03/24 10:23 am, jeevitha wrote:
> Ping!
> 
> please review.
> 
> Thanks & Regards
> Jeevitha
> 
> 
> On 26/02/24 11:13 am, jeevitha wrote:
>> Hi All,
>>
>> The following patch has been bootstrapped and regtested on powerpc64le-linux.
>>
>> PR110040 exposes an issue concerning moves from vector registers to GPRs.
>> There are two moves, one for upper 64 bits and the other for the lower
>> 64 bits.  In the problematic test case, we are only interested in storing
>> the lower 64 bits.  However, the instruction for copying the upper 64 bits
>> is still emitted and is dead code.  This patch adds a splitter that splits
>> apart the two move instructions so that DCE can remove the dead code after
>> splitting.
>>
>> 2024-02-26  Jeevitha Palanisamy  <jeevitha@linux.ibm.com>
>>
>> gcc/
>> 	PR target/110040
>> 	* config/rs6000/vsx.md (split pattern for V1TI to DI move): Defined.
>>
>> gcc/testsuite/
>> 	PR target/110040
>> 	* gcc.target/powerpc/pr110040-1.c: New testcase.
>> 	* gcc.target/powerpc/pr110040-2.c: New testcase.
>>
>>
>> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
>> index 6111cc90eb7..78457f8fb14 100644
>> --- a/gcc/config/rs6000/vsx.md
>> +++ b/gcc/config/rs6000/vsx.md
>> @@ -6706,3 +6706,19 @@
>>    "vmsumcud %0,%1,%2,%3"
>>    [(set_attr "type" "veccomplex")]
>>  )
>> +
>> +(define_split
>> +  [(set (match_operand:V1TI 0 "int_reg_operand")
>> +       (match_operand:V1TI 1 "vsx_register_operand"))]
>> +  "reload_completed
>> +   && TARGET_DIRECT_MOVE_64BIT"
>> +   [(pc)]
>> +{
>> +  rtx op0 = gen_rtx_REG (DImode, REGNO (operands[0]));
>> +  rtx op1 = gen_rtx_REG (V2DImode, REGNO (operands[1]));
>> +  rtx op2 = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
>> +  rtx op3 = gen_rtx_REG (V2DImode, REGNO (operands[1]));
>> +  emit_insn (gen_vsx_extract_v2di (op0, op1, GEN_INT (0)));
>> +  emit_insn (gen_vsx_extract_v2di (op2, op3, GEN_INT (1)));
>> +  DONE;
>> +})
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr110040-1.c b/gcc/testsuite/gcc.target/powerpc/pr110040-1.c
>> new file mode 100644
>> index 00000000000..fb3bd254636
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr110040-1.c
>> @@ -0,0 +1,14 @@
>> +/* PR target/110040 */
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target powerpc_p9vector_ok } */
>> +/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
>> +/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
>> +
>> +#include <altivec.h>
>> +
>> +void
>> +foo (signed long *dst, vector signed __int128 src)
>> +{
>> +  *dst = (signed long) src[0];
>> +}
>> +
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr110040-2.c b/gcc/testsuite/gcc.target/powerpc/pr110040-2.c
>> new file mode 100644
>> index 00000000000..f3aa22be4e8
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr110040-2.c
>> @@ -0,0 +1,13 @@
>> +/* PR target/110040 */
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target power10_ok } */
>> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
>> +/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
>> +
>> +#include <altivec.h>
>> +
>> +void
>> +foo (signed int *dst, vector signed __int128 src)
>> +{
>> +  __builtin_vec_xst_trunc (src, 0, dst);
>> +}
>>
>>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PING^4][PATCH] rs6000: load high and low part of 128bit vector independently [PR110040]
  2024-04-17  9:16     ` [PING^3][PATCH] " jeevitha
@ 2024-05-21  4:57       ` jeevitha
  0 siblings, 0 replies; 6+ messages in thread
From: jeevitha @ 2024-05-21  4:57 UTC (permalink / raw)
  To: jeevitha, GCC Patches, Kewen.Lin, Segher Boessenkool, Michael Meissner
  Cc: Peter Bergner

Ping!

please review.

Thanks & Regards
Jeevitha


On 17/04/24 2:46 pm, jeevitha wrote:
> Ping!
> 
> please review.
> 
> Thanks & Regards
> Jeevitha
> 
> On 26/03/24 10:23 am, jeevitha wrote:
>> Ping!
>>
>> please review.
>>
>> Thanks & Regards
>> Jeevitha
>>
>>
>> On 26/02/24 11:13 am, jeevitha wrote:
>>> Hi All,
>>>
>>> The following patch has been bootstrapped and regtested on powerpc64le-linux.
>>>
>>> PR110040 exposes an issue concerning moves from vector registers to GPRs.
>>> There are two moves, one for upper 64 bits and the other for the lower
>>> 64 bits.  In the problematic test case, we are only interested in storing
>>> the lower 64 bits.  However, the instruction for copying the upper 64 bits
>>> is still emitted and is dead code.  This patch adds a splitter that splits
>>> apart the two move instructions so that DCE can remove the dead code after
>>> splitting.
>>>
>>> 2024-02-26  Jeevitha Palanisamy  <jeevitha@linux.ibm.com>
>>>
>>> gcc/
>>> 	PR target/110040
>>> 	* config/rs6000/vsx.md (split pattern for V1TI to DI move): Defined.
>>>
>>> gcc/testsuite/
>>> 	PR target/110040
>>> 	* gcc.target/powerpc/pr110040-1.c: New testcase.
>>> 	* gcc.target/powerpc/pr110040-2.c: New testcase.
>>>
>>>
>>> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
>>> index 6111cc90eb7..78457f8fb14 100644
>>> --- a/gcc/config/rs6000/vsx.md
>>> +++ b/gcc/config/rs6000/vsx.md
>>> @@ -6706,3 +6706,19 @@
>>>    "vmsumcud %0,%1,%2,%3"
>>>    [(set_attr "type" "veccomplex")]
>>>  )
>>> +
>>> +(define_split
>>> +  [(set (match_operand:V1TI 0 "int_reg_operand")
>>> +       (match_operand:V1TI 1 "vsx_register_operand"))]
>>> +  "reload_completed
>>> +   && TARGET_DIRECT_MOVE_64BIT"
>>> +   [(pc)]
>>> +{
>>> +  rtx op0 = gen_rtx_REG (DImode, REGNO (operands[0]));
>>> +  rtx op1 = gen_rtx_REG (V2DImode, REGNO (operands[1]));
>>> +  rtx op2 = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
>>> +  rtx op3 = gen_rtx_REG (V2DImode, REGNO (operands[1]));
>>> +  emit_insn (gen_vsx_extract_v2di (op0, op1, GEN_INT (0)));
>>> +  emit_insn (gen_vsx_extract_v2di (op2, op3, GEN_INT (1)));
>>> +  DONE;
>>> +})
>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr110040-1.c b/gcc/testsuite/gcc.target/powerpc/pr110040-1.c
>>> new file mode 100644
>>> index 00000000000..fb3bd254636
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr110040-1.c
>>> @@ -0,0 +1,14 @@
>>> +/* PR target/110040 */
>>> +/* { dg-do compile } */
>>> +/* { dg-require-effective-target powerpc_p9vector_ok } */
>>> +/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
>>> +/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
>>> +
>>> +#include <altivec.h>
>>> +
>>> +void
>>> +foo (signed long *dst, vector signed __int128 src)
>>> +{
>>> +  *dst = (signed long) src[0];
>>> +}
>>> +
>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr110040-2.c b/gcc/testsuite/gcc.target/powerpc/pr110040-2.c
>>> new file mode 100644
>>> index 00000000000..f3aa22be4e8
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr110040-2.c
>>> @@ -0,0 +1,13 @@
>>> +/* PR target/110040 */
>>> +/* { dg-do compile } */
>>> +/* { dg-require-effective-target power10_ok } */
>>> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
>>> +/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
>>> +
>>> +#include <altivec.h>
>>> +
>>> +void
>>> +foo (signed int *dst, vector signed __int128 src)
>>> +{
>>> +  __builtin_vec_xst_trunc (src, 0, dst);
>>> +}
>>>
>>>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] rs6000: load high and low part of 128bit vector independently [PR110040]
  2024-02-26  5:43 ` [PATCH] rs6000: load high and low part of 128bit vector independently [PR110040] jeevitha
  2024-03-07 15:24   ` [PING^1][PATCH] " jeevitha
  2024-03-26  4:53   ` [PING^2][PATCH] " jeevitha
@ 2024-05-22  3:20   ` Kewen.Lin
  2 siblings, 0 replies; 6+ messages in thread
From: Kewen.Lin @ 2024-05-22  3:20 UTC (permalink / raw)
  To: jeevitha; +Cc: Peter Bergner, GCC Patches, Segher Boessenkool, David Edelsohn

Hi,

on 2024/2/26 13:43, jeevitha wrote:
> Hi All,
> 
> The following patch has been bootstrapped and regtested on powerpc64le-linux.
> 
> PR110040 exposes an issue concerning moves from vector registers to GPRs.
> There are two moves, one for upper 64 bits and the other for the lower
> 64 bits.  In the problematic test case, we are only interested in storing
> the lower 64 bits.  However, the instruction for copying the upper 64 bits
> is still emitted and is dead code.  This patch adds a splitter that splits
> apart the two move instructions so that DCE can remove the dead code after
> splitting.
> 
> 2024-02-26  Jeevitha Palanisamy  <jeevitha@linux.ibm.com>
> 
> gcc/
> 	PR target/110040
> 	* config/rs6000/vsx.md (split pattern for V1TI to DI move): Defined.
> 
> gcc/testsuite/
> 	PR target/110040
> 	* gcc.target/powerpc/pr110040-1.c: New testcase.
> 	* gcc.target/powerpc/pr110040-2.c: New testcase.
> 
> 
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index 6111cc90eb7..78457f8fb14 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -6706,3 +6706,19 @@
>    "vmsumcud %0,%1,%2,%3"
>    [(set_attr "type" "veccomplex")]
>  )
> +
> +(define_split
> +  [(set (match_operand:V1TI 0 "int_reg_operand")
> +       (match_operand:V1TI 1 "vsx_register_operand"))]
> +  "reload_completed
> +   && TARGET_DIRECT_MOVE_64BIT"
> +   [(pc)]
> +{
> +  rtx op0 = gen_rtx_REG (DImode, REGNO (operands[0]));
> +  rtx op1 = gen_rtx_REG (V2DImode, REGNO (operands[1]));
> +  rtx op2 = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
> +  rtx op3 = gen_rtx_REG (V2DImode, REGNO (operands[1]));

Nit: op3 is the same as op1 (so useless and removable), just
call it as src_op?  Maybe op0 and op2 as dest_op0, dest_op1
to match the extracted index?

> +  emit_insn (gen_vsx_extract_v2di (op0, op1, GEN_INT (0)));
> +  emit_insn (gen_vsx_extract_v2di (op2, op3, GEN_INT (1)));

Nit: GEN_INT ({0,1}) can be substituted with const{0,1}_rtx.

> +  DONE;
> +})
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr110040-1.c b/gcc/testsuite/gcc.target/powerpc/pr110040-1.c
> new file mode 100644
> index 00000000000..fb3bd254636
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr110040-1.c
> @@ -0,0 +1,14 @@
> +/* PR target/110040 */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_p9vector_ok } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power9" } */

powerpc_p9vector_ok doesn't exist any more, you have to rebase.

This also requires int128 effective target.

Change it to:

/* { dg-options "-O2" } */
/* { dg-require-effective-target int128 } */
/* { dg-require-effective-target powerpc_vsx } */

Since we just check no mfvsrd, I think we can even drop the
"-mdejagnu-cpu=power9", could you test if it works on multiple
environments?

> +/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
> +
> +#include <altivec.h>
> +
> +void
> +foo (signed long *dst, vector signed __int128 src)
> +{
> +  *dst = (signed long) src[0];
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr110040-2.c b/gcc/testsuite/gcc.target/powerpc/pr110040-2.c
> new file mode 100644
> index 00000000000..f3aa22be4e8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr110040-2.c
> @@ -0,0 +1,13 @@
> +/* PR target/110040 */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */

Similar to the above:

/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
/* { dg-require-effective-target int128 } */
/* { dg-require-effective-target powerpc_vsx } */

Also adding one comment why it requires power10.

BR,
Kewen

> +
> +#include <altivec.h>
> +
> +void
> +foo (signed int *dst, vector signed __int128 src)
> +{
> +  __builtin_vec_xst_trunc (src, 0, dst);
> +}
> 
> 


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2024-05-22  3:20 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <a18cccd5-9465-4cc3-85ed-156a72a7236d@linux.vnet.ibm.com>
2024-02-26  5:43 ` [PATCH] rs6000: load high and low part of 128bit vector independently [PR110040] jeevitha
2024-03-07 15:24   ` [PING^1][PATCH] " jeevitha
2024-03-26  4:53   ` [PING^2][PATCH] " jeevitha
2024-04-17  9:16     ` [PING^3][PATCH] " jeevitha
2024-05-21  4:57       ` [PING^4][PATCH] " jeevitha
2024-05-22  3:20   ` [PATCH] " Kewen.Lin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).