* [PATCH] Disparage slightly for the alternative which move DFmode between SSE_REGS and GENERAL_REGS.
@ 2023-07-06 1:12 liuhongt
2023-07-06 5:53 ` Uros Bizjak
0 siblings, 1 reply; 3+ messages in thread
From: liuhongt @ 2023-07-06 1:12 UTC (permalink / raw)
To: gcc-patches; +Cc: ubizjak
For testcase
void __cond_swap(double* __x, double* __y) {
bool __r = (*__x < *__y);
auto __tmp = __r ? *__x : *__y;
*__y = __r ? *__y : *__x;
*__x = __tmp;
}
GCC-14 with -O2 and -march=x86-64 options generates the following code:
__cond_swap(double*, double*):
movsd xmm1, QWORD PTR [rdi]
movsd xmm0, QWORD PTR [rsi]
comisd xmm0, xmm1
jbe .L2
movq rax, xmm1
movapd xmm1, xmm0
movq xmm0, rax
.L2:
movsd QWORD PTR [rsi], xmm1
movsd QWORD PTR [rdi], xmm0
ret
rax is used to save and restore DFmode value. In RA both GENERAL_REGS
and SSE_REGS cost zero since we didn't disparage the
alternative in movdf_internal pattern, according to register
allocation order, GENERAL_REGS is allocated. The patch add ? for
alternative (r,v) and (v,r) just like we did for movsf/hf/bf_internal
pattern, after that we get optimal RA.
__cond_swap:
.LFB0:
.cfi_startproc
movsd (%rdi), %xmm1
movsd (%rsi), %xmm0
comisd %xmm1, %xmm0
jbe .L2
movapd %xmm1, %xmm2
movapd %xmm0, %xmm1
movapd %xmm2, %xmm0
.L2:
movsd %xmm1, (%rsi)
movsd %xmm0, (%rdi)
ret
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
Ok for trunk?
gcc/ChangeLog:
PR target/110170
* config/i386/i386.md (movdf_internal): Disparage slightly for
2 alternatives (r,v) and (v,r) by adding constraint modifier
'?'.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr110170-3.c: New test.
---
gcc/config/i386/i386.md | 4 ++--
gcc/testsuite/gcc.target/i386/pr110170-3.c | 11 +++++++++++
2 files changed, 13 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr110170-3.c
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a82cc353cfd..e47ced1bb70 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3915,9 +3915,9 @@ (define_split
;; Possible store forwarding (partial memory) stall in alternatives 4, 6 and 7.
(define_insn "*movdf_internal"
[(set (match_operand:DF 0 "nonimmediate_operand"
- "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,r ,v,r ,o ,r ,m")
+ "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,?r,?v,r ,o ,r ,m")
(match_operand:DF 1 "general_operand"
- "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,v,r ,roF,rF,rmF,rC"))]
+ "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x, v, r,roF,rF,rmF,rC"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& (lra_in_progress || reload_completed
|| !CONST_DOUBLE_P (operands[1])
diff --git a/gcc/testsuite/gcc.target/i386/pr110170-3.c b/gcc/testsuite/gcc.target/i386/pr110170-3.c
new file mode 100644
index 00000000000..70daa89e9aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110170-3.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-if-conversion -fno-if-conversion2" } */
+/* { dg-final { scan-assembler-not {(?n)movq.*r} } } */
+
+void __cond_swap(double* __x, double* __y) {
+ _Bool __r = (*__x < *__y);
+ double __tmp = __r ? *__x : *__y;
+ *__y = __r ? *__y : *__x;
+ *__x = __tmp;
+}
+
--
2.39.1.388.g2fc9e9ca3c
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] Disparage slightly for the alternative which move DFmode between SSE_REGS and GENERAL_REGS.
2023-07-06 1:12 [PATCH] Disparage slightly for the alternative which move DFmode between SSE_REGS and GENERAL_REGS liuhongt
@ 2023-07-06 5:53 ` Uros Bizjak
2023-10-13 3:12 ` Hongtao Liu
0 siblings, 1 reply; 3+ messages in thread
From: Uros Bizjak @ 2023-07-06 5:53 UTC (permalink / raw)
To: liuhongt; +Cc: gcc-patches
On Thu, Jul 6, 2023 at 3:14 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> For testcase
>
> void __cond_swap(double* __x, double* __y) {
> bool __r = (*__x < *__y);
> auto __tmp = __r ? *__x : *__y;
> *__y = __r ? *__y : *__x;
> *__x = __tmp;
> }
>
> GCC-14 with -O2 and -march=x86-64 options generates the following code:
>
> __cond_swap(double*, double*):
> movsd xmm1, QWORD PTR [rdi]
> movsd xmm0, QWORD PTR [rsi]
> comisd xmm0, xmm1
> jbe .L2
> movq rax, xmm1
> movapd xmm1, xmm0
> movq xmm0, rax
> .L2:
> movsd QWORD PTR [rsi], xmm1
> movsd QWORD PTR [rdi], xmm0
> ret
>
> rax is used to save and restore DFmode value. In RA both GENERAL_REGS
> and SSE_REGS cost zero since we didn't disparage the
> alternative in movdf_internal pattern, according to register
> allocation order, GENERAL_REGS is allocated. The patch add ? for
> alternative (r,v) and (v,r) just like we did for movsf/hf/bf_internal
> pattern, after that we get optimal RA.
>
> __cond_swap:
> .LFB0:
> .cfi_startproc
> movsd (%rdi), %xmm1
> movsd (%rsi), %xmm0
> comisd %xmm1, %xmm0
> jbe .L2
> movapd %xmm1, %xmm2
> movapd %xmm0, %xmm1
> movapd %xmm2, %xmm0
> .L2:
> movsd %xmm1, (%rsi)
> movsd %xmm0, (%rdi)
> ret
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
> Ok for trunk?
>
>
> gcc/ChangeLog:
>
> PR target/110170
> * config/i386/i386.md (movdf_internal): Disparage slightly for
> 2 alternatives (r,v) and (v,r) by adding constraint modifier
> '?'.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr110170-3.c: New test.
OK.
Thanks,
Uros.
> ---
> gcc/config/i386/i386.md | 4 ++--
> gcc/testsuite/gcc.target/i386/pr110170-3.c | 11 +++++++++++
> 2 files changed, 13 insertions(+), 2 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/pr110170-3.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index a82cc353cfd..e47ced1bb70 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -3915,9 +3915,9 @@ (define_split
> ;; Possible store forwarding (partial memory) stall in alternatives 4, 6 and 7.
> (define_insn "*movdf_internal"
> [(set (match_operand:DF 0 "nonimmediate_operand"
> - "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,r ,v,r ,o ,r ,m")
> + "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,?r,?v,r ,o ,r ,m")
> (match_operand:DF 1 "general_operand"
> - "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,v,r ,roF,rF,rmF,rC"))]
> + "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x, v, r,roF,rF,rmF,rC"))]
> "!(MEM_P (operands[0]) && MEM_P (operands[1]))
> && (lra_in_progress || reload_completed
> || !CONST_DOUBLE_P (operands[1])
> diff --git a/gcc/testsuite/gcc.target/i386/pr110170-3.c b/gcc/testsuite/gcc.target/i386/pr110170-3.c
> new file mode 100644
> index 00000000000..70daa89e9aa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr110170-3.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-if-conversion -fno-if-conversion2" } */
> +/* { dg-final { scan-assembler-not {(?n)movq.*r} } } */
> +
> +void __cond_swap(double* __x, double* __y) {
> + _Bool __r = (*__x < *__y);
> + double __tmp = __r ? *__x : *__y;
> + *__y = __r ? *__y : *__x;
> + *__x = __tmp;
> +}
> +
> --
> 2.39.1.388.g2fc9e9ca3c
>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] Disparage slightly for the alternative which move DFmode between SSE_REGS and GENERAL_REGS.
2023-07-06 5:53 ` Uros Bizjak
@ 2023-10-13 3:12 ` Hongtao Liu
0 siblings, 0 replies; 3+ messages in thread
From: Hongtao Liu @ 2023-10-13 3:12 UTC (permalink / raw)
To: Uros Bizjak; +Cc: liuhongt, gcc-patches
On Thu, Jul 6, 2023 at 1:53 PM Uros Bizjak via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Thu, Jul 6, 2023 at 3:14 AM liuhongt <hongtao.liu@intel.com> wrote:
> >
> > For testcase
> >
> > void __cond_swap(double* __x, double* __y) {
> > bool __r = (*__x < *__y);
> > auto __tmp = __r ? *__x : *__y;
> > *__y = __r ? *__y : *__x;
> > *__x = __tmp;
> > }
> >
> > GCC-14 with -O2 and -march=x86-64 options generates the following code:
> >
> > __cond_swap(double*, double*):
> > movsd xmm1, QWORD PTR [rdi]
> > movsd xmm0, QWORD PTR [rsi]
> > comisd xmm0, xmm1
> > jbe .L2
> > movq rax, xmm1
> > movapd xmm1, xmm0
> > movq xmm0, rax
> > .L2:
> > movsd QWORD PTR [rsi], xmm1
> > movsd QWORD PTR [rdi], xmm0
> > ret
> >
> > rax is used to save and restore DFmode value. In RA both GENERAL_REGS
> > and SSE_REGS cost zero since we didn't disparage the
> > alternative in movdf_internal pattern, according to register
> > allocation order, GENERAL_REGS is allocated. The patch add ? for
> > alternative (r,v) and (v,r) just like we did for movsf/hf/bf_internal
> > pattern, after that we get optimal RA.
> >
> > __cond_swap:
> > .LFB0:
> > .cfi_startproc
> > movsd (%rdi), %xmm1
> > movsd (%rsi), %xmm0
> > comisd %xmm1, %xmm0
> > jbe .L2
> > movapd %xmm1, %xmm2
> > movapd %xmm0, %xmm1
> > movapd %xmm2, %xmm0
> > .L2:
> > movsd %xmm1, (%rsi)
> > movsd %xmm0, (%rdi)
> > ret
> >
> > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
> > Ok for trunk?
> >
> >
> > gcc/ChangeLog:
> >
> > PR target/110170
> > * config/i386/i386.md (movdf_internal): Disparage slightly for
> > 2 alternatives (r,v) and (v,r) by adding constraint modifier
> > '?'.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/i386/pr110170-3.c: New test.
>
> OK.
Some user reports the same issue in unixbench, i looks like an common
issue when swap 2 double variable
So I'd like to backport this patch to GCC13/GCC12/GCC11, the fix
should be generally good and at low risk.
Any comments?
>
> Thanks,
> Uros.
>
> > ---
> > gcc/config/i386/i386.md | 4 ++--
> > gcc/testsuite/gcc.target/i386/pr110170-3.c | 11 +++++++++++
> > 2 files changed, 13 insertions(+), 2 deletions(-)
> > create mode 100644 gcc/testsuite/gcc.target/i386/pr110170-3.c
> >
> > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> > index a82cc353cfd..e47ced1bb70 100644
> > --- a/gcc/config/i386/i386.md
> > +++ b/gcc/config/i386/i386.md
> > @@ -3915,9 +3915,9 @@ (define_split
> > ;; Possible store forwarding (partial memory) stall in alternatives 4, 6 and 7.
> > (define_insn "*movdf_internal"
> > [(set (match_operand:DF 0 "nonimmediate_operand"
> > - "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,r ,v,r ,o ,r ,m")
> > + "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,?r,?v,r ,o ,r ,m")
> > (match_operand:DF 1 "general_operand"
> > - "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,v,r ,roF,rF,rmF,rC"))]
> > + "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x, v, r,roF,rF,rmF,rC"))]
> > "!(MEM_P (operands[0]) && MEM_P (operands[1]))
> > && (lra_in_progress || reload_completed
> > || !CONST_DOUBLE_P (operands[1])
> > diff --git a/gcc/testsuite/gcc.target/i386/pr110170-3.c b/gcc/testsuite/gcc.target/i386/pr110170-3.c
> > new file mode 100644
> > index 00000000000..70daa89e9aa
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr110170-3.c
> > @@ -0,0 +1,11 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O2 -fno-if-conversion -fno-if-conversion2" } */
> > +/* { dg-final { scan-assembler-not {(?n)movq.*r} } } */
> > +
> > +void __cond_swap(double* __x, double* __y) {
> > + _Bool __r = (*__x < *__y);
> > + double __tmp = __r ? *__x : *__y;
> > + *__y = __r ? *__y : *__x;
> > + *__x = __tmp;
> > +}
> > +
> > --
> > 2.39.1.388.g2fc9e9ca3c
> >
--
BR,
Hongtao
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2023-10-13 3:12 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-06 1:12 [PATCH] Disparage slightly for the alternative which move DFmode between SSE_REGS and GENERAL_REGS liuhongt
2023-07-06 5:53 ` Uros Bizjak
2023-10-13 3:12 ` Hongtao Liu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).