* [committed] i386: Fix V2SF horizontal add/subtract insns
@ 2020-05-27 19:52 Uros Bizjak
0 siblings, 0 replies; only message in thread
From: Uros Bizjak @ 2020-05-27 19:52 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 1236 bytes --]
The PFPNACC insn is incorrectly modelled as performing element-wise
addition and subtraction of two operands, but in reality it performs
horizontal subtraction and addition:
Instruction: PFPNACC dest,src
Description:
dest[31:0] <- dest[31:0] - dest[63:32];
dest[63:32] <- src[31:0] + src[63:32];
Also, it is not possible to directly replace PFACC with HADDPS and PFNACC
with HSUBPS, because operands in the second word do not match.
PFACC does:
dest[31..0] <- dest[31..0] + dest[63..32];
dest[63..32] <- src[31..0] + src [63..32];
while HADDPS does:
dest[31..0] <- dest[31..0] + dest[63..32];
dest[63..32] <- dest[127..96] + dest[95..64];
dest[95..64] <- src [31..0] + src [63..32];
dest[127..96] <- src [127..96] + src [95..64];
2020-05-27 Uroš Bizjak <ubizjak@gmail.com>
gcc/ChangeLog:
* config/i386/mmx.md (*mmx_haddv2sf3): Remove SSE alternatives.
(mmx_hsubv2sf3): Ditto.
(mmx_haddsubv2sf3): New expander.
(*mmx_haddsubv2sf3): Rename from mmx_addsubv2sf3. Correct
RTL template to model horizontal subtraction and addition.
* config/i386/i386-builtin.def (IX86_BUILTIN_PFPNACC):
Update for rename.
Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 5742 bytes --]
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index b873498f3ab..134981a798f 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -555,7 +555,7 @@ BDESC (OPTION_MASK_ISA_3DNOW_A, 0, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", I
BDESC (OPTION_MASK_ISA_3DNOW_A, 0, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI)
BDESC (OPTION_MASK_ISA_3DNOW_A, 0, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF)
BDESC (OPTION_MASK_ISA_3DNOW_A, 0, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF)
-BDESC (OPTION_MASK_ISA_3DNOW_A, 0, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF)
+BDESC (OPTION_MASK_ISA_3DNOW_A, 0, CODE_FOR_mmx_haddsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF)
/* SSE */
BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 271c1c2e833..7c9640d4f9f 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -552,32 +552,27 @@
"TARGET_3DNOW")
(define_insn "*mmx_haddv2sf3"
- [(set (match_operand:V2SF 0 "register_operand" "=y,x,x")
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
(vec_concat:V2SF
(plus:SF
(vec_select:SF
- (match_operand:V2SF 1 "register_operand" "0,0,x")
+ (match_operand:V2SF 1 "register_operand" "0")
(parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
(vec_select:SF (match_dup 1)
(parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
(plus:SF
(vec_select:SF
- (match_operand:V2SF 2 "nonimmediate_operand" "ym,x,x")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")
(parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
(vec_select:SF (match_dup 2)
(parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
"TARGET_3DNOW
&& INTVAL (operands[3]) != INTVAL (operands[4])
&& INTVAL (operands[5]) != INTVAL (operands[6])"
- "@
- pfacc\t{%2, %0|%0, %2}
- haddps\t{%2, %0|%0, %2}
- vhaddps\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "*,sse3_noavx,avx")
- (set_attr "type" "mmxadd,sseadd,sseadd")
- (set_attr "prefix_extra" "1,*,*")
- (set_attr "prefix" "*,orig,vex")
- (set_attr "mode" "V2SF,V4SF,V4SF")])
+ "pfacc\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
(define_insn "*mmx_haddv2sf3_low"
[(set (match_operand:SF 0 "register_operand" "=x,x")
@@ -599,28 +594,23 @@
(set_attr "mode" "V4SF")])
(define_insn "mmx_hsubv2sf3"
- [(set (match_operand:V2SF 0 "register_operand" "=y,x,x")
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
(vec_concat:V2SF
(minus:SF
(vec_select:SF
- (match_operand:V2SF 1 "register_operand" "0,0,x")
+ (match_operand:V2SF 1 "register_operand" "0")
(parallel [(const_int 0)]))
(vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
(minus:SF
(vec_select:SF
- (match_operand:V2SF 2 "register_mmxmem_operand" "ym,x,x")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")
(parallel [(const_int 0)]))
(vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
"TARGET_3DNOW_A"
- "@
- pfnacc\t{%2, %0|%0, %2}
- hsubps\t{%2, %0|%0, %2}
- vhsubps\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "*,sse3_noavx,avx")
- (set_attr "type" "mmxadd,sseadd,sseadd")
- (set_attr "prefix_extra" "1,*,*")
- (set_attr "prefix" "*,orig,vex")
- (set_attr "mode" "V2SF,V4SF,V4SF")])
+ "pfnacc\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
(define_insn "*mmx_hsubv2sf3_low"
[(set (match_operand:SF 0 "register_operand" "=x,x")
@@ -640,15 +630,38 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "V4SF")])
-(define_insn "mmx_addsubv2sf3"
+(define_expand "mmx_haddsubv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand")
+ (vec_concat:V2SF
+ (minus:SF
+ (vec_select:SF
+ (match_operand:V2SF 1 "register_operand")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+ (plus:SF
+ (vec_select:SF
+ (match_operand:V2SF 2 "nonimmediate_operand")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_3DNOW_A")
+
+(define_insn "*mmx_haddsubv2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y")
- (vec_merge:V2SF
- (plus:V2SF
- (match_operand:V2SF 1 "register_operand" "0")
- (match_operand:V2SF 2 "nonimmediate_operand" "ym"))
- (minus:V2SF (match_dup 1) (match_dup 2))
- (const_int 1)))]
- "TARGET_3DNOW_A"
+ (vec_concat:V2SF
+ (minus:SF
+ (vec_select:SF
+ (match_operand:V2SF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+ (plus:SF
+ (vec_select:SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")
+ (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
+ (vec_select:SF
+ (match_dup 2)
+ (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))))]
+ "TARGET_3DNOW_A
+ && INTVAL (operands[3]) != INTVAL (operands[4])"
"pfpnacc\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxadd")
(set_attr "prefix_extra" "1")
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2020-05-27 19:53 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-27 19:52 [committed] i386: Fix V2SF horizontal add/subtract insns Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).