public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug target/97730] New: [10/11 Regression] aarch64, SVE2: Wrong code since r10-5853-g0a09a948 (wrong pattern for BCAX)
@ 2020-11-05 13:35 acoplan at gcc dot gnu.org
  2020-11-05 13:36 ` [Bug target/97730] " acoplan at gcc dot gnu.org
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: acoplan at gcc dot gnu.org @ 2020-11-05 13:35 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97730

            Bug ID: 97730
           Summary: [10/11 Regression] aarch64, SVE2: Wrong code since
                    r10-5853-g0a09a948 (wrong pattern for BCAX)
           Product: gcc
           Version: 11.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: acoplan at gcc dot gnu.org
  Target Milestone: ---

AArch64 GCC miscompiles the following testcase:

unsigned b = 0xce8e5a48, c = 0xb849691a;
unsigned a[8080];
int main() {
  a[0] = b;
  c = c;
  unsigned f = 0xb1e8;
  for (int h = 0; h < 5; h++)
    a[h] = (b & c) ^ f;
  if (a[0] != 0x8808f9e0)
    __builtin_abort();
}

at -O1 -ftree-vectorize -march=armv8.2-a+sve2 since
r10-5853-g0a09a9483825233f16e5b26bb0ffee76752339fc. Below is the generated code
from a trunk build, with some relevant lines annotated:

        .arch armv8.2-a+crc+sve2
        .file   "test.c"
        .text
        .align  2
        .global main
        .type   main, %function
main:
        adrp    x0, .LANCHOR0
        add     x3, x0, :lo12:.LANCHOR0
        ldr     w0, [x0, #:lo12:.LANCHOR0] // w0 <- b
        adrp    x2, a
        add     x1, x2, :lo12:a
        str     w0, [x2, #:lo12:a]         // a[0] <- w0
        mov     w2, 5
        whilelo p0.s, wzr, w2
        ptrue   p1.b, all
        ld1rw   z2.s, p1/z, [x3, 4]        // z2 <- {c, c, ... }
        mov     z1.s, w0                   // z1 <- {b, b, ... }
        mov     w0, 45544                  // w0 <- f (= 0xb1e8)
        mov     z0.s, w0                   // z0 <- {f, f, ... }
        bcax    z0.d, z0.d, z2.d, z1.d     // z0 ^= (z2 & ~z1)
        st1w    z0.s, p0, [x1]             // a[0, 1, ...] <- z0
        cntw    x0
        whilelo p0.s, w0, w2
        b.none  .L2
        incb    x1
        st1w    z0.s, p0, [x1]
.L2:
        adrp    x0, a
        ldr     w1, [x0, #:lo12:a]
        mov     w0, 63968
        movk    w0, 0x8808, lsl 16
        cmp     w1, w0
        bne     .L7
        mov     w0, 0
        ret
.L7:
        stp     x29, x30, [sp, -16]!
        mov     x29, sp
        bl      abort
        .size   main, .-main
        .global a
        .global c
        .global b
        .data
        .align  2
        .set    .LANCHOR0,. + 0
        .type   b, %object
        .size   b, 4
b:
        .word   -829531576
        .type   c, %object
        .size   c, 4
c:
        .word   -1203148518
        .bss
        .align  3
        .type   a, %object
        .size   a, 32320
a:
        .zero   32320
        .ident  "GCC: (unknown) 11.0.0 20201105 (experimental)"

The problem appears to be that the instruction:
  bcax    z0.d, z0.d, z2.d, z1.d
computes (~b & c) ^ f instead of (b & c) ^ f.

Looking at the SVE2 pattern for bcax (aarch64-sve2.md), it looks like we're
missing a (not ...) around operand 3 inside the (and ...) rtx; operand 3 is
the one that the BCAX instruction inverts:

;; Unpredicated exclusive OR of AND.
(define_insn "@aarch64_sve2_bcax<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
            (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
          (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
  bcax\t%0.d, %0.d, %2.d, %3.d
  movprfx\t%0, %1\;bcax\t%0.d, %0.d, %2.d, %3.d"
  [(set_attr "movprfx" "*,yes")]
)

Comparing this with the corresponding pattern for AdvSIMD bcax
(aarch64-simd.md), which does wrap operand 3 in a not, makes the problem clear:

(define_insn "bcaxq<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
        (xor:VQ_I
         (and:VQ_I
          (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
          (match_operand:VQ_I 2 "register_operand" "w"))
         (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)

Indeed, if the source file is changed to print the value of a[0], we get
0x304190fa, which is the result of computing (~b & c) ^ f, rather than the
expected 0x8808f9e0, which is (b & c) ^ f.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2020-11-19 10:49 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-05 13:35 [Bug target/97730] New: [10/11 Regression] aarch64, SVE2: Wrong code since r10-5853-g0a09a948 (wrong pattern for BCAX) acoplan at gcc dot gnu.org
2020-11-05 13:36 ` [Bug target/97730] " acoplan at gcc dot gnu.org
2020-11-05 13:46 ` rguenth at gcc dot gnu.org
2020-11-12 10:06 ` cvs-commit at gcc dot gnu.org
2020-11-12 10:08 ` [Bug target/97730] [10 " acoplan at gcc dot gnu.org
2020-11-19 10:47 ` cvs-commit at gcc dot gnu.org
2020-11-19 10:49 ` acoplan at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).