public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug target/51534] New: Bad code gen for vcgtq_u32 NEON intrinsic
@ 2011-12-13 19:29 rmansfield at qnx dot com
  2011-12-13 19:53 ` [Bug target/51534] " rmansfield at qnx dot com
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: rmansfield at qnx dot com @ 2011-12-13 19:29 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=51534

             Bug #: 51534
           Summary: Bad code gen for vcgtq_u32 NEON intrinsic
    Classification: Unclassified
           Product: gcc
           Version: 4.7.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
        AssignedTo: unassigned@gcc.gnu.org
        ReportedBy: rmansfield@qnx.com
              Host: i686-unknown-linux-gnu
            Target: arm-unknown-linux-gnueabi
             Build: i686-unknown-linux-gnu


$ ./xgcc -v
Using built-in specs.
COLLECT_GCC=./xgcc
Target: arm-unknown-linux-gnueabi
Configured with: ../configure --target=arm-unknown-linux-gnueabi
--prefix=/home/ryan/x-tools/arm-unknown-linux-gnueabi
--with-sysroot=/home/ryan/x-tools/arm-unknown-linux-gnueabi/arm-unknown-linux-gnueabi//sys-root
--disable-multilib
--with-local-prefix=/home/ryan/x-tools/arm-unknown-linux-gnueabi/arm-unknown-linux-gnueabi/sys-root
--disable-nls --enable-threads=posix --enable-symvers=gnu --enable-c99
--enable-long-long --enable-target-optspace
target_alias=arm-unknown-linux-gnueabi --enable-languages=c++ --disable-shared
--disable-libmudflap --disable-libssp
Thread model: posix
gcc version 4.7.0 20111213 (experimental) [trunk revision 182291] (GCC) 

$ cat ~/foo.c
#include <arm_neon.h>

/* Reduced test case for this report.  The statement of interest is the
   vcgtq_u32 call that compares vec_alpha against the all-zero vector
   vec_zero.  */
void foo (unsigned * src, unsigned *dst, int width)
{
  /* Per-lane shift counts, all zero.  */
  const int32x4_t vec_alpha_shift = vdupq_n_s32 (0);
  const uint32x4_t vec_one = vdupq_n_u32 (1u);
  /* Right-hand operand of the vcgtq_u32 comparison below.  */
  const uint32x4_t vec_zero = vdupq_n_u32 (0u);

/* NOTE(review): nothing in the loop body modifies width, src or dst, so as
   written the loop does not terminate once entered -- presumably trimmed
   while reducing the test case; the report concerns the generated code.  */
while (width >= 4)
    {
      uint32x4_t s0 = vld1q_u32 (src);
      uint32x4_t d0 = vld1q_u32 (dst);
      uint32x4_t vec_alpha = vshlq_u32 (s0, vec_alpha_shift);
      /* Mask the result of (vec_alpha > vec_zero) with vec_one and add it
         back onto vec_alpha.  */
      vec_alpha =
    vaddq_u32 (vec_alpha,
           vandq_u32 (vcgtq_u32 (vec_alpha, vec_zero), vec_one));
      s0 = vmulq_u32 (s0, vec_alpha);
      d0 = vaddq_u32 (s0, d0);
      /* Store the sum back through dst.  */
      vst1q_u32 (dst, d0);
    }
}

$ ./xgcc -B. -O3 -ftree-vectorize -mfpu=neon -mfloat-abi=softfp ~/foo.c 
-march=armv7-a -c

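Changing the code from:

const uint32x4_t vec_zero = vdupq_n_u32 (0u);

to

const uint32x4_t vec_zero = vdupq_n_u32 (1u);

results in a proper register load and a register operand for vcgt, as the
following diff of the generated assembly shows ('-' is the 0u version, '+' is
the 1u version):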

     vmov.i32    q9, #0  @ v4si
     vld1.32    {d16-d17}, [r8]
+    vmov.i32    q12, #1  @ v4si
     mov    r0, sl
     vld1.32    {d20-d21}, [sl]
     vshl.u32    q9, q8, q9
-    vcgt.u32    q11, q9, #0
+    vcgt.u32    q11, q9, q12
     vand    q11, q11, q4
     vadd.i32    q9, q9, q11
     vmul.i32    q8, q8, q9

This also happens on the 4.6 branch. It compiles OK with the 4.4 branch. I
haven't checked 4.5 yet.


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-03-03 19:10 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-12-13 19:29 [Bug target/51534] New: Bad code gen for vcgtq_u32 NEON intrinsic rmansfield at qnx dot com
2011-12-13 19:53 ` [Bug target/51534] " rmansfield at qnx dot com
2011-12-14 15:01 ` rearnsha at gcc dot gnu.org
2012-02-28 16:15 ` mgretton at gcc dot gnu.org
2012-02-28 16:19 ` mgretton at gcc dot gnu.org
2014-08-10 16:46 ` rmansfield at qnx dot com
2023-03-03 19:10 ` cvs-commit at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).