From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gcc-bugzilla@gcc.gnu.org>
Received: by sourceware.org (Postfix, from userid 48)
	id E726B38515F4; Thu,  9 Mar 2023 16:45:16 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org E726B38515F4
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org;
	s=default; t=1678380316;
	bh=k7D4twKi+lj2ckTiTz10GSUkDY6yKcbn6BmlI4LRNE0=;
	h=From:To:Subject:Date:In-Reply-To:References:From;
	b=oiNFKMi7i79k3+6s1uheRPqnb5M8HdpmQzasKnqTubO9rsUTBNiinHvfOv0uVgH5q
	 EFiHBonQm5UzQduuRzs+Wtk6UfrmlZt/jOKTUoGcJQC0UWColWltxbd0FZQaDFq2pP
	 ku1MXKQdegUFsuhtGlIBxI9T9GM+Nvv2m8p3ApjM=
From: "john_platts at hotmail dot com" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug target/109069] Vector truncation test program produces
 incorrect result on big-endian powerpc64-linux-gnu with -mcpu=power10 -O2
Date: Thu, 09 Mar 2023 16:45:16 +0000
X-Bugzilla-Reason: CC
X-Bugzilla-Type: changed
X-Bugzilla-Watch-Reason: None
X-Bugzilla-Product: gcc
X-Bugzilla-Component: target
X-Bugzilla-Version: 12.1.0
X-Bugzilla-Keywords: wrong-code
X-Bugzilla-Severity: normal
X-Bugzilla-Who: john_platts at hotmail dot com
X-Bugzilla-Status: UNCONFIRMED
X-Bugzilla-Resolution: 
X-Bugzilla-Priority: P3
X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org
X-Bugzilla-Target-Milestone: ---
X-Bugzilla-Flags: 
X-Bugzilla-Changed-Fields: 
Message-ID: <bug-109069-4-lYl4DDLPoD@http.gcc.gnu.org/bugzilla/>
In-Reply-To: <bug-109069-4@http.gcc.gnu.org/bugzilla/>
References: <bug-109069-4@http.gcc.gnu.org/bugzilla/>
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable
X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/
Auto-Submitted: auto-generated
MIME-Version: 1.0
List-Id: <gcc-bugs.sourceware.org>

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D109069
--- Comment #5 from John Platts <john_platts at hotmail dot com> ---
Here is another test program that exposes the same code generation bug:
GCC 12.2.0 incorrectly optimizes a splat followed by a vec_sld on both
powerpc64-linux-gnu and powerpc64le-linux-gnu when compiling with the
-mcpu=3Dpower10 -O2 options:
#pragma push_macro("vector")
#pragma push_macro("pixel")
#pragma push_macro("bool")

#undef vector
#undef pixel
#undef bool

#include <altivec.h>

#pragma pop_macro("vector")
#pragma pop_macro("pixel")
#pragma pop_macro("bool")

#include <stdint.h>
#include <type_traits>

// Maps an element type T to a 16-byte vector type with T elements,
// declared through the GCC __vector_size__(16) attribute.
template<class T>
struct MakeSimdVectorType {
    typedef T type __attribute__((__vector_size__(16)));
};

// Shorthand alias for MakeSimdVectorType<T>::type.
template<class T>
using SimdVectorType =3D typename MakeSimdVectorType<T>::type;

// Splat overload enabled for integral T whose sizeof(T) is 1.
// Casts val to unsigned char, splats it with vec_splats, and
// reinterprets the resulting vector as SimdVectorType<T>.
template<class T, std::enable_if_t<(sizeof(T) =3D=3D 1 &&
    std::is_integral_v<std::decay_t<T>>)>* =3D nullptr>
static inline SimdVectorType<T> Splat(T val) {
    return reinterpret_cast<SimdVectorType<T>>(
        vec_splats(static_cast<unsigned char>(val)));
}

// Splat overload enabled for integral T whose sizeof(T) is 2.
// Casts val to unsigned short, splats it with vec_splats, and
// reinterprets the resulting vector as SimdVectorType<T>.
template<class T, std::enable_if_t<(sizeof(T) =3D=3D 2 &&
    std::is_integral_v<std::decay_t<T>>)>* =3D nullptr>
static inline SimdVectorType<T> Splat(T val) {
    return reinterpret_cast<SimdVectorType<T>>(
        vec_splats(static_cast<unsigned short>(val)));
}

// Splat overload enabled for integral T whose sizeof(T) is 4.
// Casts val to unsigned int, splats it with vec_splats, and
// reinterprets the resulting vector as SimdVectorType<T>.
template<class T, std::enable_if_t<(sizeof(T) =3D=3D 4 &&
    std::is_integral_v<std::decay_t<T>>)>* =3D nullptr>
static inline SimdVectorType<T> Splat(T val) {
    return reinterpret_cast<SimdVectorType<T>>(
        vec_splats(static_cast<unsigned int>(val)));
}

// Splat overload enabled for integral T whose sizeof(T) is 8.
// Casts val to unsigned long long, splats it with vec_splats, and
// reinterprets the resulting vector as SimdVectorType<T>.
template<class T, std::enable_if_t<(sizeof(T) =3D=3D 8 &&
    std::is_integral_v<std::decay_t<T>>)>* =3D nullptr>
static inline SimdVectorType<T> Splat(T val) {
    return reinterpret_cast<SimdVectorType<T>>(
        vec_splats(static_cast<unsigned long long>(val)));
}

// Non-template Splat overload for float: returns vec_splats(val)
// as a __vector float.
static inline __vector float Splat(float val) {
    return vec_splats(val);
}

// Non-template Splat overload for double: returns vec_splats(val)
// as a __vector double.
static inline __vector double Splat(double val) {
    return vec_splats(val);
}

// Vector of unsigned char; the operand and result type used for
// vec_sld in the SplatAndShift functions.
using AltivecUCharVectType =3D __vector unsigned char;

// Splats val, then calls vec_sld with the splat (reinterpreted as
// AltivecUCharVectType) as both vector operands and kShiftAmount as
// the shift argument.
// In the assembly listings in this message, the functions built on
// this variant contain only the xxspltiw and no vsldoi.
template<int kShiftAmount, class T>
AltivecUCharVectType SplatAndShift(T val) {
    const auto splatResult =3D Splat(val);
    return vec_sld(reinterpret_cast<AltivecUCharVectType>(splatResult),
        reinterpret_cast<AltivecUCharVectType>(splatResult), kShiftAmount);
}

// Same computation as SplatAndShift, but with an empty asm statement
// between the splat and the vec_sld.
// In the assembly listings in this message, the functions built on
// this variant keep the vsldoi after the xxspltiw.
template<int kShiftAmount, class T>
AltivecUCharVectType SplatAndShift_2(T val) {
    auto splatResult =3D Splat(val);
    // Empty asm taking splatResult as a read-write "+wa" operand;
    // presumably this hides the splat value from the optimizer.
    __asm__(""
            : "+wa" (splatResult));
    return vec_sld(reinterpret_cast<AltivecUCharVectType>(splatResult),
        reinterpret_cast<AltivecUCharVectType>(splatResult), kShiftAmount);
}

// Test case: SplatAndShift (no asm statement) with shift amount 5
// applied to the int16_t value -32346.
auto SplatAndShift_I16_1() {
    return SplatAndShift<5>(int16_t{-32346});
}

// Test case: SplatAndShift_2 (with the empty asm statement) with
// shift amount 5 applied to the int16_t value -32346.
auto SplatAndShift_I16_2() {
    return SplatAndShift_2<5>(int16_t{-32346});
}

// Test case: SplatAndShift (no asm statement) with shift amount 3
// applied to the int32_t value -1394373889.
auto SplatAndShift_I32_1() {
    return SplatAndShift<3>(int32_t{-1394373889});
}

// Test case: SplatAndShift_2 (with the empty asm statement) with
// shift amount 3 applied to the int32_t value -1394373889.
auto SplatAndShift_I32_2() {
    return SplatAndShift_2<3>(int32_t{-1394373889});
}

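Here is the assembly code that GCC generates for the above code on
powerpc64le-linux-gnu with the -O2 -mcpu=3Dpower10 options. Note that
SplatAndShift_I16_1 and SplatAndShift_I32_1 are missing the vsldoi
instruction that the corresponding _2 functions retain: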
_Z19SplatAndShift_I16_1v:
        xxspltiw 34,2175173030
        blr
_Z19SplatAndShift_I16_2v:
        xxspltiw 34,2175173030
        vsldoi 2,2,2,5
        blr
_Z19SplatAndShift_I32_1v:
        xxspltiw 34,2900593407
        blr
_Z19SplatAndShift_I32_2v:
        xxspltiw 34,2900593407
        vsldoi 2,2,2,3
        blr

Here is the assembly code that GCC generates for the above code on
big-endian powerpc64-linux-gnu with the -O2 -mcpu=3Dpower10 options; the
_1 functions again lack the vsldoi instruction:
_Z19SplatAndShift_I16_1v:
        .quad   .L._Z19SplatAndShift_I16_1v,.TOC.@tocbase,0
.L._Z19SplatAndShift_I16_1v:
        xxspltiw 34,2175173030
        blr
_Z19SplatAndShift_I16_2v:
        .quad   .L._Z19SplatAndShift_I16_2v,.TOC.@tocbase,0
.L._Z19SplatAndShift_I16_2v:
        xxspltiw 34,2175173030
        vsldoi 2,2,2,5
        blr
_Z19SplatAndShift_I32_1v:
        .quad   .L._Z19SplatAndShift_I32_1v,.TOC.@tocbase,0
.L._Z19SplatAndShift_I32_1v:
        xxspltiw 34,2900593407
        blr
_Z19SplatAndShift_I32_2v:
        .quad   .L._Z19SplatAndShift_I32_2v,.TOC.@tocbase,0
.L._Z19SplatAndShift_I32_2v:
        xxspltiw 34,2900593407
        vsldoi 2,2,2,3
        blr=