From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1363) id 998173858439; Wed, 5 Jan 2022 22:17:35 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 998173858439 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset="utf-8" From: Uros Bizjak To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-6273] i386: Introduce V2QImode minmax, abs and uavgv2hi3_ceil [PR103861] X-Act-Checkin: gcc X-Git-Author: Uros Bizjak X-Git-Refname: refs/heads/master X-Git-Oldrev: e3ef832a9e8d6a950a439e34e576eb4cb202dc48 X-Git-Newrev: c166632bd22d7da66354121502019fc9c92ef07f Message-Id: <20220105221735.998173858439@sourceware.org> Date: Wed, 5 Jan 2022 22:17:35 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 05 Jan 2022 22:17:35 -0000 https://gcc.gnu.org/g:c166632bd22d7da66354121502019fc9c92ef07f commit r12-6273-gc166632bd22d7da66354121502019fc9c92ef07f Author: Uros Bizjak Date: Wed Jan 5 23:16:34 2022 +0100 i386: Introduce V2QImode minmax, abs and uavgv2hi3_ceil [PR103861] Add V2QImode minmax, abs and uavgv2qi3_ceil operations with SSE registers. 2022-01-05 Uroš Bizjak gcc/ChangeLog: PR target/103861 * config/i386/mmx.md (VI_16_32): New mode iterator. (VI1_16_32): Ditto. (mmxvecsize): Handle V2QI mode. (<smaxmin:code><mode>3): Rename from <smaxmin:code>v4qi3. Use VI1_16_32 mode iterator. (<umaxmin:code><mode>3): Rename from <umaxmin:code>v4qi3. Use VI1_16_32 mode iterator. (abs<mode>2): Use VI_16_32 mode iterator. (uavgv2qi3_ceil): New insn pattern. gcc/testsuite/ChangeLog: PR target/103861 * gcc.target/i386/pr103861-3.c: New test. * g++.dg/vect/slp-pr98855.cc (dg-final): Check that no vectorization using SLP was performed. 
Diff: --- gcc/config/i386/mmx.md | 55 ++++++++++++++++++------- gcc/testsuite/g++.dg/vect/slp-pr98855.cc | 5 +-- gcc/testsuite/gcc.target/i386/pr103861-3.c | 66 ++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 17 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 96d85a54e10..a409bb7c6c6 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -63,6 +63,12 @@ ;; 4-byte integer vector modes (define_mode_iterator VI_32 [V4QI V2HI]) +;; 4-byte and 2-byte integer vector modes +(define_mode_iterator VI_16_32 [V4QI V2QI V2HI]) + +;; 4-byte and 2-byte QImode vector modes +(define_mode_iterator VI1_16_32 [V4QI V2QI]) + ;; V2S* modes (define_mode_iterator V2FI [V2SF V2SI]) @@ -71,7 +77,8 @@ ;; Mapping from integer vector mode to mnemonic suffix (define_mode_attr mmxvecsize - [(V8QI "b") (V4QI "b") (V4HI "w") (V2HI "w") (V2SI "d") (V1DI "q")]) + [(V8QI "b") (V4QI "b") (V2QI "b") + (V4HI "w") (V2HI "w") (V2SI "d") (V1DI "q")]) (define_mode_attr mmxdoublemode [(V8QI "V8HI") (V4HI "V4SI")]) @@ -2140,11 +2147,11 @@ (match_operand:V4HI 2 "register_operand")))] "TARGET_MMX_WITH_SSE") -(define_insn "<code>v4qi3" - [(set (match_operand:V4QI 0 "register_operand" "=Yr,*x,Yv") - (smaxmin:V4QI - (match_operand:V4QI 1 "register_operand" "%0,0,Yv") - (match_operand:V4QI 2 "register_operand" "Yr,*x,Yv")))] +(define_insn "<code><mode>3" + [(set (match_operand:VI1_16_32 0 "register_operand" "=Yr,*x,Yv") + (smaxmin:VI1_16_32 + (match_operand:VI1_16_32 1 "register_operand" "%0,0,Yv") + (match_operand:VI1_16_32 2 "register_operand" "Yr,*x,Yv")))] "TARGET_SSE4_1" "@ p<maxmin_int>b\t{%2, %0|%0, %2} @@ -2218,11 +2225,11 @@ (match_operand:V8QI 2 "register_operand")))] "TARGET_MMX_WITH_SSE") -(define_insn "<code>v4qi3" - [(set (match_operand:V4QI 0 "register_operand" "=x,Yw") - (umaxmin:V4QI - (match_operand:V4QI 1 "register_operand" "%0,Yw") - (match_operand:V4QI 2 "register_operand" "x,Yw")))] +(define_insn "<code><mode>3" + [(set (match_operand:VI1_16_32 0 "register_operand" "=x,Yw") + 
(umaxmin:VI1_16_32 + (match_operand:VI1_16_32 1 "register_operand" "%0,Yw") + (match_operand:VI1_16_32 2 "register_operand" "x,Yw")))] "TARGET_SSE2" "@ p<maxmin_int>b\t{%2, %0|%0, %2} @@ -2269,9 +2276,9 @@ "TARGET_SSSE3 && TARGET_MMX_WITH_SSE") (define_insn "abs<mode>2" - [(set (match_operand:VI_32 0 "register_operand" "=Yv") - (abs:VI_32 - (match_operand:VI_32 1 "register_operand" "Yv")))] + [(set (match_operand:VI_16_32 0 "register_operand" "=Yv") + (abs:VI_16_32 + (match_operand:VI_16_32 1 "register_operand" "Yv")))] "TARGET_SSSE3" "%vpabs<mmxvecsize>\t{%1, %0|%0, %1}" [(set_attr "type" "sselog1") @@ -4351,6 +4358,26 @@ (set_attr "type" "sseiadd") (set_attr "mode" "TI")]) +(define_insn "uavgv2qi3_ceil" + [(set (match_operand:V2QI 0 "register_operand" "=x,Yw") + (truncate:V2QI + (lshiftrt:V2HI + (plus:V2HI + (plus:V2HI + (zero_extend:V2HI + (match_operand:V2QI 1 "register_operand" "%0,Yw")) + (zero_extend:V2HI + (match_operand:V2QI 2 "register_operand" "x,Yw"))) + (const_vector:V2HI [(const_int 1) (const_int 1)])) + (const_int 1))))] + "TARGET_SSE2" + "@ + pavgb\t{%2, %0|%0, %2} + vpavgb\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + (define_insn "uavgv2hi3_ceil" [(set (match_operand:V2HI 0 "register_operand" "=x,Yw") (truncate:V2HI diff --git a/gcc/testsuite/g++.dg/vect/slp-pr98855.cc b/gcc/testsuite/g++.dg/vect/slp-pr98855.cc index b1010326698..ff59eb95aca 100644 --- a/gcc/testsuite/g++.dg/vect/slp-pr98855.cc +++ b/gcc/testsuite/g++.dg/vect/slp-pr98855.cc @@ -81,6 +81,5 @@ void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks, uint32_t *EK) } } -// This used to work on { target x86_64-*-* i?86-*-* } but a fix in SLP -// discovery makes us trip over the threshold again. 
-// { dg-final { scan-tree-dump-times "not vectorized: vectorization is not profitable" 2 "slp1" { xfail *-*-* } } } +// { dg-final { scan-tree-dump "not vectorized: vectorization is not profitable" "slp1" } } +// { dg-final { scan-tree-dump-not "vectorizing stmts using SLP" "slp1" } } diff --git a/gcc/testsuite/gcc.target/i386/pr103861-3.c b/gcc/testsuite/gcc.target/i386/pr103861-3.c new file mode 100644 index 00000000000..e5099ea0a83 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr103861-3.c @@ -0,0 +1,66 @@ +/* PR target/103861 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -msse4" } */ + +char r[2], a[2], b[2]; +unsigned char ur[2], ua[2], ub[2]; + +void maxs (void) +{ + int i; + + for (i = 0; i < 2; i++) + r[i] = a[i] > b[i] ? a[i] : b[i]; +} + +/* { dg-final { scan-assembler "pmaxsb" } } */ + +void maxu (void) +{ + int i; + + for (i = 0; i < 2; i++) + ur[i] = ua[i] > ub[i] ? ua[i] : ub[i]; +} + +/* { dg-final { scan-assembler "pmaxub" } } */ + +void mins (void) +{ + int i; + + for (i = 0; i < 2; i++) + r[i] = a[i] < b[i] ? a[i] : b[i]; +} + +/* { dg-final { scan-assembler "pminsb" } } */ + +void minu (void) +{ + int i; + + for (i = 0; i < 2; i++) + ur[i] = ua[i] < ub[i] ? ua[i] : ub[i]; +} + +/* { dg-final { scan-assembler "pminub" } } */ + +void _abs (void) +{ + int i; + + for (i = 0; i < 2; i++) + r[i] = a[i] < 0 ? -a[i] : a[i]; +} + +/* { dg-final { scan-assembler "pabsb" } } */ + +void avgu (void) +{ + int i; + + for (i = 0; i < 2; i++) + ur[i] = (ua[i] + ub[i] + 1) >> 1; +} + +/* { dg-final { scan-assembler "pavgb" { xfail *-*-* } } } */