From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2078) id 0A403386C5AD; Mon, 20 Nov 2023 02:52:08 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 0A403386C5AD DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1700448728; bh=qwtJjHeyw8NlcsGpQg3JJuOgP7LSCF8MzxzeFqFLB98=; h=From:To:Subject:Date:From; b=HO9CEyizRMJitr2m7tJsZQXChP/eoX7uxAcBBTLRQhRl5KaJCbJNoH1G4xcmHB/GH jX1KecqJ+HABECbL8fDb4oKuBmFvbZ4MKoZC8azlBDa+gqD8x99/VpA6/gkeHlq5UA 4fo8J50uor+JvGbhArtoxTDQl+PLe5CkVXAvW01E= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: hongtao Liu To: gcc-cvs@gcc.gnu.org Subject: [gcc r14-5603] Support reduc_{plus, xor, and, ior}_scal_m for vector integer mode. X-Act-Checkin: gcc X-Git-Author: liuhongt X-Git-Refname: refs/heads/master X-Git-Oldrev: e6269bb69c0734a5af716bfbded3621de6ca351d X-Git-Newrev: 2b59e2b4dff42118fe3a505f07b9a6aa4cf53bdf Message-Id: <20231120025208.0A403386C5AD@sourceware.org> Date: Mon, 20 Nov 2023 02:52:08 +0000 (GMT) List-Id: https://gcc.gnu.org/g:2b59e2b4dff42118fe3a505f07b9a6aa4cf53bdf commit r14-5603-g2b59e2b4dff42118fe3a505f07b9a6aa4cf53bdf Author: liuhongt Date: Thu Nov 16 18:38:39 2023 +0800 Support reduc_{plus,xor,and,ior}_scal_m for vector integer mode. BB vectorizer relies on the backend support of .REDUC_{PLUS,IOR,XOR,AND} to vectorize reduction. gcc/ChangeLog: PR target/112325 * config/i386/sse.md (reduc_<code>_scal_<mode>): New expander. (REDUC_ANY_LOGIC_MODE): New iterator. (REDUC_PLUS_MODE): Extend to VxHI/SI/DImode. (REDUC_SSE_PLUS_MODE): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr112325-1.c: New test. * gcc.target/i386/pr112325-2.c: New test. 
Diff: --- gcc/config/i386/sse.md | 48 +++++++++++- gcc/testsuite/gcc.target/i386/pr112325-1.c | 116 +++++++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr112325-2.c | 38 ++++++++++ 3 files changed, 199 insertions(+), 3 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index d250a6cb802..f94a77d0b6d 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3417,7 +3417,9 @@ (define_mode_iterator REDUC_SSE_PLUS_MODE [(V2DF "TARGET_SSE") (V4SF "TARGET_SSE") - (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")]) + (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") + (V8HI "TARGET_SSE2") (V4SI "TARGET_SSE2") + (V2DI "TARGET_SSE2")]) (define_expand "reduc_plus_scal_<mode>" [(plus:REDUC_SSE_PLUS_MODE @@ -3458,8 +3460,12 @@ (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL && TARGET_EVEX512") - (V32QI "TARGET_AVX") - (V64QI "TARGET_AVX512F && TARGET_EVEX512")]) + (V32QI "TARGET_AVX") (V16HI "TARGET_AVX") + (V8SI "TARGET_AVX") (V4DI "TARGET_AVX") + (V64QI "TARGET_AVX512F && TARGET_EVEX512") + (V32HI "TARGET_AVX512F && TARGET_EVEX512") + (V16SI "TARGET_AVX512F && TARGET_EVEX512") + (V8DI "TARGET_AVX512F && TARGET_EVEX512")]) (define_expand "reduc_plus_scal_<mode>" [(plus:REDUC_PLUS_MODE @@ -3597,6 +3603,42 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) +(define_expand "reduc_<code>_scal_<mode>" + [(any_logic:VI_128 + (match_operand:<ssescalarmode> 0 "register_operand") + (match_operand:VI_128 1 "register_operand"))] + "TARGET_SSE2" +{ + rtx tmp = gen_reg_rtx (<MODE>mode); + ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]); + emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], + tmp, const0_rtx)); + DONE; +}) + +(define_mode_iterator REDUC_ANY_LOGIC_MODE + [(V32QI "TARGET_AVX") (V16HI "TARGET_AVX") + (V8SI "TARGET_AVX") (V4DI "TARGET_AVX") + (V64QI "TARGET_AVX512F && TARGET_EVEX512") + (V32HI "TARGET_AVX512F && TARGET_EVEX512") + (V16SI "TARGET_AVX512F && TARGET_EVEX512") + (V8DI "TARGET_AVX512F && TARGET_EVEX512")]) + 
+(define_expand "reduc_<code>_scal_<mode>" + [(any_logic:REDUC_ANY_LOGIC_MODE + (match_operand:<ssescalarmode> 0 "register_operand") + (match_operand:REDUC_ANY_LOGIC_MODE 1 "register_operand"))] + "" +{ + rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode); + emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1])); + rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode); + rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]); + emit_insn (gen_<code><ssehalfvecmodelower>3 (tmp2, tmp, tmp3)); + emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2)); + DONE; +}) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel floating point comparisons diff --git a/gcc/testsuite/gcc.target/i386/pr112325-1.c b/gcc/testsuite/gcc.target/i386/pr112325-1.c new file mode 100644 index 00000000000..56e20c156f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112325-1.c @@ -0,0 +1,116 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512vl -mavx512bw -O2 -mtune=generic -mprefer-vector-width=512 -fdump-tree-slp2" } */ +/* { dg-final { scan-tree-dump-times ".REDUC_PLUS" 3 "slp2" } } */ +/* { dg-final { scan-tree-dump-times ".REDUC_IOR" 4 "slp2" } } */ + +int +__attribute__((noipa)) +plus_v4si (int* a) +{ + int sum = 0; + sum += a[0]; + sum += a[1]; + sum += a[2]; + sum += a[3]; + return sum; +} + +short +__attribute__((noipa)) +plus_v8hi (short* a) +{ + short sum = 0; + sum += a[0]; + sum += a[1]; + sum += a[2]; + sum += a[3]; + sum += a[4]; + sum += a[5]; + sum += a[6]; + sum += a[7]; + return sum; +} + +long long +__attribute__((noipa)) +plus_v8di (long long* a) +{ + long long sum = 0; + sum += a[0]; + sum += a[1]; + sum += a[2]; + sum += a[3]; + sum += a[4]; + sum += a[5]; + sum += a[6]; + sum += a[7]; + return sum; +} + +int +__attribute__((noipa)) +ior_v4si (int* a) +{ + int sum = 0; + sum |= a[0]; + sum |= a[1]; + sum |= a[2]; + sum |= a[3]; + return sum; +} + +short +__attribute__((noipa)) +ior_v8hi (short* a) +{ + short sum = 0; + sum |= a[0]; + sum |= a[1]; + sum |= a[2]; + sum |= a[3]; + sum |= a[4]; + sum |= a[5]; + sum |= a[6]; + sum |= a[7]; + return sum; +} + 
+long long +__attribute__((noipa)) +ior_v8di (long long* a) +{ + long long sum = 0; + sum |= a[0]; + sum |= a[1]; + sum |= a[2]; + sum |= a[3]; + sum |= a[4]; + sum |= a[5]; + sum |= a[6]; + sum |= a[7]; + return sum; +} + +char +__attribute__((noipa)) +ior_v16qi (char* a) +{ + char sum = 0; + sum |= a[0]; + sum |= a[1]; + sum |= a[2]; + sum |= a[3]; + sum |= a[4]; + sum |= a[5]; + sum |= a[6]; + sum |= a[7]; + sum |= a[8]; + sum |= a[9]; + sum |= a[10]; + sum |= a[11]; + sum |= a[12]; + sum |= a[13]; + sum |= a[14]; + sum |= a[15]; + return sum; +} diff --git a/gcc/testsuite/gcc.target/i386/pr112325-2.c b/gcc/testsuite/gcc.target/i386/pr112325-2.c new file mode 100644 index 00000000000..650006b0bd9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112325-2.c @@ -0,0 +1,38 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -msse2" } */ +/* { dg-require-effective-target sse2 } */ + +#include "sse2-check.h" +#include "pr112325-1.c" + +static void +sse2_test (void) +{ + int d[4] = { 3, 11, 22, 89}; + short w[8] = { 3, 11, 22, 89, 4, 9, 13, 7}; + char b[16] = { 3, 11, 22, 89, 4, 9, 13, 7, 2, 6, 5, 111, 163, 88, 11, 235}; + long long q[8] = { 3, 11, 22, 89, 4, 9, 13, 7}; + + /* if (plus_v4si (d) != 125) */ + /* __builtin_abort (); */ + + /* if (plus_v8hi (w) != 158) */ + /* __builtin_abort (); */ + + /* if (plus_v8di (q) != 158) */ + /* __builtin_abort (); */ + + /* if (ior_v4si (d) != 95) */ + /* __builtin_abort (); */ + + /* if (ior_v8hi (w) != 95) */ + /* __builtin_abort (); */ + + /* if (ior_v16qi (b) != (char)255) */ + /* __builtin_abort (); */ + + if (ior_v8di (q) != 95) + __builtin_abort (); + + return; +}