--- gcc/config/rs6000/rs6000-builtin.def | 8 ++--- gcc/config/rs6000/vsx.md | 4 +-- .../gcc.target/powerpc/mul-vectorize-3.c | 32 ++++++++++++++++++ .../gcc.target/powerpc/mul-vectorize-4.c | 33 +++++++++++++++++++ 4 files changed, 71 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/mul-vectorize-3.c create mode 100644 gcc/testsuite/gcc.target/powerpc/mul-vectorize-4.c diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 592efe31b04..cbacbc6b785 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -3016,10 +3016,10 @@ BU_P10V_AV_2 (MODS_V2DI, "vmodsd", CONST, modv2di3) BU_P10V_AV_2 (MODS_V4SI, "vmodsw", CONST, modv4si3) BU_P10V_AV_2 (MODU_V2DI, "vmodud", CONST, umodv2di3) BU_P10V_AV_2 (MODU_V4SI, "vmoduw", CONST, umodv4si3) -BU_P10V_AV_2 (MULHS_V2DI, "vmulhsd", CONST, mulhs_v2di) -BU_P10V_AV_2 (MULHS_V4SI, "vmulhsw", CONST, mulhs_v4si) -BU_P10V_AV_2 (MULHU_V2DI, "vmulhud", CONST, mulhu_v2di) -BU_P10V_AV_2 (MULHU_V4SI, "vmulhuw", CONST, mulhu_v4si) +BU_P10V_AV_2 (MULHS_V2DI, "vmulhsd", CONST, smulv2di3_highpart) +BU_P10V_AV_2 (MULHS_V4SI, "vmulhsw", CONST, smulv4si3_highpart) +BU_P10V_AV_2 (MULHU_V2DI, "vmulhud", CONST, umulv2di3_highpart) +BU_P10V_AV_2 (MULHU_V4SI, "vmulhuw", CONST, umulv4si3_highpart) BU_P10V_AV_2 (MULLD_V2DI, "vmulld", CONST, mulv2di3) BU_P10V_VSX_1 (VXXSPLTIW_V4SI, "vxxspltiw_v4si", CONST, xxspltiw_v4si) diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index f622873d758..6f6fc0bd835 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -6351,7 +6351,7 @@ (define_insn "umod<mode>3" [(set_attr "type" "vecdiv") (set_attr "size" "<bits>")]) -(define_insn "mulhs_<mode>" +(define_insn "smul<mode>3_highpart" [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") (mult:VIlong (ashiftrt (match_operand:VIlong 1 "vsx_register_operand" "v") @@ -6363,7 +6363,7 @@ (define_insn "mulhs_<mode>" "vmulhs<wd> %0,%1,%2" [(set_attr "type" 
"veccomplex")]) -(define_insn "mulhu_<mode>" +(define_insn "umul<mode>3_highpart" [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") (us_mult:VIlong (ashiftrt (match_operand:VIlong 1 "vsx_register_operand" "v") diff --git a/gcc/testsuite/gcc.target/powerpc/mul-vectorize-3.c b/gcc/testsuite/gcc.target/powerpc/mul-vectorize-3.c new file mode 100644 index 00000000000..2c89c0faec2 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/mul-vectorize-3.c @@ -0,0 +1,32 @@ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fdump-tree-vect-details" } */ + +/* Test vectorizer can exploit ISA 3.1 instructions Vector Multiply + High Signed/Unsigned Word for both signed and unsigned int high part + multiplication. */ + +#define N 128 + +extern signed int si_a[N], si_b[N], si_c[N]; +extern unsigned int ui_a[N], ui_b[N], ui_c[N]; + +typedef signed long long sLL; +typedef unsigned long long uLL; + +__attribute__ ((noipa)) void +test_si () +{ + for (int i = 0; i < N; i++) + si_c[i] = ((sLL) si_a[i] * (sLL) si_b[i]) >> 32; +} + +__attribute__ ((noipa)) void +test_ui () +{ + for (int i = 0; i < N; i++) + ui_c[i] = ((uLL) ui_a[i] * (uLL) ui_b[i]) >> 32; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ +/* { dg-final { scan-assembler-times {\mvmulhsw\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvmulhuw\M} 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/mul-vectorize-4.c b/gcc/testsuite/gcc.target/powerpc/mul-vectorize-4.c new file mode 100644 index 00000000000..265e7588bb6 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/mul-vectorize-4.c @@ -0,0 +1,33 @@ +/* { dg-require-effective-target power10_ok } */ +/* { dg-require-effective-target int128 } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fdump-tree-vect-details" } */ + +/* Test vectorizer can exploit ISA 3.1 instructions Vector 
Multiply + High Signed/Unsigned Doubleword for both signed and unsigned long + long high part multiplication. */ + +#define N 128 + +extern signed long long sll_a[N], sll_b[N], sll_c[N]; +extern unsigned long long ull_a[N], ull_b[N], ull_c[N]; + +typedef signed __int128 s128; +typedef unsigned __int128 u128; + +__attribute__ ((noipa)) void +test_sll () +{ + for (int i = 0; i < N; i++) + sll_c[i] = ((s128) sll_a[i] * (s128) sll_b[i]) >> 64; +} + +__attribute__ ((noipa)) void +test_ull () +{ + for (int i = 0; i < N; i++) + ull_c[i] = ((u128) ull_a[i] * (u128) ull_b[i]) >> 64; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ +/* { dg-final { scan-assembler-times {\mvmulhsd\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvmulhud\M} 1 } } */ -- 2.17.1