diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index bd60e65b0c3f05f1c931f03807170f3b9d699de5..ca60416e7d7b1d8848f4ec5a624ae479a12ae5bc 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -6887,7 +6887,7 @@ (define_insn "@aarch64_dot_prod_lane"
   [(set_attr "movprfx" "*,yes")]
 )
 
-(define_insn "@dot_prod"
+(define_insn "@dot_prod_insn" /* Renamed: the old name is taken over by the expander added below.  */
   [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
         (plus:VNx4SI_ONLY
           (unspec:VNx4SI_ONLY
@@ -6902,6 +6902,43 @@ (define_insn "@dot_prod"
   [(set_attr "movprfx" "*,yes")]
 )
 
+(define_expand "@dot_prod" /* Expander for the DOTPROD_US_ONLY dot product: operands 1 and 2 feed the unspec, whose result is added to operand 3 and stored in operand 0.  */
+  [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
+        (plus:VNx4SI_ONLY
+          (unspec:VNx4SI_ONLY
+            [(match_operand: 1 "register_operand") /* NOTE(review): no mode follows the colon here or on operand 2; presumably lost in extraction - confirm against the original patch.  */
+             (match_operand: 2 "register_operand")]
+           DOTPROD_US_ONLY)
+          (match_operand:VNx4SI_ONLY 3 "register_operand")))]
+  "TARGET_SVE || TARGET_SVE_I8MM"
+{
+  if (TARGET_SVE_I8MM) /* Emit the single usdot insn and stop when TARGET_SVE_I8MM holds.  */
+    {
+      emit_insn (gen_usdot_prod_insn (operands[0], operands[1],
+                                      operands[2], operands[3]));
+      DONE;
+    }
+
+  machine_mode elemmode = GET_MODE_INNER (mode); /* NOTE(review): mode is not declared in the visible text; presumably a mode iterator was lost - confirm.  */
+  HOST_WIDE_INT val = 1 << (GET_MODE_BITSIZE (elemmode).to_constant () - 1); /* Only the top bit of an element is set; the shift itself is evaluated in int.  */
+  rtx signbit = gen_int_mode (val, elemmode); /* val as a constant of the element mode.  */
+  rtx t1 = gen_reg_rtx (mode); /* Holds the first partial accumulation.  */
+  rtx t2 = gen_reg_rtx (mode); /* Holds the second partial accumulation.  */
+  rtx tmp = gen_reg_rtx (mode); /* Destination of the subtraction emitted below.  */
+  rtx c1 = gen_const_vec_duplicate (mode,
+                                    gen_int_mode (val - 1, elemmode)); /* Every element is val - 1.  */
+  rtx c2 = gen_const_vec_duplicate (mode, gen_int_mode (1, elemmode)); /* Every element is 1.  */
+  rtx dup = gen_const_vec_duplicate (mode, signbit); /* Every element is signbit.  */
+  c1 = force_reg (mode, c1); /* The three constants are forced into registers before being used as insn operands.  */
+  c2 = force_reg (mode, c2);
+  dup = force_reg (mode, dup);
+  emit_insn (gen_sub3 (tmp, operands[1], dup)); /* Presumably tmp = operands[1] - dup; the generator name looks truncated - confirm.  */
+  emit_insn (gen_sdot_prod (t1, tmp, operands[2], operands[3])); /* Assuming sdot accumulates like the pattern above: t1 = operands[3] + tmp . operands[2].  */
+  emit_insn (gen_sdot_prod (t2, c1, operands[2], t1)); /* t2 = t1 + (val - 1) . operands[2].  */
+  emit_insn (gen_sdot_prod (operands[0], c2, operands[2], t2)); /* Result = t2 + 1 . operands[2]; the three steps sum to operands[1] . operands[2] because (x - val) + (val - 1) + 1 = x.  */
+  DONE;
+})
+
 (define_insn "@aarch64_dot_prod_lane"
   [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
         (plus:VNx4SI_ONLY
diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec_2.c b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..cbe6b7eb7bef5a5c4b8e5ac823ebdf1d309f8490
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec_2.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+#pragma GCC target "+noi8mm" /* Compile this file with the i8mm extension removed.  */
+
+#define N 480 /* Number of elements processed by the loop in f.  */
+#define SIGNEDNESS_1 unsigned /* Signedness of res and of the value returned by f.  */
+#define SIGNEDNESS_2 signed /* Signedness of the short that holds each product.  */
+#define SIGNEDNESS_3 signed /* Signedness of the char elements of a.  */
+#define SIGNEDNESS_4 unsigned /* Signedness of the char elements of b.  */
+
+SIGNEDNESS_1 int __attribute__ ((noipa)) /* Returns res plus, for each i below N, a[i] * b[i] converted to signed short.  */
+f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a,
+   SIGNEDNESS_4 char *restrict b)
+{
+  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+    {
+      int av = a[i]; /* Signed char element converted to int.  */
+      int bv = b[i]; /* Unsigned char element converted to int.  */
+      SIGNEDNESS_2 short mult = av * bv; /* The int product is narrowed to signed short.  */
+      res += mult; /* Accumulated into the unsigned int running total.  */
+    }
+  return res;
+}
+
+/* { dg-final { scan-assembler-not {\tusdot\t} } } */
+/* { dg-final { scan-assembler-times {\tsdot\t} 3 } } */