> -----Original Message----- > From: Richard Sandiford > Sent: Friday, December 17, 2021 4:49 PM > To: Tamar Christina > Cc: gcc-patches@gcc.gnu.org; nd ; Richard Earnshaw > ; Marcus Shawcroft > ; Kyrylo Tkachov > Subject: Re: [2/3 PATCH]AArch64 use canonical ordering for complex mul, > fma and fms > > Richard Sandiford writes: > > Tamar Christina writes: > >> Hi All, > >> > >> After the first patch in the series this updates the optabs to expect > >> the canonical sequence. > >> > >> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > >> > >> Ok for master? and backport along with the first patch? > >> > >> Thanks, > >> Tamar > >> > >> gcc/ChangeLog: > >> > >> PR tree-optimization/102819 > >> PR tree-optimization/103169 > >> * config/aarch64/aarch64-simd.md > (cml4, > >> cmul3): Use canonical order. > >> * config/aarch64/aarch64-sve.md (cml4, > >> cmul3): Likewise. > >> > >> --- inline copy of patch -- > >> diff --git a/gcc/config/aarch64/aarch64-simd.md > >> b/gcc/config/aarch64/aarch64-simd.md > >> index > >> > f95a7e1d91c97c9e981d75e71f0b49c02ef748ba..875896ee71324712c8034eeff9 > c > >> fb5649f9b0e73 100644 > >> --- a/gcc/config/aarch64/aarch64-simd.md > >> +++ b/gcc/config/aarch64/aarch64-simd.md > >> @@ -556,17 +556,17 @@ (define_insn > "aarch64_fcmlaq_lane" > >> ;; remainder. Because of this, expand early. > >> (define_expand "cml4" > >> [(set (match_operand:VHSDF 0 "register_operand") > >> - (plus:VHSDF (match_operand:VHSDF 1 "register_operand") > >> - (unspec:VHSDF [(match_operand:VHSDF 2 > "register_operand") > >> - (match_operand:VHSDF 3 > "register_operand")] > >> - FCMLA_OP)))] > >> + (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 > "register_operand") > >> + (match_operand:VHSDF 2 > "register_operand")] > >> + FCMLA_OP) > >> + (match_operand:VHSDF 3 "register_operand")))] > >> "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" > >> { > >> rtx tmp = gen_reg_rtx (mode); > >> - emit_insn (gen_aarch64_fcmla (tmp, operands[1], > >> - operands[3], operands[2])); > >> + emit_insn (gen_aarch64_fcmla (tmp, operands[3], > >> + operands[1], operands[2])); > >> emit_insn (gen_aarch64_fcmla (operands[0], tmp, > >> - operands[3], operands[2])); > >> + operands[1], operands[2])); > >> DONE; > >> }) > >> > >> @@ -583,9 +583,9 @@ (define_expand "cmul3" > >> rtx tmp = force_reg (mode, CONST0_RTX (mode)); > >> rtx res1 = gen_reg_rtx (mode); > >> emit_insn (gen_aarch64_fcmla (res1, tmp, > >> - operands[2], operands[1])); > >> + operands[1], operands[2])); > >> emit_insn (gen_aarch64_fcmla (operands[0], res1, > >> - operands[2], operands[1])); > >> + operands[1], operands[2])); > > > > This doesn't look right. Going from the documentation, patch 1 isn't > > changing the operand order for CMUL: the conjugated operand (if there > > is one) is still operand 2. The FCMLA sequences use the opposite > > order, where the conjugated operand (if there is one) is operand 1. > > So I think > > I meant “the first multiplication operand” rather than “operand 1” here. > > > the reversal here is still needed. > > > > Same for the multiplication operands in CML* above. I did actually change the order in patch 1, but didn't update the docs.. That was done because I followed the SLP order again, but now I've updated them to do what the docs say. Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. Ok for master? and backport along with the first patch? Thanks, Tamar gcc/ChangeLog: PR tree-optimization/102819 PR tree-optimization/103169 * config/aarch64/aarch64-simd.md (cml4): Use canonical order. * config/aarch64/aarch64-sve.md (cml4): Likewise. --- inline copy of patch --- diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index f95a7e1d91c97c9e981d75e71f0b49c02ef748ba..9e41610fba85862ef7675bea1e5731b14cab59ce 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -556,17 +556,17 @@ (define_insn "aarch64_fcmlaq_lane" ;; remainder. Because of this, expand early. (define_expand "cml4" [(set (match_operand:VHSDF 0 "register_operand") - (plus:VHSDF (match_operand:VHSDF 1 "register_operand") - (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand") - (match_operand:VHSDF 3 "register_operand")] - FCMLA_OP)))] + (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand") + (match_operand:VHSDF 2 "register_operand")] + FCMLA_OP) + (match_operand:VHSDF 3 "register_operand")))] "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" { rtx tmp = gen_reg_rtx (mode); - emit_insn (gen_aarch64_fcmla (tmp, operands[1], - operands[3], operands[2])); + emit_insn (gen_aarch64_fcmla (tmp, operands[3], + operands[2], operands[1])); emit_insn (gen_aarch64_fcmla (operands[0], tmp, - operands[3], operands[2])); + operands[2], operands[1])); DONE; }) diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 9ef968840c20a3049901b3f8a919cf27ded1da3e..9ed19017c480b88779e9e3b08c0e031be60a8c12 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -7278,11 +7278,11 @@ (define_expand "cml4" rtx tmp = gen_reg_rtx (mode); emit_insn (gen_aarch64_pred_fcmla (tmp, operands[4], - operands[3], operands[2], - operands[1], operands[5])); + operands[2], operands[1], + operands[3], operands[5])); emit_insn (gen_aarch64_pred_fcmla (operands[0], operands[4], - operands[3], operands[2], + operands[2], operands[1], tmp, operands[5])); DONE; })