From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 7861) id 469923857014; Thu, 7 Dec 2023 01:40:05 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 469923857014 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1701913205; bh=OcSOD7XaWLfno17+v5zXkbxbcWUuvEi8xwA8BuYcmps=; h=From:To:Subject:Date:From; b=ZY20UTUYJDk/GJqg/hqz9U0KtPDK6oLKW+yjqBri/hpFhglQI/iSdRmd5YFlB0ITj Fwp87tg0kv/LD2fmgLhw3BeXfHTt2cuvBzFWiPhPf/NObeg7Ybx96noWwk2U6Fv7nb YI/1r296MF6lNuvH8q+oUZ751k0HFALZ3YO4UzhY= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Hongyu Wang To: gcc-cvs@gcc.gnu.org Subject: [gcc r14-6250] [APX NDD] Support APX NDD for left shift insns X-Act-Checkin: gcc X-Git-Author: Hongyu Wang X-Git-Refname: refs/heads/master X-Git-Oldrev: c95f67b8966dff4f7b22e794e410c5aa7490877a X-Git-Newrev: 03655cd427b9d8e3b06c950255332eb988b0ade1 Message-Id: <20231207014005.469923857014@sourceware.org> Date: Thu, 7 Dec 2023 01:40:05 +0000 (GMT) List-Id: https://gcc.gnu.org/g:03655cd427b9d8e3b06c950255332eb988b0ade1 commit r14-6250-g03655cd427b9d8e3b06c950255332eb988b0ade1 Author: Hongyu Wang Date: Wed Oct 25 15:07:29 2023 +0800 [APX NDD] Support APX NDD for left shift insns For left shift, there is an optimization, TARGET_DOUBLE_WITH_ADD, under which shl 1 can be optimized to add. As the NDD form of add requires the src operand to be a register, since NDD cannot take 2 memory src operands, we currently just keep using the NDD form of shift instead of add. The optimization TARGET_SHIFT1 will try to remove the constant 1 to use a shorter opcode, but under NDD the assembler will automatically use the shorter encoding whether $1 is present or not, so do not involve NDD with it. The doubleword insns for left shift call ix86_expand_ashl, which assumes all shift-related patterns have the same operand[0] and operand[1]. These patterns will be supported in a standalone patch.
gcc/ChangeLog: * config/i386/i386.md (*ashl3_1): Extend with new alternatives to support NDD, limit the new alternative to generate sal only, and adjust output template for NDD. (*ashlsi3_1_zext): Likewise. (*ashlhi3_1): Likewise. (*ashlqi3_1): Likewise. (*ashl3_cmp): Likewise. (*ashlsi3_cmp_zext): Likewise, and use nonimmediate_operand for operands[1] to accept memory input for NDD alternative. (*ashl3_cconly): Likewise. (*ashl3_doubleword_highpart): Adjust codegen for NDD. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ndd.c: Add tests for sal. Diff: --- gcc/config/i386/i386.md | 172 +++++++++++++++++++++----------- gcc/testsuite/gcc.target/i386/apx-ndd.c | 22 ++++ 2 files changed, 136 insertions(+), 58 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index ad4c958a1e8..c67896cf97c 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -14472,10 +14472,19 @@ { split_double_mode (mode, &operands[0], 1, &operands[0], &operands[3]); int bits = INTVAL (operands[2]) - ( * BITS_PER_UNIT); - if (!rtx_equal_p (operands[3], operands[1])) - emit_move_insn (operands[3], operands[1]); - if (bits > 0) - emit_insn (gen_ashl3 (operands[3], operands[3], GEN_INT (bits))); + bool op_equal_p = rtx_equal_p (operands[3], operands[1]); + if (bits == 0) + { + if (!op_equal_p) + emit_move_insn (operands[3], operands[1]); + } + else + { + if (!op_equal_p && !TARGET_APX_NDD) + emit_move_insn (operands[3], operands[1]); + rtx op_tmp = TARGET_APX_NDD ? 
operands[1] : operands[3]; + emit_insn (gen_ashl3 (operands[3], op_tmp, GEN_INT (bits))); + } ix86_expand_clear (operands[0]); DONE; }) @@ -14782,12 +14791,14 @@ (set_attr "mode" "")]) (define_insn "*ashl3_1" - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k") - (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k") - (match_operand:QI 2 "nonmemory_operand" "c,M,r,"))) + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r") + (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k,rm") + (match_operand:QI 2 "nonmemory_operand" "c,M,r,,c"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFT, mode, operands)" + "ix86_binary_operator_ok (ASHIFT, mode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_LEA: @@ -14802,18 +14813,25 @@ default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + /* For NDD form instructions related to TARGET_SHIFT1, the $1 + immediate do not need to be omitted as assembler will map it + to use shorter encoding. */ + && !use_ndd) return "sal{}\t%0"; else - return "sal{}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"sal{}\t{%2, %1, %0|%0, %1, %2}" + : "sal{}\t{%2, %0|%0, %2}"; } } - [(set_attr "isa" "*,*,bmi2,") + [(set_attr "isa" "*,*,bmi2,,apx_ndd") (set (attr "type") (cond [(eq_attr "alternative" "1") (const_string "lea") (eq_attr "alternative" "2") (const_string "ishiftx") + (eq_attr "alternative" "4") + (const_string "ishift") (and (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 0 "register_operand")) (match_operand 2 "const1_operand")) @@ -14855,13 +14873,15 @@ (set_attr "mode" "SI")]) (define_insn "*ashlsi3_1_zext" - [(set (match_operand:DI 0 "register_operand" "=r,r,r") + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") (zero_extend:DI - (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm") - (match_operand:QI 2 "nonmemory_operand" "cI,M,r")))) + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm,rm") + (match_operand:QI 2 "nonmemory_operand" "cI,M,r,cI")))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)" + "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_LEA: @@ -14874,18 +14894,22 @@ default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "sal{l}\t%k0"; else - return "sal{l}\t{%2, %k0|%k0, %2}"; + return use_ndd ? 
"sal{l}\t{%2, %1, %k0|%k0, %1, %2}" + : "sal{l}\t{%2, %k0|%k0, %2}"; } } - [(set_attr "isa" "*,*,bmi2") + [(set_attr "isa" "*,*,bmi2,apx_ndd") (set (attr "type") (cond [(eq_attr "alternative" "1") (const_string "lea") (eq_attr "alternative" "2") (const_string "ishiftx") + (eq_attr "alternative" "3") + (const_string "ishift") (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 2 "const1_operand")) (const_string "alu") @@ -14915,12 +14939,14 @@ "operands[2] = gen_lowpart (SImode, operands[2]);") (define_insn "*ashlhi3_1" - [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k") - (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k") - (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww"))) + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r") + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm") + (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFT, HImode, operands)" + "ix86_binary_operator_ok (ASHIFT, HImode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_LEA: @@ -14933,18 +14959,22 @@ default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "sal{w}\t%0"; else - return "sal{w}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"sal{w}\t{%2, %1, %0|%0, %1, %2}" + : "sal{w}\t{%2, %0|%0, %2}"; } } - [(set_attr "isa" "*,*,avx512f") + [(set_attr "isa" "*,*,avx512f,apx_ndd") (set (attr "type") (cond [(eq_attr "alternative" "1") (const_string "lea") (eq_attr "alternative" "2") (const_string "msklog") + (eq_attr "alternative" "3") + (const_string "ishift") (and (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 0 "register_operand")) (match_operand 2 "const1_operand")) @@ -14960,15 +14990,17 @@ (match_test "optimize_function_for_size_p (cfun)"))))) (const_string "0") (const_string "*"))) - (set_attr "mode" "HI,SI,HI")]) + (set_attr "mode" "HI,SI,HI,HI")]) (define_insn "*ashlqi3_1" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k") - (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k") - (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb"))) + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k,r") + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k,rm") + (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFT, QImode, operands)" + "ix86_binary_operator_ok (ASHIFT, QImode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_LEA: @@ -14984,7 +15016,8 @@ default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) { if (get_attr_mode (insn) == MODE_SI) return "sal{l}\t%k0"; @@ -14996,16 +15029,19 @@ if (get_attr_mode (insn) == MODE_SI) return "sal{l}\t{%2, %k0|%k0, %2}"; else - return "sal{b}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"sal{b}\t{%2, %1, %0|%0, %1, %2}" + : "sal{b}\t{%2, %0|%0, %2}"; } } } - [(set_attr "isa" "*,*,*,avx512dq") + [(set_attr "isa" "*,*,*,avx512dq,apx_ndd") (set (attr "type") (cond [(eq_attr "alternative" "2") (const_string "lea") (eq_attr "alternative" "3") (const_string "msklog") + (eq_attr "alternative" "4") + (const_string "ishift") (and (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 0 "register_operand")) (match_operand 2 "const1_operand")) @@ -15021,10 +15057,10 @@ (match_test "optimize_function_for_size_p (cfun)"))))) (const_string "0") (const_string "*"))) - (set_attr "mode" "QI,SI,SI,QI") + (set_attr "mode" "QI,SI,SI,QI,QI") ;; Potential partial reg stall on alternative 1. (set (attr "preferred_for_speed") - (cond [(eq_attr "alternative" "1") + (cond [(eq_attr "alternative" "1,4") (symbol_ref "!TARGET_PARTIAL_REG_STALL")] (symbol_ref "true")))]) @@ -15119,10 +15155,10 @@ (define_insn "*ashl3_cmp" [(set (reg FLAGS_REG) (compare - (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "" "")) + (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm") + (match_operand:QI 2 "" ",")) (const_int 0))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=m") + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,r") (ashift:SWI (match_dup 1) (match_dup 2)))] "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL @@ -15130,8 +15166,10 @@ && (TARGET_SHIFT1 || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0]))))) && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, mode, operands)" + && ix86_binary_operator_ok (ASHIFT, mode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_ALU: @@ -15140,14 +15178,19 @@ default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "sal{}\t%0"; else - return 
"sal{}\t{%2, %0|%0, %2}"; + return use_ndd ? "sal{}\t{%2, %1, %0|%0, %1, %2}" + : "sal{}\t{%2, %0|%0, %2}"; } } - [(set (attr "type") - (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD") + [(set_attr "isa" "*,apx_ndd") + (set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "ishift") + (and (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 0 "register_operand")) (match_operand 2 "const1_operand")) (const_string "alu") @@ -15167,10 +15210,10 @@ (define_insn "*ashlsi3_cmp_zext" [(set (reg FLAGS_REG) (compare - (ashift:SI (match_operand:SI 1 "register_operand" "0") + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm") (match_operand:QI 2 "const_1_to_31_operand")) (const_int 0))) - (set (match_operand:DI 0 "register_operand" "=r") + (set (match_operand:DI 0 "register_operand" "=r,r") (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))] "TARGET_64BIT && (optimize_function_for_size_p (cfun) @@ -15179,8 +15222,10 @@ && (TARGET_SHIFT1 || TARGET_DOUBLE_WITH_ADD))) && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, SImode, operands)" + && ix86_binary_operator_ok (ASHIFT, SImode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_ALU: @@ -15189,14 +15234,19 @@ default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "sal{l}\t%k0"; else - return "sal{l}\t{%2, %k0|%k0, %2}"; + return use_ndd ? 
"sal{l}\t{%2, %1, %k0|%k0, %1, %2}" + : "sal{l}\t{%2, %k0|%k0, %2}"; } } - [(set (attr "type") - (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD") + [(set_attr "isa" "*,apx_ndd") + (set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "ishift") + (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 2 "const1_operand")) (const_string "alu") ] @@ -15215,10 +15265,10 @@ (define_insn "*ashl3_cconly" [(set (reg FLAGS_REG) (compare - (ashift:SWI (match_operand:SWI 1 "register_operand" "0") - (match_operand:QI 2 "" "")) + (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm") + (match_operand:QI 2 "" ",")) (const_int 0))) - (clobber (match_scratch:SWI 0 "="))] + (clobber (match_scratch:SWI 0 "=,r"))] "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL || (operands[2] == const1_rtx @@ -15226,22 +15276,28 @@ || TARGET_DOUBLE_WITH_ADD))) && ix86_match_ccmode (insn, CCGOCmode)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_ALU: gcc_assert (operands[2] == const1_rtx); return "add{}\t%0, %0"; - default: + default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "sal{}\t%0"; else - return "sal{}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"sal{}\t{%2, %1, %0|%0, %1, %2}" + : "sal{}\t{%2, %0|%0, %2}"; } } - [(set (attr "type") - (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD") + [(set_attr "isa" "*,apx_ndd") + (set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "ishift") + (and (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 0 "register_operand")) (match_operand 2 "const1_operand")) (const_string "alu") diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c index d97648c876d..9951fb00a4c 100644 --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c @@ -29,6 +29,16 @@ foo2_##OP_NAME##_##TYPE (TYPE *a, TYPE b) \ return c; \ } +#define FOO3(TYPE, OP_NAME, OP, IMM) \ +TYPE \ +__attribute__ ((noipa)) \ +foo3_##OP_NAME##_##TYPE (TYPE a) \ +{ \ + TYPE b = a OP IMM; \ + return b; \ +} + + #define F(TYPE, OP_NAME, OP) \ TYPE \ __attribute__ ((noipa)) \ @@ -112,6 +122,16 @@ FOO (int, xor, ^) FOO1 (int, xor, ^) FOO (long, xor, ^) FOO1 (long, xor, ^) + +FOO (char, shl, <<) +FOO3 (char, shl, <<, 7) +FOO (short, shl, <<) +FOO3 (short, shl, <<, 7) +FOO (int, shl, <<) +FOO3 (int, shl, <<, 7) +FOO (long, shl, <<) +FOO3 (long, shl, <<, 7) + /* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ /* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */ /* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ @@ -134,3 +154,5 @@ FOO1 (long, xor, ^) /* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 3 } } */ /* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 2 } } */ /* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */ +/* { dg-final { scan-assembler-times "sal(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } 
*/ +/* { dg-final { scan-assembler-times "sal(?:l|w|q)\[^\n\r]*7, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */