* [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns @ 2023-12-06 5:24 Jiufu Guo 2023-12-06 5:24 ` [PATCH V3 2/3] Using pli for constant splitting Jiufu Guo ` (2 more replies) 0 siblings, 3 replies; 7+ messages in thread From: Jiufu Guo @ 2023-12-06 5:24 UTC (permalink / raw) To: gcc-patches; +Cc: segher, dje.gcc, linkw, bergner, guojiufu Hi, Trunk gcc supports more constants to be built via two instructions: e.g. "li/lis; xori/xoris/rldicl/rldicr/rldic". And then num_insns_constant should also be updated. Function "rs6000_emit_set_long_const" is used to build complicated constants; and "num_insns_constant_gpr" is used to compute "how many instructions are needed" to build the constant. So, these two functions should be aligned. The idea of this patch is: to reuse "rs6000_emit_set_long_const" to compute/record the instruction number (when computing the instruction number, no instructions are emitted). Compare with the previous version: https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636565.html This version updates "rs6000_emit_set_long_const" to use an "if" condition to select either "computing insn number" or "emitting the insn". And put them together to avoid misalignment in the future. Bootstrap & regtest pass ppc64{,le}. Is this ok for trunk? BR, Jeff (Jiufu Guo) gcc/ChangeLog: * config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add new parameter to record number of instructions to build the constant. (num_insns_constant_gpr): Call rs6000_emit_set_long_const to compute num_insn. 
--- gcc/config/rs6000/rs6000.cc | 272 ++++++++++++++++++------------------ 1 file changed, 137 insertions(+), 135 deletions(-) diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 3dfd79c4c43..dbdc72dce5d 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -1115,7 +1115,7 @@ static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *); static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *); static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *); static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree); -static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT); +static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, int * = nullptr); static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool); static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool); static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t, @@ -6054,21 +6054,9 @@ num_insns_constant_gpr (HOST_WIDE_INT value) else if (TARGET_POWERPC64) { - HOST_WIDE_INT low = sext_hwi (value, 32); - HOST_WIDE_INT high = value >> 31; - - if (high == 0 || high == -1) - return 2; - - high >>= 1; - - if (low == 0 || low == high) - return num_insns_constant_gpr (high) + 1; - else if (high == 0) - return num_insns_constant_gpr (low) + 1; - else - return (num_insns_constant_gpr (high) - + num_insns_constant_gpr (low) + 1); + int num_insns = 0; + rs6000_emit_set_long_const (NULL, value, &num_insns); + return num_insns; } else @@ -10494,14 +10482,13 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask) /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. Output insns to set DEST equal to the constant C as a series of - lis, ori and shl instructions. */ + lis, ori and shl instructions. If NUM_INSNS is not NULL, then + only increase *NUM_INSNS as the number of insns, and do not output + real insns. 
*/ static void -rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) +rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns) { - rtx temp; - int shift; - HOST_WIDE_INT mask; HOST_WIDE_INT ud1, ud2, ud3, ud4; ud1 = c & 0xffff; @@ -10509,168 +10496,183 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) ud3 = (c >> 32) & 0xffff; ud4 = (c >> 48) & 0xffff; - if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) - || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000))) - emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16))); + /* This lambda is used to emit one insn or just increase the insn count. + When counting the insn number, no need to emit the insn. Here, two + kinds of insns are needed: move and rldimi. */ + auto count_or_emit_insn = [&num_insns] (rtx dest, rtx op1, rtx op2 = NULL) { + if (num_insns) + (*num_insns)++; + else if (!op2) + emit_move_insn (dest, op1); + else + emit_insn (gen_rotldi3_insert_3 (dest, op1, GEN_INT (32), op2, + GEN_INT (0xffffffff))); + }; - else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000)) - || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000))) + if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) + || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000))) { - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); + /* li */ + count_or_emit_insn (dest, GEN_INT (sext_hwi (ud1, 16))); + return; + } + + rtx temp = num_insns ? nullptr + : can_create_pseudo_p () ? gen_reg_rtx (DImode) : dest; - emit_move_insn (ud1 != 0 ? temp : dest, - GEN_INT (sext_hwi (ud2 << 16, 32))); + if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000)) + || (ud4 == 0 && ud3 == 0 && !(ud2 & 0x8000))) + { + /* lis[; ori] */ + count_or_emit_insn (ud1 != 0 ? 
temp : dest, + GEN_INT (sext_hwi (ud2 << 16, 32))); if (ud1 != 0) - emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); + count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); + return; } - else if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0) + + if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0) { /* lis; xoris */ - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); - emit_move_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32))); - emit_move_insn (dest, gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000))); + count_or_emit_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32))); + count_or_emit_insn (dest, + gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000))); + return; } - else if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000)) + + if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000)) { /* li; xoris */ - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); - emit_move_insn (temp, GEN_INT (sext_hwi (ud1, 16))); - emit_move_insn (dest, gen_rtx_XOR (DImode, temp, - GEN_INT ((ud2 ^ 0xffff) << 16))); + count_or_emit_insn (temp, GEN_INT (sext_hwi (ud1, 16))); + count_or_emit_insn (dest, gen_rtx_XOR (DImode, temp, + GEN_INT ((ud2 ^ 0xffff) << 16))); + return; } - else if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask) - || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask) - || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask) - || can_be_built_by_li_and_rldic (c, &shift, &mask)) + + int shift; + HOST_WIDE_INT mask; + if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask) + || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask) + || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask) + || can_be_built_by_li_and_rldic (c, &shift, &mask)) { - temp = !can_create_pseudo_p () ? 
dest : gen_reg_rtx (DImode); + /* li/lis; rldicX */ unsigned HOST_WIDE_INT imm = (c | ~mask); imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift)); - emit_move_insn (temp, GEN_INT (imm)); + count_or_emit_insn (temp, GEN_INT (imm)); if (shift != 0) temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift)); if (mask != HOST_WIDE_INT_M1) temp = gen_rtx_AND (DImode, temp, GEN_INT (mask)); - emit_move_insn (dest, temp); - } - else if (ud3 == 0 && ud4 == 0) - { - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); + count_or_emit_insn (dest, temp); - gcc_assert (ud2 & 0x8000); + return; + } - if (ud1 == 0) - { - /* lis; rldicl */ - emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32))); - emit_move_insn (dest, - gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff))); - } - else if (!(ud1 & 0x8000)) + if (ud3 == 0 && ud4 == 0) + { + gcc_assert ((ud2 & 0x8000) && ud1 != 0); + if (!(ud1 & 0x8000)) { /* li; oris */ - emit_move_insn (temp, GEN_INT (ud1)); - emit_move_insn (dest, - gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16))); + count_or_emit_insn (temp, GEN_INT (ud1)); + count_or_emit_insn (dest, + gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16))); + return; } - else - { - /* lis; ori; rldicl */ - emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32))); - emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); - emit_move_insn (dest, + + /* lis; ori; rldicl */ + count_or_emit_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32))); + count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); + count_or_emit_insn (dest, gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff))); - } + return; } - else if (ud1 == ud3 && ud2 == ud4) + + if (ud1 == ud3 && ud2 == ud4) { - temp = !can_create_pseudo_p () ? 
dest : gen_reg_rtx (DImode); HOST_WIDE_INT num = (ud2 << 16) | ud1; - rs6000_emit_set_long_const (temp, sext_hwi (num, 32)); + rs6000_emit_set_long_const (temp, sext_hwi (num, 32), num_insns); + rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)); rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32)); - emit_move_insn (dest, gen_rtx_IOR (DImode, one, two)); + count_or_emit_insn (dest, gen_rtx_IOR (DImode, one, two)); + return; } - else if ((ud4 == 0xffff && (ud3 & 0x8000)) - || (ud4 == 0 && ! (ud3 & 0x8000))) - { - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); - emit_move_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32))); + if ((ud4 == 0xffff && (ud3 & 0x8000)) || (ud4 == 0 && !(ud3 & 0x8000))) + { + count_or_emit_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32))); if (ud2 != 0) - emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2))); - emit_move_insn (ud1 != 0 ? temp : dest, - gen_rtx_ASHIFT (DImode, temp, GEN_INT (16))); + count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2))); + count_or_emit_insn (ud1 != 0 ? temp : dest, + gen_rtx_ASHIFT (DImode, temp, GEN_INT (16))); if (ud1 != 0) - emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); + count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); + return; } - else if (TARGET_PREFIXED) + + if (TARGET_PREFIXED) { if (can_create_pseudo_p ()) { - /* pli A,L + pli B,H + rldimi A,B,32,0. */ - temp = gen_reg_rtx (DImode); - rtx temp1 = gen_reg_rtx (DImode); - emit_move_insn (temp, GEN_INT ((ud4 << 16) | ud3)); - emit_move_insn (temp1, GEN_INT ((ud2 << 16) | ud1)); - - emit_insn (gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1, - GEN_INT (0xffffffff))); + /* pli A,L; pli B,H; rldimi A,B,32,0. */ + rtx temp1 = num_insns ? 
nullptr : gen_reg_rtx (DImode); + count_or_emit_insn (temp, GEN_INT ((ud4 << 16) | ud3)); + count_or_emit_insn (temp1, GEN_INT ((ud2 << 16) | ud1)); + count_or_emit_insn (dest, temp, temp1); + return; } - else - { - /* pli A,H + sldi A,32 + paddi A,A,L. */ - emit_move_insn (dest, GEN_INT ((ud4 << 16) | ud3)); - emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); + /* There may be 1 insn inaccurate because of no info about dest. */ + bool can_use_paddi = dest ? REGNO (dest) != FIRST_GPR_REGNO : false; - bool can_use_paddi = REGNO (dest) != FIRST_GPR_REGNO; + /* pli A,H; sldi A,32; paddi A,A,L. */ + count_or_emit_insn (dest, GEN_INT ((ud4 << 16) | ud3)); + count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); - /* Use paddi for the low 32 bits. */ - if (ud2 != 0 && ud1 != 0 && can_use_paddi) - emit_move_insn (dest, gen_rtx_PLUS (DImode, dest, + /* Use paddi for the low 32 bits. */ + if (ud2 != 0 && ud1 != 0 && can_use_paddi) + count_or_emit_insn (dest, gen_rtx_PLUS (DImode, dest, GEN_INT ((ud2 << 16) | ud1))); - - /* Use oris, ori for low 32 bits. */ - if (ud2 != 0 && (ud1 == 0 || !can_use_paddi)) - emit_move_insn (dest, + /* Use oris, ori for low 32 bits. */ + if (ud2 != 0 && (ud1 == 0 || !can_use_paddi)) + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16))); - if (ud1 != 0 && (ud2 == 0 || !can_use_paddi)) - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); - } + if (ud1 != 0 && (ud2 == 0 || !can_use_paddi)) + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); + return; } - else + + if (can_create_pseudo_p ()) { - if (can_create_pseudo_p ()) - { - /* lis HIGH,UD4 ; ori HIGH,UD3 ; - lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. 
*/ - rtx high = gen_reg_rtx (DImode); - rtx low = gen_reg_rtx (DImode); - HOST_WIDE_INT num = (ud2 << 16) | ud1; - rs6000_emit_set_long_const (low, sext_hwi (num, 32)); - num = (ud4 << 16) | ud3; - rs6000_emit_set_long_const (high, sext_hwi (num, 32)); - emit_insn (gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low, - GEN_INT (0xffffffff))); - } - else - { - /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ; - oris DEST,UD2 ; ori DEST,UD1. */ - emit_move_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32))); - if (ud3 != 0) - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3))); + /* lis HIGH,UD4 ; ori HIGH,UD3 ; + lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. */ + rtx high = num_insns ? nullptr : gen_reg_rtx (DImode); + rtx low = num_insns ? nullptr : gen_reg_rtx (DImode); + HOST_WIDE_INT num = (ud2 << 16) | ud1; + rs6000_emit_set_long_const (low, sext_hwi (num, 32), num_insns); + num = (ud4 << 16) | ud3; + rs6000_emit_set_long_const (high, sext_hwi (num, 32), num_insns); - emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); - if (ud2 != 0) - emit_move_insn (dest, - gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16))); - if (ud1 != 0) - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); - } + count_or_emit_insn (dest, high, low); + return; } + + /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ; + oris DEST,UD2 ; ori DEST,UD1. */ + count_or_emit_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32))); + if (ud3 != 0) + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3))); + + count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); + if (ud2 != 0) + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16))); + if (ud1 != 0) + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); + + return; } /* Helper for the following. Get rid of [r+r] memory refs -- 2.25.1 ^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH V3 2/3] Using pli for constant splitting 2023-12-06 5:24 [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns Jiufu Guo @ 2023-12-06 5:24 ` Jiufu Guo 2023-12-07 6:12 ` Kewen.Lin 2023-12-06 5:24 ` [PATCH V3 3/3] split complicate constant to memory Jiufu Guo 2023-12-07 6:01 ` [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns Kewen.Lin 2 siblings, 1 reply; 7+ messages in thread From: Jiufu Guo @ 2023-12-06 5:24 UTC (permalink / raw) To: gcc-patches; +Cc: segher, dje.gcc, linkw, bergner, guojiufu Hi, For constant building e.g. r120=0x66666666, which does not fit 'li or lis', 'pli' is used to build this constant via 'emit_move_insn'. While for a complicated constant, e.g. 0x6666666666666666ULL, when using 'rs6000_emit_set_long_const' to split the constant recursively, it fails to use 'pli' to build the half part constant: 0x66666666. 'rs6000_emit_set_long_const' could be updated to use 'pli' to build half part of the constant when necessary. For example: 0x6666666666666666ULL, "pli 3,1717986918; rldimi 3,3,32,0" can be used. Compare with previous: https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636567.html This version is refreshed and added with a new testcase. Bootstrap & regtest pass on ppc64{,le}. Is this ok for trunk? BR, Jeff (Jiufu Guo) gcc/ChangeLog: * config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add code to use pli for 34bit constant. gcc/testsuite/ChangeLog: * gcc.target/powerpc/const_split_pli.c: New test. 
--- gcc/config/rs6000/rs6000.cc | 7 +++++++ gcc/testsuite/gcc.target/powerpc/const_split_pli.c | 9 +++++++++ 2 files changed, 16 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/const_split_pli.c diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index dbdc72dce5d..2e074a21a05 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -10509,6 +10509,13 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns) GEN_INT (0xffffffff))); }; + if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (c)) + { + /* li/lis/pli */ + count_or_emit_insn (dest, GEN_INT (c)); + return; + } + if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000))) { diff --git a/gcc/testsuite/gcc.target/powerpc/const_split_pli.c b/gcc/testsuite/gcc.target/powerpc/const_split_pli.c new file mode 100644 index 00000000000..626c93084aa --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/const_split_pli.c @@ -0,0 +1,9 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2" } */ +/* { dg-require-effective-target power10_ok } */ + +unsigned long long msk66() { return 0x6666666666666666ULL; } + +/* { dg-final { scan-assembler-times {\mpli\M} 1 } } */ +/* { dg-final { scan-assembler-not {\mli\M} } } */ +/* { dg-final { scan-assembler-not {\mlis\M} } } */ -- 2.25.1 ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH V3 2/3] Using pli for constant splitting 2023-12-06 5:24 ` [PATCH V3 2/3] Using pli for constant splitting Jiufu Guo @ 2023-12-07 6:12 ` Kewen.Lin 2023-12-08 3:32 ` Jiufu Guo 0 siblings, 1 reply; 7+ messages in thread From: Kewen.Lin @ 2023-12-07 6:12 UTC (permalink / raw) To: Jiufu Guo; +Cc: segher, dje.gcc, linkw, bergner, gcc-patches Hi Jeff, on 2023/12/6 13:24, Jiufu Guo wrote: > Hi, > > For constant building e.g. r120=0x66666666, which does not fit 'li or lis', > 'pli' is used to build this constant via 'emit_move_insn'. > > While for a complicated constant, e.g. 0x6666666666666666ULL, when using > 'rs6000_emit_set_long_const' to split the constant recursively, it fails to > use 'pli' to build the half part constant: 0x66666666. > > 'rs6000_emit_set_long_const' could be updated to use 'pli' to build half > part of the constant when necessary. For example: 0x6666666666666666ULL, > "pli 3,1717986918; rldimi 3,3,32,0" can be used. > > Compare with previous: > https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636567.html > This verion is refreshed and added with a new testcase. > > Bootstrap & regtest pass on ppc64{,le}. > Is this ok for trunk? > > BR, > Jeff (Jiufu Guo) > > gcc/ChangeLog: > > * config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add code to use > pli for 34bit constant. > > gcc/testsuite/ChangeLog: > > * gcc.target/powerpc/const_split_pli.c: New test. Nit: Now we have: gcc/testsuite/gcc.target/powerpc/const-build.c gcc/testsuite/gcc.target/powerpc/const_anchors.c gcc/testsuite/gcc.target/powerpc/const-compare.c I prefer the name of this new case is like const-build-1.c (put a detailed comment inside) or const-build-split-pli.c, to align with the existing. 
> > --- > gcc/config/rs6000/rs6000.cc | 7 +++++++ > gcc/testsuite/gcc.target/powerpc/const_split_pli.c | 9 +++++++++ > 2 files changed, 16 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/powerpc/const_split_pli.c > > diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc > index dbdc72dce5d..2e074a21a05 100644 > --- a/gcc/config/rs6000/rs6000.cc > +++ b/gcc/config/rs6000/rs6000.cc > @@ -10509,6 +10509,13 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns) > GEN_INT (0xffffffff))); > }; > > + if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (c)) > + { > + /* li/lis/pli */ > + count_or_emit_insn (dest, GEN_INT (c)); > + return; > + } > + > if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) > || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000))) > { > diff --git a/gcc/testsuite/gcc.target/powerpc/const_split_pli.c b/gcc/testsuite/gcc.target/powerpc/const_split_pli.c > new file mode 100644 > index 00000000000..626c93084aa > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/const_split_pli.c > @@ -0,0 +1,9 @@ > +/* { dg-do compile { target lp64 } } */ > +/* { dg-options "-O2" } */ It needs -mdejagnu-cpu=power10 as well. > +/* { dg-require-effective-target power10_ok } */ > + > +unsigned long long msk66() { return 0x6666666666666666ULL; } > + > +/* { dg-final { scan-assembler-times {\mpli\M} 1 } } */ > +/* { dg-final { scan-assembler-not {\mli\M} } } */ > +/* { dg-final { scan-assembler-not {\mlis\M} } } */ OK for trunk with the above nits tweaked, thanks! BR, Kewen ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH V3 2/3] Using pli for constant splitting 2023-12-07 6:12 ` Kewen.Lin @ 2023-12-08 3:32 ` Jiufu Guo 0 siblings, 0 replies; 7+ messages in thread From: Jiufu Guo @ 2023-12-08 3:32 UTC (permalink / raw) To: Kewen.Lin; +Cc: segher, dje.gcc, linkw, bergner, gcc-patches Hi, Thanks for your insight and helpful review! "Kewen.Lin" <linkw@linux.ibm.com> writes: > Hi Jeff, > > on 2023/12/6 13:24, Jiufu Guo wrote: >> Hi, >> >> For constant building e.g. r120=0x66666666, which does not fit 'li or lis', >> 'pli' is used to build this constant via 'emit_move_insn'. >> >> While for a complicated constant, e.g. 0x6666666666666666ULL, when using >> 'rs6000_emit_set_long_const' to split the constant recursively, it fails to >> use 'pli' to build the half part constant: 0x66666666. >> >> 'rs6000_emit_set_long_const' could be updated to use 'pli' to build half >> part of the constant when necessary. For example: 0x6666666666666666ULL, >> "pli 3,1717986918; rldimi 3,3,32,0" can be used. >> >> Compare with previous: >> https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636567.html >> This verion is refreshed and added with a new testcase. >> >> Bootstrap & regtest pass on ppc64{,le}. >> Is this ok for trunk? >> >> BR, >> Jeff (Jiufu Guo) >> >> gcc/ChangeLog: >> >> * config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add code to use >> pli for 34bit constant. >> >> gcc/testsuite/ChangeLog: >> >> * gcc.target/powerpc/const_split_pli.c: New test. > > Nit: Now we have: > > gcc/testsuite/gcc.target/powerpc/const-build.c > gcc/testsuite/gcc.target/powerpc/const_anchors.c > gcc/testsuite/gcc.target/powerpc/const-compare.c > > I prefer the name of this new case is like const-build-1.c > (put a detailed comment inside) or const-build-split-pli.c, > to align with the existing. Thanks! 
> >> >> --- >> gcc/config/rs6000/rs6000.cc | 7 +++++++ >> gcc/testsuite/gcc.target/powerpc/const_split_pli.c | 9 +++++++++ >> 2 files changed, 16 insertions(+) >> create mode 100644 gcc/testsuite/gcc.target/powerpc/const_split_pli.c >> >> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc >> index dbdc72dce5d..2e074a21a05 100644 >> --- a/gcc/config/rs6000/rs6000.cc >> +++ b/gcc/config/rs6000/rs6000.cc >> @@ -10509,6 +10509,13 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns) >> GEN_INT (0xffffffff))); >> }; >> >> + if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (c)) >> + { >> + /* li/lis/pli */ >> + count_or_emit_insn (dest, GEN_INT (c)); >> + return; >> + } >> + >> if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) >> || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000))) >> { >> diff --git a/gcc/testsuite/gcc.target/powerpc/const_split_pli.c b/gcc/testsuite/gcc.target/powerpc/const_split_pli.c >> new file mode 100644 >> index 00000000000..626c93084aa >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/const_split_pli.c >> @@ -0,0 +1,9 @@ >> +/* { dg-do compile { target lp64 } } */ >> +/* { dg-options "-O2" } */ > > It needs -mdejagnu-cpu=power10 as well. Yeap, thanks. > >> +/* { dg-require-effective-target power10_ok } */ >> + >> +unsigned long long msk66() { return 0x6666666666666666ULL; } >> + >> +/* { dg-final { scan-assembler-times {\mpli\M} 1 } } */ >> +/* { dg-final { scan-assembler-not {\mli\M} } } */ >> +/* { dg-final { scan-assembler-not {\mlis\M} } } */ > > OK for trunk with the above nits tweaked, thanks! > > BR, > Kewen ^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH V3 3/3] split complicate constant to memory 2023-12-06 5:24 [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns Jiufu Guo 2023-12-06 5:24 ` [PATCH V3 2/3] Using pli for constant splitting Jiufu Guo @ 2023-12-06 5:24 ` Jiufu Guo 2023-12-07 6:01 ` [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns Kewen.Lin 2 siblings, 0 replies; 7+ messages in thread From: Jiufu Guo @ 2023-12-06 5:24 UTC (permalink / raw) To: gcc-patches; +Cc: segher, dje.gcc, linkw, bergner, guojiufu Hi, Sometimes, a complicated constant is built via 3 (or more) instructions. Generally speaking, it would not be as fast as loading it from the constant pool (as a few discussions in PR63281): * "ld" is one instruction. If consider "address/toc" adjust, we may count it as 2 instructions (the high part of address computation could be optimized as nop by linker further). And "pld" may need fewer cycles. * As testing(SPEC2017), it could get better/stable runtime if set the threshold as "> 2" (compare with "> 3"). As tested on spec2017, for visible performance changes, we can find the runtime improvement on 500.perlbench_r about ~1.8% (-O2, P10) with the patch. And for performance downgrades on other benchmarks, as investigated, the regressions are not caused by this patch. Compare with the previous version: https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636566.html This version is refreshed based on the latest code. Bootstrap & regtest pass on ppc64{,le}. Is this ok for trunk? BR, Jeff (Jiufu Guo) PR target/63281 gcc/ChangeLog: * config/rs6000/rs6000.cc (rs6000_emit_set_const): Update to split complicate constant to memory. gcc/testsuite/ChangeLog: * gcc.target/powerpc/const_anchors.c: Update to test final-rtl. * gcc.target/powerpc/parall_5insn_const.c: Update to keep original test point. * gcc.target/powerpc/pr106550.c: Likewise. * gcc.target/powerpc/pr106550_1.c: Likewise. * gcc.target/powerpc/pr87870.c: Update according to latest behavior. 
* gcc.target/powerpc/pr93012.c: Likewise. --- gcc/config/rs6000/rs6000.cc | 16 ++++++++++++++++ .../gcc.target/powerpc/const_anchors.c | 5 ++--- .../gcc.target/powerpc/parall_5insn_const.c | 14 ++++++++++++-- gcc/testsuite/gcc.target/powerpc/pr106550.c | 17 +++++++++++++++-- gcc/testsuite/gcc.target/powerpc/pr106550_1.c | 15 +++++++++++++-- gcc/testsuite/gcc.target/powerpc/pr87870.c | 5 ++++- gcc/testsuite/gcc.target/powerpc/pr93012.c | 5 ++++- 7 files changed, 66 insertions(+), 11 deletions(-) diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 2e074a21a05..e44a6da91ae 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -10271,6 +10271,22 @@ rs6000_emit_set_const (rtx dest, rtx source) c = sext_hwi (c, 32); emit_move_insn (lo, GEN_INT (c)); } + + /* If it can be stored to the constant pool and profitable. */ + else if (base_reg_operand (dest, mode) + && num_insns_constant (source, mode) > 2) + { + rtx sym = force_const_mem (mode, source); + if (TARGET_TOC && SYMBOL_REF_P (XEXP (sym, 0)) + && use_toc_relative_ref (XEXP (sym, 0), mode)) + { + rtx toc = create_TOC_reference (XEXP (sym, 0), copy_rtx (dest)); + sym = gen_const_mem (mode, toc); + set_mem_alias_set (sym, get_TOC_alias_set ()); + } + + emit_insn (gen_rtx_SET (dest, sym)); + } else rs6000_emit_set_long_const (dest, c); break; diff --git a/gcc/testsuite/gcc.target/powerpc/const_anchors.c b/gcc/testsuite/gcc.target/powerpc/const_anchors.c index 542e2674b12..188744165f2 100644 --- a/gcc/testsuite/gcc.target/powerpc/const_anchors.c +++ b/gcc/testsuite/gcc.target/powerpc/const_anchors.c @@ -1,5 +1,5 @@ /* { dg-do compile { target has_arch_ppc64 } } */ -/* { dg-options "-O2" } */ +/* { dg-options "-O2 -fdump-rtl-final" } */ #define C1 0x2351847027482577ULL #define C2 0x2351847027482578ULL @@ -16,5 +16,4 @@ void __attribute__ ((noinline)) foo1 (long long *a, long long b) if (b) *a++ = C2; } - -/* { dg-final { scan-assembler-times {\maddi\M} 2 } } */ +/* { 
dg-final { scan-rtl-dump-times {\madddi3\M} 2 "final" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c index e3a9a7264cf..df0690b90be 100644 --- a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c +++ b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c @@ -9,8 +9,18 @@ void __attribute__ ((noinline)) foo (unsigned long long *a) { /* 2 lis + 2 ori + 1 rldimi for each constant. */ - *a++ = 0x800aabcdc167fa16ULL; - *a++ = 0x7543a876867f616ULL; + { + register long long d asm("r0") = 0x800aabcdc167fa16ULL; + long long n; + asm("mr %0, %1" : "=r"(n) : "r"(d)); + *a++ = n; + } + { + register long long d asm("r0") = 0x7543a876867f616ULL; + long long n; + asm("mr %0, %1" : "=r"(n) : "r"(d)); + *a++ = n; + } } long long A[] = {0x800aabcdc167fa16ULL, 0x7543a876867f616ULL}; diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550.c b/gcc/testsuite/gcc.target/powerpc/pr106550.c index 74e395331ab..5eca2b2f701 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr106550.c +++ b/gcc/testsuite/gcc.target/powerpc/pr106550.c @@ -1,12 +1,25 @@ /* PR target/106550 */ /* { dg-options "-O2 -mdejagnu-cpu=power10" } */ /* { dg-require-effective-target power10_ok } */ +/* { dg-require-effective-target has_arch_ppc64 } */ void foo (unsigned long long *a) { - *a++ = 0x020805006106003; /* pli+pli+rldimi */ - *a++ = 0x2351847027482577;/* pli+pli+rldimi */ + { + /* pli+pli+rldimi */ + register long long d asm("r0") = 0x020805006106003ULL; + long long n; + asm("mr %0, %1" : "=r"(n) : "r"(d)); + *a++ = n; + } + { + /* pli+pli+rldimi */ + register long long d asm("r0") = 0x2351847027482577ULL; + long long n; + asm("mr %0, %1" : "=r"(n) : "r"(d)); + *a++ = n; + } } /* { dg-final { scan-assembler-times {\mpli\M} 4 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c index 5ab40d71a56..80e6b817dff 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c +++ 
b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c @@ -13,8 +13,19 @@ foo (unsigned long long *a) asm("cntlzd %0, %1" : "=r"(n) : "r"(d)); *a++ = n; - *a++ = 0x235a8470a7480000ULL; /* pli+sldi+oris */ - *a++ = 0x23a184700000b677ULL; /* pli+sldi+ori */ + { + register long long d asm("r0") = 0x235a8470a7480000ULL; /* pli+sldi+oris */ + long long n; + asm("cntlzd %0, %1" : "=r"(n) : "r"(d)); + *a++ = n; + } + + { + register long long d asm("r0") = 0x23a184700000b677ULL; /* pli+sldi+ori */ + long long n; + asm("cntlzd %0, %1" : "=r"(n) : "r"(d)); + *a++ = n; + } } /* { dg-final { scan-assembler-times {\mpli\M} 3 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr87870.c b/gcc/testsuite/gcc.target/powerpc/pr87870.c index d2108ac3386..5fee06744ae 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr87870.c +++ b/gcc/testsuite/gcc.target/powerpc/pr87870.c @@ -25,4 +25,7 @@ test3 (void) return ((__int128)0xdeadbeefcafebabe << 64) | 0xfacefeedbaaaaaad; } -/* { dg-final { scan-assembler-not {\mld\M} } } */ +/* test3 using "ld" to load the value for r3 and r4. + test0, test1 and test2 are using "li". */ +/* { dg-final { scan-assembler-times {\mp?ld\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mli\M} 6 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c index 4f764d0576f..b9e869e4285 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c @@ -10,4 +10,7 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; } unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; } unsigned long long mskse() { return 0xffff1234ffff1234ULL; } -/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */ +/* { dg-final { scan-assembler-times {\mpli\M} 4 { target has_arch_pwr10 }} } */ +/* { dg-final { scan-assembler-times {\mrldimi\M} 7 { target has_arch_pwr10 } } } */ +/* { dg-final { scan-assembler-times {\mrldimi\M} 3 { target { ! 
has_arch_pwr10 } } } } */ +/* { dg-final { scan-assembler-times {\mld\M} 4 { target { ! has_arch_pwr10 } } } } */ -- 2.25.1 ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns 2023-12-06 5:24 [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns Jiufu Guo 2023-12-06 5:24 ` [PATCH V3 2/3] Using pli for constant splitting Jiufu Guo 2023-12-06 5:24 ` [PATCH V3 3/3] split complicate constant to memory Jiufu Guo @ 2023-12-07 6:01 ` Kewen.Lin 2023-12-08 3:30 ` Jiufu Guo 2 siblings, 1 reply; 7+ messages in thread From: Kewen.Lin @ 2023-12-07 6:01 UTC (permalink / raw) To: Jiufu Guo; +Cc: segher, dje.gcc, linkw, bergner, gcc-patches Hi Jeff, on 2023/12/6 13:24, Jiufu Guo wrote: > Hi, > > Trunk gcc supports more constants to be built via two instructions: > e.g. "li/lis; xori/xoris/rldicl/rldicr/rldic". > And then num_insns_constant should also be updated. > > Function "rs6000_emit_set_long_const" is used to build complicated > constants; and "num_insns_constant_gpr" is used to compute 'how > many instructions are needed" to build the constant. So, these > two functions should be aligned. > > The idea of this patch is: to reuse "rs6000_emit_set_long_const" to > compute/record the instruction number(when computing the insn_num, > then do not emit instructions). > > Compare with the previous version: > https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636565.html > This version updates "rs6000_emit_set_long_const" to use a condition > if to select either "computing insn number" or "emitting the insn". > And put them together to avoid misalign in the future. > > Bootstrap & regtest pass ppc64{,le}. > Is this ok for trunk? > > BR, > Jeff (Jiufu Guo) > > gcc/ChangeLog: > > * config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add new > parameter to record number of instructions to build the constant. > (num_insns_constant_gpr): Call rs6000_emit_set_long_const to compute > num_insn. 
> > --- > gcc/config/rs6000/rs6000.cc | 272 ++++++++++++++++++------------------ > 1 file changed, 137 insertions(+), 135 deletions(-) > > diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc > index 3dfd79c4c43..dbdc72dce5d 100644 > --- a/gcc/config/rs6000/rs6000.cc > +++ b/gcc/config/rs6000/rs6000.cc > @@ -1115,7 +1115,7 @@ static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *); > static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *); > static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *); > static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree); > -static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT); > +static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, int * = nullptr); > static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool); > static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool); > static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t, > @@ -6054,21 +6054,9 @@ num_insns_constant_gpr (HOST_WIDE_INT value) > > else if (TARGET_POWERPC64) > { > - HOST_WIDE_INT low = sext_hwi (value, 32); > - HOST_WIDE_INT high = value >> 31; > - > - if (high == 0 || high == -1) > - return 2; > - > - high >>= 1; > - > - if (low == 0 || low == high) > - return num_insns_constant_gpr (high) + 1; > - else if (high == 0) > - return num_insns_constant_gpr (low) + 1; > - else > - return (num_insns_constant_gpr (high) > - + num_insns_constant_gpr (low) + 1); > + int num_insns = 0; > + rs6000_emit_set_long_const (NULL, value, &num_insns); Nit: Maybe nullptr to align with the others in this patch? > + return num_insns; > } > > else > @@ -10494,14 +10482,13 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask) > > /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. > Output insns to set DEST equal to the constant C as a series of > - lis, ori and shl instructions. 
*/ > + lis, ori and shl instructions. If NUM_INSNS is not NULL, then > + only increase *NUM_INSNS as the number of insns, and do not output > + real insns. */ Nit: Maybe s/output real/emit any/. > > static void > -rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) > +rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns) > { > - rtx temp; > - int shift; > - HOST_WIDE_INT mask; > HOST_WIDE_INT ud1, ud2, ud3, ud4; > > ud1 = c & 0xffff; > @@ -10509,168 +10496,183 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) > ud3 = (c >> 32) & 0xffff; > ud4 = (c >> 48) & 0xffff; > > - if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) > - || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000))) > - emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16))); > + /* This lambda is used to emit one insn or just increase the insn count. > + When counting the insn number, no need to emit the insn. Here, two > + kinds of insns are needed: move and rldimi. */ Can we make the latter a bit more generic? Like something below? > + auto count_or_emit_insn = [&num_insns] (rtx dest, rtx op1, rtx op2 = NULL) { > + if (num_insns) > + (*num_insns)++; Nit: Make it early return. > + else if (!op2) > + emit_move_insn (dest, op1); > + else > + emit_insn (gen_rotldi3_insert_3 (dest, op1, GEN_INT (32), op2, > + GEN_INT (0xffffffff))); [&num_insns] (rtx dest_or_insn, rtx src) if (src) emit_move_insn (dest_or_insn, src); else emit_insn (dest_or_insn); > + }; > > - else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000)) > - || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000))) > + if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) > + || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000))) > { > - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); > + /* li */ > + count_or_emit_insn (dest, GEN_INT (sext_hwi (ud1, 16))); > + return; > + } > + > + rtx temp = num_insns ? nullptr > + : can_create_pseudo_p () ? 
gen_reg_rtx (DImode) : dest; Nit: Maybe temp = (num_insns || !can_create_pseudo_p ()) ? dest: gen_reg_rtx (DImode); since NULL passed as dest for num_insns. > > - emit_move_insn (ud1 != 0 ? temp : dest, > - GEN_INT (sext_hwi (ud2 << 16, 32))); > + if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000)) > + || (ud4 == 0 && ud3 == 0 && !(ud2 & 0x8000))) > + { > + /* lis[; ori] */ > + count_or_emit_insn (ud1 != 0 ? temp : dest, > + GEN_INT (sext_hwi (ud2 << 16, 32))); > if (ud1 != 0) > - emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); > + count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); > + return; > } > - else if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0) > + > + if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0) > { > /* lis; xoris */ > - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); > - emit_move_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32))); > - emit_move_insn (dest, gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000))); > + count_or_emit_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32))); > + count_or_emit_insn (dest, > + gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000))); > + return; > } > - else if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000)) > + > + if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000)) > { > /* li; xoris */ > - temp = !can_create_pseudo_p () ? 
dest : gen_reg_rtx (DImode); > - emit_move_insn (temp, GEN_INT (sext_hwi (ud1, 16))); > - emit_move_insn (dest, gen_rtx_XOR (DImode, temp, > - GEN_INT ((ud2 ^ 0xffff) << 16))); > + count_or_emit_insn (temp, GEN_INT (sext_hwi (ud1, 16))); > + count_or_emit_insn (dest, gen_rtx_XOR (DImode, temp, > + GEN_INT ((ud2 ^ 0xffff) << 16))); > + return; > } > - else if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask) > - || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask) > - || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask) > - || can_be_built_by_li_and_rldic (c, &shift, &mask)) > + > + int shift; > + HOST_WIDE_INT mask; > + if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask) > + || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask) > + || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask) > + || can_be_built_by_li_and_rldic (c, &shift, &mask)) > { > - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); > + /* li/lis; rldicX */ > unsigned HOST_WIDE_INT imm = (c | ~mask); > imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift)); > > - emit_move_insn (temp, GEN_INT (imm)); > + count_or_emit_insn (temp, GEN_INT (imm)); > if (shift != 0) > temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift)); > if (mask != HOST_WIDE_INT_M1) > temp = gen_rtx_AND (DImode, temp, GEN_INT (mask)); > - emit_move_insn (dest, temp); > - } > - else if (ud3 == 0 && ud4 == 0) > - { > - temp = !can_create_pseudo_p () ? 
dest : gen_reg_rtx (DImode); > + count_or_emit_insn (dest, temp); > > - gcc_assert (ud2 & 0x8000); > + return; > + } > > - if (ud1 == 0) > - { > - /* lis; rldicl */ > - emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32))); > - emit_move_insn (dest, > - gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff))); > - } > - else if (!(ud1 & 0x8000)) > + if (ud3 == 0 && ud4 == 0) > + { > + gcc_assert ((ud2 & 0x8000) && ud1 != 0); > + if (!(ud1 & 0x8000)) > { > /* li; oris */ > - emit_move_insn (temp, GEN_INT (ud1)); > - emit_move_insn (dest, > - gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16))); > + count_or_emit_insn (temp, GEN_INT (ud1)); > + count_or_emit_insn (dest, > + gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16))); > + return; > } > - else > - { > - /* lis; ori; rldicl */ > - emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32))); > - emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); > - emit_move_insn (dest, > + > + /* lis; ori; rldicl */ > + count_or_emit_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32))); > + count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); > + count_or_emit_insn (dest, > gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff))); > - } > + return; > } > - else if (ud1 == ud3 && ud2 == ud4) > + > + if (ud1 == ud3 && ud2 == ud4) > { Nit: Like the others, it's still preferred to have a comment indicating what's insn sequence for this hunk, ... > - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); > HOST_WIDE_INT num = (ud2 << 16) | ud1; > - rs6000_emit_set_long_const (temp, sext_hwi (num, 32)); > + rs6000_emit_set_long_const (temp, sext_hwi (num, 32), num_insns); > + > rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)); > rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32)); > - emit_move_insn (dest, gen_rtx_IOR (DImode, one, two)); > + count_or_emit_insn (dest, gen_rtx_IOR (DImode, one, two)); > + return; > } > - else if ((ud4 == 0xffff && (ud3 & 0x8000)) > - || (ud4 == 0 && ! 
(ud3 & 0x8000))) > - { > - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); > > - emit_move_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32))); > + if ((ud4 == 0xffff && (ud3 & 0x8000)) || (ud4 == 0 && !(ud3 & 0x8000))) > + { ... and this. > + count_or_emit_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32))); > if (ud2 != 0) > - emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2))); > - emit_move_insn (ud1 != 0 ? temp : dest, > - gen_rtx_ASHIFT (DImode, temp, GEN_INT (16))); > + count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2))); > + count_or_emit_insn (ud1 != 0 ? temp : dest, > + gen_rtx_ASHIFT (DImode, temp, GEN_INT (16))); > if (ud1 != 0) > - emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); > + count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); > + return; > } > - else if (TARGET_PREFIXED) > + > + if (TARGET_PREFIXED) > { > if (can_create_pseudo_p ()) > { > - /* pli A,L + pli B,H + rldimi A,B,32,0. */ > - temp = gen_reg_rtx (DImode); > - rtx temp1 = gen_reg_rtx (DImode); > - emit_move_insn (temp, GEN_INT ((ud4 << 16) | ud3)); > - emit_move_insn (temp1, GEN_INT ((ud2 << 16) | ud1)); > - > - emit_insn (gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1, > - GEN_INT (0xffffffff))); > + /* pli A,L; pli B,H; rldimi A,B,32,0. */ > + rtx temp1 = num_insns ? nullptr : gen_reg_rtx (DImode); > + count_or_emit_insn (temp, GEN_INT ((ud4 << 16) | ud3)); > + count_or_emit_insn (temp1, GEN_INT ((ud2 << 16) | ud1)); > + count_or_emit_insn (dest, temp, temp1); > + return; > } > - else > - { > - /* pli A,H + sldi A,32 + paddi A,A,L. */ > - emit_move_insn (dest, GEN_INT ((ud4 << 16) | ud3)); > > - emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); > + /* There may be 1 insn inaccurate because of no info about dest. */ > + bool can_use_paddi = dest ? REGNO (dest) != FIRST_GPR_REGNO : false; Nit: Move this line ... 
> > - bool can_use_paddi = REGNO (dest) != FIRST_GPR_REGNO; > + /* pli A,H; sldi A,32; paddi A,A,L. */ > + count_or_emit_insn (dest, GEN_INT ((ud4 << 16) | ud3)); > + count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); > ... here, just before its use. The others look good to me, thanks! BR, Kewen > - /* Use paddi for the low 32 bits. */ > - if (ud2 != 0 && ud1 != 0 && can_use_paddi) > - emit_move_insn (dest, gen_rtx_PLUS (DImode, dest, > + /* Use paddi for the low 32 bits. */ > + if (ud2 != 0 && ud1 != 0 && can_use_paddi) > + count_or_emit_insn (dest, gen_rtx_PLUS (DImode, dest, > GEN_INT ((ud2 << 16) | ud1))); > - > - /* Use oris, ori for low 32 bits. */ > - if (ud2 != 0 && (ud1 == 0 || !can_use_paddi)) > - emit_move_insn (dest, > + /* Use oris, ori for low 32 bits. */ > + if (ud2 != 0 && (ud1 == 0 || !can_use_paddi)) > + count_or_emit_insn (dest, > gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16))); > - if (ud1 != 0 && (ud2 == 0 || !can_use_paddi)) > - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); > - } > + if (ud1 != 0 && (ud2 == 0 || !can_use_paddi)) > + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); > + return; > } > - else > + > + if (can_create_pseudo_p ()) > { > - if (can_create_pseudo_p ()) > - { > - /* lis HIGH,UD4 ; ori HIGH,UD3 ; > - lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. */ > - rtx high = gen_reg_rtx (DImode); > - rtx low = gen_reg_rtx (DImode); > - HOST_WIDE_INT num = (ud2 << 16) | ud1; > - rs6000_emit_set_long_const (low, sext_hwi (num, 32)); > - num = (ud4 << 16) | ud3; > - rs6000_emit_set_long_const (high, sext_hwi (num, 32)); > - emit_insn (gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low, > - GEN_INT (0xffffffff))); > - } > - else > - { > - /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ; > - oris DEST,UD2 ; ori DEST,UD1. 
*/ > - emit_move_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32))); > - if (ud3 != 0) > - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3))); > + /* lis HIGH,UD4 ; ori HIGH,UD3 ; > + lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. */ > + rtx high = num_insns ? nullptr : gen_reg_rtx (DImode); > + rtx low = num_insns ? nullptr : gen_reg_rtx (DImode); > + HOST_WIDE_INT num = (ud2 << 16) | ud1; > + rs6000_emit_set_long_const (low, sext_hwi (num, 32), num_insns); > + num = (ud4 << 16) | ud3; > + rs6000_emit_set_long_const (high, sext_hwi (num, 32), num_insns); > > - emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); > - if (ud2 != 0) > - emit_move_insn (dest, > - gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16))); > - if (ud1 != 0) > - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); > - } > + count_or_emit_insn (dest, high, low); > + return; > } > + > + /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ; > + oris DEST,UD2 ; ori DEST,UD1. */ > + count_or_emit_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32))); > + if (ud3 != 0) > + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3))); > + > + count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); > + if (ud2 != 0) > + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16))); > + if (ud1 != 0) > + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); > + > + return; > } > > /* Helper for the following. Get rid of [r+r] memory refs ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns 2023-12-07 6:01 ` [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns Kewen.Lin @ 2023-12-08 3:30 ` Jiufu Guo 0 siblings, 0 replies; 7+ messages in thread From: Jiufu Guo @ 2023-12-08 3:30 UTC (permalink / raw) To: Kewen.Lin; +Cc: segher, dje.gcc, linkw, bergner, gcc-patches Hi, Thanks for your always kind and helpful review!! "Kewen.Lin" <linkw@linux.ibm.com> writes: > Hi Jeff, > > on 2023/12/6 13:24, Jiufu Guo wrote: >> Hi, >> >> Trunk gcc supports more constants to be built via two instructions: >> e.g. "li/lis; xori/xoris/rldicl/rldicr/rldic". >> And then num_insns_constant should also be updated. >> >> Function "rs6000_emit_set_long_const" is used to build complicated >> constants; and "num_insns_constant_gpr" is used to compute 'how >> many instructions are needed" to build the constant. So, these >> two functions should be aligned. >> >> The idea of this patch is: to reuse "rs6000_emit_set_long_const" to >> compute/record the instruction number(when computing the insn_num, >> then do not emit instructions). >> >> Compare with the previous version: >> https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636565.html >> This version updates "rs6000_emit_set_long_const" to use a condition >> if to select either "computing insn number" or "emitting the insn". >> And put them together to avoid misalign in the future. >> >> Bootstrap & regtest pass ppc64{,le}. >> Is this ok for trunk? >> >> BR, >> Jeff (Jiufu Guo) >> >> gcc/ChangeLog: >> >> * config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add new >> parameter to record number of instructions to build the constant. >> (num_insns_constant_gpr): Call rs6000_emit_set_long_const to compute >> num_insn. 
>> >> --- >> gcc/config/rs6000/rs6000.cc | 272 ++++++++++++++++++------------------ >> 1 file changed, 137 insertions(+), 135 deletions(-) >> >> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc >> index 3dfd79c4c43..dbdc72dce5d 100644 >> --- a/gcc/config/rs6000/rs6000.cc >> +++ b/gcc/config/rs6000/rs6000.cc >> @@ -1115,7 +1115,7 @@ static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *); >> static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *); >> static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *); >> static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree); >> -static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT); >> +static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, int * = nullptr); >> static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool); >> static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool); >> static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t, >> @@ -6054,21 +6054,9 @@ num_insns_constant_gpr (HOST_WIDE_INT value) >> >> else if (TARGET_POWERPC64) >> { >> - HOST_WIDE_INT low = sext_hwi (value, 32); >> - HOST_WIDE_INT high = value >> 31; >> - >> - if (high == 0 || high == -1) >> - return 2; >> - >> - high >>= 1; >> - >> - if (low == 0 || low == high) >> - return num_insns_constant_gpr (high) + 1; >> - else if (high == 0) >> - return num_insns_constant_gpr (low) + 1; >> - else >> - return (num_insns_constant_gpr (high) >> - + num_insns_constant_gpr (low) + 1); >> + int num_insns = 0; >> + rs6000_emit_set_long_const (NULL, value, &num_insns); > > Nit: Maybe nullptr to align with the others in this patch? ok. > >> + return num_insns; >> } >> >> else >> @@ -10494,14 +10482,13 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask) >> >> /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. 
>> Output insns to set DEST equal to the constant C as a series of >> - lis, ori and shl instructions. */ >> + lis, ori and shl instructions. If NUM_INSNS is not NULL, then >> + only increase *NUM_INSNS as the number of insns, and do not output >> + real insns. */ > > Nit: Maybe s/output real/emit any/. Thanks. > >> >> static void >> -rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) >> +rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns) >> { >> - rtx temp; >> - int shift; >> - HOST_WIDE_INT mask; >> HOST_WIDE_INT ud1, ud2, ud3, ud4; >> >> ud1 = c & 0xffff; >> @@ -10509,168 +10496,183 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) >> ud3 = (c >> 32) & 0xffff; >> ud4 = (c >> 48) & 0xffff; >> >> - if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) >> - || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000))) >> - emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16))); >> + /* This lambda is used to emit one insn or just increase the insn count. >> + When counting the insn number, no need to emit the insn. Here, two >> + kinds of insns are needed: move and rldimi. */ > > Can we make the latter a bit more generic? Like something below? Great suggestion! Thanks. > >> + auto count_or_emit_insn = [&num_insns] (rtx dest, rtx op1, rtx op2 = NULL) { >> + if (num_insns) >> + (*num_insns)++; > > Nit: Make it early return. ok. > >> + else if (!op2) >> + emit_move_insn (dest, op1); >> + else >> + emit_insn (gen_rotldi3_insert_3 (dest, op1, GEN_INT (32), op2, >> + GEN_INT (0xffffffff))); > > > [&num_insns] (rtx dest_or_insn, rtx src) > > if (src) > emit_move_insn (dest_or_insn, src); > else > emit_insn (dest_or_insn); > This could support other gen_X in future. Thanks! > >> + }; >> >> - else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000)) >> - || (ud4 == 0 && ud3 == 0 && !
(ud2 & 0x8000))) >> + if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) >> + || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000))) >> { >> - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); >> + /* li */ >> + count_or_emit_insn (dest, GEN_INT (sext_hwi (ud1, 16))); >> + return; >> + } >> + >> + rtx temp = num_insns ? nullptr >> + : can_create_pseudo_p () ? gen_reg_rtx (DImode) : dest; > > Nit: Maybe > > temp = (num_insns || !can_create_pseudo_p ()) ? dest: gen_reg_rtx (DImode); > > since NULL passed as dest for num_insns. ok. > >> >> - emit_move_insn (ud1 != 0 ? temp : dest, >> - GEN_INT (sext_hwi (ud2 << 16, 32))); >> + if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000)) >> + || (ud4 == 0 && ud3 == 0 && !(ud2 & 0x8000))) >> + { >> + /* lis[; ori] */ >> + count_or_emit_insn (ud1 != 0 ? temp : dest, >> + GEN_INT (sext_hwi (ud2 << 16, 32))); >> if (ud1 != 0) >> - emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); >> + count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); >> + return; >> } >> - else if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0) >> + >> + if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0) >> { >> /* lis; xoris */ >> - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); >> - emit_move_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32))); >> - emit_move_insn (dest, gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000))); >> + count_or_emit_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32))); >> + count_or_emit_insn (dest, >> + gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000))); >> + return; >> } >> - else if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000)) >> + >> + if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000)) >> { >> /* li; xoris */ >> - temp = !can_create_pseudo_p () ? 
dest : gen_reg_rtx (DImode); >> - emit_move_insn (temp, GEN_INT (sext_hwi (ud1, 16))); >> - emit_move_insn (dest, gen_rtx_XOR (DImode, temp, >> - GEN_INT ((ud2 ^ 0xffff) << 16))); >> + count_or_emit_insn (temp, GEN_INT (sext_hwi (ud1, 16))); >> + count_or_emit_insn (dest, gen_rtx_XOR (DImode, temp, >> + GEN_INT ((ud2 ^ 0xffff) << 16))); >> + return; >> } >> - else if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask) >> - || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask) >> - || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask) >> - || can_be_built_by_li_and_rldic (c, &shift, &mask)) >> + >> + int shift; >> + HOST_WIDE_INT mask; >> + if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask) >> + || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask) >> + || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask) >> + || can_be_built_by_li_and_rldic (c, &shift, &mask)) >> { >> - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); >> + /* li/lis; rldicX */ >> unsigned HOST_WIDE_INT imm = (c | ~mask); >> imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift)); >> >> - emit_move_insn (temp, GEN_INT (imm)); >> + count_or_emit_insn (temp, GEN_INT (imm)); >> if (shift != 0) >> temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift)); >> if (mask != HOST_WIDE_INT_M1) >> temp = gen_rtx_AND (DImode, temp, GEN_INT (mask)); >> - emit_move_insn (dest, temp); >> - } >> - else if (ud3 == 0 && ud4 == 0) >> - { >> - temp = !can_create_pseudo_p () ? 
dest : gen_reg_rtx (DImode); >> + count_or_emit_insn (dest, temp); >> >> - gcc_assert (ud2 & 0x8000); >> + return; >> + } >> >> - if (ud1 == 0) >> - { >> - /* lis; rldicl */ >> - emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32))); >> - emit_move_insn (dest, >> - gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff))); >> - } >> - else if (!(ud1 & 0x8000)) >> + if (ud3 == 0 && ud4 == 0) >> + { >> + gcc_assert ((ud2 & 0x8000) && ud1 != 0); >> + if (!(ud1 & 0x8000)) >> { >> /* li; oris */ >> - emit_move_insn (temp, GEN_INT (ud1)); >> - emit_move_insn (dest, >> - gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16))); >> + count_or_emit_insn (temp, GEN_INT (ud1)); >> + count_or_emit_insn (dest, >> + gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16))); >> + return; >> } >> - else >> - { >> - /* lis; ori; rldicl */ >> - emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32))); >> - emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); >> - emit_move_insn (dest, >> + >> + /* lis; ori; rldicl */ >> + count_or_emit_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32))); >> + count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); >> + count_or_emit_insn (dest, >> gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff))); >> - } >> + return; >> } >> - else if (ud1 == ud3 && ud2 == ud4) >> + >> + if (ud1 == ud3 && ud2 == ud4) >> { > > Nit: Like the others, it's still preferred to have a comment indicating > what's insn sequence for this hunk, ... I understand your point. Since each 32-bit half may be built with various insns, it may be hard to list the insn sequence. Still, I also feel we may need a comment here. > >> - temp = !can_create_pseudo_p () ?
dest : gen_reg_rtx (DImode); >> HOST_WIDE_INT num = (ud2 << 16) | ud1; >> - rs6000_emit_set_long_const (temp, sext_hwi (num, 32)); >> + rs6000_emit_set_long_const (temp, sext_hwi (num, 32), num_insns); >> + >> rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)); >> rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32)); >> - emit_move_insn (dest, gen_rtx_IOR (DImode, one, two)); >> + count_or_emit_insn (dest, gen_rtx_IOR (DImode, one, two)); >> + return; >> } >> - else if ((ud4 == 0xffff && (ud3 & 0x8000)) >> - || (ud4 == 0 && ! (ud3 & 0x8000))) >> - { >> - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); >> >> - emit_move_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32))); >> + if ((ud4 == 0xffff && (ud3 & 0x8000)) || (ud4 == 0 && !(ud3 & 0x8000))) >> + { > > ... and this. ok. > >> + count_or_emit_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32))); >> if (ud2 != 0) >> - emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2))); >> - emit_move_insn (ud1 != 0 ? temp : dest, >> - gen_rtx_ASHIFT (DImode, temp, GEN_INT (16))); >> + count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2))); >> + count_or_emit_insn (ud1 != 0 ? temp : dest, >> + gen_rtx_ASHIFT (DImode, temp, GEN_INT (16))); >> if (ud1 != 0) >> - emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); >> + count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); >> + return; >> } >> - else if (TARGET_PREFIXED) >> + >> + if (TARGET_PREFIXED) >> { >> if (can_create_pseudo_p ()) >> { >> - /* pli A,L + pli B,H + rldimi A,B,32,0. */ >> - temp = gen_reg_rtx (DImode); >> - rtx temp1 = gen_reg_rtx (DImode); >> - emit_move_insn (temp, GEN_INT ((ud4 << 16) | ud3)); >> - emit_move_insn (temp1, GEN_INT ((ud2 << 16) | ud1)); >> - >> - emit_insn (gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1, >> - GEN_INT (0xffffffff))); >> + /* pli A,L; pli B,H; rldimi A,B,32,0. */ >> + rtx temp1 = num_insns ? 
nullptr : gen_reg_rtx (DImode); >> + count_or_emit_insn (temp, GEN_INT ((ud4 << 16) | ud3)); >> + count_or_emit_insn (temp1, GEN_INT ((ud2 << 16) | ud1)); >> + count_or_emit_insn (dest, temp, temp1); >> + return; >> } >> - else >> - { >> - /* pli A,H + sldi A,32 + paddi A,A,L. */ >> - emit_move_insn (dest, GEN_INT ((ud4 << 16) | ud3)); >> >> - emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); >> + /* There may be 1 insn inaccurate because of no info about dest. */ >> + bool can_use_paddi = dest ? REGNO (dest) != FIRST_GPR_REGNO : false; > > Nit: Move this line ... > >> >> - bool can_use_paddi = REGNO (dest) != FIRST_GPR_REGNO; >> + /* pli A,H; sldi A,32; paddi A,A,L. */ >> + count_or_emit_insn (dest, GEN_INT ((ud4 << 16) | ud3)); >> + count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); >> > > ... here, just before its use. ok. > > The others look good to me, thanks! Thanks again for your great comments. BR, Jeff (Jiufu Guo) > > BR, > Kewen > >> - /* Use paddi for the low 32 bits. */ >> - if (ud2 != 0 && ud1 != 0 && can_use_paddi) >> - emit_move_insn (dest, gen_rtx_PLUS (DImode, dest, >> + /* Use paddi for the low 32 bits. */ >> + if (ud2 != 0 && ud1 != 0 && can_use_paddi) >> + count_or_emit_insn (dest, gen_rtx_PLUS (DImode, dest, >> GEN_INT ((ud2 << 16) | ud1))); >> - >> - /* Use oris, ori for low 32 bits. */ >> - if (ud2 != 0 && (ud1 == 0 || !can_use_paddi)) >> - emit_move_insn (dest, >> + /* Use oris, ori for low 32 bits.
*/ >> + if (ud2 != 0 && (ud1 == 0 || !can_use_paddi)) >> + count_or_emit_insn (dest, >> gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16))); >> - if (ud1 != 0 && (ud2 == 0 || !can_use_paddi)) >> - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); >> - } >> + if (ud1 != 0 && (ud2 == 0 || !can_use_paddi)) >> + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); >> + return; >> } >> - else >> + >> + if (can_create_pseudo_p ()) >> { >> - if (can_create_pseudo_p ()) >> - { >> - /* lis HIGH,UD4 ; ori HIGH,UD3 ; >> - lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. */ >> - rtx high = gen_reg_rtx (DImode); >> - rtx low = gen_reg_rtx (DImode); >> - HOST_WIDE_INT num = (ud2 << 16) | ud1; >> - rs6000_emit_set_long_const (low, sext_hwi (num, 32)); >> - num = (ud4 << 16) | ud3; >> - rs6000_emit_set_long_const (high, sext_hwi (num, 32)); >> - emit_insn (gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low, >> - GEN_INT (0xffffffff))); >> - } >> - else >> - { >> - /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ; >> - oris DEST,UD2 ; ori DEST,UD1. */ >> - emit_move_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32))); >> - if (ud3 != 0) >> - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3))); >> + /* lis HIGH,UD4 ; ori HIGH,UD3 ; >> + lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. */ >> + rtx high = num_insns ? nullptr : gen_reg_rtx (DImode); >> + rtx low = num_insns ? 
nullptr : gen_reg_rtx (DImode); >> + HOST_WIDE_INT num = (ud2 << 16) | ud1; >> + rs6000_emit_set_long_const (low, sext_hwi (num, 32), num_insns); >> + num = (ud4 << 16) | ud3; >> + rs6000_emit_set_long_const (high, sext_hwi (num, 32), num_insns); >> >> - emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); >> - if (ud2 != 0) >> - emit_move_insn (dest, >> - gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16))); >> - if (ud1 != 0) >> - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); >> - } >> + count_or_emit_insn (dest, high, low); >> + return; >> } >> + >> + /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ; >> + oris DEST,UD2 ; ori DEST,UD1. */ >> + count_or_emit_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32))); >> + if (ud3 != 0) >> + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3))); >> + >> + count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); >> + if (ud2 != 0) >> + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16))); >> + if (ud1 != 0) >> + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); >> + >> + return; >> } >> >> /* Helper for the following. Get rid of [r+r] memory refs ^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2023-12-08 3:32 UTC | newest] Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2023-12-06 5:24 [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns Jiufu Guo 2023-12-06 5:24 ` [PATCH V3 2/3] Using pli for constant splitting Jiufu Guo 2023-12-07 6:12 ` Kewen.Lin 2023-12-08 3:32 ` Jiufu Guo 2023-12-06 5:24 ` [PATCH V3 3/3] split complicate constant to memory Jiufu Guo 2023-12-07 6:01 ` [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns Kewen.Lin 2023-12-08 3:30 ` Jiufu Guo
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).