On Wed, Jun 7, 2023 at 9:55 PM Jiufu Guo wrote: > Hi, > > This patch checks if a constant is possible to be rotated to/from a > positive > or negative value from "li". If so, we could use "li;rotldi" to build it. > > Bootstrap and regtest pass on ppc64{,le}. > Is this ok for trunk? > > BR, > Jeff (Jiufu) > > gcc/ChangeLog: > > * config/rs6000/rs6000.cc (can_be_rotated_to_positive_li): New > function. > (can_be_rotated_to_negative_li): New function. > (can_be_built_by_li_and_rotldi): New function. > (rs6000_emit_set_long_const): Call can_be_built_by_li_and_rotldi. > > gcc/testsuite/ChangeLog: > > * gcc.target/powerpc/const-build.c: New test. > --- > gcc/config/rs6000/rs6000.cc | 64 +++++++++++++++++-- > .../gcc.target/powerpc/const-build.c | 54 ++++++++++++++++ > 2 files changed, 112 insertions(+), 6 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/const-build.c > > diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc > index 42f49e4a56b..1dd0072350a 100644 > --- a/gcc/config/rs6000/rs6000.cc > +++ b/gcc/config/rs6000/rs6000.cc > @@ -10258,6 +10258,48 @@ rs6000_emit_set_const (rtx dest, rtx source) > return true; > } > > +/* Check if C can be rotated to a positive value which 'li' instruction > + is able to load. If so, set *ROT to the number by which C is rotated, > + and return true. Return false otherwise. */ > + > +static bool > +can_be_rotated_to_positive_li (HOST_WIDE_INT c, int *rot) > +{ > + /* 49 leading zeros and 15 low bits on the positive value > + generated by 'li' instruction. */ > + return can_be_rotated_to_lowbits (c, 15, rot); > +} > + > +/* Like can_be_rotated_to_positive_li, but check the negative value of > 'li'. */ > + > +static bool > +can_be_rotated_to_negative_li (HOST_WIDE_INT c, int *rot) > +{ > + return can_be_rotated_to_lowbits (~c, 15, rot); > +} > + > +/* Check if value C can be built by 2 instructions: one is 'li', another > is > + rotldi. > + > + If so, *SHIFT is set to the shift operand of rotldi(rldicl), and *MASK > + is set to -1, and return true. Return false otherwise. */ > + > I look at this feature and it's good, but I don't fully understand the benefit of this level of abstraction. Ideally all of the above functions would be inlined. They aren't reused. > +static bool > +can_be_built_by_li_and_rotldi (HOST_WIDE_INT c, int *shift, > + HOST_WIDE_INT *mask) > +{ > + int n; > + if (can_be_rotated_to_positive_li (c, &n) > + || can_be_rotated_to_negative_li (c, &n)) > Why not /* Check if C or ~C can be rotated to a positive or negative value which 'li' instruction is able to load. */ if (can_be_rotated_to_lowbits (c, 15, &n) || can_be_rotated_to_lowbits (~c, 15, &n)) ... This is a style of software engineering, but it seems overkill to me when the function is a single line that tail calls another function. Am I missing something? The rest of this patch looks good. Thanks, David > + { > + *mask = HOST_WIDE_INT_M1; > + *shift = HOST_BITS_PER_WIDE_INT - n; > + return true; > + } > + > + return false; > +} > + > /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. > Output insns to set DEST equal to the constant C as a series of > lis, ori and shl instructions. */ > @@ -10266,15 +10308,14 @@ static void > rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) > { > rtx temp; > + int shift; > + HOST_WIDE_INT mask; > HOST_WIDE_INT ud1, ud2, ud3, ud4; > > ud1 = c & 0xffff; > - c = c >> 16; > - ud2 = c & 0xffff; > - c = c >> 16; > - ud3 = c & 0xffff; > - c = c >> 16; > - ud4 = c & 0xffff; > + ud2 = (c >> 16) & 0xffff; > + ud3 = (c >> 32) & 0xffff; > + ud4 = (c >> 48) & 0xffff; > > if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) > || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000))) > @@ -10305,6 +10346,17 @@ rs6000_emit_set_long_const (rtx dest, > HOST_WIDE_INT c) > emit_move_insn (dest, gen_rtx_XOR (DImode, temp, > GEN_INT ((ud2 ^ 0xffff) << 16))); > } > + else if (can_be_built_by_li_and_rotldi (c, &shift, &mask)) > + { > + temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); > + unsigned HOST_WIDE_INT imm = (c | ~mask); > + imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift)); > + > + emit_move_insn (temp, GEN_INT (imm)); > + if (shift != 0) > + temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift)); > + emit_move_insn (dest, temp); > + } > else if (ud3 == 0 && ud4 == 0) > { > temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); > diff --git a/gcc/testsuite/gcc.target/powerpc/const-build.c > b/gcc/testsuite/gcc.target/powerpc/const-build.c > new file mode 100644 > index 00000000000..70f095f6bf2 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/const-build.c > @@ -0,0 +1,54 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -save-temps" } */ > +/* { dg-require-effective-target has_arch_ppc64 } */ > + > +#define NOIPA __attribute__ ((noipa)) > + > +struct fun > +{ > + long long (*f) (void); > + long long val; > +}; > + > +long long NOIPA > +li_rotldi_1 (void) > +{ > + return 0x7531000000000LL; > +} > + > +long long NOIPA > +li_rotldi_2 (void) > +{ > + return 0x2100000000000064LL; > +} > + > +long long NOIPA > +li_rotldi_3 (void) > +{ > + return 0xffff8531ffffffffLL; > +} > + > +long long NOIPA > +li_rotldi_4 (void) > +{ > + return 0x21ffffffffffff94LL; > +} > + > +struct fun arr[] = { > + {li_rotldi_1, 0x7531000000000LL}, > + {li_rotldi_2, 0x2100000000000064LL}, > + {li_rotldi_3, 0xffff8531ffffffffLL}, > + {li_rotldi_4, 0x21ffffffffffff94LL}, > +}; > + > +/* { dg-final { scan-assembler-times {\mrotldi\M} 4 } } */ > + > +int > +main () > +{ > + for (int i = 0; i < sizeof (arr) / sizeof (arr[0]); i++) > + if ((*arr[i].f) () != arr[i].val) > + __builtin_abort (); > + > + return 0; > +} > -- > 2.39.1 > >