Hi,    This patch optimizes the code generation for the vec_xl_sext builtin. All sign extensions are now done directly on VSX registers.    Bootstrapped and tested on powerpc64le-linux with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. ChangeLog 2021-11-16 Haochen Gui gcc/         * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin): Modify         the expansion for sign extension. All extensions are done on VSX         registers. gcc/testsuite/         * gcc.target/powerpc/p10_vec_xl_sext.c: New test. patch.diff diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c index b4e13af4dc6..587e9fa2a2a 100644 --- a/gcc/config/rs6000/rs6000-call.c +++ b/gcc/config/rs6000/rs6000-call.c @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl    if (sign_extend)      { -      rtx discratch = gen_reg_rtx (DImode); +      rtx discratch = gen_reg_rtx (V2DImode);        rtx tiscratch = gen_reg_rtx (TImode);        /* Emit the lxvr*x insn.  */ @@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl         return 0;        emit_insn (pat); -      /* Emit a sign extension from QI,HI,WI to double (DI).  */ -      rtx scratch = gen_lowpart (smode, tiscratch); +      /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  
*/ +      rtx temp1, temp2;        if (icode == CODE_FOR_vsx_lxvrbx) -       emit_insn (gen_extendqidi2 (discratch, scratch)); +       { +         temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0); +         emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1)); +       }        else if (icode == CODE_FOR_vsx_lxvrhx) -       emit_insn (gen_extendhidi2 (discratch, scratch)); +       { +         temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0); +         emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1)); +       }        else if (icode == CODE_FOR_vsx_lxvrwx) -       emit_insn (gen_extendsidi2 (discratch, scratch)); -      /*  Assign discratch directly if scratch is already DI.  */ -      if (icode == CODE_FOR_vsx_lxvrdx) -       discratch = scratch; +       { +         temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0); +         emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1)); +       } +      else if (icode == CODE_FOR_vsx_lxvrdx) +       discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0); +      else +       gcc_unreachable (); -      /* Emit the sign extension from DI (double) to TI (quad).  */ -      emit_insn (gen_extendditi2 (target, discratch)); +      /* Emit the sign extension from V2DI (double) to TI (quad).  
*/ +      temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0); +      emit_insn (gen_extendditi2_vector (target, temp2));        return target;      } diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c new file mode 100644 index 00000000000..78e72ac5425 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target int128 } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +#include <altivec.h> + +vector signed __int128 +foo1 (signed long a, signed char *b) +{ +  return vec_xl_sext (a, b); +} + +vector signed __int128 +foo2 (signed long a, signed short *b) +{ +  return vec_xl_sext (a, b); +} + +vector signed __int128 +foo3 (signed long a, signed int *b) +{ +  return vec_xl_sext (a, b); +} + +vector signed __int128 +foo4 (signed long a, signed long *b) +{ +  return vec_xl_sext (a, b); +} + +/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */