Hi, > > ...can we use expand_vec_perm_const here? It will try the constant > expansion first, which is the preferred order. It also has a few variations up > its sleeve. > We can, however this function seems to be incorrectly assuming it can always convert the input mode to a QI vector mode. When I started using it we got a number of miscompilations in the AArch64 codegen. This had the knock-on effect of uncovering bugs in both the AArch64 backend and i386. I'll send patches out for those separately. For now here's the new patch using that hook and updating the permute expansion code: Bootstrapped and regtested on aarch64-none-linux-gnu and x86_64-pc-linux-gnu with no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: * expmed.cc (extract_bit_field_1): Add support for vector element extracts. * optabs.cc (expand_vec_perm_const): Add checks before converting permute to QImode fallback. gcc/testsuite/ChangeLog: * gcc.target/aarch64/ext_1.c: New. --- inline copy of patch --- diff --git a/gcc/expmed.cc b/gcc/expmed.cc index bab020c07222afa38305ef8d7333f271b1965b78..7d38045ae525c8a4665a0c1384fc515e4de88c67 100644 --- a/gcc/expmed.cc +++ b/gcc/expmed.cc @@ -1718,6 +1718,21 @@ extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum, return target; } } + else if (!known_eq (bitnum, 0U) + && multiple_p (GET_MODE_UNIT_BITSIZE (tmode), bitnum, &pos)) + { + /* The encoding has a single stepped pattern. */ + poly_uint64 nunits = GET_MODE_NUNITS (new_mode); + vec_perm_builder sel (nunits, 1, 3); + sel.quick_push (pos); + sel.quick_push (pos + 1); + sel.quick_push (pos + 2); + + rtx res + = expand_vec_perm_const (new_mode, op0, op0, sel, new_mode, NULL); + if (res) + return simplify_gen_subreg (tmode, res, new_mode, 0); + } } /* See if we can get a better vector mode before extracting. 
*/ diff --git a/gcc/optabs.cc b/gcc/optabs.cc index cff37ccb0dfc3dd79b97d0abfd872f340855dc96..f338df410265dfe55b6896160090a453cc6a28d9 100644 --- a/gcc/optabs.cc +++ b/gcc/optabs.cc @@ -6267,6 +6267,7 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1, v0_qi = gen_lowpart (qimode, v0); v1_qi = gen_lowpart (qimode, v1); if (targetm.vectorize.vec_perm_const != NULL + && targetm.can_change_mode_class (mode, qimode, ALL_REGS) && targetm.vectorize.vec_perm_const (qimode, qimode, target_qi, v0_qi, v1_qi, qimode_indices)) return gen_lowpart (mode, target_qi); @@ -6311,7 +6312,8 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1, } if (qimode != VOIDmode - && selector_fits_mode_p (qimode, qimode_indices)) + && selector_fits_mode_p (qimode, qimode_indices) + && targetm.can_change_mode_class (mode, qimode, ALL_REGS)) { icode = direct_optab_handler (vec_perm_optab, qimode); if (icode != CODE_FOR_nothing) diff --git a/gcc/testsuite/gcc.target/aarch64/ext_1.c b/gcc/testsuite/gcc.target/aarch64/ext_1.c new file mode 100644 index 0000000000000000000000000000000000000000..18a10a14f1161584267a8472e571b3bc2ddf887a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/ext_1.c @@ -0,0 +1,54 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include + +typedef unsigned int v4si __attribute__((vector_size (16))); +typedef unsigned int v2si __attribute__((vector_size (8))); + +/* +** extract: { xfail *-*-* } +** ext v0.16b, v0.16b, v0.16b, #4 +** ret +*/ +v2si extract (v4si x) +{ + v2si res = {x[1], x[2]}; + return res; +} + +/* +** extract1: { xfail *-*-* } +** ext v0.16b, v0.16b, v0.16b, #4 +** ret +*/ +v2si extract1 (v4si x) +{ + v2si res; + memcpy (&res, ((int*)&x)+1, sizeof(res)); + return res; +} + +typedef struct cast { + int a; + v2si b __attribute__((packed)); +} cast_t; + +typedef union Data { + v4si x; + cast_t y; +} data; + +/* +** extract2: +** ext v0.16b, v0.16b, v0.16b, #4 +** ret 
+*/ +v2si extract2 (v4si x) +{ + data d; + d.x = x; + return d.y.b; +} +