From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1923) id 43AD33858438; Tue, 27 Feb 2024 13:37:18 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 43AD33858438 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1709041038; bh=/AOz+fPq1zp8GNn5Df/Gg0y2TsJqY6uzkD0NfEBgkDk=; h=From:To:Subject:Date:From; b=jd5BjP3krEonpbey7AWT3XXRtASIuaDg58nb9edFTsB9UrK3eYzRkjZfdeOwsBNBg GQ8oTszH33Eifg0EcihPTL8yDCWsdfWyzGagLAWAG0sl+uPe9bkKCbVXngkYMeygNN xIkkrk4LkzEwTSNgItDsOxRSsVHdIQvPG5ZI20LU= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Philipp Tomsich To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/vendors/vrull/heads/slp-improvements)] match.pd: Look through view_convert when folding vec_perms X-Act-Checkin: gcc X-Git-Author: Manolis Tsamis X-Git-Refname: refs/vendors/vrull/heads/slp-improvements X-Git-Oldrev: 821917bf62b82ebcd0f423c21b06def7735e1371 X-Git-Newrev: e8a30f2e241d213a9761a473eb63f1c32e2c182e Message-Id: <20240227133718.43AD33858438@sourceware.org> Date: Tue, 27 Feb 2024 13:37:18 +0000 (GMT) List-Id: https://gcc.gnu.org/g:e8a30f2e241d213a9761a473eb63f1c32e2c182e commit e8a30f2e241d213a9761a473eb63f1c32e2c182e Author: Manolis Tsamis Date: Wed Nov 1 12:27:28 2023 +0100 match.pd: Look through view_convert when folding vec_perms The match.pd patterns to merge two vector permutes into one fail to match when a (potentially no-op) view convert expression is separating the two permutes. This is observable in the SLP tree for the first loop of x264's satd and inhibits optimization. This change adds a check for whether a view_convert will be a no-op (comparing the element precision) and rewrites the permute sequence for those cases. 
Ref #343 Diff: --- gcc/match.pd | 14 ++++++++------ gcc/testsuite/gcc.dg/fold-perm-2.c | 16 ++++++++++++++++ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index f3fffd8dec2..ae8c981b306 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -9978,19 +9978,21 @@ and, d = VEC_PERM_EXPR ; */ (simplify - (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4) + (vec_perm (view_convert?@0 (vec_perm@1 @2 @3 VECTOR_CST@4)) @0 VECTOR_CST@5) (if (TYPE_VECTOR_SUBPARTS (type).is_constant ()) (with { machine_mode result_mode = TYPE_MODE (type); - machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1)); + machine_mode op_mode = TYPE_MODE (TREE_TYPE (@2)); int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant (); vec_perm_builder builder0; vec_perm_builder builder1; vec_perm_builder builder2 (nelts, nelts, 1); } - (if (tree_to_vec_perm_builder (&builder0, @3) - && tree_to_vec_perm_builder (&builder1, @4)) + (if (tree_to_vec_perm_builder (&builder0, @4) + && tree_to_vec_perm_builder (&builder1, @5) + && element_precision (TREE_TYPE (@0)) + == element_precision (TREE_TYPE (@1))) (with { vec_perm_indices sel0 (builder0, 2, nelts); @@ -10012,10 +10014,10 @@ and, ? 
(!can_vec_perm_const_p (result_mode, op_mode, sel0, false) || !can_vec_perm_const_p (result_mode, op_mode, sel1, false)) : !can_vec_perm_const_p (result_mode, op_mode, sel1, false))) - op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2); + op0 = vec_perm_indices_to_tree (TREE_TYPE (@5), sel2); } (if (op0) - (vec_perm @1 @2 { op0; }))))))) + (view_convert (vec_perm @2 @3 { op0; })))))))) /* Merge c = VEC_PERM_EXPR ; diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c b/gcc/testsuite/gcc.dg/fold-perm-2.c new file mode 100644 index 00000000000..1a4ab4065de --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-perm-2.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-fre1" } */ + +typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); +typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned int)))); + +void fun (veci *a, veci *b, veci *c) +{ + veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7); + vecu r2 = __builtin_convertvector (r1, vecu); + vecu r3 = __builtin_shufflevector (r2, r2, 2, 3, 1, 0); + *c = __builtin_convertvector (r3, veci); +} + +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 7, 5, 0 }" "fre1" } } */ +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "fre1" } } */