From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 120038 invoked by alias); 19 Jun 2015 17:32:20 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 120009 invoked by uid 89); 19 Jun 2015 17:32:18 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-0.8 required=5.0 tests=AWL,BAYES_20,SPF_PASS autolearn=ham version=3.3.2 X-HELO: eu-smtp-delivery-143.mimecast.com Received: from eu-smtp-delivery-143.mimecast.com (HELO eu-smtp-delivery-143.mimecast.com) (146.101.78.143) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Fri, 19 Jun 2015 17:32:14 +0000 Received: from cam-owa1.Emea.Arm.com (fw-tnat.cambridge.arm.com [217.140.96.140]) by eu-smtp-1.mimecast.com with ESMTP id uk-mta-35-Q47C9U60ROeBQ29vPRF-zA-1 Received: from [10.2.207.65] ([10.1.2.79]) by cam-owa1.Emea.Arm.com with Microsoft SMTPSVC(6.0.3790.3959); Fri, 19 Jun 2015 18:32:10 +0100 Message-ID: <5584521A.7030205@arm.com> Date: Fri, 19 Jun 2015 17:37:00 -0000 From: Alan Lawrence User-Agent: Thunderbird 2.0.0.24 (X11/20101213) MIME-Version: 1.0 To: "gcc-patches@gcc.gnu.org" Subject: [PATCH v2] Rerun loop-header-copying just before vectorization X-MC-Unique: Q47C9U60ROeBQ29vPRF-zA-1 Content-Type: multipart/mixed; boundary="------------060808070704080907060608" X-IsSubscribed: yes X-SW-Source: 2015-06/txt/msg01356.txt.bz2 This is a multi-part message in MIME format. 
--------------060808070704080907060608 Content-Type: text/plain; charset=WINDOWS-1252; format=flowed Content-Transfer-Encoding: quoted-printable Content-length: 1079 This is a respin of https://gcc.gnu.org/ml/gcc-patches/2015-05/msg02139.html . Changes are: * Separate the two passes by descending from a common base class, allowing different predicates; * Test flag_tree_vectorize, and loop->force_vectorize/dont_vectorize - this fixes the test failing before; * Simplify the check for "code after exit edge"; * Revert unnecessary changes to pass_tree_loop_init::execute; * Revert change to slp-perm-7 test (following fix by Marc Glisse) Bootstrapped + check-gcc on aarch64 and x86_64 (linux). gcc/ChangeLog: * tree-pass.h (make_pass_ch_vect): New. * passes.def: Add pass_ch_vect just before pass_if_conversion. * tree-ssa-loop-ch.c (ch_base, pass_ch_vect, pass_data_ch_vect, pass_ch::process_loop_p): New. (pass_ch): Extend ch_base. (pass_ch::execute): Move all but loop_optimizer_init/finalize to... (ch_base::copy_headers): ...here. gcc/testsuite/ChangeLog: * gcc.dg/vect/vect-strided-a-u16-i4.c (main1): Narrow scope of unsigned short variables x,y,z,w. * gcc.dg/vect/vect-ifcvt-11.c: New. --------------060808070704080907060608 Content-Type: text/x-patch; name=rerun-loop-ch-2.patch Content-Transfer-Encoding: quoted-printable Content-Disposition: inline; filename="rerun-loop-ch-2.patch" Content-length: 7544 diff --git a/gcc/passes.def b/gcc/passes.def index 4690e23..5755035 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -247,6 +247,7 @@ along with GCC; see the file COPYING3. If not see PUSH_INSERT_PASSES_WITHIN (pass_parallelize_loops) NEXT_PASS (pass_expand_omp_ssa); POP_INSERT_PASSES () + NEXT_PASS (pass_ch_vect); NEXT_PASS (pass_if_conversion); /* pass_vectorize must immediately follow pass_if_conversion. Please do not add any other passes in between. 
*/ diff --git a/gcc/testsuite/gcc.dg/vect/vect-ifcvt-11.c b/gcc/testsuite/gcc.= dg/vect/vect-ifcvt-11.c new file mode 100644 index 0000000..7e32369 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-ifcvt-11.c @@ -0,0 +1,36 @@ +/* { dg-require-effective-target vect_condition } */ +/* { dg-require-effective-target vect_int } */ + +#include "tree-vect.h" + +#define N 16 + +extern void abort (void); + +int A[N] =3D {36, 39, 42, 45, 43, 32, 21, 12, 23, 34, 45, 56, 67, 78, 81, = 11}; +int B[N] =3D {144,195,210,225,172,128,105,60, 92, 136,225,280,268,390,324,= 55}; + +__attribute__((noinline)) +void foo () +{ + for (int i =3D 0; i < N; i++) + { + int m =3D (A[i] & i) ? 5 : 4; + A[i] =3D A[i] * m; + } +} + +int main () +{ + + check_vect (); + foo (); + /* check results: */ + for (int i =3D 0; i < N; i++) + if (A[i] !=3D B[i]) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c b/gcc/testsu= ite/gcc.dg/vect/vect-strided-a-u16-i4.c index af33ed4..0be68b3 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c @@ -21,7 +21,6 @@ main1 () s *ptr =3D arr; s res[N]; int i; - unsigned short x, y, z, w; =20 for (i =3D 0; i < N; i++) { @@ -35,6 +34,7 @@ main1 () =20 for (i =3D 0; i < N; i++) { + unsigned short x, y, z, w; x =3D ptr->b - ptr->a; y =3D ptr->d - ptr->c; res[i].c =3D x + y; diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 172bd82..083e771 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -380,6 +380,7 @@ extern gimple_opt_pass *make_pass_loop_prefetch (gcc::c= ontext *ctxt); extern gimple_opt_pass *make_pass_iv_optimize (gcc::context *ctxt); extern gimple_opt_pass *make_pass_tree_loop_done (gcc::context *ctxt); extern gimple_opt_pass *make_pass_ch (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_ch_vect (gcc::context *ctxt); extern gimple_opt_pass *make_pass_ccp (gcc::context 
*ctxt); extern gimple_opt_pass *make_pass_phi_only_cprop (gcc::context *ctxt); extern gimple_opt_pass *make_pass_build_ssa (gcc::context *ctxt); diff --git a/gcc/tree-ssa-loop-ch.c b/gcc/tree-ssa-loop-ch.c index 6ece78b..bd409ef 100644 --- a/gcc/tree-ssa-loop-ch.c +++ b/gcc/tree-ssa-loop-ch.c @@ -144,6 +144,17 @@ do_while_loop_p (struct loop *loop) =20 namespace { =20 +class ch_base : public gimple_opt_pass +{ + protected: + ch_base (pass_data data, gcc::context *ctxt) + : gimple_opt_pass (data, ctxt) + {} + + unsigned int copy_headers (function *fun); + virtual bool process_loop_p (struct loop *loop) =3D 0; +}; + const pass_data pass_data_ch =3D { GIMPLE_PASS, /* type */ @@ -157,21 +168,61 @@ const pass_data pass_data_ch =3D 0, /* todo_flags_finish */ }; =20 -class pass_ch : public gimple_opt_pass +/* This pass calls loop_optimizer_init before it executes, + and loop_optimizer_finalize after. */ +class pass_ch : public ch_base { public: pass_ch (gcc::context *ctxt) - : gimple_opt_pass (pass_data_ch, ctxt) + : ch_base (pass_data_ch, ctxt) {} =20 /* opt_pass methods: */ virtual bool gate (function *) { return flag_tree_ch !=3D 0; } virtual unsigned int execute (function *); =20 +protected: + /* ch_base method: */ + virtual bool process_loop_p (struct loop *loop); }; // class pass_ch =20 +const pass_data pass_data_ch_vect =3D +{ + GIMPLE_PASS, /* type */ + "ch_vect", /* name */ + OPTGROUP_LOOP, /* optinfo_flags */ + TV_TREE_CH, /* tv_id */ + ( PROP_cfg | PROP_ssa ), /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +/* This is a more aggressive version, designed to run just before if-conve= rsion + and vectorization, to put more loops into their required form. 
*/ +class pass_ch_vect : public ch_base +{ +public: + pass_ch_vect (gcc::context *ctxt) + : ch_base (pass_data_ch_vect, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *fun) + { + return flag_tree_ch !=3D 0 + && (flag_tree_loop_vectorize !=3D 0 || fun->has_force_vectorize_loops); + } + virtual unsigned int execute (function *); + +protected: + /* ch_base method: */ + virtual bool process_loop_p (struct loop *loop); +}; // class pass_ch_vect + unsigned int -pass_ch::execute (function *fun) +ch_base::copy_headers (function *fun) { struct loop *loop; basic_block header; @@ -181,13 +232,8 @@ pass_ch::execute (function *fun) unsigned bbs_size; bool changed =3D false; =20 - loop_optimizer_init (LOOPS_HAVE_PREHEADERS - | LOOPS_HAVE_SIMPLE_LATCHES); if (number_of_loops (fun) <=3D 1) - { - loop_optimizer_finalize (); return 0; - } =20 bbs =3D XNEWVEC (basic_block, n_basic_blocks_for_fn (fun)); copied_bbs =3D XNEWVEC (basic_block, n_basic_blocks_for_fn (fun)); @@ -204,7 +250,7 @@ pass_ch::execute (function *fun) written as such, or because jump threading transformed it into one), we might be in fact peeling the first iteration of the loop. This in general is not a good idea. */ - if (do_while_loop_p (loop)) + if (!process_loop_p (loop)) continue; =20 /* Iterate the header copying up to limit; this takes care of the ca= ses @@ -291,17 +337,76 @@ pass_ch::execute (function *fun) changed =3D true; } =20 - update_ssa (TODO_update_ssa); + if (changed) + update_ssa (TODO_update_ssa); free (bbs); free (copied_bbs); =20 - loop_optimizer_finalize (); return changed ? 
TODO_cleanup_cfg : 0; } =20 +unsigned int +pass_ch::execute (function *fun) +{ + loop_optimizer_init (LOOPS_HAVE_PREHEADERS + | LOOPS_HAVE_SIMPLE_LATCHES); + + unsigned int res =3D copy_headers (fun); + + loop_optimizer_finalize (); + return res; +} + +unsigned int +pass_ch_vect::execute (function *fun) +{ + return copy_headers (fun); +} + +bool +pass_ch::process_loop_p (struct loop *loop) +{ + return !do_while_loop_p (loop); +} + +bool +pass_ch_vect::process_loop_p (struct loop *loop) +{ + if (!flag_tree_vectorize && !loop->force_vectorize) + return false; + + if (loop->dont_vectorize) + return false; + + if (!do_while_loop_p (loop)) + return true; + + /* The vectorizer won't handle anything with multiple exits, so skip. */ + edge exit =3D single_exit (loop); + if (!exit) + return false; + + /* Apply copying if the exit block looks to have code after it. */ + edge_iterator ei; + edge e; + FOR_EACH_EDGE (e, ei, exit->src->succs) + if (!loop_exit_edge_p (loop, e) + && e->dest !=3D loop->header + && e->dest !=3D loop->latch) + return true; /* Block with exit edge has code after it. */ + + return false; +} + } // anon namespace =20 gimple_opt_pass * +make_pass_ch_vect (gcc::context *ctxt) +{ + return new pass_ch_vect (ctxt); +} + +gimple_opt_pass * make_pass_ch (gcc::context *ctxt) { return new pass_ch (ctxt); --------------060808070704080907060608--