From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 5007 invoked by alias); 5 Jan 2017 21:19:38 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 4977 invoked by uid 89); 5 Jan 2017 21:19:37 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-5.1 required=5.0 tests=BAYES_00,RP_MATCHES_RCVD,SPF_HELO_PASS autolearn=ham version=3.3.2 spammy=teaches, dts X-HELO: mx1.redhat.com Received: from mx1.redhat.com (HELO mx1.redhat.com) (209.132.183.28) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Thu, 05 Jan 2017 21:19:36 +0000 Received: from int-mx13.intmail.prod.int.phx2.redhat.com (int-mx13.intmail.prod.int.phx2.redhat.com [10.5.11.26]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 126F767BBB; Thu, 5 Jan 2017 21:19:36 +0000 (UTC) Received: from tucnak.zalov.cz (ovpn-116-54.ams2.redhat.com [10.36.116.54]) by int-mx13.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id v05LJXoY018467 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-GCM-SHA384 bits=256 verify=NO); Thu, 5 Jan 2017 16:19:35 -0500 Received: from tucnak.zalov.cz (localhost [127.0.0.1]) by tucnak.zalov.cz (8.15.2/8.15.2) with ESMTP id v05LJUi0020215; Thu, 5 Jan 2017 22:19:30 +0100 Received: (from jakub@localhost) by tucnak.zalov.cz (8.15.2/8.15.2/Submit) id v05LJSK0020214; Thu, 5 Jan 2017 22:19:28 +0100 Date: Thu, 05 Jan 2017 21:19:00 -0000 From: Jakub Jelinek To: Richard Biener Cc: gcc-patches@gcc.gnu.org Subject: [PATCH] Fix up vectorizable_condition for comparisons of 2 booleans (PR tree-optimization/78938) Message-ID: <20170105211928.GE21933@tucnak> Reply-To: Jakub Jelinek MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.7.1 
(2016-10-04) X-IsSubscribed: yes X-SW-Source: 2017-01/txt/msg00342.txt.bz2 Hi! As mentioned in the PR, while vectorizable_comparison has code to deal with comparison of 2 booleans by transforming those into one or two BIT_*_EXPR operations that work equally well on normal vectors as well as the AVX512 bitset masks, vectorizable_condition lacks that and we ICE during expansion because of that. The following patch teaches vectorizable_condition to do that too. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2017-01-05 Jakub Jelinek PR tree-optimization/78938 * tree-vect-stmts.c (vectorizable_condition): For non-masked COND_EXPR where comp_vectype is VECTOR_BOOLEAN_TYPE_P, use BIT_{NOT,XOR,AND,IOR}_EXPR on the comparison operands instead of {EQ,NE,GE,GT,LE,LT}_EXPR directly inside of VEC_COND_EXPR. Formatting fixes. * gcc.dg/vect/pr78938.c: New test. --- gcc/tree-vect-stmts.c.jj 2017-01-01 12:45:39.000000000 +0100 +++ gcc/tree-vect-stmts.c 2017-01-05 15:54:41.075218409 +0100 @@ -7731,7 +7731,8 @@ vectorizable_condition (gimple *stmt, gi { tree scalar_dest = NULL_TREE; tree vec_dest = NULL_TREE; - tree cond_expr, then_clause, else_clause; + tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE; + tree then_clause, else_clause; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); tree comp_vectype = NULL_TREE; tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE; @@ -7741,7 +7742,7 @@ vectorizable_condition (gimple *stmt, gi loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); enum vect_def_type dt, dts[4]; int ncopies; - enum tree_code code; + enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR; stmt_vec_info prev_stmt_info = NULL; int i, j; bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); @@ -7825,11 +7826,76 @@ vectorizable_condition (gimple *stmt, gi if (vec_cmp_type == NULL_TREE) return false; + cond_code = TREE_CODE (cond_expr); + if (!masked) + { + cond_expr0 = TREE_OPERAND (cond_expr, 0); + cond_expr1 
= TREE_OPERAND (cond_expr, 1); + } + + if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype)) + { + /* Boolean values may have another representation in vectors + and therefore we prefer bit operations over comparison for + them (which also works for scalar masks). We store opcodes + to use in bitop1 and bitop2. Statement is vectorized as + BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2) + depending on bitop1 and bitop2 arity. */ + switch (cond_code) + { + case GT_EXPR: + bitop1 = BIT_NOT_EXPR; + bitop2 = BIT_AND_EXPR; + break; + case GE_EXPR: + bitop1 = BIT_NOT_EXPR; + bitop2 = BIT_IOR_EXPR; + break; + case LT_EXPR: + bitop1 = BIT_NOT_EXPR; + bitop2 = BIT_AND_EXPR; + std::swap (cond_expr0, cond_expr1); + break; + case LE_EXPR: + bitop1 = BIT_NOT_EXPR; + bitop2 = BIT_IOR_EXPR; + std::swap (cond_expr0, cond_expr1); + break; + case NE_EXPR: + bitop1 = BIT_XOR_EXPR; + break; + case EQ_EXPR: + bitop1 = BIT_XOR_EXPR; + bitop2 = BIT_NOT_EXPR; + break; + default: + return false; + } + cond_code = SSA_NAME; + } + if (!vec_stmt) { STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; + if (bitop1 != NOP_EXPR) + { + machine_mode mode = TYPE_MODE (comp_vectype); + optab optab; + + optab = optab_for_tree_code (bitop1, comp_vectype, optab_default); + if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing) + return false; + + if (bitop2 != NOP_EXPR) + { + optab = optab_for_tree_code (bitop2, comp_vectype, + optab_default); + if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing) + return false; + } + } return expand_vec_cond_expr_p (vectype, comp_vectype, - TREE_CODE (cond_expr)); + cond_code); } /* Transform. 
*/ @@ -7858,11 +7924,11 @@ vectorizable_condition (gimple *stmt, gi auto_vec<vec<tree>, 4> vec_defs; if (masked) - ops.safe_push (cond_expr); + ops.safe_push (cond_expr); else { - ops.safe_push (TREE_OPERAND (cond_expr, 0)); - ops.safe_push (TREE_OPERAND (cond_expr, 1)); + ops.safe_push (cond_expr0); + ops.safe_push (cond_expr1); } ops.safe_push (then_clause); ops.safe_push (else_clause); @@ -7886,17 +7952,15 @@ vectorizable_condition (gimple *stmt, gi } else { - vec_cond_lhs = - vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), - stmt, comp_vectype); - vect_is_simple_use (TREE_OPERAND (cond_expr, 0), - loop_vinfo, &gtemp, &dts[0]); - - vec_cond_rhs = - vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), - stmt, comp_vectype); - vect_is_simple_use (TREE_OPERAND (cond_expr, 1), - loop_vinfo, &gtemp, &dts[1]); + vec_cond_lhs + = vect_get_vec_def_for_operand (cond_expr0, + stmt, comp_vectype); + vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]); + + vec_cond_rhs + = vect_get_vec_def_for_operand (cond_expr1, + stmt, comp_vectype); + vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]); } if (reduc_index == 1) vec_then_clause = reduc_def; @@ -7953,8 +8017,37 @@ vectorizable_condition (gimple *stmt, gi else { vec_cond_rhs = vec_oprnds1[i]; - vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type, - vec_cond_lhs, vec_cond_rhs); + if (bitop1 == NOP_EXPR) + vec_compare = build2 (cond_code, vec_cmp_type, + vec_cond_lhs, vec_cond_rhs); + else + { + new_temp = make_ssa_name (vec_cmp_type); + if (bitop1 == BIT_NOT_EXPR) + new_stmt = gimple_build_assign (new_temp, bitop1, + vec_cond_rhs); + else + new_stmt + = gimple_build_assign (new_temp, bitop1, vec_cond_lhs, + vec_cond_rhs); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + if (bitop2 == NOP_EXPR) + vec_compare = new_temp; + else if (bitop2 == BIT_NOT_EXPR) + { + /* Instead of doing ~x ? y : z do x ? z : y. 
*/ + vec_compare = new_temp; + std::swap (vec_then_clause, vec_else_clause); + } + else + { + vec_compare = make_ssa_name (vec_cmp_type); + new_stmt + = gimple_build_assign (vec_compare, bitop2, + vec_cond_lhs, new_temp); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + } + } } new_temp = make_ssa_name (vec_dest); new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, --- gcc/testsuite/gcc.dg/vect/pr78938.c.jj 2017-01-05 16:06:37.854958230 +0100 +++ gcc/testsuite/gcc.dg/vect/pr78938.c 2017-01-05 16:07:04.770609976 +0100 @@ -0,0 +1,18 @@ +/* PR tree-optimization/78938 */ +/* { dg-do compile } */ +/* { dg-additional-options "-mavx512bw" { target i?86-*-* x86_64-*-* } } */ + +short int v; + +int +foo (char x, int *b) +{ + int a = 1; + for (; x < 1; x++) + { + int c = *b != v; + int d = x != 0; + a = c == d ? 2 : 0; + } + return a; +} Jakub