S/390: Expand sign-test vector conditionals as element shifts.

s390_expand_vcond: when the comparison is "x < 0" or "x >= 0" against
a zero vector, the result mode equals the comparison mode, and the
selected values are 0 and 1 (or 0 and -1), emit a single logical
(respectively arithmetic) right shift by element-bitsize - 1 instead of
a compare followed by a select.

The vcond/vcondu expanders now accept a nonmemory operand 5 so that a
zero vector immediate reaches s390_expand_vcond.  Comparison operands
that are not handled by the shift shortcut are forced into registers of
the comparison mode, which may differ from the mode of the result.

gcc.target/s390/vcond-shift.c checks that the shifts are generated.

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 67639bc..a72c9e1 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -6108,19 +6108,60 @@ s390_expand_vcond (rtx target, rtx then, rtx els,
   machine_mode result_mode;
   rtx result_target;
 
+  machine_mode target_mode = GET_MODE (target);
+  machine_mode cmp_mode = GET_MODE (cmp_op1);
+  rtx op = (cond == LT) ? els : then;
+
+  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
+     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  Likewise
+     for short and byte (x >> 15 and x >> 7 respectively).  */
+  if ((cond == LT || cond == GE)
+      && target_mode == cmp_mode
+      && cmp_op2 == CONST0_RTX (cmp_mode)
+      && op == CONST0_RTX (target_mode)
+      && s390_vector_mode_supported_p (target_mode)
+      && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
+    {
+      rtx negop = (cond == LT) ? then : els;
+
+      int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
+
+      /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
+      if (negop == CONST1_RTX (target_mode))
+	{
+	  rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
+					 GEN_INT (shift), target,
+					 1, OPTAB_DIRECT);
+	  if (res != target)
+	    emit_move_insn (target, res);
+	  return;
+	}
+
+      /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
+      else if (constm1_operand (negop, target_mode))
+	{
+	  rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
+					 GEN_INT (shift), target,
+					 0, OPTAB_DIRECT);
+	  if (res != target)
+	    emit_move_insn (target, res);
+	  return;
+	}
+    }
+
   /* We always use an integral type vector to hold the comparison
      result.  */
-  result_mode = GET_MODE (cmp_op1) == V2DFmode ? V2DImode : GET_MODE (cmp_op1);
+  result_mode = cmp_mode == V2DFmode ? V2DImode : cmp_mode;
   result_target = gen_reg_rtx (result_mode);
 
-  /* Alternatively this could be done by reload by lowering the cmp*
-     predicates.  But it appears to be better for scheduling etc. to
-     have that in early.  */
+  /* We allow vector immediates as comparison operands that
+     can be handled by the optimization above but not by the
+     following code.  Hence, force them into registers here.  */
   if (!REG_P (cmp_op1))
-    cmp_op1 = force_reg (GET_MODE (target), cmp_op1);
+    cmp_op1 = force_reg (cmp_mode, cmp_op1);
 
   if (!REG_P (cmp_op2))
-    cmp_op2 = force_reg (GET_MODE (target), cmp_op2);
+    cmp_op2 = force_reg (cmp_mode, cmp_op2);
 
   s390_expand_vec_compare (result_target, cond, cmp_op1, cmp_op2);
 
@@ -6130,7 +6171,7 @@ s390_expand_vcond (rtx target, rtx then, rtx els,
   if (constm1_operand (then, GET_MODE (then))
       && const0_operand (els, GET_MODE (els)))
     {
-      emit_move_insn (target, gen_rtx_SUBREG (GET_MODE (target),
+      emit_move_insn (target, gen_rtx_SUBREG (target_mode,
 					      result_target, 0));
       return;
     }
@@ -6139,10 +6180,10 @@ s390_expand_vcond (rtx target, rtx then, rtx els,
   /* This gets triggered e.g.
      with gcc.c-torture/compile/pr53410-1.c */
   if (!REG_P (then))
-    then = force_reg (GET_MODE (target), then);
+    then = force_reg (target_mode, then);
 
   if (!REG_P (els))
-    els = force_reg (GET_MODE (target), els);
+    els = force_reg (target_mode, els);
 
   tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
 			result_target,
@@ -6150,9 +6191,9 @@ s390_expand_vcond (rtx target, rtx then, rtx els,
 
   /* We compared the result against zero above so we have to swap then
      and els here.  */
-  tmp = gen_rtx_IF_THEN_ELSE (GET_MODE (target), tmp, els, then);
+  tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
 
-  gcc_assert (GET_MODE (target) == GET_MODE (then));
+  gcc_assert (target_mode == GET_MODE (then));
   emit_insn (gen_rtx_SET (target, tmp));
 }
 
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index c9f5890..f6a85c8 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -403,7 +403,7 @@
 	(if_then_else:V_HW
 	 (match_operator 3 "comparison_operator"
 			 [(match_operand:V_HW2 4 "register_operand" "")
-			  (match_operand:V_HW2 5 "register_operand" "")])
+			  (match_operand:V_HW2 5 "nonmemory_operand" "")])
 	 (match_operand:V_HW 1 "nonmemory_operand" "")
 	 (match_operand:V_HW 2 "nonmemory_operand" "")))]
   "TARGET_VX && GET_MODE_NUNITS (<V_HW:MODE>mode) == GET_MODE_NUNITS (<V_HW2:MODE>mode)"
@@ -418,7 +418,7 @@
 	(if_then_else:V_HW
 	 (match_operator 3 "comparison_operator"
 			 [(match_operand:V_HW2 4 "register_operand" "")
-			  (match_operand:V_HW2 5 "register_operand" "")])
+			  (match_operand:V_HW2 5 "nonmemory_operand" "")])
 	 (match_operand:V_HW 1 "nonmemory_operand" "")
 	 (match_operand:V_HW 2 "nonmemory_operand" "")))]
   "TARGET_VX && GET_MODE_NUNITS (<V_HW:MODE>mode) == GET_MODE_NUNITS (<V_HW2:MODE>mode)"
diff --git a/gcc/testsuite/gcc.target/s390/vcond-shift.c b/gcc/testsuite/gcc.target/s390/vcond-shift.c
new file mode 100644
index 0000000..f58bd1f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vcond-shift.c
@@ -0,0 +1,61 @@
+/* Check if conditional vector instructions are simplified
+   into shift operations.  */
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -march=z13 -mzarch" } */
+
+/* { dg-final { scan-assembler "vesraf\t%v.?,%v.?,31" } } */
+/* { dg-final { scan-assembler "vesrah\t%v.?,%v.?,15" } } */
+/* { dg-final { scan-assembler "vesrab\t%v.?,%v.?,7" } } */
+/* { dg-final { scan-assembler-not "vzero\t*" } } */
+/* { dg-final { scan-assembler "vesrlf\t%v.?,%v.?,31" } } */
+/* { dg-final { scan-assembler "vesrlh\t%v.?,%v.?,15" } } */
+/* { dg-final { scan-assembler "vesrlb\t%v.?,%v.?,7" } } */
+
+#define SZ 4
+#define SZ2 8
+#define SZ3 16
+
+void foo(int *w)
+{
+  int i;
+  /* Should expand to (w + (w < 0 ? 1 : 0)) >> 1 which in turn
+     should get simplified to (w + ((unsigned) w >> 31)) >> 1.  */
+  for (i = 0; i < SZ; i++)
+    w[i] = w[i] / 2;
+}
+
+void foo2(short *w)
+{
+  int i;
+  for (i = 0; i < SZ2; i++)
+    w[i] = w[i] / 2;
+}
+
+
+void foo3(signed char *w)
+{
+  int i;
+  for (i = 0; i < SZ3; i++)
+    w[i] = w[i] / 2;
+}
+
+void baz(int *x)
+{
+  int i;
+  for (i = 0; i < SZ; i++)
+    x[i] = x[i] < 0 ? -1 : 0;
+}
+
+void baf(short *x)
+{
+  int i;
+  for (i = 0; i < SZ2; i++)
+    x[i] = x[i] >= 0 ? 0 : 1;
+}
+
+void bal(signed char *x)
+{
+  int i;
+  for (i = 0; i < SZ3; i++)
+    x[i] = x[i] >= 0 ? 0 : -1;
+}