diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 602dfa7..b3d2c90 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4325,6 +4325,16 @@ (set_attr "type" "imovx,mskmov,mskmov") (set_attr "mode" "SI,QI,QI")]) +(define_insn_and_split "zero_extendditi2" + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o") + (zero_extend:TI (match_operand:DI 1 "nonimmediate_operand" "rm,r")))] + "TARGET_64BIT" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 4) (const_int 0))] + "split_double_mode (TImode, &operands[0], 1, &operands[3], &operands[4]);") + ;; Transform xorl; mov[bw] (set strict_low_part) into movz[bw]l. (define_peephole2 [(parallel [(set (match_operand:SWI48 0 "general_reg_operand") @@ -6453,6 +6463,31 @@ [(set_attr "type" "alu") (set_attr "mode" "QI")]) +(define_insn_and_split "*add3_doubleword_zext" + [(set (match_operand: 0 "nonimmediate_operand" "=r,o") + (plus: + (zero_extend: + (match_operand:DWIH 2 "nonimmediate_operand" "rm,r")) + (match_operand: 1 "nonimmediate_operand" "0,0"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (UNKNOWN, mode, operands)" + "#" + "&& reload_completed" + [(parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:DWIH (match_dup 1) (match_dup 2)) + (match_dup 1))) + (set (match_dup 0) + (plus:DWIH (match_dup 1) (match_dup 2)))]) + (parallel [(set (match_dup 3) + (plus:DWIH + (plus:DWIH + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 4)) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))])] + "split_double_mode (mode, &operands[0], 2, &operands[0], &operands[3]);") + ;; Like DWI, but use POImode instead of OImode. (define_mode_attr DPWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "POI")]) @@ -6903,6 +6938,29 @@ } }) +(define_insn_and_split "*sub3_doubleword_zext" + [(set (match_operand: 0 "nonimmediate_operand" "=r,o") + (minus: + (match_operand: 1 "nonimmediate_operand" "0,0") + (zero_extend: + (match_operand:DWIH 2 "nonimmediate_operand" "rm,r")))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (UNKNOWN, mode, operands)" + "#" + "&& reload_completed" + [(parallel [(set (reg:CC FLAGS_REG) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) + (minus:DWIH (match_dup 1) (match_dup 2)))]) + (parallel [(set (match_dup 3) + (minus:DWIH + (minus:DWIH + (match_dup 4) + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))])] + "split_double_mode (mode, &operands[0], 2, &operands[0], &operands[3]);") + (define_insn "*sub_1" [(set (match_operand:SWI 0 "nonimmediate_operand" "=m,") (minus:SWI @@ -10956,34 +11014,82 @@ ;; Split DST = (HI<<32)|LO early to minimize register usage. (define_code_iterator any_or_plus [plus ior xor]) -(define_split - [(set (match_operand:DI 0 "register_operand") - (any_or_plus:DI - (ashift:DI (match_operand:DI 1 "register_operand") - (const_int 32)) - (zero_extend:DI (match_operand:SI 2 "register_operand"))))] - "!TARGET_64BIT" - [(set (match_dup 3) (match_dup 4)) - (set (match_dup 5) (match_dup 2))] +(define_insn_and_split "*concat3_1" + [(set (match_operand: 0 "register_operand" "=r") + (any_or_plus: + (ashift: (match_operand: 1 "register_operand" "r") + (match_operand: 2 "const_int_operand")) + (zero_extend: (match_operand:DWIH 3 "register_operand" "r"))))] + "INTVAL (operands[2]) == * BITS_PER_UNIT + && REG_P (operands[0]) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 4) (match_dup 3)) + (set (match_dup 5) (match_dup 6))] +{ + operands[1] = force_reg (mode, operands[1]); + operands[4] = gen_lowpart (mode, operands[0]); + operands[5] = gen_highpart (mode, operands[0]); + operands[6] = gen_lowpart (mode, operands[1]); +}) + +(define_insn_and_split "*concat3_2" + [(set (match_operand: 0 "register_operand" "=r") + (any_or_plus: + (zero_extend: (match_operand:DWIH 1 "register_operand" "r")) + (ashift: (match_operand: 2 "register_operand" "r") + (match_operand: 3 "const_int_operand"))))] + "INTVAL (operands[3]) == * BITS_PER_UNIT + && REG_P (operands[0]) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 4) (match_dup 1)) + (set (match_dup 5) (match_dup 6))] +{ + operands[2] = force_reg (mode, operands[2]); + operands[4] = gen_lowpart (mode, operands[0]); + operands[5] = gen_highpart (mode, operands[0]); + operands[6] = gen_lowpart (mode, operands[2]); +}) + +(define_insn_and_split "*concat3_3" + [(set (match_operand: 0 "register_operand" "=r") + (any_or_plus: + (ashift: + (zero_extend: (match_operand:DWIH 1 "register_operand" "r")) + (match_operand: 2 "const_int_operand")) + (zero_extend: (match_operand:DWIH 3 "register_operand" "r"))))] + "INTVAL (operands[2]) == * BITS_PER_UNIT + && REG_P (operands[0]) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 4) (match_dup 3)) + (set (match_dup 5) (match_dup 1))] { - operands[3] = gen_highpart (SImode, operands[0]); - operands[4] = gen_lowpart (SImode, operands[1]); - operands[5] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_lowpart (mode, operands[0]); + operands[5] = gen_highpart (mode, operands[0]); }) -(define_split - [(set (match_operand:DI 0 "register_operand") - (any_or_plus:DI - (zero_extend:DI (match_operand:SI 1 "register_operand")) - (ashift:DI (match_operand:DI 2 "register_operand") - (const_int 32))))] - "!TARGET_64BIT" - [(set (match_dup 3) (match_dup 4)) - (set (match_dup 5) (match_dup 1))] +(define_insn_and_split "*concat3_4" + [(set (match_operand: 0 "register_operand" "=r") + (any_or_plus: + (zero_extend: (match_operand:DWIH 1 "register_operand" "r")) + (ashift: + (zero_extend: (match_operand:DWIH 2 "register_operand" "r")) + (match_operand: 3 "const_int_operand"))))] + "INTVAL (operands[3]) == * BITS_PER_UNIT + && REG_P (operands[0]) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 4) (match_dup 1)) + (set (match_dup 5) (match_dup 2))] { - operands[3] = gen_highpart (SImode, operands[0]); - operands[4] = gen_lowpart (SImode, operands[2]); - operands[5] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_lowpart (mode, operands[0]); + operands[5] = gen_highpart (mode, operands[0]); }) ;; Negation instructions diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 8b2602b..0198156 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2070,7 +2070,8 @@ (ashift:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "k")) (const_int 8)) - (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))] + (zero_extend:HI (match_operand:QI 2 "register_operand" "k")))) + (unspec [(const_int 0)] UNSPEC_MASKOP)] "TARGET_AVX512F" "kunpckbw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "mode" "HI") @@ -2083,7 +2084,8 @@ (ashift:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "k")) (const_int 16)) - (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))] + (zero_extend:SI (match_operand:HI 2 "register_operand" "k")))) + (unspec [(const_int 0)] UNSPEC_MASKOP)] "TARGET_AVX512BW" "kunpckwd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "mode" "SI")]) @@ -2094,7 +2096,8 @@ (ashift:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "k")) (const_int 32)) - (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))] + (zero_extend:DI (match_operand:SI 2 "register_operand" "k")))) + (unspec [(const_int 0)] UNSPEC_MASKOP)] "TARGET_AVX512BW" "kunpckdq\t{%2, %1, %0|%0, %1, %2}" [(set_attr "mode" "DI")]) @@ -17398,21 +17401,26 @@ }) (define_expand "vec_pack_trunc_qi" - [(set (match_operand:HI 0 "register_operand") - (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 "register_operand")) - (const_int 8)) - (zero_extend:HI (match_operand:QI 1 "register_operand"))))] + [(parallel + [(set (match_operand:HI 0 "register_operand") + (ior:HI + (ashift:HI (zero_extend:HI (match_operand:QI 2 "register_operand")) + (const_int 8)) + (zero_extend:HI (match_operand:QI 1 "register_operand")))) + (unspec [(const_int 0)] UNSPEC_MASKOP)])] "TARGET_AVX512F") (define_expand "vec_pack_trunc_" - [(set (match_operand: 0 "register_operand") - (ior: - (ashift: + [(parallel + [(set (match_operand: 0 "register_operand") + (ior: + (ashift: + (zero_extend: + (match_operand:SWI24 2 "register_operand")) + (match_dup 3)) (zero_extend: - (match_operand:SWI24 2 "register_operand")) - (match_dup 3)) - (zero_extend: - (match_operand:SWI24 1 "register_operand"))))] + (match_operand:SWI24 1 "register_operand")))) + (unspec [(const_int 0)] UNSPEC_MASKOP)])] "TARGET_AVX512BW" { operands[3] = GEN_INT (GET_MODE_BITSIZE (mode)); diff --git a/gcc/testsuite/g++.target/i386/pr91681.C b/gcc/testsuite/g++.target/i386/pr91681.C new file mode 100644 index 0000000..0271e43 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr91681.C @@ -0,0 +1,20 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2" } */ + +void multiply128x64x2_3 ( + const unsigned long a, + const unsigned long b, + const unsigned long c, + const unsigned long d, + __uint128_t o[2]) +{ + __uint128_t B0 = (__uint128_t) b * c; + __uint128_t B2 = (__uint128_t) a * c; + __uint128_t B1 = (__uint128_t) b * d; + __uint128_t B3 = (__uint128_t) a * d; + + o[0] = B2 + (B0 >> 64); + o[1] = B3 + (B1 >> 64); +} + +/* { dg-final { scan-assembler-not "xor" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr91681-1.c b/gcc/testsuite/gcc.target/i386/pr91681-1.c new file mode 100644 index 0000000..ab83cc4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91681-1.c @@ -0,0 +1,20 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2" } */ +unsigned __int128 m; + +unsigned __int128 foo(unsigned __int128 x, unsigned long long y) +{ + return x + y; +} + +void bar(unsigned __int128 x, unsigned long long y) +{ + m = x + y; +} + +void baz(unsigned long long y) +{ + m += y; +} + +/* { dg-final { scan-assembler-not "xor" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr91681-2.c b/gcc/testsuite/gcc.target/i386/pr91681-2.c new file mode 100644 index 0000000..ea52c72 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91681-2.c @@ -0,0 +1,20 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2" } */ +unsigned __int128 m; + +unsigned __int128 foo(unsigned __int128 x, unsigned long long y) +{ + return x - y; +} + +void bar(unsigned __int128 x, unsigned long long y) +{ + m = x - y; +} + +void baz(unsigned long long y) +{ + m -= y; +} + +/* { dg-final { scan-assembler-not "xor" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr91681-3.c b/gcc/testsuite/gcc.target/i386/pr91681-3.c new file mode 100644 index 0000000..22a03c2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91681-3.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target ia32 } } */ +/* { dg-options "-O2" } */ + +unsigned long long m; + +unsigned long long foo(unsigned long long x, unsigned int y) +{ + return x - y; +} + +void bar(unsigned long long x, unsigned int y) +{ + m = x - y; +} + +/* { dg-final { scan-assembler-not "xor" } } */