public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Hongtao Liu <crazylht@gmail.com>
To: Jan Beulich <jbeulich@suse.com>
Cc: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>,
	Hongtao Liu <hongtao.liu@intel.com>,
	 Kirill Yukhin <kirill.yukhin@gmail.com>
Subject: Re: [PATCH 1/5] x86: use VPTERNLOG for further bitwise two-vector operations
Date: Sun, 25 Jun 2023 12:42:43 +0800	[thread overview]
Message-ID: <CAMZc-bz57MPBo9FRQf13i5O2DKFHJcK00ffLx0zA-jEuRzhnag@mail.gmail.com> (raw)
In-Reply-To: <457ffad0-9ecd-3e19-f5ab-6153ce4b8bad@suse.com>

On Wed, Jun 21, 2023 at 2:26 PM Jan Beulich via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> All combinations of and, ior, xor, and not involving two operands can be
> expressed that way in a single insn.
>
> gcc/
>
>         PR target/93768
>         * config/i386/i386.cc (ix86_rtx_costs): Further special-case
>         bitwise vector operations.
>         * config/i386/sse.md (*iornot<mode>3): New insn.
>         (*xnor<mode>3): Likewise.
>         (*<nlogic><mode>3): Likewise.
>         (andor): New code iterator.
>         (nlogic): New code attribute.
>         (ternlog_nlogic): Likewise.
>
> gcc/testsuite/
>
>         PR target/93768
>         gcc.target/i386/avx512-binop-not-1.h: New.
>         gcc.target/i386/avx512-binop-not-2.h: New.
>         gcc.target/i386/avx512f-orn-si-zmm-1.c: New test.
>         gcc.target/i386/avx512f-orn-si-zmm-2.c: New test.
> ---
> The use of VI matches that in e.g. one_cmpl<mode>2 /
> <mask_codefor>one_cmpl<mode>2<mask_name> and *andnot<mode>3, despite
> (here and there)
> - V64QI and V32HI being needlessly excluded when AVX512BW isn't enabled,
> - V<n>TI not being covered,
> - vector modes more narrow than 16 bytes not being covered.
>
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -21178,6 +21178,32 @@ ix86_rtx_costs (rtx x, machine_mode mode
>        return false;
>
>      case IOR:
> +      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
> +       {
> +         /* (ior (not ...) ...) can be a single insn in AVX512.  */
> +         if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
> +             && (GET_MODE_SIZE (mode) == 64
> +                 || (TARGET_AVX512VL
> +                     && (GET_MODE_SIZE (mode) == 32
> +                         || GET_MODE_SIZE (mode) == 16))))
> +           {
> +             rtx right = GET_CODE (XEXP (x, 1)) != NOT
> +                         ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0);
> +
> +             *total = ix86_vec_cost (mode, cost->sse_op)
> +                      + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
> +                                  outer_code, opno, speed)
> +                      + rtx_cost (right, mode, outer_code, opno, speed);
> +             return true;
> +           }
> +         *total = ix86_vec_cost (mode, cost->sse_op);
> +       }
> +      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
> +       *total = cost->add * 2;
> +      else
> +       *total = cost->add;
> +      return false;
> +
>      case XOR:
>        if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
>         *total = ix86_vec_cost (mode, cost->sse_op);
> @@ -21198,11 +21224,20 @@ ix86_rtx_costs (rtx x, machine_mode mode
>           /* pandn is a single instruction.  */
>           if (GET_CODE (XEXP (x, 0)) == NOT)
>             {
> +             rtx right = XEXP (x, 1);
> +
> +             /* (and (not ...) (not ...)) can be a single insn in AVX512.  */
> +             if (GET_CODE (right) == NOT && TARGET_AVX512F
> +                 && (GET_MODE_SIZE (mode) == 64
> +                     || (TARGET_AVX512VL
> +                         && (GET_MODE_SIZE (mode) == 32
> +                             || GET_MODE_SIZE (mode) == 16))))
> +               right = XEXP (right, 0);
> +
>               *total = ix86_vec_cost (mode, cost->sse_op)
>                        + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
>                                    outer_code, opno, speed)
> -                      + rtx_cost (XEXP (x, 1), mode,
> -                                  outer_code, opno, speed);
> +                      + rtx_cost (right, mode, outer_code, opno, speed);
>               return true;
>             }
>           else if (GET_CODE (XEXP (x, 1)) == NOT)
> @@ -21260,8 +21295,25 @@ ix86_rtx_costs (rtx x, machine_mode mode
>
>      case NOT:
>        if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
> -       // vnot is pxor -1.
> -       *total = ix86_vec_cost (mode, cost->sse_op) + 1;
> +       {
> +         /* (not (xor ...)) can be a single insn in AVX512.  */
> +         if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
> +             && (GET_MODE_SIZE (mode) == 64
> +                 || (TARGET_AVX512VL
> +                     && (GET_MODE_SIZE (mode) == 32
> +                         || GET_MODE_SIZE (mode) == 16))))
> +           {
> +             *total = ix86_vec_cost (mode, cost->sse_op)
> +                      + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
> +                                  outer_code, opno, speed)
> +                      + rtx_cost (XEXP (XEXP (x, 0), 1), mode,
> +                                  outer_code, opno, speed);
> +             return true;
> +           }
> +
> +         // vnot is pxor -1.
> +         *total = ix86_vec_cost (mode, cost->sse_op) + 1;
> +       }
>        else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
>         *total = cost->add * 2;
>        else
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -17616,6 +17616,98 @@
>    operands[2] = force_reg (V1TImode, CONSTM1_RTX (V1TImode));
>  })
>
> +(define_insn "*iornot<mode>3"
> +  [(set (match_operand:VI 0 "register_operand" "=v,v,v,v")
> +       (ior:VI
> +         (not:VI
> +           (match_operand:VI 1 "bcst_vector_operand" "v,Br,v,m"))
> +         (match_operand:VI 2 "bcst_vector_operand" "vBr,v,m,v")))]
> +  "(<MODE_SIZE> == 64 || TARGET_AVX512VL
> +    || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
> +   && (register_operand (operands[1], <MODE>mode)
> +       || register_operand (operands[2], <MODE>mode))"
> +{
> +  if (!register_operand (operands[1], <MODE>mode))
> +    {
> +      if (TARGET_AVX512VL)
> +       return "vpternlog<ternlogsuffix>\t{$0xdd, %1, %2, %0|%0, %2, %1, 0xdd}";
> +      return "vpternlog<ternlogsuffix>\t{$0xdd, %g1, %g2, %g0|%g0, %g2, %g1, 0xdd}";
> +    }
> +  if (TARGET_AVX512VL)
> +    return "vpternlog<ternlogsuffix>\t{$0xbb, %2, %1, %0|%0, %1, %2, 0xbb}";
> +  return "vpternlog<ternlogsuffix>\t{$0xbb, %g2, %g1, %g0|%g0, %g1, %g2, 0xbb}";
> +}
> +  [(set_attr "type" "sselog")
> +   (set_attr "length_immediate" "1")
> +   (set_attr "prefix" "evex")
> +   (set (attr "mode")
> +        (if_then_else (match_test "TARGET_AVX512VL")
> +                     (const_string "<sseinsnmode>")
> +                     (const_string "XI")))
> +   (set (attr "enabled")
> +       (if_then_else (eq_attr "alternative" "2,3")
> +                     (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
> +                     (const_string "*")))])
> +
> +(define_insn "*xnor<mode>3"
> +  [(set (match_operand:VI 0 "register_operand" "=v,v")
> +       (not:VI
> +         (xor:VI
> +           (match_operand:VI 1 "bcst_vector_operand" "%v,v")
> +           (match_operand:VI 2 "bcst_vector_operand" "vBr,m"))))]
> +  "(<MODE_SIZE> == 64 || TARGET_AVX512VL
> +    || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
> +   && (register_operand (operands[1], <MODE>mode)
> +       || register_operand (operands[2], <MODE>mode))"
> +{
> +  if (TARGET_AVX512VL)
> +    return "vpternlog<ternlogsuffix>\t{$0x99, %2, %1, %0|%0, %1, %2, 0x99}";
> +  else
> +    return "vpternlog<ternlogsuffix>\t{$0x99, %g2, %g1, %g0|%g0, %g1, %g2, 0x99}";
> +}
> +  [(set_attr "type" "sselog")
> +   (set_attr "length_immediate" "1")
> +   (set_attr "prefix" "evex")
> +   (set (attr "mode")
> +        (if_then_else (match_test "TARGET_AVX512VL")
> +                     (const_string "<sseinsnmode>")
> +                     (const_string "XI")))
> +   (set (attr "enabled")
> +       (if_then_else (eq_attr "alternative" "1")
> +                     (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
> +                     (const_string "*")))])
> +
> +(define_code_iterator andor [and ior])
> +(define_code_attr nlogic [(and "nor") (ior "nand")])
> +(define_code_attr ternlog_nlogic [(and "0x11") (ior "0x77")])
> +
> +(define_insn "*<nlogic><mode>3"
> +  [(set (match_operand:VI 0 "register_operand" "=v,v")
> +       (andor:VI
> +         (not:VI (match_operand:VI 1 "bcst_vector_operand" "%v,v"))
> +         (not:VI (match_operand:VI 2 "bcst_vector_operand" "vBr,m"))))]
I'm thinking of doing it in simplify_rtx or gimple match.pd to transform
(and (not op1) (not op2)) -> (not (ior op1 op2))
(ior (not op1) (not op2)) -> (not (and op1 op2))

Even without AVX512F, the transformation should still be beneficial, since
it takes fewer logic operations: 3 -> 2 (or 2 -> 2 for pandn).

The other 2 patterns, *xnor<mode>3 and *iornot<mode>3, LGTM.

> +  "(<MODE_SIZE> == 64 || TARGET_AVX512VL
> +    || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
> +   && (register_operand (operands[1], <MODE>mode)
> +       || register_operand (operands[2], <MODE>mode))"
> +{
> +  if (TARGET_AVX512VL)
> +    return "vpternlog<ternlogsuffix>\t{$<ternlog_nlogic>, %2, %1, %0|%0, %1, %2, <ternlog_nlogic>}";
> +  else
> +    return "vpternlog<ternlogsuffix>\t{$<ternlog_nlogic>, %g2, %g1, %g0|%g0, %g1, %g2, <ternlog_nlogic>}";
> +}
> +  [(set_attr "type" "sselog")
> +   (set_attr "length_immediate" "1")
> +   (set_attr "prefix" "evex")
> +   (set (attr "mode")
> +        (if_then_else (match_test "TARGET_AVX512VL")
> +                     (const_string "<sseinsnmode>")
> +                     (const_string "XI")))
> +   (set (attr "enabled")
> +       (if_then_else (eq_attr "alternative" "1")
> +                     (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
> +                     (const_string "*")))])
> +
>  (define_mode_iterator AVX512ZEXTMASK
>    [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
>
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512-binop-not-1.h
> @@ -0,0 +1,13 @@
> +#include <immintrin.h>
> +
> +#define PASTER2(x,y)           x##y
> +#define PASTER3(x,y,z)         _mm##x##_##y##_##z
> +#define OP(vec, op, suffix)    PASTER3 (vec, op, suffix)
> +#define DUP(vec, suffix, val)  PASTER3 (vec, set1, suffix) (val)
> +
> +type
> +foo (type x, SCALAR *f)
> +{
> +  return OP (vec, op, suffix) (x, OP (vec, xor, suffix) (DUP (vec, suffix, *f),
> +                                                        DUP (vec, suffix, ~0)));
> +}
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512-binop-not-2.h
> @@ -0,0 +1,13 @@
> +#include <immintrin.h>
> +
> +#define PASTER2(x,y)           x##y
> +#define PASTER3(x,y,z)         _mm##x##_##y##_##z
> +#define OP(vec, op, suffix)    PASTER3 (vec, op, suffix)
> +#define DUP(vec, suffix, val)  PASTER3 (vec, set1, suffix) (val)
> +
> +type
> +foo (type x, SCALAR *f)
> +{
> +  return OP (vec, op, suffix) (OP (vec, xor, suffix) (x, DUP (vec, suffix, ~0)),
> +                              DUP (vec, suffix, *f));
> +}
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-1.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
> +/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0xdd, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
> +/* { dg-final { scan-assembler-not "vpbroadcast" } } */
> +
> +#define type __m512i
> +#define vec 512
> +#define op or
> +#define suffix epi32
> +#define SCALAR int
> +
> +#include "avx512-binop-not-1.h"
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-2.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
> +/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0xbb, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
> +/* { dg-final { scan-assembler-not "vpbroadcast" } } */
> +
> +#define type __m512i
> +#define vec 512
> +#define op or
> +#define suffix epi32
> +#define SCALAR int
> +
> +#include "avx512-binop-not-2.h"
>


-- 
BR,
Hongtao

  reply	other threads:[~2023-06-25  4:42 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-21  6:24 [PATCH 0/5] x86: make better use of VPTERNLOG{D,Q} Jan Beulich
2023-06-21  6:25 ` [PATCH 1/5] x86: use VPTERNLOG for further bitwise two-vector operations Jan Beulich
2023-06-25  4:42   ` Hongtao Liu [this message]
2023-06-25  5:52     ` Jan Beulich
2023-06-25  7:13       ` Hongtao Liu
2023-06-25  7:23         ` Hongtao Liu
2023-06-25  7:30           ` Hongtao Liu
2023-06-25 13:35             ` Jan Beulich
2023-06-26  0:42               ` Hongtao Liu
2023-06-21  6:27 ` [PATCH 2/5] x86: use VPTERNLOG also for certain andnot forms Jan Beulich
2023-06-25  4:58   ` Hongtao Liu
2023-06-21  6:27 ` [PATCH 3/5] x86: allow memory operand for AVX2 splitter for PR target/100711 Jan Beulich
2023-06-25  4:58   ` Hongtao Liu
2023-06-21  6:27 ` [PATCH 4/5] x86: further PR target/100711-like splitting Jan Beulich
2023-06-25  5:06   ` Hongtao Liu
2023-06-25  6:16     ` Jan Beulich
2023-06-25  6:27       ` Hongtao Liu
2023-06-21  6:28 ` [PATCH 5/5] x86: yet more " Jan Beulich
2023-06-25  5:12   ` Hongtao Liu
2023-06-25  6:25     ` Jan Beulich
2023-06-25  6:35       ` Hongtao Liu
2023-06-25  6:41         ` Hongtao Liu
2023-11-06 11:10           ` Jan Beulich
2023-11-06 13:48             ` Hongtao Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAMZc-bz57MPBo9FRQf13i5O2DKFHJcK00ffLx0zA-jEuRzhnag@mail.gmail.com \
    --to=crazylht@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=hongtao.liu@intel.com \
    --cc=jbeulich@suse.com \
    --cc=kirill.yukhin@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).