public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Uros Bizjak <ubizjak@gmail.com>
To: "H.J. Lu" <hjl.tools@gmail.com>
Cc: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Subject: Re: [PATCH] x86: Use crc32 target option for CRC32 intrinsics
Date: Thu, 15 Apr 2021 18:33:52 +0200	[thread overview]
Message-ID: <CAFULd4bpyQmdY5jzJz3B1qhGegwqRxu_FzAK15YX5ZM8eJ+ueA@mail.gmail.com> (raw)
In-Reply-To: <CAMe9rOqP-e66XytNGVGcGZ51km3_HnMw_V_nyi6hDhe9JAb3eA@mail.gmail.com>

On Thu, Apr 15, 2021 at 6:26 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Thu, Apr 15, 2021 at 9:14 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Thu, Apr 15, 2021 at 5:11 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > Use crc32 target option for CRC32 intrinsics to support CRC32 intrinsics
> > > without enabling SSE vector instructions.
> >
> > There is no CRC32 ISA. crc32 is part of SSE4.2 [1] and the current
> > situation reflects that correctly.
>
> CRC32 is similar to POPCNT which was originally in SSE4.2.   Now POPCNT

It is not similar: POPCNT has its own CPUID flag and can be enabled
independently of SSE4.2.

> is a separate feature which is also enabled by SSE4.2.   Enabling CRC32 only
> with SSE4.2 makes it impossible to use CRC32 with -mgeneral-regs-only.   This
> patch addresses this issue the same way as POPCNT.

CRC32 doesn't have its own CPUID flag, so PTA_CRC32 is pointless.

OTOH, the situation is similar with MONITOR and MWAIT. These are
enabled with SSE3 and don't use XMM registers. Also somewhat similar
is FISTTP, but there is no intrinsic for this insn.

Uros.

>
> > [1] https://en.wikipedia.org/wiki/SSE4
> >
> > Uros.
> >
> > >         * config/i386/gnu-property.c
> > >         (file_end_indicate_exec_stack_and_gnu_property): Also check
> > >         TARGET_CRC32 for GNU_PROPERTY_X86_ISA_1_V2.
> > >         * config/i386/i386-c.c (ix86_target_macros_internal): Define
> > >         __CRC32__ for -mcrc32.
> > >         * config/i386/i386-options.c (ix86_option_override_internal):
> > >         Handle PTA_CRC32.  Enable crc32 instruction for -msse4.2.
> > >         * config/i386/i386.h (PTA_CRC32): New.
> > >         (PTA_X86_64_V2): Add PTA_CRC32.
> > >         (PTA_NEHALEM): Likewise.
> > >         * config/i386/i386.md (sse4_2_crc32<mode>): Remove TARGET_SSE4_2
> > >         check.
> > >         (sse4_2_crc32di): Likewise.
> > >         * config/i386/ia32intrin.h: Use crc32 target option for CRC32
> > >         intrinsics.
> > > ---
> > >  gcc/config/i386/gnu-property.c |  1 +
> > >  gcc/config/i386/i386-c.c       |  2 ++
> > >  gcc/config/i386/i386-options.c |  8 ++++++++
> > >  gcc/config/i386/i386.h         |  6 ++++--
> > >  gcc/config/i386/i386.md        |  4 ++--
> > >  gcc/config/i386/ia32intrin.h   | 28 ++++++++++++++--------------
> > >  6 files changed, 31 insertions(+), 18 deletions(-)
> > >
> > > diff --git a/gcc/config/i386/gnu-property.c b/gcc/config/i386/gnu-property.c
> > > index 4ba04403002..b6a3bdf62ce 100644
> > > --- a/gcc/config/i386/gnu-property.c
> > > +++ b/gcc/config/i386/gnu-property.c
> > > @@ -92,6 +92,7 @@ file_end_indicate_exec_stack_and_gnu_property (void)
> > >        /* GNU_PROPERTY_X86_ISA_1_V2.  */
> > >        if (TARGET_CMPXCHG16B
> > >           || (TARGET_64BIT && TARGET_SAHF)
> > > +         || TARGET_CRC32
> > >           || TARGET_POPCNT
> > >           || TARGET_SSE3
> > >           || TARGET_SSSE3
> > > diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
> > > index be46d0506ad..5ed0de006fb 100644
> > > --- a/gcc/config/i386/i386-c.c
> > > +++ b/gcc/config/i386/i386-c.c
> > > @@ -532,6 +532,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
> > >      def_or_undef (parse_in, "__LZCNT__");
> > >    if (isa_flag & OPTION_MASK_ISA_TBM)
> > >      def_or_undef (parse_in, "__TBM__");
> > > +  if (isa_flag & OPTION_MASK_ISA_CRC32)
> > > +    def_or_undef (parse_in, "__CRC32__");
> > >    if (isa_flag & OPTION_MASK_ISA_POPCNT)
> > >      def_or_undef (parse_in, "__POPCNT__");
> > >    if (isa_flag & OPTION_MASK_ISA_FSGSBASE)
> > > diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
> > > index 91da2849c49..959ee163d2f 100644
> > > --- a/gcc/config/i386/i386-options.c
> > > +++ b/gcc/config/i386/i386-options.c
> > > @@ -2162,6 +2162,9 @@ ix86_option_override_internal (bool main_args_p,
> > >         if (((processor_alias_table[i].flags & PTA_CX16) != 0)
> > >             && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_CX16))
> > >           opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_CX16;
> > > +       if (((processor_alias_table[i].flags & PTA_CRC32) != 0)
> > > +           && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CRC32))
> > > +         opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CRC32;
> > >         if (((processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)) != 0)
> > >             && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
> > >           opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
> > > @@ -2617,6 +2620,11 @@ ix86_option_override_internal (bool main_args_p,
> > >      opts->x_ix86_isa_flags
> > >        |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
> > >
> > > +  /* Enable crc32 instruction for -msse4.2.  */
> > > +  if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags))
> > > +    opts->x_ix86_isa_flags
> > > +      |= OPTION_MASK_ISA_CRC32 & ~opts->x_ix86_isa_flags_explicit;
> > > +
> > >    /* Enable lzcnt instruction for -mabm.  */
> > >    if (TARGET_ABM_P(opts->x_ix86_isa_flags))
> > >      opts->x_ix86_isa_flags
> > > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> > > index 97700d797a7..c50f9ab24fa 100644
> > > --- a/gcc/config/i386/i386.h
> > > +++ b/gcc/config/i386/i386.h
> > > @@ -2504,12 +2504,14 @@ constexpr wide_int_bitmask PTA_HRESET (0, HOST_WIDE_INT_1U << 23);
> > >  constexpr wide_int_bitmask PTA_KL (0, HOST_WIDE_INT_1U << 24);
> > >  constexpr wide_int_bitmask PTA_WIDEKL (0, HOST_WIDE_INT_1U << 25);
> > >  constexpr wide_int_bitmask PTA_AVXVNNI (0, HOST_WIDE_INT_1U << 26);
> > > +constexpr wide_int_bitmask PTA_CRC32 (0, HOST_WIDE_INT_1U << 27);
> > >
> > >  constexpr wide_int_bitmask PTA_X86_64_BASELINE = PTA_64BIT | PTA_MMX | PTA_SSE
> > >    | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR;
> > >  constexpr wide_int_bitmask PTA_X86_64_V2 = (PTA_X86_64_BASELINE
> > >                                             & (~PTA_NO_SAHF))
> > > -  | PTA_CX16 | PTA_POPCNT | PTA_SSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_SSSE3;
> > > +  | PTA_CRC32 | PTA_CX16 | PTA_POPCNT | PTA_SSE3 | PTA_SSE4_1 | PTA_SSE4_2
> > > +  | PTA_SSSE3;
> > >  constexpr wide_int_bitmask PTA_X86_64_V3 = PTA_X86_64_V2
> > >    | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT
> > >    | PTA_MOVBE | PTA_XSAVE;
> > > @@ -2519,7 +2521,7 @@ constexpr wide_int_bitmask PTA_X86_64_V4 = PTA_X86_64_V3
> > >  constexpr wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
> > >    | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR;
> > >  constexpr wide_int_bitmask PTA_NEHALEM = PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2
> > > -  | PTA_POPCNT;
> > > +  | PTA_CRC32 | PTA_POPCNT;
> > >  constexpr wide_int_bitmask PTA_WESTMERE = PTA_NEHALEM | PTA_PCLMUL;
> > >  constexpr wide_int_bitmask PTA_SANDYBRIDGE = PTA_WESTMERE | PTA_AVX | PTA_XSAVE
> > >    | PTA_XSAVEOPT;
> > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> > > index 9ff35d9a607..1f1d74e6275 100644
> > > --- a/gcc/config/i386/i386.md
> > > +++ b/gcc/config/i386/i386.md
> > > @@ -20998,7 +20998,7 @@ (define_insn "sse4_2_crc32<mode>"
> > >           [(match_operand:SI 1 "register_operand" "0")
> > >            (match_operand:SWI124 2 "nonimmediate_operand" "<r>m")]
> > >           UNSPEC_CRC32))]
> > > -  "TARGET_SSE4_2 || TARGET_CRC32"
> > > +  "TARGET_CRC32"
> > >    "crc32{<imodesuffix>}\t{%2, %0|%0, %2}"
> > >    [(set_attr "type" "sselog1")
> > >     (set_attr "prefix_rep" "1")
> > > @@ -21019,7 +21019,7 @@ (define_insn "sse4_2_crc32di"
> > >           [(match_operand:DI 1 "register_operand" "0")
> > >            (match_operand:DI 2 "nonimmediate_operand" "rm")]
> > >           UNSPEC_CRC32))]
> > > -  "TARGET_64BIT && (TARGET_SSE4_2 || TARGET_CRC32)"
> > > +  "TARGET_64BIT && TARGET_CRC32"
> > >    "crc32{q}\t{%2, %0|%0, %2}"
> > >    [(set_attr "type" "sselog1")
> > >     (set_attr "prefix_rep" "1")
> > > diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h
> > > index 591394076cc..5422b0fc9e0 100644
> > > --- a/gcc/config/i386/ia32intrin.h
> > > +++ b/gcc/config/i386/ia32intrin.h
> > > @@ -51,11 +51,11 @@ __bswapd (int __X)
> > >
> > >  #ifndef __iamcu__
> > >
> > > -#ifndef __SSE4_2__
> > > +#ifndef __CRC32__
> > >  #pragma GCC push_options
> > > -#pragma GCC target("sse4.2")
> > > -#define __DISABLE_SSE4_2__
> > > -#endif /* __SSE4_2__ */
> > > +#pragma GCC target("crc32")
> > > +#define __DISABLE_CRC32__
> > > +#endif /* __CRC32__ */
> > >
> > >  /* 32bit accumulate CRC32 (polynomial 0x11EDC6F41) value.  */
> > >  extern __inline unsigned int
> > > @@ -79,10 +79,10 @@ __crc32d (unsigned int __C, unsigned int __V)
> > >    return __builtin_ia32_crc32si (__C, __V);
> > >  }
> > >
> > > -#ifdef __DISABLE_SSE4_2__
> > > -#undef __DISABLE_SSE4_2__
> > > +#ifdef __DISABLE_CRC32__
> > > +#undef __DISABLE_CRC32__
> > >  #pragma GCC pop_options
> > > -#endif /* __DISABLE_SSE4_2__ */
> > > +#endif /* __DISABLE_CRC32__ */
> > >
> > >  #endif /* __iamcu__ */
> > >
> > > @@ -199,11 +199,11 @@ __bswapq (long long __X)
> > >    return __builtin_bswap64 (__X);
> > >  }
> > >
> > > -#ifndef __SSE4_2__
> > > +#ifndef __CRC32__
> > >  #pragma GCC push_options
> > > -#pragma GCC target("sse4.2")
> > > -#define __DISABLE_SSE4_2__
> > > -#endif /* __SSE4_2__ */
> > > +#pragma GCC target("crc32")
> > > +#define __DISABLE_CRC32__
> > > +#endif /* __CRC32__ */
> > >
> > >  /* 64bit accumulate CRC32 (polynomial 0x11EDC6F41) value.  */
> > >  extern __inline unsigned long long
> > > @@ -213,10 +213,10 @@ __crc32q (unsigned long long __C, unsigned long long __V)
> > >    return __builtin_ia32_crc32di (__C, __V);
> > >  }
> > >
> > > -#ifdef __DISABLE_SSE4_2__
> > > -#undef __DISABLE_SSE4_2__
> > > +#ifdef __DISABLE_CRC32__
> > > +#undef __DISABLE_CRC32__
> > >  #pragma GCC pop_options
> > > -#endif /* __DISABLE_SSE4_2__ */
> > > +#endif /* __DISABLE_CRC32__ */
> > >
> > >  /* 64bit popcnt */
> > >  extern __inline long long
> > > --
> > > 2.30.2
> > >
>
>
>
> --
> H.J.

  reply	other threads:[~2021-04-15 16:34 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-15 15:11 H.J. Lu
2021-04-15 16:14 ` Uros Bizjak
2021-04-15 16:25   ` H.J. Lu
2021-04-15 16:33     ` Uros Bizjak [this message]
2021-04-15 16:50       ` [PATCH v2] " H.J. Lu
2021-04-15 16:53         ` Uros Bizjak
2021-04-15 17:35           ` H.J. Lu
2021-04-15 17:58         ` Uros Bizjak

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAFULd4bpyQmdY5jzJz3B1qhGegwqRxu_FzAK15YX5ZM8eJ+ueA@mail.gmail.com \
    --to=ubizjak@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=hjl.tools@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).