public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: "Liu, Hongtao" <hongtao.liu@intel.com>
To: Yan Simonaytes <simonaytes.yan@ispras.ru>,
	"gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Cc: Uros Bizjak <ubizjak@gmail.com>
Subject: RE: [PATCH] Replace invariant ternlog operands
Date: Thu, 27 Jul 2023 03:00:25 +0000	[thread overview]
Message-ID: <SA1PR11MB6757209037AC39C10C688E42E501A@SA1PR11MB6757.namprd11.prod.outlook.com> (raw)
In-Reply-To: <20230725181118.27484-1-simonaytes.yan@ispras.ru>



> -----Original Message-----
> From: Yan Simonaytes <simonaytes.yan@ispras.ru>
> Sent: Wednesday, July 26, 2023 2:11 AM
> To: gcc-patches@gcc.gnu.org
> Cc: Liu, Hongtao <hongtao.liu@intel.com>; Uros Bizjak <ubizjak@gmail.com>;
> Yan Simonaytes <simonaytes.yan@ispras.ru>
> Subject: [PATCH] Replace invariant ternlog operands
> 
> Sometimes GCC generates ternlog with three operands, but some of them are
> invariant.
> For example:
> 
> vpternlogq	$252, %zmm2, %zmm1, %zmm0
> 
> In this case the zmm2 register isn't used by ternlog (imm8 252 computes zmm0 | zmm1).
> So we should replace zmm2 with zmm0 or zmm1:
> 
> vpternlogq	$252, %zmm0, %zmm1, %zmm0
> 
> When the third operand of ternlog is in memory and both of the others are invariant,
> we should add an instruction that loads this memory into a register and replace the
> first and the second operands with this register.
> So instead of
> 
> vpternlogq	$85, (%rdi), %zmm1, %zmm0
> 
> Should emit
> 
> vmovdqa64	(%rdi), %zmm0
> vpternlogq	$85, %zmm0, %zmm0, %zmm0
> 
> gcc/ChangeLog:
> 
>         * config/i386/i386.cc (ternlog_invariant_operand_mask): New helper
> 	function for replacing invariant operands.
>         (reduce_ternlog_operands): Likewise.
>         * config/i386/i386-protos.h (ternlog_invariant_operand_mask):
> Prototype here.
>         (reduce_ternlog_operands): Likewise.
>         * config/i386/sse.md:
> 
> gcc/testsuite/ChangeLog:
> 
>         * gcc.target/i386/reduce-ternlog-operands-1.c: New test.
>         * gcc.target/i386/reduce-ternlog-operands-2.c: New test.
> ---
>  gcc/config/i386/i386-protos.h                 |  2 +
>  gcc/config/i386/i386.cc                       | 45 +++++++++++++++++++
>  gcc/config/i386/sse.md                        | 43 ++++++++++++++++++
>  .../i386/reduce-ternlog-operands-1.c          | 20 +++++++++
>  .../i386/reduce-ternlog-operands-2.c          | 11 +++++
>  5 files changed, 121 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-
> 1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-
> 2.c
> 
> diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
> index 27fe73ca65c..49398ef9936 100644
> --- a/gcc/config/i386/i386-protos.h
> +++ b/gcc/config/i386/i386-protos.h
> @@ -57,6 +57,8 @@ extern int standard_80387_constant_p (rtx);  extern
> const char *standard_80387_constant_opcode (rtx);  extern rtx
> standard_80387_constant_rtx (int);  extern int standard_sse_constant_p (rtx,
> machine_mode);
> +extern int ternlog_invariant_operand_mask (rtx *operands); extern void
> +reduce_ternlog_operands (rtx *operands);
>  extern const char *standard_sse_constant_opcode (rtx_insn *, rtx *);  extern
> bool ix86_standard_x87sse_constant_load_p (const rtx_insn *, rtx);  extern
> bool ix86_pre_reload_split (void); diff --git a/gcc/config/i386/i386.cc
> b/gcc/config/i386/i386.cc index f0d6167e667..140de478571 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -5070,6 +5070,51 @@ ix86_check_no_addr_space (rtx insn)
>      }
>    return true;
>  }
> +
> +/* Return mask of invariant operands:
> +   bit number     0 1 2
> +   operand number 1 2 3.  */
> +
> +int
> +ternlog_invariant_operand_mask (rtx *operands) {
> +  int mask = 0;
> +  int imm8 = XINT (operands[4], 0);
> +
> +  if (((imm8 >> 4) & 0xF) == (imm8 & 0xF))
> +    mask |= 1;
> +  if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))
> +    mask |= (1 << 1);
> +  if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))
> +    mask |= (1 << 2);
> +
> +  return mask;
> +}
> +
> +/* Replace one of the unused operands with the one used.  */
> +
> +void
> +reduce_ternlog_operands (rtx *operands) {
> +  int mask = ternlog_invariant_operand_mask (operands);
> +
> +  if (mask & 1) /* the first operand is invariant.  */
> +    operands[1] = operands[2];
> +
> +  if (mask & 2) /* the second operand is invariant.  */
> +    operands[2] = operands[1];
> +
> +  if (mask & 4)	/* the third operand is invariant.  */
> +   operands[3] = operands[1];
> +  else if (!MEM_P (operands[3]))
> +    {
> +      if (mask & 1) /* the first operand is invariant.  */
> +	operands[1] = operands[3];
> +      if (mask & 2) /* the second operand is invariant.  */
> +	operands[2] = operands[3];
> +    }
> +}
> +
> 
> 
> 
>  /* Initialize the table of extra 80387 mathematical constants.  */
> 
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index
> a2099373123..f88d82b315c 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -12625,6 +12625,49 @@
>  		      (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
>  		      (const_string "*")))])
> 
> +;; If the first and the second operands of ternlog are invariant and ;;
> +the third operand is memory ;; then we should add load third operand
> +from memory to register and ;; replace first and second operands with
> +this register (define_split
> +  [(set (match_operand:V 0 "register_operand")
> +	(unspec:V
> +	  [(match_operand:V 1 "register_operand")
> +	   (match_operand:V 2 "register_operand")
> +	   (match_operand:V 3 "memory_operand")
> +	   (match_operand:SI 4 "const_0_to_255_operand")]
> +	  UNSPEC_VTERNLOG))]
> +  "ternlog_invariant_operand_mask (operands) == 3 && !reload_completed"
Maybe better with "!reload_completed  && ternlog_invariant_operand_mask (operands) == 3"
> +  [(set (match_dup 0)
> +	(match_dup 3))
> +   (set (match_dup 0)
> +	(unspec:V
> +	  [(match_dup 0)
> +	   (match_dup 0)
> +	   (match_dup 0)
> +	   (match_dup 4)]
> +	  UNSPEC_VTERNLOG))])
> +
> +;; Replace invariant ternlog operands with used operands ;; (except for
> +the case discussed in the previous define_split) (define_split
> +  [(set (match_operand:V 0 "register_operand")
> +	(unspec:V
> +	  [(match_operand:V 1 "register_operand")
> +	   (match_operand:V 2 "register_operand")
> +	   (match_operand:V 3 "nonimmediate_operand")
> +	   (match_operand:SI 4 "const_0_to_255_operand")]
> +	  UNSPEC_VTERNLOG))]
> +  "ternlog_invariant_operand_mask (operands) != 0 && !reload_completed"
Ditto.
> +  [(set (match_dup 0)
> +	(unspec:V
> +	  [(match_dup 1)
> +	   (match_dup 2)
> +	   (match_dup 3)
> +	   (match_dup 4)]
> +	  UNSPEC_VTERNLOG))]
> +  "reduce_ternlog_operands (operands);")
> +
Others LGTM.
>  ;; There must be lots of other combinations like  ;;  ;; (any_logic:V diff --git
> a/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-1.c
> b/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-1.c
> new file mode 100644
> index 00000000000..a7063df9dcb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-1.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512f -O2" } */
> +/* { dg-final { scan-assembler-times {vmovdqa*} "4" } } */
> +
> +#include <immintrin.h>
> +
> +__m512i f(__m512i* a, __m512i* b, __m512i* c) {
> +	return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], 119); }
> +
> +__m512i g(__m512i* a, __m512i* b, __m512i* c) {
> +	return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], 250); }
> +
> +__m512i h(__m512i* a, __m512i* b, __m512i* c) {
> +	return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], 252); }
> diff --git a/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-2.c
> b/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-2.c
> new file mode 100644
> index 00000000000..b44986cc259
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/reduce-ternlog-operands-2.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512f -O2" } */
> +/* { dg-final { scan-assembler "vpternlog.*0.*0.*0" } } */
> +
> +#include <immintrin.h>
> +
> +__m512i f(__m512i a, __m512i b, __m512i* c) {
> +	return _mm512_ternarylogic_epi64 (a, b, c[0], 0x55); }
> +
> --
> 2.34.1


  reply	other threads:[~2023-07-27  3:00 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-25 18:11 Yan Simonaytes
2023-07-27  3:00 ` Liu, Hongtao [this message]
2023-08-03 17:30   ` Alexander Monakov
2023-08-04  0:43     ` Hongtao Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=SA1PR11MB6757209037AC39C10C688E42E501A@SA1PR11MB6757.namprd11.prod.outlook.com \
    --to=hongtao.liu@intel.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=simonaytes.yan@ispras.ru \
    --cc=ubizjak@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).