public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] xtensa: Optimize several boolean evaluations of EQ/NE against constant zero
       [not found] <010fff65-5d8b-774c-fce5-81136424e131.ref@yahoo.co.jp>
@ 2023-09-08  8:48 ` Takayuki 'January June' Suwa
  2023-09-14  2:41   ` Max Filippov
  0 siblings, 1 reply; 2+ messages in thread
From: Takayuki 'January June' Suwa @ 2023-09-08  8:48 UTC (permalink / raw)
  To: GCC Patches; +Cc: Max Filippov

An idiomatic way in Xtensa to evaluate, as a boolean, whether a register is
zero or not is to assign 1 and 0 to the temporary and destination registers
respectively, and then issue the MOV[EQ/NE]Z machine instruction
(See 8.3.2 Instruction Idioms, Xtensa ISA refman., p.599):

;; A2 = (A3 != 0) ? 1 : 0;
	movi.n	a9, 1
	movi.n	a2, 0
	movnez	a2, a9, a3  ;; if (A3 != 0) A2 = A9;

As you can see in the above idiom, if the source and destination are the
same register, a move instruction from the source to another temporary
register must be prepended:

;; A2 = (A2 == 0) ? 1 : 0;
	mov.n	a10, a2
	movi.n	a9, 1
	movi.n	a2, 0
	moveqz	a2, a9, a10  ;; if (A10 == 0) A2 = A9;

Fortunately, we can reduce the number of instructions and temporary
registers with a few tweaks:

;; A2 = (A3 != 0) ? 1 : 0;
	movi.n	a2, 1
	moveqz	a2, a3, a3  ;; if (A3 == 0) A2 = A3;

;; A2 = (A2 != 0) ? 1 : 0;
	movi.n	a9, 1
	movnez	a2, a9, a2  ;; if (A2 != 0) A2 = A9;

;; A2 = (A3 == 0) ? 1 : 0;
	movi.n	a2, -1
        moveqz	a2, a3, a3  ;; if (A3 == 0) A2 = A3;
        addi.n	a2, a2, 1

;; A2 = (A2 == 0) ? 1 : 0;
	movi.n	a9, -1
	movnez	a2, a9, a2  ;; if (A2 != 0) A2 = A9;
	addi.n	a2, a2, 1

Additionally, if TARGET_NSA is configured, the NSAU machine instruction
returns 32 iff its source is 0 and a value less than 32 otherwise, which can
be used in boolean evaluation of the EQ comparison.

;; A2 = (A3 == 0) ? 1 : 0;
	nsau	a2, a3      ;; Source and destination can be the same register
	srli	a2, a2, 5

Furthermore, this patch also saves one instruction when determining whether
the result of ANDing a register with a mask value whose 1 bits are contiguous
from the upper or lower bit end (for example, 0xFFE00000 or 0x003FFFFF) is 0
or not.

gcc/ChangeLog:

	* config/xtensa/xtensa.cc (xtensa_expand_scc):
	Revert the changes from the last patch, as the RTL expansion pass
	runs too early to determine the physical registers.
	* config/xtensa/xtensa.md (*eqne_INT_MIN): Ditto.
	(eq_zero_NSA, eqne_zero, *eqne_zero_masked_bits): New patterns.
---
 gcc/config/xtensa/xtensa.cc |  35 +----------
 gcc/config/xtensa/xtensa.md | 112 ++++++++++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+), 34 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 1afaa1cc94e..2481b028ca1 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -994,41 +994,8 @@ xtensa_expand_scc (rtx operands[4], machine_mode cmp_mode)
   rtx cmp;
   rtx one_tmp, zero_tmp;
   rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx);
-  enum rtx_code code = GET_CODE (operands[1]);
 
-  if (cmp_mode == SImode && CONST_INT_P (operands[3])
-      && (code == EQ || code == NE))
-    switch (INTVAL (operands[3]))
-      {
-      case 0:
-	if (TARGET_MINMAX)
-	  {
-	    one_tmp = force_reg (SImode, const1_rtx);
-	    emit_insn (gen_uminsi3 (dest, operands[2], one_tmp));
-	    if (code == EQ)
-	      emit_insn (gen_xorsi3 (dest, dest, one_tmp));
-	    return 1;
-	  }
-	break;
-      case -2147483648:
-	if (TARGET_ABS)
-	  {
-	    emit_insn (gen_abssi2 (dest, operands[2]));
-	    if (code == EQ)
-	      emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (31)));
-	    else
-	      {
-		emit_insn (gen_ashrsi3 (dest, dest, GEN_INT (31)));
-		emit_insn (gen_addsi3 (dest, dest, const1_rtx));
-	      }
-	    return 1;
-	  }
-	break;
-      default:
-	break;
-      }
-
-  if (! (cmp = gen_conditional_move (code, cmp_mode,
+  if (! (cmp = gen_conditional_move (GET_CODE (operands[1]), cmp_mode,
 				     operands[2], operands[3])))
     return 0;
 
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index d6505e7eb70..6476fdc395a 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -3188,6 +3188,118 @@
 		      (const_int 5)
 		      (const_int 6)))])
 
+(define_insn_and_split "eq_zero_NSA"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(eq:SI (match_operand:SI 1 "register_operand" "r")
+	       (const_int 0)))]
+  "TARGET_NSA"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(clz:SI (match_dup 1)))
+   (set (match_dup 0)
+	(lshiftrt:SI (match_dup 0)
+		     (const_int 5)))]
+  ""
+  [(set_attr "type"	"move")
+   (set_attr "mode"	"SI")
+   (set_attr "length"	"6")])
+
+(define_insn_and_split "eqne_zero"
+  [(set (match_operand:SI 0 "register_operand" "=a,&a")
+	(match_operator:SI 2 "boolean_operator"
+		[(match_operand:SI 1 "register_operand" "0,r")
+		 (const_int 0)]))
+   (clobber (match_scratch:SI 3 "=&a,X"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  enum rtx_code code = GET_CODE (operands[2]);
+  int same_p = REGNO (operands[0]) == REGNO (operands[1]);
+  emit_move_insn (same_p ? operands[3] : operands[0],
+		  code == EQ ? constm1_rtx : const1_rtx);
+  emit_insn (gen_movsicc_internal0 (operands[0], operands[1],
+				    same_p ? operands[3] : operands[1],
+				    operands[0],
+				    gen_rtx_fmt_ee (same_p ? NE : EQ,
+						    VOIDmode,
+						    operands[1],
+						    const0_rtx)));
+  if (code == EQ)
+    emit_insn (gen_addsi3 (operands[0], operands[0], const1_rtx));
+  DONE;
+}
+  [(set_attr "type"	"move")
+   (set_attr "mode"	"SI")
+   (set (attr "length")
+	(if_then_else (match_test "GET_CODE (operands[2]) == EQ")
+                      (if_then_else (match_test "TARGET_DENSITY")
+				    (const_int 7)
+				    (const_int 9))
+		      (if_then_else (match_test "TARGET_DENSITY")
+				    (const_int 5)
+				    (const_int 6))))])
+
+(define_insn_and_split "*eqne_zero_masked_bits"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(match_operator 3 "boolean_operator"
+		[(and:SI (match_operand:SI 1 "register_operand" "r")
+			 (match_operand:SI 2 "const_int_operand" "i"))
+		 (const_int 0)]))]
+  "IN_RANGE (exact_log2 (INTVAL (operands[2]) + 1), 17, 31)
+   || IN_RANGE (exact_log2 (-INTVAL (operands[2])), 1, 30)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  HOST_WIDE_INT mask = INTVAL (operands[2]);
+  int n;
+  enum rtx_code code = GET_CODE (operands[3]);
+  if (IN_RANGE (n = exact_log2 (mask + 1), 17, 31))
+    emit_insn (gen_ashlsi3 (operands[0], operands[1], GEN_INT (32 - n)));
+  else
+    emit_insn (gen_lshrsi3 (operands[0], operands[1],
+			    GEN_INT (floor_log2 (-mask))));
+  if (TARGET_NSA && code == EQ)
+    emit_insn (gen_eq_zero_NSA (operands[0], operands[0]));
+  else
+    emit_insn (gen_eqne_zero (operands[0], operands[0],
+			      gen_rtx_fmt_ee (code, VOIDmode,
+					      operands[0], const0_rtx)));
+  DONE;
+})
+
+(define_insn_and_split "*eqne_INT_MIN"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(match_operator:SI 2 "boolean_operator"
+		[(match_operand:SI 1 "register_operand" "r")
+		 (const_int -2147483648)]))]
+  "TARGET_ABS"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_abssi2 (operands[0], operands[1]));
+  if (GET_CODE (operands[2]) == EQ)
+    emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (31)));
+  else
+    {
+      emit_insn (gen_ashrsi3 (operands[0], operands[0], GEN_INT (31)));
+      emit_insn (gen_addsi3 (operands[0], operands[0], const1_rtx));
+    }
+  DONE;
+}
+  [(set_attr "type"	"move")
+   (set_attr "mode"	"SI")
+   (set (attr "length")
+	(if_then_else (match_test "GET_CODE (operands[2]) == EQ")
+		      (const_int 6)
+		      (if_then_else (match_test "TARGET_DENSITY")
+				    (const_int 8)
+				    (const_int 9))))])
+
 (define_peephole2
   [(set (match_operand:SI 0 "register_operand")
 	(match_operand:SI 6 "reload_operand"))
-- 
2.30.2

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] xtensa: Optimize several boolean evaluations of EQ/NE against constant zero
  2023-09-08  8:48 ` [PATCH] xtensa: Optimize several boolean evaluations of EQ/NE against constant zero Takayuki 'January June' Suwa
@ 2023-09-14  2:41   ` Max Filippov
  0 siblings, 0 replies; 2+ messages in thread
From: Max Filippov @ 2023-09-14  2:41 UTC (permalink / raw)
  To: Takayuki 'January June' Suwa; +Cc: GCC Patches

On Fri, Sep 8, 2023 at 1:49 AM Takayuki 'January June' Suwa
<jjsuwa_sys3175@yahoo.co.jp> wrote:
>
> An idiomatic implementation of boolean evaluation of whether a register is
> zero or not in Xtensa is to assign 0 and 1 to the temporary and destination,
> and then issue the MOV[EQ/NE]Z machine instruction
> (See 8.3.2 Instruction Idioms, Xtensa ISA refman., p.599):
>
> ;; A2 = (A3 != 0) ? 1 : 0;
>         movi.n  a9, 1
>         movi.n  a2, 0
>         movnez  a2, a9, a3  ;; if (A3 != 0) A2 = A9;
>
> As you can see in the above idiom, if the source and destination are the
> same register, a move instruction from the source to another temporary
> register must be prepended:
>
> ;; A2 = (A2 == 0) ? 1 : 0;
>         mov.n   a10, a2
>         movi.n  a9, 1
>         movi.n  a2, 0
>         moveqz  a2, a9, a10  ;; if (A10 == 0) A2 = A9;
>
> Fortunately, we can reduce the number of instructions and temporary
> registers with a few tweaks:
>
> ;; A2 = (A3 != 0) ? 1 : 0;
>         movi.n  a2, 1
>         moveqz  a2, a3, a3  ;; if (A3 == 0) A2 = A3;
>
> ;; A2 = (A2 != 0) ? 1 : 0;
>         movi.n  a9, 1
>         movnez  a2, a9, a2  ;; if (A2 != 0) A2 = A9;
>
> ;; A2 = (A3 == 0) ? 1 : 0;
>         movi.n  a2, -1
>         moveqz  a2, a3, a3  ;; if (A3 == 0) A2 = A3;
>         addi.n  a2, a2, 1
>
> ;; A2 = (A2 == 0) ? 1 : 0;
>         movi.n  a9, -1
>         movnez  a2, a9, a2  ;; if (A2 != 0) A2 = A9;
>         addi.n  a2, a2, 1
>
> Additionally, if TARGET_NSA is configured, the fact that it returns 32 iff
> the source of the NSAU machine instruction is 0, otherwise less than, can be
> used in boolean evaluation of EQ comparison.
>
> ;; A2 = (A3 == 0) ? 1 : 0;
>         nsau    a2, a3      ;; Source and destination can be the same register
>         srli    a2, a2, 5
>
> Furthermore, this patch also saves one instruction when determining whether
> the ANDing with mask values in which 1s are lined up from the upper or lower
> bit end (for example, 0xFFE00000 or 0x003FFFFF) is 0 or not.
>
> gcc/ChangeLog:
>
>         * config/xtensa/xtensa.cc (xtensa_expand_scc):
>         Revert the changes from the last patch, as the work in the RTL
>         expansion pass is too far to determine the physical registers.
>         * config/xtensa/xtensa.md (*eqne_INT_MIN): Ditto.
>         (eq_zero_NSA, eqne_zero, *eqne_zero_masked_bits): New patterns.
> ---
>  gcc/config/xtensa/xtensa.cc |  35 +----------
>  gcc/config/xtensa/xtensa.md | 112 ++++++++++++++++++++++++++++++++++++
>  2 files changed, 113 insertions(+), 34 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-09-14  2:41 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <010fff65-5d8b-774c-fce5-81136424e131.ref@yahoo.co.jp>
2023-09-08  8:48 ` [PATCH] xtensa: Optimize several boolean evaluations of EQ/NE against constant zero Takayuki 'January June' Suwa
2023-09-14  2:41   ` Max Filippov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).