public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [committed] pru: Fix CLZ expansion for QI and HI modes
@ 2023-01-22 19:50 Dimitar Dimitrov
  2023-01-24  9:32 ` Richard Sandiford
  0 siblings, 1 reply; 2+ messages in thread
From: Dimitar Dimitrov @ 2023-01-22 19:50 UTC (permalink / raw)
  To: gcc-patches; +Cc: Dimitar Dimitrov

The recent gcc.dg/tree-ssa/clz-char.c test case failed for PRU target,
exposing a wrong code generation bug in the PRU backend.  The "clz"
pattern did not produce correct output for QI and HI input operand
modes.  SI mode is ok.

The "clz" pattern is expanded to an LMBD instruction to get the
left-most bit position having value "1".  In turn, to get the correct
"clz" value, that bit position must be subtracted from the MSB bit
position of the input operand.  The old behaviour of hard-coding 31
for MSB bit position is wrong.

The LMBD instruction returns 32 if input operand is zero, irrespective
of its register mode.  This maps nicely for SI mode, where the "clz"
pattern outputs -1.  It also leads to peculiar (but valid!) output
values from the "clz" pattern for QI and HI zero-valued inputs.

Pushed to trunk. This patch is confined to the PRU backend only,
and is fixing a wrong code generation.  Hence I deem it suitable
for stage 4.

Is it ok to push a backport to gcc-11 and gcc-12 branches?

gcc/ChangeLog:

	* config/pru/pru.h (CLZ_DEFINED_VALUE_AT_ZERO): Fix value for QI
	and HI input modes.
	* config/pru/pru.md (clz): Fix generated code for QI and HI
	input modes.

gcc/testsuite/ChangeLog:

	* gcc.target/pru/clz-hi-2.c: New test.
	* gcc.target/pru/clz-hi.c: New test.

Signed-off-by: Dimitar Dimitrov <dimitar@dinux.eu>
---
 gcc/config/pru/pru.h                    |  5 ++--
 gcc/config/pru/pru.md                   | 15 ++++++++---
 gcc/testsuite/gcc.target/pru/clz-hi-2.c | 24 +++++++++++++++++
 gcc/testsuite/gcc.target/pru/clz-hi.c   | 35 +++++++++++++++++++++++++
 4 files changed, 74 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/pru/clz-hi-2.c
 create mode 100644 gcc/testsuite/gcc.target/pru/clz-hi.c

diff --git a/gcc/config/pru/pru.h b/gcc/config/pru/pru.h
index 3658036cccb..1b5e874bc06 100644
--- a/gcc/config/pru/pru.h
+++ b/gcc/config/pru/pru.h
@@ -566,8 +566,9 @@ do {									\
 
 #define CASE_VECTOR_MODE Pmode
 
-/* See definition of clz pattern for rationale of value -1.  */
-#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = -1, 2)
+/* See definition of clz pattern for rationale of the value.  */
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE)	\
+	((VALUE) = GET_MODE_BITSIZE (MODE) - 1 - 32, 2)
 
 /* Jumps are cheap on PRU.  */
 #define LOGICAL_OP_NON_SHORT_CIRCUIT		0
diff --git a/gcc/config/pru/pru.md b/gcc/config/pru/pru.md
index dfe08071e04..aa8e42a3587 100644
--- a/gcc/config/pru/pru.md
+++ b/gcc/config/pru/pru.md
@@ -1723,8 +1723,16 @@ (define_insn "pru_halt"
   [(set_attr "type" "control")])
 
 ;; Count Leading Zeros implemented using LMBD.
-;; LMBD returns 32 if bit value is not present, and we subtract 31 to get CLZ.
-;; Hence we get a defined value -1 for CLZ_DEFINED_VALUE_AT_ZERO.
+;;
+;; LMBD returns 32 if bit value is not present, for any kind of input MODE.
+;; The LMBD's search result for a "1" bit is subtracted from the
+;; mode bit size minus one, in order to get CLZ.
+;;
+;; Hence for SImode we get a defined value -1 for CLZ_DEFINED_VALUE_AT_ZERO.
+;;
+;; The QImode and HImode defined values for zero inputs end up to be
+;; non-standard (-25 and -17).  But this is considered acceptable in
+;; order to keep the CLZ expansion to only two instructions.
 (define_expand "clz<mode>2"
   [(set (match_operand:QISI 0 "register_operand")
 	(clz:QISI (match_operand:QISI 1 "register_operand")))]
@@ -1735,7 +1743,8 @@ (define_expand "clz<mode>2"
   rtx tmpval = gen_reg_rtx (<MODE>mode);
 
   emit_insn (gen_pru_lmbd (<MODE>mode, tmpval, src, const1_rtx));
-  emit_insn (gen_sub3_insn (dst, GEN_INT (31), tmpval));
+  int msb_bitn = GET_MODE_BITSIZE (<MODE>mode) - 1;
+  emit_insn (gen_sub3_insn (dst, GEN_INT (msb_bitn), tmpval));
   DONE;
 })
 
diff --git a/gcc/testsuite/gcc.target/pru/clz-hi-2.c b/gcc/testsuite/gcc.target/pru/clz-hi-2.c
new file mode 100644
index 00000000000..af877c7021e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/pru/clz-hi-2.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-ch" } */
+
+/* This test case is based on gcc.dg/tree-ssa/clz-char.c. */
+
+#define PREC (sizeof(short) * 8)
+
+int
+__attribute__ ((noinline, noclone))
+foo (unsigned short b) {
+    int c = 0;
+
+    if (b == 0)
+      return PREC;
+
+    while (!(b & (1 << (PREC - 1)))) {
+	b <<= 1;
+	c++;
+    }
+
+    return c;
+}
+
+/* { dg-final { scan-assembler "lmbd\\tr\[012\]\[0-9\]?.w\[0-2\], r\[012\]\[0-9\]?.w\[0-2\], 1" } } */
diff --git a/gcc/testsuite/gcc.target/pru/clz-hi.c b/gcc/testsuite/gcc.target/pru/clz-hi.c
new file mode 100644
index 00000000000..9350913b6d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/pru/clz-hi.c
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
+
+/* This test case is based on gcc.dg/tree-ssa/clz-char.c. */
+
+#define PREC (sizeof(short) * 8)
+
+int
+__attribute__ ((noinline, noclone))
+foo (unsigned short b) {
+    int c = 0;
+
+    if (b == 0)
+      return PREC;
+
+    while (!(b & (1 << (PREC - 1)))) {
+	b <<= 1;
+	c++;
+    }
+
+    return c;
+}
+
+int main()
+{
+  if (foo(0) != PREC)
+    __builtin_abort ();
+  if (foo(1 << (PREC - 1)) != 0)
+    __builtin_abort ();
+  if (foo(35) != PREC - 6)
+    __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 1 "optimized" } } */
-- 
2.39.0


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [committed] pru: Fix CLZ expansion for QI and HI modes
  2023-01-22 19:50 [committed] pru: Fix CLZ expansion for QI and HI modes Dimitar Dimitrov
@ 2023-01-24  9:32 ` Richard Sandiford
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Sandiford @ 2023-01-24  9:32 UTC (permalink / raw)
  To: Dimitar Dimitrov; +Cc: gcc-patches, rguenther

Dimitar Dimitrov <dimitar@dinux.eu> writes:
> The recent gcc.dg/tree-ssa/clz-char.c test case failed for PRU target,
> exposing a wrong code generation bug in the PRU backend.  The "clz"
> pattern did not produce correct output for QI and HI input operand
> modes.  SI mode is ok.
>
> The "clz" pattern is expanded to an LMBD instruction to get the
> left-most bit position having value "1".  In turn, to get the correct
> "clz" value, that bit position must be subtracted from the MSB bit
> position of the input operand.  The old behaviour of hard-coding 31
> for MSB bit position is wrong.
>
> The LMBD instruction returns 32 if input operand is zero, irrespective
> of its register mode.  This maps nicely for SI mode, where the "clz"
> pattern outputs -1.  It also leads to peculiar (but valid!) output
> values from the "clz" pattern for QI and HI zero-valued inputs.
>
> Pushed to trunk. This patch is confined to the PRU backend only,
> and is fixing a wrong code generation.  Hence I deem it suitable
> for stage 4.
>
> Is it ok to push a backport to gcc-11 and gcc-12 branches?

Since it's a backend-specific patch, I think that's your call.

Thanks,
Richard

> gcc/ChangeLog:
>
> 	* config/pru/pru.h (CLZ_DEFINED_VALUE_AT_ZERO): Fix value for QI
> 	and HI input modes.
> 	* config/pru/pru.md (clz): Fix generated code for QI and HI
> 	input modes.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/pru/clz-hi-2.c: New test.
> 	* gcc.target/pru/clz-hi.c: New test.
>
> Signed-off-by: Dimitar Dimitrov <dimitar@dinux.eu>
> ---
>  gcc/config/pru/pru.h                    |  5 ++--
>  gcc/config/pru/pru.md                   | 15 ++++++++---
>  gcc/testsuite/gcc.target/pru/clz-hi-2.c | 24 +++++++++++++++++
>  gcc/testsuite/gcc.target/pru/clz-hi.c   | 35 +++++++++++++++++++++++++
>  4 files changed, 74 insertions(+), 5 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/pru/clz-hi-2.c
>  create mode 100644 gcc/testsuite/gcc.target/pru/clz-hi.c
>
> diff --git a/gcc/config/pru/pru.h b/gcc/config/pru/pru.h
> index 3658036cccb..1b5e874bc06 100644
> --- a/gcc/config/pru/pru.h
> +++ b/gcc/config/pru/pru.h
> @@ -566,8 +566,9 @@ do {									\
>  
>  #define CASE_VECTOR_MODE Pmode
>  
> -/* See definition of clz pattern for rationale of value -1.  */
> -#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = -1, 2)
> +/* See definition of clz pattern for rationale of the value.  */
> +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE)	\
> +	((VALUE) = GET_MODE_BITSIZE (MODE) - 1 - 32, 2)
>  
>  /* Jumps are cheap on PRU.  */
>  #define LOGICAL_OP_NON_SHORT_CIRCUIT		0
> diff --git a/gcc/config/pru/pru.md b/gcc/config/pru/pru.md
> index dfe08071e04..aa8e42a3587 100644
> --- a/gcc/config/pru/pru.md
> +++ b/gcc/config/pru/pru.md
> @@ -1723,8 +1723,16 @@ (define_insn "pru_halt"
>    [(set_attr "type" "control")])
>  
>  ;; Count Leading Zeros implemented using LMBD.
> -;; LMBD returns 32 if bit value is not present, and we subtract 31 to get CLZ.
> -;; Hence we get a defined value -1 for CLZ_DEFINED_VALUE_AT_ZERO.
> +;;
> +;; LMBD returns 32 if bit value is not present, for any kind of input MODE.
> +;; The LMBD's search result for a "1" bit is subtracted from the
> +;; mode bit size minus one, in order to get CLZ.
> +;;
> +;; Hence for SImode we get a defined value -1 for CLZ_DEFINED_VALUE_AT_ZERO.
> +;;
> +;; The QImode and HImode defined values for zero inputs end up to be
> +;; non-standard (-25 and -17).  But this is considered acceptable in
> +;; order to keep the CLZ expansion to only two instructions.
>  (define_expand "clz<mode>2"
>    [(set (match_operand:QISI 0 "register_operand")
>  	(clz:QISI (match_operand:QISI 1 "register_operand")))]
> @@ -1735,7 +1743,8 @@ (define_expand "clz<mode>2"
>    rtx tmpval = gen_reg_rtx (<MODE>mode);
>  
>    emit_insn (gen_pru_lmbd (<MODE>mode, tmpval, src, const1_rtx));
> -  emit_insn (gen_sub3_insn (dst, GEN_INT (31), tmpval));
> +  int msb_bitn = GET_MODE_BITSIZE (<MODE>mode) - 1;
> +  emit_insn (gen_sub3_insn (dst, GEN_INT (msb_bitn), tmpval));
>    DONE;
>  })
>  
> diff --git a/gcc/testsuite/gcc.target/pru/clz-hi-2.c b/gcc/testsuite/gcc.target/pru/clz-hi-2.c
> new file mode 100644
> index 00000000000..af877c7021e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/pru/clz-hi-2.c
> @@ -0,0 +1,24 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-tree-ch" } */
> +
> +/* This test case is based on gcc.dg/tree-ssa/clz-char.c. */
> +
> +#define PREC (sizeof(short) * 8)
> +
> +int
> +__attribute__ ((noinline, noclone))
> +foo (unsigned short b) {
> +    int c = 0;
> +
> +    if (b == 0)
> +      return PREC;
> +
> +    while (!(b & (1 << (PREC - 1)))) {
> +	b <<= 1;
> +	c++;
> +    }
> +
> +    return c;
> +}
> +
> +/* { dg-final { scan-assembler "lmbd\\tr\[012\]\[0-9\]?.w\[0-2\], r\[012\]\[0-9\]?.w\[0-2\], 1" } } */
> diff --git a/gcc/testsuite/gcc.target/pru/clz-hi.c b/gcc/testsuite/gcc.target/pru/clz-hi.c
> new file mode 100644
> index 00000000000..9350913b6d5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/pru/clz-hi.c
> @@ -0,0 +1,35 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */
> +
> +/* This test case is based on gcc.dg/tree-ssa/clz-char.c. */
> +
> +#define PREC (sizeof(short) * 8)
> +
> +int
> +__attribute__ ((noinline, noclone))
> +foo (unsigned short b) {
> +    int c = 0;
> +
> +    if (b == 0)
> +      return PREC;
> +
> +    while (!(b & (1 << (PREC - 1)))) {
> +	b <<= 1;
> +	c++;
> +    }
> +
> +    return c;
> +}
> +
> +int main()
> +{
> +  if (foo(0) != PREC)
> +    __builtin_abort ();
> +  if (foo(1 << (PREC - 1)) != 0)
> +    __builtin_abort ();
> +  if (foo(35) != PREC - 6)
> +    __builtin_abort ();
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 1 "optimized" } } */

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-01-24  9:32 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-22 19:50 [committed] pru: Fix CLZ expansion for QI and HI modes Dimitar Dimitrov
2023-01-24  9:32 ` Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).