public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
@ 2023-10-20  6:50 Ajit Agarwal
  2023-10-20 23:56 ` Vineet Gupta
  0 siblings, 1 reply; 20+ messages in thread
From: Ajit Agarwal @ 2023-10-20  6:50 UTC (permalink / raw)
  To: gcc-patches
  Cc: Jeff Law, Vineet Gupta, Richard Biener, Segher Boessenkool,
	Peter Bergner

Hello All:

This version 9 of the patch uses abi interfaces to remove zero and sign extension elimination.
Bootstrapped and regtested on powerpc-linux-gnu.

In this version (version 9) of the patch following review comments are incorporated.

a) Removal of hard code zero_extend and sign_extend  in abi interfaces.
b) Source and destination with different registers are considered.
c) Further enhancements.
d) Added sign extension elimination using abi interfaces.


Thanks & Regards
Ajit

ree: Improve ree pass for rs6000 target using defined abi interfaces

For rs6000 target we see redundant zero and sign extension and done
to improve ree pass to eliminate such redundant zero and sign extension
using defined ABI interfaces.

2023-10-20  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>

gcc/ChangeLog:

        * ree.cc (combine_reaching_defs): Use of zero_extend and sign_extend
        defined abi interfaces.
        (add_removable_extension): Use of defined abi interfaces for no
        reaching defs.
        (abi_extension_candidate_return_reg_p): New function.
        (abi_extension_candidate_p): New function.
        (abi_extension_candidate_argno_p): New function.
        (abi_handle_regs): New function.
        (abi_target_promote_function_mode): New function.

gcc/testsuite/ChangeLog:

        * g++.target/powerpc/zext-elim-3.C
---
 gcc/ree.cc                                    | 151 +++++++++++++++++-
 .../g++.target/powerpc/zext-elim-3.C          |  13 ++
 2 files changed, 161 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C

diff --git a/gcc/ree.cc b/gcc/ree.cc
index fc04249fa84..9f21f0e9907 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -514,7 +514,8 @@ get_uses (rtx_insn *insn, rtx reg)
     if (REGNO (DF_REF_REG (def)) == REGNO (reg))
       break;
 
-  gcc_assert (def != NULL);
+  if (def == NULL)
+    return NULL;
 
   ref_chain = DF_REF_CHAIN (def);
 
@@ -750,6 +751,124 @@ get_extended_src_reg (rtx src)
   return src;
 }
 
+/* Return TRUE if target mode is equal to source mode of zero_extend
+   or sign_extend otherwise false.  */
+
+static bool
+abi_target_promote_function_mode (machine_mode mode)
+{
+  int unsignedp;
+  machine_mode tgt_mode
+    = targetm.calls.promote_function_mode (NULL_TREE, mode, &unsignedp,
+					   NULL_TREE, 1);
+
+  if (tgt_mode == mode)
+    return true;
+  else
+    return false;
+}
+
+/* Return TRUE if the candidate insn is zero extend and regno is
+   a return registers.  */
+
+static bool
+abi_extension_candidate_return_reg_p (/*rtx_insn *insn, */int regno)
+{
+  if (targetm.calls.function_value_regno_p (regno))
+    return true;
+
+  return false;
+}
+
+/* Return TRUE if reg source operand of zero_extend is argument registers
+   and not return registers and source and destination operand are same
+   and mode of source and destination operand are not same.  */
+
+static bool
+abi_extension_candidate_p (rtx_insn *insn)
+{
+  rtx set = single_set (insn);
+  machine_mode dst_mode = GET_MODE (SET_DEST (set));
+  rtx orig_src = XEXP (SET_SRC (set), 0);
+
+  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
+      || abi_extension_candidate_return_reg_p (/*insn,*/ REGNO (orig_src)))
+    return false;
+
+  /* Mode of destination and source should be different.  */
+  if (dst_mode == GET_MODE (orig_src))
+    return false;
+
+  machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0));
+  bool promote_p = abi_target_promote_function_mode (mode);
+
+  /* REGNO of source and destination should be same if not
+      promoted.  */
+  if (!promote_p && REGNO (SET_DEST (set)) != REGNO (orig_src))
+    return false;
+
+  return true;
+}
+
+/* Return TRUE if the candidate insn is zero extend and regno is
+   an argument registers.  */
+
+static bool
+abi_extension_candidate_argno_p (/*rtx_code code, */int regno)
+{
+  if (FUNCTION_ARG_REGNO_P (regno))
+    return true;
+
+  return false;
+}
+
+/* Return TRUE if the candidate insn doesn't have defs and have
+ * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class.  */
+
+static bool
+abi_handle_regs (rtx_insn *insn)
+{
+  if (side_effects_p (PATTERN (insn)))
+    return false;
+
+  struct df_link *uses = get_uses (insn, SET_DEST (PATTERN (insn)));
+
+  if (!uses)
+    return false;
+
+  for (df_link *use = uses; use; use = use->next)
+    {
+      if (!use->ref)
+	return false;
+
+      if (BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (DF_REF_INSN (use->ref)))
+	return false;
+
+      rtx_insn *use_insn = DF_REF_INSN (use->ref);
+
+      if (GET_CODE (PATTERN (use_insn)) == SET)
+	{
+	  rtx_code code = GET_CODE (SET_SRC (PATTERN (use_insn)));
+
+	  if (GET_RTX_CLASS (code) == RTX_BIN_ARITH
+	      || GET_RTX_CLASS (code) == RTX_COMM_ARITH
+	      || GET_RTX_CLASS (code) == RTX_UNARY)
+	    return false;
+	}
+     }
+
+  rtx set = single_set (insn);
+
+  if (GET_CODE (SET_SRC (set)) == SIGN_EXTEND)
+    {
+      machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0));
+      bool promote_p = abi_target_promote_function_mode (mode);
+
+      return promote_p;
+    }
+  return true;
+}
+
 /* This function goes through all reaching defs of the source
    of the candidate for elimination (CAND) and tries to combine
    the extension with the definition instruction.  The changes
@@ -770,6 +889,11 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
 
   state->defs_list.truncate (0);
   state->copies_list.truncate (0);
+  rtx orig_src = XEXP (SET_SRC (cand->expr),0);
+
+  if (abi_extension_candidate_p (cand->insn)
+      && (!get_defs (cand->insn, orig_src, NULL)))
+    return abi_handle_regs (cand->insn);
 
   outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
 
@@ -1036,6 +1160,15 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
         }
     }
 
+  rtx insn_set = single_set (cand->insn);
+
+  machine_mode mode = (GET_MODE (XEXP (SET_SRC (insn_set), 0)));
+
+  bool promote_p = abi_target_promote_function_mode (mode);
+
+  if (promote_p)
+    return true;
+
   if (merge_successful)
     {
       /* Commit the changes here if possible
@@ -1112,6 +1245,14 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
       rtx reg = XEXP (src, 0);
       struct df_link *defs, *def;
       ext_cand *cand;
+      defs = get_defs (insn, reg, NULL);
+
+      if (!defs && abi_extension_candidate_argno_p (/*code,*/ REGNO (reg)))
+	{
+	  ext_cand e = {expr, code, mode, insn};
+	  insn_list->safe_push (e);
+	  return;
+	}
 
       /* Zero-extension of an undefined value is partly defined (it's
 	 completely undefined for sign-extension, though).  So if there exists
@@ -1131,7 +1272,6 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
 	}
 
       /* Second, make sure we can get all the reaching definitions.  */
-      defs = get_defs (insn, reg, NULL);
       if (!defs)
 	{
 	  if (dump_file)
@@ -1321,7 +1461,8 @@ find_and_remove_re (void)
 	      && (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0))))
 	    {
               reinsn_copy_list.safe_push (curr_cand->insn);
-              reinsn_copy_list.safe_push (state.defs_list[0]);
+	      if (state.defs_list.length () != 0)
+		reinsn_copy_list.safe_push (state.defs_list[0]);
 	    }
 	  reinsn_del_list.safe_push (curr_cand->insn);
 	  state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
@@ -1345,6 +1486,10 @@ find_and_remove_re (void)
   for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2)
     {
       rtx_insn *curr_insn = reinsn_copy_list[i];
+
+      if ((i+1) >= reinsn_copy_list.length ())
+	continue;
+
       rtx_insn *def_insn = reinsn_copy_list[i + 1];
 
       /* Use the mode of the destination of the defining insn
diff --git a/gcc/testsuite/g++.target/powerpc/zext-elim-3.C b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
new file mode 100644
index 00000000000..5a050df06ff
--- /dev/null
+++ b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
@@ -0,0 +1,13 @@
+/* { dg-options "-mcpu=power9 -O2" } */
+
+void *memset(void *b, int c, unsigned long len)
+{
+  unsigned long i;
+
+  for (i = 0; i < len; i++)
+    ((unsigned char *)b)[i] = c;
+
+   return b;
+}
+
+/* { dg-final { scan-assembler-not "\mrlwinm\M" } } */
-- 
2.39.3


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
  2023-10-20  6:50 [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces Ajit Agarwal
@ 2023-10-20 23:56 ` Vineet Gupta
  2023-10-21 10:14   ` Ajit Agarwal
  2023-10-21 19:26   ` rep.dot.nop
  0 siblings, 2 replies; 20+ messages in thread
From: Vineet Gupta @ 2023-10-20 23:56 UTC (permalink / raw)
  To: Ajit Agarwal, gcc-patches
  Cc: Jeff Law, Richard Biener, Segher Boessenkool, Peter Bergner,
	gnu-toolchain

On 10/19/23 23:50, Ajit Agarwal wrote:
> Hello All:
>
> This version 9 of the patch uses abi interfaces to remove zero and sign extension elimination.
> Bootstrapped and regtested on powerpc-linux-gnu.
>
> In this version (version 9) of the patch following review comments are incorporated.
>
> a) Removal of hard code zero_extend and sign_extend  in abi interfaces.
> b) Source and destination with different registers are considered.
> c) Further enhancements.
> d) Added sign extension elimination using abi interfaces.

As has been trend in the past, I don't think all the review comments 
have been addressed.
The standard practice is to reply to reviewer's email and say yay/nay 
explicitly to each comment. Some of my comments in [1a] are still not 
resolved, importantly the last two. To be fair you did reply [1b] but 
the comments were not addressed explicitly.

[1a] https://gcc.gnu.org/pipermail/gcc-patches/2023-September/630814.html
[1b] https://gcc.gnu.org/pipermail/gcc-patches/2023-September/630865.html

Anyhow I gave this a try for RISC-V, specially after [2a][2b] as I was 
curious to see if this uncovers REE handling extraneous extensions which 
could potentially be eliminated in Expand itself, which is generally 
better as it happens earlier in the pipeline.

[2a] 2023-10-16 8eb9cdd14218 expr: don't clear SUBREG_PROMOTED_VAR_P 
flag for a promoted subreg [target/111466]
[2b] https://gcc.gnu.org/pipermail/gcc-patches/2023-October/631818.html

Bad news is with the patch, we fail to even bootstrap risc-v, buckles 
over when building libgcc itself.

The reproducer is embarrassingly simple, build with -O2:

float a;
b() { return a; }

See details below....

> Thanks & Regards
> Ajit
>
> ree: Improve ree pass for rs6000 target using defined abi interfaces
>
> For rs6000 target we see redundant zero and sign extension and done
> to improve ree pass to eliminate such redundant zero and sign extension
> using defined ABI interfaces.
>
> 2023-10-20  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
>
> gcc/ChangeLog:
>
>          * ree.cc (combine_reaching_defs): Use of zero_extend and sign_extend
>          defined abi interfaces.
>          (add_removable_extension): Use of defined abi interfaces for no
>          reaching defs.
>          (abi_extension_candidate_return_reg_p): New function.
>          (abi_extension_candidate_p): New function.
>          (abi_extension_candidate_argno_p): New function.
>          (abi_handle_regs): New function.
>          (abi_target_promote_function_mode): New function.
>
> gcc/testsuite/ChangeLog:
>
>          * g++.target/powerpc/zext-elim-3.C
> ---
>   gcc/ree.cc                                    | 151 +++++++++++++++++-
>   .../g++.target/powerpc/zext-elim-3.C          |  13 ++
>   2 files changed, 161 insertions(+), 3 deletions(-)
>   create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C
>
> diff --git a/gcc/ree.cc b/gcc/ree.cc
> index fc04249fa84..9f21f0e9907 100644
> --- a/gcc/ree.cc
> +++ b/gcc/ree.cc
> @@ -514,7 +514,8 @@ get_uses (rtx_insn *insn, rtx reg)
>       if (REGNO (DF_REF_REG (def)) == REGNO (reg))
>         break;
>   
> -  gcc_assert (def != NULL);
> +  if (def == NULL)
> +    return NULL;
>   
>     ref_chain = DF_REF_CHAIN (def);
>   
> @@ -750,6 +751,124 @@ get_extended_src_reg (rtx src)
>     return src;
>   }
>   
> +/* Return TRUE if target mode is equal to source mode of zero_extend
> +   or sign_extend otherwise false.  */
> +
> +static bool
> +abi_target_promote_function_mode (machine_mode mode)
> +{
> +  int unsignedp;
> +  machine_mode tgt_mode
> +    = targetm.calls.promote_function_mode (NULL_TREE, mode, &unsignedp,
> +					   NULL_TREE, 1);
> +
> +  if (tgt_mode == mode)
> +    return true;
> +  else
> +    return false;
> +}
> +
> +/* Return TRUE if the candidate insn is zero extend and regno is
> +   a return registers.  */
> +
> +static bool
> +abi_extension_candidate_return_reg_p (/*rtx_insn *insn, */int regno)
> +{
> +  if (targetm.calls.function_value_regno_p (regno))
> +    return true;
> +
> +  return false;
> +}
> +
> +/* Return TRUE if reg source operand of zero_extend is argument registers
> +   and not return registers and source and destination operand are same
> +   and mode of source and destination operand are not same.  */
> +
> +static bool
> +abi_extension_candidate_p (rtx_insn *insn)
> +{
> +  rtx set = single_set (insn);
> +  machine_mode dst_mode = GET_MODE (SET_DEST (set));
> +  rtx orig_src = XEXP (SET_SRC (set), 0);
> +
> +  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
> +      || abi_extension_candidate_return_reg_p (/*insn,*/ REGNO (orig_src)))
> +    return false;
> +
> +  /* Mode of destination and source should be different.  */
> +  if (dst_mode == GET_MODE (orig_src))
> +    return false;
> +
> +  machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0));
> +  bool promote_p = abi_target_promote_function_mode (mode);
> +
> +  /* REGNO of source and destination should be same if not
> +      promoted.  */
> +  if (!promote_p && REGNO (SET_DEST (set)) != REGNO (orig_src))
> +    return false;
> +
> +  return true;
> +}
> +
> +/* Return TRUE if the candidate insn is zero extend and regno is
> +   an argument registers.  */
> +
> +static bool
> +abi_extension_candidate_argno_p (/*rtx_code code, */int regno)
> +{
> +  if (FUNCTION_ARG_REGNO_P (regno))
> +    return true;
> +
> +  return false;
> +}
> +
> +/* Return TRUE if the candidate insn doesn't have defs and have
> + * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class.  */
> +
> +static bool
> +abi_handle_regs (rtx_insn *insn)
> +{
> +  if (side_effects_p (PATTERN (insn)))
> +    return false;
> +
> +  struct df_link *uses = get_uses (insn, SET_DEST (PATTERN (insn)));
> +
> +  if (!uses)
> +    return false;
> +
> +  for (df_link *use = uses; use; use = use->next)
> +    {
> +      if (!use->ref)
> +	return false;
> +
> +      if (BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (DF_REF_INSN (use->ref)))
> +	return false;
> +
> +      rtx_insn *use_insn = DF_REF_INSN (use->ref);
> +
> +      if (GET_CODE (PATTERN (use_insn)) == SET)
> +	{
> +	  rtx_code code = GET_CODE (SET_SRC (PATTERN (use_insn)));
> +
> +	  if (GET_RTX_CLASS (code) == RTX_BIN_ARITH
> +	      || GET_RTX_CLASS (code) == RTX_COMM_ARITH
> +	      || GET_RTX_CLASS (code) == RTX_UNARY)
> +	    return false;
> +	}
> +     }
> +
> +  rtx set = single_set (insn);
> +
> +  if (GET_CODE (SET_SRC (set)) == SIGN_EXTEND)
> +    {
> +      machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0));
> +      bool promote_p = abi_target_promote_function_mode (mode);
> +
> +      return promote_p;
> +    }
> +  return true;
> +}
> +
>   /* This function goes through all reaching defs of the source
>      of the candidate for elimination (CAND) and tries to combine
>      the extension with the definition instruction.  The changes
> @@ -770,6 +889,11 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>   
>     state->defs_list.truncate (0);
>     state->copies_list.truncate (0);
> +  rtx orig_src = XEXP (SET_SRC (cand->expr),0);
> +
> +  if (abi_extension_candidate_p (cand->insn)
> +      && (!get_defs (cand->insn, orig_src, NULL)))
> +    return abi_handle_regs (cand->insn);
>   
>     outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
>   
> @@ -1036,6 +1160,15 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>           }
>       }
>   
> +  rtx insn_set = single_set (cand->insn);
> +
> +  machine_mode mode = (GET_MODE (XEXP (SET_SRC (insn_set), 0)));
> +
> +  bool promote_p = abi_target_promote_function_mode (mode);
> +
> +  if (promote_p)
> +    return true;
> +

Is this early exit OK ? It skips a subsequent apply_change_group() call 
which could potentially fail and thus extension would be undone.
And even for the passing case, we do want those instructions to be 
merged normally. So I don't understand how this change is useful at all ?

FWIW for my test, w/o the patch: apply_change_group would fail (likely 
missing some combine pattern) and undo the merge. However w/ patch we 
just return/continue, keeping the merged but invalid insn which ICE in 
next pass cprop_hardreg as failure to recog that insn.

Some more details in case you are curious:

Coming into REE we have

(insn 7 6 13 2 (set (reg:SI 10 a0 [136])  #DEF
         (fix:SI (reg:SF 47 fa5 [orig:138 a ] [138])))
                  {fix_truncsfsi2}

(insn 13 7 14 2 (set (reg/i:DI 10 a0)    # extension
         (sign_extend:DI (reg:SI 10 a0 [136])))
                  {extendsidi2}

These are merged into unrecog insn as of now:

(insn 7 6 14 2 (set (reg:DI 10 a0)
         (sign_extend:DI (fix:SI (reg:SF 47 fa5 [orig:138 a ] [138]))))

Thx,
-Vineet

>     if (merge_successful)
>       {
>         /* Commit the changes here if possible
> @@ -1112,6 +1245,14 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
>         rtx reg = XEXP (src, 0);
>         struct df_link *defs, *def;
>         ext_cand *cand;
> +      defs = get_defs (insn, reg, NULL);
> +
> +      if (!defs && abi_extension_candidate_argno_p (/*code,*/ REGNO (reg)))
> +	{
> +	  ext_cand e = {expr, code, mode, insn};
> +	  insn_list->safe_push (e);
> +	  return;
> +	}
>   
>         /* Zero-extension of an undefined value is partly defined (it's
>   	 completely undefined for sign-extension, though).  So if there exists
> @@ -1131,7 +1272,6 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
>   	}
>   
>         /* Second, make sure we can get all the reaching definitions.  */
> -      defs = get_defs (insn, reg, NULL);
>         if (!defs)
>   	{
>   	  if (dump_file)
> @@ -1321,7 +1461,8 @@ find_and_remove_re (void)
>   	      && (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0))))
>   	    {
>                 reinsn_copy_list.safe_push (curr_cand->insn);
> -              reinsn_copy_list.safe_push (state.defs_list[0]);
> +	      if (state.defs_list.length () != 0)
> +		reinsn_copy_list.safe_push (state.defs_list[0]);
>   	    }
>   	  reinsn_del_list.safe_push (curr_cand->insn);
>   	  state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
> @@ -1345,6 +1486,10 @@ find_and_remove_re (void)
>     for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2)
>       {
>         rtx_insn *curr_insn = reinsn_copy_list[i];
> +
> +      if ((i+1) >= reinsn_copy_list.length ())
> +	continue;
> +
>         rtx_insn *def_insn = reinsn_copy_list[i + 1];
>   
>         /* Use the mode of the destination of the defining insn
> diff --git a/gcc/testsuite/g++.target/powerpc/zext-elim-3.C b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
> new file mode 100644
> index 00000000000..5a050df06ff
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
> @@ -0,0 +1,13 @@
> +/* { dg-options "-mcpu=power9 -O2" } */
> +
> +void *memset(void *b, int c, unsigned long len)
> +{
> +  unsigned long i;
> +
> +  for (i = 0; i < len; i++)
> +    ((unsigned char *)b)[i] = c;
> +
> +   return b;
> +}
> +
> +/* { dg-final { scan-assembler-not "\mrlwinm\M" } } */


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
  2023-10-20 23:56 ` Vineet Gupta
@ 2023-10-21 10:14   ` Ajit Agarwal
  2023-10-21 19:26   ` rep.dot.nop
  1 sibling, 0 replies; 20+ messages in thread
From: Ajit Agarwal @ 2023-10-21 10:14 UTC (permalink / raw)
  To: Vineet Gupta, gcc-patches
  Cc: Jeff Law, Richard Biener, Segher Boessenkool, Peter Bergner,
	gnu-toolchain

Hello Vineet:

Thanks for your time and valuable comments.

On 21/10/23 5:26 am, Vineet Gupta wrote:
> On 10/19/23 23:50, Ajit Agarwal wrote:
>> Hello All:
>>
>> This version 9 of the patch uses abi interfaces to remove zero and sign extension elimination.
>> Bootstrapped and regtested on powerpc-linux-gnu.
>>
>> In this version (version 9) of the patch following review comments are incorporated.
>>
>> a) Removal of hard code zero_extend and sign_extend  in abi interfaces.
>> b) Source and destination with different registers are considered.
>> c) Further enhancements.
>> d) Added sign extension elimination using abi interfaces.
> 
> As has been trend in the past, I don't think all the review comments have been addressed.
> The standard practice is to reply to reviewer's email and say yay/nay explicitly to each comment. Some of my comments in [1a] are still not resolved, importantly the last two. To be fair you did reply [1b] but the comments were not addressed explicitly.
> 
> [1a] https://gcc.gnu.org/pipermail/gcc-patches/2023-September/630814.html
> [1b] https://gcc.gnu.org/pipermail/gcc-patches/2023-September/630865.html
> 

I have addressed the last 2 comments in the version 10 of the patch. Please let me know if there
is anything missing. Regarding last comments with a providing different tests if you have any suggestions please 
let me know.

> Anyhow I gave this a try for RISC-V, specially after [2a][2b] as I was curious to see if this uncovers REE handling extraneous extensions which could potentially be eliminated in Expand itself, which is generally better as it happens earlier in the pipeline.
> 
> [2a] 2023-10-16 8eb9cdd14218 expr: don't clear SUBREG_PROMOTED_VAR_P flag for a promoted subreg [target/111466]
> [2b] https://gcc.gnu.org/pipermail/gcc-patches/2023-October/631818.html
> 
> Bad news is with the patch, we fail to even bootstrap risc-v, buckles over when building libgcc itself.
> 
> The reproducer is embarrassingly simple, build with -O2:
> 
> float a;
> b() { return a; }
> 
> See details below....
> 
>> Thanks & Regards
>> Ajit
>>
>> ree: Improve ree pass for rs6000 target using defined abi interfaces
>>
>> For rs6000 target we see redundant zero and sign extension and done
>> to improve ree pass to eliminate such redundant zero and sign extension
>> using defined ABI interfaces.
>>
>> 2023-10-20  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
>>
>> gcc/ChangeLog:
>>
>>          * ree.cc (combine_reaching_defs): Use of zero_extend and sign_extend
>>          defined abi interfaces.
>>          (add_removable_extension): Use of defined abi interfaces for no
>>          reaching defs.
>>          (abi_extension_candidate_return_reg_p): New function.
>>          (abi_extension_candidate_p): New function.
>>          (abi_extension_candidate_argno_p): New function.
>>          (abi_handle_regs): New function.
>>          (abi_target_promote_function_mode): New function.
>>
>> gcc/testsuite/ChangeLog:
>>
>>          * g++.target/powerpc/zext-elim-3.C
>> ---
>>   gcc/ree.cc                                    | 151 +++++++++++++++++-
>>   .../g++.target/powerpc/zext-elim-3.C          |  13 ++
>>   2 files changed, 161 insertions(+), 3 deletions(-)
>>   create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C
>>
>> diff --git a/gcc/ree.cc b/gcc/ree.cc
>> index fc04249fa84..9f21f0e9907 100644
>> --- a/gcc/ree.cc
>> +++ b/gcc/ree.cc
>> @@ -514,7 +514,8 @@ get_uses (rtx_insn *insn, rtx reg)
>>       if (REGNO (DF_REF_REG (def)) == REGNO (reg))
>>         break;
>>   -  gcc_assert (def != NULL);
>> +  if (def == NULL)
>> +    return NULL;
>>       ref_chain = DF_REF_CHAIN (def);
>>   @@ -750,6 +751,124 @@ get_extended_src_reg (rtx src)
>>     return src;
>>   }
>>   +/* Return TRUE if target mode is equal to source mode of zero_extend
>> +   or sign_extend otherwise false.  */
>> +
>> +static bool
>> +abi_target_promote_function_mode (machine_mode mode)
>> +{
>> +  int unsignedp;
>> +  machine_mode tgt_mode
>> +    = targetm.calls.promote_function_mode (NULL_TREE, mode, &unsignedp,
>> +                       NULL_TREE, 1);
>> +
>> +  if (tgt_mode == mode)
>> +    return true;
>> +  else
>> +    return false;
>> +}
>> +
>> +/* Return TRUE if the candidate insn is zero extend and regno is
>> +   a return registers.  */
>> +
>> +static bool
>> +abi_extension_candidate_return_reg_p (/*rtx_insn *insn, */int regno)
>> +{
>> +  if (targetm.calls.function_value_regno_p (regno))
>> +    return true;
>> +
>> +  return false;
>> +}
>> +
>> +/* Return TRUE if reg source operand of zero_extend is argument registers
>> +   and not return registers and source and destination operand are same
>> +   and mode of source and destination operand are not same.  */
>> +
>> +static bool
>> +abi_extension_candidate_p (rtx_insn *insn)
>> +{
>> +  rtx set = single_set (insn);
>> +  machine_mode dst_mode = GET_MODE (SET_DEST (set));
>> +  rtx orig_src = XEXP (SET_SRC (set), 0);
>> +
>> +  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>> +      || abi_extension_candidate_return_reg_p (/*insn,*/ REGNO (orig_src)))
>> +    return false;
>> +
>> +  /* Mode of destination and source should be different.  */
>> +  if (dst_mode == GET_MODE (orig_src))
>> +    return false;
>> +
>> +  machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0));
>> +  bool promote_p = abi_target_promote_function_mode (mode);
>> +
>> +  /* REGNO of source and destination should be same if not
>> +      promoted.  */
>> +  if (!promote_p && REGNO (SET_DEST (set)) != REGNO (orig_src))
>> +    return false;
>> +
>> +  return true;
>> +}
>> +
>> +/* Return TRUE if the candidate insn is zero extend and regno is
>> +   an argument registers.  */
>> +
>> +static bool
>> +abi_extension_candidate_argno_p (/*rtx_code code, */int regno)
>> +{
>> +  if (FUNCTION_ARG_REGNO_P (regno))
>> +    return true;
>> +
>> +  return false;
>> +}
>> +
>> +/* Return TRUE if the candidate insn doesn't have defs and have
>> + * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class.  */
>> +
>> +static bool
>> +abi_handle_regs (rtx_insn *insn)
>> +{
>> +  if (side_effects_p (PATTERN (insn)))
>> +    return false;
>> +
>> +  struct df_link *uses = get_uses (insn, SET_DEST (PATTERN (insn)));
>> +
>> +  if (!uses)
>> +    return false;
>> +
>> +  for (df_link *use = uses; use; use = use->next)
>> +    {
>> +      if (!use->ref)
>> +    return false;
>> +
>> +      if (BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (DF_REF_INSN (use->ref)))
>> +    return false;
>> +
>> +      rtx_insn *use_insn = DF_REF_INSN (use->ref);
>> +
>> +      if (GET_CODE (PATTERN (use_insn)) == SET)
>> +    {
>> +      rtx_code code = GET_CODE (SET_SRC (PATTERN (use_insn)));
>> +
>> +      if (GET_RTX_CLASS (code) == RTX_BIN_ARITH
>> +          || GET_RTX_CLASS (code) == RTX_COMM_ARITH
>> +          || GET_RTX_CLASS (code) == RTX_UNARY)
>> +        return false;
>> +    }
>> +     }
>> +
>> +  rtx set = single_set (insn);
>> +
>> +  if (GET_CODE (SET_SRC (set)) == SIGN_EXTEND)
>> +    {
>> +      machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0));
>> +      bool promote_p = abi_target_promote_function_mode (mode);
>> +
>> +      return promote_p;
>> +    }
>> +  return true;
>> +}
>> +
>>   /* This function goes through all reaching defs of the source
>>      of the candidate for elimination (CAND) and tries to combine
>>      the extension with the definition instruction.  The changes
>> @@ -770,6 +889,11 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>>       state->defs_list.truncate (0);
>>     state->copies_list.truncate (0);
>> +  rtx orig_src = XEXP (SET_SRC (cand->expr),0);
>> +
>> +  if (abi_extension_candidate_p (cand->insn)
>> +      && (!get_defs (cand->insn, orig_src, NULL)))
>> +    return abi_handle_regs (cand->insn);
>>       outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
>>   @@ -1036,6 +1160,15 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>>           }
>>       }
>>   +  rtx insn_set = single_set (cand->insn);
>> +
>> +  machine_mode mode = (GET_MODE (XEXP (SET_SRC (insn_set), 0)));
>> +
>> +  bool promote_p = abi_target_promote_function_mode (mode);
>> +
>> +  if (promote_p)
>> +    return true;
>> +
> 
> Is this early exit OK ? It skips a subsequent apply_change_group() call which could potentially fail and thus extension would be undone.
> And even for the passing case, we do want those instructions to be merged normally. So I don't understand how this change is useful at all ?
> 
> FWIW for my test, w/o the patch: apply_change_group would fail (likely missing some combine pattern) and undo the merge. However w/ patch we just return/continue, keeping the merged but invalid insn which ICE in next pass cprop_hardreg as failure to recog that insn.
> 
> Some more details in case you are curious:
> 
> Coming into REE we have
> 
> (insn 7 6 13 2 (set (reg:SI 10 a0 [136])  #DEF
>         (fix:SI (reg:SF 47 fa5 [orig:138 a ] [138])))
>                  {fix_truncsfsi2}
> 
> (insn 13 7 14 2 (set (reg/i:DI 10 a0)    # extension
>         (sign_extend:DI (reg:SI 10 a0 [136])))
>                  {extendsidi2}
> 
> These are merged into unrecog insn as of now:
> 
> (insn 7 6 14 2 (set (reg:DI 10 a0)
>         (sign_extend:DI (fix:SI (reg:SF 47 fa5 [orig:138 a ] [138]))))
>

Thanks for your input on this. I have addressed that in the version 10 of the patch please review.

Thanks & Regards
Ajit
 
> Thx,
> -Vineet
> 
>>     if (merge_successful)
>>       {
>>         /* Commit the changes here if possible
>> @@ -1112,6 +1245,14 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
>>         rtx reg = XEXP (src, 0);
>>         struct df_link *defs, *def;
>>         ext_cand *cand;
>> +      defs = get_defs (insn, reg, NULL);
>> +
>> +      if (!defs && abi_extension_candidate_argno_p (/*code,*/ REGNO (reg)))
>> +    {
>> +      ext_cand e = {expr, code, mode, insn};
>> +      insn_list->safe_push (e);
>> +      return;
>> +    }
>>           /* Zero-extension of an undefined value is partly defined (it's
>>        completely undefined for sign-extension, though).  So if there exists
>> @@ -1131,7 +1272,6 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
>>       }
>>           /* Second, make sure we can get all the reaching definitions.  */
>> -      defs = get_defs (insn, reg, NULL);
>>         if (!defs)
>>       {
>>         if (dump_file)
>> @@ -1321,7 +1461,8 @@ find_and_remove_re (void)
>>             && (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0))))
>>           {
>>                 reinsn_copy_list.safe_push (curr_cand->insn);
>> -              reinsn_copy_list.safe_push (state.defs_list[0]);
>> +          if (state.defs_list.length () != 0)
>> +        reinsn_copy_list.safe_push (state.defs_list[0]);
>>           }
>>         reinsn_del_list.safe_push (curr_cand->insn);
>>         state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
>> @@ -1345,6 +1486,10 @@ find_and_remove_re (void)
>>     for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2)
>>       {
>>         rtx_insn *curr_insn = reinsn_copy_list[i];
>> +
>> +      if ((i+1) >= reinsn_copy_list.length ())
>> +    continue;
>> +
>>         rtx_insn *def_insn = reinsn_copy_list[i + 1];
>>           /* Use the mode of the destination of the defining insn
>> diff --git a/gcc/testsuite/g++.target/powerpc/zext-elim-3.C b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
>> new file mode 100644
>> index 00000000000..5a050df06ff
>> --- /dev/null
>> +++ b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
>> @@ -0,0 +1,13 @@
>> +/* { dg-options "-mcpu=power9 -O2" } */
>> +
>> +void *memset(void *b, int c, unsigned long len)
>> +{
>> +  unsigned long i;
>> +
>> +  for (i = 0; i < len; i++)
>> +    ((unsigned char *)b)[i] = c;
>> +
>> +   return b;
>> +}
>> +
>> +/* { dg-final { scan-assembler-not "\mrlwinm\M" } } */
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
  2023-10-20 23:56 ` Vineet Gupta
  2023-10-21 10:14   ` Ajit Agarwal
@ 2023-10-21 19:26   ` rep.dot.nop
  2023-10-23  6:46     ` Ajit Agarwal
  1 sibling, 1 reply; 20+ messages in thread
From: rep.dot.nop @ 2023-10-21 19:26 UTC (permalink / raw)
  To: gcc-patches, Vineet Gupta, Ajit Agarwal, gcc-patches
  Cc: Jeff Law, Richard Biener, Segher Boessenkool, Peter Bergner,
	gnu-toolchain

On 21 October 2023 01:56:16 CEST, Vineet Gupta <vineetg@rivosinc.com> wrote:
>On 10/19/23 23:50, Ajit Agarwal wrote:
>> Hello All:
>> 
>> This version 9 of the patch uses abi interfaces to remove zero and sign extension elimination.
>> Bootstrapped and regtested on powerpc-linux-gnu.
>> 
>> In this version (version 9) of the patch following review comments are incorporated.
>> 
>> a) Removal of hard code zero_extend and sign_extend  in abi interfaces.
>> b) Source and destination with different registers are considered.
>> c) Further enhancements.
>> d) Added sign extension elimination using abi interfaces.
>
>As has been trend in the past, I don't think all the review comments have been addressed.

And apart from that, may I ask if this is just me, or does anybody else think that it might be worthwhile to actually read a patch before (re-)posting?

Seeing e.g. the proposed abi_extension_candidate_p as written in a first POC would deserve some manual CSE, if nothing more then for clarity and conciseness?

Just curious from a meta perspective..

And:

>> ree: Improve ree pass for rs6000 target using defined abi interfaces

mentioning powerpc like this, and then changing generic code could be interpreted as misleading, IMHO.

>> 
>> For rs6000 target we see redundant zero and sign extension and done
>> to improve ree pass to eliminate such redundant zero and sign extension
>> using defined ABI interfaces.

Mentioning powerpc in the body as one of the affected target(s) is of course fine.


>>   +/* Return TRUE if target mode is equal to source mode of zero_extend
>> +   or sign_extend otherwise false.  */

, false otherwise.

But I'm not a native speaker 


>> +/* Return TRUE if the candidate insn is zero extend and regno is
>> +   a return registers.  */
>> +
>> +static bool
>> +abi_extension_candidate_return_reg_p (/*rtx_insn *insn, */int regno)

Leftover debug comment.

>> +{
>> +  if (targetm.calls.function_value_regno_p (regno))
>> +    return true;
>> +
>> +  return false;
>> +}
>> +

As said, I don't see why the below was not cleaned up before the V1 submission.
Iff it breaks when manually CSEing, I'm curious why?

>> +/* Return TRUE if reg source operand of zero_extend is argument registers
>> +   and not return registers and source and destination operand are same
>> +   and mode of source and destination operand are not same.  */
>> +
>> +static bool
>> +abi_extension_candidate_p (rtx_insn *insn)
>> +{
>> +  rtx set = single_set (insn);
>> +  machine_mode dst_mode = GET_MODE (SET_DEST (set));
>> +  rtx orig_src = XEXP (SET_SRC (set), 0);
>> +
>> +  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>> +      || abi_extension_candidate_return_reg_p (/*insn,*/ REGNO (orig_src)))

On top, debug leftover.

>> +    return false;
>> +
>> +  /* Mode of destination and source should be different.  */
>> +  if (dst_mode == GET_MODE (orig_src))
>> +    return false;
>> +
>> +  machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0));
>> +  bool promote_p = abi_target_promote_function_mode (mode);
>> +
>> +  /* REGNO of source and destination should be same if not
>> +      promoted.  */
>> +  if (!promote_p && REGNO (SET_DEST (set)) != REGNO (orig_src))
>> +    return false;
>> +
>> +  return true;
>> +}
>> +

As said, please also rephrase the above (and everything else if it obviously looks akin the above).

The rest, mentioned below,  should largely be covered by following the coding convention.

>> +/* Return TRUE if the candidate insn is zero extend and regno is
>> +   an argument registers.  */

Singular register.

>> +
>> +static bool
>> +abi_extension_candidate_argno_p (/*rtx_code code, */int regno)

Debug leftover.
I would probably have inlined this function manually, with a respective comment.
Did not look how often it is used, admittedly.

>> +{
>> +  if (FUNCTION_ARG_REGNO_P (regno))
>> +    return true;
>> +
>> +  return false;
>> +}
[]
>> +
>>   /* This function goes through all reaching defs of the source

s/This function goes/Go/

>>      of the candidate for elimination (CAND) and tries to combine

(of, ?didn't look) candidate CAND for eliminating

>>      the extension with the definition instruction.  The changes

defining

Pre-existing, I know.
But you could fix those in a preparatory patch while you touch surrounding code.
This is not a requirement, of course, just good habit, IMHO.

>> @@ -770,6 +889,11 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>>       state->defs_list.truncate (0);
>>     state->copies_list.truncate (0);
>> +  rtx orig_src = XEXP (SET_SRC (cand->expr),0);
>> +
>> +  if (abi_extension_candidate_p (cand->insn)
>> +      && (!get_defs (cand->insn, orig_src, NULL)))

Excess braces.
Hopefully check_gnu_style would have complained.

>> +    return abi_handle_regs (cand->insn);
>>       outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
>>   @@ -1036,6 +1160,15 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>>           }
>>       }
>>   +  rtx insn_set = single_set (cand->insn);
>> +
>> +  machine_mode mode = (GET_MODE (XEXP (SET_SRC (insn_set), 0)));

Excess braces.
Also in a lot of other spots in your patch.
Please read the coding conventions and the patch, once again, before submission?
thanks

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
  2023-10-21 19:26   ` rep.dot.nop
@ 2023-10-23  6:46     ` Ajit Agarwal
  2023-10-23 14:10       ` Bernhard Reutner-Fischer
  2023-10-23 18:32       ` Vineet Gupta
  0 siblings, 2 replies; 20+ messages in thread
From: Ajit Agarwal @ 2023-10-23  6:46 UTC (permalink / raw)
  To: rep.dot.nop, gcc-patches, Vineet Gupta
  Cc: Jeff Law, Richard Biener, Segher Boessenkool, Peter Bergner,
	gnu-toolchain

Hello All:

Addressed below review comments in the version 11 of the patch.
Please review and please let me know if its ok for trunk.

Thanks & Regards
Ajit

On 22/10/23 12:56 am, rep.dot.nop@gmail.com wrote:
> On 21 October 2023 01:56:16 CEST, Vineet Gupta <vineetg@rivosinc.com> wrote:
>> On 10/19/23 23:50, Ajit Agarwal wrote:
>>> Hello All:
>>>
>>> This version 9 of the patch uses abi interfaces to remove zero and sign extension elimination.
>>> Bootstrapped and regtested on powerpc-linux-gnu.
>>>
>>> In this version (version 9) of the patch following review comments are incorporated.
>>>
>>> a) Removal of hard code zero_extend and sign_extend  in abi interfaces.
>>> b) Source and destination with different registers are considered.
>>> c) Further enhancements.
>>> d) Added sign extension elimination using abi interfaces.
>>
>> As has been trend in the past, I don't think all the review comments have been addressed.
> 
> And apart from that, may I ask if this is just me, or does anybody else think that it might be worthwhile to actually read a patch before (re-)posting?
> 
> Seeing e.g. the proposed abi_extension_candidate_p as written in a first POC would deserve some manual CSE, if nothing more then for clarity and conciseness?
> 
> Just curious from a meta perspective..
> 
> And:
> 
>>> ree: Improve ree pass for rs6000 target using defined abi interfaces
> 
> mentioning powerpc like this, and then changing generic code could be interpreted as misleading, IMHO.
> 
>>>
>>> For rs6000 target we see redundant zero and sign extension and done
>>> to improve ree pass to eliminate such redundant zero and sign extension
>>> using defined ABI interfaces.
> 
> Mentioning powerpc in the body as one of the affected target(s) is of course fine.
> 
> 
>>>   +/* Return TRUE if target mode is equal to source mode of zero_extend
>>> +   or sign_extend otherwise false.  */
> 
> , false otherwise.
> 
> But I'm not a native speaker 
> 
> 
>>> +/* Return TRUE if the candidate insn is zero extend and regno is
>>> +   a return registers.  */
>>> +
>>> +static bool
>>> +abi_extension_candidate_return_reg_p (/*rtx_insn *insn, */int regno)
> 
> Leftover debug comment.
> 
>>> +{
>>> +  if (targetm.calls.function_value_regno_p (regno))
>>> +    return true;
>>> +
>>> +  return false;
>>> +}
>>> +
> 
> As said, I don't see why the below was not cleaned up before the V1 submission.
> Iff it breaks when manually CSEing, I'm curious why?
> 
>>> +/* Return TRUE if reg source operand of zero_extend is argument registers
>>> +   and not return registers and source and destination operand are same
>>> +   and mode of source and destination operand are not same.  */
>>> +
>>> +static bool
>>> +abi_extension_candidate_p (rtx_insn *insn)
>>> +{
>>> +  rtx set = single_set (insn);
>>> +  machine_mode dst_mode = GET_MODE (SET_DEST (set));
>>> +  rtx orig_src = XEXP (SET_SRC (set), 0);
>>> +
>>> +  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>>> +      || abi_extension_candidate_return_reg_p (/*insn,*/ REGNO (orig_src)))
> 
> On top, debug leftover.
> 
>>> +    return false;
>>> +
>>> +  /* Mode of destination and source should be different.  */
>>> +  if (dst_mode == GET_MODE (orig_src))
>>> +    return false;
>>> +
>>> +  machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0));
>>> +  bool promote_p = abi_target_promote_function_mode (mode);
>>> +
>>> +  /* REGNO of source and destination should be same if not
>>> +      promoted.  */
>>> +  if (!promote_p && REGNO (SET_DEST (set)) != REGNO (orig_src))
>>> +    return false;
>>> +
>>> +  return true;
>>> +}
>>> +
> 
> As said, please also rephrase the above (and everything else if it obviously looks akin the above).
> 
> The rest, mentioned below,  should largely be covered by following the coding convention.
> 
>>> +/* Return TRUE if the candidate insn is zero extend and regno is
>>> +   an argument registers.  */
> 
> Singular register.
> 
>>> +
>>> +static bool
>>> +abi_extension_candidate_argno_p (/*rtx_code code, */int regno)
> 
> Debug leftover.
> I would probably have inlined this function manually, with a respective comment.
> Did not look how often it is used, admittedly.
> 
>>> +{
>>> +  if (FUNCTION_ARG_REGNO_P (regno))
>>> +    return true;
>>> +
>>> +  return false;
>>> +}
> []
>>> +
>>>   /* This function goes through all reaching defs of the source
> 
> s/This function goes/Go/
> 
>>>      of the candidate for elimination (CAND) and tries to combine
> 
> (of, ?didn't look) candidate CAND for eliminating
> 
>>>      the extension with the definition instruction.  The changes
> 
> defining
> 
> Pre-existing, I know.
> But you could fix those in a preparatory patch while you touch surrounding code.
> This is not a requirement, of course, just good habit, IMHO.
> 
>>> @@ -770,6 +889,11 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>>>       state->defs_list.truncate (0);
>>>     state->copies_list.truncate (0);
>>> +  rtx orig_src = XEXP (SET_SRC (cand->expr),0);
>>> +
>>> +  if (abi_extension_candidate_p (cand->insn)
>>> +      && (!get_defs (cand->insn, orig_src, NULL)))
> 
> Excess braces.
> Hopefully check_gnu_style would have complained.
> 
>>> +    return abi_handle_regs (cand->insn);
>>>       outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
>>>   @@ -1036,6 +1160,15 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>>>           }
>>>       }
>>>   +  rtx insn_set = single_set (cand->insn);
>>> +
>>> +  machine_mode mode = (GET_MODE (XEXP (SET_SRC (insn_set), 0)));
> 
> Excess braces.
> Also in a lot of other spots in your patch.
> Please read the coding conventions and the patch, once again, before submission?
> thanks

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
  2023-10-23  6:46     ` Ajit Agarwal
@ 2023-10-23 14:10       ` Bernhard Reutner-Fischer
  2023-10-24  7:36         ` Ajit Agarwal
  2023-10-23 18:32       ` Vineet Gupta
  1 sibling, 1 reply; 20+ messages in thread
From: Bernhard Reutner-Fischer @ 2023-10-23 14:10 UTC (permalink / raw)
  To: Ajit Agarwal
  Cc: rep.dot.nop, gcc-patches, Vineet Gupta, Jeff Law, Richard Biener,
	Segher Boessenkool, Peter Bergner, gnu-toolchain

On Mon, 23 Oct 2023 12:16:18 +0530
Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:

> Hello All:
> 
> Addressed below review comments in the version 11 of the patch.
> Please review and please let me know if its ok for trunk.

s/satisified/satisfied/

> > As said, I don't see why the below was not cleaned up before the V1 submission.
> > Iff it breaks when manually CSEing, I'm curious why?

The function below looks identical in v12 of the patch.
Why didn't you use common subexpressions?

> >   
> >>> +/* Return TRUE if reg source operand of zero_extend is argument registers
> >>> +   and not return registers and source and destination operand are same
> >>> +   and mode of source and destination operand are not same.  */
> >>> +
> >>> +static bool
> >>> +abi_extension_candidate_p (rtx_insn *insn)
> >>> +{
> >>> +  rtx set = single_set (insn);
> >>> +  machine_mode dst_mode = GET_MODE (SET_DEST (set));
> >>> +  rtx orig_src = XEXP (SET_SRC (set), 0);
> >>> +
> >>> +  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
> >>> +      || abi_extension_candidate_return_reg_p (/*insn,*/ REGNO (orig_src)))  
> >>> +    return false;
> >>> +
> >>> +  /* Mode of destination and source should be different.  */
> >>> +  if (dst_mode == GET_MODE (orig_src))
> >>> +    return false;
> >>> +
> >>> +  machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0));
> >>> +  bool promote_p = abi_target_promote_function_mode (mode);
> >>> +
> >>> +  /* REGNO of source and destination should be same if not
> >>> +      promoted.  */
> >>> +  if (!promote_p && REGNO (SET_DEST (set)) != REGNO (orig_src))
> >>> +    return false;
> >>> +
> >>> +  return true;
> >>> +}
> >>> +  


> > 
> > As said, please also rephrase the above (and everything else if it obviously looks akin the above).

thanks

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
  2023-10-23  6:46     ` Ajit Agarwal
  2023-10-23 14:10       ` Bernhard Reutner-Fischer
@ 2023-10-23 18:32       ` Vineet Gupta
  2023-10-24  7:40         ` Ajit Agarwal
  1 sibling, 1 reply; 20+ messages in thread
From: Vineet Gupta @ 2023-10-23 18:32 UTC (permalink / raw)
  To: Ajit Agarwal, rep.dot.nop, gcc-patches
  Cc: Jeff Law, Richard Biener, Segher Boessenkool, Peter Bergner,
	gnu-toolchain



On 10/22/23 23:46, Ajit Agarwal wrote:
> Hello All:
>
> Addressed below review comments in the version 11 of the patch.
> Please review and please let me know if its ok for trunk.
>
> Thanks & Regards
> Ajit

Again you are not paying attention to prior comments about fixing your 
submission practice and like some of the prior reviewers I'm starting to 
get tired, despite potentially good technical content.

1. The commentary above is NOT part of changelog. Either use a separate 
cover letter or add patch version change history between two "---" lines 
just before the start of code diff. And keep accumulating those as you 
post new versions. See [1]. This is so reviewers knwo what changed over 
10 months and automatically gets dropped when patch is eventually 
applied/merged into tree.

2. Acknowledge (even if it is yes) each and every comment of the 
reviewerw explicitly inline below. That ensures you don't miss 
addressing a change since this forces one to think about each of them.

I do have some technical comments which I'll follow up with later.
Just a short summary that v10 indeed bootstraps risc-v but I don't see 
any improvements at all - as in whenever abi interfaces code identifies 
and extension (saw missing a definition, the it is not able to eliminate 
any extensions despite the patch.

-Vineet

[1] https://gcc.gnu.org/pipermail/gcc-patches/2023-October/632180.html

>
> On 22/10/23 12:56 am, rep.dot.nop@gmail.com wrote:
>> On 21 October 2023 01:56:16 CEST, Vineet Gupta <vineetg@rivosinc.com> wrote:
>>> On 10/19/23 23:50, Ajit Agarwal wrote:
>>>> Hello All:
>>>>
>>>> This version 9 of the patch uses abi interfaces to remove zero and sign extension elimination.
>>>> Bootstrapped and regtested on powerpc-linux-gnu.
>>>>
>>>> In this version (version 9) of the patch following review comments are incorporated.
>>>>
>>>> a) Removal of hard code zero_extend and sign_extend  in abi interfaces.
>>>> b) Source and destination with different registers are considered.
>>>> c) Further enhancements.
>>>> d) Added sign extension elimination using abi interfaces.
>>> As has been trend in the past, I don't think all the review comments have been addressed.
>> And apart from that, may I ask if this is just me, or does anybody else think that it might be worthwhile to actually read a patch before (re-)posting?
>>
>> Seeing e.g. the proposed abi_extension_candidate_p as written in a first POC would deserve some manual CSE, if nothing more then for clarity and conciseness?
>>
>> Just curious from a meta perspective..
>>
>> And:
>>
>>>> ree: Improve ree pass for rs6000 target using defined abi interfaces
>> mentioning powerpc like this, and then changing generic code could be interpreted as misleading, IMHO.
>>
>>>> For rs6000 target we see redundant zero and sign extension and done
>>>> to improve ree pass to eliminate such redundant zero and sign extension
>>>> using defined ABI interfaces.
>> Mentioning powerpc in the body as one of the affected target(s) is of course fine.
>>
>>
>>>>    +/* Return TRUE if target mode is equal to source mode of zero_extend
>>>> +   or sign_extend otherwise false.  */
>> , false otherwise.
>>
>> But I'm not a native speaker
>>
>>
>>>> +/* Return TRUE if the candidate insn is zero extend and regno is
>>>> +   a return registers.  */
>>>> +
>>>> +static bool
>>>> +abi_extension_candidate_return_reg_p (/*rtx_insn *insn, */int regno)
>> Leftover debug comment.
>>
>>>> +{
>>>> +  if (targetm.calls.function_value_regno_p (regno))
>>>> +    return true;
>>>> +
>>>> +  return false;
>>>> +}
>>>> +
>> As said, I don't see why the below was not cleaned up before the V1 submission.
>> Iff it breaks when manually CSEing, I'm curious why?
>>
>>>> +/* Return TRUE if reg source operand of zero_extend is argument registers
>>>> +   and not return registers and source and destination operand are same
>>>> +   and mode of source and destination operand are not same.  */
>>>> +
>>>> +static bool
>>>> +abi_extension_candidate_p (rtx_insn *insn)
>>>> +{
>>>> +  rtx set = single_set (insn);
>>>> +  machine_mode dst_mode = GET_MODE (SET_DEST (set));
>>>> +  rtx orig_src = XEXP (SET_SRC (set), 0);
>>>> +
>>>> +  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>>>> +      || abi_extension_candidate_return_reg_p (/*insn,*/ REGNO (orig_src)))
>> On top, debug leftover.
>>
>>>> +    return false;
>>>> +
>>>> +  /* Mode of destination and source should be different.  */
>>>> +  if (dst_mode == GET_MODE (orig_src))
>>>> +    return false;
>>>> +
>>>> +  machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0));
>>>> +  bool promote_p = abi_target_promote_function_mode (mode);
>>>> +
>>>> +  /* REGNO of source and destination should be same if not
>>>> +      promoted.  */
>>>> +  if (!promote_p && REGNO (SET_DEST (set)) != REGNO (orig_src))
>>>> +    return false;
>>>> +
>>>> +  return true;
>>>> +}
>>>> +
>> As said, please also rephrase the above (and everything else if it obviously looks akin the above).
>>
>> The rest, mentioned below,  should largely be covered by following the coding convention.
>>
>>>> +/* Return TRUE if the candidate insn is zero extend and regno is
>>>> +   an argument registers.  */
>> Singular register.
>>
>>>> +
>>>> +static bool
>>>> +abi_extension_candidate_argno_p (/*rtx_code code, */int regno)
>> Debug leftover.
>> I would probably have inlined this function manually, with a respective comment.
>> Did not look how often it is used, admittedly.
>>
>>>> +{
>>>> +  if (FUNCTION_ARG_REGNO_P (regno))
>>>> +    return true;
>>>> +
>>>> +  return false;
>>>> +}
>> []
>>>> +
>>>>    /* This function goes through all reaching defs of the source
>> s/This function goes/Go/
>>
>>>>       of the candidate for elimination (CAND) and tries to combine
>> (of, ?didn't look) candidate CAND for eliminating
>>
>>>>       the extension with the definition instruction.  The changes
>> defining
>>
>> Pre-existing, I know.
>> But you could fix those in a preparatory patch while you touch surrounding code.
>> This is not a requirement, of course, just good habit, IMHO.
>>
>>>> @@ -770,6 +889,11 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>>>>        state->defs_list.truncate (0);
>>>>      state->copies_list.truncate (0);
>>>> +  rtx orig_src = XEXP (SET_SRC (cand->expr),0);
>>>> +
>>>> +  if (abi_extension_candidate_p (cand->insn)
>>>> +      && (!get_defs (cand->insn, orig_src, NULL)))
>> Excess braces.
>> Hopefully check_gnu_style would have complained.
>>
>>>> +    return abi_handle_regs (cand->insn);
>>>>        outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
>>>>    @@ -1036,6 +1160,15 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>>>>            }
>>>>        }
>>>>    +  rtx insn_set = single_set (cand->insn);
>>>> +
>>>> +  machine_mode mode = (GET_MODE (XEXP (SET_SRC (insn_set), 0)));
>> Excess braces.
>> Also in a lot of other spots in your patch.
>> Please read the coding conventions and the patch, once again, before submission?
>> thanks


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
  2023-10-23 14:10       ` Bernhard Reutner-Fischer
@ 2023-10-24  7:36         ` Ajit Agarwal
  2023-10-24 20:36           ` rep.dot.nop
  0 siblings, 1 reply; 20+ messages in thread
From: Ajit Agarwal @ 2023-10-24  7:36 UTC (permalink / raw)
  To: Bernhard Reutner-Fischer
  Cc: gcc-patches, Vineet Gupta, Jeff Law, Richard Biener,
	Segher Boessenkool, Peter Bergner, gnu-toolchain

Hello Bernhard:

On 23/10/23 7:40 pm, Bernhard Reutner-Fischer wrote:
> On Mon, 23 Oct 2023 12:16:18 +0530
> Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
> 
>> Hello All:
>>
>> Addressed below review comments in the version 11 of the patch.
>> Please review and please let me know if its ok for trunk.
> 
> s/satisified/satisfied/
> 

I will fix this.

>>> As said, I don't see why the below was not cleaned up before the V1 submission.
>>> Iff it breaks when manually CSEing, I'm curious why?
> 
> The function below looks identical in v12 of the patch.
> Why didn't you use common subexpressions?
> ba

Using CSE here breaks aarch64 regressions hence I have reverted it back 
not to use CSE,

>>>   
>>>>> +/* Return TRUE if reg source operand of zero_extend is argument registers
>>>>> +   and not return registers and source and destination operand are same
>>>>> +   and mode of source and destination operand are not same.  */
>>>>> +
>>>>> +static bool
>>>>> +abi_extension_candidate_p (rtx_insn *insn)
>>>>> +{
>>>>> +  rtx set = single_set (insn);
>>>>> +  machine_mode dst_mode = GET_MODE (SET_DEST (set));
>>>>> +  rtx orig_src = XEXP (SET_SRC (set), 0);
>>>>> +
>>>>> +  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>>>>> +      || abi_extension_candidate_return_reg_p (/*insn,*/ REGNO (orig_src)))  
>>>>> +    return false;
>>>>> +
>>>>> +  /* Mode of destination and source should be different.  */
>>>>> +  if (dst_mode == GET_MODE (orig_src))
>>>>> +    return false;
>>>>> +
>>>>> +  machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0));
>>>>> +  bool promote_p = abi_target_promote_function_mode (mode);
>>>>> +
>>>>> +  /* REGNO of source and destination should be same if not
>>>>> +      promoted.  */
>>>>> +  if (!promote_p && REGNO (SET_DEST (set)) != REGNO (orig_src))
>>>>> +    return false;
>>>>> +
>>>>> +  return true;
>>>>> +}
>>>>> +  
> 
> 
>>>
>>> As said, please also rephrase the above (and everything else if it obviously looks akin the above).
> 
> thanks

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
  2023-10-23 18:32       ` Vineet Gupta
@ 2023-10-24  7:40         ` Ajit Agarwal
  2023-10-24  9:36           ` Ajit Agarwal
  0 siblings, 1 reply; 20+ messages in thread
From: Ajit Agarwal @ 2023-10-24  7:40 UTC (permalink / raw)
  To: Vineet Gupta, rep.dot.nop, gcc-patches
  Cc: Jeff Law, Richard Biener, Segher Boessenkool, Peter Bergner,
	gnu-toolchain

Hello Vineet:

On 24/10/23 12:02 am, Vineet Gupta wrote:
> 
> 
> On 10/22/23 23:46, Ajit Agarwal wrote:
>> Hello All:
>>
>> Addressed below review comments in the version 11 of the patch.
>> Please review and please let me know if its ok for trunk.
>>
>> Thanks & Regards
>> Ajit
> 
> Again you are not paying attention to prior comments about fixing your submission practice and like some of the prior reviewers I'm starting to get tired, despite potentially good technical content.
> 

Sorry for the inconvenience caused. I will make sure all the comments from reviewers
are addressed.

> 1. The commentary above is NOT part of changelog. Either use a separate cover letter or add patch version change history between two "---" lines just before the start of code diff. And keep accumulating those as you post new versions. See [1]. This is so reviewers knwo what changed over 10 months and automatically gets dropped when patch is eventually applied/merged into tree.
>

Sure I will do that.
 
> 2. Acknowledge (even if it is yes) each and every comment of the reviewerw explicitly inline below. That ensures you don't miss addressing a change since this forces one to think about each of them.
> 

Surely I will acknowledge each and every comments inline.

> I do have some technical comments which I'll follow up with later.

I look forward to it.

> Just a short summary that v10 indeed bootstraps risc-v but I don't see any improvements at all - as in whenever abi interfaces code identifies and extension (saw missing a definition, the it is not able to eliminate any extensions despite the patch.
>

Thanks for the summary and the check. 

Thanks & Regards
Ajit
 
> -Vineet
> 
> [1] https://gcc.gnu.org/pipermail/gcc-patches/2023-October/632180.html
> 
>>
>> On 22/10/23 12:56 am, rep.dot.nop@gmail.com wrote:
>>> On 21 October 2023 01:56:16 CEST, Vineet Gupta <vineetg@rivosinc.com> wrote:
>>>> On 10/19/23 23:50, Ajit Agarwal wrote:
>>>>> Hello All:
>>>>>
>>>>> This version 9 of the patch uses abi interfaces to remove zero and sign extension elimination.
>>>>> Bootstrapped and regtested on powerpc-linux-gnu.
>>>>>
>>>>> In this version (version 9) of the patch following review comments are incorporated.
>>>>>
>>>>> a) Removal of hard code zero_extend and sign_extend  in abi interfaces.
>>>>> b) Source and destination with different registers are considered.
>>>>> c) Further enhancements.
>>>>> d) Added sign extension elimination using abi interfaces.
>>>> As has been trend in the past, I don't think all the review comments have been addressed.
>>> And apart from that, may I ask if this is just me, or does anybody else think that it might be worthwhile to actually read a patch before (re-)posting?
>>>
>>> Seeing e.g. the proposed abi_extension_candidate_p as written in a first POC would deserve some manual CSE, if nothing more then for clarity and conciseness?
>>>
>>> Just curious from a meta perspective..
>>>
>>> And:
>>>
>>>>> ree: Improve ree pass for rs6000 target using defined abi interfaces
>>> mentioning powerpc like this, and then changing generic code could be interpreted as misleading, IMHO.
>>>
>>>>> For rs6000 target we see redundant zero and sign extension and done
>>>>> to improve ree pass to eliminate such redundant zero and sign extension
>>>>> using defined ABI interfaces.
>>> Mentioning powerpc in the body as one of the affected target(s) is of course fine.
>>>
>>>
>>>>>    +/* Return TRUE if target mode is equal to source mode of zero_extend
>>>>> +   or sign_extend otherwise false.  */
>>> , false otherwise.
>>>
>>> But I'm not a native speaker
>>>
>>>
>>>>> +/* Return TRUE if the candidate insn is zero extend and regno is
>>>>> +   a return registers.  */
>>>>> +
>>>>> +static bool
>>>>> +abi_extension_candidate_return_reg_p (/*rtx_insn *insn, */int regno)
>>> Leftover debug comment.
>>>
>>>>> +{
>>>>> +  if (targetm.calls.function_value_regno_p (regno))
>>>>> +    return true;
>>>>> +
>>>>> +  return false;
>>>>> +}
>>>>> +
>>> As said, I don't see why the below was not cleaned up before the V1 submission.
>>> Iff it breaks when manually CSEing, I'm curious why?
>>>
>>>>> +/* Return TRUE if reg source operand of zero_extend is argument registers
>>>>> +   and not return registers and source and destination operand are same
>>>>> +   and mode of source and destination operand are not same.  */
>>>>> +
>>>>> +static bool
>>>>> +abi_extension_candidate_p (rtx_insn *insn)
>>>>> +{
>>>>> +  rtx set = single_set (insn);
>>>>> +  machine_mode dst_mode = GET_MODE (SET_DEST (set));
>>>>> +  rtx orig_src = XEXP (SET_SRC (set), 0);
>>>>> +
>>>>> +  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>>>>> +      || abi_extension_candidate_return_reg_p (/*insn,*/ REGNO (orig_src)))
>>> On top, debug leftover.
>>>
>>>>> +    return false;
>>>>> +
>>>>> +  /* Mode of destination and source should be different.  */
>>>>> +  if (dst_mode == GET_MODE (orig_src))
>>>>> +    return false;
>>>>> +
>>>>> +  machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0));
>>>>> +  bool promote_p = abi_target_promote_function_mode (mode);
>>>>> +
>>>>> +  /* REGNO of source and destination should be same if not
>>>>> +      promoted.  */
>>>>> +  if (!promote_p && REGNO (SET_DEST (set)) != REGNO (orig_src))
>>>>> +    return false;
>>>>> +
>>>>> +  return true;
>>>>> +}
>>>>> +
>>> As said, please also rephrase the above (and everything else if it obviously looks akin the above).
>>>
>>> The rest, mentioned below,  should largely be covered by following the coding convention.
>>>
>>>>> +/* Return TRUE if the candidate insn is zero extend and regno is
>>>>> +   an argument registers.  */
>>> Singular register.
>>>
>>>>> +
>>>>> +static bool
>>>>> +abi_extension_candidate_argno_p (/*rtx_code code, */int regno)
>>> Debug leftover.
>>> I would probably have inlined this function manually, with a respective comment.
>>> Did not look how often it is used, admittedly.
>>>
>>>>> +{
>>>>> +  if (FUNCTION_ARG_REGNO_P (regno))
>>>>> +    return true;
>>>>> +
>>>>> +  return false;
>>>>> +}
>>> []
>>>>> +
>>>>>    /* This function goes through all reaching defs of the source
>>> s/This function goes/Go/
>>>
>>>>>       of the candidate for elimination (CAND) and tries to combine
>>> (of, ?didn't look) candidate CAND for eliminating
>>>
>>>>>       the extension with the definition instruction.  The changes
>>> defining
>>>
>>> Pre-existing, I know.
>>> But you could fix those in a preparatory patch while you touch surrounding code.
>>> This is not a requirement, of course, just good habit, IMHO.
>>>
>>>>> @@ -770,6 +889,11 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>>>>>        state->defs_list.truncate (0);
>>>>>      state->copies_list.truncate (0);
>>>>> +  rtx orig_src = XEXP (SET_SRC (cand->expr),0);
>>>>> +
>>>>> +  if (abi_extension_candidate_p (cand->insn)
>>>>> +      && (!get_defs (cand->insn, orig_src, NULL)))
>>> Excess braces.
>>> Hopefully check_gnu_style would have complained.
>>>
>>>>> +    return abi_handle_regs (cand->insn);
>>>>>        outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
>>>>>    @@ -1036,6 +1160,15 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>>>>>            }
>>>>>        }
>>>>>    +  rtx insn_set = single_set (cand->insn);
>>>>> +
>>>>> +  machine_mode mode = (GET_MODE (XEXP (SET_SRC (insn_set), 0)));
>>> Excess braces.
>>> Also in a lot of other spots in your patch.
>>> Please read the coding conventions and the patch, once again, before submission?
>>> thanks
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
  2023-10-24  7:40         ` Ajit Agarwal
@ 2023-10-24  9:36           ` Ajit Agarwal
  0 siblings, 0 replies; 20+ messages in thread
From: Ajit Agarwal @ 2023-10-24  9:36 UTC (permalink / raw)
  To: gcc-patches, Vineet Gupta, Jeff Law
  Cc: Segher Boessenkool, Richard Biener, Peter Bergner,
	Bernhard Reutner-Fischer



On 24/10/23 1:10 pm, Ajit Agarwal wrote:
> Hello Vineet:
> 
> On 24/10/23 12:02 am, Vineet Gupta wrote:
>>
>>
>> On 10/22/23 23:46, Ajit Agarwal wrote:
>>> Hello All:
>>>
>>> Addressed below review comments in the version 11 of the patch.
>>> Please review and please let me know if its ok for trunk.
>>>
>>> Thanks & Regards
>>> Ajit
>>
>> Again you are not paying attention to prior comments about fixing your submission practice and like some of the prior reviewers I'm starting to get tired, despite potentially good technical content.
>>
> 
> Sorry for the inconvenience caused. I will make sure all the comments from reviewers
> are addressed.
> 
>> 1. The commentary above is NOT part of changelog. Either use a separate cover letter or add patch version change history between two "---" lines just before the start of code diff. And keep accumulating those as you post new versions. See [1]. This is so reviewers knwo what changed over 10 months and automatically gets dropped when patch is eventually applied/merged into tree.
>>
> 
> Sure I will do that.

Made changes in version 13 of the patch with changes since v6.

Thanks & Regards
Ajit
>  
>> 2. Acknowledge (even if it is yes) each and every comment of the reviewerw explicitly inline below. That ensures you don't miss addressing a change since this forces one to think about each of them.
>>
> 
> Surely I will acknowledge each and every comments inline.
> 
>> I do have some technical comments which I'll follow up with later.
> 
> I look forward to it.
> 
>> Just a short summary that v10 indeed bootstraps risc-v but I don't see any improvements at all - as in whenever abi interfaces code identifies and extension (saw missing a definition, the it is not able to eliminate any extensions despite the patch.
>>
> 
> Thanks for the summary and the check. 
> 
> Thanks & Regards
> Ajit
>  
>> -Vineet
>>
>> [1] https://gcc.gnu.org/pipermail/gcc-patches/2023-October/632180.html
>>
>>>
>>> On 22/10/23 12:56 am, rep.dot.nop@gmail.com wrote:
>>>> On 21 October 2023 01:56:16 CEST, Vineet Gupta <vineetg@rivosinc.com> wrote:
>>>>> On 10/19/23 23:50, Ajit Agarwal wrote:
>>>>>> Hello All:
>>>>>>
>>>>>> This version 9 of the patch uses abi interfaces to remove zero and sign extension elimination.
>>>>>> Bootstrapped and regtested on powerpc-linux-gnu.
>>>>>>
>>>>>> In this version (version 9) of the patch following review comments are incorporated.
>>>>>>
>>>>>> a) Removal of hard code zero_extend and sign_extend  in abi interfaces.
>>>>>> b) Source and destination with different registers are considered.
>>>>>> c) Further enhancements.
>>>>>> d) Added sign extension elimination using abi interfaces.
>>>>> As has been trend in the past, I don't think all the review comments have been addressed.
>>>> And apart from that, may I ask if this is just me, or does anybody else think that it might be worthwhile to actually read a patch before (re-)posting?
>>>>
>>>> Seeing e.g. the proposed abi_extension_candidate_p as written in a first POC would deserve some manual CSE, if nothing more then for clarity and conciseness?
>>>>
>>>> Just curious from a meta perspective..
>>>>
>>>> And:
>>>>
>>>>>> ree: Improve ree pass for rs6000 target using defined abi interfaces
>>>> mentioning powerpc like this, and then changing generic code could be interpreted as misleading, IMHO.
>>>>
>>>>>> For rs6000 target we see redundant zero and sign extension and done
>>>>>> to improve ree pass to eliminate such redundant zero and sign extension
>>>>>> using defined ABI interfaces.
>>>> Mentioning powerpc in the body as one of the affected target(s) is of course fine.
>>>>
>>>>
>>>>>>    +/* Return TRUE if target mode is equal to source mode of zero_extend
>>>>>> +   or sign_extend otherwise false.  */
>>>> , false otherwise.
>>>>
>>>> But I'm not a native speaker
>>>>
>>>>
>>>>>> +/* Return TRUE if the candidate insn is zero extend and regno is
>>>>>> +   a return registers.  */
>>>>>> +
>>>>>> +static bool
>>>>>> +abi_extension_candidate_return_reg_p (/*rtx_insn *insn, */int regno)
>>>> Leftover debug comment.
>>>>
>>>>>> +{
>>>>>> +  if (targetm.calls.function_value_regno_p (regno))
>>>>>> +    return true;
>>>>>> +
>>>>>> +  return false;
>>>>>> +}
>>>>>> +
>>>> As said, I don't see why the below was not cleaned up before the V1 submission.
>>>> Iff it breaks when manually CSEing, I'm curious why?
>>>>
>>>>>> +/* Return TRUE if reg source operand of zero_extend is argument registers
>>>>>> +   and not return registers and source and destination operand are same
>>>>>> +   and mode of source and destination operand are not same.  */
>>>>>> +
>>>>>> +static bool
>>>>>> +abi_extension_candidate_p (rtx_insn *insn)
>>>>>> +{
>>>>>> +  rtx set = single_set (insn);
>>>>>> +  machine_mode dst_mode = GET_MODE (SET_DEST (set));
>>>>>> +  rtx orig_src = XEXP (SET_SRC (set), 0);
>>>>>> +
>>>>>> +  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>>>>>> +      || abi_extension_candidate_return_reg_p (/*insn,*/ REGNO (orig_src)))
>>>> On top, debug leftover.
>>>>
>>>>>> +    return false;
>>>>>> +
>>>>>> +  /* Mode of destination and source should be different.  */
>>>>>> +  if (dst_mode == GET_MODE (orig_src))
>>>>>> +    return false;
>>>>>> +
>>>>>> +  machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0));
>>>>>> +  bool promote_p = abi_target_promote_function_mode (mode);
>>>>>> +
>>>>>> +  /* REGNO of source and destination should be same if not
>>>>>> +      promoted.  */
>>>>>> +  if (!promote_p && REGNO (SET_DEST (set)) != REGNO (orig_src))
>>>>>> +    return false;
>>>>>> +
>>>>>> +  return true;
>>>>>> +}
>>>>>> +
>>>> As said, please also rephrase the above (and everything else if it obviously looks akin the above).
>>>>
>>>> The rest, mentioned below,  should largely be covered by following the coding convention.
>>>>
>>>>>> +/* Return TRUE if the candidate insn is zero extend and regno is
>>>>>> +   an argument registers.  */
>>>> Singular register.
>>>>
>>>>>> +
>>>>>> +static bool
>>>>>> +abi_extension_candidate_argno_p (/*rtx_code code, */int regno)
>>>> Debug leftover.
>>>> I would probably have inlined this function manually, with a respective comment.
>>>> Did not look how often it is used, admittedly.
>>>>
>>>>>> +{
>>>>>> +  if (FUNCTION_ARG_REGNO_P (regno))
>>>>>> +    return true;
>>>>>> +
>>>>>> +  return false;
>>>>>> +}
>>>> []
>>>>>> +
>>>>>>    /* This function goes through all reaching defs of the source
>>>> s/This function goes/Go/
>>>>
>>>>>>       of the candidate for elimination (CAND) and tries to combine
>>>> (of, ?didn't look) candidate CAND for eliminating
>>>>
>>>>>>       the extension with the definition instruction.  The changes
>>>> defining
>>>>
>>>> Pre-existing, I know.
>>>> But you could fix those in a preparatory patch while you touch surrounding code.
>>>> This is not a requirement, of course, just good habit, IMHO.
>>>>
>>>>>> @@ -770,6 +889,11 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>>>>>>        state->defs_list.truncate (0);
>>>>>>      state->copies_list.truncate (0);
>>>>>> +  rtx orig_src = XEXP (SET_SRC (cand->expr),0);
>>>>>> +
>>>>>> +  if (abi_extension_candidate_p (cand->insn)
>>>>>> +      && (!get_defs (cand->insn, orig_src, NULL)))
>>>> Excess braces.
>>>> Hopefully check_gnu_style would have complained.
>>>>
>>>>>> +    return abi_handle_regs (cand->insn);
>>>>>>        outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
>>>>>>    @@ -1036,6 +1160,15 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>>>>>>            }
>>>>>>        }
>>>>>>    +  rtx insn_set = single_set (cand->insn);
>>>>>> +
>>>>>> +  machine_mode mode = (GET_MODE (XEXP (SET_SRC (insn_set), 0)));
>>>> Excess braces.
>>>> Also in a lot of other spots in your patch.
>>>> Please read the coding conventions and the patch, once again, before submission?
>>>> thanks
>>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
  2023-10-24  7:36         ` Ajit Agarwal
@ 2023-10-24 20:36           ` rep.dot.nop
  2023-10-24 20:49             ` Vineet Gupta
  2023-10-25 11:08             ` Ajit Agarwal
  0 siblings, 2 replies; 20+ messages in thread
From: rep.dot.nop @ 2023-10-24 20:36 UTC (permalink / raw)
  To: Ajit Agarwal
  Cc: gcc-patches, Vineet Gupta, Jeff Law, Richard Biener,
	Segher Boessenkool, Peter Bergner, gnu-toolchain

On 24 October 2023 09:36:22 CEST, Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>Hello Bernhard:
>
>On 23/10/23 7:40 pm, Bernhard Reutner-Fischer wrote:
>> On Mon, 23 Oct 2023 12:16:18 +0530
>> Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>> 
>>> Hello All:
>>>
>>> Addressed below review comments in the version 11 of the patch.
>>> Please review and please let me know if its ok for trunk.
>> 
>> s/satisified/satisfied/
>> 
>
>I will fix this.

thanks!

>
>>>> As said, I don't see why the below was not cleaned up before the V1 submission.
>>>> Iff it breaks when manually CSEing, I'm curious why?
>> 
>> The function below looks identical in v12 of the patch.
>> Why didn't you use common subexpressions?
>> ba
>
>Using CSE here breaks aarch64 regressions hence I have reverted it back 
>not to use CSE,

Just for my own education, can you please paste your patch perusing common subexpressions and an assembly diff of the failing versus working aarch64 testcase, along how you configured that failing (cross-?)compiler and the command-line of a typical testcase that broke when manually CSEing the function below?

I might have not completely understood the subtile intricacies of RTL re-entrancy, it seems?

thanks

>>>>   
>>>>>> +/* Return TRUE if reg source operand of zero_extend is argument registers
>>>>>> +   and not return registers and source and destination operand are same
>>>>>> +   and mode of source and destination operand are not same.  */
>>>>>> +
>>>>>> +static bool
>>>>>> +abi_extension_candidate_p (rtx_insn *insn)
>>>>>> +{
>>>>>> +  rtx set = single_set (insn);
>>>>>> +  machine_mode dst_mode = GET_MODE (SET_DEST (set));
>>>>>> +  rtx orig_src = XEXP (SET_SRC (set), 0);
>>>>>> +
>>>>>> +  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>>>>>> +      || abi_extension_candidate_return_reg_p (/*insn,*/ REGNO (orig_src)))  
>>>>>> +    return false;
>>>>>> +
>>>>>> +  /* Mode of destination and source should be different.  */
>>>>>> +  if (dst_mode == GET_MODE (orig_src))
>>>>>> +    return false;
>>>>>> +
>>>>>> +  machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0));
>>>>>> +  bool promote_p = abi_target_promote_function_mode (mode);
>>>>>> +
>>>>>> +  /* REGNO of source and destination should be same if not
>>>>>> +      promoted.  */
>>>>>> +  if (!promote_p && REGNO (SET_DEST (set)) != REGNO (orig_src))
>>>>>> +    return false;
>>>>>> +
>>>>>> +  return true;
>>>>>> +}
>>>>>> +  
>> 
>> 
>>>>
>>>> As said, please also rephrase the above (and everything else if it obviously looks akin the above).
>> 
>> thanks


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
  2023-10-24 20:36           ` rep.dot.nop
@ 2023-10-24 20:49             ` Vineet Gupta
  2023-10-25 11:11               ` Ajit Agarwal
  2023-10-25 11:08             ` Ajit Agarwal
  1 sibling, 1 reply; 20+ messages in thread
From: Vineet Gupta @ 2023-10-24 20:49 UTC (permalink / raw)
  To: rep.dot.nop, Ajit Agarwal
  Cc: gcc-patches, Jeff Law, Richard Biener, Segher Boessenkool,
	Peter Bergner, gnu-toolchain

On 10/24/23 13:36, rep.dot.nop@gmail.com wrote:
>>>>> As said, I don't see why the below was not cleaned up before the V1 submission.
>>>>> Iff it breaks when manually CSEing, I'm curious why?
>>> The function below looks identical in v12 of the patch.
>>> Why didn't you use common subexpressions?
>>> ba
>> Using CSE here breaks aarch64 regressions hence I have reverted it back
>> not to use CSE,
> Just for my own education, can you please paste your patch perusing common subexpressions and an assembly diff of the failing versus working aarch64 testcase, along how you configured that failing (cross-?)compiler and the command-line of a typical testcase that broke when manually CSEing the function below?

I was meaning to ask this before, but what exactly is the CSE issue, 
manually or whatever.

-Vineet

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
  2023-10-24 20:36           ` rep.dot.nop
  2023-10-24 20:49             ` Vineet Gupta
@ 2023-10-25 11:08             ` Ajit Agarwal
  1 sibling, 0 replies; 20+ messages in thread
From: Ajit Agarwal @ 2023-10-25 11:08 UTC (permalink / raw)
  To: rep.dot.nop
  Cc: gcc-patches, Vineet Gupta, Jeff Law, Richard Biener,
	Segher Boessenkool, Peter Bergner, gnu-toolchain



On 25/10/23 2:06 am, rep.dot.nop@gmail.com wrote:
> On 24 October 2023 09:36:22 CEST, Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>> Hello Bernhard:
>>
>> On 23/10/23 7:40 pm, Bernhard Reutner-Fischer wrote:
>>> On Mon, 23 Oct 2023 12:16:18 +0530
>>> Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>>
>>>> Hello All:
>>>>
>>>> Addressed below review comments in the version 11 of the patch.
>>>> Please review and please let me know if its ok for trunk.
>>>
>>> s/satisified/satisfied/
>>>
>>
>> I will fix this.
> 
> thanks!
> 
>>
>>>>> As said, I don't see why the below was not cleaned up before the V1 submission.
>>>>> Iff it breaks when manually CSEing, I'm curious why?
>>>
>>> The function below looks identical in v12 of the patch.
>>> Why didn't you use common subexpressions?
>>> ba
>>
>> Using CSE here breaks aarch64 regressions hence I have reverted it back 
>> not to use CSE,
> 
> Just for my own education, can you please paste your patch perusing common subexpressions and an assembly diff of the failing versus working aarch64 testcase, along how you configured that failing (cross-?)compiler and the command-line of a typical testcase that broke when manually CSEing the function below?
> 
> I might have not completely understood the subtile intricacies of RTL re-entrancy, it seems?
> 

Here is the abi interface where I CSE'D and got a mail from automated regressions run that aarch64
test fails.

static inline bool
abi_extension_candidate_return_reg_p (int regno)
{
  if (targetm.calls.function_value_regno_p (regno))
    return true;

  return false;
}

+static inline bool
+abi_extension_candidate_return_reg_p (int regno)
+{
+  return targetm.calls.function_value_regno_p (regno);
+}


I have not done any assembly diff as myself have not cross compiled with aarch64.
Reverting above CSE the tests passes with automatically regression runs and build with linaro.
Linaro runs the tests with every patch we submit in gcc-patches and if there is any fail they 
report error.

Reverting CSE the Linaro tests passes.

Thanks & Regards
Ajit
> thanks
> 
>>>>>   
>>>>>>> +/* Return TRUE if reg source operand of zero_extend is argument registers
>>>>>>> +   and not return registers and source and destination operand are same
>>>>>>> +   and mode of source and destination operand are not same.  */
>>>>>>> +
>>>>>>> +static bool
>>>>>>> +abi_extension_candidate_p (rtx_insn *insn)
>>>>>>> +{
>>>>>>> +  rtx set = single_set (insn);
>>>>>>> +  machine_mode dst_mode = GET_MODE (SET_DEST (set));
>>>>>>> +  rtx orig_src = XEXP (SET_SRC (set), 0);
>>>>>>> +
>>>>>>> +  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>>>>>>> +      || abi_extension_candidate_return_reg_p (/*insn,*/ REGNO (orig_src)))  
>>>>>>> +    return false;
>>>>>>> +
>>>>>>> +  /* Mode of destination and source should be different.  */
>>>>>>> +  if (dst_mode == GET_MODE (orig_src))
>>>>>>> +    return false;
>>>>>>> +
>>>>>>> +  machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0));
>>>>>>> +  bool promote_p = abi_target_promote_function_mode (mode);
>>>>>>> +
>>>>>>> +  /* REGNO of source and destination should be same if not
>>>>>>> +      promoted.  */
>>>>>>> +  if (!promote_p && REGNO (SET_DEST (set)) != REGNO (orig_src))
>>>>>>> +    return false;
>>>>>>> +
>>>>>>> +  return true;
>>>>>>> +}
>>>>>>> +  
>>>
>>>
>>>>>
>>>>> As said, please also rephrase the above (and everything else if it obviously looks akin the above).
>>>
>>> thanks
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
  2023-10-24 20:49             ` Vineet Gupta
@ 2023-10-25 11:11               ` Ajit Agarwal
  2023-10-27 17:16                 ` Bernhard Reutner-Fischer
  0 siblings, 1 reply; 20+ messages in thread
From: Ajit Agarwal @ 2023-10-25 11:11 UTC (permalink / raw)
  To: Vineet Gupta, rep.dot.nop
  Cc: gcc-patches, Jeff Law, Richard Biener, Segher Boessenkool,
	Peter Bergner, gnu-toolchain



On 25/10/23 2:19 am, Vineet Gupta wrote:
> On 10/24/23 13:36, rep.dot.nop@gmail.com wrote:
>>>>>> As said, I don't see why the below was not cleaned up before the V1 submission.
>>>>>> Iff it breaks when manually CSEing, I'm curious why?
>>>> The function below looks identical in v12 of the patch.
>>>> Why didn't you use common subexpressions?
>>>> ba
>>> Using CSE here breaks aarch64 regressions hence I have reverted it back
>>> not to use CSE,
>> Just for my own education, can you please paste your patch perusing common subexpressions and an assembly diff of the failing versus working aarch64 testcase, along how you configured that failing (cross-?)compiler and the command-line of a typical testcase that broke when manually CSEing the function below?
> 
> I was meaning to ask this before, but what exactly is the CSE issue, manually or whatever.
> 
Here is the abi interface where I CSE'D and got a mail from automated regressions run that aarch64
test fails.

static inline bool
abi_extension_candidate_return_reg_p (int regno)
{
  if (targetm.calls.function_value_regno_p (regno))
    return true;

  return false;
}

+static inline bool
+abi_extension_candidate_return_reg_p (int regno)
+{
+  return targetm.calls.function_value_regno_p (regno);
+}


I have not done any assembly diff as myself have not cross compiled with aarch64.
Reverting above CSE the tests passes with automatically regression runs and build with linaro.
Linaro runs the tests with every patch we submit in gcc-patches and if there is any fail they 
report error.

Reverting CSE the Linaro tests passes.

Thanks & Regards
Ajit

> -Vineet

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
  2023-10-25 11:11               ` Ajit Agarwal
@ 2023-10-27 17:16                 ` Bernhard Reutner-Fischer
  2023-10-27 22:39                   ` Vineet Gupta
  2023-10-28 10:25                   ` Ajit Agarwal
  0 siblings, 2 replies; 20+ messages in thread
From: Bernhard Reutner-Fischer @ 2023-10-27 17:16 UTC (permalink / raw)
  To: Ajit Agarwal
  Cc: rep.dot.nop, Vineet Gupta, gcc-patches, Jeff Law, Richard Biener,
	Segher Boessenkool, Peter Bergner, gnu-toolchain

On Wed, 25 Oct 2023 16:41:07 +0530
Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:

> On 25/10/23 2:19 am, Vineet Gupta wrote:
> > On 10/24/23 13:36, rep.dot.nop@gmail.com wrote:  
> >>>>>> As said, I don't see why the below was not cleaned up before the V1 submission.
> >>>>>> Iff it breaks when manually CSEing, I'm curious why?  
> >>>> The function below looks identical in v12 of the patch.
> >>>> Why didn't you use common subexpressions?
> >>>> ba  
> >>> Using CSE here breaks aarch64 regressions hence I have reverted it back
> >>> not to use CSE,  
> >> Just for my own education, can you please paste your patch perusing common subexpressions and an assembly diff of the failing versus working aarch64 testcase, along how you configured that failing (cross-?)compiler and the command-line of a typical testcase that broke when manually CSEing the function below?  
> > 
> > I was meaning to ask this before, but what exactly is the CSE issue, manually or whatever.

If nothing else it would hopefully improve the readability.

> >   
> Here is the abi interface where I CSE'D and got a mail from automated regressions run that aarch64
> test fails.

We already concluded that this failure was obviously a hiccup on the
testers, no problem.

> +static inline bool
> +abi_extension_candidate_return_reg_p (int regno)
> +{
> +  return targetm.calls.function_value_regno_p (regno);
> +}

But i was referring to abi_extension_candidate_p :)

your v13 looks like this:

+static bool
+abi_extension_candidate_p (rtx_insn *insn)
+{
+  rtx set = single_set (insn);
+  machine_mode dst_mode = GET_MODE (SET_DEST (set));
+  rtx orig_src = XEXP (SET_SRC (set), 0);
+
+  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
+      || abi_extension_candidate_return_reg_p (REGNO (orig_src)))
+    return false;
+
+  /* Return FALSE if mode of destination and source is same.  */
+  if (dst_mode == GET_MODE (orig_src))
+    return false;
+
+  machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0));
+  bool promote_p = abi_target_promote_function_mode (mode);
+
+  /* Return FALSE if promote is false and REGNO of source and destination
+     is different.  */
+  if (!promote_p && REGNO (SET_DEST (set)) != REGNO (orig_src))
+    return false;
+
+  return true;
+}

and i suppose it would be easier to read if phrased something like

static bool
abi_extension_candidate_p (rtx_insn *insn)
{
  rtx set = single_set (insn);
  rtx orig_src = XEXP (SET_SRC (set), 0);
  unsigned int src_regno = REGNO (orig_src);

  /* Not a function argument reg or is a function values return reg.  */
  if (!FUNCTION_ARG_REGNO_P (src_regno)
      || abi_extension_candidate_return_reg_p (src_regno))
    return false;

  rtx dst = SET_DST (set);
  machine_mode src_mode = GET_MODE (orig_src);

  /* Return FALSE if mode of destination and source is the same.  */
  if (GET_MODE (dst) == src_mode)
    return false;

  /* Return FALSE if the FIX THE COMMENT and REGNO of source and destination
     is different.  */
  if (!abi_target_promote_function_mode_p (src_mode)
      && REGNO (dst) != src_regno)
    return false;

  return true;
}

so no, that's not exactly better.

Maybe just do what the function comment says (i did not check the "not
promoted" part, but you get the idea):

^L

/* Return TRUE if
   reg source operand is argument register and not return register,
   mode of source and destination operand are different,
   if not promoted REGNO of source and destination operand are the same.  */
static bool
abi_extension_candidate_p (rtx_insn *insn)
{
  rtx set = single_set (insn);
  rtx orig_src = XEXP (SET_SRC (set), 0);

  if (FUNCTION_ARG_REGNO_P (REGNO (orig_src))
      && !abi_extension_candidate_return_reg_p (REGNO (orig_src))
      && GET_MODE (SET_DST (set)) != GET_MODE (orig_src)
      && abi_target_promote_function_mode_p (GET_MODE (orig_src))
      && REGNO (SET_DST (set)) == REGNO (orig_src))
    return true;

  return false;
}

I think this is much easier to actually read (and that's why good
function comments are important). In the end it's not important and
just personal preference.
Either way, I did not check the plausibility of the logic therein.

> 
> 
> I have not done any assembly diff as myself have not cross compiled with aarch64.

fair enough.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
  2023-10-27 17:16                 ` Bernhard Reutner-Fischer
@ 2023-10-27 22:39                   ` Vineet Gupta
  2023-10-28 10:26                     ` Ajit Agarwal
  2023-10-28 10:25                   ` Ajit Agarwal
  1 sibling, 1 reply; 20+ messages in thread
From: Vineet Gupta @ 2023-10-27 22:39 UTC (permalink / raw)
  To: Bernhard Reutner-Fischer, Ajit Agarwal
  Cc: gcc-patches, Jeff Law, Richard Biener, Segher Boessenkool,
	Peter Bergner, gnu-toolchain



On 10/27/23 10:16, Bernhard Reutner-Fischer wrote:
> On Wed, 25 Oct 2023 16:41:07 +0530
> Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>
>> On 25/10/23 2:19 am, Vineet Gupta wrote:
>>> On 10/24/23 13:36, rep.dot.nop@gmail.com wrote:
>>>>>>>> As said, I don't see why the below was not cleaned up before the V1 submission.
>>>>>>>> Iff it breaks when manually CSEing, I'm curious why?
>>>>>> The function below looks identical in v12 of the patch.
>>>>>> Why didn't you use common subexpressions?
>>>>>> ba
>>>>> Using CSE here breaks aarch64 regressions hence I have reverted it back
>>>>> not to use CSE,
>>>> Just for my own education, can you please paste your patch perusing common subexpressions and an assembly diff of the failing versus working aarch64 testcase, along how you configured that failing (cross-?)compiler and the command-line of a typical testcase that broke when manually CSEing the function below?
>>> I was meaning to ask this before, but what exactly is the CSE issue, manually or whatever.
> If nothing else it would hopefully improve the readability.
>
>>>    
>> Here is the abi interface where I CSE'D and got a mail from automated regressions run that aarch64
>> test fails.
> We already concluded that this failure was obviously a hiccup on the
> testers, no problem.
>
>> +static inline bool
>> +abi_extension_candidate_return_reg_p (int regno)
>> +{
>> +  return targetm.calls.function_value_regno_p (regno);
>> +}
> But i was referring to abi_extension_candidate_p :)
>
> your v13 looks like this:
>
> +static bool
> +abi_extension_candidate_p (rtx_insn *insn)
> +{
> +  rtx set = single_set (insn);
> +  machine_mode dst_mode = GET_MODE (SET_DEST (set));
> +  rtx orig_src = XEXP (SET_SRC (set), 0);
> +
> +  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
> +      || abi_extension_candidate_return_reg_p (REGNO (orig_src)))
> +    return false;
> +
> +  /* Return FALSE if mode of destination and source is same.  */
> +  if (dst_mode == GET_MODE (orig_src))
> +    return false;
> +
> +  machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0));
> +  bool promote_p = abi_target_promote_function_mode (mode);
> +
> +  /* Return FALSE if promote is false and REGNO of source and destination
> +     is different.  */
> +  if (!promote_p && REGNO (SET_DEST (set)) != REGNO (orig_src))
> +    return false;
> +
> +  return true;
> +}
>
> and i suppose it would be easier to read if phrased something like
>
> static bool
> abi_extension_candidate_p (rtx_insn *insn)
> {
>    rtx set = single_set (insn);
>    rtx orig_src = XEXP (SET_SRC (set), 0);
>    unsigned int src_regno = REGNO (orig_src);
>
>    /* Not a function argument reg or is a function values return reg.  */
>    if (!FUNCTION_ARG_REGNO_P (src_regno)
>        || abi_extension_candidate_return_reg_p (src_regno))
>      return false;
>
>    rtx dst = SET_DST (set);
>    machine_mode src_mode = GET_MODE (orig_src);
>
>    /* Return FALSE if mode of destination and source is the same.  */
>    if (GET_MODE (dst) == src_mode)
>      return false;
>
>    /* Return FALSE if the FIX THE COMMENT and REGNO of source and destination
>       is different.  */
>    if (!abi_target_promote_function_mode_p (src_mode)
>        && REGNO (dst) != src_regno)
>      return false;
>
>    return true;
> }
>
> so no, that's not exactly better.
>
> Maybe just do what the function comment says (i did not check the "not
> promoted" part, but you get the idea):
>
> ^L
>
> /* Return TRUE if
>     reg source operand is argument register and not return register,
>     mode of source and destination operand are different,
>     if not promoted REGNO of source and destination operand are the same.  */
> static bool
> abi_extension_candidate_p (rtx_insn *insn)
> {
>    rtx set = single_set (insn);
>    rtx orig_src = XEXP (SET_SRC (set), 0);
>
>    if (FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>        && !abi_extension_candidate_return_reg_p (REGNO (orig_src))
>        && GET_MODE (SET_DST (set)) != GET_MODE (orig_src)
>        && abi_target_promote_function_mode_p (GET_MODE (orig_src))
>        && REGNO (SET_DST (set)) == REGNO (orig_src))
>      return true;
>
>    return false;
> }

This may have been my doing as I asked to split out the logic as some of 
the conditions merit more commentary.
e.g. why does the mode need to be same
But granted this is the usual coding style in gcc and the extra comments 
could still be added before the big if

-Vineet

>
> I think this is much easier to actually read (and that's why good
> function comments are important). In the end it's not important and
> just personal preference.
> Either way, I did not check the plausibility of the logic therein.
>
>>
>> I have not done any assembly diff as myself have not cross compiled with aarch64.
> fair enough.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
  2023-10-27 17:16                 ` Bernhard Reutner-Fischer
  2023-10-27 22:39                   ` Vineet Gupta
@ 2023-10-28 10:25                   ` Ajit Agarwal
  2023-10-29 10:48                     ` Ajit Agarwal
  1 sibling, 1 reply; 20+ messages in thread
From: Ajit Agarwal @ 2023-10-28 10:25 UTC (permalink / raw)
  To: Bernhard Reutner-Fischer
  Cc: Vineet Gupta, gcc-patches, Jeff Law, Richard Biener,
	Segher Boessenkool, Peter Bergner, gnu-toolchain



On 27/10/23 10:46 pm, Bernhard Reutner-Fischer wrote:
> On Wed, 25 Oct 2023 16:41:07 +0530
> Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
> 
>> On 25/10/23 2:19 am, Vineet Gupta wrote:
>>> On 10/24/23 13:36, rep.dot.nop@gmail.com wrote:  
>>>>>>>> As said, I don't see why the below was not cleaned up before the V1 submission.
>>>>>>>> Iff it breaks when manually CSEing, I'm curious why?  
>>>>>> The function below looks identical in v12 of the patch.
>>>>>> Why didn't you use common subexpressions?
>>>>>> ba  
>>>>> Using CSE here breaks aarch64 regressions hence I have reverted it back
>>>>> not to use CSE,  
>>>> Just for my own education, can you please paste your patch perusing common subexpressions and an assembly diff of the failing versus working aarch64 testcase, along how you configured that failing (cross-?)compiler and the command-line of a typical testcase that broke when manually CSEing the function below?  
>>>
>>> I was meaning to ask this before, but what exactly is the CSE issue, manually or whatever.
> 
> If nothing else it would hopefully improve the readability.
> 
>>>   
>> Here is the abi interface where I CSE'D and got a mail from automated regressions run that aarch64
>> test fails.
> 
> We already concluded that this failure was obviously a hiccup on the
> testers, no problem.

Thanks.
> 
>> +static inline bool
>> +abi_extension_candidate_return_reg_p (int regno)
>> +{
>> +  return targetm.calls.function_value_regno_p (regno);
>> +}
> 
> But i was referring to abi_extension_candidate_p :)
> 
> your v13 looks like this:
> 
> +static bool
> +abi_extension_candidate_p (rtx_insn *insn)
> +{
> +  rtx set = single_set (insn);
> +  machine_mode dst_mode = GET_MODE (SET_DEST (set));
> +  rtx orig_src = XEXP (SET_SRC (set), 0);
> +
> +  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
> +      || abi_extension_candidate_return_reg_p (REGNO (orig_src)))
> +    return false;
> +
> +  /* Return FALSE if mode of destination and source is same.  */
> +  if (dst_mode == GET_MODE (orig_src))
> +    return false;
> +
> +  machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0));
> +  bool promote_p = abi_target_promote_function_mode (mode);
> +
> +  /* Return FALSE if promote is false and REGNO of source and destination
> +     is different.  */
> +  if (!promote_p && REGNO (SET_DEST (set)) != REGNO (orig_src))
> +    return false;
> +
> +  return true;
> +}
> 
> and i suppose it would be easier to read if phrased something like
> 
> static bool
> abi_extension_candidate_p (rtx_insn *insn)
> {
>   rtx set = single_set (insn);
>   rtx orig_src = XEXP (SET_SRC (set), 0);
>   unsigned int src_regno = REGNO (orig_src);
> 
>   /* Not a function argument reg or is a function values return reg.  */
>   if (!FUNCTION_ARG_REGNO_P (src_regno)
>       || abi_extension_candidate_return_reg_p (src_regno))
>     return false;
> 
>   rtx dst = SET_DST (set);
>   machine_mode src_mode = GET_MODE (orig_src);
> 
>   /* Return FALSE if mode of destination and source is the same.  */
>   if (GET_MODE (dst) == src_mode)
>     return false;
> 
>   /* Return FALSE if the FIX THE COMMENT and REGNO of source and destination
>      is different.  */
>   if (!abi_target_promote_function_mode_p (src_mode)
>       && REGNO (dst) != src_regno)
>     return false;
> 
>   return true;
> }
> 
> so no, that's not exactly better.
> 
> Maybe just do what the function comment says (i did not check the "not
> promoted" part, but you get the idea):
> 
> ^L
> 
> /* Return TRUE if
>    reg source operand is argument register and not return register,
>    mode of source and destination operand are different,
>    if not promoted REGNO of source and destination operand are the same.  */
> static bool
> abi_extension_candidate_p (rtx_insn *insn)
> {
>   rtx set = single_set (insn);
>   rtx orig_src = XEXP (SET_SRC (set), 0);
> 
>   if (FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>       && !abi_extension_candidate_return_reg_p (REGNO (orig_src))
>       && GET_MODE (SET_DST (set)) != GET_MODE (orig_src)
>       && abi_target_promote_function_mode_p (GET_MODE (orig_src))
>       && REGNO (SET_DST (set)) == REGNO (orig_src))
>     return true;
> 
>   return false;
> }
> 
> I think this is much easier to actually read (and that's why good
> function comments are important). In the end it's not important and
> just personal preference.
> Either way, I did not check the plausibility of the logic therein.
> 
>>

Addressed in V15 of the patch. 
>>
>> I have not done any assembly diff as myself have not cross compiled with aarch64.
> 
> fair enough.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
  2023-10-27 22:39                   ` Vineet Gupta
@ 2023-10-28 10:26                     ` Ajit Agarwal
  2023-10-29 10:49                       ` Ajit Agarwal
  0 siblings, 1 reply; 20+ messages in thread
From: Ajit Agarwal @ 2023-10-28 10:26 UTC (permalink / raw)
  To: Vineet Gupta, Bernhard Reutner-Fischer
  Cc: gcc-patches, Jeff Law, Richard Biener, Segher Boessenkool,
	Peter Bergner, gnu-toolchain



On 28/10/23 4:09 am, Vineet Gupta wrote:
> 
> 
> On 10/27/23 10:16, Bernhard Reutner-Fischer wrote:
>> On Wed, 25 Oct 2023 16:41:07 +0530
>> Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>
>>> On 25/10/23 2:19 am, Vineet Gupta wrote:
>>>> On 10/24/23 13:36, rep.dot.nop@gmail.com wrote:
>>>>>>>>> As said, I don't see why the below was not cleaned up before the V1 submission.
>>>>>>>>> Iff it breaks when manually CSEing, I'm curious why?
>>>>>>> The function below looks identical in v12 of the patch.
>>>>>>> Why didn't you use common subexpressions?
>>>>>>> ba
>>>>>> Using CSE here breaks aarch64 regressions hence I have reverted it back
>>>>>> not to use CSE,
>>>>> Just for my own education, can you please paste your patch perusing common subexpressions and an assembly diff of the failing versus working aarch64 testcase, along how you configured that failing (cross-?)compiler and the command-line of a typical testcase that broke when manually CSEing the function below?
>>>> I was meaning to ask this before, but what exactly is the CSE issue, manually or whatever.
>> If nothing else it would hopefully improve the readability.
>>
>>>>    
>>> Here is the abi interface where I CSE'D and got a mail from automated regressions run that aarch64
>>> test fails.
>> We already concluded that this failure was obviously a hiccup on the
>> testers, no problem.
>>
>>> +static inline bool
>>> +abi_extension_candidate_return_reg_p (int regno)
>>> +{
>>> +  return targetm.calls.function_value_regno_p (regno);
>>> +}
>> But i was referring to abi_extension_candidate_p :)
>>
>> your v13 looks like this:
>>
>> +static bool
>> +abi_extension_candidate_p (rtx_insn *insn)
>> +{
>> +  rtx set = single_set (insn);
>> +  machine_mode dst_mode = GET_MODE (SET_DEST (set));
>> +  rtx orig_src = XEXP (SET_SRC (set), 0);
>> +
>> +  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>> +      || abi_extension_candidate_return_reg_p (REGNO (orig_src)))
>> +    return false;
>> +
>> +  /* Return FALSE if mode of destination and source is same.  */
>> +  if (dst_mode == GET_MODE (orig_src))
>> +    return false;
>> +
>> +  machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0));
>> +  bool promote_p = abi_target_promote_function_mode (mode);
>> +
>> +  /* Return FALSE if promote is false and REGNO of source and destination
>> +     is different.  */
>> +  if (!promote_p && REGNO (SET_DEST (set)) != REGNO (orig_src))
>> +    return false;
>> +
>> +  return true;
>> +}
>>
>> and i suppose it would be easier to read if phrased something like
>>
>> static bool
>> abi_extension_candidate_p (rtx_insn *insn)
>> {
>>    rtx set = single_set (insn);
>>    rtx orig_src = XEXP (SET_SRC (set), 0);
>>    unsigned int src_regno = REGNO (orig_src);
>>
>>    /* Not a function argument reg or is a function values return reg.  */
>>    if (!FUNCTION_ARG_REGNO_P (src_regno)
>>        || abi_extension_candidate_return_reg_p (src_regno))
>>      return false;
>>
>>    rtx dst = SET_DST (set);
>>    machine_mode src_mode = GET_MODE (orig_src);
>>
>>    /* Return FALSE if mode of destination and source is the same.  */
>>    if (GET_MODE (dst) == src_mode)
>>      return false;
>>
>>    /* Return FALSE if the FIX THE COMMENT and REGNO of source and destination
>>       is different.  */
>>    if (!abi_target_promote_function_mode_p (src_mode)
>>        && REGNO (dst) != src_regno)
>>      return false;
>>
>>    return true;
>> }
>>
>> so no, that's not exactly better.
>>
>> Maybe just do what the function comment says (i did not check the "not
>> promoted" part, but you get the idea):
>>
>> ^L
>>
>> /* Return TRUE if
>>     reg source operand is argument register and not return register,
>>     mode of source and destination operand are different,
>>     if not promoted REGNO of source and destination operand are the same.  */
>> static bool
>> abi_extension_candidate_p (rtx_insn *insn)
>> {
>>    rtx set = single_set (insn);
>>    rtx orig_src = XEXP (SET_SRC (set), 0);
>>
>>    if (FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>>        && !abi_extension_candidate_return_reg_p (REGNO (orig_src))
>>        && GET_MODE (SET_DST (set)) != GET_MODE (orig_src)
>>        && abi_target_promote_function_mode_p (GET_MODE (orig_src))
>>        && REGNO (SET_DST (set)) == REGNO (orig_src))
>>      return true;
>>
>>    return false;
>> }
> 
> This may have been my doing as I asked to split out the logic as some of the conditions merit more commentary.
> e.g. why does the mode need to be same
> But granted this is the usual coding style in gcc and the extra comments could still be added before the big if
> 

Addressed in V15 of the patch,
> -Vineet
> 
>>
>> I think this is much easier to actually read (and that's why good
>> function comments are important). In the end it's not important and
>> just personal preference.
>> Either way, I did not check the plausibility of the logic therein.
>>
>>>
>>> I have not done any assembly diff as myself have not cross compiled with aarch64.
>> fair enough.
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
  2023-10-28 10:25                   ` Ajit Agarwal
@ 2023-10-29 10:48                     ` Ajit Agarwal
  0 siblings, 0 replies; 20+ messages in thread
From: Ajit Agarwal @ 2023-10-29 10:48 UTC (permalink / raw)
  To: Bernhard Reutner-Fischer
  Cc: Vineet Gupta, gcc-patches, Jeff Law, Richard Biener,
	Segher Boessenkool, Peter Bergner, gnu-toolchain



On 28/10/23 3:55 pm, Ajit Agarwal wrote:
> 
> 
> On 27/10/23 10:46 pm, Bernhard Reutner-Fischer wrote:
>> On Wed, 25 Oct 2023 16:41:07 +0530
>> Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>
>>> On 25/10/23 2:19 am, Vineet Gupta wrote:
>>>> On 10/24/23 13:36, rep.dot.nop@gmail.com wrote:  
>>>>>>>>> As said, I don't see why the below was not cleaned up before the V1 submission.
>>>>>>>>> Iff it breaks when manually CSEing, I'm curious why?  
>>>>>>> The function below looks identical in v12 of the patch.
>>>>>>> Why didn't you use common subexpressions?
>>>>>>> ba  
>>>>>> Using CSE here breaks aarch64 regressions hence I have reverted it back
>>>>>> not to use CSE,  
>>>>> Just for my own education, can you please paste your patch perusing common subexpressions and an assembly diff of the failing versus working aarch64 testcase, along how you configured that failing (cross-?)compiler and the command-line of a typical testcase that broke when manually CSEing the function below?  
>>>>
>>>> I was meaning to ask this before, but what exactly is the CSE issue, manually or whatever.
>>
>> If nothing else it would hopefully improve the readability.
>>
>>>>   
>>> Here is the abi interface where I CSE'D and got a mail from automated regressions run that aarch64
>>> test fails.
>>
>> We already concluded that this failure was obviously a hiccup on the
>> testers, no problem.
> 
> Thanks.
>>
>>> +static inline bool
>>> +abi_extension_candidate_return_reg_p (int regno)
>>> +{
>>> +  return targetm.calls.function_value_regno_p (regno);
>>> +}
>>
>> But i was referring to abi_extension_candidate_p :)
>>
>> your v13 looks like this:
>>
>> +static bool
>> +abi_extension_candidate_p (rtx_insn *insn)
>> +{
>> +  rtx set = single_set (insn);
>> +  machine_mode dst_mode = GET_MODE (SET_DEST (set));
>> +  rtx orig_src = XEXP (SET_SRC (set), 0);
>> +
>> +  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>> +      || abi_extension_candidate_return_reg_p (REGNO (orig_src)))
>> +    return false;
>> +
>> +  /* Return FALSE if mode of destination and source is same.  */
>> +  if (dst_mode == GET_MODE (orig_src))
>> +    return false;
>> +
>> +  machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0));
>> +  bool promote_p = abi_target_promote_function_mode (mode);
>> +
>> +  /* Return FALSE if promote is false and REGNO of source and destination
>> +     is different.  */
>> +  if (!promote_p && REGNO (SET_DEST (set)) != REGNO (orig_src))
>> +    return false;
>> +
>> +  return true;
>> +}
>>
>> and i suppose it would be easier to read if phrased something like
>>
>> static bool
>> abi_extension_candidate_p (rtx_insn *insn)
>> {
>>   rtx set = single_set (insn);
>>   rtx orig_src = XEXP (SET_SRC (set), 0);
>>   unsigned int src_regno = REGNO (orig_src);
>>
>>   /* Not a function argument reg or is a function values return reg.  */
>>   if (!FUNCTION_ARG_REGNO_P (src_regno)
>>       || abi_extension_candidate_return_reg_p (src_regno))
>>     return false;
>>
>>   rtx dst = SET_DST (set);
>>   machine_mode src_mode = GET_MODE (orig_src);
>>
>>   /* Return FALSE if mode of destination and source is the same.  */
>>   if (GET_MODE (dst) == src_mode)
>>     return false;
>>
>>   /* Return FALSE if the FIX THE COMMENT and REGNO of source and destination
>>      is different.  */
>>   if (!abi_target_promote_function_mode_p (src_mode)
>>       && REGNO (dst) != src_regno)
>>     return false;
>>
>>   return true;
>> }
>>
>> so no, that's not exactly better.
>>
>> Maybe just do what the function comment says (i did not check the "not
>> promoted" part, but you get the idea):
>>
>> ^L
>>
>> /* Return TRUE if
>>    reg source operand is argument register and not return register,
>>    mode of source and destination operand are different,
>>    if not promoted REGNO of source and destination operand are the same.  */
>> static bool
>> abi_extension_candidate_p (rtx_insn *insn)
>> {
>>   rtx set = single_set (insn);
>>   rtx orig_src = XEXP (SET_SRC (set), 0);
>>
>>   if (FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>>       && !abi_extension_candidate_return_reg_p (REGNO (orig_src))
>>       && GET_MODE (SET_DST (set)) != GET_MODE (orig_src)
>>       && abi_target_promote_function_mode_p (GET_MODE (orig_src))
>>       && REGNO (SET_DST (set)) == REGNO (orig_src))
>>     return true;
>>
>>   return false;
>> }
>>
>> I think this is much easier to actually read (and that's why good
>> function comments are important). In the end it's not important and
>> just personal preference.
>> Either way, I did not check the plausibility of the logic therein.
>>
>>>
> > Addressed in V15 of the patch.

The above rearranging code breaks the logic and I have modified as follows.

+/* Return TRUE if
+   reg source operand is argument register and not return register,
+   mode of source and destination operand are different,
+   if not promoted REGNO of source and destination operand are the same.  */
+static bool
+abi_extension_candidate_p (rtx_insn *insn)
+{
+  rtx set = single_set (insn);
+  machine_mode dst_mode = GET_MODE (SET_DEST (set));
+  rtx orig_src = XEXP (SET_SRC (set), 0);
+
+  if (FUNCTION_ARG_REGNO_P (REGNO (orig_src))
+      && !abi_extension_candidate_return_reg_p (REGNO (orig_src))
+      && dst_mode != GET_MODE (orig_src))
+     {
+       if (!abi_target_promote_function_mode (GET_MODE (orig_src))
+          && REGNO (SET_DEST (set)) != REGNO (orig_src))
+        return false;
+
+       return true;
+     }
+  return false;
+}

Thanks & Regards
Ajit


 
>>>
>>> I have not done any assembly diff as myself have not cross compiled with aarch64.
>>
>> fair enough.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
  2023-10-28 10:26                     ` Ajit Agarwal
@ 2023-10-29 10:49                       ` Ajit Agarwal
  0 siblings, 0 replies; 20+ messages in thread
From: Ajit Agarwal @ 2023-10-29 10:49 UTC (permalink / raw)
  To: Vineet Gupta, Bernhard Reutner-Fischer
  Cc: gcc-patches, Jeff Law, Richard Biener, Segher Boessenkool,
	Peter Bergner, gnu-toolchain



On 28/10/23 3:56 pm, Ajit Agarwal wrote:
> 
> 
> On 28/10/23 4:09 am, Vineet Gupta wrote:
>>
>>
>> On 10/27/23 10:16, Bernhard Reutner-Fischer wrote:
>>> On Wed, 25 Oct 2023 16:41:07 +0530
>>> Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>>
>>>> On 25/10/23 2:19 am, Vineet Gupta wrote:
>>>>> On 10/24/23 13:36, rep.dot.nop@gmail.com wrote:
>>>>>>>>>> As said, I don't see why the below was not cleaned up before the V1 submission.
>>>>>>>>>> Iff it breaks when manually CSEing, I'm curious why?
>>>>>>>> The function below looks identical in v12 of the patch.
>>>>>>>> Why didn't you use common subexpressions?
>>>>>>>> ba
>>>>>>> Using CSE here breaks aarch64 regressions hence I have reverted it back
>>>>>>> not to use CSE,
>>>>>> Just for my own education, can you please paste your patch perusing common subexpressions and an assembly diff of the failing versus working aarch64 testcase, along how you configured that failing (cross-?)compiler and the command-line of a typical testcase that broke when manually CSEing the function below?
>>>>> I was meaning to ask this before, but what exactly is the CSE issue, manually or whatever.
>>> If nothing else it would hopefully improve the readability.
>>>
>>>>>    
>>>> Here is the abi interface where I CSE'D and got a mail from automated regressions run that aarch64
>>>> test fails.
>>> We already concluded that this failure was obviously a hiccup on the
>>> testers, no problem.
>>>
>>>> +static inline bool
>>>> +abi_extension_candidate_return_reg_p (int regno)
>>>> +{
>>>> +  return targetm.calls.function_value_regno_p (regno);
>>>> +}
>>> But i was referring to abi_extension_candidate_p :)
>>>
>>> your v13 looks like this:
>>>
>>> +static bool
>>> +abi_extension_candidate_p (rtx_insn *insn)
>>> +{
>>> +  rtx set = single_set (insn);
>>> +  machine_mode dst_mode = GET_MODE (SET_DEST (set));
>>> +  rtx orig_src = XEXP (SET_SRC (set), 0);
>>> +
>>> +  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>>> +      || abi_extension_candidate_return_reg_p (REGNO (orig_src)))
>>> +    return false;
>>> +
>>> +  /* Return FALSE if mode of destination and source is same.  */
>>> +  if (dst_mode == GET_MODE (orig_src))
>>> +    return false;
>>> +
>>> +  machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0));
>>> +  bool promote_p = abi_target_promote_function_mode (mode);
>>> +
>>> +  /* Return FALSE if promote is false and REGNO of source and destination
>>> +     is different.  */
>>> +  if (!promote_p && REGNO (SET_DEST (set)) != REGNO (orig_src))
>>> +    return false;
>>> +
>>> +  return true;
>>> +}
>>>
>>> and i suppose it would be easier to read if phrased something like
>>>
>>> static bool
>>> abi_extension_candidate_p (rtx_insn *insn)
>>> {
>>>    rtx set = single_set (insn);
>>>    rtx orig_src = XEXP (SET_SRC (set), 0);
>>>    unsigned int src_regno = REGNO (orig_src);
>>>
>>>    /* Not a function argument reg or is a function values return reg.  */
>>>    if (!FUNCTION_ARG_REGNO_P (src_regno)
>>>        || abi_extension_candidate_return_reg_p (src_regno))
>>>      return false;
>>>
>>>    rtx dst = SET_DST (set);
>>>    machine_mode src_mode = GET_MODE (orig_src);
>>>
>>>    /* Return FALSE if mode of destination and source is the same.  */
>>>    if (GET_MODE (dst) == src_mode)
>>>      return false;
>>>
>>>    /* Return FALSE if the FIX THE COMMENT and REGNO of source and destination
>>>       is different.  */
>>>    if (!abi_target_promote_function_mode_p (src_mode)
>>>        && REGNO (dst) != src_regno)
>>>      return false;
>>>
>>>    return true;
>>> }
>>>
>>> so no, that's not exactly better.
>>>
>>> Maybe just do what the function comment says (i did not check the "not
>>> promoted" part, but you get the idea):
>>>
>>> ^L
>>>
>>> /* Return TRUE if
>>>     reg source operand is argument register and not return register,
>>>     mode of source and destination operand are different,
>>>     if not promoted REGNO of source and destination operand are the same.  */
>>> static bool
>>> abi_extension_candidate_p (rtx_insn *insn)
>>> {
>>>    rtx set = single_set (insn);
>>>    rtx orig_src = XEXP (SET_SRC (set), 0);
>>>
>>>    if (FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>>>        && !abi_extension_candidate_return_reg_p (REGNO (orig_src))
>>>        && GET_MODE (SET_DST (set)) != GET_MODE (orig_src)
>>>        && abi_target_promote_function_mode_p (GET_MODE (orig_src))
>>>        && REGNO (SET_DST (set)) == REGNO (orig_src))
>>>      return true;
>>>
>>>    return false;
>>> }
>>
>> This may have been my doing as I asked to split out the logic as some of the conditions merit more commentary.
>> e.g. why does the mode need to be same
>> But granted this is the usual coding style in gcc and the extra comments could still be added before the big if
>>
> 
> Addressed in V15 of the patch,

The above rearranging code breaks the logic. I have implemented as follows.
+/* Return TRUE if
+   reg source operand is argument register and not return register,
+   mode of source and destination operand are different,
+   if not promoted REGNO of source and destination operand are the same.  */
+static bool
+abi_extension_candidate_p (rtx_insn *insn)
+{
+  rtx set = single_set (insn);
+  machine_mode dst_mode = GET_MODE (SET_DEST (set));
+  rtx orig_src = XEXP (SET_SRC (set), 0);
+
+  if (FUNCTION_ARG_REGNO_P (REGNO (orig_src))
+      && !abi_extension_candidate_return_reg_p (REGNO (orig_src))
+      && dst_mode != GET_MODE (orig_src))
+     {
+       if (!abi_target_promote_function_mode (GET_MODE (orig_src))
+          && REGNO (SET_DEST (set)) != REGNO (orig_src))
+        return false;
+
+       return true;
+     }
+  return false;
+}

Thanks & Regards
Ajit

>> -Vineet
>>
>>>
>>> I think this is much easier to actually read (and that's why good
>>> function comments are important). In the end it's not important and
>>> just personal preference.
>>> Either way, I did not check the plausibility of the logic therein.
>>>
>>>>
>>>> I have not done any assembly diff as myself have not cross compiled with aarch64.
>>> fair enough.
>>

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2023-10-29 10:49 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-20  6:50 [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces Ajit Agarwal
2023-10-20 23:56 ` Vineet Gupta
2023-10-21 10:14   ` Ajit Agarwal
2023-10-21 19:26   ` rep.dot.nop
2023-10-23  6:46     ` Ajit Agarwal
2023-10-23 14:10       ` Bernhard Reutner-Fischer
2023-10-24  7:36         ` Ajit Agarwal
2023-10-24 20:36           ` rep.dot.nop
2023-10-24 20:49             ` Vineet Gupta
2023-10-25 11:11               ` Ajit Agarwal
2023-10-27 17:16                 ` Bernhard Reutner-Fischer
2023-10-27 22:39                   ` Vineet Gupta
2023-10-28 10:26                     ` Ajit Agarwal
2023-10-29 10:49                       ` Ajit Agarwal
2023-10-28 10:25                   ` Ajit Agarwal
2023-10-29 10:48                     ` Ajit Agarwal
2023-10-25 11:08             ` Ajit Agarwal
2023-10-23 18:32       ` Vineet Gupta
2023-10-24  7:40         ` Ajit Agarwal
2023-10-24  9:36           ` Ajit Agarwal

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).