public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work118)] Improve vec_extract of V4SF from memory with constant element.
@ 2023-04-14 14:17 Michael Meissner
  0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2023-04-14 14:17 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:72c687f5507d5a3ab7023a35d2ef85562104df15

commit 72c687f5507d5a3ab7023a35d2ef85562104df15
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Apr 14 10:17:37 2023 -0400

    Improve vec_extract of V4SF from memory with constant element.
    
    2023-04-14   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/rs6000.cc (get_vector_offset): Allow being called before
            register allocation.
            (adjust_vec_address_pcrel): Likewise.
            (rs6000_adjust_vec_address): Likewise.
            * config/rs6000/vsx.md (vsx_extract_v4sf_load): Allow split before
            register allocation.
            (vsx_extract_v4sf_to_df_load): New insn.

Diff:
---
 gcc/config/rs6000/rs6000.cc | 49 ++++++++++++++++++++++++++++++++-------------
 gcc/config/rs6000/vsx.md    | 24 +++++++++++++++++++++-
 2 files changed, 58 insertions(+), 15 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3be5860dd9b..21d4e2caf20 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -7686,6 +7686,9 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
   if (CONST_INT_P (element))
     return GEN_INT (INTVAL (element) * scalar_size);
 
+  if (GET_CODE (base_tmp) == SCRATCH)
+    base_tmp = gen_reg_rtx (Pmode);
+
   /* All insns should use the 'Q' constraint (address is a single register) if
      the element number is not a constant.  */
   gcc_assert (satisfies_constraint_Q (mem));
@@ -7704,6 +7707,9 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
   if (shift > 0)
     {
       rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
+      if (can_create_pseudo_p ())
+	base_tmp = gen_reg_rtx (Pmode);
+
       emit_insn (gen_rtx_SET (base_tmp, shift_op));
     }
 
@@ -7747,6 +7753,9 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
 
       else
 	{
+	  if (GET_CODE (base_tmp) == SCRATCH)
+	    base_tmp = gen_reg_rtx (Pmode);
+
 	  emit_move_insn (base_tmp, addr);
 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
 	}
@@ -7769,9 +7778,8 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
    temporary (BASE_TMP) to fixup the address.  Return the new memory address
    that is valid for reads or writes to a given register (SCALAR_REG).
 
-   This function is expected to be called after reload is completed when we are
-   splitting insns.  The temporary BASE_TMP might be set multiple times with
-   this code.  */
+   The temporary BASE_TMP might be set multiple times with this code if this is
+   called after register allocation.  */
 
 rtx
 rs6000_adjust_vec_address (rtx scalar_reg,
@@ -7784,8 +7792,11 @@ rs6000_adjust_vec_address (rtx scalar_reg,
   rtx addr = XEXP (mem, 0);
   rtx new_addr;
 
-  gcc_assert (!reg_mentioned_p (base_tmp, addr));
-  gcc_assert (!reg_mentioned_p (base_tmp, element));
+  if (GET_CODE (base_tmp) != SCRATCH)
+    {
+      gcc_assert (!reg_mentioned_p (base_tmp, addr));
+      gcc_assert (!reg_mentioned_p (base_tmp, element));
+    }
 
   /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
   gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
@@ -7841,6 +7852,9 @@ rs6000_adjust_vec_address (rtx scalar_reg,
 	     offset, it has the benefit that if D-FORM instructions are
 	     allowed, the offset is part of the memory access to the vector
 	     element. */
+	  if (GET_CODE (base_tmp) == SCRATCH)
+	    base_tmp = gen_reg_rtx (Pmode);
+
 	  emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
 	}
@@ -7848,26 +7862,33 @@ rs6000_adjust_vec_address (rtx scalar_reg,
 
   else
     {
+      if (GET_CODE (base_tmp) == SCRATCH)
+	base_tmp = gen_reg_rtx (Pmode);
+
       emit_move_insn (base_tmp, addr);
       new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
     }
 
-    /* If the address isn't valid, move the address into the temporary base
-       register.  Some reasons it could not be valid include:
+    /* If register allocation has been done and the address isn't valid, move
+       the address into the temporary base register.  Some reasons it could not
+       be valid include:
 
        The address offset overflowed the 16 or 34 bit offset size;
        We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
        We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
        Only X_FORM loads can be done, and the address is D_FORM.  */
 
-  enum insn_form iform
-    = address_to_insn_form (new_addr, scalar_mode,
-			    reg_to_non_prefixed (scalar_reg, scalar_mode));
-
-  if (iform == INSN_FORM_BAD)
+  if (!can_create_pseudo_p ())
     {
-      emit_move_insn (base_tmp, new_addr);
-      new_addr = base_tmp;
+      enum insn_form iform
+	= address_to_insn_form (new_addr, scalar_mode,
+				reg_to_non_prefixed (scalar_reg, scalar_mode));
+
+      if (iform == INSN_FORM_BAD)
+	{
+	  emit_move_insn (base_tmp, new_addr);
+	  new_addr = base_tmp;
+	}
     }
 
   return change_address (mem, scalar_mode, new_addr);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 417aff5e24b..00f85caed62 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3549,6 +3549,7 @@
   [(set_attr "length" "8")
    (set_attr "type" "fp")])
 
+;; V4SF extract from memory with constant element number
 (define_insn_and_split "*vsx_extract_v4sf_load"
   [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
 	(vec_select:SF
@@ -3557,7 +3558,7 @@
    (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
   "VECTOR_MEM_VSX_P (V4SFmode)"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0) (match_dup 4))]
 {
   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
@@ -3567,6 +3568,27 @@
    (set_attr "length" "8")
    (set_attr "isa" "*,p7v,p9v,*")])
 
+;; V4SF extract from memory and convert to DFmode with constant element number
+(define_insn_and_split "*vsx_extract_v4sf_to_df_load"
+  [(set (match_operand:DF 0 "register_operand" "=f,v")
+	(float_extend:DF
+	 (vec_select:SF
+	  (match_operand:V4SF 1 "memory_operand" "m,m")
+	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n")]))))
+   (clobber (match_scratch:P 3 "=&b,&b"))]
+  "VECTOR_MEM_VSX_P (V4SFmode)"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(float_extend:DF (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+					   operands[3], SFmode);
+}
+  [(set_attr "type" "fpload")
+   (set_attr "length" "8")
+   (set_attr "isa" "*,p8v")])
+
 ;; Variable V4SF extract from a register
 (define_insn_and_split "vsx_extract_v4sf_var"
   [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [gcc(refs/users/meissner/heads/work118)] Improve vec_extract of V4SF from memory with constant element.
@ 2023-04-15  1:20 Michael Meissner
  0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2023-04-15  1:20 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:2610ff1d92a51410e61851445b27bf942b5e4100

commit 2610ff1d92a51410e61851445b27bf942b5e4100
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Apr 14 21:20:28 2023 -0400

    Improve vec_extract of V4SF from memory with constant element.
    
    2023-04-14   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/rs6000.cc (get_vector_offset): Allow being called before
            register allocation.
            (adjust_vec_address_pcrel): Likewise.
            (rs6000_adjust_vec_address): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000.cc | 56 +++++++++++++++++++++++++++++++--------------
 1 file changed, 39 insertions(+), 17 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3be5860dd9b..65295dbaf81 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -7686,9 +7686,13 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
   if (CONST_INT_P (element))
     return GEN_INT (INTVAL (element) * scalar_size);
 
-  /* All insns should use the 'Q' constraint (address is a single register) if
-     the element number is not a constant.  */
-  gcc_assert (satisfies_constraint_Q (mem));
+  if (GET_CODE (base_tmp) == SCRATCH)
+    base_tmp = gen_reg_rtx (Pmode);
+
+  /* After register allocation, all insns should use the 'Q' constraint
+     (address is a single register) if the element number is not a
+     constant.  */
+  gcc_assert (can_create_pseudo_p () || satisfies_constraint_Q (mem));
 
   /* Mask the element to make sure the element number is between 0 and the
      maximum number of elements - 1 so that we don't generate an address
@@ -7704,6 +7708,9 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
   if (shift > 0)
     {
       rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
+      if (can_create_pseudo_p ())
+	base_tmp = gen_reg_rtx (Pmode);
+
       emit_insn (gen_rtx_SET (base_tmp, shift_op));
     }
 
@@ -7747,6 +7754,9 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
 
       else
 	{
+	  if (GET_CODE (base_tmp) == SCRATCH)
+	    base_tmp = gen_reg_rtx (Pmode);
+
 	  emit_move_insn (base_tmp, addr);
 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
 	}
@@ -7769,9 +7779,8 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
    temporary (BASE_TMP) to fixup the address.  Return the new memory address
    that is valid for reads or writes to a given register (SCALAR_REG).
 
-   This function is expected to be called after reload is completed when we are
-   splitting insns.  The temporary BASE_TMP might be set multiple times with
-   this code.  */
+   The temporary BASE_TMP might be set multiple times with this code if this is
+   called after register allocation.  */
 
 rtx
 rs6000_adjust_vec_address (rtx scalar_reg,
@@ -7784,8 +7793,11 @@ rs6000_adjust_vec_address (rtx scalar_reg,
   rtx addr = XEXP (mem, 0);
   rtx new_addr;
 
-  gcc_assert (!reg_mentioned_p (base_tmp, addr));
-  gcc_assert (!reg_mentioned_p (base_tmp, element));
+  if (GET_CODE (base_tmp) != SCRATCH)
+    {
+      gcc_assert (!reg_mentioned_p (base_tmp, addr));
+      gcc_assert (!reg_mentioned_p (base_tmp, element));
+    }
 
   /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
   gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
@@ -7841,6 +7853,9 @@ rs6000_adjust_vec_address (rtx scalar_reg,
 	     offset, it has the benefit that if D-FORM instructions are
 	     allowed, the offset is part of the memory access to the vector
 	     element. */
+	  if (GET_CODE (base_tmp) == SCRATCH)
+	    base_tmp = gen_reg_rtx (Pmode);
+
 	  emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
 	}
@@ -7848,26 +7863,33 @@ rs6000_adjust_vec_address (rtx scalar_reg,
 
   else
     {
+      if (GET_CODE (base_tmp) == SCRATCH)
+	base_tmp = gen_reg_rtx (Pmode);
+
       emit_move_insn (base_tmp, addr);
       new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
     }
 
-    /* If the address isn't valid, move the address into the temporary base
-       register.  Some reasons it could not be valid include:
+    /* If register allocation has been done and the address isn't valid, move
+       the address into the temporary base register.  Some reasons it could not
+       be valid include:
 
        The address offset overflowed the 16 or 34 bit offset size;
        We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
        We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
        Only X_FORM loads can be done, and the address is D_FORM.  */
 
-  enum insn_form iform
-    = address_to_insn_form (new_addr, scalar_mode,
-			    reg_to_non_prefixed (scalar_reg, scalar_mode));
-
-  if (iform == INSN_FORM_BAD)
+  if (!can_create_pseudo_p ())
     {
-      emit_move_insn (base_tmp, new_addr);
-      new_addr = base_tmp;
+      enum insn_form iform
+	= address_to_insn_form (new_addr, scalar_mode,
+				reg_to_non_prefixed (scalar_reg, scalar_mode));
+
+      if (iform == INSN_FORM_BAD)
+	{
+	  emit_move_insn (base_tmp, new_addr);
+	  new_addr = base_tmp;
+	}
     }
 
   return change_address (mem, scalar_mode, new_addr);

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [gcc(refs/users/meissner/heads/work118)] Improve vec_extract of V4SF from memory with constant element.
@ 2023-04-14 18:53 Michael Meissner
  0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2023-04-14 18:53 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:2e97044d04f57877de8901c6394658cde804384c

commit 2e97044d04f57877de8901c6394658cde804384c
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Apr 14 14:52:52 2023 -0400

    Improve vec_extract of V4SF from memory with constant element.
    
    2023-04-14   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/rs6000.cc (get_vector_offset): Allow being called before
            register allocation.
            (adjust_vec_address_pcrel): Likewise.
            (rs6000_adjust_vec_address): Likewise.
            * config/rs6000/vsx.md (vsx_extract_v4sf_load): Allow split before
            register allocation.
            (vsx_extract_v4sf_to_df_load): New insn.

Diff:
---
 gcc/config/rs6000/rs6000.cc | 56 +++++++++++++++++++++++++++++++--------------
 gcc/config/rs6000/vsx.md    | 24 ++++++++++++++++++-
 2 files changed, 62 insertions(+), 18 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3be5860dd9b..65295dbaf81 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -7686,9 +7686,13 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
   if (CONST_INT_P (element))
     return GEN_INT (INTVAL (element) * scalar_size);
 
-  /* All insns should use the 'Q' constraint (address is a single register) if
-     the element number is not a constant.  */
-  gcc_assert (satisfies_constraint_Q (mem));
+  if (GET_CODE (base_tmp) == SCRATCH)
+    base_tmp = gen_reg_rtx (Pmode);
+
+  /* After register allocation, all insns should use the 'Q' constraint
+     (address is a single register) if the element number is not a
+     constant.  */
+  gcc_assert (can_create_pseudo_p () || satisfies_constraint_Q (mem));
 
   /* Mask the element to make sure the element number is between 0 and the
      maximum number of elements - 1 so that we don't generate an address
@@ -7704,6 +7708,9 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
   if (shift > 0)
     {
       rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
+      if (can_create_pseudo_p ())
+	base_tmp = gen_reg_rtx (Pmode);
+
       emit_insn (gen_rtx_SET (base_tmp, shift_op));
     }
 
@@ -7747,6 +7754,9 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
 
       else
 	{
+	  if (GET_CODE (base_tmp) == SCRATCH)
+	    base_tmp = gen_reg_rtx (Pmode);
+
 	  emit_move_insn (base_tmp, addr);
 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
 	}
@@ -7769,9 +7779,8 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
    temporary (BASE_TMP) to fixup the address.  Return the new memory address
    that is valid for reads or writes to a given register (SCALAR_REG).
 
-   This function is expected to be called after reload is completed when we are
-   splitting insns.  The temporary BASE_TMP might be set multiple times with
-   this code.  */
+   The temporary BASE_TMP might be set multiple times with this code if this is
+   called after register allocation.  */
 
 rtx
 rs6000_adjust_vec_address (rtx scalar_reg,
@@ -7784,8 +7793,11 @@ rs6000_adjust_vec_address (rtx scalar_reg,
   rtx addr = XEXP (mem, 0);
   rtx new_addr;
 
-  gcc_assert (!reg_mentioned_p (base_tmp, addr));
-  gcc_assert (!reg_mentioned_p (base_tmp, element));
+  if (GET_CODE (base_tmp) != SCRATCH)
+    {
+      gcc_assert (!reg_mentioned_p (base_tmp, addr));
+      gcc_assert (!reg_mentioned_p (base_tmp, element));
+    }
 
   /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
   gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
@@ -7841,6 +7853,9 @@ rs6000_adjust_vec_address (rtx scalar_reg,
 	     offset, it has the benefit that if D-FORM instructions are
 	     allowed, the offset is part of the memory access to the vector
 	     element. */
+	  if (GET_CODE (base_tmp) == SCRATCH)
+	    base_tmp = gen_reg_rtx (Pmode);
+
 	  emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
 	}
@@ -7848,26 +7863,33 @@ rs6000_adjust_vec_address (rtx scalar_reg,
 
   else
     {
+      if (GET_CODE (base_tmp) == SCRATCH)
+	base_tmp = gen_reg_rtx (Pmode);
+
       emit_move_insn (base_tmp, addr);
       new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
     }
 
-    /* If the address isn't valid, move the address into the temporary base
-       register.  Some reasons it could not be valid include:
+    /* If register allocation has been done and the address isn't valid, move
+       the address into the temporary base register.  Some reasons it could not
+       be valid include:
 
        The address offset overflowed the 16 or 34 bit offset size;
        We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
        We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
        Only X_FORM loads can be done, and the address is D_FORM.  */
 
-  enum insn_form iform
-    = address_to_insn_form (new_addr, scalar_mode,
-			    reg_to_non_prefixed (scalar_reg, scalar_mode));
-
-  if (iform == INSN_FORM_BAD)
+  if (!can_create_pseudo_p ())
     {
-      emit_move_insn (base_tmp, new_addr);
-      new_addr = base_tmp;
+      enum insn_form iform
+	= address_to_insn_form (new_addr, scalar_mode,
+				reg_to_non_prefixed (scalar_reg, scalar_mode));
+
+      if (iform == INSN_FORM_BAD)
+	{
+	  emit_move_insn (base_tmp, new_addr);
+	  new_addr = base_tmp;
+	}
     }
 
   return change_address (mem, scalar_mode, new_addr);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 417aff5e24b..00f85caed62 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3549,6 +3549,7 @@
   [(set_attr "length" "8")
    (set_attr "type" "fp")])
 
+;; V4SF extract from memory with constant element number
 (define_insn_and_split "*vsx_extract_v4sf_load"
   [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
 	(vec_select:SF
@@ -3557,7 +3558,7 @@
    (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
   "VECTOR_MEM_VSX_P (V4SFmode)"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0) (match_dup 4))]
 {
   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
@@ -3567,6 +3568,27 @@
    (set_attr "length" "8")
    (set_attr "isa" "*,p7v,p9v,*")])
 
+;; V4SF extract from memory and convert to DFmode with constant element number
+(define_insn_and_split "*vsx_extract_v4sf_to_df_load"
+  [(set (match_operand:DF 0 "register_operand" "=f,v")
+	(float_extend:DF
+	 (vec_select:SF
+	  (match_operand:V4SF 1 "memory_operand" "m,m")
+	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n")]))))
+   (clobber (match_scratch:P 3 "=&b,&b"))]
+  "VECTOR_MEM_VSX_P (V4SFmode)"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(float_extend:DF (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+					   operands[3], SFmode);
+}
+  [(set_attr "type" "fpload")
+   (set_attr "length" "8")
+   (set_attr "isa" "*,p8v")])
+
 ;; Variable V4SF extract from a register
 (define_insn_and_split "vsx_extract_v4sf_var"
   [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [gcc(refs/users/meissner/heads/work118)] Improve vec_extract of V4SF from memory with constant element.
@ 2023-04-13 23:56 Michael Meissner
  0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2023-04-13 23:56 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:36fa985f1a8b2f7947a0379568c82b03f55d7b1d

commit 36fa985f1a8b2f7947a0379568c82b03f55d7b1d
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Thu Apr 13 19:56:07 2023 -0400

    Improve vec_extract of V4SF from memory with constant element.
    
    2023-04-13   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/rs6000.cc (get_vector_offset): Allow being called before
            register allocation.
            (adjust_vec_address_pcrel): Likewise.
            (rs6000_adjust_vec_address): Likewise.
            * config/rs6000/vsx.md (vsx_extract_v4sf_load): Allow split before
            register allocation.
            (vsx_extract_v4sf_to_df_load): New insn.

Diff:
---
 gcc/config/rs6000/rs6000.cc | 54 +++++++++++++++++++++++++++++++++------------
 gcc/config/rs6000/vsx.md    | 24 +++++++++++++++++++-
 2 files changed, 63 insertions(+), 15 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3be5860dd9b..0e2eb964783 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -7686,6 +7686,9 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
   if (CONST_INT_P (element))
     return GEN_INT (INTVAL (element) * scalar_size);
 
+  if (can_create_pseudo_p ())
+    base_tmp = gen_reg_rtx (Pmode);
+
   /* All insns should use the 'Q' constraint (address is a single register) if
      the element number is not a constant.  */
   gcc_assert (satisfies_constraint_Q (mem));
@@ -7704,6 +7707,9 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
   if (shift > 0)
     {
       rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
+      if (can_create_pseudo_p ())
+	base_tmp = gen_reg_rtx (Pmode);
+
       emit_insn (gen_rtx_SET (base_tmp, shift_op));
     }
 
@@ -7747,6 +7753,9 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
 
       else
 	{
+	  if (GET_CODE (base_tmp) == SCRATCH)
+	    base_tmp = gen_reg_rtx (Pmode);
+
 	  emit_move_insn (base_tmp, addr);
 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
 	}
@@ -7769,9 +7778,7 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
    temporary (BASE_TMP) to fixup the address.  Return the new memory address
    that is valid for reads or writes to a given register (SCALAR_REG).
 
-   This function is expected to be called after reload is completed when we are
-   splitting insns.  The temporary BASE_TMP might be set multiple times with
-   this code.  */
+   The temporary BASE_TMP might be set multiple times with this code.  */
 
 rtx
 rs6000_adjust_vec_address (rtx scalar_reg,
@@ -7784,8 +7791,11 @@ rs6000_adjust_vec_address (rtx scalar_reg,
   rtx addr = XEXP (mem, 0);
   rtx new_addr;
 
-  gcc_assert (!reg_mentioned_p (base_tmp, addr));
-  gcc_assert (!reg_mentioned_p (base_tmp, element));
+  if (GET_CODE (base_tmp) != SCRATCH)
+    {
+      gcc_assert (!reg_mentioned_p (base_tmp, addr));
+      gcc_assert (!reg_mentioned_p (base_tmp, element));
+    }
 
   /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
   gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
@@ -7841,6 +7851,9 @@ rs6000_adjust_vec_address (rtx scalar_reg,
 	     offset, it has the benefit that if D-FORM instructions are
 	     allowed, the offset is part of the memory access to the vector
 	     element. */
+	  if (GET_CODE (base_tmp) == SCRATCH)
+	    base_tmp = gen_reg_rtx (Pmode);
+
 	  emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
 	}
@@ -7848,26 +7861,39 @@ rs6000_adjust_vec_address (rtx scalar_reg,
 
   else
     {
+      if (GET_CODE (base_tmp) == SCRATCH)
+	base_tmp = gen_reg_rtx (Pmode);
+
       emit_move_insn (base_tmp, addr);
       new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
     }
 
-    /* If the address isn't valid, move the address into the temporary base
-       register.  Some reasons it could not be valid include:
+    /* If register allocation has been done and the address isn't valid, move
+       the address into the temporary base register.  Some reasons it could not
+       be valid include:
 
        The address offset overflowed the 16 or 34 bit offset size;
        We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
        We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
        Only X_FORM loads can be done, and the address is D_FORM.  */
 
-  enum insn_form iform
-    = address_to_insn_form (new_addr, scalar_mode,
-			    reg_to_non_prefixed (scalar_reg, scalar_mode));
-
-  if (iform == INSN_FORM_BAD)
+  if (!can_create_pseudo_p ())
     {
-      emit_move_insn (base_tmp, new_addr);
-      new_addr = base_tmp;
+      /* If the final register is not the same mode because a FLOAT_EXTEND,
+	 ZERO_EXTEND, or SIGN_EXTEND was folded into the instruction, adjust
+	 the register to be of the correct mode for the load.  */
+      if (GET_MODE (scalar_reg) != scalar_mode)
+	scalar_reg = gen_rtx_REG (scalar_mode, REGNO (scalar_reg));
+
+      enum insn_form iform
+	= address_to_insn_form (new_addr, scalar_mode,
+				reg_to_non_prefixed (scalar_reg, scalar_mode));
+
+      if (iform == INSN_FORM_BAD)
+	{
+	  emit_move_insn (base_tmp, new_addr);
+	  new_addr = base_tmp;
+	}
     }
 
   return change_address (mem, scalar_mode, new_addr);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 417aff5e24b..00f85caed62 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3549,6 +3549,7 @@
   [(set_attr "length" "8")
    (set_attr "type" "fp")])
 
+;; V4SF extract from memory with constant element number
 (define_insn_and_split "*vsx_extract_v4sf_load"
   [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
 	(vec_select:SF
@@ -3557,7 +3558,7 @@
    (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
   "VECTOR_MEM_VSX_P (V4SFmode)"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0) (match_dup 4))]
 {
   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
@@ -3567,6 +3568,27 @@
    (set_attr "length" "8")
    (set_attr "isa" "*,p7v,p9v,*")])
 
+;; V4SF extract from memory and convert to DFmode with constant element number
+(define_insn_and_split "*vsx_extract_v4sf_to_df_load"
+  [(set (match_operand:DF 0 "register_operand" "=f,v")
+	(float_extend:DF
+	 (vec_select:SF
+	  (match_operand:V4SF 1 "memory_operand" "m,m")
+	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n")]))))
+   (clobber (match_scratch:P 3 "=&b,&b"))]
+  "VECTOR_MEM_VSX_P (V4SFmode)"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(float_extend:DF (match_dup 4)))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+					   operands[3], SFmode);
+}
+  [(set_attr "type" "fpload")
+   (set_attr "length" "8")
+   (set_attr "isa" "*,p8v")])
+
 ;; Variable V4SF extract from a register
 (define_insn_and_split "vsx_extract_v4sf_var"
   [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [gcc(refs/users/meissner/heads/work118)] Improve vec_extract of V4SF from memory with constant element.
@ 2023-04-13 19:29 Michael Meissner
  0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2023-04-13 19:29 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:fc0cbadc4dafbe7385e22cd52887c1e1c77e2f03

commit fc0cbadc4dafbe7385e22cd52887c1e1c77e2f03
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Thu Apr 13 15:29:07 2023 -0400

    Improve vec_extract of V4SF from memory with constant element.
    
    2023-04-13   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/rs6000.cc (get_vector_offset): Allow being called before
            register allocation.
            (adjust_vec_address_pcrel): Likewise.
            (rs6000_adjust_vec_address): Likewise.
            * config/rs6000/vsx.md (vsx_extract_v4sf_load): Allow split before
            register allocation.
            (vsx_extract_v4sf_to_df_load): New insn.

Diff:
---
 gcc/config/rs6000/rs6000.cc | 51 ++++++++++++++++++++++++++++++++-------------
 gcc/config/rs6000/vsx.md    | 24 ++++++++++++++++++++-
 2 files changed, 60 insertions(+), 15 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3be5860dd9b..869a385eddf 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -7686,6 +7686,9 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
   if (CONST_INT_P (element))
     return GEN_INT (INTVAL (element) * scalar_size);
 
+  if (GET_CODE (base_tmp) == SCRATCH)
+    base_tmp = gen_reg_rtx (Pmode);
+
   /* All insns should use the 'Q' constraint (address is a single register) if
      the element number is not a constant.  */
   gcc_assert (satisfies_constraint_Q (mem));
@@ -7747,6 +7750,9 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
 
       else
 	{
+	  if (GET_CODE (base_tmp) == SCRATCH)
+	    base_tmp = gen_reg_rtx (Pmode);
+
 	  emit_move_insn (base_tmp, addr);
 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
 	}
@@ -7769,9 +7775,7 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
    temporary (BASE_TMP) to fixup the address.  Return the new memory address
    that is valid for reads or writes to a given register (SCALAR_REG).
 
-   This function is expected to be called after reload is completed when we are
-   splitting insns.  The temporary BASE_TMP might be set multiple times with
-   this code.  */
+   The temporary BASE_TMP might be set multiple times with this code.  */
 
 rtx
 rs6000_adjust_vec_address (rtx scalar_reg,
@@ -7784,8 +7788,11 @@ rs6000_adjust_vec_address (rtx scalar_reg,
   rtx addr = XEXP (mem, 0);
   rtx new_addr;
 
-  gcc_assert (!reg_mentioned_p (base_tmp, addr));
-  gcc_assert (!reg_mentioned_p (base_tmp, element));
+  if (GET_CODE (base_tmp) != SCRATCH)
+    {
+      gcc_assert (!reg_mentioned_p (base_tmp, addr));
+      gcc_assert (!reg_mentioned_p (base_tmp, element));
+    }
 
   /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
   gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
@@ -7841,6 +7848,9 @@ rs6000_adjust_vec_address (rtx scalar_reg,
 	     offset, it has the benefit that if D-FORM instructions are
 	     allowed, the offset is part of the memory access to the vector
 	     element. */
+	  if (GET_CODE (base_tmp) == SCRATCH)
+	    base_tmp = gen_reg_rtx (Pmode);
+
 	  emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
 	}
@@ -7848,26 +7858,39 @@ rs6000_adjust_vec_address (rtx scalar_reg,
 
   else
     {
+      if (GET_CODE (base_tmp) == SCRATCH)
+	base_tmp = gen_reg_rtx (Pmode);
+
       emit_move_insn (base_tmp, addr);
       new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
     }
 
-    /* If the address isn't valid, move the address into the temporary base
-       register.  Some reasons it could not be valid include:
+    /* If register allocation has been done and the address isn't valid, move
+       the address into the temporary base register.  Some reasons it could not
+       be valid include:
 
        The address offset overflowed the 16 or 34 bit offset size;
        We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
        We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
        Only X_FORM loads can be done, and the address is D_FORM.  */
 
-  enum insn_form iform
-    = address_to_insn_form (new_addr, scalar_mode,
-			    reg_to_non_prefixed (scalar_reg, scalar_mode));
-
-  if (iform == INSN_FORM_BAD)
+  if (!can_create_pseudo_p ())
     {
-      emit_move_insn (base_tmp, new_addr);
-      new_addr = base_tmp;
+      /* If the final register is not the same mode because a FLOAT_EXTEND,
+	 ZERO_EXTEND, or SIGN_EXTEND was folded into the instruction, adjust
+	 the register to be of the correct mode for the load.  */
+      if (GET_MODE (scalar_reg) != scalar_mode)
+	scalar_reg = gen_rtx_REG (scalar_mode, REGNO (scalar_reg));
+
+      enum insn_form iform
+	= address_to_insn_form (new_addr, scalar_mode,
+				reg_to_non_prefixed (scalar_reg, scalar_mode));
+
+      if (iform == INSN_FORM_BAD)
+	{
+	  emit_move_insn (base_tmp, new_addr);
+	  new_addr = base_tmp;
+	}
     }
 
   return change_address (mem, scalar_mode, new_addr);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 417aff5e24b..00f85caed62 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3549,6 +3549,7 @@
   [(set_attr "length" "8")
    (set_attr "type" "fp")])
 
+;; V4SF extract from memory with constant element number
 (define_insn_and_split "*vsx_extract_v4sf_load"
   [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
 	(vec_select:SF
@@ -3557,7 +3558,7 @@
    (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
   "VECTOR_MEM_VSX_P (V4SFmode)"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0) (match_dup 4))]
 {
   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
@@ -3567,6 +3568,27 @@
    (set_attr "length" "8")
    (set_attr "isa" "*,p7v,p9v,*")])
 
+;; V4SF extract from memory and convert to DFmode with constant element number
+(define_insn_and_split "*vsx_extract_v4sf_to_df_load"
+  [(set (match_operand:DF 0 "register_operand" "=f,v")
+	(float_extend:DF
+	 (vec_select:SF
+	  (match_operand:V4SF 1 "memory_operand" "m,m")
+	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n")]))))
+   (clobber (match_scratch:P 3 "=&b,&b"))]	; temporary handed to rs6000_adjust_vec_address
+  "VECTOR_MEM_VSX_P (V4SFmode)"
+  "#"					; no direct output template; the insn is split
+  "&& 1"				; split is not gated on reload_completed
+  [(set (match_dup 0)
+	(float_extend:DF (match_dup 4)))]
+{ /* Operand 4 becomes the SFmode memory reference for element operands[2].  */
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+					   operands[3], SFmode);
+}
+  [(set_attr "type" "fpload")
+   (set_attr "length" "8")
+   (set_attr "isa" "*,p8v")])
+
 ;; Variable V4SF extract from a register
 (define_insn_and_split "vsx_extract_v4sf_var"
   [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [gcc(refs/users/meissner/heads/work118)] Improve vec_extract of V4SF from memory with constant element.
@ 2023-04-13 16:50 Michael Meissner
  0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2023-04-13 16:50 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:d70280f2d9dfceb90a5709a2e8b66f3dbcee0b73

commit d70280f2d9dfceb90a5709a2e8b66f3dbcee0b73
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Thu Apr 13 12:48:58 2023 -0400

    Improve vec_extract of V4SF from memory with constant element.
    
    2023-04-13   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vsx.md (vsx_extract_v4sf_load): Allow split before
            register allocation.
            (vsx_extract_v4sf_to_df_load): New insn.

Diff:
---
 gcc/config/rs6000/vsx.md | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 417aff5e24b..70f4e8b8365 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3549,6 +3549,7 @@
   [(set_attr "length" "8")
    (set_attr "type" "fp")])
 
+;; V4SF extract from memory with constant element number
 (define_insn_and_split "*vsx_extract_v4sf_load"
   [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
 	(vec_select:SF
@@ -3557,9 +3558,12 @@
    (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
   "VECTOR_MEM_VSX_P (V4SFmode)"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0) (match_dup 4))]
 {
+  if (GET_CODE (operands[3]) == SCRATCH)
+    operands[3] = gen_reg_rtx (DImode);
+
   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
 					   operands[3], SFmode);
 }
@@ -3567,6 +3571,30 @@
    (set_attr "length" "8")
    (set_attr "isa" "*,p7v,p9v,*")])
 
+;; V4SF extract from memory and convert to DFmode with constant element number
+(define_insn_and_split "*vsx_extract_v4sf_to_df_load"
+  [(set (match_operand:DF 0 "register_operand" "=f,v")
+	(float_extend:DF
+	 (vec_select:SF
+	  (match_operand:V4SF 1 "memory_operand" "m,m")
+	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n")]))))
+   (clobber (match_scratch:P 3 "=&b,&b"))]
+  "VECTOR_MEM_VSX_P (V4SFmode)"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(float_extend:DF (match_dup 4)))]
+{
+  /* Operand 3 is a match_scratch:P, so its replacement must be Pmode.  */
+  if (GET_CODE (operands[3]) == SCRATCH)
+    operands[3] = gen_reg_rtx (Pmode);
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+					   operands[3], SFmode);
+}
+  [(set_attr "type" "fpload")
+   (set_attr "length" "8")
+   (set_attr "isa" "*,p8v")])
+
 ;; Variable V4SF extract from a register
 (define_insn_and_split "vsx_extract_v4sf_var"
   [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-04-15  1:20 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-14 14:17 [gcc(refs/users/meissner/heads/work118)] Improve vec_extract of V4SF from memory with constant element Michael Meissner
  -- strict thread matches above, loose matches on Subject: below --
2023-04-15  1:20 Michael Meissner
2023-04-14 18:53 Michael Meissner
2023-04-13 23:56 Michael Meissner
2023-04-13 19:29 Michael Meissner
2023-04-13 16:50 Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).