public inbox for gcc-cvs@sourceware.org
* [gcc(refs/users/meissner/heads/work119)] Split vec_extract from memory before reload for V16QI/V8HI/V4SI with constant elements.
@ 2023-05-01 16:46 Michael Meissner
  0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2023-05-01 16:46 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:c87f2f266d5093ff4c55b26698a790673d39e35f

commit c87f2f266d5093ff4c55b26698a790673d39e35f
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Mon May 1 12:46:23 2023 -0400

    Split vec_extract from memory before reload for V16QI/V8HI/V4SI with constant elements.
    
    This patch updates the support function rs6000_adjust_vec_address and the
    functions it calls so that they can be called before register allocation.
    The places that take a scratch register now allocate a new pseudo register
    if they are passed a SCRATCH rtx (a sketch of this idiom follows the
    ChangeLog below).
    
    It then changes the vec_extract patterns for V16QI/V8HI/V4SI with constant
    element numbers so that they are split before register allocation.
    
    2023-04-27   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/rs6000.cc (get_vector_offset): Allow function to be
            called before register allocation.
            (adjust_vec_address_pcrel): Likewise.
            (rs6000_adjust_vec_address): Likewise.
            * config/rs6000/vsx.md (vsx_extract_<mode>_load): Split before register
            allocation.
            (vsx_extract_v4si_load_to_<su>di): Likewise.
            (vsx_extract_<VSX_EXTRACT_I2:mode>_load_to_u<GPR:mode>): Likewise.
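    
    A minimal sketch of that idiom (the helper name is hypothetical and only
    for illustration; GET_CODE, SCRATCH, can_create_pseudo_p and gen_reg_rtx
    are the GCC internals the patch actually uses, so this assumes the
    internal RTL headers rather than being standalone code):
    
        /* Before register allocation the caller may pass a bare (scratch)
           rtx; replace it with a fresh pseudo.  After reload the operand
           is already a hard register and is used unchanged.  */
        static rtx
        replace_scratch_with_pseudo (rtx base_tmp)
        {
          if (GET_CODE (base_tmp) == SCRATCH)
            base_tmp = gen_reg_rtx (Pmode);
          return base_tmp;
        }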

Diff:
---
 gcc/config/rs6000/rs6000.cc | 87 +++++++++++++++++++++++++++++++++++----------
 gcc/config/rs6000/vsx.md    |  6 ++--
 2 files changed, 72 insertions(+), 21 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3be5860dd9b..0e04f7151f1 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -7686,9 +7686,13 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
   if (CONST_INT_P (element))
     return GEN_INT (INTVAL (element) * scalar_size);
 
-  /* All insns should use the 'Q' constraint (address is a single register) if
-     the element number is not a constant.  */
-  gcc_assert (satisfies_constraint_Q (mem));
+  if (GET_CODE (base_tmp) == SCRATCH)
+    base_tmp = gen_reg_rtx (Pmode);
+
+  /* After register allocation, all insns should use the 'Q' constraint
+     (address is a single register) if the element number is not a
+     constant.  */
+  gcc_assert (can_create_pseudo_p () || satisfies_constraint_Q (mem));
 
   /* Mask the element to make sure the element number is between 0 and the
      maximum number of elements - 1 so that we don't generate an address
@@ -7704,6 +7708,9 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
   if (shift > 0)
     {
       rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
+      if (can_create_pseudo_p ())
+	base_tmp = gen_reg_rtx (Pmode);
+
       emit_insn (gen_rtx_SET (base_tmp, shift_op));
     }
 
@@ -7747,6 +7754,9 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
 
       else
 	{
+	  if (GET_CODE (base_tmp) == SCRATCH)
+	    base_tmp = gen_reg_rtx (Pmode);
+
 	  emit_move_insn (base_tmp, addr);
 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
 	}
@@ -7769,9 +7779,8 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
    temporary (BASE_TMP) to fixup the address.  Return the new memory address
    that is valid for reads or writes to a given register (SCALAR_REG).
 
-   This function is expected to be called after reload is completed when we are
-   splitting insns.  The temporary BASE_TMP might be set multiple times with
-   this code.  */
+   The temporary BASE_TMP might be set multiple times with this code if this is
+   called after register allocation.  */
 
 rtx
 rs6000_adjust_vec_address (rtx scalar_reg,
@@ -7784,8 +7793,11 @@ rs6000_adjust_vec_address (rtx scalar_reg,
   rtx addr = XEXP (mem, 0);
   rtx new_addr;
 
-  gcc_assert (!reg_mentioned_p (base_tmp, addr));
-  gcc_assert (!reg_mentioned_p (base_tmp, element));
+  if (GET_CODE (base_tmp) != SCRATCH)
+    {
+      gcc_assert (!reg_mentioned_p (base_tmp, addr));
+      gcc_assert (!reg_mentioned_p (base_tmp, element));
+    }
 
   /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
   gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
@@ -7841,33 +7853,72 @@ rs6000_adjust_vec_address (rtx scalar_reg,
 	     offset, it has the benefit that if D-FORM instructions are
 	     allowed, the offset is part of the memory access to the vector
 	     element. */
+	  if (GET_CODE (base_tmp) == SCRATCH)
+	    base_tmp = gen_reg_rtx (Pmode);
+
 	  emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
 	}
     }
 
+  /* Deal with Altivec style addresses.  These come up on the power8 when GCC
+     generates the Altivec load/store (LVX and STVX) to eliminate byte swapping
+     the vectors.  */
+  else if (GET_CODE (addr) == AND
+	   && CONST_INT_P (XEXP (addr, 1))
+	   && INTVAL (XEXP (addr, 1)) == -16)
+    {
+      rtx op0 = XEXP (addr, 0);
+      rtx op1 = XEXP (addr, 1);
+
+      if (GET_CODE (base_tmp) == SCRATCH)
+	base_tmp = gen_reg_rtx (Pmode);
+
+      /* Is this reg+reg?  */
+      if (GET_CODE (op0) == PLUS)
+	{
+	  rtx plus_tmp = (can_create_pseudo_p ()
+			  ? gen_reg_rtx (Pmode)
+			  : base_tmp);
+
+	  emit_insn (gen_rtx_SET (plus_tmp, op0));
+	  op0 = plus_tmp;
+	}
+
+      emit_insn (gen_rtx_SET (base_tmp,
+			      gen_rtx_AND (Pmode, op0, op1)));
+      new_addr = base_tmp;
+    }
+
   else
     {
-      emit_move_insn (base_tmp, addr);
+      if (GET_CODE (base_tmp) == SCRATCH)
+	base_tmp = gen_reg_rtx (Pmode);
+
+      emit_insn (gen_rtx_SET (base_tmp, addr));
       new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
     }
 
-    /* If the address isn't valid, move the address into the temporary base
-       register.  Some reasons it could not be valid include:
+    /* If register allocation has been done and the address isn't valid, move
+       the address into the temporary base register.  Some reasons it could not
+       be valid include:
 
        The address offset overflowed the 16 or 34 bit offset size;
        We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
        We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
        Only X_FORM loads can be done, and the address is D_FORM.  */
 
-  enum insn_form iform
-    = address_to_insn_form (new_addr, scalar_mode,
-			    reg_to_non_prefixed (scalar_reg, scalar_mode));
-
-  if (iform == INSN_FORM_BAD)
+  if (!can_create_pseudo_p ())
     {
-      emit_move_insn (base_tmp, new_addr);
-      new_addr = base_tmp;
+      enum insn_form iform
+	= address_to_insn_form (new_addr, scalar_mode,
+				reg_to_non_prefixed (scalar_reg, scalar_mode));
+
+      if (iform == INSN_FORM_BAD)
+	{
+	  emit_move_insn (base_tmp, new_addr);
+	  new_addr = base_tmp;
+	}
     }
 
   return change_address (mem, scalar_mode, new_addr);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index d156c9bd90b..e0c29353b38 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4035,7 +4035,7 @@
 			"=X,     X,     &b,     X,            &b"))]
   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0) (match_dup 4))]
 {
   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
@@ -4068,7 +4068,7 @@
 			"=X,     X,     &b,     X,       &b"))]
   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0)
 	(any_extend:DI (match_dup 4)))]
 {
@@ -4101,7 +4101,7 @@
 			"=X,     X,     &b,     X,       &b"))]
   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I2:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0)
 	(zero_extend:GPR (match_dup 4)))]
 {

* [gcc(refs/users/meissner/heads/work119)] Split vec_extract from memory before reload for V16QI/V8HI/V4SI with constant elements.
@ 2023-05-01 17:22 Michael Meissner
  0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2023-05-01 17:22 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:67313a509367e6b2bc5382d33076452fa67d40da

commit 67313a509367e6b2bc5382d33076452fa67d40da
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Mon May 1 13:22:27 2023 -0400

    Split vec_extract from memory before reload for V16QI/V8HI/V4SI with constant elements.
    
    This patch updates the support function rs6000_adjust_vec_address and the
    functions it calls so that they can be called before register allocation.
    The places that take a scratch register now allocate a new pseudo register
    if they are passed a SCRATCH rtx.
    
    It then changes the vec_extract patterns for V16QI/V8HI/V4SI with constant
    element numbers so that they are split before register allocation (a small
    user-level example follows the ChangeLog below).
    
    2023-04-27   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/rs6000.cc (get_vector_offset): Allow function to be
            called before register allocation.
            (adjust_vec_address_pcrel): Likewise.
            (rs6000_adjust_vec_address): Likewise.
            * config/rs6000/vsx.md (vsx_extract_<mode>_load): Split before register
            allocation.
            (vsx_extract_v4si_load_to_<su>di): Likewise.
            (vsx_extract_<VSX_EXTRACT_I2:mode>_load_to_u<GPR:mode>): Likewise.
            (*vsx_extract_v8hi_load_to_s<mode>): Likewise.
            (vsx_extract_<mode>_var_load): Likewise.
            (vsx_extract_v4si_var_load_to_<su>di): Likewise.
            (vsx_extract_<VSX_EXTRACT_I2:mode>_var_load_to_u<GPR:mode>): Likewise.
            (vsx_extract_v8hi_var_load_to_s<mode>): Likewise.
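    
    As a small user-level example of the construct these patterns target
    (the function name and the exact instructions generated are illustrative
    assumptions, not taken from the patch):
    
        /* Extract element 5 of a vector that lives in memory.  With the
           patterns split before register allocation, the extract can be
           done as a single scalar load from an adjusted address (e.g. a
           sign-extending halfword load) instead of loading the whole
           vector into a register first.  */
        #include <altivec.h>
    
        short
        extract_elem5 (vector short *p)
        {
          return vec_extract (*p, 5);
        }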

Diff:
---
 gcc/config/rs6000/rs6000.cc | 87 +++++++++++++++++++++++++++++++++++----------
 gcc/config/rs6000/vsx.md    | 16 ++++-----
 2 files changed, 77 insertions(+), 26 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3be5860dd9b..0e04f7151f1 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -7686,9 +7686,13 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
   if (CONST_INT_P (element))
     return GEN_INT (INTVAL (element) * scalar_size);
 
-  /* All insns should use the 'Q' constraint (address is a single register) if
-     the element number is not a constant.  */
-  gcc_assert (satisfies_constraint_Q (mem));
+  if (GET_CODE (base_tmp) == SCRATCH)
+    base_tmp = gen_reg_rtx (Pmode);
+
+  /* After register allocation, all insns should use the 'Q' constraint
+     (address is a single register) if the element number is not a
+     constant.  */
+  gcc_assert (can_create_pseudo_p () || satisfies_constraint_Q (mem));
 
   /* Mask the element to make sure the element number is between 0 and the
      maximum number of elements - 1 so that we don't generate an address
@@ -7704,6 +7708,9 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
   if (shift > 0)
     {
       rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
+      if (can_create_pseudo_p ())
+	base_tmp = gen_reg_rtx (Pmode);
+
       emit_insn (gen_rtx_SET (base_tmp, shift_op));
     }
 
@@ -7747,6 +7754,9 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
 
       else
 	{
+	  if (GET_CODE (base_tmp) == SCRATCH)
+	    base_tmp = gen_reg_rtx (Pmode);
+
 	  emit_move_insn (base_tmp, addr);
 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
 	}
@@ -7769,9 +7779,8 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
    temporary (BASE_TMP) to fixup the address.  Return the new memory address
    that is valid for reads or writes to a given register (SCALAR_REG).
 
-   This function is expected to be called after reload is completed when we are
-   splitting insns.  The temporary BASE_TMP might be set multiple times with
-   this code.  */
+   The temporary BASE_TMP might be set multiple times with this code if this is
+   called after register allocation.  */
 
 rtx
 rs6000_adjust_vec_address (rtx scalar_reg,
@@ -7784,8 +7793,11 @@ rs6000_adjust_vec_address (rtx scalar_reg,
   rtx addr = XEXP (mem, 0);
   rtx new_addr;
 
-  gcc_assert (!reg_mentioned_p (base_tmp, addr));
-  gcc_assert (!reg_mentioned_p (base_tmp, element));
+  if (GET_CODE (base_tmp) != SCRATCH)
+    {
+      gcc_assert (!reg_mentioned_p (base_tmp, addr));
+      gcc_assert (!reg_mentioned_p (base_tmp, element));
+    }
 
   /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
   gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
@@ -7841,33 +7853,72 @@ rs6000_adjust_vec_address (rtx scalar_reg,
 	     offset, it has the benefit that if D-FORM instructions are
 	     allowed, the offset is part of the memory access to the vector
 	     element. */
+	  if (GET_CODE (base_tmp) == SCRATCH)
+	    base_tmp = gen_reg_rtx (Pmode);
+
 	  emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
 	}
     }
 
+  /* Deal with Altivec style addresses.  These come up on the power8 when GCC
+     generates the Altivec load/store (LVX and STVX) to eliminate byte swapping
+     the vectors.  */
+  else if (GET_CODE (addr) == AND
+	   && CONST_INT_P (XEXP (addr, 1))
+	   && INTVAL (XEXP (addr, 1)) == -16)
+    {
+      rtx op0 = XEXP (addr, 0);
+      rtx op1 = XEXP (addr, 1);
+
+      if (GET_CODE (base_tmp) == SCRATCH)
+	base_tmp = gen_reg_rtx (Pmode);
+
+      /* Is this reg+reg?  */
+      if (GET_CODE (op0) == PLUS)
+	{
+	  rtx plus_tmp = (can_create_pseudo_p ()
+			  ? gen_reg_rtx (Pmode)
+			  : base_tmp);
+
+	  emit_insn (gen_rtx_SET (plus_tmp, op0));
+	  op0 = plus_tmp;
+	}
+
+      emit_insn (gen_rtx_SET (base_tmp,
+			      gen_rtx_AND (Pmode, op0, op1)));
+      new_addr = base_tmp;
+    }
+
   else
     {
-      emit_move_insn (base_tmp, addr);
+      if (GET_CODE (base_tmp) == SCRATCH)
+	base_tmp = gen_reg_rtx (Pmode);
+
+      emit_insn (gen_rtx_SET (base_tmp, addr));
       new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
     }
 
-    /* If the address isn't valid, move the address into the temporary base
-       register.  Some reasons it could not be valid include:
+    /* If register allocation has been done and the address isn't valid, move
+       the address into the temporary base register.  Some reasons it could not
+       be valid include:
 
        The address offset overflowed the 16 or 34 bit offset size;
        We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
        We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
        Only X_FORM loads can be done, and the address is D_FORM.  */
 
-  enum insn_form iform
-    = address_to_insn_form (new_addr, scalar_mode,
-			    reg_to_non_prefixed (scalar_reg, scalar_mode));
-
-  if (iform == INSN_FORM_BAD)
+  if (!can_create_pseudo_p ())
     {
-      emit_move_insn (base_tmp, new_addr);
-      new_addr = base_tmp;
+      enum insn_form iform
+	= address_to_insn_form (new_addr, scalar_mode,
+				reg_to_non_prefixed (scalar_reg, scalar_mode));
+
+      if (iform == INSN_FORM_BAD)
+	{
+	  emit_move_insn (base_tmp, new_addr);
+	  new_addr = base_tmp;
+	}
     }
 
   return change_address (mem, scalar_mode, new_addr);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index d156c9bd90b..90f64f79124 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4035,7 +4035,7 @@
 			"=X,     X,     &b,     X,            &b"))]
   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0) (match_dup 4))]
 {
   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
@@ -4068,7 +4068,7 @@
 			"=X,     X,     &b,     X,       &b"))]
   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0)
 	(any_extend:DI (match_dup 4)))]
 {
@@ -4101,7 +4101,7 @@
 			"=X,     X,     &b,     X,       &b"))]
   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I2:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0)
 	(zero_extend:GPR (match_dup 4)))]
 {
@@ -4178,7 +4178,7 @@
    (clobber (match_scratch:DI 3 "=X,X,&b"))]
   "VECTOR_MEM_VSX_P (V8HImode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0)
 	(sign_extend:GPR (match_dup 4)))]
 {
@@ -4219,7 +4219,7 @@
    (clobber (match_scratch:DI 3 "=&b,&b"))]
   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0) (match_dup 4))]
 {
   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
@@ -4240,7 +4240,7 @@
    (clobber (match_scratch:DI 3 "=&b,&b"))]
   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0)
 	(any_extend:DI (match_dup 4)))]
 {
@@ -4262,7 +4262,7 @@
    (clobber (match_scratch:DI 3 "=&b,&b"))]
   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0)
 	(zero_extend:GPR (match_dup 4)))]
 {
@@ -4285,7 +4285,7 @@
    (clobber (match_scratch:DI 3 "=&b"))]
   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0)
 	(sign_extend:GPR (match_dup 4)))]
 {
