public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work118)] Improve vec_extract of V4SF from memory with constant element.
@ 2023-04-14 14:17 Michael Meissner
0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2023-04-14 14:17 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:72c687f5507d5a3ab7023a35d2ef85562104df15
commit 72c687f5507d5a3ab7023a35d2ef85562104df15
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Fri Apr 14 10:17:37 2023 -0400
Improve vec_extract of V4SF from memory with constant element.
2023-04-14 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/rs6000.cc (get_vector_offset): Allow being called before
register allocation.
(adjust_vec_address_pcrel): Likewise.
(rs6000_adjust_vec_address): Likewise.
* config/rs6000/vsx.md (vsx_extract_v4sf_load): Allow split before
register allocation.
(vsx_extract_v4sf_to_df_load): New insn.
Diff:
---
gcc/config/rs6000/rs6000.cc | 49 ++++++++++++++++++++++++++++++++-------------
gcc/config/rs6000/vsx.md | 24 +++++++++++++++++++++-
2 files changed, 58 insertions(+), 15 deletions(-)
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3be5860dd9b..21d4e2caf20 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -7686,6 +7686,9 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
if (CONST_INT_P (element))
return GEN_INT (INTVAL (element) * scalar_size);
+ if (GET_CODE (base_tmp) == SCRATCH)
+ base_tmp = gen_reg_rtx (Pmode);
+
/* All insns should use the 'Q' constraint (address is a single register) if
the element number is not a constant. */
gcc_assert (satisfies_constraint_Q (mem));
@@ -7704,6 +7707,9 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
if (shift > 0)
{
rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
+ if (can_create_pseudo_p ())
+ base_tmp = gen_reg_rtx (Pmode);
+
emit_insn (gen_rtx_SET (base_tmp, shift_op));
}
@@ -7747,6 +7753,9 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
else
{
+ if (GET_CODE (base_tmp) == SCRATCH)
+ base_tmp = gen_reg_rtx (Pmode);
+
emit_move_insn (base_tmp, addr);
new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
}
@@ -7769,9 +7778,8 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
temporary (BASE_TMP) to fixup the address. Return the new memory address
that is valid for reads or writes to a given register (SCALAR_REG).
- This function is expected to be called after reload is completed when we are
- splitting insns. The temporary BASE_TMP might be set multiple times with
- this code. */
+ The temporary BASE_TMP might be set multiple times with this code if this is
+ called after register allocation. */
rtx
rs6000_adjust_vec_address (rtx scalar_reg,
@@ -7784,8 +7792,11 @@ rs6000_adjust_vec_address (rtx scalar_reg,
rtx addr = XEXP (mem, 0);
rtx new_addr;
- gcc_assert (!reg_mentioned_p (base_tmp, addr));
- gcc_assert (!reg_mentioned_p (base_tmp, element));
+ if (GET_CODE (base_tmp) != SCRATCH)
+ {
+ gcc_assert (!reg_mentioned_p (base_tmp, addr));
+ gcc_assert (!reg_mentioned_p (base_tmp, element));
+ }
/* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
@@ -7841,6 +7852,9 @@ rs6000_adjust_vec_address (rtx scalar_reg,
offset, it has the benefit that if D-FORM instructions are
allowed, the offset is part of the memory access to the vector
element. */
+ if (GET_CODE (base_tmp) == SCRATCH)
+ base_tmp = gen_reg_rtx (Pmode);
+
emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
}
@@ -7848,26 +7862,33 @@ rs6000_adjust_vec_address (rtx scalar_reg,
else
{
+ if (GET_CODE (base_tmp) == SCRATCH)
+ base_tmp = gen_reg_rtx (Pmode);
+
emit_move_insn (base_tmp, addr);
new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
}
- /* If the address isn't valid, move the address into the temporary base
- register. Some reasons it could not be valid include:
+ /* If register allocation has been done and the address isn't valid, move
+ the address into the temporary base register. Some reasons it could not
+ be valid include:
The address offset overflowed the 16 or 34 bit offset size;
We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
Only X_FORM loads can be done, and the address is D_FORM. */
- enum insn_form iform
- = address_to_insn_form (new_addr, scalar_mode,
- reg_to_non_prefixed (scalar_reg, scalar_mode));
-
- if (iform == INSN_FORM_BAD)
+ if (!can_create_pseudo_p ())
{
- emit_move_insn (base_tmp, new_addr);
- new_addr = base_tmp;
+ enum insn_form iform
+ = address_to_insn_form (new_addr, scalar_mode,
+ reg_to_non_prefixed (scalar_reg, scalar_mode));
+
+ if (iform == INSN_FORM_BAD)
+ {
+ emit_move_insn (base_tmp, new_addr);
+ new_addr = base_tmp;
+ }
}
return change_address (mem, scalar_mode, new_addr);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 417aff5e24b..00f85caed62 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3549,6 +3549,7 @@
[(set_attr "length" "8")
(set_attr "type" "fp")])
+;; V4SF extract from memory with constant element number
(define_insn_and_split "*vsx_extract_v4sf_load"
[(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
(vec_select:SF
@@ -3557,7 +3558,7 @@
(clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
"VECTOR_MEM_VSX_P (V4SFmode)"
"#"
- "&& reload_completed"
+ "&& 1"
[(set (match_dup 0) (match_dup 4))]
{
operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
@@ -3567,6 +3568,27 @@
(set_attr "length" "8")
(set_attr "isa" "*,p7v,p9v,*")])
+;; V4SF extract from memory and convert to DFmode with constant element number
+(define_insn_and_split "*vsx_extract_v4sf_to_df_load"
+ [(set (match_operand:DF 0 "register_operand" "=f,v")
+ (float_extend:DF
+ (vec_select:SF
+ (match_operand:V4SF 1 "memory_operand" "m,m")
+ (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n")]))))
+ (clobber (match_scratch:P 3 "=&b,&b"))]
+ "VECTOR_MEM_VSX_P (V4SFmode)"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (float_extend:DF (match_dup 4)))]
+{
+ operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+ operands[3], SFmode);
+}
+ [(set_attr "type" "fpload")
+ (set_attr "length" "8")
+ (set_attr "isa" "*,p8v")])
+
;; Variable V4SF extract from a register
(define_insn_and_split "vsx_extract_v4sf_var"
[(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
^ permalink raw reply [flat|nested] 6+ messages in thread
* [gcc(refs/users/meissner/heads/work118)] Improve vec_extract of V4SF from memory with constant element.
@ 2023-04-15 1:20 Michael Meissner
0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2023-04-15 1:20 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:2610ff1d92a51410e61851445b27bf942b5e4100
commit 2610ff1d92a51410e61851445b27bf942b5e4100
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Fri Apr 14 21:20:28 2023 -0400
Improve vec_extract of V4SF from memory with constant element.
2023-04-14 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/rs6000.cc (get_vector_offset): Allow being called before
register allocation.
(adjust_vec_address_pcrel): Likewise.
(rs6000_adjust_vec_address): Likewise.
Diff:
---
gcc/config/rs6000/rs6000.cc | 56 +++++++++++++++++++++++++++++++--------------
1 file changed, 39 insertions(+), 17 deletions(-)
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3be5860dd9b..65295dbaf81 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -7686,9 +7686,13 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
if (CONST_INT_P (element))
return GEN_INT (INTVAL (element) * scalar_size);
- /* All insns should use the 'Q' constraint (address is a single register) if
- the element number is not a constant. */
- gcc_assert (satisfies_constraint_Q (mem));
+ if (GET_CODE (base_tmp) == SCRATCH)
+ base_tmp = gen_reg_rtx (Pmode);
+
+ /* After register allocation, all insns should use the 'Q' constraint
+ (address is a single register) if the element number is not a
+ constant. */
+ gcc_assert (can_create_pseudo_p () || satisfies_constraint_Q (mem));
/* Mask the element to make sure the element number is between 0 and the
maximum number of elements - 1 so that we don't generate an address
@@ -7704,6 +7708,9 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
if (shift > 0)
{
rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
+ if (can_create_pseudo_p ())
+ base_tmp = gen_reg_rtx (Pmode);
+
emit_insn (gen_rtx_SET (base_tmp, shift_op));
}
@@ -7747,6 +7754,9 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
else
{
+ if (GET_CODE (base_tmp) == SCRATCH)
+ base_tmp = gen_reg_rtx (Pmode);
+
emit_move_insn (base_tmp, addr);
new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
}
@@ -7769,9 +7779,8 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
temporary (BASE_TMP) to fixup the address. Return the new memory address
that is valid for reads or writes to a given register (SCALAR_REG).
- This function is expected to be called after reload is completed when we are
- splitting insns. The temporary BASE_TMP might be set multiple times with
- this code. */
+ The temporary BASE_TMP might be set multiple times with this code if this is
+ called after register allocation. */
rtx
rs6000_adjust_vec_address (rtx scalar_reg,
@@ -7784,8 +7793,11 @@ rs6000_adjust_vec_address (rtx scalar_reg,
rtx addr = XEXP (mem, 0);
rtx new_addr;
- gcc_assert (!reg_mentioned_p (base_tmp, addr));
- gcc_assert (!reg_mentioned_p (base_tmp, element));
+ if (GET_CODE (base_tmp) != SCRATCH)
+ {
+ gcc_assert (!reg_mentioned_p (base_tmp, addr));
+ gcc_assert (!reg_mentioned_p (base_tmp, element));
+ }
/* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
@@ -7841,6 +7853,9 @@ rs6000_adjust_vec_address (rtx scalar_reg,
offset, it has the benefit that if D-FORM instructions are
allowed, the offset is part of the memory access to the vector
element. */
+ if (GET_CODE (base_tmp) == SCRATCH)
+ base_tmp = gen_reg_rtx (Pmode);
+
emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
}
@@ -7848,26 +7863,33 @@ rs6000_adjust_vec_address (rtx scalar_reg,
else
{
+ if (GET_CODE (base_tmp) == SCRATCH)
+ base_tmp = gen_reg_rtx (Pmode);
+
emit_move_insn (base_tmp, addr);
new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
}
- /* If the address isn't valid, move the address into the temporary base
- register. Some reasons it could not be valid include:
+ /* If register allocation has been done and the address isn't valid, move
+ the address into the temporary base register. Some reasons it could not
+ be valid include:
The address offset overflowed the 16 or 34 bit offset size;
We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
Only X_FORM loads can be done, and the address is D_FORM. */
- enum insn_form iform
- = address_to_insn_form (new_addr, scalar_mode,
- reg_to_non_prefixed (scalar_reg, scalar_mode));
-
- if (iform == INSN_FORM_BAD)
+ if (!can_create_pseudo_p ())
{
- emit_move_insn (base_tmp, new_addr);
- new_addr = base_tmp;
+ enum insn_form iform
+ = address_to_insn_form (new_addr, scalar_mode,
+ reg_to_non_prefixed (scalar_reg, scalar_mode));
+
+ if (iform == INSN_FORM_BAD)
+ {
+ emit_move_insn (base_tmp, new_addr);
+ new_addr = base_tmp;
+ }
}
return change_address (mem, scalar_mode, new_addr);
^ permalink raw reply [flat|nested] 6+ messages in thread
* [gcc(refs/users/meissner/heads/work118)] Improve vec_extract of V4SF from memory with constant element.
@ 2023-04-14 18:53 Michael Meissner
0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2023-04-14 18:53 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:2e97044d04f57877de8901c6394658cde804384c
commit 2e97044d04f57877de8901c6394658cde804384c
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Fri Apr 14 14:52:52 2023 -0400
Improve vec_extract of V4SF from memory with constant element.
2023-04-14 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/rs6000.cc (get_vector_offset): Allow being called before
register allocation.
(adjust_vec_address_pcrel): Likewise.
(rs6000_adjust_vec_address): Likewise.
* config/rs6000/vsx.md (vsx_extract_v4sf_load): Allow split before
register allocation.
(vsx_extract_v4sf_to_df_load): New insn.
Diff:
---
gcc/config/rs6000/rs6000.cc | 56 +++++++++++++++++++++++++++++++--------------
gcc/config/rs6000/vsx.md | 24 ++++++++++++++++++-
2 files changed, 62 insertions(+), 18 deletions(-)
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3be5860dd9b..65295dbaf81 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -7686,9 +7686,13 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
if (CONST_INT_P (element))
return GEN_INT (INTVAL (element) * scalar_size);
- /* All insns should use the 'Q' constraint (address is a single register) if
- the element number is not a constant. */
- gcc_assert (satisfies_constraint_Q (mem));
+ if (GET_CODE (base_tmp) == SCRATCH)
+ base_tmp = gen_reg_rtx (Pmode);
+
+ /* After register allocation, all insns should use the 'Q' constraint
+ (address is a single register) if the element number is not a
+ constant. */
+ gcc_assert (can_create_pseudo_p () || satisfies_constraint_Q (mem));
/* Mask the element to make sure the element number is between 0 and the
maximum number of elements - 1 so that we don't generate an address
@@ -7704,6 +7708,9 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
if (shift > 0)
{
rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
+ if (can_create_pseudo_p ())
+ base_tmp = gen_reg_rtx (Pmode);
+
emit_insn (gen_rtx_SET (base_tmp, shift_op));
}
@@ -7747,6 +7754,9 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
else
{
+ if (GET_CODE (base_tmp) == SCRATCH)
+ base_tmp = gen_reg_rtx (Pmode);
+
emit_move_insn (base_tmp, addr);
new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
}
@@ -7769,9 +7779,8 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
temporary (BASE_TMP) to fixup the address. Return the new memory address
that is valid for reads or writes to a given register (SCALAR_REG).
- This function is expected to be called after reload is completed when we are
- splitting insns. The temporary BASE_TMP might be set multiple times with
- this code. */
+ The temporary BASE_TMP might be set multiple times with this code if this is
+ called after register allocation. */
rtx
rs6000_adjust_vec_address (rtx scalar_reg,
@@ -7784,8 +7793,11 @@ rs6000_adjust_vec_address (rtx scalar_reg,
rtx addr = XEXP (mem, 0);
rtx new_addr;
- gcc_assert (!reg_mentioned_p (base_tmp, addr));
- gcc_assert (!reg_mentioned_p (base_tmp, element));
+ if (GET_CODE (base_tmp) != SCRATCH)
+ {
+ gcc_assert (!reg_mentioned_p (base_tmp, addr));
+ gcc_assert (!reg_mentioned_p (base_tmp, element));
+ }
/* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
@@ -7841,6 +7853,9 @@ rs6000_adjust_vec_address (rtx scalar_reg,
offset, it has the benefit that if D-FORM instructions are
allowed, the offset is part of the memory access to the vector
element. */
+ if (GET_CODE (base_tmp) == SCRATCH)
+ base_tmp = gen_reg_rtx (Pmode);
+
emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
}
@@ -7848,26 +7863,33 @@ rs6000_adjust_vec_address (rtx scalar_reg,
else
{
+ if (GET_CODE (base_tmp) == SCRATCH)
+ base_tmp = gen_reg_rtx (Pmode);
+
emit_move_insn (base_tmp, addr);
new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
}
- /* If the address isn't valid, move the address into the temporary base
- register. Some reasons it could not be valid include:
+ /* If register allocation has been done and the address isn't valid, move
+ the address into the temporary base register. Some reasons it could not
+ be valid include:
The address offset overflowed the 16 or 34 bit offset size;
We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
Only X_FORM loads can be done, and the address is D_FORM. */
- enum insn_form iform
- = address_to_insn_form (new_addr, scalar_mode,
- reg_to_non_prefixed (scalar_reg, scalar_mode));
-
- if (iform == INSN_FORM_BAD)
+ if (!can_create_pseudo_p ())
{
- emit_move_insn (base_tmp, new_addr);
- new_addr = base_tmp;
+ enum insn_form iform
+ = address_to_insn_form (new_addr, scalar_mode,
+ reg_to_non_prefixed (scalar_reg, scalar_mode));
+
+ if (iform == INSN_FORM_BAD)
+ {
+ emit_move_insn (base_tmp, new_addr);
+ new_addr = base_tmp;
+ }
}
return change_address (mem, scalar_mode, new_addr);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 417aff5e24b..00f85caed62 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3549,6 +3549,7 @@
[(set_attr "length" "8")
(set_attr "type" "fp")])
+;; V4SF extract from memory with constant element number
(define_insn_and_split "*vsx_extract_v4sf_load"
[(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
(vec_select:SF
@@ -3557,7 +3558,7 @@
(clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
"VECTOR_MEM_VSX_P (V4SFmode)"
"#"
- "&& reload_completed"
+ "&& 1"
[(set (match_dup 0) (match_dup 4))]
{
operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
@@ -3567,6 +3568,27 @@
(set_attr "length" "8")
(set_attr "isa" "*,p7v,p9v,*")])
+;; V4SF extract from memory and convert to DFmode with constant element number
+(define_insn_and_split "*vsx_extract_v4sf_to_df_load"
+ [(set (match_operand:DF 0 "register_operand" "=f,v")
+ (float_extend:DF
+ (vec_select:SF
+ (match_operand:V4SF 1 "memory_operand" "m,m")
+ (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n")]))))
+ (clobber (match_scratch:P 3 "=&b,&b"))]
+ "VECTOR_MEM_VSX_P (V4SFmode)"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (float_extend:DF (match_dup 4)))]
+{
+ operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+ operands[3], SFmode);
+}
+ [(set_attr "type" "fpload")
+ (set_attr "length" "8")
+ (set_attr "isa" "*,p8v")])
+
;; Variable V4SF extract from a register
(define_insn_and_split "vsx_extract_v4sf_var"
[(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
^ permalink raw reply [flat|nested] 6+ messages in thread
* [gcc(refs/users/meissner/heads/work118)] Improve vec_extract of V4SF from memory with constant element.
@ 2023-04-13 23:56 Michael Meissner
0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2023-04-13 23:56 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:36fa985f1a8b2f7947a0379568c82b03f55d7b1d
commit 36fa985f1a8b2f7947a0379568c82b03f55d7b1d
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Thu Apr 13 19:56:07 2023 -0400
Improve vec_extract of V4SF from memory with constant element.
2023-04-13 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/rs6000.cc (get_vector_offset): Allow being called before
register allocation.
(adjust_vec_address_pcrel): Likewise.
(rs6000_adjust_vec_address): Likewise.
* config/rs6000/vsx.md (vsx_extract_v4sf_load): Allow split before
register allocation.
(vsx_extract_v4sf_to_df_load): New insn.
Diff:
---
gcc/config/rs6000/rs6000.cc | 54 +++++++++++++++++++++++++++++++++------------
gcc/config/rs6000/vsx.md | 24 +++++++++++++++++++-
2 files changed, 63 insertions(+), 15 deletions(-)
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3be5860dd9b..0e2eb964783 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -7686,6 +7686,9 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
if (CONST_INT_P (element))
return GEN_INT (INTVAL (element) * scalar_size);
+ if (can_create_pseudo_p ())
+ base_tmp = gen_reg_rtx (Pmode);
+
/* All insns should use the 'Q' constraint (address is a single register) if
the element number is not a constant. */
gcc_assert (satisfies_constraint_Q (mem));
@@ -7704,6 +7707,9 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
if (shift > 0)
{
rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
+ if (can_create_pseudo_p ())
+ base_tmp = gen_reg_rtx (Pmode);
+
emit_insn (gen_rtx_SET (base_tmp, shift_op));
}
@@ -7747,6 +7753,9 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
else
{
+ if (GET_CODE (base_tmp) == SCRATCH)
+ base_tmp = gen_reg_rtx (Pmode);
+
emit_move_insn (base_tmp, addr);
new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
}
@@ -7769,9 +7778,7 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
temporary (BASE_TMP) to fixup the address. Return the new memory address
that is valid for reads or writes to a given register (SCALAR_REG).
- This function is expected to be called after reload is completed when we are
- splitting insns. The temporary BASE_TMP might be set multiple times with
- this code. */
+ The temporary BASE_TMP might be set multiple times with this code. */
rtx
rs6000_adjust_vec_address (rtx scalar_reg,
@@ -7784,8 +7791,11 @@ rs6000_adjust_vec_address (rtx scalar_reg,
rtx addr = XEXP (mem, 0);
rtx new_addr;
- gcc_assert (!reg_mentioned_p (base_tmp, addr));
- gcc_assert (!reg_mentioned_p (base_tmp, element));
+ if (GET_CODE (base_tmp) != SCRATCH)
+ {
+ gcc_assert (!reg_mentioned_p (base_tmp, addr));
+ gcc_assert (!reg_mentioned_p (base_tmp, element));
+ }
/* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
@@ -7841,6 +7851,9 @@ rs6000_adjust_vec_address (rtx scalar_reg,
offset, it has the benefit that if D-FORM instructions are
allowed, the offset is part of the memory access to the vector
element. */
+ if (GET_CODE (base_tmp) == SCRATCH)
+ base_tmp = gen_reg_rtx (Pmode);
+
emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
}
@@ -7848,26 +7861,39 @@ rs6000_adjust_vec_address (rtx scalar_reg,
else
{
+ if (GET_CODE (base_tmp) == SCRATCH)
+ base_tmp = gen_reg_rtx (Pmode);
+
emit_move_insn (base_tmp, addr);
new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
}
- /* If the address isn't valid, move the address into the temporary base
- register. Some reasons it could not be valid include:
+ /* If register allocation has been done and the address isn't valid, move
+ the address into the temporary base register. Some reasons it could not
+ be valid include:
The address offset overflowed the 16 or 34 bit offset size;
We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
Only X_FORM loads can be done, and the address is D_FORM. */
- enum insn_form iform
- = address_to_insn_form (new_addr, scalar_mode,
- reg_to_non_prefixed (scalar_reg, scalar_mode));
-
- if (iform == INSN_FORM_BAD)
+ if (!can_create_pseudo_p ())
{
- emit_move_insn (base_tmp, new_addr);
- new_addr = base_tmp;
+ /* If the final register is not the same mode because a FLOAT_EXTEND,
+ ZERO_EXTEND, or SIGN_EXTEND was folded into the instruction, adjust
+ the register to be of the correct mode for the load. */
+ if (GET_MODE (scalar_reg) != scalar_mode)
+ scalar_reg = gen_rtx_REG (scalar_mode, REGNO (scalar_reg));
+
+ enum insn_form iform
+ = address_to_insn_form (new_addr, scalar_mode,
+ reg_to_non_prefixed (scalar_reg, scalar_mode));
+
+ if (iform == INSN_FORM_BAD)
+ {
+ emit_move_insn (base_tmp, new_addr);
+ new_addr = base_tmp;
+ }
}
return change_address (mem, scalar_mode, new_addr);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 417aff5e24b..00f85caed62 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3549,6 +3549,7 @@
[(set_attr "length" "8")
(set_attr "type" "fp")])
+;; V4SF extract from memory with constant element number
(define_insn_and_split "*vsx_extract_v4sf_load"
[(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
(vec_select:SF
@@ -3557,7 +3558,7 @@
(clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
"VECTOR_MEM_VSX_P (V4SFmode)"
"#"
- "&& reload_completed"
+ "&& 1"
[(set (match_dup 0) (match_dup 4))]
{
operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
@@ -3567,6 +3568,27 @@
(set_attr "length" "8")
(set_attr "isa" "*,p7v,p9v,*")])
+;; V4SF extract from memory and convert to DFmode with constant element number
+(define_insn_and_split "*vsx_extract_v4sf_to_df_load"
+ [(set (match_operand:DF 0 "register_operand" "=f,v")
+ (float_extend:DF
+ (vec_select:SF
+ (match_operand:V4SF 1 "memory_operand" "m,m")
+ (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n")]))))
+ (clobber (match_scratch:P 3 "=&b,&b"))]
+ "VECTOR_MEM_VSX_P (V4SFmode)"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (float_extend:DF (match_dup 4)))]
+{
+ operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+ operands[3], SFmode);
+}
+ [(set_attr "type" "fpload")
+ (set_attr "length" "8")
+ (set_attr "isa" "*,p8v")])
+
;; Variable V4SF extract from a register
(define_insn_and_split "vsx_extract_v4sf_var"
[(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
^ permalink raw reply [flat|nested] 6+ messages in thread
* [gcc(refs/users/meissner/heads/work118)] Improve vec_extract of V4SF from memory with constant element.
@ 2023-04-13 19:29 Michael Meissner
0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2023-04-13 19:29 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:fc0cbadc4dafbe7385e22cd52887c1e1c77e2f03
commit fc0cbadc4dafbe7385e22cd52887c1e1c77e2f03
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Thu Apr 13 15:29:07 2023 -0400
Improve vec_extract of V4SF from memory with constant element.
2023-04-13 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/rs6000.cc (get_vector_offset): Allow being called before
register allocation.
(adjust_vec_address_pcrel): Likewise.
(rs6000_adjust_vec_address): Likewise.
* config/rs6000/vsx.md (vsx_extract_v4sf_load): Allow split before
register allocation.
(vsx_extract_v4sf_to_df_load): New insn.
Diff:
---
gcc/config/rs6000/rs6000.cc | 51 ++++++++++++++++++++++++++++++++-------------
gcc/config/rs6000/vsx.md | 24 ++++++++++++++++++++-
2 files changed, 60 insertions(+), 15 deletions(-)
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3be5860dd9b..869a385eddf 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -7686,6 +7686,9 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
if (CONST_INT_P (element))
return GEN_INT (INTVAL (element) * scalar_size);
+ if (GET_CODE (base_tmp) == SCRATCH)
+ base_tmp = gen_reg_rtx (Pmode);
+
/* All insns should use the 'Q' constraint (address is a single register) if
the element number is not a constant. */
gcc_assert (satisfies_constraint_Q (mem));
@@ -7747,6 +7750,9 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
else
{
+ if (GET_CODE (base_tmp) == SCRATCH)
+ base_tmp = gen_reg_rtx (Pmode);
+
emit_move_insn (base_tmp, addr);
new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
}
@@ -7769,9 +7775,7 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
temporary (BASE_TMP) to fixup the address. Return the new memory address
that is valid for reads or writes to a given register (SCALAR_REG).
- This function is expected to be called after reload is completed when we are
- splitting insns. The temporary BASE_TMP might be set multiple times with
- this code. */
+ The temporary BASE_TMP might be set multiple times with this code. */
rtx
rs6000_adjust_vec_address (rtx scalar_reg,
@@ -7784,8 +7788,11 @@ rs6000_adjust_vec_address (rtx scalar_reg,
rtx addr = XEXP (mem, 0);
rtx new_addr;
- gcc_assert (!reg_mentioned_p (base_tmp, addr));
- gcc_assert (!reg_mentioned_p (base_tmp, element));
+ if (GET_CODE (base_tmp) != SCRATCH)
+ {
+ gcc_assert (!reg_mentioned_p (base_tmp, addr));
+ gcc_assert (!reg_mentioned_p (base_tmp, element));
+ }
/* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
@@ -7841,6 +7848,9 @@ rs6000_adjust_vec_address (rtx scalar_reg,
offset, it has the benefit that if D-FORM instructions are
allowed, the offset is part of the memory access to the vector
element. */
+ if (GET_CODE (base_tmp) == SCRATCH)
+ base_tmp = gen_reg_rtx (Pmode);
+
emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
}
@@ -7848,26 +7858,39 @@ rs6000_adjust_vec_address (rtx scalar_reg,
else
{
+ if (GET_CODE (base_tmp) == SCRATCH)
+ base_tmp = gen_reg_rtx (Pmode);
+
emit_move_insn (base_tmp, addr);
new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
}
- /* If the address isn't valid, move the address into the temporary base
- register. Some reasons it could not be valid include:
+ /* If register allocation has been done and the address isn't valid, move
+ the address into the temporary base register. Some reasons it could not
+ be valid include:
The address offset overflowed the 16 or 34 bit offset size;
We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
Only X_FORM loads can be done, and the address is D_FORM. */
- enum insn_form iform
- = address_to_insn_form (new_addr, scalar_mode,
- reg_to_non_prefixed (scalar_reg, scalar_mode));
-
- if (iform == INSN_FORM_BAD)
+ if (!can_create_pseudo_p ())
{
- emit_move_insn (base_tmp, new_addr);
- new_addr = base_tmp;
+ /* If the final register is not the same mode because a FLOAT_EXTEND,
+ ZERO_EXTEND, or SIGN_EXTEND was folded into the instruction, adjust
+ the register to be of the correct mode for the load. */
+ if (GET_MODE (scalar_reg) != scalar_mode)
+ scalar_reg = gen_rtx_REG (scalar_mode, REGNO (scalar_reg));
+
+ enum insn_form iform
+ = address_to_insn_form (new_addr, scalar_mode,
+ reg_to_non_prefixed (scalar_reg, scalar_mode));
+
+ if (iform == INSN_FORM_BAD)
+ {
+ emit_move_insn (base_tmp, new_addr);
+ new_addr = base_tmp;
+ }
}
return change_address (mem, scalar_mode, new_addr);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 417aff5e24b..00f85caed62 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3549,6 +3549,7 @@
[(set_attr "length" "8")
(set_attr "type" "fp")])
+;; V4SF extract from memory with constant element number
(define_insn_and_split "*vsx_extract_v4sf_load"
[(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
(vec_select:SF
@@ -3557,7 +3558,7 @@
(clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
"VECTOR_MEM_VSX_P (V4SFmode)"
"#"
- "&& reload_completed"
+ "&& 1"
[(set (match_dup 0) (match_dup 4))]
{
operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
@@ -3567,6 +3568,27 @@
(set_attr "length" "8")
(set_attr "isa" "*,p7v,p9v,*")])
+;; V4SF extract from memory and convert to DFmode with constant element number
+(define_insn_and_split "*vsx_extract_v4sf_to_df_load"
+ [(set (match_operand:DF 0 "register_operand" "=f,v")
+ (float_extend:DF
+ (vec_select:SF
+ (match_operand:V4SF 1 "memory_operand" "m,m")
+ (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n")]))))
+ (clobber (match_scratch:P 3 "=&b,&b"))]
+ "VECTOR_MEM_VSX_P (V4SFmode)"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (float_extend:DF (match_dup 4)))]
+{
+ operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+ operands[3], SFmode);
+}
+ [(set_attr "type" "fpload")
+ (set_attr "length" "8")
+ (set_attr "isa" "*,p8v")])
+
;; Variable V4SF extract from a register
(define_insn_and_split "vsx_extract_v4sf_var"
[(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
^ permalink raw reply [flat|nested] 6+ messages in thread
* [gcc(refs/users/meissner/heads/work118)] Improve vec_extract of V4SF from memory with constant element.
@ 2023-04-13 16:50 Michael Meissner
0 siblings, 0 replies; 6+ messages in thread
From: Michael Meissner @ 2023-04-13 16:50 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:d70280f2d9dfceb90a5709a2e8b66f3dbcee0b73
commit d70280f2d9dfceb90a5709a2e8b66f3dbcee0b73
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Thu Apr 13 12:48:58 2023 -0400
Improve vec_extract of V4SF from memory with constant element.
2023-04-13 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/vsx.md (vsx_extract_v4sf_load): Allow split before
register allocation.
(vsx_extract_v4sf_to_df_load): New insn.
Diff:
---
gcc/config/rs6000/vsx.md | 30 +++++++++++++++++++++++++++++-
1 file changed, 29 insertions(+), 1 deletion(-)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 417aff5e24b..70f4e8b8365 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3549,6 +3549,7 @@
[(set_attr "length" "8")
(set_attr "type" "fp")])
+;; V4SF extract from memory with constant element number
(define_insn_and_split "*vsx_extract_v4sf_load"
[(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
(vec_select:SF
@@ -3557,9 +3558,12 @@
(clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
"VECTOR_MEM_VSX_P (V4SFmode)"
"#"
- "&& reload_completed"
+ "&& 1"
[(set (match_dup 0) (match_dup 4))]
{
+ if (GET_CODE (operands[3]) == SCRATCH)
+ operands[3] = gen_reg_rtx (DImode);
+
operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
operands[3], SFmode);
}
@@ -3567,6 +3571,30 @@
(set_attr "length" "8")
(set_attr "isa" "*,p7v,p9v,*")])
+;; V4SF extract from memory and convert to DFmode with constant element number
+(define_insn_and_split "*vsx_extract_v4sf_to_df_load"
+ [(set (match_operand:DF 0 "register_operand" "=f,v")
+ (float_extend:DF
+ (vec_select:SF
+ (match_operand:V4SF 1 "memory_operand" "m,m")
+ (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n")]))))
+ (clobber (match_scratch:P 3 "=&b,&b"))]
+ "VECTOR_MEM_VSX_P (V4SFmode)"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (float_extend:DF (match_dup 4)))]
+{
+ if (GET_CODE (operands[3]) == SCRATCH)
+ operands[3] = gen_reg_rtx (DImode);
+
+ operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+ operands[3], SFmode);
+}
+ [(set_attr "type" "fpload")
+ (set_attr "length" "8")
+ (set_attr "isa" "*,p8v")])
+
;; Variable V4SF extract from a register
(define_insn_and_split "vsx_extract_v4sf_var"
[(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2023-04-15 1:20 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-14 14:17 [gcc(refs/users/meissner/heads/work118)] Improve vec_extract of V4SF from memory with constant element Michael Meissner
-- strict thread matches above, loose matches on Subject: below --
2023-04-15 1:20 Michael Meissner
2023-04-14 18:53 Michael Meissner
2023-04-13 23:56 Michael Meissner
2023-04-13 19:29 Michael Meissner
2023-04-13 16:50 Michael Meissner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).