From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path:
Received: by sourceware.org (Postfix, from userid 1005)
	id 4370F3858D1E; Mon, 1 May 2023 23:07:54 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 4370F3858D1E
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org;
	s=default; t=1682982474;
	bh=KVmGBx7KgtFns6IQbSXQmTLaLOQe+tPBBIUjoikENXM=;
	h=From:To:Subject:Date:From;
	b=dg7xDn6mk3Q12pG3YdNoctdJKmtsLwUWM4b2Gmf1wA3ZiwuMG6MAdEuQempHhgvhN
	 1piPn8XyH+jwsJxShQ61+Sfw8YAum5pqOsto1hSG/BTRda6xpM1B4Jr8rqUc3+yL0g
	 Wm4PrVTFiGgzvBkcixKpTbj0RDQTMEtRfbrleZS4=
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
From: Michael Meissner
To: gcc-cvs@gcc.gnu.org
Subject: [gcc(refs/users/meissner/heads/work120)] Split vec_extract from memory before reload for V16QI/V8HI/V4SI with constant elements.
X-Act-Checkin: gcc
X-Git-Author: Michael Meissner
X-Git-Refname: refs/users/meissner/heads/work120
X-Git-Oldrev: a77ad87e980b128cca1595578730f89cc8c8ebb1
X-Git-Newrev: a1b43b9ad8cff06175abdb54e1c0e7d7f2da2cc4
Message-Id: <20230501230754.4370F3858D1E@sourceware.org>
Date: Mon, 1 May 2023 23:07:54 +0000 (GMT)
List-Id:

https://gcc.gnu.org/g:a1b43b9ad8cff06175abdb54e1c0e7d7f2da2cc4

commit a1b43b9ad8cff06175abdb54e1c0e7d7f2da2cc4
Author: Michael Meissner
Date:   Mon May 1 19:07:25 2023 -0400

    Split vec_extract from memory before reload for V16QI/V8HI/V4SI with constant elements.
    
    This patch updates the support function rs6000_adjust_vec_address and the
    functions it calls so that they can be called before register allocation.
    The places that take a scratch register now allocate a new pseudo register
    if they are passed a SCRATCH.  The patch then changes the vec_extract
    support for V16QI/V8HI/V4SI with constant element numbers so that the
    patterns are split before register allocation.
    
    2023-05-01  Michael Meissner
    
    gcc/
    
    	* config/rs6000/predicates.md (vec_extract_memory_operand): New
    	predicate.
    	* config/rs6000/rs6000.cc (get_vector_offset): Allow function to be
    	called before register allocation.
    	(adjust_vec_address_pcrel): Likewise.
    	(rs6000_adjust_vec_address): Likewise.
    	* config/rs6000/vsx.md (vsx_extract__load): Split before
    	register allocation.
    	(vsx_extract_v4si_load_to_di): Likewise.
    	(vsx_extract__load_to_u): Likewise.
    	(*vsx_extract_v8hi_load_to_s): Likewise.
    	(vsx_extract__var_load): Likewise.
    	(vsx_extract_v4si_var_load_to_di): Likewise.
    	(vsx_extract__var_load_to_u): Likewise.
    	(vsx_extract_v8hi_var_load_to_s): Likewise.
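The rs6000.cc changes below repeat one idiom in several places: when a helper
is handed a SCRATCH because the insn has not yet been through register
allocation, it materializes a fresh pseudo instead of relying on an allocated
hard register.  A minimal hand-written sketch of that idiom follows; it is not
code from the patch, and the helper name use_base_tmp is invented for this
note, but GET_CODE, SCRATCH, gen_reg_rtx, can_create_pseudo_p, gcc_assert and
Pmode are the GCC internals the patch actually uses.

    /* Sketch of the SCRATCH-to-pseudo idiom used by get_vector_offset,
       adjust_vec_address_pcrel and rs6000_adjust_vec_address.  Before
       register allocation the match_scratch operand is still (scratch:DI),
       so a new pseudo register is created; after reload the allocated
       register is used unchanged.  */

    static rtx
    use_base_tmp (rtx base_tmp)
    {
      if (GET_CODE (base_tmp) == SCRATCH)
        {
          /* A SCRATCH can only show up before register allocation, where
             creating new pseudos is still allowed.  */
          gcc_assert (can_create_pseudo_p ());
          base_tmp = gen_reg_rtx (Pmode);
        }
      return base_tmp;
    }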
Diff:
---
 gcc/config/rs6000/predicates.md | 13 ++++++++++
 gcc/config/rs6000/rs6000.cc     | 57 +++++++++++++++++++++++++++--------------
 gcc/config/rs6000/vsx.md        | 32 +++++++++++------------
 3 files changed, 67 insertions(+), 35 deletions(-)

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index a16ee30f0c0..571386e7c21 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -994,6 +994,19 @@
   return memory_operand (op, mode);
 })
 
+;; Return true if the address can be used in optimizing vec_extract from memory
+;; operations.  We don't allow update memory addresses or Altivec style vector
+;; addresses.
+(define_predicate "vec_extract_memory_operand"
+  (match_code "mem")
+{
+  if (update_address_mem (op, mode))
+    return 0;
+  if (altivec_indexed_or_indirect_operand (op, mode))
+    return 0;
+  return memory_operand (op, mode);
+})
+
 ;; Return 1 if the operand is a MEM with an indexed-form address.
 (define_special_predicate "indexed_address_mem"
   (match_test "(MEM_P (op)
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 6debf0f63ff..0d1f4e7dfe3 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -7686,9 +7686,13 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
   if (CONST_INT_P (element))
     return GEN_INT (INTVAL (element) * scalar_size);
 
-  /* All insns should use the 'Q' constraint (address is a single register) if
-     the element number is not a constant.  */
-  gcc_assert (satisfies_constraint_Q (mem));
+  if (GET_CODE (base_tmp) == SCRATCH)
+    base_tmp = gen_reg_rtx (Pmode);
+
+  /* After register allocation, all insns should use the 'Q' constraint
+     (address is a single register) if the element number is not a
+     constant.  */
+  gcc_assert (can_create_pseudo_p () || satisfies_constraint_Q (mem));
 
   /* Mask the element to make sure the element number is between 0 and the
      maximum number of elements - 1 so that we don't generate an address
@@ -7704,6 +7708,9 @@ get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
   if (shift > 0)
     {
       rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
+      if (can_create_pseudo_p ())
+	base_tmp = gen_reg_rtx (Pmode);
+
       emit_insn (gen_rtx_SET (base_tmp, shift_op));
     }
 
@@ -7747,6 +7754,9 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
 
   else
     {
+      if (GET_CODE (base_tmp) == SCRATCH)
+	base_tmp = gen_reg_rtx (Pmode);
+
       emit_move_insn (base_tmp, addr);
       new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
     }
@@ -7769,9 +7779,8 @@ adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
    temporary (BASE_TMP) to fixup the address.  Return the new memory address
    that is valid for reads or writes to a given register (SCALAR_REG).
 
-   This function is expected to be called after reload is completed when we are
-   splitting insns.  The temporary BASE_TMP might be set multiple times with
-   this code.  */
+   The temporary BASE_TMP might be set multiple times with this code if this is
+   called after register allocation.  */
 
 rtx
 rs6000_adjust_vec_address (rtx scalar_reg,
@@ -7784,8 +7793,11 @@ rs6000_adjust_vec_address (rtx scalar_reg,
   rtx addr = XEXP (mem, 0);
   rtx new_addr;
 
-  gcc_assert (!reg_mentioned_p (base_tmp, addr));
-  gcc_assert (!reg_mentioned_p (base_tmp, element));
+  if (GET_CODE (base_tmp) != SCRATCH)
+    {
+      gcc_assert (!reg_mentioned_p (base_tmp, addr));
+      gcc_assert (!reg_mentioned_p (base_tmp, element));
+    }
 
   /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
   gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
@@ -7841,6 +7853,9 @@ rs6000_adjust_vec_address (rtx scalar_reg,
	     offset, it has the benefit that if D-FORM instructions are allowed,
	     the offset is part of the memory access to the vector element.  */
 
+	  if (GET_CODE (base_tmp) == SCRATCH)
+	    base_tmp = gen_reg_rtx (Pmode);
+
	  emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
	}
@@ -7848,26 +7863,30 @@ rs6000_adjust_vec_address (rtx scalar_reg,
 
   else
     {
-      emit_move_insn (base_tmp, addr);
-      new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
+      rtx addr_reg = force_reg (Pmode, addr);
+      new_addr = gen_rtx_PLUS (Pmode, addr_reg, element_offset);
     }
 
-  /* If the address isn't valid, move the address into the temporary base
-     register.  Some reasons it could not be valid include:
+  /* If register allocation has been done and the address isn't valid, move
+     the address into the temporary base register.  Some reasons it could not
+     be valid include:
 
      The address offset overflowed the 16 or 34 bit offset size;
     We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
     We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
     Only X_FORM loads can be done, and the address is D_FORM.  */
 
-  enum insn_form iform
-    = address_to_insn_form (new_addr, scalar_mode,
-			    reg_to_non_prefixed (scalar_reg, scalar_mode));
-
-  if (iform == INSN_FORM_BAD)
+  if (!can_create_pseudo_p ())
     {
-      emit_move_insn (base_tmp, new_addr);
-      new_addr = base_tmp;
+      enum insn_form iform
+	= address_to_insn_form (new_addr, scalar_mode,
+				reg_to_non_prefixed (scalar_reg, scalar_mode));
+
+      if (iform == INSN_FORM_BAD)
+	{
+	  emit_move_insn (base_tmp, new_addr);
+	  new_addr = base_tmp;
+	}
     }
 
   return change_address (mem, scalar_mode, new_addr);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 410183dde93..e227d875dfb 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4027,7 +4027,7 @@
   [(set (match_operand: 0 "register_operand"
	   "=r, r, r, wa, wa")
	(vec_select:
-	 (match_operand:VSX_EXTRACT_I 1 "memory_operand"
+	 (match_operand:VSX_EXTRACT_I 1 "vec_extract_memory_operand"
	  "m, o, m, Z, Q")
	 (parallel [(match_operand:QI 2 ""
		    "O, n, n, O, n")])))
@@ -4035,7 +4035,7 @@
	     "=X, X, &b, X, &b"))]
   "VECTOR_MEM_VSX_P (mode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0) (match_dup 4))]
 {
   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
@@ -4060,7 +4060,7 @@
	   "=r, r, r, wa, wa")
	(any_extend:DI
	 (vec_select:SI
-	  (match_operand:V4SI 1 "memory_operand"
+	  (match_operand:V4SI 1 "vec_extract_memory_operand"
	   "m, o, m, Z, Q")
	  (parallel [(match_operand:QI 2 "const_0_to_3_operand"
		     "O, n, n, O, n")]))))
@@ -4068,7 +4068,7 @@
	     "=X, X, &b, X, &b"))]
   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0)
	(any_extend:DI (match_dup 4)))]
 {
@@ -4093,7 +4093,7 @@
	   "=r, r, r, v, v")
	(zero_extend:GPR
	 (vec_select:
-	  (match_operand:VSX_EXTRACT_I2 1 "memory_operand"
+	  (match_operand:VSX_EXTRACT_I2 1 "vec_extract_memory_operand"
	   "m, o, m, Z, Q")
	  (parallel [(match_operand:QI 2 "const_int_operand"
		     "O, n, n, O, n")]))))
@@ -4101,7 +4101,7 @@
	     "=X, X, &b, X, &b"))]
   "VECTOR_MEM_VSX_P (mode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0)
	(zero_extend:GPR (match_dup 4)))]
 {
@@ -4173,12 +4173,12 @@
   [(set (match_operand:GPR 0 "register_operand" "=r,r,r")
	(sign_extend:GPR
	 (vec_select:HI
-	  (match_operand:V8HI 1 "memory_operand" "m,o,m")
+	  (match_operand:V8HI 1 "vec_extract_memory_operand" "m,o,m")
	  (parallel [(match_operand:QI 2 "const_int_operand" "O,n,n")]))))
    (clobber (match_scratch:DI 3 "=X,X,&b"))]
   "VECTOR_MEM_VSX_P (V8HImode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0)
	(sign_extend:GPR (match_dup 4)))]
 {
@@ -4213,13 +4213,13 @@
 (define_insn_and_split "*vsx_extract__var_load"
   [(set (match_operand: 0 "gpc_reg_operand" "=r,wa")
	(unspec:
-	 [(match_operand:VSX_EXTRACT_I 1 "memory_operand" "Q,Q")
+	 [(match_operand:VSX_EXTRACT_I 1 "vec_extract_memory_operand" "Q,Q")
	  (match_operand:DI 2 "gpc_reg_operand" "r,r")]
	 UNSPEC_VSX_EXTRACT))
    (clobber (match_scratch:DI 3 "=&b,&b"))]
   "VECTOR_MEM_VSX_P (mode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0) (match_dup 4))]
 {
   operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
@@ -4234,13 +4234,13 @@
   [(set (match_operand:DI 0 "gpc_reg_operand" "=r,wa")
	(any_extend:DI
	 (unspec:SI
-	  [(match_operand:V4SI 1 "memory_operand" "Q,Q")
+	  [(match_operand:V4SI 1 "vec_extract_memory_operand" "Q,Q")
	   (match_operand:DI 2 "gpc_reg_operand" "r,r")]
	  UNSPEC_VSX_EXTRACT)))
    (clobber (match_scratch:DI 3 "=&b,&b"))]
   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0)
	(any_extend:DI (match_dup 4)))]
 {
@@ -4256,13 +4256,13 @@
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,wa")
	(zero_extend:GPR
	 (unspec:
-	  [(match_operand:VSX_EXTRACT_I2 1 "memory_operand" "Q,Q")
+	  [(match_operand:VSX_EXTRACT_I2 1 "vec_extract_memory_operand" "Q,Q")
	   (match_operand:DI 2 "gpc_reg_operand" "r,r")]
	  UNSPEC_VSX_EXTRACT)))
    (clobber (match_scratch:DI 3 "=&b,&b"))]
   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0)
	(zero_extend:GPR (match_dup 4)))]
 {
@@ -4279,13 +4279,13 @@
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
	(sign_extend:GPR
	 (unspec:HI
-	  [(match_operand:V8HI 1 "memory_operand" "Q")
+	  [(match_operand:V8HI 1 "vec_extract_memory_operand" "Q")
	   (match_operand:DI 2 "gpc_reg_operand" "r")]
	  UNSPEC_VSX_EXTRACT)))
    (clobber (match_scratch:DI 3 "=&b"))]
   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0)
	(sign_extend:GPR (match_dup 4)))]
 {
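The practical effect of changing each split condition from "&& reload_completed"
to "&& 1" is that these define_insn_and_split patterns can now be split by any
split pass once the insn condition holds, including the passes that run before
register allocation, instead of only after reload; that is why
rs6000_adjust_vec_address and its helpers must now cope with a bare SCRATCH for
the base temporary.  As a rough illustration of the kind of source code these
patterns target (a hand-written example, not a testcase from the patch, and the
function name is invented):

    #include <altivec.h>

    /* vec_extract of a constant element from a V8HI vector that lives in
       memory.  With the patterns above, the element can be loaded directly
       from the adjusted address instead of loading the whole vector into a
       vector register first.  */
    short
    get_elt3 (vector short *p)
    {
      return vec_extract (*p, 3);
    }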