* [PTX] simplify movs
@ 2015-12-02 15:09 Nathan Sidwell
2015-12-02 15:30 ` Bernd Schmidt
2017-05-21 15:32 ` Tom de Vries
0 siblings, 2 replies; 4+ messages in thread
From: Nathan Sidwell @ 2015-12-02 15:09 UTC (permalink / raw)
To: GCC Patches
[-- Attachment #1: Type: text/plain, Size: 420 bytes --]
The PTX md file goes to a lot of effort handling SC and DC movs, including for
unspecs to mov low and high parts around. However, these code paths are not
exercised in any gcc test or the build of newlib. The generic handling of these
movs deals with type punning, (using the stack frame, if needed). There
doesn't appear to be a need for a separate punbuffer.
Thus this patch deletes a lot of that machinery.
nathan
[-- Attachment #2: trunk-ptx-mov.patch --]
[-- Type: text/x-patch, Size: 18847 bytes --]
2015-12-02 Nathan Sidwell <nathan@acm.org>
* config/nvptx/nvptx-protos.h (nvptx_output_mov_insn): Declare.
(nvptx_underlying_object_mode): Delete.
* config/nvptx/nvptx.c (nvptx_underlying_object_mode): Delete.
(output_reg): New.
(nvptx_declare_function_name): Use output_reg. Remove punning
buffer.
(nvptx_output_mov_insn): New.
(nvptx_print_operand): Separate SUBREG handling, remove 'f' case.
Use output_reg. Merge 't' and 'u' handling.
* config/nvptx/nvptx.h (NVPTX_PUNNING_BUFFER_REGNUM): Delete.
(struct machine_function): Remove punning_buffer_size.
(REGISTER_NAMES): Remove %punbuffer.
* config/nvptx/nvptx.md (UNSPEC_CPLX_LOWPART,
UNSPEC_CPLX_HIGHPART): Delete.
(*mov<mode>_insn [QHSDIM]): Remove unnecessary constraints, use
nvptx_output_mov_insn.
(*mov<mode>_insn [SDFM]): Reorder constraints to match integer
mov. Use nvptx_output_mov_insn.
(highpartscsf2, set_highpartsfsc2, lowpartscsf2,
set_lowpartsfsc2): Delete.
(mov<mode> [SDCM]): Delete.
Index: config/nvptx/nvptx-protos.h
===================================================================
--- config/nvptx/nvptx-protos.h (revision 231177)
+++ config/nvptx/nvptx-protos.h (working copy)
@@ -38,9 +38,9 @@ extern void nvptx_expand_oacc_join (unsi
extern void nvptx_expand_call (rtx, rtx);
extern rtx nvptx_expand_compare (rtx);
extern const char *nvptx_ptx_type_from_mode (machine_mode, bool);
+extern const char *nvptx_output_mov_insn (rtx, rtx);
extern const char *nvptx_output_call_insn (rtx_insn *, rtx, rtx);
extern const char *nvptx_output_return (void);
-extern machine_mode nvptx_underlying_object_mode (rtx);
extern const char *nvptx_section_from_addr_space (addr_space_t);
extern bool nvptx_hard_regno_mode_ok (int, machine_mode);
extern rtx nvptx_maybe_convert_symbolic_operand (rtx);
Index: config/nvptx/nvptx.c
===================================================================
--- config/nvptx/nvptx.c (revision 231177)
+++ config/nvptx/nvptx.c (working copy)
@@ -155,23 +155,6 @@ nvptx_option_override (void)
worker_red_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
}
-/* Return the mode to be used when declaring a ptx object for OBJ.
- For objects with subparts such as complex modes this is the mode
- of the subpart. */
-
-machine_mode
-nvptx_underlying_object_mode (rtx obj)
-{
- if (GET_CODE (obj) == SUBREG)
- obj = SUBREG_REG (obj);
- machine_mode mode = GET_MODE (obj);
- if (mode == TImode)
- return DImode;
- if (COMPLEX_MODE_P (mode))
- return GET_MODE_INNER (mode);
- return mode;
-}
-
/* Return a ptx type for MODE. If PROMOTE, then use .u32 for QImode to
deal with ptx ideosyncracies. */
@@ -257,6 +240,37 @@ maybe_split_mode (machine_mode mode)
return VOIDmode;
}
+/* Output a register, subreg, or register pair (with optional
+ enclosing braces). */
+
+static void
+output_reg (FILE *file, unsigned regno, machine_mode inner_mode,
+ int subreg_offset = -1)
+{
+ if (inner_mode == VOIDmode)
+ {
+ if (HARD_REGISTER_NUM_P (regno))
+ fprintf (file, "%s", reg_names[regno]);
+ else
+ fprintf (file, "%%r%d", regno);
+ }
+ else if (subreg_offset >= 0)
+ {
+ output_reg (file, regno, VOIDmode);
+ fprintf (file, "$%d", subreg_offset);
+ }
+ else
+ {
+ if (subreg_offset == -1)
+ fprintf (file, "{");
+ output_reg (file, regno, inner_mode, GET_MODE_SIZE (inner_mode));
+ fprintf (file, ",");
+ output_reg (file, regno, inner_mode, 0);
+ if (subreg_offset == -1)
+ fprintf (file, "}");
+ }
+}
+
/* Emit forking instructions for MASK. */
static void
@@ -724,16 +738,12 @@ nvptx_declare_function_name (FILE *file,
{
machine_mode mode = PSEUDO_REGNO_MODE (i);
machine_mode split = maybe_split_mode (mode);
+
if (split != VOIDmode)
- {
- fprintf (file, "\t.reg%s %%r%d$%d;\n",
- nvptx_ptx_type_from_mode (split, true), i, 0);
- fprintf (file, "\t.reg%s %%r%d$%d;\n",
- nvptx_ptx_type_from_mode (split, true), i, 1);
- }
- else
- fprintf (file, "\t.reg%s %%r%d;\n",
- nvptx_ptx_type_from_mode (mode, true), i);
+ mode = split;
+ fprintf (file, "\t.reg%s ", nvptx_ptx_type_from_mode (mode, true));
+ output_reg (file, i, split, -2);
+ fprintf (file, ";\n");
}
}
@@ -754,15 +764,6 @@ nvptx_declare_function_name (FILE *file,
BITS_PER_WORD);
}
- if (cfun->machine->punning_buffer_size > 0)
- {
- fprintf (file, "\t.reg.u%d %%punbuffer;\n"
- "\t.local.align 8 .b8 %%punbuffer_ar[%d];\n",
- BITS_PER_WORD, cfun->machine->punning_buffer_size);
- fprintf (file, "\tcvta.local.u%d %%punbuffer, %%punbuffer_ar;\n",
- BITS_PER_WORD);
- }
-
/* Declare a local variable for the frame. */
sz = get_frame_size ();
if (sz > 0 || cfun->machine->has_call_with_sc)
@@ -1755,6 +1756,7 @@ nvptx_globalize_label (FILE *, const cha
/* Implement TARGET_ASM_ASSEMBLE_UNDEFINED_DECL. Write an extern
declaration only for variable DECL with NAME to FILE. */
+
static void
nvptx_assemble_undefined_decl (FILE *file, const char *name, const_tree decl)
{
@@ -1772,6 +1774,37 @@ nvptx_assemble_undefined_decl (FILE *fil
fprintf (file, ";\n\n");
}
+/* Output a pattern for a move instruction. */
+
+const char *
+nvptx_output_mov_insn (rtx dst, rtx src)
+{
+ machine_mode dst_mode = GET_MODE (dst);
+ machine_mode dst_inner = (GET_CODE (dst) == SUBREG
+ ? GET_MODE (XEXP (dst, 0)) : dst_mode);
+ machine_mode src_inner = (GET_CODE (src) == SUBREG
+ ? GET_MODE (XEXP (src, 0)) : dst_mode);
+
+ if (REG_P (dst) && REGNO (dst) == NVPTX_RETURN_REGNUM && dst_mode == HImode)
+ /* Special handling for the return register. It's never really an
+ HI object, and only occurs as the destination of a move
+ insn. */
+ dst_inner = SImode;
+
+ if (src_inner == dst_inner)
+ return "%.\tmov%t0\t%0, %1;";
+
+ if (CONSTANT_P (src))
+ return (GET_MODE_CLASS (dst_inner) == MODE_INT
+ && GET_MODE_CLASS (src_inner) != MODE_FLOAT
+ ? "%.\tmov%t0\t%0, %1;" : "%.\tmov.b%T0\t%0, %1;");
+
+ if (GET_MODE_SIZE (dst_inner) == GET_MODE_SIZE (src_inner))
+ return "%.\tmov.b%T0\t%0, %1;";
+
+ return "%.\tcvt%t0%t1\t%0, %1;";
+}
+
/* Output INSN, which is a call to CALLEE with result RESULT. For ptx, this
involves writing .param declarations and in/out copies into them. For
indirect calls, also write the .callprototype. */
@@ -1921,7 +1954,6 @@ nvptx_print_operand_address (FILE *file,
A -- print an address space identifier for a MEM
c -- print an opcode suffix for a comparison operator, including a type code
- f -- print a full reg even for something that must always be split
S -- print a shuffle kind specified by CONST_INT
t -- print a type opcode suffix, promoting QImode to 32 bits
T -- print a type size in bits
@@ -1930,9 +1962,6 @@ nvptx_print_operand_address (FILE *file,
static void
nvptx_print_operand (FILE *file, rtx x, int code)
{
- rtx orig_x = x;
- machine_mode op_mode;
-
if (code == '.')
{
x = current_insn_predicate;
@@ -1954,6 +1983,7 @@ nvptx_print_operand (FILE *file, rtx x,
}
enum rtx_code x_code = GET_CODE (x);
+ machine_mode mode = GET_MODE (x);
switch (code)
{
@@ -1975,13 +2005,16 @@ nvptx_print_operand (FILE *file, rtx x,
break;
case 't':
- op_mode = nvptx_underlying_object_mode (x);
- fprintf (file, "%s", nvptx_ptx_type_from_mode (op_mode, true));
- break;
-
case 'u':
- op_mode = nvptx_underlying_object_mode (x);
- fprintf (file, "%s", nvptx_ptx_type_from_mode (op_mode, false));
+ if (x_code == SUBREG)
+ {
+ mode = GET_MODE (SUBREG_REG (x));
+ if (mode == TImode)
+ mode = DImode;
+ else if (COMPLEX_MODE_P (mode))
+ mode = GET_MODE_INNER (mode);
+ }
+ fprintf (file, "%s", nvptx_ptx_type_from_mode (mode, code == 't'));
break;
case 'S':
@@ -1994,7 +2027,7 @@ nvptx_print_operand (FILE *file, rtx x,
break;
case 'T':
- fprintf (file, "%d", GET_MODE_BITSIZE (GET_MODE (x)));
+ fprintf (file, "%d", GET_MODE_BITSIZE (mode));
break;
case 'j':
@@ -2006,14 +2039,14 @@ nvptx_print_operand (FILE *file, rtx x,
goto common;
case 'c':
- op_mode = GET_MODE (XEXP (x, 0));
+ mode = GET_MODE (XEXP (x, 0));
switch (x_code)
{
case EQ:
fputs (".eq", file);
break;
case NE:
- if (FLOAT_MODE_P (op_mode))
+ if (FLOAT_MODE_P (mode))
fputs (".neu", file);
else
fputs (".ne", file);
@@ -2069,38 +2102,39 @@ nvptx_print_operand (FILE *file, rtx x,
default:
gcc_unreachable ();
}
- if (FLOAT_MODE_P (op_mode)
+ if (FLOAT_MODE_P (mode)
|| x_code == EQ || x_code == NE
|| x_code == GEU || x_code == GTU
|| x_code == LEU || x_code == LTU)
- fputs (nvptx_ptx_type_from_mode (op_mode, true), file);
+ fputs (nvptx_ptx_type_from_mode (mode, true), file);
else
- fprintf (file, ".s%d", GET_MODE_BITSIZE (op_mode));
+ fprintf (file, ".s%d", GET_MODE_BITSIZE (mode));
break;
default:
common:
switch (x_code)
{
case SUBREG:
- x = SUBREG_REG (x);
- /* fall through */
+ {
+ rtx inner_x = SUBREG_REG (x);
+ machine_mode inner_mode = GET_MODE (inner_x);
+ machine_mode split = maybe_split_mode (inner_mode);
+
+ if (split != VOIDmode
+ && (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode)))
+ output_reg (file, REGNO (inner_x), split);
+ else
+ output_reg (file, REGNO (inner_x), split, SUBREG_BYTE (x));
+ }
+ break;
case REG:
- if (HARD_REGISTER_P (x))
- fprintf (file, "%s", reg_names[REGNO (x)]);
- else
- fprintf (file, "%%r%d", REGNO (x));
- if (code != 'f' && maybe_split_mode (GET_MODE (x)) != VOIDmode)
- {
- gcc_assert (GET_CODE (orig_x) == SUBREG
- && maybe_split_mode (GET_MODE (orig_x)) == VOIDmode);
- fprintf (file, "$%d", SUBREG_BYTE (orig_x) / UNITS_PER_WORD);
- }
+ output_reg (file, REGNO (x), maybe_split_mode (mode));
break;
case MEM:
fputc ('[', file);
- nvptx_print_address_operand (file, XEXP (x, 0), GET_MODE (x));
+ nvptx_print_address_operand (file, XEXP (x, 0), mode);
fputc (']', file);
break;
@@ -2119,10 +2153,10 @@ nvptx_print_operand (FILE *file, rtx x,
case CONST_DOUBLE:
long vals[2];
- real_to_target (vals, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));
+ real_to_target (vals, CONST_DOUBLE_REAL_VALUE (x), mode);
vals[0] &= 0xffffffff;
vals[1] &= 0xffffffff;
- if (GET_MODE (x) == SFmode)
+ if (mode == SFmode)
fprintf (file, "0f%08lx", vals[0]);
else
fprintf (file, "0d%08lx%08lx", vals[1], vals[0]);
Index: config/nvptx/nvptx.h
===================================================================
--- config/nvptx/nvptx.h (revision 231177)
+++ config/nvptx/nvptx.h (working copy)
@@ -156,7 +156,6 @@ enum reg_class
#define STACK_POINTER_REGNUM 1
#define HARD_FRAME_POINTER_REGNUM 2
-#define NVPTX_PUNNING_BUFFER_REGNUM 3
#define NVPTX_RETURN_REGNUM 4
#define FRAME_POINTER_REGNUM 15
#define ARG_POINTER_REGNUM 14
@@ -231,7 +230,6 @@ struct GTY(()) machine_function
bool has_call_with_sc;
HOST_WIDE_INT outgoing_stdarg_size;
int ret_reg_mode; /* machine_mode not defined yet. */
- int punning_buffer_size;
rtx axis_predicate[2];
};
#endif
@@ -264,7 +262,7 @@ struct GTY(()) machine_function
#define REGISTER_NAMES \
{ \
- "%hr0", "%outargs", "%hfp", "%punbuffer", "%retval", "%retval_in", "%hr6", "%hr7", \
+ "%hr0", "%outargs", "%hfp", "%hr3", "%retval", "%retval_in", "%hr6", "%hr7", \
"%hr8", "%hr9", "%hr10", "%hr11", "%hr12", "%hr13", "%argp", "%frame" \
}
Index: config/nvptx/nvptx.md
===================================================================
--- config/nvptx/nvptx.md (revision 231177)
+++ config/nvptx/nvptx.md (working copy)
@@ -31,9 +31,6 @@
UNSPEC_TO_SHARED
UNSPEC_TO_CONST
- UNSPEC_CPLX_LOWPART
- UNSPEC_CPLX_HIGHPART
-
UNSPEC_COPYSIGN
UNSPEC_LOG2
UNSPEC_EXP2
@@ -258,74 +255,31 @@
%.\\tsetp.eq.u32\\t%0, 1, 1;")
(define_insn "*mov<mode>_insn"
- [(set (match_operand:QHSDIM 0 "nvptx_nonimmediate_operand" "=R,R,R,m")
- (match_operand:QHSDIM 1 "general_operand" "n,Ri,m,R"))]
- "!(MEM_P (operands[0])
- && (!REG_P (operands[1]) || REGNO (operands[1]) <= LAST_VIRTUAL_REGISTER))"
+ [(set (match_operand:QHSDIM 0 "nvptx_nonimmediate_operand" "=R,R,m")
+ (match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))]
+ "!MEM_P (operands[0])
+ || (REG_P (operands[1]) && REGNO (operands[1]) > LAST_VIRTUAL_REGISTER)"
{
- if (which_alternative == 2)
+ if (which_alternative == 1)
return "%.\\tld%A1%u1\\t%0, %1;";
- if (which_alternative == 3)
+ if (which_alternative == 2)
return "%.\\tst%A0%u0\\t%0, %1;";
- rtx dst = operands[0];
- rtx src = operands[1];
-
- enum machine_mode dst_mode = nvptx_underlying_object_mode (dst);
- enum machine_mode src_mode = nvptx_underlying_object_mode (src);
- if (GET_CODE (dst) == SUBREG)
- dst = SUBREG_REG (dst);
- if (GET_CODE (src) == SUBREG)
- src = SUBREG_REG (src);
- if (src_mode == QImode)
- src_mode = SImode;
- if (dst_mode == QImode)
- dst_mode = SImode;
- if (CONSTANT_P (src))
- {
- if (GET_MODE_CLASS (dst_mode) != MODE_INT)
- return "%.\\tmov.b%T0\\t%0, %1;";
- else
- return "%.\\tmov%t0\\t%0, %1;";
- }
-
- /* Special handling for the return register; we allow this register to
- only occur in the destination of a move insn. */
- if (REG_P (dst) && REGNO (dst) == NVPTX_RETURN_REGNUM
- && dst_mode == HImode)
- dst_mode = SImode;
- if (dst_mode == src_mode)
- return "%.\\tmov%t0\\t%0, %1;";
- /* Mode-punning between floating point and integer. */
- if (GET_MODE_SIZE (dst_mode) == GET_MODE_SIZE (src_mode))
- return "%.\\tmov.b%T0\\t%0, %1;";
- return "%.\\tcvt%t0%t1\\t%0, %1;";
+ return nvptx_output_mov_insn (operands[0], operands[1]);
}
[(set_attr "subregs_ok" "true")])
(define_insn "*mov<mode>_insn"
[(set (match_operand:SDFM 0 "nvptx_nonimmediate_operand" "=R,R,m")
(match_operand:SDFM 1 "general_operand" "RF,m,R"))]
- "!(MEM_P (operands[0]) && !REG_P (operands[1]))"
+ "!MEM_P (operands[0]) || REG_P (operands[1])"
{
if (which_alternative == 1)
return "%.\\tld%A1%u0\\t%0, %1;";
if (which_alternative == 2)
return "%.\\tst%A0%u1\\t%0, %1;";
- rtx dst = operands[0];
- rtx src = operands[1];
- if (GET_CODE (dst) == SUBREG)
- dst = SUBREG_REG (dst);
- if (GET_CODE (src) == SUBREG)
- src = SUBREG_REG (src);
- enum machine_mode dst_mode = GET_MODE (dst);
- enum machine_mode src_mode = GET_MODE (src);
- if (dst_mode == src_mode)
- return "%.\\tmov%t0\\t%0, %1;";
- if (GET_MODE_SIZE (dst_mode) == GET_MODE_SIZE (src_mode))
- return "%.\\tmov.b%T0\\t%0, %1;";
- gcc_unreachable ();
+ return nvptx_output_mov_insn (operands[0], operands[1]);
}
[(set_attr "subregs_ok" "true")])
@@ -373,116 +327,6 @@
}
})
-(define_insn "highpartscsf2"
- [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
- (unspec:SF [(match_operand:SC 1 "nvptx_register_operand")]
- UNSPEC_CPLX_HIGHPART))]
- ""
- "%.\\tmov%t0\\t%0, %f1$1;")
-
-(define_insn "set_highpartsfsc2"
- [(set (match_operand:SC 0 "nvptx_register_operand" "+R")
- (unspec:SC [(match_dup 0)
- (match_operand:SF 1 "nvptx_register_operand")]
- UNSPEC_CPLX_HIGHPART))]
- ""
- "%.\\tmov%t1\\t%f0$1, %1;")
-
-(define_insn "lowpartscsf2"
- [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
- (unspec:SF [(match_operand:SC 1 "nvptx_register_operand")]
- UNSPEC_CPLX_LOWPART))]
- ""
- "%.\\tmov%t0\\t%0, %f1$0;")
-
-(define_insn "set_lowpartsfsc2"
- [(set (match_operand:SC 0 "nvptx_register_operand" "+R")
- (unspec:SC [(match_dup 0)
- (match_operand:SF 1 "nvptx_register_operand")]
- UNSPEC_CPLX_LOWPART))]
- ""
- "%.\\tmov%t1\\t%f0$0, %1;")
-
-(define_expand "mov<mode>"
- [(set (match_operand:SDCM 0 "nvptx_nonimmediate_operand" "")
- (match_operand:SDCM 1 "general_operand" ""))]
- ""
-{
- enum machine_mode submode = <MODE>mode == SCmode ? SFmode : DFmode;
- int sz = GET_MODE_SIZE (submode);
- rtx xops[4];
- rtx punning_reg = NULL_RTX;
- rtx copyback = NULL_RTX;
-
- if (GET_CODE (operands[0]) == SUBREG)
- {
- rtx inner = SUBREG_REG (operands[0]);
- enum machine_mode inner_mode = GET_MODE (inner);
- int sz2 = GET_MODE_SIZE (inner_mode);
- gcc_assert (sz2 >= sz);
- cfun->machine->punning_buffer_size
- = MAX (cfun->machine->punning_buffer_size, sz2);
- if (punning_reg == NULL_RTX)
- punning_reg = gen_rtx_REG (Pmode, NVPTX_PUNNING_BUFFER_REGNUM);
- copyback = gen_move_insn (inner, gen_rtx_MEM (inner_mode, punning_reg));
- operands[0] = gen_rtx_MEM (<MODE>mode, punning_reg);
- }
- if (GET_CODE (operands[1]) == SUBREG)
- {
- rtx inner = SUBREG_REG (operands[1]);
- enum machine_mode inner_mode = GET_MODE (inner);
- int sz2 = GET_MODE_SIZE (inner_mode);
- gcc_assert (sz2 >= sz);
- cfun->machine->punning_buffer_size
- = MAX (cfun->machine->punning_buffer_size, sz2);
- if (punning_reg == NULL_RTX)
- punning_reg = gen_rtx_REG (Pmode, NVPTX_PUNNING_BUFFER_REGNUM);
- emit_move_insn (gen_rtx_MEM (inner_mode, punning_reg), inner);
- operands[1] = gen_rtx_MEM (<MODE>mode, punning_reg);
- }
-
- if (REG_P (operands[0]) && submode == SFmode)
- {
- xops[0] = gen_reg_rtx (submode);
- xops[1] = gen_reg_rtx (submode);
- }
- else
- {
- xops[0] = gen_lowpart (submode, operands[0]);
- if (MEM_P (operands[0]))
- xops[1] = adjust_address_nv (operands[0], submode, sz);
- else
- xops[1] = gen_highpart (submode, operands[0]);
- }
-
- if (REG_P (operands[1]) && submode == SFmode)
- {
- xops[2] = gen_reg_rtx (submode);
- xops[3] = gen_reg_rtx (submode);
- emit_insn (gen_lowpartscsf2 (xops[2], operands[1]));
- emit_insn (gen_highpartscsf2 (xops[3], operands[1]));
- }
- else
- {
- xops[2] = gen_lowpart (submode, operands[1]);
- if (MEM_P (operands[1]))
- xops[3] = adjust_address_nv (operands[1], submode, sz);
- else
- xops[3] = gen_highpart (submode, operands[1]);
- }
-
- emit_move_insn (xops[0], xops[2]);
- emit_move_insn (xops[1], xops[3]);
- if (REG_P (operands[0]) && submode == SFmode)
- {
- emit_insn (gen_set_lowpartsfsc2 (operands[0], xops[0]));
- emit_insn (gen_set_highpartsfsc2 (operands[0], xops[1]));
- }
- if (copyback)
- emit_insn (copyback);
- DONE;
-})
-
(define_insn "zero_extendqihi2"
[(set (match_operand:HI 0 "nvptx_register_operand" "=R,R")
(zero_extend:HI (match_operand:QI 1 "nvptx_reg_or_mem_operand" "R,m")))]
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PTX] simplify movs
2015-12-02 15:09 [PTX] simplify movs Nathan Sidwell
@ 2015-12-02 15:30 ` Bernd Schmidt
2017-05-21 15:32 ` Tom de Vries
1 sibling, 0 replies; 4+ messages in thread
From: Bernd Schmidt @ 2015-12-02 15:30 UTC (permalink / raw)
To: Nathan Sidwell, GCC Patches
On 12/02/2015 04:09 PM, Nathan Sidwell wrote:
> The PTX md file goes to a lot of effort handling SC and DC movs,
> including for unspecs to mov low and high parts around. However, these
> code paths are not exercised in any gcc test or the build of newlib.
> The generic handling of these movs deals with type punning, (using the
> stack frame, if needed). There doesn't appear a need for a separate
> punbuffer.
>
> Thus this patch deletes a lot of that machinery.
Hmm, that was definitely necessary at one point. I wonder what changed?
Bernd
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PTX] simplify movs
2015-12-02 15:09 [PTX] simplify movs Nathan Sidwell
2015-12-02 15:30 ` Bernd Schmidt
@ 2017-05-21 15:32 ` Tom de Vries
2017-05-22 12:11 ` Nathan Sidwell
1 sibling, 1 reply; 4+ messages in thread
From: Tom de Vries @ 2017-05-21 15:32 UTC (permalink / raw)
To: Nathan Sidwell; +Cc: GCC Patches, Bernd Schmidt
[-- Attachment #1: Type: text/plain, Size: 2117 bytes --]
On 12/02/2015 04:09 PM, Nathan Sidwell wrote:
> +/* Output a pattern for a move instruction. */
> +
> +const char *
> +nvptx_output_mov_insn (rtx dst, rtx src)
> +{
> + machine_mode dst_mode = GET_MODE (dst);
> + machine_mode dst_inner = (GET_CODE (dst) == SUBREG
> + ? GET_MODE (XEXP (dst, 0)) : dst_mode);
> + machine_mode src_inner = (GET_CODE (src) == SUBREG
> + ? GET_MODE (XEXP (src, 0)) : dst_mode);
> +
> + if (REG_P (dst) && REGNO (dst) == NVPTX_RETURN_REGNUM && dst_mode == HImode)
> + /* Special handling for the return register. It's never really an
> + HI object, and only occurs as the destination of a move
> + insn. */
> + dst_inner = SImode;
> +
> + if (src_inner == dst_inner)
> + return "%.\tmov%t0\t%0, %1;";
> +
> + if (CONSTANT_P (src))
> + return (GET_MODE_CLASS (dst_inner) == MODE_INT
> + && GET_MODE_CLASS (src_inner) != MODE_FLOAT
> + ? "%.\tmov%t0\t%0, %1;" : "%.\tmov.b%T0\t%0, %1;");
Hi,
src_inner uses dst_mode rather than GET_MODE (src). I'm trying to
understand if that is intentional or not.
For instance, for this insn:
....
(insn 7 6 8 2
(set (reg:QI 67)
(const_int 1 [0x1])) 2 {*movqi_insn}
(nil))
...
when entering nvptx_output_mov_insn we have:
- GET_MODE (dst) == QI and GET_MODE (src) == VOID, but
- dst_inner == QI and src_inner == QI
So we handle this insn using this clause:
...
if (src_inner == dst_inner)
return "%.\tmov%t0\t%0, %1;";
...
rather than using the const handling clause:
...
if (CONSTANT_P (src))
return (GET_MODE_CLASS (dst_inner) == MODE_INT
&& GET_MODE_CLASS (src_inner) != MODE_FLOAT
? "%.\tmov%t0\t%0, %1;" : "%.\tmov.b%T0\t%0, %1;");
...
Using attached patch, we get dst_inner == QI and src_inner == VOID, and
the insn is handled by the const handling clause instead, and the same
string is returned as before.
I can imagine that src_inner uses dst_mode to avoid setting src_inner to
VOIDmode (in which case a comment explaining that would avoid the
impression of a copy-pasto). But AFAICT, it's not necessary.
Thanks,
- Tom
[-- Attachment #2: 0001-Fix-src_inner-in-nvptx_output_mov_insn.patch --]
[-- Type: text/x-patch, Size: 756 bytes --]
Fix src_inner in nvptx_output_mov_insn
---
gcc/config/nvptx/nvptx.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 4c35c16..6951e27 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -2146,10 +2146,11 @@ const char *
nvptx_output_mov_insn (rtx dst, rtx src)
{
machine_mode dst_mode = GET_MODE (dst);
+ machine_mode src_mode = GET_MODE (src);
machine_mode dst_inner = (GET_CODE (dst) == SUBREG
? GET_MODE (XEXP (dst, 0)) : dst_mode);
machine_mode src_inner = (GET_CODE (src) == SUBREG
- ? GET_MODE (XEXP (src, 0)) : dst_mode);
+ ? GET_MODE (XEXP (src, 0)) : src_mode);
rtx sym = src;
if (GET_CODE (sym) == CONST)
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PTX] simplify movs
2017-05-21 15:32 ` Tom de Vries
@ 2017-05-22 12:11 ` Nathan Sidwell
0 siblings, 0 replies; 4+ messages in thread
From: Nathan Sidwell @ 2017-05-22 12:11 UTC (permalink / raw)
To: Tom de Vries; +Cc: GCC Patches, Bernd Schmidt
On 05/21/2017 03:35 AM, Tom de Vries wrote:
> On 12/02/2015 04:09 PM, Nathan Sidwell wrote:
>> +/* Output a pattern for a move instruction. */
>> +
>> +const char *
>> +nvptx_output_mov_insn (rtx dst, rtx src)
>> +{
> src_inner uses dst_mode rather than GET_MODE (src). I'm trying to
> understand if that is intentional or not.
I have no idea.
--
Nathan Sidwell
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2017-05-22 11:50 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-02 15:09 [PTX] simplify movs Nathan Sidwell
2015-12-02 15:30 ` Bernd Schmidt
2017-05-21 15:32 ` Tom de Vries
2017-05-22 12:11 ` Nathan Sidwell
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).