* [PATCH v3] SH FDPIC backend support
@ 2015-10-21 3:55 Rich Felker
2015-10-21 13:37 ` Oleg Endo
0 siblings, 1 reply; 14+ messages in thread
From: Rich Felker @ 2015-10-21 3:55 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 643 bytes --]
Attached is a hopefully near-ready-for-commit version of the SH/FDPIC
patch. I believe I've addressed all comments by Oleg and Kaz on the
previous versions of the patch. I'm still working on drafting the
Changelog entry (there's a lot to go in it, and I might very well be
going into more detail than is needed).
One thing I've considered doing, since TARGET_FDPIC implies flag_pic
now, is removing all parts of the patch that just replace checks for
flag_pic with (flag_pic || TARGET_FDPIC). Would doing this be
desirable? It shrinks the patch a bit, but of course it hard-codes more
strongly the assumption that TARGET_FDPIC implies flag_pic.
Rich
[-- Attachment #2: gcc-sh-fdpic-v3.diff --]
[-- Type: text/plain, Size: 66653 bytes --]
diff --git a/gcc/config.gcc b/gcc/config.gcc
index bf26776..ed118f3 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -2621,6 +2621,9 @@ sh-*-elf* | sh[12346l]*-*-elf* | \
tm_file="${tm_file} dbxelf.h elfos.h sh/elf.h"
case ${target} in
sh*-*-linux*) tmake_file="${tmake_file} sh/t-linux"
+ if test x$enable_fdpic = xyes; then
+ tm_defines="$tm_defines FDPIC_DEFAULT=1"
+ fi
tm_file="${tm_file} gnu-user.h linux.h glibc-stdint.h sh/linux.h" ;;
sh*-*-netbsd*)
tm_file="${tm_file} netbsd.h netbsd-elf.h sh/netbsd-elf.h"
diff --git a/gcc/config/sh/constraints.md b/gcc/config/sh/constraints.md
index 4d1eb2d..41c88a2 100644
--- a/gcc/config/sh/constraints.md
+++ b/gcc/config/sh/constraints.md
@@ -25,6 +25,7 @@
;; Bsc: SCRATCH - for the scratch register in movsi_ie in the
;; fldi0 / fldi0 cases
;; Cxx: Constants other than only CONST_INT
+;; Ccl: call site label
;; Css: signed 16-bit constant, literal or symbolic
;; Csu: unsigned 16-bit constant, literal or symbolic
;; Csy: label or symbol
@@ -233,6 +234,11 @@
hence mova is being used, hence do not select this pattern."
(match_code "scratch"))
+(define_constraint "Ccl"
+ "A call site label, for bsrf."
+ (and (match_code "unspec")
+ (match_test "XINT (op, 1) == UNSPEC_CALLER")))
+
(define_constraint "Css"
"A signed 16-bit constant, literal or symbolic."
(and (match_code "const")
diff --git a/gcc/config/sh/linux.h b/gcc/config/sh/linux.h
index a9dd43a..5d4dd1f 100644
--- a/gcc/config/sh/linux.h
+++ b/gcc/config/sh/linux.h
@@ -69,7 +69,8 @@ along with GCC; see the file COPYING3. If not see
#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2"
#undef SUBTARGET_LINK_EMUL_SUFFIX
-#define SUBTARGET_LINK_EMUL_SUFFIX "_linux"
+#define SUBTARGET_LINK_EMUL_SUFFIX "%{mfdpic:_fd;:_linux}"
+
#undef SUBTARGET_LINK_SPEC
#define SUBTARGET_LINK_SPEC \
"%{shared:-shared} \
diff --git a/gcc/config/sh/sh-c.c b/gcc/config/sh/sh-c.c
index a98c148..01a12e6 100644
--- a/gcc/config/sh/sh-c.c
+++ b/gcc/config/sh/sh-c.c
@@ -141,6 +141,11 @@ sh_cpu_cpp_builtins (cpp_reader* pfile)
builtin_define ("__HITACHI__");
if (TARGET_FMOVD)
builtin_define ("__FMOVD_ENABLED__");
+ if (TARGET_FDPIC)
+ {
+ builtin_define ("__SH_FDPIC__");
+ builtin_define ("__FDPIC__");
+ }
builtin_define (TARGET_LITTLE_ENDIAN
? "__LITTLE_ENDIAN__" : "__BIG_ENDIAN__");
diff --git a/gcc/config/sh/sh-mem.cc b/gcc/config/sh/sh-mem.cc
index 23a7287..6e521ba 100644
--- a/gcc/config/sh/sh-mem.cc
+++ b/gcc/config/sh/sh-mem.cc
@@ -123,10 +123,10 @@ expand_block_move (rtx *operands)
rtx r4 = gen_rtx_REG (SImode, 4);
rtx r5 = gen_rtx_REG (SImode, 5);
- function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
+ rtx lab = function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC).lab;
force_into (XEXP (operands[0], 0), r4);
force_into (XEXP (operands[1], 0), r5);
- emit_insn (gen_block_move_real_i4 (func_addr_rtx));
+ emit_insn (gen_block_move_real_i4 (func_addr_rtx, lab));
return true;
}
else if (! optimize_size)
@@ -139,13 +139,13 @@ expand_block_move (rtx *operands)
rtx r6 = gen_rtx_REG (SImode, 6);
entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
- function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
+ rtx lab = function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC).lab;
force_into (XEXP (operands[0], 0), r4);
force_into (XEXP (operands[1], 0), r5);
dwords = bytes >> 3;
emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
- emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
+ emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab));
return true;
}
else
@@ -159,10 +159,10 @@ expand_block_move (rtx *operands)
rtx r5 = gen_rtx_REG (SImode, 5);
sprintf (entry, "__movmemSI%d", bytes);
- function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
+ rtx lab = function_symbol (func_addr_rtx, entry, SFUNC_STATIC).lab;
force_into (XEXP (operands[0], 0), r4);
force_into (XEXP (operands[1], 0), r5);
- emit_insn (gen_block_move_real (func_addr_rtx));
+ emit_insn (gen_block_move_real (func_addr_rtx, lab));
return true;
}
@@ -176,7 +176,7 @@ expand_block_move (rtx *operands)
rtx r5 = gen_rtx_REG (SImode, 5);
rtx r6 = gen_rtx_REG (SImode, 6);
- function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
+ rtx lab = function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC).lab;
force_into (XEXP (operands[0], 0), r4);
force_into (XEXP (operands[1], 0), r5);
@@ -189,7 +189,7 @@ expand_block_move (rtx *operands)
final_switch = 16 - ((bytes / 4) % 16);
while_loop = ((bytes / 4) / 16 - 1) * 16;
emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
- emit_insn (gen_block_lump_real (func_addr_rtx));
+ emit_insn (gen_block_lump_real (func_addr_rtx, lab));
return true;
}
diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h
index f94459f..c64a948 100644
--- a/gcc/config/sh/sh-protos.h
+++ b/gcc/config/sh/sh-protos.h
@@ -377,7 +377,19 @@ extern void fpscr_set_from_mem (int, HARD_REG_SET);
extern void sh_pr_interrupt (struct cpp_reader *);
extern void sh_pr_trapa (struct cpp_reader *);
extern void sh_pr_nosave_low_regs (struct cpp_reader *);
-extern rtx function_symbol (rtx, const char *, enum sh_function_kind);
+
+struct function_symbol_result
+{
+ function_symbol_result (void) : sym (NULL), lab (NULL) { }
+ function_symbol_result (rtx s, rtx l) : sym (s), lab (l) { }
+
+ rtx sym;
+ rtx lab;
+};
+
+extern function_symbol_result function_symbol (rtx, const char *,
+ sh_function_kind);
+extern rtx sh_get_fdpic_reg_initial_val (void);
extern rtx sh_get_pr_initial_val (void);
extern void sh_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree,
@@ -396,4 +408,5 @@ extern bool sh_hard_regno_mode_ok (unsigned int, machine_mode);
extern machine_mode sh_hard_regno_caller_save_mode (unsigned int, unsigned int,
machine_mode);
extern bool sh_can_use_simple_return_p (void);
+extern rtx sh_load_function_descriptor (rtx);
#endif /* ! GCC_SH_PROTOS_H */
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
index 904201b..49062c9 100644
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -268,6 +268,7 @@ static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
HOST_WIDE_INT, tree);
static void sh_file_start (void);
+static bool sh_assemble_integer (rtx, unsigned, int);
static bool flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, const_rtx, void *);
static int shiftcosts (rtx);
@@ -276,6 +277,7 @@ static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx_insn *);
+static bool sh_cannot_force_const_mem_p (machine_mode, rtx);
static bool sh_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
static int sh_pr_n_sets (void);
@@ -333,6 +335,7 @@ static void sh_encode_section_info (tree, rtx, int);
static bool sh2a_function_vector_p (tree);
static void sh_trampoline_init (rtx, tree, rtx);
static rtx sh_trampoline_adjust_address (rtx);
+static int sh_reloc_rw_mask (void);
static void sh_conditional_register_usage (void);
static bool sh_legitimate_constant_p (machine_mode, rtx);
static int mov_insn_size (machine_mode, bool);
@@ -421,6 +424,9 @@ static const struct attribute_spec sh_attribute_table[] =
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER sh_assemble_integer
+
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sh_register_move_cost
@@ -679,6 +685,12 @@ static const struct attribute_spec sh_attribute_table[] =
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM sh_cannot_force_const_mem_p
+
+#undef TARGET_ASM_RELOC_RW_MASK
+#define TARGET_ASM_RELOC_RW_MASK sh_reloc_rw_mask
+
struct gcc_target targetm = TARGET_INITIALIZER;
\f
@@ -996,6 +1008,13 @@ sh_option_override (void)
if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
TARGET_ZDCBRANCH = 1;
+ if (TARGET_FDPIC && !flag_pic)
+ flag_pic = 2;
+
+ if (TARGET_FDPIC
+ && (TARGET_SHMEDIA || TARGET_SHCOMPACT || !TARGET_SH2))
+ sorry ("non-SH2 FDPIC");
+
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (! VALID_REGISTER_P (regno))
sh_register_names[regno][0] = '\0';
@@ -1004,7 +1023,7 @@ sh_option_override (void)
if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
sh_additional_register_names[regno][0] = '\0';
- if ((flag_pic && ! TARGET_PREFERGOT)
+ if (((flag_pic || TARGET_FDPIC) && ! TARGET_PREFERGOT)
|| (TARGET_SHMEDIA && !TARGET_PT_FIXED))
flag_no_function_cse = 1;
@@ -1687,6 +1706,14 @@ sh_asm_output_addr_const_extra (FILE *file, rtx x)
output_addr_const (file, XVECEXP (x, 0, 1));
fputs ("-.)", file);
break;
+ case UNSPEC_GOTFUNCDESC:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@GOTFUNCDESC", file);
+ break;
+ case UNSPEC_GOTOFFFUNCDESC:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@GOTOFFFUNCDESC", file);
+ break;
default:
return false;
}
@@ -1714,7 +1741,7 @@ void
prepare_move_operands (rtx operands[], machine_mode mode)
{
if ((mode == SImode || mode == DImode)
- && flag_pic
+ && (flag_pic || TARGET_FDPIC)
&& ! ((mode == Pmode || mode == ptr_mode)
&& tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
{
@@ -1850,7 +1877,7 @@ prepare_move_operands (rtx operands[], machine_mode mode)
{
rtx tga_op1, tga_ret, tmp, tmp2;
- if (! flag_pic
+ if (! flag_pic && ! TARGET_FDPIC
&& (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
|| tls_kind == TLS_MODEL_LOCAL_DYNAMIC
|| tls_kind == TLS_MODEL_INITIAL_EXEC))
@@ -1871,6 +1898,11 @@ prepare_move_operands (rtx operands[], machine_mode mode)
{
case TLS_MODEL_GLOBAL_DYNAMIC:
tga_ret = gen_rtx_REG (Pmode, R0_REG);
+ if (TARGET_FDPIC)
+ {
+ emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
+ sh_get_fdpic_reg_initial_val ());
+ }
emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
tmp = gen_reg_rtx (Pmode);
emit_move_insn (tmp, tga_ret);
@@ -1879,6 +1911,11 @@ prepare_move_operands (rtx operands[], machine_mode mode)
case TLS_MODEL_LOCAL_DYNAMIC:
tga_ret = gen_rtx_REG (Pmode, R0_REG);
+ if (TARGET_FDPIC)
+ {
+ emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
+ sh_get_fdpic_reg_initial_val ());
+ }
emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
tmp = gen_reg_rtx (Pmode);
@@ -1896,6 +1933,11 @@ prepare_move_operands (rtx operands[], machine_mode mode)
case TLS_MODEL_INITIAL_EXEC:
tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
tmp = gen_sym2GOTTPOFF (op1);
+ if (TARGET_FDPIC)
+ {
+ emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
+ sh_get_fdpic_reg_initial_val ());
+ }
emit_insn (gen_tls_initial_exec (tga_op1, tmp));
op1 = tga_op1;
break;
@@ -1922,6 +1964,21 @@ prepare_move_operands (rtx operands[], machine_mode mode)
operands[1] = op1;
}
}
+
+ if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
+ {
+ rtx base, offset;
+ split_const (operands[1], &base, &offset);
+ if (GET_CODE (base) == SYMBOL_REF
+ && !offset_within_block_p (base, INTVAL (offset)))
+ {
+ rtx tmp = can_create_pseudo_p () ? gen_reg_rtx (mode) : operands[0];
+ emit_move_insn (tmp, base);
+ if (!arith_operand (offset, mode))
+ offset = force_reg (mode, offset);
+ emit_insn (gen_add3_insn (operands[0], tmp, offset));
+ }
+ }
}
/* Implement the canonicalize_comparison target hook for the combine
@@ -3026,6 +3083,26 @@ sh_file_start (void)
}
}
\f
+/* Implementation of TARGET_ASM_INTEGER for SH. Pointers to functions
+ need to be output as pointers to function descriptors for
+ FDPIC. */
+
+static bool
+sh_assemble_integer (rtx value, unsigned int size, int aligned_p)
+{
+ if (TARGET_FDPIC
+ && size == UNITS_PER_WORD
+ && GET_CODE (value) == SYMBOL_REF
+ && SYMBOL_REF_FUNCTION_P (value))
+ {
+ fputs ("\t.long\t", asm_out_file);
+ output_addr_const (asm_out_file, value);
+ fputs ("@FUNCDESC\n", asm_out_file);
+ return true;
+ }
+ return default_assemble_integer (value, size, aligned_p);
+}
+\f
/* Check if PAT includes UNSPEC_CALLER unspec pattern. */
static bool
unspec_caller_rtx_p (rtx pat)
@@ -3052,7 +3129,7 @@ sh_cannot_copy_insn_p (rtx_insn *insn)
{
rtx pat;
- if (!reload_completed || !flag_pic)
+ if (!reload_completed || (!flag_pic && !TARGET_FDPIC))
return false;
if (!NONJUMP_INSN_P (insn))
@@ -3061,6 +3138,19 @@ sh_cannot_copy_insn_p (rtx_insn *insn)
return false;
pat = PATTERN (insn);
+
+ if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == USE)
+ return false;
+
+ if (TARGET_FDPIC
+ && GET_CODE (pat) == PARALLEL)
+ {
+ rtx t = XVECEXP (pat, 0, XVECLEN (pat, 0) - 1);
+ if (GET_CODE (t) == USE
+ && unspec_caller_rtx_p (XEXP (t, 0)))
+ return true;
+ }
+
if (GET_CODE (pat) != SET)
return false;
pat = SET_SRC (pat);
@@ -4102,8 +4192,8 @@ expand_ashiftrt (rtx *operands)
/* Load the value into an arg reg and call a helper. */
emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
sprintf (func, "__ashiftrt_r4_%d", value);
- function_symbol (wrk, func, SFUNC_STATIC);
- emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
+ rtx lab = function_symbol (wrk, func, SFUNC_STATIC).lab;
+ emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk, lab));
emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
return true;
}
@@ -7954,7 +8044,9 @@ sh_expand_prologue (void)
stack_usage += d;
}
- if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
+ if (flag_pic
+ && !TARGET_FDPIC
+ && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
emit_insn (gen_GOTaddr2picreg (const0_rtx));
if (SHMEDIA_REGS_STACK_ADJUST ())
@@ -10458,7 +10550,9 @@ nonpic_symbol_mentioned_p (rtx x)
|| XINT (x, 1) == UNSPEC_PLT
|| XINT (x, 1) == UNSPEC_PCREL
|| XINT (x, 1) == UNSPEC_SYMOFF
- || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
+ || XINT (x, 1) == UNSPEC_PCREL_SYMOFF
+ || XINT (x, 1) == UNSPEC_GOTFUNCDESC
+ || XINT (x, 1) == UNSPEC_GOTOFFFUNCDESC))
return false;
fmt = GET_RTX_FORMAT (GET_CODE (x));
@@ -10493,7 +10587,28 @@ legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED,
if (reg == NULL_RTX)
reg = gen_reg_rtx (Pmode);
- emit_insn (gen_symGOTOFF2reg (reg, orig));
+ if (TARGET_FDPIC
+ && GET_CODE (orig) == SYMBOL_REF
+ && SYMBOL_REF_FUNCTION_P (orig))
+ {
+ /* Weak functions may be NULL which doesn't work with
+ GOTOFFFUNCDESC because the runtime offset is not known. */
+ if (SYMBOL_REF_WEAK (orig))
+ emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
+ else
+ emit_insn (gen_symGOTOFFFUNCDESC2reg (reg, orig));
+ }
+ else if (TARGET_FDPIC
+ && (GET_CODE (orig) == LABEL_REF
+ || (GET_CODE (orig) == SYMBOL_REF
+ && SYMBOL_REF_DECL (orig)
+ && (TREE_READONLY (SYMBOL_REF_DECL (orig))
+ || SYMBOL_REF_EXTERNAL_P (orig)
+ || DECL_SECTION_NAME(SYMBOL_REF_DECL(orig))) )))
+ /* In FDPIC, GOTOFF can only be used for writable data. */
+ emit_insn (gen_symGOT2reg (reg, orig));
+ else
+ emit_insn (gen_symGOTOFF2reg (reg, orig));
return reg;
}
else if (GET_CODE (orig) == SYMBOL_REF)
@@ -10501,7 +10616,10 @@ legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED,
if (reg == NULL_RTX)
reg = gen_reg_rtx (Pmode);
- emit_insn (gen_symGOT2reg (reg, orig));
+ if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (orig))
+ emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
+ else
+ emit_insn (gen_symGOT2reg (reg, orig));
return reg;
}
return orig;
@@ -11539,6 +11657,19 @@ sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
5 0008 00000000 l1: .long area
6 000c 00000000 l2: .long function
+ FDPIC needs a form that includes a function descriptor and
+ code to load the GOT register:
+ 0 0000 00000000 .long l0
+ 1 0004 00000000 .long gotval
+ 2 0008 D302 l0: mov.l l1,r3
+ 3 000a D203 mov.l l2,r2
+ 4 000c 6122 mov.l @r2,r1
+ 5 000e 5C21 mov.l @(4,r2),r12
+ 6 0010 412B jmp @r1
+ 7 0012 0009 nop
+ 8 0014 00000000 l1: .long area
+ 9 0018 00000000 l2: .long function
+
SH5 (compact) uses r1 instead of r3 for the static chain. */
@@ -11675,20 +11806,41 @@ sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
return;
}
- emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
- gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
- SImode));
- emit_move_insn (adjust_address (tramp_mem, SImode, 4),
- gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
- SImode));
- emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
- emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
+ if (TARGET_FDPIC)
+ {
+ rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8));
+ emit_move_insn (adjust_address (tramp_mem, SImode, 0), a);
+ emit_move_insn (adjust_address (tramp_mem, SImode, 4),
+ sh_get_fdpic_reg_initial_val ());
+ emit_move_insn (adjust_address (tramp_mem, SImode, 8),
+ gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd203d302 : 0xd302d203,
+ SImode));
+ emit_move_insn (adjust_address (tramp_mem, SImode, 12),
+ gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x5c216122 : 0x61225c21,
+ SImode));
+ emit_move_insn (adjust_address (tramp_mem, SImode, 16),
+ gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009412b : 0x412b0009,
+ SImode));
+ emit_move_insn (adjust_address (tramp_mem, SImode, 20), cxt);
+ emit_move_insn (adjust_address (tramp_mem, SImode, 24), fnaddr);
+ }
+ else
+ {
+ emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
+ gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
+ SImode));
+ emit_move_insn (adjust_address (tramp_mem, SImode, 4),
+ gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
+ SImode));
+ emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
+ emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
+ }
if (TARGET_HARD_SH4 || TARGET_SH5)
{
if (!TARGET_INLINE_IC_INVALIDATE
|| (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
emit_library_call (function_symbol (NULL, "__ic_invalidate",
- FUNCTION_ORDINARY),
+ FUNCTION_ORDINARY).sym,
LCT_NORMAL, VOIDmode, 1, tramp, SImode);
else
emit_insn (gen_ic_invalidate_line (tramp));
@@ -11718,7 +11870,7 @@ sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
&& (! TARGET_SHCOMPACT
|| crtl->args.info.stack_regs == 0)
&& ! sh_cfun_interrupt_handler_p ()
- && (! flag_pic
+ && (! flag_pic || TARGET_FDPIC
|| (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl)))
|| (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
}
@@ -11732,7 +11884,7 @@ sh_expand_sym_label2reg (rtx reg, rtx sym, rtx lab, bool sibcall_p)
if (!is_weak && SYMBOL_REF_LOCAL_P (sym))
emit_insn (gen_sym_label2reg (reg, sym, lab));
- else if (sibcall_p)
+ else if (sibcall_p && SYMBOL_REF_LOCAL_P (sym))
emit_insn (gen_symPCREL_label2reg (reg, sym, lab));
else
emit_insn (gen_symPLT_label2reg (reg, sym, lab));
@@ -12731,10 +12883,18 @@ sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
else
#endif
- if (TARGET_SH2 && flag_pic)
+ if (TARGET_SH2 && (flag_pic || TARGET_FDPIC))
{
- sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
- XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
+ if (TARGET_FDPIC)
+ {
+ sibcall = gen_sibcall_pcrel_fdpic (funexp, const0_rtx);
+ XEXP (XVECEXP (sibcall, 0, 3), 0) = scratch2;
+ }
+ else
+ {
+ sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
+ XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
+ }
}
else
{
@@ -12775,19 +12935,27 @@ sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
epilogue_completed = 0;
}
-rtx
-function_symbol (rtx target, const char *name, enum sh_function_kind kind)
-{
- rtx sym;
+/* Return an RTX for the address of a function NAME of kind KIND,
+ placing the result in TARGET if not NULL. For SFUNC_STATIC, if FDPIC,
+ the LAB member of the result is set to (const_int 0) if jsr
+ should be used, or to a call site label if bsrf should be used. For FDPIC,
+ both SFUNC_GOT and SFUNC_STATIC will return the address of the
+ function itself, not a function descriptor, so they can only be
+ used with functions not using the FDPIC register that are known to
+ be called directly without a PLT entry. */
+function_symbol_result
+function_symbol (rtx target, const char *name, sh_function_kind kind)
+{
/* If this is not an ordinary function, the name usually comes from a
string literal or an sprintf buffer. Make sure we use the same
string consistently, so that cse will be able to unify address loads. */
if (kind != FUNCTION_ORDINARY)
name = IDENTIFIER_POINTER (get_identifier (name));
- sym = gen_rtx_SYMBOL_REF (Pmode, name);
+ rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
+ rtx lab = const0_rtx;
SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
- if (flag_pic)
+ if (flag_pic || TARGET_FDPIC)
switch (kind)
{
case FUNCTION_ORDINARY:
@@ -12802,14 +12970,26 @@ function_symbol (rtx target, const char *name, enum sh_function_kind kind)
}
case SFUNC_STATIC:
{
- /* ??? To allow cse to work, we use GOTOFF relocations.
- We could add combiner patterns to transform this into
- straight pc-relative calls with sym2PIC / bsrf when
- label load and function call are still 1:1 and in the
- same basic block during combine. */
rtx reg = target ? target : gen_reg_rtx (Pmode);
- emit_insn (gen_symGOTOFF2reg (reg, sym));
+ if (TARGET_FDPIC)
+ {
+ /* We use PC-relative calls, since GOTOFF can only refer
+ to writable data. This works along with
+ sh_sfunc_call. */
+ lab = PATTERN (gen_call_site ());
+ emit_insn (gen_sym_label2reg (reg, sym, lab));
+ }
+ else
+ {
+ /* ??? To allow cse to work, we use GOTOFF relocations.
+ We could add combiner patterns to transform this into
+ straight pc-relative calls with sym2PIC / bsrf when
+ label load and function call are still 1:1 and in the
+ same basic block during combine. */
+ emit_insn (gen_symGOTOFF2reg (reg, sym));
+ }
+
sym = reg;
break;
}
@@ -12817,9 +12997,9 @@ function_symbol (rtx target, const char *name, enum sh_function_kind kind)
if (target && sym != target)
{
emit_move_insn (target, sym);
- return target;
+ return function_symbol_result(target, lab);
}
- return sym;
+ return function_symbol_result(sym, lab);
}
/* Find the number of a general purpose register in S. */
@@ -13432,6 +13612,12 @@ sh_conditional_register_usage (void)
fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
}
+ if (TARGET_FDPIC)
+ {
+ fixed_regs[PIC_REG] = 1;
+ call_used_regs[PIC_REG] = 1;
+ call_really_used_regs[PIC_REG] = 1;
+ }
/* Renesas saves and restores mac registers on call. */
if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
{
@@ -13460,14 +13646,32 @@ sh_conditional_register_usage (void)
static bool
sh_legitimate_constant_p (machine_mode mode, rtx x)
{
- return (TARGET_SHMEDIA
- ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
- || x == CONST0_RTX (mode)
- || !TARGET_SHMEDIA_FPU
- || TARGET_SHMEDIA64)
- : (GET_CODE (x) != CONST_DOUBLE
- || mode == DFmode || mode == SFmode
- || mode == DImode || GET_MODE (x) == VOIDmode));
+ if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
+ {
+ rtx base, offset;
+
+ split_const (x, &base, &offset);
+ if (GET_CODE (base) == SYMBOL_REF
+ && !offset_within_block_p (base, INTVAL (offset)))
+ return false;
+ }
+
+ if (TARGET_FDPIC
+ && (SYMBOLIC_CONST_P (x)
+ || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
+ && SYMBOLIC_CONST_P (XEXP (XEXP (x, 0), 0)))))
+ return false;
+
+ /* For SHmedia this test alone decides, as in the replaced ?: form. */
+ if (TARGET_SHMEDIA)
+ return ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
+ || x == CONST0_RTX (mode)
+ || !TARGET_SHMEDIA_FPU
+ || TARGET_SHMEDIA64);
+
+ return (GET_CODE (x) != CONST_DOUBLE
+ || mode == DFmode || mode == SFmode
+ || mode == DImode || GET_MODE (x) == VOIDmode);
}
enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
@@ -14558,4 +14762,53 @@ sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
}
}
+bool
+sh_cannot_force_const_mem_p (machine_mode mode ATTRIBUTE_UNUSED,
+ rtx x ATTRIBUTE_UNUSED)
+{
+ if (TARGET_FDPIC)
+ return true;
+
+ return false;
+}
+
+/* Emit insns to load the function address from FUNCDESC (an FDPIC
+ function descriptor) into r1 and the GOT address into r12,
+ returning an rtx for r1. */
+
+rtx
+sh_load_function_descriptor (rtx funcdesc)
+{
+ rtx r1 = gen_rtx_REG (Pmode, R1_REG);
+ rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+ rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
+ rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
+
+ emit_move_insn (r1, fnaddr);
+ /* The ABI requires the entry point address to be loaded first, so
+ prevent the load from being moved after that of the GOT
+ address. */
+ emit_insn (gen_blockage ());
+ emit_move_insn (pic_reg, gotaddr);
+ return r1;
+}
+
+/* Return an rtx holding the initial value of the FDPIC register (the
+ FDPIC pointer passed in from the caller). */
+
+rtx
+sh_get_fdpic_reg_initial_val (void)
+{
+ return get_hard_reg_initial_val (Pmode, PIC_REG);
+}
+
+/* Relocatable data for FDPIC binaries is not permitted in read-only
+ segments. */
+
+static int
+sh_reloc_rw_mask (void)
+{
+ return (flag_pic || TARGET_FDPIC) ? 3 : 0;
+}
+
#include "gt-sh.h"
diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h
index aafcf28..98d8054 100644
--- a/gcc/config/sh/sh.h
+++ b/gcc/config/sh/sh.h
@@ -321,7 +321,7 @@ extern int code_for_indirect_jump_scratch;
#endif
#ifndef SUBTARGET_ASM_SPEC
-#define SUBTARGET_ASM_SPEC ""
+#define SUBTARGET_ASM_SPEC "%{mfdpic:--fdpic}"
#endif
#if TARGET_ENDIAN_DEFAULT == MASK_LITTLE_ENDIAN
@@ -349,7 +349,7 @@ extern int code_for_indirect_jump_scratch;
#define ASM_ISA_DEFAULT_SPEC ""
#endif /* MASK_SH5 */
-#define SUBTARGET_LINK_EMUL_SUFFIX ""
+#define SUBTARGET_LINK_EMUL_SUFFIX "%{mfdpic:_fd}"
#define SUBTARGET_LINK_SPEC ""
/* Go via SH_LINK_SPEC to avoid code replication. */
@@ -383,8 +383,18 @@ extern int code_for_indirect_jump_scratch;
"%{m2a*:%eSH2a does not support little-endian}}"
#endif
+#ifdef FDPIC_DEFAULT
+#define FDPIC_SELF_SPECS "%{!mno-fdpic:-mfdpic}"
+#else
+#define FDPIC_SELF_SPECS
+#endif
+
#undef DRIVER_SELF_SPECS
-#define DRIVER_SELF_SPECS UNSUPPORTED_SH2A
+#define DRIVER_SELF_SPECS UNSUPPORTED_SH2A SUBTARGET_DRIVER_SELF_SPECS \
+ FDPIC_SELF_SPECS
+
+#undef SUBTARGET_DRIVER_SELF_SPECS
+#define SUBTARGET_DRIVER_SELF_SPECS
#define ASSEMBLER_DIALECT assembler_dialect
@@ -942,6 +952,10 @@ extern char sh_additional_register_names[ADDREGNAMES_SIZE] \
code access to data items. */
#define PIC_OFFSET_TABLE_REGNUM (flag_pic ? PIC_REG : INVALID_REGNUM)
+/* For FDPIC, the FDPIC register is call-clobbered (otherwise PLT
+ entries would need to handle saving and restoring it). */
+#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED TARGET_FDPIC
+
#define GOT_SYMBOL_NAME "*_GLOBAL_OFFSET_TABLE_"
/* Definitions for register eliminations.
@@ -1566,7 +1580,8 @@ struct sh_args {
6 000c 00000000 l2: .long function */
/* Length in units of the trampoline for entering a nested function. */
-#define TRAMPOLINE_SIZE (TARGET_SHMEDIA64 ? 40 : TARGET_SH5 ? 24 : 16)
+#define TRAMPOLINE_SIZE \
+ (TARGET_SHMEDIA64 ? 40 : TARGET_SH5 ? 24 : TARGET_FDPIC ? 32 : 16)
/* Alignment required for a trampoline in bits. */
#define TRAMPOLINE_ALIGNMENT \
@@ -1622,6 +1637,10 @@ struct sh_args {
|| GENERAL_REGISTER_P ((unsigned) reg_renumber[(REGNO)])) \
: (REGNO) == R0_REG || (unsigned) reg_renumber[(REGNO)] == R0_REG)
+/* True if SYMBOL + OFFSET constants must refer to something within
+ SYMBOL's section. */
+#define SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P TARGET_FDPIC
+
/* Maximum number of registers that can appear in a valid memory
address. */
#define MAX_REGS_PER_ADDRESS 2
@@ -2262,9 +2281,12 @@ extern int current_function_interrupt;
/* We have to distinguish between code and data, so that we apply
datalabel where and only where appropriate. Use sdataN for data. */
#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
- ((flag_pic && (GLOBAL) ? DW_EH_PE_indirect : 0) \
- | (flag_pic ? DW_EH_PE_pcrel : DW_EH_PE_absptr) \
- | ((CODE) ? 0 : (TARGET_SHMEDIA64 ? DW_EH_PE_sdata8 : DW_EH_PE_sdata4)))
+ ((TARGET_FDPIC \
+ ? ((GLOBAL) ? DW_EH_PE_indirect | DW_EH_PE_datarel \
+ : DW_EH_PE_pcrel) \
+ : ((flag_pic && (GLOBAL) ? DW_EH_PE_indirect : 0) \
+ | (flag_pic ? DW_EH_PE_pcrel : DW_EH_PE_absptr))) \
+ | ((CODE) ? 0 : (TARGET_SHMEDIA64 ? DW_EH_PE_sdata8 : DW_EH_PE_sdata4)))
/* Handle special EH pointer encodings. Absolute, pc-relative, and
indirect are handled automatically. */
@@ -2277,6 +2299,17 @@ extern int current_function_interrupt;
SYMBOL_REF_FLAGS (ADDR) |= SYMBOL_FLAG_FUNCTION; \
if (0) goto DONE; \
} \
+ if (TARGET_FDPIC \
+ && ((ENCODING) & 0xf0) == (DW_EH_PE_indirect | DW_EH_PE_datarel)) \
+ { \
+ fputs ("\t.ualong ", FILE); \
+ output_addr_const (FILE, ADDR); \
+ if (GET_CODE (ADDR) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (ADDR)) \
+ fputs ("@GOTFUNCDESC", FILE); \
+ else \
+ fputs ("@GOT", FILE); \
+ goto DONE; \
+ } \
} while (0)
#if (defined CRT_BEGIN || defined CRT_END) && ! __SHMEDIA__
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index d758e3b..e7758a6 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -100,6 +100,7 @@
(R8_REG 8)
(R9_REG 9)
(R10_REG 10)
+ (R12_REG 12)
(R20_REG 20)
(R21_REG 21)
(R22_REG 22)
@@ -170,6 +171,9 @@
UNSPEC_SYMOFF
;; (unspec [OFFSET ANCHOR] UNSPEC_PCREL_SYMOFF) == OFFSET - (ANCHOR - .).
UNSPEC_PCREL_SYMOFF
+ ;; For FDPIC
+ UNSPEC_GOTFUNCDESC
+ UNSPEC_GOTOFFFUNCDESC
;; Misc builtins
UNSPEC_BUILTIN_STRLEN
])
@@ -2591,15 +2595,18 @@
;; This reload would clobber the value in r0 we are trying to store.
;; If we let reload allocate r0, then this problem can never happen.
(define_insn "udivsi3_i1"
- [(set (match_operand:SI 0 "register_operand" "=z")
+ [(set (match_operand:SI 0 "register_operand" "=z,z")
(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))
(clobber (reg:SI R1_REG))
(clobber (reg:SI R4_REG))
- (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+ (use (match_operand 2 "" "Z,Ccl"))]
"TARGET_SH1 && TARGET_DIVIDE_CALL_DIV1"
- "jsr @%1%#"
+ "@
+ jsr @%1%#
+ bsrf %1\n%O2:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -2648,7 +2655,7 @@
})
(define_insn "udivsi3_i4"
- [(set (match_operand:SI 0 "register_operand" "=y")
+ [(set (match_operand:SI 0 "register_operand" "=y,y")
(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))
@@ -2660,16 +2667,19 @@
(clobber (reg:SI R4_REG))
(clobber (reg:SI R5_REG))
(clobber (reg:SI FPSCR_STAT_REG))
- (use (match_operand:SI 1 "arith_reg_operand" "r"))
+ (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+ (use (match_operand 2 "" "Z,Ccl"))
(use (reg:SI FPSCR_MODES_REG))]
"TARGET_FPU_DOUBLE && ! TARGET_FPU_SINGLE"
- "jsr @%1%#"
+ "@
+ jsr @%1%#
+ bsrf %1\n%O2:%#"
[(set_attr "type" "sfunc")
(set_attr "fp_mode" "double")
(set_attr "needs_delay_slot" "yes")])
(define_insn "udivsi3_i4_single"
- [(set (match_operand:SI 0 "register_operand" "=y")
+ [(set (match_operand:SI 0 "register_operand" "=y,y")
(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))
@@ -2680,10 +2690,13 @@
(clobber (reg:SI R1_REG))
(clobber (reg:SI R4_REG))
(clobber (reg:SI R5_REG))
- (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+ (use (match_operand 2 "" "Z,Ccl"))]
"(TARGET_FPU_SINGLE_ONLY || TARGET_FPU_DOUBLE || TARGET_SHCOMPACT)
&& TARGET_FPU_SINGLE"
- "jsr @%1%#"
+ "@
+ jsr @%1%#
+ bsrf %1\n%O2:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -2742,11 +2755,11 @@
}
else if (TARGET_DIVIDE_CALL_FP)
{
- function_symbol (operands[3], "__udivsi3_i4", SFUNC_STATIC);
+ rtx lab = function_symbol (operands[3], "__udivsi3_i4", SFUNC_STATIC).lab;
if (TARGET_FPU_SINGLE)
- last = gen_udivsi3_i4_single (operands[0], operands[3]);
+ last = gen_udivsi3_i4_single (operands[0], operands[3], lab);
else
- last = gen_udivsi3_i4 (operands[0], operands[3]);
+ last = gen_udivsi3_i4 (operands[0], operands[3], lab);
}
else if (TARGET_SHMEDIA_FPU)
{
@@ -2771,14 +2784,14 @@
if (TARGET_SHMEDIA)
last = gen_udivsi3_i1_media (operands[0], operands[3]);
else if (TARGET_FPU_ANY)
- last = gen_udivsi3_i4_single (operands[0], operands[3]);
+ last = gen_udivsi3_i4_single (operands[0], operands[3], const0_rtx);
else
- last = gen_udivsi3_i1 (operands[0], operands[3]);
+ last = gen_udivsi3_i1 (operands[0], operands[3], const0_rtx);
}
else
{
- function_symbol (operands[3], "__udivsi3", SFUNC_STATIC);
- last = gen_udivsi3_i1 (operands[0], operands[3]);
+ rtx lab = function_symbol (operands[3], "__udivsi3", SFUNC_STATIC).lab;
+ last = gen_udivsi3_i1 (operands[0], operands[3], lab);
}
emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]);
@@ -2906,7 +2919,7 @@
emit_move_insn (gen_rtx_REG (DImode, R20_REG), x);
break;
}
- sym = function_symbol (NULL, name, kind);
+ sym = function_symbol (NULL, name, kind).sym;
emit_insn (gen_divsi3_media_2 (operands[0], sym));
DONE;
}
@@ -2926,31 +2939,37 @@
})
(define_insn "divsi3_i4"
- [(set (match_operand:SI 0 "register_operand" "=y")
+ [(set (match_operand:SI 0 "register_operand" "=y,y")
(div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
(clobber (reg:SI PR_REG))
(clobber (reg:DF DR0_REG))
(clobber (reg:DF DR2_REG))
(clobber (reg:SI FPSCR_STAT_REG))
- (use (match_operand:SI 1 "arith_reg_operand" "r"))
+ (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+ (use (match_operand 2 "" "Z,Ccl"))
(use (reg:SI FPSCR_MODES_REG))]
"TARGET_FPU_DOUBLE && ! TARGET_FPU_SINGLE"
- "jsr @%1%#"
+ "@
+ jsr @%1%#
+ bsrf %1\n%O2:%#"
[(set_attr "type" "sfunc")
(set_attr "fp_mode" "double")
(set_attr "needs_delay_slot" "yes")])
(define_insn "divsi3_i4_single"
- [(set (match_operand:SI 0 "register_operand" "=y")
+ [(set (match_operand:SI 0 "register_operand" "=y,y")
(div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
(clobber (reg:SI PR_REG))
(clobber (reg:DF DR0_REG))
(clobber (reg:DF DR2_REG))
(clobber (reg:SI R2_REG))
- (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+ (use (match_operand 2 "" "Z,Ccl"))]
"(TARGET_FPU_SINGLE_ONLY || TARGET_FPU_DOUBLE || TARGET_SHCOMPACT)
&& TARGET_FPU_SINGLE"
- "jsr @%1%#"
+ "@
+ jsr @%1%#
+ bsrf %1\n%O2:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -2994,11 +3013,11 @@
}
else if (TARGET_DIVIDE_CALL_FP)
{
- function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_STATIC);
+ rtx lab = function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_STATIC).lab;
if (TARGET_FPU_SINGLE)
- last = gen_divsi3_i4_single (operands[0], operands[3]);
+ last = gen_divsi3_i4_single (operands[0], operands[3], lab);
else
- last = gen_divsi3_i4 (operands[0], operands[3]);
+ last = gen_divsi3_i4 (operands[0], operands[3], lab);
}
else if (TARGET_SH2A)
{
@@ -3113,7 +3132,7 @@
last = ((TARGET_DIVIDE_CALL2 ? gen_divsi3_media_2 : gen_divsi3_i1_media)
(operands[0], operands[3]));
else if (TARGET_FPU_ANY)
- last = gen_divsi3_i4_single (operands[0], operands[3]);
+ last = gen_divsi3_i4_single (operands[0], operands[3], const0_rtx);
else
last = gen_divsi3_i1 (operands[0], operands[3]);
}
@@ -3713,7 +3732,7 @@ label:
{
/* The address must be set outside the libcall,
since it goes into a pseudo. */
- rtx sym = function_symbol (NULL, "__mulsi3", SFUNC_STATIC);
+ rtx sym = function_symbol (NULL, "__mulsi3", SFUNC_STATIC).sym;
rtx addr = force_reg (SImode, sym);
rtx insns = gen_mulsi3_call (operands[0], operands[1],
operands[2], addr);
@@ -4970,8 +4989,8 @@ label:
{
emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]);
rtx funcaddr = gen_reg_rtx (Pmode);
- function_symbol (funcaddr, "__ashlsi3_r0", SFUNC_STATIC);
- emit_insn (gen_ashlsi3_d_call (operands[0], operands[2], funcaddr));
+ rtx lab = function_symbol (funcaddr, "__ashlsi3_r0", SFUNC_STATIC).lab;
+ emit_insn (gen_ashlsi3_d_call (operands[0], operands[2], funcaddr, lab));
DONE;
}
@@ -5024,15 +5043,18 @@ label:
;; In order to make combine understand the truncation of the shift amount
;; operand we have to allow it to use pseudo regs for the shift operands.
(define_insn "ashlsi3_d_call"
- [(set (match_operand:SI 0 "arith_reg_dest" "=z")
+ [(set (match_operand:SI 0 "arith_reg_dest" "=z,z")
(ashift:SI (reg:SI R4_REG)
- (and:SI (match_operand:SI 1 "arith_reg_operand" "z")
+ (and:SI (match_operand:SI 1 "arith_reg_operand" "z,z")
(const_int 31))))
- (use (match_operand:SI 2 "arith_reg_operand" "r"))
+ (use (match_operand:SI 2 "arith_reg_operand" "r,r"))
+ (use (match_operand 3 "" "Z,Ccl"))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))]
"TARGET_SH1 && !TARGET_DYNSHIFT"
- "jsr @%2%#"
+ "@
+ jsr @%2%#
+ bsrf %2\n%O3:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -5374,12 +5396,15 @@ label:
(define_insn "ashrsi3_n"
[(set (reg:SI R4_REG)
(ashiftrt:SI (reg:SI R4_REG)
- (match_operand:SI 0 "const_int_operand" "i")))
+ (match_operand:SI 0 "const_int_operand" "i,i")))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))
- (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+ (use (match_operand 2 "" "Z,Ccl"))]
"TARGET_SH1"
- "jsr @%1%#"
+ "@
+ jsr @%1%#
+ bsrf %1\n%O2:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -5532,8 +5557,8 @@ label:
{
emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]);
rtx funcaddr = gen_reg_rtx (Pmode);
- function_symbol (funcaddr, "__lshrsi3_r0", SFUNC_STATIC);
- emit_insn (gen_lshrsi3_d_call (operands[0], operands[2], funcaddr));
+ rtx lab = function_symbol (funcaddr, "__lshrsi3_r0", SFUNC_STATIC).lab;
+ emit_insn (gen_lshrsi3_d_call (operands[0], operands[2], funcaddr, lab));
DONE;
}
})
@@ -5585,15 +5610,18 @@ label:
;; In order to make combine understand the truncation of the shift amount
;; operand we have to allow it to use pseudo regs for the shift operands.
(define_insn "lshrsi3_d_call"
- [(set (match_operand:SI 0 "arith_reg_dest" "=z")
+ [(set (match_operand:SI 0 "arith_reg_dest" "=z,z")
(lshiftrt:SI (reg:SI R4_REG)
- (and:SI (match_operand:SI 1 "arith_reg_operand" "z")
+ (and:SI (match_operand:SI 1 "arith_reg_operand" "z,z")
(const_int 31))))
- (use (match_operand:SI 2 "arith_reg_operand" "r"))
+ (use (match_operand:SI 2 "arith_reg_operand" "r,r"))
+ (use (match_operand 3 "" "Z,Ccl"))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))]
"TARGET_SH1 && !TARGET_DYNSHIFT"
- "jsr @%2%#"
+ "@
+ jsr @%2%#
+ bsrf %2\n%O3:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -7315,7 +7343,7 @@ label:
}
else if (TARGET_SHCOMPACT)
{
- operands[1] = function_symbol (NULL, "__ic_invalidate", SFUNC_STATIC);
+ operands[1] = function_symbol (NULL, "__ic_invalidate", SFUNC_STATIC).sym;
operands[1] = force_reg (Pmode, operands[1]);
emit_insn (gen_ic_invalidate_line_compact (operands[0], operands[1]));
DONE;
@@ -7397,7 +7425,7 @@ label:
tramp = force_reg (Pmode, operands[0]);
sfun = force_reg (Pmode, function_symbol (NULL, "__init_trampoline",
- SFUNC_STATIC));
+ SFUNC_STATIC).sym);
emit_move_insn (gen_rtx_REG (SImode, R2_REG), operands[1]);
emit_move_insn (gen_rtx_REG (SImode, R3_REG), operands[2]);
@@ -9459,7 +9487,27 @@ label:
(match_operand 1 "" ""))
(use (reg:SI FPSCR_MODES_REG))
(clobber (reg:SI PR_REG))]
- "TARGET_SH1"
+ "TARGET_SH1 && !TARGET_FDPIC"
+{
+ if (TARGET_SH2A && (dbr_sequence_length () == 0))
+ return "jsr/n @%0";
+ else
+ return "jsr @%0%#";
+}
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")
+ (set_attr "fp_set" "unknown")])
+
+(define_insn "calli_fdpic"
+ [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (match_operand 1))
+ (use (reg:SI FPSCR_MODES_REG))
+ (use (reg:SI PIC_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_FDPIC"
{
if (TARGET_SH2A && (dbr_sequence_length () == 0))
return "jsr/n @%0";
@@ -9588,7 +9636,28 @@ label:
(match_operand 2 "" "")))
(use (reg:SI FPSCR_MODES_REG))
(clobber (reg:SI PR_REG))]
- "TARGET_SH1"
+ "TARGET_SH1 && !TARGET_FDPIC"
+{
+ if (TARGET_SH2A && (dbr_sequence_length () == 0))
+ return "jsr/n @%1";
+ else
+ return "jsr @%1%#";
+}
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")
+ (set_attr "fp_set" "unknown")])
+
+(define_insn "call_valuei_fdpic"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (match_operand 2)))
+ (use (reg:SI FPSCR_REG))
+ (use (reg:SI PIC_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_FDPIC"
{
if (TARGET_SH2A && (dbr_sequence_length () == 0))
return "jsr/n @%1";
@@ -9725,6 +9794,12 @@ label:
(clobber (reg:SI PR_REG))])]
""
{
+ if (TARGET_FDPIC)
+ {
+ rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+ emit_move_insn (pic_reg, sh_get_fdpic_reg_initial_val ());
+ }
+
if (TARGET_SHMEDIA)
{
operands[0] = shmedia_prepare_call_address (operands[0], 0);
@@ -9759,8 +9834,8 @@ label:
run out of registers when adjusting fpscr for the call. */
emit_insn (gen_force_mode_for_call ());
- operands[0]
- = function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+ operands[0] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+ SFUNC_GOT).sym;
operands[0] = force_reg (SImode, operands[0]);
emit_move_insn (r0, func);
@@ -9784,7 +9859,7 @@ label:
emit_insn (gen_symGOTPLT2reg (reg, XEXP (operands[0], 0)));
XEXP (operands[0], 0) = reg;
}
- if (!flag_pic && TARGET_SH2A
+ if (!flag_pic && !TARGET_FDPIC && TARGET_SH2A
&& MEM_P (operands[0])
&& GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF)
{
@@ -9795,7 +9870,7 @@ label:
DONE;
}
}
- if (flag_pic && TARGET_SH2
+ if ((flag_pic || TARGET_FDPIC) && TARGET_SH2
&& MEM_P (operands[0])
&& GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF)
{
@@ -9808,7 +9883,13 @@ label:
operands[1] = operands[2];
}
- emit_call_insn (gen_calli (operands[0], operands[1]));
+ if (TARGET_FDPIC)
+ {
+ operands[0] = sh_load_function_descriptor (operands[0]);
+ emit_call_insn (gen_calli_fdpic (operands[0], operands[1]));
+ }
+ else
+ emit_call_insn (gen_calli (operands[0], operands[1]));
DONE;
})
@@ -9888,7 +9969,7 @@ label:
emit_insn (gen_force_mode_for_call ());
operands[0] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
- SFUNC_GOT);
+ SFUNC_GOT).sym;
operands[0] = force_reg (SImode, operands[0]);
emit_move_insn (r0, func);
@@ -9913,6 +9994,12 @@ label:
(clobber (reg:SI PR_REG))])]
""
{
+ if (TARGET_FDPIC)
+ {
+ rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+ emit_move_insn (pic_reg, sh_get_fdpic_reg_initial_val ());
+ }
+
if (TARGET_SHMEDIA)
{
operands[1] = shmedia_prepare_call_address (operands[1], 0);
@@ -9948,8 +10035,8 @@ label:
run out of registers when adjusting fpscr for the call. */
emit_insn (gen_force_mode_for_call ());
- operands[1]
- = function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+ operands[1] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+ SFUNC_GOT).sym;
operands[1] = force_reg (SImode, operands[1]);
emit_move_insn (r0, func);
@@ -9975,7 +10062,7 @@ label:
emit_insn (gen_symGOTPLT2reg (reg, XEXP (operands[1], 0)));
XEXP (operands[1], 0) = reg;
}
- if (!flag_pic && TARGET_SH2A
+ if (!flag_pic && !TARGET_FDPIC && TARGET_SH2A
&& MEM_P (operands[1])
&& GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF)
{
@@ -9986,7 +10073,7 @@ label:
DONE;
}
}
- if (flag_pic && TARGET_SH2
+ if ((flag_pic || TARGET_FDPIC) && TARGET_SH2
&& MEM_P (operands[1])
&& GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF)
{
@@ -9997,7 +10084,14 @@ label:
else
operands[1] = force_reg (SImode, XEXP (operands[1], 0));
- emit_call_insn (gen_call_valuei (operands[0], operands[1], operands[2]));
+ if (TARGET_FDPIC)
+ {
+ operands[1] = sh_load_function_descriptor (operands[1]);
+ emit_call_insn (gen_call_valuei_fdpic (operands[0], operands[1],
+ operands[2]));
+ }
+ else
+ emit_call_insn (gen_call_valuei (operands[0], operands[1], operands[2]));
DONE;
})
@@ -10006,7 +10100,21 @@ label:
(match_operand 1 "" ""))
(use (reg:SI FPSCR_MODES_REG))
(return)]
- "TARGET_SH1"
+ "TARGET_SH1 && !TARGET_FDPIC"
+ "jmp @%0%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+(define_insn "sibcalli_fdpic"
+ [(call (mem:SI (match_operand:SI 0 "register_operand" "k"))
+ (match_operand 1))
+ (use (reg:SI FPSCR_MODES_REG))
+ (use (reg:SI PIC_REG))
+ (return)]
+ "TARGET_FDPIC"
"jmp @%0%#"
[(set_attr "needs_delay_slot" "yes")
(set (attr "fp_mode")
@@ -10020,7 +10128,25 @@ label:
(use (match_operand 2 "" ""))
(use (reg:SI FPSCR_MODES_REG))
(return)]
- "TARGET_SH2"
+ "TARGET_SH2 && !TARGET_FDPIC"
+{
+ return "braf %0" "\n"
+ "%O2:%#";
+}
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+(define_insn "sibcalli_pcrel_fdpic"
+ [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "k"))
+ (match_operand 1))
+ (use (match_operand 2))
+ (use (reg:SI FPSCR_MODES_REG))
+ (use (reg:SI PIC_REG))
+ (return)]
+ "TARGET_SH2 && TARGET_FDPIC"
{
return "braf %0" "\n"
"%O2:%#";
@@ -10053,7 +10179,7 @@ label:
(use (reg:SI FPSCR_MODES_REG))
(clobber (match_scratch:SI 2 "=&k"))
(return)]
- "TARGET_SH2"
+ "TARGET_SH2 && !TARGET_FDPIC"
"#"
"reload_completed"
[(const_int 0)]
@@ -10073,6 +10199,33 @@ label:
(const_string "single") (const_string "double")))
(set_attr "type" "jump_ind")])
+(define_insn_and_split "sibcall_pcrel_fdpic"
+ [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" ""))
+ (match_operand 1))
+ (use (reg:SI FPSCR_MODES_REG))
+ (use (reg:SI PIC_REG))
+ (clobber (match_scratch:SI 2 "=k"))
+ (return)]
+ "TARGET_SH2 && TARGET_FDPIC"
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ rtx lab = PATTERN (gen_call_site ());
+ rtx call_insn;
+
+ sh_expand_sym_label2reg (operands[2], operands[0], lab, true);
+ call_insn = emit_call_insn (gen_sibcalli_pcrel_fdpic (operands[2], operands[1],
+ copy_rtx (lab)));
+ SIBLING_CALL_P (call_insn) = 1;
+ DONE;
+}
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
(define_insn "sibcall_compact"
[(call (mem:SI (match_operand:SI 0 "register_operand" "k,k"))
(match_operand 1 "" ""))
@@ -10117,6 +10270,12 @@ label:
(return)])]
""
{
+ if (TARGET_FDPIC)
+ {
+ rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+ emit_move_insn (pic_reg, sh_get_fdpic_reg_initial_val ());
+ }
+
if (TARGET_SHMEDIA)
{
operands[0] = shmedia_prepare_call_address (operands[0], 1);
@@ -10161,8 +10320,8 @@ label:
run out of registers when adjusting fpscr for the call. */
emit_insn (gen_force_mode_for_call ());
- operands[0]
- = function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+ operands[0] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+ SFUNC_GOT).sym;
operands[0] = force_reg (SImode, operands[0]);
/* We don't need a return trampoline, since the callee will
@@ -10188,7 +10347,7 @@ label:
emit_insn (gen_symGOT2reg (reg, XEXP (operands[0], 0)));
XEXP (operands[0], 0) = reg;
}
- if (flag_pic && TARGET_SH2
+ if ((flag_pic || TARGET_FDPIC) && TARGET_SH2
&& MEM_P (operands[0])
&& GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF
/* The PLT needs the PIC register, but the epilogue would have
@@ -10196,13 +10355,24 @@ label:
static functions. */
&& SYMBOL_REF_LOCAL_P (XEXP (operands[0], 0)))
{
- emit_call_insn (gen_sibcall_pcrel (XEXP (operands[0], 0), operands[1]));
+ if (TARGET_FDPIC)
+ emit_call_insn (gen_sibcall_pcrel_fdpic (XEXP (operands[0], 0),
+ operands[1]));
+ else
+ emit_call_insn (gen_sibcall_pcrel (XEXP (operands[0], 0),
+ operands[1]));
DONE;
}
else
operands[0] = force_reg (SImode, XEXP (operands[0], 0));
- emit_call_insn (gen_sibcalli (operands[0], operands[1]));
+ if (TARGET_FDPIC)
+ {
+ operands[0] = sh_load_function_descriptor (operands[0]);
+ emit_call_insn (gen_sibcalli_fdpic (operands[0], operands[1]));
+ }
+ else
+ emit_call_insn (gen_sibcalli (operands[0], operands[1]));
DONE;
})
@@ -10212,7 +10382,22 @@ label:
(match_operand 2 "" "")))
(use (reg:SI FPSCR_MODES_REG))
(return)]
- "TARGET_SH1"
+ "TARGET_SH1 && !TARGET_FDPIC"
+ "jmp @%1%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+(define_insn "sibcall_valuei_fdpic"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "register_operand" "k"))
+ (match_operand 2)))
+ (use (reg:SI FPSCR_MODES_REG))
+ (use (reg:SI PIC_REG))
+ (return)]
+ "TARGET_FDPIC"
"jmp @%1%#"
[(set_attr "needs_delay_slot" "yes")
(set (attr "fp_mode")
@@ -10227,7 +10412,26 @@ label:
(use (match_operand 3 "" ""))
(use (reg:SI FPSCR_MODES_REG))
(return)]
- "TARGET_SH2"
+ "TARGET_SH2 && !TARGET_FDPIC"
+{
+ return "braf %1" "\n"
+ "%O3:%#";
+}
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+(define_insn "sibcall_valuei_pcrel_fdpic"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "k"))
+ (match_operand 2)))
+ (use (match_operand 3))
+ (use (reg:SI FPSCR_MODES_REG))
+ (use (reg:SI PIC_REG))
+ (return)]
+ "TARGET_SH2 && TARGET_FDPIC"
{
return "braf %1" "\n"
"%O3:%#";
@@ -10245,7 +10449,7 @@ label:
(use (reg:SI FPSCR_MODES_REG))
(clobber (match_scratch:SI 3 "=&k"))
(return)]
- "TARGET_SH2"
+ "TARGET_SH2 && !TARGET_FDPIC"
"#"
"reload_completed"
[(const_int 0)]
@@ -10258,6 +10462,38 @@ label:
operands[3],
operands[2],
copy_rtx (lab)));
+
+ SIBLING_CALL_P (call_insn) = 1;
+ DONE;
+}
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+(define_insn_and_split "sibcall_value_pcrel_fdpic"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" ""))
+ (match_operand 2)))
+ (use (reg:SI FPSCR_MODES_REG))
+ (use (reg:SI PIC_REG))
+ (clobber (match_scratch:SI 3 "=k"))
+ (return)]
+ "TARGET_SH2 && TARGET_FDPIC"
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ rtx lab = PATTERN (gen_call_site ());
+ rtx call_insn;
+
+ sh_expand_sym_label2reg (operands[3], operands[1], lab, true);
+ call_insn = emit_call_insn (gen_sibcall_valuei_pcrel_fdpic (operands[0],
+ operands[3],
+ operands[2],
+ copy_rtx (lab)));
+
SIBLING_CALL_P (call_insn) = 1;
DONE;
}
@@ -10314,6 +10550,12 @@ label:
(return)])]
""
{
+ if (TARGET_FDPIC)
+ {
+ rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+ emit_move_insn (pic_reg, sh_get_fdpic_reg_initial_val ());
+ }
+
if (TARGET_SHMEDIA)
{
operands[1] = shmedia_prepare_call_address (operands[1], 1);
@@ -10359,8 +10601,8 @@ label:
run out of registers when adjusting fpscr for the call. */
emit_insn (gen_force_mode_for_call ());
- operands[1]
- = function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+ operands[1] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+ SFUNC_GOT).sym;
operands[1] = force_reg (SImode, operands[1]);
/* We don't need a return trampoline, since the callee will
@@ -10387,7 +10629,7 @@ label:
emit_insn (gen_symGOT2reg (reg, XEXP (operands[1], 0)));
XEXP (operands[1], 0) = reg;
}
- if (flag_pic && TARGET_SH2
+ if ((flag_pic || TARGET_FDPIC) && TARGET_SH2
&& MEM_P (operands[1])
&& GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
/* The PLT needs the PIC register, but the epilogue would have
@@ -10395,15 +10637,28 @@ label:
static functions. */
&& SYMBOL_REF_LOCAL_P (XEXP (operands[1], 0)))
{
- emit_call_insn (gen_sibcall_value_pcrel (operands[0],
- XEXP (operands[1], 0),
- operands[2]));
+ if (TARGET_FDPIC)
+ emit_call_insn (gen_sibcall_value_pcrel_fdpic (operands[0],
+ XEXP (operands[1], 0),
+ operands[2]));
+ else
+ emit_call_insn (gen_sibcall_value_pcrel (operands[0],
+ XEXP (operands[1], 0),
+ operands[2]));
DONE;
}
else
operands[1] = force_reg (SImode, XEXP (operands[1], 0));
- emit_call_insn (gen_sibcall_valuei (operands[0], operands[1], operands[2]));
+ if (TARGET_FDPIC)
+ {
+ operands[1] = sh_load_function_descriptor (operands[1]);
+ emit_call_insn (gen_sibcall_valuei_fdpic (operands[0], operands[1],
+ operands[2]));
+ }
+ else
+ emit_call_insn (gen_sibcall_valuei (operands[0], operands[1],
+ operands[2]));
DONE;
})
@@ -10487,7 +10742,7 @@ label:
emit_insn (gen_force_mode_for_call ());
operands[1] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
- SFUNC_GOT);
+ SFUNC_GOT).sym;
operands[1] = force_reg (SImode, operands[1]);
emit_move_insn (r0, func);
@@ -10685,6 +10940,13 @@ label:
DONE;
}
+ if (TARGET_FDPIC)
+ {
+ rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+ emit_move_insn (pic_reg, sh_get_fdpic_reg_initial_val ());
+ DONE;
+ }
+
operands[1] = gen_rtx_REG (Pmode, PIC_REG);
operands[2] = gen_rtx_SYMBOL_REF (VOIDmode, GOT_SYMBOL_NAME);
@@ -10820,6 +11082,9 @@ label:
rtx mem;
bool stack_chk_guard_p = false;
+ rtx picreg = TARGET_FDPIC ? sh_get_fdpic_reg_initial_val ()
+ : gen_rtx_REG (Pmode, PIC_REG);
+
operands[2] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
operands[3] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
@@ -10862,8 +11127,7 @@ label:
if (stack_chk_guard_p)
emit_insn (gen_chk_guard_add (operands[3], operands[2]));
else
- emit_move_insn (operands[3], gen_rtx_PLUS (Pmode, operands[2],
- gen_rtx_REG (Pmode, PIC_REG)));
+ emit_move_insn (operands[3], gen_rtx_PLUS (Pmode, operands[2], picreg));
/* N.B. This is not constant for a GOTPLT relocation. */
mem = gen_rtx_MEM (Pmode, operands[3]);
@@ -10894,6 +11158,26 @@ label:
DONE;
})
+(define_expand "sym2GOTFUNCDESC"
+ [(const (unspec [(match_operand 0)] UNSPEC_GOTFUNCDESC))]
+ "TARGET_FDPIC"
+ "")
+
+(define_expand "symGOTFUNCDESC2reg"
+ [(match_operand 0) (match_operand 1)]
+ "TARGET_FDPIC"
+{
+ rtx gotsym, insn;
+
+ gotsym = gen_sym2GOTFUNCDESC (operands[1]);
+ PUT_MODE (gotsym, Pmode);
+ insn = emit_insn (gen_symGOT_load (operands[0], gotsym));
+
+ MEM_READONLY_P (SET_SRC (PATTERN (insn))) = 1;
+
+ DONE;
+})
+
(define_expand "symGOTPLT2reg"
[(match_operand 0 "" "") (match_operand 1 "" "")]
""
@@ -10920,18 +11204,41 @@ label:
? operands[0]
: gen_reg_rtx (GET_MODE (operands[0])));
+ rtx picreg = TARGET_FDPIC ? sh_get_fdpic_reg_initial_val ()
+ : gen_rtx_REG (Pmode, PIC_REG);
+
gotoffsym = gen_sym2GOTOFF (operands[1]);
PUT_MODE (gotoffsym, Pmode);
emit_move_insn (t, gotoffsym);
- insn = emit_move_insn (operands[0],
- gen_rtx_PLUS (Pmode, t,
- gen_rtx_REG (Pmode, PIC_REG)));
+ insn = emit_move_insn (operands[0], gen_rtx_PLUS (Pmode, t, picreg));
set_unique_reg_note (insn, REG_EQUAL, operands[1]);
DONE;
})
+(define_expand "sym2GOTOFFFUNCDESC"
+ [(const (unspec [(match_operand 0)] UNSPEC_GOTOFFFUNCDESC))]
+ "TARGET_FDPIC"
+ "")
+
+(define_expand "symGOTOFFFUNCDESC2reg"
+ [(match_operand 0) (match_operand 1)]
+ "TARGET_FDPIC"
+{
+ rtx picreg = sh_get_fdpic_reg_initial_val ();
+ rtx gotoffsym;
+ rtx t = (!can_create_pseudo_p ()
+ ? operands[0]
+ : gen_reg_rtx (GET_MODE (operands[0])));
+
+ gotoffsym = gen_sym2GOTOFFFUNCDESC (operands[1]);
+ PUT_MODE (gotoffsym, Pmode);
+ emit_move_insn (t, gotoffsym);
+ emit_move_insn (operands[0], gen_rtx_PLUS (Pmode, t, picreg));
+ DONE;
+})
+
(define_expand "symPLT_label2reg"
[(set (match_operand:SI 0 "" "")
(const:SI
@@ -12688,18 +12995,22 @@ label:
(define_insn "block_move_real"
[(parallel [(set (mem:BLK (reg:SI R4_REG))
(mem:BLK (reg:SI R5_REG)))
- (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+ (use (match_operand 1 "" "Z,Ccl"))
(clobber (reg:SI PR_REG))
(clobber (reg:SI R0_REG))])]
"TARGET_SH1 && ! TARGET_HARD_SH4"
- "jsr @%0%#"
+ "@
+ jsr @%0%#
+ bsrf %0\n%O1:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
(define_insn "block_lump_real"
[(parallel [(set (mem:BLK (reg:SI R4_REG))
(mem:BLK (reg:SI R5_REG)))
- (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+ (use (match_operand 1 "" "Z,Ccl"))
(use (reg:SI R6_REG))
(clobber (reg:SI PR_REG))
(clobber (reg:SI T_REG))
@@ -12708,27 +13019,33 @@ label:
(clobber (reg:SI R6_REG))
(clobber (reg:SI R0_REG))])]
"TARGET_SH1 && ! TARGET_HARD_SH4"
- "jsr @%0%#"
+ "@
+ jsr @%0%#
+ bsrf %0\n%O1:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
(define_insn "block_move_real_i4"
[(parallel [(set (mem:BLK (reg:SI R4_REG))
(mem:BLK (reg:SI R5_REG)))
- (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+ (use (match_operand 1 "" "Z,Ccl"))
(clobber (reg:SI PR_REG))
(clobber (reg:SI R0_REG))
(clobber (reg:SI R1_REG))
(clobber (reg:SI R2_REG))])]
"TARGET_HARD_SH4"
- "jsr @%0%#"
+ "@
+ jsr @%0%#
+ bsrf %0\n%O1:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
(define_insn "block_lump_real_i4"
[(parallel [(set (mem:BLK (reg:SI R4_REG))
(mem:BLK (reg:SI R5_REG)))
- (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+ (use (match_operand 1 "" "Z,Ccl"))
(use (reg:SI R6_REG))
(clobber (reg:SI PR_REG))
(clobber (reg:SI T_REG))
@@ -12740,7 +13057,9 @@ label:
(clobber (reg:SI R2_REG))
(clobber (reg:SI R3_REG))])]
"TARGET_HARD_SH4"
- "jsr @%0%#"
+ "@
+ jsr @%0%#
+ bsrf %0\n%O1:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
diff --git a/gcc/config/sh/sh.opt b/gcc/config/sh/sh.opt
index 8875b5d..c2e8aca 100644
--- a/gcc/config/sh/sh.opt
+++ b/gcc/config/sh/sh.opt
@@ -264,6 +264,10 @@ mdivsi3_libfunc=
Target RejectNegative Joined Var(sh_divsi3_libfunc) Init("")
Specify name for 32 bit signed division function
+mfdpic
+Target Report Var(TARGET_FDPIC) Init(0)
+Generate ELF FDPIC code
+
mfmovd
Target RejectNegative Mask(FMOVD)
Enable the use of 64-bit floating point registers in fmov instructions. See -mdalign if 64-bit alignment is required.
diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 1fd773e..fe57b97 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -1810,6 +1810,9 @@ When neither of these configure options are used, the default will be
128-bit @code{long double} when built against GNU C Library 2.4 and later,
64-bit @code{long double} otherwise.
+@item --enable-fdpic
+On SH Linux systems, generate ELF FDPIC code by default.
+
@item --with-gmp=@var{pathname}
@itemx --with-gmp-include=@var{pathname}
@itemx --with-gmp-lib=@var{pathname}
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index ebfaaa1..8b26eac 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -21178,6 +21178,10 @@ in effect.
Prefer zero-displacement conditional branches for conditional move instruction
patterns. This can result in faster code on the SH4 processor.
+@item -mfdpic
+@opindex mfdpic
+Generate code using the FDPIC ABI.
+
@end table
@node Solaris 2 Options
diff --git a/include/longlong.h b/include/longlong.h
index a0b2ce1..213df5d 100644
--- a/include/longlong.h
+++ b/include/longlong.h
@@ -1102,6 +1102,33 @@ extern UDItype __umulsidi3 (USItype, USItype);
/* This is the same algorithm as __udiv_qrnnd_c. */
#define UDIV_NEEDS_NORMALIZATION 1
+#ifdef __FDPIC__
+/* FDPIC needs a special version of the asm fragment to extract the
+ code address from the function descriptor. __udiv_qrnnd_16 is
+ assumed to be local and not to use the GOT, so loading r12 is
+ not needed. */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { \
+ extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
+ __attribute__ ((visibility ("hidden"))); \
+ /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
+ __asm__ ( \
+ "mov%M4 %4,r5\n" \
+" swap.w %3,r4\n" \
+" swap.w r5,r6\n" \
+" mov.l @%5,r2\n" \
+" jsr @r2\n" \
+" shll16 r6\n" \
+" swap.w r4,r4\n" \
+" mov.l @%5,r2\n" \
+" jsr @r2\n" \
+" swap.w r1,%0\n" \
+" or r1,%0" \
+ : "=r" (q), "=&z" (r) \
+ : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
+ : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
+ } while (0)
+#else
#define udiv_qrnnd(q, r, n1, n0, d) \
do { \
extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
@@ -1121,6 +1148,7 @@ extern UDItype __umulsidi3 (USItype, USItype);
: "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
: "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
} while (0)
+#endif
#define UDIV_TIME 80
diff --git a/libitm/config/sh/sjlj.S b/libitm/config/sh/sjlj.S
index 410cef6..76ec6df 100644
--- a/libitm/config/sh/sjlj.S
+++ b/libitm/config/sh/sjlj.S
@@ -58,9 +58,6 @@ _ITM_beginTransaction:
jsr @r1
mov r15, r5
#else
- mova .Lgot, r0
- mov.l .Lgot, r12
- add r0, r12
mov.l .Lbegin, r1
bsrf r1
mov r15, r5
@@ -80,13 +77,11 @@ _ITM_beginTransaction:
cfi_endproc
.align 2
-.Lgot:
- .long _GLOBAL_OFFSET_TABLE_
.Lbegin:
#if defined HAVE_ATTRIBUTE_VISIBILITY || !defined __PIC__
.long GTM_begin_transaction
#else
- .long GTM_begin_transaction@PLT-(.Lbegin0-.)
+ .long GTM_begin_transaction@PCREL-(.Lbegin0-.)
#endif
.size _ITM_beginTransaction, . - _ITM_beginTransaction
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v3] SH FDPIC backend support
2015-10-21 3:55 [PATCH v3] SH FDPIC backend support Rich Felker
@ 2015-10-21 13:37 ` Oleg Endo
2015-10-21 20:16 ` Rich Felker
0 siblings, 1 reply; 14+ messages in thread
From: Oleg Endo @ 2015-10-21 13:37 UTC (permalink / raw)
To: Rich Felker; +Cc: gcc-patches
Rich,
Thanks for the updated patch.
Please do not start new threads for a continuation of an existing
thread. This makes it difficult to track in the archives.
On Tue, 2015-10-20 at 23:41 -0400, Rich Felker wrote:
> Attached is a hopefully near-ready-for-commit version of the SH/FDPIC
> patch. I believe I've addressed all comments by Oleg and Kaz on the
> previous versions of the patch. I'm still working on drafting the
> Changelog entry (there's a lot to go in it, and I might very well be
> going into more detail than is needed).
Other than the missing ChangeLog: How did you test the patch?
> One thing I've considered doing, since TARGET_FDPIC implies flag_pic
> now, is removing all parts of the patch that just replace checks for
> flag_pic with (flag_pic || TARGET_FDPIC). Would doing this be
> desirable? It shrinks the patch a bit but of course more strongly
> codes the assumption that TARGET_FDPIC implies flag_pic.
If FDPIC only ever will make sense in combination with flag_pic != 0,
then I guess this could be done. If you do that, please add a comment
above this hunk:
> + if (TARGET_FDPIC && !flag_pic)
> + flag_pic = 2;
Some other nits:
> rtx lab = function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC).lab;
Break overlong lines to fit into 80 columns. E.g.
rtx lab = function_symbol (func_addr_rtx, "__movmemSI12_i4",
SFUNC_STATIC).lab;
> if (TARGET_FDPIC
> && (TARGET_SHMEDIA || TARGET_SHCOMPACT || !TARGET_SH2))
> sorry ("non-SH2 FDPIC");
Drop SH5 stuff.
> if (TARGET_FDPIC)
> {
> emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
> sh_get_fdpic_reg_initial_val ());
> }
>
Remove braces around single statements.
> return (GET_CODE (x) != CONST_DOUBLE
> || mode == DFmode || mode == SFmode
> || mode == DImode || GET_MODE (x) == VOIDmode);
Remove unnecessary parens around return statements.
When applying the patch I'm getting:
patching file gcc/config/sh/sh-protos.h
(Stripping trailing CRs from patch; use --binary to disable.)
Maybe something with your editor settings?
Cheers,
Oleg
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v3] SH FDPIC backend support
2015-10-21 13:37 ` Oleg Endo
@ 2015-10-21 20:16 ` Rich Felker
2015-10-23 7:22 ` [PATCH v4] " Rich Felker
0 siblings, 1 reply; 14+ messages in thread
From: Rich Felker @ 2015-10-21 20:16 UTC (permalink / raw)
To: Oleg Endo; +Cc: gcc-patches
On Wed, Oct 21, 2015 at 10:17:51PM +0900, Oleg Endo wrote:
> Rich,
>
> Thanks for the updated patch.
> Please do not start new threads for a continuation of an existing
> thread. This makes it difficult to track in the archives.
>
> On Tue, 2015-10-20 at 23:41 -0400, Rich Felker wrote:
> > Attached is a hopefully near-ready-for-commit version of the SH/FDPIC
> > patch. I believe I've addressed all comments by Oleg and Kaz on the
> > previous versions of the patch. I'm still working on drafting the
> > Changelog entry (there's a lot to go in it, and I might very well be
> > going into more detail than is needed).
>
> Other than the missing ChangeLog: How did you test the patch?
I've tested the new functionality (FDPIC) building musl, busybox,
dropbear, and other software. I believe Kaz tested that sh4 had no
regressions in the gcc tests with the patch applied.
> > One thing I've considered doing, since TARGET_FDPIC implies flag_pic
> > now, is removing all parts of the patch that just replace checks for
> > flag_pic with (flag_pic || TARGET_FDPIC). Would doing this be
> > desirable? It shrinks the patch a bit but of course more strongly
> > codes the assumption that TARGET_FDPIC implies flag_pic.
>
> If FDPIC only ever will make sense in combination with flag_pic != 0,
> then I guess this could be done.
The original patch did not force flag_pic, but I was getting broken
codegen and/or ICE without it and couldn't track down the source. In
any case, FDPIC _is_ position-independent code (an even more
constrained variant of it) so it makes sense for flag_pic to be set, I
think.
> If you do that, please add a comment
> above this hunk:
>
> > + if (TARGET_FDPIC && !flag_pic)
> > + flag_pic = 2;
OK, if I make this change I'll do that.
> Some other nits:
>
> > rtx lab = function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC).lab;
>
> Break overlong lines to fit into 80 columns. E.g.
>
> rtx lab = function_symbol (func_addr_rtx, "__movmemSI12_i4",
> SFUNC_STATIC).lab;
OK. For some of these I wasn't sure of the proper style for wrapping
them so I figured I'd just wait to see what you say.
> > if (TARGET_FDPIC
> > && (TARGET_SHMEDIA || TARGET_SHCOMPACT || !TARGET_SH2))
> > sorry ("non-SH2 FDPIC");
>
> Drop SH5 stuff.
Without this, passing -mfdpic on sh5 will probably lead to ICE or very
bogus codegen since there are lots of places that assume FDPIC and sh5
are mutually exclusive. If you're ok with that I can drop it, though.
> > if (TARGET_FDPIC)
> > {
> > emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
> > sh_get_fdpic_reg_initial_val ());
> > }
> >
>
> Remove braces around single statements.
OK. I prefer that too but I wasn't sure what style GCC favors.
> > return (GET_CODE (x) != CONST_DOUBLE
> > || mode == DFmode || mode == SFmode
> > || mode == DImode || GET_MODE (x) == VOIDmode);
>
> Remove unnecessary parens around return statements.
OK.
> When applying the patch I'm getting:
> patching file gcc/config/sh/sh-protos.h
> (Stripping trailing CRs from patch; use --binary to disable.)
>
> Maybe something with your editor settings?
I have my editor (Emacs) set up to always force unix text mode, so any
CRs show up as an editable/deletable ^M in the buffer, and I can't
find any. Grepping for literal CRs also fails. Is it possible the mail
system botched this?
Rich
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v4] SH FDPIC backend support
2015-10-21 20:16 ` Rich Felker
@ 2015-10-23 7:22 ` Rich Felker
2015-10-25 14:32 ` Oleg Endo
0 siblings, 1 reply; 14+ messages in thread
From: Rich Felker @ 2015-10-23 7:22 UTC (permalink / raw)
To: gcc-patches
Here's my updated version of the FDPIC patch with all requested
changes made and Changelog added. I've included all the original
authors. This is my first time writing such an extensive Changelog
entry so please let me know if there are things I got wrong.
Rich
2010-08-19 Daniel Jacobowitz <dan@codesourcery.com>
Joseph Myers <joseph@codesourcery.com>
Mark Shinwell <shinwell@codesourcery.com>
Andrew Stubbs <ams@codesourcery.com>
Rich Felker <dalias@libc.org>
gcc/
* config.gcc: Handle --enable-fdpic.
* config/sh/constraints.md: Add Ccl constraint.
* config/sh/linux.h (SUBTARGET_LINK_EMUL_SUFFIX): Handle -mfdpic.
* config/sh/sh-c.c (sh_cpu_cpp_builtins): Add __FDPIC__ and
__SH_FDPIC__.
* config/sh/sh-mem.cc (expand_block_move): Support FDPIC
for calls to library functions.
* config/sh/sh-protos.h (function_symbol): Adapt for FDPIC
support.
(sh_get_fdpic_reg_initial_val, sh_load_function_descriptor):
Add functions for FDPIC support.
* config/sh/sh.c (sh_assemble_integer,
sh_cannot_force_const_mem_p, sh_reloc_rw_mask,
TARGET_ASM_INTEGER, TARGET_CANNOT_FORCE_CONST_MEM,
TARGET_ASM_RELOC_RW_MASK): New for FDPIC.
(sh_option_override): Force -fPIC and -fno-function-cse for
FDPIC.
(sh_asm_output_addr_const_extra): Add function descriptor
reference outputs.
(prepare_move_operands): Use FDPIC initial GOT register for
TLS-related GOT access; inhibit cross-section address offset
constants for FDPIC.
(sh_assemble_integer): Produce function descriptor addresses
for FDPIC function address values.
(sh_cannot_copy_insn_p): Inhibit copying instructions that are
FDPIC PC-relative call sites.
(expand_ashiftrt): Support FDPIC for call to library function.
(sh_expand_prologue): Inhibit PC-relative GOT address load for
FDPIC; for FDPIC, GOT address is a hidden argument in r12.
(nonpic_symbol_mentioned_p): Add cases for UNSPEC_GOTFUNCDESC
and UNSPEC_GOTOFFFUNCDESC.
(legitimize_pic_address): Resolve function symbols to function
descriptors for FDPIC; do not use GOT-relative addressing for
local data that may be read-only on FDPIC.
(sh_trampoline_init): Generate FDPIC trampolines.
(sh_expand_sym_label2reg): Don't assume sibcalls are local;
this need not be true for FDPIC.
(sh_output_mi_thunk): Generate FDPIC call.
(function_symbol): For SFUNC_STATIC on FDPIC, use PC-relative
addressing rather than GOT-relative addressing; generate call
site labels to make this possible.
(sh_conditional_register_usage): Mark GOT register usage by
FDPIC.
(sh_legitimate_constant_p): Impose FDPIC constant constraints.
(sh_cannot_force_const_mem_p, sh_load_function_descriptor,
sh_get_fdpic_reg_initial_val, sh_reloc_rw_mask): New for
FDPIC.
* config/sh/sh.h (SUBTARGET_ASM_SPEC,
SUBTARGET_LINK_EMUL_SUFFIX): Handle -mfdpic.
(FDPIC_SELF_SPECS): New self specs to insert -mfdpic by
default if configured with --enable-fdpic.
(TRAMPOLINE_SIZE): Select trampoline size for FDPIC.
(PIC_OFFSET_TABLE_REG_CALL_CLOBBERED,
SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P): New for FDPIC.
(ASM_PREFERRED_EH_DATA_FORMAT): Add EH format constraints for
FDPIC.
* config/sh/sh.md (R12_REG, UNSPEC_GOTFUNCDESC,
UNSPEC_GOTOFFFUNCDESC, calli_fdpic, call_valuei_fdpic,
sibcalli_fdpic, sibcalli_pcrel_fdpic, sibcall_pcrel_fdpic,
sibcall_valuei_fdpic, sibcall_valuei_pcrel_fdpic,
sibcall_value_pcrel_fdpic, sym2GOTFUNCDESC,
symGOTFUNCDESC2reg, sym2GOTOFFFUNCDESC,
symGOTOFFFUNCDESC2reg): New.
(udivsi3_i1, udivsi3_i4, udivsi3_i4_single, udivsi3,
divsi_inv_call_combine, divsi3_i4, divsi3_i4_single, divsi3,
ashlsi3, ashlsi3_d_call, ashrsi3_n, lshrsi3, lshrsi3_d_call,
calli, call_valuei, call, call_value, sibcalli,
sibcalli_pcrel, sibcall_pcrel, sibcall, sibcall_valuei,
sibcall_valuei_pcrel, sibcall_value_pcrel, sibcall_value,
GOTaddr2picreg, symGOT_load, symGOTOFF2reg, block_move_real,
block_lump_real, block_move_real_i4, block_lump_real_i4): Add
support for FDPIC.
(mulsi3, ic_invalidate_line, initialize_trampoline, call_pop,
call_value_pop): Adjust for new function_symbol signature.
* config/sh/sh.opt (-mfdpic): New option.
* doc/install.texi (Options specification): Add --enable-fdpic.
* doc/invoke.texi (SH Options): Add -mfdpic.
include/
* longlong.h (udiv_qrnnd): Add FDPIC compatible version.
libitm/
* config/sh/sjlj.S (_ITM_beginTransaction): Bypass PLT calling
GTM_begin_transaction for compatibility with FDPIC.
diff --git a/gcc/config.gcc b/gcc/config.gcc
index bf26776..ed118f3 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -2621,6 +2621,9 @@ sh-*-elf* | sh[12346l]*-*-elf* | \
tm_file="${tm_file} dbxelf.h elfos.h sh/elf.h"
case ${target} in
sh*-*-linux*) tmake_file="${tmake_file} sh/t-linux"
+ if test x$enable_fdpic = xyes; then
+ tm_defines="$tm_defines FDPIC_DEFAULT=1"
+ fi
tm_file="${tm_file} gnu-user.h linux.h glibc-stdint.h sh/linux.h" ;;
sh*-*-netbsd*)
tm_file="${tm_file} netbsd.h netbsd-elf.h sh/netbsd-elf.h"
diff --git a/gcc/config/sh/constraints.md b/gcc/config/sh/constraints.md
index 4d1eb2d..41c88a2 100644
--- a/gcc/config/sh/constraints.md
+++ b/gcc/config/sh/constraints.md
@@ -25,6 +25,7 @@
;; Bsc: SCRATCH - for the scratch register in movsi_ie in the
;; fldi0 / fldi0 cases
;; Cxx: Constants other than only CONST_INT
+;; Ccl: call site label
;; Css: signed 16-bit constant, literal or symbolic
;; Csu: unsigned 16-bit constant, literal or symbolic
;; Csy: label or symbol
@@ -233,6 +234,11 @@ (define_constraint "Bsc"
hence mova is being used, hence do not select this pattern."
(match_code "scratch"))
+(define_constraint "Ccl"
+ "A call site label, for bsrf."
+ (and (match_code "unspec")
+ (match_test "XINT (op, 1) == UNSPEC_CALLER")))
+
(define_constraint "Css"
"A signed 16-bit constant, literal or symbolic."
(and (match_code "const")
diff --git a/gcc/config/sh/linux.h b/gcc/config/sh/linux.h
index a9dd43a..5d4dd1f 100644
--- a/gcc/config/sh/linux.h
+++ b/gcc/config/sh/linux.h
@@ -69,7 +69,8 @@ along with GCC; see the file COPYING3. If not see
#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2"
#undef SUBTARGET_LINK_EMUL_SUFFIX
-#define SUBTARGET_LINK_EMUL_SUFFIX "_linux"
+#define SUBTARGET_LINK_EMUL_SUFFIX "%{mfdpic:_fd;:_linux}"
+
#undef SUBTARGET_LINK_SPEC
#define SUBTARGET_LINK_SPEC \
"%{shared:-shared} \
diff --git a/gcc/config/sh/sh-c.c b/gcc/config/sh/sh-c.c
index a98c148..01a12e6 100644
--- a/gcc/config/sh/sh-c.c
+++ b/gcc/config/sh/sh-c.c
@@ -141,6 +141,11 @@ sh_cpu_cpp_builtins (cpp_reader* pfile)
builtin_define ("__HITACHI__");
if (TARGET_FMOVD)
builtin_define ("__FMOVD_ENABLED__");
+ if (TARGET_FDPIC)
+ {
+ builtin_define ("__SH_FDPIC__");
+ builtin_define ("__FDPIC__");
+ }
builtin_define (TARGET_LITTLE_ENDIAN
? "__LITTLE_ENDIAN__" : "__BIG_ENDIAN__");
diff --git a/gcc/config/sh/sh-mem.cc b/gcc/config/sh/sh-mem.cc
index 23a7287..fe95ac9 100644
--- a/gcc/config/sh/sh-mem.cc
+++ b/gcc/config/sh/sh-mem.cc
@@ -123,10 +123,11 @@ expand_block_move (rtx *operands)
rtx r4 = gen_rtx_REG (SImode, 4);
rtx r5 = gen_rtx_REG (SImode, 5);
- function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
+ rtx lab = function_symbol (func_addr_rtx, "__movmemSI12_i4",
+ SFUNC_STATIC).lab;
force_into (XEXP (operands[0], 0), r4);
force_into (XEXP (operands[1], 0), r5);
- emit_insn (gen_block_move_real_i4 (func_addr_rtx));
+ emit_insn (gen_block_move_real_i4 (func_addr_rtx, lab));
return true;
}
else if (! optimize_size)
@@ -139,13 +140,14 @@ expand_block_move (rtx *operands)
rtx r6 = gen_rtx_REG (SImode, 6);
entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
- function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
+ rtx lab = function_symbol (func_addr_rtx, entry_name,
+ SFUNC_STATIC).lab;
force_into (XEXP (operands[0], 0), r4);
force_into (XEXP (operands[1], 0), r5);
dwords = bytes >> 3;
emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
- emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
+ emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab));
return true;
}
else
@@ -159,10 +161,10 @@ expand_block_move (rtx *operands)
rtx r5 = gen_rtx_REG (SImode, 5);
sprintf (entry, "__movmemSI%d", bytes);
- function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
+ rtx lab = function_symbol (func_addr_rtx, entry, SFUNC_STATIC).lab;
force_into (XEXP (operands[0], 0), r4);
force_into (XEXP (operands[1], 0), r5);
- emit_insn (gen_block_move_real (func_addr_rtx));
+ emit_insn (gen_block_move_real (func_addr_rtx, lab));
return true;
}
@@ -176,7 +178,7 @@ expand_block_move (rtx *operands)
rtx r5 = gen_rtx_REG (SImode, 5);
rtx r6 = gen_rtx_REG (SImode, 6);
- function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
+ rtx lab = function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC).lab;
force_into (XEXP (operands[0], 0), r4);
force_into (XEXP (operands[1], 0), r5);
@@ -189,7 +191,7 @@ expand_block_move (rtx *operands)
final_switch = 16 - ((bytes / 4) % 16);
while_loop = ((bytes / 4) / 16 - 1) * 16;
emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
- emit_insn (gen_block_lump_real (func_addr_rtx));
+ emit_insn (gen_block_lump_real (func_addr_rtx, lab));
return true;
}
diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h
index f94459f..c64a948 100644
--- a/gcc/config/sh/sh-protos.h
+++ b/gcc/config/sh/sh-protos.h
@@ -377,7 +377,19 @@ extern void fpscr_set_from_mem (int, HARD_REG_SET);
extern void sh_pr_interrupt (struct cpp_reader *);
extern void sh_pr_trapa (struct cpp_reader *);
extern void sh_pr_nosave_low_regs (struct cpp_reader *);
-extern rtx function_symbol (rtx, const char *, enum sh_function_kind);
+
+struct function_symbol_result
+{
+ function_symbol_result (void) : sym (NULL), lab (NULL) { }
+ function_symbol_result (rtx s, rtx l) : sym (s), lab (l) { }
+
+ rtx sym;
+ rtx lab;
+};
+
+extern function_symbol_result function_symbol (rtx, const char *,
+ sh_function_kind);
+extern rtx sh_get_fdpic_reg_initial_val (void);
extern rtx sh_get_pr_initial_val (void);
extern void sh_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree,
@@ -396,4 +408,5 @@ extern bool sh_hard_regno_mode_ok (unsigned int, machine_mode);
extern machine_mode sh_hard_regno_caller_save_mode (unsigned int, unsigned int,
machine_mode);
extern bool sh_can_use_simple_return_p (void);
+extern rtx sh_load_function_descriptor (rtx);
#endif /* ! GCC_SH_PROTOS_H */
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
index 904201b..36de065 100644
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -268,6 +268,7 @@ static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
HOST_WIDE_INT, tree);
static void sh_file_start (void);
+static bool sh_assemble_integer (rtx, unsigned, int);
static bool flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, const_rtx, void *);
static int shiftcosts (rtx);
@@ -276,6 +277,7 @@ static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx_insn *);
+static bool sh_cannot_force_const_mem_p (machine_mode, rtx);
static bool sh_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
static int sh_pr_n_sets (void);
@@ -421,6 +423,9 @@ static const struct attribute_spec sh_attribute_table[] =
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER sh_assemble_integer
+
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sh_register_move_cost
@@ -679,6 +684,9 @@ static const struct attribute_spec sh_attribute_table[] =
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM sh_cannot_force_const_mem_p
+
struct gcc_target targetm = TARGET_INITIALIZER;
\f
@@ -996,6 +1004,13 @@ sh_option_override (void)
if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
TARGET_ZDCBRANCH = 1;
+ /* FDPIC code is a special form of PIC, and the vast majority of code
+ generation constraints that apply to PIC also apply to FDPIC, so we
+ set flag_pic to avoid the need to check TARGET_FDPIC everywhere
+ flag_pic is checked. */
+ if (TARGET_FDPIC && !flag_pic)
+ flag_pic = 2;
+
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (! VALID_REGISTER_P (regno))
sh_register_names[regno][0] = '\0';
@@ -1687,6 +1702,14 @@ sh_asm_output_addr_const_extra (FILE *file, rtx x)
output_addr_const (file, XVECEXP (x, 0, 1));
fputs ("-.)", file);
break;
+ case UNSPEC_GOTFUNCDESC:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@GOTFUNCDESC", file);
+ break;
+ case UNSPEC_GOTOFFFUNCDESC:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@GOTOFFFUNCDESC", file);
+ break;
default:
return false;
}
@@ -1871,6 +1894,9 @@ prepare_move_operands (rtx operands[], machine_mode mode)
{
case TLS_MODEL_GLOBAL_DYNAMIC:
tga_ret = gen_rtx_REG (Pmode, R0_REG);
+ if (TARGET_FDPIC)
+ emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
+ sh_get_fdpic_reg_initial_val ());
emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
tmp = gen_reg_rtx (Pmode);
emit_move_insn (tmp, tga_ret);
@@ -1879,6 +1905,9 @@ prepare_move_operands (rtx operands[], machine_mode mode)
case TLS_MODEL_LOCAL_DYNAMIC:
tga_ret = gen_rtx_REG (Pmode, R0_REG);
+ if (TARGET_FDPIC)
+ emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
+ sh_get_fdpic_reg_initial_val ());
emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
tmp = gen_reg_rtx (Pmode);
@@ -1896,6 +1925,9 @@ prepare_move_operands (rtx operands[], machine_mode mode)
case TLS_MODEL_INITIAL_EXEC:
tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
tmp = gen_sym2GOTTPOFF (op1);
+ if (TARGET_FDPIC)
+ emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
+ sh_get_fdpic_reg_initial_val ());
emit_insn (gen_tls_initial_exec (tga_op1, tmp));
op1 = tga_op1;
break;
@@ -1922,6 +1954,21 @@ prepare_move_operands (rtx operands[], machine_mode mode)
operands[1] = op1;
}
}
+
+ if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
+ {
+ rtx base, offset;
+ split_const (operands[1], &base, &offset);
+ if (GET_CODE (base) == SYMBOL_REF
+ && !offset_within_block_p (base, INTVAL (offset)))
+ {
+ rtx tmp = can_create_pseudo_p () ? gen_reg_rtx (mode) : operands[0];
+ emit_move_insn (tmp, base);
+ if (!arith_operand (offset, mode))
+ offset = force_reg (mode, offset);
+ emit_insn (gen_add3_insn (operands[0], tmp, offset));
+ }
+ }
}
/* Implement the canonicalize_comparison target hook for the combine
@@ -3026,6 +3073,26 @@ sh_file_start (void)
}
}
\f
+/* Implementation of TARGET_ASM_INTEGER for SH. Pointers to functions
+ need to be output as pointers to function descriptors for
+ FDPIC. */
+
+static bool
+sh_assemble_integer (rtx value, unsigned int size, int aligned_p)
+{
+ if (TARGET_FDPIC
+ && size == UNITS_PER_WORD
+ && GET_CODE (value) == SYMBOL_REF
+ && SYMBOL_REF_FUNCTION_P (value))
+ {
+ fputs ("\t.long\t", asm_out_file);
+ output_addr_const (asm_out_file, value);
+ fputs ("@FUNCDESC\n", asm_out_file);
+ return true;
+ }
+ return default_assemble_integer (value, size, aligned_p);
+}
+\f
/* Check if PAT includes UNSPEC_CALLER unspec pattern. */
static bool
unspec_caller_rtx_p (rtx pat)
@@ -3061,6 +3128,19 @@ sh_cannot_copy_insn_p (rtx_insn *insn)
return false;
pat = PATTERN (insn);
+
+ if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == USE)
+ return false;
+
+ if (TARGET_FDPIC
+ && GET_CODE (pat) == PARALLEL)
+ {
+ rtx t = XVECEXP (pat, 0, XVECLEN (pat, 0) - 1);
+ if (GET_CODE (t) == USE
+ && unspec_caller_rtx_p (XEXP (t, 0)))
+ return true;
+ }
+
if (GET_CODE (pat) != SET)
return false;
pat = SET_SRC (pat);
@@ -4102,8 +4182,8 @@ expand_ashiftrt (rtx *operands)
/* Load the value into an arg reg and call a helper. */
emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
sprintf (func, "__ashiftrt_r4_%d", value);
- function_symbol (wrk, func, SFUNC_STATIC);
- emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
+ rtx lab = function_symbol (wrk, func, SFUNC_STATIC).lab;
+ emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk, lab));
emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
return true;
}
@@ -7954,7 +8034,9 @@ sh_expand_prologue (void)
stack_usage += d;
}
- if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
+ if (flag_pic
+ && !TARGET_FDPIC
+ && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
emit_insn (gen_GOTaddr2picreg (const0_rtx));
if (SHMEDIA_REGS_STACK_ADJUST ())
@@ -10458,7 +10540,9 @@ nonpic_symbol_mentioned_p (rtx x)
|| XINT (x, 1) == UNSPEC_PLT
|| XINT (x, 1) == UNSPEC_PCREL
|| XINT (x, 1) == UNSPEC_SYMOFF
- || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
+ || XINT (x, 1) == UNSPEC_PCREL_SYMOFF
+ || XINT (x, 1) == UNSPEC_GOTFUNCDESC
+ || XINT (x, 1) == UNSPEC_GOTOFFFUNCDESC))
return false;
fmt = GET_RTX_FORMAT (GET_CODE (x));
@@ -10493,7 +10577,28 @@ legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED,
if (reg == NULL_RTX)
reg = gen_reg_rtx (Pmode);
- emit_insn (gen_symGOTOFF2reg (reg, orig));
+ if (TARGET_FDPIC
+ && GET_CODE (orig) == SYMBOL_REF
+ && SYMBOL_REF_FUNCTION_P (orig))
+ {
+ /* Weak functions may be NULL which doesn't work with
+ GOTOFFFUNCDESC because the runtime offset is not known. */
+ if (SYMBOL_REF_WEAK (orig))
+ emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
+ else
+ emit_insn (gen_symGOTOFFFUNCDESC2reg (reg, orig));
+ }
+ else if (TARGET_FDPIC
+ && (GET_CODE (orig) == LABEL_REF
+ || (GET_CODE (orig) == SYMBOL_REF
+ && SYMBOL_REF_DECL (orig)
+ && (TREE_READONLY (SYMBOL_REF_DECL (orig))
+ || SYMBOL_REF_EXTERNAL_P (orig)
+ || DECL_SECTION_NAME(SYMBOL_REF_DECL(orig))) )))
+ /* In FDPIC, GOTOFF can only be used for writable data. */
+ emit_insn (gen_symGOT2reg (reg, orig));
+ else
+ emit_insn (gen_symGOTOFF2reg (reg, orig));
return reg;
}
else if (GET_CODE (orig) == SYMBOL_REF)
@@ -10501,7 +10606,10 @@ legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED,
if (reg == NULL_RTX)
reg = gen_reg_rtx (Pmode);
- emit_insn (gen_symGOT2reg (reg, orig));
+ if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (orig))
+ emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
+ else
+ emit_insn (gen_symGOT2reg (reg, orig));
return reg;
}
return orig;
@@ -11539,6 +11647,19 @@ sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
5 0008 00000000 l1: .long area
6 000c 00000000 l2: .long function
+ FDPIC needs a form that includes a function descriptor and
+ code to load the GOT register:
+ 0 0000 00000000 .long l0
+ 1 0004 00000000 .long gotval
+ 2 0008 D302 l0: mov.l l1,r3
+ 3 000a D203 mov.l l2,r2
+ 4 000c 6122 mov.l @r2,r1
+ 5 000e 5C21 mov.l @(4,r2),r12
+ 6 0010 412B jmp @r1
+ 7 0012 0009 nop
+ 8 0014 00000000 l1: .long area
+ 9 0018 00000000 l2: .long function
+
SH5 (compact) uses r1 instead of r3 for the static chain. */
@@ -11675,20 +11796,41 @@ sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
return;
}
- emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
- gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
- SImode));
- emit_move_insn (adjust_address (tramp_mem, SImode, 4),
- gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
- SImode));
- emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
- emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
+ if (TARGET_FDPIC)
+ {
+ rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8));
+ emit_move_insn (adjust_address (tramp_mem, SImode, 0), a);
+ emit_move_insn (adjust_address (tramp_mem, SImode, 4),
+ sh_get_fdpic_reg_initial_val ());
+ emit_move_insn (adjust_address (tramp_mem, SImode, 8),
+ gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd203d302 : 0xd302d203,
+ SImode));
+ emit_move_insn (adjust_address (tramp_mem, SImode, 12),
+ gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x5c216122 : 0x61225c21,
+ SImode));
+ emit_move_insn (adjust_address (tramp_mem, SImode, 16),
+ gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009412b : 0x412b0009,
+ SImode));
+ emit_move_insn (adjust_address (tramp_mem, SImode, 20), cxt);
+ emit_move_insn (adjust_address (tramp_mem, SImode, 24), fnaddr);
+ }
+ else
+ {
+ emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
+ gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
+ SImode));
+ emit_move_insn (adjust_address (tramp_mem, SImode, 4),
+ gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
+ SImode));
+ emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
+ emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
+ }
if (TARGET_HARD_SH4 || TARGET_SH5)
{
if (!TARGET_INLINE_IC_INVALIDATE
|| (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
emit_library_call (function_symbol (NULL, "__ic_invalidate",
- FUNCTION_ORDINARY),
+ FUNCTION_ORDINARY).sym,
LCT_NORMAL, VOIDmode, 1, tramp, SImode);
else
emit_insn (gen_ic_invalidate_line (tramp));
@@ -11718,7 +11860,7 @@ sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
&& (! TARGET_SHCOMPACT
|| crtl->args.info.stack_regs == 0)
&& ! sh_cfun_interrupt_handler_p ()
- && (! flag_pic
+ && (! flag_pic || TARGET_FDPIC
|| (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl)))
|| (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
}
@@ -11732,7 +11874,7 @@ sh_expand_sym_label2reg (rtx reg, rtx sym, rtx lab, bool sibcall_p)
if (!is_weak && SYMBOL_REF_LOCAL_P (sym))
emit_insn (gen_sym_label2reg (reg, sym, lab));
- else if (sibcall_p)
+ else if (sibcall_p && SYMBOL_REF_LOCAL_P (sym))
emit_insn (gen_symPCREL_label2reg (reg, sym, lab));
else
emit_insn (gen_symPLT_label2reg (reg, sym, lab));
@@ -12733,8 +12875,16 @@ sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
#endif
if (TARGET_SH2 && flag_pic)
{
- sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
- XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
+ if (TARGET_FDPIC)
+ {
+ sibcall = gen_sibcall_pcrel_fdpic (funexp, const0_rtx);
+ XEXP (XVECEXP (sibcall, 0, 3), 0) = scratch2;
+ }
+ else
+ {
+ sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
+ XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
+ }
}
else
{
@@ -12775,17 +12925,25 @@ sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
epilogue_completed = 0;
}
-rtx
-function_symbol (rtx target, const char *name, enum sh_function_kind kind)
-{
- rtx sym;
+/* Return an RTX pair for the address and call site label of a function
+ NAME of kind KIND, placing the result in TARGET if not NULL. For
+ SFUNC_STATIC, if FDPIC, the LAB member of result will be set to
+ (const_int 0) if jsr should be used, or a label_ref if bsrf should
+ be used. For FDPIC, both SFUNC_GOT and SFUNC_STATIC will return the
+ address of the function itself, not a function descriptor, so they
+ can only be used with functions not using the FDPIC register that
+ are known to be called directly without a PLT entry. */
+function_symbol_result
+function_symbol (rtx target, const char *name, sh_function_kind kind)
+{
/* If this is not an ordinary function, the name usually comes from a
string literal or an sprintf buffer. Make sure we use the same
string consistently, so that cse will be able to unify address loads. */
if (kind != FUNCTION_ORDINARY)
name = IDENTIFIER_POINTER (get_identifier (name));
- sym = gen_rtx_SYMBOL_REF (Pmode, name);
+ rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
+ rtx lab = const0_rtx;
SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
if (flag_pic)
switch (kind)
@@ -12802,14 +12960,26 @@ function_symbol (rtx target, const char *name, enum sh_function_kind kind)
}
case SFUNC_STATIC:
{
- /* ??? To allow cse to work, we use GOTOFF relocations.
- We could add combiner patterns to transform this into
- straight pc-relative calls with sym2PIC / bsrf when
- label load and function call are still 1:1 and in the
- same basic block during combine. */
rtx reg = target ? target : gen_reg_rtx (Pmode);
- emit_insn (gen_symGOTOFF2reg (reg, sym));
+ if (TARGET_FDPIC)
+ {
+ /* We use PC-relative calls, since GOTOFF can only refer
+ to writable data. This works along with
+ sh_sfunc_call. */
+ lab = PATTERN (gen_call_site ());
+ emit_insn (gen_sym_label2reg (reg, sym, lab));
+ }
+ else
+ {
+ /* ??? To allow cse to work, we use GOTOFF relocations.
+ We could add combiner patterns to transform this into
+ straight pc-relative calls with sym2PIC / bsrf when
+ label load and function call are still 1:1 and in the
+ same basic block during combine. */
+ emit_insn (gen_symGOTOFF2reg (reg, sym));
+ }
+
sym = reg;
break;
}
@@ -12817,9 +12987,9 @@ function_symbol (rtx target, const char *name, enum sh_function_kind kind)
if (target && sym != target)
{
emit_move_insn (target, sym);
- return target;
+ return function_symbol_result(target, lab);
}
- return sym;
+ return function_symbol_result(sym, lab);
}
/* Find the number of a general purpose register in S. */
@@ -13432,6 +13602,12 @@ sh_conditional_register_usage (void)
fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
}
+ if (TARGET_FDPIC)
+ {
+ fixed_regs[PIC_REG] = 1;
+ call_used_regs[PIC_REG] = 1;
+ call_really_used_regs[PIC_REG] = 1;
+ }
/* Renesas saves and restores mac registers on call. */
if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
{
@@ -13460,14 +13636,32 @@ sh_conditional_register_usage (void)
static bool
sh_legitimate_constant_p (machine_mode mode, rtx x)
{
- return (TARGET_SHMEDIA
- ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
- || x == CONST0_RTX (mode)
- || !TARGET_SHMEDIA_FPU
- || TARGET_SHMEDIA64)
- : (GET_CODE (x) != CONST_DOUBLE
- || mode == DFmode || mode == SFmode
- || mode == DImode || GET_MODE (x) == VOIDmode));
+ if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
+ {
+ rtx base, offset;
+
+ split_const (x, &base, &offset);
+ if (GET_CODE (base) == SYMBOL_REF
+ && !offset_within_block_p (base, INTVAL (offset)))
+ return false;
+ }
+
+ if (TARGET_FDPIC
+ && (SYMBOLIC_CONST_P (x)
+ || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
+ && SYMBOLIC_CONST_P (XEXP (XEXP (x, 0), 0)))))
+ return false;
+
+ if (TARGET_SHMEDIA
+ && ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
+ || x == CONST0_RTX (mode)
+ || !TARGET_SHMEDIA_FPU
+ || TARGET_SHMEDIA64))
+ return false;
+
+ return GET_CODE (x) != CONST_DOUBLE
+ || mode == DFmode || mode == SFmode
+ || mode == DImode || GET_MODE (x) == VOIDmode;
}
enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
@@ -14558,4 +14752,44 @@ sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
}
}
+bool
+sh_cannot_force_const_mem_p (machine_mode mode ATTRIBUTE_UNUSED,
+ rtx x ATTRIBUTE_UNUSED)
+{
+ if (TARGET_FDPIC)
+ return true;
+
+ return false;
+}
+
+/* Emit insns to load the function address from FUNCDESC (an FDPIC
+ function descriptor) into r1 and the GOT address into r12,
+ returning an rtx for r1. */
+
+rtx
+sh_load_function_descriptor (rtx funcdesc)
+{
+ rtx r1 = gen_rtx_REG (Pmode, R1_REG);
+ rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+ rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
+ rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
+
+ emit_move_insn (r1, fnaddr);
+ /* The ABI requires the entry point address to be loaded first, so
+ prevent the load from being moved after that of the GOT
+ address. */
+ emit_insn (gen_blockage ());
+ emit_move_insn (pic_reg, gotaddr);
+ return r1;
+}
+
+/* Return an rtx holding the initial value of the FDPIC register (the
+ FDPIC pointer passed in from the caller). */
+
+rtx
+sh_get_fdpic_reg_initial_val (void)
+{
+ return get_hard_reg_initial_val (Pmode, PIC_REG);
+}
+
#include "gt-sh.h"
diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h
index aafcf28..98d8054 100644
--- a/gcc/config/sh/sh.h
+++ b/gcc/config/sh/sh.h
@@ -321,7 +321,7 @@ extern int code_for_indirect_jump_scratch;
#endif
#ifndef SUBTARGET_ASM_SPEC
-#define SUBTARGET_ASM_SPEC ""
+#define SUBTARGET_ASM_SPEC "%{mfdpic:--fdpic}"
#endif
#if TARGET_ENDIAN_DEFAULT == MASK_LITTLE_ENDIAN
@@ -349,7 +349,7 @@ extern int code_for_indirect_jump_scratch;
#define ASM_ISA_DEFAULT_SPEC ""
#endif /* MASK_SH5 */
-#define SUBTARGET_LINK_EMUL_SUFFIX ""
+#define SUBTARGET_LINK_EMUL_SUFFIX "%{mfdpic:_fd}"
#define SUBTARGET_LINK_SPEC ""
/* Go via SH_LINK_SPEC to avoid code replication. */
@@ -383,8 +383,18 @@ extern int code_for_indirect_jump_scratch;
"%{m2a*:%eSH2a does not support little-endian}}"
#endif
+#ifdef FDPIC_DEFAULT
+#define FDPIC_SELF_SPECS "%{!mno-fdpic:-mfdpic}"
+#else
+#define FDPIC_SELF_SPECS
+#endif
+
#undef DRIVER_SELF_SPECS
-#define DRIVER_SELF_SPECS UNSUPPORTED_SH2A
+#define DRIVER_SELF_SPECS UNSUPPORTED_SH2A SUBTARGET_DRIVER_SELF_SPECS \
+ FDPIC_SELF_SPECS
+
+#undef SUBTARGET_DRIVER_SELF_SPECS
+#define SUBTARGET_DRIVER_SELF_SPECS
#define ASSEMBLER_DIALECT assembler_dialect
@@ -942,6 +952,10 @@ extern char sh_additional_register_names[ADDREGNAMES_SIZE] \
code access to data items. */
#define PIC_OFFSET_TABLE_REGNUM (flag_pic ? PIC_REG : INVALID_REGNUM)
+/* For FDPIC, the FDPIC register is call-clobbered (otherwise PLT
+ entries would need to handle saving and restoring it). */
+#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED TARGET_FDPIC
+
#define GOT_SYMBOL_NAME "*_GLOBAL_OFFSET_TABLE_"
/* Definitions for register eliminations.
@@ -1566,7 +1580,8 @@ struct sh_args {
6 000c 00000000 l2: .long function */
/* Length in units of the trampoline for entering a nested function. */
-#define TRAMPOLINE_SIZE (TARGET_SHMEDIA64 ? 40 : TARGET_SH5 ? 24 : 16)
+#define TRAMPOLINE_SIZE \
+ (TARGET_SHMEDIA64 ? 40 : TARGET_SH5 ? 24 : TARGET_FDPIC ? 32 : 16)
/* Alignment required for a trampoline in bits. */
#define TRAMPOLINE_ALIGNMENT \
@@ -1622,6 +1637,10 @@ struct sh_args {
|| GENERAL_REGISTER_P ((unsigned) reg_renumber[(REGNO)])) \
: (REGNO) == R0_REG || (unsigned) reg_renumber[(REGNO)] == R0_REG)
+/* True if SYMBOL + OFFSET constants must refer to something within
+ SYMBOL's section. */
+#define SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P TARGET_FDPIC
+
/* Maximum number of registers that can appear in a valid memory
address. */
#define MAX_REGS_PER_ADDRESS 2
@@ -2262,9 +2281,12 @@ extern int current_function_interrupt;
/* We have to distinguish between code and data, so that we apply
datalabel where and only where appropriate. Use sdataN for data. */
#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
- ((flag_pic && (GLOBAL) ? DW_EH_PE_indirect : 0) \
- | (flag_pic ? DW_EH_PE_pcrel : DW_EH_PE_absptr) \
- | ((CODE) ? 0 : (TARGET_SHMEDIA64 ? DW_EH_PE_sdata8 : DW_EH_PE_sdata4)))
+ ((TARGET_FDPIC \
+ ? ((GLOBAL) ? DW_EH_PE_indirect | DW_EH_PE_datarel \
+ : DW_EH_PE_pcrel) \
+ : ((flag_pic && (GLOBAL) ? DW_EH_PE_indirect : 0) \
+ | (flag_pic ? DW_EH_PE_pcrel : DW_EH_PE_absptr))) \
+ | ((CODE) ? 0 : (TARGET_SHMEDIA64 ? DW_EH_PE_sdata8 : DW_EH_PE_sdata4)))
/* Handle special EH pointer encodings. Absolute, pc-relative, and
indirect are handled automatically. */
@@ -2277,6 +2299,17 @@ extern int current_function_interrupt;
SYMBOL_REF_FLAGS (ADDR) |= SYMBOL_FLAG_FUNCTION; \
if (0) goto DONE; \
} \
+ if (TARGET_FDPIC \
+ && ((ENCODING) & 0xf0) == (DW_EH_PE_indirect | DW_EH_PE_datarel)) \
+ { \
+ fputs ("\t.ualong ", FILE); \
+ output_addr_const (FILE, ADDR); \
+ if (GET_CODE (ADDR) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (ADDR)) \
+ fputs ("@GOTFUNCDESC", FILE); \
+ else \
+ fputs ("@GOT", FILE); \
+ goto DONE; \
+ } \
} while (0)
#if (defined CRT_BEGIN || defined CRT_END) && ! __SHMEDIA__
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index d758e3b..f789f3f 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -100,6 +100,7 @@ (define_constants [
(R8_REG 8)
(R9_REG 9)
(R10_REG 10)
+ (R12_REG 12)
(R20_REG 20)
(R21_REG 21)
(R22_REG 22)
@@ -170,6 +171,9 @@ (define_c_enum "unspec" [
UNSPEC_SYMOFF
;; (unspec [OFFSET ANCHOR] UNSPEC_PCREL_SYMOFF) == OFFSET - (ANCHOR - .).
UNSPEC_PCREL_SYMOFF
+ ;; For FDPIC
+ UNSPEC_GOTFUNCDESC
+ UNSPEC_GOTOFFFUNCDESC
;; Misc builtins
UNSPEC_BUILTIN_STRLEN
])
@@ -2591,15 +2595,18 @@ (define_insn "udivsi3_sh2a"
;; This reload would clobber the value in r0 we are trying to store.
;; If we let reload allocate r0, then this problem can never happen.
(define_insn "udivsi3_i1"
- [(set (match_operand:SI 0 "register_operand" "=z")
+ [(set (match_operand:SI 0 "register_operand" "=z,z")
(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))
(clobber (reg:SI R1_REG))
(clobber (reg:SI R4_REG))
- (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+ (use (match_operand 2 "" "Z,Ccl"))]
"TARGET_SH1 && TARGET_DIVIDE_CALL_DIV1"
- "jsr @%1%#"
+ "@
+ jsr @%1%#
+ bsrf %1\n%O2:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -2648,7 +2655,7 @@ (define_expand "udivsi3_i4_media"
})
(define_insn "udivsi3_i4"
- [(set (match_operand:SI 0 "register_operand" "=y")
+ [(set (match_operand:SI 0 "register_operand" "=y,y")
(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))
@@ -2660,16 +2667,19 @@ (define_insn "udivsi3_i4"
(clobber (reg:SI R4_REG))
(clobber (reg:SI R5_REG))
(clobber (reg:SI FPSCR_STAT_REG))
- (use (match_operand:SI 1 "arith_reg_operand" "r"))
+ (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+ (use (match_operand 2 "" "Z,Ccl"))
(use (reg:SI FPSCR_MODES_REG))]
"TARGET_FPU_DOUBLE && ! TARGET_FPU_SINGLE"
- "jsr @%1%#"
+ "@
+ jsr @%1%#
+ bsrf %1\n%O2:%#"
[(set_attr "type" "sfunc")
(set_attr "fp_mode" "double")
(set_attr "needs_delay_slot" "yes")])
(define_insn "udivsi3_i4_single"
- [(set (match_operand:SI 0 "register_operand" "=y")
+ [(set (match_operand:SI 0 "register_operand" "=y,y")
(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))
@@ -2680,10 +2690,13 @@ (define_insn "udivsi3_i4_single"
(clobber (reg:SI R1_REG))
(clobber (reg:SI R4_REG))
(clobber (reg:SI R5_REG))
- (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+ (use (match_operand 2 "" "Z,Ccl"))]
"(TARGET_FPU_SINGLE_ONLY || TARGET_FPU_DOUBLE || TARGET_SHCOMPACT)
&& TARGET_FPU_SINGLE"
- "jsr @%1%#"
+ "@
+ jsr @%1%#
+ bsrf %1\n%O2:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -2742,11 +2755,11 @@ (define_expand "udivsi3"
}
else if (TARGET_DIVIDE_CALL_FP)
{
- function_symbol (operands[3], "__udivsi3_i4", SFUNC_STATIC);
+ rtx lab = function_symbol (operands[3], "__udivsi3_i4", SFUNC_STATIC).lab;
if (TARGET_FPU_SINGLE)
- last = gen_udivsi3_i4_single (operands[0], operands[3]);
+ last = gen_udivsi3_i4_single (operands[0], operands[3], lab);
else
- last = gen_udivsi3_i4 (operands[0], operands[3]);
+ last = gen_udivsi3_i4 (operands[0], operands[3], lab);
}
else if (TARGET_SHMEDIA_FPU)
{
@@ -2771,14 +2784,14 @@ (define_expand "udivsi3"
if (TARGET_SHMEDIA)
last = gen_udivsi3_i1_media (operands[0], operands[3]);
else if (TARGET_FPU_ANY)
- last = gen_udivsi3_i4_single (operands[0], operands[3]);
+ last = gen_udivsi3_i4_single (operands[0], operands[3], const0_rtx);
else
- last = gen_udivsi3_i1 (operands[0], operands[3]);
+ last = gen_udivsi3_i1 (operands[0], operands[3], const0_rtx);
}
else
{
- function_symbol (operands[3], "__udivsi3", SFUNC_STATIC);
- last = gen_udivsi3_i1 (operands[0], operands[3]);
+ rtx lab = function_symbol (operands[3], "__udivsi3", SFUNC_STATIC).lab;
+ last = gen_udivsi3_i1 (operands[0], operands[3], lab);
}
emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]);
@@ -2906,7 +2919,7 @@ (define_insn_and_split "*divsi_inv_call_combine"
emit_move_insn (gen_rtx_REG (DImode, R20_REG), x);
break;
}
- sym = function_symbol (NULL, name, kind);
+ sym = function_symbol (NULL, name, kind).sym;
emit_insn (gen_divsi3_media_2 (operands[0], sym));
DONE;
}
@@ -2926,31 +2939,37 @@ (define_expand "divsi3_i4_media"
})
(define_insn "divsi3_i4"
- [(set (match_operand:SI 0 "register_operand" "=y")
+ [(set (match_operand:SI 0 "register_operand" "=y,y")
(div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
(clobber (reg:SI PR_REG))
(clobber (reg:DF DR0_REG))
(clobber (reg:DF DR2_REG))
(clobber (reg:SI FPSCR_STAT_REG))
- (use (match_operand:SI 1 "arith_reg_operand" "r"))
+ (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+ (use (match_operand 2 "" "Z,Ccl"))
(use (reg:SI FPSCR_MODES_REG))]
"TARGET_FPU_DOUBLE && ! TARGET_FPU_SINGLE"
- "jsr @%1%#"
+ "@
+ jsr @%1%#
+ bsrf %1\n%O2:%#"
[(set_attr "type" "sfunc")
(set_attr "fp_mode" "double")
(set_attr "needs_delay_slot" "yes")])
(define_insn "divsi3_i4_single"
- [(set (match_operand:SI 0 "register_operand" "=y")
+ [(set (match_operand:SI 0 "register_operand" "=y,y")
(div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
(clobber (reg:SI PR_REG))
(clobber (reg:DF DR0_REG))
(clobber (reg:DF DR2_REG))
(clobber (reg:SI R2_REG))
- (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+ (use (match_operand 2 "" "Z,Ccl"))]
"(TARGET_FPU_SINGLE_ONLY || TARGET_FPU_DOUBLE || TARGET_SHCOMPACT)
&& TARGET_FPU_SINGLE"
- "jsr @%1%#"
+ "@
+ jsr @%1%#
+ bsrf %1\n%O2:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -2994,11 +3013,12 @@ (define_expand "divsi3"
}
else if (TARGET_DIVIDE_CALL_FP)
{
- function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_STATIC);
+ rtx lab = function_symbol (operands[3], sh_divsi3_libfunc,
+ SFUNC_STATIC).lab;
if (TARGET_FPU_SINGLE)
- last = gen_divsi3_i4_single (operands[0], operands[3]);
+ last = gen_divsi3_i4_single (operands[0], operands[3], lab);
else
- last = gen_divsi3_i4 (operands[0], operands[3]);
+ last = gen_divsi3_i4 (operands[0], operands[3], lab);
}
else if (TARGET_SH2A)
{
@@ -3113,7 +3133,7 @@ (define_expand "divsi3"
last = ((TARGET_DIVIDE_CALL2 ? gen_divsi3_media_2 : gen_divsi3_i1_media)
(operands[0], operands[3]));
else if (TARGET_FPU_ANY)
- last = gen_divsi3_i4_single (operands[0], operands[3]);
+ last = gen_divsi3_i4_single (operands[0], operands[3], const0_rtx);
else
last = gen_divsi3_i1 (operands[0], operands[3]);
}
@@ -3713,7 +3733,7 @@ (define_expand "mulsi3"
{
/* The address must be set outside the libcall,
since it goes into a pseudo. */
- rtx sym = function_symbol (NULL, "__mulsi3", SFUNC_STATIC);
+ rtx sym = function_symbol (NULL, "__mulsi3", SFUNC_STATIC).sym;
rtx addr = force_reg (SImode, sym);
rtx insns = gen_mulsi3_call (operands[0], operands[1],
operands[2], addr);
@@ -4970,8 +4990,8 @@ (define_expand "ashlsi3"
{
emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]);
rtx funcaddr = gen_reg_rtx (Pmode);
- function_symbol (funcaddr, "__ashlsi3_r0", SFUNC_STATIC);
- emit_insn (gen_ashlsi3_d_call (operands[0], operands[2], funcaddr));
+ rtx lab = function_symbol (funcaddr, "__ashlsi3_r0", SFUNC_STATIC).lab;
+ emit_insn (gen_ashlsi3_d_call (operands[0], operands[2], funcaddr, lab));
DONE;
}
@@ -5024,15 +5044,18 @@ (define_insn_and_split "ashlsi3_d"
;; In order to make combine understand the truncation of the shift amount
;; operand we have to allow it to use pseudo regs for the shift operands.
(define_insn "ashlsi3_d_call"
- [(set (match_operand:SI 0 "arith_reg_dest" "=z")
+ [(set (match_operand:SI 0 "arith_reg_dest" "=z,z")
(ashift:SI (reg:SI R4_REG)
- (and:SI (match_operand:SI 1 "arith_reg_operand" "z")
+ (and:SI (match_operand:SI 1 "arith_reg_operand" "z,z")
(const_int 31))))
- (use (match_operand:SI 2 "arith_reg_operand" "r"))
+ (use (match_operand:SI 2 "arith_reg_operand" "r,r"))
+ (use (match_operand 3 "" "Z,Ccl"))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))]
"TARGET_SH1 && !TARGET_DYNSHIFT"
- "jsr @%2%#"
+ "@
+ jsr @%2%#
+ bsrf %2\n%O3:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -5374,12 +5397,15 @@ (define_insn "ashrsi3_d"
(define_insn "ashrsi3_n"
[(set (reg:SI R4_REG)
(ashiftrt:SI (reg:SI R4_REG)
- (match_operand:SI 0 "const_int_operand" "i")))
+ (match_operand:SI 0 "const_int_operand" "i,i")))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))
- (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+ (use (match_operand 2 "" "Z,Ccl"))]
"TARGET_SH1"
- "jsr @%1%#"
+ "@
+ jsr @%1%#
+ bsrf %1\n%O2:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -5532,8 +5558,8 @@ (define_expand "lshrsi3"
{
emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]);
rtx funcaddr = gen_reg_rtx (Pmode);
- function_symbol (funcaddr, "__lshrsi3_r0", SFUNC_STATIC);
- emit_insn (gen_lshrsi3_d_call (operands[0], operands[2], funcaddr));
+ rtx lab = function_symbol (funcaddr, "__lshrsi3_r0", SFUNC_STATIC).lab;
+ emit_insn (gen_lshrsi3_d_call (operands[0], operands[2], funcaddr, lab));
DONE;
}
})
@@ -5585,15 +5611,18 @@ (define_insn_and_split "lshrsi3_d"
;; In order to make combine understand the truncation of the shift amount
;; operand we have to allow it to use pseudo regs for the shift operands.
(define_insn "lshrsi3_d_call"
- [(set (match_operand:SI 0 "arith_reg_dest" "=z")
+ [(set (match_operand:SI 0 "arith_reg_dest" "=z,z")
(lshiftrt:SI (reg:SI R4_REG)
- (and:SI (match_operand:SI 1 "arith_reg_operand" "z")
+ (and:SI (match_operand:SI 1 "arith_reg_operand" "z,z")
(const_int 31))))
- (use (match_operand:SI 2 "arith_reg_operand" "r"))
+ (use (match_operand:SI 2 "arith_reg_operand" "r,r"))
+ (use (match_operand 3 "" "Z,Ccl"))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))]
"TARGET_SH1 && !TARGET_DYNSHIFT"
- "jsr @%2%#"
+ "@
+ jsr @%2%#
+ bsrf %2\n%O3:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -7315,7 +7344,7 @@ (define_expand "ic_invalidate_line"
}
else if (TARGET_SHCOMPACT)
{
- operands[1] = function_symbol (NULL, "__ic_invalidate", SFUNC_STATIC);
+ operands[1] = function_symbol (NULL, "__ic_invalidate", SFUNC_STATIC).sym;
operands[1] = force_reg (Pmode, operands[1]);
emit_insn (gen_ic_invalidate_line_compact (operands[0], operands[1]));
DONE;
@@ -7397,7 +7426,7 @@ (define_expand "initialize_trampoline"
tramp = force_reg (Pmode, operands[0]);
sfun = force_reg (Pmode, function_symbol (NULL, "__init_trampoline",
- SFUNC_STATIC));
+ SFUNC_STATIC).sym);
emit_move_insn (gen_rtx_REG (SImode, R2_REG), operands[1]);
emit_move_insn (gen_rtx_REG (SImode, R3_REG), operands[2]);
@@ -9459,7 +9488,27 @@ (define_insn "calli"
(match_operand 1 "" ""))
(use (reg:SI FPSCR_MODES_REG))
(clobber (reg:SI PR_REG))]
- "TARGET_SH1"
+ "TARGET_SH1 && !TARGET_FDPIC"
+{
+ if (TARGET_SH2A && (dbr_sequence_length () == 0))
+ return "jsr/n @%0";
+ else
+ return "jsr @%0%#";
+}
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")
+ (set_attr "fp_set" "unknown")])
+
+(define_insn "calli_fdpic"
+ [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (match_operand 1))
+ (use (reg:SI FPSCR_MODES_REG))
+ (use (reg:SI PIC_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_FDPIC"
{
if (TARGET_SH2A && (dbr_sequence_length () == 0))
return "jsr/n @%0";
@@ -9588,7 +9637,28 @@ (define_insn "call_valuei"
(match_operand 2 "" "")))
(use (reg:SI FPSCR_MODES_REG))
(clobber (reg:SI PR_REG))]
- "TARGET_SH1"
+ "TARGET_SH1 && !TARGET_FDPIC"
+{
+ if (TARGET_SH2A && (dbr_sequence_length () == 0))
+ return "jsr/n @%1";
+ else
+ return "jsr @%1%#";
+}
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")
+ (set_attr "fp_set" "unknown")])
+
+(define_insn "call_valuei_fdpic"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (match_operand 2)))
+ (use (reg:SI FPSCR_MODES_REG)) ;; Same FP mode use as call_valuei.
+ (use (reg:SI PIC_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_FDPIC"
{
if (TARGET_SH2A && (dbr_sequence_length () == 0))
return "jsr/n @%1";
@@ -9725,6 +9795,12 @@ (define_expand "call"
(clobber (reg:SI PR_REG))])]
""
{
+ if (TARGET_FDPIC)
+ {
+ rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+ emit_move_insn (pic_reg, sh_get_fdpic_reg_initial_val ());
+ }
+
if (TARGET_SHMEDIA)
{
operands[0] = shmedia_prepare_call_address (operands[0], 0);
@@ -9759,8 +9835,8 @@ (define_expand "call"
run out of registers when adjusting fpscr for the call. */
emit_insn (gen_force_mode_for_call ());
- operands[0]
- = function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+ operands[0] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+ SFUNC_GOT).sym;
operands[0] = force_reg (SImode, operands[0]);
emit_move_insn (r0, func);
@@ -9808,7 +9884,13 @@ (define_expand "call"
operands[1] = operands[2];
}
- emit_call_insn (gen_calli (operands[0], operands[1]));
+ if (TARGET_FDPIC)
+ {
+ operands[0] = sh_load_function_descriptor (operands[0]);
+ emit_call_insn (gen_calli_fdpic (operands[0], operands[1]));
+ }
+ else
+ emit_call_insn (gen_calli (operands[0], operands[1]));
DONE;
})
@@ -9888,7 +9970,7 @@ (define_expand "call_pop"
emit_insn (gen_force_mode_for_call ());
operands[0] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
- SFUNC_GOT);
+ SFUNC_GOT).sym;
operands[0] = force_reg (SImode, operands[0]);
emit_move_insn (r0, func);
@@ -9913,6 +9995,12 @@ (define_expand "call_value"
(clobber (reg:SI PR_REG))])]
""
{
+ if (TARGET_FDPIC)
+ {
+ rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+ emit_move_insn (pic_reg, sh_get_fdpic_reg_initial_val ());
+ }
+
if (TARGET_SHMEDIA)
{
operands[1] = shmedia_prepare_call_address (operands[1], 0);
@@ -9948,8 +10036,8 @@ (define_expand "call_value"
run out of registers when adjusting fpscr for the call. */
emit_insn (gen_force_mode_for_call ());
- operands[1]
- = function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+ operands[1] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+ SFUNC_GOT).sym;
operands[1] = force_reg (SImode, operands[1]);
emit_move_insn (r0, func);
@@ -9997,7 +10085,14 @@ (define_expand "call_value"
else
operands[1] = force_reg (SImode, XEXP (operands[1], 0));
- emit_call_insn (gen_call_valuei (operands[0], operands[1], operands[2]));
+ if (TARGET_FDPIC)
+ {
+ operands[1] = sh_load_function_descriptor (operands[1]);
+ emit_call_insn (gen_call_valuei_fdpic (operands[0], operands[1],
+ operands[2]));
+ }
+ else
+ emit_call_insn (gen_call_valuei (operands[0], operands[1], operands[2]));
DONE;
})
@@ -10006,7 +10101,21 @@ (define_insn "sibcalli"
(match_operand 1 "" ""))
(use (reg:SI FPSCR_MODES_REG))
(return)]
- "TARGET_SH1"
+ "TARGET_SH1 && !TARGET_FDPIC"
+ "jmp @%0%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+(define_insn "sibcalli_fdpic"
+ [(call (mem:SI (match_operand:SI 0 "register_operand" "k"))
+ (match_operand 1))
+ (use (reg:SI FPSCR_MODES_REG))
+ (use (reg:SI PIC_REG))
+ (return)]
+ "TARGET_FDPIC"
"jmp @%0%#"
[(set_attr "needs_delay_slot" "yes")
(set (attr "fp_mode")
@@ -10020,7 +10129,25 @@ (define_insn "sibcalli_pcrel"
(use (match_operand 2 "" ""))
(use (reg:SI FPSCR_MODES_REG))
(return)]
- "TARGET_SH2"
+ "TARGET_SH2 && !TARGET_FDPIC"
+{
+ return "braf %0" "\n"
+ "%O2:%#";
+}
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+(define_insn "sibcalli_pcrel_fdpic"
+ [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "k"))
+ (match_operand 1))
+ (use (match_operand 2))
+ (use (reg:SI FPSCR_MODES_REG))
+ (use (reg:SI PIC_REG))
+ (return)]
+ "TARGET_SH2 && TARGET_FDPIC"
{
return "braf %0" "\n"
"%O2:%#";
@@ -10053,7 +10180,7 @@ (define_insn_and_split "sibcall_pcrel"
(use (reg:SI FPSCR_MODES_REG))
(clobber (match_scratch:SI 2 "=&k"))
(return)]
- "TARGET_SH2"
+ "TARGET_SH2 && !TARGET_FDPIC"
"#"
"reload_completed"
[(const_int 0)]
@@ -10073,6 +10200,34 @@ (define_insn_and_split "sibcall_pcrel"
(const_string "single") (const_string "double")))
(set_attr "type" "jump_ind")])
+(define_insn_and_split "sibcall_pcrel_fdpic"
+ [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" ""))
+ (match_operand 1))
+ (use (reg:SI FPSCR_MODES_REG))
+ (use (reg:SI PIC_REG))
+ (clobber (match_scratch:SI 2 "=k"))
+ (return)]
+ "TARGET_SH2 && TARGET_FDPIC"
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ rtx lab = PATTERN (gen_call_site ());
+ rtx call_insn;
+
+ sh_expand_sym_label2reg (operands[2], operands[0], lab, true);
+ call_insn = emit_call_insn (gen_sibcalli_pcrel_fdpic (operands[2],
+ operands[1],
+ copy_rtx (lab)));
+ SIBLING_CALL_P (call_insn) = 1;
+ DONE;
+}
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
(define_insn "sibcall_compact"
[(call (mem:SI (match_operand:SI 0 "register_operand" "k,k"))
(match_operand 1 "" ""))
@@ -10117,6 +10272,12 @@ (define_expand "sibcall"
(return)])]
""
{
+ if (TARGET_FDPIC)
+ {
+ rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+ emit_move_insn (pic_reg, sh_get_fdpic_reg_initial_val ());
+ }
+
if (TARGET_SHMEDIA)
{
operands[0] = shmedia_prepare_call_address (operands[0], 1);
@@ -10161,8 +10322,8 @@ (define_expand "sibcall"
run out of registers when adjusting fpscr for the call. */
emit_insn (gen_force_mode_for_call ());
- operands[0]
- = function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+ operands[0] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+ SFUNC_GOT).sym;
operands[0] = force_reg (SImode, operands[0]);
/* We don't need a return trampoline, since the callee will
@@ -10196,13 +10357,24 @@ (define_expand "sibcall"
static functions. */
&& SYMBOL_REF_LOCAL_P (XEXP (operands[0], 0)))
{
- emit_call_insn (gen_sibcall_pcrel (XEXP (operands[0], 0), operands[1]));
+ if (TARGET_FDPIC)
+ emit_call_insn (gen_sibcall_pcrel_fdpic (XEXP (operands[0], 0),
+ operands[1]));
+ else
+ emit_call_insn (gen_sibcall_pcrel (XEXP (operands[0], 0),
+ operands[1]));
DONE;
}
else
operands[0] = force_reg (SImode, XEXP (operands[0], 0));
- emit_call_insn (gen_sibcalli (operands[0], operands[1]));
+ if (TARGET_FDPIC)
+ {
+ operands[0] = sh_load_function_descriptor (operands[0]);
+ emit_call_insn (gen_sibcalli_fdpic (operands[0], operands[1]));
+ }
+ else
+ emit_call_insn (gen_sibcalli (operands[0], operands[1]));
DONE;
})
@@ -10212,7 +10384,22 @@ (define_insn "sibcall_valuei"
(match_operand 2 "" "")))
(use (reg:SI FPSCR_MODES_REG))
(return)]
- "TARGET_SH1"
+ "TARGET_SH1 && !TARGET_FDPIC"
+ "jmp @%1%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+(define_insn "sibcall_valuei_fdpic"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "register_operand" "k"))
+ (match_operand 2)))
+ (use (reg:SI FPSCR_MODES_REG))
+ (use (reg:SI PIC_REG))
+ (return)]
+ "TARGET_FDPIC"
"jmp @%1%#"
[(set_attr "needs_delay_slot" "yes")
(set (attr "fp_mode")
@@ -10227,7 +10414,26 @@ (define_insn "sibcall_valuei_pcrel"
(use (match_operand 3 "" ""))
(use (reg:SI FPSCR_MODES_REG))
(return)]
- "TARGET_SH2"
+ "TARGET_SH2 && !TARGET_FDPIC"
+{
+ return "braf %1" "\n"
+ "%O3:%#";
+}
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+(define_insn "sibcall_valuei_pcrel_fdpic"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "k"))
+ (match_operand 2)))
+ (use (match_operand 3))
+ (use (reg:SI FPSCR_MODES_REG))
+ (use (reg:SI PIC_REG))
+ (return)]
+ "TARGET_SH2 && TARGET_FDPIC"
{
return "braf %1" "\n"
"%O3:%#";
@@ -10245,7 +10451,7 @@ (define_insn_and_split "sibcall_value_pcrel"
(use (reg:SI FPSCR_MODES_REG))
(clobber (match_scratch:SI 3 "=&k"))
(return)]
- "TARGET_SH2"
+ "TARGET_SH2 && !TARGET_FDPIC"
"#"
"reload_completed"
[(const_int 0)]
@@ -10258,6 +10464,38 @@ (define_insn_and_split "sibcall_value_pcrel"
operands[3],
operands[2],
copy_rtx (lab)));
+
+ SIBLING_CALL_P (call_insn) = 1;
+ DONE;
+}
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+(define_insn_and_split "sibcall_value_pcrel_fdpic"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" ""))
+ (match_operand 2)))
+ (use (reg:SI FPSCR_MODES_REG))
+ (use (reg:SI PIC_REG))
+ (clobber (match_scratch:SI 3 "=k"))
+ (return)]
+ "TARGET_SH2 && TARGET_FDPIC"
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ rtx lab = PATTERN (gen_call_site ());
+ rtx call_insn;
+
+ sh_expand_sym_label2reg (operands[3], operands[1], lab, true);
+ call_insn = emit_call_insn (gen_sibcall_valuei_pcrel_fdpic (operands[0],
+ operands[3],
+ operands[2],
+ copy_rtx (lab)));
+
SIBLING_CALL_P (call_insn) = 1;
DONE;
}
@@ -10314,6 +10552,12 @@ (define_expand "sibcall_value"
(return)])]
""
{
+ if (TARGET_FDPIC)
+ {
+ rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+ emit_move_insn (pic_reg, sh_get_fdpic_reg_initial_val ());
+ }
+
if (TARGET_SHMEDIA)
{
operands[1] = shmedia_prepare_call_address (operands[1], 1);
@@ -10359,8 +10603,8 @@ (define_expand "sibcall_value"
run out of registers when adjusting fpscr for the call. */
emit_insn (gen_force_mode_for_call ());
- operands[1]
- = function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+ operands[1] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+ SFUNC_GOT).sym;
operands[1] = force_reg (SImode, operands[1]);
/* We don't need a return trampoline, since the callee will
@@ -10395,15 +10639,28 @@ (define_expand "sibcall_value"
static functions. */
&& SYMBOL_REF_LOCAL_P (XEXP (operands[1], 0)))
{
- emit_call_insn (gen_sibcall_value_pcrel (operands[0],
- XEXP (operands[1], 0),
- operands[2]));
+ if (TARGET_FDPIC)
+ emit_call_insn (gen_sibcall_value_pcrel_fdpic (operands[0],
+ XEXP (operands[1], 0),
+ operands[2]));
+ else
+ emit_call_insn (gen_sibcall_value_pcrel (operands[0],
+ XEXP (operands[1], 0),
+ operands[2]));
DONE;
}
else
operands[1] = force_reg (SImode, XEXP (operands[1], 0));
- emit_call_insn (gen_sibcall_valuei (operands[0], operands[1], operands[2]));
+ if (TARGET_FDPIC)
+ {
+ operands[1] = sh_load_function_descriptor (operands[1]);
+ emit_call_insn (gen_sibcall_valuei_fdpic (operands[0], operands[1],
+ operands[2]));
+ }
+ else
+ emit_call_insn (gen_sibcall_valuei (operands[0], operands[1],
+ operands[2]));
DONE;
})
@@ -10487,7 +10744,7 @@ (define_expand "call_value_pop"
emit_insn (gen_force_mode_for_call ());
operands[1] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
- SFUNC_GOT);
+ SFUNC_GOT).sym;
operands[1] = force_reg (SImode, operands[1]);
emit_move_insn (r0, func);
@@ -10685,6 +10942,13 @@ (define_expand "GOTaddr2picreg"
DONE;
}
+ if (TARGET_FDPIC)
+ {
+ rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+ emit_move_insn (pic_reg, sh_get_fdpic_reg_initial_val ());
+ DONE;
+ }
+
operands[1] = gen_rtx_REG (Pmode, PIC_REG);
operands[2] = gen_rtx_SYMBOL_REF (VOIDmode, GOT_SYMBOL_NAME);
@@ -10820,6 +11084,9 @@ (define_expand "symGOT_load"
rtx mem;
bool stack_chk_guard_p = false;
+ rtx picreg = TARGET_FDPIC ? sh_get_fdpic_reg_initial_val ()
+ : gen_rtx_REG (Pmode, PIC_REG);
+
operands[2] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
operands[3] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
@@ -10862,8 +11129,7 @@ (define_expand "symGOT_load"
if (stack_chk_guard_p)
emit_insn (gen_chk_guard_add (operands[3], operands[2]));
else
- emit_move_insn (operands[3], gen_rtx_PLUS (Pmode, operands[2],
- gen_rtx_REG (Pmode, PIC_REG)));
+ emit_move_insn (operands[3], gen_rtx_PLUS (Pmode, operands[2], picreg));
/* N.B. This is not constant for a GOTPLT relocation. */
mem = gen_rtx_MEM (Pmode, operands[3]);
@@ -10894,6 +11160,26 @@ (define_expand "symGOT2reg"
DONE;
})
+(define_expand "sym2GOTFUNCDESC"
+ [(const (unspec [(match_operand 0)] UNSPEC_GOTFUNCDESC))]
+ "TARGET_FDPIC"
+ "")
+
+(define_expand "symGOTFUNCDESC2reg"
+ [(match_operand 0) (match_operand 1)]
+ "TARGET_FDPIC"
+{
+ rtx gotsym, insn;
+
+ gotsym = gen_sym2GOTFUNCDESC (operands[1]);
+ PUT_MODE (gotsym, Pmode);
+ insn = emit_insn (gen_symGOT_load (operands[0], gotsym));
+
+ MEM_READONLY_P (SET_SRC (PATTERN (insn))) = 1;
+
+ DONE;
+})
+
(define_expand "symGOTPLT2reg"
[(match_operand 0 "" "") (match_operand 1 "" "")]
""
@@ -10920,18 +11206,41 @@ (define_expand "symGOTOFF2reg"
? operands[0]
: gen_reg_rtx (GET_MODE (operands[0])));
+ rtx picreg = TARGET_FDPIC ? sh_get_fdpic_reg_initial_val ()
+ : gen_rtx_REG (Pmode, PIC_REG);
+
gotoffsym = gen_sym2GOTOFF (operands[1]);
PUT_MODE (gotoffsym, Pmode);
emit_move_insn (t, gotoffsym);
- insn = emit_move_insn (operands[0],
- gen_rtx_PLUS (Pmode, t,
- gen_rtx_REG (Pmode, PIC_REG)));
+ insn = emit_move_insn (operands[0], gen_rtx_PLUS (Pmode, t, picreg));
set_unique_reg_note (insn, REG_EQUAL, operands[1]);
DONE;
})
+(define_expand "sym2GOTOFFFUNCDESC"
+ [(const (unspec [(match_operand 0)] UNSPEC_GOTOFFFUNCDESC))]
+ "TARGET_FDPIC"
+ "")
+
+(define_expand "symGOTOFFFUNCDESC2reg"
+ [(match_operand 0) (match_operand 1)]
+ "TARGET_FDPIC"
+{
+ rtx picreg = sh_get_fdpic_reg_initial_val ();
+ rtx gotoffsym;
+ rtx t = (!can_create_pseudo_p ()
+ ? operands[0]
+ : gen_reg_rtx (GET_MODE (operands[0])));
+
+ gotoffsym = gen_sym2GOTOFFFUNCDESC (operands[1]);
+ PUT_MODE (gotoffsym, Pmode);
+ emit_move_insn (t, gotoffsym);
+ emit_move_insn (operands[0], gen_rtx_PLUS (Pmode, t, picreg));
+ DONE;
+})
+
(define_expand "symPLT_label2reg"
[(set (match_operand:SI 0 "" "")
(const:SI
@@ -12688,18 +12997,22 @@ (define_expand "movmemsi"
(define_insn "block_move_real"
[(parallel [(set (mem:BLK (reg:SI R4_REG))
(mem:BLK (reg:SI R5_REG)))
- (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+ (use (match_operand 1 "" "Z,Ccl"))
(clobber (reg:SI PR_REG))
(clobber (reg:SI R0_REG))])]
"TARGET_SH1 && ! TARGET_HARD_SH4"
- "jsr @%0%#"
+ "@
+ jsr @%0%#
+ bsrf %0\n%O1:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
(define_insn "block_lump_real"
[(parallel [(set (mem:BLK (reg:SI R4_REG))
(mem:BLK (reg:SI R5_REG)))
- (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+ (use (match_operand 1 "" "Z,Ccl"))
(use (reg:SI R6_REG))
(clobber (reg:SI PR_REG))
(clobber (reg:SI T_REG))
@@ -12708,27 +13021,33 @@ (define_insn "block_lump_real"
(clobber (reg:SI R6_REG))
(clobber (reg:SI R0_REG))])]
"TARGET_SH1 && ! TARGET_HARD_SH4"
- "jsr @%0%#"
+ "@
+ jsr @%0%#
+ bsrf %0\n%O1:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
(define_insn "block_move_real_i4"
[(parallel [(set (mem:BLK (reg:SI R4_REG))
(mem:BLK (reg:SI R5_REG)))
- (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+ (use (match_operand 1 "" "Z,Ccl"))
(clobber (reg:SI PR_REG))
(clobber (reg:SI R0_REG))
(clobber (reg:SI R1_REG))
(clobber (reg:SI R2_REG))])]
"TARGET_HARD_SH4"
- "jsr @%0%#"
+ "@
+ jsr @%0%#
+ bsrf %0\n%O1:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
(define_insn "block_lump_real_i4"
[(parallel [(set (mem:BLK (reg:SI R4_REG))
(mem:BLK (reg:SI R5_REG)))
- (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+ (use (match_operand 1 "" "Z,Ccl"))
(use (reg:SI R6_REG))
(clobber (reg:SI PR_REG))
(clobber (reg:SI T_REG))
@@ -12740,7 +13059,9 @@ (define_insn "block_lump_real_i4"
(clobber (reg:SI R2_REG))
(clobber (reg:SI R3_REG))])]
"TARGET_HARD_SH4"
- "jsr @%0%#"
+ "@
+ jsr @%0%#
+ bsrf %0\n%O1:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
diff --git a/gcc/config/sh/sh.opt b/gcc/config/sh/sh.opt
index 8875b5d..c2e8aca 100644
--- a/gcc/config/sh/sh.opt
+++ b/gcc/config/sh/sh.opt
@@ -264,6 +264,10 @@ mdivsi3_libfunc=
Target RejectNegative Joined Var(sh_divsi3_libfunc) Init("")
Specify name for 32 bit signed division function
+mfdpic
+Target Report Var(TARGET_FDPIC) Init(0)
+Generate ELF FDPIC code
+
mfmovd
Target RejectNegative Mask(FMOVD)
Enable the use of 64-bit floating point registers in fmov instructions. See -mdalign if 64-bit alignment is required.
diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 1fd773e..fe57b97 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -1810,6 +1810,9 @@ When neither of these configure options are used, the default will be
128-bit @code{long double} when built against GNU C Library 2.4 and later,
64-bit @code{long double} otherwise.
+@item --enable-fdpic
+On SH Linux systems, generate ELF FDPIC code.
+
@item --with-gmp=@var{pathname}
@itemx --with-gmp-include=@var{pathname}
@itemx --with-gmp-lib=@var{pathname}
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index ebfaaa1..8b26eac 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -21178,6 +21178,10 @@ in effect.
Prefer zero-displacement conditional branches for conditional move instruction
patterns. This can result in faster code on the SH4 processor.
+@item -mfdpic
+@opindex fdpic
+Generate code using the FDPIC ABI.
+
@end table
@node Solaris 2 Options
diff --git a/include/longlong.h b/include/longlong.h
index a0b2ce1..213df5d 100644
--- a/include/longlong.h
+++ b/include/longlong.h
@@ -1102,6 +1102,33 @@ extern UDItype __umulsidi3 (USItype, USItype);
/* This is the same algorithm as __udiv_qrnnd_c. */
#define UDIV_NEEDS_NORMALIZATION 1
+#ifdef __FDPIC__
+/* FDPIC needs a special version of the asm fragment to extract the
+ code address from the function descriptor. __udiv_qrnnd_16 is
+ assumed to be local and not to use the GOT, so loading r12 is
+ not needed. */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { \
+ extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
+ __attribute__ ((visibility ("hidden"))); \
+ /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
+ __asm__ ( \
+ "mov%M4 %4,r5\n" \
+" swap.w %3,r4\n" \
+" swap.w r5,r6\n" \
+" mov.l @%5,r2\n" \
+" jsr @r2\n" \
+" shll16 r6\n" \
+" swap.w r4,r4\n" \
+" mov.l @%5,r2\n" \
+" jsr @r2\n" \
+" swap.w r1,%0\n" \
+" or r1,%0" \
+ : "=r" (q), "=&z" (r) \
+ : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
+ : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
+ } while (0)
+#else
#define udiv_qrnnd(q, r, n1, n0, d) \
do { \
extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
@@ -1121,6 +1148,7 @@ extern UDItype __umulsidi3 (USItype, USItype);
: "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
: "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
} while (0)
+#endif
#define UDIV_TIME 80
diff --git a/libitm/config/sh/sjlj.S b/libitm/config/sh/sjlj.S
index 410cef6..76ec6df 100644
--- a/libitm/config/sh/sjlj.S
+++ b/libitm/config/sh/sjlj.S
@@ -58,9 +58,6 @@ _ITM_beginTransaction:
jsr @r1
mov r15, r5
#else
- mova .Lgot, r0
- mov.l .Lgot, r12
- add r0, r12
mov.l .Lbegin, r1
bsrf r1
mov r15, r5
@@ -80,13 +77,11 @@ _ITM_beginTransaction:
cfi_endproc
.align 2
-.Lgot:
- .long _GLOBAL_OFFSET_TABLE_
.Lbegin:
#if defined HAVE_ATTRIBUTE_VISIBILITY || !defined __PIC__
.long GTM_begin_transaction
#else
- .long GTM_begin_transaction@PLT-(.Lbegin0-.)
+ .long GTM_begin_transaction@PCREL-(.Lbegin0-.)
#endif
.size _ITM_beginTransaction, . - _ITM_beginTransaction
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v4] SH FDPIC backend support
2015-10-23 7:22 ` [PATCH v4] " Rich Felker
@ 2015-10-25 14:32 ` Oleg Endo
2015-10-27 3:35 ` Rich Felker
0 siblings, 1 reply; 14+ messages in thread
From: Oleg Endo @ 2015-10-25 14:32 UTC (permalink / raw)
To: Rich Felker, gcc-patches
[-- Attachment #1: Type: text/plain, Size: 621 bytes --]
On Fri, 2015-10-23 at 02:32 -0400, Rich Felker wrote:
> Here's my updated version of the FDPIC patch with all requested
> changes made and Changelog added. I've included all the original
> authors. This is my first time writing such an extensive Changelog
> entry so please let me know if there are things I got wrong.
I took the liberty of fixing some minor formatting trivia and extracting
functions sh_emit_storesi and sh_emit_storehi which are used in
sh_trampoline_init to effectively memcpy code into the trampoline
area. Can you please check it? If it's OK I'll commit the attached
patch to trunk.
Cheers,
Oleg
[-- Attachment #2: sh-fdpic-v3.3.patch --]
[-- Type: text/x-patch, Size: 62787 bytes --]
Index: gcc/config/sh/constraints.md
===================================================================
--- gcc/config/sh/constraints.md (revision 229290)
+++ gcc/config/sh/constraints.md (working copy)
@@ -25,6 +25,7 @@
;; Bsc: SCRATCH - for the scratch register in movsi_ie in the
;; fldi0 / fldi0 cases
;; Cxx: Constants other than only CONST_INT
+;; Ccl: call site label
;; Css: signed 16-bit constant, literal or symbolic
;; Csu: unsigned 16-bit constant, literal or symbolic
;; Csy: label or symbol
@@ -233,6 +234,11 @@
hence mova is being used, hence do not select this pattern."
(match_code "scratch"))
+(define_constraint "Ccl"
+ "A call site label, for bsrf."
+ (and (match_code "unspec")
+ (match_test "XINT (op, 1) == UNSPEC_CALLER")))
+
(define_constraint "Css"
"A signed 16-bit constant, literal or symbolic."
(and (match_code "const")
Index: gcc/config/sh/linux.h
===================================================================
--- gcc/config/sh/linux.h (revision 229290)
+++ gcc/config/sh/linux.h (working copy)
@@ -67,7 +67,8 @@
#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2"
#undef SUBTARGET_LINK_EMUL_SUFFIX
-#define SUBTARGET_LINK_EMUL_SUFFIX "_linux"
+#define SUBTARGET_LINK_EMUL_SUFFIX "%{mfdpic:_fd;:_linux}"
+
#undef SUBTARGET_LINK_SPEC
#define SUBTARGET_LINK_SPEC \
"%{shared:-shared} \
Index: gcc/config/sh/sh-c.c
===================================================================
--- gcc/config/sh/sh-c.c (revision 229290)
+++ gcc/config/sh/sh-c.c (working copy)
@@ -137,6 +137,11 @@
builtin_define ("__HITACHI__");
if (TARGET_FMOVD)
builtin_define ("__FMOVD_ENABLED__");
+ if (TARGET_FDPIC)
+ {
+ builtin_define ("__SH_FDPIC__");
+ builtin_define ("__FDPIC__");
+ }
builtin_define (TARGET_LITTLE_ENDIAN
? "__LITTLE_ENDIAN__" : "__BIG_ENDIAN__");
Index: gcc/config/sh/sh-mem.cc
===================================================================
--- gcc/config/sh/sh-mem.cc (revision 229290)
+++ gcc/config/sh/sh-mem.cc (working copy)
@@ -108,29 +108,30 @@
rtx r4 = gen_rtx_REG (SImode, 4);
rtx r5 = gen_rtx_REG (SImode, 5);
- function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
+ rtx lab = function_symbol (func_addr_rtx, "__movmemSI12_i4",
+ SFUNC_STATIC).lab;
force_into (XEXP (operands[0], 0), r4);
force_into (XEXP (operands[1], 0), r5);
- emit_insn (gen_block_move_real_i4 (func_addr_rtx));
+ emit_insn (gen_block_move_real_i4 (func_addr_rtx, lab));
return true;
}
else if (! optimize_size)
{
- const char *entry_name;
rtx func_addr_rtx = gen_reg_rtx (Pmode);
- int dwords;
rtx r4 = gen_rtx_REG (SImode, 4);
rtx r5 = gen_rtx_REG (SImode, 5);
rtx r6 = gen_rtx_REG (SImode, 6);
- entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
- function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
+ rtx lab = function_symbol (func_addr_rtx, bytes & 4
+ ? "__movmem_i4_odd"
+ : "__movmem_i4_even",
+ SFUNC_STATIC).lab;
force_into (XEXP (operands[0], 0), r4);
force_into (XEXP (operands[1], 0), r5);
- dwords = bytes >> 3;
+ int dwords = bytes >> 3;
emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
- emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
+ emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab));
return true;
}
else
@@ -144,10 +145,10 @@
rtx r5 = gen_rtx_REG (SImode, 5);
sprintf (entry, "__movmemSI%d", bytes);
- function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
+ rtx lab = function_symbol (func_addr_rtx, entry, SFUNC_STATIC).lab;
force_into (XEXP (operands[0], 0), r4);
force_into (XEXP (operands[1], 0), r5);
- emit_insn (gen_block_move_real (func_addr_rtx));
+ emit_insn (gen_block_move_real (func_addr_rtx, lab));
return true;
}
@@ -161,7 +162,7 @@
rtx r5 = gen_rtx_REG (SImode, 5);
rtx r6 = gen_rtx_REG (SImode, 6);
- function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
+ rtx lab = function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC).lab;
force_into (XEXP (operands[0], 0), r4);
force_into (XEXP (operands[1], 0), r5);
@@ -174,7 +175,7 @@
final_switch = 16 - ((bytes / 4) % 16);
while_loop = ((bytes / 4) / 16 - 1) * 16;
emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
- emit_insn (gen_block_lump_real (func_addr_rtx));
+ emit_insn (gen_block_lump_real (func_addr_rtx, lab));
return true;
}
Index: gcc/config/sh/sh-protos.h
===================================================================
--- gcc/config/sh/sh-protos.h (revision 229290)
+++ gcc/config/sh/sh-protos.h (working copy)
@@ -377,7 +377,19 @@
extern void sh_pr_interrupt (struct cpp_reader *);
extern void sh_pr_trapa (struct cpp_reader *);
extern void sh_pr_nosave_low_regs (struct cpp_reader *);
-extern rtx function_symbol (rtx, const char *, enum sh_function_kind);
+
+struct function_symbol_result
+{
+ function_symbol_result (void) : sym (NULL), lab (NULL) { }
+ function_symbol_result (rtx s, rtx l) : sym (s), lab (l) { }
+
+ rtx sym;
+ rtx lab;
+};
+
+extern function_symbol_result function_symbol (rtx, const char *,
+ sh_function_kind);
+extern rtx sh_get_fdpic_reg_initial_val (void);
extern rtx sh_get_pr_initial_val (void);
extern void sh_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree,
@@ -396,4 +408,5 @@
extern machine_mode sh_hard_regno_caller_save_mode (unsigned int, unsigned int,
machine_mode);
extern bool sh_can_use_simple_return_p (void);
+extern rtx sh_load_function_descriptor (rtx);
#endif /* ! GCC_SH_PROTOS_H */
Index: gcc/config/sh/sh.c
===================================================================
--- gcc/config/sh/sh.c (revision 229290)
+++ gcc/config/sh/sh.c (working copy)
@@ -251,6 +251,7 @@
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
HOST_WIDE_INT, tree);
static void sh_file_start (void);
+static bool sh_assemble_integer (rtx, unsigned int, int);
static bool flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, const_rtx, void *);
static int shiftcosts (rtx);
@@ -259,6 +260,7 @@
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx_insn *);
+static bool sh_cannot_force_const_mem_p (machine_mode, rtx);
static bool sh_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
static int sh_pr_n_sets (void);
@@ -404,6 +406,9 @@
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER sh_assemble_integer
+
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sh_register_move_cost
@@ -662,6 +667,9 @@
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM sh_cannot_force_const_mem_p
+
struct gcc_target targetm = TARGET_INITIALIZER;
\f
@@ -979,6 +987,13 @@
if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
TARGET_ZDCBRANCH = 1;
+ /* FDPIC code is a special form of PIC, and the vast majority of code
+ generation constraints that apply to PIC also apply to FDPIC, so we
+ set flag_pic to avoid the need to check TARGET_FDPIC everywhere
+ flag_pic is checked. */
+ if (TARGET_FDPIC && !flag_pic)
+ flag_pic = 2;
+
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (! VALID_REGISTER_P (regno))
sh_register_names[regno][0] = '\0';
@@ -1670,6 +1685,14 @@
output_addr_const (file, XVECEXP (x, 0, 1));
fputs ("-.)", file);
break;
+ case UNSPEC_GOTFUNCDESC:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@GOTFUNCDESC", file);
+ break;
+ case UNSPEC_GOTOFFFUNCDESC:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@GOTOFFFUNCDESC", file);
+ break;
default:
return false;
}
@@ -1854,6 +1877,9 @@
{
case TLS_MODEL_GLOBAL_DYNAMIC:
tga_ret = gen_rtx_REG (Pmode, R0_REG);
+ if (TARGET_FDPIC)
+ emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
+ sh_get_fdpic_reg_initial_val ());
emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
tmp = gen_reg_rtx (Pmode);
emit_move_insn (tmp, tga_ret);
@@ -1862,6 +1888,9 @@
case TLS_MODEL_LOCAL_DYNAMIC:
tga_ret = gen_rtx_REG (Pmode, R0_REG);
+ if (TARGET_FDPIC)
+ emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
+ sh_get_fdpic_reg_initial_val ());
emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
tmp = gen_reg_rtx (Pmode);
@@ -1879,6 +1908,9 @@
case TLS_MODEL_INITIAL_EXEC:
tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
tmp = gen_sym2GOTTPOFF (op1);
+ if (TARGET_FDPIC)
+ emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
+ sh_get_fdpic_reg_initial_val ());
emit_insn (gen_tls_initial_exec (tga_op1, tmp));
op1 = tga_op1;
break;
@@ -1905,6 +1937,22 @@
operands[1] = op1;
}
}
+
+ if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
+ {
+ rtx base, offset;
+ split_const (operands[1], &base, &offset);
+
+ if (GET_CODE (base) == SYMBOL_REF
+ && !offset_within_block_p (base, INTVAL (offset)))
+ {
+ rtx tmp = can_create_pseudo_p () ? gen_reg_rtx (mode) : operands[0];
+ emit_move_insn (tmp, base);
+ if (!arith_operand (offset, mode))
+ offset = force_reg (mode, offset);
+ emit_insn (gen_add3_insn (operands[0], tmp, offset));
+ }
+ }
}
/* Implement the canonicalize_comparison target hook for the combine
@@ -3009,6 +3057,24 @@
}
}
\f
+/* Implementation of TARGET_ASM_INTEGER for SH. Pointers to functions
+ need to be output as pointers to function descriptors for
+ FDPIC. */
+
+static bool
+sh_assemble_integer (rtx value, unsigned int size, int aligned_p)
+{
+ if (TARGET_FDPIC && size == UNITS_PER_WORD
+ && GET_CODE (value) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (value))
+ {
+ fputs ("\t.long\t", asm_out_file);
+ output_addr_const (asm_out_file, value);
+ fputs ("@FUNCDESC\n", asm_out_file);
+ return true;
+ }
+ return default_assemble_integer (value, size, aligned_p);
+}
+\f
/* Check if PAT includes UNSPEC_CALLER unspec pattern. */
static bool
unspec_caller_rtx_p (rtx pat)
@@ -3044,6 +3110,17 @@
return false;
pat = PATTERN (insn);
+
+ if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == USE)
+ return false;
+
+ if (TARGET_FDPIC && GET_CODE (pat) == PARALLEL)
+ {
+ rtx t = XVECEXP (pat, 0, XVECLEN (pat, 0) - 1);
+ if (GET_CODE (t) == USE && unspec_caller_rtx_p (XEXP (t, 0)))
+ return true;
+ }
+
if (GET_CODE (pat) != SET)
return false;
pat = SET_SRC (pat);
@@ -4085,8 +4162,8 @@
/* Load the value into an arg reg and call a helper. */
emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
sprintf (func, "__ashiftrt_r4_%d", value);
- function_symbol (wrk, func, SFUNC_STATIC);
- emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
+ rtx lab = function_symbol (wrk, func, SFUNC_STATIC).lab;
+ emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk, lab));
emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
return true;
}
@@ -7937,7 +8014,8 @@
stack_usage += d;
}
- if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
+ if (flag_pic && !TARGET_FDPIC
+ && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
emit_insn (gen_GOTaddr2picreg (const0_rtx));
if (SHMEDIA_REGS_STACK_ADJUST ())
@@ -10438,7 +10516,9 @@
|| XINT (x, 1) == UNSPEC_PLT
|| XINT (x, 1) == UNSPEC_PCREL
|| XINT (x, 1) == UNSPEC_SYMOFF
- || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
+ || XINT (x, 1) == UNSPEC_PCREL_SYMOFF
+ || XINT (x, 1) == UNSPEC_GOTFUNCDESC
+ || XINT (x, 1) == UNSPEC_GOTOFFFUNCDESC))
return false;
fmt = GET_RTX_FORMAT (GET_CODE (x));
@@ -10473,7 +10553,26 @@
if (reg == NULL_RTX)
reg = gen_reg_rtx (Pmode);
- emit_insn (gen_symGOTOFF2reg (reg, orig));
+ if (TARGET_FDPIC
+ && GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (orig))
+ {
+ /* Weak functions may be NULL which doesn't work with
+ GOTOFFFUNCDESC because the runtime offset is not known. */
+ if (SYMBOL_REF_WEAK (orig))
+ emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
+ else
+ emit_insn (gen_symGOTOFFFUNCDESC2reg (reg, orig));
+ }
+ else if (TARGET_FDPIC
+ && (GET_CODE (orig) == LABEL_REF
+ || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_DECL (orig)
+ && (TREE_READONLY (SYMBOL_REF_DECL (orig))
+ || SYMBOL_REF_EXTERNAL_P (orig)
+ || DECL_SECTION_NAME(SYMBOL_REF_DECL (orig))))))
+ /* In FDPIC, GOTOFF can only be used for writable data. */
+ emit_insn (gen_symGOT2reg (reg, orig));
+ else
+ emit_insn (gen_symGOTOFF2reg (reg, orig));
return reg;
}
else if (GET_CODE (orig) == SYMBOL_REF)
@@ -10481,7 +10580,10 @@
if (reg == NULL_RTX)
reg = gen_reg_rtx (Pmode);
- emit_insn (gen_symGOT2reg (reg, orig));
+ if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (orig))
+ emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
+ else
+ emit_insn (gen_symGOT2reg (reg, orig));
return reg;
}
return orig;
@@ -11519,9 +11621,40 @@
5 0008 00000000 l1: .long area
6 000c 00000000 l2: .long function
+ FDPIC needs a form that includes a function descriptor and
+ code to load the GOT register:
+ 0 0000 00000000 .long l0
+ 1 0004 00000000 .long gotval
+ 2 0008 D302 l0: mov.l l1,r3
+ 3 000a D203 mov.l l2,r2
+ 4 000c 6122 mov.l @r2,r1
+ 5 000e 5C21 mov.l @(4,r2),r12
+ 6 0010 412B jmp @r1
+ 7 0012 0009 nop
+ 8 0014 00000000 l1: .long area
+ 9 0018 00000000 l2: .long function
+
SH5 (compact) uses r1 instead of r3 for the static chain. */
+/* Emit insns to store a value at memory address + offset. */
+static void
+sh_emit_storesi (rtx addr, HOST_WIDE_INT offset, rtx value)
+{
+ gcc_assert ((offset & 3) == 0);
+ emit_move_insn (offset == 0
+ ? change_address (addr, SImode, NULL_RTX)
+ : adjust_address (addr, SImode, offset), value);
+}
+/* Emit insns to store w0 at addr + offset and w1 at addr + offset + 2. */
+static void
+sh_emit_storehi (rtx addr, HOST_WIDE_INT offset, uint16_t w0, uint16_t w1)
+{
+ sh_emit_storesi (addr, offset, gen_int_mode (TARGET_LITTLE_ENDIAN
+ ? (w0 | (w1 << 16))
+ : (w1 | (w0 << 16)), SImode));
+}
+
/* Emit RTL insns to initialize the variable parts of a trampoline.
FNADDR is an RTX for the address of the function's pure code.
CXT is an RTX for the static chain value for the function. */
@@ -11655,20 +11788,34 @@
emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
return;
}
- emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
- gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
- SImode));
- emit_move_insn (adjust_address (tramp_mem, SImode, 4),
- gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
- SImode));
- emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
- emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
+ if (TARGET_FDPIC)
+ {
+ rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8));
+
+ sh_emit_storesi (tramp_mem, 0, a);
+ sh_emit_storesi (tramp_mem, 4, sh_get_fdpic_reg_initial_val ());
+
+ sh_emit_storehi (tramp_mem, 8, 0xd302, 0xd203);
+ sh_emit_storehi (tramp_mem, 12, 0x6122, 0x5c21);
+ sh_emit_storehi (tramp_mem, 16, 0x412b, 0x0009);
+
+ sh_emit_storesi (tramp_mem, 20, cxt);
+ sh_emit_storesi (tramp_mem, 24, fnaddr);
+ }
+ else
+ {
+ sh_emit_storehi (tramp_mem, 0, 0xd202, 0xd301);
+ sh_emit_storehi (tramp_mem, 4, 0x422b, 0x0009);
+
+ sh_emit_storesi (tramp_mem, 8, cxt);
+ sh_emit_storesi (tramp_mem, 12, fnaddr);
+ }
if (TARGET_HARD_SH4 || TARGET_SH5)
{
if (!TARGET_INLINE_IC_INVALIDATE
|| (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
emit_library_call (function_symbol (NULL, "__ic_invalidate",
- FUNCTION_ORDINARY),
+ FUNCTION_ORDINARY).sym,
LCT_NORMAL, VOIDmode, 1, tramp, SImode);
else
emit_insn (gen_ic_invalidate_line (tramp));
@@ -11698,7 +11845,7 @@
&& (! TARGET_SHCOMPACT
|| crtl->args.info.stack_regs == 0)
&& ! sh_cfun_interrupt_handler_p ()
- && (! flag_pic
+ && (! flag_pic || TARGET_FDPIC
|| (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl)))
|| (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
}
@@ -11712,7 +11859,7 @@
if (!is_weak && SYMBOL_REF_LOCAL_P (sym))
emit_insn (gen_sym_label2reg (reg, sym, lab));
- else if (sibcall_p)
+ else if (sibcall_p && SYMBOL_REF_LOCAL_P (sym))
emit_insn (gen_symPCREL_label2reg (reg, sym, lab));
else
emit_insn (gen_symPLT_label2reg (reg, sym, lab));
@@ -12715,8 +12862,16 @@
#endif
if (TARGET_SH2 && flag_pic)
{
- sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
- XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
+ if (TARGET_FDPIC)
+ {
+ sibcall = gen_sibcall_pcrel_fdpic (funexp, const0_rtx);
+ XEXP (XVECEXP (sibcall, 0, 3), 0) = scratch2;
+ }
+ else
+ {
+ sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
+ XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
+ }
}
else
{
@@ -12757,17 +12912,25 @@
epilogue_completed = 0;
}
-rtx
-function_symbol (rtx target, const char *name, enum sh_function_kind kind)
+/* Return an RTX pair for the address and call site label of a function
+ NAME of kind KIND, placing the result in TARGET if not NULL. For
+ SFUNC_STATIC, if FDPIC, the LAB member of result will be set to
+ (const_int 0) if jsr should be used, or a label_ref if bsrf should
+ be used. For FDPIC, both SFUNC_GOT and SFUNC_STATIC will return the
+ address of the function itself, not a function descriptor, so they
+ can only be used with functions not using the FDPIC register that
+ are known to be called directly without a PLT entry. */
+
+function_symbol_result
+function_symbol (rtx target, const char *name, sh_function_kind kind)
{
- rtx sym;
-
/* If this is not an ordinary function, the name usually comes from a
string literal or an sprintf buffer. Make sure we use the same
string consistently, so that cse will be able to unify address loads. */
if (kind != FUNCTION_ORDINARY)
name = IDENTIFIER_POINTER (get_identifier (name));
- sym = gen_rtx_SYMBOL_REF (Pmode, name);
+ rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
+ rtx lab = const0_rtx;
SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
if (flag_pic)
switch (kind)
@@ -12784,14 +12947,25 @@
}
case SFUNC_STATIC:
{
- /* ??? To allow cse to work, we use GOTOFF relocations.
- We could add combiner patterns to transform this into
- straight pc-relative calls with sym2PIC / bsrf when
- label load and function call are still 1:1 and in the
- same basic block during combine. */
rtx reg = target ? target : gen_reg_rtx (Pmode);
- emit_insn (gen_symGOTOFF2reg (reg, sym));
+ if (TARGET_FDPIC)
+ {
+ /* We use PC-relative calls, since GOTOFF can only refer
+ to writable data. This works along with sh_sfunc_call. */
+ lab = PATTERN (gen_call_site ());
+ emit_insn (gen_sym_label2reg (reg, sym, lab));
+ }
+ else
+ {
+ /* ??? To allow cse to work, we use GOTOFF relocations.
+ we could add combiner patterns to transform this into
+ straight pc-relative calls with sym2PIC / bsrf when
+ label load and function call are still 1:1 and in the
+ same basic block during combine. */
+ emit_insn (gen_symGOTOFF2reg (reg, sym));
+ }
+
sym = reg;
break;
}
@@ -12799,9 +12973,9 @@
if (target && sym != target)
{
emit_move_insn (target, sym);
- return target;
+ return function_symbol_result (target, lab);
}
- return sym;
+ return function_symbol_result (sym, lab);
}
/* Find the number of a general purpose register in S. */
@@ -13414,6 +13588,12 @@
fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
}
+ if (TARGET_FDPIC)
+ {
+ fixed_regs[PIC_REG] = 1;
+ call_used_regs[PIC_REG] = 1;
+ call_really_used_regs[PIC_REG] = 1;
+ }
/* Renesas saves and restores mac registers on call. */
if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
{
@@ -13442,14 +13622,32 @@
static bool
sh_legitimate_constant_p (machine_mode mode, rtx x)
{
- return (TARGET_SHMEDIA
- ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
- || x == CONST0_RTX (mode)
- || !TARGET_SHMEDIA_FPU
- || TARGET_SHMEDIA64)
- : (GET_CODE (x) != CONST_DOUBLE
- || mode == DFmode || mode == SFmode
- || mode == DImode || GET_MODE (x) == VOIDmode));
+ if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
+ {
+ rtx base, offset;
+ split_const (x, &base, &offset);
+
+ if (GET_CODE (base) == SYMBOL_REF
+ && !offset_within_block_p (base, INTVAL (offset)))
+ return false;
+ }
+
+ if (TARGET_FDPIC
+ && (SYMBOLIC_CONST_P (x)
+ || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
+ && SYMBOLIC_CONST_P (XEXP (XEXP (x, 0), 0)))))
+ return false;
+
+ if (TARGET_SHMEDIA
+ && ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
+ || x == CONST0_RTX (mode)
+ || !TARGET_SHMEDIA_FPU
+ || TARGET_SHMEDIA64))
+ return false;
+
+ return GET_CODE (x) != CONST_DOUBLE
+ || mode == DFmode || mode == SFmode
+ || mode == DImode || GET_MODE (x) == VOIDmode;
}
enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
@@ -14540,4 +14738,41 @@
}
}
+bool
+sh_cannot_force_const_mem_p (machine_mode mode ATTRIBUTE_UNUSED,
+ rtx x ATTRIBUTE_UNUSED)
+{
+ return TARGET_FDPIC;
+}
+
+/* Emit insns to load the function address from FUNCDESC (an FDPIC
+ function descriptor) into r1 and the GOT address into r12,
+ returning an rtx for r1. */
+
+rtx
+sh_load_function_descriptor (rtx funcdesc)
+{
+ rtx r1 = gen_rtx_REG (Pmode, R1_REG);
+ rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+ rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
+ rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
+
+ emit_move_insn (r1, fnaddr);
+ /* The ABI requires the entry point address to be loaded first, so
+ prevent the load from being moved after that of the GOT
+ address. */
+ emit_insn (gen_blockage ());
+ emit_move_insn (pic_reg, gotaddr);
+ return r1;
+}
+
+/* Return an rtx holding the initial value of the FDPIC register (the
+ FDPIC pointer passed in from the caller). */
+
+rtx
+sh_get_fdpic_reg_initial_val (void)
+{
+ return get_hard_reg_initial_val (Pmode, PIC_REG);
+}
+
#include "gt-sh.h"
Index: gcc/config/sh/sh.h
===================================================================
--- gcc/config/sh/sh.h (revision 229290)
+++ gcc/config/sh/sh.h (working copy)
@@ -316,7 +316,7 @@
#endif
#ifndef SUBTARGET_ASM_SPEC
-#define SUBTARGET_ASM_SPEC ""
+#define SUBTARGET_ASM_SPEC "%{mfdpic:--fdpic}"
#endif
#if TARGET_ENDIAN_DEFAULT == MASK_LITTLE_ENDIAN
@@ -344,7 +344,7 @@
#define ASM_ISA_DEFAULT_SPEC ""
#endif /* MASK_SH5 */
-#define SUBTARGET_LINK_EMUL_SUFFIX ""
+#define SUBTARGET_LINK_EMUL_SUFFIX "%{mfdpic:_fd}"
#define SUBTARGET_LINK_SPEC ""
/* Go via SH_LINK_SPEC to avoid code replication. */
@@ -378,9 +378,19 @@
"%{m2a*:%eSH2a does not support little-endian}}"
#endif
+#ifdef FDPIC_DEFAULT
+#define FDPIC_SELF_SPECS "%{!mno-fdpic:-mfdpic}"
+#else
+#define FDPIC_SELF_SPECS
+#endif
+
#undef DRIVER_SELF_SPECS
-#define DRIVER_SELF_SPECS UNSUPPORTED_SH2A
+#define DRIVER_SELF_SPECS UNSUPPORTED_SH2A SUBTARGET_DRIVER_SELF_SPECS \
+ FDPIC_SELF_SPECS
+#undef SUBTARGET_DRIVER_SELF_SPECS
+#define SUBTARGET_DRIVER_SELF_SPECS
+
#define ASSEMBLER_DIALECT assembler_dialect
extern int assembler_dialect;
@@ -937,6 +947,10 @@
code access to data items. */
#define PIC_OFFSET_TABLE_REGNUM (flag_pic ? PIC_REG : INVALID_REGNUM)
+/* For FDPIC, the FDPIC register is call-clobbered (otherwise PLT
+ entries would need to handle saving and restoring it). */
+#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED TARGET_FDPIC
+
#define GOT_SYMBOL_NAME "*_GLOBAL_OFFSET_TABLE_"
/* Definitions for register eliminations.
@@ -1561,7 +1575,8 @@
6 000c 00000000 l2: .long function */
/* Length in units of the trampoline for entering a nested function. */
-#define TRAMPOLINE_SIZE (TARGET_SHMEDIA64 ? 40 : TARGET_SH5 ? 24 : 16)
+#define TRAMPOLINE_SIZE \
+ (TARGET_SHMEDIA64 ? 40 : TARGET_SH5 ? 24 : TARGET_FDPIC ? 32 : 16)
/* Alignment required for a trampoline in bits. */
#define TRAMPOLINE_ALIGNMENT \
@@ -1617,6 +1632,10 @@
|| GENERAL_REGISTER_P ((unsigned) reg_renumber[(REGNO)])) \
: (REGNO) == R0_REG || (unsigned) reg_renumber[(REGNO)] == R0_REG)
+/* True if SYMBOL + OFFSET constants must refer to something within
+ SYMBOL's section. */
+#define SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P TARGET_FDPIC
+
/* Maximum number of registers that can appear in a valid memory
address. */
#define MAX_REGS_PER_ADDRESS 2
@@ -2257,9 +2276,11 @@
/* We have to distinguish between code and data, so that we apply
datalabel where and only where appropriate. Use sdataN for data. */
#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
- ((flag_pic && (GLOBAL) ? DW_EH_PE_indirect : 0) \
- | (flag_pic ? DW_EH_PE_pcrel : DW_EH_PE_absptr) \
- | ((CODE) ? 0 : (TARGET_SHMEDIA64 ? DW_EH_PE_sdata8 : DW_EH_PE_sdata4)))
+ ((TARGET_FDPIC \
+ ? ((GLOBAL) ? DW_EH_PE_indirect | DW_EH_PE_datarel : DW_EH_PE_pcrel) \
+ : ((flag_pic && (GLOBAL) ? DW_EH_PE_indirect : 0) \
+ | (flag_pic ? DW_EH_PE_pcrel : DW_EH_PE_absptr))) \
+ | ((CODE) ? 0 : (TARGET_SHMEDIA64 ? DW_EH_PE_sdata8 : DW_EH_PE_sdata4)))
/* Handle special EH pointer encodings. Absolute, pc-relative, and
indirect are handled automatically. */
@@ -2272,6 +2293,17 @@
SYMBOL_REF_FLAGS (ADDR) |= SYMBOL_FLAG_FUNCTION; \
if (0) goto DONE; \
} \
+ if (TARGET_FDPIC \
+ && ((ENCODING) & 0xf0) == (DW_EH_PE_indirect | DW_EH_PE_datarel)) \
+ { \
+ fputs ("\t.ualong ", FILE); \
+ output_addr_const (FILE, ADDR); \
+ if (GET_CODE (ADDR) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (ADDR)) \
+ fputs ("@GOTFUNCDESC", FILE); \
+ else \
+ fputs ("@GOT", FILE); \
+ goto DONE; \
+ } \
} while (0)
#if (defined CRT_BEGIN || defined CRT_END) && ! __SHMEDIA__
Index: gcc/config/sh/sh.md
===================================================================
--- gcc/config/sh/sh.md (revision 229290)
+++ gcc/config/sh/sh.md (working copy)
@@ -170,6 +170,9 @@
UNSPEC_SYMOFF
;; (unspec [OFFSET ANCHOR] UNSPEC_PCREL_SYMOFF) == OFFSET - (ANCHOR - .).
UNSPEC_PCREL_SYMOFF
+ ;; For FDPIC
+ UNSPEC_GOTFUNCDESC
+ UNSPEC_GOTOFFFUNCDESC
;; Misc builtins
UNSPEC_BUILTIN_STRLEN
])
@@ -2591,15 +2594,18 @@
;; This reload would clobber the value in r0 we are trying to store.
;; If we let reload allocate r0, then this problem can never happen.
(define_insn "udivsi3_i1"
- [(set (match_operand:SI 0 "register_operand" "=z")
+ [(set (match_operand:SI 0 "register_operand" "=z,z")
(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))
(clobber (reg:SI R1_REG))
(clobber (reg:SI R4_REG))
- (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+ (use (match_operand 2 "" "Z,Ccl"))]
"TARGET_SH1 && TARGET_DIVIDE_CALL_DIV1"
- "jsr @%1%#"
+ "@
+ jsr @%1%#
+ bsrf %1\n%O2:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -2648,7 +2654,7 @@
})
(define_insn "udivsi3_i4"
- [(set (match_operand:SI 0 "register_operand" "=y")
+ [(set (match_operand:SI 0 "register_operand" "=y,y")
(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))
@@ -2660,16 +2666,19 @@
(clobber (reg:SI R4_REG))
(clobber (reg:SI R5_REG))
(clobber (reg:SI FPSCR_STAT_REG))
- (use (match_operand:SI 1 "arith_reg_operand" "r"))
+ (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+ (use (match_operand 2 "" "Z,Ccl"))
(use (reg:SI FPSCR_MODES_REG))]
"TARGET_FPU_DOUBLE && ! TARGET_FPU_SINGLE"
- "jsr @%1%#"
+ "@
+ jsr @%1%#
+ bsrf %1\n%O2:%#"
[(set_attr "type" "sfunc")
(set_attr "fp_mode" "double")
(set_attr "needs_delay_slot" "yes")])
(define_insn "udivsi3_i4_single"
- [(set (match_operand:SI 0 "register_operand" "=y")
+ [(set (match_operand:SI 0 "register_operand" "=y,y")
(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))
@@ -2680,10 +2689,13 @@
(clobber (reg:SI R1_REG))
(clobber (reg:SI R4_REG))
(clobber (reg:SI R5_REG))
- (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+ (use (match_operand 2 "" "Z,Ccl"))]
"(TARGET_FPU_SINGLE_ONLY || TARGET_FPU_DOUBLE || TARGET_SHCOMPACT)
&& TARGET_FPU_SINGLE"
- "jsr @%1%#"
+ "@
+ jsr @%1%#
+ bsrf %1\n%O2:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -2742,11 +2754,11 @@
}
else if (TARGET_DIVIDE_CALL_FP)
{
- function_symbol (operands[3], "__udivsi3_i4", SFUNC_STATIC);
+ rtx lab = function_symbol (operands[3], "__udivsi3_i4", SFUNC_STATIC).lab;
if (TARGET_FPU_SINGLE)
- last = gen_udivsi3_i4_single (operands[0], operands[3]);
+ last = gen_udivsi3_i4_single (operands[0], operands[3], lab);
else
- last = gen_udivsi3_i4 (operands[0], operands[3]);
+ last = gen_udivsi3_i4 (operands[0], operands[3], lab);
}
else if (TARGET_SHMEDIA_FPU)
{
@@ -2771,14 +2783,14 @@
if (TARGET_SHMEDIA)
last = gen_udivsi3_i1_media (operands[0], operands[3]);
else if (TARGET_FPU_ANY)
- last = gen_udivsi3_i4_single (operands[0], operands[3]);
+ last = gen_udivsi3_i4_single (operands[0], operands[3], const0_rtx);
else
- last = gen_udivsi3_i1 (operands[0], operands[3]);
+ last = gen_udivsi3_i1 (operands[0], operands[3], const0_rtx);
}
else
{
- function_symbol (operands[3], "__udivsi3", SFUNC_STATIC);
- last = gen_udivsi3_i1 (operands[0], operands[3]);
+ rtx lab = function_symbol (operands[3], "__udivsi3", SFUNC_STATIC).lab;
+ last = gen_udivsi3_i1 (operands[0], operands[3], lab);
}
emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]);
@@ -2906,7 +2918,7 @@
emit_move_insn (gen_rtx_REG (DImode, R20_REG), x);
break;
}
- sym = function_symbol (NULL, name, kind);
+ sym = function_symbol (NULL, name, kind).sym;
emit_insn (gen_divsi3_media_2 (operands[0], sym));
DONE;
}
@@ -2926,31 +2938,37 @@
})
(define_insn "divsi3_i4"
- [(set (match_operand:SI 0 "register_operand" "=y")
+ [(set (match_operand:SI 0 "register_operand" "=y,y")
(div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
(clobber (reg:SI PR_REG))
(clobber (reg:DF DR0_REG))
(clobber (reg:DF DR2_REG))
(clobber (reg:SI FPSCR_STAT_REG))
- (use (match_operand:SI 1 "arith_reg_operand" "r"))
+ (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+ (use (match_operand 2 "" "Z,Ccl"))
(use (reg:SI FPSCR_MODES_REG))]
"TARGET_FPU_DOUBLE && ! TARGET_FPU_SINGLE"
- "jsr @%1%#"
+ "@
+ jsr @%1%#
+ bsrf %1\n%O2:%#"
[(set_attr "type" "sfunc")
(set_attr "fp_mode" "double")
(set_attr "needs_delay_slot" "yes")])
(define_insn "divsi3_i4_single"
- [(set (match_operand:SI 0 "register_operand" "=y")
+ [(set (match_operand:SI 0 "register_operand" "=y,y")
(div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
(clobber (reg:SI PR_REG))
(clobber (reg:DF DR0_REG))
(clobber (reg:DF DR2_REG))
(clobber (reg:SI R2_REG))
- (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+ (use (match_operand 2 "" "Z,Ccl"))]
"(TARGET_FPU_SINGLE_ONLY || TARGET_FPU_DOUBLE || TARGET_SHCOMPACT)
&& TARGET_FPU_SINGLE"
- "jsr @%1%#"
+ "@
+ jsr @%1%#
+ bsrf %1\n%O2:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -2994,11 +3012,12 @@
}
else if (TARGET_DIVIDE_CALL_FP)
{
- function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_STATIC);
+ rtx lab = function_symbol (operands[3], sh_divsi3_libfunc,
+ SFUNC_STATIC).lab;
if (TARGET_FPU_SINGLE)
- last = gen_divsi3_i4_single (operands[0], operands[3]);
+ last = gen_divsi3_i4_single (operands[0], operands[3], lab);
else
- last = gen_divsi3_i4 (operands[0], operands[3]);
+ last = gen_divsi3_i4 (operands[0], operands[3], lab);
}
else if (TARGET_SH2A)
{
@@ -3113,7 +3132,7 @@
last = ((TARGET_DIVIDE_CALL2 ? gen_divsi3_media_2 : gen_divsi3_i1_media)
(operands[0], operands[3]));
else if (TARGET_FPU_ANY)
- last = gen_divsi3_i4_single (operands[0], operands[3]);
+ last = gen_divsi3_i4_single (operands[0], operands[3], const0_rtx);
else
last = gen_divsi3_i1 (operands[0], operands[3]);
}
@@ -3713,7 +3732,7 @@
{
/* The address must be set outside the libcall,
since it goes into a pseudo. */
- rtx sym = function_symbol (NULL, "__mulsi3", SFUNC_STATIC);
+ rtx sym = function_symbol (NULL, "__mulsi3", SFUNC_STATIC).sym;
rtx addr = force_reg (SImode, sym);
rtx insns = gen_mulsi3_call (operands[0], operands[1],
operands[2], addr);
@@ -4970,8 +4989,8 @@
{
emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]);
rtx funcaddr = gen_reg_rtx (Pmode);
- function_symbol (funcaddr, "__ashlsi3_r0", SFUNC_STATIC);
- emit_insn (gen_ashlsi3_d_call (operands[0], operands[2], funcaddr));
+ rtx lab = function_symbol (funcaddr, "__ashlsi3_r0", SFUNC_STATIC).lab;
+ emit_insn (gen_ashlsi3_d_call (operands[0], operands[2], funcaddr, lab));
DONE;
}
@@ -5024,15 +5043,18 @@
;; In order to make combine understand the truncation of the shift amount
;; operand we have to allow it to use pseudo regs for the shift operands.
(define_insn "ashlsi3_d_call"
- [(set (match_operand:SI 0 "arith_reg_dest" "=z")
+ [(set (match_operand:SI 0 "arith_reg_dest" "=z,z")
(ashift:SI (reg:SI R4_REG)
- (and:SI (match_operand:SI 1 "arith_reg_operand" "z")
+ (and:SI (match_operand:SI 1 "arith_reg_operand" "z,z")
(const_int 31))))
- (use (match_operand:SI 2 "arith_reg_operand" "r"))
+ (use (match_operand:SI 2 "arith_reg_operand" "r,r"))
+ (use (match_operand 3 "" "Z,Ccl"))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))]
"TARGET_SH1 && !TARGET_DYNSHIFT"
- "jsr @%2%#"
+ "@
+ jsr @%2%#
+ bsrf %2\n%O3:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -5374,12 +5396,15 @@
(define_insn "ashrsi3_n"
[(set (reg:SI R4_REG)
(ashiftrt:SI (reg:SI R4_REG)
- (match_operand:SI 0 "const_int_operand" "i")))
+ (match_operand:SI 0 "const_int_operand" "i,i")))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))
- (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+ (use (match_operand 2 "" "Z,Ccl"))]
"TARGET_SH1"
- "jsr @%1%#"
+ "@
+ jsr @%1%#
+ bsrf %1\n%O2:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -5532,8 +5557,8 @@
{
emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]);
rtx funcaddr = gen_reg_rtx (Pmode);
- function_symbol (funcaddr, "__lshrsi3_r0", SFUNC_STATIC);
- emit_insn (gen_lshrsi3_d_call (operands[0], operands[2], funcaddr));
+ rtx lab = function_symbol (funcaddr, "__lshrsi3_r0", SFUNC_STATIC).lab;
+ emit_insn (gen_lshrsi3_d_call (operands[0], operands[2], funcaddr, lab));
DONE;
}
})
@@ -5585,15 +5610,18 @@
;; In order to make combine understand the truncation of the shift amount
;; operand we have to allow it to use pseudo regs for the shift operands.
(define_insn "lshrsi3_d_call"
- [(set (match_operand:SI 0 "arith_reg_dest" "=z")
+ [(set (match_operand:SI 0 "arith_reg_dest" "=z,z")
(lshiftrt:SI (reg:SI R4_REG)
- (and:SI (match_operand:SI 1 "arith_reg_operand" "z")
+ (and:SI (match_operand:SI 1 "arith_reg_operand" "z,z")
(const_int 31))))
- (use (match_operand:SI 2 "arith_reg_operand" "r"))
+ (use (match_operand:SI 2 "arith_reg_operand" "r,r"))
+ (use (match_operand 3 "" "Z,Ccl"))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))]
"TARGET_SH1 && !TARGET_DYNSHIFT"
- "jsr @%2%#"
+ "@
+ jsr @%2%#
+ bsrf %2\n%O3:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -7315,7 +7343,7 @@
}
else if (TARGET_SHCOMPACT)
{
- operands[1] = function_symbol (NULL, "__ic_invalidate", SFUNC_STATIC);
+ operands[1] = function_symbol (NULL, "__ic_invalidate", SFUNC_STATIC).sym;
operands[1] = force_reg (Pmode, operands[1]);
emit_insn (gen_ic_invalidate_line_compact (operands[0], operands[1]));
DONE;
@@ -7397,7 +7425,7 @@
tramp = force_reg (Pmode, operands[0]);
sfun = force_reg (Pmode, function_symbol (NULL, "__init_trampoline",
- SFUNC_STATIC));
+ SFUNC_STATIC).sym);
emit_move_insn (gen_rtx_REG (SImode, R2_REG), operands[1]);
emit_move_insn (gen_rtx_REG (SImode, R3_REG), operands[2]);
@@ -9455,9 +9483,9 @@
(match_operand 1 "" ""))
(use (reg:SI FPSCR_MODES_REG))
(clobber (reg:SI PR_REG))]
- "TARGET_SH1"
+ "TARGET_SH1 && !TARGET_FDPIC"
{
- if (TARGET_SH2A && (dbr_sequence_length () == 0))
+ if (TARGET_SH2A && dbr_sequence_length () == 0)
return "jsr/n @%0";
else
return "jsr @%0%#";
@@ -9469,6 +9497,26 @@
(set_attr "needs_delay_slot" "yes")
(set_attr "fp_set" "unknown")])
+(define_insn "calli_fdpic"
+ [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (match_operand 1))
+ (use (reg:SI FPSCR_MODES_REG))
+ (use (reg:SI PIC_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_FDPIC"
+{
+ if (TARGET_SH2A && dbr_sequence_length () == 0)
+ return "jsr/n @%0";
+ else
+ return "jsr @%0%#";
+}
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")
+ (set_attr "fp_set" "unknown")])
+
;; This is TBR relative jump instruction for SH2A architecture.
;; Its use is enabled by assigning an attribute "function_vector"
;; and the vector number to a function during its declaration.
@@ -9584,9 +9632,9 @@
(match_operand 2 "" "")))
(use (reg:SI FPSCR_MODES_REG))
(clobber (reg:SI PR_REG))]
- "TARGET_SH1"
+ "TARGET_SH1 && !TARGET_FDPIC"
{
- if (TARGET_SH2A && (dbr_sequence_length () == 0))
+ if (TARGET_SH2A && dbr_sequence_length () == 0)
return "jsr/n @%1";
else
return "jsr @%1%#";
@@ -9598,6 +9646,27 @@
(set_attr "needs_delay_slot" "yes")
(set_attr "fp_set" "unknown")])
+(define_insn "call_valuei_fdpic"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (match_operand 2)))
+ (use (reg:SI FPSCR_REG))
+ (use (reg:SI PIC_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_FDPIC"
+{
+ if (TARGET_SH2A && dbr_sequence_length () == 0)
+ return "jsr/n @%1";
+ else
+ return "jsr @%1%#";
+}
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")
+ (set_attr "fp_set" "unknown")])
+
;; This is TBR relative jump instruction for SH2A architecture.
;; Its use is enabled by assigning an attribute "function_vector"
;; and the vector number to a function during its declaration.
@@ -9721,6 +9790,12 @@
(clobber (reg:SI PR_REG))])]
""
{
+ if (TARGET_FDPIC)
+ {
+ rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+ emit_move_insn (pic_reg, sh_get_fdpic_reg_initial_val ());
+ }
+
if (TARGET_SHMEDIA)
{
operands[0] = shmedia_prepare_call_address (operands[0], 0);
@@ -9755,8 +9830,8 @@
run out of registers when adjusting fpscr for the call. */
emit_insn (gen_force_mode_for_call ());
- operands[0]
- = function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+ operands[0] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+ SFUNC_GOT).sym;
operands[0] = force_reg (SImode, operands[0]);
emit_move_insn (r0, func);
@@ -9804,7 +9879,13 @@
operands[1] = operands[2];
}
- emit_call_insn (gen_calli (operands[0], operands[1]));
+ if (TARGET_FDPIC)
+ {
+ operands[0] = sh_load_function_descriptor (operands[0]);
+ emit_call_insn (gen_calli_fdpic (operands[0], operands[1]));
+ }
+ else
+ emit_call_insn (gen_calli (operands[0], operands[1]));
DONE;
})
@@ -9884,7 +9965,7 @@
emit_insn (gen_force_mode_for_call ());
operands[0] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
- SFUNC_GOT);
+ SFUNC_GOT).sym;
operands[0] = force_reg (SImode, operands[0]);
emit_move_insn (r0, func);
@@ -9909,6 +9990,12 @@
(clobber (reg:SI PR_REG))])]
""
{
+ if (TARGET_FDPIC)
+ {
+ rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+ emit_move_insn (pic_reg, sh_get_fdpic_reg_initial_val ());
+ }
+
if (TARGET_SHMEDIA)
{
operands[1] = shmedia_prepare_call_address (operands[1], 0);
@@ -9944,8 +10031,8 @@
run out of registers when adjusting fpscr for the call. */
emit_insn (gen_force_mode_for_call ());
- operands[1]
- = function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+ operands[1] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+ SFUNC_GOT).sym;
operands[1] = force_reg (SImode, operands[1]);
emit_move_insn (r0, func);
@@ -9993,7 +10080,14 @@
else
operands[1] = force_reg (SImode, XEXP (operands[1], 0));
- emit_call_insn (gen_call_valuei (operands[0], operands[1], operands[2]));
+ if (TARGET_FDPIC)
+ {
+ operands[1] = sh_load_function_descriptor (operands[1]);
+ emit_call_insn (gen_call_valuei_fdpic (operands[0], operands[1],
+ operands[2]));
+ }
+ else
+ emit_call_insn (gen_call_valuei (operands[0], operands[1], operands[2]));
DONE;
})
@@ -10002,7 +10096,7 @@
(match_operand 1 "" ""))
(use (reg:SI FPSCR_MODES_REG))
(return)]
- "TARGET_SH1"
+ "TARGET_SH1 && !TARGET_FDPIC"
"jmp @%0%#"
[(set_attr "needs_delay_slot" "yes")
(set (attr "fp_mode")
@@ -10010,6 +10104,20 @@
(const_string "single") (const_string "double")))
(set_attr "type" "jump_ind")])
+(define_insn "sibcalli_fdpic"
+ [(call (mem:SI (match_operand:SI 0 "register_operand" "k"))
+ (match_operand 1))
+ (use (reg:SI FPSCR_MODES_REG))
+ (use (reg:SI PIC_REG))
+ (return)]
+ "TARGET_FDPIC"
+ "jmp @%0%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
(define_insn "sibcalli_pcrel"
[(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "k"))
(match_operand 1 "" ""))
@@ -10016,7 +10124,7 @@
(use (match_operand 2 "" ""))
(use (reg:SI FPSCR_MODES_REG))
(return)]
- "TARGET_SH2"
+ "TARGET_SH2 && !TARGET_FDPIC"
{
return "braf %0" "\n"
"%O2:%#";
@@ -10027,6 +10135,24 @@
(const_string "single") (const_string "double")))
(set_attr "type" "jump_ind")])
+(define_insn "sibcalli_pcrel_fdpic"
+ [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "k"))
+ (match_operand 1))
+ (use (match_operand 2))
+ (use (reg:SI FPSCR_MODES_REG))
+ (use (reg:SI PIC_REG))
+ (return)]
+ "TARGET_SH2 && TARGET_FDPIC"
+{
+ return "braf %0" "\n"
+ "%O2:%#";
+}
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
;; This uses an unspec to describe that the symbol_ref is very close.
(define_insn "sibcalli_thunk"
[(call (mem:SI (unspec:SI [(match_operand:SI 0 "symbol_ref_operand" "")]
@@ -10049,7 +10175,7 @@
(use (reg:SI FPSCR_MODES_REG))
(clobber (match_scratch:SI 2 "=&k"))
(return)]
- "TARGET_SH2"
+ "TARGET_SH2 && !TARGET_FDPIC"
"#"
"reload_completed"
[(const_int 0)]
@@ -10069,6 +10195,32 @@
(const_string "single") (const_string "double")))
(set_attr "type" "jump_ind")])
+(define_insn_and_split "sibcall_pcrel_fdpic"
+ [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand"))
+ (match_operand 1))
+ (use (reg:SI FPSCR_MODES_REG))
+ (use (reg:SI PIC_REG))
+ (clobber (match_scratch:SI 2 "=k"))
+ (return)]
+ "TARGET_SH2 && TARGET_FDPIC"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx lab = PATTERN (gen_call_site ());
+
+ sh_expand_sym_label2reg (operands[2], operands[0], lab, true);
+ rtx i = emit_call_insn (gen_sibcalli_pcrel_fdpic (operands[2], operands[1],
+ copy_rtx (lab)));
+ SIBLING_CALL_P (i) = 1;
+ DONE;
+}
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
(define_insn "sibcall_compact"
[(call (mem:SI (match_operand:SI 0 "register_operand" "k,k"))
(match_operand 1 "" ""))
@@ -10113,6 +10265,12 @@
(return)])]
""
{
+ if (TARGET_FDPIC)
+ {
+ rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+ emit_move_insn (pic_reg, sh_get_fdpic_reg_initial_val ());
+ }
+
if (TARGET_SHMEDIA)
{
operands[0] = shmedia_prepare_call_address (operands[0], 1);
@@ -10157,8 +10315,8 @@
run out of registers when adjusting fpscr for the call. */
emit_insn (gen_force_mode_for_call ());
- operands[0]
- = function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+ operands[0] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+ SFUNC_GOT).sym;
operands[0] = force_reg (SImode, operands[0]);
/* We don't need a return trampoline, since the callee will
@@ -10192,13 +10350,23 @@
static functions. */
&& SYMBOL_REF_LOCAL_P (XEXP (operands[0], 0)))
{
- emit_call_insn (gen_sibcall_pcrel (XEXP (operands[0], 0), operands[1]));
+ if (TARGET_FDPIC)
+ emit_call_insn (gen_sibcall_pcrel_fdpic (XEXP (operands[0], 0),
+ operands[1]));
+ else
+ emit_call_insn (gen_sibcall_pcrel (XEXP (operands[0], 0), operands[1]));
DONE;
}
else
operands[0] = force_reg (SImode, XEXP (operands[0], 0));
- emit_call_insn (gen_sibcalli (operands[0], operands[1]));
+ if (TARGET_FDPIC)
+ {
+ operands[0] = sh_load_function_descriptor (operands[0]);
+ emit_call_insn (gen_sibcalli_fdpic (operands[0], operands[1]));
+ }
+ else
+ emit_call_insn (gen_sibcalli (operands[0], operands[1]));
DONE;
})
@@ -10208,10 +10376,25 @@
(match_operand 2 "" "")))
(use (reg:SI FPSCR_MODES_REG))
(return)]
- "TARGET_SH1"
+ "TARGET_SH1 && !TARGET_FDPIC"
"jmp @%1%#"
[(set_attr "needs_delay_slot" "yes")
(set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+(define_insn "sibcall_valuei_fdpic"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "register_operand" "k"))
+ (match_operand 2)))
+ (use (reg:SI FPSCR_MODES_REG))
+ (use (reg:SI PIC_REG))
+ (return)]
+ "TARGET_FDPIC"
+ "jmp @%1%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
(if_then_else (eq_attr "fpu_single" "yes")
(const_string "single") (const_string "double")))
(set_attr "type" "jump_ind")])
@@ -10223,7 +10406,7 @@
(use (match_operand 3 "" ""))
(use (reg:SI FPSCR_MODES_REG))
(return)]
- "TARGET_SH2"
+ "TARGET_SH2 && !TARGET_FDPIC"
{
return "braf %1" "\n"
"%O3:%#";
@@ -10234,6 +10417,25 @@
(const_string "single") (const_string "double")))
(set_attr "type" "jump_ind")])
+(define_insn "sibcall_valuei_pcrel_fdpic"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "k"))
+ (match_operand 2)))
+ (use (match_operand 3))
+ (use (reg:SI FPSCR_MODES_REG))
+ (use (reg:SI PIC_REG))
+ (return)]
+ "TARGET_SH2 && TARGET_FDPIC"
+{
+ return "braf %1" "\n"
+ "%O3:%#";
+}
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
(define_insn_and_split "sibcall_value_pcrel"
[(set (match_operand 0 "" "=rf")
(call (mem:SI (match_operand:SI 1 "symbol_ref_operand" ""))
@@ -10241,7 +10443,7 @@
(use (reg:SI FPSCR_MODES_REG))
(clobber (match_scratch:SI 3 "=&k"))
(return)]
- "TARGET_SH2"
+ "TARGET_SH2 && !TARGET_FDPIC"
"#"
"reload_completed"
[(const_int 0)]
@@ -10263,6 +10465,35 @@
(const_string "single") (const_string "double")))
(set_attr "type" "jump_ind")])
+(define_insn_and_split "sibcall_value_pcrel_fdpic"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "symbol_ref_operand"))
+ (match_operand 2)))
+ (use (reg:SI FPSCR_MODES_REG))
+ (use (reg:SI PIC_REG))
+ (clobber (match_scratch:SI 3 "=k"))
+ (return)]
+ "TARGET_SH2 && TARGET_FDPIC"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx lab = PATTERN (gen_call_site ());
+
+ sh_expand_sym_label2reg (operands[3], operands[1], lab, true);
+ rtx i = emit_call_insn (gen_sibcall_valuei_pcrel_fdpic (operands[0],
+ operands[3],
+ operands[2],
+ copy_rtx (lab)));
+ SIBLING_CALL_P (i) = 1;
+ DONE;
+}
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
(define_insn "sibcall_value_compact"
[(set (match_operand 0 "" "=rf,rf")
(call (mem:SI (match_operand:SI 1 "register_operand" "k,k"))
@@ -10310,6 +10541,12 @@
(return)])]
""
{
+ if (TARGET_FDPIC)
+ {
+ rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+ emit_move_insn (pic_reg, sh_get_fdpic_reg_initial_val ());
+ }
+
if (TARGET_SHMEDIA)
{
operands[1] = shmedia_prepare_call_address (operands[1], 1);
@@ -10355,8 +10592,8 @@
run out of registers when adjusting fpscr for the call. */
emit_insn (gen_force_mode_for_call ());
- operands[1]
- = function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+ operands[1] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+ SFUNC_GOT).sym;
operands[1] = force_reg (SImode, operands[1]);
/* We don't need a return trampoline, since the callee will
@@ -10391,15 +10628,27 @@
static functions. */
&& SYMBOL_REF_LOCAL_P (XEXP (operands[1], 0)))
{
- emit_call_insn (gen_sibcall_value_pcrel (operands[0],
- XEXP (operands[1], 0),
- operands[2]));
+ if (TARGET_FDPIC)
+ emit_call_insn (gen_sibcall_value_pcrel_fdpic (operands[0],
+ XEXP (operands[1], 0),
+ operands[2]));
+ else
+ emit_call_insn (gen_sibcall_value_pcrel (operands[0],
+ XEXP (operands[1], 0),
+ operands[2]));
DONE;
}
else
operands[1] = force_reg (SImode, XEXP (operands[1], 0));
- emit_call_insn (gen_sibcall_valuei (operands[0], operands[1], operands[2]));
+ if (TARGET_FDPIC)
+ {
+ operands[1] = sh_load_function_descriptor (operands[1]);
+ emit_call_insn (gen_sibcall_valuei_fdpic (operands[0], operands[1],
+ operands[2]));
+ }
+ else
+ emit_call_insn (gen_sibcall_valuei (operands[0], operands[1], operands[2]));
DONE;
})
@@ -10483,7 +10732,7 @@
emit_insn (gen_force_mode_for_call ());
operands[1] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
- SFUNC_GOT);
+ SFUNC_GOT).sym;
operands[1] = force_reg (SImode, operands[1]);
emit_move_insn (r0, func);
@@ -10681,6 +10930,13 @@
DONE;
}
+ if (TARGET_FDPIC)
+ {
+ rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+ emit_move_insn (pic_reg, sh_get_fdpic_reg_initial_val ());
+ DONE;
+ }
+
operands[1] = gen_rtx_REG (Pmode, PIC_REG);
operands[2] = gen_rtx_SYMBOL_REF (VOIDmode, GOT_SYMBOL_NAME);
@@ -10816,6 +11072,9 @@
rtx mem;
bool stack_chk_guard_p = false;
+ rtx picreg = TARGET_FDPIC ? sh_get_fdpic_reg_initial_val ()
+ : gen_rtx_REG (Pmode, PIC_REG);
+
operands[2] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
operands[3] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
@@ -10858,8 +11117,7 @@
if (stack_chk_guard_p)
emit_insn (gen_chk_guard_add (operands[3], operands[2]));
else
- emit_move_insn (operands[3], gen_rtx_PLUS (Pmode, operands[2],
- gen_rtx_REG (Pmode, PIC_REG)));
+ emit_move_insn (operands[3], gen_rtx_PLUS (Pmode, operands[2], picreg));
/* N.B. This is not constant for a GOTPLT relocation. */
mem = gen_rtx_MEM (Pmode, operands[3]);
@@ -10890,6 +11148,23 @@
DONE;
})
+(define_expand "sym2GOTFUNCDESC"
+ [(const (unspec [(match_operand 0)] UNSPEC_GOTFUNCDESC))]
+ "TARGET_FDPIC")
+
+(define_expand "symGOTFUNCDESC2reg"
+ [(match_operand 0) (match_operand 1)]
+ "TARGET_FDPIC"
+{
+ rtx gotsym = gen_sym2GOTFUNCDESC (operands[1]);
+ PUT_MODE (gotsym, Pmode);
+ rtx insn = emit_insn (gen_symGOT_load (operands[0], gotsym));
+
+ MEM_READONLY_P (SET_SRC (PATTERN (insn))) = 1;
+
+ DONE;
+})
+
(define_expand "symGOTPLT2reg"
[(match_operand 0 "" "") (match_operand 1 "" "")]
""
@@ -10916,12 +11191,13 @@
? operands[0]
: gen_reg_rtx (GET_MODE (operands[0])));
+ rtx picreg = TARGET_FDPIC ? sh_get_fdpic_reg_initial_val ()
+ : gen_rtx_REG (Pmode, PIC_REG);
+
gotoffsym = gen_sym2GOTOFF (operands[1]);
PUT_MODE (gotoffsym, Pmode);
emit_move_insn (t, gotoffsym);
- insn = emit_move_insn (operands[0],
- gen_rtx_PLUS (Pmode, t,
- gen_rtx_REG (Pmode, PIC_REG)));
+ insn = emit_move_insn (operands[0], gen_rtx_PLUS (Pmode, t, picreg));
set_unique_reg_note (insn, REG_EQUAL, operands[1]);
@@ -10928,6 +11204,26 @@
DONE;
})
+(define_expand "sym2GOTOFFFUNCDESC"
+ [(const (unspec [(match_operand 0)] UNSPEC_GOTOFFFUNCDESC))]
+ "TARGET_FDPIC")
+
+(define_expand "symGOTOFFFUNCDESC2reg"
+ [(match_operand 0) (match_operand 1)]
+ "TARGET_FDPIC"
+{
+ rtx picreg = sh_get_fdpic_reg_initial_val ();
+ rtx t = !can_create_pseudo_p ()
+ ? operands[0]
+ : gen_reg_rtx (GET_MODE (operands[0]));
+
+ rtx gotoffsym = gen_sym2GOTOFFFUNCDESC (operands[1]);
+ PUT_MODE (gotoffsym, Pmode);
+ emit_move_insn (t, gotoffsym);
+ emit_move_insn (operands[0], gen_rtx_PLUS (Pmode, t, picreg));
+ DONE;
+})
+
(define_expand "symPLT_label2reg"
[(set (match_operand:SI 0 "" "")
(const:SI
@@ -12678,11 +12974,14 @@
(define_insn "block_move_real"
[(parallel [(set (mem:BLK (reg:SI R4_REG))
(mem:BLK (reg:SI R5_REG)))
- (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+ (use (match_operand 1 "" "Z,Ccl"))
(clobber (reg:SI PR_REG))
(clobber (reg:SI R0_REG))])]
"TARGET_SH1 && ! TARGET_HARD_SH4"
- "jsr @%0%#"
+ "@
+ jsr @%0%#
+ bsrf %0\n%O1:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -12689,7 +12988,8 @@
(define_insn "block_lump_real"
[(parallel [(set (mem:BLK (reg:SI R4_REG))
(mem:BLK (reg:SI R5_REG)))
- (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+ (use (match_operand 1 "" "Z,Ccl"))
(use (reg:SI R6_REG))
(clobber (reg:SI PR_REG))
(clobber (reg:SI T_REG))
@@ -12698,7 +12998,9 @@
(clobber (reg:SI R6_REG))
(clobber (reg:SI R0_REG))])]
"TARGET_SH1 && ! TARGET_HARD_SH4"
- "jsr @%0%#"
+ "@
+ jsr @%0%#
+ bsrf %0\n%O1:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -12705,13 +13007,16 @@
(define_insn "block_move_real_i4"
[(parallel [(set (mem:BLK (reg:SI R4_REG))
(mem:BLK (reg:SI R5_REG)))
- (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+ (use (match_operand 1 "" "Z,Ccl"))
(clobber (reg:SI PR_REG))
(clobber (reg:SI R0_REG))
(clobber (reg:SI R1_REG))
(clobber (reg:SI R2_REG))])]
"TARGET_HARD_SH4"
- "jsr @%0%#"
+ "@
+ jsr @%0%#
+ bsrf %0\n%O1:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -12718,7 +13023,8 @@
(define_insn "block_lump_real_i4"
[(parallel [(set (mem:BLK (reg:SI R4_REG))
(mem:BLK (reg:SI R5_REG)))
- (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+ (use (match_operand 1 "" "Z,Ccl"))
(use (reg:SI R6_REG))
(clobber (reg:SI PR_REG))
(clobber (reg:SI T_REG))
@@ -12730,7 +13036,9 @@
(clobber (reg:SI R2_REG))
(clobber (reg:SI R3_REG))])]
"TARGET_HARD_SH4"
- "jsr @%0%#"
+ "@
+ jsr @%0%#
+ bsrf %0\n%O1:%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
Index: gcc/config/sh/sh.opt
===================================================================
--- gcc/config/sh/sh.opt (revision 229290)
+++ gcc/config/sh/sh.opt (working copy)
@@ -260,6 +260,10 @@
Target RejectNegative Joined Var(sh_divsi3_libfunc) Init("")
Specify name for 32 bit signed division function.
+mfdpic
+Target Report Var(TARGET_FDPIC) Init(0)
+Generate ELF FDPIC code.
+
mfmovd
Target RejectNegative Mask(FMOVD)
Enable the use of 64-bit floating point registers in fmov instructions. See -mdalign if 64-bit alignment is required.
Index: gcc/config.gcc
===================================================================
--- gcc/config.gcc (revision 229290)
+++ gcc/config.gcc (working copy)
@@ -2628,6 +2628,9 @@
tm_file="${tm_file} dbxelf.h elfos.h sh/elf.h"
case ${target} in
sh*-*-linux*) tmake_file="${tmake_file} sh/t-linux"
+ if test x$enable_fdpic = xyes; then
+ tm_defines="$tm_defines FDPIC_DEFAULT=1"
+ fi
tm_file="${tm_file} gnu-user.h linux.h glibc-stdint.h sh/linux.h" ;;
sh*-*-netbsd*)
tm_file="${tm_file} netbsd.h netbsd-elf.h sh/netbsd-elf.h"
Index: gcc/doc/install.texi
===================================================================
--- gcc/doc/install.texi (revision 229290)
+++ gcc/doc/install.texi (working copy)
@@ -1810,6 +1810,9 @@
128-bit @code{long double} when built against GNU C Library 2.4 and later,
64-bit @code{long double} otherwise.
+@item --enable-fdpic
+On SH Linux systems, generate ELF FDPIC code.
+
@item --with-gmp=@var{pathname}
@itemx --with-gmp-include=@var{pathname}
@itemx --with-gmp-lib=@var{pathname}
Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi (revision 229290)
+++ gcc/doc/invoke.texi (working copy)
@@ -21244,6 +21244,10 @@
Prefer zero-displacement conditional branches for conditional move instruction
patterns. This can result in faster code on the SH4 processor.
+@item -mfdpic
+@opindex mfdpic
+Generate code using the FDPIC ABI.
+
@end table
@node Solaris 2 Options
Index: include/longlong.h
===================================================================
--- include/longlong.h (revision 229290)
+++ include/longlong.h (working copy)
@@ -1102,6 +1102,11 @@
/* This is the same algorithm as __udiv_qrnnd_c. */
#define UDIV_NEEDS_NORMALIZATION 1
+#ifdef __FDPIC__
+/* FDPIC needs a special version of the asm fragment to extract the
+ code address from the function descriptor. __udiv_qrnnd_16 is
+ assumed to be local and not to use the GOT, so loading r12 is
+ not needed. */
#define udiv_qrnnd(q, r, n1, n0, d) \
do { \
extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
@@ -1108,6 +1113,28 @@
__attribute__ ((visibility ("hidden"))); \
/* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
__asm__ ( \
+ "mov%M4 %4,r5\n" \
+" swap.w %3,r4\n" \
+" swap.w r5,r6\n" \
+" mov.l @%5,r2\n" \
+" jsr @r2\n" \
+" shll16 r6\n" \
+" swap.w r4,r4\n" \
+" mov.l @%5,r2\n" \
+" jsr @r2\n" \
+" swap.w r1,%0\n" \
+" or r1,%0" \
+ : "=r" (q), "=&z" (r) \
+ : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
+ : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
+ } while (0)
+#else
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { \
+ extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
+ __attribute__ ((visibility ("hidden"))); \
+ /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
+ __asm__ ( \
"mov%M4 %4,r5\n" \
" swap.w %3,r4\n" \
" swap.w r5,r6\n" \
@@ -1121,6 +1148,7 @@
: "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
: "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
} while (0)
+#endif /* __FDPIC__ */
#define UDIV_TIME 80
Index: libitm/config/sh/sjlj.S
===================================================================
--- libitm/config/sh/sjlj.S (revision 229290)
+++ libitm/config/sh/sjlj.S (working copy)
@@ -58,9 +58,6 @@
jsr @r1
mov r15, r5
#else
- mova .Lgot, r0
- mov.l .Lgot, r12
- add r0, r12
mov.l .Lbegin, r1
bsrf r1
mov r15, r5
@@ -79,14 +76,12 @@
nop
cfi_endproc
- .align 2
-.Lgot:
- .long _GLOBAL_OFFSET_TABLE_
+ .align 2
.Lbegin:
#if defined HAVE_ATTRIBUTE_VISIBILITY || !defined __PIC__
.long GTM_begin_transaction
#else
- .long GTM_begin_transaction@PLT-(.Lbegin0-.)
+ .long GTM_begin_transaction@PCREL-(.Lbegin0-.)
#endif
.size _ITM_beginTransaction, . - _ITM_beginTransaction
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v4] SH FDPIC backend support
2015-10-25 14:32 ` Oleg Endo
@ 2015-10-27 3:35 ` Rich Felker
2015-10-27 14:02 ` Oleg Endo
0 siblings, 1 reply; 14+ messages in thread
From: Rich Felker @ 2015-10-27 3:35 UTC (permalink / raw)
To: Oleg Endo; +Cc: gcc-patches
On Sun, Oct 25, 2015 at 11:28:51PM +0900, Oleg Endo wrote:
> On Fri, 2015-10-23 at 02:32 -0400, Rich Felker wrote:
> > Here's my updated version of the FDPIC patch with all requested
> > changes made and Changelog added. I've included all the original
> > authors. This is my first time writing such an extensive Changelog
> > entry so please let me know if there are things I got wrong.
>
> I took the liberty and fixed some minor formatting trivia and extracted
> functions sh_emit_storesi and sh_emit_storehi which are used in
> sh_trampoline_init to effectively memcpy code into the trampoline
> area. Can you please check it? If it's OK I'll commit the attached
> patch to trunk.
Is there anything in particular you'd like me to check? It builds fine
for fdpic target, successfully compiles musl libc.so, and busybox runs
with the resulting libc.so. I did a quick visual inspection of the
diff between my version and yours too and didn't see anything that
looked suspicious to me.
Rich
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v4] SH FDPIC backend support
2015-10-27 3:35 ` Rich Felker
@ 2015-10-27 14:02 ` Oleg Endo
2015-11-10 20:07 ` Rich Felker
0 siblings, 1 reply; 14+ messages in thread
From: Oleg Endo @ 2015-10-27 14:02 UTC (permalink / raw)
To: Rich Felker; +Cc: gcc-patches
On Mon, 2015-10-26 at 22:47 -0400, Rich Felker wrote:
> On Sun, Oct 25, 2015 at 11:28:51PM +0900, Oleg Endo wrote:
> > On Fri, 2015-10-23 at 02:32 -0400, Rich Felker wrote:
> > > Here's my updated version of the FDPIC patch with all requested
> > > changes made and Changelog added. I've included all the original
> > > authors. This is my first time writing such an extensive
> > > Changelog
> > > entry so please let me know if there are things I got wrong.
> >
> > I took the liberty and fixed some minor formatting trivia and
> > extracted
> > functions sh_emit_storesi and sh_emit_storehi which are used in
> > sh_trampoline_init to effectively memcpy code into the trampoline
> > area. Can you please check it? If it's OK I'll commit the
> > attached
> > patch to trunk.
>
> Is there anything in particular you'd like me to check? It builds
> fine
> for fdpic target, successfully compiles musl libc.so, and busybox
> runs
> with the resulting libc.so. I did a quick visual inspection of the
> diff between my version and yours too and didn't see anything that
> looked suspicious to me.
Thanks. I have committed it as r229438 after a sanity check with "make
all" on sh-elf.
The way libcalls are now emitted is a bit unhandy. If more special-ABI
libcalls are to be added in the future, they all have to do the jsr vs.
bsrf handling (some potential candidates for new libcalls are optimized
soft FP routines). Then we still have PR 65374 and PR 54019. In the
future maybe we should come up with something that allows emitting
libcalls in a more transparent way...
Cheers,
Oleg
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v4] SH FDPIC backend support
2015-10-27 14:02 ` Oleg Endo
@ 2015-11-10 20:07 ` Rich Felker
2015-11-11 14:36 ` Oleg Endo
0 siblings, 1 reply; 14+ messages in thread
From: Rich Felker @ 2015-11-10 20:07 UTC (permalink / raw)
To: Oleg Endo; +Cc: gcc-patches
On Tue, Oct 27, 2015 at 11:01:39PM +0900, Oleg Endo wrote:
> On Mon, 2015-10-26 at 22:47 -0400, Rich Felker wrote:
> > On Sun, Oct 25, 2015 at 11:28:51PM +0900, Oleg Endo wrote:
> > > On Fri, 2015-10-23 at 02:32 -0400, Rich Felker wrote:
> > > > Here's my updated version of the FDPIC patch with all requested
> > > > changes made and Changelog added. I've included all the original
> > > > authors. This is my first time writing such an extensive
> > > > Changelog
> > > > entry so please let me know if there are things I got wrong.
> > >
> > > I took the liberty and fixed some minor formatting trivia and
> > > extracted
> > > functions sh_emit_storesi and sh_emit_storehi which are used in
> > > sh_trampoline_init to effectively memcpy code into the trampoline
> > > area. Can you please check it? If it's OK I'll commit the
> > > attached
> > > patch to trunk.
> >
> > Is there anything in particular you'd like me to check? It builds
> > fine
> > for fdpic target, successfully compiles musl libc.so, and busybox
> > runs
> > with the resulting libc.so. I did a quick visual inspection of the
> > diff between my version and yours too and didn't see anything that
> > looked suspicious to me.
>
> Thanks. I have committed it as r229438 after a sanity check with "make
> all" on sh-elf.
>
> The way libcalls are now emitted is a bit unhandy. If more special-ABI
> libcalls are to be added in the future, they all have to do the jsr vs.
> bsrf handling (some potential candidates for new libcalls are optimized
> soft FP routines). Then we still have PR 65374 and PR 54019. In the
> future maybe we should come up with something that allows emitting
> libcalls in a more transparent way...
I'd like to look into improving this at some point in the near future.
On further reading of the changes made, I think there's a lot of code
we could reduce or simplify.
In all the places where new RTL patterns were added for *call*_fdpic,
the main constraint change vs the non-fdpic version is using REG_PIC.
Is it possible to make a REG_GOT_ARG macro or similar that's defined
as something like TARGET_FDPIC ? REG_PIC : nonexistent_or_dummy?
As for the call site stuff, I wonder why the existing call site stuff
used by "call_pcrel" can't be used for SFUNC_STATIC. I'm actually
trying to prepare a simpler FDPIC patch for other gcc versions we're
interested in that's not so invasive, and for now I'm just having
function_symbol replace SFUNC_STATIC with SFUNC_GOT on TARGET_FDPIC to
avoid needing all the label stuff, but it would be nice to find a way
to reuse the existing framework.
Rich
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v4] SH FDPIC backend support
2015-11-10 20:07 ` Rich Felker
@ 2015-11-11 14:36 ` Oleg Endo
2015-11-11 14:56 ` Rich Felker
0 siblings, 1 reply; 14+ messages in thread
From: Oleg Endo @ 2015-11-11 14:36 UTC (permalink / raw)
To: Rich Felker; +Cc: gcc-patches
On Tue, 2015-11-10 at 15:07 -0500, Rich Felker wrote:
> > The way libcalls are now emitted is a bit unhandy. If more
> > special-ABI
> > libcalls are to be added in the future, they all have to do the jsr
> > vs.
> > bsrf handling (some potential candidates for new libcalls are
> > optimized
> > soft FP routines). Then we still have PR 65374 and PR 54019. In
> > the
> > future maybe we should come up with something that allows emitting
> > libcalls in a more transparent way...
>
> I'd like to look into improving this at some point in the near
> future.
> On further reading of the changes made, I think there's a lot of code
> we could reduce or simplify.
>
> In all the places where new RTL patterns were added for *call*_fdpic,
> the main constraint change vs the non-fdpic version is using REG_PIC.
> Is it possible to make a REG_GOT_ARG macro or similar that's defined
> as something like TARGET_FDPIC ? REG_PIC : nonexistent_or_dummy?
I'm not sure I understand what you mean by that. Do you have a small
code snippet example?
> As for the call site stuff, I wonder why the existing call site stuff
> used by "call_pcrel" can't be used for SFUNC_STATIC.
"call_pcrel" is a real call insn. The libcalls are not expanded as
real call insns to avoid the regular register save/restores etc which
is needed to do a normal function call.
I guess the generic fix for this issue would be some mechanism to
specify which regs are clobbered/preserved and then provide the right
settings for the libcall functions.
> I'm actually
> trying to prepare a simpler FDPIC patch for other gcc versions we're
> interested in that's not so invasive, and for now I'm just having
> function_symbol replace SFUNC_STATIC with SFUNC_GOT on TARGET_FDPIC
> to
> avoid needing all the label stuff, but it would be nice to find a way
> to reuse the existing framework.
Do you know how this affects code size (and inherently performance)?
Cheers,
Oleg
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v4] SH FDPIC backend support
2015-11-11 14:36 ` Oleg Endo
@ 2015-11-11 14:56 ` Rich Felker
2015-11-11 16:41 ` Rich Felker
2015-11-15 5:08 ` Oleg Endo
0 siblings, 2 replies; 14+ messages in thread
From: Rich Felker @ 2015-11-11 14:56 UTC (permalink / raw)
To: Oleg Endo; +Cc: gcc-patches
On Wed, Nov 11, 2015 at 11:36:26PM +0900, Oleg Endo wrote:
> On Tue, 2015-11-10 at 15:07 -0500, Rich Felker wrote:
>
> > > The way libcalls are now emitted is a bit unhandy. If more
> > > special-ABI
> > > libcalls are to be added in the future, they all have to do the jsr
> > > vs.
> > > bsrf handling (some potential candidates for new libcalls are
> > > optimized
> > > soft FP routines). Then we still have PR 65374 and PR 54019. In
> > > the
> > > future maybe we should come up with something that allows emitting
> > > libcalls in a more transparent way...
> >
> > I'd like to look into improving this at some point in the near
> > future.
> > On further reading of the changes made, I think there's a lot of code
> > we could reduce or simplify.
> >
> > In all the places where new RTL patterns were added for *call*_fdpic,
> > the main constraint change vs the non-fdpic version is using REG_PIC.
> > Is it possible to make a REG_GOT_ARG macro or similar that's defined
> > as something like TARGET_FDPIC ? REG_PIC : nonexistent_or_dummy?
>
> I'm not sure I understand what you mean by that. Do you have a small
> code snippet example?
Sorry, I don't really understand RTL well enough to make a code
snippet. What I want to express is that an insn "uses" (in the (use
...) sense) a register (r12) conditionally depending on a runtime
option (TARGET_FDPIC).
> > As for the call site stuff, I wonder why the existing call site stuff
> > used by "call_pcrel" can't be used for SFUNC_STATIC.
>
> "call_pcrel" is a real call insn. The libcalls are not expanded as
> real call insns to avoid the regular register save/restores etc which
> is needed to do a normal function call.
Yes, I see that. What I was really wondering though is why the new
call site generation code and constraint was added when the call_pcrel
code already has mechanisms for this, rather than just duplicating the
internals that call_pcrel uses. It seems like we're doing things in a
gratuitously different way here.
> I guess the generic fix for this issue would be some mechanism to
> specify which regs are clobbered/preserved and then provide the right
> settings for the libcall functions.
Is this possible in the sh backend or does it need changes to
higher-level gcc code? (i.e. is it presently possible to make an insn
that conditionally clobbers different things rather than having to
make tons of different insns for each possible set of clobbers?)
> > I'm actually
> > trying to prepare a simpler FDPIC patch for other gcc versions we're
> > interested in that's not so invasive, and for now I'm just having
> > function_symbol replace SFUNC_STATIC with SFUNC_GOT on TARGET_FDPIC
> > to
> > avoid needing all the label stuff, but it would be nice to find a way
> > to reuse the existing framework.
>
> Do you know how this affects code size (and inherently performance)?
I suspect it makes very little difference, but to compare I'd need to
do the same hack on 5.2.0 or trunk. The only difference should be one
additional load per call, and one additional GOT slot per function
called this way (but just once per executable/library).
Another issue I've started looking at is how r12 is put in fixed_regs,
which is conceptually wrong. Preliminary tests show that removing it
from fixed_regs doesn't break and produces much better code -- r12
gets used as a temp register in functions that don't need it, and in
one function that made multiple calls, the saving of initial r12 to a
call-saved register even happened in the delay slot of the call. I've
been discussing it with Alexander Monakov on IRC (#musl) and based on
my understanding so far of how gcc works (which admittedly may be
wrong) the current FDPIC code looks like it's written not to depend on
r12 being 'fixed'. Also I think I'm pretty close to understanding how
we could make the same improvements for non-FDPIC PIC codegen: instead
of loading r12 in the prologue, load a pseudo, then use that pseudo
for GOT access and force it into r12 the same way FDPIC call code does
for PLT calls. Does this sound correct?
Rich
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v4] SH FDPIC backend support
2015-11-11 14:56 ` Rich Felker
@ 2015-11-11 16:41 ` Rich Felker
2015-11-15 5:08 ` Oleg Endo
1 sibling, 0 replies; 14+ messages in thread
From: Rich Felker @ 2015-11-11 16:41 UTC (permalink / raw)
To: Oleg Endo; +Cc: gcc-patches
On Wed, Nov 11, 2015 at 09:56:42AM -0500, Rich Felker wrote:
> > > I'm actually
> > > trying to prepare a simpler FDPIC patch for other gcc versions we're
> > > interested in that's not so invasive, and for now I'm just having
> > > function_symbol replace SFUNC_STATIC with SFUNC_GOT on TARGET_FDPIC
> > > to
> > > avoid needing all the label stuff, but it would be nice to find a way
> > > to reuse the existing framework.
> >
> > Do you know how this affects code size (and inherently performance)?
>
> I suspect it makes very little difference, but to compare I'd need to
> do the same hack on 5.2.0 or trunk. The only difference should be one
> additional load per call, and one additional GOT slot per function
> called this way (but just once per executable/library).
Actually I think this is not quite right: if the call takes place via
the GOT, this also requires the initial r12 to be preserved somewhere
in order to load the function address, whereas for SFUNC_STATIC, the
initial r12 can be completely discarded, right? (SFUNC functions are
not permitted to use the GOT themselves as far as I can tell, and thus
do not receive the hidden GOT argument in r12.)
Rich
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v4] SH FDPIC backend support
2015-11-11 14:56 ` Rich Felker
2015-11-11 16:41 ` Rich Felker
@ 2015-11-15 5:08 ` Oleg Endo
2015-11-15 20:39 ` Rich Felker
1 sibling, 1 reply; 14+ messages in thread
From: Oleg Endo @ 2015-11-15 5:08 UTC (permalink / raw)
To: Rich Felker; +Cc: gcc-patches
On Wed, 2015-11-11 at 09:56 -0500, Rich Felker wrote:
> Sorry, I don't really understand RTL well enough to make a code
> snippet. What I want to express is that an insn "uses" (in the (use
> ...) sense) a register (r12) conditionally depending on a runtime
> option (TARGET_FDPIC).
As far as I know this is not possible. It would require two variants
of the same pattern, one with the use and another without the use.
> Is this possible in the sh backend or does it need changes to
> higher-level gcc code? (i.e. is it presently possible to make an insn
> that conditionally clobbers different things rather than having to
> make tons of different insns for each possible set of clobbers?)
This is basically the same as above ... it's not possible to
conditionally construct/modify pattern descriptions in the .md.
However, it's possible to modify the CALL_INSN_FUNCTION_USAGE field of
call insns -- for some examples see 'grep -r CALL_INSN_FUNCTION_USAGE
gcc/config/*'. Also, it seems the SH backend doesn't make use of some
existing libcall related parameters and target hooks/macros. Maybe
those could be helpful.
> Another issue I've started looking at is how r12 is put in
> fixed_regs, which is conceptually wrong. Preliminary tests show that
> removing it from fixed_regs doesn't break and produces much better
> code -- r12 gets used as a temp register in functions that don't need
> it, and in one function that made multiple calls, the saving of
> initial r12 to a call-saved register even happened in the delay slot
> of the call. I've been discussing it with Alexander Monakov on IRC
> (#musl) and based on my understanding so far of how gcc works (which
> admittedly may be wrong) the current FDPIC code looks like it's
> written not to depend on r12 being 'fixed'. Also I think I'm pretty
> close to understanding how we could make the same improvements for
> > non-FDPIC PIC codegen: instead of loading r12 in the prologue, load a
> pseudo, then use that pseudo for GOT access and force it into r12 the
> same way FDPIC call code does for PLT calls. Does this sound correct?
Maybe TARGET_USE_PSEUDO_PIC_REG could be useful?
Cheers,
Oleg
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v4] SH FDPIC backend support
2015-11-15 5:08 ` Oleg Endo
@ 2015-11-15 20:39 ` Rich Felker
2015-11-16 14:54 ` Oleg Endo
0 siblings, 1 reply; 14+ messages in thread
From: Rich Felker @ 2015-11-15 20:39 UTC (permalink / raw)
To: Oleg Endo; +Cc: gcc-patches
On Sun, Nov 15, 2015 at 02:08:34PM +0900, Oleg Endo wrote:
> On Wed, 2015-11-11 at 09:56 -0500, Rich Felker wrote:
>
> > Sorry, I don't really understand RTL well enough to make a code
> > snippet. What I want to express is that an insn "uses" (in the (use
> > ...) sense) a register (r12) conditionally depending on a runtime
> > option (TARGET_FDPIC).
>
> As far as I know this is not possible. It would require two variants
> of the same pattern, one with the use and another without the use.
OK. That's exactly what we've got now.
> > Is this possible in the sh backend or does it need changes to
> > higher-level gcc code? (i.e. is it presently possible to make an insn
> > that conditionally clobbers different things rather than having to
> > make tons of different insns for each possible set of clobbers?)
>
> This is basically the same as above ... it's not possible to
> conditionally construct/modify pattern descriptions in the .md.
> However, it's possible to modify the CALL_INSN_FUNCTION_USAGE field of
> call insns -- for some examples see 'grep -r CALL_INSN_FUNCTION_USAGE
> gcc/config/*'. Also, it seems the SH backend doesn't make use of some
> existing libcall related parameters and target hooks/macros. Maybe
> those could be helpful.
I'll take a look at this. Let me know if you turn up anything
interesting.
> > Another issue I've started looking at is how r12 is put in
> > fixed_regs, which is conceptually wrong. Preliminary tests show that
> > removing it from fixed_regs doesn't break and produces much better
> > code -- r12 gets used as a temp register in functions that don't need
> > it, and in one function that made multiple calls, the saving of
> > initial r12 to a call-saved register even happened in the delay slot
> > of the call. I've been discussing it with Alexander Monakov on IRC
> > (#musl) and based on my understanding so far of how gcc works (which
> > admittedly may be wrong) the current FDPIC code looks like it's
> > written not to depend on r12 being 'fixed'. Also I think I'm pretty
> > close to understanding how we could make the same improvements for
> > > non-FDPIC PIC codegen: instead of loading r12 in the prologue, load a
> > pseudo, then use that pseudo for GOT access and force it into r12 the
> > same way FDPIC call code does for PLT calls. Does this sound correct?
>
> Maybe TARGET_USE_PSEUDO_PIC_REG could be useful?
Yes. Is there any documentation on using it? I came across that but
couldn't figure out how it compares to just doing the pseudo yourself
in the target files. Is non-target-specific code affected by this?
Rich
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v4] SH FDPIC backend support
2015-11-15 20:39 ` Rich Felker
@ 2015-11-16 14:54 ` Oleg Endo
0 siblings, 0 replies; 14+ messages in thread
From: Oleg Endo @ 2015-11-16 14:54 UTC (permalink / raw)
To: Rich Felker; +Cc: gcc-patches
On Sun, 2015-11-15 at 15:39 -0500, Rich Felker wrote:
> > This is basically the same as above ... it's not possible to
> > conditionally construct/modify pattern descriptions in the .md.
> > However, it's possible to modify the CALL_INSN_FUNCTION_USAGE
> > field of
> > call insns -- for some examples see 'grep -r
> > CALL_INSN_FUNCTION_USAGE
> > gcc/config/*'. Also, it seems the SH backend doesn't make use of
> > some
> > existing libcall related parameters and target hooks/macros. Maybe
> > those could be helpful.
>
> I'll take a look at this. Let me know if you turn up anything
> interesting.
I'm currently working on other things, sorry.
> >
> > Maybe TARGET_USE_PSEUDO_PIC_REG could be useful?
>
> Yes. Is there any documentation on using it? I came across that but
> couldn't figure out how it compares to just doing the pseudo yourself
> in the target files. Is non-target-specific code affected by this?
Yes, non-target-specific code seems to be affected by this in some way,
although I don't know any details. Due to lack of documentation you'll
have to grep yourself through it by looking for "USE_PSEUDO_PIC_REG"
and "use_pseudo_pic_reg" to find the places where it's used.
Cheers,
Oleg
^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2015-11-16 14:54 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-21 3:55 [PATCH v3] SH FDPIC backend support Rich Felker
2015-10-21 13:37 ` Oleg Endo
2015-10-21 20:16 ` Rich Felker
2015-10-23 7:22 ` [PATCH v4] " Rich Felker
2015-10-25 14:32 ` Oleg Endo
2015-10-27 3:35 ` Rich Felker
2015-10-27 14:02 ` Oleg Endo
2015-11-10 20:07 ` Rich Felker
2015-11-11 14:36 ` Oleg Endo
2015-11-11 14:56 ` Rich Felker
2015-11-11 16:41 ` Rich Felker
2015-11-15 5:08 ` Oleg Endo
2015-11-15 20:39 ` Rich Felker
2015-11-16 14:54 ` Oleg Endo
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).