public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-5011] arm: move the switch tables for Arm to the RO data section.
@ 2023-10-30 15:34 Richard Ball
  0 siblings, 0 replies; only message in thread
From: Richard Ball @ 2023-10-30 15:34 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:fb1941d08fc3711b058c1e148e9ce7ed9b7dfbba

commit r14-5011-gfb1941d08fc3711b058c1e148e9ce7ed9b7dfbba
Author: Richard Ball <richard.ball@arm.com>
Date:   Mon Oct 30 15:31:26 2023 +0000

    arm: move the switch tables for Arm to the RO data section.
    
    Follow up patch to arm: Use deltas for Arm switch tables
    This patch moves the switch tables for Arm from the .text section
    into the .rodata section.
    
    gcc/ChangeLog:
    
            * config/arm/aout.h: Change to use the Lrtx label.
            * config/arm/arm.h (CASE_VECTOR_PC_RELATIVE): Remove arm targets
            from (!target_pure_code) condition.
            (ADDR_VEC_ALIGN): Add align for tables in rodata section.
            * config/arm/arm.cc (arm_output_casesi): Alter the function to include
            .Lrtx label and remove adr instructions.
            * config/arm/arm.md
            (arm_casesi_internal): Use force_reg to generate ldr instructions that
            would otherwise be out of range, and change rtl to accommodate force reg.
            Additionally remove unnecessary register temp.
            (casesi): Remove pure code check for Arm.
            * config/arm/elf.h (JUMP_TABLES_IN_TEXT_SECTION): Remove arm
            targets from JUMP_TABLES_IN_TEXT_SECTION definition.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/arm/arm-switchstatement.c: Alter the tests to
            change adr instruction to ldr.

Diff:
---
 gcc/config/arm/aout.h                              |  6 +--
 gcc/config/arm/arm.cc                              | 55 +++++++++++++---------
 gcc/config/arm/arm.h                               | 19 +++++---
 gcc/config/arm/arm.md                              | 21 +++++----
 gcc/config/arm/elf.h                               | 18 +++++--
 gcc/testsuite/gcc.target/arm/arm-switchstatement.c | 28 ++++++-----
 6 files changed, 90 insertions(+), 57 deletions(-)

diff --git a/gcc/config/arm/aout.h b/gcc/config/arm/aout.h
index 6a4c8da5f6d5..49896bb96208 100644
--- a/gcc/config/arm/aout.h
+++ b/gcc/config/arm/aout.h
@@ -187,16 +187,16 @@
 	  switch (GET_MODE (body))					\
 	    {								\
 	    case E_QImode:						\
-	      asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LL%d-4)/4\n",	\
+	      asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LLrtx%d-4)/4\n",	\
 			   VALUE, REL);					\
 	      break;							\
 	    case E_HImode:						\
-	      asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LL%d-4)/4\n",	\
+	      asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LLrtx%d-4)/4\n",	\
 			   VALUE, REL);					\
 	      break;							\
 	    case E_SImode:						\
 	      if (flag_pic)						\
-		asm_fprintf (STREAM, "\t.word\t%LL%d-%LL%d-4\n",	\
+		asm_fprintf (STREAM, "\t.word\t%LL%d-%LLrtx%d-4\n",	\
 			     VALUE, REL);				\
 	      else							\
 		asm_fprintf (STREAM, "\t.word\t%LL%d\n", VALUE);	\
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 4e5e6997ed55..620ef7bfb2f3 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -30469,44 +30469,55 @@ arm_output_iwmmxt_tinsr (rtx *operands)
 const char *
 arm_output_casesi (rtx *operands)
 {
+  char label[100];
   rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
-
   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
-
   output_asm_insn ("cmp\t%0, %1", operands);
   output_asm_insn ("bhi\t%l3", operands);
+  ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
   switch (GET_MODE (diff_vec))
     {
     case E_QImode:
-      output_asm_insn ("adr\t%4, %l2", operands);
       if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
-	output_asm_insn ("ldrb\t%4, [%4, %0]", operands);
-     else
-	output_asm_insn ("ldrsb\t%4, [%4, %0]", operands);
-      return "add\t%|pc, %|pc, %4, lsl #2";
-
+	output_asm_insn ("ldrb\t%4, [%5, %0]", operands);
+      else
+	output_asm_insn ("ldrsb\t%4, [%5, %0]", operands);
+      output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
+      break;
     case E_HImode:
-      output_asm_insn ("adr\t%4, %l2", operands);
-      output_asm_insn ("add\t%4, %4, %0", operands);
-      if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
-	output_asm_insn ("ldrh\t%4, [%4, %0]", operands);
-     else
-	output_asm_insn ("ldrsh\t%4, [%4, %0]", operands);
-      return "add\t%|pc, %|pc, %4, lsl #2";
-
+      if (REGNO (operands[4]) != REGNO (operands[5]))
+	{
+	  output_asm_insn ("add\t%4, %0, %0", operands);
+	  if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+	    output_asm_insn ("ldrh\t%4, [%5, %4]", operands);
+	  else
+	    output_asm_insn ("ldrsh\t%4, [%5, %4]", operands);
+	}
+      else
+	{
+	  output_asm_insn ("add\t%4, %5, %0", operands);
+	  if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+	    output_asm_insn ("ldrh\t%4, [%4, %0]", operands);
+	  else
+	    output_asm_insn ("ldrsh\t%4, [%4, %0]", operands);
+	}
+      output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
+      break;
     case E_SImode:
       if (flag_pic)
 	{
-	  output_asm_insn ("adr\t%4, %l2", operands);
-	  output_asm_insn ("ldr\t%4, [%4, %0, lsl #2]", operands);
-	  return "add\t%|pc, %|pc, %4";
+	  output_asm_insn ("ldr\t%4, [%5, %0, lsl #2]", operands);
+	  output_asm_insn ("add\t%|pc, %|pc, %4", operands);
 	}
-      output_asm_insn ("adr\t%4, %l2", operands);
-      return "ldr\t%|pc, [%4, %0, lsl #2]";
-
+      else
+	output_asm_insn ("ldr\t%|pc, [%5, %0, lsl #2]", operands);
+      break;
     default:
       gcc_unreachable ();
     }
+    assemble_label (asm_out_file, label);
+    output_asm_insn ("nop", operands);
+  return "";
 }
 
 /* Output a Thumb-1 casesi dispatch sequence.  */
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 3063e3489094..a9c2752c0ea5 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -2092,10 +2092,11 @@ enum arm_auto_incmodes
    for the index in the tablejump instruction.  */
 #define CASE_VECTOR_MODE Pmode
 
-#define CASE_VECTOR_PC_RELATIVE ((TARGET_ARM || TARGET_THUMB2		\
-				  || (TARGET_THUMB1			\
-				      && (optimize_size || flag_pic)))	\
-				 && (!target_pure_code))
+#define CASE_VECTOR_PC_RELATIVE						\
+   (TARGET_ARM								\
+    || (!target_pure_code						\
+	&& (TARGET_THUMB2						\
+	    || (TARGET_THUMB1 && (optimize_size || flag_pic)))))
 
 
 #define CASE_VECTOR_SHORTEN_MODE(min, max, body)			\
@@ -2301,8 +2302,14 @@ extern int making_const_table;
 	asm_fprintf (STREAM, "\tpop {%r}\n", REGNO);	\
     } while (0)
 
-#define ADDR_VEC_ALIGN(JUMPTABLE)	\
-  ((TARGET_THUMB && GET_MODE (PATTERN (JUMPTABLE)) == SImode) ? 2 : 0)
+/* If the switch table is in the code segment, additional alignment is
+   needed for Thumb SImode tables.  Otherwise, tables in RO data have
+   natural alignment.  */
+#define ADDR_VEC_ALIGN(TABLE)						\
+  (JUMP_TABLES_IN_TEXT_SECTION						\
+   ? ((TARGET_THUMB && GET_MODE (PATTERN (TABLE)) == SImode) ? 2 : 0)	\
+   : (exact_log2 (GET_MODE_ALIGNMENT (GET_MODE (PATTERN (TABLE)))	\
+		  / BITS_PER_UNIT)))
 
 /* Alignment for case labels comes from ADDR_VEC_ALIGN; avoid the
    default alignment from elfos.h.  */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 0b2eb4bce92b..07eaf06cdeac 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -9514,7 +9514,9 @@
    (match_operand:SI 2 "const_int_operand")	; total range
    (match_operand:SI 3 "" "")			; table label
    (match_operand:SI 4 "" "")]			; Out of range label
-  "(TARGET_32BIT || optimize_size || flag_pic) && !target_pure_code"
+   "(TARGET_ARM
+     || (!target_pure_code
+         && (TARGET_THUMB2 || optimize_size || flag_pic)))"
   "
   {
     enum insn_code code;
@@ -9557,14 +9559,14 @@
 		(label_ref:SI (match_operand 3 ""))))
 	      (clobber (reg:CC CC_REGNUM))
 	      (clobber (match_scratch:SI 5))
-	      (clobber (match_scratch:SI 6))
 	      (use (label_ref:SI (match_operand 2 "")))])]
   "TARGET_ARM"
 {
-  operands[4] = gen_rtx_MULT (SImode, operands[0], GEN_INT (4));
-  operands[4] = gen_rtx_PLUS (SImode, operands[4],
-			      gen_rtx_LABEL_REF (SImode, operands[2]));
-  operands[4] = gen_rtx_MEM (SImode, operands[4]);
+  rtx vec_table_ref = force_reg (SImode, gen_rtx_LABEL_REF (SImode, operands[2]));
+  rtx tmp = gen_rtx_MULT (SImode, operands[0], GEN_INT (4));
+  tmp = gen_rtx_PLUS (SImode, tmp,
+			      vec_table_ref);
+  operands[4] = gen_rtx_MEM (SImode, tmp);
   MEM_READONLY_P (operands[4]) = 1;
   MEM_NOTRAP_P (operands[4]) = 1;
 })
@@ -9575,12 +9577,11 @@
 		(leu (match_operand:SI 0 "s_register_operand" "r")
 		     (match_operand:SI 1 "arm_rhs_operand" "rI"))
 		(mem:SI (plus:SI (mult:SI (match_dup 0) (const_int 4))
-				 (label_ref:SI (match_operand 2 "" ""))))
+				 (match_operand:SI 5 "s_register_operand" "r")))
 		(label_ref:SI (match_operand 3 "" ""))))
 	      (clobber (reg:CC CC_REGNUM))
-	      (clobber (match_scratch:SI 4 "=&r"))
-	      (clobber (match_scratch:SI 5 "=r"))
-	      (use (label_ref:SI (match_dup 2)))])]
+	      (clobber (match_scratch:SI 4 "=r"))
+	      (use (label_ref:SI (match_operand 2 "")))])]
   "TARGET_ARM"
   {
     return arm_output_casesi (operands);
diff --git a/gcc/config/arm/elf.h b/gcc/config/arm/elf.h
index 5766cb4a9888..f76682898960 100644
--- a/gcc/config/arm/elf.h
+++ b/gcc/config/arm/elf.h
@@ -91,11 +91,21 @@
 /* Define this macro if jump tables (for `tablejump' insns) should be
    output in the text section, along with the assembler instructions.
    Otherwise, the readonly data section is used.  */
-/* We put ARM and Thumb-2 jump tables in the text section, because it makes
-   the code more efficient, but for Thumb-1 it's better to put them out of
-   band unless we are generating compressed tables.  */
+/* The choice of placement for jump tables is nuanced.  For cores with
+   Harvard caches (pretty much all cases these days), there is a
+   benefit of maintaing a separation between I- and D-cache candidates
+   and that favors having jump tables in the RO data section.  This
+   makes the dispatch sequence slightly longer as we need to load the
+   address of the jump table first, but we often save elsewhere as the
+   content of the code section becomes better packed and we need
+   fewer long-range branch operations.  We also require the dispatch
+   table to be in the RO section when compiling for pure code.  We
+   currently place jump tables in the RO-data section for Arm or
+   whenever pure code is required; we also do it for Thumb-1 when
+   using an ADDR_VEC.  The remaining cases put the jump table in the
+   text section, but we should revisit this choice.  */
 #define JUMP_TABLES_IN_TEXT_SECTION					\
-   ((TARGET_32BIT || (TARGET_THUMB && (optimize_size || flag_pic))) \
+   ((TARGET_THUMB2 || (TARGET_THUMB && (optimize_size || flag_pic))) \
     && !target_pure_code)
 
 #ifndef LINK_SPEC
diff --git a/gcc/testsuite/gcc.target/arm/arm-switchstatement.c b/gcc/testsuite/gcc.target/arm/arm-switchstatement.c
index a7aa9d45c763..803731c479c6 100644
--- a/gcc/testsuite/gcc.target/arm/arm-switchstatement.c
+++ b/gcc/testsuite/gcc.target/arm/arm-switchstatement.c
@@ -53,9 +53,10 @@ inline void SIFunction (const char* flag)
 /*
 **QImode_test:
 **	...
-**	adr	(r[0-9]+), .L[0-9]+
-**	ldrb	\1, \[\1, r[0-9]+\]
-**	add	pc, pc, \1, lsl #2
+**	ldr	(r[0-9]+), .L[0-9]+
+**	...
+**	ldrb	(r[0-9]+), \[\1, r[0-9]+\]
+**	add	pc, pc, \2, lsl #2
 **	...
 */
 __attribute__ ((noinline)) __attribute__ ((noclone)) const char* QImode_test(enum z x)
@@ -77,10 +78,11 @@ __attribute__ ((noinline)) __attribute__ ((noclone)) const char* QImode_test(enu
 /*
 **HImode_test:
 **	...
-**	adr	(r[0-9]+), .L[0-9]+
-**	add	\1, \1, (r[0-9]+)
-**	ldrh	\1, \[\1, \2\]
-**	add	pc, pc, \1, lsl #2
+**	ldr	(r[0-9]+), .L[0-9]+
+**	...
+**	add	r[0-9]+, r[0-9]+, r[0-9]+
+**	ldrh	(r[0-9]+), \[r[0-9]+, r[0-9]+]
+**	add	pc, pc, \2, lsl #2
 **	...
 */
 __attribute__ ((noinline)) __attribute__ ((noclone)) const char* HImode_test(enum z x)
@@ -102,7 +104,8 @@ __attribute__ ((noinline)) __attribute__ ((noclone)) const char* HImode_test(enu
 /*
 **SImode_test:
 **	...
-**	adr	(r[0-9]+), .L[0-9]+
+**	ldr	(r[0-9]+), .L[0-9]+
+**	...
 **	ldr	pc, \[\1, r[0-9]+, lsl #2\]
 **	...
 */
@@ -125,10 +128,11 @@ __attribute__ ((noinline)) __attribute__ ((noclone)) const char* SImode_test(enu
 /*
 **backwards_branch_test:
 **	...
-**	adr	(r[0-9]+), .L[0-9]+
-**	add	\1, \1, (r[0-9]+)
-**	ldrsh	\1, \[\1, \2\]
-**	add	pc, pc, \1, lsl #2
+**	ldr	(r[0-9]+), .L[0-9]+
+**	...
+**	add	r[0-9]+, r[0-9]+, r[0-9]+
+**	ldrsh	(r[0-9]+), \[r[0-9]+, r[0-9]+]
+**	add	pc, pc, \2, lsl #2
 **	...
 */
 __attribute__ ((noinline)) __attribute__ ((noclone)) const char* backwards_branch_test(enum z x, int flag)

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-10-30 15:34 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-30 15:34 [gcc r14-5011] arm: move the switch tables for Arm to the RO data section Richard Ball

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).