public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 2/2] arm: move the switch tables for Arm to the RO data section.
@ 2023-09-28 13:29 Richard Ball
  2023-10-19 10:58 ` Richard Earnshaw
  0 siblings, 1 reply; 2+ messages in thread
From: Richard Ball @ 2023-09-28 13:29 UTC (permalink / raw)
  To: gcc-patches, Richard Earnshaw, Kyrylo Tkachov, Nick Clifton, ramana.gcc

[-- Attachment #1: Type: text/plain, Size: 1050 bytes --]

Follow up patch to arm: Use deltas for Arm switch tables
This patch moves the switch tables for Arm from the .text section
into the .rodata section.

gcc/ChangeLog:

	* config/arm/aout.h: Change to use the Lrtx label.
	* config/arm/arm.h (CASE_VECTOR_PC_RELATIVE): Remove arm targets
        from (!target_pure_code) condition.
        (ADDR_VEC_ALIGN): Add align for tables in rodata section.
	* config/arm/arm.cc (arm_output_casesi): Alter the function to include
        .Lrtx label and remove adr instructions.
	* config/arm/arm.md
        (arm_casesi_internal): Use force_reg to generate ldr instructions that
        would otherwise be out of range, and change rtl to accommodate force reg.
        Additionally remove unnecessary register temp.
        (casesi): Remove pure code check for Arm.
	* config/arm/elf.h (JUMP_TABLES_IN_TEXT_SECTION): Remove arm
        targets from JUMP_TABLES_IN_TEXT_SECTION definition.

gcc/testsuite/ChangeLog:

	* gcc.target/arm/arm-switchstatement.c: Alter the tests to
        change adr instruction to ldr.

[-- Attachment #2: Arm-switch2.patch --]
[-- Type: text/plain, Size: 10080 bytes --]

diff --git a/gcc/config/arm/aout.h b/gcc/config/arm/aout.h
index 6a4c8da5f6d5a1695518f42830b9d045888eeed6..49896bb962081a5ee4b5328029813c681c489a9e 100644
--- a/gcc/config/arm/aout.h
+++ b/gcc/config/arm/aout.h
@@ -187,16 +187,16 @@
 	  switch (GET_MODE (body))					\
 	    {								\
 	    case E_QImode:						\
-	      asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LL%d-4)/4\n",	\
+	      asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LLrtx%d-4)/4\n",	\
 			   VALUE, REL);					\
 	      break;							\
 	    case E_HImode:						\
-	      asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LL%d-4)/4\n",	\
+	      asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LLrtx%d-4)/4\n",	\
 			   VALUE, REL);					\
 	      break;							\
 	    case E_SImode:						\
 	      if (flag_pic)						\
-		asm_fprintf (STREAM, "\t.word\t%LL%d-%LL%d-4\n",	\
+		asm_fprintf (STREAM, "\t.word\t%LL%d-%LLrtx%d-4\n",	\
 			     VALUE, REL);				\
 	      else							\
 		asm_fprintf (STREAM, "\t.word\t%LL%d\n", VALUE);	\
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 3063e3489094f04ecf03a52952c185d4a75da645..ba61cf6fb9e4969776b49b499ce2205a940385d0 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -2092,10 +2092,10 @@ enum arm_auto_incmodes
    for the index in the tablejump instruction.  */
 #define CASE_VECTOR_MODE Pmode
 
-#define CASE_VECTOR_PC_RELATIVE ((TARGET_ARM || TARGET_THUMB2		\
+#define CASE_VECTOR_PC_RELATIVE (TARGET_ARM || ((TARGET_THUMB2		\
 				  || (TARGET_THUMB1			\
 				      && (optimize_size || flag_pic)))	\
-				 && (!target_pure_code))
+				 && (!target_pure_code)))
 
 
 #define CASE_VECTOR_SHORTEN_MODE(min, max, body)			\
@@ -2301,8 +2301,14 @@ extern int making_const_table;
 	asm_fprintf (STREAM, "\tpop {%r}\n", REGNO);	\
     } while (0)
 
-#define ADDR_VEC_ALIGN(JUMPTABLE)	\
-  ((TARGET_THUMB && GET_MODE (PATTERN (JUMPTABLE)) == SImode) ? 2 : 0)
+/* If the switch table is in the code segment, additional alignment is
+   needed for Thumb SImode tables.  Otherwise, tables in RO data have
+   natural alignment.  */
+#define ADDR_VEC_ALIGN(TABLE)						\
+  (JUMP_TABLES_IN_TEXT_SECTION						\
+   ? ((TARGET_THUMB && GET_MODE (PATTERN (TABLE)) == SImode) ? 2 : 0)	\
+   : (exact_log2 (GET_MODE_ALIGNMENT (GET_MODE (PATTERN (TABLE)))	\
+		  / BITS_PER_UNIT)))
 
 /* Alignment for case labels comes from ADDR_VEC_ALIGN; avoid the
    default alignment from elfos.h.  */
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 4e5e6997ed555372683e01b2aff5c25265f4e50c..c3a5ef274276cdef1b41690eb0ad7fd4f4218ecf 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -30469,44 +30469,58 @@ arm_output_iwmmxt_tinsr (rtx *operands)
 const char *
 arm_output_casesi (rtx *operands)
 {
+  char buf[100];
+  char label[100];
   rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
-
   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
-
   output_asm_insn ("cmp\t%0, %1", operands);
   output_asm_insn ("bhi\t%l3", operands);
+  ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
   switch (GET_MODE (diff_vec))
     {
     case E_QImode:
-      output_asm_insn ("adr\t%4, %l2", operands);
       if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
-	output_asm_insn ("ldrb\t%4, [%4, %0]", operands);
+	output_asm_insn ("ldrb\t%4, [%5, %0]", operands);
      else
-	output_asm_insn ("ldrsb\t%4, [%4, %0]", operands);
-      return "add\t%|pc, %|pc, %4, lsl #2";
-
+	output_asm_insn ("ldrsb\t%4, [%5, %0]", operands);
+      output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);;
+      break;
     case E_HImode:
-      output_asm_insn ("adr\t%4, %l2", operands);
-      output_asm_insn ("add\t%4, %4, %0", operands);
-      if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
-	output_asm_insn ("ldrh\t%4, [%4, %0]", operands);
-     else
-	output_asm_insn ("ldrsh\t%4, [%4, %0]", operands);
-      return "add\t%|pc, %|pc, %4, lsl #2";
-
+      if (REGNO (operands[4]) != REGNO (operands[5]))
+	{
+	  output_asm_insn ("add\t%4, %0, %0", operands);
+	  if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+	    output_asm_insn ("ldrh\t%4, [%5, %4]", operands);
+	  else
+	    output_asm_insn ("ldrsh\t%4, [%5, %4]", operands);
+	}
+      else
+	{
+	  output_asm_insn ("add\t%4, %5, %0", operands);
+	  if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+	    output_asm_insn ("ldrh\t%4, [%4, %0]", operands);
+	  else
+	    output_asm_insn ("ldrsh\t%4, [%4, %0]", operands);
+	}
+      output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
+      break;
     case E_SImode:
       if (flag_pic)
 	{
-	  output_asm_insn ("adr\t%4, %l2", operands);
-	  output_asm_insn ("ldr\t%4, [%4, %0, lsl #2]", operands);
-	  return "add\t%|pc, %|pc, %4";
+	  output_asm_insn ("ldr\t%4, [%5, %0, lsl #2]", operands);
+	  output_asm_insn ("add\t%|pc, %|pc, %4", operands);
 	}
-      output_asm_insn ("adr\t%4, %l2", operands);
-      return "ldr\t%|pc, [%4, %0, lsl #2]";
-
+      else
+	{
+	  output_asm_insn ("ldr\t%|pc, [%5, %0, lsl #2]", operands);
+	}
+      break;
     default:
       gcc_unreachable ();
     }
+    assemble_label (asm_out_file, label);
+    output_asm_insn ("nop;", operands);
+  return "";
 }
 
 /* Output a Thumb-1 casesi dispatch sequence.  */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 0b2eb4bce92bb7e8b1ca0c5a04b1a52e9c16b64a..810d862df7f343f2e4f5f11af3f6061b41ca3606 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -9514,7 +9514,8 @@
    (match_operand:SI 2 "const_int_operand")	; total range
    (match_operand:SI 3 "" "")			; table label
    (match_operand:SI 4 "" "")]			; Out of range label
-  "(TARGET_32BIT || optimize_size || flag_pic) && !target_pure_code"
+  "TARGET_ARM || ((TARGET_THUMB2 || optimize_size || flag_pic) &&
+		  (!target_pure_code))"
   "
   {
     enum insn_code code;
@@ -9557,13 +9558,13 @@
 		(label_ref:SI (match_operand 3 ""))))
 	      (clobber (reg:CC CC_REGNUM))
 	      (clobber (match_scratch:SI 5))
-	      (clobber (match_scratch:SI 6))
 	      (use (label_ref:SI (match_operand 2 "")))])]
   "TARGET_ARM"
 {
+  rtx tmp = force_reg (SImode, gen_rtx_LABEL_REF (SImode, operands[2]));
   operands[4] = gen_rtx_MULT (SImode, operands[0], GEN_INT (4));
   operands[4] = gen_rtx_PLUS (SImode, operands[4],
-			      gen_rtx_LABEL_REF (SImode, operands[2]));
+			      tmp);
   operands[4] = gen_rtx_MEM (SImode, operands[4]);
   MEM_READONLY_P (operands[4]) = 1;
   MEM_NOTRAP_P (operands[4]) = 1;
@@ -9575,12 +9576,11 @@
 		(leu (match_operand:SI 0 "s_register_operand" "r")
 		     (match_operand:SI 1 "arm_rhs_operand" "rI"))
 		(mem:SI (plus:SI (mult:SI (match_dup 0) (const_int 4))
-				 (label_ref:SI (match_operand 2 "" ""))))
+				 (match_operand:SI 5 "s_register_operand" "r")))
 		(label_ref:SI (match_operand 3 "" ""))))
 	      (clobber (reg:CC CC_REGNUM))
-	      (clobber (match_scratch:SI 4 "=&r"))
-	      (clobber (match_scratch:SI 5 "=r"))
-	      (use (label_ref:SI (match_dup 2)))])]
+	      (clobber (match_scratch:SI 4 "=r"))
+	      (use (label_ref:SI (match_operand 2 "")))])]
   "TARGET_ARM"
   {
     return arm_output_casesi (operands);
diff --git a/gcc/config/arm/elf.h b/gcc/config/arm/elf.h
index 5766cb4a9888512889334c48ccfbe46e9980f800..3e878f21350a008c363618599058f33ff759a5fa 100644
--- a/gcc/config/arm/elf.h
+++ b/gcc/config/arm/elf.h
@@ -91,11 +91,11 @@
 /* Define this macro if jump tables (for `tablejump' insns) should be
    output in the text section, along with the assembler instructions.
    Otherwise, the readonly data section is used.  */
-/* We put ARM and Thumb-2 jump tables in the text section, because it makes
-   the code more efficient, but for Thumb-1 it's better to put them out of
+/* We put Thumb-2 jump tables in the text section, because it makes
+   the code more efficient, but for Thumb-1 and ARM it's better to put them out of
    band unless we are generating compressed tables.  */
 #define JUMP_TABLES_IN_TEXT_SECTION					\
-   ((TARGET_32BIT || (TARGET_THUMB && (optimize_size || flag_pic))) \
+   ((TARGET_THUMB2 || (TARGET_THUMB && (optimize_size || flag_pic))) \
     && !target_pure_code)
 
 #ifndef LINK_SPEC
diff --git a/gcc/testsuite/gcc.target/arm/arm-switchstatement.c b/gcc/testsuite/gcc.target/arm/arm-switchstatement.c
index a7aa9d45c7634db6c8192072019e497db969b681..803731c479c66e9e684cae532a6071335c042f0e 100644
--- a/gcc/testsuite/gcc.target/arm/arm-switchstatement.c
+++ b/gcc/testsuite/gcc.target/arm/arm-switchstatement.c
@@ -53,9 +53,10 @@ inline void SIFunction (const char* flag)
 /*
 **QImode_test:
 **	...
-**	adr	(r[0-9]+), .L[0-9]+
-**	ldrb	\1, \[\1, r[0-9]+\]
-**	add	pc, pc, \1, lsl #2
+**	ldr	(r[0-9]+), .L[0-9]+
+**	...
+**	ldrb	(r[0-9]+), \[\1, r[0-9]+\]
+**	add	pc, pc, \2, lsl #2
 **	...
 */
 __attribute__ ((noinline)) __attribute__ ((noclone)) const char* QImode_test(enum z x)
@@ -77,10 +78,11 @@ __attribute__ ((noinline)) __attribute__ ((noclone)) const char* QImode_test(enu
 /*
 **HImode_test:
 **	...
-**	adr	(r[0-9]+), .L[0-9]+
-**	add	\1, \1, (r[0-9]+)
-**	ldrh	\1, \[\1, \2\]
-**	add	pc, pc, \1, lsl #2
+**	ldr	(r[0-9]+), .L[0-9]+
+**	...
+**	add	r[0-9]+, r[0-9]+, r[0-9]+
+**	ldrh	(r[0-9]+), \[r[0-9]+, r[0-9]+]
+**	add	pc, pc, \2, lsl #2
 **	...
 */
 __attribute__ ((noinline)) __attribute__ ((noclone)) const char* HImode_test(enum z x)
@@ -102,7 +104,8 @@ __attribute__ ((noinline)) __attribute__ ((noclone)) const char* HImode_test(enu
 /*
 **SImode_test:
 **	...
-**	adr	(r[0-9]+), .L[0-9]+
+**	ldr	(r[0-9]+), .L[0-9]+
+**	...
 **	ldr	pc, \[\1, r[0-9]+, lsl #2\]
 **	...
 */
@@ -125,10 +128,11 @@ __attribute__ ((noinline)) __attribute__ ((noclone)) const char* SImode_test(enu
 /*
 **backwards_branch_test:
 **	...
-**	adr	(r[0-9]+), .L[0-9]+
-**	add	\1, \1, (r[0-9]+)
-**	ldrsh	\1, \[\1, \2\]
-**	add	pc, pc, \1, lsl #2
+**	ldr	(r[0-9]+), .L[0-9]+
+**	...
+**	add	r[0-9]+, r[0-9]+, r[0-9]+
+**	ldrsh	(r[0-9]+), \[r[0-9]+, r[0-9]+]
+**	add	pc, pc, \2, lsl #2
 **	...
 */
 __attribute__ ((noinline)) __attribute__ ((noclone)) const char* backwards_branch_test(enum z x, int flag)

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH 2/2] arm: move the switch tables for Arm to the RO data section.
  2023-09-28 13:29 [PATCH 2/2] arm: move the switch tables for Arm to the RO data section Richard Ball
@ 2023-10-19 10:58 ` Richard Earnshaw
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Earnshaw @ 2023-10-19 10:58 UTC (permalink / raw)
  To: Richard Ball, gcc-patches, Richard Earnshaw, Kyrylo Tkachov,
	Nick Clifton, ramana.gcc



On 28/09/2023 14:29, Richard Ball wrote:
> Follow up patch to arm: Use deltas for Arm switch tables
> This patch moves the switch tables for Arm from the .text section
> into the .rodata section.
> 
> gcc/ChangeLog:
> 
> 	* config/arm/aout.h: Change to use the Lrtx label.
> 	* config/arm/arm.h (CASE_VECTOR_PC_RELATIVE): Remove arm targets
>          from (!target_pure_code) condition.
>          (ADDR_VEC_ALIGN): Add align for tables in rodata section.
> 	* config/arm/arm.cc (arm_output_casesi): Alter the function to include
>          .Lrtx label and remove adr instructions.
> 	* config/arm/arm.md
>          (arm_casesi_internal): Use force_reg to generate ldr instructions that
>          would otherwise be out of range, and change rtl to accommodate force reg.
>          Additionally remove unnecessary register temp.
>          (casesi): Remove pure code check for Arm.
> 	* config/arm/elf.h (JUMP_TABLES_IN_TEXT_SECTION): Remove arm
>          targets from JUMP_TABLES_IN_TEXT_SECTION definition.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/arm/arm-switchstatement.c: Alter the tests to
>          change adr instruction to ldr.

This all looks pretty good, but there are some minor niggles to sort out 
before it can go in...

arm.cc:

  arm_output_casesi (rtx *operands)
  {
+  char buf[100];

buf is unused, so this breaks a native bootstrap.

       output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);;

Two semicolons at the end of the line.

+      else
+	{
+	  output_asm_insn ("ldr\t%|pc, [%5, %0, lsl #2]", operands);
+	}

Our normal coding style is to omit the braces for a single statement in 
an 'if/else' clause, even if the other arm of the clause uses braces, so:

       else
	output_asm_insn ("ldr\t%|pc, [%5, %0, lsl #2]", operands);

+    output_asm_insn ("nop;", operands);

Stray semicolon after the "nop".

#define CASE_VECTOR_PC_RELATIVE (TARGET_ARM || ((TARGET_THUMB2		\
				  || (TARGET_THUMB1			\
				      && (optimize_size || flag_pic)))	\
				 && (!target_pure_code)))

The indentation here is incorrect, which makes it very hard to 
understand the logic.  But I think a bit of reordering would help 
clarify things as well..

#define CASE_VECTOR_PC_RELATIVE
   (TARGET_ARM
    || (!target_pure_code
        && (TARGET_THUMB2
            || (TARGET_THUMB1 && (optimize_size || flag_pic)))))

(obviously with the line escapes added back in)

arm.md (casesi):

   "TARGET_ARM || ((TARGET_THUMB2 || optimize_size || flag_pic) &&
                                                                ^^
operators should be at the start of the following line, not the end of 
the previous one.
		  (!target_pure_code))"

So:

   "TARGET_ARM || ((TARGET_THUMB2 || optimize_size || flag_pic)
		  && (!target_pure_code))"

But I think this could be laid out better as well:

   "(TARGET_ARM
     || (!target_pure_code
         && (TARGET_THUMB2 || optimize_size || flag_pic)))"

arm_casesi_internal:

   rtx tmp = force_reg (SImode, gen_rtx_LABEL_REF (SImode, operands[2]));

Tmp is not generally a good choice of name, even for short fragments 
like this.  Use something more descriptive to the object it holds, like 
"lref"; or, better still, a name that describes what the label points to 
(vec_table_ref?).

elf.h:

/* We put Thumb-2 jump tables in the text section, because it makes
    the code more efficient, but for Thumb-1 and ARM it's better to put 
them out of
    band unless we are generating compressed tables.  */

This comment is misleading now, as it implies that compressed tables for 
arm are still sometimes placed in the text segment (the unless clause) 
and that's not true.

R.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-10-19 10:58 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-28 13:29 [PATCH 2/2] arm: move the switch tables for Arm to the RO data section Richard Ball
2023-10-19 10:58 ` Richard Earnshaw

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).