public inbox for gcc-patches@gcc.gnu.org
* [PATCH v3] AArch64: Improve GOT addressing
@ 2021-06-04 13:44 Wilco Dijkstra
  2021-10-20 14:53 ` Wilco Dijkstra
  0 siblings, 1 reply; 5+ messages in thread
From: Wilco Dijkstra @ 2021-06-04 13:44 UTC (permalink / raw)
  To: Richard Sandiford; +Cc: Kyrylo Tkachov, GCC Patches

Hi Richard,

This merges the v1 and v2 patches and removes the spurious MEM from
ldr_got_small_si/di. This has been rebased after [1], and the performance
gain has now doubled.

[1] https://gcc.gnu.org/pipermail/gcc-patches/2021-June/571708.html

Improve GOT addressing by treating the instructions as a pair.  This reduces
register pressure and improves code quality significantly.  SPECINT2017 improves
by 0.6% with -fPIC and codesize is 0.73% smaller.  Perlbench has 0.9% smaller
codesize, 1.5% fewer executed instructions and is 1.8% faster on Neoverse N1.
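
For illustration, not part of the patch: with a hypothetical symbol x,
a small-PIC GOT access is an ADRP/LDR pair, roughly:

    // before: the ADRP result is a separate pseudo, so it is
    // allocated and scheduled independently of the LDR
    adrp    x1, :got:x
    ldr     x0, [x1, :got_lo12:x]

    // with this patch: the pair stays a single insn until output,
    // shares one register and remains adjacent for linker relaxation
    adrp    x0, :got:x
    ldr     x0, [x0, :got_lo12:x]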

Passes bootstrap and regress. OK for commit?

ChangeLog:
2021-06-04  Wilco Dijkstra  <wdijkstr@arm.com>

        * config/aarch64/aarch64.md (movsi): Split GOT accesses after reload.
        (movdi): Likewise.
        (ldr_got_small_<mode>): Remove MEM and LO_SUM, emit ADRP+LDR GOT sequence.
        (ldr_got_small_sidi): Likewise.
        * config/aarch64/aarch64.c (aarch64_load_symref_appropriately): Delay
        splitting of GOT accesses until after reload. Remove tmp_reg and MEM.
        (aarch64_print_operand): Correctly print got_lo12 in L specifier.
        (aarch64_rtx_costs): Set rematerialization cost for GOT accesses.
        (aarch64_mov_operand_p): Make GOT accesses valid move operands.

---

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 08245827daa3f8199b29031e754244c078f0f500..11ea33c70fb06194fadfe94322fdfa098e5320fc 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3615,6 +3615,14 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
 
     case SYMBOL_SMALL_GOT_4G:
       {
+	/* Use movdi for GOT accesses until after reload - this improves
+	   CSE and rematerialization.  */
+	if (!reload_completed)
+	  {
+	    emit_insn (gen_rtx_SET (dest, imm));
+	    return;
+	  }
+
 	/* In ILP32, the mode of dest can be either SImode or DImode,
 	   while the got entry is always of SImode size.  The mode of
 	   dest depends on how dest is used: if dest is assigned to a
@@ -3624,34 +3632,21 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
 	   patterns here (two patterns for ILP32).  */
 
 	rtx insn;
-	rtx mem;
-	rtx tmp_reg = dest;
 	machine_mode mode = GET_MODE (dest);
 
-	if (can_create_pseudo_p ())
-	  tmp_reg = gen_reg_rtx (mode);
-
-	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
 	if (mode == ptr_mode)
 	  {
 	    if (mode == DImode)
-	      insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
+	      insn = gen_ldr_got_small_di (dest, imm);
 	    else
-	      insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
-
-	    mem = XVECEXP (SET_SRC (insn), 0, 0);
+	      insn = gen_ldr_got_small_si (dest, imm);
 	  }
 	else
 	  {
 	    gcc_assert (mode == Pmode);
-
-	    insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
-	    mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
+	    insn = gen_ldr_got_small_sidi (dest, imm);
 	  }
 
-	gcc_assert (MEM_P (mem));
-	MEM_READONLY_P (mem) = 1;
-	MEM_NOTRAP_P (mem) = 1;
 	emit_insn (insn);
 	return;
       }
@@ -11019,7 +11014,7 @@ aarch64_print_operand (FILE *f, rtx x, int code)
       switch (aarch64_classify_symbolic_expression (x))
 	{
 	case SYMBOL_SMALL_GOT_4G:
-	  asm_fprintf (asm_out_file, ":lo12:");
+	  asm_fprintf (asm_out_file, ":got_lo12:");
 	  break;
 
 	case SYMBOL_SMALL_TLSGD:
@@ -13452,6 +13447,12 @@ cost_plus:
 
     case SYMBOL_REF:
       *cost = 0;
+
+      /* Use a separate rematerialization cost for GOT accesses.  */
+      if (aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC
+	  && aarch64_classify_symbol (x, 0) == SYMBOL_SMALL_GOT_4G)
+	*cost = COSTS_N_INSNS (1) / 2;
+
       return true;
 
     case HIGH:
@@ -19907,6 +19908,11 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
       return aarch64_simd_valid_immediate (x, NULL);
     }
 
+  /* GOT accesses are valid moves until after regalloc.  */
+  if (SYMBOL_REF_P (x)
+      && aarch64_classify_symbolic_expression (x) == SYMBOL_SMALL_GOT_4G)
+    return true;
+
   x = strip_salt (x);
   if (SYMBOL_REF_P (x) && mode == DImode && CONSTANT_ADDRESS_P (x))
     return true;
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index abfd84526745d029ad4953eabad6dd17b159a218..30effca6f3562f6870a6cc8097750e63bb0d424d 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1283,8 +1283,11 @@ (define_insn_and_split "*movsi_aarch64"
    fmov\\t%w0, %s1
    fmov\\t%s0, %s1
    * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
-  "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
-    && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
+  "(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
+    && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0])))
+    || (reload_completed
+	&& (aarch64_classify_symbolic_expression (operands[1])
+	    == SYMBOL_SMALL_GOT_4G))"
    [(const_int 0)]
    "{
        aarch64_expand_mov_immediate (operands[0], operands[1]);
@@ -1319,8 +1322,11 @@ (define_insn_and_split "*movdi_aarch64"
    fmov\\t%x0, %d1
    fmov\\t%d0, %d1
    * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);"
-   "(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode))
-    && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
+   "(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode)
+    && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0])))
+    || (reload_completed
+	&& (aarch64_classify_symbolic_expression (operands[1])
+	    == SYMBOL_SMALL_GOT_4G))"
    [(const_int 0)]
    "{
        aarch64_expand_mov_immediate (operands[0], operands[1]);
@@ -6703,27 +6709,26 @@ (define_insn "add_losym_<mode>"
   [(set_attr "type" "alu_imm")]
 )
 
+;;; GOT accesses use a single insn so linkers can relax GOT relocations.
 (define_insn "ldr_got_small_<mode>"
   [(set (match_operand:PTR 0 "register_operand" "=r")
-	(unspec:PTR [(mem:PTR (lo_sum:PTR
-			      (match_operand:PTR 1 "register_operand" "r")
-			      (match_operand:PTR 2 "aarch64_valid_symref" "S")))]
+	(unspec:PTR [(match_operand:PTR 1 "aarch64_valid_symref" "S")]
 		    UNSPEC_GOTSMALLPIC))]
   ""
-  "ldr\\t%<w>0, [%1, #:got_lo12:%c2]"
-  [(set_attr "type" "load_<ldst_sz>")]
+  "adrp\\t%0, %A1\;ldr\\t%<w>0, [%0, %L1]"
+  [(set_attr "type" "load_<ldst_sz>")
+   (set_attr "length" "8")]
 )
 
 (define_insn "ldr_got_small_sidi"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
-	 (unspec:SI [(mem:SI (lo_sum:DI
-			     (match_operand:DI 1 "register_operand" "r")
-			     (match_operand:DI 2 "aarch64_valid_symref" "S")))]
+	 (unspec:SI [(match_operand:DI 1 "aarch64_valid_symref" "S")]
 		    UNSPEC_GOTSMALLPIC)))]
   "TARGET_ILP32"
-  "ldr\\t%w0, [%1, #:got_lo12:%c2]"
-  [(set_attr "type" "load_4")]
+  "adrp\\t%0, %A1\;ldr\\t%w0, [%0, %L1]"
+  [(set_attr "type" "load_4")
+   (set_attr "length" "8")]
 )
 
 (define_insn "ldr_got_small_28k_<mode>"



* Re: [PATCH v3] AArch64: Improve GOT addressing
  2021-06-04 13:44 [PATCH v3] AArch64: Improve GOT addressing Wilco Dijkstra
@ 2021-10-20 14:53 ` Wilco Dijkstra
  2021-11-02 11:23   ` Richard Sandiford
  0 siblings, 1 reply; 5+ messages in thread
From: Wilco Dijkstra @ 2021-10-20 14:53 UTC (permalink / raw)
  To: Richard Sandiford; +Cc: Kyrylo Tkachov, GCC Patches

ping


From: Wilco Dijkstra
Sent: 04 June 2021 14:44
To: Richard Sandiford <Richard.Sandiford@arm.com>
Cc: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>; GCC Patches <gcc-patches@gcc.gnu.org>
Subject: [PATCH v3] AArch64: Improve GOT addressing 
 

* Re: [PATCH v3] AArch64: Improve GOT addressing
  2021-10-20 14:53 ` Wilco Dijkstra
@ 2021-11-02 11:23   ` Richard Sandiford
  2021-11-02 18:41     ` Wilco Dijkstra
  0 siblings, 1 reply; 5+ messages in thread
From: Richard Sandiford @ 2021-11-02 11:23 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: Kyrylo Tkachov, GCC Patches

Wilco Dijkstra <Wilco.Dijkstra@arm.com> writes:
> ping
>
>
> From: Wilco Dijkstra
> Sent: 04 June 2021 14:44
> To: Richard Sandiford <Richard.Sandiford@arm.com>
> Cc: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>; GCC Patches <gcc-patches@gcc.gnu.org>
> Subject: [PATCH v3] AArch64: Improve GOT addressing
>
> Hi Richard,
>
> This merges the v1 and v2 patches and removes the spurious MEM from
> ldr_got_small_si/di. This has been rebased after [1], and the performance
> gain has now doubled.
>
> [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-June/571708.html
>
> Improve GOT addressing by treating the instructions as a pair.  This reduces
> register pressure and improves code quality significantly.  SPECINT2017 improves
> by 0.6% with -fPIC and codesize is 0.73% smaller.  Perlbench has 0.9% smaller
> codesize, 1.5% fewer executed instructions and is 1.8% faster on Neoverse N1.
>
> Passes bootstrap and regress. OK for commit?

Looks like everyone agrees that
https://github.com/ARM-software/abi-aa/pull/106 should go in in some form,
so I think it's OK for GCC to keep the instructions together.  Some comments
on the implementation though.  (We might have covered these earlier,
sorry if so.)
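
(For reference, the relaxation that proposal enables lets a linker turn
a GOT access into a direct address when the symbol turns out to be
non-preemptible, along the lines of (sym being a placeholder):

    adrp    x0, :got:sym             // relaxed to:  adrp  x0, sym
    ldr     x0, [x0, :got_lo12:sym]  // relaxed to:  add   x0, x0, :lo12:sym

which is only safe if the two instructions stay adjacent and use the
same register.)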

- Why do we rewrite the constant moves after reload into ldr_got_small_sidi
  and ldr_got_small_<mode>?  Couldn't we just get the move patterns to
  output the sequence directly?

- I think we should leave out the rtx_costs part and deal with that
  separately.  This patch should just be about whether we emit two
  separate define_insns for the move or whether we keep a single one
  (to support relaxation).

Thanks,
Richard


* Re: [PATCH v3] AArch64: Improve GOT addressing
  2021-11-02 11:23   ` Richard Sandiford
@ 2021-11-02 18:41     ` Wilco Dijkstra
  2021-11-02 18:55       ` Richard Sandiford
  0 siblings, 1 reply; 5+ messages in thread
From: Wilco Dijkstra @ 2021-11-02 18:41 UTC (permalink / raw)
  To: Richard Sandiford; +Cc: Kyrylo Tkachov, GCC Patches

Hi Richard,

> - Why do we rewrite the constant moves after reload into ldr_got_small_sidi
>   and ldr_got_small_<mode>?  Couldn't we just get the move patterns to
>   output the sequence directly?

That's possible too; however, it makes the movsi/di patterns more complex.
See version v4 below.

> - I think we should leave out the rtx_costs part and deal with that
>   separately.  This patch should just be about whether we emit two
>   separate define_insns for the move or whether we keep a single one
>   (to support relaxation).

As the title and description explain, code quality improves significantly by
keeping the instructions together before we even consider linker relaxation.
The cost improvements can be done separately but they are important to
get the measured code quality gains.
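
As a sketch of the rematerialization point (not taken from the patch):
once the whole GOT access is a single move of a symbol, the register
allocator can recompute the value under pressure instead of spilling
it, e.g. around a call, with sym again a placeholder:

    // rather than  str x19, [sp, 16]  ...  ldr x19, [sp, 16]
    // the allocator can simply re-emit the cheap pair:
    adrp    x0, :got:sym
    ldr     x0, [x0, :got_lo12:sym]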

Here is v4:

Improve GOT addressing by treating the instructions as a pair.  This reduces
register pressure and improves code quality significantly.  SPECINT2017 improves
by 0.6% with -fPIC and codesize is 0.73% smaller.  Perlbench has 0.9% smaller
codesize, 1.5% fewer executed instructions and is 1.8% faster on Neoverse N1.

Passes bootstrap and regress. OK for commit?

ChangeLog:
2021-11-02  Wilco Dijkstra  <wdijkstr@arm.com>

        * config/aarch64/aarch64.md (movsi): Add alternative for GOT accesses.
        (movdi): Likewise.
        (ldr_got_small_<mode>): Remove pattern.
        (ldr_got_small_sidi): Likewise.
        * config/aarch64/aarch64.c (aarch64_load_symref_appropriately): Keep
        GOT accesses as moves.
        (aarch64_print_operand): Correctly print got_lo12 in L specifier.
        (aarch64_mov_operand_p): Make GOT accesses valid move operands.
        * config/aarch64/constraints.md: Add new constraint Usw for GOT access.

---
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 6d26218a61bd0f9e369bb32388b7f8643b632172..ab2954d11333b3f5a419c55d0a74f13d1df70680 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3753,47 +3753,8 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
       }
 
     case SYMBOL_SMALL_GOT_4G:
-      {
-	/* In ILP32, the mode of dest can be either SImode or DImode,
-	   while the got entry is always of SImode size.  The mode of
-	   dest depends on how dest is used: if dest is assigned to a
-	   pointer (e.g. in the memory), it has SImode; it may have
-	   DImode if dest is dereferenced to access the memeory.
-	   This is why we have to handle three different ldr_got_small
-	   patterns here (two patterns for ILP32).  */
-
-	rtx insn;
-	rtx mem;
-	rtx tmp_reg = dest;
-	machine_mode mode = GET_MODE (dest);
-
-	if (can_create_pseudo_p ())
-	  tmp_reg = gen_reg_rtx (mode);
-
-	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
-	if (mode == ptr_mode)
-	  {
-	    if (mode == DImode)
-	      insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
-	    else
-	      insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
-
-	    mem = XVECEXP (SET_SRC (insn), 0, 0);
-	  }
-	else
-	  {
-	    gcc_assert (mode == Pmode);
-
-	    insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
-	    mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
-	  }
-
-	gcc_assert (MEM_P (mem));
-	MEM_READONLY_P (mem) = 1;
-	MEM_NOTRAP_P (mem) = 1;
-	emit_insn (insn);
-	return;
-      }
+      emit_insn (gen_rtx_SET (dest, imm));
+      return;
 
     case SYMBOL_SMALL_TLSGD:
       {
@@ -11159,7 +11120,7 @@ aarch64_print_operand (FILE *f, rtx x, int code)
       switch (aarch64_classify_symbolic_expression (x))
 	{
 	case SYMBOL_SMALL_GOT_4G:
-	  asm_fprintf (asm_out_file, ":lo12:");
+	  asm_fprintf (asm_out_file, ":got_lo12:");
 	  break;
 
 	case SYMBOL_SMALL_TLSGD:
@@ -20241,6 +20202,11 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
       return aarch64_simd_valid_immediate (x, NULL);
     }
 
+  /* GOT accesses are valid moves.  */
+  if (SYMBOL_REF_P (x)
+      && aarch64_classify_symbolic_expression (x) == SYMBOL_SMALL_GOT_4G)
+    return true;
+
   x = strip_salt (x);
   if (SYMBOL_REF_P (x) && mode == DImode && CONSTANT_ADDRESS_P (x))
     return true;
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 7085cd4a51dc4c22def9b95f2221b3847603a9e5..9d38269e9429597b825838faf9f241216cc6ab47 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1263,8 +1263,8 @@ (define_expand "mov<mode>"
 )
 
 (define_insn_and_split "*movsi_aarch64"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m,  r,  r, w,r,w, w")
-	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,Usv,m,m,rZ,w,Usa,Ush,rZ,w,w,Ds"))]
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m,  r,  r,  r, w,r,w, w")
+	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))]
   "(register_operand (operands[0], SImode)
     || aarch64_reg_or_zero (operands[1], SImode))"
   "@
@@ -1278,6 +1278,7 @@ (define_insn_and_split "*movsi_aarch64"
    ldr\\t%s0, %1
    str\\t%w1, %0
    str\\t%s1, %0
+   * return \"adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1]\";
    adr\\t%x0, %c1
    adrp\\t%x0, %A1
    fmov\\t%s0, %w1
@@ -1293,13 +1294,15 @@ (define_insn_and_split "*movsi_aarch64"
     }"
   ;; The "mov_imm" type for CNT is just a placeholder.
   [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4,
-		    load_4,store_4,store_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
-   (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,fp,fp,fp,simd")]
+		    load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
+   (set_attr "arch"   "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
+   (set_attr "length" "4,4,4,4,*,  4,4, 4,4, 4,8,4,4, 4, 4, 4,   4")
+]
 )
 
 (define_insn_and_split "*movdi_aarch64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r, r,w, m,m,  r,  r, w,r,w, w")
-	(match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,M,n,Usv,m,m,rZ,w,Usa,Ush,rZ,w,w,Dd"))]
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r, r,w, m,m,   r,  r,  r, w,r,w, w")
+	(match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
   "(register_operand (operands[0], DImode)
     || aarch64_reg_or_zero (operands[1], DImode))"
   "@
@@ -1314,13 +1317,14 @@ (define_insn_and_split "*movdi_aarch64"
    ldr\\t%d0, %1
    str\\t%x1, %0
    str\\t%d1, %0
+   * return TARGET_ILP32 ? \"adrp\\t%0, %A1\;ldr\\t%w0, [%0, %L1]\" : \"adrp\\t%0, %A1\;ldr\\t%0, [%0, %L1]\";
    adr\\t%x0, %c1
    adrp\\t%x0, %A1
    fmov\\t%d0, %x1
    fmov\\t%x0, %d1
    fmov\\t%d0, %d1
    * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);"
-   "(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode))
+   "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode)
     && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
    [(const_int 0)]
    "{
@@ -1329,9 +1333,10 @@ (define_insn_and_split "*movdi_aarch64"
     }"
   ;; The "mov_imm" type for CNTD is just a placeholder.
   [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,mov_imm,
-		     load_8,load_8,store_8,store_8,adr,adr,f_mcr,f_mrc,fmov,
-		     neon_move")
-   (set_attr "arch" "*,*,*,*,*,*,sve,*,fp,*,fp,*,*,fp,fp,fp,simd")]
+		     load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc,
+		     fmov,neon_move")
+   (set_attr "arch"   "*,*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
+   (set_attr "length" "4,4,4,4,4,*,  4,4, 4,4, 4,8,4,4, 4, 4, 4,   4")]
 )
 
 (define_insn "insv_imm<mode>"
@@ -6707,29 +6712,6 @@ (define_insn "add_losym_<mode>"
   [(set_attr "type" "alu_imm")]
 )
 
-(define_insn "ldr_got_small_<mode>"
-  [(set (match_operand:PTR 0 "register_operand" "=r")
-	(unspec:PTR [(mem:PTR (lo_sum:PTR
-			      (match_operand:PTR 1 "register_operand" "r")
-			      (match_operand:PTR 2 "aarch64_valid_symref" "S")))]
-		    UNSPEC_GOTSMALLPIC))]
-  ""
-  "ldr\\t%<w>0, [%1, #:got_lo12:%c2]"
-  [(set_attr "type" "load_<ldst_sz>")]
-)
-
-(define_insn "ldr_got_small_sidi"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(zero_extend:DI
-	 (unspec:SI [(mem:SI (lo_sum:DI
-			     (match_operand:DI 1 "register_operand" "r")
-			     (match_operand:DI 2 "aarch64_valid_symref" "S")))]
-		    UNSPEC_GOTSMALLPIC)))]
-  "TARGET_ILP32"
-  "ldr\\t%w0, [%1, #:got_lo12:%c2]"
-  [(set_attr "type" "load_4")]
-)
-
 (define_insn "ldr_got_small_28k_<mode>"
   [(set (match_operand:PTR 0 "register_operand" "=r")
 	(unspec:PTR [(mem:PTR (lo_sum:PTR
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 3b49b452119c49320020fa9183314d9a25b92491..985fa2217e6a044b1eb2adc886864f63a1f9f9b2 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -152,6 +152,14 @@ (define_constraint "Usa"
        (match_test "aarch64_symbolic_address_p (op)")
        (match_test "aarch64_mov_operand_p (op, GET_MODE (op))")))
 
+(define_constraint "Usw"
+  "@internal
+   A constraint that matches a small GOT access."
+  (and (match_code "symbol_ref")
+       (match_test "aarch64_symbolic_address_p (op)")
+       (match_test "aarch64_classify_symbolic_expression (op)
+		     == SYMBOL_SMALL_GOT_4G")))
+
 (define_constraint "Uss"
   "@internal
   A constraint that matches an immediate shift constant in SImode."


* Re: [PATCH v3] AArch64: Improve GOT addressing
  2021-11-02 18:41     ` Wilco Dijkstra
@ 2021-11-02 18:55       ` Richard Sandiford
  0 siblings, 0 replies; 5+ messages in thread
From: Richard Sandiford @ 2021-11-02 18:55 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: Kyrylo Tkachov, GCC Patches

Wilco Dijkstra <Wilco.Dijkstra@arm.com> writes:
> Hi Richard,
>
>> - Why do we rewrite the constant moves after reload into ldr_got_small_sidi
>>   and ldr_got_small_<mode>?  Couldn't we just get the move patterns to
>>   output the sequence directly?
>
> That's possible too; however, it makes the movsi/di patterns more complex.

Yeah, it certainly does that, but it also makes the other code
significantly simpler. :-)

> See version v4 below.

Thanks, this looks good apart from a couple of nits:

> […]
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 7085cd4a51dc4c22def9b95f2221b3847603a9e5..9d38269e9429597b825838faf9f241216cc6ab47 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1263,8 +1263,8 @@ (define_expand "mov<mode>"
>  )
>
>  (define_insn_and_split "*movsi_aarch64"
> -  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m,  r,  r, w,r,w, w")
> -       (match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,Usv,m,m,rZ,w,Usa,Ush,rZ,w,w,Ds"))]
> +  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m,  r,  r,  r, w,r,w, w")
> +       (match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))]
>    "(register_operand (operands[0], SImode)
>      || aarch64_reg_or_zero (operands[1], SImode))"
>    "@
> @@ -1278,6 +1278,7 @@ (define_insn_and_split "*movsi_aarch64"
>     ldr\\t%s0, %1
>     str\\t%w1, %0
>     str\\t%s1, %0
> +   * return \"adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1]\";

The * return stuff shouldn't be necessary here.  E.g. the SVE MOVPRFX
alternatives directly use \; in @ alternatives.
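
E.g. a sketch of that alternative written directly in the "@" list:

   adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1]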

>     adr\\t%x0, %c1
>     adrp\\t%x0, %A1
>     fmov\\t%s0, %w1
> @@ -1293,13 +1294,15 @@ (define_insn_and_split "*movsi_aarch64"
>      }"
>    ;; The "mov_imm" type for CNT is just a placeholder.
>    [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4,
> -                   load_4,store_4,store_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
> -   (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,fp,fp,fp,simd")]
> +                   load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
> +   (set_attr "arch"   "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
> +   (set_attr "length" "4,4,4,4,*,  4,4, 4,4, 4,8,4,4, 4, 4, 4,   4")
> +]
>  )
>
>  (define_insn_and_split "*movdi_aarch64"
> -  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r, r,w, m,m,  r,  r, w,r,w, w")
> -       (match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,M,n,Usv,m,m,rZ,w,Usa,Ush,rZ,w,w,Dd"))]
> +  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r, r,w, m,m,   r,  r,  r, w,r,w, w")
> +       (match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
>    "(register_operand (operands[0], DImode)
>      || aarch64_reg_or_zero (operands[1], DImode))"
>    "@
> @@ -1314,13 +1317,14 @@ (define_insn_and_split "*movdi_aarch64"
>     ldr\\t%d0, %1
>     str\\t%x1, %0
>     str\\t%d1, %0
> +   * return TARGET_ILP32 ? \"adrp\\t%0, %A1\;ldr\\t%w0, [%0, %L1]\" : \"adrp\\t%0, %A1\;ldr\\t%0, [%0, %L1]\";
>     adr\\t%x0, %c1
>     adrp\\t%x0, %A1
>     fmov\\t%d0, %x1
>     fmov\\t%x0, %d1
>     fmov\\t%d0, %d1
>     * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);"
> -   "(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode))
> +   "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode)
>      && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
>     [(const_int 0)]
>     "{
> @@ -1329,9 +1333,10 @@ (define_insn_and_split "*movdi_aarch64"
>      }"
>    ;; The "mov_imm" type for CNTD is just a placeholder.
>    [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,mov_imm,
> -                    load_8,load_8,store_8,store_8,adr,adr,f_mcr,f_mrc,fmov,
> -                    neon_move")
> -   (set_attr "arch" "*,*,*,*,*,*,sve,*,fp,*,fp,*,*,fp,fp,fp,simd")]
> +                    load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc,
> +                    fmov,neon_move")
> +   (set_attr "arch"   "*,*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
> +   (set_attr "length" "4,4,4,4,4,*,  4,4, 4,4, 4,8,4,4, 4, 4, 4,   4")]
>  )
>
>  (define_insn "insv_imm<mode>"
> @@ -6707,29 +6712,6 @@ (define_insn "add_losym_<mode>"
>    [(set_attr "type" "alu_imm")]
>  )
>
> -(define_insn "ldr_got_small_<mode>"
> -  [(set (match_operand:PTR 0 "register_operand" "=r")
> -       (unspec:PTR [(mem:PTR (lo_sum:PTR
> -                             (match_operand:PTR 1 "register_operand" "r")
> -                             (match_operand:PTR 2 "aarch64_valid_symref" "S")))]
> -                   UNSPEC_GOTSMALLPIC))]
> -  ""
> -  "ldr\\t%<w>0, [%1, #:got_lo12:%c2]"
> -  [(set_attr "type" "load_<ldst_sz>")]
> -)
> -
> -(define_insn "ldr_got_small_sidi"
> -  [(set (match_operand:DI 0 "register_operand" "=r")
> -       (zero_extend:DI
> -        (unspec:SI [(mem:SI (lo_sum:DI
> -                            (match_operand:DI 1 "register_operand" "r")
> -                            (match_operand:DI 2 "aarch64_valid_symref" "S")))]
> -                   UNSPEC_GOTSMALLPIC)))]
> -  "TARGET_ILP32"
> -  "ldr\\t%w0, [%1, #:got_lo12:%c2]"
> -  [(set_attr "type" "load_4")]
> -)
> -
>  (define_insn "ldr_got_small_28k_<mode>"
>    [(set (match_operand:PTR 0 "register_operand" "=r")
>         (unspec:PTR [(mem:PTR (lo_sum:PTR
> diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
> index 3b49b452119c49320020fa9183314d9a25b92491..985fa2217e6a044b1eb2adc886864f63a1f9f9b2 100644
> --- a/gcc/config/aarch64/constraints.md
> +++ b/gcc/config/aarch64/constraints.md
> @@ -152,6 +152,14 @@ (define_constraint "Usa"
>         (match_test "aarch64_symbolic_address_p (op)")
>         (match_test "aarch64_mov_operand_p (op, GET_MODE (op))")))
>
> +(define_constraint "Usw"
> +  "@internal
> +   A constraint that matches a small GOT access."
> +  (and (match_code "symbol_ref")
> +       (match_test "aarch64_symbolic_address_p (op)")

This test is redundant, since aarch64_symbolic_address_p is always
true for symbol_refs.
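
Something like this minimal version should be enough:

(define_constraint "Usw"
  "@internal
   A constraint that matches a small GOT access."
  (and (match_code "symbol_ref")
       (match_test "aarch64_classify_symbolic_expression (op)
		    == SYMBOL_SMALL_GOT_4G")))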

OK with those changes, thanks.

Richard

