public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt
@ 2015-07-28 14:02 Jiong Wang
  2015-08-04  9:48 ` James Greenhalgh
                   ` (2 more replies)
  0 siblings, 3 replies; 19+ messages in thread
From: Jiong Wang @ 2015-07-28 14:02 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1778 bytes --]


The instruction sequences for preparing argument for TLS descriptor
runtime resolver and the later function call to resolver can actually be
hoisted out of the loop.

Currently we can't because we have exposed the hard register X0 as
destination of "set".  While GCC's RTL data flow infrastructure will
skip or do very conservative assumption when hard register involved in
and thus some loop IV opportunities are missed.

This patch add another "tlsdesc_small_pseudo_<mode>" pattern, and avoid
expose x0 to gcc generic code.

Generally, we define a new register class FIXED_R0 which only contains register
0, so the instruction sequences generated from the new add pattern is the same
as tlsdesc_small_<mode>, while the operand 0 is wrapped as pseudo register that
RTL IV opt can handle it.

Ideally, we should allow operand 0 to be any pseudo register, but then
we can't model the override of x0 caused by the function call which is
hidded by the UNSPEC.

So here, we restricting operand 0 to be x0, the override of x0 can be
reflected to the gcc.

OK for trunk?

2015-07-28  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
            Jiong Wang  <jiong.wang@arm.com>

gcc/
  * config/aarch64/aarch64.d (tlsdesc_small_pseudo_<mode>): New pattern.
  * config/aarch64/aarch64.h (reg_class): New enumeration FIXED_REG0.
  (REG_CLASS_NAMES): Likewise.
  (REG_CLASS_CONTENTS): Likewise.
  * config/aarch64/aarch64.c (aarch64_class_max_nregs): Likewise.
  (aarch64_register_move_cost): Likewise.
  (aarch64_load_symref_appropriately): Invoke the new added pattern if
  possible.
  * config/aarch64/constraints.md (Uc0): New constraint.

gcc/testsuite.
  * gcc.target/aarch64/tlsdesc_hoist.c: New testcase.

-- 
Regards,
Jiong


[-- Attachment #2: tlsdesc-hoist.patch --]
[-- Type: text/x-diff, Size: 5975 bytes --]

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 3851564..fb4834a 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -454,6 +454,7 @@ extern unsigned long aarch64_isa_flags;
 enum reg_class
 {
   NO_REGS,
+  FIXED_REG0,
   CALLER_SAVE_REGS,
   GENERAL_REGS,
   STACK_REG,
@@ -469,6 +470,7 @@ enum reg_class
 #define REG_CLASS_NAMES				\
 {						\
   "NO_REGS",					\
+  "FIXED_REG0"					\
   "CALLER_SAVE_REGS",				\
   "GENERAL_REGS",				\
   "STACK_REG",					\
@@ -481,6 +483,7 @@ enum reg_class
 #define REG_CLASS_CONTENTS						\
 {									\
   { 0x00000000, 0x00000000, 0x00000000 },	/* NO_REGS */		\
+  { 0x00000001, 0x00000000, 0x00000000 },	/* FIXED_REG0 */	\
   { 0x0007ffff, 0x00000000, 0x00000000 },	/* CALLER_SAVE_REGS */	\
   { 0x7fffffff, 0x00000000, 0x00000003 },	/* GENERAL_REGS */	\
   { 0x80000000, 0x00000000, 0x00000000 },	/* STACK_REG */		\
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index ef07e05..f1f2cab 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1038,22 +1038,39 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
       {
 	machine_mode mode = GET_MODE (dest);
 	rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
+	rtx offset;
 	rtx tp;
 
 	gcc_assert (mode == Pmode || mode == ptr_mode);
 
-	/* In ILP32, the got entry is always of SImode size.  Unlike
-	   small GOT, the dest is fixed at reg 0.  */
-	if (TARGET_ILP32)
-	  emit_insn (gen_tlsdesc_small_si (imm));
+	if (can_create_pseudo_p ())
+	  {
+	    rtx reg = gen_reg_rtx (mode);
+
+	    if (TARGET_ILP32)
+	      emit_insn (gen_tlsdesc_small_pseudo_si (reg, imm));
+	    else
+	      emit_insn (gen_tlsdesc_small_pseudo_di (reg, imm));
+
+	    offset = reg;
+	  }
 	else
-	  emit_insn (gen_tlsdesc_small_di (imm));
+	  {
+	    /* In ILP32, the got entry is always of SImode size.  Unlike
+	       small GOT, the dest is fixed at reg 0.  */
+	    if (TARGET_ILP32)
+	      emit_insn (gen_tlsdesc_small_si (imm));
+	    else
+	      emit_insn (gen_tlsdesc_small_di (imm));
+
+	    offset = x0;
+	  }
 	tp = aarch64_load_tp (NULL);
 
 	if (mode != Pmode)
 	  tp = gen_lowpart (mode, tp);
 
-	emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
+	emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, offset)));
 	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 	return;
       }
@@ -5099,6 +5116,7 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
 	aarch64_vector_mode_p (mode)
 	  ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
 	  : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+    case FIXED_REG0:
     case STACK_REG:
       return 1;
 
@@ -6948,10 +6966,10 @@ aarch64_register_move_cost (machine_mode mode,
     = aarch64_tune_params.regmove_cost;
 
   /* Caller save and pointer regs are equivalent to GENERAL_REGS.  */
-  if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
+  if (to == CALLER_SAVE_REGS || to == POINTER_REGS || to == FIXED_REG0)
     to = GENERAL_REGS;
 
-  if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
+  if (from == CALLER_SAVE_REGS || from == POINTER_REGS || from == FIXED_REG0)
     from = GENERAL_REGS;
 
   /* Moving between GPR and stack cost is the same as GP2GP.  */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c681cf1..3fcfe55 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4496,6 +4496,25 @@
   [(set_attr "type" "call")
    (set_attr "length" "16")])
 
+;; The same as tlsdesc_small_<mode> with hard register hiding.
+;; The first operand is actually x0, while we wrap it under a delicated
+;; register class so that before register allocation, it's seen as pseudo
+;; register.  The reason for doing this is we don't expose hard register X0
+;; as the destination of set as it will cause trouble for RTL loop iv.
+;; RTL loop iv will abort ongoing optimization once it finds there is hard reg
+;; as destination of set.
+(define_insn "tlsdesc_small_pseudo_<mode>"
+  [(set (match_operand:PTR 0 "register_operand" "=Uc0")
+	(unspec:PTR [(match_operand 1 "aarch64_valid_symref" "S")]
+		    UNSPEC_TLSDESC))
+   (clobber (reg:DI LR_REGNUM))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:DI 2 "=r"))]
+  "TARGET_TLS_DESC"
+  "adrp\\t<w>0, %A1\;ldr\\t%<w>2, [%<w>0, #%L1]\;add\\t%<w>0, %<w>0, %L1\;.tlsdesccall\\t%1\;blr\\t%2"
+  [(set_attr "type" "call")
+   (set_attr "length" "16")])
+
 (define_insn "stack_tie"
   [(set (mem:BLK (scratch))
 	(unspec:BLK [(match_operand:DI 0 "register_operand" "rk")
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 5b189ea..b09d10d 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -24,6 +24,9 @@
 (define_register_constraint "Ucs" "CALLER_SAVE_REGS"
   "@internal The caller save registers.")
 
+(define_register_constraint "Uc0" "FIXED_REG0"
+  "@internal Represent X0/W0.")
+
 (define_register_constraint "w" "FP_REGS"
   "Floating point and SIMD vector registers.")
 
diff --git a/gcc/testsuite/gcc.target/aarch64/tlsdesc_hoist.c b/gcc/testsuite/gcc.target/aarch64/tlsdesc_hoist.c
new file mode 100644
index 0000000..f9a6d7f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/tlsdesc_hoist.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target tls_native } */
+/* { dg-options "-O2 -fpic -fdump-rtl-loop2_invariant" } */
+
+int cal (int, int);
+__thread int tls_data;
+
+int
+foo (int bound)
+{
+  int i = 0;
+  int sum = 0;
+
+  for (i; i < bound; i++)
+    sum = cal (sum, tls_data);
+
+  return sum;
+}
+
+/* Insn sequences for TLS descriptor should be hoisted out of the loop.  */
+/* { dg-final { scan-rtl-dump "Decided" "loop2_invariant" } } */

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt
  2015-07-28 14:02 [AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt Jiong Wang
@ 2015-08-04  9:48 ` James Greenhalgh
  2015-08-06 16:13   ` [COMMITTED][AArch64] " Jiong Wang
  2015-09-26  7:38 ` [AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt Andrew Pinski
  2015-09-28 16:10 ` Marcus Shawcroft
  2 siblings, 1 reply; 19+ messages in thread
From: James Greenhalgh @ 2015-08-04  9:48 UTC (permalink / raw)
  To: Jiong Wang; +Cc: gcc-patches

On Tue, Jul 28, 2015 at 02:12:36PM +0100, Jiong Wang wrote:
> 
> The instruction sequences for preparing argument for TLS descriptor
> runtime resolver and the later function call to resolver can actually be
> hoisted out of the loop.
> 
> Currently we can't because we have exposed the hard register X0 as
> destination of "set".  While GCC's RTL data flow infrastructure will
> skip or do very conservative assumption when hard register involved in
> and thus some loop IV opportunities are missed.
> 
> This patch add another "tlsdesc_small_pseudo_<mode>" pattern, and avoid
> expose x0 to gcc generic code.
> 
> Generally, we define a new register class FIXED_R0 which only contains register
> 0, so the instruction sequences generated from the new add pattern is the same
> as tlsdesc_small_<mode>, while the operand 0 is wrapped as pseudo register that
> RTL IV opt can handle it.
> 
> Ideally, we should allow operand 0 to be any pseudo register, but then
> we can't model the override of x0 caused by the function call which is
> hidded by the UNSPEC.
> 
> So here, we restricting operand 0 to be x0, the override of x0 can be
> reflected to the gcc.
> 
> OK for trunk?

OK.

Thanks,
James

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [COMMITTED][AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt
  2015-08-04  9:48 ` James Greenhalgh
@ 2015-08-06 16:13   ` Jiong Wang
  2015-08-08 11:59     ` Andreas Schwab
  0 siblings, 1 reply; 19+ messages in thread
From: Jiong Wang @ 2015-08-06 16:13 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1576 bytes --]


James Greenhalgh writes:

> On Tue, Jul 28, 2015 at 02:12:36PM +0100, Jiong Wang wrote:
>> 
>> The instruction sequences for preparing argument for TLS descriptor
>> runtime resolver and the later function call to resolver can actually be
>> hoisted out of the loop.
>> 
>> Currently we can't because we have exposed the hard register X0 as
>> destination of "set".  While GCC's RTL data flow infrastructure will
>> skip or do very conservative assumption when hard register involved in
>> and thus some loop IV opportunities are missed.
>> 
>> This patch add another "tlsdesc_small_pseudo_<mode>" pattern, and avoid
>> expose x0 to gcc generic code.
>> 
>> Generally, we define a new register class FIXED_R0 which only contains register
>> 0, so the instruction sequences generated from the new add pattern is the same
>> as tlsdesc_small_<mode>, while the operand 0 is wrapped as pseudo register that
>> RTL IV opt can handle it.
>> 
>> Ideally, we should allow operand 0 to be any pseudo register, but then
>> we can't model the override of x0 caused by the function call which is
>> hidded by the UNSPEC.
>> 
>> So here, we restricting operand 0 to be x0, the override of x0 can be
>> reflected to the gcc.
>> 
>> OK for trunk?
>
> OK.
>
> Thanks,
> James

When I am about to commit this patch, I realized I need to apply the
same trick as I have done at

  https://gcc.gnu.org/ml/gcc-patches/2015-07/msg01653.html

then the included testcases can work well on any of tiny, small, large model.

commited the following patch:


[-- Attachment #2: new-3.patch --]
[-- Type: text/x-diff, Size: 4584 bytes --]

Index: gcc/ChangeLog
===================================================================
--- gcc/ChangeLog	(revision 226682)
+++ gcc/ChangeLog	(working copy)
@@ -1,3 +1,16 @@
+2015-08-06    Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+	      Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.d (tlsdesc_small_pseudo_<mode>): New pattern.
+	* config/aarch64/aarch64.h (reg_class): New enumeration FIXED_REG0.
+	(REG_CLASS_NAMES): Likewise.
+	(REG_CLASS_CONTENTS): Likewise.
+	* config/aarch64/aarch64.c (aarch64_class_max_nregs): Likewise.
+	(aarch64_register_move_cost): Likewise.
+	(aarch64_load_symref_appropriately): Invoke the new added pattern if
+	possible.
+	* config/aarch64/constraints.md (Uc0): New constraint.
+
 2015-08-06  Jiong Wang  <jiong.wang@arm.com>
 
 	* config/aarch64/constraints.md (Usf): Add the test of
Index: gcc/config/aarch64/aarch64.c
===================================================================
--- gcc/config/aarch64/aarch64.c	(revision 226681)
+++ gcc/config/aarch64/aarch64.c	(working copy)
@@ -1048,12 +1048,26 @@
 
 	gcc_assert (mode == Pmode || mode == ptr_mode);
 
-	/* In ILP32, the got entry is always of SImode size.  Unlike
-	   small GOT, the dest is fixed at reg 0.  */
-	if (TARGET_ILP32)
-	  emit_insn (gen_tlsdesc_small_si (imm));
+	if (can_create_pseudo_p ())
+	  {
+	    rtx reg = gen_reg_rtx (mode);
+
+	    if (TARGET_ILP32)
+	      emit_insn (gen_tlsdesc_small_pseudo_si (imm, reg));
+	    else
+	      emit_insn (gen_tlsdesc_small_pseudo_di (imm, reg));
+
+	    emit_use (reg);
+	  }
 	else
-	  emit_insn (gen_tlsdesc_small_di (imm));
+	  {
+	    /* In ILP32, the got entry is always of SImode size.  Unlike
+	       small GOT, the dest is fixed at reg 0.  */
+	    if (TARGET_ILP32)
+	      emit_insn (gen_tlsdesc_small_si (imm));
+	    else
+	      emit_insn (gen_tlsdesc_small_di (imm));
+	  }
 	tp = aarch64_load_tp (NULL);
 
 	if (mode != Pmode)
Index: gcc/config/aarch64/aarch64.md
===================================================================
--- gcc/config/aarch64/aarch64.md	(revision 226681)
+++ gcc/config/aarch64/aarch64.md	(working copy)
@@ -4549,6 +4549,23 @@
   [(set_attr "type" "call")
    (set_attr "length" "16")])
 
+;; The same as tlsdesc_small_<mode> except that we don't expose hard register X0
+;; as the destination of set as it will cause trouble for RTL loop iv.
+;; RTL loop iv will abort ongoing optimization once it finds there is hard reg
+;; as destination of set.  This pattern thus could help these tlsdesc
+;; instruction sequences hoisted out of loop.
+(define_insn "tlsdesc_small_pseudo_<mode>"
+  [(set (match_operand:PTR 1 "register_operand" "=r")
+        (unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")]
+		   UNSPEC_TLSDESC))
+   (clobber (reg:DI R0_REGNUM))
+   (clobber (reg:DI LR_REGNUM))
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_TLS_DESC"
+  "adrp\\tx0, %A0\;ldr\\t%<w>1, [x0, #%L0]\;add\\t<w>0, <w>0, %L0\;.tlsdesccall\\t%0\;blr\\t%1"
+  [(set_attr "type" "call")
+   (set_attr "length" "16")])
+
 (define_insn "stack_tie"
   [(set (mem:BLK (scratch))
 	(unspec:BLK [(match_operand:DI 0 "register_operand" "rk")
Index: gcc/testsuite/ChangeLog
===================================================================
--- gcc/testsuite/ChangeLog	(revision 226682)
+++ gcc/testsuite/ChangeLog	(working copy)
@@ -1,5 +1,9 @@
 2015-08-06  Jiong Wang  <jiong.wang@arm.com>
 
+	* gcc.target/aarch64/tlsdesc_hoist.c: New testcase.
+
+2015-08-06  Jiong Wang  <jiong.wang@arm.com>
+
 	* gcc.target/aarch64/noplt_3.c: New testcase.
 
 2015-08-06  Jiong Wang  <jiong.wang@arm.com>
Index: gcc/testsuite/gcc.target/aarch64/tlsdesc_hoist.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/tlsdesc_hoist.c	(revision 0)
+++ gcc/testsuite/gcc.target/aarch64/tlsdesc_hoist.c	(working copy)
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target tls_native } */
+/* { dg-options "-O2 -fpic -fdump-rtl-loop2_invariant" } */
+/* { dg-skip-if "-mcmodel=large, no support for -fpic" { aarch64-*-* }  { "-mcmodel=large" } { "" } } */
+
+int cal (int, int);
+__thread int tls_data;
+
+int
+foo (int bound)
+{
+  int i = 0;
+  int sum = 0;
+
+  for (i; i < bound; i++)
+    sum = cal (sum, tls_data);
+
+  return sum;
+}
+
+/* Insn sequences for TLS descriptor should be hoisted out of the loop.  */
+/* { dg-final { scan-rtl-dump "Decided" "loop2_invariant" } } */

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [COMMITTED][AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt
  2015-08-06 16:13   ` [COMMITTED][AArch64] " Jiong Wang
@ 2015-08-08 11:59     ` Andreas Schwab
  2015-08-10 10:28       ` Jiong Wang
  0 siblings, 1 reply; 19+ messages in thread
From: Andreas Schwab @ 2015-08-08 11:59 UTC (permalink / raw)
  To: Jiong Wang; +Cc: James Greenhalgh, gcc-patches

Jiong Wang <jiong.wang@arm.com> writes:

> Index: gcc/ChangeLog
> ===================================================================
> --- gcc/ChangeLog	(revision 226682)
> +++ gcc/ChangeLog	(working copy)
> @@ -1,3 +1,16 @@
> +2015-08-06    Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
> +	      Jiong Wang  <jiong.wang@arm.com>
> +
> +	* config/aarch64/aarch64.d (tlsdesc_small_pseudo_<mode>): New pattern.
> +	* config/aarch64/aarch64.h (reg_class): New enumeration FIXED_REG0.
> +	(REG_CLASS_NAMES): Likewise.
> +	(REG_CLASS_CONTENTS): Likewise.
> +	* config/aarch64/aarch64.c (aarch64_class_max_nregs): Likewise.
> +	(aarch64_register_move_cost): Likewise.
> +	(aarch64_load_symref_appropriately): Invoke the new added pattern if
> +	possible.
> +	* config/aarch64/constraints.md (Uc0): New constraint.

That breaks go, all tests are crashing now.

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [COMMITTED][AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt
  2015-08-08 11:59     ` Andreas Schwab
@ 2015-08-10 10:28       ` Jiong Wang
  2015-08-10 13:08         ` Jiong Wang
  0 siblings, 1 reply; 19+ messages in thread
From: Jiong Wang @ 2015-08-10 10:28 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: James Greenhalgh, gcc-patches


Andreas Schwab writes:

> Jiong Wang <jiong.wang@arm.com> writes:
>
>> Index: gcc/ChangeLog
>> ===================================================================
>> --- gcc/ChangeLog	(revision 226682)
>> +++ gcc/ChangeLog	(working copy)
>> @@ -1,3 +1,16 @@
>> +2015-08-06    Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
>> +	      Jiong Wang  <jiong.wang@arm.com>
>> +
>> +	* config/aarch64/aarch64.d (tlsdesc_small_pseudo_<mode>): New pattern.
>> +	* config/aarch64/aarch64.h (reg_class): New enumeration FIXED_REG0.
>> +	(REG_CLASS_NAMES): Likewise.
>> +	(REG_CLASS_CONTENTS): Likewise.
>> +	* config/aarch64/aarch64.c (aarch64_class_max_nregs): Likewise.
>> +	(aarch64_register_move_cost): Likewise.
>> +	(aarch64_load_symref_appropriately): Invoke the new added pattern if
>> +	possible.
>> +	* config/aarch64/constraints.md (Uc0): New constraint.
>
> That breaks go, all tests are crashing now.

Andreas,

  Thanks for the information.

  * I found I committed the wrong patch!
    there are two patches in my local directory, one is
    "tlsdesc_hoist.patch" the other is "tlsdesc-hoist.patch", the one
    approved and up-to-date is tlsdesc-hoist.patch while I committed
    tlsdesc_hoist.patch.
    
    Reverted the wrong commit and committed the correct/approved
    version.

  * Even after the correct patch applied, I still found go check failed
    on my local native check.

    Tring to understand why and if I can't figure out today I will
    revert the patch.
    
  Sorry about the trouble!
-- 
Regards,
Jiong

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [COMMITTED][AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt
  2015-08-10 10:28       ` Jiong Wang
@ 2015-08-10 13:08         ` Jiong Wang
  2015-08-10 13:20           ` Andreas Schwab
  0 siblings, 1 reply; 19+ messages in thread
From: Jiong Wang @ 2015-08-10 13:08 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: James Greenhalgh, gcc-patches


Jiong Wang writes:

> Andreas Schwab writes:
>
>> Jiong Wang <jiong.wang@arm.com> writes:
>>
>>> Index: gcc/ChangeLog
>>> ===================================================================
>>> --- gcc/ChangeLog	(revision 226682)
>>> +++ gcc/ChangeLog	(working copy)
>>> @@ -1,3 +1,16 @@
>>> +2015-08-06    Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
>>> +	      Jiong Wang  <jiong.wang@arm.com>
>>> +
>>> +	* config/aarch64/aarch64.d (tlsdesc_small_pseudo_<mode>): New pattern.
>>> +	* config/aarch64/aarch64.h (reg_class): New enumeration FIXED_REG0.
>>> +	(REG_CLASS_NAMES): Likewise.
>>> +	(REG_CLASS_CONTENTS): Likewise.
>>> +	* config/aarch64/aarch64.c (aarch64_class_max_nregs): Likewise.
>>> +	(aarch64_register_move_cost): Likewise.
>>> +	(aarch64_load_symref_appropriately): Invoke the new added pattern if
>>> +	possible.
>>> +	* config/aarch64/constraints.md (Uc0): New constraint.
>>
>> That breaks go, all tests are crashing now.
>
> Andreas,
>
>   Thanks for the information.
>
>   * I found I committed the wrong patch!
>     there are two patches in my local directory, one is
>     "tlsdesc_hoist.patch" the other is "tlsdesc-hoist.patch", the one
>     approved and up-to-date is tlsdesc-hoist.patch while I committed
>     tlsdesc_hoist.patch.
>     
>     Reverted the wrong commit and committed the correct/approved
>     version.
>
>   * Even after the correct patch applied, I still found go check failed
>     on my local native check.
>
>     Tring to understand why and if I can't figure out today I will
>     revert the patch.

And I just finished two round of native aarch64 build/check w/wo my patch.

I got the same go.sum.

And my patch only touches one tls descriptor pattern which will only be
used if there is tls variable. So I suspect the go test regressions are
not caused by my patch.

Andreas, can you please double confirm this?

Thanks

-- 
Regards,
Jiong

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [COMMITTED][AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt
  2015-08-10 13:08         ` Jiong Wang
@ 2015-08-10 13:20           ` Andreas Schwab
  2015-08-10 13:26             ` Jiong Wang
  0 siblings, 1 reply; 19+ messages in thread
From: Andreas Schwab @ 2015-08-10 13:20 UTC (permalink / raw)
  To: Jiong Wang; +Cc: James Greenhalgh, gcc-patches

Jiong Wang <jiong.wang@arm.com> writes:

> And I just finished two round of native aarch64 build/check w/wo my patch.

Did you rebuild everything?

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [COMMITTED][AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt
  2015-08-10 13:20           ` Andreas Schwab
@ 2015-08-10 13:26             ` Jiong Wang
  2015-08-10 13:33               ` Andreas Schwab
  0 siblings, 1 reply; 19+ messages in thread
From: Jiong Wang @ 2015-08-10 13:26 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: James Greenhalgh, gcc-patches


Andreas Schwab writes:

> Jiong Wang <jiong.wang@arm.com> writes:
>
>> And I just finished two round of native aarch64 build/check w/wo my patch.
>
> Did you rebuild everything?

No.

Just applied the patch, then "make all" and re-check

>
> Andreas.

-- 
Regards,
Jiong

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [COMMITTED][AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt
  2015-08-10 13:26             ` Jiong Wang
@ 2015-08-10 13:33               ` Andreas Schwab
  2015-08-10 22:55                 ` Jiong Wang
  0 siblings, 1 reply; 19+ messages in thread
From: Andreas Schwab @ 2015-08-10 13:33 UTC (permalink / raw)
  To: Jiong Wang; +Cc: James Greenhalgh, gcc-patches

Jiong Wang <jiong.wang@arm.com> writes:

> Andreas Schwab writes:
>
>> Jiong Wang <jiong.wang@arm.com> writes:
>>
>>> And I just finished two round of native aarch64 build/check w/wo my patch.
>>
>> Did you rebuild everything?
>
> No.

Please do.

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [COMMITTED][AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt
  2015-08-10 13:33               ` Andreas Schwab
@ 2015-08-10 22:55                 ` Jiong Wang
  2015-08-11  7:11                   ` Andreas Schwab
  0 siblings, 1 reply; 19+ messages in thread
From: Jiong Wang @ 2015-08-10 22:55 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: James Greenhalgh, gcc-patches


Andreas Schwab writes:

> Jiong Wang <jiong.wang@arm.com> writes:
>
>> Andreas Schwab writes:
>>
>>> Jiong Wang <jiong.wang@arm.com> writes:
>>>
>>>> And I just finished two round of native aarch64 build/check w/wo my patch.
>>>
>>> Did you rebuild everything?
>>
>> No.
>
> Please do.

Andreas,

  I Just finished several round of rebuild & testing on clean
  environment.

  The problem should have gone away after my new tlsdesc patch.  

  While I do have seen lots of failures with my old tlsdesc patch.

  Please let me know if you still have problem on this after my patch
  correction.
  
  Thanks.
  
-- 
Regards,
Jiong

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [COMMITTED][AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt
  2015-08-10 22:55                 ` Jiong Wang
@ 2015-08-11  7:11                   ` Andreas Schwab
  2015-08-11  8:36                     ` Jiong Wang
  2015-08-11  8:48                     ` Jiong Wang
  0 siblings, 2 replies; 19+ messages in thread
From: Andreas Schwab @ 2015-08-11  7:11 UTC (permalink / raw)
  To: Jiong Wang; +Cc: James Greenhalgh, gcc-patches

Jiong Wang <jiong.wang@arm.com> writes:

>   I Just finished several round of rebuild & testing on clean
>   environment.

How did you even manage to compile it?

../../gcc/ira.c: In function 'void print_translated_classes(FILE*, bool)':
../../gcc/ira.c:1415:49: error: iteration 8u invokes undefined behavior [-Werror=aggressive-loop-optimizations]
     fprintf (f, " %s -> %s\n", reg_class_names[i],
                                                 ^
../../gcc/ira.c:1414:17: note: containing loop
   for (i = 0; i < N_REG_CLASSES; i++)
                 ^

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [COMMITTED][AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt
  2015-08-11  7:11                   ` Andreas Schwab
@ 2015-08-11  8:36                     ` Jiong Wang
  2015-08-11  8:48                     ` Jiong Wang
  1 sibling, 0 replies; 19+ messages in thread
From: Jiong Wang @ 2015-08-11  8:36 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: James Greenhalgh, gcc-patches


Andreas Schwab writes:

> Jiong Wang <jiong.wang@arm.com> writes:
>
>>   I Just finished several round of rebuild & testing on clean
>>   environment.
>
> How did you even manage to compile it?
>
> ../../gcc/ira.c: In function 'void print_translated_classes(FILE*, bool)':
> ../../gcc/ira.c:1415:49: error: iteration 8u invokes undefined behavior [-Werror=aggressive-loop-optimizations]
>      fprintf (f, " %s -> %s\n", reg_class_names[i],
>                                                  ^
> ../../gcc/ira.c:1414:17: note: containing loop
>    for (i = 0; i < N_REG_CLASSES; i++)
>                  ^
>
> Andreas.

What I have done on a clean environment:

  1. checkout e5c427f3245ff69d8b014d4ff170715962178151 which might be
  the commit cause the trouble.

     mkdir build-dir1

     configure/build/check-go in build-dir1
     (configuation options --enable-languages=c,c++,fortran,go
     --prefix=/home/jiowan01/COMMUNITY/install-gcc-native-reference
     --disable-bootstrap)

     and I do see lots of go failures.
     
  2. checkout the one commit before this wrong commit.

     re-done everything in a new build-dir2

     check-go OK

  3. applied my new tlsdesc patch

     re-done everything in a new build-dir3
     check-go OK

  4. checkout

     commit 8b221598008f60621859caee561e8466e80049f7
     Author: jiwang <jiwang@138bc75d-0d04-0410-961f-82ee72b054a4>
     Date:   Mon Aug 10 10:06:28 2015 +0000

    [AArch64] Recommit correct version for improving TLS descriptor
    pattern

    re-done everything in a new build-dir4
    check-go OK


  my host gcc is "gcc version 4.8.2 (Ubuntu/Linaro 4.8.2-19ubuntu1)", I
  guess you have enabled bootstrap.
-- 
Regards,
Jiong

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [COMMITTED][AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt
  2015-08-11  7:11                   ` Andreas Schwab
  2015-08-11  8:36                     ` Jiong Wang
@ 2015-08-11  8:48                     ` Jiong Wang
  2015-08-11 11:22                       ` [COMMITTED][AArch64] Add the missing "," for enumeration element Jiong Wang
  1 sibling, 1 reply; 19+ messages in thread
From: Jiong Wang @ 2015-08-11  8:48 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: James Greenhalgh, gcc-patches


Andreas Schwab writes:

> Jiong Wang <jiong.wang@arm.com> writes:
>
>>   I Just finished several round of rebuild & testing on clean
>>   environment.
>
> How did you even manage to compile it?
>
> ../../gcc/ira.c: In function 'void print_translated_classes(FILE*, bool)':
> ../../gcc/ira.c:1415:49: error: iteration 8u invokes undefined behavior [-Werror=aggressive-loop-optimizations]
>      fprintf (f, " %s -> %s\n", reg_class_names[i],
>                                                  ^
> ../../gcc/ira.c:1414:17: note: containing loop
>    for (i = 0; i < N_REG_CLASSES; i++)

And, my new patch missed a ',' after "FIXED_REG0", sorry about
that.. that should be the problem for the bootstrap issue.


>
> Andreas.

-- 
Regards,
Jiong

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [COMMITTED][AArch64] Add the missing "," for enumeration element
  2015-08-11  8:48                     ` Jiong Wang
@ 2015-08-11 11:22                       ` Jiong Wang
  0 siblings, 0 replies; 19+ messages in thread
From: Jiong Wang @ 2015-08-11 11:22 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: James Greenhalgh, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 991 bytes --]


Jiong Wang writes:

> Andreas Schwab writes:
>
>> Jiong Wang <jiong.wang@arm.com> writes:
>>
>>>   I Just finished several round of rebuild & testing on clean
>>>   environment.
>>
>> How did you even manage to compile it?
>>
>> ../../gcc/ira.c: In function 'void print_translated_classes(FILE*, bool)':
>> ../../gcc/ira.c:1415:49: error: iteration 8u invokes undefined behavior [-Werror=aggressive-loop-optimizations]
>>      fprintf (f, " %s -> %s\n", reg_class_names[i],
>>                                                  ^
>> ../../gcc/ira.c:1414:17: note: containing loop
>>    for (i = 0; i < N_REG_CLASSES; i++)
>
> And, my new patch missed a ',' after "FIXED_REG0", sorry about
> that.. that should be the problem for the bootstrap issue.

committed patch below as obivious after bootstrap OK and check-go OK.

2015-08-11  Jiong Wang  <jiong.wang@arm.com>

gcc/
  * config/aarch64/aarch64.h (REG_CLASS_NAMES): Add the missing ',' after
  FIXED_REG0.


[-- Attachment #2: k.patch --]
[-- Type: text/x-diff, Size: 400 bytes --]

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 43ff895..d3ae393 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -417,7 +417,7 @@ enum reg_class
 #define REG_CLASS_NAMES				\
 {						\
   "NO_REGS",					\
-  "FIXED_REG0"					\
+  "FIXED_REG0",					\
   "CALLER_SAVE_REGS",				\
   "GENERAL_REGS",				\
   "STACK_REG",					\

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt
  2015-07-28 14:02 [AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt Jiong Wang
  2015-08-04  9:48 ` James Greenhalgh
@ 2015-09-26  7:38 ` Andrew Pinski
  2015-09-26  7:42   ` Andrew Pinski
  2015-09-28 15:17   ` Jiong Wang
  2015-09-28 16:10 ` Marcus Shawcroft
  2 siblings, 2 replies; 19+ messages in thread
From: Andrew Pinski @ 2015-09-26  7:38 UTC (permalink / raw)
  To: Jiong Wang; +Cc: gcc-patches

On Tue, Jul 28, 2015 at 6:12 AM, Jiong Wang <jiong.wang@arm.com> wrote:
>
> The instruction sequences for preparing argument for TLS descriptor
> runtime resolver and the later function call to resolver can actually be
> hoisted out of the loop.
>
> Currently we can't because we have exposed the hard register X0 as
> destination of "set".  While GCC's RTL data flow infrastructure will
> skip or do very conservative assumption when hard register involved in
> and thus some loop IV opportunities are missed.
>
> This patch add another "tlsdesc_small_pseudo_<mode>" pattern, and avoid
> expose x0 to gcc generic code.
>
> Generally, we define a new register class FIXED_R0 which only contains register
> 0, so the instruction sequences generated from the new add pattern is the same
> as tlsdesc_small_<mode>, while the operand 0 is wrapped as pseudo register that
> RTL IV opt can handle it.
>
> Ideally, we should allow operand 0 to be any pseudo register, but then
> we can't model the override of x0 caused by the function call which is
> hidded by the UNSPEC.
>
> So here, we restricting operand 0 to be x0, the override of x0 can be
> reflected to the gcc.
>
> OK for trunk?


This patch broke ILP32 because we used mode rather than ptr_mode for
the psedu .  I have an idea on how to fix it (like tlsie_small_sidi
case) but I still need to test it fully.

This is the smallest testcase where the problem is:
struct dtor_list
{
  struct dtor_list *next;
};
static __thread struct dtor_list *tls_dtor_list;
__cxa_thread_atexit_impl ( struct dtor_list *new)
{
  new->next = tls_dtor_list;
  tls_dtor_list = new;
}


Thanks,
Andrew

>
> 2015-07-28  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
>             Jiong Wang  <jiong.wang@arm.com>
>
> gcc/
>   * config/aarch64/aarch64.d (tlsdesc_small_pseudo_<mode>): New pattern.
>   * config/aarch64/aarch64.h (reg_class): New enumeration FIXED_REG0.
>   (REG_CLASS_NAMES): Likewise.
>   (REG_CLASS_CONTENTS): Likewise.
>   * config/aarch64/aarch64.c (aarch64_class_max_nregs): Likewise.
>   (aarch64_register_move_cost): Likewise.
>   (aarch64_load_symref_appropriately): Invoke the new added pattern if
>   possible.
>   * config/aarch64/constraints.md (Uc0): New constraint.
>
> gcc/testsuite.
>   * gcc.target/aarch64/tlsdesc_hoist.c: New testcase.
>
> --
> Regards,
> Jiong
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt
  2015-09-26  7:38 ` [AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt Andrew Pinski
@ 2015-09-26  7:42   ` Andrew Pinski
  2015-09-28 15:17   ` Jiong Wang
  1 sibling, 0 replies; 19+ messages in thread
From: Andrew Pinski @ 2015-09-26  7:42 UTC (permalink / raw)
  To: Jiong Wang; +Cc: gcc-patches

On Fri, Sep 25, 2015 at 11:40 PM, Andrew Pinski <pinskia@gmail.com> wrote:
> On Tue, Jul 28, 2015 at 6:12 AM, Jiong Wang <jiong.wang@arm.com> wrote:
>>
>> The instruction sequences for preparing argument for TLS descriptor
>> runtime resolver and the later function call to resolver can actually be
>> hoisted out of the loop.
>>
>> Currently we can't because we have exposed the hard register X0 as
>> destination of "set".  While GCC's RTL data flow infrastructure will
>> skip or do very conservative assumption when hard register involved in
>> and thus some loop IV opportunities are missed.
>>
>> This patch add another "tlsdesc_small_pseudo_<mode>" pattern, and avoid
>> expose x0 to gcc generic code.
>>
>> Generally, we define a new register class FIXED_R0 which only contains register
>> 0, so the instruction sequences generated from the new add pattern is the same
>> as tlsdesc_small_<mode>, while the operand 0 is wrapped as pseudo register that
>> RTL IV opt can handle it.
>>
>> Ideally, we should allow operand 0 to be any pseudo register, but then
>> we can't model the override of x0 caused by the function call which is
>> hidded by the UNSPEC.
>>
>> So here, we restricting operand 0 to be x0, the override of x0 can be
>> reflected to the gcc.
>>
>> OK for trunk?
>
>
> This patch broke ILP32 because we used mode rather than ptr_mode for
> the psedu .  I have an idea on how to fix it (like tlsie_small_sidi
> case) but I still need to test it fully.
>
> This is the smallest testcase where the problem is:
> struct dtor_list
> {
>   struct dtor_list *next;
> };
> static __thread struct dtor_list *tls_dtor_list;
> __cxa_thread_atexit_impl ( struct dtor_list *new)
> {
>   new->next = tls_dtor_list;
>   tls_dtor_list = new;
> }

Actually there is another bug with respect of the output too.  Some of
the <w>0 should have been plain x0 due to only the 64bit register is
accepted in some contexts.

Thanks,
Andrew

>
>
> Thanks,
> Andrew
>
>>
>> 2015-07-28  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
>>             Jiong Wang  <jiong.wang@arm.com>
>>
>> gcc/
>>   * config/aarch64/aarch64.d (tlsdesc_small_pseudo_<mode>): New pattern.
>>   * config/aarch64/aarch64.h (reg_class): New enumeration FIXED_REG0.
>>   (REG_CLASS_NAMES): Likewise.
>>   (REG_CLASS_CONTENTS): Likewise.
>>   * config/aarch64/aarch64.c (aarch64_class_max_nregs): Likewise.
>>   (aarch64_register_move_cost): Likewise.
>>   (aarch64_load_symref_appropriately): Invoke the new added pattern if
>>   possible.
>>   * config/aarch64/constraints.md (Uc0): New constraint.
>>
>> gcc/testsuite.
>>   * gcc.target/aarch64/tlsdesc_hoist.c: New testcase.
>>
>> --
>> Regards,
>> Jiong
>>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt
  2015-09-26  7:38 ` [AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt Andrew Pinski
  2015-09-26  7:42   ` Andrew Pinski
@ 2015-09-28 15:17   ` Jiong Wang
  2015-09-28 17:08     ` Jiong Wang
  1 sibling, 1 reply; 19+ messages in thread
From: Jiong Wang @ 2015-09-28 15:17 UTC (permalink / raw)
  To: Andrew Pinski; +Cc: gcc-patches


Andrew Pinski writes:

> On Tue, Jul 28, 2015 at 6:12 AM, Jiong Wang <jiong.wang@arm.com> wrote:
>>
>> The instruction sequences for preparing argument for TLS descriptor
>> runtime resolver and the later function call to resolver can actually be
>> hoisted out of the loop.
>>
>> Currently we can't because we have exposed the hard register X0 as
>> destination of "set".  While GCC's RTL data flow infrastructure will
>> skip or do very conservative assumption when hard register involved in
>> and thus some loop IV opportunities are missed.
>>
>> This patch add another "tlsdesc_small_pseudo_<mode>" pattern, and avoid
>> expose x0 to gcc generic code.
>>
>> Generally, we define a new register class FIXED_R0 which only contains register
>> 0, so the instruction sequences generated from the new add pattern is the same
>> as tlsdesc_small_<mode>, while the operand 0 is wrapped as pseudo register that
>> RTL IV opt can handle it.
>>
>> Ideally, we should allow operand 0 to be any pseudo register, but then
>> we can't model the override of x0 caused by the function call which is
>> hidded by the UNSPEC.
>>
>> So here, we restricting operand 0 to be x0, the override of x0 can be
>> reflected to the gcc.
>>
>> OK for trunk?
>
>
> This patch broke ILP32 because we used mode rather than ptr_mode for
> the psedu .  I have an idea on how to fix it (like tlsie_small_sidi
> case) but I still need to test it fully.

Have done a quick re-visit the code, the use of "mode" instead of
"ptr_mode" looks OK to me.

While what looks strange to me is under ILP32 symbol_ref is DImode.

   (symbol_ref:DI ("*.LANCHOR0")

My my understanding, the symbol_ref should be SI mode under ilp32
instead of DI mode.

So it's better to fix "create_block_symbol" in varasm, and we should let
it use ptr_mode instead of Pmode as Pmode is used to describe the
underline hardware mode instead of the mode view in C language level.

>
> This is the smallest testcase where the problem is:
> struct dtor_list
> {
>   struct dtor_list *next;
> };
> static __thread struct dtor_list *tls_dtor_list;
> __cxa_thread_atexit_impl ( struct dtor_list *new)
> {
>   new->next = tls_dtor_list;
>   tls_dtor_list = new;
> }
>
>
> Thanks,
> Andrew
>
>>
>> 2015-07-28  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
>>             Jiong Wang  <jiong.wang@arm.com>
>>
>> gcc/
>>   * config/aarch64/aarch64.d (tlsdesc_small_pseudo_<mode>): New pattern.
>>   * config/aarch64/aarch64.h (reg_class): New enumeration FIXED_REG0.
>>   (REG_CLASS_NAMES): Likewise.
>>   (REG_CLASS_CONTENTS): Likewise.
>>   * config/aarch64/aarch64.c (aarch64_class_max_nregs): Likewise.
>>   (aarch64_register_move_cost): Likewise.
>>   (aarch64_load_symref_appropriately): Invoke the new added pattern if
>>   possible.
>>   * config/aarch64/constraints.md (Uc0): New constraint.
>>
>> gcc/testsuite.
>>   * gcc.target/aarch64/tlsdesc_hoist.c: New testcase.
>>
>> --
>> Regards,
>> Jiong
>>

-- 
Regards,
Jiong

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt
  2015-07-28 14:02 [AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt Jiong Wang
  2015-08-04  9:48 ` James Greenhalgh
  2015-09-26  7:38 ` [AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt Andrew Pinski
@ 2015-09-28 16:10 ` Marcus Shawcroft
  2 siblings, 0 replies; 19+ messages in thread
From: Marcus Shawcroft @ 2015-09-28 16:10 UTC (permalink / raw)
  To: Jiong Wang; +Cc: gcc-patches

On 28 July 2015 at 14:12, Jiong Wang <jiong.wang@arm.com> wrote:
>
> The instruction sequences for preparing argument for TLS descriptor
> runtime resolver and the later function call to resolver can actually be
> hoisted out of the loop.
>
> Currently we can't because we have exposed the hard register X0 as
> destination of "set".  While GCC's RTL data flow infrastructure will
> skip or do very conservative assumption when hard register involved in
> and thus some loop IV opportunities are missed.

This patch feels like we are botching the back end to over come a
limitation in RTL IV opt.  Isn't the real solution in RTL IV opt ?

/Marcus

>
> This patch add another "tlsdesc_small_pseudo_<mode>" pattern, and avoid
> expose x0 to gcc generic code.
>
> Generally, we define a new register class FIXED_R0 which only contains register
> 0, so the instruction sequences generated from the new add pattern is the same
> as tlsdesc_small_<mode>, while the operand 0 is wrapped as pseudo register that
> RTL IV opt can handle it.
>
> Ideally, we should allow operand 0 to be any pseudo register, but then
> we can't model the override of x0 caused by the function call which is
> hidded by the UNSPEC.
>
> So here, we restricting operand 0 to be x0, the override of x0 can be
> reflected to the gcc.
>
> OK for trunk?
>
> 2015-07-28  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
>             Jiong Wang  <jiong.wang@arm.com>
>
> gcc/
>   * config/aarch64/aarch64.d (tlsdesc_small_pseudo_<mode>): New pattern.
>   * config/aarch64/aarch64.h (reg_class): New enumeration FIXED_REG0.
>   (REG_CLASS_NAMES): Likewise.
>   (REG_CLASS_CONTENTS): Likewise.
>   * config/aarch64/aarch64.c (aarch64_class_max_nregs): Likewise.
>   (aarch64_register_move_cost): Likewise.
>   (aarch64_load_symref_appropriately): Invoke the new added pattern if
>   possible.
>   * config/aarch64/constraints.md (Uc0): New constraint.
>
> gcc/testsuite.
>   * gcc.target/aarch64/tlsdesc_hoist.c: New testcase.
>
> --
> Regards,
> Jiong
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt
  2015-09-28 15:17   ` Jiong Wang
@ 2015-09-28 17:08     ` Jiong Wang
  0 siblings, 0 replies; 19+ messages in thread
From: Jiong Wang @ 2015-09-28 17:08 UTC (permalink / raw)
  To: Andrew Pinski; +Cc: gcc-patches, Marcus Shawcroft


Jiong Wang writes:

> Andrew Pinski writes:
>
>> On Tue, Jul 28, 2015 at 6:12 AM, Jiong Wang <jiong.wang@arm.com> wrote:
>>>
>>> The instruction sequences for preparing argument for TLS descriptor
>>> runtime resolver and the later function call to resolver can actually be
>>> hoisted out of the loop.
>>>
>>> Currently we can't because we have exposed the hard register X0 as
>>> destination of "set".  While GCC's RTL data flow infrastructure will
>>> skip or do very conservative assumption when hard register involved in
>>> and thus some loop IV opportunities are missed.
>>>
>>> This patch add another "tlsdesc_small_pseudo_<mode>" pattern, and avoid
>>> expose x0 to gcc generic code.
>>>
>>> Generally, we define a new register class FIXED_R0 which only contains register
>>> 0, so the instruction sequences generated from the new add pattern is the same
>>> as tlsdesc_small_<mode>, while the operand 0 is wrapped as pseudo register that
>>> RTL IV opt can handle it.
>>>
>>> Ideally, we should allow operand 0 to be any pseudo register, but then
>>> we can't model the override of x0 caused by the function call which is
>>> hidded by the UNSPEC.
>>>
>>> So here, we restricting operand 0 to be x0, the override of x0 can be
>>> reflected to the gcc.
>>>
>>> OK for trunk?
>>
>>
>> This patch broke ILP32 because we used mode rather than ptr_mode for
>> the psedu .  I have an idea on how to fix it (like tlsie_small_sidi
>> case) but I still need to test it fully.
>
> Have done a quick re-visit the code, the use of "mode" instead of
> "ptr_mode" looks OK to me.
>
> While what looks strange to me is under ILP32 symbol_ref is DImode.
>
>    (symbol_ref:DI ("*.LANCHOR0")
>
> My my understanding, the symbol_ref should be SI mode under ilp32
> instead of DI mode.
>
> So it's better to fix "create_block_symbol" in varasm, and we should let
> it use ptr_mode instead of Pmode as Pmode is used to describe the
> underline hardware mode instead of the mode view in C language level.

meanwhile I revert this patch (r228211) as there is unresolved ILP32
issue.

-- 
Regards,
Jiong

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2015-09-28 16:21 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-28 14:02 [AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt Jiong Wang
2015-08-04  9:48 ` James Greenhalgh
2015-08-06 16:13   ` [COMMITTED][AArch64] " Jiong Wang
2015-08-08 11:59     ` Andreas Schwab
2015-08-10 10:28       ` Jiong Wang
2015-08-10 13:08         ` Jiong Wang
2015-08-10 13:20           ` Andreas Schwab
2015-08-10 13:26             ` Jiong Wang
2015-08-10 13:33               ` Andreas Schwab
2015-08-10 22:55                 ` Jiong Wang
2015-08-11  7:11                   ` Andreas Schwab
2015-08-11  8:36                     ` Jiong Wang
2015-08-11  8:48                     ` Jiong Wang
2015-08-11 11:22                       ` [COMMITTED][AArch64] Add the missing "," for enumeration element Jiong Wang
2015-09-26  7:38 ` [AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt Andrew Pinski
2015-09-26  7:42   ` Andrew Pinski
2015-09-28 15:17   ` Jiong Wang
2015-09-28 17:08     ` Jiong Wang
2015-09-28 16:10 ` Marcus Shawcroft

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).