public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v2] [AARCH64] Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC
@ 2019-09-03  8:38 Shaokun Zhang
  2019-09-03 11:13 ` Kyrill Tkachov
  2019-09-25 12:40 ` Kyrill Tkachov
  0 siblings, 2 replies; 6+ messages in thread
From: Shaokun Zhang @ 2019-09-03  8:38 UTC (permalink / raw)
  To: gcc-patches
  Cc: richard.earnshaw, james.greenhalgh, marcus.shawcroft,
	kyrylo.tkachov, Shaokun Zhang

The DCache clean & ICache invalidation requirements for instructions
to be data coherent are discoverable through new fields in CTR_EL0.
Let's support these two bits: if they are enabled, the CPU core will
not execute the unnecessary DCache clean or ICache invalidation
instructions.

2019-09-03  Shaokun Zhang  <zhangshaokun@hisilicon.com>

    * config/aarch64/sync-cache.c: Support CTR_EL0.IDC and CTR_EL0.DIC in
__aarch64_sync_cache_range function.
---
 libgcc/config/aarch64/sync-cache.c | 57 ++++++++++++++++++++++++--------------
 1 file changed, 36 insertions(+), 21 deletions(-)

diff --git a/libgcc/config/aarch64/sync-cache.c b/libgcc/config/aarch64/sync-cache.c
index 791f5e42ff44..ea3da4be02b3 100644
--- a/libgcc/config/aarch64/sync-cache.c
+++ b/libgcc/config/aarch64/sync-cache.c
@@ -23,6 +23,9 @@ a copy of the GCC Runtime Library Exception along with this program;
 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 <http://www.gnu.org/licenses/>.  */
 
+#define CTR_IDC_SHIFT           28
+#define CTR_DIC_SHIFT           29
+
 void __aarch64_sync_cache_range (const void *, const void *);
 
 void
@@ -41,32 +44,44 @@ __aarch64_sync_cache_range (const void *base, const void *end)
   icache_lsize = 4 << (cache_info & 0xF);
   dcache_lsize = 4 << ((cache_info >> 16) & 0xF);
 
-  /* Loop over the address range, clearing one cache line at once.
-     Data cache must be flushed to unification first to make sure the
-     instruction cache fetches the updated data.  'end' is exclusive,
-     as per the GNU definition of __clear_cache.  */
+  /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
+     not required for instruction to data coherence.  */
+
+  if (((cache_info >> CTR_IDC_SHIFT) & 0x1) == 0x0) {
+    /* Loop over the address range, clearing one cache line at once.
+       Data cache must be flushed to unification first to make sure the
+       instruction cache fetches the updated data.  'end' is exclusive,
+       as per the GNU definition of __clear_cache.  */
 
-  /* Make the start address of the loop cache aligned.  */
-  address = (const char*) ((__UINTPTR_TYPE__) base
-			   & ~ (__UINTPTR_TYPE__) (dcache_lsize - 1));
+    /* Make the start address of the loop cache aligned.  */
+    address = (const char*) ((__UINTPTR_TYPE__) base
+			     & ~ (__UINTPTR_TYPE__) (dcache_lsize - 1));
 
-  for (; address < (const char *) end; address += dcache_lsize)
-    asm volatile ("dc\tcvau, %0"
-		  :
-		  : "r" (address)
-		  : "memory");
+    for (; address < (const char *) end; address += dcache_lsize)
+      asm volatile ("dc\tcvau, %0"
+		    :
+		    : "r" (address)
+		    : "memory");
+  }
 
   asm volatile ("dsb\tish" : : : "memory");
 
-  /* Make the start address of the loop cache aligned.  */
-  address = (const char*) ((__UINTPTR_TYPE__) base
-			   & ~ (__UINTPTR_TYPE__) (icache_lsize - 1));
+  /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
+     Unification is not required for instruction to data coherence.  */
+
+  if (((cache_info >> CTR_DIC_SHIFT) & 0x1) == 0x0) {
+    /* Make the start address of the loop cache aligned.  */
+    address = (const char*) ((__UINTPTR_TYPE__) base
+			     & ~ (__UINTPTR_TYPE__) (icache_lsize - 1));
+
+    for (; address < (const char *) end; address += icache_lsize)
+      asm volatile ("ic\tivau, %0"
+		    :
+		    : "r" (address)
+		    : "memory");
 
-  for (; address < (const char *) end; address += icache_lsize)
-    asm volatile ("ic\tivau, %0"
-		  :
-		  : "r" (address)
-		  : "memory");
+    asm volatile ("dsb\tish" : : : "memory");
+  }
 
-  asm volatile ("dsb\tish; isb" : : : "memory");
+  asm volatile("isb" : : : "memory");
 }
-- 
2.7.4

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] [AARCH64] Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC
  2019-09-03  8:38 [PATCH v2] [AARCH64] Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC Shaokun Zhang
@ 2019-09-03 11:13 ` Kyrill Tkachov
  2019-09-05  9:37   ` Shaokun Zhang
  2019-09-25 12:40 ` Kyrill Tkachov
  1 sibling, 1 reply; 6+ messages in thread
From: Kyrill Tkachov @ 2019-09-03 11:13 UTC (permalink / raw)
  To: Shaokun Zhang, gcc-patches
  Cc: Richard Earnshaw, James Greenhalgh, Marcus Shawcroft

Hi Shaokun,

On 9/3/19 9:35 AM, Shaokun Zhang wrote:
> The DCache clean & ICache invalidation requirements for instructions
> to be data coherence are discoverable through new fields in CTR_EL0.
> Let's support the two bits if they are enabled, the CPU core will
> not execute the unnecessary DCache clean or Icache Invalidation
> instructions.
>
> 2019-09-03  Shaokun Zhang <zhangshaokun@hisilicon.com>
>
>     * config/aarch64/sync-cache.c: Support CTR_EL0.IDC and CTR_EL0.DIC in
> __aarch64_sync_cache_range function.


This version looks ok to me. I've checked the assembly generated for 
sync-cache.c and it looks as expected.

Can you please confirm that you've bootstrapped and tested this patch on 
an aarch64 system?

Thanks,

Kyrill


> ---
>  libgcc/config/aarch64/sync-cache.c | 57 
> ++++++++++++++++++++++++--------------
>  1 file changed, 36 insertions(+), 21 deletions(-)
>
> diff --git a/libgcc/config/aarch64/sync-cache.c 
> b/libgcc/config/aarch64/sync-cache.c
> index 791f5e42ff44..ea3da4be02b3 100644
> --- a/libgcc/config/aarch64/sync-cache.c
> +++ b/libgcc/config/aarch64/sync-cache.c
> @@ -23,6 +23,9 @@ a copy of the GCC Runtime Library Exception along 
> with this program;
>  see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
>  <http://www.gnu.org/licenses/>. */
>
> +#define CTR_IDC_SHIFT           28
> +#define CTR_DIC_SHIFT           29
> +
>  void __aarch64_sync_cache_range (const void *, const void *);
>
>  void
> @@ -41,32 +44,44 @@ __aarch64_sync_cache_range (const void *base, 
> const void *end)
>    icache_lsize = 4 << (cache_info & 0xF);
>    dcache_lsize = 4 << ((cache_info >> 16) & 0xF);
>
> -  /* Loop over the address range, clearing one cache line at once.
> -     Data cache must be flushed to unification first to make sure the
> -     instruction cache fetches the updated data.  'end' is exclusive,
> -     as per the GNU definition of __clear_cache.  */
> +  /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of 
> Unification is
> +     not required for instruction to data coherence.  */
> +
> +  if (((cache_info >> CTR_IDC_SHIFT) & 0x1) == 0x0) {
> +    /* Loop over the address range, clearing one cache line at once.
> +       Data cache must be flushed to unification first to make sure the
> +       instruction cache fetches the updated data.  'end' is exclusive,
> +       as per the GNU definition of __clear_cache.  */
>
> -  /* Make the start address of the loop cache aligned.  */
> -  address = (const char*) ((__UINTPTR_TYPE__) base
> -                          & ~ (__UINTPTR_TYPE__) (dcache_lsize - 1));
> +    /* Make the start address of the loop cache aligned. */
> +    address = (const char*) ((__UINTPTR_TYPE__) base
> +                            & ~ (__UINTPTR_TYPE__) (dcache_lsize - 1));
>
> -  for (; address < (const char *) end; address += dcache_lsize)
> -    asm volatile ("dc\tcvau, %0"
> -                 :
> -                 : "r" (address)
> -                 : "memory");
> +    for (; address < (const char *) end; address += dcache_lsize)
> +      asm volatile ("dc\tcvau, %0"
> +                   :
> +                   : "r" (address)
> +                   : "memory");
> +  }
>
>    asm volatile ("dsb\tish" : : : "memory");
>
> -  /* Make the start address of the loop cache aligned.  */
> -  address = (const char*) ((__UINTPTR_TYPE__) base
> -                          & ~ (__UINTPTR_TYPE__) (icache_lsize - 1));
> +  /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the 
> Point of
> +     Unification is not required for instruction to data coherence.  */
> +
> +  if (((cache_info >> CTR_DIC_SHIFT) & 0x1) == 0x0) {
> +    /* Make the start address of the loop cache aligned. */
> +    address = (const char*) ((__UINTPTR_TYPE__) base
> +                            & ~ (__UINTPTR_TYPE__) (icache_lsize - 1));
> +
> +    for (; address < (const char *) end; address += icache_lsize)
> +      asm volatile ("ic\tivau, %0"
> +                   :
> +                   : "r" (address)
> +                   : "memory");
>
> -  for (; address < (const char *) end; address += icache_lsize)
> -    asm volatile ("ic\tivau, %0"
> -                 :
> -                 : "r" (address)
> -                 : "memory");
> +    asm volatile ("dsb\tish" : : : "memory");
> +  }
>
> -  asm volatile ("dsb\tish; isb" : : : "memory");
> +  asm volatile("isb" : : : "memory");
>  }
> -- 
> 2.7.4
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] [AARCH64] Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC
  2019-09-03 11:13 ` Kyrill Tkachov
@ 2019-09-05  9:37   ` Shaokun Zhang
  2019-09-05  9:42     ` Kyrill Tkachov
  0 siblings, 1 reply; 6+ messages in thread
From: Shaokun Zhang @ 2019-09-05  9:37 UTC (permalink / raw)
  To: Kyrill Tkachov, gcc-patches
  Cc: Richard Earnshaw, James Greenhalgh, Marcus Shawcroft

Hi Kyrill,

On 2019/9/3 19:13, Kyrill Tkachov wrote:
> Hi Shaokun,
> 
> On 9/3/19 9:35 AM, Shaokun Zhang wrote:
>> The DCache clean & ICache invalidation requirements for instructions
>> to be data coherence are discoverable through new fields in CTR_EL0.
>> Let's support the two bits if they are enabled, the CPU core will
>> not execute the unnecessary DCache clean or Icache Invalidation
>> instructions.
>>
>> 2019-09-03  Shaokun Zhang <zhangshaokun@hisilicon.com>
>>
>>     * config/aarch64/sync-cache.c: Support CTR_EL0.IDC and CTR_EL0.DIC in
>> __aarch64_sync_cache_range function.
> 
> 
> This version looks ok to me. I've checked the assembly generated for sync-cache.c and it looks as expected.
> 
> Can you please confirm that you've bootstrapped and tested this patch on an aarch64 system?
> 

Sorry for the late reply.

I have bootstrapped and also checked the prev-aarch64-unknown-linux-gnu/libgcc/sync-cache.o file.
It has been compiled with the patch applied.

For testing, I ran a self-modifying-code testcase which calls __clear_cache directly,
compiled with the new GCC; it works OK on both a Huawei Kunpeng 920 server whose CPU
is the HiSilicon tsv110 core and a HiSilicon D05 server board which uses the A72 core.

Thanks,
Shaokun

> Thanks,
> 
> Kyrill
> 
> 
>> ---
>>  libgcc/config/aarch64/sync-cache.c | 57 ++++++++++++++++++++++++--------------
>>  1 file changed, 36 insertions(+), 21 deletions(-)
>>
>> diff --git a/libgcc/config/aarch64/sync-cache.c b/libgcc/config/aarch64/sync-cache.c
>> index 791f5e42ff44..ea3da4be02b3 100644
>> --- a/libgcc/config/aarch64/sync-cache.c
>> +++ b/libgcc/config/aarch64/sync-cache.c
>> @@ -23,6 +23,9 @@ a copy of the GCC Runtime Library Exception along with this program;
>>  see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
>>  <http://www.gnu.org/licenses/>. */
>>
>> +#define CTR_IDC_SHIFT           28
>> +#define CTR_DIC_SHIFT           29
>> +
>>  void __aarch64_sync_cache_range (const void *, const void *);
>>
>>  void
>> @@ -41,32 +44,44 @@ __aarch64_sync_cache_range (const void *base, const void *end)
>>    icache_lsize = 4 << (cache_info & 0xF);
>>    dcache_lsize = 4 << ((cache_info >> 16) & 0xF);
>>
>> -  /* Loop over the address range, clearing one cache line at once.
>> -     Data cache must be flushed to unification first to make sure the
>> -     instruction cache fetches the updated data.  'end' is exclusive,
>> -     as per the GNU definition of __clear_cache.  */
>> +  /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
>> +     not required for instruction to data coherence.  */
>> +
>> +  if (((cache_info >> CTR_IDC_SHIFT) & 0x1) == 0x0) {
>> +    /* Loop over the address range, clearing one cache line at once.
>> +       Data cache must be flushed to unification first to make sure the
>> +       instruction cache fetches the updated data.  'end' is exclusive,
>> +       as per the GNU definition of __clear_cache.  */
>>
>> -  /* Make the start address of the loop cache aligned.  */
>> -  address = (const char*) ((__UINTPTR_TYPE__) base
>> -                          & ~ (__UINTPTR_TYPE__) (dcache_lsize - 1));
>> +    /* Make the start address of the loop cache aligned. */
>> +    address = (const char*) ((__UINTPTR_TYPE__) base
>> +                            & ~ (__UINTPTR_TYPE__) (dcache_lsize - 1));
>>
>> -  for (; address < (const char *) end; address += dcache_lsize)
>> -    asm volatile ("dc\tcvau, %0"
>> -                 :
>> -                 : "r" (address)
>> -                 : "memory");
>> +    for (; address < (const char *) end; address += dcache_lsize)
>> +      asm volatile ("dc\tcvau, %0"
>> +                   :
>> +                   : "r" (address)
>> +                   : "memory");
>> +  }
>>
>>    asm volatile ("dsb\tish" : : : "memory");
>>
>> -  /* Make the start address of the loop cache aligned.  */
>> -  address = (const char*) ((__UINTPTR_TYPE__) base
>> -                          & ~ (__UINTPTR_TYPE__) (icache_lsize - 1));
>> +  /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
>> +     Unification is not required for instruction to data coherence.  */
>> +
>> +  if (((cache_info >> CTR_DIC_SHIFT) & 0x1) == 0x0) {
>> +    /* Make the start address of the loop cache aligned. */
>> +    address = (const char*) ((__UINTPTR_TYPE__) base
>> +                            & ~ (__UINTPTR_TYPE__) (icache_lsize - 1));
>> +
>> +    for (; address < (const char *) end; address += icache_lsize)
>> +      asm volatile ("ic\tivau, %0"
>> +                   :
>> +                   : "r" (address)
>> +                   : "memory");
>>
>> -  for (; address < (const char *) end; address += icache_lsize)
>> -    asm volatile ("ic\tivau, %0"
>> -                 :
>> -                 : "r" (address)
>> -                 : "memory");
>> +    asm volatile ("dsb\tish" : : : "memory");
>> +  }
>>
>> -  asm volatile ("dsb\tish; isb" : : : "memory");
>> +  asm volatile("isb" : : : "memory");
>>  }
>> -- 
>> 2.7.4
>>
> 
> .
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] [AARCH64] Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC
  2019-09-05  9:37   ` Shaokun Zhang
@ 2019-09-05  9:42     ` Kyrill Tkachov
  2019-09-06  0:59       ` Shaokun Zhang
  0 siblings, 1 reply; 6+ messages in thread
From: Kyrill Tkachov @ 2019-09-05  9:42 UTC (permalink / raw)
  To: Shaokun Zhang, gcc-patches
  Cc: Richard Earnshaw, James Greenhalgh, Marcus Shawcroft

Hi Shaokun,

On 9/5/19 10:37 AM, Shaokun Zhang wrote:
> Hi Kyrill,
>
> On 2019/9/3 19:13, Kyrill Tkachov wrote:
>> Hi Shaokun,
>>
>> On 9/3/19 9:35 AM, Shaokun Zhang wrote:
>>> The DCache clean & ICache invalidation requirements for instructions
>>> to be data coherence are discoverable through new fields in CTR_EL0.
>>> Let's support the two bits if they are enabled, the CPU core will
>>> not execute the unnecessary DCache clean or Icache Invalidation
>>> instructions.
>>>
>>> 2019-09-03  Shaokun Zhang <zhangshaokun@hisilicon.com>
>>>
>>>      * config/aarch64/sync-cache.c: Support CTR_EL0.IDC and CTR_EL0.DIC in
>>> __aarch64_sync_cache_range function.
>>
>> This version looks ok to me. I've checked the assembly generated for sync-cache.c and it looks as expected.
>>
>> Can you please confirm that you've bootstrapped and tested this patch on an aarch64 system?
>>
> Sorry to reply later.
>
> I have bootstrapped and also check the prev-aarch64-unknown-linux-gnu/libgcc/sync-cache.o file.
> It has been compiled the patch.
>
> About the test, I run the self-modify testcase which calls __clear_cache directly,
> and use the new GCC to compile it, it works ok on both Huawei Kunpeng 920 server whose cpu
> is HiSilicon tsv110 core and HiSilicon D05 server board which uses A72 core.

Thanks, that looks good to me now.

Kyrill


> Thanks,
> Shaokun
>
>> Thanks,
>>
>> Kyrill
>>
>>
>>> ---
>>>   libgcc/config/aarch64/sync-cache.c | 57 ++++++++++++++++++++++++--------------
>>>   1 file changed, 36 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/libgcc/config/aarch64/sync-cache.c b/libgcc/config/aarch64/sync-cache.c
>>> index 791f5e42ff44..ea3da4be02b3 100644
>>> --- a/libgcc/config/aarch64/sync-cache.c
>>> +++ b/libgcc/config/aarch64/sync-cache.c
>>> @@ -23,6 +23,9 @@ a copy of the GCC Runtime Library Exception along with this program;
>>>   see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
>>>   <http://www.gnu.org/licenses/>. */
>>>
>>> +#define CTR_IDC_SHIFT           28
>>> +#define CTR_DIC_SHIFT           29
>>> +
>>>   void __aarch64_sync_cache_range (const void *, const void *);
>>>
>>>   void
>>> @@ -41,32 +44,44 @@ __aarch64_sync_cache_range (const void *base, const void *end)
>>>     icache_lsize = 4 << (cache_info & 0xF);
>>>     dcache_lsize = 4 << ((cache_info >> 16) & 0xF);
>>>
>>> -  /* Loop over the address range, clearing one cache line at once.
>>> -     Data cache must be flushed to unification first to make sure the
>>> -     instruction cache fetches the updated data.  'end' is exclusive,
>>> -     as per the GNU definition of __clear_cache.  */
>>> +  /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
>>> +     not required for instruction to data coherence.  */
>>> +
>>> +  if (((cache_info >> CTR_IDC_SHIFT) & 0x1) == 0x0) {
>>> +    /* Loop over the address range, clearing one cache line at once.
>>> +       Data cache must be flushed to unification first to make sure the
>>> +       instruction cache fetches the updated data.  'end' is exclusive,
>>> +       as per the GNU definition of __clear_cache.  */
>>>
>>> -  /* Make the start address of the loop cache aligned.  */
>>> -  address = (const char*) ((__UINTPTR_TYPE__) base
>>> -                          & ~ (__UINTPTR_TYPE__) (dcache_lsize - 1));
>>> +    /* Make the start address of the loop cache aligned. */
>>> +    address = (const char*) ((__UINTPTR_TYPE__) base
>>> +                            & ~ (__UINTPTR_TYPE__) (dcache_lsize - 1));
>>>
>>> -  for (; address < (const char *) end; address += dcache_lsize)
>>> -    asm volatile ("dc\tcvau, %0"
>>> -                 :
>>> -                 : "r" (address)
>>> -                 : "memory");
>>> +    for (; address < (const char *) end; address += dcache_lsize)
>>> +      asm volatile ("dc\tcvau, %0"
>>> +                   :
>>> +                   : "r" (address)
>>> +                   : "memory");
>>> +  }
>>>
>>>     asm volatile ("dsb\tish" : : : "memory");
>>>
>>> -  /* Make the start address of the loop cache aligned.  */
>>> -  address = (const char*) ((__UINTPTR_TYPE__) base
>>> -                          & ~ (__UINTPTR_TYPE__) (icache_lsize - 1));
>>> +  /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
>>> +     Unification is not required for instruction to data coherence.  */
>>> +
>>> +  if (((cache_info >> CTR_DIC_SHIFT) & 0x1) == 0x0) {
>>> +    /* Make the start address of the loop cache aligned. */
>>> +    address = (const char*) ((__UINTPTR_TYPE__) base
>>> +                            & ~ (__UINTPTR_TYPE__) (icache_lsize - 1));
>>> +
>>> +    for (; address < (const char *) end; address += icache_lsize)
>>> +      asm volatile ("ic\tivau, %0"
>>> +                   :
>>> +                   : "r" (address)
>>> +                   : "memory");
>>>
>>> -  for (; address < (const char *) end; address += icache_lsize)
>>> -    asm volatile ("ic\tivau, %0"
>>> -                 :
>>> -                 : "r" (address)
>>> -                 : "memory");
>>> +    asm volatile ("dsb\tish" : : : "memory");
>>> +  }
>>>
>>> -  asm volatile ("dsb\tish; isb" : : : "memory");
>>> +  asm volatile("isb" : : : "memory");
>>>   }
>>> -- 
>>> 2.7.4
>>>
>> .
>>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] [AARCH64] Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC
  2019-09-05  9:42     ` Kyrill Tkachov
@ 2019-09-06  0:59       ` Shaokun Zhang
  0 siblings, 0 replies; 6+ messages in thread
From: Shaokun Zhang @ 2019-09-06  0:59 UTC (permalink / raw)
  To: Kyrill Tkachov, gcc-patches
  Cc: Richard Earnshaw, James Greenhalgh, Marcus Shawcroft

Hi Kyrill,

On 2019/9/5 17:41, Kyrill Tkachov wrote:
> Hi Shaokun,
> 
> On 9/5/19 10:37 AM, Shaokun Zhang wrote:
>> Hi Kyrill,
>>
>> On 2019/9/3 19:13, Kyrill Tkachov wrote:
>>> Hi Shaokun,
>>>
>>> On 9/3/19 9:35 AM, Shaokun Zhang wrote:
>>>> The DCache clean & ICache invalidation requirements for instructions
>>>> to be data coherence are discoverable through new fields in CTR_EL0.
>>>> Let's support the two bits if they are enabled, the CPU core will
>>>> not execute the unnecessary DCache clean or Icache Invalidation
>>>> instructions.
>>>>
>>>> 2019-09-03  Shaokun Zhang <zhangshaokun@hisilicon.com>
>>>>
>>>>      * config/aarch64/sync-cache.c: Support CTR_EL0.IDC and CTR_EL0.DIC in
>>>> __aarch64_sync_cache_range function.
>>>
>>> This version looks ok to me. I've checked the assembly generated for sync-cache.c and it looks as expected.
>>>
>>> Can you please confirm that you've bootstrapped and tested this patch on an aarch64 system?
>>>
>> Sorry to reply later.
>>
>> I have bootstrapped and also check the prev-aarch64-unknown-linux-gnu/libgcc/sync-cache.o file.
>> It has been compiled the patch.
>>
>> About the test, I run the self-modify testcase which calls __clear_cache directly,
>> and use the new GCC to compile it, it works ok on both Huawei Kunpeng 920 server whose cpu
>> is HiSilicon tsv110 core and HiSilicon D05 server board which uses A72 core.
> 
> Thanks, that looks good to me now.
> 

Thanks for your reply.

Shaokun

> Kyrill
> 
> 
>> Thanks,
>> Shaokun
>>
>>> Thanks,
>>>
>>> Kyrill
>>>
>>>
>>>> ---
>>>>   libgcc/config/aarch64/sync-cache.c | 57 ++++++++++++++++++++++++--------------
>>>>   1 file changed, 36 insertions(+), 21 deletions(-)
>>>>
>>>> diff --git a/libgcc/config/aarch64/sync-cache.c b/libgcc/config/aarch64/sync-cache.c
>>>> index 791f5e42ff44..ea3da4be02b3 100644
>>>> --- a/libgcc/config/aarch64/sync-cache.c
>>>> +++ b/libgcc/config/aarch64/sync-cache.c
>>>> @@ -23,6 +23,9 @@ a copy of the GCC Runtime Library Exception along with this program;
>>>>   see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
>>>>   <http://www.gnu.org/licenses/>. */
>>>>
>>>> +#define CTR_IDC_SHIFT           28
>>>> +#define CTR_DIC_SHIFT           29
>>>> +
>>>>   void __aarch64_sync_cache_range (const void *, const void *);
>>>>
>>>>   void
>>>> @@ -41,32 +44,44 @@ __aarch64_sync_cache_range (const void *base, const void *end)
>>>>     icache_lsize = 4 << (cache_info & 0xF);
>>>>     dcache_lsize = 4 << ((cache_info >> 16) & 0xF);
>>>>
>>>> -  /* Loop over the address range, clearing one cache line at once.
>>>> -     Data cache must be flushed to unification first to make sure the
>>>> -     instruction cache fetches the updated data.  'end' is exclusive,
>>>> -     as per the GNU definition of __clear_cache.  */
>>>> +  /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
>>>> +     not required for instruction to data coherence.  */
>>>> +
>>>> +  if (((cache_info >> CTR_IDC_SHIFT) & 0x1) == 0x0) {
>>>> +    /* Loop over the address range, clearing one cache line at once.
>>>> +       Data cache must be flushed to unification first to make sure the
>>>> +       instruction cache fetches the updated data.  'end' is exclusive,
>>>> +       as per the GNU definition of __clear_cache.  */
>>>>
>>>> -  /* Make the start address of the loop cache aligned.  */
>>>> -  address = (const char*) ((__UINTPTR_TYPE__) base
>>>> -                          & ~ (__UINTPTR_TYPE__) (dcache_lsize - 1));
>>>> +    /* Make the start address of the loop cache aligned. */
>>>> +    address = (const char*) ((__UINTPTR_TYPE__) base
>>>> +                            & ~ (__UINTPTR_TYPE__) (dcache_lsize - 1));
>>>>
>>>> -  for (; address < (const char *) end; address += dcache_lsize)
>>>> -    asm volatile ("dc\tcvau, %0"
>>>> -                 :
>>>> -                 : "r" (address)
>>>> -                 : "memory");
>>>> +    for (; address < (const char *) end; address += dcache_lsize)
>>>> +      asm volatile ("dc\tcvau, %0"
>>>> +                   :
>>>> +                   : "r" (address)
>>>> +                   : "memory");
>>>> +  }
>>>>
>>>>     asm volatile ("dsb\tish" : : : "memory");
>>>>
>>>> -  /* Make the start address of the loop cache aligned.  */
>>>> -  address = (const char*) ((__UINTPTR_TYPE__) base
>>>> -                          & ~ (__UINTPTR_TYPE__) (icache_lsize - 1));
>>>> +  /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
>>>> +     Unification is not required for instruction to data coherence.  */
>>>> +
>>>> +  if (((cache_info >> CTR_DIC_SHIFT) & 0x1) == 0x0) {
>>>> +    /* Make the start address of the loop cache aligned. */
>>>> +    address = (const char*) ((__UINTPTR_TYPE__) base
>>>> +                            & ~ (__UINTPTR_TYPE__) (icache_lsize - 1));
>>>> +
>>>> +    for (; address < (const char *) end; address += icache_lsize)
>>>> +      asm volatile ("ic\tivau, %0"
>>>> +                   :
>>>> +                   : "r" (address)
>>>> +                   : "memory");
>>>>
>>>> -  for (; address < (const char *) end; address += icache_lsize)
>>>> -    asm volatile ("ic\tivau, %0"
>>>> -                 :
>>>> -                 : "r" (address)
>>>> -                 : "memory");
>>>> +    asm volatile ("dsb\tish" : : : "memory");
>>>> +  }
>>>>
>>>> -  asm volatile ("dsb\tish; isb" : : : "memory");
>>>> +  asm volatile("isb" : : : "memory");
>>>>   }
>>>> -- 
>>>> 2.7.4
>>>>
>>> .
>>>
> 
> .
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] [AARCH64] Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC
  2019-09-03  8:38 [PATCH v2] [AARCH64] Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC Shaokun Zhang
  2019-09-03 11:13 ` Kyrill Tkachov
@ 2019-09-25 12:40 ` Kyrill Tkachov
  1 sibling, 0 replies; 6+ messages in thread
From: Kyrill Tkachov @ 2019-09-25 12:40 UTC (permalink / raw)
  To: Shaokun Zhang, gcc-patches
  Cc: Richard Earnshaw, James Greenhalgh, Marcus Shawcroft

Hi all,

On 9/3/19 9:35 AM, Shaokun Zhang wrote:
> The DCache clean & ICache invalidation requirements for instructions
> to be data coherence are discoverable through new fields in CTR_EL0.
> Let's support the two bits if they are enabled, the CPU core will
> not execute the unnecessary DCache clean or Icache Invalidation
> instructions.
>
> 2019-09-03  Shaokun Zhang <zhangshaokun@hisilicon.com>
>
>     * config/aarch64/sync-cache.c: Support CTR_EL0.IDC and CTR_EL0.DIC in
> __aarch64_sync_cache_range function.


James has approved this offline, so I've committed it on Shaokun's 
behalf with r276122 with a slightly adjusted ChangeLog.

2019-09-25  Shaokun Zhang  <zhangshaokun@hisilicon.com>

     * config/aarch64/sync-cache.c (__aarch64_sync_cache_range): Add 
support for
     CTR_EL0.IDC and CTR_EL0.DIC.

Thanks,

Kyrill

> ---
>  libgcc/config/aarch64/sync-cache.c | 57 
> ++++++++++++++++++++++++--------------
>  1 file changed, 36 insertions(+), 21 deletions(-)
>
> diff --git a/libgcc/config/aarch64/sync-cache.c 
> b/libgcc/config/aarch64/sync-cache.c
> index 791f5e42ff44..ea3da4be02b3 100644
> --- a/libgcc/config/aarch64/sync-cache.c
> +++ b/libgcc/config/aarch64/sync-cache.c
> @@ -23,6 +23,9 @@ a copy of the GCC Runtime Library Exception along 
> with this program;
>  see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
>  <http://www.gnu.org/licenses/>. */
>
> +#define CTR_IDC_SHIFT           28
> +#define CTR_DIC_SHIFT           29
> +
>  void __aarch64_sync_cache_range (const void *, const void *);
>
>  void
> @@ -41,32 +44,44 @@ __aarch64_sync_cache_range (const void *base, 
> const void *end)
>    icache_lsize = 4 << (cache_info & 0xF);
>    dcache_lsize = 4 << ((cache_info >> 16) & 0xF);
>
> -  /* Loop over the address range, clearing one cache line at once.
> -     Data cache must be flushed to unification first to make sure the
> -     instruction cache fetches the updated data.  'end' is exclusive,
> -     as per the GNU definition of __clear_cache.  */
> +  /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of 
> Unification is
> +     not required for instruction to data coherence.  */
> +
> +  if (((cache_info >> CTR_IDC_SHIFT) & 0x1) == 0x0) {
> +    /* Loop over the address range, clearing one cache line at once.
> +       Data cache must be flushed to unification first to make sure the
> +       instruction cache fetches the updated data.  'end' is exclusive,
> +       as per the GNU definition of __clear_cache.  */
>
> -  /* Make the start address of the loop cache aligned.  */
> -  address = (const char*) ((__UINTPTR_TYPE__) base
> -                          & ~ (__UINTPTR_TYPE__) (dcache_lsize - 1));
> +    /* Make the start address of the loop cache aligned. */
> +    address = (const char*) ((__UINTPTR_TYPE__) base
> +                            & ~ (__UINTPTR_TYPE__) (dcache_lsize - 1));
>
> -  for (; address < (const char *) end; address += dcache_lsize)
> -    asm volatile ("dc\tcvau, %0"
> -                 :
> -                 : "r" (address)
> -                 : "memory");
> +    for (; address < (const char *) end; address += dcache_lsize)
> +      asm volatile ("dc\tcvau, %0"
> +                   :
> +                   : "r" (address)
> +                   : "memory");
> +  }
>
>    asm volatile ("dsb\tish" : : : "memory");
>
> -  /* Make the start address of the loop cache aligned.  */
> -  address = (const char*) ((__UINTPTR_TYPE__) base
> -                          & ~ (__UINTPTR_TYPE__) (icache_lsize - 1));
> +  /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the 
> Point of
> +     Unification is not required for instruction to data coherence.  */
> +
> +  if (((cache_info >> CTR_DIC_SHIFT) & 0x1) == 0x0) {
> +    /* Make the start address of the loop cache aligned. */
> +    address = (const char*) ((__UINTPTR_TYPE__) base
> +                            & ~ (__UINTPTR_TYPE__) (icache_lsize - 1));
> +
> +    for (; address < (const char *) end; address += icache_lsize)
> +      asm volatile ("ic\tivau, %0"
> +                   :
> +                   : "r" (address)
> +                   : "memory");
>
> -  for (; address < (const char *) end; address += icache_lsize)
> -    asm volatile ("ic\tivau, %0"
> -                 :
> -                 : "r" (address)
> -                 : "memory");
> +    asm volatile ("dsb\tish" : : : "memory");
> +  }
>
> -  asm volatile ("dsb\tish; isb" : : : "memory");
> +  asm volatile("isb" : : : "memory");
>  }
> -- 
> 2.7.4
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-09-25 12:40 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-03  8:38 [PATCH v2] [AARCH64] Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC Shaokun Zhang
2019-09-03 11:13 ` Kyrill Tkachov
2019-09-05  9:37   ` Shaokun Zhang
2019-09-05  9:42     ` Kyrill Tkachov
2019-09-06  0:59       ` Shaokun Zhang
2019-09-25 12:40 ` Kyrill Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).