public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Stam Markianos-Wright <Stam.Markianos-Wright@arm.com>
To: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>,
	Richard Earnshaw	<Richard.Earnshaw@arm.com>,
	Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>,
	Marcus Shawcroft <Marcus.Shawcroft@arm.com>,
	Richard Sandiford	<Richard.Sandiford@arm.com>
Subject: Re: [GCC][PATCH][Aarch64] Add Bfloat16_t scalar type, vector types and machine modes to Aarch64 back-end [1/2]
Date: Mon, 23 Dec 2019 16:57:00 -0000	[thread overview]
Message-ID: <f7dbad33-19d4-3c19-c380-c7df020dafe3@arm.com> (raw)
In-Reply-To: <mpteex0r5wf.fsf@arm.com>

[-- Attachment #1: Type: text/plain, Size: 18159 bytes --]



On 12/19/19 10:01 AM, Richard Sandiford wrote:
> Stam Markianos-Wright <Stam.Markianos-Wright@arm.com> writes:
>> [...]
>> @@ -659,6 +666,8 @@ aarch64_simd_builtin_std_type (machine_mode mode,
>>         return float_type_node;
>>       case E_DFmode:
>>         return double_type_node;
>> +    case E_BFmode:
>> +      return aarch64_bf16_type_node;
>>       default:
>>         gcc_unreachable ();
>>       }
>> @@ -750,6 +759,11 @@ aarch64_init_simd_builtin_types (void)
>>     aarch64_simd_types[Float64x1_t].eltype = double_type_node;
>>     aarch64_simd_types[Float64x2_t].eltype = double_type_node;
>>   
>> +
>> +/* Init Bfloat vector types with underlying uint types.  */
>> +  aarch64_simd_types[Bfloat16x4_t].eltype = aarch64_bf16_type_node;
>> +  aarch64_simd_types[Bfloat16x8_t].eltype = aarch64_bf16_type_node;
> 
> Formatting nits: too many blank lines, comment should be indented
> to match the code.

Done :)

> 
>> +
>>     for (i = 0; i < nelts; i++)
>>       {
>>         tree eltype = aarch64_simd_types[i].eltype;
>> @@ -1059,6 +1073,19 @@ aarch64_init_fp16_types (void)
>>     aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node);
>>   }
>>   
>> +/* Initialize the backend REAL_TYPE type supporting bfloat types.  */
>> +static void
>> +aarch64_init_bf16_types (void)
>> +{
>> +  aarch64_bf16_type_node = make_node (REAL_TYPE);
>> +  TYPE_PRECISION (aarch64_bf16_type_node) = 16;
>> +  SET_TYPE_MODE (aarch64_bf16_type_node, BFmode);
>> +  layout_type (aarch64_bf16_type_node);
>> +
>> +  (*lang_hooks.types.register_builtin_type) (aarch64_bf16_type_node, "__bf16");
> 
> This style is mostly a carry-over from pre-ANSI days.  New code
> can just use "lang_hooks.types.register_builtin_type (...)".

Ahh good to know, thanks! Done

> 
>> +  aarch64_bf16_ptr_type_node = build_pointer_type (aarch64_bf16_type_node);
>> +}
>> +
>>   /* Pointer authentication builtins that will become NOP on legacy platform.
>>      Currently, these builtins are for internal use only (libgcc EH unwinder).  */
>>   
>> [...]
>> diff --git a/gcc/config/aarch64/aarch64-simd-builtin-types.def b/gcc/config/aarch64/aarch64-simd-builtin-types.def
>> index b015694293c..3b387377f38 100644
>> --- a/gcc/config/aarch64/aarch64-simd-builtin-types.def
>> +++ b/gcc/config/aarch64/aarch64-simd-builtin-types.def
>> @@ -50,3 +50,5 @@
>>     ENTRY (Float32x4_t, V4SF, none, 13)
>>     ENTRY (Float64x1_t, V1DF, none, 13)
>>     ENTRY (Float64x2_t, V2DF, none, 13)
>> +  ENTRY (Bfloat16x4_t, V4BF, none, 15)
>> +  ENTRY (Bfloat16x8_t, V8BF, none, 15)
> 
> Should be 14 (number of characters + 2 for "__").  Would be good to have
> a test for correct C++ mangling.

Done, thank you for pointing it out!!

> 
>> [...]
>> @@ -101,10 +101,10 @@
>>     [(set_attr "type" "neon_dup<q>")]
>>   )
>>   
>> -(define_insn "*aarch64_simd_mov<VD:mode>"
>> -  [(set (match_operand:VD 0 "nonimmediate_operand"
>> +(define_insn "*aarch64_simd_mov<VDMOV:mode>"
>> +  [(set (match_operand:VDMOV 0 "nonimmediate_operand"
>>   		"=w, m,  m,  w, ?r, ?w, ?r, w")
>> -	(match_operand:VD 1 "general_operand"
>> +	(match_operand:VDMOV 1 "general_operand"
>>   		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
>>     "TARGET_SIMD
>>      && (register_operand (operands[0], <MODE>mode)
>> @@ -126,13 +126,14 @@
>>   }
>>     [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
>>   		     neon_logic<q>, neon_to_gp<q>, f_mcr,\
>> -		     mov_reg, neon_move<q>")]
>> +		     mov_reg, neon_move<q>")
>> +    (set_attr "arch" "*,notbf16,*,*,*,*,*,notbf16")]
>>   )
> 
> Together with the changes to the arch attribute:
> 
>> @@ -378,6 +378,12 @@
>>   	(and (eq_attr "arch" "fp16")
>>   	     (match_test "TARGET_FP_F16INST"))
>>   
>> +	(and (eq_attr "arch" "fp16_notbf16")
>> +	     (match_test "TARGET_FP_F16INST && !TARGET_BF16_FP"))
>> +
>> +	(and (eq_attr "arch" "notbf16")
>> +	     (match_test "!TARGET_BF16_SIMD"))
>> +
>>   	(and (eq_attr "arch" "sve")
>>   	     (match_test "TARGET_SVE")))
>>       (const_string "yes")
> 
> this will disable the second and final alternatives for all VDMOV modes
> when bf16 is enabled.  E.g. enabling bf16 will disable those alternatives
> for V4HI as well as V4BF.
> 
> If you want to disable some alternatives for V4BF then it'd be better to
> use define_mode_attr instead.  But are you sure we need to disable them?
> The m<-Dz alternative should work for V4BF as well.  The w<-Dn alternative
> should work too -- it's up to aarch64_simd_valid_immediate to decide
> which immediates are valid.

Oh yes, I see what you mean about blocking it for V4HI and everything else under 
VDMOV as well...
Yea it was in the principle of doing what we can to block any internal Bfloat 
processing, Bfloat immediates, Bfloat constants, etc., but I wasn't sure on what 
should/shouldn't be allowed so was blocking anything that might allow for 
unintended operations to happen in BFmode. But I've got a better idea of this
now, so, agreed, these look basically fine to be enabled :)

This does make this patch look cleaner!
> 
>> [...]
>> @@ -1174,6 +1174,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
>>   extern tree aarch64_fp16_type_node;
>>   extern tree aarch64_fp16_ptr_type_node;
>>   
>> +/* This type is the user-visible __bf16, and a pointer to that type.  We
>> +   need it in many places in the backend.  Defined in aarch64-builtins.c.  */
> 
> Not sure the number of places in this patch counts as "many" :-)
> Probably best just to drop that sentence.

Haha, fair!! Dropped it.

> 
>> +extern tree aarch64_bf16_type_node;
>> +extern tree aarch64_bf16_ptr_type_node;
>> +
>>   /* The generic unwind code in libgcc does not initialize the frame pointer.
>>      So in order to unwind a function using a frame pointer, the very first
>>      function that is unwound must save the frame pointer.  That way the frame
>> [...]
>> @@ -1321,11 +1327,11 @@
>>     }
>>   )
>>   
>> -(define_insn "*movhf_aarch64"
>> -  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w  , w,?r,w,w  ,w  ,w,m,r,m ,r")
>> -	(match_operand:HF 1 "general_operand"      "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))]
>> -  "TARGET_FLOAT && (register_operand (operands[0], HFmode)
>> -    || aarch64_reg_or_fp_zero (operands[1], HFmode))"
>> +(define_insn "*mov<mode>_aarch64"
>> +  [(set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w  , w,?r,w,w  ,w  ,w,m,r,m ,r")
>> +	(match_operand:HFBF 1 "general_operand"      "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))]
>> +  "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
>> +    || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
>>     "@
>>      movi\\t%0.4h, #0
>>      fmov\\t%h0, %w1
>> @@ -1341,7 +1347,7 @@
>>      mov\\t%w0, %w1"
>>     [(set_attr "type" "neon_move,f_mcr,neon_move,neon_to_gp, neon_move,fconsts, \
>>   		     neon_move,f_loads,f_stores,load_4,store_4,mov_reg")
>> -   (set_attr "arch" "simd,fp16,simd,simd,simd,fp16,simd,*,*,*,*,*")]
>> +   (set_attr "arch" "simd,fp16,simd,simd,simd,fp16_notbf16,simd,*,*,*,*,*")]
>>   )
> 
> Here too we should avoid changing "arch" if possible.  Why do you need
> to exclude the FMOV alternative for bf16?

Same as above. but as you say these should work regardless.

> 
>> diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h
>> new file mode 100644
>> index 00000000000..aedb0972735
>> --- /dev/null
>> +++ b/gcc/config/aarch64/arm_bf16.h
>> @@ -0,0 +1,42 @@
>> +/* Arm BF16 instrinsics include file.
>> +
>> +   Copyright (C) 2019 Free Software Foundation, Inc.
>> +   Contributed by Arm.
>> +
>> +   This file is part of GCC.
>> +
>> +   GCC is free software; you can redistribute it and/or modify it
>> +   under the terms of the GNU General Public License as published
>> +   by the Free Software Foundation; either version 3, or (at your
>> +   option) any later version.
>> +
>> +   GCC is distributed in the hope that it will be useful, but WITHOUT
>> +   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
>> +   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
>> +   License for more details.
>> +
>> +   Under Section 7 of GPL version 3, you are granted additional
>> +   permissions described in the GCC Runtime Library Exception, version
>> +   3.1, as published by the Free Software Foundation.
>> +
>> +   You should have received a copy of the GNU General Public License and
>> +   a copy of the GCC Runtime Library Exception along with this program;
>> +   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
>> +   <http://www.gnu.org/licenses/>.  */
>> +
>> +#ifndef _AARCH64_BF16_H_
>> +#define _AARCH64_BF16_H_
>> +
>> +#include <stdint.h>
> 
> Are we supposed to include stdint.h?  The ACLE spec doesn't seem
> to require it.

Hmm, agreed, I included it only because arm_fp16 did, too.
As far as I can tell everything works without it, so removed it :)

> 
>> +
>> +#pragma GCC push_options
>> +#pragma GCC target ("arch=armv8.2-a+bf16")
>> +#ifdef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC
>> +
>> +typedef __bf16 bfloat16_t;
>> +
>> +
>> +#endif
>> +#pragma GCC pop_options
>> +
>> +#endif
> 
> Are you sure we need the #ifdef?  The target pragma should guarantee
> that the macro's defined.
> 
> But the validity of the typedef shouldn't depend on target options,
> so AFAICT this should just be:
> 
> typedef __bf16 bfloat16_t;

Ok so it's a case of "what do we want to happen if the user tries to use bfloats
without +bf16 enabled?".

So the intent of the ifdef was to not have bfloat16_t be visible if the macro 
wasn't defined (i.e. not having any bf16 support), but I see now that this was 
being negated by the target macro, anyway! Oops, my bad for not really 
understanding that, sorry!

If we have the types always visible, then the user may use them, resulting in an 
ICE.

But even if the #ifdef worked this still doesn't stop the user from trying to
use __bf16 or __Bfloat16x4_t, __Bfloat16x8_t, which would still produce an
ICE, so it's not a perfect solution anyway...

One other thing I tried was the below change to aarch64-builtins.c which stops 
__bf16 or the vector types from being registered at all:

--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -759,26 +759,32 @@ aarch64_init_simd_builtin_types (void)
     aarch64_simd_types[Float64x1_t].eltype = double_type_node;
     aarch64_simd_types[Float64x2_t].eltype = double_type_node;

-  /* Init Bfloat vector types with underlying __bf16 type.  */
-  aarch64_simd_types[Bfloat16x4_t].eltype = aarch64_bf16_type_node;
-  aarch64_simd_types[Bfloat16x8_t].eltype = aarch64_bf16_type_node;
+  if (TARGET_BF16_SIMD)
+    {
+      /* Init Bfloat vector types with underlying __bf16 type.  */
+      aarch64_simd_types[Bfloat16x4_t].eltype = aarch64_bf16_type_node;
+      aarch64_simd_types[Bfloat16x8_t].eltype = aarch64_bf16_type_node;
+    }

     for (i = 0; i < nelts; i++)
       {
         tree eltype = aarch64_simd_types[i].eltype;
         machine_mode mode = aarch64_simd_types[i].mode;

-      if (aarch64_simd_types[i].itype == NULL)
+      if (eltype != NULL)
          {
-         aarch64_simd_types[i].itype
-           = build_distinct_type_copy
-             (build_vector_type (eltype, GET_MODE_NUNITS (mode)));
-         SET_TYPE_STRUCTURAL_EQUALITY (aarch64_simd_types[i].itype);
-       }
+         if (aarch64_simd_types[i].itype == NULL)
+           {
+             aarch64_simd_types[i].itype
+               = build_distinct_type_copy
+               (build_vector_type (eltype, GET_MODE_NUNITS (mode)));
+             SET_TYPE_STRUCTURAL_EQUALITY (aarch64_simd_types[i].itype);
+           }

-      tdecl = add_builtin_type (aarch64_simd_types[i].name,
-                               aarch64_simd_types[i].itype);
-      TYPE_NAME (aarch64_simd_types[i].itype) = tdecl;
+         tdecl = add_builtin_type (aarch64_simd_types[i].name,
+                                   aarch64_simd_types[i].itype);
+         TYPE_NAME (aarch64_simd_types[i].itype) = tdecl;
+       }
       }

   #define AARCH64_BUILD_SIGNED_TYPE(mode)  \
@@ -1240,7 +1246,8 @@ aarch64_general_init_builtins (void)

     aarch64_init_fp16_types ();

-  aarch64_init_bf16_types ();
+  if (TARGET_BF16_FP)
+    aarch64_init_bf16_types ();

     if (TARGET_SIMD)
       aarch64_init_simd_builtins ();



But the problem in that case was that the types could not be re-enabled using
a target pragma like:

#pragma GCC push_options
#pragma GCC target ("+bf16")

Inside the test.

(i.e. the pragma caused the ifdef to be TRUE, but __bf16 was still not being 
enabled afaict?)

So I'm not sure what to do, presumably we do want some guard around the type so 
as not to just ICE if the type is used without +bf16?

> 
>> diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_compile.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_compile.c
>> new file mode 100644
>> index 00000000000..f2bef671deb
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_compile.c
>> @@ -0,0 +1,51 @@
>> +/* { dg-do assemble { target { aarch64*-*-* } } } */
>> +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
>> +/* { dg-add-options arm_v8_2a_bf16_neon }  */
>> +/* { dg-additional-options "-O3 --save-temps" } */
>> +/* { dg-final { check-function-bodies "**" "" } } */
>> +
>> +#include <arm_neon.h>
>> +
>> +/*
>> +**stacktest1:
>> +**	...
>> +**	str	h0, \[sp, [0-9]+\]
>> +**	ldr	h0, \[sp, [0-9]+\]
>> +**	...
>> +**	ret
>> +*/
>> +bfloat16_t stacktest1 (bfloat16_t __a)
>> +{
>> +  volatile bfloat16_t b = __a;
>> +  return b;
>> +}
>> +
>> +/*
>> +**stacktest2:
>> +**	...
>> +**	str	d0, \[sp, [0-9]+\]
>> +**	ldr	d0, \[sp, [0-9]+\]
>> +**	...
>> +**	ret
>> +*/
>> +bfloat16x4_t stacktest2 (bfloat16x4_t __a)
>> +{
>> +  volatile bfloat16x4_t b = __a;
>> +  return b;
>> +}
>> +
>> +/*
>> +**stacktest3:
>> +**	...
>> +**	str	q0, \[sp\]
>> +**	ldr	q0, \[sp\]
>> +**	...
>> +**	ret
>> +*/
>> +bfloat16x8_t stacktest3 (bfloat16x8_t __a)
>> +{
>> +  volatile bfloat16x8_t b = __a;
>> +  return b;
>> +}
>> +
>> +
> 
> It would be good to have more test coverage than this.  E.g.:
> 
> - a test that includes arm_bf16.h, with just scalar tests.

Done as test 2, but it is a small test. Is there anything I could add to it?
(I feel like ideally I'd want to try and force it down every alternative of the 
RTL pattern)

> 
> - a test that includes arm_bf16.h without bf16 enabled, switches bf16 on,
>    and then uses bfloat16_t.

Done as test 3. Same question as above, lmk if you have any ideas of things to 
add to it.

> 
> - a test that includes arm_bf16.h without bf16 enabled and tries to use
>    bfloat16_t without turning bf16 on.

Would have been test 4, but depends on what sort of behaviour we want and where 
the error message will come from.

> 
> - a test for _Complex bfloat16_t.

I don't think we currently have a decision on whether this should be supported 
or not.
AFAICT we also don't have complex __fp16 support either. I'm getting the same 
error messages attempting to compile a _Complex __fp16 but it's always likely 
I'm going at this wrong!

Added test 5 to show you what I was trying to do and to catch the error messages 
in their current form, but I'm not sure if I've done this right either, tbh!

> 
> - a test for moves involving:
> 
>      typedef bfloat16_t v16bf __attribute__((vector_size(32)));

Oh that's a good idea, thank you for pointing it out!

See test 6 for reference.

So for vector size 16, 128bits, this looks fine, loading and storing from q 
registers (using aarch64_simd_movv8bf).

For vector size 32, 256 bits, the compiler chooses to use 4*x-registers instead, 
resulting in this piece of assembler

stacktest2:
          sub     sp, sp, #64
          ldp     x2, x3, [x0]
          stp     x2, x3, [sp]
          ldp     x0, x1, [x0, 16]
          stp     x0, x1, [sp, 16]
          ldp     x0, x1, [sp]
          stp     x0, x1, [sp, 32]
          ldp     x2, x3, [sp, 16]
          stp     x2, x3, [sp, 48]
          stp     x0, x1, [x8]
          ldp     x0, x1, [sp, 48]
          stp     x0, x1, [x8, 16]
          add     sp, sp, 64
          ret

Which looks strange using regular registers in movti mode, but I tested it with
float16 and float32 vectors and they also give the same result.

However, using an integer vector generates:

stacktest2:
          ld1     {v0.16b - v1.16b}, [x0]
          sub     sp, sp, #32
          st1     {v0.16b - v1.16b}, [sp]
          ld1     {v0.16b - v1.16b}, [sp]
          st1     {v0.16b - v1.16b}, [x8]
          add     sp, sp, 32
          ret

from the aarch64_movoi pattern. So now I'm unsure whether to leave this as is or 
to look into why all float modes are not being used through the seemingly more 
efficient movoi pattern. What do you think?
(I intend to look into this further)

> 
> - a test that involves moving constants, for both scalars and vectors.
>    You can create zero scalar constants in C++ using bfloat16_t() etc.
>    For vectors it's possible to do things like:
> 
>      typedef short v2bf __attribute__((vector_size(4)));
>      v2hi foo (void) { return (v2hi) 0x12345678; }
> 
>    The same sort of things should work for bfloat16x4_t and bfloat16x8_t.

Leaving this as an open issue for now because I'm not 100% sure what we 
should/shouldn't be allowing past the tree-level target hooks.

If we do want to block this we would do this in the [2/2] patch.
I will come back to it and create a scan-assembler test when I'm more clear on 
what we should and shouldn't allow at the higher level :)
> 
> Thanks,
> Richard
> 


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: BFmode1of2-rev2.patch --]
[-- Type: text/x-patch; name="BFmode1of2-rev2.patch", Size: 19803 bytes --]

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 9802f436e06..b49c110ccaf 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -315,7 +315,7 @@ m32c*-*-*)
         ;;
 aarch64*-*-*)
 	cpu_type=aarch64
-	extra_headers="arm_fp16.h arm_neon.h arm_acle.h arm_sve.h"
+	extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h"
 	c_target_objs="aarch64-c.o"
 	cxx_target_objs="aarch64-c.o"
 	d_target_objs="aarch64-d.o"
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index c35a1b1f029..7512f8cf01d 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -68,6 +68,9 @@
 #define hi_UP    E_HImode
 #define hf_UP    E_HFmode
 #define qi_UP    E_QImode
+#define bf_UP    E_BFmode
+#define v4bf_UP  E_V4BFmode
+#define v8bf_UP  E_V8BFmode
 #define UP(X) X##_UP
 
 #define SIMD_MAX_BUILTIN_ARGS 5
@@ -568,6 +571,10 @@ static tree aarch64_simd_intXI_type_node = NULL_TREE;
 tree aarch64_fp16_type_node = NULL_TREE;
 tree aarch64_fp16_ptr_type_node = NULL_TREE;
 
+/* Back-end node type for brain float (bfloat) types.  */
+tree aarch64_bf16_type_node = NULL_TREE;
+tree aarch64_bf16_ptr_type_node = NULL_TREE;
+
 /* Wrapper around add_builtin_function.  NAME is the name of the built-in
    function, TYPE is the function type, and CODE is the function subcode
    (relative to AARCH64_BUILTIN_GENERAL).  */
@@ -659,6 +666,8 @@ aarch64_simd_builtin_std_type (machine_mode mode,
       return float_type_node;
     case E_DFmode:
       return double_type_node;
+    case E_BFmode:
+      return aarch64_bf16_type_node;
     default:
       gcc_unreachable ();
     }
@@ -750,6 +759,10 @@ aarch64_init_simd_builtin_types (void)
   aarch64_simd_types[Float64x1_t].eltype = double_type_node;
   aarch64_simd_types[Float64x2_t].eltype = double_type_node;
 
+  /* Init Bfloat vector types with underlying __bf16 type.  */
+  aarch64_simd_types[Bfloat16x4_t].eltype = aarch64_bf16_type_node;
+  aarch64_simd_types[Bfloat16x8_t].eltype = aarch64_bf16_type_node;
+
   for (i = 0; i < nelts; i++)
     {
       tree eltype = aarch64_simd_types[i].eltype;
@@ -1059,6 +1072,19 @@ aarch64_init_fp16_types (void)
   aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node);
 }
 
+/* Initialize the backend REAL_TYPE type supporting bfloat types.  */
+static void
+aarch64_init_bf16_types (void)
+{
+  aarch64_bf16_type_node = make_node (REAL_TYPE);
+  TYPE_PRECISION (aarch64_bf16_type_node) = 16;
+  SET_TYPE_MODE (aarch64_bf16_type_node, BFmode);
+  layout_type (aarch64_bf16_type_node);
+
+  lang_hooks.types.register_builtin_type (aarch64_bf16_type_node, "__bf16");
+  aarch64_bf16_ptr_type_node = build_pointer_type (aarch64_bf16_type_node);
+}
+
 /* Pointer authentication builtins that will become NOP on legacy platform.
    Currently, these builtins are for internal use only (libgcc EH unwinder).  */
 
@@ -1214,6 +1240,8 @@ aarch64_general_init_builtins (void)
 
   aarch64_init_fp16_types ();
 
+  aarch64_init_bf16_types ();
+
   if (TARGET_SIMD)
     aarch64_init_simd_builtins ();
 
diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index 3c698b620cd..59f2ec4eaec 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -69,6 +69,13 @@ VECTOR_MODES (FLOAT, 16);     /*            V4SF V2DF.  */
 VECTOR_MODE (FLOAT, DF, 1);   /*                 V1DF.  */
 VECTOR_MODE (FLOAT, HF, 2);   /*                 V2HF.  */
 
+/* Bfloat16 modes.  */
+FLOAT_MODE (BF, 2, 0);
+ADJUST_FLOAT_FORMAT (BF, &arm_bfloat_half_format);
+
+VECTOR_MODE (FLOAT, BF, 4);   /*		 V4BF.  */
+VECTOR_MODE (FLOAT, BF, 8);   /*		 V8BF.  */
+
 /* Oct Int: 256-bit integer mode needed for 32-byte vector arguments.  */
 INT_MODE (OI, 32);
 
diff --git a/gcc/config/aarch64/aarch64-simd-builtin-types.def b/gcc/config/aarch64/aarch64-simd-builtin-types.def
index b015694293c..2be0ce82445 100644
--- a/gcc/config/aarch64/aarch64-simd-builtin-types.def
+++ b/gcc/config/aarch64/aarch64-simd-builtin-types.def
@@ -50,3 +50,5 @@
   ENTRY (Float32x4_t, V4SF, none, 13)
   ENTRY (Float64x1_t, V1DF, none, 13)
   ENTRY (Float64x2_t, V2DF, none, 13)
+  ENTRY (Bfloat16x4_t, V4BF, none, 14)
+  ENTRY (Bfloat16x8_t, V8BF, none, 14)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index ad4676bc167..7dd28b31547 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -19,8 +19,8 @@
 ;; <http://www.gnu.org/licenses/>.
 
 (define_expand "mov<mode>"
-  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
-	(match_operand:VALL_F16 1 "general_operand"))]
+  [(set (match_operand:VALL_F16MOV 0 "nonimmediate_operand")
+	(match_operand:VALL_F16MOV 1 "general_operand"))]
   "TARGET_SIMD"
   "
   /* Force the operand into a register if it is not an
@@ -101,10 +101,10 @@
   [(set_attr "type" "neon_dup<q>")]
 )
 
-(define_insn "*aarch64_simd_mov<VD:mode>"
-  [(set (match_operand:VD 0 "nonimmediate_operand"
+(define_insn "*aarch64_simd_mov<VDMOV:mode>"
+  [(set (match_operand:VDMOV 0 "nonimmediate_operand"
 		"=w, m,  m,  w, ?r, ?w, ?r, w")
-	(match_operand:VD 1 "general_operand"
+	(match_operand:VDMOV 1 "general_operand"
 		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
   "TARGET_SIMD
    && (register_operand (operands[0], <MODE>mode)
@@ -129,10 +129,10 @@
 		     mov_reg, neon_move<q>")]
 )
 
-(define_insn "*aarch64_simd_mov<VQ:mode>"
-  [(set (match_operand:VQ 0 "nonimmediate_operand"
+(define_insn "*aarch64_simd_mov<VQMOV:mode>"
+  [(set (match_operand:VQMOV 0 "nonimmediate_operand"
 		"=w, Umn,  m,  w, ?r, ?w, ?r, w")
-	(match_operand:VQ 1 "general_operand"
+	(match_operand:VQMOV 1 "general_operand"
 		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
   "TARGET_SIMD
    && (register_operand (operands[0], <MODE>mode)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a85f8b04c20..0d08382ebbe 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1692,6 +1692,7 @@ aarch64_classify_vector_mode (machine_mode mode)
     case E_V2SImode:
     /* ...E_V1DImode doesn't exist.  */
     case E_V4HFmode:
+    case E_V4BFmode:
     case E_V2SFmode:
     case E_V1DFmode:
     /* 128-bit Advanced SIMD vectors.  */
@@ -1700,6 +1701,7 @@ aarch64_classify_vector_mode (machine_mode mode)
     case E_V4SImode:
     case E_V2DImode:
     case E_V8HFmode:
+    case E_V8BFmode:
     case E_V4SFmode:
     case E_V2DFmode:
       return TARGET_SIMD ? VEC_ADVSIMD : 0;
@@ -15603,6 +15605,10 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
 	  field_t = aarch64_fp16_type_node;
 	  field_ptr_t = aarch64_fp16_ptr_type_node;
 	  break;
+	case E_BFmode:
+	  field_t = aarch64_bf16_type_node;
+	  field_ptr_t = aarch64_bf16_ptr_type_node;
+	  break;
 	case E_V2SImode:
 	case E_V4SImode:
 	    {
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 2bb5a208720..68121a16072 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -1120,13 +1120,13 @@ extern enum aarch64_code_model aarch64_cmodel;
 #define AARCH64_VALID_SIMD_DREG_MODE(MODE) \
   ((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \
    || (MODE) == V2SFmode || (MODE) == V4HFmode || (MODE) == DImode \
-   || (MODE) == DFmode)
+   || (MODE) == DFmode || (MODE) == V4BFmode)
 
 /* Modes valid for AdvSIMD Q registers.  */
 #define AARCH64_VALID_SIMD_QREG_MODE(MODE) \
   ((MODE) == V4SImode || (MODE) == V8HImode || (MODE) == V16QImode \
    || (MODE) == V4SFmode || (MODE) == V8HFmode || (MODE) == V2DImode \
-   || (MODE) == V2DFmode)
+   || (MODE) == V2DFmode || (MODE) == V8BFmode)
 
 #define ENDIAN_LANE_N(NUNITS, N) \
   (BYTES_BIG_ENDIAN ? NUNITS - 1 - N : N)
@@ -1174,6 +1174,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
 extern tree aarch64_fp16_type_node;
 extern tree aarch64_fp16_ptr_type_node;
 
+/* This type is the user-visible __bf16, and a pointer to that type.  Defined
+   in aarch64-builtins.c.  */
+extern tree aarch64_bf16_type_node;
+extern tree aarch64_bf16_ptr_type_node;
+
 /* The generic unwind code in libgcc does not initialize the frame pointer.
    So in order to unwind a function using a frame pointer, the very first
    function that is unwound must save the frame pointer.  That way the frame
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index b11ead7ab23..d48d67ea7ec 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1304,8 +1304,8 @@
 })
 
 (define_expand "mov<mode>"
-  [(set (match_operand:GPF_TF_F16 0 "nonimmediate_operand")
-	(match_operand:GPF_TF_F16 1 "general_operand"))]
+  [(set (match_operand:GPF_TF_F16_MOV 0 "nonimmediate_operand")
+	(match_operand:GPF_TF_F16_MOV 1 "general_operand"))]
   ""
   {
     if (!TARGET_FLOAT)
@@ -1321,11 +1321,11 @@
   }
 )
 
-(define_insn "*movhf_aarch64"
-  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w  , w,?r,w,w  ,w  ,w,m,r,m ,r")
-	(match_operand:HF 1 "general_operand"      "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))]
-  "TARGET_FLOAT && (register_operand (operands[0], HFmode)
-    || aarch64_reg_or_fp_zero (operands[1], HFmode))"
+(define_insn "*mov<mode>_aarch64"
+  [(set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w  , w,?r,w,w  ,w  ,w,m,r,m ,r")
+	(match_operand:HFBF 1 "general_operand"      "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))]
+  "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
+    || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
   "@
    movi\\t%0.4h, #0
    fmov\\t%h0, %w1
diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h
new file mode 100644
index 00000000000..884b6f3bc7a
--- /dev/null
+++ b/gcc/config/aarch64/arm_bf16.h
@@ -0,0 +1,32 @@
+/* Arm BF16 intrinsics include file.
+
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   Contributed by Arm.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _AARCH64_BF16_H_
+#define _AARCH64_BF16_H_
+
+typedef __bf16 bfloat16_t;
+
+#endif
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 8b861601a48..ee4bb76bcd4 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -73,6 +73,9 @@ typedef __fp16 float16_t;
 typedef float float32_t;
 typedef double float64_t;
 
+typedef __Bfloat16x4_t bfloat16x4_t;
+typedef __Bfloat16x8_t bfloat16x8_t;
+
 typedef struct int8x8x2_t
 {
   int8x8_t val[2];
@@ -34606,6 +34609,8 @@ vrnd64xq_f64 (float64x2_t __a)
 
 #pragma GCC pop_options
 
+#include "arm_bf16.h"
+
 #undef __aarch64_vget_lane_any
 
 #undef __aarch64_vdup_lane_any
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 1ca5ed1ef1b..9480efef47c 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -57,9 +57,17 @@
 ;; Iterator for all scalar floating point modes (HF, SF, DF)
 (define_mode_iterator GPF_HF [HF SF DF])
 
+;; Iterator for all 16-bit scalar floating point modes (HF, BF)
+(define_mode_iterator HFBF [HF BF])
+
 ;; Iterator for all scalar floating point modes (HF, SF, DF and TF)
 (define_mode_iterator GPF_TF_F16 [HF SF DF TF])
 
+;; Iterator for all scalar floating point modes suitable for moving, including
+;; special BF type.(HF, SF, DF, TF and BF)
+(define_mode_iterator GPF_TF_F16_MOV [(HF "") (BF "TARGET_BF16_FP") (SF "")
+				      (DF "") (TF "")])
+
 ;; Double vector modes.
 (define_mode_iterator VDF [V2SF V4HF])
 
@@ -79,6 +87,9 @@
 ;; Double vector modes.
 (define_mode_iterator VD [V8QI V4HI V4HF V2SI V2SF])
 
+;; Double vector modes suitable for moving.  Includes BFmode.
+(define_mode_iterator VDMOV [V8QI V4HI V4HF V4BF V2SI V2SF])
+
 ;; All modes stored in registers d0-d31.
 (define_mode_iterator DREG [V8QI V4HI V4HF V2SI V2SF DF])
 
@@ -94,6 +105,9 @@
 ;; Quad vector modes.
 (define_mode_iterator VQ [V16QI V8HI V4SI V2DI V8HF V4SF V2DF])
 
+;; Quad vector modes suitable for moving.  Includes BFmode.
+(define_mode_iterator VQMOV [V16QI V8HI V4SI V2DI V8HF V8BF V4SF V2DF])
+
 ;; Copy of the above.
 (define_mode_iterator VQ2 [V16QI V8HI V4SI V2DI V8HF V4SF V2DF])
 
@@ -160,6 +174,15 @@
 (define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
 				V4HF V8HF V2SF V4SF V2DF])
 
+;; All Advanced SIMD modes suitable for moving, loading, and storing,
+;; including special Bfloat vector types.
+(define_mode_iterator VALL_F16MOV [(V8QI "") (V16QI "") (V4HI "") (V8HI "")
+				   (V2SI "") (V4SI "") (V2DI "")
+				   (V4HF "") (V8HF "")
+				   (V4BF "TARGET_BF16_SIMD")
+				   (V8BF "TARGET_BF16_SIMD")
+				   (V2SF "") (V4SF "") (V2DF "")])
+
 ;; The VALL_F16 modes except the 128-bit 2-element ones.
 (define_mode_iterator VALL_F16_NO_V2Q [V8QI V16QI V4HI V8HI V2SI V4SI
 				V4HF V8HF V2SF V4SF])
@@ -885,7 +908,8 @@
 			  (V8HF "16b") (V2SF  "8b")
 			  (V4SF "16b") (V2DF  "16b")
 			  (DI   "8b")  (DF    "8b")
-			  (SI   "8b")  (SF    "8b")])
+			  (SI   "8b")  (SF    "8b")
+			  (V4BF "8b")  (V8BF  "16b")])
 
 ;; Define element mode for each vector mode.
 (define_mode_attr VEL [(V8QI  "QI") (V16QI "QI")
@@ -1265,6 +1289,7 @@
 		     (V2SI "") (V4SI  "_q")
 		     (DI   "") (V2DI  "_q")
 		     (V4HF "") (V8HF "_q")
+		     (V4BF "") (V8BF "_q")
 		     (V2SF "") (V4SF  "_q")
 			       (V2DF  "_q")
 		     (QI "") (HI "") (SI "") (DI "") (HF "") (SF "") (DF "")])
diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_compile_1.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_compile_1.c
new file mode 100644
index 00000000000..f2bef671deb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_compile_1.c
@@ -0,0 +1,51 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon }  */
+/* { dg-additional-options "-O3 --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_neon.h>
+
+/*
+**stacktest1:
+**	...
+**	str	h0, \[sp, [0-9]+\]
+**	ldr	h0, \[sp, [0-9]+\]
+**	...
+**	ret
+*/
+bfloat16_t stacktest1 (bfloat16_t __a)
+{
+  volatile bfloat16_t b = __a;
+  return b;
+}
+
+/*
+**stacktest2:
+**	...
+**	str	d0, \[sp, [0-9]+\]
+**	ldr	d0, \[sp, [0-9]+\]
+**	...
+**	ret
+*/
+bfloat16x4_t stacktest2 (bfloat16x4_t __a)
+{
+  volatile bfloat16x4_t b = __a;
+  return b;
+}
+
+/*
+**stacktest3:
+**	...
+**	str	q0, \[sp\]
+**	ldr	q0, \[sp\]
+**	...
+**	ret
+*/
+bfloat16x8_t stacktest3 (bfloat16x8_t __a)
+{
+  volatile bfloat16x8_t b = __a;
+  return b;
+}
+
+
diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_compile_2.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_compile_2.c
new file mode 100644
index 00000000000..c3c3a951111
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_compile_2.c
@@ -0,0 +1,21 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon }  */
+/* { dg-additional-options "-O3 --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_bf16.h>
+
+/*
+**stacktest1:
+**	...
+**	str	h0, \[sp, [0-9]+\]
+**	ldr	h0, \[sp, [0-9]+\]
+**	...
+**	ret
+*/
+bfloat16_t stacktest1 (bfloat16_t __a)
+{
+  volatile bfloat16_t b = __a;
+  return b;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_compile_3.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_compile_3.c
new file mode 100644
index 00000000000..9bcb53b32d8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_compile_3.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.2-a -O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC push_options
+#pragma GCC target ("+bf16")
+
+#include <arm_bf16.h>
+
+/*
+**stacktest1:
+**	...
+**	str	h0, \[sp, [0-9]+\]
+**	ldr	h0, \[sp, [0-9]+\]
+**	...
+**	ret
+*/
+bfloat16_t stacktest1 (bfloat16_t __a)
+{
+  volatile bfloat16_t b = __a;
+  return b;
+}
+
+#pragma GCC pop_options
+
diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_compile_5.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_compile_5.c
new file mode 100644
index 00000000000..b812011c223
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_compile_5.c
@@ -0,0 +1,16 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon }  */
+/* { dg-additional-options "-std=c99 -pedantic-errors -O3 --save-temps" } */
+
+#include <arm_bf16.h>
+
+_Complex bfloat16_t stacktest1 (_Complex bfloat16_t __a)
+{
+  volatile _Complex bfloat16_t b = __a;
+  return b;
+}
+
+/* { dg-error {ISO C does not support plain 'complex' meaning 'double complex'} "" { target *-*-* } 8 } */
+/* { dg-error {expected '=', ',', ';', 'asm' or '__attribute__' before 'stacktest1'} "" { target *-*-* } 8 } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_compile_6.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_compile_6.c
new file mode 100644
index 00000000000..9a967de439b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_compile_6.c
@@ -0,0 +1,49 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon }  */
+/* { dg-additional-options "-O3 --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_neon.h>
+
+/*  Create vectors of 8 and 16 BFloats.  */
+typedef bfloat16_t v8bf __attribute__((vector_size(16)));
+typedef bfloat16_t v16bf __attribute__((vector_size(32)));
+
+/*
+**stacktest1:
+**	...
+**	str	q0, \[sp\]
+**	ldr	q0, \[sp\]
+**	...
+**	ret
+*/
+v8bf stacktest1 (v8bf __a)
+{
+  volatile v8bf b = __a;
+  return b;
+}
+
+
+/*
+**stacktest2:
+**	...
+**	ldp	x[0-9]+, x[0-9]+, \[x[0-9]+\]
+**	stp	x[0-9]+, x[0-9]+, \[sp\]
+**	ldp	x[0-9]+, x[0-9]+, \[x0, [0-9]+\]
+**	stp	x[0-9]+, x[0-9]+, \[sp, [0-9]+\]
+**	ldp	x[0-9]+, x[0-9]+, \[sp\]
+**	stp	x[0-9]+, x[0-9]+, \[sp, [0-9]+\]
+**	ldp	x[0-9]+, x[0-9]+, \[sp, [0-9]+\]
+**	stp	x[0-9]+, x[0-9]+, \[sp, [0-9]+\]
+**	stp	x[0-9]+, x[0-9]+, \[x[0-9]+\]
+**	ldp	x[0-9]+, x[0-9]+, \[sp, [0-9]+\]
+**	stp	x[0-9]+, x[0-9]+, \[x[0-9]+, [0-9]+\]
+**	...
+**	ret
+*/
+v16bf stacktest2 (v16bf __a)
+{
+  volatile v16bf b = __a;
+  return b;
+}

  reply	other threads:[~2019-12-23 16:05 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-12-18 16:35 Stam Markianos-Wright
2019-12-19 10:07 ` Richard Sandiford
2019-12-23 16:57   ` Stam Markianos-Wright [this message]
2019-12-23 17:07     ` Richard Sandiford
2020-01-07 11:42       ` Stam Markianos-Wright
2020-01-07 17:15         ` Richard Sandiford
2020-01-09 15:12           ` Stam Markianos-Wright
2020-01-09 15:48             ` Richard Sandiford
2020-01-10 19:31               ` Stam Markianos-Wright

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f7dbad33-19d4-3c19-c380-c7df020dafe3@arm.com \
    --to=stam.markianos-wright@arm.com \
    --cc=Kyrylo.Tkachov@arm.com \
    --cc=Marcus.Shawcroft@arm.com \
    --cc=Richard.Earnshaw@arm.com \
    --cc=Richard.Sandiford@arm.com \
    --cc=gcc-patches@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).