[PATCH]AArch64 Optimize right shift rounding narrowing

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

From: Tamar Christina <tamar.christina@arm.com>
To: gcc-patches@gcc.gnu.org
Cc: nd@arm.com, Richard.Earnshaw@arm.com, Marcus.Shawcroft@arm.com,
	Kyrylo.Tkachov@arm.com, richard.sandiford@arm.com
Subject: [PATCH]AArch64 Optimize right shift rounding narrowing
Date: Fri, 12 Nov 2021 12:08:04 +0000	[thread overview]
Message-ID: <patch-15064-tamar@arm.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 8885 bytes --]

Hi All,

This optimizes right shift rounding narrow instructions to
rounding add narrow high where one vector is 0 when the shift amount is half
that of the original input type.

i.e.

uint32x4_t foo (uint64x2_t a, uint64x2_t b)
{
  return vrshrn_high_n_u64 (vrshrn_n_u64 (a, 32), b, 32);
}

now generates:

foo:
        movi    v3.4s, 0
        raddhn  v0.2s, v2.2d, v3.2d
        raddhn2 v0.4s, v2.2d, v3.2d

instead of:

foo:
        rshrn   v0.2s, v0.2d, 32
        rshrn2  v0.4s, v1.2d, 32
        ret

On Arm cores this is an improvement in both latency and throughput.
Because a vector zero is needed I created a new method
aarch64_gen_shareable_zero that creates zeros using V4SI and then takes a subreg
of the zero to the desired type.  This allows CSE to share all the zero
constants.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64-protos.h (aarch64_gen_shareable_zero): New.
	* config/aarch64/aarch64-simd.md (aarch64_rshrn<mode>,
	aarch64_rshrn2<mode>): 
	* config/aarch64/aarch64.c (aarch64_gen_shareable_zero): New.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/advsimd-intrinsics/shrn-1.c: New test.
	* gcc.target/aarch64/advsimd-intrinsics/shrn-2.c: New test.
	* gcc.target/aarch64/advsimd-intrinsics/shrn-3.c: New test.
	* gcc.target/aarch64/advsimd-intrinsics/shrn-4.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index f7887d06139f01c1591c4e755538d94e5e608a52..f7f5cae82bc9198e54d0298f25f7c0f5902d5fb1 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -846,6 +846,7 @@ const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr_rtx (void);
 rtx aarch64_return_addr (int, rtx);
 rtx aarch64_simd_gen_const_vector_dup (machine_mode, HOST_WIDE_INT);
+rtx aarch64_gen_shareable_zero (machine_mode);
 bool aarch64_simd_mem_operand_p (rtx);
 bool aarch64_sve_ld1r_operand_p (rtx);
 bool aarch64_sve_ld1rq_operand_p (rtx);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index c71658e2bf52b26bf9fc9fa702dd5446447f4d43..d7f8694add540e32628893a7b7471c08de6f760f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1956,20 +1956,32 @@ (define_expand "aarch64_rshrn<mode>"
    (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")]
   "TARGET_SIMD"
   {
-    operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
-						 INTVAL (operands[2]));
-    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
-    if (BYTES_BIG_ENDIAN)
-      emit_insn (gen_aarch64_rshrn<mode>_insn_be (tmp, operands[1],
-				operands[2], CONST0_RTX (<VNARROWQ>mode)));
+    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
+      {
+	rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
+	emit_insn (gen_aarch64_raddhn<mode> (operands[0], operands[1], tmp0));
+      }
     else
-      emit_insn (gen_aarch64_rshrn<mode>_insn_le (tmp, operands[1],
-				operands[2], CONST0_RTX (<VNARROWQ>mode)));
-
-    /* The intrinsic expects a narrow result, so emit a subreg that will get
-       optimized away as appropriate.  */
-    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
-						 <VNARROWQ2>mode));
+      {
+	rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
+	operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+						         INTVAL (operands[2]));
+	if (BYTES_BIG_ENDIAN)
+	  emit_insn (
+		gen_aarch64_rshrn<mode>_insn_be (tmp, operands[1],
+						 operands[2],
+						 CONST0_RTX (<VNARROWQ>mode)));
+	else
+	  emit_insn (
+		gen_aarch64_rshrn<mode>_insn_le (tmp, operands[1],
+						 operands[2],
+						 CONST0_RTX (<VNARROWQ>mode)));
+
+	/* The intrinsic expects a narrow result, so emit a subreg that will
+	   get optimized away as appropriate.  */
+	emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
+						     <VNARROWQ2>mode));
+      }
     DONE;
   }
 )
@@ -2049,14 +2061,27 @@ (define_expand "aarch64_rshrn2<mode>"
    (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
   "TARGET_SIMD"
   {
-    operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
-						 INTVAL (operands[3]));
-    if (BYTES_BIG_ENDIAN)
-      emit_insn (gen_aarch64_rshrn2<mode>_insn_be (operands[0], operands[1],
-						  operands[2], operands[3]));
+    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ2>mode))
+      {
+	rtx tmp = aarch64_gen_shareable_zero (<MODE>mode);
+	emit_insn (gen_aarch64_raddhn2<mode> (operands[0], operands[1],
+					      operands[2], tmp));
+      }
     else
-      emit_insn (gen_aarch64_rshrn2<mode>_insn_le (operands[0], operands[1],
-						  operands[2], operands[3]));
+      {
+	operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+							 INTVAL (operands[3]));
+	if (BYTES_BIG_ENDIAN)
+	  emit_insn (gen_aarch64_rshrn2<mode>_insn_be (operands[0],
+						       operands[1],
+						       operands[2],
+						       operands[3]));
+	else
+	  emit_insn (gen_aarch64_rshrn2<mode>_insn_le (operands[0],
+						       operands[1],
+						       operands[2],
+						       operands[3]));
+      }
     DONE;
   }
 )
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index fdf05505846721b02059df494d6395ae9423a8ef..11201ea3498beb270c0a7f8da5f5009d710535ee 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -20397,6 +20397,18 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
     == SYMBOL_TINY_ABSOLUTE;
 }
 
+/* Create a 0 constant that is based of V4SI to allow CSE to optimally share
+   the constant creation.  */
+
+rtx
+aarch64_gen_shareable_zero (machine_mode mode)
+{
+  machine_mode zmode = V4SImode;
+  rtx tmp = gen_reg_rtx (zmode);
+  emit_move_insn (tmp, CONST0_RTX (zmode));
+  return lowpart_subreg (mode, tmp, zmode);
+}
+
 /* Return a const_int vector of VAL.  */
 rtx
 aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-1.c
new file mode 100644
index 0000000000000000000000000000000000000000..4bc3aa9563ee7d0dc46557d30d9a29149706229d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
+
+#include <arm_neon.h>
+
+uint8x16_t foo (uint32x4_t a, uint32x4_t b)
+{
+  uint16x4_t a1 = vrshrn_n_u32 (a, 16);
+  uint16x8_t b1 = vrshrn_high_n_u32 (a1, b, 16);
+  return vrshrn_high_n_u16 (vrshrn_n_u16 (b1, 8), b1, 8);
+}
+
+/* { dg-final { scan-assembler-times {\tmovi\t} 1 } } */
+/* { dg-final { scan-assembler-times {\traddhn\t} 2 } } */
+/* { dg-final { scan-assembler-times {\traddhn2\t} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-2.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..09d913e85524f06367c1c2cf51dda0f57578e9ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+
+#include <arm_neon.h>
+
+uint32x4_t foo (uint64x2_t a, uint64x2_t b)
+{
+  return vrshrn_high_n_u64 (vrshrn_n_u64 (a, 32), b, 32);
+}
+
+/* { dg-final { scan-assembler-times {\traddhn\t} 1 } } */
+/* { dg-final { scan-assembler-times {\traddhn2\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-3.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-3.c
new file mode 100644
index 0000000000000000000000000000000000000000..bdccbb3410f049d7e45aabdcc3d2964fbabca807
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-3.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+
+#include <arm_neon.h>
+
+uint16x8_t foo (uint32x4_t a, uint32x4_t b)
+{
+  return vrshrn_high_n_u32 (vrshrn_n_u32 (a, 16), b, 16);
+}
+
+/* { dg-final { scan-assembler-times {\traddhn\t} 1 } } */
+/* { dg-final { scan-assembler-times {\traddhn2\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-4.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-4.c
new file mode 100644
index 0000000000000000000000000000000000000000..4b23eddb85891975b8e122060e2a9ebfe56d842c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-4.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+
+#include <arm_neon.h>
+
+uint8x16_t foo (uint16x8_t a, uint16x8_t b)
+{
+  return vrshrn_high_n_u16 (vrshrn_n_u16 (a, 8), b, 8);
+}
+
+/* { dg-final { scan-assembler-times {\traddhn\t} 1 } } */
+/* { dg-final { scan-assembler-times {\traddhn2\t} 1 } } */


-- 

[-- Attachment #2: rb15064.patch --]
[-- Type: text/x-diff, Size: 7457 bytes --]

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index f7887d06139f01c1591c4e755538d94e5e608a52..f7f5cae82bc9198e54d0298f25f7c0f5902d5fb1 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -846,6 +846,7 @@ const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr_rtx (void);
 rtx aarch64_return_addr (int, rtx);
 rtx aarch64_simd_gen_const_vector_dup (machine_mode, HOST_WIDE_INT);
+rtx aarch64_gen_shareable_zero (machine_mode);
 bool aarch64_simd_mem_operand_p (rtx);
 bool aarch64_sve_ld1r_operand_p (rtx);
 bool aarch64_sve_ld1rq_operand_p (rtx);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index c71658e2bf52b26bf9fc9fa702dd5446447f4d43..d7f8694add540e32628893a7b7471c08de6f760f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1956,20 +1956,32 @@ (define_expand "aarch64_rshrn<mode>"
    (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")]
   "TARGET_SIMD"
   {
-    operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
-						 INTVAL (operands[2]));
-    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
-    if (BYTES_BIG_ENDIAN)
-      emit_insn (gen_aarch64_rshrn<mode>_insn_be (tmp, operands[1],
-				operands[2], CONST0_RTX (<VNARROWQ>mode)));
+    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
+      {
+	rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
+	emit_insn (gen_aarch64_raddhn<mode> (operands[0], operands[1], tmp0));
+      }
     else
-      emit_insn (gen_aarch64_rshrn<mode>_insn_le (tmp, operands[1],
-				operands[2], CONST0_RTX (<VNARROWQ>mode)));
-
-    /* The intrinsic expects a narrow result, so emit a subreg that will get
-       optimized away as appropriate.  */
-    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
-						 <VNARROWQ2>mode));
+      {
+	rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
+	operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+						         INTVAL (operands[2]));
+	if (BYTES_BIG_ENDIAN)
+	  emit_insn (
+		gen_aarch64_rshrn<mode>_insn_be (tmp, operands[1],
+						 operands[2],
+						 CONST0_RTX (<VNARROWQ>mode)));
+	else
+	  emit_insn (
+		gen_aarch64_rshrn<mode>_insn_le (tmp, operands[1],
+						 operands[2],
+						 CONST0_RTX (<VNARROWQ>mode)));
+
+	/* The intrinsic expects a narrow result, so emit a subreg that will
+	   get optimized away as appropriate.  */
+	emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
+						     <VNARROWQ2>mode));
+      }
     DONE;
   }
 )
@@ -2049,14 +2061,27 @@ (define_expand "aarch64_rshrn2<mode>"
    (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
   "TARGET_SIMD"
   {
-    operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
-						 INTVAL (operands[3]));
-    if (BYTES_BIG_ENDIAN)
-      emit_insn (gen_aarch64_rshrn2<mode>_insn_be (operands[0], operands[1],
-						  operands[2], operands[3]));
+    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ2>mode))
+      {
+	rtx tmp = aarch64_gen_shareable_zero (<MODE>mode);
+	emit_insn (gen_aarch64_raddhn2<mode> (operands[0], operands[1],
+					      operands[2], tmp));
+      }
     else
-      emit_insn (gen_aarch64_rshrn2<mode>_insn_le (operands[0], operands[1],
-						  operands[2], operands[3]));
+      {
+	operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+							 INTVAL (operands[3]));
+	if (BYTES_BIG_ENDIAN)
+	  emit_insn (gen_aarch64_rshrn2<mode>_insn_be (operands[0],
+						       operands[1],
+						       operands[2],
+						       operands[3]));
+	else
+	  emit_insn (gen_aarch64_rshrn2<mode>_insn_le (operands[0],
+						       operands[1],
+						       operands[2],
+						       operands[3]));
+      }
     DONE;
   }
 )
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index fdf05505846721b02059df494d6395ae9423a8ef..11201ea3498beb270c0a7f8da5f5009d710535ee 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -20397,6 +20397,18 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
     == SYMBOL_TINY_ABSOLUTE;
 }
 
+/* Create a 0 constant that is based of V4SI to allow CSE to optimally share
+   the constant creation.  */
+
+rtx
+aarch64_gen_shareable_zero (machine_mode mode)
+{
+  machine_mode zmode = V4SImode;
+  rtx tmp = gen_reg_rtx (zmode);
+  emit_move_insn (tmp, CONST0_RTX (zmode));
+  return lowpart_subreg (mode, tmp, zmode);
+}
+
 /* Return a const_int vector of VAL.  */
 rtx
 aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-1.c
new file mode 100644
index 0000000000000000000000000000000000000000..4bc3aa9563ee7d0dc46557d30d9a29149706229d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
+
+#include <arm_neon.h>
+
+uint8x16_t foo (uint32x4_t a, uint32x4_t b)
+{
+  uint16x4_t a1 = vrshrn_n_u32 (a, 16);
+  uint16x8_t b1 = vrshrn_high_n_u32 (a1, b, 16);
+  return vrshrn_high_n_u16 (vrshrn_n_u16 (b1, 8), b1, 8);
+}
+
+/* { dg-final { scan-assembler-times {\tmovi\t} 1 } } */
+/* { dg-final { scan-assembler-times {\traddhn\t} 2 } } */
+/* { dg-final { scan-assembler-times {\traddhn2\t} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-2.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..09d913e85524f06367c1c2cf51dda0f57578e9ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+
+#include <arm_neon.h>
+
+uint32x4_t foo (uint64x2_t a, uint64x2_t b)
+{
+  return vrshrn_high_n_u64 (vrshrn_n_u64 (a, 32), b, 32);
+}
+
+/* { dg-final { scan-assembler-times {\traddhn\t} 1 } } */
+/* { dg-final { scan-assembler-times {\traddhn2\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-3.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-3.c
new file mode 100644
index 0000000000000000000000000000000000000000..bdccbb3410f049d7e45aabdcc3d2964fbabca807
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-3.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+
+#include <arm_neon.h>
+
+uint16x8_t foo (uint32x4_t a, uint32x4_t b)
+{
+  return vrshrn_high_n_u32 (vrshrn_n_u32 (a, 16), b, 16);
+}
+
+/* { dg-final { scan-assembler-times {\traddhn\t} 1 } } */
+/* { dg-final { scan-assembler-times {\traddhn2\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-4.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-4.c
new file mode 100644
index 0000000000000000000000000000000000000000..4b23eddb85891975b8e122060e2a9ebfe56d842c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/shrn-4.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+
+#include <arm_neon.h>
+
+uint8x16_t foo (uint16x8_t a, uint16x8_t b)
+{
+  return vrshrn_high_n_u16 (vrshrn_n_u16 (a, 8), b, 8);
+}
+
+/* { dg-final { scan-assembler-times {\traddhn\t} 1 } } */
+/* { dg-final { scan-assembler-times {\traddhn2\t} 1 } } */

next             reply	other threads:[~2021-11-12 12:08 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-12 12:08 Tamar Christina [this message]
     [not found] ` <VI1PR08MB5325FDB17F172D46F1E358C5FF609@VI1PR08MB5325.eurprd08.prod.outlook.com>
2021-11-23 15:21   ` Tamar Christina
2021-11-30 16:13 ` Richard Sandiford

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=patch-15064-tamar@arm.com \
    --to=tamar.christina@arm.com \
    --cc=Kyrylo.Tkachov@arm.com \
    --cc=Marcus.Shawcroft@arm.com \
    --cc=Richard.Earnshaw@arm.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=nd@arm.com \
    --cc=richard.sandiford@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).