public inbox for gcc-patches@gcc.gnu.org
From: Richard Sandiford <richard.sandiford@arm.com>
To: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
Cc: GCC Patches <gcc-patches@gcc.gnu.org>
Subject: Re: [PATCH][AArch64] Cleanup move immediate code
Date: Thu, 24 Nov 2022 12:20:38 +0000	[thread overview]
Message-ID: <mpt7czk7dd5.fsf@arm.com> (raw)
In-Reply-To: <PAWPR08MB8982542953A831514A369D9883369@PAWPR08MB8982.eurprd08.prod.outlook.com> (Wilco Dijkstra's message of "Tue, 1 Nov 2022 13:08:18 +0000")

Sorry for the very long delay in reviewing this.

Wilco Dijkstra <Wilco.Dijkstra@arm.com> writes:
> Hi Richard,
>
> Here is the immediate cleanup splitoff from the previous patch:
>
> Simplify, refactor and improve various move immediate functions.
> Allow 32-bit MOVZ/N as a valid 64-bit immediate which removes special
> cases in aarch64_internal_mov_immediate.  Add new constraint so the movdi
> pattern only needs a single alternative for move immediate.

Just to make sure I understand: isn't it really just MOVN?  I would have
expected a 32-bit MOVZ to be equivalent to (and add no capabilities over)
a 64-bit MOVZ.
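
For reference, this is the model I'm working from (a throwaway C++
illustration rather than anything for the patch; the helper names are
made up):

  #include <cstdint>

  /* MOVZ xd, #imm16, lsl #s.  */
  static uint64_t movz64 (uint16_t imm16, unsigned s)
  { return (uint64_t) imm16 << s; }

  /* MOVZ wd, #imm16, lsl #s, zero-extended to 64 bits.  For s == 0 or 16
     this gives exactly the same value as movz64, so the 32-bit form
     reaches nothing that the 64-bit form can't.  */
  static uint64_t movz32 (uint16_t imm16, unsigned s)
  { return (uint32_t) imm16 << s; }

  /* MOVN xd, #imm16, lsl #s.  */
  static uint64_t movn64 (uint16_t imm16, unsigned s)
  { return ~((uint64_t) imm16 << s); }

  /* MOVN wd, #imm16, lsl #s, zero-extended to 64 bits.  E.g.
     movn32 (0, 0) == 0x00000000ffffffff, which no single 64-bit
     MOVZ/MOVN can produce.  */
  static uint64_t movn32 (uint16_t imm16, unsigned s)
  { return (uint32_t) ~((uint32_t) imm16 << s); }

So AIUI it's only the 32-bit MOVN forms that give us anything new.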

> Passes bootstrap and regress, OK for commit?
>
> gcc/ChangeLog:
>
>         * config/aarch64/aarch64.cc (aarch64_bitmask_imm): Use unsigned type.
>         (aarch64_zeroextended_move_imm): New function.
>         (aarch64_move_imm): Refactor, assert mode is SImode or DImode.
>         (aarch64_internal_mov_immediate): Assert mode is SImode or DImode.
>         Simplify special cases.
>         (aarch64_uimm12_shift): Simplify code.
>         (aarch64_clamp_to_uimm12_shift): Likewise.
>         (aarch64_movw_imm): Remove.
>         (aarch64_float_const_rtx_p): Pass either SImode or DImode to
>         aarch64_internal_mov_immediate.
>         (aarch64_rtx_costs): Likewise.
>         * config/aarch64/aarch64.md (movdi_aarch64): Merge 'N' and 'M'
>         constraints into single 'O'.
>         (mov<mode>_aarch64): Likewise.
>         * config/aarch64/aarch64-protos.h (aarch64_move_imm): Use unsigned.
>         (aarch64_bitmask_imm): Likewise.
>         (aarch64_uimm12_shift): Likewise.
>         (aarch64_zeroextended_move_imm): New prototype.
>         * config/aarch64/constraints.md: Add 'O' for 32/64-bit immediates,
>         limit 'N' to 64-bit only moves.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 1a71f02284137c64e7115b26e6aa00447596f105..a73bfa20acb9b92ae0475794c3f11c67d22feb97 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -755,7 +755,7 @@ void aarch64_post_cfi_startproc (void);
>  poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
>  int aarch64_get_condition_code (rtx);
>  bool aarch64_address_valid_for_prefetch_p (rtx, bool);
> -bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
> +bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode);
>  unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
>  unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
>  bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
> @@ -792,7 +792,7 @@ bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT,
>                                         unsigned HOST_WIDE_INT,
>                                         unsigned HOST_WIDE_INT);
>  bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
> -bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
> +bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode);
>  machine_mode aarch64_sve_int_mode (machine_mode);
>  opt_machine_mode aarch64_sve_pred_mode (unsigned int);
>  machine_mode aarch64_sve_pred_mode (machine_mode);
> @@ -842,8 +842,9 @@ bool aarch64_sve_float_arith_immediate_p (rtx, bool);
>  bool aarch64_sve_float_mul_immediate_p (rtx);
>  bool aarch64_split_dimode_const_store (rtx, rtx);
>  bool aarch64_symbolic_address_p (rtx);
> -bool aarch64_uimm12_shift (HOST_WIDE_INT);
> +bool aarch64_uimm12_shift (unsigned HOST_WIDE_INT);
>  int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &);
> +bool aarch64_zeroextended_move_imm (unsigned HOST_WIDE_INT);
>  bool aarch64_use_return_insn_p (void);
>  const char *aarch64_output_casesi (rtx *);
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 5d1ab5aa42b2cda0a655d2bc69c4df19da457ab3..798363bcc449c414de5bbb4f26b8e1c64a0cf71a 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -5558,12 +5558,10 @@ aarch64_bitmask_imm (unsigned HOST_WIDE_INT val)
>
>  /* Return true if VAL is a valid bitmask immediate for MODE.  */
>  bool
> -aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
> +aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
>  {
>    if (mode == DImode)
> -    return aarch64_bitmask_imm (val_in);
> -
> -  unsigned HOST_WIDE_INT val = val_in;
> +    return aarch64_bitmask_imm (val);
>
>    if (mode == SImode)
>      return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32));
> @@ -5602,51 +5600,60 @@ aarch64_check_bitmask (unsigned HOST_WIDE_INT val,
>  }
>
>
> -/* Return true if val is an immediate that can be loaded into a
> -   register by a MOVZ instruction.  */
> -static bool
> -aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
> +/* Return true if immediate VAL can only be created by using a 32-bit
> +   zero-extended move immediate, not by a 64-bit move.  */
> +bool
> +aarch64_zeroextended_move_imm (unsigned HOST_WIDE_INT val)
>  {
> -  if (GET_MODE_SIZE (mode) > 4)
> -    {
> -      if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
> -          || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
> -       return 1;
> -    }
> -  else
> -    {
> -      /* Ignore sign extension.  */
> -      val &= (HOST_WIDE_INT) 0xffffffff;
> -    }
> -  return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
> -         || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
> +  if (val < 65536 || (val >> 32) != 0 || (val & 0xffff) == 0)
> +    return false;
> +  return !aarch64_bitmask_imm (val);
>  }
>
>
> -/* Return true if VAL is an immediate that can be loaded into a
> -   register in a single instruction.  */
> +/* Return true if VAL is an immediate that can be created by a single
> +   MOV instruction.  */
>  bool
> -aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
> +aarch64_move_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
>  {
> -  scalar_int_mode int_mode;
> -  if (!is_a <scalar_int_mode> (mode, &int_mode))
> -    return false;
> +  unsigned HOST_WIDE_INT val2;
>
> -  if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
> -    return 1;
> -  return aarch64_bitmask_imm (val, int_mode);
> +  gcc_assert (mode == SImode || mode == DImode);
> +
> +  if (val < 65536)
> +    return true;
> +
> +  val2 = val ^ ((HOST_WIDE_INT) val >> 63);

I think we're supposed to avoid relying on >> being an arithmetic shift,
since C++11 doesn't guarantee it.  So I think it needs to be something like:

  val2 = ((HOST_WIDE_INT) val < 0 ? ~val : val);

> +  if ((val2 >> (__builtin_ctzll (val2) & 48)) < 65536)
> +    return true;

We should use ctz_hwi here, instead of using the GCC builtin directly.

> +
> +  /* Special case 0xyyyyffffffffffff. */
> +  if (((val2 + 1) << 16) == 0)
> +    return true;
> +
> +  /* Special case immediates 0xffffyyyy and 0xyyyyffff.  */
> +  val2 = (mode == DImode) ? val : val2;
> +  if (((val2 + 1) & ~(unsigned HOST_WIDE_INT) 0xffff0000) == 0
> +      || (val2 >> 16) == 0xffff)
> +    return true;
> +
> +  if (mode == SImode || (val >> 32) == 0)
> +    val = (val & 0xffffffff) | (val << 32);
> +  return aarch64_bitmask_imm (val);

I agree the ctz trick is more elegant than (and an improvement over)
the current approach to testing for movz.  But I think the overall logic
is harder to follow than it was in the original version.  Initially
canonicalising val2 based on the sign bit seems unintuitive since we
still need to handle all four combinations of (top bit set, top bit clear)
x (low 48 bits set, low 48 bits clear).  I preferred the original
approach of testing once with the original value (for MOVZ) and once
with the inverted value (for MOVN).

Don't the new cases boil down to: if mode is DImode and the upper 32 bits
are clear, we can test based on SImode instead?  In other words, couldn't
the "(val >> 32) == 0" part of the final test be done first, with the
effect of changing the mode to SImode?  Something like:

  gcc_assert (mode == SImode || mode == DImode);

  /* We can't use a 64-bit MOVN if the upper 32 bits are clear, but we might
     be able to use a 32-bit MOVN.  We could also use a 32-bit ORR.  */
  if (mode == DImode && (val >> 32) == 0)
    mode = SImode;
  
  if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
    return true;

  if (mode == SImode)
    val = (val & 0xffffffff) | (val << 32);

  return aarch64_bitmask_imm (val);

with movw_imm rewritten to use ctz?  (Completely untested :-))
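
FWIW, the ctz-based movw_imm I had in mind would be something like this
(equally untested, and the interface is just a guess):

  /* Return true if VAL is a valid MOVZ immediate for MODE: a single
     16-bit chunk at a 16-bit-aligned position, with all other bits
     clear.  */
  static bool
  aarch64_movw_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
  {
    if (mode == SImode)
      /* Drop bits outside the 32-bit value (e.g. those set by ~).  */
      val &= 0xffffffff;

    /* Shift so that the lowest set bit's 16-bit-aligned chunk is at the
       bottom, then check that nothing is set above that chunk.  */
    return (val >> (ctz_hwi (val) & 48)) < 65536;
  }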

Or am I missing a case that the patch handles and the code above doesn't?

Thanks,
Richard


>  }
>
>
>  static int
>  aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
> -                               scalar_int_mode mode)
> +                               machine_mode mode)
>  {
>    int i;
>    unsigned HOST_WIDE_INT val, val2, mask;
>    int one_match, zero_match;
>    int num_insns;
>
> +  gcc_assert (mode == SImode || mode == DImode);
> +
>    val = INTVAL (imm);
>
>    if (aarch64_move_imm (val, mode))
> @@ -5656,31 +5663,6 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
>        return 1;
>      }
>
> -  /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
> -     (with XXXX non-zero). In that case check to see if the move can be done in
> -     a smaller mode.  */
> -  val2 = val & 0xffffffff;
> -  if (mode == DImode
> -      && aarch64_move_imm (val2, SImode)
> -      && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
> -    {
> -      if (generate)
> -       emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
> -
> -      /* Check if we have to emit a second instruction by checking to see
> -        if any of the upper 32 bits of the original DI mode value is set.  */
> -      if (val == val2)
> -       return 1;
> -
> -      i = (val >> 48) ? 48 : 32;
> -
> -      if (generate)
> -        emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> -                                   GEN_INT ((val >> i) & 0xffff)));
> -
> -      return 2;
> -    }
> -
>    if ((val >> 32) == 0 || mode == SImode)
>      {
>        if (generate)
> @@ -5704,24 +5686,31 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
>    one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
>      ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
>
> +  /* Try a bitmask immediate and a movk to generate the immediate
> +     in 2 instructions.  */
> +
>    if (zero_match < 2 && one_match < 2)
>      {
> -      /* Try emitting a bitmask immediate with a movk replacing 16 bits.
> -        For a 64-bit bitmask try whether changing 16 bits to all ones or
> -        zeroes creates a valid bitmask.  To check any repeated bitmask,
> -        try using 16 bits from the other 32-bit half of val.  */
> -
>        for (i = 0; i < 64; i += 16)
> -       if (aarch64_check_bitmask (val, val2, mask << i))
> -         {
> -           if (generate)
> -             {
> -               emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
> -               emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> -                                          GEN_INT ((val >> i) & 0xffff)));
> -             }
> -           return 2;
> -         }
> +       {
> +         if (aarch64_check_bitmask (val, val2, mask << i))
> +           break;
> +
> +         val2 = val & ~(mask << i);
> +         if ((val2 >> 32) == 0 && aarch64_move_imm (val2, DImode))
> +           break;
> +       }
> +
> +      if (i != 64)
> +       {
> +         if (generate)
> +           {
> +             emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
> +             emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> +                                        GEN_INT ((val >> i) & 0xffff)));
> +           }
> +         return 2;
> +       }
>      }
>
>    /* Try a bitmask plus 2 movk to generate the immediate in 3 instructions.  */
> @@ -5790,26 +5779,24 @@ aarch64_mov128_immediate (rtx imm)
>  /* Return true if val can be encoded as a 12-bit unsigned immediate with
>     a left shift of 0 or 12 bits.  */
>  bool
> -aarch64_uimm12_shift (HOST_WIDE_INT val)
> +aarch64_uimm12_shift (unsigned HOST_WIDE_INT val)
>  {
> -  return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
> -         || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
> -         );
> +  return val < 4096 || (val & 0xfff000) == val;
>  }
>
>  /* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate
>     that can be created with a left shift of 0 or 12.  */
>  static HOST_WIDE_INT
> -aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val)
> +aarch64_clamp_to_uimm12_shift (unsigned HOST_WIDE_INT val)
>  {
>    /* Check to see if the value fits in 24 bits, as that is the maximum we can
>       handle correctly.  */
> -  gcc_assert ((val & 0xffffff) == val);
> +  gcc_assert (val < 0x1000000);
>
> -  if (((val & 0xfff) << 0) == val)
> +  if (val < 4096)
>      return val;
>
> -  return val & (0xfff << 12);
> +  return val & 0xfff000;
>  }
>
>
> @@ -6957,8 +6944,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
>        return;
>      }
>
> -  aarch64_internal_mov_immediate (dest, imm, true,
> -                                 as_a <scalar_int_mode> (mode));
> +  aarch64_internal_mov_immediate (dest, imm, true, mode);
>  }
>
>  /* Return the MEM rtx that provides the canary value that should be used
> @@ -11130,9 +11116,7 @@ aarch64_float_const_rtx_p (rtx x)
>        && SCALAR_FLOAT_MODE_P (mode)
>        && aarch64_reinterpret_float_as_int (x, &ival))
>      {
> -      scalar_int_mode imode = (mode == HFmode
> -                              ? SImode
> -                              : int_mode_for_mode (mode).require ());
> +      machine_mode imode = (mode == DFmode) ? DImode : SImode;
>        int num_instr = aarch64_internal_mov_immediate
>                         (NULL_RTX, gen_int_mode (ival, imode), false, imode);
>        return num_instr < 3;
> @@ -13790,10 +13774,9 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
>              proportionally expensive to the number of instructions
>              required to build that constant.  This is true whether we
>              are compiling for SPEED or otherwise.  */
> -         if (!is_a <scalar_int_mode> (mode, &int_mode))
> -           int_mode = word_mode;
> +         machine_mode imode = (mode == SImode) ? SImode : DImode;
>           *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
> -                                (NULL_RTX, x, false, int_mode));
> +                                (NULL_RTX, x, false, imode));
>         }
>        return true;
>
> @@ -13809,9 +13792,7 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
>           bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
>           gcc_assert (succeed);
>
> -         scalar_int_mode imode = (mode == HFmode
> -                                  ? SImode
> -                                  : int_mode_for_mode (mode).require ());
> +         machine_mode imode = (mode == DFmode) ? DImode : SImode;
>           int ncost = aarch64_internal_mov_immediate
>                 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
>           *cost += COSTS_N_INSNS (ncost);
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index f2e3d905dbbeb2949f2947f5cfd68208c94c9272..604a67d87a7ef525efebed48f39de43c97f2a397 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1309,16 +1309,15 @@ (define_insn_and_split "*movsi_aarch64"
>  )
>
>  (define_insn_and_split "*movdi_aarch64"
> -  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r, r,w, m,m,   r,  r,  r, w,r,w, w")
> -       (match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
> +  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m,m,   r,  r,  r, w,r,w, w")
> +       (match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,O,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
>    "(register_operand (operands[0], DImode)
>      || aarch64_reg_or_zero (operands[1], DImode))"
>    "@
>     mov\\t%x0, %x1
>     mov\\t%0, %x1
>     mov\\t%x0, %1
> -   mov\\t%x0, %1
> -   mov\\t%w0, %1
> +   * return aarch64_zeroextended_move_imm (INTVAL (operands[1])) ? \"mov\\t%w0, %1\" : \"mov\\t%x0, %1\";
>     #
>     * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
>     ldr\\t%x0, %1
> @@ -1340,11 +1339,11 @@ (define_insn_and_split "*movdi_aarch64"
>         DONE;
>      }"
>    ;; The "mov_imm" type for CNTD is just a placeholder.
> -  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,mov_imm,
> +  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,
>                      load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc,
>                      fmov,neon_move")
> -   (set_attr "arch"   "*,*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
> -   (set_attr "length" "4,4,4,4,4,*,  4,4, 4,4, 4,8,4,4, 4, 4, 4,   4")]
> +   (set_attr "arch"   "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
> +   (set_attr "length" "4,4,4,4,*,  4,4, 4,4, 4,8,4,4, 4, 4, 4,   4")]
>  )
>
>  (define_insn "insv_imm<mode>"
> @@ -1508,7 +1507,7 @@ (define_insn "*mov<mode>_aarch64"
>
>  (define_insn "*mov<mode>_aarch64"
>    [(set (match_operand:DFD 0 "nonimmediate_operand" "=w, w  ,?r,w,w  ,w  ,w,m,r,m ,r,r")
> -       (match_operand:DFD 1 "general_operand"      "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
> +       (match_operand:DFD 1 "general_operand"      "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,O"))]
>    "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
>      || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
>    "@
> @@ -1523,7 +1522,7 @@ (define_insn "*mov<mode>_aarch64"
>     ldr\\t%x0, %1
>     str\\t%x1, %0
>     mov\\t%x0, %x1
> -   mov\\t%x0, %1"
> +   * return aarch64_zeroextended_move_imm (INTVAL (operands[1])) ? \"mov\\t%w0, %1\" : \"mov\\t%x0, %1\";"
>    [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
>                      f_loadd,f_stored,load_8,store_8,mov_reg,\
>                      fconstd")
> diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
> index ee7587cca1673208e2bfd6b503a21d0c8b69bf75..e91c7eab0b3674ca34ac2f790c38fcd27986c35f 100644
> --- a/gcc/config/aarch64/constraints.md
> +++ b/gcc/config/aarch64/constraints.md
> @@ -106,6 +106,12 @@ (define_constraint "M"
>
>  (define_constraint "N"
>   "A constant that can be used with a 64-bit MOV immediate operation."
> + (and (match_code "const_int")
> +      (match_test "aarch64_move_imm (ival, DImode)")
> +      (match_test "!aarch64_zeroextended_move_imm (ival)")))
> +
> +(define_constraint "O"
> + "A constant that can be used with a 32 or 64-bit MOV immediate operation."
>   (and (match_code "const_int")
>        (match_test "aarch64_move_imm (ival, DImode)")))
