public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
To: "Dr. David Alan Gilbert" <david.gilbert@linaro.org>
Cc: gcc-patches@gcc.gnu.org, patches@linaro.org
Subject: Re: [Patch 1/3] ARM 64 bit atomic operations
Date: Tue, 12 Jul 2011 21:24:00 -0000	[thread overview]
Message-ID: <CACUk7=VpvPGJBbg-uWM3Jv4oaS9SEqFishRWq_ZH7EuSnmN-LA@mail.gmail.com> (raw)
In-Reply-To: <20110701155446.GB5242@davesworkthinkpad>

Hi Dave,

Could you split this further into a patch that deals with the
case for disabling MCR memory barriers for Thumb1 so that it
may be backported to the release branches ? I have commented inline
as well.

Could you also provide a proper changelog entry for this that will
also help with review of the patch ?

I've not yet managed to fully review all the bits in this patch but
here's some initial comments that should be looked at.

On 1 July 2011 16:54, Dr. David Alan Gilbert <david.gilbert@linaro.org> wrote:
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index 057f9ba..39057d2 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c

>
>  /* Emit a strex{b,h,d, } instruction appropriate for the specified
> @@ -23374,14 +23388,29 @@ arm_output_strex (emit_f emit,
>                  rtx value,
>                  rtx memory)
>  {
> -  const char *suffix = arm_ldrex_suffix (mode);
> -  rtx operands[3];
> +  rtx operands[4];
>
>   operands[0] = result;
>   operands[1] = value;
> -  operands[2] = memory;
> -  arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
> -                      cc);
> +  if (mode != DImode)
> +    {
> +      const char *suffix = arm_ldrex_suffix (mode);
> +      operands[2] = memory;
> +      arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2",
> +                         suffix, cc);
> +    }
> +  else
> +    {
> +      /* The restrictions on target registers in ARM mode are that the two
> +        registers are consecutive and the first one is even; Thumb is
> +        actually more flexible, but DI should give us this anyway.
> +        Note that the 1st register always gets the lowest word in memory.  */
> +      gcc_assert ((REGNO (value) & 1) == 0);
> +      operands[2] = gen_rtx_REG (SImode, REGNO (value) + 1);
> +      operands[3] = memory;
> +      arm_output_asm_insn (emit, 0, operands, "strexd%s\t%%0, %%1, %%2, %%C3",
> +                          cc);
> +    }
>  }
>
>  /* Helper to emit a two operand instruction.  */
> @@ -23423,7 +23452,7 @@ arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
>
>    required_value:
>
> -   RTX register or const_int representing the required old_value for
> +   RTX register representing the required old_value for
>    the modify to continue, if NULL no comparsion is performed.  */
>  static void
>  arm_output_sync_loop (emit_f emit,
> @@ -23437,7 +23466,13 @@ arm_output_sync_loop (emit_f emit,
>                      enum attr_sync_op sync_op,
>                      int early_barrier_required)
>  {
> -  rtx operands[1];
> +  rtx operands[2];
> +  /* We'll use the lo for the normal rtx in the none-DI case
> +     as well as the least-sig word in the DI case.  */
> +  rtx old_value_lo, required_value_lo, new_value_lo, t1_lo;
> +  rtx old_value_hi, required_value_hi, new_value_hi, t1_hi;
> +
> +  bool is_di = mode == DImode;
>
>   gcc_assert (t1 != t2);
>
> @@ -23448,82 +23483,142 @@ arm_output_sync_loop (emit_f emit,
>
>   arm_output_ldrex (emit, mode, old_value, memory);
>
> +  if (is_di)
> +    {
> +      old_value_lo = gen_lowpart (SImode, old_value);
> +      old_value_hi = gen_highpart (SImode, old_value);
> +      if (required_value)
> +       {
> +         required_value_lo = gen_lowpart (SImode, required_value);
> +         required_value_hi = gen_highpart (SImode, required_value);
> +       }
> +      else
> +       {
> +         /* Silence false potentially unused warning */
> +         required_value_lo = NULL;
> +         required_value_hi = NULL;
> +       }
> +      new_value_lo = gen_lowpart (SImode, new_value);
> +      new_value_hi = gen_highpart (SImode, new_value);
> +      t1_lo = gen_lowpart (SImode, t1);
> +      t1_hi = gen_highpart (SImode, t1);
> +    }
> +  else
> +    {
> +      old_value_lo = old_value;
> +      new_value_lo = new_value;
> +      required_value_lo = required_value;
> +      t1_lo = t1;
> +
> +      /* Silence false potentially unused warning */
> +      t1_hi = NULL;
> +      new_value_hi = NULL;
> +      required_value_hi = NULL;
> +      old_value_hi = NULL;
> +    }
> +
>   if (required_value)
>     {
> -      rtx operands[2];
> +      operands[0] = old_value_lo;
> +      operands[1] = required_value_lo;
>
> -      operands[0] = old_value;
> -      operands[1] = required_value;
>       arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
> +      if (is_di)
> +        {
> +          arm_output_asm_insn (emit, 0, operands, "it\teq");

This should be guarded with an if (TARGET_THUMB2) - there's no point in
accounting for the length of this instruction in the compiler and then
have the assembler fold it away in ARM state.

>
> diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
> index c32ef1a..3fdd22f 100644
> --- a/gcc/config/arm/arm.h
> +++ b/gcc/config/arm/arm.h
> @@ -282,7 +282,8 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
> -#define TARGET_HAVE_DMB_MCR    (arm_arch6k && ! TARGET_HAVE_DMB)
> +#define TARGET_HAVE_DMB_MCR    (arm_arch6k && ! TARGET_HAVE_DMB \
> +                                && ! TARGET_THUMB1)

This hunk (TARGET_HAVE_DMB_MCR) should probably be backported to
release branches because this is technically fixing an issue and
hence should be a separate patch that can be looked at separately.

>
>  /* Nonzero if this chip implements a memory barrier instruction.  */
>  #define TARGET_HAVE_MEMORY_BARRIER (TARGET_HAVE_DMB || TARGET_HAVE_DMB_MCR)
> @@ -290,8 +291,12 @@ extern void (*arm_lang_output_object_attributes_hook)(void);

sync.md changes -

> (define_mode_iterator NARROW [QI HI])
>+(define_mode_iterator QHSD [QI HI SI DI])
>+(define_mode_iterator SIDI [SI DI])
>+
>+(define_mode_attr sync_predtab [(SI "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER")
>+				(QI "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER")
>+				(HI "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER")
>+				(DI "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN && TARGET_HAVE_MEMORY_BARRIER")])
>+

Can we move all the iterators to iterators.md and then arrange
includes to work automatically ? Minor nit - could you align the entries
for QI, HI and DI with the start of the SI ?

>+(define_mode_attr sync_atleastsi [(SI "SI")
>+				  (DI "DI")
>+				  (HI "SI")
>+				  (QI "SI")])
>

I couldn't spot where this was being used. Can this be removed if not
necessary ?

>
>-(define_insn "arm_sync_new_nandsi"
>+(define_insn "arm_sync_new_<sync_optab><mode>"
>   [(set (match_operand:SI 0 "s_register_operand" "=&r")
>-        (unspec_volatile:SI [(not:SI (and:SI
>-                               (match_operand:SI 1 "arm_sync_memory_operand" "+Q")
>-                               (match_operand:SI 2 "s_register_operand" "r")))
>-	                    ]
>-	                    VUNSPEC_SYNC_NEW_OP))
>+        (unspec_volatile:SI [(syncop:SI
>+			       (zero_extend:SI
>+				 (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
>+			       (match_operand:SI 2 "s_register_operand" "r"))
>+			    ]
>+			    VUNSPEC_SYNC_NEW_OP))
>    (set (match_dup 1)
>-        (unspec_volatile:SI [(match_dup 1) (match_dup 2)]
>-	                    VUNSPEC_SYNC_NEW_OP))
>+	(unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
>+				VUNSPEC_SYNC_NEW_OP))
>    (clobber (reg:CC CC_REGNUM))
>    (clobber (match_scratch:SI 3 "=&r"))]
>-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
>+  "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER"

Can't this just use <sync_predtab> instead since the condition is identical
for QImode and HImode from that mode attribute and in quite a few
places below. ?

>@@ -461,19 +359,19 @@
>         (unspec_volatile:SI
> 	  [(not:SI
> 	     (and:SI
>-               (zero_extend:SI	
>-	         (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
>-               (match_operand:SI 2 "s_register_operand" "r")))
>+	       (zero_extend:SI	
>+		 (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
>+	       (match_operand:SI 2 "s_register_operand" "r")))
> 	  ] VUNSPEC_SYNC_NEW_OP))
>    (set (match_dup 1)
>         (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
>-	                        VUNSPEC_SYNC_NEW_OP))
>+				VUNSPEC_SYNC_NEW_OP))
>    (clobber (reg:CC CC_REGNUM))
>    (clobber (match_scratch:SI 3 "=&r"))]
>-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
>+  "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER"

Again here . Not sure which pattern this is though just looking at the patch.

>   {
>     return arm_output_sync_insn (insn, operands);
>-  }
>+  }
>   [(set_attr "sync_result"          "0")
>    (set_attr "sync_memory"          "1")
>    (set_attr "sync_new_value"       "2")
>@@ -483,20 +381,20 @@
>    (set_attr "conds" "clob")
>    (set_attr "predicable" "no")])
>
>-(define_insn "arm_sync_old_<sync_optab>si"
>-  [(set (match_operand:SI 0 "s_register_operand" "=&r")
>-        (unspec_volatile:SI [(syncop:SI
>-                               (match_operand:SI 1 "arm_sync_memory_operand" "+Q")
>-                               (match_operand:SI 2 "s_register_operand" "r"))
>-	                    ]
>-	                    VUNSPEC_SYNC_OLD_OP))
>+(define_insn "arm_sync_old_<sync_optab><mode>"
>+  [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
>+	(unspec_volatile:SIDI [(syncop:SIDI
>+			       (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q")
>+			       (match_operand:SIDI 2 "s_register_operand" "r"))
>+			    ]
>+			    VUNSPEC_SYNC_OLD_OP))
>    (set (match_dup 1)
>-        (unspec_volatile:SI [(match_dup 1) (match_dup 2)]
>-	                    VUNSPEC_SYNC_OLD_OP))
>+        (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)]
>+			      VUNSPEC_SYNC_OLD_OP))
>    (clobber (reg:CC CC_REGNUM))
>-   (clobber (match_scratch:SI 3 "=&r"))
>+   (clobber (match_scratch:SIDI 3 "=&r"))
>    (clobber (match_scratch:SI 4 "<sync_clobber>"))]
>-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
>+  "<sync_predtab>"
>   {
>     return arm_output_sync_insn (insn, operands);
>   }
>@@ -509,20 +407,21 @@
>    (set_attr "conds" "clob")
>    (set_attr "predicable" "no")])
>
>-(define_insn "arm_sync_old_nandsi"
>+(define_insn "arm_sync_old_<sync_optab><mode>"
>   [(set (match_operand:SI 0 "s_register_operand" "=&r")
>-        (unspec_volatile:SI [(not:SI (and:SI
>-                               (match_operand:SI 1 "arm_sync_memory_operand" "+Q")
>-                               (match_operand:SI 2 "s_register_operand" "r")))
>-	                    ]
>-	                    VUNSPEC_SYNC_OLD_OP))
>+        (unspec_volatile:SI [(syncop:SI
>+			       (zero_extend:SI
>+				 (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
>+			       (match_operand:SI 2 "s_register_operand" "r"))
>+			    ]
>+			    VUNSPEC_SYNC_OLD_OP))
>    (set (match_dup 1)
>-        (unspec_volatile:SI [(match_dup 1) (match_dup 2)]
>-	                    VUNSPEC_SYNC_OLD_OP))
>+	(unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
>+			    VUNSPEC_SYNC_OLD_OP))
>    (clobber (reg:CC CC_REGNUM))
>    (clobber (match_scratch:SI 3 "=&r"))
>-   (clobber (match_scratch:SI 4 "=&r"))]
>-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
>+   (clobber (match_scratch:SI 4 "<sync_clobber>"))]
>+  "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER"

Likewise for sync_predtab.

>-(define_insn "arm_sync_old_<sync_optab><mode>"
>-  [(set (match_operand:SI 0 "s_register_operand" "=&r")
>-        (unspec_volatile:SI [(syncop:SI
>-                               (zero_extend:SI
>-			         (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
>-                               (match_operand:SI 2 "s_register_operand" "r"))
>-	                    ]
>-	                    VUNSPEC_SYNC_OLD_OP))
>+(define_insn "arm_sync_old_nand<mode>"
>+  [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
>+	(unspec_volatile:SIDI [(not:SIDI (and:SIDI
>+			       (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q")
>+			       (match_operand:SIDI 2 "s_register_operand" "r")))
>+			    ]
>+			    VUNSPEC_SYNC_OLD_OP))
>    (set (match_dup 1)
>-        (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
>+        (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)]
> 	                    VUNSPEC_SYNC_OLD_OP))
>    (clobber (reg:CC CC_REGNUM))
>-   (clobber (match_scratch:SI 3 "=&r"))
>-   (clobber (match_scratch:SI 4 "<sync_clobber>"))]
>-  "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
>+   (clobber (match_scratch:SIDI 3 "=&r"))
>+   (clobber (match_scratch:SI 4 "=&r"))]
>+  "<sync_predtab>"
>   {
>     return arm_output_sync_insn (insn, operands);
>   }
>@@ -557,26 +455,26 @@
>    (set_attr "sync_memory"          "1")
>    (set_attr "sync_new_value"       "2")
>    (set_attr "sync_t1"              "3")
>-   (set_attr "sync_t2"              "<sync_t2_reqd>")
>-   (set_attr "sync_op"              "<sync_optab>")
>+   (set_attr "sync_t2"              "4")
>+   (set_attr "sync_op"              "nand")
>    (set_attr "conds" 		    "clob")
>    (set_attr "predicable" "no")])
>
> (define_insn "arm_sync_old_nand<mode>"
>   [(set (match_operand:SI 0 "s_register_operand" "=&r")
>-        (unspec_volatile:SI [(not:SI (and:SI
>-                               (zero_extend:SI
>-			         (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
>-                               (match_operand:SI 2 "s_register_operand" "r")))
>-	                    ]
>-	                    VUNSPEC_SYNC_OLD_OP))
>+	(unspec_volatile:SI [(not:SI (and:SI
>+			       (zero_extend:SI
>+				 (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
>+			       (match_operand:SI 2 "s_register_operand" "r")))
>+			    ]
>+			    VUNSPEC_SYNC_OLD_OP))
>    (set (match_dup 1)
>-        (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
>-	                    VUNSPEC_SYNC_OLD_OP))
>+	(unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
>+			    VUNSPEC_SYNC_OLD_OP))
>    (clobber (reg:CC CC_REGNUM))
>    (clobber (match_scratch:SI 3 "=&r"))
>    (clobber (match_scratch:SI 4 "=&r"))]
>-  "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
>+  "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER"
>   {
>     return arm_output_sync_insn (insn, operands);
>   }


Cheers
Ramana

  reply	other threads:[~2011-07-12 21:08 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-07-01 15:53 [Patch 0/3] " Dr. David Alan Gilbert
2011-07-01 15:55 ` [Patch 1/3] " Dr. David Alan Gilbert
2011-07-12 21:24   ` Ramana Radhakrishnan [this message]
2011-07-13  9:18     ` David Gilbert
2011-07-01 15:56 ` [Patch 2/3] " Dr. David Alan Gilbert
2011-07-01 16:03   ` Richard Henderson
2011-07-01 17:08     ` David Gilbert
2011-07-01 19:38   ` Joseph S. Myers
2011-07-04 22:27     ` David Gilbert
2011-07-01 15:57 ` [Patch 3/3] " Dr. David Alan Gilbert
2011-07-12 15:31   ` Ramana Radhakrishnan
2011-07-12 12:55 ` [Patch 0/3] " Ramana Radhakrishnan
2011-07-26  9:14   ` [Patch 0/4] ARM 64 bit sync atomic operations [V2] Dr. David Alan Gilbert
2011-07-26  9:16     ` [Patch 1/4] " Dr. David Alan Gilbert
2011-07-26  9:18       ` [Patch 2/4] " Dr. David Alan Gilbert
2011-07-26  9:23         ` [Patch 3/4] " Dr. David Alan Gilbert
2011-07-26  9:24           ` [Patch 4/4] " Dr. David Alan Gilbert
2011-08-01 15:43             ` Ramana Radhakrishnan
2011-08-17 13:07           ` [Patch 3/4] " Ramana Radhakrishnan
2011-09-30 18:04         ` [Patch 2/4] " Ramana Radhakrishnan
2011-09-30 18:29           ` H.J. Lu
2011-10-03 13:22             ` David Gilbert
2011-09-30 20:51           ` Joseph S. Myers
2011-10-03  8:35             ` Andrew Haley
2011-10-03 14:04               ` David Gilbert
2011-10-03 15:56               ` Joseph S. Myers
2011-10-03 16:07                 ` Jakub Jelinek
2011-09-30 14:25       ` [Patch 1/4] " Ramana Radhakrishnan
2011-10-03 12:53         ` David Gilbert
2011-10-03 15:18           ` David Gilbert
2011-10-06 17:52             ` [Patch 0/5] ARM 64 bit sync atomic operations [V3] Dr. David Alan Gilbert
2011-10-06 17:53               ` [Patch 1/5] " Dr. David Alan Gilbert
2011-10-06 17:54                 ` [Patch 2/5] " Dr. David Alan Gilbert
2011-10-06 17:55                   ` [Patch 3/5] " Dr. David Alan Gilbert
2011-10-06 18:03                     ` [Patch 4/5] " Dr. David Alan Gilbert
2011-10-06 18:03                       ` [Patch 5/5] " Dr. David Alan Gilbert
2011-10-12  4:51                         ` Ramana Radhakrishnan
2011-10-12  5:06                         ` Mike Stump
2011-10-12  0:53                       ` [Patch 4/5] " Ramana Radhakrishnan
2011-10-12  2:35                     ` [Patch 3/5] " Ramana Radhakrishnan
2011-10-12  0:52                   ` [Patch 2/5] " Ramana Radhakrishnan
2011-10-12  1:45                   ` Ramana Radhakrishnan
2011-10-12  0:51                 ` [Patch 1/5] " Ramana Radhakrishnan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CACUk7=VpvPGJBbg-uWM3Jv4oaS9SEqFishRWq_ZH7EuSnmN-LA@mail.gmail.com' \
    --to=ramana.radhakrishnan@linaro.org \
    --cc=david.gilbert@linaro.org \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=patches@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).