public inbox for gcc@gcc.gnu.org
 help / color / mirror / Atom feed
* [4.3] Invalid code or invalid optimisation?
@ 2009-06-04  1:39 Dave Korn
  2009-06-04  8:27 ` Andrew Haley
  0 siblings, 1 reply; 5+ messages in thread
From: Dave Korn @ 2009-06-04  1:39 UTC (permalink / raw)
  To: gcc

[-- Attachment #1: Type: text/plain, Size: 4048 bytes --]


    Good morning everyone,

  I have an interesting situation.  In this bit of code below, extracted from
a simple testcase, I have a singly-linked list:

------------------------------<snip!>------------------------------
template <class list_node> class List
{
 public:
  List() : head(__null)
  {
  }
  void insert (list_node *node)
  {
    List_insert (head, node);
  }
  list_node *head;
};

class pthread_mutex: public verifyable_object
{
public:
        [ ... data members ... ]
  pthread_mutex (pthread_mutexattr * = __null);
  ~pthread_mutex ();

  class pthread_mutex * next;

private:
  static List<pthread_mutex> mutexes;
};

List<pthread_mutex> pthread_mutex::mutexes;

pthread_mutex::pthread_mutex (pthread_mutexattr *attr) :
        [ ... member initialisers ... ]
{
        [ ... code ... ]
  mutexes.insert (this);
}
------------------------------<snip!>------------------------------

  I am getting unexpected results in the inlined List.insert operation in the
pthread_mutex constructor.  The critical part of the code uses an inlined
interlocked compare-and-exchange asm, that looks like this:

------------------------------<snip!>------------------------------
extern __inline__ long
ilockcmpexch (volatile long *t, long v, long c)
{
  return ({
		__typeof (*t) ret;
		__asm __volatile ("lock cmpxchgl %2, %1"
			: "=a" (ret), "=m" (*t)
			: "r" (v), "m" (*t), "0" (c));
		ret;
	});
}

template <class list_node> inline void
List_insert (list_node *&head, list_node *node)
{
  if (!node)
    return;
  do
    node->next = head;
  while ((PVOID)ilockcmpexch((LONG volatile
*)(&head),(LONG)(node),(LONG)(node->next)) != node->next);
}
------------------------------<snip!>------------------------------

  To my surprise, GCCs 4.3.2 and 4.3.3 at -O2 both sink the store to
node->next after the call to ilockcmpexch:

------------------------------<snip!>------------------------------
__ZN13pthread_mutexC1EP17pthread_mutexattr:
        [ ... code ... ]
L15:
	movl	__ZN13pthread_mutex7mutexesE, %edx	 # mutexes.head, D.1991
	movl	%edx, %eax	 # D.1991, tmp69
/APP
 # 35 "mxfull.cpp" 1
	lock cmpxchgl %esi, __ZN13pthread_mutex7mutexesE	 # this,
 # 0 "" 2
/NO_APP
	movl	%eax, -12(%ebp)	 # tmp69, ret
	movl	-12(%ebp), %eax	 # ret, D.1988
	cmpl	%eax, %edx	 # D.1988, D.1991
	jne	L15	 #,
	movl	%edx, 36(%esi)	 # D.1991, <variable>.next
------------------------------<snip!>------------------------------

  This is obviously bad news for the consistency of the list; the value of
'head' is cached in %edx and not written to node->next until after the
ilockcmpexch inline, meaning an incompletely-constructed node gets linked on
the front of the chain for a window of several instructions.  By contrast, a
recent build from head does what I want: it writes to node->next in front of
the ilockcmpexch, and only tests its value afterward:

------------------------------<snip!>------------------------------
__ZN13pthread_mutexC2EP17pthread_mutexattr:
        [ ... code ... ]
L9:
	movl	__ZN13pthread_mutex7mutexesE, %eax	 # mutexes.head, D.2119
	movl	%eax, 36(%ebx)	 # D.2119, <variable>.next
/APP
 # 35 "mxfull.cpp" 1
	lock cmpxchgl %ebx, __ZN13pthread_mutex7mutexesE	 # this,
 # 0 "" 2
/NO_APP
	movl	%eax, -12(%ebp)	 # tmp79, ret
	movl	-12(%ebp), %eax	 # ret, D.2120
	cmpl	%eax, 36(%ebx)	 # D.2120, <variable>.next
	jne	L9	 #,
------------------------------<snip!>------------------------------

  Adding a "memory" clobber to the inline asm works around the problem,
causing 4.3 series to generate the same assembly as head, but I think it's a
sledgehammer approach.  Am I asking too much of GCC to not sink the store, or
is 4.3 doing something wrong?  I /think/ that the fact that there's a volatile
store in ilockcmpexch means the earlier store shouldn't be moved past it, and
that GCC is perhaps missing that the asm's output operand effectively
represents a volatile write through *t, but I could be misunderstanding the
rules about volatile.  Anyone got their language lawyer's hat on at the moment?

    cheers,
      DaveK


[-- Attachment #2: mxfull.cpp --]
[-- Type: text/plain, Size: 2369 bytes --]


// g++ -c mxfull.cpp -o mxfull.o --save-temps -O2 -fverbose-asm

typedef long LONG;
typedef void *HANDLE;
typedef void *PVOID;
typedef char *LPCSTR;

typedef class pthread_mutex *pthread_mutex_t;
typedef class pthread_mutexattr *pthread_mutexattr_t;
typedef class pthread *pthread_t;

struct SECURITY_ATTRIBUTES;
typedef struct SECURITY_ATTRIBUTES *LPSECURITY_ATTRIBUTES;
extern struct SECURITY_ATTRIBUTES sec_none_nih;

HANDLE __attribute__((__stdcall__)) CreateSemaphoreA(LPSECURITY_ATTRIBUTES,LONG,LONG,LPCSTR);

class verifyable_object
{
public:
  long magic;

  verifyable_object (long);
  virtual ~verifyable_object ();
};

extern __inline__ long
ilockcmpexch (volatile long *t, long v, long c)
{
  return ({
		__typeof (*t) ret;
		__asm __volatile ("lock cmpxchgl %2, %1"
			: "=a" (ret), "=m" (*t)
			: "r" (v), "m" (*t), "0" (c));
		ret;
	});
}

class pthread_mutexattr: public verifyable_object
{
public:
  int pshared;
  int mutextype;
  pthread_mutexattr ();
  ~pthread_mutexattr ();
};

template <class list_node> inline void
List_insert (list_node *&head, list_node *node)
{
  if (!node)
    return;
  do
    node->next = head;
  while ((PVOID)ilockcmpexch((LONG volatile *)(&head),(LONG)(node),(LONG)(node->next)) != node->next);
}

template <class list_node> class List
{
 public:
  List() : head(__null)
  {
  }

  ~List()
  {
  }

  void insert (list_node *node)
  {
    List_insert (head, node);
  }

  list_node *head;

};

class pthread_mutex: public verifyable_object
{
public:

  unsigned long lock_counter;
  HANDLE win32_obj_id;
  unsigned int recursion_counter;
  LONG condwaits;
  pthread_t owner;



  int type;
  int pshared;


  pthread_mutex (pthread_mutexattr * = __null);
  ~pthread_mutex ();

  class pthread_mutex * next;

private:
  static List<pthread_mutex> mutexes;
};

List<pthread_mutex> pthread_mutex::mutexes;

pthread_mutex::pthread_mutex (pthread_mutexattr *attr) :
  verifyable_object (0xdf0df045 +1),
  lock_counter (0),
  win32_obj_id (__null), recursion_counter (0),
  condwaits (0), owner (__null),



  type (1),
  pshared (0)
{
  win32_obj_id = ::CreateSemaphoreA (&sec_none_nih, 0, 2147483647L, __null);
  if (!win32_obj_id)
    {
      magic = 0;
      return;
    }

  if (attr)
    {
      if (attr->pshared == 1)
 {

   magic = 0;
   return;
 }

      type = attr->mutextype;
    }

  mutexes.insert (this);
}


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2009-06-04  9:57 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-06-04  1:39 [4.3] Invalid code or invalid optimisation? Dave Korn
2009-06-04  8:27 ` Andrew Haley
2009-06-04  9:22   ` Dave Korn
2009-06-04  9:48     ` Dave Korn
2009-06-04  9:57     ` Andrew Haley

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).