From: Dave Korn <dave.korn.cygwin@googlemail.com>
To: "gcc@gcc.gnu.org" <gcc@gcc.gnu.org>
Subject: [4.3] Invalid code or invalid optimisation?
Date: Thu, 04 Jun 2009 01:39:00 -0000 [thread overview]
Message-ID: <4A2728AA.3010205@gmail.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 4048 bytes --]
Good morning everyone,
I have an interesting situation. In this bit of code below, extracted from
a simple testcase, I have a singly-linked list:
------------------------------<snip!>------------------------------
template <class list_node> class List
{
public:
List() : head(__null)
{
}
void insert (list_node *node)
{
List_insert (head, node);
}
list_node *head;
};
class pthread_mutex: public verifyable_object
{
public:
[ ... data members ... ]
pthread_mutex (pthread_mutexattr * = __null);
~pthread_mutex ();
class pthread_mutex * next;
private:
static List<pthread_mutex> mutexes;
};
List<pthread_mutex> pthread_mutex::mutexes;
pthread_mutex::pthread_mutex (pthread_mutexattr *attr) :
[ ... member initialisers ... ]
{
[ ... code ... ]
mutexes.insert (this);
}
------------------------------<snip!>------------------------------
I am getting unexpected results in the inlined List.insert operation in the
pthread_mutex constructor. The critical part of the code uses an inlined
interlocked compare-and-exchange asm, that looks like this:
------------------------------<snip!>------------------------------
extern __inline__ long
ilockcmpexch (volatile long *t, long v, long c)
{
return ({
__typeof (*t) ret;
__asm __volatile ("lock cmpxchgl %2, %1"
: "=a" (ret), "=m" (*t)
: "r" (v), "m" (*t), "0" (c));
ret;
});
}
template <class list_node> inline void
List_insert (list_node *&head, list_node *node)
{
if (!node)
return;
do
node->next = head;
while ((PVOID)ilockcmpexch((LONG volatile
*)(&head),(LONG)(node),(LONG)(node->next)) != node->next);
}
------------------------------<snip!>------------------------------
To my surprise, GCCs 4.3.2 and 4.3.3 at -O2 both sink the store to
node->next after the call to ilockcmpexch:
------------------------------<snip!>------------------------------
__ZN13pthread_mutexC1EP17pthread_mutexattr:
[ ... code ... ]
L15:
movl __ZN13pthread_mutex7mutexesE, %edx # mutexes.head, D.1991
movl %edx, %eax # D.1991, tmp69
/APP
# 35 "mxfull.cpp" 1
lock cmpxchgl %esi, __ZN13pthread_mutex7mutexesE # this,
# 0 "" 2
/NO_APP
movl %eax, -12(%ebp) # tmp69, ret
movl -12(%ebp), %eax # ret, D.1988
cmpl %eax, %edx # D.1988, D.1991
jne L15 #,
movl %edx, 36(%esi) # D.1991, <variable>.next
------------------------------<snip!>------------------------------
This is obviously bad news for the consistency of the list; the value of
'head' is cached in %edx and not written to node->next until after the
ilockcmpexch inline, meaning an incompletely-constructed node gets linked on
the front of the chain for a window of several instructions. By contrast, a
recent build from head does what I want: it writes to node->next in front of
the ilockcmpexch, and only tests its value afterward:
------------------------------<snip!>------------------------------
__ZN13pthread_mutexC2EP17pthread_mutexattr:
[ ... code ... ]
L9:
movl __ZN13pthread_mutex7mutexesE, %eax # mutexes.head, D.2119
movl %eax, 36(%ebx) # D.2119, <variable>.next
/APP
# 35 "mxfull.cpp" 1
lock cmpxchgl %ebx, __ZN13pthread_mutex7mutexesE # this,
# 0 "" 2
/NO_APP
movl %eax, -12(%ebp) # tmp79, ret
movl -12(%ebp), %eax # ret, D.2120
cmpl %eax, 36(%ebx) # D.2120, <variable>.next
jne L9 #,
------------------------------<snip!>------------------------------
Adding a "memory" clobber to the inline asm works around the problem,
causing 4.3 series to generate the same assembly as head, but I think it's a
sledgehammer approach. Am I asking too much of GCC to not sink the store, or
is 4.3 doing something wrong? I /think/ that the fact that there's a volatile
store in ilockcmpexch means the earlier store shouldn't be moved past it, and
that GCC is perhaps missing that the asm's output operand effectively
represents a volatile write through *t, but I could be misunderstanding the
rules about volatile. Anyone got their language lawyer's hat on at the moment?
cheers,
DaveK
[-- Attachment #2: mxfull.cpp --]
[-- Type: text/plain, Size: 2369 bytes --]
// g++ -c mxfull.cpp -o mxfull.o --save-temps -O2 -fverbose-asm
typedef long LONG;
typedef void *HANDLE;
typedef void *PVOID;
typedef char *LPCSTR;
typedef class pthread_mutex *pthread_mutex_t;
typedef class pthread_mutexattr *pthread_mutexattr_t;
typedef class pthread *pthread_t;
struct SECURITY_ATTRIBUTES;
typedef struct SECURITY_ATTRIBUTES *LPSECURITY_ATTRIBUTES;
extern struct SECURITY_ATTRIBUTES sec_none_nih;
HANDLE __attribute__((__stdcall__)) CreateSemaphoreA(LPSECURITY_ATTRIBUTES,LONG,LONG,LPCSTR);
class verifyable_object
{
public:
long magic;
verifyable_object (long);
virtual ~verifyable_object ();
};
extern __inline__ long
ilockcmpexch (volatile long *t, long v, long c)
{
return ({
__typeof (*t) ret;
__asm __volatile ("lock cmpxchgl %2, %1"
: "=a" (ret), "=m" (*t)
: "r" (v), "m" (*t), "0" (c));
ret;
});
}
class pthread_mutexattr: public verifyable_object
{
public:
int pshared;
int mutextype;
pthread_mutexattr ();
~pthread_mutexattr ();
};
template <class list_node> inline void
List_insert (list_node *&head, list_node *node)
{
if (!node)
return;
do
node->next = head;
while ((PVOID)ilockcmpexch((LONG volatile *)(&head),(LONG)(node),(LONG)(node->next)) != node->next);
}
template <class list_node> class List
{
public:
List() : head(__null)
{
}
~List()
{
}
void insert (list_node *node)
{
List_insert (head, node);
}
list_node *head;
};
class pthread_mutex: public verifyable_object
{
public:
unsigned long lock_counter;
HANDLE win32_obj_id;
unsigned int recursion_counter;
LONG condwaits;
pthread_t owner;
int type;
int pshared;
pthread_mutex (pthread_mutexattr * = __null);
~pthread_mutex ();
class pthread_mutex * next;
private:
static List<pthread_mutex> mutexes;
};
List<pthread_mutex> pthread_mutex::mutexes;
pthread_mutex::pthread_mutex (pthread_mutexattr *attr) :
verifyable_object (0xdf0df045 +1),
lock_counter (0),
win32_obj_id (__null), recursion_counter (0),
condwaits (0), owner (__null),
type (1),
pshared (0)
{
win32_obj_id = ::CreateSemaphoreA (&sec_none_nih, 0, 2147483647L, __null);
if (!win32_obj_id)
{
magic = 0;
return;
}
if (attr)
{
if (attr->pshared == 1)
{
magic = 0;
return;
}
type = attr->mutextype;
}
mutexes.insert (this);
}
next reply other threads:[~2009-06-04 1:39 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-06-04 1:39 Dave Korn [this message]
2009-06-04 8:27 ` Andrew Haley
2009-06-04 9:22 ` Dave Korn
2009-06-04 9:48 ` Dave Korn
2009-06-04 9:57 ` Andrew Haley
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4A2728AA.3010205@gmail.com \
--to=dave.korn.cygwin@googlemail.com \
--cc=gcc@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).