public inbox for lvm2-cvs@sourceware.org
help / color / mirror / Atom feed
From: mornfall@sourceware.org
To: lvm-devel@redhat.com, lvm2-cvs@sourceware.org
Subject: LVM2/daemons/lvmetad lvmetad-core.c
Date: Wed, 20 Jul 2011 21:23:00 -0000	[thread overview]
Message-ID: <20110720212344.5756.qmail@sourceware.org> (raw)

CVSROOT:	/cvs/lvm2
Module name:	LVM2
Changes by:	mornfall@sourceware.org	2011-07-20 21:23:44

Modified files:
	daemons/lvmetad: lvmetad-core.c 

Log message:
	First stab at making lvmetad-core threadsafe. The current design should allow
	a very reasonable amount of parallel access, although the hash tables may become
	a point of contention under heavy loads. Nevertheless, there should be orders
	of magnitude less contention on the hash table locks than we currently have on
	block device scanning.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/daemons/lvmetad/lvmetad-core.c.diff?cvsroot=lvm2&r1=1.15&r2=1.16

--- LVM2/daemons/lvmetad/lvmetad-core.c	2011/07/20 18:45:32	1.15
+++ LVM2/daemons/lvmetad/lvmetad-core.c	2011/07/20 21:23:43	1.16
@@ -1,4 +1,5 @@
 #include <assert.h>
+#include <pthread.h>
 
 #include "libdevmapper.h"
 #include <malloc.h>
@@ -10,15 +11,64 @@
 	struct dm_hash_table *pvs;
 	struct dm_hash_table *vgs;
 	struct dm_hash_table *pvid_map;
+	struct {
+		struct dm_hash_table *vg;
+		pthread_mutex_t pvs;
+		pthread_mutex_t vgs;
+		pthread_mutex_t pvid_map;
+	} lock;
 } lvmetad_state;
 
+void debug(const char *fmt, ...) {
+	va_list ap;
+	va_start(ap, fmt);
+	fprintf(stderr, "[D %u] ", pthread_self());
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+};
+
+void lock_pvs(lvmetad_state *s) { pthread_mutex_lock(&s->lock.pvs); }
+void unlock_pvs(lvmetad_state *s) { pthread_mutex_unlock(&s->lock.pvs); }
+
+void lock_vgs(lvmetad_state *s) { pthread_mutex_lock(&s->lock.vgs); }
+void unlock_vgs(lvmetad_state *s) { pthread_mutex_unlock(&s->lock.vgs); }
+
+void lock_pvid_map(lvmetad_state *s) { pthread_mutex_lock(&s->lock.pvid_map); }
+void unlock_pvid_map(lvmetad_state *s) { pthread_mutex_unlock(&s->lock.pvid_map); }
+
+struct config_tree *lock_vg(lvmetad_state *s, const char *id) {
+	lock_vgs(s);
+	pthread_mutex_t *vg = dm_hash_lookup(s->lock.vg, id);
+	if (!vg) {
+		pthread_mutexattr_t rec;
+		pthread_mutexattr_init(&rec);
+		pthread_mutexattr_settype(&rec, PTHREAD_MUTEX_RECURSIVE_NP);
+		vg = malloc(sizeof(pthread_mutex_t));
+		pthread_mutex_init(vg, &rec);
+		dm_hash_insert(s->lock.vg, id, vg);
+	}
+	pthread_mutex_lock(vg);
+	struct config_tree *cft = dm_hash_lookup(s->vgs, id);
+	unlock_vgs(s);
+	return cft;
+}
+
+void unlock_vg(lvmetad_state *s, const char *id) {
+	lock_vgs(s); /* someone might be changing the s->lock.vg structure right
+		      * now, so avoid stepping on each other's toes */
+	pthread_mutex_unlock(dm_hash_lookup(s->lock.vg, id));
+	unlock_vgs(s);
+}
+
 static response vg_by_uuid(lvmetad_state *s, request r)
 {
 	const char *uuid = daemon_request_str(r, "uuid", "NONE");
-	fprintf(stderr, "[D] vg_by_uuid: %s (vgs = %p)\n", uuid, s->vgs);
-	struct config_tree *cft = dm_hash_lookup(s->vgs, uuid);
-	if (!cft || !cft->root)
+	debug("vg_by_uuid: %s (vgs = %p)\n", uuid, s->vgs);
+	struct config_tree *cft = lock_vg(s, uuid);
+	if (!cft || !cft->root) {
+		unlock_vg(s, uuid);
 		return daemon_reply_simple("failed", "reason = %s", "uuid not found", NULL);
+	}
 
 	struct config_node *metadata = cft->root;
 
@@ -35,6 +85,8 @@
 	n = n->sib = clone_config_node(res.cft, metadata, 1);
 	n->parent = res.cft->root;
 	res.error = 0;
+	unlock_vg(s, uuid);
+
 	return res;
 }
 
@@ -47,7 +99,7 @@
 	if (node)
 		value = node->v;
 
-	while (value && strcmp(value->v.str, flag)) {
+	while (value && value->type != CFG_EMPTY_ARRAY && strcmp(value->v.str, flag)) {
 		pred = value;
 		value = value->next;
 	}
@@ -90,62 +142,49 @@
 	return pv;
 }
 
-static void update_pv_status_in_vg(lvmetad_state *s, struct config_tree *vg)
+/* Either the "big" vgs lock, or a per-vg lock needs to be held before entering
+ * this function. */
+static void update_pv_status(lvmetad_state *s, struct config_tree *vg)
 {
+	lock_pvs(s);
 	struct config_node *pv = pvs(vg);
 	while (pv) {
 		const char *uuid = find_config_str(pv->child, "id", "N/A");
 		const char *vgid = find_config_str(vg->root, "metadata/id", "N/A");
 		int found = dm_hash_lookup(s->pvs, uuid) ? 1 : 0;
+		// TODO: avoid the override here if MISSING came from the actual
+		// metadata, as opposed from our manipulation...
 		set_flag(vg, pv, "status", "MISSING", !found);
 		pv = pv->sib;
 	}
+	unlock_pvs(s);
 }
 
 static int vg_status(lvmetad_state *s, const char *vgid)
 {
-	struct config_tree *vg = dm_hash_lookup(s->vgs, vgid);
+	struct config_tree *vg = lock_vg(s, vgid);
 	struct config_node *pv = pvs(vg);
 
 	while (pv) {
-		const char *uuid = find_config_str(pv->child, "id", "N/A");
-		const char *vgid = find_config_str(vg->root, "metadata/id", "N/A");
+		const char *uuid = find_config_str(pv->child, "id", NULL);
+		if (!uuid)
+			continue; // FIXME?
+
+		lock_pvs(s);
 		int found = dm_hash_lookup(s->pvs, uuid) ? 1 : 0;
-		if (!found)
+		unlock_pvs(s);
+		if (!found) {
+			unlock_vg(s, vgid);
 			return 0;
+		}
 		pv = pv->sib;
 	}
 
+	unlock_vg(s, vgid);
 	return 1;
 }
 
-/*
- * Walk through metadata cache and update PV flags to reflect our current
- * picture of the PVs in the system. If pvid is non-NULL, this is used as a hint
- * as to which PV has changed state. Otherwise, all flags are recomputed from
- * authoritative data (the s->pvs hash).
- */
-static void update_pv_status(lvmetad_state *s, const char *pvid)
-{
-	if (pvid) {
-		const char *vgid = dm_hash_lookup(s->pvid_map, pvid);
-		if (!vgid)
-			return; /* nothing to update */
-
-		struct config_tree *vg = dm_hash_lookup(s->vgs, vgid);
-		assert(vg);
-
-		update_pv_status_in_vg(s, vg);
-	} else {
-		struct dm_hash_node *n = dm_hash_get_first(s->vgs);
-		while (n) {
-			struct config_tree *vg = dm_hash_get_data(s->vgs, n);
-			update_pv_status_in_vg(s, vg);
-			n = dm_hash_get_next(s->vgs, n);
-		}
-	}
-}
-
+/* You need to be holding the pvid_map lock already to call this. */
 int update_pvid_map(lvmetad_state *s, struct config_tree *vg, const char *vgid)
 {
 	struct config_node *pv = pvs(vg);
@@ -162,9 +201,17 @@
 	return 1;
 }
 
+/* No locks need to be held. The pointers are never used outside of the scope of
+ * this function, so they can be safely destroyed after update_metadata returns
+ * (anything that might have been retained is copied). */
 static int update_metadata(lvmetad_state *s, const char *_vgid, struct config_node *metadata)
 {
+	int retval = 0;
+	lock_vgs(s);
 	struct config_tree *old = dm_hash_lookup(s->vgs, _vgid);
+	lock_vg(s, _vgid);
+	unlock_vgs(s);
+
 	int seq = find_config_int(metadata, "metadata/seqno", -1);
 	int haveseq = -1;
 
@@ -172,17 +219,19 @@
 		haveseq = find_config_int(old->root, "metadata/seqno", -1);
 
 	if (seq < 0)
-		return 0; /* bad */
+		goto out;
 
 	if (seq == haveseq) {
 		// TODO: compare old->root with metadata to ensure equality
-		return 1;
+		retval = 1;
+		goto out;
 	}
 
 	if (seq < haveseq) {
 		// TODO: we may want to notify the client that their metadata is
 		// out of date?
-		return 1;
+		retval = 1;
+		goto out;
 	}
 
 	struct config_tree *cft = create_config_tree(NULL, 0);
@@ -190,7 +239,9 @@
 	const char *vgid = find_config_str(cft->root, "metadata/id", NULL);
 
 	if (!vgid)
-		return 0;
+		goto out;
+
+	lock_pvid_map(s);
 
 	if (haveseq >= 0 && haveseq < seq) {
 		/* temporarily orphan all of our PVs */
@@ -200,10 +251,17 @@
 		dm_hash_remove(s->vgs, vgid);
 	}
 
+	lock_vgs(s);
 	dm_hash_insert(s->vgs, vgid, cft);
+	unlock_vgs(s);
+
 	update_pvid_map(s, cft, vgid);
 
-	return 1;
+	unlock_pvid_map(s);
+	retval = 1;
+out:
+	unlock_vg(s, _vgid);
+	return retval;
 }
 
 static response pv_add(lvmetad_state *s, request r)
@@ -215,7 +273,9 @@
 	if (!pvid)
 		return daemon_reply_simple("failed", "reason = %s", "need PV UUID", NULL);
 
+	lock_pvs(s);
 	dm_hash_insert(s->pvs, pvid, (void*)1);
+	unlock_pvs(s);
 
 	if (metadata) {
 		if (!vgid)
@@ -226,10 +286,18 @@
 		if (!update_metadata(s, vgid, metadata))
 			return daemon_reply_simple("failed", "reason = %s",
 						   "metadata update failed", NULL);
-	} else
+	} else {
+		lock_pvid_map(s);
 		vgid = dm_hash_lookup(s->pvid_map, pvid);
+		unlock_pvid_map(s);
+	}
+
+	if (vgid) {
+		struct config_tree *cft = lock_vg(s, vgid);
+		update_pv_status(s, cft);
+		unlock_vg(s, vgid);
+	}
 
-	update_pv_status(s, pvid);
 	int complete = vgid ? vg_status(s, vgid) : 0;
 
 	return daemon_reply_simple("OK",
@@ -247,7 +315,7 @@
 	lvmetad_state *state = s.private;
 	const char *rq = daemon_request_str(r, "request", "NONE");
 
-	fprintf(stderr, "[D] REQUEST: %s\n", rq);
+	debug("REQUEST: %s\n", rq);
 
 	if (!strcmp(rq, "pv_add"))
 		return pv_add(state, r);
@@ -266,7 +334,16 @@
 	ls->pvs = dm_hash_create(32);
 	ls->vgs = dm_hash_create(32);
 	ls->pvid_map = dm_hash_create(32);
-	fprintf(stderr, "[D] initialised state: vgs = %p\n", ls->vgs);
+
+	ls->lock.vg = dm_hash_create(32);
+	pthread_mutexattr_t rec;
+	pthread_mutexattr_init(&rec);
+	pthread_mutexattr_settype(&rec, PTHREAD_MUTEX_RECURSIVE_NP);
+	pthread_mutex_init(&ls->lock.pvs, NULL);
+	pthread_mutex_init(&ls->lock.vgs, &rec);
+	pthread_mutex_init(&ls->lock.pvid_map, NULL);
+
+	debug("initialised state: vgs = %p\n", ls->vgs);
 	if (!ls->pvs || !ls->vgs)
 		return 0;
 


             reply	other threads:[~2011-07-20 21:23 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-07-20 21:23 mornfall [this message]
  -- strict thread matches above, loose matches on Subject: below --
2012-03-23 10:34 zkabelac
2012-02-27 10:19 zkabelac
2012-02-27 10:10 zkabelac
2012-02-24  0:24 mornfall
2012-02-24  0:12 mornfall
2012-02-21  9:19 mornfall
2012-02-15 17:37 mornfall
2012-02-15 17:30 mornfall
2012-02-15 14:15 mornfall
2012-02-15 14:06 mornfall
2012-02-15 11:43 mornfall
2012-02-13 14:25 zkabelac
2012-01-25 21:42 zkabelac
2011-12-18 22:31 mornfall
2011-09-17 13:33 zkabelac
2011-09-02 11:04 zkabelac
2011-07-25 17:59 mornfall
2011-07-25 15:51 mornfall
2011-07-25 15:33 mornfall
2011-07-20 21:33 mornfall
2011-07-20 21:27 mornfall
2011-07-20 21:26 mornfall
2011-07-20 18:45 mornfall
2011-07-20 18:34 mornfall
2011-07-20 18:24 mornfall
2011-07-20 16:49 mornfall
2011-07-20 16:46 mornfall
2011-07-20 15:14 mornfall
2011-07-19 19:15 mornfall
2011-07-19 14:14 mornfall

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110720212344.5756.qmail@sourceware.org \
    --to=mornfall@sourceware.org \
    --cc=lvm-devel@redhat.com \
    --cc=lvm2-cvs@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).