public inbox for cluster-cvs@sourceware.org
help / color / mirror / Atom feed
* dlm: master - dlm_controld: Escalate fencing to the cluster manager and reuse Pacemaker networking functions
@ 2009-03-12 20:45 David Teigland
  0 siblings, 0 replies; only message in thread
From: David Teigland @ 2009-03-12 20:45 UTC (permalink / raw)
  To: cluster-cvs-relay

Gitweb:        http://git.fedorahosted.org/git/dlm.git?p=dlm.git;a=commitdiff;h=230849dd80971678bdcc2255b80b36bf8ab0ae76
Commit:        230849dd80971678bdcc2255b80b36bf8ab0ae76
Parent:        b4de074a7a5b34f8fe2ceec252e847f2efb66389
Author:        Andrew Beekhof <beekhof@void.local>
AuthorDate:    Fri Jan 30 13:06:56 2009 +0100
Committer:     David Teigland <teigland@redhat.com>
CommitterDate: Thu Mar 12 15:40:59 2009 -0500

dlm_controld: Escalate fencing to the cluster manager and reuse Pacemaker networking functions

---
 group/dlm_controld/pacemaker.c |  349 ++++++++++++++++++----------------------
 1 files changed, 156 insertions(+), 193 deletions(-)

diff --git a/group/dlm_controld/pacemaker.c b/group/dlm_controld/pacemaker.c
index fed9ca7..b737e50 100644
--- a/group/dlm_controld/pacemaker.c
+++ b/group/dlm_controld/pacemaker.c
@@ -11,10 +11,15 @@
 
 #include <pacemaker/crm/crm.h>
 #include <pacemaker/crm/ais.h>
+#include <pacemaker/crm/attrd.h>
 /* heartbeat support is irrelevant here */
 #undef SUPPORT_HEARTBEAT 
 #define SUPPORT_HEARTBEAT 0
 #include <pacemaker/crm/common/cluster.h>
+#include <pacemaker/crm/common/stack.h>
+#include <pacemaker/crm/common/ipc.h>
+#include <pacemaker/crm/msg_xml.h>
+#include <pacemaker/crm/cib.h>
 
 #define COMMS_DIR     "/sys/kernel/config/dlm/cluster/comms"
 
@@ -24,7 +29,6 @@ int setup_ccs(void)
      * only allow configuration from the command-line until CoroSync is stable
      * enough to be used with Pacemaker
      */
-    cfgd_groupd_compat = 0; /* always use libcpg and disable backward compatability */
     return 0;
 }
 
@@ -52,51 +56,12 @@ void dlm_process_node(gpointer key, gpointer value, gpointer user_data);
 
 int setup_cluster(void)
 {
-    int retries = 0;
-    int rc = SA_AIS_OK;
-    struct utsname name;
-
-    crm_peer_init();
-
-    if(local_node_uname == NULL) {
-	if(uname(&name) < 0) {
-	    cl_perror("uname(2) call failed");
-	    exit(100);
-	}
-	local_node_uname = crm_strdup(name.nodename);
-	log_debug("Local node name: %s", local_node_uname);
-    }
+    ais_fd_async = -1;
+    crm_log_init("cluster-dlm", LOG_INFO, FALSE, TRUE, 0, NULL);
     
-    /* 16 := CRM_SERVICE */
-  retry:
-    log_debug("Creating connection to our AIS plugin");
-    rc = saServiceConnect (&ais_fd_sync, &ais_fd_async, CRM_SERVICE);
-    if (rc != SA_AIS_OK) {
-	log_error("Connection to our AIS plugin (%d) failed: %s (%d)", CRM_SERVICE, ais_error2text(rc), rc);
-    }
-
-    switch(rc) {
-	case SA_AIS_OK:
-	    break;
-	case SA_AIS_ERR_TRY_AGAIN:
-	    if(retries < 30) {
-		sleep(1);
-		retries++;
-		goto retry;
-	    }
-	    log_error("Retry count exceeded");
-	    return 0;
-	default:
-	    return 0;
-    }
-
-    log_debug("AIS connection established");
-
-    {
-	int pid = getpid();
-	char *pid_s = crm_itoa(pid);
-	send_ais_text(0, pid_s, TRUE, NULL, crm_msg_ais);
-	crm_free(pid_s);
+    if(init_ais_connection(NULL, NULL, NULL, &local_node_uname, &our_nodeid) == FALSE) {
+	log_printf(LOG_ERR, "Connection to our AIS plugin (%d) failed", CRM_SERVICE);
+	return -1;
     }
 
     /* Sign up for membership updates */
@@ -105,158 +70,28 @@ int setup_cluster(void)
     /* Requesting the current list of known nodes */
     send_ais_text(crm_class_members, __FUNCTION__, TRUE, NULL, crm_msg_ais);
 
-    our_nodeid = get_ais_nodeid();
-    log_debug("Local node id: %d", our_nodeid);
-
     return ais_fd_async;
 }
 
-static void statechange(void)
+void update_cluster(void)
 {
     static uint64_t last_membership = 0;
     cluster_quorate = crm_have_quorum;
     if(last_membership < crm_peer_seq) {
-	log_debug("Processing membership %llu", crm_peer_seq);
+	log_printf(LOG_INFO, "Processing membership %llu", crm_peer_seq);
 	g_hash_table_foreach(crm_peer_cache, dlm_process_node, &last_membership);
 	last_membership = crm_peer_seq;
     }
 }
 
-void update_cluster(void)
-{
-    statechange();
-}
-
 void process_cluster(int ci)
 {
-/* ci ::= client number */    
-    char *data = NULL;
-    char *uncompressed = NULL;
-
-    AIS_Message *msg = NULL;
-    SaAisErrorT rc = SA_AIS_OK;
-    mar_res_header_t *header = NULL;
-    static int header_len = sizeof(mar_res_header_t);
-
-    header = malloc(header_len);
-    memset(header, 0, header_len);
-    
-    errno = 0;
-    rc = saRecvRetry(ais_fd_async, header, header_len);
-    if (rc != SA_AIS_OK) {
-	cl_perror("Receiving message header failed: (%d) %s", rc, ais_error2text(rc));
-	goto bail;
-
-    } else if(header->size == header_len) {
-	log_error("Empty message: id=%d, size=%d, error=%d, header_len=%d",
-		  header->id, header->size, header->error, header_len);
-	goto done;
-	
-    } else if(header->size == 0 || header->size < header_len) {
-	log_error("Mangled header: size=%d, header=%d, error=%d",
-		  header->size, header_len, header->error);
-	goto done;
-	
-    } else if(header->error != 0) {
-	log_error("Header contined error: %d", header->error);
-    }
-    
-    header = realloc(header, header->size);
-    /* Use a char* so we can store the remainder into an offset */
-    data = (char*)header;
-
-    errno = 0;
-    rc = saRecvRetry(ais_fd_async, data+header_len, header->size - header_len);
-    msg = (AIS_Message*)data;
-
-    if (rc != SA_AIS_OK) {
-	cl_perror("Receiving message body failed: (%d) %s", rc, ais_error2text(rc));
-	goto bail;
-    }
-    
-    data = msg->data;
-    if(msg->is_compressed && msg->size > 0) {
-	int rc = BZ_OK;
-	unsigned int new_size = msg->size;
-
-	if(check_message_sanity(msg, NULL) == FALSE) {
-	    goto badmsg;
-	}
-
-	log_debug("Decompressing message data");
-	uncompressed = malloc(new_size);
-	memset(uncompressed, 0, new_size);
-	
-	rc = BZ2_bzBuffToBuffDecompress(
-	    uncompressed, &new_size, data, msg->compressed_size, 1, 0);
-
-	if(rc != BZ_OK) {
-	    log_error("Decompression failed: %d", rc);
-	    goto badmsg;
-	}
-	
-	CRM_ASSERT(rc == BZ_OK);
-	CRM_ASSERT(new_size == msg->size);
-
-	data = uncompressed;
-
-    } else if(check_message_sanity(msg, data) == FALSE) {
-	goto badmsg;
-
-    } else if(safe_str_eq("identify", data)) {
-	int pid = getpid();
-	char *pid_s = crm_itoa(pid);
-	send_ais_text(0, pid_s, TRUE, NULL, crm_msg_ais);
-	crm_free(pid_s);
-	goto done;
-    }
-
-    if(msg->header.id == crm_class_members) {
-	xmlNode *xml = string2xml(data);
-
-	if(xml != NULL) {
-	    const char *value = crm_element_value(xml, "id");
-	    if(value) {
-		crm_peer_seq = crm_int_helper(value, NULL);
-	    }
-
-	    log_debug("Updating membership %llu", crm_peer_seq);
-	    /* crm_log_xml_info(xml, __PRETTY_FUNCTION__); */
-	    xml_child_iter(xml, node, crm_update_ais_node(node, crm_peer_seq));
-	    crm_calculate_quorum();
-	    statechange();
-	    free_xml(xml);
-	    
-	} else {
-	    log_error("Invalid peer update: %s", data);
-	}
-
-    } else {
-	log_error("Unexpected AIS message type: %d", msg->header.id);
-    }
-
-  done:
-    free(uncompressed);
-    free(msg);
-    return;
-
-  badmsg:
-    log_error("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):"
-	      " min=%d, total=%d, size=%d, bz2_size=%d",
-	      msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type),
-	      ais_dest(&(msg->sender)), msg_type2text(msg->sender.type),
-	      msg->sender.pid, (int)sizeof(AIS_Message),
-	      msg->header.size, msg->size, msg->compressed_size);
-    goto done;
-    
-  bail:
-    log_error("AIS connection failed");
-    return;
+    ais_dispatch(ais_fd_async, NULL);
+    update_cluster();
 }
 
 void close_cluster(void) {
-    /* TODO: Implement something for this */
-    return;
+    terminate_ais_connection();
 }
 
 #include <arpa/inet.h>
@@ -333,7 +168,7 @@ void dlm_process_node(gpointer key, gpointer value, gpointer user_data)
 	    } else if(strchr(addr, ':')) {
 		rc = sscanf(addr, "ip(%[0-9A-Fa-f:])", ipaddr);
 		if(rc != 1) {
-		    log_error("Could not extract IPv6 address from '%s'", addr);
+		    log_printf(LOG_ERR, "Could not extract IPv6 address from '%s'", addr);
 		    continue;			
 		}
 		addr_family = AF_INET6;
@@ -341,26 +176,26 @@ void dlm_process_node(gpointer key, gpointer value, gpointer user_data)
 	    } else {
 		rc = sscanf(addr, "ip(%[0-9.]) ", ipaddr);
 		if(rc != 1) {
-		    log_error("Could not extract IPv4 address from '%s'", addr);
+		    log_printf(LOG_ERR, "Could not extract IPv4 address from '%s'", addr);
 		    continue;			
 		}
 	    }
 		
 	    rc = inet_pton(addr_family, ipaddr, &totem_addr);
 	    if(rc != 1) {
-		log_error("Could not parse '%s' as in IPv%c address", ipaddr, (addr_family==AF_INET)?'4':'6');
+		log_printf(LOG_ERR, "Could not parse '%s' as in IPv%c address", ipaddr, (addr_family==AF_INET)?'4':'6');
 		continue;
 	    }
 
 	    rc = totemip_parse(&totem_addr, ipaddr, addr_family);
 	    if(rc != 0) {
-		log_error("Could not convert '%s' into a totem address", ipaddr);
+		log_printf(LOG_ERR, "Could not convert '%s' into a totem address", ipaddr);
 		continue;
 	    }
 
 	    rc = totemip_totemip_to_sockaddr_convert(&totem_addr, 0, &cna_addr, &cna_len);
 	    if(rc != 0) {
-		log_error("Could not convert totem address for '%s' into sockaddr", ipaddr);
+		log_printf(LOG_ERR, "Could not convert totem address for '%s' into sockaddr", ipaddr);
 		continue;
 	    }
 
@@ -371,10 +206,10 @@ void dlm_process_node(gpointer key, gpointer value, gpointer user_data)
 	free(addr_top);
     }
 
-    log_debug("%s %sctive node %u '%s': born-on=%llu, last-seen=%llu, this-event=%llu, last-event=%llu",
-	      action, crm_is_member_active(value)?"a":"ina",
-	      node->id, node->uname, node->born, node->last_seen,
-	      crm_peer_seq, (unsigned long long)*last);
+    log_printf(LOG_INFO, "%s %sctive node %u '%s': born-on=%llu, last-seen=%llu, this-event=%llu, last-event=%llu",
+	       action, crm_is_member_active(value)?"a":"ina",
+	       node->id, node->uname, node->born, node->last_seen,
+	       crm_peer_seq, (unsigned long long)*last);
 }
 
 int is_cluster_member(int nodeid)
@@ -391,19 +226,147 @@ char *nodeid2name(int nodeid) {
     return strdup(node->uname);
 }
 
+static IPC_Channel *attrd = NULL;
+
+static void attrd_deadfn(int ci) 
+{
+    log_printf(LOG_ERR, "Lost connection to attrd");
+    attrd = NULL;
+    return;
+}
+
 void kick_node_from_cluster(int nodeid)
 {
-    log_error("%s not yet implemented", __FUNCTION__);
+    gboolean rc = FALSE;
+    xmlNode *update = NULL;
+    time_t now = time(NULL);
+    crm_node_t *node = crm_get_peer(nodeid, NULL);
+
+    if(node == NULL || node->uname == NULL) {
+	log_printf(LOG_ERR, "%s: Don't know how to kick node %d/%p", __FUNCTION__, nodeid, node);
+	return;
+    }
+
+    if(attrd == NULL) {
+	log_printf(LOG_INFO, "Connecting to attrd...");
+	attrd = init_client_ipc_comms_nodispatch(T_ATTRD);
+	if(attrd) {
+	    client_add(attrd->ops->get_recv_select_fd(attrd), NULL, attrd_deadfn);
+	}
+    }
+    
+    if(attrd != NULL) {
+	update = create_xml_node(NULL, __FUNCTION__);
+	crm_xml_add(update, F_TYPE, T_ATTRD);
+	crm_xml_add(update, F_ORIG, crm_system_name);
+    
+	crm_xml_add(update, F_ATTRD_TASK, "update");
+	crm_xml_add(update, F_ATTRD_SECTION, XML_CIB_TAG_STATUS);
+	crm_xml_add(update, F_ATTRD_ATTRIBUTE, "terminate");
+	crm_xml_add_int(update, F_ATTRD_VALUE, now);
+	crm_xml_add(update, F_ATTRD_HOST, node->uname);
+	
+	rc = send_ipc_message(attrd, update);
+	free_xml(update);
+    }
+
+    if(rc) {
+	log_printf(LOG_INFO, "Requested that node %d/%s be kicked from the cluster", nodeid, node->uname);
+    } else {
+	log_printf(LOG_ERR, "Could not kick node %d/%s from the cluster", nodeid, node->uname);
+    }
+    
+    return;
+}
+
+cib_t *cib = NULL;
+
+static void cib_deadfn(int ci) 
+{
+    log_printf(LOG_ERR, "Lost connection to the cib");
+    cib = NULL; /* TODO: memory leak in unlikely error path */
     return;
 }
 
-int fence_node_time(int nodeid, uint64_t *last_fenced_time)
+static cib_t *cib_connect(void) 
 {
-	return 0;
+    int rc = 0;
+    int cib_fd = 0;
+    if(cib) {
+	return cib;
+    }
+    
+    cib = cib_new();
+    rc = cib->cmds->signon_raw(cib, crm_system_name, cib_command, &cib_fd, NULL);
+    if(rc != cib_ok) {
+	log_printf(LOG_ERR, "Signon to cib failed: %s", cib_error2string(rc));
+	cib = NULL; /* TODO: memory leak in unlikely error path */
+
+    } else {
+	client_add(cib_fd, NULL, cib_deadfn);
+    }
+    return cib;
 }
 
-int fence_in_progress(int *count)
+
+int fence_in_progress(int *unused)
 {
+    int rc = 0;
+    xmlNode *xpath_data;
+
+    cib_connect();    
+    if(cib == NULL) {
+	return -1;
+    }
+
+    /* TODO: Not definitive - but a good approximation */
+    rc = cib->cmds->query(cib, "//nvpar[@name='terminate']", &xpath_data,
+			  cib_xpath|cib_scope_local|cib_sync_call);
+
+    log_printf(LOG_INFO, "Fencing in progress: %s", xpath_data?"true":"false");	
+
+    if(xpath_data == NULL) {
 	return 0;
+    }
+
+    free_xml(xpath_data);
+    return 1;
 }
 
+#define XPATH_MAX  1024
+
+int fence_node_time(int nodeid, uint64_t *last_fenced_time)
+{
+    int rc = 0;
+    xmlNode *xpath_data;
+    char xpath_query[XPATH_MAX];
+    crm_node_t *node = crm_get_peer(nodeid, NULL);
+
+    if(last_fenced_time) {
+	*last_fenced_time = 0;
+    }
+
+    if(node == NULL || node->uname == NULL) {
+	log_printf(LOG_ERR, "Nothing known about node %d", nodeid);	
+	return 0;
+    }
+
+    cib_connect();
+    if(cib == NULL) {
+	return -1;
+    }
+
+    snprintf(xpath_query, XPATH_MAX, "//lrm[@id='%s']", node->uname);
+    rc = cib->cmds->query(
+	cib, xpath_query, &xpath_data, cib_xpath|cib_scope_local|cib_sync_call);
+
+    if(xpath_data == NULL) {
+	/* the node has been shot - return 'now' */
+	log_printf(LOG_INFO, "Node %d/%s was last shot 'now'", nodeid, node->uname);	
+	*last_fenced_time = time(NULL);
+    }
+
+    free_xml(xpath_data);
+    log_printf(LOG_INFO, "It does not appear node %d/%s has been shot", nodeid, node->uname);	
+    return 0;
+}


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2009-03-12 20:45 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-03-12 20:45 dlm: master - dlm_controld: Escalate fencing to the cluster manager and reuse Pacemaker networking functions David Teigland

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as for the URLs of the read-only IMAP folder(s) and NNTP newsgroup(s).