public inbox for cluster-cvs@sourceware.org help / color / mirror / Atom feed
From: jbrassow@sourceware.org To: cluster-cvs@sources.redhat.com, cluster-devel@redhat.com Subject: Cluster Project branch, RHEL5, updated. cmirror_1_1_15-150-gb291647 Date: Fri, 18 Jul 2008 15:19:00 -0000 [thread overview] Message-ID: <20080718151913.12461.qmail@sourceware.org> (raw) This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "Cluster Project". http://sources.redhat.com/git/gitweb.cgi?p=cluster.git;a=commitdiff;h=b2916471b1b7c79dba7f9624a1a148240375891f The branch, RHEL5 has been updated via b2916471b1b7c79dba7f9624a1a148240375891f (commit) from 705c5ceb17da0daf018c688ac478b4fae2371e3d (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit b2916471b1b7c79dba7f9624a1a148240375891f Author: Jonathan Brassow <jbrassow@redhat.com> Date: Fri Jul 18 10:16:45 2008 -0500 dm-log-clustered: Fix bug that would cause communication problems When an error is received from user space, the caller of the communication function is given the return code along with a parameter indicating that no data is available. However, if the error was EAGAIN, the data_size variable was still set to 0. So, when the retry was attempted and data returned from userspace, the kernel would think that there was not enough room to store the request. Also did quite a bit of code clean-up (no functional changes) in userspace.... like a number->string translation for openAIS error codes, etc... 
----------------------------------------------------------------------- Summary of changes: cmirror-kernel/src/dm-clog-tfr.c | 8 +- cmirror/src/cluster.c | 237 ++++++++++++++++++-------------------- 2 files changed, 117 insertions(+), 128 deletions(-) diff --git a/cmirror-kernel/src/dm-clog-tfr.c b/cmirror-kernel/src/dm-clog-tfr.c index 95d4bd2..7932f77 100644 --- a/cmirror-kernel/src/dm-clog-tfr.c +++ b/cmirror-kernel/src/dm-clog-tfr.c @@ -96,7 +96,13 @@ static int fill_pkg(struct cn_msg *msg, struct clog_tfr *tfr) if (msg) { pkg->error = -msg->ack; - *(pkg->data_size) = 0; + /* + * If we are trying again, we will need to know our + * storage capacity. Otherwise, along with the + * error code, we make explicit that we have no data. + */ + if (pkg->error != -EAGAIN) + *(pkg->data_size) = 0; } else if (tfr->data_size > *(pkg->data_size)) { DMERR("Insufficient space to receive package [%s]::", RQ_TYPE(tfr->request_type)); diff --git a/cmirror/src/cluster.c b/cmirror/src/cluster.c index 3a18687..68ffefb 100644 --- a/cmirror/src/cluster.c +++ b/cmirror/src/cluster.c @@ -19,35 +19,36 @@ #include "logging.h" #include "link_mon.h" -/* Open AIS error codes - SA_AIS_OK = 1, - SA_AIS_ERR_LIBRARY = 2, - SA_AIS_ERR_VERSION = 3, - SA_AIS_ERR_INIT = 4, - SA_AIS_ERR_TIMEOUT = 5, - SA_AIS_ERR_TRY_AGAIN = 6, - SA_AIS_ERR_INVALID_PARAM = 7, - SA_AIS_ERR_NO_MEMORY = 8, - SA_AIS_ERR_BAD_HANDLE = 9, - SA_AIS_ERR_BUSY = 10, - SA_AIS_ERR_ACCESS = 11, - SA_AIS_ERR_NOT_EXIST = 12, - SA_AIS_ERR_NAME_TOO_LONG = 13, - SA_AIS_ERR_EXIST = 14, - SA_AIS_ERR_NO_SPACE = 15, - SA_AIS_ERR_INTERRUPT = 16, - SA_AIS_ERR_NAME_NOT_FOUND = 17, - SA_AIS_ERR_NO_RESOURCES = 18, - SA_AIS_ERR_NOT_SUPPORTED = 19, - SA_AIS_ERR_BAD_OPERATION = 20, - SA_AIS_ERR_FAILED_OPERATION = 21, - SA_AIS_ERR_MESSAGE_ERROR = 22, - SA_AIS_ERR_QUEUE_FULL = 23, - SA_AIS_ERR_QUEUE_NOT_AVAILABLE = 24, - SA_AIS_ERR_BAD_FLAGS = 25, - SA_AIS_ERR_TOO_BIG = 26, - SA_AIS_ERR_NO_SECTIONS = 27 -*/ +/* Open AIS error codes */ +#define 
str_ais_error(x) \ + ((x) == SA_AIS_OK) ? "SA_AIS_OK" : \ + ((x) == SA_AIS_ERR_LIBRARY) ? "SA_AIS_ERR_LIBRARY" : \ + ((x) == SA_AIS_ERR_VERSION) ? "SA_AIS_ERR_VERSION" : \ + ((x) == SA_AIS_ERR_INIT) ? "SA_AIS_ERR_INIT" : \ + ((x) == SA_AIS_ERR_TIMEOUT) ? "SA_AIS_ERR_TIMEOUT" : \ + ((x) == SA_AIS_ERR_TRY_AGAIN) ? "SA_AIS_ERR_TRY_AGAIN" : \ + ((x) == SA_AIS_ERR_INVALID_PARAM) ? "SA_AIS_ERR_INVALID_PARAM" : \ + ((x) == SA_AIS_ERR_NO_MEMORY) ? "SA_AIS_ERR_NO_MEMORY" : \ + ((x) == SA_AIS_ERR_BAD_HANDLE) ? "SA_AIS_ERR_BAD_HANDLE" : \ + ((x) == SA_AIS_ERR_BUSY) ? "SA_AIS_ERR_BUSY" : \ + ((x) == SA_AIS_ERR_ACCESS) ? "SA_AIS_ERR_ACCESS" : \ + ((x) == SA_AIS_ERR_NOT_EXIST) ? "SA_AIS_ERR_NOT_EXIST" : \ + ((x) == SA_AIS_ERR_NAME_TOO_LONG) ? "SA_AIS_ERR_NAME_TOO_LONG" : \ + ((x) == SA_AIS_ERR_EXIST) ? "SA_AIS_ERR_EXIST" : \ + ((x) == SA_AIS_ERR_NO_SPACE) ? "SA_AIS_ERR_NO_SPACE" : \ + ((x) == SA_AIS_ERR_INTERRUPT) ? "SA_AIS_ERR_INTERRUPT" : \ + ((x) == SA_AIS_ERR_NAME_NOT_FOUND) ? "SA_AIS_ERR_NAME_NOT_FOUND" : \ + ((x) == SA_AIS_ERR_NO_RESOURCES) ? "SA_AIS_ERR_NO_RESOURCES" : \ + ((x) == SA_AIS_ERR_NOT_SUPPORTED) ? "SA_AIS_ERR_NOT_SUPPORTED" : \ + ((x) == SA_AIS_ERR_BAD_OPERATION) ? "SA_AIS_ERR_BAD_OPERATION" : \ + ((x) == SA_AIS_ERR_FAILED_OPERATION) ? "SA_AIS_ERR_FAILED_OPERATION" : \ + ((x) == SA_AIS_ERR_MESSAGE_ERROR) ? "SA_AIS_ERR_MESSAGE_ERROR" : \ + ((x) == SA_AIS_ERR_QUEUE_FULL) ? "SA_AIS_ERR_QUEUE_FULL" : \ + ((x) == SA_AIS_ERR_QUEUE_NOT_AVAILABLE) ? "SA_AIS_ERR_QUEUE_NOT_AVAILABLE" : \ + ((x) == SA_AIS_ERR_BAD_FLAGS) ? "SA_AIS_ERR_BAD_FLAGS" : \ + ((x) == SA_AIS_ERR_TOO_BIG) ? "SA_AIS_ERR_TOO_BIG" : \ + ((x) == SA_AIS_ERR_NO_SECTIONS) ? 
"SA_AIS_ERR_NO_SECTIONS" : \ + "ais_error_unknown" #define DM_CLOG_RESPONSE 0x1000 /* in last byte of 32-bit value */ #define DM_CLOG_CHECKPOINT_READY 21 @@ -145,10 +146,7 @@ int cluster_send(struct clog_tfr *tfr) return 0; /* error codes found in openais/cpg.h */ - LOG_ERROR("cpg_mcast_joined error: %d%s", r, - (r == SA_AIS_ERR_TRY_AGAIN) ? "/SA_AIS_ERR_TRY_AGAIN" : - (r == CPG_ERR_BAD_HANDLE) ? "/CPG_ERR_BAD_HANDLE" : - (r == CPG_ERR_ACCESS) ? "/CPG_ERR_ACCESS" : ""); + LOG_ERROR("cpg_mcast_joined error: %s", str_ais_error(r)); tfr->error = -EBADE; return -EBADE; @@ -358,16 +356,19 @@ static int export_checkpoint(struct checkpoint_data *cp) LOG_DBG("Sending checkpointed data to %u", cp->requester); - len = snprintf((char *)(name.value), SA_MAX_NAME_LENGTH, "bitmaps_%s_%u", - SHORT_UUID(cp->uuid), cp->requester); + len = snprintf((char *)(name.value), SA_MAX_NAME_LENGTH, + "bitmaps_%s_%u", SHORT_UUID(cp->uuid), cp->requester); name.length = len; + len = strlen(cp->recovering_region) + 1; + attr.creationFlags = SA_CKPT_WR_ALL_REPLICAS; - attr.checkpointSize = cp->bitmap_size * 2 + strlen(cp->recovering_region) + 1; + attr.checkpointSize = cp->bitmap_size * 2 + len; + attr.retentionDuration = SA_TIME_MAX; attr.maxSections = 4; /* don't know why we need +1 */ - attr.maxSectionSize = (cp->bitmap_size > (strlen(cp->recovering_region) + 1)) ? - cp->bitmap_size : (strlen(cp->recovering_region) + 1); + + attr.maxSectionSize = (cp->bitmap_size > len) ? 
cp->bitmap_size : len; attr.maxSectionIdSize = 22; flags = SA_CKPT_CHECKPOINT_READ | @@ -388,8 +389,9 @@ open_retry: } if (rv != SA_AIS_OK) { - LOG_ERROR("[%s] Failed to open checkpoint for %u: Reason = %d", - SHORT_UUID(cp->uuid), cp->requester, rv); + LOG_ERROR("[%s] Failed to open checkpoint for %u: %s", + SHORT_UUID(cp->uuid), cp->requester, + str_ais_error(rv)); return -EIO; /* FIXME: better error */ } @@ -402,21 +404,23 @@ open_retry: section_attr.expirationTime = SA_TIME_END; sync_create_retry: - rv = saCkptSectionCreate(h, &section_attr, cp->sync_bits, cp->bitmap_size); + rv = saCkptSectionCreate(h, &section_attr, + cp->sync_bits, cp->bitmap_size); if (rv == SA_AIS_ERR_TRY_AGAIN) { - LOG_ERROR("export_checkpoint: sync create retry"); + LOG_ERROR("Sync checkpoint section create retry"); sleep(1); goto sync_create_retry; } if (rv == SA_AIS_ERR_EXIST) { - LOG_DBG("export_checkpoint: sync checkpoint section already exists"); + LOG_DBG("Sync checkpoint section already exists"); saCkptCheckpointClose(h); return -EEXIST; } if (rv != SA_AIS_OK) { - LOG_ERROR("export_checkpoint: sync checkpoint section creation failed"); + LOG_ERROR("Sync checkpoint section creation failed: %s", + str_ais_error(rv)); saCkptCheckpointClose(h); return -EIO; /* FIXME: better error */ } @@ -432,19 +436,20 @@ sync_create_retry: clean_create_retry: rv = saCkptSectionCreate(h, &section_attr, cp->clean_bits, cp->bitmap_size); if (rv == SA_AIS_ERR_TRY_AGAIN) { - LOG_ERROR("export_checkpoint: clean create retry"); + LOG_ERROR("Clean checkpoint section create retry"); sleep(1); goto clean_create_retry; } if (rv == SA_AIS_ERR_EXIST) { - LOG_DBG("export_checkpoint: clean checkpoint section already exists"); + LOG_DBG("Clean checkpoint section already exists"); saCkptCheckpointClose(h); return -EEXIST; } if (rv != SA_AIS_OK) { - LOG_ERROR("export_checkpoint: clean checkpoint section creation failed"); + LOG_ERROR("Clean checkpoint section creation failed: %s", + str_ais_error(rv)); saCkptCheckpointClose(h); 
return -EIO; /* FIXME: better error */ } @@ -461,19 +466,20 @@ rr_create_retry: rv = saCkptSectionCreate(h, &section_attr, cp->recovering_region, strlen(cp->recovering_region) + 1); if (rv == SA_AIS_ERR_TRY_AGAIN) { - LOG_ERROR("export_checkpoint: RR create retry"); + LOG_ERROR("RR checkpoint section create retry"); sleep(1); goto rr_create_retry; } if (rv == SA_AIS_ERR_EXIST) { - LOG_DBG("export_checkpoint: RR checkpoint section already exists"); + LOG_DBG("RR checkpoint section already exists"); saCkptCheckpointClose(h); return -EEXIST; } if (rv != SA_AIS_OK) { - LOG_ERROR("export_checkpoint: RR checkpoint section creation failed"); + LOG_ERROR("RR checkpoint section creation failed: %s", + str_ais_error(rv)); saCkptCheckpointClose(h); return -EIO; /* FIXME: better error */ } @@ -509,21 +515,6 @@ rr_create_retry: return 0; } -void ckpt_print (char *str, SaCkptCheckpointHandleT handle) -{ - SaCkptCheckpointDescriptorT descriptor; - SaAisErrorT rv; - -retry_statusget: - rv = saCkptCheckpointStatusGet (handle, &descriptor); - if (rv == SA_AIS_ERR_TRY_AGAIN) - goto retry_statusget; - - LOG_DBG("printing [%s] sections [%d] result [%d]", - str, descriptor.numberOfSections, rv); -} - - static int import_checkpoint(struct clog_cpg *entry, int no_read) { int rtn = 0; @@ -554,12 +545,11 @@ open_retry: } if (rv != SA_AIS_OK) { - LOG_ERROR("Failed to open checkpoint"); + LOG_ERROR("[%s] Failed to open checkpoint: %s", + SHORT_UUID(entry->name.value), str_ais_error(rv)); return -EIO; /* FIXME: better error */ } - ckpt_print ("Before unlink", h); - unlink_retry: rv = saCkptCheckpointUnlink(ckpt_handle, &name); if (rv == SA_AIS_ERR_TRY_AGAIN) { @@ -573,10 +563,9 @@ unlink_retry: goto no_read; } - ckpt_print ("After unlink", h); - init_retry: - rv = saCkptSectionIterationInitialize(h, SA_CKPT_SECTIONS_ANY, SA_TIME_END, &itr); + rv = saCkptSectionIterationInitialize(h, SA_CKPT_SECTIONS_ANY, + SA_TIME_END, &itr); if (rv == SA_AIS_ERR_TRY_AGAIN) { LOG_ERROR("import_checkpoint: sync 
create retry"); sleep(1); @@ -584,7 +573,8 @@ init_retry: } if (rv != SA_AIS_OK) { - LOG_ERROR("import_checkpoint: sync checkpoint section creation failed"); + LOG_ERROR("[%s] Sync checkpoint section creation failed: %s", + SHORT_UUID(entry->name.value), str_ais_error(rv)); return -EIO; /* FIXME: better error */ } @@ -602,11 +592,13 @@ init_retry: } saCkptSectionIterationFinalize(itr); if (len != 3) { - LOG_ERROR("import_checkpoint: %d checkpoint sections found", len); + LOG_ERROR("import_checkpoint: %d checkpoint sections found", + len); sleep(1); goto init_retry; } - saCkptSectionIterationInitialize(h, SA_CKPT_SECTIONS_ANY, SA_TIME_END, &itr); + saCkptSectionIterationInitialize(h, SA_CKPT_SECTIONS_ANY, + SA_TIME_END, &itr); while (1) { rv = saCkptSectionIterationNext(itr, &desc); @@ -620,7 +612,8 @@ init_retry: } if (rv != SA_AIS_OK) { - LOG_ERROR("import_checkpoint: clean checkpoint section creation failed"); + LOG_ERROR("import_checkpoint: clean checkpoint section " + "creation failed: %s", str_ais_error(rv)); rtn = -EIO; /* FIXME: better error */ goto fail; } @@ -645,20 +638,15 @@ init_retry: } if (rv != SA_AIS_OK) { - LOG_ERROR("import_checkpoint: ckpt read error"); + LOG_ERROR("import_checkpoint: ckpt read error: %s", + str_ais_error(rv)); rtn = -EIO; /* FIXME: better error */ goto fail; } - /* FIXME: Is this catching something special? 
- if (!iov.readSize) { - LOG_ERROR("%s section empty", (char *)desc.sectionId.id); - continue; - } - */ - if (iov.readSize) { - if (pull_state(entry->name.value, (char *)desc.sectionId.id, bitmap, + if (pull_state(entry->name.value, + (char *)desc.sectionId.id, bitmap, iov.readSize)) { LOG_ERROR("Error loading state"); rtn = -EIO; @@ -676,67 +664,62 @@ fail: no_read: saCkptCheckpointClose(h); - /* - LOG_PRINT("Testing if chkpoint exists after unlink/close"); - rv = saCkptCheckpointOpen(ckpt_handle, &name, NULL, - SA_CKPT_CHECKPOINT_READ, 0, &h); - if (rv != SA_AIS_OK) { - LOG_PRINT("Checkpoint was not removed!!!"); - saCkptCheckpointClose(h); - } else - LOG_PRINT(" rv == %d", rv); - */ - free(bitmap); return rtn; } +static void do_checkpoints(struct clog_cpg *entry) +{ + struct checkpoint_data *cp; + + for (cp = entry->checkpoint_list; cp;) { + LOG_DBG("[%s] Checkpoint data available for node %u", + SHORT_UUID(entry->name.value), cp->requester); + + /* + * FIXME: Check return code. Could send failure + * notice in tfr in export_checkpoint function + * by setting tfr->error + */ + switch (export_checkpoint(cp)) { + case -EEXIST: + LOG_DBG("[%s] Checkpoint for %u already handled", + SHORT_UUID(entry->name.value), cp->requester); + case 0: + entry->checkpoint_list = cp->next; + free_checkpoint(cp); + cp = entry->checkpoint_list; + break; + default: + /* FIXME: Skipping will cause list corruption */ + LOG_ERROR("[%s] Failed to export checkpoint for %u", + SHORT_UUID(entry->name.value), cp->requester); + } + } +} + static int do_cluster_work(void *data) { int r = SA_AIS_OK; struct clog_cpg *entry, *tmp; - struct checkpoint_data *cp; list_for_each_entry_safe(entry, tmp, &clog_cpg_list, list) { r = cpg_dispatch(entry->handle, CPG_DISPATCH_ALL); if (r != SA_AIS_OK) - LOG_ERROR("cpg_dispatch failed: %d", r); + LOG_ERROR("cpg_dispatch failed: %s", str_ais_error(r)); if (entry->free_me) { free(entry); continue; } - - for (cp = entry->checkpoint_list; cp;) { - LOG_DBG("[%s] 
Checkpoint data available for node %u", - SHORT_UUID(entry->name.value), cp->requester); - - /* - * FIXME: Check return code. Could send failure - * notice in tfr in export_checkpoint function - * by setting tfr->error - */ - switch (export_checkpoint(cp)) { - case -EEXIST: - LOG_DBG("[%s] Checkpoint for %u already handled by someone else", - SHORT_UUID(entry->name.value), cp->requester); - case 0: - entry->checkpoint_list = cp->next; - free_checkpoint(cp); - cp = entry->checkpoint_list; - break; - default: - /* FIXME: Skipping will cause list corruption */ - LOG_ERROR("[%s] Failed to export checkpoint for %u", - SHORT_UUID(entry->name.value), cp->requester); - } - } + do_checkpoints(entry); } return (r == SA_AIS_OK) ? 0 : -1; /* FIXME: good error number? */ } static void cpg_message_callback(cpg_handle_t handle, struct cpg_name *gname, - uint32_t nodeid, uint32_t pid, void *msg, int msg_len) + uint32_t nodeid, uint32_t pid, + void *msg, int msg_len) { int i; int r = 0; @@ -785,7 +768,7 @@ static void cpg_message_callback(cpg_handle_t handle, struct cpg_name *gname, * get our config callback. However, since we can't respond after * leaving, we simply return. */ - if (match->cpg_state != VALID) + if (match->state == LEAVING) return; i_am_server = (my_cluster_id == match->lowest_id) ? 
1 : 0; @@ -1098,10 +1081,10 @@ static void cpg_leave_callback(struct clog_cpg *match, */ if (!strcmp(match->name.value, tfr->uuid) && (tfr->request_type != DM_CLOG_POSTSUSPEND)){ - LOG_PRINT("[%s] Resending %s due to new server(%u)", + LOG_PRINT("[%s] Resending %s due to new server(%u -> %u)", SHORT_UUID(match->name.value), RQ_TYPE(tfr->request_type), - match->lowest_id); + lowest, match->lowest_id); if (cluster_send(tfr)) LOG_ERROR("Failed resend"); } @@ -1225,11 +1208,11 @@ int destroy_cluster_cpg(char *str) list_for_each_entry_safe(del, tmp, &clog_cpg_list, list) if (!strncmp(del->name.value, str, CPG_MAX_NAME_LENGTH)) { del->cpg_state = INVALID; + del->state = LEAVING; r = cpg_leave(del->handle, &del->name); if (r != CPG_OK) LOG_ERROR("Error leaving CPG!"); break; - del->state = LEAVING; } return 0; hooks/post-receive -- Cluster Project
reply other threads:[~2008-07-18 15:19 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20080718151913.12461.qmail@sourceware.org \ --to=jbrassow@sourceware.org \ --cc=cluster-cvs@sources.redhat.com \ --cc=cluster-devel@redhat.com \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).