This is the mail archive of the cluster-cvs@sourceware.org mailing list for the Cluster Project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Cluster Project branch, master, updated. gfs-kernel_0_1_22-159-gb4c3351


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Cluster Project".

http://sources.redhat.com/git/gitweb.cgi?p=cluster.git;a=commitdiff;h=b4c3351a0850da056f879705a28aead767d78072

The branch, master has been updated
       via  b4c3351a0850da056f879705a28aead767d78072 (commit)
      from  77bce77b5034adf8f00090b13dde7c7d481b0dd9 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit b4c3351a0850da056f879705a28aead767d78072
Author: David Teigland <teigland@redhat.com>
Date:   Fri Apr 11 14:45:41 2008 -0500

    dlm_controld: quorum checking
    
    Fill out the quorum dependency checking, and refine structure of the
    fencing and fs dependency checking which don't actually work yet.
    
    Signed-off-by: David Teigland <teigland@redhat.com>

-----------------------------------------------------------------------

Summary of changes:
 group/dlm_controld/cpg.c         |  107 +++++++++++++++++++++++++++----------
 group/dlm_controld/dlm_daemon.h  |    2 +
 group/dlm_controld/main.c        |    1 +
 group/dlm_controld/member_cman.c |    2 +
 group/dlm_controld/plock.c       |    2 +-
 5 files changed, 84 insertions(+), 30 deletions(-)

diff --git a/group/dlm_controld/cpg.c b/group/dlm_controld/cpg.c
index 21c1a43..afe12bd 100644
--- a/group/dlm_controld/cpg.c
+++ b/group/dlm_controld/cpg.c
@@ -32,7 +32,10 @@ struct member {
 struct node {
 	struct list_head list;
 	int nodeid;
-	int needs_fencing;
+	int check_fencing;
+	int check_quorum;
+	int check_fs;
+	int fs_notify;
 	struct timeval add_time;
 };
 
@@ -266,8 +269,8 @@ static void free_ls(struct lockspace *ls)
    when we see a node not in this list, add entry for it with zero add_time
    record the time we get a good start message from the node, add_time
    clear add_time if the node leaves
-   if node fails with non-zero add_time, set needs_fencing
-   when a node is fenced, clear add_time and clear needs_fencing
+   if node fails with non-zero add_time, set check_fencing
+   when a node is fenced, clear add_time and clear check_fencing
    if a node remerges after this, no good start message, no new add_time set
    if a node fails with zero add_time, it doesn't need fencing
    if a node remerges before it's been fenced, no good start message, no new
@@ -340,63 +343,109 @@ static void node_history_fail(struct lockspace *ls, int nodeid)
 	}
 
 	if (!timerisset(&node->add_time))
-		node->needs_fencing = 1;
+		node->check_fencing = 1;
+
+	node->check_quorum = 1;
+	node->check_fs = 1;
 }
 
-static int failed_nodes_fenced(struct lockspace *ls)
+static int check_fencing_done(struct lockspace *ls)
 {
-#if 0
 	struct node *node;
 	struct timeval last_fenced;
 	int wait_count = 0;
 
 	list_for_each_entry(node, &ls->node_history, list) {
-		if (!node->needs_fencing)
+		if (!node->check_fencing)
 			continue;
 
 		/* check with fenced to see if the node has been
 		   fenced since node->add_time */
 
-		fencedomain_last_success(node->nodeid, &last_fenced);
+		/* fenced_last_success(node->nodeid, &last_fenced); */
+		gettimeofday(&last_fenced, NULL);
 
-		if (last_fenced <= node->add_time) {
+		if (timercmp(&last_fenced, &node->add_time, >)) {
+			node->check_fencing = 0;
+			timerclear(&node->add_time);
+		} else {
+			log_group(ls, "check_fencing %d needs fencing",
+				  node->nodeid);
 			wait_count++;
-			continue;
 		}
-
-		/* node has been fenced */
-		node->needs_fencing = 0;
-		timerclear(&node->add_time);
 	}
 
-	if (wait_count) {
+	if (wait_count)
 		return 0;
-	}
 
 	/* now check if there are any outstanding fencing ops (for nodes
 	   we may not have seen in any lockspace), and return 0 if there
 	   are any */
 
-	fencedomain_pending_count(&pending);
+	/*
+	fenced_pending_count(&pending);
 	if (pending)
 		return 0;
-#endif
+	*/
 	return 1;
 }
 
-static int cluster_has_quorum(struct lockspace *ls)
+static int check_quorum_done(struct lockspace *ls)
 {
-	/* verify cman_last_failure_time() for this node is more recent
-	   than when we last saw the node added; then we know that the
-	   quorum result from cman is accounting for the given failure. */
+	struct node *node;
+	int wait_count = 0;
+
+	if (!cman_quorate) {
+		log_group(ls, "check_quorum %d", cman_quorate);
+		return 0;
+	}
+
+	list_for_each_entry(node, &ls->node_history, list) {
+		if (!node->check_quorum)
+			continue;
+
+		if (!is_cman_member(node->nodeid)) {
+			node->check_quorum = 0;
+		} else {
+			log_group(ls, "check_quorum %d is_cman_member",
+				  node->nodeid);
+			wait_count++;
+		}
+	}
+
+	if (wait_count)
+		return 0;
+
+	log_group(ls, "check_quorum done");
 	return 1;
 }
 
-static int cluster_filesystem_stopped(struct lockspace *ls)
+static int check_fs_done(struct lockspace *ls)
 {
-	/* communicate with fs daemon through the fscontrol:hostname
-	   cpg to check if the fs has been notified of any node failures
-	   in this change */
+	struct node *node;
+	int wait_count = 0;
+
+	/* no corresponding fs for this lockspace */
+	if (!ls->fs_registered)
+		return 1;
+
+	list_for_each_entry(node, &ls->node_history, list) {
+		if (!node->check_fs)
+			continue;
+
+		if (node->fs_notify) {
+			node->check_fs = 0;
+		} else {
+			log_group(ls, "check_fs %d needs fs notify",
+				  node->nodeid);
+			wait_count++;
+		}
+	}
+
+	if (wait_count)
+		return 0;
+
+	log_group(ls, "check_fs done");
 	return 1;
 }
 
@@ -490,7 +539,7 @@ static int wait_conditions_done(struct lockspace *ls)
 	   that have occured since the last change applied to dlm-kernel, not
 	   just the latest change */
 
-	if (!failed_nodes_fenced(ls)) {
+	if (!check_fencing_done(ls)) {
 		poll_fencing = 1;
 		return 0;
 	}
@@ -500,13 +549,13 @@ static int wait_conditions_done(struct lockspace *ls)
 	   sufficient because we don't want to start new lockspaces in an
 	   inquorate cluster */
 
-	if (!cluster_has_quorum(ls)) {
+	if (!check_quorum_done(ls)) {
 		poll_quorum = 1;
 		return 0;
 	}
 	poll_quorum = 0;
 
-	if (!cluster_filesystem_stopped(ls)) {
+	if (!check_fs_done(ls)) {
 		poll_fs = 1;
 		return 0;
 	}
diff --git a/group/dlm_controld/dlm_daemon.h b/group/dlm_controld/dlm_daemon.h
index d5657dd..b969bdc 100644
--- a/group/dlm_controld/dlm_daemon.h
+++ b/group/dlm_controld/dlm_daemon.h
@@ -68,6 +68,7 @@ extern int poll_ignore_plock;
 extern int plock_fd;
 extern int plock_ci;
 extern struct list_head lockspaces;
+extern int cman_quorate;
 extern int our_nodeid;
 extern char daemon_debug_buf[256];
 extern char dump_buf[DUMP_SIZE];
@@ -149,6 +150,7 @@ struct lockspace {
 	int			joining;
 	int			leaving;
 	int			kernel_stopped;
+	int			fs_registered;
 	uint32_t		change_seq;
 	struct change		*started_change;
 	struct list_head	changes;
diff --git a/group/dlm_controld/main.c b/group/dlm_controld/main.c
index 0e4bc15..b954f53 100644
--- a/group/dlm_controld/main.c
+++ b/group/dlm_controld/main.c
@@ -881,6 +881,7 @@ int poll_ignore_plock;
 int plock_fd;
 int plock_ci;
 struct list_head lockspaces;
+int cman_quorate;
 int our_nodeid;
 char daemon_debug_buf[256];
 char dump_buf[DUMP_SIZE];
diff --git a/group/dlm_controld/member_cman.c b/group/dlm_controld/member_cman.c
index 847351a..c871097 100644
--- a/group/dlm_controld/member_cman.c
+++ b/group/dlm_controld/member_cman.c
@@ -71,6 +71,8 @@ static void statechange(void)
 	int num_addrs;
 	struct cman_node_address *addrptr = addrs;
 
+	cman_quorate = cman_is_quorate(ch);
+
 	old_node_count = cman_node_count;
 	memcpy(&old_nodes, &cman_nodes, sizeof(old_nodes));
 
diff --git a/group/dlm_controld/plock.c b/group/dlm_controld/plock.c
index a862356..4dc38ac 100644
--- a/group/dlm_controld/plock.c
+++ b/group/dlm_controld/plock.c
@@ -1816,7 +1816,7 @@ static int _unlink_checkpoint(struct lockspace *ls, SaNameT *name)
 	if (rv == SA_AIS_OK)
 		goto out_close;
 
-	log_error("unlink ckpt error %d %s", rv, ls->name);
+	log_group(ls, "unlink ckpt error %d %s", rv, ls->name);
 	ret = -1;
 
  status_retry:


hooks/post-receive
--
Cluster Project


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]