This is the mail archive of the cluster-cvs@sourceware.org mailing list for the cluster project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

RHEL52 - fence_tool: new option to delay before join


Gitweb:        http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=e4aecceba36426b1a148a49c83d4f32eac351275
Commit:        e4aecceba36426b1a148a49c83d4f32eac351275
Parent:        b3c91c9dd3290c5c571071542c9b539ae4cd9ba0
Author:        David Teigland <teigland@redhat.com>
AuthorDate:    Tue Aug 26 15:50:49 2008 -0500
Committer:     Chris Feist <cfeist@redhat.com>
CommitterDate: Mon Nov 17 13:38:24 2008 -0600

fence_tool: new option to delay before join

bz 460190

Certain network/switch settings cause nodes to form partitioned clusters
when they start up.  Add code to better cope with these initial partitions.
The network partitions are a particular problem for two_node clusters where
a node has quorum when it starts up on its own.

This adds a new fence_tool option -m, e.g. fence_tool join -m <seconds>.
It causes fence_tool to delay the join by up to <seconds> to allow all
nodes in cluster.conf to become cluster members.

This allows openais on the nodes to all see each other before starting
the fence domain. So we join the domain *after* the nodes merge into a
single cluster.  If we joined the domain *before* the cluster partition
merged, then nodes end up being fenced unnecessarily.  (This is a similar
idea to post_join_delay: a delay that gives us time to determine that a
node in an unknown state is actually ok and doesn't require fencing.)

Signed-off-by: David Teigland <teigland@redhat.com>
---
 fence/fence_tool/fence_tool.c |   93 ++++++++++++++++++++++++++++++++++++++++-
 fence/man/fence_tool.8        |    7 ++-
 2 files changed, 96 insertions(+), 4 deletions(-)

diff --git a/fence/fence_tool/fence_tool.c b/fence/fence_tool/fence_tool.c
index a6b002a..0b7ea62 100644
--- a/fence/fence_tool/fence_tool.c
+++ b/fence/fence_tool/fence_tool.c
@@ -37,10 +37,12 @@
 #define FALSE 0
 #endif
 
-#define OPTION_STRING			("Vht:wQ")
+#define OPTION_STRING			("Vht:m:wQ")
 #define FENCED_SOCK_PATH                "fenced_socket"
 #define MAXLINE				256
 
+#define MAX_NODES			128
+
 #define OP_JOIN  			1
 #define OP_LEAVE 			2
 #define OP_WAIT				3
@@ -63,9 +65,15 @@ char *prog_name;
 int operation;
 int child_wait = FALSE;
 int quorum_wait = TRUE;
+int member_wait = 0;
 int fenced_start_timeout = 300; /* five minutes */
 int signalled = 0;
 cman_handle_t ch;
+int all_nodeids[MAX_NODES];
+int all_nodeids_count;
+cman_node_t cman_nodes[MAX_NODES];
+int cman_nodes_count;
+
 
 static int do_write(int fd, void *buf, size_t count)
 {
@@ -245,6 +253,77 @@ static int do_wait(int joining)
 	return -1;
 }
 
+static int all_nodeids_are_members(void)
+{
+	int i, j, rv, found;
+	/* Return 1 only if every id in all_nodeids[] is a cman member, else 0. */
+	cman_nodes_count = 0;
+	memset(&cman_nodes, 0, sizeof(cman_nodes));
+	/* Refresh the global cman_nodes[] snapshot (at most MAX_NODES entries). */
+	rv = cman_get_nodes(ch, MAX_NODES, &cman_nodes_count, cman_nodes);
+	if (rv < 0) {
+		printf("cman_get_nodes error %d %d\n", rv, errno);
+		return 0;	/* a failed query counts as "not all members" */
+	}
+	/* Each expected id needs a matching entry with cn_member set. */
+	for (i = 0; i < all_nodeids_count; i++) {
+		found = 0;
+
+		for (j = 0; j < cman_nodes_count; j++) {
+			if (cman_nodes[j].cn_nodeid == all_nodeids[i] &&
+			    cman_nodes[j].cn_member) {
+				found = 1;
+				break;
+			}
+		}
+
+		if (!found)
+			return 0;
+	}
+	return 1;
+}
+
+static void wait_for_members(void)
+{
+	char path[256];
+	char *nodeid_str;
+	int i = 0, cd, error;
+	/* Wait (up to member_wait secs) for all cluster.conf nodes to be members. */
+	while ((cd = ccs_connect()) < 0) {
+		sleep(1);
+		if (++i > 9 && !(i % 10))
+			printf("connect to ccs error %d %d\n", cd, errno);
+	}
+	/* Rebuild the expected-member list from the clusternode nodeid entries. */
+	memset(all_nodeids, 0, sizeof(all_nodeids));
+	all_nodeids_count = 0;
+	/* Stop at MAX_NODES so the store into all_nodeids[] stays in bounds. */
+	for (i = 1; all_nodeids_count < MAX_NODES; i++) {
+		nodeid_str = NULL;
+		memset(path, 0, sizeof(path));
+		snprintf(path, sizeof(path), "/cluster/clusternodes/clusternode[%d]/@nodeid", i);
+
+		error = ccs_get(cd, path, &nodeid_str);
+		if (error || !nodeid_str)
+			break;
+
+		all_nodeids[all_nodeids_count++] = atoi(nodeid_str);
+		free(nodeid_str);
+	}
+
+	ccs_disconnect(cd);
+	/* Poll once a second, at most member_wait times; log every 5th pass. */
+	for (i = 0; i < member_wait; i++) {
+		if (all_nodeids_are_members())
+			break;
+		if (i && !(i % 5))
+			printf("Waiting for all %d nodes to be members\n",
+				all_nodeids_count);
+		sleep(1);
+	}
+
+}
+
 static int do_join(int argc, char *argv[])
 {
 	int i, fd, rv;
@@ -264,6 +343,10 @@ static int do_join(int argc, char *argv[])
 		cman_finish(ch);
 		return EXIT_FAILURE;
 	}
+
+	if (member_wait)
+		wait_for_members();
+
 	cman_finish(ch);
 
 	i = 0;
@@ -361,10 +444,12 @@ static void print_usage(void)
 	printf("  dump		   Dump debug buffer from fenced\n");
 	printf("\n");
 	printf("Options:\n");
+	printf("  -m <n>           Delay join up to n seconds for all nodes in cluster.conf\n");
+	printf("                   to be cluster members\n");
 	printf("  -w               Wait for join to complete\n");
 	printf("  -V               Print program version information, then exit\n");
 	printf("  -h               Print this help, then exit\n");
-	printf("  -t               Maximum time in seconds to wait\n");
+	printf("  -t <n>           Maximum time in seconds to wait\n");
 	printf("  -Q               Fail if cluster is not quorate, don't wait\n");
 	printf("\n");
 }
@@ -399,6 +484,10 @@ static void decode_arguments(int argc, char *argv[])
 			child_wait = TRUE;
 			break;
 
+		case 'm':
+			member_wait = atoi(optarg);
+			break;
+
 		case ':':
 		case '?':
 			fprintf(stderr, "Please use '-h' for usage.\n");
diff --git a/fence/man/fence_tool.8 b/fence/man/fence_tool.8
index a7ad0c4..7477f6f 100644
--- a/fence/man/fence_tool.8
+++ b/fence/man/fence_tool.8
@@ -27,6 +27,9 @@ it to stdout.
 
 .SH OPTIONS
 .TP
+\fB-m\fP <n>
+Delay join up to n seconds for all nodes in cluster.conf to be cluster members.
+.TP
 \fB-w\fP
 Wait until the join or leave is completed.
 .TP
@@ -36,8 +39,8 @@ Help.  Print out the usage syntax.
 \fB-V\fP
 Print version information.
 .TP
-\fB-t\fP
-Maximum time in seconds to wait (default: 300 seconds)
+\fB-t\fP <n>
+Maximum time in seconds to wait for quorum or -w (default: 300 seconds)
 .TP
 \fB-Q\fP
 Fail command immediately if the cluster is not quorate, don't wait.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]