This is the mail archive of the cluster-cvs@sourceware.org mailing list for the cluster project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

STABLE2 - fence_tool: new option to delay before join


Gitweb:        http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=41a69f04aeaf9aa3f38c899bf55495f04c19831c
Commit:        41a69f04aeaf9aa3f38c899bf55495f04c19831c
Parent:        672db2d7e03d61f4f64a51fdbde4e887054e7839
Author:        David Teigland <teigland@redhat.com>
AuthorDate:    Tue Aug 26 15:50:49 2008 -0500
Committer:     David Teigland <teigland@redhat.com>
CommitterDate: Wed Aug 27 10:55:36 2008 -0500

fence_tool: new option to delay before join

bz 460190

Certain network/switch settings cause nodes to form partitioned clusters
when they start up.  Add code to better cope with these initial partitions.
The network partitions are a particular problem for two_node clusters where
a node has quorum when it starts up on its own.

This adds a new fence_tool option -m, e.g. fence_tool join -m <seconds>.
It causes fence_tool to delay the join by up to <seconds> to allow all
nodes in cluster.conf to become cluster members.

This gives openais on all of the nodes time to see each other before the
fence domain is started, so we join the domain *after* the nodes have merged
into a single cluster.  If we joined the domain *before* the cluster
partitions merged, nodes would end up being fenced unnecessarily.  (This is
similar in idea to post_join_delay: a delay that gives us time to determine
that a node in an unknown state is actually ok and doesn't require fencing.)

Signed-off-by: David Teigland <teigland@redhat.com>
---
 fence/fence_tool/fence_tool.c |   93 ++++++++++++++++++++++++++++++++++++++++-
 fence/man/fence_tool.8        |    7 ++-
 2 files changed, 96 insertions(+), 4 deletions(-)

diff --git a/fence/fence_tool/fence_tool.c b/fence/fence_tool/fence_tool.c
index 7a4fe27..60d47eb 100644
--- a/fence/fence_tool/fence_tool.c
+++ b/fence/fence_tool/fence_tool.c
@@ -25,10 +25,12 @@
 #define FALSE 0
 #endif
 
-#define OPTION_STRING			("Vht:wQ")
+#define OPTION_STRING			("Vht:m:wQ")
 #define FENCED_SOCK_PATH                "fenced_socket"
 #define MAXLINE				256
 
+#define MAX_NODES			128
+
 #define OP_JOIN  			1
 #define OP_LEAVE 			2
 #define OP_WAIT				3
@@ -51,9 +53,15 @@ char *prog_name;
 int operation;
 int child_wait = FALSE;
 int quorum_wait = TRUE;
+int member_wait = 0;
 int fenced_start_timeout = 300; /* five minutes */
 int signalled = 0;
 cman_handle_t ch;
+int all_nodeids[MAX_NODES];
+int all_nodeids_count;
+cman_node_t cman_nodes[MAX_NODES];
+int cman_nodes_count;
+
 
 static int do_write(int fd, void *buf, size_t count)
 {
@@ -233,6 +241,77 @@ static int do_wait(int joining)
 	return -1;
 }
 
+static int all_nodeids_are_members(void)
+{
+	int i, j, rv, found;
+
+	cman_nodes_count = 0;
+	memset(&cman_nodes, 0, sizeof(cman_nodes));
+
+	rv = cman_get_nodes(ch, MAX_NODES, &cman_nodes_count, cman_nodes);
+	if (rv < 0) {
+		printf("cman_get_nodes error %d %d\n", rv, errno);
+		return 0;
+	}
+
+	for (i = 0; i < all_nodeids_count; i++) {
+		found = 0;
+
+		for (j = 0; j < cman_nodes_count; j++) {
+			if (cman_nodes[j].cn_nodeid == all_nodeids[i] &&
+			    cman_nodes[j].cn_member) {
+				found = 1;
+				break;
+			}
+		}
+
+		if (!found)
+			return 0;
+	}
+	return 1;
+}
+
+static void wait_for_members(void)
+{
+	char path[256];
+	char *nodeid_str;
+	int i = 0, cd, error;
+
+	while ((cd = ccs_connect()) < 0) {
+		sleep(1);
+		if (++i > 9 && !(i % 10))
+			printf("connect to ccs error %d %d\n", cd, errno);
+	}
+
+	memset(all_nodeids, 0, sizeof(all_nodeids));
+	all_nodeids_count = 0;
+
+	for (i = 1; ; i++) {
+		nodeid_str = NULL;
+		memset(path, 0, 256);
+		sprintf(path, "/cluster/clusternodes/clusternode[%d]/@nodeid", i);
+
+		error = ccs_get(cd, path, &nodeid_str);
+		if (error || !nodeid_str)
+			break;
+
+		all_nodeids[all_nodeids_count++] = atoi(nodeid_str);
+		free(nodeid_str);
+	}
+
+	ccs_disconnect(cd);
+
+	for (i = 0; i < member_wait; i++) {
+		if (all_nodeids_are_members())
+			break;
+		if (i && !(i % 5))
+			printf("Waiting for all %d nodes to be members\n",
+				all_nodeids_count);
+		sleep(1);
+	}
+
+}
+
 static int do_join(int argc, char *argv[])
 {
 	int i, fd, rv;
@@ -252,6 +331,10 @@ static int do_join(int argc, char *argv[])
 		cman_finish(ch);
 		return EXIT_FAILURE;
 	}
+
+	if (member_wait)
+		wait_for_members();
+
 	cman_finish(ch);
 
 	i = 0;
@@ -349,10 +432,12 @@ static void print_usage(void)
 	printf("  dump		   Dump debug buffer from fenced\n");
 	printf("\n");
 	printf("Options:\n");
+	printf("  -m <n>           Delay join up to n seconds for all nodes in cluster.conf\n");
+	printf("                   to be cluster members\n");
 	printf("  -w               Wait for join to complete\n");
 	printf("  -V               Print program version information, then exit\n");
 	printf("  -h               Print this help, then exit\n");
-	printf("  -t               Maximum time in seconds to wait\n");
+	printf("  -t <n>           Maximum time in seconds to wait\n");
 	printf("  -Q               Fail if cluster is not quorate, don't wait\n");
 	printf("\n");
 }
@@ -387,6 +472,10 @@ static void decode_arguments(int argc, char *argv[])
 			child_wait = TRUE;
 			break;
 
+		case 'm':
+			member_wait = atoi(optarg);
+			break;
+
 		case ':':
 		case '?':
 			fprintf(stderr, "Please use '-h' for usage.\n");
diff --git a/fence/man/fence_tool.8 b/fence/man/fence_tool.8
index 73867cb..2a35240 100644
--- a/fence/man/fence_tool.8
+++ b/fence/man/fence_tool.8
@@ -20,6 +20,9 @@ it to stdout.
 
 .SH OPTIONS
 .TP
+\fB-m\fP <n>
+Delay join up to n seconds for all nodes in cluster.conf to be cluster members.
+.TP
 \fB-w\fP
 Wait until the join or leave is completed.
 .TP
@@ -29,8 +32,8 @@ Help.  Print out the usage syntax.
 \fB-V\fP
 Print version information.
 .TP
-\fB-t\fP
-Maximum time in seconds to wait (default: 300 seconds)
+\fB-t\fP <n>
+Maximum time in seconds to wait for quorum or -w (default: 300 seconds)
 .TP
 \fB-Q\fP
 Fail command immediately if the cluster is not quorate, don't wait.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]