This is the mail archive of the cluster-cvs@sourceware.org mailing list for the cluster project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

master - dlm/fence: daemon fixes and tool improvements


Gitweb:        http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=b2364c1b1b2b7b79469ae637d68f65e3631c9004
Commit:        b2364c1b1b2b7b79469ae637d68f65e3631c9004
Parent:        85049a0824daa9abaa38f5dca377767907b53b39
Author:        David Teigland <teigland@redhat.com>
AuthorDate:    Fri Oct 10 15:35:48 2008 -0500
Committer:     David Teigland <teigland@redhat.com>
CommitterDate: Fri Oct 10 15:46:53 2008 -0500

dlm/fence: daemon fixes and tool improvements

fence_tool/dlm_tool: improve info in ls output
fenced/dlm_controld: fix confchg/message processing, must be done after
each individual confchg/message
dlm_controld: fix fencing checks which weren't happening
dlm_controld: improvements to recovery debug messages

Signed-off-by: David Teigland <teigland@redhat.com>
---
 dlm/libdlmcontrol/libdlmcontrol.h |    1 -
 dlm/tool/main.c                   |   97 ++++++++++++++++++++++---------------
 fence/fence_tool/fence_tool.c     |   11 ++--
 fence/fenced/cpg.c                |    6 ++-
 group/dlm_controld/cpg.c          |   48 ++++++++++++++----
 group/dlm_controld/main.c         |    2 +
 group/gfs_control/main.c          |   12 ++--
 group/gfs_controld/cpg-new.c      |   14 +++---
 group/tool/main.c                 |    6 +-
 9 files changed, 122 insertions(+), 75 deletions(-)

diff --git a/dlm/libdlmcontrol/libdlmcontrol.h b/dlm/libdlmcontrol/libdlmcontrol.h
index 9c95c5e..c85bd3f 100644
--- a/dlm/libdlmcontrol/libdlmcontrol.h
+++ b/dlm/libdlmcontrol/libdlmcontrol.h
@@ -9,7 +9,6 @@
 #define DLMC_NF_CHECK_FENCING	0x00000008
 #define DLMC_NF_CHECK_QUORUM	0x00000010
 #define DLMC_NF_CHECK_FS	0x00000020
-#define DLMC_NF_FS_NOTIFIED	0x00000040
 
 struct dlmc_node {
 	int nodeid;
diff --git a/dlm/tool/main.c b/dlm/tool/main.c
index b274da0..ac50d76 100644
--- a/dlm/tool/main.c
+++ b/dlm/tool/main.c
@@ -33,7 +33,7 @@ static char *prog_name;
 static char *lsname;
 static int operation;
 static int opt_ind;
-static int verbose;
+static int ls_all_nodes = 0;
 static int opt_dir = 0;
 static int opt_excl = 0;
 static int opt_fs = 0;
@@ -55,7 +55,7 @@ static void print_usage(void)
 	       "                    ls | dump | plocks | deadlock_check]\n");
 	printf("\n");
 	printf("Options:\n");
-	printf("  -v               Verbose output\n");
+	printf("  -n               Show all node information in ls\n");
 	printf("  -d <n>           Resource directory off/on (0/1) in join, default 0\n");
 #ifdef LINUX2628rc
 	printf("  -e <n>           Exclusive create off/on (0/1) in join, default 0\n");
@@ -69,7 +69,7 @@ static void print_usage(void)
 	printf("\n");
 }
 
-#define OPTION_STRING "MhVvd:m:e:f:"
+#define OPTION_STRING "MhVnd:m:e:f:"
 
 static void decode_arguments(int argc, char **argv)
 {
@@ -104,8 +104,8 @@ static void decode_arguments(int argc, char **argv)
 			dump_mstcpy = 1;
 			break;
 
-		case 'v':
-			verbose = 1;
+		case 'n':
+			ls_all_nodes = 1;
 			break;
 
 		case 'h':
@@ -466,53 +466,56 @@ void do_lockdump(char *name)
 	fclose(file);
 }
 
-char *dlmc_lf_str(uint32_t flags)
+static char *dlmc_lf_str(uint32_t flags)
 {
 	static char str[128];
+	int i = 0;
 
 	memset(str, 0, sizeof(str));
 
-	if (flags & DLMC_LF_JOINING)
-		strcat(str, "joining ");
-	if (flags & DLMC_LF_LEAVING)
-		strcat(str, "leaving ");
-	if (flags & DLMC_LF_KERNEL_STOPPED)
-		strcat(str, "kernel_stopped ");
-	if (flags & DLMC_LF_FS_REGISTERED)
-		strcat(str, "fs_registered ");
-	if (flags & DLMC_LF_NEED_PLOCKS)
-		strcat(str, "need_plocks ");
-	if (flags & DLMC_LF_SAVE_PLOCKS)
-		strcat(str, "save_plocks ");
+	if (flags & DLMC_LF_SAVE_PLOCKS) {
+		i++;
+		strcat(str, "save_plock");
+	}
+	if (flags & DLMC_LF_NEED_PLOCKS) {
+		strcat(str, i++ ? "," : "");
+		strcat(str, "need_plock");
+	}
+	if (flags & DLMC_LF_FS_REGISTERED) {
+		strcat(str, i++ ? "," : "");
+		strcat(str, "fs_reg");
+	}
+	if (flags & DLMC_LF_KERNEL_STOPPED) {
+		strcat(str, i++ ? "," : "");
+		strcat(str, "kern_stop");
+	}
+	if (flags & DLMC_LF_LEAVING) {
+		strcat(str, i++ ? "," : "");
+		strcat(str, "leave");
+	}
+	if (flags & DLMC_LF_JOINING) {
+		strcat(str, i++ ? "," : "");
+		strcat(str, "join");
+	}
 
 	return str;
 }
 
-char *dlmc_nf_str(uint32_t flags)
+static char *nf_check_str(uint32_t flags)
 {
-	static char str[128];
-
-	memset(str, 0, sizeof(str));
-
-	if (flags & DLMC_NF_MEMBER)
-		strcat(str, "member ");
-	if (flags & DLMC_NF_START)
-		strcat(str, "start ");
-	if (flags & DLMC_NF_DISALLOWED)
-		strcat(str, "disallowed ");
 	if (flags & DLMC_NF_CHECK_FENCING)
-		strcat(str, "check_fencing ");
+		return "fence";
+
 	if (flags & DLMC_NF_CHECK_QUORUM)
-		strcat(str, "check_quorum ");
+		return "quorum";
+
 	if (flags & DLMC_NF_CHECK_FS)
-		strcat(str, "check_fs ");
-	if (flags & DLMC_NF_FS_NOTIFIED)
-		strcat(str, "fs_notified");
+		return "fs";
 
-	return str;
+	return "none";
 }
 
-char *condition_str(int cond)
+static char *condition_str(int cond)
 {
 	switch (cond) {
 	case 0:
@@ -603,15 +606,29 @@ static void show_ls(struct dlmc_lockspace *ls)
 	show_nodeids(node_count, nodes);
 }
 
+static int member_int(struct dlmc_node *n)
+{
+	if (n->flags & DLMC_NF_DISALLOWED)
+		return -1;
+	if (n->flags & DLMC_NF_MEMBER)
+		return 1;
+	return 0;
+}
+
 static void show_all_nodes(int count, struct dlmc_node *nodes)
 {
 	struct dlmc_node *n = nodes;
 	int i;
 
 	for (i = 0; i < count; i++) {
-		printf("nodeid %d add_seq %u rem_seq %u failed %d flags 0x%x %s\n",
-			n->nodeid, n->added_seq, n->removed_seq,
-			n->failed_reason, n->flags, dlmc_nf_str(n->flags));
+		printf("nodeid %d member %d failed %d start %d seq_add %u seq_rem %u check %s\n",
+			n->nodeid,
+			member_int(n),
+			n->failed_reason,
+			(n->flags & DLMC_NF_START) ? 1 : 0,
+			n->added_seq,
+			n->removed_seq,
+			nf_check_str(n->flags));
 		n++;
 	}
 }
@@ -645,7 +662,7 @@ static void do_list(char *name)
 
 		show_ls(ls);
 
-		if (!verbose)
+		if (!ls_all_nodes)
 			goto next;
 
 		node_count = 0;
diff --git a/fence/fence_tool/fence_tool.c b/fence/fence_tool/fence_tool.c
index e12c398..93f8e7c 100644
--- a/fence/fence_tool/fence_tool.c
+++ b/fence/fence_tool/fence_tool.c
@@ -36,7 +36,7 @@ int cman_nodes_count;
 struct fenced_node nodes[MAX_NODES];
 char *prog_name;
 int operation;
-int verbose = 0;
+int ls_all_nodes = 0;
 int inquorate_fail = 0;
 int wait_join = 0;			 /* default: don't wait for join */
 int wait_leave = 0;			 /* default: don't wait for leave */
@@ -452,7 +452,7 @@ static int do_list(void)
 	}
 	printf("\n");
 
-	if (!verbose) {
+	if (!ls_all_nodes) {
 		printf("\n");
 		exit(EXIT_SUCCESS);
 	}
@@ -500,6 +500,7 @@ static void print_usage(void)
 	printf("  dump		   Dump debug buffer from fenced\n");
 	printf("\n");
 	printf("Options:\n");
+	printf("  -n               Show all node information in ls\n");
 	printf("  -m <seconds>     Delay join up to <seconds> for all nodes in cluster.conf\n");
 	printf("                   to be cluster members\n");
 	printf("  -w               Wait for join or leave to complete\n");
@@ -510,7 +511,7 @@ static void print_usage(void)
 	printf("\n");
 }
 
-#define OPTION_STRING "vVht:wQm:"
+#define OPTION_STRING "Vht:wQm:n"
 
 static void decode_arguments(int argc, char *argv[])
 {
@@ -529,8 +530,8 @@ static void decode_arguments(int argc, char *argv[])
 			exit(EXIT_SUCCESS);
 			break;
 
-		case 'v':
-			verbose++;
+		case 'n':
+			ls_all_nodes = 1;
 			break;
 
 		case 'h':
diff --git a/fence/fenced/cpg.c b/fence/fenced/cpg.c
index 6d51a78..2168995 100644
--- a/fence/fenced/cpg.c
+++ b/fence/fenced/cpg.c
@@ -1247,6 +1247,8 @@ static void confchg_cb(cpg_handle_t handle, struct cpg_name *group_name,
 
 	if (cg->we_joined)
 		add_victims_init(fd, cg);
+
+	apply_changes(fd);
 }
 
 static void fd_header_in(struct fd_header *hd)
@@ -1313,6 +1315,8 @@ static void deliver_cb(cpg_handle_t handle, struct cpg_name *group_name,
 	default:
 		log_error("unknown msg type %d", hd->type);
 	}
+
+	apply_changes(fd);
 }
 
 static cpg_callbacks_t cpg_callbacks = {
@@ -1336,8 +1340,6 @@ static void process_fd_cpg(int ci)
 		log_error("cpg_dispatch error %d", error);
 		return;
 	}
-
-	apply_changes(fd);
 }
 
 int fd_join(struct fd *fd)
diff --git a/group/dlm_controld/cpg.c b/group/dlm_controld/cpg.c
index 1d3a369..2b9270e 100644
--- a/group/dlm_controld/cpg.c
+++ b/group/dlm_controld/cpg.c
@@ -47,6 +47,8 @@ struct node {
 	int check_fs;
 	int fs_notified;
 	uint64_t add_time;
+	uint64_t fence_time;	/* for debug */
+	uint32_t fence_queries;	/* for debug */
 	uint32_t added_seq;	/* for queries */
 	uint32_t removed_seq;	/* for queries */
 	int failed_reason;	/* for queries */
@@ -401,8 +403,11 @@ static void node_history_fail(struct lockspace *ls, int nodeid,
 		return;
 	}
 
-	if (cfgd_enable_fencing && !node->add_time)
+	if (cfgd_enable_fencing && node->add_time) {
 		node->check_fencing = 1;
+		node->fence_time = 0;
+		node->fence_queries = 0;
+	}
 
 	/* fenced will take care of making sure the quorum value
 	   is adjusted for all the failures */
@@ -410,7 +415,8 @@ static void node_history_fail(struct lockspace *ls, int nodeid,
 	if (cfgd_enable_quorum && !cfgd_enable_fencing)
 		node->check_quorum = 1;
 
-	node->check_fs = 1;
+	if (ls->fs_registered)
+		node->check_fs = 1;
 
 	node->removed_seq = cg->seq;	/* for queries */
 	node->failed_reason = reason;	/* for queries */
@@ -423,8 +429,10 @@ static int check_fencing_done(struct lockspace *ls)
 	int in_progress, wait_count = 0;
 	int rv;
 
-	if (!cfgd_enable_fencing)
+	if (!cfgd_enable_fencing) {
+		log_group(ls, "check_fencing disabled");
 		return 1;
+	}
 
 	list_for_each_entry(node, &ls->node_history, list) {
 		if (!node->check_fencing)
@@ -438,11 +446,23 @@ static int check_fencing_done(struct lockspace *ls)
 			log_error("fenced_node_info error %d", rv);
 
 		if (last_fenced_time > node->add_time) {
+			log_group(ls, "check_fencing %d %llu fenced at %llu",
+				  node->nodeid,
+				  (unsigned long long)node->add_time,
+				  (unsigned long long)last_fenced_time);
 			node->check_fencing = 0;
 			node->add_time = 0;
+			node->fence_time = last_fenced_time;
 		} else {
-			log_group(ls, "check_fencing %d needs fencing",
-				  node->nodeid);
+			if (!node->fence_queries ||
+			    node->fence_time != last_fenced_time) {
+				log_group(ls, "check_fencing %d not fenced "
+					  "add %llu fence %llu", node->nodeid,
+					 (unsigned long long)node->add_time,
+					 (unsigned long long)last_fenced_time);
+				node->fence_queries++;
+				node->fence_time = last_fenced_time;
+			}
 			wait_count++;
 		}
 	}
@@ -462,6 +482,8 @@ static int check_fencing_done(struct lockspace *ls)
 
 	if (in_progress)
 		return 0;
+
+	log_group(ls, "check_fencing done");
 	return 1;
 }
 
@@ -470,8 +492,10 @@ static int check_quorum_done(struct lockspace *ls)
 	struct node *node;
 	int wait_count = 0;
 
-	if (!cfgd_enable_quorum)
+	if (!cfgd_enable_quorum) {
+		log_group(ls, "check_quorum disabled");
 		return 1;
+	}
 
 	/* wait for quorum system (cman) to see all the same nodes failed, so
 	   we know that cluster_quorate is adjusted for the same failures we've
@@ -510,8 +534,10 @@ static int check_fs_done(struct lockspace *ls)
 	int wait_count = 0;
 
 	/* no corresponding fs for this lockspace */
-	if (!ls->fs_registered)
+	if (!ls->fs_registered) {
+		log_group(ls, "check_fs none registered");
 		return 1;
+	}
 
 	list_for_each_entry(node, &ls->node_history, list) {
 		if (!node->check_fs)
@@ -1301,6 +1327,8 @@ static void confchg_cb(cpg_handle_t handle, struct cpg_name *group_name,
 		       left_list, left_list_entries,
 		       joined_list, joined_list_entries);
 #endif
+
+	apply_changes(ls);
 }
 
 static void dlm_header_in(struct dlm_header *hd)
@@ -1396,6 +1424,8 @@ static void deliver_cb(cpg_handle_t handle, struct cpg_name *group_name,
 	default:
 		log_error("unknown msg type %d", hd->type);
 	}
+
+	apply_changes(ls);
 }
 
 static cpg_callbacks_t cpg_callbacks = {
@@ -1445,8 +1475,6 @@ static void process_lockspace_cpg(int ci)
 		return;
 	}
 
-	apply_changes(ls);
-
 	update_flow_control_status();
 }
 
@@ -2149,8 +2177,6 @@ static int _set_node_info(struct lockspace *ls, struct change *cg, int nodeid,
 		node->flags |= DLMC_NF_CHECK_QUORUM;
 	if (n->check_fs)
 		node->flags |= DLMC_NF_CHECK_FS;
-	if (n->fs_notified)
-		node->flags |= DLMC_NF_FS_NOTIFIED;
 
 	node->added_seq = n->added_seq;
 	node->removed_seq = n->removed_seq;
diff --git a/group/dlm_controld/main.c b/group/dlm_controld/main.c
index f3ab1f1..53edeb8 100644
--- a/group/dlm_controld/main.c
+++ b/group/dlm_controld/main.c
@@ -531,6 +531,8 @@ static void query_node_info(int fd, char *name, int nodeid)
 		goto out;
 	}
 
+	memset(&node, 0, sizeof(node));
+
 	if (group_mode == GROUP_LIBGROUP)
 		rv = set_node_info_group(ls, nodeid, &node);
 	else
diff --git a/group/gfs_control/main.c b/group/gfs_control/main.c
index 650e144..7b90987 100644
--- a/group/gfs_control/main.c
+++ b/group/gfs_control/main.c
@@ -14,7 +14,7 @@
 
 #include "libgfscontrol.h"
 
-#define OPTION_STRING			"vhV"
+#define OPTION_STRING			"nhV"
 
 #define OP_LIST				1
 #define OP_DUMP				2
@@ -27,7 +27,7 @@ static char *prog_name;
 static char *fsname;
 static int operation;
 static int opt_ind;
-static int verbose;
+static int ls_all_nodes;
 
 #define MAX_MG 128
 #define MAX_NODES 128
@@ -43,7 +43,7 @@ static void print_usage(void)
 	printf("%s [options] [ls|dump|plocks]\n", prog_name);
 	printf("\n");
 	printf("Options:\n");
-	printf("  -v               Verbose output\n");
+	printf("  -n               Show all node information in ls\n");
 	printf("  -h               Print this help, then exit\n");
 	printf("  -V               Print program version information, then exit\n");
 	printf("\n");
@@ -59,8 +59,8 @@ static void decode_arguments(int argc, char **argv)
 		optchar = getopt(argc, argv, OPTION_STRING);
 
 		switch (optchar) {
-		case 'v':
-			verbose = 1;
+		case 'n':
+			ls_all_nodes = 1;
 			break;
 
 		case 'h':
@@ -354,7 +354,7 @@ static void do_list(char *name)
 
 		show_mg(mg);
 
-		if (!verbose)
+		if (!ls_all_nodes)
 			goto next;
 
 		node_count = 0;
diff --git a/group/gfs_controld/cpg-new.c b/group/gfs_controld/cpg-new.c
index 839ff4a..bd8bc67 100644
--- a/group/gfs_controld/cpg-new.c
+++ b/group/gfs_controld/cpg-new.c
@@ -269,7 +269,7 @@ static int daemon_member_count;
     would let everyone start again.]
 */
 
-static void process_mountgroup(struct mountgroup *mg);
+static void apply_changes_recovery(struct mountgroup *mg);
 static void send_withdraw_acks(struct mountgroup *mg);
 static void leave_mountgroup(struct mountgroup *mg, int mnterr);
 
@@ -700,7 +700,7 @@ void process_dlmcontrol(int ci)
 
 	poll_dlm = 0;
 
-	process_mountgroup(mg);
+	apply_changes_recovery(mg);
 }
 
 static int check_dlm_notify_done(struct mountgroup *mg)
@@ -2184,7 +2184,7 @@ void process_recovery_uevent(char *table)
 		}
 	}
 
-	process_mountgroup(mg);
+	apply_changes_recovery(mg);
 }
 
 static void start_journal_recovery(struct mountgroup *mg, int jid)
@@ -2301,7 +2301,7 @@ static void apply_recovery(struct mountgroup *mg)
 	}
 }
 
-static void process_mountgroup(struct mountgroup *mg)
+static void apply_changes_recovery(struct mountgroup *mg)
 {
 	if (!list_empty(&mg->changes))
 		apply_changes(mg);
@@ -2315,7 +2315,7 @@ void process_mountgroups(void)
 	struct mountgroup *mg, *safe;
 
 	list_for_each_entry_safe(mg, safe, &mountgroups, list)
-		process_mountgroup(mg);
+		apply_changes_recovery(mg);
 }
 
 static int add_change(struct mountgroup *mg,
@@ -2476,7 +2476,7 @@ static void confchg_cb(cpg_handle_t handle, struct cpg_name *group_name,
 	if (rv)
 		return;
 
-	process_mountgroup(mg);
+	apply_changes_recovery(mg);
 }
 
 static void gfs_header_in(struct gfs_header *hd)
@@ -2564,7 +2564,7 @@ static void deliver_cb(cpg_handle_t handle, struct cpg_name *group_name,
 		log_error("unknown msg type %d", hd->type);
 	}
 
-	process_mountgroup(mg);
+	apply_changes_recovery(mg);
 }
 
 static cpg_callbacks_t cpg_callbacks = {
diff --git a/group/tool/main.c b/group/tool/main.c
index 410943b..9feb36b 100644
--- a/group/tool/main.c
+++ b/group/tool/main.c
@@ -610,9 +610,9 @@ int main(int argc, char **argv)
 	case OP_LIST:
 		if (all_daemons) {
 			if (verbose) {
-				system("fence_tool ls -v");
-				system("dlm_tool ls -v");
-				system("gfs_control ls -v");
+				system("fence_tool ls -n");
+				system("dlm_tool ls -n");
+				system("gfs_control ls -n");
 			} else {
 				system("fence_tool ls");
 				system("dlm_tool ls");


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]