This is the mail archive of the
cluster-cvs@sourceware.org
mailing list for the cluster project.
master - dlm/fence: daemon fixes and tool improvements
- From: David Teigland <teigland at fedoraproject dot org>
- To: cluster-cvs-relay at redhat dot com
- Date: Fri, 10 Oct 2008 21:00:40 +0000 (UTC)
- Subject: master - dlm/fence: daemon fixes and tool improvements
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=b2364c1b1b2b7b79469ae637d68f65e3631c9004
Commit: b2364c1b1b2b7b79469ae637d68f65e3631c9004
Parent: 85049a0824daa9abaa38f5dca377767907b53b39
Author: David Teigland <teigland@redhat.com>
AuthorDate: Fri Oct 10 15:35:48 2008 -0500
Committer: David Teigland <teigland@redhat.com>
CommitterDate: Fri Oct 10 15:46:53 2008 -0500
dlm/fence: daemon fixes and tool improvements
fence_tool/dlm_tool: improve info in ls output
fenced/dlm_controld: fix confchg/message processing, must be done after
each individual confchg/message
dlm_controld: fix fencing checks which weren't happening
dlm_controld: improvements to recovery debug messages
Signed-off-by: David Teigland <teigland@redhat.com>
---
dlm/libdlmcontrol/libdlmcontrol.h | 1 -
dlm/tool/main.c | 97 ++++++++++++++++++++++---------------
fence/fence_tool/fence_tool.c | 11 ++--
fence/fenced/cpg.c | 6 ++-
group/dlm_controld/cpg.c | 48 ++++++++++++++----
group/dlm_controld/main.c | 2 +
group/gfs_control/main.c | 12 ++--
group/gfs_controld/cpg-new.c | 14 +++---
group/tool/main.c | 6 +-
9 files changed, 122 insertions(+), 75 deletions(-)
diff --git a/dlm/libdlmcontrol/libdlmcontrol.h b/dlm/libdlmcontrol/libdlmcontrol.h
index 9c95c5e..c85bd3f 100644
--- a/dlm/libdlmcontrol/libdlmcontrol.h
+++ b/dlm/libdlmcontrol/libdlmcontrol.h
@@ -9,7 +9,6 @@
#define DLMC_NF_CHECK_FENCING 0x00000008
#define DLMC_NF_CHECK_QUORUM 0x00000010
#define DLMC_NF_CHECK_FS 0x00000020
-#define DLMC_NF_FS_NOTIFIED 0x00000040
struct dlmc_node {
int nodeid;
diff --git a/dlm/tool/main.c b/dlm/tool/main.c
index b274da0..ac50d76 100644
--- a/dlm/tool/main.c
+++ b/dlm/tool/main.c
@@ -33,7 +33,7 @@ static char *prog_name;
static char *lsname;
static int operation;
static int opt_ind;
-static int verbose;
+static int ls_all_nodes = 0;
static int opt_dir = 0;
static int opt_excl = 0;
static int opt_fs = 0;
@@ -55,7 +55,7 @@ static void print_usage(void)
" ls | dump | plocks | deadlock_check]\n");
printf("\n");
printf("Options:\n");
- printf(" -v Verbose output\n");
+ printf(" -n Show all node information in ls\n");
printf(" -d <n> Resource directory off/on (0/1) in join, default 0\n");
#ifdef LINUX2628rc
printf(" -e <n> Exclusive create off/on (0/1) in join, default 0\n");
@@ -69,7 +69,7 @@ static void print_usage(void)
printf("\n");
}
-#define OPTION_STRING "MhVvd:m:e:f:"
+#define OPTION_STRING "MhVnd:m:e:f:"
static void decode_arguments(int argc, char **argv)
{
@@ -104,8 +104,8 @@ static void decode_arguments(int argc, char **argv)
dump_mstcpy = 1;
break;
- case 'v':
- verbose = 1;
+ case 'n':
+ ls_all_nodes = 1;
break;
case 'h':
@@ -466,53 +466,56 @@ void do_lockdump(char *name)
fclose(file);
}
-char *dlmc_lf_str(uint32_t flags)
+static char *dlmc_lf_str(uint32_t flags)
{
static char str[128];
+ int i = 0;
memset(str, 0, sizeof(str));
- if (flags & DLMC_LF_JOINING)
- strcat(str, "joining ");
- if (flags & DLMC_LF_LEAVING)
- strcat(str, "leaving ");
- if (flags & DLMC_LF_KERNEL_STOPPED)
- strcat(str, "kernel_stopped ");
- if (flags & DLMC_LF_FS_REGISTERED)
- strcat(str, "fs_registered ");
- if (flags & DLMC_LF_NEED_PLOCKS)
- strcat(str, "need_plocks ");
- if (flags & DLMC_LF_SAVE_PLOCKS)
- strcat(str, "save_plocks ");
+ if (flags & DLMC_LF_SAVE_PLOCKS) {
+ i++;
+ strcat(str, "save_plock");
+ }
+ if (flags & DLMC_LF_NEED_PLOCKS) {
+ strcat(str, i++ ? "," : "");
+ strcat(str, "need_plock");
+ }
+ if (flags & DLMC_LF_FS_REGISTERED) {
+ strcat(str, i++ ? "," : "");
+ strcat(str, "fs_reg");
+ }
+ if (flags & DLMC_LF_KERNEL_STOPPED) {
+ strcat(str, i++ ? "," : "");
+ strcat(str, "kern_stop");
+ }
+ if (flags & DLMC_LF_LEAVING) {
+ strcat(str, i++ ? "," : "");
+ strcat(str, "leave");
+ }
+ if (flags & DLMC_LF_JOINING) {
+ strcat(str, i++ ? "," : "");
+ strcat(str, "join");
+ }
return str;
}
-char *dlmc_nf_str(uint32_t flags)
+static char *nf_check_str(uint32_t flags)
{
- static char str[128];
-
- memset(str, 0, sizeof(str));
-
- if (flags & DLMC_NF_MEMBER)
- strcat(str, "member ");
- if (flags & DLMC_NF_START)
- strcat(str, "start ");
- if (flags & DLMC_NF_DISALLOWED)
- strcat(str, "disallowed ");
if (flags & DLMC_NF_CHECK_FENCING)
- strcat(str, "check_fencing ");
+ return "fence";
+
if (flags & DLMC_NF_CHECK_QUORUM)
- strcat(str, "check_quorum ");
+ return "quorum";
+
if (flags & DLMC_NF_CHECK_FS)
- strcat(str, "check_fs ");
- if (flags & DLMC_NF_FS_NOTIFIED)
- strcat(str, "fs_notified");
+ return "fs";
- return str;
+ return "none";
}
-char *condition_str(int cond)
+static char *condition_str(int cond)
{
switch (cond) {
case 0:
@@ -603,15 +606,29 @@ static void show_ls(struct dlmc_lockspace *ls)
show_nodeids(node_count, nodes);
}
+static int member_int(struct dlmc_node *n)
+{
+ if (n->flags & DLMC_NF_DISALLOWED)
+ return -1;
+ if (n->flags & DLMC_NF_MEMBER)
+ return 1;
+ return 0;
+}
+
static void show_all_nodes(int count, struct dlmc_node *nodes)
{
struct dlmc_node *n = nodes;
int i;
for (i = 0; i < count; i++) {
- printf("nodeid %d add_seq %u rem_seq %u failed %d flags 0x%x %s\n",
- n->nodeid, n->added_seq, n->removed_seq,
- n->failed_reason, n->flags, dlmc_nf_str(n->flags));
+ printf("nodeid %d member %d failed %d start %d seq_add %u seq_rem %u check %s\n",
+ n->nodeid,
+ member_int(n),
+ n->failed_reason,
+ (n->flags & DLMC_NF_START) ? 1 : 0,
+ n->added_seq,
+ n->removed_seq,
+ nf_check_str(n->flags));
n++;
}
}
@@ -645,7 +662,7 @@ static void do_list(char *name)
show_ls(ls);
- if (!verbose)
+ if (!ls_all_nodes)
goto next;
node_count = 0;
diff --git a/fence/fence_tool/fence_tool.c b/fence/fence_tool/fence_tool.c
index e12c398..93f8e7c 100644
--- a/fence/fence_tool/fence_tool.c
+++ b/fence/fence_tool/fence_tool.c
@@ -36,7 +36,7 @@ int cman_nodes_count;
struct fenced_node nodes[MAX_NODES];
char *prog_name;
int operation;
-int verbose = 0;
+int ls_all_nodes = 0;
int inquorate_fail = 0;
int wait_join = 0; /* default: don't wait for join */
int wait_leave = 0; /* default: don't wait for leave */
@@ -452,7 +452,7 @@ static int do_list(void)
}
printf("\n");
- if (!verbose) {
+ if (!ls_all_nodes) {
printf("\n");
exit(EXIT_SUCCESS);
}
@@ -500,6 +500,7 @@ static void print_usage(void)
printf(" dump Dump debug buffer from fenced\n");
printf("\n");
printf("Options:\n");
+ printf(" -n Show all node information in ls\n");
printf(" -m <seconds> Delay join up to <seconds> for all nodes in cluster.conf\n");
printf(" to be cluster members\n");
printf(" -w Wait for join or leave to complete\n");
@@ -510,7 +511,7 @@ static void print_usage(void)
printf("\n");
}
-#define OPTION_STRING "vVht:wQm:"
+#define OPTION_STRING "Vht:wQm:n"
static void decode_arguments(int argc, char *argv[])
{
@@ -529,8 +530,8 @@ static void decode_arguments(int argc, char *argv[])
exit(EXIT_SUCCESS);
break;
- case 'v':
- verbose++;
+ case 'n':
+ ls_all_nodes = 1;
break;
case 'h':
diff --git a/fence/fenced/cpg.c b/fence/fenced/cpg.c
index 6d51a78..2168995 100644
--- a/fence/fenced/cpg.c
+++ b/fence/fenced/cpg.c
@@ -1247,6 +1247,8 @@ static void confchg_cb(cpg_handle_t handle, struct cpg_name *group_name,
if (cg->we_joined)
add_victims_init(fd, cg);
+
+ apply_changes(fd);
}
static void fd_header_in(struct fd_header *hd)
@@ -1313,6 +1315,8 @@ static void deliver_cb(cpg_handle_t handle, struct cpg_name *group_name,
default:
log_error("unknown msg type %d", hd->type);
}
+
+ apply_changes(fd);
}
static cpg_callbacks_t cpg_callbacks = {
@@ -1336,8 +1340,6 @@ static void process_fd_cpg(int ci)
log_error("cpg_dispatch error %d", error);
return;
}
-
- apply_changes(fd);
}
int fd_join(struct fd *fd)
diff --git a/group/dlm_controld/cpg.c b/group/dlm_controld/cpg.c
index 1d3a369..2b9270e 100644
--- a/group/dlm_controld/cpg.c
+++ b/group/dlm_controld/cpg.c
@@ -47,6 +47,8 @@ struct node {
int check_fs;
int fs_notified;
uint64_t add_time;
+ uint64_t fence_time; /* for debug */
+ uint32_t fence_queries; /* for debug */
uint32_t added_seq; /* for queries */
uint32_t removed_seq; /* for queries */
int failed_reason; /* for queries */
@@ -401,8 +403,11 @@ static void node_history_fail(struct lockspace *ls, int nodeid,
return;
}
- if (cfgd_enable_fencing && !node->add_time)
+ if (cfgd_enable_fencing && node->add_time) {
node->check_fencing = 1;
+ node->fence_time = 0;
+ node->fence_queries = 0;
+ }
/* fenced will take care of making sure the quorum value
is adjusted for all the failures */
@@ -410,7 +415,8 @@ static void node_history_fail(struct lockspace *ls, int nodeid,
if (cfgd_enable_quorum && !cfgd_enable_fencing)
node->check_quorum = 1;
- node->check_fs = 1;
+ if (ls->fs_registered)
+ node->check_fs = 1;
node->removed_seq = cg->seq; /* for queries */
node->failed_reason = reason; /* for queries */
@@ -423,8 +429,10 @@ static int check_fencing_done(struct lockspace *ls)
int in_progress, wait_count = 0;
int rv;
- if (!cfgd_enable_fencing)
+ if (!cfgd_enable_fencing) {
+ log_group(ls, "check_fencing disabled");
return 1;
+ }
list_for_each_entry(node, &ls->node_history, list) {
if (!node->check_fencing)
@@ -438,11 +446,23 @@ static int check_fencing_done(struct lockspace *ls)
log_error("fenced_node_info error %d", rv);
if (last_fenced_time > node->add_time) {
+ log_group(ls, "check_fencing %d %llu fenced at %llu",
+ node->nodeid,
+ (unsigned long long)node->add_time,
+ (unsigned long long)last_fenced_time);
node->check_fencing = 0;
node->add_time = 0;
+ node->fence_time = last_fenced_time;
} else {
- log_group(ls, "check_fencing %d needs fencing",
- node->nodeid);
+ if (!node->fence_queries ||
+ node->fence_time != last_fenced_time) {
+ log_group(ls, "check_fencing %d not fenced "
+ "add %llu fence %llu", node->nodeid,
+ (unsigned long long)node->add_time,
+ (unsigned long long)last_fenced_time);
+ node->fence_queries++;
+ node->fence_time = last_fenced_time;
+ }
wait_count++;
}
}
@@ -462,6 +482,8 @@ static int check_fencing_done(struct lockspace *ls)
if (in_progress)
return 0;
+
+ log_group(ls, "check_fencing done");
return 1;
}
@@ -470,8 +492,10 @@ static int check_quorum_done(struct lockspace *ls)
struct node *node;
int wait_count = 0;
- if (!cfgd_enable_quorum)
+ if (!cfgd_enable_quorum) {
+ log_group(ls, "check_quorum disabled");
return 1;
+ }
/* wait for quorum system (cman) to see all the same nodes failed, so
we know that cluster_quorate is adjusted for the same failures we've
@@ -510,8 +534,10 @@ static int check_fs_done(struct lockspace *ls)
int wait_count = 0;
/* no corresponding fs for this lockspace */
- if (!ls->fs_registered)
+ if (!ls->fs_registered) {
+ log_group(ls, "check_fs none registered");
return 1;
+ }
list_for_each_entry(node, &ls->node_history, list) {
if (!node->check_fs)
@@ -1301,6 +1327,8 @@ static void confchg_cb(cpg_handle_t handle, struct cpg_name *group_name,
left_list, left_list_entries,
joined_list, joined_list_entries);
#endif
+
+ apply_changes(ls);
}
static void dlm_header_in(struct dlm_header *hd)
@@ -1396,6 +1424,8 @@ static void deliver_cb(cpg_handle_t handle, struct cpg_name *group_name,
default:
log_error("unknown msg type %d", hd->type);
}
+
+ apply_changes(ls);
}
static cpg_callbacks_t cpg_callbacks = {
@@ -1445,8 +1475,6 @@ static void process_lockspace_cpg(int ci)
return;
}
- apply_changes(ls);
-
update_flow_control_status();
}
@@ -2149,8 +2177,6 @@ static int _set_node_info(struct lockspace *ls, struct change *cg, int nodeid,
node->flags |= DLMC_NF_CHECK_QUORUM;
if (n->check_fs)
node->flags |= DLMC_NF_CHECK_FS;
- if (n->fs_notified)
- node->flags |= DLMC_NF_FS_NOTIFIED;
node->added_seq = n->added_seq;
node->removed_seq = n->removed_seq;
diff --git a/group/dlm_controld/main.c b/group/dlm_controld/main.c
index f3ab1f1..53edeb8 100644
--- a/group/dlm_controld/main.c
+++ b/group/dlm_controld/main.c
@@ -531,6 +531,8 @@ static void query_node_info(int fd, char *name, int nodeid)
goto out;
}
+ memset(&node, 0, sizeof(node));
+
if (group_mode == GROUP_LIBGROUP)
rv = set_node_info_group(ls, nodeid, &node);
else
diff --git a/group/gfs_control/main.c b/group/gfs_control/main.c
index 650e144..7b90987 100644
--- a/group/gfs_control/main.c
+++ b/group/gfs_control/main.c
@@ -14,7 +14,7 @@
#include "libgfscontrol.h"
-#define OPTION_STRING "vhV"
+#define OPTION_STRING "nhV"
#define OP_LIST 1
#define OP_DUMP 2
@@ -27,7 +27,7 @@ static char *prog_name;
static char *fsname;
static int operation;
static int opt_ind;
-static int verbose;
+static int ls_all_nodes;
#define MAX_MG 128
#define MAX_NODES 128
@@ -43,7 +43,7 @@ static void print_usage(void)
printf("%s [options] [ls|dump|plocks]\n", prog_name);
printf("\n");
printf("Options:\n");
- printf(" -v Verbose output\n");
+ printf(" -n Show all node information in ls\n");
printf(" -h Print this help, then exit\n");
printf(" -V Print program version information, then exit\n");
printf("\n");
@@ -59,8 +59,8 @@ static void decode_arguments(int argc, char **argv)
optchar = getopt(argc, argv, OPTION_STRING);
switch (optchar) {
- case 'v':
- verbose = 1;
+ case 'n':
+ ls_all_nodes = 1;
break;
case 'h':
@@ -354,7 +354,7 @@ static void do_list(char *name)
show_mg(mg);
- if (!verbose)
+ if (!ls_all_nodes)
goto next;
node_count = 0;
diff --git a/group/gfs_controld/cpg-new.c b/group/gfs_controld/cpg-new.c
index 839ff4a..bd8bc67 100644
--- a/group/gfs_controld/cpg-new.c
+++ b/group/gfs_controld/cpg-new.c
@@ -269,7 +269,7 @@ static int daemon_member_count;
would let everyone start again.]
*/
-static void process_mountgroup(struct mountgroup *mg);
+static void apply_changes_recovery(struct mountgroup *mg);
static void send_withdraw_acks(struct mountgroup *mg);
static void leave_mountgroup(struct mountgroup *mg, int mnterr);
@@ -700,7 +700,7 @@ void process_dlmcontrol(int ci)
poll_dlm = 0;
- process_mountgroup(mg);
+ apply_changes_recovery(mg);
}
static int check_dlm_notify_done(struct mountgroup *mg)
@@ -2184,7 +2184,7 @@ void process_recovery_uevent(char *table)
}
}
- process_mountgroup(mg);
+ apply_changes_recovery(mg);
}
static void start_journal_recovery(struct mountgroup *mg, int jid)
@@ -2301,7 +2301,7 @@ static void apply_recovery(struct mountgroup *mg)
}
}
-static void process_mountgroup(struct mountgroup *mg)
+static void apply_changes_recovery(struct mountgroup *mg)
{
if (!list_empty(&mg->changes))
apply_changes(mg);
@@ -2315,7 +2315,7 @@ void process_mountgroups(void)
struct mountgroup *mg, *safe;
list_for_each_entry_safe(mg, safe, &mountgroups, list)
- process_mountgroup(mg);
+ apply_changes_recovery(mg);
}
static int add_change(struct mountgroup *mg,
@@ -2476,7 +2476,7 @@ static void confchg_cb(cpg_handle_t handle, struct cpg_name *group_name,
if (rv)
return;
- process_mountgroup(mg);
+ apply_changes_recovery(mg);
}
static void gfs_header_in(struct gfs_header *hd)
@@ -2564,7 +2564,7 @@ static void deliver_cb(cpg_handle_t handle, struct cpg_name *group_name,
log_error("unknown msg type %d", hd->type);
}
- process_mountgroup(mg);
+ apply_changes_recovery(mg);
}
static cpg_callbacks_t cpg_callbacks = {
diff --git a/group/tool/main.c b/group/tool/main.c
index 410943b..9feb36b 100644
--- a/group/tool/main.c
+++ b/group/tool/main.c
@@ -610,9 +610,9 @@ int main(int argc, char **argv)
case OP_LIST:
if (all_daemons) {
if (verbose) {
- system("fence_tool ls -v");
- system("dlm_tool ls -v");
- system("gfs_control ls -v");
+ system("fence_tool ls -n");
+ system("dlm_tool ls -n");
+ system("gfs_control ls -n");
} else {
system("fence_tool ls");
system("dlm_tool ls");