This is the mail archive of the
cluster-cvs@sourceware.org
mailing list for the cluster.
fence: master - libfence/fenced/fence_node: add logging to fence_node()
- From: David Teigland <teigland at fedoraproject dot org>
- To: cluster-cvs-relay at redhat dot com
- Date: Wed, 25 Feb 2009 17:59:38 +0000 (UTC)
- Subject: fence: master - libfence/fenced/fence_node: add logging to fence_node()
Gitweb: http://git.fedorahosted.org/git/fence.git?p=fence.git;a=commitdiff;h=dbb04917012761351f5402d5786e5ec5149be14b
Commit: dbb04917012761351f5402d5786e5ec5149be14b
Parent: 708488124fcfa8547b64a3839c018f5a4cca16e2
Author: David Teigland <teigland@redhat.com>
AuthorDate: Wed Feb 25 11:56:08 2009 -0600
Committer: David Teigland <teigland@redhat.com>
CommitterDate: Wed Feb 25 11:57:11 2009 -0600
libfence/fenced/fence_node: add logging to fence_node()
Change the libfence fence_node() function to log what it does.
After fence_node() completes, the caller (either fenced or
fence_node) can see all the fencing steps and the results of
each.
. libfence version is incremented to 4.0 given the new parameters
to fence_node().
. fenced adds the full libfence:fence_node() record to its debug
log, and puts any errors that occurred in syslog/logfile.
. fence_node -v displays the fence_node() log.
Signed-off-by: David Teigland <teigland@redhat.com>
---
fence/fence_node/fence_node.c | 81 +++++++++++++++++--
fence/fenced/recover.c | 60 +++++++++++++-
fence/libfence/Makefile | 3 +
fence/libfence/agent.c | 178 +++++++++++++++++++++++++++-------------
fence/libfence/libfence.h | 24 +++++-
5 files changed, 274 insertions(+), 72 deletions(-)
diff --git a/fence/fence_node/fence_node.c b/fence/fence_node/fence_node.c
index 44dcbc3..21a9ba2 100644
--- a/fence/fence_node/fence_node.c
+++ b/fence/fence_node/fence_node.c
@@ -9,7 +9,14 @@
#include "libfenced.h"
#include "copyright.cf"
-#define OPTION_STRING ("huV")
+static char *prog_name;
+static int verbose;
+
+#define FL_SIZE 32
+static struct fence_log log[FL_SIZE];
+static int log_count;
+
+#define OPTION_STRING "hvV"
#define die(fmt, args...) \
do \
@@ -20,8 +27,6 @@ do \
} \
while (0)
-static char *prog_name;
-
static void print_usage(void)
{
printf("Usage:\n");
@@ -30,15 +35,44 @@ static void print_usage(void)
printf("\n");
printf("Options:\n");
printf("\n");
- printf(" -h Print this help, then exit\n");
- printf(" -V Print program version information, then exit\n");
+ printf(" -v Show fence agent results, -vv for agent args\n");
+ printf(" -h Print this help, then exit\n");
+ printf(" -V Print program version information, then exit\n");
printf("\n");
}
+static char *fe_str(int r)
+{
+ switch (r) {
+ case FE_AGENT_SUCCESS:
+ return "success";
+ case FE_AGENT_ERROR:
+ return "error from agent";
+ case FE_AGENT_FORK:
+ return "error from fork";
+ case FE_NO_CONFIG:
+ return "error from ccs";
+ case FE_NO_METHOD:
+ return "error no method";
+ case FE_NO_DEVICE:
+ return "error no device";
+ case FE_READ_AGENT:
+ return "error config agent";
+ case FE_READ_ARGS:
+ return "error config args";
+ case FE_READ_METHOD:
+ return "error config method";
+ case FE_READ_DEVICE:
+ return "error config device";
+ default:
+ return "error unknown";
+ }
+}
+
int main(int argc, char *argv[])
{
- int cont = 1, optchar, error, rv;
- char *victim = NULL;
+ char *victim = NULL, *p;
+ int cont = 1, optchar, error, rv, i;
prog_name = argv[0];
@@ -47,6 +81,10 @@ int main(int argc, char *argv[])
switch (optchar) {
+ case 'v':
+ verbose++;
+ break;
+
case 'h':
print_usage();
exit(EXIT_SUCCESS);
@@ -85,11 +123,38 @@ int main(int argc, char *argv[])
if (!victim)
die("no node name specified");
- error = fence_node(victim);
+ memset(&log, 0, sizeof(log));
+ log_count = 0;
+
+ error = fence_node(victim, log, FL_SIZE, &log_count);
logt_init("fence_node", LOG_MODE_OUTPUT_SYSLOG, SYSLOGFACILITY,
SYSLOGLEVEL, 0, NULL);
+ if (!verbose)
+ goto skip;
+
+ if (log_count > FL_SIZE) {
+ fprintf(stderr, "fence_node log overflow %d", log_count);
+ log_count = FL_SIZE;
+ }
+
+ for (i = 0; i < log_count; i++) {
+ fprintf(stderr, "fence %s dev %d.%d agent %s result: %s\n",
+ victim, log[i].method_num, log[i].device_num,
+ log[i].agent_name[0] ? log[i].agent_name : "none",
+ fe_str(log[i].error));
+
+ if (verbose < 2)
+ continue;
+
+ p = strchr(log[i].agent_args, '\n');
+ if (p)
+ *p = '\0';
+ fprintf(stderr, "agent args: %s\n", log[i].agent_args);
+ }
+
+ skip:
if (error) {
fprintf(stderr, "Fence of \"%s\" was unsuccessful\n", victim);
logt_print(LOG_ERR, "Fence of \"%s\" was unsuccessful\n",
diff --git a/fence/fenced/recover.c b/fence/fenced/recover.c
index 4251d5e..1a4e469 100644
--- a/fence/fenced/recover.c
+++ b/fence/fenced/recover.c
@@ -246,10 +246,41 @@ void delay_fencing(struct fd *fd, int node_join)
}
}
+static char *fe_str(int r)
+{
+ switch (r) {
+ case FE_AGENT_SUCCESS:
+ return "success";
+ case FE_AGENT_ERROR:
+ return "error from agent";
+ case FE_AGENT_FORK:
+ return "error from fork";
+ case FE_NO_CONFIG:
+ return "error from ccs";
+ case FE_NO_METHOD:
+ return "error no method";
+ case FE_NO_DEVICE:
+ return "error no device";
+ case FE_READ_AGENT:
+ return "error config agent";
+ case FE_READ_ARGS:
+ return "error config args";
+ case FE_READ_METHOD:
+ return "error config method";
+ case FE_READ_DEVICE:
+ return "error config device";
+ default:
+ return "error unknown";
+ }
+}
+
+#define FL_SIZE 32
+static struct fence_log log[FL_SIZE];
+
void fence_victims(struct fd *fd)
{
struct node *node;
- int error;
+ int error, i, ll, log_count;
int override = -1;
int cluster_member, cpg_member, ext;
@@ -274,13 +305,32 @@ void fence_victims(struct fd *fd)
continue;
}
- log_level(LOG_INFO, "fencing node \"%s\"", node->name);
+ memset(&log, 0, sizeof(log));
+ log_count = 0;
+
+ log_level(LOG_INFO, "fencing node %s", node->name);
query_unlock();
- error = fence_node(node->name);
+ error = fence_node(node->name, log, FL_SIZE, &log_count);
query_lock();
- log_level(LOG_INFO, "fence \"%s\" %s", node->name,
+ if (log_count > FL_SIZE) {
+ log_error("fence_node log overflow %d", log_count);
+ log_count = FL_SIZE;
+ }
+
+ for (i = 0; i < log_count; i++) {
+ ll = (log[i].error == FE_AGENT_SUCCESS) ? LOG_DEBUG:
+ LOG_ERR;
+ log_level(ll, "fence %s dev %d.%d agent %s result: %s",
+ node->name,
+ log[i].method_num, log[i].device_num,
+ log[i].agent_name[0] ?
+ log[i].agent_name : "none",
+ fe_str(log[i].error));
+ }
+
+ log_error("fence %s %s", node->name,
error ? "failed" : "success");
if (!error) {
@@ -301,7 +351,7 @@ void fence_victims(struct fd *fd)
override = open_override(cfgd_override_path);
if (check_override(override, node->name,
cfgd_override_time) > 0) {
- log_level(LOG_WARNING, "fence \"%s\" overridden by "
+ log_level(LOG_WARNING, "fence %s overridden by "
"administrator intervention", node->name);
victim_done(fd, node->nodeid, VIC_DONE_OVERRIDE);
list_del(&node->list);
diff --git a/fence/libfence/Makefile b/fence/libfence/Makefile
index e1e74db..93bf45c 100644
--- a/fence/libfence/Makefile
+++ b/fence/libfence/Makefile
@@ -1,5 +1,8 @@
TARGET= libfence
+SOMAJOR=4
+SOMINOR=0
+
OBJS= agent.o
include ../../make/defines.mk
diff --git a/fence/libfence/agent.c b/fence/libfence/agent.c
index ab93bdf..883202f 100644
--- a/fence/libfence/agent.c
+++ b/fence/libfence/agent.c
@@ -7,12 +7,13 @@
#include <string.h>
#include <errno.h>
#include <time.h>
+#include <limits.h>
+#include "libfence.h"
#include "ccs.h"
#define MAX_METHODS 8
#define MAX_DEVICES 8
-#define MAX_AGENT_ARGS_LEN 512
#define METHOD_NAME_PATH "/cluster/clusternodes/clusternode[@name=\"%s\"]/fence/method[%d]/@name"
#define DEVICE_NAME_PATH "/cluster/clusternodes/clusternode[@name=\"%s\"]/fence/method[@name=\"%s\"]/device[%d]/@name"
@@ -22,27 +23,7 @@
-static void display_agent_output(const char *agent, int fd)
-{
- char buf[384];
- int ret;
-
- do {
- ret = read(fd, buf, sizeof(buf) - 1);
- if (ret < 0) {
- if (errno == EINTR)
- continue;
- break;
- } else if (ret > 0) {
- buf[ret] = '\0';
- /*
- syslog(LOG_ERR, "agent \"%s\" reports: %s", agent, buf);
- */
- }
- } while (ret > 0);
-}
-
-static int run_agent(char *agent, char *args)
+static int run_agent(char *agent, char *args, int *agent_result)
{
int pid, status, len;
int pr_fd, pw_fd; /* parent read/write file descriptors */
@@ -67,8 +48,10 @@ static int run_agent(char *agent, char *args)
pw_fd = fd2[1];
pid = fork();
- if (pid < 0)
+ if (pid < 0) {
+ *agent_result = FE_AGENT_FORK;
goto fail;
+ }
if (pid) {
/* parent */
@@ -87,8 +70,10 @@ static int run_agent(char *agent, char *args)
waitpid(pid, &status, 0);
if (!WIFEXITED(status) || WEXITSTATUS(status)) {
- display_agent_output(agent, pr_fd);
+ *agent_result = FE_AGENT_ERROR;
goto fail;
+ } else {
+ *agent_result = FE_AGENT_SUCCESS;
}
} else {
/* child */
@@ -126,19 +111,19 @@ static int run_agent(char *agent, char *args)
}
static int make_args(int cd, char *victim, char *method, int d,
- char *device, char **args_out)
+ char *device, char **args_out)
{
- char path[256], *args, *str;
+ char path[PATH_MAX], *args, *str;
int error, cnt = 0;
- args = malloc(MAX_AGENT_ARGS_LEN);
+ args = malloc(FENCE_AGENT_ARGS_MAX);
if (!args)
return -ENOMEM;
- memset(args, 0, MAX_AGENT_ARGS_LEN);
+ memset(args, 0, FENCE_AGENT_ARGS_MAX);
/* node-specific args for victim */
- memset(path, 0, 256);
+ memset(path, 0, PATH_MAX);
sprintf(path, NODE_FENCE_ARGS_PATH, victim, method, d+1);
for (;;) {
@@ -159,7 +144,7 @@ static int make_args(int cd, char *victim, char *method, int d,
/* device-specific args */
- memset(path, 0, 256);
+ memset(path, 0, PATH_MAX);
sprintf(path, FENCE_DEVICE_ARGS_PATH, device);
for (;;) {
@@ -193,10 +178,10 @@ static int make_args(int cd, char *victim, char *method, int d,
static int get_method(int cd, char *victim, int m, char **method)
{
- char path[256], *str = NULL;
+ char path[PATH_MAX], *str = NULL;
int error;
- memset(path, 0, 256);
+ memset(path, 0, PATH_MAX);
sprintf(path, METHOD_NAME_PATH, victim, m+1);
error = ccs_get(cd, path, &str);
@@ -208,10 +193,10 @@ static int get_method(int cd, char *victim, int m, char **method)
static int get_device(int cd, char *victim, char *method, int d, char **device)
{
- char path[256], *str = NULL;
+ char path[PATH_MAX], *str = NULL;
int error;
- memset(path, 0, 256);
+ memset(path, 0, PATH_MAX);
sprintf(path, DEVICE_NAME_PATH, victim, method, d+1);
error = ccs_get(cd, path, &str);
@@ -221,11 +206,11 @@ static int get_device(int cd, char *victim, char *method, int d, char **device)
static int count_methods(int cd, char *victim)
{
- char path[256], *name;
+ char path[PATH_MAX], *name;
int error, i;
for (i = 0; i < MAX_METHODS; i++) {
- memset(path, 0, 256);
+ memset(path, 0, PATH_MAX);
sprintf(path, METHOD_NAME_PATH, victim, i+1);
error = ccs_get(cd, path, &name);
@@ -238,11 +223,11 @@ static int count_methods(int cd, char *victim)
static int count_devices(int cd, char *victim, char *method)
{
- char path[256], *name;
+ char path[PATH_MAX], *name;
int error, i;
for (i = 0; i < MAX_DEVICES; i++) {
- memset(path, 0, 256);
+ memset(path, 0, PATH_MAX);
sprintf(path, DEVICE_NAME_PATH, victim, method, i+1);
error = ccs_get(cd, path, &name);
@@ -254,23 +239,31 @@ static int count_devices(int cd, char *victim, char *method)
}
static int use_device(int cd, char *victim, char *method, int d,
- char *device)
+ char *device, struct fence_log *lp)
{
- char path[256], *agent, *args = NULL;
+ char path[PATH_MAX], *agent, *args = NULL;
int error;
- memset(path, 0, 256);
+ memset(path, 0, PATH_MAX);
sprintf(path, AGENT_NAME_PATH, device);
error = ccs_get(cd, path, &agent);
- if (error)
+ if (error) {
+ lp->error = FE_READ_AGENT;
goto out;
+ }
+
+ strncpy(lp->agent_name, agent, FENCE_AGENT_NAME_MAX);
error = make_args(cd, victim, method, d, device, &args);
- if (error)
+ if (error) {
+ lp->error = FE_READ_ARGS;
goto out_agent;
+ }
+
+ strncpy(lp->agent_args, args, FENCE_AGENT_ARGS_MAX);
- error = run_agent(agent, args);
+ error = run_agent(agent, args, &lp->error);
free(args);
out_agent:
@@ -279,38 +272,102 @@ static int use_device(int cd, char *victim, char *method, int d,
return error;
}
-int fence_node(char *victim)
+int fence_node(char *victim, struct fence_log *log, int log_size,
+ int *log_count)
{
+ struct fence_log stub;
+ struct fence_log *lp = log;
char *method = NULL, *device = NULL;
char *victim_nodename = NULL;
- int num_methods, num_devices, m, d, error = -1, cd;
+ int num_methods, num_devices, m, d, cd, rv;
+ int left = log_size;
+ int error = -1;
+ int count = 0;
cd = ccs_connect();
- if (cd < 0)
- return -1;
+ if (cd < 0) {
+ if (lp && left) {
+ lp->error = FE_NO_CONFIG;
+ lp++;
+ left--;
+ }
+ count++;
+ error = -1;
+ goto ret;
+ }
if (ccs_lookup_nodename(cd, victim, &victim_nodename) == 0)
victim = victim_nodename;
num_methods = count_methods(cd, victim);
+ if (!num_methods) {
+ if (lp && left) {
+ lp->error = FE_NO_METHOD;
+ lp++;
+ left--;
+ }
+ count++;
+ error = -1;
+ goto out;
+ }
for (m = 0; m < num_methods; m++) {
- error = get_method(cd, victim, m, &method);
- if (error)
+ rv = get_method(cd, victim, m, &method);
+ if (rv) {
+ if (lp && left) {
+ lp->error = FE_READ_METHOD;
+ lp->method_num = m;
+ lp++;
+ left--;
+ }
+ count++;
+ error = -1;
continue;
-
- /* if num_devices is zero we should return an error */
- error = -1;
+ }
num_devices = count_devices(cd, victim, method);
+ if (!num_devices) {
+ if (lp && left) {
+ lp->error = FE_NO_DEVICE;
+ lp->method_num = m;
+ lp++;
+ left--;
+ }
+ count++;
+ error = -1;
+ continue;
+ }
for (d = 0; d < num_devices; d++) {
- error = get_device(cd, victim, method, d, &device);
- if (error)
+ rv = get_device(cd, victim, method, d, &device);
+ if (rv) {
+ if (lp && left) {
+ lp->error = FE_READ_DEVICE;
+ lp->method_num = m;
+ lp->device_num = d;
+ lp++;
+ left--;
+ }
+ count++;
+ error = -1;
break;
+ }
+
+ /* every call to use_device generates a log entry,
+ whether success or fail */
+
+ error = use_device(cd, victim, method, d, device,
+ (lp && left) ? lp : &stub);
+ count++;
+ if (lp && left) {
+ /* error, name, args already set */
+ lp->method_num = m;
+ lp->device_num = d;
+ lp++;
+ left--;
+ }
- error = use_device(cd, victim, method, d, device);
if (error)
break;
@@ -324,11 +381,16 @@ int fence_node(char *victim)
free(victim_nodename);
free(method);
+ /* we return 0 for fencing success when use_device has
+ returned zero for each device in this method */
+
if (!error)
break;
}
-
+ out:
ccs_disconnect(cd);
-
+ ret:
+ if (log_count)
+ *log_count = count;
return error;
}
diff --git a/fence/libfence/libfence.h b/fence/libfence/libfence.h
index b71450b..677041d 100644
--- a/fence/libfence/libfence.h
+++ b/fence/libfence/libfence.h
@@ -5,7 +5,29 @@
extern "C" {
#endif
-int fence_node(char *name);
+#define FE_AGENT_SUCCESS 1 /* agent exited with EXIT_SUCCESS */
+#define FE_AGENT_ERROR 2 /* agent exited with EXIT_FAILURE */
+#define FE_AGENT_FORK 3 /* error forking agent */
+#define FE_NO_CONFIG 4 /* ccs_connect error */
+#define FE_NO_METHOD 5 /* zero methods defined */
+#define FE_NO_DEVICE 6 /* zero devices defined in method */
+#define FE_READ_AGENT 7 /* read (ccs) error on agent path */
+#define FE_READ_ARGS 8 /* read (ccs) error on node/dev args */
+#define FE_READ_METHOD 9 /* read (ccs) error on method */
+#define FE_READ_DEVICE 10 /* read (ccs) error on method/device */
+
+#define FENCE_AGENT_NAME_MAX 256
+#define FENCE_AGENT_ARGS_MAX 1024
+
+struct fence_log {
+ int error;
+ int method_num;
+ int device_num;
+ char agent_name[FENCE_AGENT_NAME_MAX+1];
+ char agent_args[FENCE_AGENT_ARGS_MAX+1];
+};
+
+int fence_node(char *name, struct fence_log *log, int log_size, int *log_count);
#ifdef __cplusplus
}