This is the mail archive of the cluster-cvs@sourceware.org mailing list for the cluster.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

master - rgmanager: Permit careful restart w/o disturbing services


Gitweb:        http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=9f0d36f3fee3c7c00e2f5cf7cbd9ea878220a69d
Commit:        9f0d36f3fee3c7c00e2f5cf7cbd9ea878220a69d
Parent:        63f9fc14ecee65b107bc0fe2d8c745839a24fbd0
Author:        Lon Hohberger <lhh@redhat.com>
AuthorDate:    Mon Sep 8 11:59:36 2008 -0400
Committer:     Lon Hohberger <lhh@redhat.com>
CommitterDate: Wed Sep 24 13:39:56 2008 -0400

rgmanager: Permit careful restart w/o disturbing services

... e.g. for in-place upgrades of rgmanager.

Note: Requires service-freeze patch

Example use:
 * Manually freeze all services on a node.
 * Stop rgmanager (service rgmanager stop)
 * Upgrade rgmanager package
 * Manually start rgmanager from the command line
   'clurgmgrd -N'
---
 rgmanager/include/resgroup.h                    |    2 +
 rgmanager/man/clurgmgrd.8                       |   13 +++++++++-
 rgmanager/man/clusvcadm.8                       |   28 +++++++++++++++++++++++
 rgmanager/src/clulib/rg_strings.c               |   23 ++++++++++++------
 rgmanager/src/daemons/fo_domain.c               |    2 +-
 rgmanager/src/daemons/groups.c                  |   23 +++++++++++-------
 rgmanager/src/daemons/main.c                    |   11 +++++---
 rgmanager/src/daemons/rg_state.c                |    2 +-
 rgmanager/src/daemons/slang_event.c             |    9 ++++++-
 rgmanager/src/resources/default_event_script.sl |    3 +-
 10 files changed, 90 insertions(+), 26 deletions(-)

diff --git a/rgmanager/include/resgroup.h b/rgmanager/include/resgroup.h
index a4a55b0..c79924c 100644
--- a/rgmanager/include/resgroup.h
+++ b/rgmanager/include/resgroup.h
@@ -137,6 +137,8 @@ int handle_start_remote_req(char *svcName, int req);
 #define RG_FLAG_FROZEN			(1<<0)	/** Resource frozen */
 
 const char *rg_state_str(int val);
+const char *rg_flag_str(int val);
+const char *rg_flags_str(char *flags_string, size_t size, int val, char *separator);
 int rg_state_str_to_id(const char *val);
 const char *rg_flags_str(char *flags_string, size_t size, int val, char *separator);
 const char *agent_op_str(int val);
diff --git a/rgmanager/man/clurgmgrd.8 b/rgmanager/man/clurgmgrd.8
index bbde0f2..7b43925 100644
--- a/rgmanager/man/clurgmgrd.8
+++ b/rgmanager/man/clurgmgrd.8
@@ -26,5 +26,16 @@ the member has been fenced whenever fencing is available.
 When a cluster member determines that it is no longer in the cluster quorum,
 the service manager stops all services and waits for a new quorum to form.
 
+.SH "COMMAND LINE OPTIONS"
+.IP \-f
+Run in the foreground (do not fork).
+.IP \-d
+Enable debug-level logging.
+.IP \-N
+Do not perform stop-before-start.  Combined with the
+.I -Z
+flag to clusvcadm, this can be used to allow rgmanager to be upgraded
+without stopping a given user service or set of services.
+
 .SH "SEE ALSO"
-clurmtabd(8), ccsd(8)
+clusvcadm(8), ccsd(8)
diff --git a/rgmanager/man/clusvcadm.8 b/rgmanager/man/clusvcadm.8
index 20ae823..155ac88 100644
--- a/rgmanager/man/clusvcadm.8
+++ b/rgmanager/man/clusvcadm.8
@@ -22,6 +22,12 @@ clusvcadm \- Cluster User Service Administration Utility
 .B [\-R
 .I <service>
 .B ]
+.B [\-Z
+.I <service>
+.B ]
+.B [\-U
+.I <service>
+.B ]
 .B [\-s
 .I <service>
 .B ]
@@ -73,6 +79,19 @@ Restarts the user service named
 .I
 service
 on the cluster member on which it is currently running.
+.IP "\-Z <service>"
+Freezes the service named
+.I
+service
+on the cluster member on which it is currently running.  This will
+prevent status checks of the service as well as failover in the
+event the node fails or rgmanager is stopped.
+.IP "\-U <service>"
+Unfreezes the user service named
+.I
+service
+on the cluster member on which it is currently running.  This will
+re-enable status checks.
 .IP "\-S"
 Display whether each of the active service managers is locked or not.  This
 can be used to verify the correct operation of the \fB-l\fR and \fB-u\fR 
@@ -89,5 +108,14 @@ again.
 .IP \-v
 Display version information and exit.
 
+.SH "NOTES"
+Executing
+.I -U
+(unfreeze) on a service which was frozen in the 
+.B started
+state while the service owner is offline results in an undefined
+(and possibly dangerous) condition.  Manually ensure all resources are
+clear before doing this.
+
 .SH "SEE ALSO"
 clustat(8)
diff --git a/rgmanager/src/clulib/rg_strings.c b/rgmanager/src/clulib/rg_strings.c
index 6641fc5..8c613bf 100644
--- a/rgmanager/src/clulib/rg_strings.c
+++ b/rgmanager/src/clulib/rg_strings.c
@@ -145,12 +145,12 @@ rg_search_table_by_str(const struct string_val *table, const char *val)
 }
 
 
-
 const char *
 rg_strerror(int val)
 {
 	return rg_search_table(rg_error_strings, val);
 }
+
 	
 const char *
 rg_state_str(int val)
@@ -165,6 +165,19 @@ rg_state_str_to_id(const char *val)
 }
 
 
+const char *
+rg_req_str(int val)
+{
+	return rg_search_table(rg_req_strings, val);
+}
+
+
+const char *
+rg_flag_str(int val)
+{
+	return rg_search_table(rg_flags_strings, val);
+}
+
 
 const char *
 rg_flags_str(char *flags_string, size_t size, int val, char *separator)
@@ -172,7 +185,7 @@ rg_flags_str(char *flags_string, size_t size, int val, char *separator)
 	int i;
 	const char *string;
 
-	for (i = 0; i < sizeof(uint32_t); i++) {
+	for (i = 0; i < (sizeof(val) * 8); i++) {
 		if ( val & (1 << i)) {
 			if (strlen(flags_string))
 				strncat(flags_string, separator, size - (strlen(flags_string) + strlen(separator) + 1));
@@ -183,12 +196,6 @@ rg_flags_str(char *flags_string, size_t size, int val, char *separator)
 	return flags_string;
 }
 
-const char *
-rg_req_str(int val)
-{
-	return rg_search_table(rg_req_strings, val);
-}
-
 
 const char *
 agent_op_str(int val)
diff --git a/rgmanager/src/daemons/fo_domain.c b/rgmanager/src/daemons/fo_domain.c
index 97f244c..6104c40 100644
--- a/rgmanager/src/daemons/fo_domain.c
+++ b/rgmanager/src/daemons/fo_domain.c
@@ -354,7 +354,7 @@ node_domain_set(fod_t **domains, char *name, int **ret, int *retlen, int *flags)
 	int ts_count;
 	fod_node_t *fodn;
 	fod_t *domain;
-	int rv = -1, found = 0;
+	int found = 0;
 
 	list_for(domains, domain, x) {
 		if (!strcasecmp(domain->fd_name, name)) {
diff --git a/rgmanager/src/daemons/groups.c b/rgmanager/src/daemons/groups.c
index 3927479..f656977 100644
--- a/rgmanager/src/daemons/groups.c
+++ b/rgmanager/src/daemons/groups.c
@@ -12,6 +12,7 @@
 #include <reslist.h>
 #include <assert.h>
 #include <event.h>
+#include <sets.h>
 
 /* Use address field in this because we never use it internally,
    and there is no extra space in the cman_node_t type.
@@ -410,15 +411,15 @@ check_depend_safe(char *rg_name)
 int
 check_rdomain_crash(char *svcName)
 {
-	int *nodes = NULL, nodecount;
-	int *fd_nodes = NULL, fd_nodecount, fl;
-	int *isect = NULL, icount;
+	int *nodes = NULL, nodecount = 0;
+	int *fd_nodes = NULL, fd_nodecount = 0, fl = 0;
+	int *isect = NULL, icount = 0;
 	char fd_name[256];
 
 	if (_group_property(svcName, "domain", fd_name, sizeof(fd_name)) != 0)
 		goto out_free;
 
-	if (node_domain_set(_domains, fd_name, &fd_nodes,
+	if (node_domain_set(&_domains, fd_name, &fd_nodes,
 			    &fd_nodecount, &fl) != 0)
 		goto out_free;
 
@@ -1597,7 +1598,7 @@ dump_config_version(FILE *fp)
   resource group modification.
  */
 int
-init_resource_groups(int reconfigure)
+init_resource_groups(int reconfigure, int do_init)
 {
 	int fd, x, y, cnt;
 
@@ -1724,10 +1725,14 @@ init_resource_groups(int reconfigure)
 		clulog(LOG_INFO, "Restarting changed resources.\n");
 		do_condstarts();
 	} else {
-		/* Do initial stop-before-start */
-		clulog(LOG_INFO, "Initializing Services\n");
-		rg_doall(RG_INIT, 1, "Initializing %s\n");
-		clulog(LOG_INFO, "Services Initialized\n");
+		if (do_init) {
+			/* Do initial stop-before-start */
+			clulog(LOG_INFO, "Initializing Services\n");
+			rg_doall(RG_INIT, 1, "Initializing %s\n");
+			clulog(LOG_INFO, "Services Initialized\n");
+		} else {
+			clulog(LOG_INFO, "Skipping stop-before-start: overridden by administrator\n");
+		}
 		rg_set_initialized();
 	}
 
diff --git a/rgmanager/src/daemons/main.c b/rgmanager/src/daemons/main.c
index 9c4f842..89bdcd1 100644
--- a/rgmanager/src/daemons/main.c
+++ b/rgmanager/src/daemons/main.c
@@ -31,7 +31,7 @@ void set_transition_throttling(int);
 void node_event(int, int, int, int);
 void node_event_q(int, int, int, int);
 int daemon_init(char *);
-int init_resource_groups(int);
+int init_resource_groups(int, int);
 void kill_resource_groups(void);
 void set_my_id(int);
 void flag_shutdown(int sig);
@@ -924,7 +924,7 @@ void dump_thread_states(FILE *);
 int
 main(int argc, char **argv)
 {
-	int rv;
+	int rv, do_init = 1;
 	char foreground = 0, wd = 1;
 	cman_node_t me;
 	msgctx_t *cluster_ctx;
@@ -932,7 +932,7 @@ main(int argc, char **argv)
 	pthread_t th;
 	cman_handle_t clu = NULL;
 
-	while ((rv = getopt(argc, argv, "wfd")) != EOF) {
+	while ((rv = getopt(argc, argv, "wfdN")) != EOF) {
 		switch (rv) {
 		case 'w':
 			wd = 0;
@@ -940,6 +940,9 @@ main(int argc, char **argv)
 		case 'd':
 			debug = 1;
 			break;
+		case 'N':
+			do_init = 0;
+			break;
 		case 'f':
 			foreground = 1;
 			break;
@@ -1005,7 +1008,7 @@ main(int argc, char **argv)
 	configure_rgmanager(-1, debug);
 	clulog(LOG_NOTICE, "Resource Group Manager Starting\n");
 
-	if (init_resource_groups(0) != 0) {
+	if (init_resource_groups(0, do_init) != 0) {
 		clulog(LOG_CRIT, "#8: Couldn't initialize services\n");
 		return -1;
 	}
diff --git a/rgmanager/src/daemons/rg_state.c b/rgmanager/src/daemons/rg_state.c
index c57b148..14a1d5e 100644
--- a/rgmanager/src/daemons/rg_state.c
+++ b/rgmanager/src/daemons/rg_state.c
@@ -1551,7 +1551,7 @@ _svc_freeze(char *svcName, int enabled)
 
 	default:
 		rg_unlock(&lockp);
-		return RG_EFAIL;
+		return RG_EAGAIN;
 		break;
 	}
 
diff --git a/rgmanager/src/daemons/slang_event.c b/rgmanager/src/daemons/slang_event.c
index 737e01a..01fff05 100644
--- a/rgmanager/src/daemons/slang_event.c
+++ b/rgmanager/src/daemons/slang_event.c
@@ -248,7 +248,14 @@ sl_service_status(char *svcName)
 		return;
 	}
 
-	state_str = strdup(rg_state_str(svcStatus.rs_state));
+	if (svcStatus.rs_flags & RG_FLAG_FROZEN) {
+		/* Special case: "frozen" is a flag, but user scripts should
+		   treat it as a state. */
+		state_str = strdup(rg_flag_str(RG_FLAG_FROZEN));
+	} else {
+		state_str = strdup(rg_state_str(svcStatus.rs_state));
+	}
+
 	if (!state_str) {
 		SLang_verror(SL_RunTime_Error,
 			     "%s: Failed to duplicate state of %s",
diff --git a/rgmanager/src/resources/default_event_script.sl b/rgmanager/src/resources/default_event_script.sl
index df9bce0..3f1379a 100644
--- a/rgmanager/src/resources/default_event_script.sl
+++ b/rgmanager/src/resources/default_event_script.sl
@@ -36,7 +36,8 @@ define move_or_start(service, node_list)
 		return ERR_DOMAIN;
 	}
 
-	if (((event_type != EVENT_USER) and (state == "disabled")) or (state == "failed")) {
+	if (((event_type != EVENT_USER) and (state == "disabled")) or
+            ((state == "failed") or (state == "frozen"))) {
 		%
 		% Commenting out this block will -not- allow you to
 		% recover failed services from event scripts.  Sorry.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]