|
 |
f1cb04 |
From ab8942f6260fde93824ed2a18e09e572b59ceb25 Mon Sep 17 00:00:00 2001
|
|
 |
f1cb04 |
From: Christine Caulfield <ccaulfie@redhat.com>
|
|
 |
f1cb04 |
Date: Fri, 12 Jun 2015 16:16:45 +0100
|
|
 |
f1cb04 |
Subject: [PATCH] totemsrp: Improve logging of left/down nodes
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
This patch from Hideo Yamauchi improves the logging of
|
|
 |
f1cb04 |
whether nodes leave the cluster cleanly or uncleanly,
|
|
 |
f1cb04 |
making it easier to determine if a node was shut down
|
|
 |
f1cb04 |
by the operator. There is also the possibility that a
|
|
 |
f1cb04 |
LEAVE message could get missed (due to the node being
|
|
 |
f1cb04 |
in flush state) so this can also make that clearer.
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
The modifications are as follows.
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
Change 1) I added a list to totemsrp that maintains the LEAVE nodes.
|
|
 |
f1cb04 |
Change 2) I added registration, search, and clearing of the LEAVE
|
|
 |
f1cb04 |
node.
|
|
 |
f1cb04 |
Change 3) I added the output to log.
|
|
 |
f1cb04 |
Change 4) I changed an output level of the log.
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
Signed-off-by: Hideo Yamauchi <renayama19661014@ybb.ne.jp>
|
|
 |
f1cb04 |
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
|
|
 |
f1cb04 |
Reviewed-by: Jan Friesse <jfriesse@redhat.com>
|
|
 |
f1cb04 |
---
|
|
 |
f1cb04 |
exec/totemsrp.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
|
|
 |
f1cb04 |
1 files changed, 104 insertions(+), 1 deletions(-)
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
diff --git a/exec/totemsrp.c b/exec/totemsrp.c
|
|
 |
f1cb04 |
index 6357f5a..3aa61cc 100644
|
|
 |
f1cb04 |
--- a/exec/totemsrp.c
|
|
 |
f1cb04 |
+++ b/exec/totemsrp.c
|
|
 |
f1cb04 |
@@ -316,6 +316,8 @@ struct totemsrp_instance {
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
struct srp_addr my_left_memb_list[PROCESSOR_COUNT_MAX];
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
+ unsigned int my_leave_memb_list[PROCESSOR_COUNT_MAX];
|
|
 |
f1cb04 |
+
|
|
 |
f1cb04 |
int my_proc_list_entries;
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
int my_failed_list_entries;
|
|
 |
f1cb04 |
@@ -329,6 +331,8 @@ struct totemsrp_instance {
|
|
 |
f1cb04 |
int my_deliver_memb_entries;
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
int my_left_memb_entries;
|
|
 |
f1cb04 |
+
|
|
 |
f1cb04 |
+ int my_leave_memb_entries;
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
struct memb_ring_id my_ring_id;
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
@@ -513,6 +517,8 @@ struct totemsrp_instance {
|
|
 |
f1cb04 |
uint32_t threaded_mode_enabled;
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
uint32_t waiting_trans_ack;
|
|
 |
f1cb04 |
+
|
|
 |
f1cb04 |
+ int flushing;
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
void * token_recv_event_handle;
|
|
 |
f1cb04 |
void * token_sent_event_handle;
|
|
 |
f1cb04 |
@@ -1476,6 +1482,52 @@ static void memb_set_print (
|
|
 |
f1cb04 |
}
|
|
 |
f1cb04 |
}
|
|
 |
f1cb04 |
#endif
|
|
 |
f1cb04 |
+static void my_leave_memb_clear(
|
|
 |
f1cb04 |
+ struct totemsrp_instance *instance)
|
|
 |
f1cb04 |
+{
|
|
 |
f1cb04 |
+ memset(instance->my_leave_memb_list, 0, sizeof(instance->my_leave_memb_list));
|
|
 |
f1cb04 |
+ instance->my_leave_memb_entries = 0;
|
|
 |
f1cb04 |
+}
|
|
 |
f1cb04 |
+
|
|
 |
f1cb04 |
+static unsigned int my_leave_memb_match(
|
|
 |
f1cb04 |
+ struct totemsrp_instance *instance,
|
|
 |
f1cb04 |
+ unsigned int nodeid)
|
|
 |
f1cb04 |
+{
|
|
 |
f1cb04 |
+ int i;
|
|
 |
f1cb04 |
+ unsigned int ret = 0;
|
|
 |
f1cb04 |
+
|
|
 |
f1cb04 |
+ for (i = 0; i < instance->my_leave_memb_entries; i++){
|
|
 |
f1cb04 |
+ if (instance->my_leave_memb_list[i] == nodeid){
|
|
 |
f1cb04 |
+ ret = nodeid;
|
|
 |
f1cb04 |
+ break;
|
|
 |
f1cb04 |
+ }
|
|
 |
f1cb04 |
+ }
|
|
 |
f1cb04 |
+ return ret;
|
|
 |
f1cb04 |
+}
|
|
 |
f1cb04 |
+
|
|
 |
f1cb04 |
+static void my_leave_memb_set(
|
|
 |
f1cb04 |
+ struct totemsrp_instance *instance,
|
|
 |
f1cb04 |
+ unsigned int nodeid)
|
|
 |
f1cb04 |
+{
|
|
 |
f1cb04 |
+ int i, found = 0;
|
|
 |
f1cb04 |
+ for (i = 0; i < instance->my_leave_memb_entries; i++){
|
|
 |
f1cb04 |
+ if (instance->my_leave_memb_list[i] == nodeid){
|
|
 |
f1cb04 |
+ found = 1;
|
|
 |
f1cb04 |
+ break;
|
|
 |
f1cb04 |
+ }
|
|
 |
f1cb04 |
+ }
|
|
 |
f1cb04 |
+ if (found == 1) {
|
|
 |
f1cb04 |
+ return;
|
|
 |
f1cb04 |
+ }
|
|
 |
f1cb04 |
+ if (instance->my_leave_memb_entries < (PROCESSOR_COUNT_MAX - 1)) {
|
|
 |
f1cb04 |
+ instance->my_leave_memb_list[instance->my_leave_memb_entries] = nodeid;
|
|
 |
f1cb04 |
+ instance->my_leave_memb_entries++;
|
|
 |
f1cb04 |
+ } else {
|
|
 |
f1cb04 |
+ log_printf (instance->totemsrp_log_level_warning,
|
|
 |
f1cb04 |
+ "Cannot set LEAVE nodeid=%d", nodeid);
|
|
 |
f1cb04 |
+ }
|
|
 |
f1cb04 |
+}
|
|
 |
f1cb04 |
+
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
static void *totemsrp_buffer_alloc (struct totemsrp_instance *instance)
|
|
 |
f1cb04 |
{
|
|
 |
f1cb04 |
@@ -1837,6 +1889,7 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance)
|
|
 |
f1cb04 |
unsigned int res;
|
|
 |
f1cb04 |
char left_node_msg[1024];
|
|
 |
f1cb04 |
char joined_node_msg[1024];
|
|
 |
f1cb04 |
+ char failed_node_msg[1024];
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
instance->originated_orf_token = 0;
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
@@ -2008,15 +2061,30 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance)
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
if (instance->my_left_memb_entries) {
|
|
 |
f1cb04 |
int sptr = 0;
|
|
 |
f1cb04 |
+ int sptr2 = 0;
|
|
 |
f1cb04 |
sptr += snprintf(left_node_msg, sizeof(left_node_msg)-sptr, " left:");
|
|
 |
f1cb04 |
for (i=0; i< instance->my_left_memb_entries; i++) {
|
|
 |
f1cb04 |
sptr += snprintf(left_node_msg+sptr, sizeof(left_node_msg)-sptr, " %u", left_list[i]);
|
|
 |
f1cb04 |
}
|
|
 |
f1cb04 |
+ for (i=0; i< instance->my_left_memb_entries; i++) {
|
|
 |
f1cb04 |
+ if (my_leave_memb_match(instance, left_list[i]) == 0) {
|
|
 |
f1cb04 |
+ if (sptr2 == 0) {
|
|
 |
f1cb04 |
+ sptr2 += snprintf(failed_node_msg, sizeof(failed_node_msg)-sptr2, " failed:");
|
|
 |
f1cb04 |
+ }
|
|
 |
f1cb04 |
+ sptr2 += snprintf(failed_node_msg+sptr2, sizeof(left_node_msg)-sptr2, " %u", left_list[i]);
|
|
 |
f1cb04 |
+ }
|
|
 |
f1cb04 |
+ }
|
|
 |
f1cb04 |
+ if (sptr2 == 0) {
|
|
 |
f1cb04 |
+ failed_node_msg[0] = '\0';
|
|
 |
f1cb04 |
+ }
|
|
 |
f1cb04 |
}
|
|
 |
f1cb04 |
else {
|
|
 |
f1cb04 |
left_node_msg[0] = '\0';
|
|
 |
f1cb04 |
+ failed_node_msg[0] = '\0';
|
|
 |
f1cb04 |
}
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
+ my_leave_memb_clear(instance);
|
|
 |
f1cb04 |
+
|
|
 |
f1cb04 |
log_printf (instance->totemsrp_log_level_debug,
|
|
 |
f1cb04 |
"entering OPERATIONAL state.");
|
|
 |
f1cb04 |
log_printf (instance->totemsrp_log_level_notice,
|
|
 |
f1cb04 |
@@ -2025,6 +2093,13 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance)
|
|
 |
f1cb04 |
instance->my_ring_id.seq,
|
|
 |
f1cb04 |
joined_node_msg,
|
|
 |
f1cb04 |
left_node_msg);
|
|
 |
f1cb04 |
+
|
|
 |
f1cb04 |
+ if (strlen(failed_node_msg)) {
|
|
 |
f1cb04 |
+ log_printf (instance->totemsrp_log_level_notice,
|
|
 |
f1cb04 |
+ "Failed to receive the leave message.%s",
|
|
 |
f1cb04 |
+ failed_node_msg);
|
|
 |
f1cb04 |
+ }
|
|
 |
f1cb04 |
+
|
|
 |
f1cb04 |
instance->memb_state = MEMB_STATE_OPERATIONAL;
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
instance->stats.operational_entered++;
|
|
 |
f1cb04 |
@@ -3597,8 +3672,9 @@ static int message_handler_orf_token (
|
|
 |
f1cb04 |
return (0);
|
|
 |
f1cb04 |
}
|
|
 |
f1cb04 |
#endif
|
|
 |
f1cb04 |
-
|
|
 |
f1cb04 |
+ instance->flushing = 1;
|
|
 |
f1cb04 |
totemrrp_recv_flush (instance->totemrrp_context);
|
|
 |
f1cb04 |
+ instance->flushing = 0;
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
/*
|
|
 |
f1cb04 |
* Determine if we should hold (in reality drop) the token
|
|
 |
f1cb04 |
@@ -4130,6 +4206,32 @@ static void memb_join_process (
|
|
 |
f1cb04 |
memb_set_print ("my_faillist", instance->my_failed_list, instance->my_failed_list_entries);
|
|
 |
f1cb04 |
-*/
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
+ if (memb_join->header.type == MESSAGE_TYPE_MEMB_JOIN) {
|
|
 |
f1cb04 |
+ if (instance->flushing) {
|
|
 |
f1cb04 |
+ if (memb_join->header.nodeid == LEAVE_DUMMY_NODEID) {
|
|
 |
f1cb04 |
+ log_printf (instance->totemsrp_log_level_warning,
|
|
 |
f1cb04 |
+ "Discarding LEAVE message during flush, nodeid=%u",
|
|
 |
f1cb04 |
+ memb_join->failed_list_entries > 0 ? failed_list[memb_join->failed_list_entries - 1 ].addr[0].nodeid : LEAVE_DUMMY_NODEID);
|
|
 |
f1cb04 |
+ if (memb_join->failed_list_entries > 0) {
|
|
 |
f1cb04 |
+ my_leave_memb_set(instance, failed_list[memb_join->failed_list_entries - 1 ].addr[0].nodeid);
|
|
 |
f1cb04 |
+ }
|
|
 |
f1cb04 |
+ } else {
|
|
 |
f1cb04 |
+ log_printf (instance->totemsrp_log_level_warning,
|
|
 |
f1cb04 |
+ "Discarding JOIN message during flush, nodeid=%d", memb_join->header.nodeid);
|
|
 |
f1cb04 |
+ }
|
|
 |
f1cb04 |
+ return;
|
|
 |
f1cb04 |
+ } else {
|
|
 |
f1cb04 |
+ if (memb_join->header.nodeid == LEAVE_DUMMY_NODEID) {
|
|
 |
f1cb04 |
+ log_printf (instance->totemsrp_log_level_debug,
|
|
 |
f1cb04 |
+ "Recieve LEAVE message from %u", memb_join->failed_list_entries > 0 ? failed_list[memb_join->failed_list_entries - 1 ].addr[0].nodeid : LEAVE_DUMMY_NODEID);
|
|
 |
f1cb04 |
+ if (memb_join->failed_list_entries > 0) {
|
|
 |
f1cb04 |
+ my_leave_memb_set(instance, failed_list[memb_join->failed_list_entries - 1 ].addr[0].nodeid);
|
|
 |
f1cb04 |
+ }
|
|
 |
f1cb04 |
+ }
|
|
 |
f1cb04 |
+ }
|
|
 |
f1cb04 |
+
|
|
 |
f1cb04 |
+ }
|
|
 |
f1cb04 |
+
|
|
 |
f1cb04 |
if (memb_set_equal (proc_list,
|
|
 |
f1cb04 |
memb_join->proc_list_entries,
|
|
 |
f1cb04 |
instance->my_proc_list,
|
|
 |
f1cb04 |
@@ -4573,6 +4675,7 @@ void main_deliver_fn (
|
|
 |
f1cb04 |
return;
|
|
 |
f1cb04 |
}
|
|
 |
f1cb04 |
|
|
 |
f1cb04 |
+
|
|
 |
f1cb04 |
switch (message_header->type) {
|
|
 |
f1cb04 |
case MESSAGE_TYPE_ORF_TOKEN:
|
|
 |
f1cb04 |
instance->stats.orf_token_rx++;
|
|
 |
f1cb04 |
--
|
|
 |
f1cb04 |
1.7.1
|
|
 |
f1cb04 |
|