|
 |
354091 |
From ca320beac25f82c0c555799e647a47975a333c28 Mon Sep 17 00:00:00 2001
|
|
 |
354091 |
From: Jan Friesse <jfriesse@redhat.com>
|
|
 |
354091 |
Date: Tue, 10 Mar 2020 17:49:27 +0100
|
|
 |
354091 |
Subject: [PATCH] votequorum: set wfa status only on startup
|
|
 |
354091 |
|
|
 |
354091 |
Previously, a reload of configuration with wait_for_all enabled resulted in
|
|
 |
354091 |
setting wait_for_all_status, which set cluster_is_quorate to 0 but didn't
|
|
 |
354091 |
inform the quorum service so votequorum and quorum information may get
|
|
 |
354091 |
out of sync.
|
|
 |
354091 |
|
|
 |
354091 |
Example is 1 node cluster, which is extended to 3 nodes. Quorum service
|
|
 |
354091 |
reports the cluster as quorate (incorrect) and votequorum as not quorate
|
|
 |
354091 |
(correct). Similar behavior occurs when extending a cluster in general,
|
|
 |
354091 |
but some configurations are less incorrect (3->4).
|
|
 |
354091 |
|
|
 |
354091 |
One discussed solution was to inform the quorum service, but that would mean
|
|
 |
354091 |
every reload would cause loss of quorum until all nodes were seen
|
|
 |
354091 |
again.
|
|
 |
354091 |
|
|
 |
354091 |
Such behaviour is consistent but seems to be a bit too strict.
|
|
 |
354091 |
|
|
 |
354091 |
The proposed solution sets wait_for_all_status only on startup and
|
|
 |
354091 |
doesn't touch it during reload.
|
|
 |
354091 |
|
|
 |
354091 |
This solution fulfills the requirement of "cluster will be quorate for
|
|
 |
354091 |
the first time only after all nodes have been visible at least
|
|
 |
354091 |
once at the same time." because a node clears wait_for_all_status only
|
|
 |
354091 |
after it sees all other nodes or joins a cluster which is quorate. It also
|
|
 |
354091 |
solves the problem with extending a cluster, because when the cluster becomes
|
|
 |
354091 |
unquorate (1->3) wait_for_all_status is set.
|
|
 |
354091 |
|
|
 |
354091 |
The added assert only ensures that I haven't missed any case where
|
|
 |
354091 |
a quorate cluster may become unquorate.
|
|
 |
354091 |
|
|
 |
354091 |
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
|
|
 |
354091 |
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
|
|
 |
354091 |
---
|
|
 |
354091 |
exec/votequorum.c | 6 ++++--
|
|
 |
354091 |
1 file changed, 4 insertions(+), 2 deletions(-)
|
|
 |
354091 |
|
|
 |
354091 |
diff --git a/exec/votequorum.c b/exec/votequorum.c
|
|
 |
354091 |
index b152425..fb9f1cd 100644
|
|
 |
354091 |
--- a/exec/votequorum.c
|
|
 |
354091 |
+++ b/exec/votequorum.c
|
|
 |
354091 |
@@ -1009,7 +1009,7 @@ static void are_we_quorate(unsigned int total_votes)
|
|
 |
354091 |
"Waiting for all cluster members. "
|
|
 |
354091 |
"Current votes: %d expected_votes: %d",
|
|
 |
354091 |
total_votes, us->expected_votes);
|
|
 |
354091 |
- cluster_is_quorate = 0;
|
|
 |
354091 |
+ assert(!cluster_is_quorate);
|
|
 |
354091 |
return;
|
|
 |
354091 |
}
|
|
 |
354091 |
update_wait_for_all_status(0);
|
|
 |
354091 |
@@ -1547,7 +1547,9 @@ static char *votequorum_readconfig(int runtime)
|
|
 |
354091 |
update_ev_barrier(us->expected_votes);
|
|
 |
354091 |
update_two_node();
|
|
 |
354091 |
if (wait_for_all) {
|
|
 |
354091 |
- update_wait_for_all_status(1);
|
|
 |
354091 |
+ if (!runtime) {
|
|
 |
354091 |
+ update_wait_for_all_status(1);
|
|
 |
354091 |
+ }
|
|
 |
354091 |
} else if (wait_for_all_autoset && wait_for_all_status) {
|
|
 |
354091 |
/*
|
|
 |
354091 |
* Reset wait for all status for consistency when wfa is auto-unset by 2node.
|
|
 |
354091 |
--
|
|
 |
354091 |
1.8.3.1
|
|
 |
354091 |
|