|
 |
444711 |
From 3dbe5903235efabcaf438f5e5f526946dfbdf661 Mon Sep 17 00:00:00 2001
|
|
 |
444711 |
From: Christopher Faulet <cfaulet@haproxy.com>
|
|
 |
444711 |
Date: Wed, 2 May 2018 12:12:45 +0200
|
|
 |
444711 |
Subject: [PATCH] BUG/MINOR: checks: Fix check->health computation for flapping
|
|
 |
444711 |
servers
|
|
 |
444711 |
|
|
 |
444711 |
This patch fixes an old bug introduced in the commit 7b1d47ce ("MAJOR: checks:
|
|
 |
444711 |
move health checks changes to set_server_check_status()"). When a DOWN server is
|
|
 |
444711 |
flapping, every time a check succeeds, check->health is incremented. But when a
|
|
 |
444711 |
check fails, it is decremented only when it is higher than the rise value. So if
|
|
 |
444711 |
only one check succeeds for a DOWN server, check->health will remain set to 1 for
|
|
 |
444711 |
all subsequent failing checks.
|
|
 |
444711 |
|
|
 |
444711 |
So, at first glance, it seems not that terrible because the server remains
|
|
 |
444711 |
DOWN. But it is reported in the transitional state "DOWN server, going up". And
|
|
 |
444711 |
it will remain in this state until it is UP again. And there is also an
|
|
 |
444711 |
insidious side effect. If a DOWN server is flapping from time to time, it will eventually
|
|
 |
444711 |
be considered UP after a single successful check, regardless of the rise threshold,
|
|
 |
444711 |
because check->health will be increased slowly and never decreased.
|
|
 |
444711 |
|
|
 |
444711 |
To fix the bug, we just need to reset check->health to 0 when a check fails for
|
|
 |
444711 |
a DOWN server. To do so, we just need to relax the condition to handle a failure
|
|
 |
444711 |
in the function set_server_check_status.
|
|
 |
444711 |
|
|
 |
444711 |
This patch must be backported to haproxy 1.5 and newer.
|
|
 |
444711 |
|
|
 |
444711 |
(cherry picked from commit b119a79fc336f2b6074de1c3113b1682c717985c)
|
|
 |
444711 |
Signed-off-by: Willy Tarreau <w@1wt.eu>
|
|
 |
444711 |
(cherry picked from commit edb5a1efd22eb9918574d962640cd2ae3bb45ad3)
|
|
 |
444711 |
Signed-off-by: William Lallemand <wlallemand@haproxy.org>
|
|
 |
444711 |
(cherry picked from commit 6d2f7fb1531a446dcf609e1340a1c1e40e907a39)
|
|
 |
444711 |
Signed-off-by: Willy Tarreau <w@1wt.eu>
|
|
 |
444711 |
---
|
|
 |
444711 |
src/checks.c | 2 +-
|
|
 |
444711 |
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
 |
444711 |
|
|
 |
444711 |
diff --git a/src/checks.c b/src/checks.c
|
|
 |
444711 |
index 27a23b21..fcd85aba 100644
|
|
 |
444711 |
--- a/src/checks.c
|
|
 |
444711 |
+++ b/src/checks.c
|
|
 |
444711 |
@@ -247,7 +247,7 @@ static void set_server_check_status(struct check *check, short status, const cha
|
|
 |
444711 |
*/
|
|
 |
444711 |
if ((!(check->state & CHK_ST_AGENT) ||
|
|
 |
444711 |
(check->status >= HCHK_STATUS_L57DATA)) &&
|
|
 |
444711 |
- (check->health >= check->rise)) {
|
|
 |
444711 |
+ (check->health > 0)) {
|
|
 |
444711 |
s->counters.failed_checks++;
|
|
 |
444711 |
report = 1;
|
|
 |
444711 |
check->health--;
|
|
 |
444711 |
--
|
|
 |
444711 |
2.29.2
|
|
 |
444711 |
|