From 3ad2852be38b5534471a28ee03ca4440d4994cc5 Mon Sep 17 00:00:00 2001 From: Open vSwitch CI Date: Apr 04 2025 05:18:20 +0000 Subject: Import openvswitch3.3-3.3.4-107 from Fast DataPath --- diff --git a/SOURCES/openvswitch-3.3.0.patch b/SOURCES/openvswitch-3.3.0.patch index 81a6c44..1466861 100644 --- a/SOURCES/openvswitch-3.3.0.patch +++ b/SOURCES/openvswitch-3.3.0.patch @@ -5821,10 +5821,19 @@ index facd680ff3..354382f111 100644 } diff --git a/ovsdb/raft.c b/ovsdb/raft.c -index f463afcb3d..ac3d37ac40 100644 +index f463afcb3d..f56ee30b03 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c -@@ -81,6 +81,7 @@ enum raft_failure_test { +@@ -65,6 +65,8 @@ enum raft_role { + RAFT_LEADER + }; + ++static const char *raft_role_to_string(enum raft_role); ++ + /* Flags for unit tests. */ + enum raft_failure_test { + FT_NO_TEST, +@@ -81,6 +83,7 @@ enum raft_failure_test { FT_STOP_RAFT_RPC, FT_TRANSFER_LEADERSHIP, FT_TRANSFER_LEADERSHIP_AFTER_SEND_APPEND_REQ, @@ -5832,7 +5841,7 @@ index f463afcb3d..ac3d37ac40 100644 }; static enum raft_failure_test failure_test; -@@ -280,6 +281,7 @@ struct raft { +@@ -280,6 +283,7 @@ struct raft { /* Used for joining a cluster. */ bool joining; /* Attempting to join the cluster? */ struct sset remote_addresses; /* Addresses to try to find other servers. */ @@ -5840,7 +5849,19 @@ index f463afcb3d..ac3d37ac40 100644 long long int join_timeout; /* Time to re-send add server request. */ /* Used for leaving a cluster. */ -@@ -385,6 +387,7 @@ static void raft_get_servers_from_log(struct raft *, enum vlog_level); +@@ -373,6 +377,11 @@ static void raft_send_append_request(struct raft *, + struct raft_server *, unsigned int n, + const char *comment); + ++static void raft_role_transition_at(struct raft *, enum raft_role, ++ const char *func, const char *source); ++#define raft_role_transition(raft, role) \ ++ raft_role_transition_at(raft, role, __func__, OVS_SOURCE_LOCATOR) ++ + static void raft_become_leader(struct raft *); + static void raft_become_follower(struct raft *); + static void raft_reset_election_timer(struct raft *); +@@ -385,6 +394,7 @@ static void raft_get_servers_from_log(struct raft *, enum vlog_level); static void raft_get_election_timer_from_log(struct raft *); static bool raft_handle_write_error(struct raft *, struct ovsdb_error *); @@ -5848,7 +5869,16 @@ index f463afcb3d..ac3d37ac40 100644 static void raft_run_reconfigure(struct raft *); -@@ -1015,8 +1018,13 @@ raft_conn_update_probe_interval(struct raft *raft, struct raft_conn *r_conn) +@@ -433,7 +443,7 @@ raft_alloc(void) + hmap_node_nullify(&raft->hmap_node); + hmap_init(&raft->servers); + raft->log_start = raft->log_end = 1; +- raft->role = RAFT_FOLLOWER; ++ raft_role_transition(raft, RAFT_FOLLOWER); + sset_init(&raft->remote_addresses); + raft->join_timeout = LLONG_MAX; + ovs_list_init(&raft->waiters); +@@ -1015,8 +1025,13 @@ raft_conn_update_probe_interval(struct raft *raft, struct raft_conn *r_conn) * inactivity probe follower will just try to initiate election * indefinitely staying in 'candidate' role. And the leader will continue * to send heartbeats to the dead connection thinking that remote server @@ -5864,7 +5894,7 @@ index f463afcb3d..ac3d37ac40 100644 jsonrpc_session_set_probe_interval(r_conn->js, probe_interval); } -@@ -1083,7 +1091,7 @@ raft_open(struct ovsdb_log *log, struct raft **raftp) +@@ -1083,7 +1098,7 @@ raft_open(struct ovsdb_log *log, struct raft **raftp) raft_start_election(raft, false, false); } } else { @@ -5873,7 +5903,7 @@ index f463afcb3d..ac3d37ac40 100644 } raft_reset_ping_timer(raft); -@@ -1261,10 +1269,30 @@ raft_transfer_leadership(struct raft *raft, const char *reason) +@@ -1261,10 +1276,30 @@ raft_transfer_leadership(struct raft *raft, const char *reason) return; } @@ -5907,7 +5937,7 @@ index f463afcb3d..ac3d37ac40 100644 struct raft_conn *conn = raft_find_conn_by_sid(raft, &s->sid); if (!conn) { continue; -@@ -1280,7 +1308,10 @@ raft_transfer_leadership(struct raft *raft, const char *reason) +@@ -1280,7 +1315,10 @@ raft_transfer_leadership(struct raft *raft, const char *reason) .term = raft->term, } }; @@ -5919,7 +5949,7 @@ index f463afcb3d..ac3d37ac40 100644 raft_record_note(raft, "transfer leadership", "transferring leadership to %s because %s", -@@ -1288,6 +1319,23 @@ raft_transfer_leadership(struct raft *raft, const char *reason) +@@ -1288,6 +1326,23 @@ raft_transfer_leadership(struct raft *raft, const char *reason) break; } } @@ -5943,7 +5973,93 @@ index f463afcb3d..ac3d37ac40 100644 } /* Send a RemoveServerRequest to the rest of the servers in the cluster. -@@ -2078,7 +2126,7 @@ raft_run(struct raft *raft) +@@ -1322,8 +1377,29 @@ raft_send_remove_server_requests(struct raft *raft) + raft_send(raft, &rpc); + } + } ++} ++ ++/* Sends requests required to leave the cluster and schedules the next time ++ * this function should be called. */ ++static void ++raft_send_leave_requests(struct raft *raft) ++{ ++ long long int delay = raft->election_timer; + +- raft->leave_timeout = time_msec() + raft->election_timer; ++ if (raft->role == RAFT_LEADER) { ++ raft_transfer_leadership(raft, "this server is leaving the cluster"); ++ raft_become_follower(raft); ++ /* Not sending the RemoveServerRequest right away, because a new ++ * leader has to be elected first for the request to be successful. ++ * But setting a shorter delay to avoid waiting for too long when ++ * the leader re-election is fast. Randomized to avoid two servers ++ * bouncing the leadership between each other and never actually ++ * leaving. */ ++ delay = delay / 10 + random_range(delay / 10); ++ } else { ++ raft_send_remove_server_requests(raft); ++ } ++ raft->leave_timeout = time_msec() + delay; + } + + /* Attempts to start 'raft' leaving its cluster. The caller can check progress +@@ -1337,10 +1413,7 @@ raft_leave(struct raft *raft) + VLOG_INFO(SID_FMT": starting to leave cluster "CID_FMT, + SID_ARGS(&raft->sid), CID_ARGS(&raft->cid)); + raft->leaving = true; +- raft_transfer_leadership(raft, "this server is leaving the cluster"); +- raft_become_follower(raft); +- raft_send_remove_server_requests(raft); +- raft->leave_timeout = time_msec() + raft->election_timer; ++ raft_send_leave_requests(raft); + } + + /* Returns true if 'raft' is currently attempting to leave its cluster. */ +@@ -1812,10 +1885,6 @@ raft_start_election(struct raft *raft, bool is_prevote, + /* Leadership transfer doesn't use pre-vote. */ + ovs_assert(!is_prevote || !leadership_transfer); + +- if (raft->leaving) { +- return; +- } +- + struct raft_server *me = raft_find_server(raft, &raft->sid); + if (!me) { + return; +@@ -1828,8 +1897,8 @@ raft_start_election(struct raft *raft, bool is_prevote, + ovs_assert(raft->role != RAFT_LEADER); + + raft->leader_sid = UUID_ZERO; +- raft->role = RAFT_CANDIDATE; + raft->prevote_passed = !is_prevote; ++ raft_role_transition(raft, RAFT_CANDIDATE); + + if (is_prevote || leadership_transfer) { + /* If there was no leader elected since last election, we know we are +@@ -1942,6 +2011,12 @@ raft_conn_should_stay_open(struct raft *raft, struct raft_conn *conn) + return true; + } + ++ /* Keep the connection until we send a RemoveServerReply. */ ++ if (raft->remove_server ++ && uuid_equals(&conn->sid, &raft->remove_server->sid)) { ++ return true; ++ } ++ + /* We have joined the cluster. If we did that "recently", then there is a + * chance that we do not have the most recent server configuration log + * entry. If so, it's a waste to disconnect from the servers that were in +@@ -2068,6 +2143,8 @@ raft_run(struct raft *raft) + count ++; + } + } ++ VLOG_DBG("%d out of %"PRIuSIZE" servers replied", ++ count, hmap_count(&raft->servers)); + if (count >= hmap_count(&raft->servers) / 2) { + HMAP_FOR_EACH (server, hmap_node, &raft->servers) { + server->replied = false; +@@ -2078,17 +2155,17 @@ raft_run(struct raft *raft) raft_start_election(raft, true, false); } } else { @@ -5952,7 +6068,10 @@ index f463afcb3d..ac3d37ac40 100644 } } -@@ -2088,7 +2136,7 @@ raft_run(struct raft *raft) + + if (raft->leaving && time_msec() >= raft->leave_timeout) { +- raft_send_remove_server_requests(raft); ++ raft_send_leave_requests(raft); } if (raft->joining && time_msec() >= raft->join_timeout) { @@ -5961,13 +6080,18 @@ index f463afcb3d..ac3d37ac40 100644 LIST_FOR_EACH (conn, list_node, &raft->conns) { raft_send_add_server_request(raft, conn); } -@@ -2122,10 +2170,12 @@ raft_run(struct raft *raft) +@@ -2122,10 +2199,17 @@ raft_run(struct raft *raft) raft_reset_ping_timer(raft); } -+ uint64_t interval = raft->joining -+ ? RAFT_JOIN_TIMEOUT_MS -+ : RAFT_TIMER_THRESHOLD(raft->election_timer); ++ uint64_t interval = RAFT_TIMER_THRESHOLD(raft->election_timer); ++ ++ if (raft->joining) { ++ interval = RAFT_JOIN_TIMEOUT_MS; ++ } else if (uuid_is_zero(&raft->leader_sid)) { ++ /* There are no heartbeats to handle when there is no leader. */ ++ interval = raft->election_timer; ++ } cooperative_multitasking_set( &raft_run_cb, (void *) raft, time_msec(), - RAFT_TIMER_THRESHOLD(raft->election_timer) @@ -5976,7 +6100,25 @@ index f463afcb3d..ac3d37ac40 100644 /* Do this only at the end; if we did it as soon as we set raft->left or * raft->failed in handling the RemoveServerReply, then it could easily -@@ -2696,15 +2746,22 @@ raft_become_follower(struct raft *raft) +@@ -2347,7 +2431,7 @@ raft_command_execute__(struct raft *raft, const struct json *data, + const struct json *servers, uint64_t election_timer, + const struct uuid *prereq, struct uuid *result) + { +- if (raft->joining || raft->leaving || raft->left || raft->failed) { ++ if (raft->joining || raft->left || raft->failed) { + return raft_command_create_completed(RAFT_CMD_SHUTDOWN); + } + +@@ -2685,7 +2769,7 @@ raft_become_follower(struct raft *raft) + return; + } + +- raft->role = RAFT_FOLLOWER; ++ raft_role_transition(raft, RAFT_FOLLOWER); + raft_reset_election_timer(raft); + + /* Notify clients about lost leadership. +@@ -2696,15 +2780,22 @@ raft_become_follower(struct raft *raft) * new configuration. Our AppendEntries processing will properly update * the server configuration later, if necessary. * @@ -6000,7 +6142,7 @@ index f463afcb3d..ac3d37ac40 100644 } if (raft->remove_server) { raft_send_remove_server_reply__(raft, &raft->remove_server->sid, -@@ -2768,6 +2825,13 @@ raft_send_heartbeats(struct raft *raft) +@@ -2768,6 +2859,13 @@ raft_send_heartbeats(struct raft *raft) raft_reset_ping_timer(raft); } @@ -6014,7 +6156,41 @@ index f463afcb3d..ac3d37ac40 100644 /* Initializes the fields in 's' that represent the leader's view of the * server. */ static void -@@ -2805,6 +2869,18 @@ raft_become_leader(struct raft *raft) +@@ -2788,6 +2886,26 @@ raft_set_leader(struct raft *raft, const struct uuid *sid) + raft->candidate_retrying = false; + } + ++static const char * ++raft_role_to_string(enum raft_role role) ++{ ++ switch (role) { ++ case RAFT_FOLLOWER: return "follower"; ++ case RAFT_CANDIDATE: return "candidate"; ++ case RAFT_LEADER: return "leader"; ++ default: return ""; ++ } ++} ++ ++static void ++raft_role_transition_at(struct raft *raft, enum raft_role role, ++ const char *func, const char *source) ++{ ++ VLOG_DBG("%s(%s): role transition: %s --> %s", func, source, ++ raft_role_to_string(raft->role), raft_role_to_string(role)); ++ raft->role = role; ++} ++ + static void + raft_become_leader(struct raft *raft) + { +@@ -2799,12 +2917,24 @@ raft_become_leader(struct raft *raft) + raft->n_votes, hmap_count(&raft->servers)); + + ovs_assert(raft->role != RAFT_LEADER); +- raft->role = RAFT_LEADER; ++ raft_role_transition(raft, RAFT_LEADER); + raft->election_won = time_msec(); + raft_set_leader(raft, &raft->sid); raft_reset_election_timer(raft); raft_reset_ping_timer(raft); @@ -6033,7 +6209,7 @@ index f463afcb3d..ac3d37ac40 100644 struct raft_server *s; HMAP_FOR_EACH (s, hmap_node, &raft->servers) { raft_server_init_leader(raft, s); -@@ -2963,12 +3039,12 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index) +@@ -2963,12 +3093,12 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index) } while (raft->commit_index < new_commit_index) { @@ -6047,7 +6223,7 @@ index f463afcb3d..ac3d37ac40 100644 if (cmd) { if (!cmd->index && raft->role == RAFT_LEADER) { -@@ -3012,6 +3088,35 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index) +@@ -3012,6 +3142,35 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index) * reallocate raft->entries, which would invalidate 'e', so * this case must be last, after the one for 'e->data'. */ raft_run_reconfigure(raft); @@ -6083,7 +6259,16 @@ index f463afcb3d..ac3d37ac40 100644 } } -@@ -3938,6 +4043,10 @@ raft_handle_add_server_request(struct raft *raft, +@@ -3219,7 +3378,7 @@ raft_update_leader(struct raft *raft, const struct uuid *sid) + * least as large as the candidate's current term, then the + * candidate recognizes the leader as legitimate and returns to + * follower state. */ +- raft->role = RAFT_FOLLOWER; ++ raft_role_transition(raft, RAFT_FOLLOWER); + } + return true; + } +@@ -3938,6 +4097,10 @@ raft_handle_add_server_request(struct raft *raft, "to cluster "CID_FMT, s->nickname, SID_ARGS(&s->sid), rq->address, CID_ARGS(&raft->cid)); raft_send_append_request(raft, s, 0, "initialize new server"); @@ -6094,7 +6279,7 @@ index f463afcb3d..ac3d37ac40 100644 } static void -@@ -3952,7 +4061,7 @@ raft_handle_add_server_reply(struct raft *raft, +@@ -3952,7 +4115,7 @@ raft_handle_add_server_reply(struct raft *raft, } if (rpy->success) { @@ -6103,7 +6288,35 @@ index f463afcb3d..ac3d37ac40 100644 /* It is tempting, at this point, to check that this server is part of * the current configuration. However, this is not necessarily the -@@ -4926,6 +5035,7 @@ raft_get_election_timer_from_log(struct raft *raft) +@@ -3991,6 +4154,14 @@ raft_handle_remove_server_request(struct raft *raft, + return; + } + ++ /* Check for the server already being removed. */ ++ if (raft->remove_server ++ && uuid_equals(&rq->sid, &raft->remove_server->sid)) { ++ raft_send_remove_server_reply(raft, rq, ++ false, RAFT_SERVER_IN_PROGRESS); ++ return; ++ } ++ + /* If the server isn't configured, report that. */ + target = raft_find_server(raft, &rq->sid); + if (!target) { +@@ -4725,11 +4896,7 @@ raft_unixctl_status(struct unixctl_conn *conn, + } + } + +- ds_put_format(&s, "Role: %s\n", +- raft->role == RAFT_LEADER ? "leader" +- : raft->role == RAFT_CANDIDATE ? "candidate" +- : raft->role == RAFT_FOLLOWER ? "follower" +- : ""); ++ ds_put_format(&s, "Role: %s\n", raft_role_to_string(raft->role)); + ds_put_format(&s, "Term: %"PRIu64"\n", raft->term); + raft_put_sid("Leader", &raft->leader_sid, raft, &s); + raft_put_sid("Vote", &raft->vote, raft, &s); +@@ -4926,6 +5093,7 @@ raft_get_election_timer_from_log(struct raft *raft) break; } } @@ -6111,7 +6324,7 @@ index f463afcb3d..ac3d37ac40 100644 } static void -@@ -5063,6 +5173,8 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED, +@@ -5063,6 +5231,8 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED, } else if (!strcmp(test, "transfer-leadership-after-sending-append-request")) { failure_test = FT_TRANSFER_LEADERSHIP_AFTER_SEND_APPEND_REQ; @@ -7763,10 +7976,10 @@ index d03d365003..a9337f6192 100644 OVS_VSWITCHD_START diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at -index 481afc08b3..9d8b4d06a4 100644 +index 481afc08b3..91a76cb813 100644 --- a/tests/ovsdb-cluster.at +++ b/tests/ovsdb-cluster.at -@@ -473,6 +473,112 @@ done +@@ -473,6 +473,271 @@ done AT_CLEANUP @@ -7876,6 +8089,165 @@ index 481afc08b3..9d8b4d06a4 100644 +done + +AT_CLEANUP ++ ++AT_BANNER([OVSDB - cluster failure while leaving]) ++AT_SETUP([OVSDB cluster - leaving the cluster with some servers down]) ++AT_KEYWORDS([ovsdb server negative unix cluster leave]) ++ ++AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db \ ++ $top_srcdir/vswitchd/vswitch.ovsschema unix:s1.raft], [0], [], [stderr]) ++schema_name=$(ovsdb-tool schema-name $top_srcdir/vswitchd/vswitch.ovsschema) ++for i in 2 3 4 5; do ++ AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft]) ++done ++ ++on_exit 'kill $(cat *.pid)' ++on_exit " ++ for i in \$(ls $(pwd)/s[[0-5]]); do ++ ovs-appctl --timeout 1 -t \$i cluster/status $schema_name; ++ done ++" ++dnl Starting all the servers. ++for i in 1 2 3 4 5; do ++ AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off \ ++ --detach --no-chdir --log-file=s$i.log \ ++ --pidfile=s$i.pid --unixctl=s$i \ ++ --remote=punix:s$i.ovsdb s$i.db]) ++done ++ ++dnl Make sure that all servers joined the cluster. ++for i in 1 2 3 4 5; do ++ AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) ++done ++ ++dnl Make sure the cluster is operational. ++m4_define([DB_REMOTE], [unix:s1.ovsdb,unix:s2.ovsdb,unix:s3.ovsdb,unix:s4.ovsdb,unix:s5.ovsdb]) ++AT_CHECK([ovs-vsctl --db="DB_REMOTE" --no-wait init]) ++AT_CHECK([ovs-vsctl --db="DB_REMOTE" -vovsdb_cs:console:dbg --no-leader-only \ ++ --no-wait create QoS type=test-1], [0], [ignore], [ignore]) ++ ++dnl Stop servers 1 and 2. ++OVS_APP_EXIT_AND_WAIT_BY_TARGET([$(pwd)/s1], [s1.pid]) ++OVS_APP_EXIT_AND_WAIT_BY_TARGET([$(pwd)/s2], [s2.pid]) ++ ++dnl Make sure that all remaining servers are functional as a cluster. ++for i in 3 4 5; do ++ AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) ++done ++ ++dnl Make sure the cluster is still operational. ++m4_define([DB_REMOTE], [unix:s3.ovsdb,unix:s4.ovsdb,unix:s5.ovsdb]) ++AT_CHECK([ovs-vsctl --db="DB_REMOTE" -vovsdb_cs:console:dbg --no-leader-only \ ++ --no-wait create QoS type=test-2], [0], [ignore], [ignore]) ++ ++dnl Servers 1 and 2 in a cluster of 5 are down, 3 servers are still alive. ++dnl Server 3 can't leave, because the NEW configuration will be a cluster of ++dnl 4 with 2 servers down and it doesn't have a quorum. Try it. ++dnl The cluster will fall apart until servers 1 or 2 come back to resolve ++dnl the quorum issue, because servers 4 and 5 will no longer consider 3 ++dnl to be part of the configuration. ++AT_CHECK([ovs-appctl -t $(pwd)/s3 cluster/leave $schema_name]) ++ ++dnl Check that the cluster is not operational. ++for i in 3 4 5; do ++ OVS_WAIT_UNTIL([ovs-appctl -t $(pwd)/s$i cluster/status $schema_name \ ++ | grep -qE 'leaving|disconnected']) ++done ++ ++dnl Try to commit a transaction, it should not be successful. ++m4_define([DB_REMOTE], [unix:s3.ovsdb,unix:s4.ovsdb,unix:s5.ovsdb]) ++AT_CHECK([ovs-vsctl --db="DB_REMOTE" -vovsdb_cs:console:dbg --no-leader-only \ ++ --no-wait create QoS type=test-3], [1], [ignore], [stderr]) ++ ++dnl Now bring back the server 2. This should allow server 3 to leave. ++AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off \ ++ --detach --no-chdir --log-file=s2.log \ ++ --pidfile=s2.pid --unixctl=s2 \ ++ --remote=punix:s2.ovsdb s2.db]) ++ ++dnl Wait for server 3 to actually leave and stop the server. ++AT_CHECK([ovsdb_client_wait unix:s3.ovsdb $schema_name removed]) ++OVS_APP_EXIT_AND_WAIT_BY_TARGET([$(pwd)/s3], [s3.pid]) ++ ++dnl Make sure that all remaining servers are functional as a cluster. ++for i in 2 4 5; do ++ AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) ++done ++dnl Make sure the cluster is operational again. ++m4_define([DB_REMOTE], [unix:s2.ovsdb,unix:s4.ovsdb,unix:s5.ovsdb]) ++AT_CHECK([ovs-vsctl --db="DB_REMOTE" -vovsdb_cs:console:dbg --no-leader-only \ ++ --no-wait create QoS type=test-4], [0], [ignore], [ignore]) ++ ++dnl Now we have a cluster of 4 servers (1, 2, 4, 5) with 1 server down. ++dnl Server 2 should be able to leave, because the NEW configuration will ++dnl be a cluster of 3 servers with 1 being down and it has a quorum. ++AT_CHECK([ovs-appctl -t $(pwd)/s2 cluster/leave $schema_name]) ++dnl Wait for server 2 to actually leave and stop the server. ++AT_CHECK([ovsdb_client_wait unix:s2.ovsdb $schema_name removed]) ++OVS_APP_EXIT_AND_WAIT_BY_TARGET([$(pwd)/s2], [s2.pid]) ++ ++dnl Make sure the cluster is still operational. ++m4_define([DB_REMOTE], [unix:s4.ovsdb,unix:s5.ovsdb]) ++AT_CHECK([ovs-vsctl --db="DB_REMOTE" -vovsdb_cs:console:dbg --no-leader-only \ ++ --no-wait create QoS type=test-5], [0], [ignore], [ignore]) ++ ++dnl Now we have a cluster of 3 servers (1, 4, 5) with 1 server down. ++dnl None of the alive servers can leave, because the NEW configuration ++dnl will be a cluster of 2 with 1 server down and it has no quorum. ++dnl Request both to leave anyway. ++for i in 4 5; do ++ AT_CHECK([ovs-appctl -t $(pwd)/s$i cluster/leave $schema_name]) ++done ++ ++dnl Check that the cluster is not operational. ++for i in 4 5; do ++ OVS_WAIT_UNTIL([ovs-appctl -t $(pwd)/s$i cluster/status $schema_name \ ++ | grep -qE 'leaving|disconnected']) ++done ++ ++dnl Try to commit a transaction, it should not be successful. ++m4_define([DB_REMOTE], [unix:s4.ovsdb,unix:s5.ovsdb]) ++AT_CHECK([ovs-vsctl --db="DB_REMOTE" -vovsdb_cs:console:dbg --no-leader-only \ ++ --no-wait create QoS type=test-6], [1], [ignore], [stderr]) ++ ++dnl Now bring back the first server. ++AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off \ ++ --detach --no-chdir --log-file=s1.log \ ++ --pidfile=s1.pid --unixctl=s1 \ ++ --remote=punix:s1.ovsdb s1.db]) ++ ++dnl Now it should be possible for all the other servers to leave, so we ++dnl should end up with a single-node cluster that consists of server 1. ++for i in 4 5; do ++ AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name removed]) ++done ++for i in 4 5; do ++ OVS_APP_EXIT_AND_WAIT_BY_TARGET([$(pwd)/s$i], [s$i.pid]) ++done ++ ++dnl Wait for the first server to become a leader of a single-node cluster. ++OVS_WAIT_UNTIL([ovs-appctl -t $(pwd)/s1 cluster/status $schema_name \ ++ | grep -q 'Role: leader']) ++AT_CHECK([ovs-appctl -t $(pwd)/s1 cluster/status $schema_name \ ++ | grep -c ' s[[1-5]] '], [0], [dnl ++1 ++]) ++ ++dnl Check that the database is operational and the data is still in there. ++m4_define([DB_REMOTE], [unix:s1.ovsdb]) ++AT_CHECK([ovs-vsctl --db="DB_REMOTE" -vovsdb_cs:console:dbg --no-wait \ ++ create QoS type=test-7], [0], [ignore], [ignore]) ++AT_CHECK([ovs-vsctl --db="DB_REMOTE" --no-wait \ ++ --columns=type --bare list QoS | sed '/^$/d' | sort], [0], [dnl ++test-1 ++test-2 ++test-4 ++test-5 ++test-7 ++]) ++ ++OVS_APP_EXIT_AND_WAIT_BY_TARGET([$(pwd)/s1], [s1.pid]) ++AT_CLEANUP OVS_START_SHELL_HELPERS diff --git a/SPECS/openvswitch3.3.spec b/SPECS/openvswitch3.3.spec index 260aeb6..549d98d 100644 --- a/SPECS/openvswitch3.3.spec +++ b/SPECS/openvswitch3.3.spec @@ -59,7 +59,7 @@ Summary: Open vSwitch Group: System Environment/Daemons daemon/database/utilities URL: http://www.openvswitch.org/ Version: 3.3.4 -Release: 106%{?dist} +Release: 107%{?dist} # Nearly all of openvswitch is ASL 2.0. The bugtool is LGPLv2+, and the # lib/sflow*.[ch] files are SISSL @@ -794,6 +794,14 @@ exit 0 %endif %changelog +* Thu Apr 03 2025 Open vSwitch CI - 3.3.0-107 +- Merging upstream branch-3.3 [RH git: 8c610a6a50] + Commit list: + d1fd6bce8d ovsdb: raft: Fix cluster break down on leaving with some nodes down. (FDP-662) + e2aa29240f ovsdb: raft: Add debug logs for role transition. + bc1735a22a ovsdb: raft: Fix multitasking overrun warning when there is no leader. + + * Thu Apr 03 2025 Open vSwitch CI - 3.3.0-106 - Merging upstream branch-3.3 [RH git: acaac7359f] Commit list: