From 0da05d1b9b0024f09c6fe08bee5565657a7a07df Mon Sep 17 00:00:00 2001
From: Open vSwitch CI <ovs-team@redhat.com>
Date: Apr 04 2025 02:49:39 +0000
Subject: Import openvswitch3.5-3.5.0-16 from Fast DataPath


---

diff --git a/SOURCES/openvswitch-3.5.0.patch b/SOURCES/openvswitch-3.5.0.patch
index aeb40c6..3996c75 100644
--- a/SOURCES/openvswitch-3.5.0.patch
+++ b/SOURCES/openvswitch-3.5.0.patch
@@ -256,6 +256,244 @@ index bf43d5d4bc..354357b50a 100644
      group_get_stats,            /* group_get_stats */
      get_datapath_version,       /* get_datapath_version */
      get_datapath_cap,
+diff --git a/ovsdb/raft.c b/ovsdb/raft.c
+index 9c3c351b5b..7d78f710ef 100644
+--- a/ovsdb/raft.c
++++ b/ovsdb/raft.c
+@@ -65,6 +65,8 @@ enum raft_role {
+     RAFT_LEADER
+ };
+ 
++static const char *raft_role_to_string(enum raft_role);
++
+ /* Flags for unit tests. */
+ enum raft_failure_test {
+     FT_NO_TEST,
+@@ -375,6 +377,11 @@ static void raft_send_append_request(struct raft *,
+                                      struct raft_server *, unsigned int n,
+                                      const char *comment);
+ 
++static void raft_role_transition_at(struct raft *, enum raft_role,
++                                    const char *func, const char *source);
++#define raft_role_transition(raft, role) \
++    raft_role_transition_at(raft, role, __func__, OVS_SOURCE_LOCATOR)
++
+ static void raft_become_leader(struct raft *);
+ static void raft_become_follower(struct raft *);
+ static void raft_reset_election_timer(struct raft *);
+@@ -436,7 +443,7 @@ raft_alloc(void)
+     hmap_node_nullify(&raft->hmap_node);
+     hmap_init(&raft->servers);
+     raft->log_start = raft->log_end = 1;
+-    raft->role = RAFT_FOLLOWER;
++    raft_role_transition(raft, RAFT_FOLLOWER);
+     sset_init(&raft->remote_addresses);
+     raft->join_timeout = LLONG_MAX;
+     ovs_list_init(&raft->waiters);
+@@ -1370,8 +1377,29 @@ raft_send_remove_server_requests(struct raft *raft)
+             raft_send(raft, &rpc);
+         }
+     }
++}
++
++/* Sends requests required to leave the cluster and schedules the next time
++ * this function should be called. */
++static void
++raft_send_leave_requests(struct raft *raft)
++{
++    long long int delay = raft->election_timer;
+ 
+-    raft->leave_timeout = time_msec() + raft->election_timer;
++    if (raft->role == RAFT_LEADER) {
++        raft_transfer_leadership(raft, "this server is leaving the cluster");
++        raft_become_follower(raft);
+        /* The RemoveServerRequest is not sent right away, because a new
+         * leader has to be elected first for the request to succeed.
+         * Instead, retry after a delay shorter than the election timer, so
+         * that a fast re-election is not followed by a long wait.  The delay
+         * is randomized so that two servers do not keep bouncing the
+         * leadership between each other without ever actually leaving. */
++        delay = delay / 10 + random_range(delay / 10);
++    } else {
++        raft_send_remove_server_requests(raft);
++    }
++    raft->leave_timeout = time_msec() + delay;
+ }
+ 
+ /* Attempts to start 'raft' leaving its cluster.  The caller can check progress
+@@ -1385,10 +1413,7 @@ raft_leave(struct raft *raft)
+     VLOG_INFO(SID_FMT": starting to leave cluster "CID_FMT,
+               SID_ARGS(&raft->sid), CID_ARGS(&raft->cid));
+     raft->leaving = true;
+-    raft_transfer_leadership(raft, "this server is leaving the cluster");
+-    raft_become_follower(raft);
+-    raft_send_remove_server_requests(raft);
+-    raft->leave_timeout = time_msec() + raft->election_timer;
++    raft_send_leave_requests(raft);
+ }
+ 
+ /* Returns true if 'raft' is currently attempting to leave its cluster. */
+@@ -1860,10 +1885,6 @@ raft_start_election(struct raft *raft, bool is_prevote,
+     /* Leadership transfer doesn't use pre-vote. */
+     ovs_assert(!is_prevote || !leadership_transfer);
+ 
+-    if (raft->leaving) {
+-        return;
+-    }
+-
+     struct raft_server *me = raft_find_server(raft, &raft->sid);
+     if (!me) {
+         return;
+@@ -1876,8 +1897,8 @@ raft_start_election(struct raft *raft, bool is_prevote,
+     ovs_assert(raft->role != RAFT_LEADER);
+ 
+     raft->leader_sid = UUID_ZERO;
+-    raft->role = RAFT_CANDIDATE;
+     raft->prevote_passed = !is_prevote;
++    raft_role_transition(raft, RAFT_CANDIDATE);
+ 
+     if (is_prevote || leadership_transfer) {
+         /* If there was no leader elected since last election, we know we are
+@@ -1990,6 +2011,12 @@ raft_conn_should_stay_open(struct raft *raft, struct raft_conn *conn)
+         return true;
+     }
+ 
++    /* Keep the connection until we send a RemoveServerReply. */
++    if (raft->remove_server
++        && uuid_equals(&conn->sid, &raft->remove_server->sid)) {
++        return true;
++    }
++
+     /* We have joined the cluster.  If we did that "recently", then there is a
+      * chance that we do not have the most recent server configuration log
+      * entry.  If so, it's a waste to disconnect from the servers that were in
+@@ -2116,6 +2143,8 @@ raft_run(struct raft *raft)
+                     count ++;
+                 }
+             }
++            VLOG_DBG("%d out of %"PRIuSIZE" servers replied",
++                      count, hmap_count(&raft->servers));
+             if (count >= hmap_count(&raft->servers) / 2) {
+                 HMAP_FOR_EACH (server, hmap_node, &raft->servers) {
+                     server->replied = false;
+@@ -2132,7 +2161,7 @@ raft_run(struct raft *raft)
+     }
+ 
+     if (raft->leaving && time_msec() >= raft->leave_timeout) {
+-        raft_send_remove_server_requests(raft);
++        raft_send_leave_requests(raft);
+     }
+ 
+     if (raft->joining && time_msec() >= raft->join_timeout) {
+@@ -2170,9 +2199,14 @@ raft_run(struct raft *raft)
+         raft_reset_ping_timer(raft);
+     }
+ 
+-    uint64_t interval = raft->joining
+-                        ? RAFT_JOIN_TIMEOUT_MS
+-                        : RAFT_TIMER_THRESHOLD(raft->election_timer);
++    uint64_t interval = RAFT_TIMER_THRESHOLD(raft->election_timer);
++
++    if (raft->joining) {
++        interval = RAFT_JOIN_TIMEOUT_MS;
++    } else if (uuid_is_zero(&raft->leader_sid)) {
++        /* There are no heartbeats to handle when there is no leader. */
++        interval = raft->election_timer;
++    }
+     cooperative_multitasking_set(
+         &raft_run_cb, (void *) raft, time_msec(),
+         interval + interval / 10, "raft_run");
+@@ -2440,7 +2474,7 @@ raft_command_execute__(struct raft *raft, const struct json *data,
+                        const struct json *servers, uint64_t election_timer,
+                        const struct uuid *prereq, struct uuid *result)
+ {
+-    if (raft->joining || raft->leaving || raft->left || raft->failed) {
++    if (raft->joining || raft->left || raft->failed) {
+         return raft_command_create_completed(RAFT_CMD_SHUTDOWN);
+     }
+ 
+@@ -2778,7 +2812,7 @@ raft_become_follower(struct raft *raft)
+         return;
+     }
+ 
+-    raft->role = RAFT_FOLLOWER;
++    raft_role_transition(raft, RAFT_FOLLOWER);
+     raft_reset_election_timer(raft);
+ 
+     /* Notify clients about lost leadership.
+@@ -2895,6 +2929,26 @@ raft_set_leader(struct raft *raft, const struct uuid *sid)
+     raft->candidate_retrying = false;
+ }
+ 
++static const char *
++raft_role_to_string(enum raft_role role)
++{
++    switch (role) {
++    case RAFT_FOLLOWER:  return "follower";
++    case RAFT_CANDIDATE: return "candidate";
++    case RAFT_LEADER:    return "leader";
++    default: return "<error>";
++    }
++}
++
++static void
++raft_role_transition_at(struct raft *raft, enum raft_role role,
++                        const char *func, const char *source)
++{
++    VLOG_DBG("%s(%s): role transition: %s --> %s", func, source,
++             raft_role_to_string(raft->role), raft_role_to_string(role));
++    raft->role = role;
++}
++
+ static void
+ raft_become_leader(struct raft *raft)
+ {
+@@ -2906,7 +2960,7 @@ raft_become_leader(struct raft *raft)
+                  raft->n_votes, hmap_count(&raft->servers));
+ 
+     ovs_assert(raft->role != RAFT_LEADER);
+-    raft->role = RAFT_LEADER;
++    raft_role_transition(raft, RAFT_LEADER);
+     raft->election_won = time_msec();
+     raft_set_leader(raft, &raft->sid);
+     raft_reset_election_timer(raft);
+@@ -3367,7 +3421,7 @@ raft_update_leader(struct raft *raft, const struct uuid *sid)
+          * least as large as the candidate's current term, then the
+          * candidate recognizes the leader as legitimate and returns to
+          * follower state. */
+-        raft->role = RAFT_FOLLOWER;
++        raft_role_transition(raft, RAFT_FOLLOWER);
+     }
+     return true;
+ }
+@@ -4143,6 +4197,14 @@ raft_handle_remove_server_request(struct raft *raft,
+         return;
+     }
+ 
++    /* Check for the server already being removed. */
++    if (raft->remove_server
++        && uuid_equals(&rq->sid, &raft->remove_server->sid)) {
++        raft_send_remove_server_reply(raft, rq,
++                                      false, RAFT_SERVER_IN_PROGRESS);
++        return;
++    }
++
+     /* If the server isn't configured, report that. */
+     target = raft_find_server(raft, &rq->sid);
+     if (!target) {
+@@ -4877,11 +4939,7 @@ raft_unixctl_status(struct unixctl_conn *conn,
+         }
+     }
+ 
+-    ds_put_format(&s, "Role: %s\n",
+-                  raft->role == RAFT_LEADER ? "leader"
+-                  : raft->role == RAFT_CANDIDATE ? "candidate"
+-                  : raft->role == RAFT_FOLLOWER ? "follower"
+-                  : "<error>");
++    ds_put_format(&s, "Role: %s\n", raft_role_to_string(raft->role));
+     ds_put_format(&s, "Term: %"PRIu64"\n", raft->term);
+     raft_put_sid("Leader", &raft->leader_sid, raft, &s);
+     raft_put_sid("Vote", &raft->vote, raft, &s);
 diff --git a/python/ovs/db/idl.py b/python/ovs/db/idl.py
 index c8cc543465..384428c3fc 100644
 --- a/python/ovs/db/idl.py
@@ -524,6 +762,176 @@ index fa5f148b4c..fbc3deb68e 100644
     func=`printf '%s_' "$1" | cut -c 4-`
     add_${func}of_ports br0 1 2
     AT_DATA([flows.txt], [dnl
+diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at
+index 9d8b4d06a4..91a76cb813 100644
+--- a/tests/ovsdb-cluster.at
++++ b/tests/ovsdb-cluster.at
+@@ -578,6 +578,165 @@ for i in $(seq $n); do
+     OVS_APP_EXIT_AND_WAIT_BY_TARGET([$(pwd)/s$i], [s$i.pid])
+ done
+ 
++AT_CLEANUP
++
++AT_BANNER([OVSDB - cluster failure while leaving])
++AT_SETUP([OVSDB cluster - leaving the cluster with some servers down])
++AT_KEYWORDS([ovsdb server negative unix cluster leave])
++
++AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db \
++            $top_srcdir/vswitchd/vswitch.ovsschema unix:s1.raft], [0], [], [stderr])
++schema_name=$(ovsdb-tool schema-name $top_srcdir/vswitchd/vswitch.ovsschema)
++for i in 2 3 4 5; do
++    AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
++done
++
++on_exit 'kill $(cat *.pid)'
++on_exit "
++  for i in \$(ls $(pwd)/s[[0-5]]); do
++    ovs-appctl --timeout 1 -t \$i cluster/status $schema_name;
++  done
++"
++dnl Starting all the servers.
++for i in 1 2 3 4 5; do
++    AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off \
++                           --detach --no-chdir --log-file=s$i.log \
++                           --pidfile=s$i.pid --unixctl=s$i \
++                           --remote=punix:s$i.ovsdb s$i.db])
++done
++
++dnl Make sure that all servers joined the cluster.
++for i in 1 2 3 4 5; do
++    AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
++done
++
++dnl Make sure the cluster is operational.
++m4_define([DB_REMOTE], [unix:s1.ovsdb,unix:s2.ovsdb,unix:s3.ovsdb,unix:s4.ovsdb,unix:s5.ovsdb])
++AT_CHECK([ovs-vsctl --db="DB_REMOTE" --no-wait init])
++AT_CHECK([ovs-vsctl --db="DB_REMOTE" -vovsdb_cs:console:dbg --no-leader-only \
++            --no-wait create QoS type=test-1], [0], [ignore], [ignore])
++
++dnl Stop servers 1 and 2.
++OVS_APP_EXIT_AND_WAIT_BY_TARGET([$(pwd)/s1], [s1.pid])
++OVS_APP_EXIT_AND_WAIT_BY_TARGET([$(pwd)/s2], [s2.pid])
++
++dnl Make sure that all remaining servers are functional as a cluster.
++for i in 3 4 5; do
++    AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
++done
++
++dnl Make sure the cluster is still operational.
++m4_define([DB_REMOTE], [unix:s3.ovsdb,unix:s4.ovsdb,unix:s5.ovsdb])
++AT_CHECK([ovs-vsctl --db="DB_REMOTE" -vovsdb_cs:console:dbg --no-leader-only \
++            --no-wait create QoS type=test-2], [0], [ignore], [ignore])
++
++dnl Servers 1 and 2 in a cluster of 5 are down, 3 servers are still alive.
++dnl Server 3 can't leave, because the NEW configuration will be a cluster of
+dnl 4 with 2 servers down, which doesn't have a quorum.  Try it anyway.
+dnl The cluster will fall apart until server 1 or 2 comes back to resolve
++dnl the quorum issue, because servers 4 and 5 will no longer consider 3
++dnl to be part of the configuration.
++AT_CHECK([ovs-appctl -t $(pwd)/s3 cluster/leave $schema_name])
++
++dnl Check that the cluster is not operational.
++for i in 3 4 5; do
++    OVS_WAIT_UNTIL([ovs-appctl -t $(pwd)/s$i cluster/status $schema_name \
++                        | grep -qE 'leaving|disconnected'])
++done
++
++dnl Try to commit a transaction, it should not be successful.
++m4_define([DB_REMOTE], [unix:s3.ovsdb,unix:s4.ovsdb,unix:s5.ovsdb])
++AT_CHECK([ovs-vsctl --db="DB_REMOTE" -vovsdb_cs:console:dbg --no-leader-only \
++            --no-wait create QoS type=test-3], [1], [ignore], [stderr])
++
+dnl Now bring back server 2.  This should allow server 3 to leave.
++AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off \
++                          --detach --no-chdir --log-file=s2.log \
++                          --pidfile=s2.pid --unixctl=s2 \
++                          --remote=punix:s2.ovsdb s2.db])
++
++dnl Wait for server 3 to actually leave and stop the server.
++AT_CHECK([ovsdb_client_wait unix:s3.ovsdb $schema_name removed])
++OVS_APP_EXIT_AND_WAIT_BY_TARGET([$(pwd)/s3], [s3.pid])
++
++dnl Make sure that all remaining servers are functional as a cluster.
++for i in 2 4 5; do
++    AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
++done
++dnl Make sure the cluster is operational again.
++m4_define([DB_REMOTE], [unix:s2.ovsdb,unix:s4.ovsdb,unix:s5.ovsdb])
++AT_CHECK([ovs-vsctl --db="DB_REMOTE" -vovsdb_cs:console:dbg --no-leader-only \
++            --no-wait create QoS type=test-4], [0], [ignore], [ignore])
++
++dnl Now we have a cluster of 4 servers (1, 2, 4, 5) with 1 server down.
++dnl Server 2 should be able to leave, because the NEW configuration will
++dnl be a cluster of 3 servers with 1 being down and it has a quorum.
++AT_CHECK([ovs-appctl -t $(pwd)/s2 cluster/leave $schema_name])
++dnl Wait for server 2 to actually leave and stop the server.
++AT_CHECK([ovsdb_client_wait unix:s2.ovsdb $schema_name removed])
++OVS_APP_EXIT_AND_WAIT_BY_TARGET([$(pwd)/s2], [s2.pid])
++
++dnl Make sure the cluster is still operational.
++m4_define([DB_REMOTE], [unix:s4.ovsdb,unix:s5.ovsdb])
++AT_CHECK([ovs-vsctl --db="DB_REMOTE" -vovsdb_cs:console:dbg --no-leader-only \
++            --no-wait create QoS type=test-5], [0], [ignore], [ignore])
++
++dnl Now we have a cluster of 3 servers (1, 4, 5) with 1 server down.
++dnl None of the alive servers can leave, because the NEW configuration
++dnl will be a cluster of 2 with 1 server down and it has no quorum.
++dnl Request both to leave anyway.
++for i in 4 5; do
++    AT_CHECK([ovs-appctl -t $(pwd)/s$i cluster/leave $schema_name])
++done
++
++dnl Check that the cluster is not operational.
++for i in 4 5; do
++    OVS_WAIT_UNTIL([ovs-appctl -t $(pwd)/s$i cluster/status $schema_name \
++                        | grep -qE 'leaving|disconnected'])
++done
++
++dnl Try to commit a transaction, it should not be successful.
++m4_define([DB_REMOTE], [unix:s4.ovsdb,unix:s5.ovsdb])
++AT_CHECK([ovs-vsctl --db="DB_REMOTE" -vovsdb_cs:console:dbg --no-leader-only \
++            --no-wait create QoS type=test-6], [1], [ignore], [stderr])
++
++dnl Now bring back the first server.
++AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off \
++                          --detach --no-chdir --log-file=s1.log \
++                          --pidfile=s1.pid --unixctl=s1 \
++                          --remote=punix:s1.ovsdb s1.db])
++
++dnl Now it should be possible for all the other servers to leave, so we
++dnl should end up with a single-node cluster that consists of server 1.
++for i in 4 5; do
++    AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name removed])
++done
++for i in 4 5; do
++    OVS_APP_EXIT_AND_WAIT_BY_TARGET([$(pwd)/s$i], [s$i.pid])
++done
++
+dnl Wait for the first server to become the leader of a single-node cluster.
++OVS_WAIT_UNTIL([ovs-appctl -t $(pwd)/s1 cluster/status $schema_name \
++                    | grep -q 'Role: leader'])
++AT_CHECK([ovs-appctl -t $(pwd)/s1 cluster/status $schema_name \
++            | grep -c '    s[[1-5]] '], [0], [dnl
++1
++])
++
++dnl Check that the database is operational and the data is still in there.
++m4_define([DB_REMOTE], [unix:s1.ovsdb])
++AT_CHECK([ovs-vsctl --db="DB_REMOTE" -vovsdb_cs:console:dbg --no-wait \
++            create QoS type=test-7], [0], [ignore], [ignore])
++AT_CHECK([ovs-vsctl --db="DB_REMOTE" --no-wait \
++            --columns=type --bare list QoS | sed '/^$/d' | sort], [0], [dnl
++test-1
++test-2
++test-4
++test-5
++test-7
++])
++
++OVS_APP_EXIT_AND_WAIT_BY_TARGET([$(pwd)/s1], [s1.pid])
+ AT_CLEANUP
+ 
+ 
 diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at
 index f9f79f1941..a88706982d 100644
 --- a/tests/ovsdb-idl.at
diff --git a/SPECS/openvswitch3.5.spec b/SPECS/openvswitch3.5.spec
index 806b6ea..7ac4b4f 100644
--- a/SPECS/openvswitch3.5.spec
+++ b/SPECS/openvswitch3.5.spec
@@ -59,7 +59,7 @@ Summary: Open vSwitch
 Group: System Environment/Daemons daemon/database/utilities
 URL: http://www.openvswitch.org/
 Version: 3.5.0
-Release: 15%{?dist}
+Release: 16%{?dist}
 
 # Nearly all of openvswitch is ASL 2.0.  The bugtool is LGPLv2+, and the
 # lib/sflow*.[ch] files are SISSL
@@ -796,6 +796,14 @@ exit 0
 %endif
 
 %changelog
+* Thu Apr 03 2025 Open vSwitch CI <ovs-ci@redhat.com> - 3.5.0-16
+- Merging upstream branch-3.5 [RH git: 016e532d74]
+    Commit list:
+    d116d17fd2 ovsdb: raft: Fix cluster break down on leaving with some nodes down. (FDP-662)
+    fb6195d835 ovsdb: raft: Add debug logs for role transition.
+    158c592c50 ovsdb: raft: Fix multitasking overrun warning when there is no leader.
+
+
 * Thu Apr 03 2025 Open vSwitch CI <ovs-ci@redhat.com> - 3.5.0-15
 - Merging upstream branch-3.5 [RH git: 1d1bff6c83]
     Commit list: