|
|
f07426 |
From 96bf75e8ebffe06d33b5dee20c44e16ce53ea663 Mon Sep 17 00:00:00 2001
|
|
|
f07426 |
From: Stefano Brivio <sbrivio@redhat.com>
|
|
|
f07426 |
Date: Wed, 8 Mar 2023 18:07:42 +0100
|
|
|
f07426 |
Subject: [PATCH 16/20] tcp: Clamp MSS value when queueing data to tap, also
|
|
|
f07426 |
for pasta
|
|
|
f07426 |
|
|
|
f07426 |
Tom reports that a pattern of repated ~1 MiB chunks downloads over
|
|
|
f07426 |
NNTP over TLS, on Podman 4.4 using pasta as network back-end, results
|
|
|
f07426 |
in pasta taking one full CPU thread after a while, and the download
|
|
|
f07426 |
never succeeds.
|
|
|
f07426 |
|
|
|
f07426 |
On that setup, we end up re-sending the same frame over and over,
|
|
|
f07426 |
with a consistent 65 534 bytes size, and never get an
|
|
|
f07426 |
acknowledgement from the tap-side client. This only happens for the
|
|
|
f07426 |
default MTU value (65 520 bytes) or for values that are slightly
|
|
|
f07426 |
smaller than that (down to 64 499 bytes).
|
|
|
f07426 |
|
|
|
f07426 |
We hit this condition because the MSS value we use in
|
|
|
f07426 |
tcp_data_from_sock(), only in pasta mode, is simply clamped to
|
|
|
f07426 |
USHRT_MAX, and not to the actual size of the buffers we pre-cooked
|
|
|
f07426 |
for sending, which is a bit less than that.
|
|
|
f07426 |
|
|
|
f07426 |
It looks like we got away with it until commit 0fb7b2b9080a ("tap:
|
|
|
f07426 |
Use different io vector bases depending on tap type") fixed the
|
|
|
f07426 |
setting of iov_len.
|
|
|
f07426 |
|
|
|
f07426 |
Luckily, since it's pasta, we're queueing up to two frames at a time,
|
|
|
f07426 |
so the worst that can happen is a badly segmented TCP stream: we
|
|
|
f07426 |
always have some space at the tail of the buffer.
|
|
|
f07426 |
|
|
|
f07426 |
Clamp the MSS value to the appropriate maximum given by struct
|
|
|
f07426 |
tcp{4,6}_buf_data_t, no matter if we're running in pasta or passt
|
|
|
f07426 |
mode.
|
|
|
f07426 |
|
|
|
f07426 |
While at it, fix the comments to those structs to reflect the current
|
|
|
f07426 |
struct size. This is not really relevant for any further calculation
|
|
|
f07426 |
or consideration, but it's convenient to know while debugging this
|
|
|
f07426 |
kind of issues.
|
|
|
f07426 |
|
|
|
f07426 |
Thanks to Tom for reporting the issue in a very detailed way and for
|
|
|
f07426 |
providing a test setup.
|
|
|
f07426 |
|
|
|
f07426 |
Reported-by: Tom Mombourquette <tom@devnode.com>
|
|
|
f07426 |
Link: https://github.com/containers/podman/issues/17703
|
|
|
f07426 |
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
|
|
f07426 |
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
|
|
|
f07426 |
(cherry picked from commit d7272f1df89c099a7e98ae43d1ef9b936c7e46f7)
|
|
|
f07426 |
---
|
|
|
f07426 |
tcp.c | 23 +++++++++--------------
|
|
|
f07426 |
1 file changed, 9 insertions(+), 14 deletions(-)
|
|
|
f07426 |
|
|
|
f07426 |
diff --git a/tcp.c b/tcp.c
|
|
|
f07426 |
index 001fa50..1355b0e 100644
|
|
|
f07426 |
--- a/tcp.c
|
|
|
f07426 |
+++ b/tcp.c
|
|
|
f07426 |
@@ -460,7 +460,7 @@ static struct tcp4_l2_buf_t {
|
|
|
f07426 |
struct iphdr iph; /* 44 28 */
|
|
|
f07426 |
struct tcphdr th; /* 64 48 */
|
|
|
f07426 |
uint8_t data[MSS4]; /* 84 68 */
|
|
|
f07426 |
- /* 65541 65525 */
|
|
|
f07426 |
+ /* 65536 65532 */
|
|
|
f07426 |
#ifdef __AVX2__
|
|
|
f07426 |
} __attribute__ ((packed, aligned(32)))
|
|
|
f07426 |
#else
|
|
|
f07426 |
@@ -488,7 +488,7 @@ struct tcp6_l2_buf_t {
|
|
|
f07426 |
struct ipv6hdr ip6h; /* 32 20 */
|
|
|
f07426 |
struct tcphdr th; /* 72 60 */
|
|
|
f07426 |
uint8_t data[MSS6]; /* 92 80 */
|
|
|
f07426 |
- /* 65639 65627 */
|
|
|
f07426 |
+ /* 65536 65532 */
|
|
|
f07426 |
#ifdef __AVX2__
|
|
|
f07426 |
} __attribute__ ((packed, aligned(32)))
|
|
|
f07426 |
#else
|
|
|
f07426 |
@@ -1913,15 +1913,13 @@ int tcp_conn_new_sock(const struct ctx *c, sa_family_t af)
|
|
|
f07426 |
|
|
|
f07426 |
/**
|
|
|
f07426 |
* tcp_conn_tap_mss() - Get MSS value advertised by tap/guest
|
|
|
f07426 |
- * @c: Execution context
|
|
|
f07426 |
* @conn: Connection pointer
|
|
|
f07426 |
* @opts: Pointer to start of TCP options
|
|
|
f07426 |
* @optlen: Bytes in options: caller MUST ensure available length
|
|
|
f07426 |
*
|
|
|
f07426 |
* Return: clamped MSS value
|
|
|
f07426 |
*/
|
|
|
f07426 |
-static uint16_t tcp_conn_tap_mss(const struct ctx *c,
|
|
|
f07426 |
- const struct tcp_tap_conn *conn,
|
|
|
f07426 |
+static uint16_t tcp_conn_tap_mss(const struct tcp_tap_conn *conn,
|
|
|
f07426 |
const char *opts, size_t optlen)
|
|
|
f07426 |
{
|
|
|
f07426 |
unsigned int mss;
|
|
|
f07426 |
@@ -1932,13 +1930,10 @@ static uint16_t tcp_conn_tap_mss(const struct ctx *c,
|
|
|
f07426 |
else
|
|
|
f07426 |
mss = ret;
|
|
|
f07426 |
|
|
|
f07426 |
- /* Don't upset qemu */
|
|
|
f07426 |
- if (c->mode == MODE_PASST) {
|
|
|
f07426 |
- if (CONN_V4(conn))
|
|
|
f07426 |
- mss = MIN(MSS4, mss);
|
|
|
f07426 |
- else
|
|
|
f07426 |
- mss = MIN(MSS6, mss);
|
|
|
f07426 |
- }
|
|
|
f07426 |
+ if (CONN_V4(conn))
|
|
|
f07426 |
+ mss = MIN(MSS4, mss);
|
|
|
f07426 |
+ else
|
|
|
f07426 |
+ mss = MIN(MSS6, mss);
|
|
|
f07426 |
|
|
|
f07426 |
return MIN(mss, USHRT_MAX);
|
|
|
f07426 |
}
|
|
|
f07426 |
@@ -2007,7 +2002,7 @@ static void tcp_conn_from_tap(struct ctx *c, int af, const void *addr,
|
|
|
f07426 |
|
|
|
f07426 |
conn->wnd_to_tap = WINDOW_DEFAULT;
|
|
|
f07426 |
|
|
|
f07426 |
- mss = tcp_conn_tap_mss(c, conn, opts, optlen);
|
|
|
f07426 |
+ mss = tcp_conn_tap_mss(conn, opts, optlen);
|
|
|
f07426 |
if (setsockopt(s, SOL_TCP, TCP_MAXSEG, &mss, sizeof(mss)))
|
|
|
f07426 |
trace("TCP: failed to set TCP_MAXSEG on socket %i", s);
|
|
|
f07426 |
MSS_SET(conn, mss);
|
|
|
f07426 |
@@ -2469,7 +2464,7 @@ static void tcp_conn_from_sock_finish(struct ctx *c, struct tcp_tap_conn *conn,
|
|
|
f07426 |
if (!(conn->wnd_from_tap >>= conn->ws_from_tap))
|
|
|
f07426 |
conn->wnd_from_tap = 1;
|
|
|
f07426 |
|
|
|
f07426 |
- MSS_SET(conn, tcp_conn_tap_mss(c, conn, opts, optlen));
|
|
|
f07426 |
+ MSS_SET(conn, tcp_conn_tap_mss(conn, opts, optlen));
|
|
|
f07426 |
|
|
|
f07426 |
conn->seq_init_from_tap = ntohl(th->seq) + 1;
|
|
|
f07426 |
conn->seq_from_tap = conn->seq_init_from_tap;
|
|
|
f07426 |
--
|
|
|
f07426 |
2.39.2
|
|
|
f07426 |
|