|
|
049c96 |
From 863f0cafefd9293451fe4682b02c8186a68ad30a Mon Sep 17 00:00:00 2001
|
|
|
049c96 |
From: Phil Sutter <psutter@redhat.com>
|
|
|
049c96 |
Date: Thu, 18 Feb 2016 14:19:36 +0100
|
|
|
049c96 |
Subject: [PATCH] ip: route: add congestion control metric
|
|
|
049c96 |
|
|
|
049c96 |
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1291832
|
|
|
049c96 |
Upstream Status: iproute2.git commit 6ef87f9cce213
|
|
|
049c96 |
|
|
|
049c96 |
commit 6ef87f9cce213cae66098d08e0abc36d67b95244
|
|
|
049c96 |
Author: Daniel Borkmann <dborkman@redhat.com>
|
|
|
049c96 |
Date: Fri Jan 9 00:13:06 2015 +0100
|
|
|
049c96 |
|
|
|
049c96 |
ip: route: add congestion control metric
|
|
|
049c96 |
|
|
|
049c96 |
This patch adds configuration and dumping of congestion control metric
|
|
|
049c96 |
for ip route, for example:
|
|
|
049c96 |
|
|
|
049c96 |
ip route add <dst> dev foo congctl [lock] dctcp
|
|
|
049c96 |
|
|
|
049c96 |
Reference: http://thread.gmane.org/gmane.linux.network/344733
|
|
|
049c96 |
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
|
|
|
049c96 |
---
|
|
|
049c96 |
ip/iproute.c | 22 ++++++++++++++++++----
|
|
|
049c96 |
man/man8/ip-route.8.in | 19 ++++++++++++++++++-
|
|
|
049c96 |
2 files changed, 36 insertions(+), 5 deletions(-)
|
|
|
049c96 |
|
|
|
049c96 |
diff --git a/ip/iproute.c b/ip/iproute.c
|
|
|
049c96 |
index ea69aa3..d3a5e1c 100644
|
|
|
049c96 |
--- a/ip/iproute.c
|
|
|
049c96 |
+++ b/ip/iproute.c
|
|
|
049c96 |
@@ -53,6 +53,7 @@ static const char *mx_names[RTAX_MAX+1] = {
|
|
|
049c96 |
[RTAX_RTO_MIN] = "rto_min",
|
|
|
049c96 |
[RTAX_INITRWND] = "initrwnd",
|
|
|
049c96 |
[RTAX_QUICKACK] = "quickack",
|
|
|
049c96 |
+ [RTAX_CC_ALGO] = "congctl",
|
|
|
049c96 |
};
|
|
|
049c96 |
static void usage(void) __attribute__((noreturn));
|
|
|
049c96 |
|
|
|
049c96 |
@@ -80,8 +81,7 @@ static void usage(void)
|
|
|
049c96 |
fprintf(stderr, " [ window NUMBER] [ cwnd NUMBER ] [ initcwnd NUMBER ]\n");
|
|
|
049c96 |
fprintf(stderr, " [ ssthresh NUMBER ] [ realms REALM ] [ src ADDRESS ]\n");
|
|
|
049c96 |
fprintf(stderr, " [ rto_min TIME ] [ hoplimit NUMBER ] [ initrwnd NUMBER ]\n");
|
|
|
049c96 |
- fprintf(stderr, " [ features FEATURES ]\n");
|
|
|
049c96 |
- fprintf(stderr, " [ quickack BOOL ]\n");
|
|
|
049c96 |
+ fprintf(stderr, " [ features FEATURES ] [ quickack BOOL ] [ congctl NAME ]\n");
|
|
|
049c96 |
fprintf(stderr, "TYPE := [ unicast | local | broadcast | multicast | throw |\n");
|
|
|
049c96 |
fprintf(stderr, " unreachable | prohibit | blackhole | nat ]\n");
|
|
|
049c96 |
fprintf(stderr, "TABLE_ID := [ local | main | default | all | NUMBER ]\n");
|
|
|
049c96 |
@@ -539,7 +539,7 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
|
|
|
049c96 |
mxlock = *(unsigned*)RTA_DATA(mxrta[RTAX_LOCK]);
|
|
|
049c96 |
|
|
|
049c96 |
for (i=2; i<= RTAX_MAX; i++) {
|
|
|
049c96 |
- unsigned val;
|
|
|
049c96 |
+ __u32 val;
|
|
|
049c96 |
|
|
|
049c96 |
if (mxrta[i] == NULL)
|
|
|
049c96 |
continue;
|
|
|
049c96 |
@@ -548,10 +548,12 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
|
|
|
049c96 |
fprintf(fp, " %s", mx_names[i]);
|
|
|
049c96 |
else
|
|
|
049c96 |
fprintf(fp, " metric %d", i);
|
|
|
049c96 |
+
|
|
|
049c96 |
if (mxlock & (1<<i))
|
|
|
049c96 |
fprintf(fp, " lock");
|
|
|
049c96 |
+ if (i != RTAX_CC_ALGO)
|
|
|
049c96 |
+ val = rta_getattr_u32(mxrta[i]);
|
|
|
049c96 |
|
|
|
049c96 |
- val = *(unsigned*)RTA_DATA(mxrta[i]);
|
|
|
049c96 |
switch (i) {
|
|
|
049c96 |
case RTAX_FEATURES:
|
|
|
049c96 |
print_rtax_features(fp, val);
|
|
|
049c96 |
@@ -576,6 +578,10 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
|
|
|
049c96 |
fprintf(fp, " %gs", val/1e3);
|
|
|
049c96 |
else
|
|
|
049c96 |
fprintf(fp, " %ums", val);
|
|
|
049c96 |
+ break;
|
|
|
049c96 |
+ case RTAX_CC_ALGO:
|
|
|
049c96 |
+ fprintf(fp, " %s", rta_getattr_str(mxrta[i]));
|
|
|
049c96 |
+ break;
|
|
|
049c96 |
}
|
|
|
049c96 |
}
|
|
|
049c96 |
}
|
|
|
049c96 |
@@ -928,6 +934,14 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
|
|
|
049c96 |
if (quickack != 1 && quickack != 0)
|
|
|
049c96 |
invarg("\"quickack\" value should be 0 or 1\n", *argv);
|
|
|
049c96 |
rta_addattr32(mxrta, sizeof(mxbuf), RTAX_QUICKACK, quickack);
|
|
|
049c96 |
+ } else if (matches(*argv, "congctl") == 0) {
|
|
|
049c96 |
+ NEXT_ARG();
|
|
|
049c96 |
+ if (strcmp(*argv, "lock") == 0) {
|
|
|
049c96 |
+ mxlock |= 1 << RTAX_CC_ALGO;
|
|
|
049c96 |
+ NEXT_ARG();
|
|
|
049c96 |
+ }
|
|
|
049c96 |
+ rta_addattr_l(mxrta, sizeof(mxbuf), RTAX_CC_ALGO, *argv,
|
|
|
049c96 |
+ strlen(*argv));
|
|
|
049c96 |
} else if (matches(*argv, "rttvar") == 0) {
|
|
|
049c96 |
unsigned win;
|
|
|
049c96 |
NEXT_ARG();
|
|
|
049c96 |
diff --git a/man/man8/ip-route.8.in b/man/man8/ip-route.8.in
|
|
|
049c96 |
index 05fd879..ec4a5b2 100644
|
|
|
049c96 |
--- a/man/man8/ip-route.8.in
|
|
|
049c96 |
+++ b/man/man8/ip-route.8.in
|
|
|
049c96 |
@@ -116,7 +116,9 @@ replace " } "
|
|
|
049c96 |
.B features
|
|
|
049c96 |
.IR FEATURES " ] [ "
|
|
|
049c96 |
.B quickack
|
|
|
049c96 |
-.IR BOOL " ]"
|
|
|
049c96 |
+.IR BOOL " ] [ "
|
|
|
049c96 |
+.B congctl
|
|
|
049c96 |
+.IR NAME " ]"
|
|
|
049c96 |
|
|
|
049c96 |
.ti -8
|
|
|
049c96 |
.IR TYPE " := [ "
|
|
|
049c96 |
@@ -433,6 +435,21 @@ sysctl is set to 0.
|
|
|
049c96 |
Enable or disable quick ack for connections to this destination.
|
|
|
049c96 |
|
|
|
049c96 |
.TP
|
|
|
049c96 |
+.BI congctl " NAME " "(3.20+ only)"
|
|
|
049c96 |
+.TP
|
|
|
049c96 |
+.BI "congctl lock" " NAME " "(3.20+ only)"
|
|
|
049c96 |
+Sets a specific TCP congestion control algorithm only for a given destination.
|
|
|
049c96 |
+If not specified, Linux keeps the current global default TCP congestion control
|
|
|
049c96 |
+algorithm, or the one set from the application. If the modifier
|
|
|
049c96 |
+.B lock
|
|
|
049c96 |
+is not used, an application may nevertheless overwrite the suggested congestion
|
|
|
049c96 |
+control algorithm for that destination. If the modifier
|
|
|
049c96 |
+.B lock
|
|
|
049c96 |
+is used, then an application is not allowed to overwrite the specified congestion
|
|
|
049c96 |
+control algorithm for that destination, thus it will be enforced/guaranteed to
|
|
|
049c96 |
+use the proposed algorithm.
|
|
|
049c96 |
+
|
|
|
049c96 |
+.TP
|
|
|
049c96 |
.BI advmss " NUMBER " "(2.3.15+ only)"
|
|
|
049c96 |
the MSS ('Maximal Segment Size') to advertise to these
|
|
|
049c96 |
destinations when establishing TCP connections. If it is not given,
|
|
|
049c96 |
--
|
|
|
049c96 |
1.8.3.1
|
|
|
049c96 |
|