From 2258117c811aa525ec995c601c5f4649553e9490 Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Mon, 6 Jun 2022 09:30:32 -0400 Subject: [KPATCH CVE-2022-1012] kpatch fixes for CVE-2022-1012 Content-type: text/plain Changes since last build: arches: x86_64 ppc64le inet6_hashtables.o: changed function: inet6_hash_connect inet_hashtables.o: changed function: inet_hash_connect inet_hashtables.o: new function: klp__inet_hash_connect inet_hashtables.o: new function: klp_cve_2022_1012_pre_patch_callback inet_hashtables.o: new function: klp_table_perturb_ctor secure_seq.o: new function: klp_secure_ipv4_port_ephemeral secure_seq.o: new function: klp_secure_ipv6_port_ephemeral --------------------------- Kpatch-MR: https://gitlab.com/redhat/prdsc/rhel/src/kpatch/rhel-9/-/merge_requests/2 Kernels: 5.14.0-70.13.1.el9_0 Modifications: - Avoid header file changes, move prototypes into .c files - secure_ipv4_port_ephemeral() and secure_ipv6_port_ephemeral() are exported symbols, create "klp_" variants that change return type from u32 -> u64 - inet_sk_port_offset() and inet6_sk_port_offset() have only one caller each, which we can kpatch, so it's safe to update their return values from u32 -> u64 - __inet_hash_connect() is prototyped in a header file, which we don't want to touch, so create a "klp" variant updating the interface for port_offset u32 -> u64 - Create larger klp_table_perturb for kpatch functions, leaving smaller table_perturb intact for unpatched code - Use a shadow variable for the larger klp_table_perturb so it may persist across kpatch upgrades - Get or allocate klp_table_perturb shadow variable in prepatch callback rather than inet_hashinfo2_init() - Use a shadow variable constructor to initialize klp_table_perturb with net_get_random_once() on its allocation - Use KLP_INET_TABLE_PERTURB_{SHIFT,SIZE} for larger size so we don't modify existing (smaller) table_perturb - Add -fno-optimize-sibling-calls attribute for inet_hash_connect() commit 2f0029a0cd4a60d087882790b8f2e4b3b7a34fc9 Author: Guillaume Nault Date: Tue May 10 21:26:43 2022 +0200 secure_seq: use the 64 bits of the siphash for port offset calculation Bugzilla: https://bugzilla.redhat.com/2087128 CVE: CVE-2022-1012 Y-Commit: cca87d73827e2be4da6de677da5fbbf05ee1efe7 O-Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2064868 Upstream Status: linux.git O-CVE: CVE-2022-1012 commit b2d057560b8107c633b39aabe517ff9d93f285e3 Author: Willy Tarreau Date: Mon May 2 10:46:08 2022 +0200 secure_seq: use the 64 bits of the siphash for port offset calculation SipHash replaced MD5 in secure_ipv{4,6}_port_ephemeral() via commit 7cd23e5300c1 ("secure_seq: use SipHash in place of MD5"), but the output remained truncated to 32-bit only. In order to exploit more bits from the hash, let's make the functions return the full 64-bit of siphash_3u32(). We also make sure the port offset calculation in __inet_hash_connect() remains done on 32-bit to avoid the need for div_u64_rem() and an extra cost on 32-bit systems. Cc: Jason A. Donenfeld Cc: Moshe Kol Cc: Yossi Gilad Cc: Amit Klein Reviewed-by: Eric Dumazet Signed-off-by: Willy Tarreau Signed-off-by: Jakub Kicinski Signed-off-by: Guillaume Nault Signed-off-by: Herton R. Krzesinski commit 12d013e32960f7de65bf6d48b5d1b34f035db485 Author: Guillaume Nault Date: Tue May 10 21:29:59 2022 +0200 tcp: use different parts of the port_offset for index and offset Bugzilla: https://bugzilla.redhat.com/2087128 CVE: CVE-2022-1012 Y-Commit: cf01c418b12c3f395134654725aee76ba8ead14f O-Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2064868 Upstream Status: linux.git O-CVE: CVE-2022-1012 commit 9e9b70ae923baf2b5e8a0ea4fd0c8451801ac526 Author: Willy Tarreau Date: Mon May 2 10:46:09 2022 +0200 tcp: use different parts of the port_offset for index and offset Amit Klein suggests that we use different parts of port_offset for the table's index and the port offset so that there is no direct relation between them. Cc: Jason A. Donenfeld Cc: Moshe Kol Cc: Yossi Gilad Cc: Amit Klein Reviewed-by: Eric Dumazet Signed-off-by: Willy Tarreau Signed-off-by: Jakub Kicinski Signed-off-by: Guillaume Nault Signed-off-by: Herton R. Krzesinski commit a35aa2ee80eba52053d4857f03aaec487e124938 Author: Guillaume Nault Date: Tue May 10 21:30:49 2022 +0200 tcp: resalt the secret every 10 seconds Bugzilla: https://bugzilla.redhat.com/2087128 CVE: CVE-2022-1012 Y-Commit: 76a5e5ab10aa3a222b10ea44aae87b61de99de76 O-Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2064868 Upstream Status: linux.git O-CVE: CVE-2022-1012 Conflicts: (context) Missing upstream commit 49ecc2e9c3ab ("net: align static siphash keys"): RHEL 9 still declares net_secret and ts_secret as siphash_key_t, while upstream uses the new siphash_aligned_key_t. commit 4dfa9b438ee34caca4e6a4e5e961641807367f6f Author: Eric Dumazet Date: Mon May 2 10:46:10 2022 +0200 tcp: resalt the secret every 10 seconds In order to limit the ability for an observer to recognize the source ports sequence used to contact a set of destinations, we should periodically shuffle the secret. 10 seconds looks effective enough without causing particular issues. Cc: Moshe Kol Cc: Yossi Gilad Cc: Amit Klein Cc: Jason A. Donenfeld Tested-by: Willy Tarreau Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski Signed-off-by: Guillaume Nault Signed-off-by: Herton R. Krzesinski commit 2e7decde85e1d99aba7cfc25489136757e5e5e4d Author: Guillaume Nault Date: Tue May 10 21:37:42 2022 +0200 tcp: add small random increments to the source port Bugzilla: https://bugzilla.redhat.com/2087128 CVE: CVE-2022-1012 Y-Commit: c36cc7ecaeb895c5ec73085d925208ead5a9f285 O-Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2064868 Upstream Status: linux.git O-CVE: CVE-2022-1012 commit ca7af0402550f9a0b3316d5f1c30904e42ed257d Author: Willy Tarreau Date: Mon May 2 10:46:11 2022 +0200 tcp: add small random increments to the source port Here we're randomly adding between 0 and 7 random increments to the selected source port in order to add some noise in the source port selection that will make the next port less predictable. With the default port range of 32768-60999 this means a worst case reuse scenario of 14116/8=1764 connections between two consecutive uses of the same port, with an average of 14116/4.5=3137. This code was stressed at more than 800000 connections per second to a fixed target with all connections closed by the client using RSTs (worst condition) and only 2 connections failed among 13 billion, despite the hash being reseeded every 10 seconds, indicating a perfectly safe situation. Cc: Moshe Kol Cc: Yossi Gilad Cc: Amit Klein Reviewed-by: Eric Dumazet Signed-off-by: Willy Tarreau Signed-off-by: Jakub Kicinski Signed-off-by: Guillaume Nault Signed-off-by: Herton R. Krzesinski commit 11b7585327e4558856beea1b60c2b56a89109034 Author: Guillaume Nault Date: Tue May 10 21:38:33 2022 +0200 tcp: dynamically allocate the perturb table used by source ports Bugzilla: https://bugzilla.redhat.com/2087128 CVE: CVE-2022-1012 Y-Commit: 2a8821f71c0f00f5169888c9e21df0e033306ae8 O-Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2064868 Upstream Status: linux.git O-CVE: CVE-2022-1012 commit e9261476184be1abd486c9434164b2acbe0ed6c2 Author: Willy Tarreau Date: Mon May 2 10:46:12 2022 +0200 tcp: dynamically allocate the perturb table used by source ports We'll need to further increase the size of this table and it's likely that at some point its size will not be suitable anymore for a static table. Let's allocate it on boot from inet_hashinfo2_init(), which is called from tcp_init(). Cc: Moshe Kol Cc: Yossi Gilad Cc: Amit Klein Reviewed-by: Eric Dumazet Signed-off-by: Willy Tarreau Signed-off-by: Jakub Kicinski Signed-off-by: Guillaume Nault Signed-off-by: Herton R. Krzesinski commit ffec79df2cf078ae8efda94fa1644a2f3d249629 Author: Guillaume Nault Date: Tue May 10 21:39:49 2022 +0200 tcp: increase source port perturb table to 2^16 Bugzilla: https://bugzilla.redhat.com/2087128 CVE: CVE-2022-1012 Y-Commit: cb2b7f66f852709725a5fc3798b8cf60eee4cfcd O-Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2064868 Upstream Status: linux.git O-CVE: CVE-2022-1012 commit 4c2c8f03a5ab7cb04ec64724d7d176d00bcc91e5 Author: Willy Tarreau Date: Mon May 2 10:46:13 2022 +0200 tcp: increase source port perturb table to 2^16 Moshe Kol, Amit Klein, and Yossi Gilad reported being able to accurately identify a client by forcing it to emit only 40 times more connections than there are entries in the table_perturb[] table. The previous two improvements consisting in resalting the secret every 10s and adding randomness to each port selection only slightly improved the situation, and the current value of 2^8 was too small as it's not very difficult to make a client emit 10k connections in less than 10 seconds. Thus we're increasing the perturb table from 2^8 to 2^16 so that the same precision now requires 2.6M connections, which is more difficult in this time frame and harder to hide as a background activity. The impact is that the table now uses 256 kB instead of 1 kB, which could mostly affect devices making frequent outgoing connections. However such components usually target a small set of destinations (load balancers, database clients, perf assessment tools), and in practice only a few entries will be visited, like before. A live test at 1 million connections per second showed no performance difference from the previous value. Reported-by: Moshe Kol Reported-by: Yossi Gilad Reported-by: Amit Klein Reviewed-by: Eric Dumazet Signed-off-by: Willy Tarreau Signed-off-by: Jakub Kicinski Signed-off-by: Guillaume Nault Signed-off-by: Herton R. Krzesinski commit 8b63da5c1514e473907b074eae7ce5acc0ea739e Author: Guillaume Nault Date: Tue May 10 21:40:34 2022 +0200 tcp: drop the hash_32() part from the index calculation Bugzilla: https://bugzilla.redhat.com/2087128 CVE: CVE-2022-1012 Y-Commit: f8b7b8321dd6b55062382fc343422f3926ffa04c O-Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2064868 Upstream Status: linux.git O-CVE: CVE-2022-1012 commit e8161345ddbb66e449abde10d2fdce93f867eba9 Author: Willy Tarreau Date: Mon May 2 10:46:14 2022 +0200 tcp: drop the hash_32() part from the index calculation In commit 190cc82489f4 ("tcp: change source port randomizarion at connect() time"), the table_perturb[] array was introduced and an index was taken from the port_offset via hash_32(). But it turns out that hash_32() performs a multiplication while the input here comes from the output of SipHash in secure_seq, that is well distributed enough to avoid the need for yet another hash. Suggested-by: Amit Klein Reviewed-by: Eric Dumazet Signed-off-by: Willy Tarreau Signed-off-by: Jakub Kicinski Signed-off-by: Guillaume Nault Signed-off-by: Herton R. Krzesinski Signed-off-by: Joe Lawrence --- net/core/secure_seq.c | 30 +++++++ net/ipv4/inet_hashtables.c | 175 ++++++++++++++++++++++++++++++++++-- net/ipv6/inet6_hashtables.c | 20 +++-- 3 files changed, 211 insertions(+), 14 deletions(-) diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index b5bc680d4755..f18f8176d69e 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -22,6 +22,8 @@ static siphash_key_t net_secret __read_mostly; static siphash_key_t ts_secret __read_mostly; +#define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ) + static __always_inline void net_secret_init(void) { net_get_random_once(&net_secret, sizeof(net_secret)); @@ -111,6 +113,25 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, &net_secret); } EXPORT_SYMBOL(secure_ipv6_port_ephemeral); + +u64 klp_secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, + __be16 dport) +{ + const struct { + struct in6_addr saddr; + struct in6_addr daddr; + unsigned int timeseed; + __be16 dport; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .saddr = *(struct in6_addr *)saddr, + .daddr = *(struct in6_addr *)daddr, + .timeseed = jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD, + .dport = dport, + }; + net_secret_init(); + return siphash(&combined, offsetofend(typeof(combined), dport), + &net_secret); +} #endif #ifdef CONFIG_INET @@ -149,6 +170,15 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) (__force u16)dport, &net_secret); } EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); + +u64 klp_secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) +{ + net_secret_init(); + return siphash_4u32((__force u32)saddr, (__force u32)daddr, + (__force u16)dport, + jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD, + &net_secret); +} #endif #if IS_ENABLED(CONFIG_IP_DCCP) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index bfb522e51346..7f982eb04e3f 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -504,13 +504,14 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, return -EADDRNOTAVAIL; } -static u32 inet_sk_port_offset(const struct sock *sk) +u64 klp_secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); +static u64 inet_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); - return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr, - inet->inet_daddr, - inet->inet_dport); + return klp_secure_ipv4_port_ephemeral(inet->inet_rcv_saddr, + inet->inet_daddr, + inet->inet_dport); } /* Searches for an exsiting socket in the ehash bucket list. @@ -842,18 +843,178 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, return 0; } +#include "kpatch-macros.h" +#define KLP_CVE_2022_1012_TABLE_PERTURB 0x2022101200000001 + +#define KLP_INET_TABLE_PERTURB_SHIFT 16 +#define KLP_INET_TABLE_PERTURB_SIZE (1 << KLP_INET_TABLE_PERTURB_SHIFT) +static u32 *klp_table_perturb; + +static int klp_table_perturb_ctor(void *obj, void *shadow_data, void *ctor_data) +{ + u32 *klp_tp = shadow_data; + if (!net_get_random_once(klp_tp, KLP_INET_TABLE_PERTURB_SIZE * sizeof(*klp_tp))) { + pr_err("kpatch assertion: net_get_random_once() on klp_table_perturb"); + return -1; + } + + return 0; +} + +static int klp_cve_2022_1012_pre_patch_callback(struct klp_object *obj) +{ + /* this one is used for source ports of outgoing connections */ + klp_table_perturb = klp_shadow_get_or_alloc(0, KLP_CVE_2022_1012_TABLE_PERTURB, + KLP_INET_TABLE_PERTURB_SIZE * sizeof(*klp_table_perturb), + GFP_KERNEL, klp_table_perturb_ctor, NULL); + if (!klp_table_perturb) { + pr_err("TCP: failed to alloc / find klp_table_perturb"); + return -ENOMEM; + } + + return 0; +} +KPATCH_PRE_PATCH_CALLBACK(klp_cve_2022_1012_pre_patch_callback); + +int klp__inet_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk, u64 port_offset, + int (*check_established)(struct inet_timewait_death_row *, + struct sock *, __u16, struct inet_timewait_sock **)) +{ + struct inet_hashinfo *hinfo = death_row->hashinfo; + struct inet_timewait_sock *tw = NULL; + struct inet_bind_hashbucket *head; + int port = inet_sk(sk)->inet_num; + struct net *net = sock_net(sk); + struct inet_bind_bucket *tb; + u32 remaining, offset; + int ret, i, low, high; + int l3mdev; + u32 index; + + if (port) { + head = &hinfo->bhash[inet_bhashfn(net, port, + hinfo->bhash_size)]; + tb = inet_csk(sk)->icsk_bind_hash; + spin_lock_bh(&head->lock); + if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { + inet_ehash_nolisten(sk, NULL, NULL); + spin_unlock_bh(&head->lock); + return 0; + } + spin_unlock(&head->lock); + /* No definite answer... Walk to established hash table */ + ret = check_established(death_row, sk, port, NULL); + local_bh_enable(); + return ret; + } + + l3mdev = inet_sk_bound_l3mdev(sk); + + inet_get_local_port_range(net, &low, &high); + high++; /* [32768, 60999] -> [32768, 61000[ */ + remaining = high - low; + if (likely(remaining > 1)) + remaining &= ~1U; + + /* + * kpatch: klp_table_perturb_ctor() responsible for calling + * net_get_random_once() for klp_table_perturb + */ + index = port_offset & (KLP_INET_TABLE_PERTURB_SIZE - 1); + + offset = READ_ONCE(klp_table_perturb[index]) + (port_offset >> 32); + offset %= remaining; + + /* In first pass we try ports of @low parity. + * inet_csk_get_port() does the opposite choice. + */ + offset &= ~1U; +other_parity_scan: + port = low + offset; + for (i = 0; i < remaining; i += 2, port += 2) { + if (unlikely(port >= high)) + port -= remaining; + if (inet_is_local_reserved_port(net, port)) + continue; + head = &hinfo->bhash[inet_bhashfn(net, port, + hinfo->bhash_size)]; + spin_lock_bh(&head->lock); + + /* Does not bother with rcv_saddr checks, because + * the established check is already unique enough. + */ + inet_bind_bucket_for_each(tb, &head->chain) { + if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev && + tb->port == port) { + if (tb->fastreuse >= 0 || + tb->fastreuseport >= 0) + goto next_port; + WARN_ON(hlist_empty(&tb->owners)); + if (!check_established(death_row, sk, + port, &tw)) + goto ok; + goto next_port; + } + } + + tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, + net, head, port, l3mdev); + if (!tb) { + spin_unlock_bh(&head->lock); + return -ENOMEM; + } + tb->fastreuse = -1; + tb->fastreuseport = -1; + goto ok; +next_port: + spin_unlock_bh(&head->lock); + cond_resched(); + } + + offset++; + if ((offset & 1) && remaining > 1) + goto other_parity_scan; + + return -EADDRNOTAVAIL; + +ok: + /* Here we want to add a little bit of randomness to the next source + * port that will be chosen. We use a max() with a random here so that + * on low contention the randomness is maximal and on high contention + * it may be inexistent. + */ + i = max_t(int, i, (prandom_u32() & 7) * 2); + WRITE_ONCE(klp_table_perturb[index], READ_ONCE(klp_table_perturb[index]) + i + 2); + + /* Head lock still held and bh's disabled */ + inet_bind_hash(sk, tb, port); + if (sk_unhashed(sk)) { + inet_sk(sk)->inet_sport = htons(port); + inet_ehash_nolisten(sk, (struct sock *)tw, NULL); + } + if (tw) + inet_twsk_bind_unhash(tw, hinfo); + spin_unlock(&head->lock); + if (tw) + inet_twsk_deschedule_put(tw); + local_bh_enable(); + return 0; +} + /* * Bind a port for a connect operation and hash it. */ +__attribute__((optimize("-fno-optimize-sibling-calls"))) int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { - u32 port_offset = 0; + u64 port_offset = 0; if (!inet_sk(sk)->inet_num) port_offset = inet_sk_port_offset(sk); - return __inet_hash_connect(death_row, sk, port_offset, - __inet_check_established); + return klp__inet_hash_connect(death_row, sk, port_offset, + __inet_check_established); } EXPORT_SYMBOL_GPL(inet_hash_connect); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 67c9114835c8..fe0c5e5935e7 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -308,24 +308,30 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, return -EADDRNOTAVAIL; } -static u32 inet6_sk_port_offset(const struct sock *sk) +u64 klp_secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, + __be16 dport); +static u64 inet6_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); - return secure_ipv6_port_ephemeral(sk->sk_v6_rcv_saddr.s6_addr32, - sk->sk_v6_daddr.s6_addr32, - inet->inet_dport); + return klp_secure_ipv6_port_ephemeral(sk->sk_v6_rcv_saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, + inet->inet_dport); } +int klp__inet_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk, u64 port_offset, + int (*check_established)(struct inet_timewait_death_row *, + struct sock *, __u16, struct inet_timewait_sock **)); int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { - u32 port_offset = 0; + u64 port_offset = 0; if (!inet_sk(sk)->inet_num) port_offset = inet6_sk_port_offset(sk); - return __inet_hash_connect(death_row, sk, port_offset, - __inet6_check_established); + return klp__inet_hash_connect(death_row, sk, port_offset, + __inet6_check_established); } EXPORT_SYMBOL_GPL(inet6_hash_connect); -- 2.26.3