diff --git a/AUTHORS.rst b/AUTHORS.rst index 05c2fb7046..8454641758 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -312,6 +312,7 @@ Mark Kavanagh mark.b.kavanagh81@gmail.com Mark Maglana mmaglana@gmail.com Mark Michelson mmichels@redhat.com Markos Chandras mchandras@suse.de +Markus Linnala markus.linnala@gmail.com Martin Casado casado@cs.stanford.edu Martin Fong mwfong@csl.sri.com Martin Kalcok martin.kalcok@canonical.com diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst index c9acc1e80e..a28554f674 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -222,6 +222,7 @@ Q: What DPDK version does each Open vSwitch release work with? 3.2.x 22.11.6 3.3.x 23.11.2 3.4.x 23.11.2 + 3.5.x 24.11.1 ============ ======== Q: Are all the DPDK releases that OVS versions work with maintained? diff --git a/Documentation/topics/userspace-tso.rst b/Documentation/topics/userspace-tso.rst index ae08496bdd..ed4d36edd1 100644 --- a/Documentation/topics/userspace-tso.rst +++ b/Documentation/topics/userspace-tso.rst @@ -109,9 +109,9 @@ then started again. OvS will then report:: Limitations ~~~~~~~~~~~ -The current OvS userspace `TSO` implementation supports flat and VLAN networks -only (i.e. no support for `TSO` over tunneled connection [VxLAN, GRE, IPinIP, -etc.]). +The current OvS userspace `TSO` implementation supports flat and VLAN networks, +and some tunneled connections. Currently only VxLAN, Geneve and GRE tunnels +are supported. The NIC driver must support and advertise checksum offload for TCP and UDP. However, SCTP is not mandatory because very few drivers advertised support @@ -120,11 +120,11 @@ in Open vSwitch. Currently, if the NIC supports that, then the feature is enabled, otherwise TSO can still be enabled but SCTP packets sent to the NIC will be dropped.
-There is no software implementation of TSO, so all ports attached to the -datapath must support TSO or packets using that feature will be dropped -on ports without TSO support. That also means guests using vhost-user -in client mode will receive TSO packet regardless of TSO being enabled -or disabled within the guest. +There is a limited software implementation of TSO when tunnels are used which +only supports VxLAN, Geneve, and GRE. When these tunnels are used with TSO, +not all ports attached to the datapath need to support hardware TSO. +Guests using vhost-user in client mode will receive TSO packets regardless of +TSO being enabled or disabled within the guest. All kernel devices that use the raw socket interface (veth, for example) require the kernel commit 9d2f67e43b73 ("net/packet: fix packet drop as of diff --git a/Makefile.am b/Makefile.am index dc5c34a6ae..a61a1cadfb 100644 --- a/Makefile.am +++ b/Makefile.am @@ -339,6 +339,8 @@ check-tabs: fi .PHONY: check-tabs +# NOTE: test-lib-route-table.c excluded due to use of system() to execute +# ip route commands provided as arguments by test suite. ALL_LOCAL += thread-safety-check thread-safety-check: @cd $(srcdir); \ @@ -346,7 +348,8 @@ thread-safety-check: grep -n -f build-aux/thread-safety-forbidden \ `git ls-files | grep '\.[ch]$$' \ | $(EGREP) -v '^datapath-windows|^lib/sflow|^third-party'` /dev/null \ - | $(EGREP) -v ':[ ]*/?\*'; \ + | $(EGREP) -v ':[ ]*/?\*' \ + | $(EGREP) -v '^tests/test-lib-route-table.c'; \ then \ echo "See above for list of calls to functions that are"; \ echo "forbidden due to thread safety issues"; \ diff --git a/NEWS b/NEWS index 83f0513797..ec2f85c015 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -Post-v3.4.0 +v3.5.0 - xx xxx xxxx -------------------- - The limit on the number of fields for address prefix tracking in flow tables increased from 3 to 4.
For example, it is now possible to @@ -31,6 +31,8 @@ Post-v3.4.0 that does not have a specific value defined, rather than being treated as a global value, aligning the behavior with that of the kernel datapath. + * Extended the support for TSO software fallback to include support for + VXLAN, Geneve, and GRE tunneled packets. - Linux TC offload: * Add support for matching tunnel flags if the kernel supports it. * Add support for the "Don't Fragment" (DF) flag in the encap action, @@ -55,6 +57,8 @@ Post-v3.4.0 to make it not configure any crypto options (ike/esp) for connections. Most useful in combination with '--root-ipsec-conf' where system-wide crypto-policy is included from the root ipsec.conf. + * New option '--ovs-monitor-ipsec-options' for 'ovs-ctl start-ovs-ipsec' + to pass the above new options to ovs-monitor-ipsec. v3.4.0 - 15 Aug 2024 diff --git a/configure.ac b/configure.ac index 266e9d4799..2b19888775 100644 --- a/configure.ac +++ b/configure.ac @@ -13,7 +13,7 @@ # limitations under the License.
AC_PREREQ(2.63) -AC_INIT(openvswitch, 3.4.90, bugs@openvswitch.org) +AC_INIT(openvswitch, 3.5.0, bugs@openvswitch.org) AC_CONFIG_SRCDIR([vswitchd/ovs-vswitchd.c]) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_AUX_DIR([build-aux]) diff --git a/debian/automake.mk b/debian/automake.mk index fe8febdd3c..7ae4e00e58 100644 --- a/debian/automake.mk +++ b/debian/automake.mk @@ -14,6 +14,8 @@ EXTRA_DIST += \ debian/openvswitch-common.lintian-overrides \ debian/openvswitch-doc.doc-base \ debian/openvswitch-doc.install \ + debian/openvswitch-ipsec.default \ + debian/openvswitch-ipsec.dirs \ debian/openvswitch-ipsec.init \ debian/openvswitch-ipsec.install \ debian/openvswitch-ipsec.service \ @@ -56,8 +58,6 @@ EXTRA_DIST += \ debian/openvswitch-vtep.init \ debian/openvswitch-vtep.install \ debian/ovs-systemd-reload \ - debian/patches/ovs-ctl-ipsec.patch \ - debian/patches/series \ debian/python3-openvswitch.install \ debian/rules \ debian/source/format \ diff --git a/debian/changelog b/debian/changelog index f1a071141d..3f4e2c56e3 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,8 @@ -openvswitch (3.4.90-1) unstable; urgency=low +openvswitch (3.5.0-1) unstable; urgency=low * New upstream version - -- Open vSwitch team Mon, 15 Jul 2024 13:00:01 +0100 + -- Open vSwitch team Thu, 16 Jan 2025 13:00:01 +0100 openvswitch (3.4.0-1) unstable; urgency=low diff --git a/debian/control.in b/debian/control.in index 47b1f8cdd6..dfcf10bcc4 100644 --- a/debian/control.in +++ b/debian/control.in @@ -39,14 +39,17 @@ Rules-Requires-Root: no Homepage: http://openvswitch.org/ Vcs-Browser: https://salsa.debian.org/openstack-team/third-party/openvswitch/-/tree/debian/experimental Vcs-Git: https://salsa.debian.org/openstack-team/third-party/openvswitch.git +X-Python3-Version: >= 3.10 Package: openvswitch-common Architecture: linux-any Depends: openssl, ${misc:Depends}, - ${python3:Depends}, ${shlibs:Depends}, +Recommends: + python3-openvswitch (= ${binary:Version}), + ${python3:Depends},
Suggests: ethtool, openvswitch-doc, @@ -94,7 +97,6 @@ Depends: python3-openvswitch (= ${source:Version}), strongswan, ${misc:Depends}, - ${shlibs:Depends}, Suggests: python3:any Breaks: openvswitch-common (<< 2.17~), @@ -154,12 +156,13 @@ Depends: netbase, openvswitch-common (= ${binary:Version}), procps, - python3-netifaces, - python3-openvswitch (>= ${source:Version}), uuid-runtime, ${misc:Depends}, - ${python3:Depends}, ${shlibs:Depends}, +Recommends: + python3-netifaces, + python3-openvswitch (>= ${source:Version}), + ${python3:Depends}, Breaks: openvswitch-common (<< 2.17~), Replaces: @@ -183,7 +186,6 @@ Description: Open vSwitch switch implementations # DPDK_NETDEV dpdk, # DPDK_NETDEV openvswitch-switch (= ${binary:Version}), # DPDK_NETDEV ${misc:Depends}, -# DPDK_NETDEV ${python3:Depends}, # DPDK_NETDEV ${shlibs:Depends}, # DPDK_NETDEV Enhances: # DPDK_NETDEV openvswitch-switch, @@ -258,7 +260,6 @@ Depends: openvswitch-switch (>= ${binary:Version}), python3-openvswitch (>= ${source:Version}), ${misc:Depends}, - ${python3:Depends}, ${shlibs:Depends}, Suggests: python3:any Breaks: @@ -284,12 +285,13 @@ Depends: ${misc:Depends}, ${python3:Depends}, ${shlibs:Depends}, -Suggests: +Recommends: python3-click, - python3-graphviz, python3-netaddr, python3-pyparsing, python3-rich, +Suggests: + python3-graphviz, python3-unbound, Description: Python 3 bindings for Open vSwitch Open vSwitch is a production quality, multilayer, software-based, diff --git a/debian/openvswitch-common.install b/debian/openvswitch-common.install index 9bdb43a6f2..5fef8de74e 100644 --- a/debian/openvswitch-common.install +++ b/debian/openvswitch-common.install @@ -1,6 +1,7 @@ etc/bash_completion.d/ovs-appctl-bashcomp.bash usr/share/bash-completion/completions usr/bin/ovs-appctl usr/bin/ovs-docker +usr/bin/ovs-flowviz usr/bin/ovs-ofctl usr/bin/ovs-parse-backtrace usr/bin/ovs-pki @@ -20,6 +21,7 @@ usr/share/man/man7/ovsdb-server.7 usr/share/man/man7/ovsdb.7 usr/share/man/man8/ovs-appctl.8 
usr/share/man/man8/ovs-bugtool.8 +usr/share/man/man8/ovs-flowviz.8 usr/share/man/man8/ovs-ofctl.8 usr/share/man/man8/ovs-parse-backtrace.8 usr/share/man/man8/ovs-pki.8 diff --git a/debian/openvswitch-ipsec.default b/debian/openvswitch-ipsec.default new file mode 100644 index 0000000000..a074948591 --- /dev/null +++ b/debian/openvswitch-ipsec.default @@ -0,0 +1,5 @@ +# This is a POSIX shell fragment -*- sh -*- + +# OVS_CTL_OPTS: Extra options to pass to ovs-ctl. This is, for example, +# a suitable place to specify --no-restart-ike-daemon. +# OVS_CTL_OPTS= diff --git a/debian/openvswitch-ipsec.dirs b/debian/openvswitch-ipsec.dirs new file mode 100644 index 0000000000..4b83f29661 --- /dev/null +++ b/debian/openvswitch-ipsec.dirs @@ -0,0 +1 @@ +/usr/share/openvswitch/ipsec diff --git a/debian/openvswitch-ipsec.init b/debian/openvswitch-ipsec.init index aa68384547..4fc7701b83 100755 --- a/debian/openvswitch-ipsec.init +++ b/debian/openvswitch-ipsec.init @@ -41,6 +41,8 @@ test -x $DAEMON || exit 0 . /lib/lsb/init-functions +test -e /etc/default/openvswitch-ipsec && . 
/etc/default/openvswitch-ipsec + DODTIME=10 # Time to wait for the server to die, in seconds # If this value is set too low you might not # let some servers to die gracefully and @@ -72,7 +74,8 @@ running() { } start_server() { - ${DATADIR}/scripts/ovs-ctl --ike-daemon=strongswan start-ovs-ipsec + ${DATADIR}/scripts/ovs-ctl --ike-daemon=strongswan \ + start-ovs-ipsec $OVS_CTL_OPTS return 0 } diff --git a/debian/openvswitch-ipsec.install b/debian/openvswitch-ipsec.install old mode 100644 new mode 100755 index 31a8945e2f..ae127e2d4d --- a/debian/openvswitch-ipsec.install +++ b/debian/openvswitch-ipsec.install @@ -1 +1,3 @@ +#!/usr/bin/dh-exec +debian/openvswitch-ipsec.default => /usr/share/openvswitch/ipsec/default.template usr/share/openvswitch/scripts/ovs-monitor-ipsec diff --git a/debian/openvswitch-ipsec.service b/debian/openvswitch-ipsec.service index 608a6a6188..2f92def514 100644 --- a/debian/openvswitch-ipsec.service +++ b/debian/openvswitch-ipsec.service @@ -6,8 +6,10 @@ After=openvswitch-switch.service [Service] Type=forking PIDFile=/run/openvswitch/ovs-monitor-ipsec.pid -ExecStart=/usr/share/openvswitch/scripts/ovs-ctl \ - --ike-daemon=strongswan start-ovs-ipsec +Restart=on-failure +EnvironmentFile=-/etc/default/openvswitch-ipsec +ExecStart=/usr/share/openvswitch/scripts/ovs-ctl --no-monitor \ + --ike-daemon=strongswan start-ovs-ipsec $OVS_CTL_OPTS ExecStop=/usr/share/openvswitch/scripts/ovs-ctl stop-ovs-ipsec [Install] diff --git a/debian/openvswitch-switch.ovs-vswitchd.service b/debian/openvswitch-switch.ovs-vswitchd.service index 519d80d8ed..a4d445b953 100644 --- a/debian/openvswitch-switch.ovs-vswitchd.service +++ b/debian/openvswitch-switch.ovs-vswitchd.service @@ -11,6 +11,7 @@ DefaultDependencies=no [Service] LimitNOFILE=1048576 Type=forking +PIDFile=/run/openvswitch/ovs-vswitchd.pid Restart=on-failure Environment=HOME=/var/run/openvswitch EnvironmentFile=-/etc/default/openvswitch-switch diff --git a/debian/openvswitch-switch.ovsdb-server.service 
b/debian/openvswitch-switch.ovsdb-server.service index 339665b255..35654d7059 100644 --- a/debian/openvswitch-switch.ovsdb-server.service +++ b/debian/openvswitch-switch.ovsdb-server.service @@ -8,6 +8,7 @@ DefaultDependencies=no [Service] LimitNOFILE=1048576 Type=forking +PIDFile=/run/openvswitch/ovsdb-server.pid Restart=on-failure EnvironmentFile=-/etc/default/openvswitch-switch ExecStart=/usr/share/openvswitch/scripts/ovs-ctl \ @@ -18,6 +19,3 @@ ExecStop=/usr/share/openvswitch/scripts/ovs-ctl --no-ovs-vswitchd stop ExecReload=/usr/share/openvswitch/scripts/ovs-ctl --no-ovs-vswitchd \ --no-record-hostname \ --no-monitor restart $OVS_CTL_OPTS -RuntimeDirectory=openvswitch -RuntimeDirectoryMode=0755 -RuntimeDirectoryPreserve=yes diff --git a/debian/openvswitch-test.install b/debian/openvswitch-test.install index 88c8252805..dfc8ebf302 100644 --- a/debian/openvswitch-test.install +++ b/debian/openvswitch-test.install @@ -2,4 +2,5 @@ usr/bin/ovs-l3ping usr/bin/ovs-test usr/share/man/man8/ovs-l3ping.8 usr/share/man/man8/ovs-test.8 +usr/share/openvswitch/python/ovstest usr/lib/python3/dist-packages/ usr/share/openvswitch/scripts/usdt/* diff --git a/debian/patches/ovs-ctl-ipsec.patch b/debian/patches/ovs-ctl-ipsec.patch deleted file mode 100644 index 63375cd47d..0000000000 --- a/debian/patches/ovs-ctl-ipsec.patch +++ /dev/null @@ -1,18 +0,0 @@ -Description: Don't monitor ipsec daemon - For Ubuntu systemd will monitor the ovs-monitor-ipsec daemon so - there is no need to spawn a separate monitor thread to deal with - restarts. Doing so has the side effect of confusing systemd into - monitoring the wrong process. 
-Author: James Page -Forwarded: not-needed - ---- a/utilities/ovs-ctl.in -+++ b/utilities/ovs-ctl.in -@@ -245,7 +245,7 @@ start_ovs_ipsec () { - --pidfile=${rundir}/ovs-monitor-ipsec.pid \ - --ike-daemon=$IKE_DAEMON \ - $no_restart \ -- --log-file --detach --monitor unix:${rundir}/db.sock || return 1 -+ --log-file --detach unix:${rundir}/db.sock || return 1 - return 0 - } diff --git a/debian/patches/series b/debian/patches/series deleted file mode 100644 index 87a2a1d97c..0000000000 --- a/debian/patches/series +++ /dev/null @@ -1 +0,0 @@ -ovs-ctl-ipsec.patch diff --git a/debian/python3-openvswitch.install b/debian/python3-openvswitch.install index e1e8c3a6e1..cd1dae3aff 100644 --- a/debian/python3-openvswitch.install +++ b/debian/python3-openvswitch.install @@ -1 +1,6 @@ -usr/share/man/man8/ovs-flowviz.8 +# At the dh_install stage we need to retain python version specific directory +# tree to support extensions. +# +# dh_python will consolidate into usr/lib/python3/dist-packages retaining +# version specific shared object files. 
+usr/lib/python3* usr/lib/ diff --git a/debian/rules b/debian/rules index b6f905f3cd..6b51b51e16 100755 --- a/debian/rules +++ b/debian/rules @@ -16,7 +16,6 @@ else PARALLEL = endif -PYTHON3S:=$(shell py3versions -vr) DEB_HOST_ARCH?=$(shell dpkg-architecture -qDEB_HOST_ARCH) override_dh_auto_configure: @@ -80,9 +79,26 @@ endif # nodpdk endif # i386/amd64/ppc64el endif # nocheck +export PYBUILD_DESTDIR = $(CURDIR)/debian/tmp +export PYBUILD_DIR = $(CURDIR)/python + +pybuild = \ + export PKG_CONFIG_PATH=$(CURDIR)/debian/tmp/usr/lib/pkgconfig; \ + export PKG_CONFIG_SYSROOT_DIR=$(CURDIR)/debian/tmp; \ + export PKG_CONFIG_SYSTEM_INCLUDE_PATH=/; \ + export PKG_CONFIG_SYSTEM_LIBRARY_PATH=/; \ + enable_shared=no \ + extra_cflags="`pkg-config --cflags libopenvswitch`" \ + extra_libs="-Wl,-Bstatic -lopenvswitch -Wl,-Bdynamic `pkg-config --libs --static libopenvswitch`" \ + pybuild + override_dh_auto_build: dh_auto_build --sourcedirectory=_debian -- dist distdir=openvswitch dh_auto_build --sourcedirectory=_debian + # We need an extra install here so that we can use pkgconfig to + # retrieve accurate CFLAGS and LDFLAGS for building Python extensions. + dh_auto_install --sourcedirectory=_debian + $(pybuild) --build ifneq (,$(filter i386 amd64 ppc64el arm64, $(DEB_HOST_ARCH))) ifeq (,$(filter nodpdk, $(DEB_BUILD_OPTIONS))) dh_auto_build --sourcedirectory=_dpdk @@ -91,28 +107,15 @@ endif execute_before_dh_auto_clean: find . -name "*.pyc" -delete + if test -d $(PYBUILD_DIR)/build; then \ + pybuild --clean ; \ + fi override_dh_auto_install: + # We need to use pybuild to install Python extensions. 
+ $(pybuild) --install dh_auto_install --sourcedirectory=_debian -execute_after_dh_install: - set -e && for pyvers in $(PYTHON3S); do \ - cd python; \ - export PKG_CONFIG_PATH=$(CURDIR)/debian/tmp/usr/lib/pkgconfig; \ - export PKG_CONFIG_SYSROOT_DIR=$(CURDIR)/debian/tmp; \ - export PKG_CONFIG_SYSTEM_INCLUDE_PATH=/; \ - export PKG_CONFIG_SYSTEM_LIBRARY_PATH=/; \ - enable_shared=no \ - extra_cflags="`pkg-config --cflags libopenvswitch`" \ - extra_libs="-Wl,-Bstatic -lopenvswitch -Wl,-Bdynamic `pkg-config --libs --static libopenvswitch`" \ - python$$pyvers setup.py install --install-layout=deb \ - --root $(CURDIR)/debian/python3-openvswitch; \ - cd ..; \ - mkdir -p $(CURDIR)/debian/openvswitch-test/usr/lib/python$$pyvers/dist-packages/ovstest; \ - install -v -D python/ovstest/*.py \ - $(CURDIR)/debian/openvswitch-test/usr/lib/python$$pyvers/dist-packages/ovstest; \ - done - override_dh_installinit: dh_installinit --restart-after-upgrade dh_installinit -popenvswitch-switch --name=ovsdb-server --no-start @@ -134,8 +137,8 @@ override_dh_python3: # Helper target for creating snapshots from upstream git DATE=$(shell date +%Y%m%d) # Upstream branch to track -BRANCH=branch-3.4 -VERSION=3.4.0 +BRANCH=branch-3.5 +VERSION=3.5.0 get-orig-snapshot: rm -Rf openvswitch-upstream diff --git a/include/openvswitch/ofp-ct.h b/include/openvswitch/ofp-ct.h index d57b626784..ea68c2e605 100644 --- a/include/openvswitch/ofp-ct.h +++ b/include/openvswitch/ofp-ct.h @@ -24,6 +24,8 @@ #include "openflow/nicira-ext.h" +struct ds; + #ifdef __cplusplus extern "C" { #endif diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c index 04ebb19da1..2356359772 100644 --- a/lib/dp-packet-gso.c +++ b/lib/dp-packet-gso.c @@ -73,8 +73,7 @@ dp_packet_gso_nr_segs(struct dp_packet *p) const char *data_tail; const char *data_pos; - if (dp_packet_hwol_is_tunnel_vxlan(p) || - dp_packet_hwol_is_tunnel_geneve(p)) { + if (dp_packet_hwol_is_tunnel(p)) { data_pos = dp_packet_get_inner_tcp_payload(p); } else { data_pos 
= dp_packet_get_tcp_payload(p); @@ -105,7 +104,9 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches) bool outer_ipv4; int hdr_len; int seg_len; - bool tnl; + bool udp_tnl = dp_packet_hwol_is_tunnel_vxlan(p) || + dp_packet_hwol_is_tunnel_geneve(p); + bool gre_tnl = dp_packet_hwol_is_tunnel_gre(p); tso_segsz = dp_packet_get_tso_segsz(p); if (!tso_segsz) { @@ -114,11 +115,9 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches) return false; } - if (dp_packet_hwol_is_tunnel_vxlan(p) || - dp_packet_hwol_is_tunnel_geneve(p)) { + if (udp_tnl || gre_tnl) { outer_ipv4 = dp_packet_hwol_is_outer_ipv4(p); tcp_hdr = dp_packet_inner_l4(p); - tnl = true; if (outer_ipv4) { outer_ip_id = ntohs(((struct ip_header *) dp_packet_l3(p))->ip_id); @@ -130,7 +129,6 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches) } else { outer_ipv4 = dp_packet_hwol_is_ipv4(p); tcp_hdr = dp_packet_l4(p); - tnl = false; if (outer_ipv4) { struct ip_header *ip_hdr = dp_packet_l3(p); @@ -156,13 +154,15 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches) seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len); data_pos += seg_len; - if (tnl) { + if (udp_tnl) { /* Update tunnel UDP header length. */ struct udp_header *tnl_hdr; tnl_hdr = dp_packet_l4(seg); tnl_hdr->udp_len = htons(dp_packet_l4_size(seg)); + } + if (udp_tnl || gre_tnl) { /* Update tunnel inner L3 header. */ if (dp_packet_hwol_is_ipv4(seg)) { struct ip_header *ip_hdr = dp_packet_inner_l3(seg); @@ -194,7 +194,7 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches) } /* Update L4 header. 
*/ - if (tnl) { + if (udp_tnl || gre_tnl) { tcp_hdr = dp_packet_inner_l4(seg); } else { tcp_hdr = dp_packet_l4(seg); @@ -208,6 +208,18 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches) tcp_hdr->tcp_ctl = TCP_CTL(tcp_flags, tcp_offset); } + if (gre_tnl) { + struct gre_base_hdr *ghdr; + + ghdr = dp_packet_l4(seg); + + if (ghdr->flags & htons(GRE_CSUM)) { + ovs_be16 *csum_opt = (ovs_be16 *) (ghdr + 1); + *csum_opt = 0; + *csum_opt = csum(ghdr, dp_packet_l4_size(seg)); + } + } + if (dp_packet_batch_is_full(curr_batch)) { curr_batch++; } diff --git a/lib/dp-packet.c b/lib/dp-packet.c index df7bf8e6b3..dad0d7be3a 100644 --- a/lib/dp-packet.c +++ b/lib/dp-packet.c @@ -604,6 +604,8 @@ dp_packet_ol_send_prepare(struct dp_packet *p, uint64_t flags) NETDEV_TX_OFFLOAD_SCTP_CKSUM | NETDEV_TX_OFFLOAD_IPV4_CKSUM); } + } else if (dp_packet_hwol_is_tunnel_gre(p)) { + tnl_inner = true; } if (dp_packet_hwol_tx_ip_csum(p)) { diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 4afbbe7223..f94a82b07c 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -104,6 +104,9 @@ enum dp_packet_offload_mask { /* Offload tunnel packet, outer header is IPv6. */ DEF_OL_FLAG(DP_PACKET_OL_TX_OUTER_IPV6, RTE_MBUF_F_TX_OUTER_IPV6, 0x40000), + /* Offload packet is GRE tunnel. */ + DEF_OL_FLAG(DP_PACKET_OL_TX_TUNNEL_GRE, + RTE_MBUF_F_TX_TUNNEL_GRE, 0x80000), /* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. */ }; @@ -123,6 +126,7 @@ enum dp_packet_offload_mask { DP_PACKET_OL_TX_IP_CKSUM | \ DP_PACKET_OL_TX_TUNNEL_GENEVE | \ DP_PACKET_OL_TX_TUNNEL_VXLAN | \ + DP_PACKET_OL_TX_TUNNEL_GRE | \ DP_PACKET_OL_TX_OUTER_IPV4 | \ DP_PACKET_OL_TX_OUTER_IP_CKSUM | \ DP_PACKET_OL_TX_OUTER_UDP_CKSUM | \ @@ -1171,6 +1175,22 @@ dp_packet_hwol_is_tunnel_vxlan(struct dp_packet *b) return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_TUNNEL_VXLAN); } +/* Returns 'true' if packet 'b' is marked for GRE tunnel offloading. 
*/ +static inline bool +dp_packet_hwol_is_tunnel_gre(struct dp_packet *b) +{ + return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_TUNNEL_GRE); +} + +/* Returns true if packet 'b' has any offloadable tunnel type. */ +static inline bool +dp_packet_hwol_is_tunnel(struct dp_packet *b) +{ + return !!(*dp_packet_ol_flags_ptr(b) & (DP_PACKET_OL_TX_TUNNEL_VXLAN | + DP_PACKET_OL_TX_TUNNEL_GRE | + DP_PACKET_OL_TX_TUNNEL_GENEVE)); +} + /* Returns 'true' if packet 'b' is marked for outer IPv4 checksum offload. */ static inline bool dp_packet_hwol_is_outer_ipv4_cksum(const struct dp_packet *b) @@ -1289,11 +1309,19 @@ dp_packet_hwol_set_tunnel_vxlan(struct dp_packet *b) *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TUNNEL_VXLAN; } +/* Mark packet 'b' for GRE tunnel offloading. */ +static inline void +dp_packet_hwol_set_tunnel_gre(struct dp_packet *b) +{ + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TUNNEL_GRE; +} + /* Clears tunnel offloading marks. */ static inline void dp_packet_hwol_reset_tunnel(struct dp_packet *b) { *dp_packet_ol_flags_ptr(b) &= ~(DP_PACKET_OL_TX_TUNNEL_VXLAN | + DP_PACKET_OL_TX_TUNNEL_GRE | DP_PACKET_OL_TX_TUNNEL_GENEVE); } @@ -1352,6 +1380,9 @@ dp_packet_hwol_reset_tcp_seg(struct dp_packet *p) ol_flags |= DP_PACKET_OL_TX_OUTER_IP_CKSUM; } ol_flags |= DP_PACKET_OL_TX_OUTER_UDP_CKSUM; + } else if (ol_flags & DP_PACKET_OL_TX_TUNNEL_GRE && + ol_flags & DP_PACKET_OL_TX_OUTER_IPV4) { + ol_flags |= DP_PACKET_OL_TX_OUTER_IP_CKSUM; } *dp_packet_ol_flags_ptr(p) = ol_flags; diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 2a529f272d..b572fab23d 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -8928,9 +8928,7 @@ dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet; DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets) { - if (dp_packet_hwol_is_tunnel_geneve(packet) || - dp_packet_hwol_is_tunnel_vxlan(packet)) { - + if (dp_packet_hwol_is_tunnel(packet)) { if (dp_packet_hwol_is_tso(packet)) { /* Can't perform GSO 
in the middle of a pipeline. */ COVERAGE_INC(datapath_drop_tunnel_tso_recirc); diff --git a/lib/flow.c b/lib/flow.c index 9be4375246..0eb34892f2 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -1187,7 +1187,7 @@ parse_dl_type(const void **datap, size_t *sizep, ovs_be16 *first_vlan_tci_p) * If 'packet' is not an Ethernet packet embedding TCP, returns 0. * 'dl_type_p' will be set only if the 'packet' is an Ethernet packet. * 'nw_frag_p' will be set only if the 'packet' is an IP packet. - * 'first_vlan_tci' will be set only if the 'packet' contains vlan header. + * 'first_vlan_tci_p' will be set only if the 'packet' contains vlan header. * * The caller must ensure that 'packet' is at least ETH_HEADER_LEN bytes * long.'*/ diff --git a/lib/ipf.c b/lib/ipf.c index 59e2323557..b76181e793 100644 --- a/lib/ipf.c +++ b/lib/ipf.c @@ -410,11 +410,12 @@ ipf_reassemble_v4_frags(struct ipf_list *ipf_list) dp_packet_set_size(pkt, dp_packet_size(pkt) - dp_packet_l2_pad_size(pkt)); struct ip_header *l3 = dp_packet_l3(pkt); int len = ntohs(l3->ip_tot_len); + int orig_len = dp_packet_size(pkt); int rest_len = frag_list[ipf_list->last_inuse_idx].end_data_byte - frag_list[1].start_data_byte + 1; - if (len + rest_len > IPV4_PACKET_MAX_SIZE) { + if (orig_len + rest_len > IPV4_PACKET_MAX_SIZE) { ipf_print_reass_packet( "Unsupported big reassembled v4 packet; v4 hdr:", l3); dp_packet_delete(pkt); @@ -459,11 +460,12 @@ ipf_reassemble_v6_frags(struct ipf_list *ipf_list) dp_packet_set_size(pkt, dp_packet_size(pkt) - dp_packet_l2_pad_size(pkt)); struct ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt); int pl = ntohs(l3->ip6_plen) - sizeof(struct ovs_16aligned_ip6_frag); + int orig_len = dp_packet_size(pkt); int rest_len = frag_list[ipf_list->last_inuse_idx].end_data_byte - frag_list[1].start_data_byte + 1; - if (pl + rest_len > IPV6_PACKET_MAX_DATA) { + if (orig_len + rest_len > IPV6_PACKET_MAX_DATA) { ipf_print_reass_packet( "Unsupported big reassembled v6 packet; v6 hdr:", l3); 
dp_packet_delete(pkt); diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index b88247a2d0..549887b313 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -427,6 +427,7 @@ enum dpdk_hw_ol_features { NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD = 1 << 9, NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD = 1 << 10, NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD = 1 << 11, + NETDEV_TX_GRE_TNL_TSO_OFFLOAD = 1 << 12, }; enum dpdk_rx_steer_flags { @@ -1100,6 +1101,8 @@ netdev_dpdk_update_netdev_flags(struct netdev_dpdk *dev) NETDEV_TX_OFFLOAD_TCP_TSO); netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD, NETDEV_TX_VXLAN_TNL_TSO); + netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_GRE_TNL_TSO_OFFLOAD, + NETDEV_TX_GRE_TNL_TSO); netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD, NETDEV_TX_GENEVE_TNL_TSO); netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD, @@ -1167,6 +1170,10 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO; } + if (dev->hw_ol_features & NETDEV_TX_GRE_TNL_TSO_OFFLOAD) { + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO; + } + if (dev->hw_ol_features & NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD) { conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM; } @@ -1443,6 +1450,13 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) VLOG_WARN("%s: Tx Geneve tunnel TSO offload is not supported.", netdev_get_name(&dev->up)); } + + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO) { + dev->hw_ol_features |= NETDEV_TX_GRE_TNL_TSO_OFFLOAD; + } else { + VLOG_WARN("%s: Tx GRE tunnel TSO offload is not supported.", + netdev_get_name(&dev->up)); + } } n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq); @@ -2650,6 +2664,7 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf) const uint64_t tunnel_type = mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK; if (OVS_UNLIKELY(tunnel_type && tunnel_type != RTE_MBUF_F_TX_TUNNEL_GENEVE && + tunnel_type != RTE_MBUF_F_TX_TUNNEL_GRE && 
tunnel_type != RTE_MBUF_F_TX_TUNNEL_VXLAN)) { VLOG_WARN_RL(&rl, "%s: Unexpected tunnel type: %#"PRIx64, netdev_get_name(&dev->up), tunnel_type); diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c index ede5e16865..62e1a0c870 100644 --- a/lib/netdev-native-tnl.c +++ b/lib/netdev-native-tnl.c @@ -194,8 +194,7 @@ netdev_tnl_push_ip_header(struct dp_packet *packet, const void *header, packet_set_ipv6_flow_label(&ip6->ip6_flow, ipv6_label); packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size; - if (dp_packet_hwol_is_tunnel_geneve(packet) || - dp_packet_hwol_is_tunnel_vxlan(packet)) { + if (dp_packet_hwol_is_tunnel(packet)) { dp_packet_hwol_set_tx_outer_ipv6(packet); } else { dp_packet_hwol_set_tx_ipv6(packet); @@ -207,8 +206,7 @@ netdev_tnl_push_ip_header(struct dp_packet *packet, const void *header, ip = netdev_tnl_ip_hdr(eth); ip->ip_tot_len = htons(*ip_tot_size); /* Postpone checksum to when the packet is pushed to the port. */ - if (dp_packet_hwol_is_tunnel_geneve(packet) || - dp_packet_hwol_is_tunnel_vxlan(packet)) { + if (dp_packet_hwol_is_tunnel(packet)) { dp_packet_hwol_set_tx_outer_ipv4(packet); dp_packet_hwol_set_tx_outer_ipv4_csum(packet); } else { @@ -271,7 +269,9 @@ dp_packet_tnl_ol_process(struct dp_packet *packet, ip = dp_packet_l3(packet); if (data->tnl_type == OVS_VPORT_TYPE_GENEVE || - data->tnl_type == OVS_VPORT_TYPE_VXLAN) { + data->tnl_type == OVS_VPORT_TYPE_VXLAN || + data->tnl_type == OVS_VPORT_TYPE_GRE || + data->tnl_type == OVS_VPORT_TYPE_IP6GRE) { if (IP_VER(ip->ip_ihl_ver) == 4) { dp_packet_hwol_set_tx_ipv4(packet); @@ -286,6 +286,9 @@ dp_packet_tnl_ol_process(struct dp_packet *packet, dp_packet_hwol_set_tunnel_geneve(packet); } else if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) { dp_packet_hwol_set_tunnel_vxlan(packet); + } else if (data->tnl_type == OVS_VPORT_TYPE_GRE || + data->tnl_type == OVS_VPORT_TYPE_IP6GRE) { + dp_packet_hwol_set_tunnel_gre(packet); } } @@ -535,9 +538,13 @@ netdev_gre_push_header(const struct netdev 
*netdev, const struct ovs_action_push_tnl *data) { struct netdev_vport *dev = netdev_vport_cast(netdev); + uint16_t l3_ofs = packet->l3_ofs; + uint16_t l4_ofs = packet->l4_ofs; struct gre_base_hdr *greh; int ip_tot_size; + dp_packet_tnl_ol_process(packet, data); + greh = netdev_tnl_push_ip_header(packet, data->header, data->header_len, &ip_tot_size, 0); @@ -547,11 +554,24 @@ netdev_gre_push_header(const struct netdev *netdev, } if (greh->flags & htons(GRE_SEQ)) { - /* Last 4 byte is GRE seqno */ - int seq_ofs = gre_header_len(greh->flags) - 4; - ovs_16aligned_be32 *seq_opt = - ALIGNED_CAST(ovs_16aligned_be32 *, (char *)greh + seq_ofs); - put_16aligned_be32(seq_opt, htonl(atomic_count_inc(&dev->gre_seqno))); + if (!dp_packet_hwol_is_tso(packet)) { + /* Last 4 bytes are GRE seqno. */ + int seq_ofs = gre_header_len(greh->flags) - 4; + ovs_16aligned_be32 *seq_opt = + ALIGNED_CAST(ovs_16aligned_be32 *, (char *) greh + seq_ofs); + + put_16aligned_be32(seq_opt, + htonl(atomic_count_inc(&dev->gre_seqno))); + } else { + VLOG_WARN_RL(&err_rl, "Cannot use GRE Sequence numbers with TSO."); + } + } + + if (l3_ofs != UINT16_MAX) { + packet->inner_l3_ofs = l3_ofs + data->header_len; + } + if (l4_ofs != UINT16_MAX) { + packet->inner_l4_ofs = l4_ofs + data->header_len; } } diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 22840a058b..5ae3794699 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -47,6 +47,7 @@ enum netdev_ol_flags { NETDEV_TX_GENEVE_TNL_TSO = 1 << 6, NETDEV_TX_OFFLOAD_OUTER_IP_CKSUM = 1 << 7, NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM = 1 << 8, + NETDEV_TX_GRE_TNL_TSO = 1 << 9, }; /* A network device (e.g. an Ethernet device). 
diff --git a/lib/netdev.c b/lib/netdev.c index 02beac9d0b..9dd94ebdd7 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -916,11 +916,11 @@ netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch, } } } else if (!(netdev_flags & (NETDEV_TX_VXLAN_TNL_TSO | + NETDEV_TX_GRE_TNL_TSO | NETDEV_TX_GENEVE_TNL_TSO))) { DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { if (dp_packet_hwol_is_tso(packet) && - (dp_packet_hwol_is_tunnel_vxlan(packet) || - dp_packet_hwol_is_tunnel_geneve(packet))) { + dp_packet_hwol_is_tunnel(packet)) { return netdev_send_tso(netdev, qid, batch, concurrent_txq); } } @@ -1011,6 +1011,8 @@ netdev_push_header(const struct netdev *netdev, DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) { if (OVS_UNLIKELY(data->tnl_type != OVS_VPORT_TYPE_GENEVE && data->tnl_type != OVS_VPORT_TYPE_VXLAN && + data->tnl_type != OVS_VPORT_TYPE_GRE && + data->tnl_type != OVS_VPORT_TYPE_IP6GRE && dp_packet_hwol_is_tso(packet))) { COVERAGE_INC(netdev_push_header_drops); dp_packet_delete(packet); @@ -1019,16 +1021,17 @@ netdev_push_header(const struct netdev *netdev, netdev_get_name(netdev), netdev_get_type(netdev)); } else { if (data->tnl_type != OVS_VPORT_TYPE_GENEVE && - data->tnl_type != OVS_VPORT_TYPE_VXLAN) { + data->tnl_type != OVS_VPORT_TYPE_VXLAN && + data->tnl_type != OVS_VPORT_TYPE_GRE && + data->tnl_type != OVS_VPORT_TYPE_IP6GRE) { dp_packet_ol_send_prepare(packet, 0); - } else if (dp_packet_hwol_is_tunnel_geneve(packet) || - dp_packet_hwol_is_tunnel_vxlan(packet)) { + } else if (dp_packet_hwol_is_tunnel(packet)) { if (dp_packet_hwol_is_tso(packet)) { COVERAGE_INC(netdev_push_header_drops); dp_packet_delete(packet); VLOG_WARN_RL(&rl, "%s: Tunneling packets with TSO is not " "supported with multiple levels of " - "VXLAN or GENEVE encapsulation.", + "VXLAN, GENEVE, or GRE encapsulation.", netdev_get_name(netdev)); continue; } @@ -1480,6 +1483,7 @@ netdev_get_status(const struct netdev *netdev, struct smap *smap) OL_ADD_STAT("sctp_csum", 
NETDEV_TX_OFFLOAD_SCTP_CKSUM); OL_ADD_STAT("tcp_seg", NETDEV_TX_OFFLOAD_TCP_TSO); OL_ADD_STAT("vxlan_tso", NETDEV_TX_VXLAN_TNL_TSO); + OL_ADD_STAT("gre_tso", NETDEV_TX_GRE_TNL_TSO); OL_ADD_STAT("geneve_tso", NETDEV_TX_GENEVE_TNL_TSO); OL_ADD_STAT("out_ip_csum", NETDEV_TX_OFFLOAD_OUTER_IP_CKSUM); OL_ADD_STAT("out_udp_csum", NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM); diff --git a/lib/netlink.c b/lib/netlink.c index 1e8d5a8ec5..446a0679ed 100644 --- a/lib/netlink.c +++ b/lib/netlink.c @@ -29,6 +29,16 @@ #include "openvswitch/vlog.h" #include "util.h" +#ifdef HAVE_NETLINK +#include <linux/rtnetlink.h> +#else +/* RTA_VIA */ +struct rtvia { + sa_family_t rtvia_family; + uint8_t rtvia_addr[]; +}; +#endif + VLOG_DEFINE_THIS_MODULE(netlink); /* A single (bad) Netlink message can in theory dump out many, many log @@ -819,6 +829,7 @@ min_attr_len(enum nl_attr_type type) case NL_A_IPV6: return 16; case NL_A_NESTED: return 0; case NL_A_LL_ADDR: return 6; /* ETH_ALEN */ + case NL_A_RTA_VIA: return sizeof(struct rtvia) + sizeof(struct in_addr); case N_NL_ATTR_TYPES: default: OVS_NOT_REACHED(); } } @@ -840,6 +851,7 @@ max_attr_len(enum nl_attr_type type) case NL_A_IPV6: return 16; case NL_A_NESTED: return SIZE_MAX; case NL_A_LL_ADDR: return 20; /* INFINIBAND_ALEN */ + case NL_A_RTA_VIA: return sizeof(struct rtvia) + sizeof(struct in6_addr); case N_NL_ATTR_TYPES: default: OVS_NOT_REACHED(); } } diff --git a/lib/netlink.h b/lib/netlink.h index 008604aa60..d98ef3a989 100644 --- a/lib/netlink.h +++ b/lib/netlink.h @@ -152,6 +152,7 @@ enum nl_attr_type NL_A_IPV6, NL_A_NESTED, NL_A_LL_ADDR, + NL_A_RTA_VIA, N_NL_ATTR_TYPES }; diff --git a/lib/route-table.c b/lib/route-table.c index c6cb21394a..d9b51931ef 100644 --- a/lib/route-table.c +++ b/lib/route-table.c @@ -32,6 +32,7 @@ #include "netlink.h" #include "netlink-notifier.h" #include "netlink-socket.h" +#include "openvswitch/list.h" #include "openvswitch/ofpbuf.h" #include "ovs-router.h" #include "packets.h" @@ -47,27 +48,6 @@ 
VLOG_DEFINE_THIS_MODULE(route_table); COVERAGE_DEFINE(route_table_dump); -struct route_data { - /* Copied from struct rtmsg. */ - unsigned char rtm_dst_len; - bool local; - - /* Extracted from Netlink attributes. */ - struct in6_addr rta_dst; /* 0 if missing. */ - struct in6_addr rta_prefsrc; /* 0 if missing. */ - struct in6_addr rta_gw; - char ifname[IFNAMSIZ]; /* Interface name. */ - uint32_t mark; -}; - -/* A digested version of a route message sent down by the kernel to indicate - * that a route has changed. */ -struct route_table_msg { - bool relevant; /* Should this message be processed? */ - int nlmsg_type; /* e.g. RTM_NEWROUTE, RTM_DELROUTE. */ - struct route_data rd; /* Data parsed from this message. */ -}; - static struct ovs_mutex route_table_mutex = OVS_MUTEX_INITIALIZER; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); @@ -76,7 +56,7 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); static uint64_t rt_change_seq; static struct nln *nln = NULL; -static struct route_table_msg rtmsg; +static struct route_table_msg nln_rtmsg_change; static struct nln_notifier *route_notifier = NULL; static struct nln_notifier *route6_notifier = NULL; static struct nln_notifier *name_notifier = NULL; @@ -84,14 +64,31 @@ static struct nln_notifier *name_notifier = NULL; static bool route_table_valid = false; static void route_table_reset(void); -static void route_table_handle_msg(const struct route_table_msg *); -static int route_table_parse(struct ofpbuf *, void *change); -static void route_table_change(const struct route_table_msg *, void *); +static void route_table_handle_msg(const struct route_table_msg *, void *aux); +static void route_table_change(struct route_table_msg *, void *aux); static void route_map_clear(void); static void name_table_init(void); static void name_table_change(const struct rtnetlink_change *, void *); +static void +route_data_destroy_nexthops__(struct route_data *rd) +{ + struct route_data_nexthop *rdnh; + + 
LIST_FOR_EACH_POP (rdnh, nexthop_node, &rd->nexthops) { + if (rdnh && rdnh != &rd->primary_next_hop__) { + free(rdnh); + } + } +} + +void +route_data_destroy(struct route_data *rd) +{ + route_data_destroy_nexthops__(rd); +} + uint64_t route_table_get_change_seq(void) { @@ -110,7 +107,7 @@ route_table_init(void) ovs_assert(!route6_notifier); ovs_router_init(); - nln = nln_create(NETLINK_ROUTE, route_table_parse, &rtmsg); + nln = nln_create(NETLINK_ROUTE, route_table_parse, &nln_rtmsg_change); route_notifier = nln_notifier_create(nln, RTNLGRP_IPV4_ROUTE, @@ -155,8 +152,10 @@ route_table_wait(void) ovs_mutex_unlock(&route_table_mutex); } -static bool -route_table_dump_one_table(unsigned char id) +bool +route_table_dump_one_table(uint32_t id, + route_table_handle_msg_callback *handle_msg_cb, + void *aux) { uint64_t reply_stub[NL_DUMP_BUFSIZE / 8]; struct ofpbuf request, reply, buf; @@ -170,7 +169,13 @@ route_table_dump_one_table(unsigned char id) rq_msg = ofpbuf_put_zeros(&request, sizeof *rq_msg); rq_msg->rtm_family = AF_UNSPEC; - rq_msg->rtm_table = id; + + if (id > UCHAR_MAX) { + rq_msg->rtm_table = RT_TABLE_UNSPEC; + nl_msg_put_u32(&request, RTA_TABLE, id); + } else { + rq_msg->rtm_table = id; + } nl_dump_start(&dump, NETLINK_ROUTE, &request); ofpbuf_uninit(&request); @@ -186,7 +191,8 @@ route_table_dump_one_table(unsigned char id) if (!(nlmsghdr->nlmsg_flags & NLM_F_DUMP_FILTERED)) { filtered = false; } - route_table_handle_msg(&msg); + handle_msg_cb(&msg, aux); + route_data_destroy(&msg.rd); } } ofpbuf_uninit(&buf); @@ -198,7 +204,7 @@ route_table_dump_one_table(unsigned char id) static void route_table_reset(void) { - unsigned char tables[] = { + uint32_t tables[] = { RT_TABLE_DEFAULT, RT_TABLE_MAIN, RT_TABLE_LOCAL, @@ -212,19 +218,21 @@ route_table_reset(void) COVERAGE_INC(route_table_dump); for (size_t i = 0; i < ARRAY_SIZE(tables); i++) { - if (!route_table_dump_one_table(tables[i])) { + if (!route_table_dump_one_table(tables[i], + route_table_handle_msg, 
NULL)) { /* Got unfiltered reply, no need to dump further. */ break; } } } -/* Return RTNLGRP_IPV4_ROUTE or RTNLGRP_IPV6_ROUTE on success, 0 on parse - * error. */ static int -route_table_parse(struct ofpbuf *buf, void *change_) +route_table_parse__(struct ofpbuf *buf, size_t ofs, + const struct nlmsghdr *nlmsg, + const struct rtmsg *rtm, + const struct rtnexthop *rtnh, + struct route_table_msg *change) { - struct route_table_msg *change = change_; bool parsed, ipv4 = false; static const struct nl_policy policy[] = { @@ -234,6 +242,9 @@ route_table_parse(struct ofpbuf *buf, void *change_) [RTA_MARK] = { .type = NL_A_U32, .optional = true }, [RTA_PREFSRC] = { .type = NL_A_U32, .optional = true }, [RTA_TABLE] = { .type = NL_A_U32, .optional = true }, + [RTA_PRIORITY] = { .type = NL_A_U32, .optional = true }, + [RTA_VIA] = { .type = NL_A_RTA_VIA, .optional = true }, + [RTA_MULTIPATH] = { .type = NL_A_NESTED, .optional = true }, }; static const struct nl_policy policy6[] = { @@ -243,33 +254,36 @@ route_table_parse(struct ofpbuf *buf, void *change_) [RTA_GATEWAY] = { .type = NL_A_IPV6, .optional = true }, [RTA_PREFSRC] = { .type = NL_A_IPV6, .optional = true }, [RTA_TABLE] = { .type = NL_A_U32, .optional = true }, + [RTA_PRIORITY] = { .type = NL_A_U32, .optional = true }, + [RTA_VIA] = { .type = NL_A_RTA_VIA, .optional = true }, + [RTA_MULTIPATH] = { .type = NL_A_NESTED, .optional = true }, }; struct nlattr *attrs[ARRAY_SIZE(policy)]; - const struct rtmsg *rtm; - - rtm = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *rtm); if (rtm->rtm_family == AF_INET) { - parsed = nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct rtmsg), - policy, attrs, ARRAY_SIZE(policy)); + parsed = nl_policy_parse(buf, ofs, policy, attrs, + ARRAY_SIZE(policy)); ipv4 = true; } else if (rtm->rtm_family == AF_INET6) { - parsed = nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct rtmsg), - policy6, attrs, ARRAY_SIZE(policy6)); + parsed = nl_policy_parse(buf, ofs, policy6, attrs, + ARRAY_SIZE(policy6)); } else { 
VLOG_DBG_RL(&rl, "received non AF_INET rtnetlink route message"); return 0; } if (parsed) { - const struct nlmsghdr *nlmsg; - uint32_t table_id; + struct route_data_nexthop *rdnh = NULL; int rta_oif; /* Output interface index. */ - nlmsg = buf->data; - memset(change, 0, sizeof *change); + + ovs_list_init(&change->rd.nexthops); + rdnh = rtnh ? xzalloc(sizeof *rdnh) : &change->rd.primary_next_hop__; + ovs_list_insert(&change->rd.nexthops, &rdnh->nexthop_node); + + rdnh->family = rtm->rtm_family; change->relevant = true; if (rtm->rtm_scope == RT_SCOPE_NOWHERE) { @@ -281,33 +295,33 @@ route_table_parse(struct ofpbuf *buf, void *change_) change->relevant = false; } - table_id = rtm->rtm_table; + change->rd.rta_table_id = rtm->rtm_table; if (attrs[RTA_TABLE]) { - table_id = nl_attr_get_u32(attrs[RTA_TABLE]); - } - /* Do not consider changes in non-standard routing tables. */ - if (table_id - && table_id != RT_TABLE_DEFAULT - && table_id != RT_TABLE_MAIN - && table_id != RT_TABLE_LOCAL) { - change->relevant = false; + change->rd.rta_table_id = nl_attr_get_u32(attrs[RTA_TABLE]); } change->nlmsg_type = nlmsg->nlmsg_type; - change->rd.rtm_dst_len = rtm->rtm_dst_len + (ipv4 ? 96 : 0); - change->rd.local = rtm->rtm_type == RTN_LOCAL; - if (attrs[RTA_OIF]) { - rta_oif = nl_attr_get_u32(attrs[RTA_OIF]); + change->rd.rtm_dst_len = rtm->rtm_dst_len; + change->rd.rtm_protocol = rtm->rtm_protocol; + change->rd.rtn_local = rtm->rtm_type == RTN_LOCAL; + if (attrs[RTA_OIF] && rtnh) { + VLOG_DBG_RL(&rl, "unexpected RTA_OIF attribute while parsing " + "nested RTA_MULTIPATH attributes"); + goto error_out; + } + if (attrs[RTA_OIF] || rtnh) { + rta_oif = rtnh ? 
rtnh->rtnh_ifindex + : nl_attr_get_u32(attrs[RTA_OIF]); - if (!if_indextoname(rta_oif, change->rd.ifname)) { + if (!if_indextoname(rta_oif, rdnh->ifname)) { int error = errno; - VLOG_DBG_RL(&rl, "Could not find interface name[%u]: %s", + VLOG_DBG_RL(&rl, "could not find interface name[%u]: %s", rta_oif, ovs_strerror(error)); if (error == ENXIO) { change->relevant = false; } else { - return 0; + goto error_out; } } } @@ -337,40 +351,190 @@ route_table_parse(struct ofpbuf *buf, void *change_) if (ipv4) { ovs_be32 gw; gw = nl_attr_get_be32(attrs[RTA_GATEWAY]); - in6_addr_set_mapped_ipv4(&change->rd.rta_gw, gw); + in6_addr_set_mapped_ipv4(&rdnh->addr, gw); } else { - change->rd.rta_gw = nl_attr_get_in6_addr(attrs[RTA_GATEWAY]); + rdnh->addr = nl_attr_get_in6_addr(attrs[RTA_GATEWAY]); } } if (attrs[RTA_MARK]) { - change->rd.mark = nl_attr_get_u32(attrs[RTA_MARK]); + change->rd.rta_mark = nl_attr_get_u32(attrs[RTA_MARK]); + } + if (attrs[RTA_PRIORITY]) { + change->rd.rta_priority = nl_attr_get_u32(attrs[RTA_PRIORITY]); } + if (attrs[RTA_VIA]) { + const struct rtvia *rtvia = nl_attr_get(attrs[RTA_VIA]); + ovs_be32 addr; + + if (attrs[RTA_GATEWAY]) { + VLOG_DBG_RL(&rl, "route message can not contain both " + "RTA_GATEWAY and RTA_VIA"); + goto error_out; + } + + rdnh->family = rtvia->rtvia_family; + + switch (rdnh->family) { + case AF_INET: + if (nl_attr_get_size(attrs[RTA_VIA]) + - sizeof *rtvia < sizeof addr) { + VLOG_DBG_RL(&rl, "got short message while parsing RTA_VIA " + "attribute for family AF_INET"); + goto error_out; + } + memcpy(&addr, rtvia->rtvia_addr, sizeof addr); + in6_addr_set_mapped_ipv4(&rdnh->addr, addr); + break; + + case AF_INET6: + if (nl_attr_get_size(attrs[RTA_VIA]) + - sizeof *rtvia < sizeof rdnh->addr) { + VLOG_DBG_RL(&rl, "got short message while parsing RTA_VIA " + "attribute for family AF_INET6"); + goto error_out; + } + memcpy(&rdnh->addr, rtvia->rtvia_addr, sizeof rdnh->addr); + break; + + default: + VLOG_DBG_RL(&rl, "unsupported address 
family, %d, " + "in via attribute", rdnh->family); + goto error_out; + } + } + if (attrs[RTA_MULTIPATH]) { + const struct nlattr *nla; + size_t left; + + if (rtnh) { + VLOG_DBG_RL(&rl, "unexpected nested RTA_MULTIPATH attribute"); + goto error_out; + } + + /* The change->rd.nexthops list is unconditionally populated with + * a single rdnh entry as we start parsing above. Multiple + * branches above may access it or jump to error_out, and having it + * on the list is the only way to ensure proper cleanup. + * + * Getting to this point, we know that the above branches have not + * provided next hop information, because information about + * multiple next hops is encoded in the nested attributes after the + * RTA_MULTIPATH attribute. + * + * Before retrieving those we need to remove the empty rdnh entry + * from the list. */ + route_data_destroy_nexthops__(&change->rd); + + NL_NESTED_FOR_EACH (nla, left, attrs[RTA_MULTIPATH]) { + struct route_table_msg mp_change; + struct rtnexthop *mp_rtnh; + struct ofpbuf mp_buf; + + ofpbuf_use_const(&mp_buf, nla, nla->nla_len); + mp_rtnh = ofpbuf_try_pull(&mp_buf, sizeof *mp_rtnh); + + if (!mp_rtnh) { + VLOG_DBG_RL(&rl, "got short message while parsing " + "multipath attribute"); + goto error_out; + } + + if (!route_table_parse__(&mp_buf, 0, nlmsg, rtm, mp_rtnh, + &mp_change)) { + goto error_out; + } + ovs_list_push_back_all(&change->rd.nexthops, + &mp_change.rd.nexthops); + } + } + if (!attrs[RTA_OIF] && !attrs[RTA_GATEWAY] + && !attrs[RTA_VIA] && !attrs[RTA_MULTIPATH]) { + VLOG_DBG_RL(&rl, "route message needs an RTA_OIF, RTA_GATEWAY, " + "RTA_VIA or RTA_MULTIPATH attribute"); + goto error_out; + } + /* Add any additional RTA attribute processing before RTA_MULTIPATH. */ } else { VLOG_DBG_RL(&rl, "received unparseable rtnetlink route message"); - return 0; + goto error_out; } /* Success. */ return ipv4 ? 
RTNLGRP_IPV4_ROUTE : RTNLGRP_IPV6_ROUTE; + +error_out: + route_data_destroy(&change->rd); + return 0; +} + +/* Parse Netlink message in buf, which is expected to contain a UAPI rtmsg + * header and associated route attributes. + * + * Return RTNLGRP_IPV4_ROUTE or RTNLGRP_IPV6_ROUTE on success, and 0 on a parse + * error. + * + * On success, memory may have been allocated, and it is the caller's + * responsibility to free it with a call to route_data_destroy(). + * + * In case of error, any allocated memory will be freed before returning. */ +int +route_table_parse(struct ofpbuf *buf, void *change) +{ + struct nlmsghdr *nlmsg; + struct rtmsg *rtm; + + nlmsg = ofpbuf_at(buf, 0, NLMSG_HDRLEN); + rtm = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *rtm); + + if (!nlmsg || !rtm) { + return 0; + } + + return route_table_parse__(buf, NLMSG_HDRLEN + sizeof *rtm, + nlmsg, rtm, NULL, change); +} + +static bool +is_standard_table_id(uint32_t table_id) +{ + return !table_id + || table_id == RT_TABLE_DEFAULT + || table_id == RT_TABLE_MAIN + || table_id == RT_TABLE_LOCAL; } static void -route_table_change(const struct route_table_msg *change OVS_UNUSED, - void *aux OVS_UNUSED) +route_table_change(struct route_table_msg *change, void *aux OVS_UNUSED) { - if (!change || change->relevant) { + if (!change + || (change->relevant + && is_standard_table_id(change->rd.rta_table_id))) { route_table_valid = false; } + if (change) { + route_data_destroy(&change->rd); + } } static void -route_table_handle_msg(const struct route_table_msg *change) +route_table_handle_msg(const struct route_table_msg *change, + void *aux OVS_UNUSED) { - if (change->relevant && change->nlmsg_type == RTM_NEWROUTE) { + if (change->relevant && change->nlmsg_type == RTM_NEWROUTE + && !ovs_list_is_empty(&change->rd.nexthops)) { const struct route_data *rd = &change->rd; - - ovs_router_insert(rd->mark, &rd->rta_dst, rd->rtm_dst_len, - rd->local, rd->ifname, &rd->rta_gw, + const struct route_data_nexthop *rdnh; + + /* The 
ovs-router module currently does not implement lookup or + * storage for routes with multiple next hops. For backwards + * compatibility, we use the first next hop. */ + rdnh = CONTAINER_OF(ovs_list_front(&change->rd.nexthops), + const struct route_data_nexthop, nexthop_node); + + ovs_router_insert(rd->rta_mark, &rd->rta_dst, + IN6_IS_ADDR_V4MAPPED(&rd->rta_dst) + ? rd->rtm_dst_len + 96 : rd->rtm_dst_len, + rd->rtn_local, rdnh->ifname, &rdnh->addr, &rd->rta_prefsrc); } } diff --git a/lib/route-table.h b/lib/route-table.h index 3a02d737ae..b805e84dd6 100644 --- a/lib/route-table.h +++ b/lib/route-table.h @@ -24,8 +24,133 @@ #include #include +#include "openvswitch/list.h" +#include "openvswitch/ofpbuf.h" #include "openvswitch/types.h" +/* + * route-table, system route table synchronization for Open vSwitch. + * + * Overview + * ======== + * + * The route-table module has two use cases: + * + * 1) Internal use by Open vSwitch which together with the ovs-router module + * implement route lookup for features such as flow based tunneling, + * userspace tunneling, and sFlow. + * + * 2) External use by projects such as Open Virtual Network (OVN), that use + * Open vSwitch as a compile time library. 
+ * + * Typical External Usage + * ====================== + * + * static void + * my_handle_msg(const struct route_table_msg *change, void *data) + * { + * struct my_data *aux = data; + * + * if (data) { + * aux->rta_dst = change->rd.rta_dst; + * } + * } + * + * static void + * my_route_table_dump(void) + * { + * struct my_data aux; + * + * route_table_dump_one_table(RT_TABLE_MAIN, my_handle_msg, &aux); + * } + * + * static void + * my_route_table_change(struct route_table_msg *change, void *aux OVS_UNUSED) + * { + * my_handle_msg(change, NULL); + * route_data_destroy(&change->rd); + * } + * + * static void + * my_init(void) + * { + * static struct nln_notifier *route6_notifier = NULL; + * static struct nln_notifier *route_notifier = NULL; + * static struct route_table_msg nln_change; + * static struct nln *nln = NULL; + * + * nln = nln_create(NETLINK_ROUTE, route_table_parse, &nln_change); + * + * route6_notifier = + * nln_notifier_create(nln, RTNLGRP_IPV6_ROUTE, + * (nln_notify_func *) my_route_table_change, + * NULL); + * + * route_notifier = + * nln_notifier_create(nln, RTNLGRP_IPV4_ROUTE, + * (nln_notify_func *) my_route_table_change, + * NULL); + * } + * + * Thread-safety + * ============= + * + * Assuming thread safe initialization of dependencies such as netlink socket, + * netlink notifier and so on, the functions in this module are thread safe. + */ + +/* Information about a next hop stored in a linked list with base in struct + * route_data. Please refer to comment in struct route_data for details. */ +struct route_data_nexthop { + struct ovs_list nexthop_node; + + sa_family_t family; + struct in6_addr addr; + char ifname[IFNAMSIZ]; /* Interface name. */ +}; + +struct route_data { + /* Routes can have multiple next hops per destination. + * + * Each next hop has its own set of attributes such as address family, + * interface and IP address. 
+ * + * When retrieving information about a route from the kernel, in the case + * of multiple next hops, information is provided as nested attributes. + * + * A linked list with struct route_data_nexthop entries is used to store + * this information as we parse each attribute. + * + * For the common case of one next hop, the nexthops list will contain a + * single entry pointing to the struct route_data primary_next_hop__ + * element. + * + * Any dynamically allocated list elements MUST be freed with a call to the + * route_data_destroy function. */ + struct ovs_list nexthops; + struct route_data_nexthop primary_next_hop__; + + /* Copied from struct rtmsg. */ + unsigned char rtm_dst_len; + unsigned char rtm_protocol; + bool rtn_local; + + /* Extracted from Netlink attributes. */ + struct in6_addr rta_dst; /* 0 if missing. */ + struct in6_addr rta_prefsrc; /* 0 if missing. */ + uint32_t rta_mark; /* 0 if missing. */ + uint32_t rta_table_id; /* 0 if missing. */ + uint32_t rta_priority; /* 0 if missing. */ +}; + +/* A digested version of a route message sent down by the kernel to indicate + * that a route has changed. */ +struct route_table_msg { + bool relevant; /* Should this message be processed? */ + uint16_t nlmsg_type; /* e.g. RTM_NEWROUTE, RTM_DELROUTE. */ + struct route_data rd; /* Data parsed from this message. 
*/ +}; + uint64_t route_table_get_change_seq(void); void route_table_init(void); void route_table_run(void); @@ -33,4 +158,13 @@ void route_table_wait(void); bool route_table_fallback_lookup(const struct in6_addr *ip6_dst, char name[], struct in6_addr *gw6); + +typedef void route_table_handle_msg_callback(const struct route_table_msg *, + void *aux); + +bool route_table_dump_one_table(uint32_t id, + route_table_handle_msg_callback *, + void *aux); +int route_table_parse(struct ofpbuf *, void *change); +void route_data_destroy(struct route_data *); #endif /* route-table.h */ diff --git a/python/ovs/flowviz/odp/graph.py b/python/ovs/flowviz/odp/graph.py index 4d1fb7493c..c9734efece 100644 --- a/python/ovs/flowviz/odp/graph.py +++ b/python/ovs/flowviz/odp/graph.py @@ -14,16 +14,26 @@ """ Defines a Datapath Graph using graphviz. """ import colorsys -import graphviz import random +import sys from ovs.flowviz.odp.html import HTMLTree, HTMLFormatter from ovs.flowviz.odp.tree import FlowTree from ovs.flowviz.process import FileProcessor +try: + import graphviz +except ImportError: + graphviz = None + class GraphProcessor(FileProcessor): def __init__(self, opts): + if graphviz is None: + print("ERROR: The graph sub-command depends on the graphviz " + "Python library, which does not appear to be installed.", + file=sys.stderr) + sys.exit(1) super().__init__(opts, "odp") def start_file(self, name, filename): diff --git a/rhel/usr_lib_systemd_system_openvswitch-ipsec.service b/rhel/usr_lib_systemd_system_openvswitch-ipsec.service index 92dad44f93..913598f080 100644 --- a/rhel/usr_lib_systemd_system_openvswitch-ipsec.service +++ b/rhel/usr_lib_systemd_system_openvswitch-ipsec.service @@ -6,8 +6,11 @@ After=openvswitch.service [Service] Type=forking PIDFile=/run/openvswitch/ovs-monitor-ipsec.pid -ExecStart=/usr/share/openvswitch/scripts/ovs-ctl \ - --ike-daemon=libreswan start-ovs-ipsec +Restart=on-failure +EnvironmentFile=/etc/openvswitch/default.conf 
+EnvironmentFile=-/etc/sysconfig/openvswitch +ExecStart=/usr/share/openvswitch/scripts/ovs-ctl --no-monitor \ + --ike-daemon=libreswan start-ovs-ipsec $OPTIONS ExecStop=/usr/share/openvswitch/scripts/ovs-ctl stop-ovs-ipsec [Install] diff --git a/rhel/usr_share_openvswitch_scripts_systemd_sysconfig.template b/rhel/usr_share_openvswitch_scripts_systemd_sysconfig.template index c467d02db9..63833c4d8e 100644 --- a/rhel/usr_share_openvswitch_scripts_systemd_sysconfig.template +++ b/rhel/usr_share_openvswitch_scripts_systemd_sysconfig.template @@ -25,6 +25,9 @@ # --ovs-vswitchd-options='-vconsole:dbg -vfile:dbg' # --ovsdb-server-options='-vconsole:dbg -vfile:dbg' # +# Or to start with non-root IPsec config file: +# --ovs-monitor-ipsec-options='--ipsec-conf=/etc/ipsec.d/ovs.conf --root-ipsec-conf=/etc/ipsec.conf' +# OPTIONS="" # Uncomment and set the OVS User/Group value diff --git a/tests/automake.mk b/tests/automake.mk index edfc2cb335..59f5387612 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -498,6 +498,7 @@ endif if LINUX tests_ovstest_SOURCES += \ + tests/test-lib-route-table.c \ tests/test-netlink-conntrack.c \ tests/test-netlink-policy.c \ tests/test-psample.c diff --git a/tests/dpif-netdev.at b/tests/dpif-netdev.at index 36cea6aa95..60060ee2e0 100644 --- a/tests/dpif-netdev.at +++ b/tests/dpif-netdev.at @@ -658,11 +658,11 @@ OVS_VSWITCHD_START( other-config:datapath-id=1234 fail-mode=secure]) AT_CHECK([ovs-vsctl get interface p1 status | sed -n 's/^{\(.*\).*}$/\1/p'], [0], [dnl -tx_geneve_tso_offload="false", tx_ip_csum_offload="false", tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false", tx_sctp_csum_offload="false", tx_tcp_csum_offload="false", tx_tcp_seg_offload="false", tx_udp_csum_offload="false", tx_vxlan_tso_offload="false" +tx_geneve_tso_offload="false", tx_gre_tso_offload="false", tx_ip_csum_offload="false", tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false", tx_sctp_csum_offload="false", tx_tcp_csum_offload="false", 
tx_tcp_seg_offload="false", tx_udp_csum_offload="false", tx_vxlan_tso_offload="false" ], []) AT_CHECK([ovs-vsctl get interface br0 status | sed -n 's/^{\(.*\).*}$/\1/p'], [0], [dnl -tx_geneve_tso_offload="false", tx_ip_csum_offload="false", tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false", tx_sctp_csum_offload="false", tx_tcp_csum_offload="false", tx_tcp_seg_offload="false", tx_udp_csum_offload="false", tx_vxlan_tso_offload="false" +tx_geneve_tso_offload="false", tx_gre_tso_offload="false", tx_ip_csum_offload="false", tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false", tx_sctp_csum_offload="false", tx_tcp_csum_offload="false", tx_tcp_seg_offload="false", tx_udp_csum_offload="false", tx_vxlan_tso_offload="false" ], []) OVS_VSWITCHD_STOP @@ -937,15 +937,26 @@ AT_CHECK([ovs-vsctl add-br int-br -- set bridge int-br datapath_type=dummy \ options:csum=true ofport_request=4 \ -- add-port int-br t4 -- set Interface t4 type=geneve \ options:remote_ip=2001:cafe::93 options:key=123 \ - options:csum=true ofport_request=5], [0]) + options:csum=true ofport_request=5 \ + -- add-port int-br t5 -- set Interface t5 type=gre \ + options:remote_ip=2001:cafe::93 options:key=123 \ + options:csum=true ofport_request=6 \ + -- add-port int-br t6 -- set Interface t6 type=gre \ + options:remote_ip=1.1.2.92 options:key=123 \ + options:csum=false ofport_request=7], [0]) -flow_s="eth(src=8a:bf:7e:2f:05:84,dst=0a:8f:39:4f:e0:73),eth_type(0x0800), - ipv4(src=192.168.123.2,dst=192.168.123.1,proto=6,tos=1,ttl=64,frag=no), - tcp(src=54392,dst=5201),tcp_flags(ack)" +dnl The final tunnel intentionally has checksum turned off to exercise a +dnl different code path, there is no GRE checksum offload anyways. 
-flow_s_v6="eth(src=8a:bf:7e:2f:05:84,dst=0a:8f:39:4f:e0:73),eth_type(0x86dd), - ipv6(src=2001:cafe::88,dst=2001:cafe::92,proto=6), - tcp(src=54392,dst=5201),tcp_flags(ack)" +m4_define([IPV4_TSO], [m4_join([,], + [eth(src=8a:bf:7e:2f:05:84,dst=0a:8f:39:4f:e0:73),eth_type(0x0800)], + [ipv4(src=192.168.123.2,dst=192.168.123.1,proto=6,tos=1,ttl=64,frag=no)], + [tcp(src=54392,dst=5201),tcp_flags(ack)])]) + +m4_define([IPV6_TSO], [m4_join([,], + [eth(src=8a:bf:7e:2f:05:84,dst=0a:8f:39:4f:e0:73),eth_type(0x86dd)], + [ipv6(src=2001:cafe::88,dst=2001:cafe::92,proto=6)], + [tcp(src=54392,dst=5201),tcp_flags(ack)])]) dnl Setup dummy interface tunnel connectivity. AT_CHECK([ovs-appctl netdev-dummy/ip4addr br1 1.1.2.88/24], [0], [OK @@ -968,9 +979,9 @@ AT_CHECK([ovs-vsctl set Interface p1 options:tx_pcap=p1.pcap -- \ set Interface int-br options:ol_ip_csum_set_good=false -- \ set Interface int-br options:ol_tso_segsz=500]) -AT_CHECK([ovs-appctl netdev-dummy/receive int-br "in_port(2),${flow_s}" \ +AT_CHECK([ovs-appctl netdev-dummy/receive int-br "IPV4_TSO" \ --len 2054]) -AT_CHECK([ovs-appctl netdev-dummy/receive int-br "in_port(2),${flow_s_v6}" \ +AT_CHECK([ovs-appctl netdev-dummy/receive int-br "IPV6_TSO" \ --len 2074]) dnl Check that first we have the following packets: @@ -984,10 +995,26 @@ dnl - IPv6 Geneve tunnel with IPv4 payload dnl - IPv6 Geneve tunnel with IPv6 payload dnl - IPv6 Geneve tunnel with IPv4 payload dnl - IPv6 Geneve tunnel with IPv6 payload +dnl - IPv4 GRE tunnel with IPv4 payload +dnl - IPv4 GRE tunnel with IPv6 payload +dnl - IPv6 GRE tunnel with IPv4 payload +dnl - IPv6 GRE tunnel with IPv6 payload dnl These are sorted since OVS may send payloads to the tunnels in any order. 
zero400=$(printf '0%.0s' $(seq 800)) zero100=$(printf '0%.0s' $(seq 200)) AT_CHECK_UNQUOTED([ovs-pcap p1.pcap | sort], [0], [dnl +[aabbcc000001aa55aa55000308004500025a00004000402f31c0010102580101025c200065580000007b0a8f394fe0738abf]dnl +[7e2f058486dd60000000020806002001cafe0000000000000000000000882001cafe000000000000000000000092d4781451]dnl +[000000000000000050100000edfd0000${zero100}${zero400}] +[aabbcc000001aa55aa55000308004500025a00014000402f31bf010102580101025c200065580000007b0a8f394fe0738abf]dnl +[7e2f058486dd60000000020806002001cafe0000000000000000000000882001cafe000000000000000000000092d4781451]dnl +[000001f40000000050100000ec090000${zero100}${zero400}] +[aabbcc000001aa55aa55000308004500025a00024000402f31be010102580101025c200065580000007b0a8f394fe0738abf]dnl +[7e2f058486dd60000000020806002001cafe0000000000000000000000882001cafe000000000000000000000092d4781451]dnl +[000003e80000000050100000ea150000${zero100}${zero400}] +[aabbcc000001aa55aa55000308004500025a00034000402f31bd010102580101025c200065580000007b0a8f394fe0738abf]dnl +[7e2f058486dd60000000020806002001cafe0000000000000000000000882001cafe000000000000000000000092d4781451]dnl +[000005dc0000000050100000e8210000${zero100}${zero400}] [aabbcc000001aa55aa55000308004500026200004000401131d6010102580101025ce01312b5024e5f360800000000007b00]dnl [0a8f394fe0738abf7e2f058486dd60000000020806002001cafe0000000000000000000000882001cafe0000000000000000]dnl [00000092d4781451000000000000000050100000edfd0000${zero100}${zero400}] @@ -1012,6 +1039,18 @@ AT_CHECK_UNQUOTED([ovs-pcap p1.pcap | sort], [0], [dnl [aabbcc000001aa55aa55000308004500026200034000401131d3010102580101025ce01317c1024efcd10000655800007b00]dnl [0a8f394fe0738abf7e2f058486dd60000000020806002001cafe0000000000000000000000882001cafe0000000000000000]dnl [00000092d4781451000005dc0000000050100000e8210000${zero100}${zero400}] +[aabbcc000001aa55aa55000308004501024600004000402f31d3010102580101025c200065580000007b0a8f394fe0738abf]dnl 
+[7e2f058408004501021c0000000040060187c0a87b02c0a87b01d47814510000000000000000501000004dc20000]dnl +[${zero100}${zero400}] +[aabbcc000001aa55aa55000308004501024600014000402f31d2010102580101025c200065580000007b0a8f394fe0738abf]dnl +[7e2f058408004501021c0001000040060186c0a87b02c0a87b01d4781451000001f400000000501000004bce0000]dnl +[${zero100}${zero400}] +[aabbcc000001aa55aa55000308004501024600024000402f31d1010102580101025c200065580000007b0a8f394fe0738abf]dnl +[7e2f058408004501021c0002000040060185c0a87b02c0a87b01d4781451000003e8000000005010000049da0000]dnl +[${zero100}${zero400}] +[aabbcc000001aa55aa55000308004501024600034000402f31d0010102580101025c200065580000007b0a8f394fe0738abf]dnl +[7e2f058408004501021c0003000040060184c0a87b02c0a87b01d4781451000005dc000000005010000047e60000]dnl +[${zero100}${zero400}] [aabbcc000001aa55aa55000308004501024e00004000401131e9010102580101025ce01312b5023abd990800000000007b00]dnl [0a8f394fe0738abf7e2f058408004501021c0000000040060187c0a87b02c0a87b01d4781451000000000000000050100000]dnl [4dc20000${zero100}${zero400}] @@ -1036,6 +1075,18 @@ AT_CHECK_UNQUOTED([ovs-pcap p1.pcap | sort], [0], [dnl [aabbcc000001aa55aa55000308004501024e00034000401131e6010102580101025ce01317c1023a5b350000655800007b00]dnl [0a8f394fe0738abf7e2f058408004501021c0003000040060184c0a87b02c0a87b01d4781451000005dc0000000050100000]dnl [47e60000${zero100}${zero400}] +[aabbcc000006aa55aa55000386dd60000000024a2f402001cafe0000000000000000000000882001cafe0000000000000000]dnl +[00000093a0006558da8e00000000007b0a8f394fe0738abf7e2f058486dd60000000020806002001cafe0000000000000000]dnl +[000000882001cafe000000000000000000000092d4781451000005dc0000000050100000e8210000${zero100}${zero400}] +[aabbcc000006aa55aa55000386dd60000000024a2f402001cafe0000000000000000000000882001cafe0000000000000000]dnl +[00000093a0006558dc8200000000007b0a8f394fe0738abf7e2f058486dd60000000020806002001cafe0000000000000000]dnl 
+[000000882001cafe000000000000000000000092d4781451000003e80000000050100000ea150000${zero100}${zero400}] +[aabbcc000006aa55aa55000386dd60000000024a2f402001cafe0000000000000000000000882001cafe0000000000000000]dnl +[00000093a0006558de7600000000007b0a8f394fe0738abf7e2f058486dd60000000020806002001cafe0000000000000000]dnl +[000000882001cafe000000000000000000000092d4781451000001f40000000050100000ec090000${zero100}${zero400}] +[aabbcc000006aa55aa55000386dd60000000024a2f402001cafe0000000000000000000000882001cafe0000000000000000]dnl +[00000093a0006558e06a00000000007b0a8f394fe0738abf7e2f058486dd60000000020806002001cafe0000000000000000]dnl +[000000882001cafe000000000000000000000092d4781451000000000000000050100000edfd0000${zero100}${zero400}] [aabbcc000006aa55aa55000386dd60000000024e11402001cafe0000000000000000000000882001cafe0000000000000000]dnl [00000093e01312b5024e8ed10800000000007b000a8f394fe0738abf7e2f058486dd60000000020806002001cafe00000000]dnl [00000000000000882001cafe000000000000000000000092d4781451000000000000000050100000edfd0000${zero100}]dnl @@ -1068,6 +1119,18 @@ AT_CHECK_UNQUOTED([ovs-pcap p1.pcap | sort], [0], [dnl [00000093e01317c1024e2c6d0000655800007b000a8f394fe0738abf7e2f058486dd60000000020806002001cafe00000000]dnl [00000000000000882001cafe000000000000000000000092d4781451000005dc0000000050100000e8210000${zero100}]dnl [${zero400}] +[aabbcc000006aa55aa55000386dd6010000002362f402001cafe0000000000000000000000882001cafe0000000000000000]dnl +[00000093a00065583a4e00000000007b0a8f394fe0738abf7e2f058408004501021c0003000040060184c0a87b02c0a87b01]dnl +[d4781451000005dc000000005010000047e60000${zero100}${zero400}] +[aabbcc000006aa55aa55000386dd6010000002362f402001cafe0000000000000000000000882001cafe0000000000000000]dnl +[00000093a00065583c4300000000007b0a8f394fe0738abf7e2f058408004501021c0002000040060185c0a87b02c0a87b01]dnl +[d4781451000003e8000000005010000049da0000${zero100}${zero400}] 
+[aabbcc000006aa55aa55000386dd6010000002362f402001cafe0000000000000000000000882001cafe0000000000000000]dnl +[00000093a00065583e3800000000007b0a8f394fe0738abf7e2f058408004501021c0001000040060186c0a87b02c0a87b01]dnl +[d4781451000001f400000000501000004bce0000${zero100}${zero400}] +[aabbcc000006aa55aa55000386dd6010000002362f402001cafe0000000000000000000000882001cafe0000000000000000]dnl +[00000093a0006558402d00000000007b0a8f394fe0738abf7e2f058408004501021c0000000040060187c0a87b02c0a87b01]dnl +[d47814510000000000000000501000004dc20000${zero100}${zero400}] [aabbcc000006aa55aa55000386dd60100000023a11402001cafe0000000000000000000000882001cafe0000000000000000]dnl [00000093e01312b5023aed340800000000007b000a8f394fe0738abf7e2f058408004501021c0000000040060187c0a87b02]dnl [c0a87b01d47814510000000000000000501000004dc20000${zero100}${zero400}] diff --git a/tests/ofproto-macros.at b/tests/ofproto-macros.at index 779a054e8c..3cd49d7a71 100644 --- a/tests/ofproto-macros.at +++ b/tests/ofproto-macros.at @@ -171,9 +171,9 @@ strip_eth () { # 'recirc=' respectively. This should make output easier to # compare. strip_recirc() { - sed 's/recirc_id([[x0-9]]*)/recirc_id()/ - s/recirc_id=[[x0-9]]*/recirc_id=/ - s/recirc([[x0-9]]*)/recirc()/' + sed 's/recirc_id([[x0-9a-f]]*)/recirc_id()/ + s/recirc_id=[[x0-9a-f]]*/recirc_id=/ + s/recirc([[x0-9a-f]]*)/recirc()/' } # Strips dp_hash from output. 
diff --git a/tests/system-kmod-macros.at b/tests/system-kmod-macros.at index a48bd532a0..7a7a19f7e4 100644 --- a/tests/system-kmod-macros.at +++ b/tests/system-kmod-macros.at @@ -202,6 +202,14 @@ m4_define([DPCTL_CHECK_FRAGMENTATION_FAIL], ]) +# OVS_CHECK_FRAG_LARGE +# +# This check isn't valid for kernel +m4_define([OVS_CHECK_FRAG_LARGE], +[ + +]) + # OVS_CHECK_MIN_KERNEL([minversion], [minsublevel]) # # Skip test if kernel version falls below minversion.minsublevel diff --git a/tests/system-route.at b/tests/system-route.at index c0ecad6cfb..66bfd0e8ed 100644 --- a/tests/system-route.at +++ b/tests/system-route.at @@ -65,6 +65,26 @@ Cached: fc00:db8:beef::13/128 dev br0 GW fc00:db8:cafe::1 SRC fc00:db8:cafe::2]) OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([ovs-route - add system route - ipv4 via ipv6 nexthop]) +AT_KEYWORDS([route]) +OVS_TRAFFIC_VSWITCHD_START() +AT_CHECK([ovs-vsctl set bridge br0 other-config:hwaddr=00:53:00:00:00:42]) +AT_CHECK([ip link set br0 up]) + +AT_CHECK([ip addr add 192.168.9.2/24 dev br0], [0], [stdout]) + +AT_CHECK([ip route add 192.168.10.12/32 \ + via inet6 fe80::253:ff:fe00:51 dev br0], [0], [stdout]) + +AT_CHECK([ovs-appctl revalidator/wait]) + +OVS_WAIT_UNTIL_EQUAL([ovs-appctl ovs/route/show | \ + grep -E '192.168.10.12/32' | sort], [dnl +Cached: 192.168.10.12/32 dev br0 GW fe80::253:ff:fe00:51 SRC fe80::253:ff:fe00:42]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + dnl Checks that OVS doesn't use routes from non-standard tables. AT_SETUP([ovs-route - route tables]) AT_KEYWORDS([route]) @@ -91,8 +111,13 @@ Cached: 10.0.0.0/24 dev p1-route SRC 10.0.0.17 Cached: 10.0.0.17/32 dev p1-route SRC 10.0.0.17 local Cached: 10.0.0.18/32 dev p1-route SRC 10.0.0.17]) +dnl Negative check for custom routing table using route-table library. 
+AT_CHECK([ovstest test-lib-route-table-dump | grep rta_table_id:\ 42], [1]) +AT_CHECK([ovstest test-lib-route-table-dump | grep rta_table_id:\ 1042], [1]) + dnl Add a route to a custom routing table and check that OVS doesn't cache it. AT_CHECK([ip route add 10.0.0.19/32 dev p1-route table 42]) +AT_CHECK([ip route add 10.0.0.20/32 dev p1-route table 1042]) AT_CHECK([ip route show table 42 | grep 'p1-route' | grep -q '10.0.0.19']) dnl Give the main thread a chance to act. AT_CHECK([ovs-appctl revalidator/wait]) @@ -102,6 +127,11 @@ Cached: 10.0.0.0/24 dev p1-route SRC 10.0.0.17 Cached: 10.0.0.17/32 dev p1-route SRC 10.0.0.17 local Cached: 10.0.0.18/32 dev p1-route SRC 10.0.0.17 ]) +AT_CHECK([ovstest test-lib-route-table-dump | \ + awk '/rta_table_id:.*42/{print$1" "$15" "$16}' | sort], [0], [dnl +10.0.0.19/32 rta_table_id: 42 +10.0.0.20/32 rta_table_id: 1042 +]) dnl Delete a route from the main table and check that OVS removes the route dnl from the cache. @@ -128,3 +158,177 @@ OVS_WAIT_UNTIL([test $(ovs-appctl ovs/route/show | grep -c 'p1-route') -eq 0 ]) OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP + +AT_SETUP([ovs-route - add system route with multiple nexthop - ipv4]) +AT_KEYWORDS([route]) +OVS_TRAFFIC_VSWITCHD_START() + +dnl Create tap ports. +AT_CHECK([ip tuntap add name p1-route mode tap]) +AT_CHECK([ip link set p1-route up]) +on_exit 'ip link del p1-route' +AT_CHECK([ip tuntap add name p2-route mode tap]) +AT_CHECK([ip link set p2-route up]) +on_exit 'ip link del p2-route' + +AT_CHECK([ip addr add 192.168.42.10/24 dev p1-route], [0], [stdout]) +AT_CHECK([ip addr add 192.168.51.10/24 dev p2-route], [0], [stdout]) +AT_CHECK([ip route add 172.16.42.0/24 nexthop via 192.168.42.1 \ + dev p1-route nexthop via 192.168.51.1 dev p2-route], [0], [stdout]) + +dnl NOTE: At the time of this writing, it is expected that only the first route +dnl will be stored in ovs-router. 
+OVS_WAIT_UNTIL_EQUAL([ovs-appctl ovs/route/show | grep -E '172.16.42.0/24' | \ + sort], [dnl +Cached: 172.16.42.0/24 dev p1-route GW 192.168.42.1 SRC 192.168.42.10]) + +dnl Confirm that both nexthops are available when using the route-table library +dnl directly. +AT_CHECK([ovstest test-lib-route-table-dump | grep 172.16.42.0.*nexthop | sort], + [0], [dnl + 172.16.42.0/24 nexthop family: AF_INET addr: 192.168.42.1 ifname: p1-route + 172.16.42.0/24 nexthop family: AF_INET addr: 192.168.51.1 ifname: p2-route +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + +AT_SETUP([ovs-route - add system route - ipv4 via multiple ipv6 nexthop]) +AT_KEYWORDS([route]) +OVS_TRAFFIC_VSWITCHD_START() + +dnl Create tap ports. +AT_CHECK([ip tuntap add name p1-route mode tap]) +AT_CHECK([ip link set p1-route up]) +on_exit 'ip link del p1-route' +AT_CHECK([ip tuntap add name p2-route mode tap]) +AT_CHECK([ip link set p2-route up]) +on_exit 'ip link del p2-route' + +AT_CHECK([ip -6 addr add fc00:db8:dead::10/64 dev p1-route], [0], [stdout]) +AT_CHECK([ip -6 addr add fc00:db8:beef::10/64 dev p2-route], [0], [stdout]) +AT_CHECK([ip route add 172.16.42.0/24 nexthop via inet6 fc00:db8:dead::1 \ + dev p1-route nexthop via inet6 fc00:db8:beef::1 dev p2-route], + [0], [stdout]) + +dnl NOTE: At the time of this writing, it is expected that only the first route +dnl will be stored in ovs-router. +OVS_WAIT_UNTIL_EQUAL([ovs-appctl ovs/route/show | grep -E '172.16.42.0/24' | \ + sort], [dnl +Cached: 172.16.42.0/24 dev p1-route GW fc00:db8:dead::1 SRC fc00:db8:dead::10]) + +dnl Confirm that both nexthops are available when using the route-table library +dnl directly. 
+AT_CHECK([ovstest test-lib-route-table-dump | grep 172.16.42.0.*nexthop | sort], + [0], [dnl + 172.16.42.0/24 nexthop family: AF_INET6 addr: fc00:db8:beef::1 ifname: p2-route + 172.16.42.0/24 nexthop family: AF_INET6 addr: fc00:db8:dead::1 ifname: p1-route +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + +AT_SETUP([ovs-route - add system route with multiple nexthop - ipv6]) +AT_KEYWORDS([route]) +OVS_TRAFFIC_VSWITCHD_START() + +dnl Create tap ports. +AT_CHECK([ip tuntap add name p1-route mode tap]) +AT_CHECK([ip link set p1-route up]) +on_exit 'ip link del p1-route' +AT_CHECK([ip tuntap add name p2-route mode tap]) +AT_CHECK([ip link set p2-route up]) +on_exit 'ip link del p2-route' + +AT_CHECK([ip -6 addr add fc00:db8:dead::10/64 dev p1-route], [0], [stdout]) +AT_CHECK([ip -6 addr add fc00:db8:beef::10/64 dev p2-route], [0], [stdout]) +AT_CHECK([ip -6 route add fc00:db8:cafe::/64 nexthop via fc00:db8:dead::1 \ + dev p1-route nexthop via fc00:db8:beef::1 dev p2-route], + [0], [stdout]) + +dnl NOTE: At the time of this writing, it is expected that only the first route +dnl will be stored in ovs-router. +OVS_WAIT_UNTIL_EQUAL([ovs-appctl ovs/route/show | \ + grep -E 'fc00:db8:cafe::/64' | sort], [dnl +Cached: fc00:db8:cafe::/64 dev p1-route GW fc00:db8:dead::1 SRC fc00:db8:dead::10]) + +dnl Confirm that both nexthops are available when using the route-table library +dnl directly. +AT_CHECK([ovstest test-lib-route-table-dump | grep fc00:db8:cafe::.*nexthop | \ + sort], [0], [dnl + fc00:db8:cafe::/64 nexthop family: AF_INET6 addr: fc00:db8:beef::1 ifname: p2-route + fc00:db8:cafe::/64 nexthop family: AF_INET6 addr: fc00:db8:dead::1 ifname: p1-route +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + +AT_SETUP([route-table - exported functions work for netlink-notifier]) +AT_KEYWORDS([route]) + +dnl Create tap ports. 
+AT_CHECK([ip tuntap add name p1-route mode tap]) +AT_CHECK([ip link set p1-route up]) +on_exit 'ip link del p1-route' +AT_CHECK([ip tuntap add name p2-route mode tap]) +AT_CHECK([ip link set p2-route up]) +on_exit 'ip link del p2-route' + +AT_CHECK([ip -6 addr add fc00:db8:dead::10/64 dev p1-route], [0], [stdout]) +AT_CHECK([ip -6 addr add fc00:db8:beef::10/64 dev p2-route], [0], [stdout]) + +AT_CHECK([ovstest test-lib-route-table-monitor 'ip route add 172.16.42.0/24 \ + nexthop via inet6 fc00:db8:dead::1 dev p1-route \ + nexthop via inet6 fc00:db8:beef::1 dev p2-route' | \ + grep 172.16.42.0.*nexthop | sort], [0], [dnl + 172.16.42.0/24 nexthop family: AF_INET6 addr: fc00:db8:beef::1 ifname: p2-route + 172.16.42.0/24 nexthop family: AF_INET6 addr: fc00:db8:dead::1 ifname: p1-route +]) + +AT_CLEANUP + +AT_SETUP([route-table - route attributes]) +AT_KEYWORDS([route]) + +dnl Create tap ports. +AT_CHECK([ip tuntap add name p1-route mode tap]) +AT_CHECK([ip link set p1-route up]) +on_exit 'ip link del p1-route' + + +dnl Add ip address. +AT_CHECK([ip addr add 10.0.0.17/24 dev p1-route], [0], [stdout]) +AT_CHECK([ovstest test-lib-route-table-dump | \ + awk '/^10.0.0.17/{print$1" "$6" "$7}'], [0], [dnl +10.0.0.17/32 rtm_protocol: RTPROT_KERNEL +]) + +dnl Add route. +AT_CHECK([ip route add 192.168.10.12/32 dev p1-route via 10.0.0.18], [0], + [stdout]) +AT_CHECK([ovstest test-lib-route-table-dump | \ + awk '/^192.168.10.12/{print$1" "$17" "$18}'], [0], [dnl +192.168.10.12/32 rta_priority: 0 +]) +AT_CHECK([ovstest test-lib-route-table-dump | \ + awk '/^192.168.10.12/{print$1" "$6" "$7}'], [0], [dnl +192.168.10.12/32 rtm_protocol: RTPROT_BOOT +]) + +dnl Delete route. +AT_CHECK([ip route del 192.168.10.12/32 dev p1-route via 10.0.0.18], [0], + [stdout]) + +dnl Add route with priority. 
+AT_CHECK([ip route add 192.168.10.12/32 dev p1-route via 10.0.0.18 metric 42], + [0], [stdout]) +AT_CHECK([ovstest test-lib-route-table-dump | \ + awk '/^192.168.10.12/{print$1" "$17" "$18}'], [0], [dnl +192.168.10.12/32 rta_priority: 42 +]) +AT_CHECK([ovstest test-lib-route-table-dump | \ + awk '/^192.168.10.12/{print$1" "$6" "$7}'], [0], [dnl +192.168.10.12/32 rtm_protocol: RTPROT_BOOT +]) + +AT_CLEANUP diff --git a/tests/system-traffic.at b/tests/system-traffic.at index 16de8da20f..04328db4cc 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -557,7 +557,6 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over gre tunnel]) -OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) OVS_CHECK_GRE() OVS_TRAFFIC_VSWITCHD_START() @@ -615,8 +614,97 @@ OVS_WAIT_UNTIL([diff -q payload.bin udp_data]) OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([datapath - tcp over gre tunnel with software fallback]) +AT_SKIP_IF([test $HAVE_NC = no]) +AT_SKIP_IF([test $HAVE_TCPDUMP = no]) +OVS_CHECK_GRE() + +dnl This test is only valid with tso. If the kernel segments the packets, the +dnl packet lengths in the final test will be different. +m4_ifndef([CHECK_SYSTEM_TSO], [AT_SKIP_IF(:)]) + +OVS_TRAFFIC_VSWITCHD_START() +ADD_BR([br-underlay]) + +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"]) +AT_CHECK([ovs-ofctl add-flow br-underlay "actions=normal"]) + +ADD_NAMESPACES(at_ns0) + +dnl Set up underlay link from host into the namespace using veth pair. +ADD_VETH(p0, at_ns0, br-underlay, "172.31.1.1/24") +AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"]) +AT_CHECK([ip link set dev br-underlay up]) + +dnl Test the case where one side has all checksum and TSO offload disabled. +AT_CHECK([ethtool -K ovs-p0 tso off], [0], [ignore], [ignore]) +AT_CHECK([ethtool -K ovs-p0 sg off], [0], [ignore], [ignore]) + +dnl Reinitialize. 
+AT_CHECK([ovs-vsctl del-port ovs-p0]) +AT_CHECK([ovs-vsctl add-port br-underlay ovs-p0]) + +dnl Set up tunnel endpoints on OVS outside the namespace and with a native +dnl linux device inside the namespace. +ADD_OVS_TUNNEL([gre], [br0], [at_gre0], [172.31.1.1], [10.1.1.100/24]) +ADD_NATIVE_TUNNEL([gretap], [at_gre1], [at_ns0], [172.31.1.100], [10.1.1.1/24]) + +dnl Set MTU for tunnel to generate 1500 byte packets. +AT_CHECK([ip link set dev br0 mtu 1400]) + +dnl First, check the underlay. +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -W 2 172.31.1.100 | FORMAT_PING], + [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +dnl Check that the tunnel is up. +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -W 2 10.1.1.100 | FORMAT_PING], + [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +dnl Start tcpdump to capture the encapsulated packets. +OVS_DAEMONIZE([tcpdump -i ovs-p0 -w p0.pcap], [tcpdump.pid]) + +dnl Wait until the pcap is written, which happens after the interface +dnl is opened by tcpdump. +OVS_WAIT_UNTIL([test -e p0.pcap]) + +dnl Initialize the listener before it is needed. +NETNS_DAEMONIZE([at_ns0], [nc -l 10.1.1.1 1234 > data2], [nc.pid]) + +dnl Verify that ncat is ready. +OVS_WAIT_UNTIL([NS_EXEC([at_ns0], [netstat -ln | grep :1234])]) + +dnl Large TCP transfer aimed towards ovs-p0, which has TSO disabled. +AT_CHECK([dd if=/dev/urandom of=payload.bin bs=60000 count=1 2> /dev/null]) +AT_CHECK([nc $NC_EOF_OPT 10.1.1.1 1234 < payload.bin]) + +dnl Wait until transfer completes before checking. +OVS_WAIT_WHILE([kill -0 $(cat nc.pid)]) +AT_CHECK([diff -q payload.bin data2], [0]) +OVS_WAIT_WHILE([test $(stat -c %s p0.pcap) -le 68000 ]) + +dnl Stop OVS and tcpdump and verify the results. 
+AT_CHECK([kill -15 $(cat tcpdump.pid)]) +OVS_WAIT_WHILE([kill -0 $(cat tcpdump.pid)]) + +dnl The exact number of packets sent will vary, but we check that the largest +dnl segments have the correct lengths and certain other fields. +AT_CHECK([test $(ovs-pcap p0.pcap | grep -Ec dnl +"^.{24}0800"dnl Ethernet +"4500059e....4000..2f....ac1f0164ac1f0101"dnl IP(len=1450, DF, GRE, 172.31.1.100->172.31.1.1) +"00006558"dnl GRE(flags=0, proto=0x6558) +".{24}0800"dnl Ethernet +"45000578....4000..06....0a0101640a010101"dnl IP(len=1400, DF, TCP, 10.1.1.100->10.1.1.1) +"....04d2............................0000"dnl TCP(dport=1234 +) -ge 20]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([datapath - ping over ip6gre L2 tunnel]) -OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) OVS_CHECK_GRE() OVS_CHECK_ERSPAN() @@ -651,6 +739,25 @@ dnl Okay, now check the overlay with different packet sizes NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -W 2 10.1.1.100 | FORMAT_PING], [0], [dnl 3 packets transmitted, 3 received, 0% packet loss, time 0ms ]) + +dnl Start ncat listeners. +OVS_DAEMONIZE([nc -l 10.1.1.100 1234 > tcp_data], [nc.pid]) +NETNS_DAEMONIZE([at_ns0], [nc -l -u 10.1.1.1 4321 > udp_data], [nc2.pid]) + +dnl Verify that ncat is ready. +OVS_WAIT_UNTIL([netstat -ln | grep :1234]) +OVS_WAIT_UNTIL([NS_EXEC([at_ns0], [netstat -ln | grep :4321])]) + +dnl Check large bidirectional TCP. +AT_CHECK([dd if=/dev/urandom of=payload.bin bs=60000 count=1 2> /dev/null]) +NS_CHECK_EXEC([at_ns0], [nc $NC_EOF_OPT 10.1.1.100 1234 < payload.bin]) +OVS_WAIT_UNTIL([diff -q payload.bin tcp_data]) + +dnl Check UDP. 
+AT_CHECK([dd if=/dev/urandom of=payload.bin bs=600 count=1 2> /dev/null]) +AT_CHECK([nc $NC_EOF_OPT -u 10.1.1.1 4321 < payload.bin]) +OVS_WAIT_UNTIL([diff -q payload.bin udp_data]) + OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP @@ -1191,7 +1298,6 @@ AT_CLEANUP AT_SETUP([datapath - ping over gre tunnel by simulated packets]) OVS_CHECK_XT() -OVS_CHECK_MIN_KERNEL(3, 10) OVS_TRAFFIC_VSWITCHD_START() AT_CHECK([ovs-vsctl -- set bridge br0 other-config:hwaddr=\"f2:ff:00:00:00:01\"]) @@ -2033,7 +2139,6 @@ dnl ns1: connect to br0, with IP:10.1.1.2 dnl br-underlay: with IP: 172.31.1.100 dnl ns0: connect to br-underlay, with IP: 10.1.1.1 AT_SETUP([datapath - truncate and output to gre tunnel by simulated packets]) -OVS_CHECK_MIN_KERNEL(3, 10) AT_SKIP_IF([test $HAVE_NC = no]) CHECK_NO_TC_OFFLOAD() OVS_TRAFFIC_VSWITCHD_START() @@ -2165,7 +2270,6 @@ dnl br-underlay: with IP: 172.31.1.100 dnl ns0: connect to br-underlay, with IP: 10.1.1.1 AT_SETUP([datapath - truncate and output to gre tunnel]) AT_SKIP_IF([test $HAVE_NC = no]) -OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) OVS_CHECK_GRE() CHECK_NO_TC_OFFLOAD() OVS_TRAFFIC_VSWITCHD_START() @@ -4603,7 +4707,11 @@ NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -W 2 10.1.1.2 | FORMAT_PING dnl Check userspace conntrack fragmentation counters. DPCTL_CHECK_FRAGMENTATION_PASS() -OVS_TRAFFIC_VSWITCHD_STOP +dnl Ipv4 max packet size fragmentation dropped. +NS_EXEC([at_ns0], [ping -s 65507 -q -c 1 -W 0.5 10.1.1.2]) +OVS_CHECK_FRAG_LARGE() + +OVS_TRAFFIC_VSWITCHD_STOP(["/Unsupported big reassembled v4 packet/d"]) AT_CLEANUP AT_SETUP([conntrack - IPv4 fragmentation expiry]) @@ -4897,7 +5005,11 @@ NS_CHECK_EXEC([at_ns0], [ping6 -s 3200 -q -c 3 -i 0.3 -W 2 fc00::2 | FORMAT_PING 3 packets transmitted, 3 received, 0% packet loss, time 0ms ]) -OVS_TRAFFIC_VSWITCHD_STOP +dnl Ipv6 max packet size fragmentation dropped. 
+NS_EXEC([at_ns0], [ping6 -s 65487 -q -c 1 -W 0.5 fc00::2]) +OVS_CHECK_FRAG_LARGE() + +OVS_TRAFFIC_VSWITCHD_STOP(["/Unsupported big reassembled v6 packet/d"]) AT_CLEANUP AT_SETUP([conntrack - IPv6 fragmentation expiry]) diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at index c1be973478..49b277a089 100644 --- a/tests/system-userspace-macros.at +++ b/tests/system-userspace-macros.at @@ -298,6 +298,14 @@ AT_CHECK([ovs-appctl dpctl/ipf-get-status -m | FORMAT_FRAG_LIST()], [], [dnl ]) ]) +# OVS_CHECK_FRAG_LARGE() +# +# The userspace needs to check that ipf larger fragments have occurred. +m4_define([OVS_CHECK_FRAG_LARGE], +[ + OVS_WAIT_UNTIL([grep -Eq 'Unsupported big reassembled (v4|v6) packet' ovs-vswitchd.log]) +]) + # OVS_CHECK_MIN_KERNEL([minversion], [maxversion]) # # The userspace skips all tests that check kernel version. diff --git a/tests/test-lib-route-table.c b/tests/test-lib-route-table.c new file mode 100644 index 0000000000..61d97e06ff --- /dev/null +++ b/tests/test-lib-route-table.c @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2024 Canonical Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#undef NDEBUG + +#include +#include +#include + +#include "netlink-notifier.h" +#include "ovstest.h" +#include "packets.h" +#include "route-table.h" + +static char * +rt_prot_name(unsigned char p) +{ + /* We concentrate on the most used protocols, as they are the ones most + * likely to be defined in the build environment. */ + return p == RTPROT_UNSPEC ? "RTPROT_UNSPEC" : + p == RTPROT_REDIRECT ? "RTPROT_REDIRECT" : + p == RTPROT_KERNEL ? "RTPROT_KERNEL" : + p == RTPROT_BOOT ? "RTPROT_BOOT" : + p == RTPROT_STATIC ? "RTPROT_STATIC" : + p == RTPROT_RA ? "RTPROT_RA" : + p == RTPROT_DHCP ? "RTPROT_DHCP" : + p == RTPROT_BGP ? "RTPROT_BGP" : + "UNKNOWN"; +} + +static char * +rt_table_name(uint32_t id) +{ + static char tid[11] = ""; + + snprintf(tid, sizeof tid, "%"PRIu32, id); + + return id == RT_TABLE_UNSPEC ? "RT_TABLE_UNSPEC" : + id == RT_TABLE_COMPAT ? "RT_TABLE_COMPAT" : + id == RT_TABLE_DEFAULT ? "RT_TABLE_DEFAULT" : + id == RT_TABLE_MAIN ? "RT_TABLE_MAIN" : + id == RT_TABLE_LOCAL ? 
"RT_TABLE_LOCAL" : + tid; +} + +static void +test_lib_route_table_handle_msg(const struct route_table_msg *change, + void *data OVS_UNUSED) +{ + struct ds nexthop_addr = DS_EMPTY_INITIALIZER; + struct ds rta_prefsrc = DS_EMPTY_INITIALIZER; + const struct route_data *rd = &change->rd; + struct ds rta_dst = DS_EMPTY_INITIALIZER; + const struct route_data_nexthop *rdnh; + + ipv6_format_mapped(&change->rd.rta_prefsrc, &rta_prefsrc); + ipv6_format_mapped(&change->rd.rta_dst, &rta_dst); + + printf("%s/%u relevant: %d nlmsg_type: %d rtm_protocol: %s (%u) " + "rtn_local: %d rta_prefsrc: %s rta_mark: %"PRIu32" " + "rta_table_id: %s rta_priority: %"PRIu32"\n", + ds_cstr(&rta_dst), rd->rtm_dst_len, change->relevant, + change->nlmsg_type, rt_prot_name(rd->rtm_protocol), + rd->rtm_protocol, rd->rtn_local, ds_cstr(&rta_prefsrc), + rd->rta_mark, rt_table_name(rd->rta_table_id), rd->rta_priority); + + LIST_FOR_EACH (rdnh, nexthop_node, &rd->nexthops) { + ds_clear(&nexthop_addr); + ipv6_format_mapped(&rdnh->addr, &nexthop_addr); + printf(" %s/%u nexthop family: %s addr: %s ifname: %s\n", + ds_cstr(&rta_dst), rd->rtm_dst_len, + rdnh->family == AF_INET ? "AF_INET" : + rdnh->family == AF_INET6 ? 
"AF_INET6" : + "UNKNOWN", + ds_cstr(&nexthop_addr), + rdnh->ifname); + } + + ds_destroy(&nexthop_addr); + ds_destroy(&rta_prefsrc); + ds_destroy(&rta_dst); +} + +static void +test_lib_route_table_dump(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) +{ + route_table_dump_one_table(RT_TABLE_UNSPEC, + test_lib_route_table_handle_msg, + NULL); +} + +static void +test_lib_route_table_change(struct route_table_msg *change, + void *aux OVS_UNUSED) +{ + test_lib_route_table_handle_msg(change, NULL); + route_data_destroy(&change->rd); +} + +static void +test_lib_route_table_monitor(int argc, char *argv[]) +{ + static struct nln_notifier *route6_notifier OVS_UNUSED; + static struct nln_notifier *route_notifier OVS_UNUSED; + static struct route_table_msg rtmsg; + static struct nln *nln OVS_UNUSED; + const char *cmd = argv[1]; + + if (argc != 2) { + printf("usage: ovstest %s 'ip route add ...'\n", argv[0]); + exit(EXIT_FAILURE); + } + + nln = nln_create(NETLINK_ROUTE, route_table_parse, &rtmsg); + + route_notifier = + nln_notifier_create(nln, RTNLGRP_IPV4_ROUTE, + (nln_notify_func *) test_lib_route_table_change, + NULL); + route6_notifier = + nln_notifier_create(nln, RTNLGRP_IPV6_ROUTE, + (nln_notify_func *) test_lib_route_table_change, + NULL); + nln_run(nln); + nln_wait(nln); + int rc = system(cmd); + if (rc) { + exit(rc); + } + nln_run(nln); +} + +OVSTEST_REGISTER("test-lib-route-table-monitor", test_lib_route_table_monitor); +OVSTEST_REGISTER("test-lib-route-table-dump", test_lib_route_table_dump); diff --git a/utilities/ovs-ctl.in b/utilities/ovs-ctl.in index 57abd3a5b4..03a39337f5 100644 --- a/utilities/ovs-ctl.in +++ b/utilities/ovs-ctl.in @@ -242,15 +242,20 @@ start_forwarding () { } start_ovs_ipsec () { + set ${datadir}/scripts/ovs-monitor-ipsec unix:"$DB_SOCK" + set "$@" --log-file=${logdir}/ovs-monitor-ipsec.log + set "$@" --pidfile=${rundir}/ovs-monitor-ipsec.pid + set "$@" --detach + test X"$MONITOR" = Xno || set "$@" --monitor + set "$@" --ike-daemon=$IKE_DAEMON if 
test X$RESTART_IKE_DAEMON = Xno; then - no_restart="--no-restart-ike-daemon" + set "$@" --no-restart-ike-daemon + fi + if test X"$OVS_MONITOR_IPSEC_OPTIONS" != X; then + set "$@" $OVS_MONITOR_IPSEC_OPTIONS fi - ${datadir}/scripts/ovs-monitor-ipsec \ - --pidfile=${rundir}/ovs-monitor-ipsec.pid \ - --ike-daemon=$IKE_DAEMON \ - $no_restart \ - --log-file --detach --monitor unix:${rundir}/db.sock || return 1 + action "Starting ovs-monitor-ipsec" "$@" || return 1 return 0 } @@ -348,6 +353,7 @@ set_defaults () { OVS_VSWITCHD_WRAPPER= OVSDB_SERVER_OPTIONS= OVS_VSWITCHD_OPTIONS= + OVS_MONITOR_IPSEC_OPTIONS= OVSDB_SERVER_UMASK= OVS_VSWITCHD_UMASK= @@ -463,6 +469,9 @@ Option for "start-ovs-ipsec": the IKE daemon for ipsec tunnels (either libreswan or strongswan) --no-restart-ike-daemon do not restart the IKE daemon on startup + --ovs-monitor-ipsec-options=OPTIONS + additional options for ovs-monitor-ipsec (example: + '--ipsec-conf=/etc/ipsec.d/ovs.conf --root-ipsec-conf=/etc/ipsec.conf') Other options: -h, --help display this help message diff --git a/utilities/ovs-vsctl-bashcomp.bash b/utilities/ovs-vsctl-bashcomp.bash index c5ad24fb70..5313fa9d48 100644 --- a/utilities/ovs-vsctl-bashcomp.bash +++ b/utilities/ovs-vsctl-bashcomp.bash @@ -32,11 +32,15 @@ _ovs_vsctl () { # A bar (|) character in an argument means thing before bar OR thing # after bar; for example, del-port can take a port or an interface. -_OVS_VSCTL_COMMANDS="$(_ovs_vsctl --commands)" - -# This doesn't complete on short arguments, so it filters them out. -_OVS_VSCTL_OPTIONS="$(_ovs_vsctl --options | awk '/^--/ { print $0 }' \ - | sed -e 's/\(.*\)=ARG/\1=/')" +_OVS_VSCTL_COMMANDS= +_OVS_VSCTL_OPTIONS= +if command -v ovs-vsctl > /dev/null; then + _OVS_VSCTL_COMMANDS="$(_ovs_vsctl --commands)" + + # This doesn't complete on short arguments, so it filters them out. 
+ _OVS_VSCTL_OPTIONS="$(_ovs_vsctl --options | awk '/^--/ { print $0 }' \ + | sed -e 's/\(.*\)=ARG/\1=/')" +fi IFS=$SAVE_IFS declare -A _OVS_VSCTL_PARSED_ARGS diff --git a/dpdk/VERSION b/dpdk/VERSION index 0a492611a0..9e2934aa34 100644 --- a/dpdk/VERSION +++ b/dpdk/VERSION @@ -1 +1 @@ -24.11.0 +24.11.1 diff --git a/dpdk/doc/guides/rel_notes/release_24_11.rst b/dpdk/doc/guides/rel_notes/release_24_11.rst index 8486cd986f..f9df63141e 100644 --- a/dpdk/doc/guides/rel_notes/release_24_11.rst +++ b/dpdk/doc/guides/rel_notes/release_24_11.rst @@ -616,3 +616,22 @@ Tested Platforms * Firmware version: 2.14, 0x8000028c * Device id (pf): 8086:125b * Driver version(in-tree): 6.8.0-45-generic (Ubuntu24.04.1)(igc) + +24.11.1 Release Notes +--------------------- + + +24.11.1 Fixes +~~~~~~~~~~~~~ + +* net/virtio: fix Rx checksum calculation + +24.11.1 Validation +~~~~~~~~~~~~~~~~~~ + +* Tested by Red Hat validation team + +24.11.1 Known Issues +~~~~~~~~~~~~~~~~~~~~ + +* DPDK 24.11.1 contains DPDK 24.11 plus the fix for CVE-2024-11614 only diff --git a/dpdk/lib/vhost/virtio_net.c b/dpdk/lib/vhost/virtio_net.c index d764d4bc6a..a340e5a772 100644 --- a/dpdk/lib/vhost/virtio_net.c +++ b/dpdk/lib/vhost/virtio_net.c @@ -2823,6 +2823,9 @@ vhost_dequeue_offload(struct virtio_net *dev, struct virtio_net_hdr *hdr, */ uint16_t csum = 0, off; + if (hdr->csum_start >= rte_pktmbuf_pkt_len(m)) + return; + if (rte_raw_cksum_mbuf(m, hdr->csum_start, rte_pktmbuf_pkt_len(m) - hdr->csum_start, &csum) < 0) return; @@ -3626,6 +3629,8 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, rte_rwlock_read_unlock(&vq->access_lock); virtio_dev_vring_translate(dev, vq); + + count = 0; goto out_no_unlock; }