diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 724e3e2f0f..d90b9273a6 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -610,7 +610,7 @@ jobs: build-linux-rpm: name: linux rpm fedora runs-on: ubuntu-latest - container: fedora:39 + container: fedora:41 timeout-minutes: 30 strategy: diff --git a/AUTHORS.rst b/AUTHORS.rst index 05c2fb7046..8454641758 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -312,6 +312,7 @@ Mark Kavanagh mark.b.kavanagh81@gmail.com Mark Maglana mmaglana@gmail.com Mark Michelson mmichels@redhat.com Markos Chandras mchandras@suse.de +Markus Linnala markus.linnala@gmail.com Martin Casado casado@cs.stanford.edu Martin Fong mwfong@csl.sri.com Martin Kalcok martin.kalcok@canonical.com diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst index c9acc1e80e..1368f52b84 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -216,12 +216,13 @@ Q: What DPDK version does each Open vSwitch release work with? 2.14.x 19.11.13 2.15.x 20.11.6 2.16.x 20.11.6 - 2.17.x 21.11.8 - 3.0.x 21.11.8 - 3.1.x 22.11.6 - 3.2.x 22.11.6 - 3.3.x 23.11.2 - 3.4.x 23.11.2 + 2.17.x 21.11.9 + 3.0.x 21.11.9 + 3.1.x 22.11.7 + 3.2.x 22.11.7 + 3.3.x 23.11.3 + 3.4.x 23.11.3 + 3.5.x 24.11.1 ============ ======== Q: Are all the DPDK releases that OVS versions work with maintained? diff --git a/Documentation/intro/install/general.rst b/Documentation/intro/install/general.rst index 42b7172891..0f3cb4e496 100644 --- a/Documentation/intro/install/general.rst +++ b/Documentation/intro/install/general.rst @@ -166,7 +166,7 @@ other than plain text, only if you have the following: If you are going to extensively modify Open vSwitch, consider installing the following to obtain better warnings: -- "sparse" version 0.6.2 or later +- "sparse" version 0.6.4 or later (https://git.kernel.org/pub/scm/devel/sparse/sparse.git/). - GNU make. 
diff --git a/Documentation/ref/ovs-flowviz.8.rst b/Documentation/ref/ovs-flowviz.8.rst index e8d84d995e..54adff498a 100644 --- a/Documentation/ref/ovs-flowviz.8.rst +++ b/Documentation/ref/ovs-flowviz.8.rst @@ -52,7 +52,7 @@ them in one of the predefined *format*\ s. Options ======= -.. program: ovs-flowviz +.. program:: ovs-flowviz .. option:: -h, --help @@ -151,6 +151,8 @@ option. Arguments: +.. program:: ovs-flowviz [datapath|openflow] console + .. option:: -h, --heat-map Color of the packet and byte counters to reflect their relative size. @@ -201,6 +203,8 @@ A logical block is a set of flows that have: Arguments: +.. program:: ovs-flowviz openflow logic + .. option:: -s, --show-flows Show all the flows under each logical block. @@ -277,6 +281,8 @@ same tree-like flow hierarchy that the ``tree`` format prints. Arguments: +.. program:: ovs-flowviz datapath graph + .. option:: -h, --html Print the graphviz format as an svg image alongside an interactive HTML diff --git a/Documentation/ref/ovs-sim.1.rst b/Documentation/ref/ovs-sim.1.rst index f59cd7af7a..60971de3c7 100644 --- a/Documentation/ref/ovs-sim.1.rst +++ b/Documentation/ref/ovs-sim.1.rst @@ -58,7 +58,7 @@ such privileges. Options ======= -.. program: ovs-sim +.. program:: ovs-sim *script* Runs *script*, which should be a Bash script, within a subshell diff --git a/Documentation/topics/userspace-tso.rst b/Documentation/topics/userspace-tso.rst index ae08496bdd..ed4d36edd1 100644 --- a/Documentation/topics/userspace-tso.rst +++ b/Documentation/topics/userspace-tso.rst @@ -109,9 +109,9 @@ then started again. OvS will then report:: Limitations ~~~~~~~~~~~ -The current OvS userspace `TSO` implementation supports flat and VLAN networks -only (i.e. no support for `TSO` over tunneled connection [VxLAN, GRE, IPinIP, -etc.]). +The current OvS userspace `TSO` implementation supports flat, VLAN networks, +and some tunneled connections. Currently only VxLAN, Geneve and GRE tunnels +are supported. 
The NIC driver must support and advertise checksum offload for TCP and UDP. However, SCTP is not mandatory because very few drivers advertised support @@ -120,11 +120,11 @@ in Open vSwitch. Currently, if the NIC supports that, then the feature is enabled, otherwise TSO can still be enabled but SCTP packets sent to the NIC will be dropped. -There is no software implementation of TSO, so all ports attached to the -datapath must support TSO or packets using that feature will be dropped -on ports without TSO support. That also means guests using vhost-user -in client mode will receive TSO packet regardless of TSO being enabled -or disabled within the guest. +There is a limited software implementation of TSO when tunnels are used which +only supports VxLAN, Geneve, and GRE. When these tunnels are used with TSO, +not all ports attached to the datapath need to support hardware TSO. +Guests using vhost-user in client mode will receive TSO packets regardless of +TSO being enabled or disabled within the guest. All kernel devices that use the raw socket interface (veth, for example) require the kernel commit 9d2f67e43b73 ("net/packet: fix packet drop as of diff --git a/Makefile.am b/Makefile.am index dc5c34a6ae..a61a1cadfb 100644 --- a/Makefile.am +++ b/Makefile.am @@ -339,6 +339,8 @@ check-tabs: fi .PHONY: check-tabs +# NOTE: test-lib-route-table.c excluded due to use of system() to execute +# ip route commands provided as arguments by test suite. 
ALL_LOCAL += thread-safety-check thread-safety-check: @cd $(srcdir); \ @@ -346,7 +348,8 @@ thread-safety-check: grep -n -f build-aux/thread-safety-forbidden \ `git ls-files | grep '\.[ch]$$' \ | $(EGREP) -v '^datapath-windows|^lib/sflow|^third-party'` /dev/null \ - | $(EGREP) -v ':[ ]*/?\*'; \ + | $(EGREP) -v ':[ ]*/?\*' \ + | $(EGREP) -v '^tests/test-lib-route-table.c'; \ then \ echo "See above for list of calls to functions that are"; \ echo "forbidden due to thread safety issues"; \ diff --git a/NEWS b/NEWS index 83f0513797..ec2f85c015 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -Post-v3.4.0 +v3.5.0 - xx xxx xxxx -------------------- - The limit on the number of fields for address prefix tracking in flow tables increased from 3 to 4. For example, it is now possible to @@ -31,6 +31,8 @@ Post-v3.4.0 that does not have a specific value defined, rather than being treated as a global value, aligning the behavior with that of the kernel datapath. + * Extended the support for TSO software fallback to include support for + VXLAN, Geneve, and GRE tunneled packets. - Linux TC offload: * Add support for matching tunnel flags if the kernel supports it. * Add support for the "Don't Fragment" (DF) flag in the encap action, @@ -55,6 +57,8 @@ Post-v3.4.0 to make it not configure any crypto options (ike/esp) for connections. Most useful in combination with '--root-ipsec-conf' where system-wide crypto-policy is included from the root ipsec.conf. + * New option '--ovs-monitor-ipsec-options' for 'ovs-ctl start-ovs-ipsec' + to pass above new options to ovs-monitor-ipsec. v3.4.0 - 15 Aug 2024 diff --git a/configure.ac b/configure.ac index 266e9d4799..2b19888775 100644 --- a/configure.ac +++ b/configure.ac @@ -13,7 +13,7 @@ # limitations under the License. 
AC_PREREQ(2.63) -AC_INIT(openvswitch, 3.4.90, bugs@openvswitch.org) +AC_INIT(openvswitch, 3.5.0, bugs@openvswitch.org) AC_CONFIG_SRCDIR([vswitchd/ovs-vswitchd.c]) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_AUX_DIR([build-aux]) diff --git a/debian/automake.mk b/debian/automake.mk index fe8febdd3c..7ae4e00e58 100644 --- a/debian/automake.mk +++ b/debian/automake.mk @@ -14,6 +14,8 @@ EXTRA_DIST += \ debian/openvswitch-common.lintian-overrides \ debian/openvswitch-doc.doc-base \ debian/openvswitch-doc.install \ + debian/openvswitch-ipsec.default \ + debian/openvswitch-ipsec.dirs \ debian/openvswitch-ipsec.init \ debian/openvswitch-ipsec.install \ debian/openvswitch-ipsec.service \ @@ -56,8 +58,6 @@ EXTRA_DIST += \ debian/openvswitch-vtep.init \ debian/openvswitch-vtep.install \ debian/ovs-systemd-reload \ - debian/patches/ovs-ctl-ipsec.patch \ - debian/patches/series \ debian/python3-openvswitch.install \ debian/rules \ debian/source/format \ diff --git a/debian/changelog b/debian/changelog index f1a071141d..3f4e2c56e3 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,8 @@ -openvswitch (3.4.90-1) unstable; urgency=low +openvswitch (3.5.0-1) unstable; urgency=low * New upstream version - -- Open vSwitch team Mon, 15 Jul 2024 13:00:01 +0100 + -- Open vSwitch team Thu, 16 Jan 2025 13:00:01 +0100 openvswitch (3.4.0-1) unstable; urgency=low diff --git a/debian/control.in b/debian/control.in index 47b1f8cdd6..dfcf10bcc4 100644 --- a/debian/control.in +++ b/debian/control.in @@ -39,14 +39,17 @@ Rules-Requires-Root: no Homepage: http://openvswitch.org/ Vcs-Browser: https://salsa.debian.org/openstack-team/third-party/openvswitch/-/tree/debian/experimental Vcs-Git: https://salsa.debian.org/openstack-team/third-party/openvswitch.git +X-Python3-Version: >= 3.10 Package: openvswitch-common Architecture: linux-any Depends: openssl, ${misc:Depends}, - ${python3:Depends}, ${shlibs:Depends}, +Recommends: + python3-openvswitch (= ${binary:Version}), + ${python3:Depends}, 
Suggests: ethtool, openvswitch-doc, @@ -94,7 +97,6 @@ Depends: python3-openvswitch (= ${source:Version}), strongswan, ${misc:Depends}, - ${shlibs:Depends}, Suggests: python3:any Breaks: openvswitch-common (<< 2.17~), @@ -154,12 +156,13 @@ Depends: netbase, openvswitch-common (= ${binary:Version}), procps, - python3-netifaces, - python3-openvswitch (>= ${source:Version}), uuid-runtime, ${misc:Depends}, - ${python3:Depends}, ${shlibs:Depends}, +Recommends: + python3-netifaces, + python3-openvswitch (>= ${source:Version}), + ${python3:Depends}, Breaks: openvswitch-common (<< 2.17~), Replaces: @@ -183,7 +186,6 @@ Description: Open vSwitch switch implementations # DPDK_NETDEV dpdk, # DPDK_NETDEV openvswitch-switch (= ${binary:Version}), # DPDK_NETDEV ${misc:Depends}, -# DPDK_NETDEV ${python3:Depends}, # DPDK_NETDEV ${shlibs:Depends}, # DPDK_NETDEV Enhances: # DPDK_NETDEV openvswitch-switch, @@ -258,7 +260,6 @@ Depends: openvswitch-switch (>= ${binary:Version}), python3-openvswitch (>= ${source:Version}), ${misc:Depends}, - ${python3:Depends}, ${shlibs:Depends}, Suggests: python3:any Breaks: @@ -284,12 +285,13 @@ Depends: ${misc:Depends}, ${python3:Depends}, ${shlibs:Depends}, -Suggests: +Recommends: python3-click, - python3-graphviz, python3-netaddr, python3-pyparsing, python3-rich, +Suggests: + python3-graphviz, python3-unbound, Description: Python 3 bindings for Open vSwitch Open vSwitch is a production quality, multilayer, software-based, diff --git a/debian/openvswitch-common.install b/debian/openvswitch-common.install index 9bdb43a6f2..5fef8de74e 100644 --- a/debian/openvswitch-common.install +++ b/debian/openvswitch-common.install @@ -1,6 +1,7 @@ etc/bash_completion.d/ovs-appctl-bashcomp.bash usr/share/bash-completion/completions usr/bin/ovs-appctl usr/bin/ovs-docker +usr/bin/ovs-flowviz usr/bin/ovs-ofctl usr/bin/ovs-parse-backtrace usr/bin/ovs-pki @@ -20,6 +21,7 @@ usr/share/man/man7/ovsdb-server.7 usr/share/man/man7/ovsdb.7 usr/share/man/man8/ovs-appctl.8 
usr/share/man/man8/ovs-bugtool.8 +usr/share/man/man8/ovs-flowviz.8 usr/share/man/man8/ovs-ofctl.8 usr/share/man/man8/ovs-parse-backtrace.8 usr/share/man/man8/ovs-pki.8 diff --git a/debian/openvswitch-ipsec.default b/debian/openvswitch-ipsec.default new file mode 100644 index 0000000000..a074948591 --- /dev/null +++ b/debian/openvswitch-ipsec.default @@ -0,0 +1,5 @@ +# This is a POSIX shell fragment -*- sh -*- + +# OVS_CTL_OPTS: Extra options to pass to ovs-ctl. This is, for example, +# a suitable place to specify --no-restart-ike-daemon. +# OVS_CTL_OPTS= diff --git a/debian/openvswitch-ipsec.dirs b/debian/openvswitch-ipsec.dirs new file mode 100644 index 0000000000..4b83f29661 --- /dev/null +++ b/debian/openvswitch-ipsec.dirs @@ -0,0 +1 @@ +/usr/share/openvswitch/ipsec diff --git a/debian/openvswitch-ipsec.init b/debian/openvswitch-ipsec.init index aa68384547..4fc7701b83 100755 --- a/debian/openvswitch-ipsec.init +++ b/debian/openvswitch-ipsec.init @@ -41,6 +41,8 @@ test -x $DAEMON || exit 0 . /lib/lsb/init-functions +test -e /etc/default/openvswitch-ipsec && . 
/etc/default/openvswitch-ipsec + DODTIME=10 # Time to wait for the server to die, in seconds # If this value is set too low you might not # let some servers to die gracefully and @@ -72,7 +74,8 @@ running() { } start_server() { - ${DATADIR}/scripts/ovs-ctl --ike-daemon=strongswan start-ovs-ipsec + ${DATADIR}/scripts/ovs-ctl --ike-daemon=strongswan \ + start-ovs-ipsec $OVS_CTL_OPTS return 0 } diff --git a/debian/openvswitch-ipsec.install b/debian/openvswitch-ipsec.install old mode 100644 new mode 100755 index 31a8945e2f..ae127e2d4d --- a/debian/openvswitch-ipsec.install +++ b/debian/openvswitch-ipsec.install @@ -1 +1,3 @@ +#!/usr/bin/dh-exec +debian/openvswitch-ipsec.default => /usr/share/openvswitch/ipsec/default.template usr/share/openvswitch/scripts/ovs-monitor-ipsec diff --git a/debian/openvswitch-ipsec.service b/debian/openvswitch-ipsec.service index 608a6a6188..2f92def514 100644 --- a/debian/openvswitch-ipsec.service +++ b/debian/openvswitch-ipsec.service @@ -6,8 +6,10 @@ After=openvswitch-switch.service [Service] Type=forking PIDFile=/run/openvswitch/ovs-monitor-ipsec.pid -ExecStart=/usr/share/openvswitch/scripts/ovs-ctl \ - --ike-daemon=strongswan start-ovs-ipsec +Restart=on-failure +EnvironmentFile=-/etc/default/openvswitch-ipsec +ExecStart=/usr/share/openvswitch/scripts/ovs-ctl --no-monitor \ + --ike-daemon=strongswan start-ovs-ipsec $OVS_CTL_OPTS ExecStop=/usr/share/openvswitch/scripts/ovs-ctl stop-ovs-ipsec [Install] diff --git a/debian/openvswitch-switch.ovs-vswitchd.service b/debian/openvswitch-switch.ovs-vswitchd.service index 519d80d8ed..a4d445b953 100644 --- a/debian/openvswitch-switch.ovs-vswitchd.service +++ b/debian/openvswitch-switch.ovs-vswitchd.service @@ -11,6 +11,7 @@ DefaultDependencies=no [Service] LimitNOFILE=1048576 Type=forking +PIDFile=/run/openvswitch/ovs-vswitchd.pid Restart=on-failure Environment=HOME=/var/run/openvswitch EnvironmentFile=-/etc/default/openvswitch-switch diff --git a/debian/openvswitch-switch.ovsdb-server.service 
b/debian/openvswitch-switch.ovsdb-server.service index 339665b255..35654d7059 100644 --- a/debian/openvswitch-switch.ovsdb-server.service +++ b/debian/openvswitch-switch.ovsdb-server.service @@ -8,6 +8,7 @@ DefaultDependencies=no [Service] LimitNOFILE=1048576 Type=forking +PIDFile=/run/openvswitch/ovsdb-server.pid Restart=on-failure EnvironmentFile=-/etc/default/openvswitch-switch ExecStart=/usr/share/openvswitch/scripts/ovs-ctl \ @@ -18,6 +19,3 @@ ExecStop=/usr/share/openvswitch/scripts/ovs-ctl --no-ovs-vswitchd stop ExecReload=/usr/share/openvswitch/scripts/ovs-ctl --no-ovs-vswitchd \ --no-record-hostname \ --no-monitor restart $OVS_CTL_OPTS -RuntimeDirectory=openvswitch -RuntimeDirectoryMode=0755 -RuntimeDirectoryPreserve=yes diff --git a/debian/openvswitch-test.install b/debian/openvswitch-test.install index 88c8252805..dfc8ebf302 100644 --- a/debian/openvswitch-test.install +++ b/debian/openvswitch-test.install @@ -2,4 +2,5 @@ usr/bin/ovs-l3ping usr/bin/ovs-test usr/share/man/man8/ovs-l3ping.8 usr/share/man/man8/ovs-test.8 +usr/share/openvswitch/python/ovstest usr/lib/python3/dist-packages/ usr/share/openvswitch/scripts/usdt/* diff --git a/debian/patches/ovs-ctl-ipsec.patch b/debian/patches/ovs-ctl-ipsec.patch deleted file mode 100644 index 63375cd47d..0000000000 --- a/debian/patches/ovs-ctl-ipsec.patch +++ /dev/null @@ -1,18 +0,0 @@ -Description: Don't monitor ipsec daemon - For Ubuntu systemd will monitor the ovs-monitor-ipsec daemon so - there is no need to spawn a separate monitor thread to deal with - restarts. Doing so has the side effect of confusing systemd into - monitoring the wrong process. 
-Author: James Page -Forwarded: not-needed - ---- a/utilities/ovs-ctl.in -+++ b/utilities/ovs-ctl.in -@@ -245,7 +245,7 @@ start_ovs_ipsec () { - --pidfile=${rundir}/ovs-monitor-ipsec.pid \ - --ike-daemon=$IKE_DAEMON \ - $no_restart \ -- --log-file --detach --monitor unix:${rundir}/db.sock || return 1 -+ --log-file --detach unix:${rundir}/db.sock || return 1 - return 0 - } diff --git a/debian/patches/series b/debian/patches/series deleted file mode 100644 index 87a2a1d97c..0000000000 --- a/debian/patches/series +++ /dev/null @@ -1 +0,0 @@ -ovs-ctl-ipsec.patch diff --git a/debian/python3-openvswitch.install b/debian/python3-openvswitch.install index e1e8c3a6e1..cd1dae3aff 100644 --- a/debian/python3-openvswitch.install +++ b/debian/python3-openvswitch.install @@ -1 +1,6 @@ -usr/share/man/man8/ovs-flowviz.8 +# At the dh_install stage we need to retain python version specific directory +# tree to support extensions. +# +# dh_python will consolidate into usr/lib/python3/dist-packages retaining +# version specific shared object files. 
+usr/lib/python3* usr/lib/ diff --git a/debian/rules b/debian/rules index b6f905f3cd..6b51b51e16 100755 --- a/debian/rules +++ b/debian/rules @@ -16,7 +16,6 @@ else PARALLEL = endif -PYTHON3S:=$(shell py3versions -vr) DEB_HOST_ARCH?=$(shell dpkg-architecture -qDEB_HOST_ARCH) override_dh_auto_configure: @@ -80,9 +79,26 @@ endif # nodpdk endif # i386/amd64/ppc64el endif # nocheck +export PYBUILD_DESTDIR = $(CURDIR)/debian/tmp +export PYBUILD_DIR = $(CURDIR)/python + +pybuild = \ + export PKG_CONFIG_PATH=$(CURDIR)/debian/tmp/usr/lib/pkgconfig; \ + export PKG_CONFIG_SYSROOT_DIR=$(CURDIR)/debian/tmp; \ + export PKG_CONFIG_SYSTEM_INCLUDE_PATH=/; \ + export PKG_CONFIG_SYSTEM_LIBRARY_PATH=/; \ + enable_shared=no \ + extra_cflags="`pkg-config --cflags libopenvswitch`" \ + extra_libs="-Wl,-Bstatic -lopenvswitch -Wl,-Bdynamic `pkg-config --libs --static libopenvswitch`" \ + pybuild + override_dh_auto_build: dh_auto_build --sourcedirectory=_debian -- dist distdir=openvswitch dh_auto_build --sourcedirectory=_debian + # We need an extra install here so that we can use pkgconfig to + # retrieve accurate CFLAGS and LDFLAGS for building Python extensions. + dh_auto_install --sourcedirectory=_debian + $(pybuild) --build ifneq (,$(filter i386 amd64 ppc64el arm64, $(DEB_HOST_ARCH))) ifeq (,$(filter nodpdk, $(DEB_BUILD_OPTIONS))) dh_auto_build --sourcedirectory=_dpdk @@ -91,28 +107,15 @@ endif execute_before_dh_auto_clean: find . -name "*.pyc" -delete + if test -d $(PYBUILD_DIR)/build; then \ + pybuild --clean ; \ + fi override_dh_auto_install: + # We need to use pybuild to install Python extensions. 
+ $(pybuild) --install dh_auto_install --sourcedirectory=_debian -execute_after_dh_install: - set -e && for pyvers in $(PYTHON3S); do \ - cd python; \ - export PKG_CONFIG_PATH=$(CURDIR)/debian/tmp/usr/lib/pkgconfig; \ - export PKG_CONFIG_SYSROOT_DIR=$(CURDIR)/debian/tmp; \ - export PKG_CONFIG_SYSTEM_INCLUDE_PATH=/; \ - export PKG_CONFIG_SYSTEM_LIBRARY_PATH=/; \ - enable_shared=no \ - extra_cflags="`pkg-config --cflags libopenvswitch`" \ - extra_libs="-Wl,-Bstatic -lopenvswitch -Wl,-Bdynamic `pkg-config --libs --static libopenvswitch`" \ - python$$pyvers setup.py install --install-layout=deb \ - --root $(CURDIR)/debian/python3-openvswitch; \ - cd ..; \ - mkdir -p $(CURDIR)/debian/openvswitch-test/usr/lib/python$$pyvers/dist-packages/ovstest; \ - install -v -D python/ovstest/*.py \ - $(CURDIR)/debian/openvswitch-test/usr/lib/python$$pyvers/dist-packages/ovstest; \ - done - override_dh_installinit: dh_installinit --restart-after-upgrade dh_installinit -popenvswitch-switch --name=ovsdb-server --no-start @@ -134,8 +137,8 @@ override_dh_python3: # Helper target for creating snapshots from upstream git DATE=$(shell date +%Y%m%d) # Upstream branch to track -BRANCH=branch-3.4 -VERSION=3.4.0 +BRANCH=branch-3.5 +VERSION=3.5.0 get-orig-snapshot: rm -Rf openvswitch-upstream diff --git a/include/openvswitch/ofp-ct.h b/include/openvswitch/ofp-ct.h index d57b626784..ea68c2e605 100644 --- a/include/openvswitch/ofp-ct.h +++ b/include/openvswitch/ofp-ct.h @@ -24,6 +24,8 @@ #include "openflow/nicira-ext.h" +struct ds; + #ifdef __cplusplus extern "C" { #endif diff --git a/include/sparse/automake.mk b/include/sparse/automake.mk index 45e6202c52..052a1b4b82 100644 --- a/include/sparse/automake.mk +++ b/include/sparse/automake.mk @@ -12,10 +12,7 @@ noinst_HEADERS += \ include/sparse/netinet/ip6.h \ include/sparse/netpacket/packet.h \ include/sparse/pthread.h \ - include/sparse/rte_atomic.h \ - include/sparse/rte_mbuf.h \ include/sparse/rte_memcpy.h \ - include/sparse/rte_trace_point.h \ 
include/sparse/sys/socket.h \ include/sparse/sys/sysmacros.h \ include/sparse/sys/types.h \ diff --git a/include/sparse/rte_atomic.h b/include/sparse/rte_atomic.h deleted file mode 100644 index ae49fe5c03..0000000000 --- a/include/sparse/rte_atomic.h +++ /dev/null @@ -1,25 +0,0 @@ -/* Copyright (c) 2015 Nicira, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __CHECKER__ -#error "Use this header only with sparse. It is not a correct implementation." -#endif - -/* Fix sparse technicality about types in one of the function calls by just - * ignoring it. */ -#define __sync_add_and_fetch(a, b) (0) - -/* Get actual definitions for us to annotate and build on. */ -#include_next diff --git a/include/sparse/rte_mbuf.h b/include/sparse/rte_mbuf.h deleted file mode 100644 index 981cdb441f..0000000000 --- a/include/sparse/rte_mbuf.h +++ /dev/null @@ -1,29 +0,0 @@ -/* Copyright (c) 2020 Intel, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __CHECKER__ -#error "Use this header only with sparse. It is not a correct implementation." -#endif - -/* sparse doesn't know about gcc atomic builtins. */ -#ifndef __ATOMIC_ACQ_REL -#define __ATOMIC_ACQ_REL 0 -#define __ATOMIC_RELAXED 1 -#define __atomic_add_fetch(p, val, memorder) (*(p) = *(p) + (val)) -#define __atomic_store_n(p, val, memorder) (*(p) = (val)) -#endif - -/* Get actual definitions for us to annotate and build on. */ -#include_next diff --git a/include/sparse/rte_trace_point.h b/include/sparse/rte_trace_point.h deleted file mode 100644 index 8039232754..0000000000 --- a/include/sparse/rte_trace_point.h +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright 2020, Red Hat, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __CHECKER__ -#error "Use this header only with sparse. It is not a correct implementation." -#endif - -/* sparse doesn't know about gcc atomic builtins. */ -#ifndef __ATOMIC_ACQUIRE -#define __ATOMIC_ACQUIRE 0 -#define __atomic_load_n(p, memorder) *(p) -#endif - -/* Get actual definitions for us to annotate and - * build on. 
*/ -#include_next diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c index 04ebb19da1..2356359772 100644 --- a/lib/dp-packet-gso.c +++ b/lib/dp-packet-gso.c @@ -73,8 +73,7 @@ dp_packet_gso_nr_segs(struct dp_packet *p) const char *data_tail; const char *data_pos; - if (dp_packet_hwol_is_tunnel_vxlan(p) || - dp_packet_hwol_is_tunnel_geneve(p)) { + if (dp_packet_hwol_is_tunnel(p)) { data_pos = dp_packet_get_inner_tcp_payload(p); } else { data_pos = dp_packet_get_tcp_payload(p); @@ -105,7 +104,9 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches) bool outer_ipv4; int hdr_len; int seg_len; - bool tnl; + bool udp_tnl = dp_packet_hwol_is_tunnel_vxlan(p) || + dp_packet_hwol_is_tunnel_geneve(p); + bool gre_tnl = dp_packet_hwol_is_tunnel_gre(p); tso_segsz = dp_packet_get_tso_segsz(p); if (!tso_segsz) { @@ -114,11 +115,9 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches) return false; } - if (dp_packet_hwol_is_tunnel_vxlan(p) || - dp_packet_hwol_is_tunnel_geneve(p)) { + if (udp_tnl || gre_tnl) { outer_ipv4 = dp_packet_hwol_is_outer_ipv4(p); tcp_hdr = dp_packet_inner_l4(p); - tnl = true; if (outer_ipv4) { outer_ip_id = ntohs(((struct ip_header *) dp_packet_l3(p))->ip_id); @@ -130,7 +129,6 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches) } else { outer_ipv4 = dp_packet_hwol_is_ipv4(p); tcp_hdr = dp_packet_l4(p); - tnl = false; if (outer_ipv4) { struct ip_header *ip_hdr = dp_packet_l3(p); @@ -156,13 +154,15 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches) seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len); data_pos += seg_len; - if (tnl) { + if (udp_tnl) { /* Update tunnel UDP header length. */ struct udp_header *tnl_hdr; tnl_hdr = dp_packet_l4(seg); tnl_hdr->udp_len = htons(dp_packet_l4_size(seg)); + } + if (udp_tnl || gre_tnl) { /* Update tunnel inner L3 header. 
*/ if (dp_packet_hwol_is_ipv4(seg)) { struct ip_header *ip_hdr = dp_packet_inner_l3(seg); @@ -194,7 +194,7 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches) } /* Update L4 header. */ - if (tnl) { + if (udp_tnl || gre_tnl) { tcp_hdr = dp_packet_inner_l4(seg); } else { tcp_hdr = dp_packet_l4(seg); @@ -208,6 +208,18 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches) tcp_hdr->tcp_ctl = TCP_CTL(tcp_flags, tcp_offset); } + if (gre_tnl) { + struct gre_base_hdr *ghdr; + + ghdr = dp_packet_l4(seg); + + if (ghdr->flags & htons(GRE_CSUM)) { + ovs_be16 *csum_opt = (ovs_be16 *) (ghdr + 1); + *csum_opt = 0; + *csum_opt = csum(ghdr, dp_packet_l4_size(seg)); + } + } + if (dp_packet_batch_is_full(curr_batch)) { curr_batch++; } diff --git a/lib/dp-packet.c b/lib/dp-packet.c index df7bf8e6b3..dad0d7be3a 100644 --- a/lib/dp-packet.c +++ b/lib/dp-packet.c @@ -604,6 +604,8 @@ dp_packet_ol_send_prepare(struct dp_packet *p, uint64_t flags) NETDEV_TX_OFFLOAD_SCTP_CKSUM | NETDEV_TX_OFFLOAD_IPV4_CKSUM); } + } else if (dp_packet_hwol_is_tunnel_gre(p)) { + tnl_inner = true; } if (dp_packet_hwol_tx_ip_csum(p)) { diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 4afbbe7223..0f487a4283 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -104,6 +104,9 @@ enum dp_packet_offload_mask { /* Offload tunnel packet, outer header is IPv6. */ DEF_OL_FLAG(DP_PACKET_OL_TX_OUTER_IPV6, RTE_MBUF_F_TX_OUTER_IPV6, 0x40000), + /* Offload packet is GRE tunnel. */ + DEF_OL_FLAG(DP_PACKET_OL_TX_TUNNEL_GRE, + RTE_MBUF_F_TX_TUNNEL_GRE, 0x80000), /* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. 
*/ }; @@ -123,6 +126,7 @@ enum dp_packet_offload_mask { DP_PACKET_OL_TX_IP_CKSUM | \ DP_PACKET_OL_TX_TUNNEL_GENEVE | \ DP_PACKET_OL_TX_TUNNEL_VXLAN | \ + DP_PACKET_OL_TX_TUNNEL_GRE | \ DP_PACKET_OL_TX_OUTER_IPV4 | \ DP_PACKET_OL_TX_OUTER_IP_CKSUM | \ DP_PACKET_OL_TX_OUTER_UDP_CKSUM | \ @@ -206,6 +210,7 @@ static inline void dp_packet_set_tso_segsz(struct dp_packet *, uint16_t); void *dp_packet_resize_l2(struct dp_packet *, int increment); void *dp_packet_resize_l2_5(struct dp_packet *, int increment); static inline void *dp_packet_eth(const struct dp_packet *); +static inline void dp_packet_reset_outer_offsets(struct dp_packet *); static inline void dp_packet_reset_offsets(struct dp_packet *); static inline void dp_packet_reset_offload(struct dp_packet *); static inline uint16_t dp_packet_l2_pad_size(const struct dp_packet *); @@ -429,15 +434,22 @@ dp_packet_eth(const struct dp_packet *b) ? dp_packet_data(b) : NULL; } -/* Resets all layer offsets. 'l3' offset must be set before 'l2' can be - * retrieved. */ +/* Resets all outer layer offsets. */ static inline void -dp_packet_reset_offsets(struct dp_packet *b) +dp_packet_reset_outer_offsets(struct dp_packet *b) { b->l2_pad_size = 0; b->l2_5_ofs = UINT16_MAX; b->l3_ofs = UINT16_MAX; b->l4_ofs = UINT16_MAX; +} + +/* Resets all layer offsets. 'l3' offset must be set before 'l2' can be + * retrieved. */ +static inline void +dp_packet_reset_offsets(struct dp_packet *b) +{ + dp_packet_reset_outer_offsets(b); b->inner_l3_ofs = UINT16_MAX; b->inner_l4_ofs = UINT16_MAX; } @@ -1171,6 +1183,22 @@ dp_packet_hwol_is_tunnel_vxlan(struct dp_packet *b) return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_TUNNEL_VXLAN); } +/* Returns 'true' if packet 'b' is marked for GRE tunnel offloading. */ +static inline bool +dp_packet_hwol_is_tunnel_gre(struct dp_packet *b) +{ + return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_TUNNEL_GRE); +} + +/* Returns true if packet 'b' has any offloadable tunnel type. 
*/ +static inline bool +dp_packet_hwol_is_tunnel(struct dp_packet *b) +{ + return !!(*dp_packet_ol_flags_ptr(b) & (DP_PACKET_OL_TX_TUNNEL_VXLAN | + DP_PACKET_OL_TX_TUNNEL_GRE | + DP_PACKET_OL_TX_TUNNEL_GENEVE)); +} + /* Returns 'true' if packet 'b' is marked for outer IPv4 checksum offload. */ static inline bool dp_packet_hwol_is_outer_ipv4_cksum(const struct dp_packet *b) @@ -1289,12 +1317,11 @@ dp_packet_hwol_set_tunnel_vxlan(struct dp_packet *b) *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TUNNEL_VXLAN; } -/* Clears tunnel offloading marks. */ +/* Mark packet 'b' for GRE tunnel offloading. */ static inline void -dp_packet_hwol_reset_tunnel(struct dp_packet *b) +dp_packet_hwol_set_tunnel_gre(struct dp_packet *b) { - *dp_packet_ol_flags_ptr(b) &= ~(DP_PACKET_OL_TX_TUNNEL_VXLAN | - DP_PACKET_OL_TX_TUNNEL_GENEVE); + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TUNNEL_GRE; } /* Mark packet 'b' as a tunnel packet with outer IPv4 header. */ @@ -1352,6 +1379,9 @@ dp_packet_hwol_reset_tcp_seg(struct dp_packet *p) ol_flags |= DP_PACKET_OL_TX_OUTER_IP_CKSUM; } ol_flags |= DP_PACKET_OL_TX_OUTER_UDP_CKSUM; + } else if (ol_flags & DP_PACKET_OL_TX_TUNNEL_GRE && + ol_flags & DP_PACKET_OL_TX_OUTER_IPV4) { + ol_flags |= DP_PACKET_OL_TX_OUTER_IP_CKSUM; } *dp_packet_ol_flags_ptr(p) = ol_flags; diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 2a529f272d..87d69c46d5 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -115,7 +115,6 @@ COVERAGE_DEFINE(datapath_drop_lock_error); COVERAGE_DEFINE(datapath_drop_userspace_action_error); COVERAGE_DEFINE(datapath_drop_tunnel_push_error); COVERAGE_DEFINE(datapath_drop_tunnel_pop_error); -COVERAGE_DEFINE(datapath_drop_tunnel_tso_recirc); COVERAGE_DEFINE(datapath_drop_recirc_error); COVERAGE_DEFINE(datapath_drop_invalid_port); COVERAGE_DEFINE(datapath_drop_invalid_bond); @@ -6519,9 +6518,6 @@ pmd_rebalance_dry_run(struct dp_netdev *dp) struct sched_numa_list numa_list_cur; struct sched_numa_list numa_list_est; bool thresh_met = 
false; - uint64_t current_var, estimate_var; - struct sched_numa *numa_cur, *numa_est; - uint64_t improvement = 0; VLOG_DBG("PMD auto load balance performing dry run."); @@ -6537,9 +6533,14 @@ pmd_rebalance_dry_run(struct dp_netdev *dp) /* Check if cross-numa polling, there is only one numa with PMDs. */ if (!sched_numa_list_cross_numa_polling(&numa_list_est) || sched_numa_list_count(&numa_list_est) == 1) { + struct sched_numa *numa_cur; /* Calculate variances. */ HMAP_FOR_EACH (numa_cur, node, &numa_list_cur.numas) { + uint64_t current_var, estimate_var; + struct sched_numa *numa_est; + uint64_t improvement = 0; + numa_est = sched_numa_list_lookup(&numa_list_est, numa_cur->numa_id); if (!numa_est) { @@ -8923,34 +8924,6 @@ static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd, struct dp_packet_batch *packets) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); - size_t i, size = dp_packet_batch_size(packets); - struct dp_packet *packet; - - DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets) { - if (dp_packet_hwol_is_tunnel_geneve(packet) || - dp_packet_hwol_is_tunnel_vxlan(packet)) { - - if (dp_packet_hwol_is_tso(packet)) { - /* Can't perform GSO in the middle of a pipeline. */ - COVERAGE_INC(datapath_drop_tunnel_tso_recirc); - dp_packet_delete(packet); - VLOG_WARN_RL(&rl, "Recirculating tunnel packets with " - "TSO is not supported"); - continue; - } - /* Have to fix all the checksums before re-parsing, because the - * packet will be treated as having a single set of headers. */ - dp_packet_ol_send_prepare(packet, 0); - /* This packet must not be marked with anything tunnel-related. */ - dp_packet_hwol_reset_tunnel(packet); - /* Clear inner offsets. Other ones are collateral, but they will - * be re-initialized on re-parsing. 
*/ - dp_packet_reset_offsets(packet); - } - dp_packet_batch_refill(packets, packet, i); - } - dp_netdev_input__(pmd, packets, true, 0); } diff --git a/lib/flow.c b/lib/flow.c index 9be4375246..ef719471c6 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -402,6 +402,14 @@ parse_ethertype(const void **datap, size_t *sizep) return htons(FLOW_DL_TYPE_NONE); } +static inline bool +icmp6_is_nd(const struct icmp6_data_header *icmp6) +{ + return (icmp6->icmp6_base.icmp6_code == 0 && + (icmp6->icmp6_base.icmp6_type == ND_NEIGHBOR_SOLICIT || + icmp6->icmp6_base.icmp6_type == ND_NEIGHBOR_ADVERT)); +} + /* Returns 'true' if the packet is an ND packet. In that case the '*nd_target' * and 'arp_buf[]' are filled in. If the packet is not an ND packet, 'false' * is returned and no values are filled in on '*nd_target' or 'arp_buf[]'. */ @@ -412,9 +420,7 @@ parse_icmpv6(const void **datap, size_t *sizep, const union ovs_16aligned_in6_addr **nd_target, struct eth_addr arp_buf[2], uint8_t *opt_type) { - if (icmp6->icmp6_base.icmp6_code != 0 || - (icmp6->icmp6_base.icmp6_type != ND_NEIGHBOR_SOLICIT && - icmp6->icmp6_base.icmp6_type != ND_NEIGHBOR_ADVERT)) { + if (!icmp6_is_nd(icmp6)) { return false; } @@ -804,6 +810,7 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) uint8_t nw_frag, nw_tos, nw_ttl, nw_proto; uint8_t *ct_nw_proto_p = NULL; ovs_be16 ct_tp_src = 0, ct_tp_dst = 0; + bool tunneling; /* Metadata. */ if (flow_tnl_dst_is_set(&md->tunnel)) { @@ -857,7 +864,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) /* Initialize packet's layer pointer and offsets. */ frame = data; - dp_packet_reset_offsets(packet); + tunneling = dp_packet_hwol_is_tunnel(packet); + if (tunneling) { + /* Preserve inner offsets from previous circulation. */ + dp_packet_reset_outer_offsets(packet); + } else { + dp_packet_reset_offsets(packet); + } if (packet_type == htonl(PT_ETH)) { /* Must have full Ethernet header to proceed. 
*/ @@ -936,9 +949,16 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) nw_proto = nh->ip_proto; nw_frag = ipv4_get_nw_frag(nh); data_pull(&data, &size, ip_len); - dp_packet_hwol_set_tx_ipv4(packet); - if (dp_packet_ip_checksum_good(packet)) { - dp_packet_hwol_set_tx_ip_csum(packet); + if (tunneling) { + dp_packet_hwol_set_tx_outer_ipv4(packet); + if (dp_packet_ip_checksum_good(packet)) { + dp_packet_hwol_set_tx_outer_ipv4_csum(packet); + } + } else { + dp_packet_hwol_set_tx_ipv4(packet); + if (dp_packet_ip_checksum_good(packet)) { + dp_packet_hwol_set_tx_ip_csum(packet); + } } } else if (dl_type == htons(ETH_TYPE_IPV6)) { const struct ovs_16aligned_ip6_hdr *nh = data; @@ -953,7 +973,11 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) } data_pull(&data, &size, sizeof *nh); - dp_packet_hwol_set_tx_ipv6(packet); + if (tunneling) { + dp_packet_hwol_set_tx_outer_ipv6(packet); + } else { + dp_packet_hwol_set_tx_ipv6(packet); + } plen = ntohs(nh->ip6_plen); dp_packet_set_l2_pad_size(packet, size - plen); size = plen; /* Never pull padding. 
*/ @@ -1078,7 +1102,11 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) dp_packet_ol_l4_csum_check_partial(packet); if (dp_packet_l4_checksum_good(packet) || dp_packet_ol_l4_csum_partial(packet)) { - dp_packet_hwol_set_csum_udp(packet); + if (tunneling) { + dp_packet_hwol_set_outer_udp_csum(packet); + } else { + dp_packet_hwol_set_csum_udp(packet); + } } } } else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) { @@ -1166,6 +1194,15 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) } } } + } else if (ct_nw_proto_p && + (*ct_nw_proto_p == IPPROTO_TCP || + *ct_nw_proto_p == IPPROTO_UDP || + *ct_nw_proto_p == IPPROTO_SCTP || + *ct_nw_proto_p == IPPROTO_ICMP || + (*ct_nw_proto_p == IPPROTO_ICMPV6 && !icmp6_is_nd(data)))) { + miniflow_pad_from_64(mf, ct_tp_src); + miniflow_push_be16(mf, ct_tp_src, ct_tp_src); + miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst); } out: dst->map = mf.map; @@ -1187,7 +1224,7 @@ parse_dl_type(const void **datap, size_t *sizep, ovs_be16 *first_vlan_tci_p) * If 'packet' is not an Ethernet packet embedding TCP, returns 0. * 'dl_type_p' will be set only if the 'packet' is an Ethernet packet. * 'nw_frag_p' will be set only if the 'packet' is an IP packet. - * 'first_vlan_tci' will be set only if the 'packet' contains vlan header. + * 'first_vlan_tci_p' will be set only if the 'packet' contains vlan header. 
* * The caller must ensure that 'packet' is at least ETH_HEADER_LEN bytes * long.'*/ diff --git a/lib/ipf.c b/lib/ipf.c index 59e2323557..b76181e793 100644 --- a/lib/ipf.c +++ b/lib/ipf.c @@ -410,11 +410,12 @@ ipf_reassemble_v4_frags(struct ipf_list *ipf_list) dp_packet_set_size(pkt, dp_packet_size(pkt) - dp_packet_l2_pad_size(pkt)); struct ip_header *l3 = dp_packet_l3(pkt); int len = ntohs(l3->ip_tot_len); + int orig_len = dp_packet_size(pkt); int rest_len = frag_list[ipf_list->last_inuse_idx].end_data_byte - frag_list[1].start_data_byte + 1; - if (len + rest_len > IPV4_PACKET_MAX_SIZE) { + if (orig_len + rest_len > IPV4_PACKET_MAX_SIZE) { ipf_print_reass_packet( "Unsupported big reassembled v4 packet; v4 hdr:", l3); dp_packet_delete(pkt); @@ -459,11 +460,12 @@ ipf_reassemble_v6_frags(struct ipf_list *ipf_list) dp_packet_set_size(pkt, dp_packet_size(pkt) - dp_packet_l2_pad_size(pkt)); struct ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt); int pl = ntohs(l3->ip6_plen) - sizeof(struct ovs_16aligned_ip6_frag); + int orig_len = dp_packet_size(pkt); int rest_len = frag_list[ipf_list->last_inuse_idx].end_data_byte - frag_list[1].start_data_byte + 1; - if (pl + rest_len > IPV6_PACKET_MAX_DATA) { + if (orig_len + rest_len > IPV6_PACKET_MAX_DATA) { ipf_print_reass_packet( "Unsupported big reassembled v6 packet; v6 hdr:", l3); dp_packet_delete(pkt); diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index b88247a2d0..549887b313 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -427,6 +427,7 @@ enum dpdk_hw_ol_features { NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD = 1 << 9, NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD = 1 << 10, NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD = 1 << 11, + NETDEV_TX_GRE_TNL_TSO_OFFLOAD = 1 << 12, }; enum dpdk_rx_steer_flags { @@ -1100,6 +1101,8 @@ netdev_dpdk_update_netdev_flags(struct netdev_dpdk *dev) NETDEV_TX_OFFLOAD_TCP_TSO); netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD, NETDEV_TX_VXLAN_TNL_TSO); + netdev_dpdk_update_netdev_flag(dev, 
NETDEV_TX_GRE_TNL_TSO_OFFLOAD, + NETDEV_TX_GRE_TNL_TSO); netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD, NETDEV_TX_GENEVE_TNL_TSO); netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD, @@ -1167,6 +1170,10 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO; } + if (dev->hw_ol_features & NETDEV_TX_GRE_TNL_TSO_OFFLOAD) { + conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO; + } + if (dev->hw_ol_features & NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD) { conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM; } @@ -1443,6 +1450,13 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) VLOG_WARN("%s: Tx Geneve tunnel TSO offload is not supported.", netdev_get_name(&dev->up)); } + + if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO) { + dev->hw_ol_features |= NETDEV_TX_GRE_TNL_TSO_OFFLOAD; + } else { + VLOG_WARN("%s: Tx GRE tunnel TSO offload is not supported.", + netdev_get_name(&dev->up)); + } } n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq); @@ -2650,6 +2664,7 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf) const uint64_t tunnel_type = mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK; if (OVS_UNLIKELY(tunnel_type && tunnel_type != RTE_MBUF_F_TX_TUNNEL_GENEVE && + tunnel_type != RTE_MBUF_F_TX_TUNNEL_GRE && tunnel_type != RTE_MBUF_F_TX_TUNNEL_VXLAN)) { VLOG_WARN_RL(&rl, "%s: Unexpected tunnel type: %#"PRIx64, netdev_get_name(&dev->up), tunnel_type); diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c index ede5e16865..62e1a0c870 100644 --- a/lib/netdev-native-tnl.c +++ b/lib/netdev-native-tnl.c @@ -194,8 +194,7 @@ netdev_tnl_push_ip_header(struct dp_packet *packet, const void *header, packet_set_ipv6_flow_label(&ip6->ip6_flow, ipv6_label); packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size; - if (dp_packet_hwol_is_tunnel_geneve(packet) || - dp_packet_hwol_is_tunnel_vxlan(packet)) { + if (dp_packet_hwol_is_tunnel(packet)) { 
dp_packet_hwol_set_tx_outer_ipv6(packet); } else { dp_packet_hwol_set_tx_ipv6(packet); @@ -207,8 +206,7 @@ netdev_tnl_push_ip_header(struct dp_packet *packet, const void *header, ip = netdev_tnl_ip_hdr(eth); ip->ip_tot_len = htons(*ip_tot_size); /* Postpone checksum to when the packet is pushed to the port. */ - if (dp_packet_hwol_is_tunnel_geneve(packet) || - dp_packet_hwol_is_tunnel_vxlan(packet)) { + if (dp_packet_hwol_is_tunnel(packet)) { dp_packet_hwol_set_tx_outer_ipv4(packet); dp_packet_hwol_set_tx_outer_ipv4_csum(packet); } else { @@ -271,7 +269,9 @@ dp_packet_tnl_ol_process(struct dp_packet *packet, ip = dp_packet_l3(packet); if (data->tnl_type == OVS_VPORT_TYPE_GENEVE || - data->tnl_type == OVS_VPORT_TYPE_VXLAN) { + data->tnl_type == OVS_VPORT_TYPE_VXLAN || + data->tnl_type == OVS_VPORT_TYPE_GRE || + data->tnl_type == OVS_VPORT_TYPE_IP6GRE) { if (IP_VER(ip->ip_ihl_ver) == 4) { dp_packet_hwol_set_tx_ipv4(packet); @@ -286,6 +286,9 @@ dp_packet_tnl_ol_process(struct dp_packet *packet, dp_packet_hwol_set_tunnel_geneve(packet); } else if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) { dp_packet_hwol_set_tunnel_vxlan(packet); + } else if (data->tnl_type == OVS_VPORT_TYPE_GRE || + data->tnl_type == OVS_VPORT_TYPE_IP6GRE) { + dp_packet_hwol_set_tunnel_gre(packet); } } @@ -535,9 +538,13 @@ netdev_gre_push_header(const struct netdev *netdev, const struct ovs_action_push_tnl *data) { struct netdev_vport *dev = netdev_vport_cast(netdev); + uint16_t l3_ofs = packet->l3_ofs; + uint16_t l4_ofs = packet->l4_ofs; struct gre_base_hdr *greh; int ip_tot_size; + dp_packet_tnl_ol_process(packet, data); + greh = netdev_tnl_push_ip_header(packet, data->header, data->header_len, &ip_tot_size, 0); @@ -547,11 +554,24 @@ netdev_gre_push_header(const struct netdev *netdev, } if (greh->flags & htons(GRE_SEQ)) { - /* Last 4 byte is GRE seqno */ - int seq_ofs = gre_header_len(greh->flags) - 4; - ovs_16aligned_be32 *seq_opt = - ALIGNED_CAST(ovs_16aligned_be32 *, (char *)greh + seq_ofs); - 
put_16aligned_be32(seq_opt, htonl(atomic_count_inc(&dev->gre_seqno))); + if (!dp_packet_hwol_is_tso(packet)) { + /* Last 4 bytes are GRE seqno. */ + int seq_ofs = gre_header_len(greh->flags) - 4; + ovs_16aligned_be32 *seq_opt = + ALIGNED_CAST(ovs_16aligned_be32 *, (char *) greh + seq_ofs); + + put_16aligned_be32(seq_opt, + htonl(atomic_count_inc(&dev->gre_seqno))); + } else { + VLOG_WARN_RL(&err_rl, "Cannot use GRE Sequence numbers with TSO."); + } + } + + if (l3_ofs != UINT16_MAX) { + packet->inner_l3_ofs = l3_ofs + data->header_len; + } + if (l4_ofs != UINT16_MAX) { + packet->inner_l4_ofs = l4_ofs + data->header_len; } } diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index 9e163c2a63..38ea95bd49 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -2293,6 +2293,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, const struct flow_tnl *tnl = &match->flow.tunnel; struct flow_tnl *tnl_mask = &mask->tunnel; struct dpif_flow_stats adjust_stats; + bool exact_match_on_dl_type; bool recirc_act = false; uint32_t block_id = 0; struct tcf_id id; @@ -2310,6 +2311,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, memset(&flower, 0, sizeof flower); + exact_match_on_dl_type = mask->dl_type == htons(0xffff); chain = key->recirc_id; mask->recirc_id = 0; @@ -2503,7 +2505,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, mask->dl_type = 0; mask->in_port.odp_port = 0; - if (key->dl_type == htons(ETH_P_ARP)) { + if (exact_match_on_dl_type && key->dl_type == htons(ETH_P_ARP)) { flower.key.arp.spa = key->nw_src; flower.key.arp.tpa = key->nw_dst; flower.key.arp.sha = key->arp_sha; @@ -2522,7 +2524,8 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, memset(&mask->arp_tha, 0, sizeof mask->arp_tha); } - if (is_ip_any(key) && !is_ipv6_fragment_and_masked(key, mask)) { + if (exact_match_on_dl_type && is_ip_any(key) + && !is_ipv6_fragment_and_masked(key, mask)) { flower.key.ip_proto = 
key->nw_proto; flower.mask.ip_proto = mask->nw_proto; mask->nw_proto = 0; @@ -2552,9 +2555,9 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, } else { /* This scenario should not occur. Currently, all installed IP DP * flows perform a fully masked match on the fragmentation bits. - * However, since TC depends on this behavior, we return ENOTSUPP + * However, since TC depends on this behavior, we return EOPNOTSUPP * for now in case this behavior changes in the future. */ - return EOPNOTSUPP; + return EOPNOTSUPP; } if (key->nw_proto == IPPROTO_TCP) { diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 22840a058b..5ae3794699 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -47,6 +47,7 @@ enum netdev_ol_flags { NETDEV_TX_GENEVE_TNL_TSO = 1 << 6, NETDEV_TX_OFFLOAD_OUTER_IP_CKSUM = 1 << 7, NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM = 1 << 8, + NETDEV_TX_GRE_TNL_TSO = 1 << 9, }; /* A network device (e.g. an Ethernet device). diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index 2b2acd341d..46a62dbfc1 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -842,7 +842,8 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args, char **errp) } } else if (!strcmp(node->key, "remote_cert") || !strcmp(node->key, "remote_name") || - !strcmp(node->key, "psk")) { + !strcmp(node->key, "psk") || + !strncmp(node->key, "ipsec_", strlen("ipsec_"))) { /* When configuring OVS for IPsec, these keys may be set in the tunnel port's 'options' column. 'ovs-vswitchd' does not directly use them, but they are read by 'ovs-monitor-ipsec'. 
In order to diff --git a/lib/netdev.c b/lib/netdev.c index 02beac9d0b..9dd94ebdd7 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -916,11 +916,11 @@ netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch, } } } else if (!(netdev_flags & (NETDEV_TX_VXLAN_TNL_TSO | + NETDEV_TX_GRE_TNL_TSO | NETDEV_TX_GENEVE_TNL_TSO))) { DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { if (dp_packet_hwol_is_tso(packet) && - (dp_packet_hwol_is_tunnel_vxlan(packet) || - dp_packet_hwol_is_tunnel_geneve(packet))) { + dp_packet_hwol_is_tunnel(packet)) { return netdev_send_tso(netdev, qid, batch, concurrent_txq); } } @@ -1011,6 +1011,8 @@ netdev_push_header(const struct netdev *netdev, DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) { if (OVS_UNLIKELY(data->tnl_type != OVS_VPORT_TYPE_GENEVE && data->tnl_type != OVS_VPORT_TYPE_VXLAN && + data->tnl_type != OVS_VPORT_TYPE_GRE && + data->tnl_type != OVS_VPORT_TYPE_IP6GRE && dp_packet_hwol_is_tso(packet))) { COVERAGE_INC(netdev_push_header_drops); dp_packet_delete(packet); @@ -1019,16 +1021,17 @@ netdev_push_header(const struct netdev *netdev, netdev_get_name(netdev), netdev_get_type(netdev)); } else { if (data->tnl_type != OVS_VPORT_TYPE_GENEVE && - data->tnl_type != OVS_VPORT_TYPE_VXLAN) { + data->tnl_type != OVS_VPORT_TYPE_VXLAN && + data->tnl_type != OVS_VPORT_TYPE_GRE && + data->tnl_type != OVS_VPORT_TYPE_IP6GRE) { dp_packet_ol_send_prepare(packet, 0); - } else if (dp_packet_hwol_is_tunnel_geneve(packet) || - dp_packet_hwol_is_tunnel_vxlan(packet)) { + } else if (dp_packet_hwol_is_tunnel(packet)) { if (dp_packet_hwol_is_tso(packet)) { COVERAGE_INC(netdev_push_header_drops); dp_packet_delete(packet); VLOG_WARN_RL(&rl, "%s: Tunneling packets with TSO is not " "supported with multiple levels of " - "VXLAN or GENEVE encapsulation.", + "VXLAN, GENEVE, or GRE encapsulation.", netdev_get_name(netdev)); continue; } @@ -1480,6 +1483,7 @@ netdev_get_status(const struct netdev *netdev, struct smap *smap) 
OL_ADD_STAT("sctp_csum", NETDEV_TX_OFFLOAD_SCTP_CKSUM); OL_ADD_STAT("tcp_seg", NETDEV_TX_OFFLOAD_TCP_TSO); OL_ADD_STAT("vxlan_tso", NETDEV_TX_VXLAN_TNL_TSO); + OL_ADD_STAT("gre_tso", NETDEV_TX_GRE_TNL_TSO); OL_ADD_STAT("geneve_tso", NETDEV_TX_GENEVE_TNL_TSO); OL_ADD_STAT("out_ip_csum", NETDEV_TX_OFFLOAD_OUTER_IP_CKSUM); OL_ADD_STAT("out_udp_csum", NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM); diff --git a/lib/netlink.c b/lib/netlink.c index 1e8d5a8ec5..446a0679ed 100644 --- a/lib/netlink.c +++ b/lib/netlink.c @@ -29,6 +29,16 @@ #include "openvswitch/vlog.h" #include "util.h" +#ifdef HAVE_NETLINK +#include +#else +/* RTA_VIA */ +struct rtvia { + sa_family_t rtvia_family; + uint8_t rtvia_addr[]; +}; +#endif + VLOG_DEFINE_THIS_MODULE(netlink); /* A single (bad) Netlink message can in theory dump out many, many log @@ -819,6 +829,7 @@ min_attr_len(enum nl_attr_type type) case NL_A_IPV6: return 16; case NL_A_NESTED: return 0; case NL_A_LL_ADDR: return 6; /* ETH_ALEN */ + case NL_A_RTA_VIA: return sizeof(struct rtvia) + sizeof(struct in_addr); case N_NL_ATTR_TYPES: default: OVS_NOT_REACHED(); } } @@ -840,6 +851,7 @@ max_attr_len(enum nl_attr_type type) case NL_A_IPV6: return 16; case NL_A_NESTED: return SIZE_MAX; case NL_A_LL_ADDR: return 20; /* INFINIBAND_ALEN */ + case NL_A_RTA_VIA: return sizeof(struct rtvia) + sizeof(struct in6_addr); case N_NL_ATTR_TYPES: default: OVS_NOT_REACHED(); } } diff --git a/lib/netlink.h b/lib/netlink.h index 008604aa60..d98ef3a989 100644 --- a/lib/netlink.h +++ b/lib/netlink.h @@ -152,6 +152,7 @@ enum nl_attr_type NL_A_IPV6, NL_A_NESTED, NL_A_LL_ADDR, + NL_A_RTA_VIA, N_NL_ATTR_TYPES }; diff --git a/lib/route-table.c b/lib/route-table.c index c6cb21394a..2bbb51c08f 100644 --- a/lib/route-table.c +++ b/lib/route-table.c @@ -32,6 +32,7 @@ #include "netlink.h" #include "netlink-notifier.h" #include "netlink-socket.h" +#include "openvswitch/list.h" #include "openvswitch/ofpbuf.h" #include "ovs-router.h" #include "packets.h" @@ -47,27 +48,6 @@ 
VLOG_DEFINE_THIS_MODULE(route_table); COVERAGE_DEFINE(route_table_dump); -struct route_data { - /* Copied from struct rtmsg. */ - unsigned char rtm_dst_len; - bool local; - - /* Extracted from Netlink attributes. */ - struct in6_addr rta_dst; /* 0 if missing. */ - struct in6_addr rta_prefsrc; /* 0 if missing. */ - struct in6_addr rta_gw; - char ifname[IFNAMSIZ]; /* Interface name. */ - uint32_t mark; -}; - -/* A digested version of a route message sent down by the kernel to indicate - * that a route has changed. */ -struct route_table_msg { - bool relevant; /* Should this message be processed? */ - int nlmsg_type; /* e.g. RTM_NEWROUTE, RTM_DELROUTE. */ - struct route_data rd; /* Data parsed from this message. */ -}; - static struct ovs_mutex route_table_mutex = OVS_MUTEX_INITIALIZER; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); @@ -76,7 +56,7 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); static uint64_t rt_change_seq; static struct nln *nln = NULL; -static struct route_table_msg rtmsg; +static struct route_table_msg nln_rtmsg_change; static struct nln_notifier *route_notifier = NULL; static struct nln_notifier *route6_notifier = NULL; static struct nln_notifier *name_notifier = NULL; @@ -84,14 +64,31 @@ static struct nln_notifier *name_notifier = NULL; static bool route_table_valid = false; static void route_table_reset(void); -static void route_table_handle_msg(const struct route_table_msg *); -static int route_table_parse(struct ofpbuf *, void *change); -static void route_table_change(const struct route_table_msg *, void *); +static void route_table_handle_msg(const struct route_table_msg *, void *aux); +static void route_table_change(struct route_table_msg *, void *aux); static void route_map_clear(void); static void name_table_init(void); static void name_table_change(const struct rtnetlink_change *, void *); +static void +route_data_destroy_nexthops__(struct route_data *rd) +{ + struct route_data_nexthop *rdnh; + + 
LIST_FOR_EACH_POP (rdnh, nexthop_node, &rd->nexthops) { + if (rdnh && rdnh != &rd->primary_next_hop__) { + free(rdnh); + } + } +} + +void +route_data_destroy(struct route_data *rd) +{ + route_data_destroy_nexthops__(rd); +} + uint64_t route_table_get_change_seq(void) { @@ -110,7 +107,7 @@ route_table_init(void) ovs_assert(!route6_notifier); ovs_router_init(); - nln = nln_create(NETLINK_ROUTE, route_table_parse, &rtmsg); + nln = nln_create(NETLINK_ROUTE, route_table_parse, &nln_rtmsg_change); route_notifier = nln_notifier_create(nln, RTNLGRP_IPV4_ROUTE, @@ -155,8 +152,10 @@ route_table_wait(void) ovs_mutex_unlock(&route_table_mutex); } -static bool -route_table_dump_one_table(unsigned char id) +bool +route_table_dump_one_table(uint32_t id, + route_table_handle_msg_callback *handle_msg_cb, + void *aux) { uint64_t reply_stub[NL_DUMP_BUFSIZE / 8]; struct ofpbuf request, reply, buf; @@ -170,7 +169,13 @@ route_table_dump_one_table(unsigned char id) rq_msg = ofpbuf_put_zeros(&request, sizeof *rq_msg); rq_msg->rtm_family = AF_UNSPEC; - rq_msg->rtm_table = id; + + if (id > UCHAR_MAX) { + rq_msg->rtm_table = RT_TABLE_UNSPEC; + nl_msg_put_u32(&request, RTA_TABLE, id); + } else { + rq_msg->rtm_table = id; + } nl_dump_start(&dump, NETLINK_ROUTE, &request); ofpbuf_uninit(&request); @@ -186,7 +191,8 @@ route_table_dump_one_table(unsigned char id) if (!(nlmsghdr->nlmsg_flags & NLM_F_DUMP_FILTERED)) { filtered = false; } - route_table_handle_msg(&msg); + handle_msg_cb(&msg, aux); + route_data_destroy(&msg.rd); } } ofpbuf_uninit(&buf); @@ -198,7 +204,7 @@ route_table_dump_one_table(unsigned char id) static void route_table_reset(void) { - unsigned char tables[] = { + uint32_t tables[] = { RT_TABLE_DEFAULT, RT_TABLE_MAIN, RT_TABLE_LOCAL, @@ -212,19 +218,39 @@ route_table_reset(void) COVERAGE_INC(route_table_dump); for (size_t i = 0; i < ARRAY_SIZE(tables); i++) { - if (!route_table_dump_one_table(tables[i])) { + if (!route_table_dump_one_table(tables[i], + route_table_handle_msg, 
NULL)) { /* Got unfiltered reply, no need to dump further. */ break; } } } -/* Return RTNLGRP_IPV4_ROUTE or RTNLGRP_IPV6_ROUTE on success, 0 on parse - * error. */ +/* Returns true if the given route requires nexthop information (output + * interface, nexthop IP, ...). Returns false for special route types + * that don't need this information. */ +static bool +route_type_needs_nexthop(unsigned char rtmsg_type) +{ + switch (rtmsg_type) { + case RTN_BLACKHOLE: + case RTN_THROW: + case RTN_UNREACHABLE: + case RTN_PROHIBIT: + return false; + + default: + return true; + } +} + static int -route_table_parse(struct ofpbuf *buf, void *change_) +route_table_parse__(struct ofpbuf *buf, size_t ofs, + const struct nlmsghdr *nlmsg, + const struct rtmsg *rtm, + const struct rtnexthop *rtnh, + struct route_table_msg *change) { - struct route_table_msg *change = change_; bool parsed, ipv4 = false; static const struct nl_policy policy[] = { @@ -234,6 +260,9 @@ route_table_parse(struct ofpbuf *buf, void *change_) [RTA_MARK] = { .type = NL_A_U32, .optional = true }, [RTA_PREFSRC] = { .type = NL_A_U32, .optional = true }, [RTA_TABLE] = { .type = NL_A_U32, .optional = true }, + [RTA_PRIORITY] = { .type = NL_A_U32, .optional = true }, + [RTA_VIA] = { .type = NL_A_RTA_VIA, .optional = true }, + [RTA_MULTIPATH] = { .type = NL_A_NESTED, .optional = true }, }; static const struct nl_policy policy6[] = { @@ -243,33 +272,36 @@ route_table_parse(struct ofpbuf *buf, void *change_) [RTA_GATEWAY] = { .type = NL_A_IPV6, .optional = true }, [RTA_PREFSRC] = { .type = NL_A_IPV6, .optional = true }, [RTA_TABLE] = { .type = NL_A_U32, .optional = true }, + [RTA_PRIORITY] = { .type = NL_A_U32, .optional = true }, + [RTA_VIA] = { .type = NL_A_RTA_VIA, .optional = true }, + [RTA_MULTIPATH] = { .type = NL_A_NESTED, .optional = true }, }; struct nlattr *attrs[ARRAY_SIZE(policy)]; - const struct rtmsg *rtm; - - rtm = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *rtm); if (rtm->rtm_family == AF_INET) { - parsed = 
nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct rtmsg), - policy, attrs, ARRAY_SIZE(policy)); + parsed = nl_policy_parse(buf, ofs, policy, attrs, + ARRAY_SIZE(policy)); ipv4 = true; } else if (rtm->rtm_family == AF_INET6) { - parsed = nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct rtmsg), - policy6, attrs, ARRAY_SIZE(policy6)); + parsed = nl_policy_parse(buf, ofs, policy6, attrs, + ARRAY_SIZE(policy6)); } else { VLOG_DBG_RL(&rl, "received non AF_INET rtnetlink route message"); return 0; } if (parsed) { - const struct nlmsghdr *nlmsg; - uint32_t table_id; + struct route_data_nexthop *rdnh = NULL; int rta_oif; /* Output interface index. */ - nlmsg = buf->data; - memset(change, 0, sizeof *change); + + ovs_list_init(&change->rd.nexthops); + rdnh = rtnh ? xzalloc(sizeof *rdnh) : &change->rd.primary_next_hop__; + ovs_list_insert(&change->rd.nexthops, &rdnh->nexthop_node); + + rdnh->family = rtm->rtm_family; change->relevant = true; if (rtm->rtm_scope == RT_SCOPE_NOWHERE) { @@ -281,33 +313,33 @@ route_table_parse(struct ofpbuf *buf, void *change_) change->relevant = false; } - table_id = rtm->rtm_table; + change->rd.rta_table_id = rtm->rtm_table; if (attrs[RTA_TABLE]) { - table_id = nl_attr_get_u32(attrs[RTA_TABLE]); - } - /* Do not consider changes in non-standard routing tables. */ - if (table_id - && table_id != RT_TABLE_DEFAULT - && table_id != RT_TABLE_MAIN - && table_id != RT_TABLE_LOCAL) { - change->relevant = false; + change->rd.rta_table_id = nl_attr_get_u32(attrs[RTA_TABLE]); } change->nlmsg_type = nlmsg->nlmsg_type; - change->rd.rtm_dst_len = rtm->rtm_dst_len + (ipv4 ? 
96 : 0); - change->rd.local = rtm->rtm_type == RTN_LOCAL; - if (attrs[RTA_OIF]) { - rta_oif = nl_attr_get_u32(attrs[RTA_OIF]); + change->rd.rtm_dst_len = rtm->rtm_dst_len; + change->rd.rtm_protocol = rtm->rtm_protocol; + change->rd.rtn_local = rtm->rtm_type == RTN_LOCAL; + if (attrs[RTA_OIF] && rtnh) { + VLOG_DBG_RL(&rl, "unexpected RTA_OIF attribute while parsing " + "nested RTA_MULTIPATH attributes"); + goto error_out; + } + if (attrs[RTA_OIF] || rtnh) { + rta_oif = rtnh ? rtnh->rtnh_ifindex + : nl_attr_get_u32(attrs[RTA_OIF]); - if (!if_indextoname(rta_oif, change->rd.ifname)) { + if (!if_indextoname(rta_oif, rdnh->ifname)) { int error = errno; - VLOG_DBG_RL(&rl, "Could not find interface name[%u]: %s", + VLOG_DBG_RL(&rl, "could not find interface name[%u]: %s", rta_oif, ovs_strerror(error)); if (error == ENXIO) { change->relevant = false; } else { - return 0; + goto error_out; } } } @@ -337,40 +369,197 @@ route_table_parse(struct ofpbuf *buf, void *change_) if (ipv4) { ovs_be32 gw; gw = nl_attr_get_be32(attrs[RTA_GATEWAY]); - in6_addr_set_mapped_ipv4(&change->rd.rta_gw, gw); + in6_addr_set_mapped_ipv4(&rdnh->addr, gw); } else { - change->rd.rta_gw = nl_attr_get_in6_addr(attrs[RTA_GATEWAY]); + rdnh->addr = nl_attr_get_in6_addr(attrs[RTA_GATEWAY]); } } if (attrs[RTA_MARK]) { - change->rd.mark = nl_attr_get_u32(attrs[RTA_MARK]); + change->rd.rta_mark = nl_attr_get_u32(attrs[RTA_MARK]); + } + if (attrs[RTA_PRIORITY]) { + change->rd.rta_priority = nl_attr_get_u32(attrs[RTA_PRIORITY]); + } + if (attrs[RTA_VIA]) { + const struct rtvia *rtvia = nl_attr_get(attrs[RTA_VIA]); + ovs_be32 addr; + + if (attrs[RTA_GATEWAY]) { + VLOG_DBG_RL(&rl, "route message can not contain both " + "RTA_GATEWAY and RTA_VIA"); + goto error_out; + } + + rdnh->family = rtvia->rtvia_family; + + switch (rdnh->family) { + case AF_INET: + if (nl_attr_get_size(attrs[RTA_VIA]) + - sizeof *rtvia < sizeof addr) { + VLOG_DBG_RL(&rl, "got short message while parsing RTA_VIA " + "attribute for family 
AF_INET"); + goto error_out; + } + memcpy(&addr, rtvia->rtvia_addr, sizeof addr); + in6_addr_set_mapped_ipv4(&rdnh->addr, addr); + break; + + case AF_INET6: + if (nl_attr_get_size(attrs[RTA_VIA]) + - sizeof *rtvia < sizeof rdnh->addr) { + VLOG_DBG_RL(&rl, "got short message while parsing RTA_VIA " + "attribute for family AF_INET6"); + goto error_out; + } + memcpy(&rdnh->addr, rtvia->rtvia_addr, sizeof rdnh->addr); + break; + + default: + VLOG_DBG_RL(&rl, "unsupported address family, %d, " + "in via attribute", rdnh->family); + goto error_out; + } + } + if (attrs[RTA_MULTIPATH]) { + const struct nlattr *nla; + size_t left; + + if (rtnh) { + VLOG_DBG_RL(&rl, "unexpected nested RTA_MULTIPATH attribute"); + goto error_out; + } + + /* The change->rd->nexthops list is unconditionally populated with + * a single rdnh entry as we start parsing above. Multiple + * branches above may access it or jump to error_out, and having it + * on the list is the only way to ensure proper cleanup. + * + * Getting to this point, we know that the above branches has not + * provided next hop information, because information about + * multiple next hops is encoded in the nested attributes after the + * RTA_MULTIPATH attribute. + * + * Before retrieving those we need to remove the empty rdnh entry + * from the list. 
*/ + route_data_destroy_nexthops__(&change->rd); + + NL_NESTED_FOR_EACH (nla, left, attrs[RTA_MULTIPATH]) { + struct route_table_msg mp_change; + struct rtnexthop *mp_rtnh; + struct ofpbuf mp_buf; + + ofpbuf_use_const(&mp_buf, nla, nla->nla_len); + mp_rtnh = ofpbuf_try_pull(&mp_buf, sizeof *mp_rtnh); + + if (!mp_rtnh) { + VLOG_DBG_RL(&rl, "got short message while parsing " + "multipath attribute"); + goto error_out; + } + + if (!route_table_parse__(&mp_buf, 0, nlmsg, rtm, mp_rtnh, + &mp_change)) { + goto error_out; + } + ovs_list_push_back_all(&change->rd.nexthops, + &mp_change.rd.nexthops); + } + } + if (route_type_needs_nexthop(rtm->rtm_type) + && !attrs[RTA_OIF] && !attrs[RTA_GATEWAY] + && !attrs[RTA_VIA] && !attrs[RTA_MULTIPATH]) { + VLOG_DBG_RL(&rl, "route message needs an RTA_OIF, RTA_GATEWAY, " + "RTA_VIA or RTA_MULTIPATH attribute"); + goto error_out; + } + /* Add any additional RTA attribute processing before RTA_MULTIPATH. */ + + /* Ensure that the change->rd->nexthops list is cleared in cases when + * the route does not need a next hop. */ + if (!route_type_needs_nexthop(rtm->rtm_type)) { + route_data_destroy_nexthops__(&change->rd); } } else { VLOG_DBG_RL(&rl, "received unparseable rtnetlink route message"); - return 0; + goto error_out; } /* Success. */ return ipv4 ? RTNLGRP_IPV4_ROUTE : RTNLGRP_IPV6_ROUTE; + +error_out: + route_data_destroy(&change->rd); + return 0; +} + +/* Parse Netlink message in buf, which is expected to contain a UAPI rtmsg + * header and associated route attributes. + * + * Return RTNLGRP_IPV4_ROUTE or RTNLGRP_IPV6_ROUTE on success, and 0 on a parse + * error. + * + * On success, memory may have been allocated, and it is the caller's + * responsibility to free it with a call to route_data_destroy(). + * + * In case of error, any allocated memory will be freed before returning. 
*/ +int +route_table_parse(struct ofpbuf *buf, void *change) +{ + struct nlmsghdr *nlmsg; + struct rtmsg *rtm; + + nlmsg = ofpbuf_at(buf, 0, NLMSG_HDRLEN); + rtm = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *rtm); + + if (!nlmsg || !rtm) { + return 0; + } + + return route_table_parse__(buf, NLMSG_HDRLEN + sizeof *rtm, + nlmsg, rtm, NULL, change); +} + +static bool +is_standard_table_id(uint32_t table_id) +{ + return !table_id + || table_id == RT_TABLE_DEFAULT + || table_id == RT_TABLE_MAIN + || table_id == RT_TABLE_LOCAL; } static void -route_table_change(const struct route_table_msg *change OVS_UNUSED, - void *aux OVS_UNUSED) +route_table_change(struct route_table_msg *change, void *aux OVS_UNUSED) { - if (!change || change->relevant) { + if (!change + || (change->relevant + && is_standard_table_id(change->rd.rta_table_id))) { route_table_valid = false; } + if (change) { + route_data_destroy(&change->rd); + } } static void -route_table_handle_msg(const struct route_table_msg *change) +route_table_handle_msg(const struct route_table_msg *change, + void *aux OVS_UNUSED) { - if (change->relevant && change->nlmsg_type == RTM_NEWROUTE) { + if (change->relevant && change->nlmsg_type == RTM_NEWROUTE + && !ovs_list_is_empty(&change->rd.nexthops)) { const struct route_data *rd = &change->rd; - - ovs_router_insert(rd->mark, &rd->rta_dst, rd->rtm_dst_len, - rd->local, rd->ifname, &rd->rta_gw, + const struct route_data_nexthop *rdnh; + + /* The ovs-router module currently does not implement lookup or + * storage for routes with multiple next hops. For backwards + * compatibility, we use the first next hop. */ + rdnh = CONTAINER_OF(ovs_list_front(&change->rd.nexthops), + const struct route_data_nexthop, nexthop_node); + + ovs_router_insert(rd->rta_mark, &rd->rta_dst, + IN6_IS_ADDR_V4MAPPED(&rd->rta_dst) + ? 
rd->rtm_dst_len + 96 : rd->rtm_dst_len, + rd->rtn_local, rdnh->ifname, &rdnh->addr, &rd->rta_prefsrc); } } diff --git a/lib/route-table.h b/lib/route-table.h index 3a02d737ae..b805e84dd6 100644 --- a/lib/route-table.h +++ b/lib/route-table.h @@ -24,8 +24,133 @@ #include #include +#include "openvswitch/list.h" +#include "openvswitch/ofpbuf.h" #include "openvswitch/types.h" +/* + * route-table, system route table synchronization for Open vSwitch. + * + * Overview + * ======== + * + * The route-table module has two use cases: + * + * 1) Internal use by Open vSwitch which together with the ovs-router module + * implement route lookup for features such as flow based tunneling, + * userspace tunneling, and sFlow. + * + * 2) External use by projects such as Open Virtual Network (OVN), that use + * Open vSwitch as a compile time library. + * + * Typical External Usage + * ====================== + * + * static void + * my_handle_msg(const struct route_table_msg *change, void *data) + * { + * struct my_data *aux = data; + * + * if (data) { + * aux->rta_dst = change->rd.rta_dst; + * } + * } + * + * static void + * my_route_table_dump(void) + * { + * struct my_data aux; + * + * route_table_dump_one_table(RT_TABLE_MAIN, my_handle_msg, &aux); + * } + * + * static void + * my_route_table_change(struct route_table_msg *change, void *aux OVS_UNUSED) + * { + * my_handle_msg(change, NULL); + * route_data_destroy(&change->rd); + * } + * + * static void + * my_init(void) + * { + * static struct nln_notifier *route6_notifier = NULL; + * static struct nln_notifier *route_notifier = NULL; + * static struct route_table_msg nln_change; + * static struct nln *nln = NULL; + * + * nln = nln_create(NETLINK_ROUTE, route_table_parse, &nln_change); + * + * route6_notifier = + * nln_notifier_create(nln, RTNLGRP_IPV6_ROUTE, + * (nln_notify_func *) my_route_table_change, + * NULL); + * + * route_notifier = + * nln_notifier_create(nln, RTNLGRP_IPV4_ROUTE, + * (nln_notify_func *) 
my_route_table_change, + * NULL); + * } + * + * Thread-safety + * ============= + * + * Assuming thread safe initialization of dependencies such as netlink socket, + * netlink notifier and so on, the functions in this module are thread safe. + */ + +/* Information about a next hop stored in a linked list with base in struct + * route_data. Please refer to comment in struct route_data for details. */ +struct route_data_nexthop { + struct ovs_list nexthop_node; + + sa_family_t family; + struct in6_addr addr; + char ifname[IFNAMSIZ]; /* Interface name. */ +}; + +struct route_data { + /* Routes can have multiple next hops per destination. + * + * Each next hop has its own set of attributes such as address family, + * interface and IP address. + * + * When retrieving information about a route from the kernel, in the case + * of multiple next hops, information is provided as nested attributes. + * + * A linked list with struct route_data_nexthop entries is used to store + * this information as we parse each attribute. + * + * For the common case of one next hop, the nexthops list will contain a + * single entry pointing to the struct route_data primary_next_hop__ + * element. + * + * Any dynamically allocated list elements MUST be freed with a call to the + * route_data_destroy function. */ + struct ovs_list nexthops; + struct route_data_nexthop primary_next_hop__; + + /* Copied from struct rtmsg. */ + unsigned char rtm_dst_len; + unsigned char rtm_protocol; + bool rtn_local; + + /* Extracted from Netlink attributes. */ + struct in6_addr rta_dst; /* 0 if missing. */ + struct in6_addr rta_prefsrc; /* 0 if missing. */ + uint32_t rta_mark; /* 0 if missing. */ + uint32_t rta_table_id; /* 0 if missing. */ + uint32_t rta_priority; /* 0 if missing. */ +}; + +/* A digested version of a route message sent down by the kernel to indicate + * that a route has changed. */ +struct route_table_msg { + bool relevant; /* Should this message be processed? 
*/ + uint16_t nlmsg_type; /* e.g. RTM_NEWROUTE, RTM_DELROUTE. */ + struct route_data rd; /* Data parsed from this message. */ +}; + uint64_t route_table_get_change_seq(void); void route_table_init(void); void route_table_run(void); @@ -33,4 +158,13 @@ void route_table_wait(void); bool route_table_fallback_lookup(const struct in6_addr *ip6_dst, char name[], struct in6_addr *gw6); + +typedef void route_table_handle_msg_callback(const struct route_table_msg *, + void *aux); + +bool route_table_dump_one_table(uint32_t id, + route_table_handle_msg_callback *, + void *aux); +int route_table_parse(struct ofpbuf *, void *change); +void route_data_destroy(struct route_data *); #endif /* route-table.h */ diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 4cc7001a5b..e59ff17ade 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -6938,7 +6938,8 @@ rewrite_flow_push_nsh(struct xlate_ctx *ctx, "supported for packet type (%d,0x%x)", pt_ns(packet_type), pt_ns_type(packet_type)); ctx->error = XLATE_UNSUPPORTED_PACKET_TYPE; - return buf; + ofpbuf_delete(buf); + return NULL; } /* Note that we have matched on packet_type! */ wc->masks.packet_type = OVS_BE32_MAX; diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c index fbc7ad5efe..aa2e27d844 100644 --- a/ovsdb/ovsdb-server.c +++ b/ovsdb/ovsdb-server.c @@ -504,7 +504,7 @@ add_database_config(struct shash *db_conf, const char *opt, conf = shash_replace_nocopy(db_conf, filename, conf); if (conf) { - VLOG_WARN("Duplicate database configuration: %s", filename); + VLOG_WARN("Duplicate database configuration: %s", opt); db_config_destroy(conf); } } diff --git a/python/ovs/flowviz/odp/graph.py b/python/ovs/flowviz/odp/graph.py index 4d1fb7493c..c9734efece 100644 --- a/python/ovs/flowviz/odp/graph.py +++ b/python/ovs/flowviz/odp/graph.py @@ -14,16 +14,26 @@ """ Defines a Datapath Graph using graphviz. 
""" import colorsys -import graphviz import random +import sys from ovs.flowviz.odp.html import HTMLTree, HTMLFormatter from ovs.flowviz.odp.tree import FlowTree from ovs.flowviz.process import FileProcessor +try: + import graphviz +except ImportError: + graphviz = None + class GraphProcessor(FileProcessor): def __init__(self, opts): + if graphviz is None: + print("ERROR: The graph sub-command depends on the graphviz " + "Python library, which does not appear to be installed.", + file=sys.stderr) + sys.exit(1) super().__init__(opts, "odp") def start_file(self, name, filename): diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/openvswitch-fedora.spec.in index 715cbf7aa7..26c0f6ec48 100644 --- a/rhel/openvswitch-fedora.spec.in +++ b/rhel/openvswitch-fedora.spec.in @@ -148,7 +148,7 @@ License: ASL 2.0 This provides shared library, libopenswitch.so and the openvswitch header files needed to build an external application. -%if 0%{?rhel} > 7 || 0%{?fedora} > 28 +%if 0%{?rhel} > 7 || (0%{?fedora} > 28 && 0%{?fedora} < 41) %package -n network-scripts-%{name} Summary: Open vSwitch legacy network service support License: ASL 2.0 @@ -436,7 +436,7 @@ fi %exclude %{_libdir}/*.la %exclude %{_libdir}/*.a -%if 0%{?rhel} > 7 || 0%{?fedora} > 28 +%if 0%{?rhel} > 7 || (0%{?fedora} > 28 && 0%{?fedora} < 41) %files -n network-scripts-%{name} %{_sysconfdir}/sysconfig/network-scripts/ifup-ovs %{_sysconfdir}/sysconfig/network-scripts/ifdown-ovs @@ -463,7 +463,7 @@ fi %{_unitdir}/ovs-vswitchd.service %{_unitdir}/ovs-delete-transient-ports.service %{_datadir}/openvswitch/scripts/openvswitch.init -%if ! (0%{?rhel} > 7 || 0%{?fedora} > 28) +%if ! 
(0%{?rhel} > 7 || (0%{?fedora} > 28 && 0%{?fedora} < 41)) %{_sysconfdir}/sysconfig/network-scripts/ifup-ovs %{_sysconfdir}/sysconfig/network-scripts/ifdown-ovs %endif diff --git a/rhel/usr_lib_systemd_system_openvswitch-ipsec.service b/rhel/usr_lib_systemd_system_openvswitch-ipsec.service index 92dad44f93..913598f080 100644 --- a/rhel/usr_lib_systemd_system_openvswitch-ipsec.service +++ b/rhel/usr_lib_systemd_system_openvswitch-ipsec.service @@ -6,8 +6,11 @@ After=openvswitch.service [Service] Type=forking PIDFile=/run/openvswitch/ovs-monitor-ipsec.pid -ExecStart=/usr/share/openvswitch/scripts/ovs-ctl \ - --ike-daemon=libreswan start-ovs-ipsec +Restart=on-failure +EnvironmentFile=/etc/openvswitch/default.conf +EnvironmentFile=-/etc/sysconfig/openvswitch +ExecStart=/usr/share/openvswitch/scripts/ovs-ctl --no-monitor \ + --ike-daemon=libreswan start-ovs-ipsec $OPTIONS ExecStop=/usr/share/openvswitch/scripts/ovs-ctl stop-ovs-ipsec [Install] diff --git a/rhel/usr_share_openvswitch_scripts_systemd_sysconfig.template b/rhel/usr_share_openvswitch_scripts_systemd_sysconfig.template index c467d02db9..63833c4d8e 100644 --- a/rhel/usr_share_openvswitch_scripts_systemd_sysconfig.template +++ b/rhel/usr_share_openvswitch_scripts_systemd_sysconfig.template @@ -25,6 +25,9 @@ # --ovs-vswitchd-options='-vconsole:dbg -vfile:dbg' # --ovsdb-server-options='-vconsole:dbg -vfile:dbg' # +# Or to start with non-root IPsec config file: +# --ovs-monitor-ipsec-options='--ipsec-conf=/etc/ipsec.d/ovs.conf --root-ipsec-conf=/etc/ipsec.conf' +# OPTIONS="" # Uncomment and set the OVS User/Group value diff --git a/tests/automake.mk b/tests/automake.mk index edfc2cb335..59f5387612 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -498,6 +498,7 @@ endif if LINUX tests_ovstest_SOURCES += \ + tests/test-lib-route-table.c \ tests/test-netlink-conntrack.c \ tests/test-netlink-policy.c \ tests/test-psample.c diff --git a/tests/dpif-netdev.at b/tests/dpif-netdev.at index 36cea6aa95..60060ee2e0 
100644 --- a/tests/dpif-netdev.at +++ b/tests/dpif-netdev.at @@ -658,11 +658,11 @@ OVS_VSWITCHD_START( other-config:datapath-id=1234 fail-mode=secure]) AT_CHECK([ovs-vsctl get interface p1 status | sed -n 's/^{\(.*\).*}$/\1/p'], [0], [dnl -tx_geneve_tso_offload="false", tx_ip_csum_offload="false", tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false", tx_sctp_csum_offload="false", tx_tcp_csum_offload="false", tx_tcp_seg_offload="false", tx_udp_csum_offload="false", tx_vxlan_tso_offload="false" +tx_geneve_tso_offload="false", tx_gre_tso_offload="false", tx_ip_csum_offload="false", tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false", tx_sctp_csum_offload="false", tx_tcp_csum_offload="false", tx_tcp_seg_offload="false", tx_udp_csum_offload="false", tx_vxlan_tso_offload="false" ], []) AT_CHECK([ovs-vsctl get interface br0 status | sed -n 's/^{\(.*\).*}$/\1/p'], [0], [dnl -tx_geneve_tso_offload="false", tx_ip_csum_offload="false", tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false", tx_sctp_csum_offload="false", tx_tcp_csum_offload="false", tx_tcp_seg_offload="false", tx_udp_csum_offload="false", tx_vxlan_tso_offload="false" +tx_geneve_tso_offload="false", tx_gre_tso_offload="false", tx_ip_csum_offload="false", tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false", tx_sctp_csum_offload="false", tx_tcp_csum_offload="false", tx_tcp_seg_offload="false", tx_udp_csum_offload="false", tx_vxlan_tso_offload="false" ], []) OVS_VSWITCHD_STOP @@ -937,15 +937,26 @@ AT_CHECK([ovs-vsctl add-br int-br -- set bridge int-br datapath_type=dummy \ options:csum=true ofport_request=4 \ -- add-port int-br t4 -- set Interface t4 type=geneve \ options:remote_ip=2001:cafe::93 options:key=123 \ - options:csum=true ofport_request=5], [0]) + options:csum=true ofport_request=5 \ + -- add-port int-br t5 -- set Interface t5 type=gre \ + options:remote_ip=2001:cafe::93 options:key=123 \ + options:csum=true ofport_request=6 \ + -- add-port int-br t6 -- set 
Interface t6 type=gre \ + options:remote_ip=1.1.2.92 options:key=123 \ + options:csum=false ofport_request=7], [0]) -flow_s="eth(src=8a:bf:7e:2f:05:84,dst=0a:8f:39:4f:e0:73),eth_type(0x0800), - ipv4(src=192.168.123.2,dst=192.168.123.1,proto=6,tos=1,ttl=64,frag=no), - tcp(src=54392,dst=5201),tcp_flags(ack)" +dnl The final tunnel intentionally has checksum turned off to exercise a +dnl different code path, there is no GRE checksum offload anyways. -flow_s_v6="eth(src=8a:bf:7e:2f:05:84,dst=0a:8f:39:4f:e0:73),eth_type(0x86dd), - ipv6(src=2001:cafe::88,dst=2001:cafe::92,proto=6), - tcp(src=54392,dst=5201),tcp_flags(ack)" +m4_define([IPV4_TSO], [m4_join([,], + [eth(src=8a:bf:7e:2f:05:84,dst=0a:8f:39:4f:e0:73),eth_type(0x0800)], + [ipv4(src=192.168.123.2,dst=192.168.123.1,proto=6,tos=1,ttl=64,frag=no)], + [tcp(src=54392,dst=5201),tcp_flags(ack)])]) + +m4_define([IPV6_TSO], [m4_join([,], + [eth(src=8a:bf:7e:2f:05:84,dst=0a:8f:39:4f:e0:73),eth_type(0x86dd)], + [ipv6(src=2001:cafe::88,dst=2001:cafe::92,proto=6)], + [tcp(src=54392,dst=5201),tcp_flags(ack)])]) dnl Setup dummy interface tunnel connectivity. 
AT_CHECK([ovs-appctl netdev-dummy/ip4addr br1 1.1.2.88/24], [0], [OK @@ -968,9 +979,9 @@ AT_CHECK([ovs-vsctl set Interface p1 options:tx_pcap=p1.pcap -- \ set Interface int-br options:ol_ip_csum_set_good=false -- \ set Interface int-br options:ol_tso_segsz=500]) -AT_CHECK([ovs-appctl netdev-dummy/receive int-br "in_port(2),${flow_s}" \ +AT_CHECK([ovs-appctl netdev-dummy/receive int-br "IPV4_TSO" \ --len 2054]) -AT_CHECK([ovs-appctl netdev-dummy/receive int-br "in_port(2),${flow_s_v6}" \ +AT_CHECK([ovs-appctl netdev-dummy/receive int-br "IPV6_TSO" \ --len 2074]) dnl Check that first we have the following packets: @@ -984,10 +995,26 @@ dnl - IPv6 Geneve tunnel with IPv4 payload dnl - IPv6 Geneve tunnel with IPv6 payload dnl - IPv6 Geneve tunnel with IPv4 payload dnl - IPv6 Geneve tunnel with IPv6 payload +dnl - IPv4 GRE tunnel with IPv4 payload +dnl - IPv4 GRE tunnel with IPv6 payload +dnl - IPv6 GRE tunnel with IPv4 payload +dnl - IPv6 GRE tunnel with IPv6 payload dnl These are sorted since OVS may send payloads to the tunnels in any order. 
zero400=$(printf '0%.0s' $(seq 800)) zero100=$(printf '0%.0s' $(seq 200)) AT_CHECK_UNQUOTED([ovs-pcap p1.pcap | sort], [0], [dnl +[aabbcc000001aa55aa55000308004500025a00004000402f31c0010102580101025c200065580000007b0a8f394fe0738abf]dnl +[7e2f058486dd60000000020806002001cafe0000000000000000000000882001cafe000000000000000000000092d4781451]dnl +[000000000000000050100000edfd0000${zero100}${zero400}] +[aabbcc000001aa55aa55000308004500025a00014000402f31bf010102580101025c200065580000007b0a8f394fe0738abf]dnl +[7e2f058486dd60000000020806002001cafe0000000000000000000000882001cafe000000000000000000000092d4781451]dnl +[000001f40000000050100000ec090000${zero100}${zero400}] +[aabbcc000001aa55aa55000308004500025a00024000402f31be010102580101025c200065580000007b0a8f394fe0738abf]dnl +[7e2f058486dd60000000020806002001cafe0000000000000000000000882001cafe000000000000000000000092d4781451]dnl +[000003e80000000050100000ea150000${zero100}${zero400}] +[aabbcc000001aa55aa55000308004500025a00034000402f31bd010102580101025c200065580000007b0a8f394fe0738abf]dnl +[7e2f058486dd60000000020806002001cafe0000000000000000000000882001cafe000000000000000000000092d4781451]dnl +[000005dc0000000050100000e8210000${zero100}${zero400}] [aabbcc000001aa55aa55000308004500026200004000401131d6010102580101025ce01312b5024e5f360800000000007b00]dnl [0a8f394fe0738abf7e2f058486dd60000000020806002001cafe0000000000000000000000882001cafe0000000000000000]dnl [00000092d4781451000000000000000050100000edfd0000${zero100}${zero400}] @@ -1012,6 +1039,18 @@ AT_CHECK_UNQUOTED([ovs-pcap p1.pcap | sort], [0], [dnl [aabbcc000001aa55aa55000308004500026200034000401131d3010102580101025ce01317c1024efcd10000655800007b00]dnl [0a8f394fe0738abf7e2f058486dd60000000020806002001cafe0000000000000000000000882001cafe0000000000000000]dnl [00000092d4781451000005dc0000000050100000e8210000${zero100}${zero400}] +[aabbcc000001aa55aa55000308004501024600004000402f31d3010102580101025c200065580000007b0a8f394fe0738abf]dnl 
+[7e2f058408004501021c0000000040060187c0a87b02c0a87b01d47814510000000000000000501000004dc20000]dnl +[${zero100}${zero400}] +[aabbcc000001aa55aa55000308004501024600014000402f31d2010102580101025c200065580000007b0a8f394fe0738abf]dnl +[7e2f058408004501021c0001000040060186c0a87b02c0a87b01d4781451000001f400000000501000004bce0000]dnl +[${zero100}${zero400}] +[aabbcc000001aa55aa55000308004501024600024000402f31d1010102580101025c200065580000007b0a8f394fe0738abf]dnl +[7e2f058408004501021c0002000040060185c0a87b02c0a87b01d4781451000003e8000000005010000049da0000]dnl +[${zero100}${zero400}] +[aabbcc000001aa55aa55000308004501024600034000402f31d0010102580101025c200065580000007b0a8f394fe0738abf]dnl +[7e2f058408004501021c0003000040060184c0a87b02c0a87b01d4781451000005dc000000005010000047e60000]dnl +[${zero100}${zero400}] [aabbcc000001aa55aa55000308004501024e00004000401131e9010102580101025ce01312b5023abd990800000000007b00]dnl [0a8f394fe0738abf7e2f058408004501021c0000000040060187c0a87b02c0a87b01d4781451000000000000000050100000]dnl [4dc20000${zero100}${zero400}] @@ -1036,6 +1075,18 @@ AT_CHECK_UNQUOTED([ovs-pcap p1.pcap | sort], [0], [dnl [aabbcc000001aa55aa55000308004501024e00034000401131e6010102580101025ce01317c1023a5b350000655800007b00]dnl [0a8f394fe0738abf7e2f058408004501021c0003000040060184c0a87b02c0a87b01d4781451000005dc0000000050100000]dnl [47e60000${zero100}${zero400}] +[aabbcc000006aa55aa55000386dd60000000024a2f402001cafe0000000000000000000000882001cafe0000000000000000]dnl +[00000093a0006558da8e00000000007b0a8f394fe0738abf7e2f058486dd60000000020806002001cafe0000000000000000]dnl +[000000882001cafe000000000000000000000092d4781451000005dc0000000050100000e8210000${zero100}${zero400}] +[aabbcc000006aa55aa55000386dd60000000024a2f402001cafe0000000000000000000000882001cafe0000000000000000]dnl +[00000093a0006558dc8200000000007b0a8f394fe0738abf7e2f058486dd60000000020806002001cafe0000000000000000]dnl 
+[000000882001cafe000000000000000000000092d4781451000003e80000000050100000ea150000${zero100}${zero400}] +[aabbcc000006aa55aa55000386dd60000000024a2f402001cafe0000000000000000000000882001cafe0000000000000000]dnl +[00000093a0006558de7600000000007b0a8f394fe0738abf7e2f058486dd60000000020806002001cafe0000000000000000]dnl +[000000882001cafe000000000000000000000092d4781451000001f40000000050100000ec090000${zero100}${zero400}] +[aabbcc000006aa55aa55000386dd60000000024a2f402001cafe0000000000000000000000882001cafe0000000000000000]dnl +[00000093a0006558e06a00000000007b0a8f394fe0738abf7e2f058486dd60000000020806002001cafe0000000000000000]dnl +[000000882001cafe000000000000000000000092d4781451000000000000000050100000edfd0000${zero100}${zero400}] [aabbcc000006aa55aa55000386dd60000000024e11402001cafe0000000000000000000000882001cafe0000000000000000]dnl [00000093e01312b5024e8ed10800000000007b000a8f394fe0738abf7e2f058486dd60000000020806002001cafe00000000]dnl [00000000000000882001cafe000000000000000000000092d4781451000000000000000050100000edfd0000${zero100}]dnl @@ -1068,6 +1119,18 @@ AT_CHECK_UNQUOTED([ovs-pcap p1.pcap | sort], [0], [dnl [00000093e01317c1024e2c6d0000655800007b000a8f394fe0738abf7e2f058486dd60000000020806002001cafe00000000]dnl [00000000000000882001cafe000000000000000000000092d4781451000005dc0000000050100000e8210000${zero100}]dnl [${zero400}] +[aabbcc000006aa55aa55000386dd6010000002362f402001cafe0000000000000000000000882001cafe0000000000000000]dnl +[00000093a00065583a4e00000000007b0a8f394fe0738abf7e2f058408004501021c0003000040060184c0a87b02c0a87b01]dnl +[d4781451000005dc000000005010000047e60000${zero100}${zero400}] +[aabbcc000006aa55aa55000386dd6010000002362f402001cafe0000000000000000000000882001cafe0000000000000000]dnl +[00000093a00065583c4300000000007b0a8f394fe0738abf7e2f058408004501021c0002000040060185c0a87b02c0a87b01]dnl +[d4781451000003e8000000005010000049da0000${zero100}${zero400}] 
+[aabbcc000006aa55aa55000386dd6010000002362f402001cafe0000000000000000000000882001cafe0000000000000000]dnl +[00000093a00065583e3800000000007b0a8f394fe0738abf7e2f058408004501021c0001000040060186c0a87b02c0a87b01]dnl +[d4781451000001f400000000501000004bce0000${zero100}${zero400}] +[aabbcc000006aa55aa55000386dd6010000002362f402001cafe0000000000000000000000882001cafe0000000000000000]dnl +[00000093a0006558402d00000000007b0a8f394fe0738abf7e2f058408004501021c0000000040060187c0a87b02c0a87b01]dnl +[d47814510000000000000000501000004dc20000${zero100}${zero400}] [aabbcc000006aa55aa55000386dd60100000023a11402001cafe0000000000000000000000882001cafe0000000000000000]dnl [00000093e01312b5023aed340800000000007b000a8f394fe0738abf7e2f058408004501021c0000000040060187c0a87b02]dnl [c0a87b01d47814510000000000000000501000004dc20000${zero100}${zero400}] diff --git a/tests/ofproto-macros.at b/tests/ofproto-macros.at index 779a054e8c..3cd49d7a71 100644 --- a/tests/ofproto-macros.at +++ b/tests/ofproto-macros.at @@ -171,9 +171,9 @@ strip_eth () { # 'recirc=' respectively. This should make output easier to # compare. strip_recirc() { - sed 's/recirc_id([[x0-9]]*)/recirc_id()/ - s/recirc_id=[[x0-9]]*/recirc_id=/ - s/recirc([[x0-9]]*)/recirc()/' + sed 's/recirc_id([[x0-9a-f]]*)/recirc_id()/ + s/recirc_id=[[x0-9a-f]]*/recirc_id=/ + s/recirc([[x0-9a-f]]*)/recirc()/' } # Strips dp_hash from output. 
diff --git a/tests/system-kmod-macros.at b/tests/system-kmod-macros.at index a48bd532a0..7a7a19f7e4 100644 --- a/tests/system-kmod-macros.at +++ b/tests/system-kmod-macros.at @@ -202,6 +202,14 @@ m4_define([DPCTL_CHECK_FRAGMENTATION_FAIL], ]) +# OVS_CHECK_FRAG_LARGE +# +# This check isn't valid for kernel +m4_define([OVS_CHECK_FRAG_LARGE], +[ + +]) + # OVS_CHECK_MIN_KERNEL([minversion], [minsublevel]) # # Skip test if kernel version falls below minversion.minsublevel diff --git a/tests/system-offloads-traffic.at b/tests/system-offloads-traffic.at index 78c6f5d7ec..32c0d2f2a1 100644 --- a/tests/system-offloads-traffic.at +++ b/tests/system-offloads-traffic.at @@ -1016,4 +1016,34 @@ AT_CHECK( stdout]) OVS_TRAFFIC_VSWITCHD_STOP -AT_CLEANUP \ No newline at end of file +AT_CLEANUP + +AT_SETUP([offloads - 802.1ad should be offloaded]) +OVS_TRAFFIC_VSWITCHD_START( + [], [], [-- set Open_vSwitch . other_config:hw-offload=true]) +OVS_CHECK_8021AD() + +ADD_NAMESPACES(at_ns0, at_ns1) + +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") + +ADD_SVLAN(p0, at_ns0, 4094, "10.255.2.1/24") +ADD_SVLAN(p1, at_ns1, 4094, "10.255.2.2/24") + +ADD_CVLAN(p0.4094, at_ns0, 100, "10.2.2.1/24") +ADD_CVLAN(p1.4094, at_ns1, 100, "10.2.2.2/24") + +AT_CHECK([ovs-ofctl add-flow br0 "priority=1 action=normal"]) + +OVS_WAIT_UNTIL([ip netns exec at_ns0 ping -c 1 10.2.2.2]) + +AT_CHECK([ovs-appctl dpctl/dump-flows type=tc,offloaded | grep "eth_type(0x0800)" | DUMP_CLEAN_SORTED], [0], [dnl +in_port(2),eth(macs),eth_type(0x88a8),vlan(vid=4094,pcp=0),encap(eth_type(0x0800)), packets:0, bytes:0, used:0.001s, actions:output +in_port(3),eth(macs),eth_type(0x88a8),vlan(vid=4094,pcp=0),encap(eth_type(0x0800)), packets:0, bytes:0, used:0.001s, actions:output +]) + +AT_CHECK([ovs-appctl dpctl/dump-flows type=ovs | grep "eth_type(0x0800)" | DUMP_CLEAN_SORTED], [0], []) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP diff --git a/tests/system-route.at b/tests/system-route.at index 
c0ecad6cfb..66bfd0e8ed 100644 --- a/tests/system-route.at +++ b/tests/system-route.at @@ -65,6 +65,26 @@ Cached: fc00:db8:beef::13/128 dev br0 GW fc00:db8:cafe::1 SRC fc00:db8:cafe::2]) OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([ovs-route - add system route - ipv4 via ipv6 nexthop]) +AT_KEYWORDS([route]) +OVS_TRAFFIC_VSWITCHD_START() +AT_CHECK([ovs-vsctl set bridge br0 other-config:hwaddr=00:53:00:00:00:42]) +AT_CHECK([ip link set br0 up]) + +AT_CHECK([ip addr add 192.168.9.2/24 dev br0], [0], [stdout]) + +AT_CHECK([ip route add 192.168.10.12/32 \ + via inet6 fe80::253:ff:fe00:51 dev br0], [0], [stdout]) + +AT_CHECK([ovs-appctl revalidator/wait]) + +OVS_WAIT_UNTIL_EQUAL([ovs-appctl ovs/route/show | \ + grep -E '192.168.10.12/32' | sort], [dnl +Cached: 192.168.10.12/32 dev br0 GW fe80::253:ff:fe00:51 SRC fe80::253:ff:fe00:42]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + dnl Checks that OVS doesn't use routes from non-standard tables. AT_SETUP([ovs-route - route tables]) AT_KEYWORDS([route]) @@ -91,8 +111,13 @@ Cached: 10.0.0.0/24 dev p1-route SRC 10.0.0.17 Cached: 10.0.0.17/32 dev p1-route SRC 10.0.0.17 local Cached: 10.0.0.18/32 dev p1-route SRC 10.0.0.17]) +dnl Negative check for custom routing table using route-table library. +AT_CHECK([ovstest test-lib-route-table-dump | grep rta_table_id:\ 42], [1]) +AT_CHECK([ovstest test-lib-route-table-dump | grep rta_table_id:\ 1042], [1]) + dnl Add a route to a custom routing table and check that OVS doesn't cache it. AT_CHECK([ip route add 10.0.0.19/32 dev p1-route table 42]) +AT_CHECK([ip route add 10.0.0.20/32 dev p1-route table 1042]) AT_CHECK([ip route show table 42 | grep 'p1-route' | grep -q '10.0.0.19']) dnl Give the main thread a chance to act. 
AT_CHECK([ovs-appctl revalidator/wait]) @@ -102,6 +127,11 @@ Cached: 10.0.0.0/24 dev p1-route SRC 10.0.0.17 Cached: 10.0.0.17/32 dev p1-route SRC 10.0.0.17 local Cached: 10.0.0.18/32 dev p1-route SRC 10.0.0.17 ]) +AT_CHECK([ovstest test-lib-route-table-dump | \ + awk '/rta_table_id:.*42/{print$1" "$15" "$16}' | sort], [0], [dnl +10.0.0.19/32 rta_table_id: 42 +10.0.0.20/32 rta_table_id: 1042 +]) dnl Delete a route from the main table and check that OVS removes the route dnl from the cache. @@ -128,3 +158,177 @@ OVS_WAIT_UNTIL([test $(ovs-appctl ovs/route/show | grep -c 'p1-route') -eq 0 ]) OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP + +AT_SETUP([ovs-route - add system route with multiple nexthop - ipv4]) +AT_KEYWORDS([route]) +OVS_TRAFFIC_VSWITCHD_START() + +dnl Create tap ports. +AT_CHECK([ip tuntap add name p1-route mode tap]) +AT_CHECK([ip link set p1-route up]) +on_exit 'ip link del p1-route' +AT_CHECK([ip tuntap add name p2-route mode tap]) +AT_CHECK([ip link set p2-route up]) +on_exit 'ip link del p2-route' + +AT_CHECK([ip addr add 192.168.42.10/24 dev p1-route], [0], [stdout]) +AT_CHECK([ip addr add 192.168.51.10/24 dev p2-route], [0], [stdout]) +AT_CHECK([ip route add 172.16.42.0/24 nexthop via 192.168.42.1 \ + dev p1-route nexthop via 192.168.51.1 dev p2-route], [0], [stdout]) + +dnl NOTE: At the time of this writing, it is expected that only the first route +dnl will be stored in ovs-router. +OVS_WAIT_UNTIL_EQUAL([ovs-appctl ovs/route/show | grep -E '172.16.42.0/24' | \ + sort], [dnl +Cached: 172.16.42.0/24 dev p1-route GW 192.168.42.1 SRC 192.168.42.10]) + +dnl Confirm that both nexthops are available when using the route-table library +dnl directly. 
+AT_CHECK([ovstest test-lib-route-table-dump | grep 172.16.42.0.*nexthop | sort], + [0], [dnl + 172.16.42.0/24 nexthop family: AF_INET addr: 192.168.42.1 ifname: p1-route + 172.16.42.0/24 nexthop family: AF_INET addr: 192.168.51.1 ifname: p2-route +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + +AT_SETUP([ovs-route - add system route - ipv4 via multiple ipv6 nexthop]) +AT_KEYWORDS([route]) +OVS_TRAFFIC_VSWITCHD_START() + +dnl Create tap ports. +AT_CHECK([ip tuntap add name p1-route mode tap]) +AT_CHECK([ip link set p1-route up]) +on_exit 'ip link del p1-route' +AT_CHECK([ip tuntap add name p2-route mode tap]) +AT_CHECK([ip link set p2-route up]) +on_exit 'ip link del p2-route' + +AT_CHECK([ip -6 addr add fc00:db8:dead::10/64 dev p1-route], [0], [stdout]) +AT_CHECK([ip -6 addr add fc00:db8:beef::10/64 dev p2-route], [0], [stdout]) +AT_CHECK([ip route add 172.16.42.0/24 nexthop via inet6 fc00:db8:dead::1 \ + dev p1-route nexthop via inet6 fc00:db8:beef::1 dev p2-route], + [0], [stdout]) + +dnl NOTE: At the time of this writing, it is expected that only the first route +dnl will be stored in ovs-router. +OVS_WAIT_UNTIL_EQUAL([ovs-appctl ovs/route/show | grep -E '172.16.42.0/24' | \ + sort], [dnl +Cached: 172.16.42.0/24 dev p1-route GW fc00:db8:dead::1 SRC fc00:db8:dead::10]) + +dnl Confirm that both nexthops are available when using the route-table library +dnl directly. +AT_CHECK([ovstest test-lib-route-table-dump | grep 172.16.42.0.*nexthop | sort], + [0], [dnl + 172.16.42.0/24 nexthop family: AF_INET6 addr: fc00:db8:beef::1 ifname: p2-route + 172.16.42.0/24 nexthop family: AF_INET6 addr: fc00:db8:dead::1 ifname: p1-route +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + +AT_SETUP([ovs-route - add system route with multiple nexthop - ipv6]) +AT_KEYWORDS([route]) +OVS_TRAFFIC_VSWITCHD_START() + +dnl Create tap ports. 
+AT_CHECK([ip tuntap add name p1-route mode tap]) +AT_CHECK([ip link set p1-route up]) +on_exit 'ip link del p1-route' +AT_CHECK([ip tuntap add name p2-route mode tap]) +AT_CHECK([ip link set p2-route up]) +on_exit 'ip link del p2-route' + +AT_CHECK([ip -6 addr add fc00:db8:dead::10/64 dev p1-route], [0], [stdout]) +AT_CHECK([ip -6 addr add fc00:db8:beef::10/64 dev p2-route], [0], [stdout]) +AT_CHECK([ip -6 route add fc00:db8:cafe::/64 nexthop via fc00:db8:dead::1 \ + dev p1-route nexthop via fc00:db8:beef::1 dev p2-route], + [0], [stdout]) + +dnl NOTE: At the time of this writing, it is expected that only the first route +dnl will be stored in ovs-router. +OVS_WAIT_UNTIL_EQUAL([ovs-appctl ovs/route/show | \ + grep -E 'fc00:db8:cafe::/64' | sort], [dnl +Cached: fc00:db8:cafe::/64 dev p1-route GW fc00:db8:dead::1 SRC fc00:db8:dead::10]) + +dnl Confirm that both nexthops are available when using the route-table library +dnl directly. +AT_CHECK([ovstest test-lib-route-table-dump | grep fc00:db8:cafe::.*nexthop | \ + sort], [0], [dnl + fc00:db8:cafe::/64 nexthop family: AF_INET6 addr: fc00:db8:beef::1 ifname: p2-route + fc00:db8:cafe::/64 nexthop family: AF_INET6 addr: fc00:db8:dead::1 ifname: p1-route +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + +AT_SETUP([route-table - exported functions work for netlink-notifier]) +AT_KEYWORDS([route]) + +dnl Create tap ports. 
+AT_CHECK([ip tuntap add name p1-route mode tap]) +AT_CHECK([ip link set p1-route up]) +on_exit 'ip link del p1-route' +AT_CHECK([ip tuntap add name p2-route mode tap]) +AT_CHECK([ip link set p2-route up]) +on_exit 'ip link del p2-route' + +AT_CHECK([ip -6 addr add fc00:db8:dead::10/64 dev p1-route], [0], [stdout]) +AT_CHECK([ip -6 addr add fc00:db8:beef::10/64 dev p2-route], [0], [stdout]) + +AT_CHECK([ovstest test-lib-route-table-monitor 'ip route add 172.16.42.0/24 \ + nexthop via inet6 fc00:db8:dead::1 dev p1-route \ + nexthop via inet6 fc00:db8:beef::1 dev p2-route' | \ + grep 172.16.42.0.*nexthop | sort], [0], [dnl + 172.16.42.0/24 nexthop family: AF_INET6 addr: fc00:db8:beef::1 ifname: p2-route + 172.16.42.0/24 nexthop family: AF_INET6 addr: fc00:db8:dead::1 ifname: p1-route +]) + +AT_CLEANUP + +AT_SETUP([route-table - route attributes]) +AT_KEYWORDS([route]) + +dnl Create tap ports. +AT_CHECK([ip tuntap add name p1-route mode tap]) +AT_CHECK([ip link set p1-route up]) +on_exit 'ip link del p1-route' + + +dnl Add ip address. +AT_CHECK([ip addr add 10.0.0.17/24 dev p1-route], [0], [stdout]) +AT_CHECK([ovstest test-lib-route-table-dump | \ + awk '/^10.0.0.17/{print$1" "$6" "$7}'], [0], [dnl +10.0.0.17/32 rtm_protocol: RTPROT_KERNEL +]) + +dnl Add route. +AT_CHECK([ip route add 192.168.10.12/32 dev p1-route via 10.0.0.18], [0], + [stdout]) +AT_CHECK([ovstest test-lib-route-table-dump | \ + awk '/^192.168.10.12/{print$1" "$17" "$18}'], [0], [dnl +192.168.10.12/32 rta_priority: 0 +]) +AT_CHECK([ovstest test-lib-route-table-dump | \ + awk '/^192.168.10.12/{print$1" "$6" "$7}'], [0], [dnl +192.168.10.12/32 rtm_protocol: RTPROT_BOOT +]) + +dnl Delete route. +AT_CHECK([ip route del 192.168.10.12/32 dev p1-route via 10.0.0.18], [0], + [stdout]) + +dnl Add route with priority. 
+AT_CHECK([ip route add 192.168.10.12/32 dev p1-route via 10.0.0.18 metric 42], + [0], [stdout]) +AT_CHECK([ovstest test-lib-route-table-dump | \ + awk '/^192.168.10.12/{print$1" "$17" "$18}'], [0], [dnl +192.168.10.12/32 rta_priority: 42 +]) +AT_CHECK([ovstest test-lib-route-table-dump | \ + awk '/^192.168.10.12/{print$1" "$6" "$7}'], [0], [dnl +192.168.10.12/32 rtm_protocol: RTPROT_BOOT +]) + +AT_CLEANUP diff --git a/tests/system-traffic.at b/tests/system-traffic.at index 16de8da20f..0215453230 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -557,7 +557,6 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over gre tunnel]) -OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) OVS_CHECK_GRE() OVS_TRAFFIC_VSWITCHD_START() @@ -615,8 +614,97 @@ OVS_WAIT_UNTIL([diff -q payload.bin udp_data]) OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([datapath - tcp over gre tunnel with software fallback]) +AT_SKIP_IF([test $HAVE_NC = no]) +AT_SKIP_IF([test $HAVE_TCPDUMP = no]) +OVS_CHECK_GRE() + +dnl This test is only valid with tso. If the kernel segments the packets, the +dnl packet lengths in the final test will be different. +m4_ifndef([CHECK_SYSTEM_TSO], [AT_SKIP_IF(:)]) + +OVS_TRAFFIC_VSWITCHD_START() +ADD_BR([br-underlay]) + +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"]) +AT_CHECK([ovs-ofctl add-flow br-underlay "actions=normal"]) + +ADD_NAMESPACES(at_ns0) + +dnl Set up underlay link from host into the namespace using veth pair. +ADD_VETH(p0, at_ns0, br-underlay, "172.31.1.1/24") +AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"]) +AT_CHECK([ip link set dev br-underlay up]) + +dnl Test the case where one side has all checksum and TSO offload disabled. +AT_CHECK([ethtool -K ovs-p0 tso off], [0], [ignore], [ignore]) +AT_CHECK([ethtool -K ovs-p0 sg off], [0], [ignore], [ignore]) + +dnl Reinitialize. 
+AT_CHECK([ovs-vsctl del-port ovs-p0]) +AT_CHECK([ovs-vsctl add-port br-underlay ovs-p0]) + +dnl Set up tunnel endpoints on OVS outside the namespace and with a native +dnl linux device inside the namespace. +ADD_OVS_TUNNEL([gre], [br0], [at_gre0], [172.31.1.1], [10.1.1.100/24]) +ADD_NATIVE_TUNNEL([gretap], [at_gre1], [at_ns0], [172.31.1.100], [10.1.1.1/24]) + +dnl Set MTU for tunnel to generate 1500 byte packets. +AT_CHECK([ip link set dev br0 mtu 1400]) + +dnl First, check the underlay. +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -W 2 172.31.1.100 | FORMAT_PING], + [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +dnl Check that the tunnel is up. +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -W 2 10.1.1.100 | FORMAT_PING], + [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +dnl Start tcpdump to capture the encapsulated packets. +OVS_DAEMONIZE([tcpdump -i ovs-p0 -w p0.pcap], [tcpdump.pid]) + +dnl Wait until the pcap is written, which happens after the interface +dnl is opened by tcpdump. +OVS_WAIT_UNTIL([test -e p0.pcap]) + +dnl Initialize the listener before it is needed. +NETNS_DAEMONIZE([at_ns0], [nc -l 10.1.1.1 1234 > data2], [nc.pid]) + +dnl Verify that ncat is ready. +OVS_WAIT_UNTIL([NS_EXEC([at_ns0], [netstat -ln | grep :1234])]) + +dnl Large TCP transfer aimed towards ovs-p0, which has TSO disabled. +AT_CHECK([dd if=/dev/urandom of=payload.bin bs=60000 count=1 2> /dev/null]) +AT_CHECK([nc $NC_EOF_OPT 10.1.1.1 1234 < payload.bin]) + +dnl Wait until transfer completes before checking. +OVS_WAIT_WHILE([kill -0 $(cat nc.pid)]) +AT_CHECK([diff -q payload.bin data2], [0]) +OVS_WAIT_WHILE([test $(stat -c %s p0.pcap) -le 68000 ]) + +dnl Stop OVS and tcpdump and verify the results. 
+AT_CHECK([kill -15 $(cat tcpdump.pid)]) +OVS_WAIT_WHILE([kill -0 $(cat tcpdump.pid)]) + +dnl The exact number of packets sent will vary, but we check that the largest +dnl segments have the correct lengths and certain other fields. +AT_CHECK([test $(ovs-pcap p0.pcap | grep -Ec dnl +"^.{24}0800"dnl Ethernet +"4500059e....4000..2f....ac1f0164ac1f0101"dnl IP(len=1450, DF, GRE, 172.31.1.100->172.31.1.1) +"00006558"dnl GRE(flags=0, proto=0x6558) +".{24}0800"dnl Ethernet +"45000578....4000..06....0a0101640a010101"dnl IP(len=1400, DF, TCP, 10.1.1.100->10.1.1.1) +"....04d2............................0000"dnl TCP(dport=1234 +) -ge 20]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([datapath - ping over ip6gre L2 tunnel]) -OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) OVS_CHECK_GRE() OVS_CHECK_ERSPAN() @@ -651,6 +739,25 @@ dnl Okay, now check the overlay with different packet sizes NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -W 2 10.1.1.100 | FORMAT_PING], [0], [dnl 3 packets transmitted, 3 received, 0% packet loss, time 0ms ]) + +dnl Start ncat listeners. +OVS_DAEMONIZE([nc -l 10.1.1.100 1234 > tcp_data], [nc.pid]) +NETNS_DAEMONIZE([at_ns0], [nc -l -u 10.1.1.1 4321 > udp_data], [nc2.pid]) + +dnl Verify that ncat is ready. +OVS_WAIT_UNTIL([netstat -ln | grep :1234]) +OVS_WAIT_UNTIL([NS_EXEC([at_ns0], [netstat -ln | grep :4321])]) + +dnl Check large bidirectional TCP. +AT_CHECK([dd if=/dev/urandom of=payload.bin bs=60000 count=1 2> /dev/null]) +NS_CHECK_EXEC([at_ns0], [nc $NC_EOF_OPT 10.1.1.100 1234 < payload.bin]) +OVS_WAIT_UNTIL([diff -q payload.bin tcp_data]) + +dnl Check UDP. 
+AT_CHECK([dd if=/dev/urandom of=payload.bin bs=600 count=1 2> /dev/null]) +AT_CHECK([nc $NC_EOF_OPT -u 10.1.1.1 4321 < payload.bin]) +OVS_WAIT_UNTIL([diff -q payload.bin udp_data]) + OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP @@ -1191,7 +1298,6 @@ AT_CLEANUP AT_SETUP([datapath - ping over gre tunnel by simulated packets]) OVS_CHECK_XT() -OVS_CHECK_MIN_KERNEL(3, 10) OVS_TRAFFIC_VSWITCHD_START() AT_CHECK([ovs-vsctl -- set bridge br0 other-config:hwaddr=\"f2:ff:00:00:00:01\"]) @@ -2033,7 +2139,6 @@ dnl ns1: connect to br0, with IP:10.1.1.2 dnl br-underlay: with IP: 172.31.1.100 dnl ns0: connect to br-underlay, with IP: 10.1.1.1 AT_SETUP([datapath - truncate and output to gre tunnel by simulated packets]) -OVS_CHECK_MIN_KERNEL(3, 10) AT_SKIP_IF([test $HAVE_NC = no]) CHECK_NO_TC_OFFLOAD() OVS_TRAFFIC_VSWITCHD_START() @@ -2165,7 +2270,6 @@ dnl br-underlay: with IP: 172.31.1.100 dnl ns0: connect to br-underlay, with IP: 10.1.1.1 AT_SETUP([datapath - truncate and output to gre tunnel]) AT_SKIP_IF([test $HAVE_NC = no]) -OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) OVS_CHECK_GRE() CHECK_NO_TC_OFFLOAD() OVS_TRAFFIC_VSWITCHD_START() @@ -4603,6 +4707,95 @@ NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -W 2 10.1.1.2 | FORMAT_PING dnl Check userspace conntrack fragmentation counters. DPCTL_CHECK_FRAGMENTATION_PASS() +dnl Ipv4 max packet size fragmentation dropped. 
+NS_EXEC([at_ns0], [ping -s 65507 -q -c 1 -W 0.5 10.1.1.2]) +OVS_CHECK_FRAG_LARGE() + +OVS_TRAFFIC_VSWITCHD_STOP(["/Unsupported big reassembled v4 packet/d"]) +AT_CLEANUP + +AT_SETUP([conntrack - IPv4 fragmentation with ct orig match]) +CHECK_CONNTRACK() +OVS_TRAFFIC_VSWITCHD_START() + +ADD_NAMESPACES(at_ns0, at_ns1) + +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") + +AT_DATA([flows.txt], [dnl +priority=1,action=drop +priority=10,arp,action=normal +priority=100,ip,ct_state=-trk,action=ct(table=0) +priority=100,in_port=2,icmp,ct_state=+rpl,action=1 +priority=100,in_port=1,ip,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2,ct_state=+new+trk,action=ct(commit) +priority=100,in_port=1,ip,ct_nw_proto=1,ct_tp_src=8,ct_tp_dst=0,ct_state=+new+trk,action=ct(commit),2 +]) + +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) + +dnl Packet content: +dnl Ethernet II, Src: 50:54:00:00:00:0a, Dst: 50:54:00:00:00:09 +dnl Type: IPv4 (0x0800) +dnl Internet Protocol Version 4, Src: 10.1.1.1, Dst: 10.1.1.2 +dnl Total Length: 1420 +dnl Identification: 0x0001 (1) +dnl 001. .... = Flags: 0x1, More fragments +dnl 0... .... = Reserved bit: Not set +dnl .0.. .... = Don't fragment: Not set +dnl ..1. .... 
= More fragments: Set +dnl ...0 0000 0000 0000 = Fragment Offset: 0 +dnl Time to Live: 64 +dnl Protocol: UDP (17) +dnl User Datagram Protocol, Src Port: 1, Dst Port: 2 +dnl Source Port: 1 +dnl Destination Port: 2 +dnl Length: 1608 +dnl UDP payload (1392 bytes) +dnl Data (1392 bytes) +eth="50 54 00 00 00 09 50 54 00 00 00 0a 08 00" +ip="45 00 05 8c 00 01 20 00 40 11 3f 5c 0a 01 01 01 0a 01 01 02" +udp="00 01 00 02 06 48 dd 56" +data_len=$(seq 1392) +data=$(printf '00 %.0s' ${data_len}) +packet="${eth} ${ip} ${udp} ${data}" +NS_CHECK_EXEC([at_ns0], [$PYTHON3 $srcdir/sendpkt.py p0 ${packet} > /dev/null]) + +dnl Packet content: +dnl Ethernet II, Src: 50:54:00:00:00:0a, Dst: 50:54:00:00:00:09 +dnl Type: IPv4 (0x0800) +dnl Internet Protocol Version 4, Src: 10.1.1.1, Dst: 10.1.1.2 +dnl 0100 .... = Version: 4 +dnl .... 0101 = Header Length: 20 bytes (5) +dnl Differentiated Services Field: 0x00 (DSCP: CS0, ECN: Not-ECT) +dnl 0000 00.. = Differentiated Services Codepoint: Default (0) +dnl .... ..00 = Explicit Congestion Notification: Not ECN-Capable Transport (0) +dnl Total Length: 228 +dnl Identification: 0x0001 (1) +dnl 000. .... = Flags: 0x0 +dnl 0... .... = Reserved bit: Not set +dnl .0.. .... = Don't fragment: Not set +dnl ..0. .... 
= More fragments: Not set +dnl ...0 0000 1010 1111 = Fragment Offset: 1400 +dnl Time to Live: 64 +dnl Protocol: UDP (17) +dnl Data (208 bytes) +eth="50 54 00 00 00 09 50 54 00 00 00 0a 08 00" +ip="45 00 00 e4 00 01 00 af 40 11 63 55 0a 01 01 01 0a 01 01 02" +data_len=$(seq 208) +data=$(printf '00 %.0s' ${data_len}) +packet="${eth} ${ip} ${data}" +NS_CHECK_EXEC([at_ns0], [$PYTHON3 $srcdir/sendpkt.py p0 ${packet} > /dev/null]) + +NS_CHECK_EXEC([at_ns0], [ping -s 1600 -q -c 1 -W 1 10.1.1.2 | FORMAT_PING], [0], [dnl +1 packets transmitted, 1 received, 0% packet loss, time 0ms +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | sort], [0], [dnl +icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=,type=0,code=0) +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=),reply=(src=10.1.1.2,dst=10.1.1.1,sport=,dport=) +]) + OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP @@ -4897,6 +5090,96 @@ NS_CHECK_EXEC([at_ns0], [ping6 -s 3200 -q -c 3 -i 0.3 -W 2 fc00::2 | FORMAT_PING 3 packets transmitted, 3 received, 0% packet loss, time 0ms ]) +dnl Ipv6 max packet size fragmentation dropped. 
+NS_EXEC([at_ns0], [ping6 -s 65487 -q -c 1 -W 0.5 fc00::2]) +OVS_CHECK_FRAG_LARGE() + +OVS_TRAFFIC_VSWITCHD_STOP(["/Unsupported big reassembled v6 packet/d"]) +AT_CLEANUP + +AT_SETUP([conntrack - IPv6 fragmentation with ct orig match]) +CHECK_CONNTRACK() +OVS_TRAFFIC_VSWITCHD_START() + +ADD_NAMESPACES(at_ns0, at_ns1) + +ADD_VETH(p0, at_ns0, br0, "fc00::1/96", "50:54:00:00:00:09", [], "nodad") +ADD_VETH(p1, at_ns1, br0, "fc00::2/96", "50:54:00:00:00:0a", [], "nodad") + +AT_DATA([flows.txt], [dnl +priority=1,action=drop +priority=10,ipv6,ct_state=-trk,action=ct(table=0) +priority=10,in_port=2,ipv6,ct_tp_src=128,ct_state=+trk+est+rpl,action=1 +priority=10,in_port=1,ipv6,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2,ct_state=+new+trk,action=ct(commit) +priority=10,in_port=1,ipv6,ct_nw_proto=58,ct_tp_src=128,ct_tp_dst=0,ct_state=+new+trk,action=ct(commit),2 +priority=100,icmp6,icmp_type=135,action=normal +priority=100,icmp6,icmp_type=136,action=normal +]) + +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) + +dnl Packet content: +dnl Ethernet II, Src: 50:54:00:00:00:0a, Dst: 50:54:00:00:00:09 +dnl Type: IPv6 (0x86dd) +dnl Internet Protocol Version 6, Src: fc00::1, Dst: fc00::2 +dnl Payload Length: 1344 +dnl Next Header: Fragment Header for IPv6 (44) +dnl Hop Limit: 64 +dnl Fragment Header for IPv6 +dnl Next header: UDP (17) +dnl Reserved octet: 0x00 +dnl 0000 0000 0000 0... = Offset: 0 (0 bytes) +dnl .... .... .... .00. = Reserved bits: 0 +dnl .... .... .... 
...1 = More Fragments: Yes +dnl Identification: 0x9bdb1fa7 +dnl User Datagram Protocol, Src Port: 1, Dst Port: 2 +dnl Source Port: 1 +dnl Destination Port: 2 +dnl Length: 1608 +dnl UDP payload (1328 bytes) +dnl Data (1328 bytes) +eth="50 54 00 00 00 09 50 54 00 00 00 0a 86 dd" +ipv6="60 00 00 00 05 40 2c 40 fc 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 \ + fc 00 00 00 00 00 00 00 00 00 00 00 00 00 00 02 11 00 00 01 9b db 1f a7" +udp="00 01 00 02 06 48 fb 56" +data_len=$(seq 1328) +data=$(printf '00 %.0s' ${data_len}) +packet="${eth} ${ipv6} ${udp} ${data}" +NS_CHECK_EXEC([at_ns0], [$PYTHON3 $srcdir/sendpkt.py p0 ${packet} > /dev/null]) + +dnl IPv6 Packet content +dnl Ethernet II, Src: 50:54:00:00:00:0a, Dst: 50:54:00:00:00:09 +dnl Type: IPv6 (0x86dd) +dnl Internet Protocol Version 6, Src: fc00::1, Dst: fc00::2 +dnl Payload Length: 280 +dnl Next Header: Fragment Header for IPv6 (44) +dnl Hop Limit: 64 +dnl Fragment Header for IPv6 +dnl Next header: UDP (17) +dnl Reserved octet: 0x00 +dnl 0000 0101 0011 1... = Offset: 167 (1336 bytes) +dnl .... .... .... .00. = Reserved bits: 0 +dnl .... .... .... ...0 = More Fragments: No +dnl Identification: 0x9bdb1fa7 +dnl Data (272 bytes) +eth="50 54 00 00 00 09 50 54 00 00 00 0a 86 dd" +ipv6="60 00 00 00 01 18 2c 40 fc 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 \ + fc 00 00 00 00 00 00 00 00 00 00 00 00 00 00 02 11 00 05 38 9b db 1f a7" +data_len=$(seq 272) +data=$(printf '00 %.0s' ${data_len}) +packet="${eth} ${ipv6} ${data}" +NS_CHECK_EXEC([at_ns0], [$PYTHON3 $srcdir/sendpkt.py p0 ${packet} > /dev/null]) + +dnl Send also fragmented ICMPv6. 
+NS_CHECK_EXEC([at_ns0], [ping6 -s 1600 -q -c 1 -W 1 fc00::2 | FORMAT_PING], [0], [dnl +1 packets transmitted, 1 received, 0% packet loss, time 0ms +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fc00::2) | sort], [0], [dnl +icmpv6,orig=(src=fc00::1,dst=fc00::2,id=,type=128,code=0),reply=(src=fc00::2,dst=fc00::1,id=,type=129,code=0) +udp,orig=(src=fc00::1,dst=fc00::2,sport=,dport=),reply=(src=fc00::2,dst=fc00::1,sport=,dport=) +]) + OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at index c1be973478..49b277a089 100644 --- a/tests/system-userspace-macros.at +++ b/tests/system-userspace-macros.at @@ -298,6 +298,14 @@ AT_CHECK([ovs-appctl dpctl/ipf-get-status -m | FORMAT_FRAG_LIST()], [], [dnl ]) ]) +# OVS_CHECK_FRAG_LARGE() +# +# The userspace needs to check that ipf larger fragments have occurred. +m4_define([OVS_CHECK_FRAG_LARGE], +[ + OVS_WAIT_UNTIL([grep -Eq 'Unsupported big reassembled (v4|v6) packet' ovs-vswitchd.log]) +]) + # OVS_CHECK_MIN_KERNEL([minversion], [maxversion]) # # The userspace skips all tests that check kernel version. diff --git a/tests/test-lib-route-table.c b/tests/test-lib-route-table.c new file mode 100644 index 0000000000..61d97e06ff --- /dev/null +++ b/tests/test-lib-route-table.c @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2024 Canonical Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#undef NDEBUG + +#include +#include +#include + +#include "netlink-notifier.h" +#include "ovstest.h" +#include "packets.h" +#include "route-table.h" + +static const char * +rt_prot_name(unsigned char p) +{ + /* We concentrate on the most used protocols, as they are the ones most + * likely to be defined in the build environment. */ + return p == RTPROT_UNSPEC ? "RTPROT_UNSPEC" : + p == RTPROT_REDIRECT ? "RTPROT_REDIRECT" : + p == RTPROT_KERNEL ? "RTPROT_KERNEL" : + p == RTPROT_BOOT ? "RTPROT_BOOT" : + p == RTPROT_STATIC ? "RTPROT_STATIC" : + p == RTPROT_RA ? "RTPROT_RA" : + p == RTPROT_DHCP ? "RTPROT_DHCP" : + p == RTPROT_BGP ? "RTPROT_BGP" : + "UNKNOWN"; +} + +static const char * +rt_table_name(uint32_t id) +{ + static char tid[11] = ""; + + snprintf(tid, sizeof tid, "%"PRIu32, id); + + return id == RT_TABLE_UNSPEC ? "RT_TABLE_UNSPEC" : + id == RT_TABLE_COMPAT ? "RT_TABLE_COMPAT" : + id == RT_TABLE_DEFAULT ? "RT_TABLE_DEFAULT" : + id == RT_TABLE_MAIN ? "RT_TABLE_MAIN" : + id == RT_TABLE_LOCAL ? 
"RT_TABLE_LOCAL" : + tid; +} + +static void +test_lib_route_table_handle_msg(const struct route_table_msg *change, + void *data OVS_UNUSED) +{ + struct ds nexthop_addr = DS_EMPTY_INITIALIZER; + struct ds rta_prefsrc = DS_EMPTY_INITIALIZER; + const struct route_data *rd = &change->rd; + struct ds rta_dst = DS_EMPTY_INITIALIZER; + const struct route_data_nexthop *rdnh; + + ipv6_format_mapped(&change->rd.rta_prefsrc, &rta_prefsrc); + ipv6_format_mapped(&change->rd.rta_dst, &rta_dst); + + printf("%s/%u relevant: %d nlmsg_type: %d rtm_protocol: %s (%u) " + "rtn_local: %d rta_prefsrc: %s rta_mark: %"PRIu32" " + "rta_table_id: %s rta_priority: %"PRIu32"\n", + ds_cstr(&rta_dst), rd->rtm_dst_len, change->relevant, + change->nlmsg_type, rt_prot_name(rd->rtm_protocol), + rd->rtm_protocol, rd->rtn_local, ds_cstr(&rta_prefsrc), + rd->rta_mark, rt_table_name(rd->rta_table_id), rd->rta_priority); + + LIST_FOR_EACH (rdnh, nexthop_node, &rd->nexthops) { + ds_clear(&nexthop_addr); + ipv6_format_mapped(&rdnh->addr, &nexthop_addr); + printf(" %s/%u nexthop family: %s addr: %s ifname: %s\n", + ds_cstr(&rta_dst), rd->rtm_dst_len, + rdnh->family == AF_INET ? "AF_INET" : + rdnh->family == AF_INET6 ? 
"AF_INET6" : + "UNKNOWN", + ds_cstr(&nexthop_addr), + rdnh->ifname); + } + + ds_destroy(&nexthop_addr); + ds_destroy(&rta_prefsrc); + ds_destroy(&rta_dst); +} + +static void +test_lib_route_table_dump(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) +{ + route_table_dump_one_table(RT_TABLE_UNSPEC, + test_lib_route_table_handle_msg, + NULL); +} + +static void +test_lib_route_table_change(struct route_table_msg *change, + void *aux OVS_UNUSED) +{ + test_lib_route_table_handle_msg(change, NULL); + route_data_destroy(&change->rd); +} + +static void +test_lib_route_table_monitor(int argc, char *argv[]) +{ + static struct nln_notifier *route6_notifier OVS_UNUSED; + static struct nln_notifier *route_notifier OVS_UNUSED; + static struct route_table_msg rtmsg; + static struct nln *nln OVS_UNUSED; + const char *cmd = argv[1]; + + if (argc != 2) { + printf("usage: ovstest %s 'ip route add ...'\n", argv[0]); + exit(EXIT_FAILURE); + } + + nln = nln_create(NETLINK_ROUTE, route_table_parse, &rtmsg); + + route_notifier = + nln_notifier_create(nln, RTNLGRP_IPV4_ROUTE, + (nln_notify_func *) test_lib_route_table_change, + NULL); + route6_notifier = + nln_notifier_create(nln, RTNLGRP_IPV6_ROUTE, + (nln_notify_func *) test_lib_route_table_change, + NULL); + nln_run(nln); + nln_wait(nln); + int rc = system(cmd); + if (rc) { + exit(EXIT_FAILURE); /* 'rc' is a wait status, not an exit code. */ + } + nln_run(nln); +} + +OVSTEST_REGISTER("test-lib-route-table-monitor", test_lib_route_table_monitor); +OVSTEST_REGISTER("test-lib-route-table-dump", test_lib_route_table_dump); diff --git a/utilities/ovs-ctl.in b/utilities/ovs-ctl.in index 57abd3a5b4..03a39337f5 100644 --- a/utilities/ovs-ctl.in +++ b/utilities/ovs-ctl.in @@ -242,15 +242,20 @@ start_forwarding () { } start_ovs_ipsec () { + set ${datadir}/scripts/ovs-monitor-ipsec unix:"$DB_SOCK" + set "$@" --log-file=${logdir}/ovs-monitor-ipsec.log + set "$@" --pidfile=${rundir}/ovs-monitor-ipsec.pid + set "$@" --detach + test X"$MONITOR" = Xno || set "$@" --monitor + set "$@" --ike-daemon=$IKE_DAEMON if 
test X$RESTART_IKE_DAEMON = Xno; then - no_restart="--no-restart-ike-daemon" + set "$@" --no-restart-ike-daemon + fi + if test X"$OVS_MONITOR_IPSEC_OPTIONS" != X; then + set "$@" $OVS_MONITOR_IPSEC_OPTIONS fi - ${datadir}/scripts/ovs-monitor-ipsec \ - --pidfile=${rundir}/ovs-monitor-ipsec.pid \ - --ike-daemon=$IKE_DAEMON \ - $no_restart \ - --log-file --detach --monitor unix:${rundir}/db.sock || return 1 + action "Starting ovs-monitor-ipsec" "$@" || return 1 return 0 } @@ -348,6 +353,7 @@ set_defaults () { OVS_VSWITCHD_WRAPPER= OVSDB_SERVER_OPTIONS= OVS_VSWITCHD_OPTIONS= + OVS_MONITOR_IPSEC_OPTIONS= OVSDB_SERVER_UMASK= OVS_VSWITCHD_UMASK= @@ -463,6 +469,9 @@ Option for "start-ovs-ipsec": the IKE daemon for ipsec tunnels (either libreswan or strongswan) --no-restart-ike-daemon do not restart the IKE daemon on startup + --ovs-monitor-ipsec-options=OPTIONS + additional options for ovs-monitor-ipsec (example: + '--ipsec-conf=/etc/ipsec.d/ovs.conf --root-ipsec-conf=/etc/ipsec.conf') Other options: -h, --help display this help message diff --git a/utilities/ovs-vsctl-bashcomp.bash b/utilities/ovs-vsctl-bashcomp.bash index c5ad24fb70..5313fa9d48 100644 --- a/utilities/ovs-vsctl-bashcomp.bash +++ b/utilities/ovs-vsctl-bashcomp.bash @@ -32,11 +32,15 @@ _ovs_vsctl () { # A bar (|) character in an argument means thing before bar OR thing # after bar; for example, del-port can take a port or an interface. -_OVS_VSCTL_COMMANDS="$(_ovs_vsctl --commands)" - -# This doesn't complete on short arguments, so it filters them out. -_OVS_VSCTL_OPTIONS="$(_ovs_vsctl --options | awk '/^--/ { print $0 }' \ - | sed -e 's/\(.*\)=ARG/\1=/')" +_OVS_VSCTL_COMMANDS= +_OVS_VSCTL_OPTIONS= +if command -v ovs-vsctl > /dev/null; then + _OVS_VSCTL_COMMANDS="$(_ovs_vsctl --commands)" + + # This doesn't complete on short arguments, so it filters them out. 
+ _OVS_VSCTL_OPTIONS="$(_ovs_vsctl --options | awk '/^--/ { print $0 }' \ + | sed -e 's/\(.*\)=ARG/\1=/')" +fi IFS=$SAVE_IFS declare -A _OVS_VSCTL_PARSED_ARGS diff --git a/dpdk/VERSION b/dpdk/VERSION index 0a492611a0..9e2934aa34 100644 --- a/dpdk/VERSION +++ b/dpdk/VERSION @@ -1 +1 @@ -24.11.0 +24.11.1 diff --git a/dpdk/doc/guides/rel_notes/release_24_11.rst b/dpdk/doc/guides/rel_notes/release_24_11.rst index 8486cd986f..f9df63141e 100644 --- a/dpdk/doc/guides/rel_notes/release_24_11.rst +++ b/dpdk/doc/guides/rel_notes/release_24_11.rst @@ -616,3 +616,22 @@ Tested Platforms * Firmware version: 2.14, 0x8000028c * Device id (pf): 8086:125b * Driver version(in-tree): 6.8.0-45-generic (Ubuntu24.04.1)(igc) + +24.11.1 Release Notes +--------------------- + + +24.11.1 Fixes +~~~~~~~~~~~~~ + +* net/virtio: fix Rx checksum calculation + +24.11.1 Validation +~~~~~~~~~~~~~~~~~~ + +* Tested by Red Hat validation team + +24.11.1 Known Issues +~~~~~~~~~~~~~~~~~~~~ + +* DPDK 24.11.1 contains DPDK 24.11 plus the fix for CVE-2024-11614 only diff --git a/dpdk/kernel/linux/uapi/linux/vduse.h b/dpdk/kernel/linux/uapi/linux/vduse.h new file mode 100644 index 0000000000..11bd48c72c --- /dev/null +++ b/dpdk/kernel/linux/uapi/linux/vduse.h @@ -0,0 +1,353 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_VDUSE_H_ +#define _UAPI_VDUSE_H_ + +#include + +#define VDUSE_BASE 0x81 + +/* The ioctls for control device (/dev/vduse/control) */ + +#define VDUSE_API_VERSION 0 + +/* + * Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION). + * This is used for future extension. + */ +#define VDUSE_GET_API_VERSION _IOR(VDUSE_BASE, 0x00, __u64) + +/* Set the version of VDUSE API that userspace supported. 
*/ +#define VDUSE_SET_API_VERSION _IOW(VDUSE_BASE, 0x01, __u64) + +/** + * struct vduse_dev_config - basic configuration of a VDUSE device + * @name: VDUSE device name, needs to be NUL terminated + * @vendor_id: virtio vendor id + * @device_id: virtio device id + * @features: virtio features + * @vq_num: the number of virtqueues + * @vq_align: the allocation alignment of virtqueue's metadata + * @reserved: for future use, needs to be initialized to zero + * @config_size: the size of the configuration space + * @config: the buffer of the configuration space + * + * Structure used by VDUSE_CREATE_DEV ioctl to create VDUSE device. + */ +struct vduse_dev_config { +#define VDUSE_NAME_MAX 256 + char name[VDUSE_NAME_MAX]; + __u32 vendor_id; + __u32 device_id; + __u64 features; + __u32 vq_num; + __u32 vq_align; + __u32 reserved[13]; + __u32 config_size; + __u8 config[]; +}; + +/* Create a VDUSE device which is represented by a char device (/dev/vduse/$NAME) */ +#define VDUSE_CREATE_DEV _IOW(VDUSE_BASE, 0x02, struct vduse_dev_config) + +/* + * Destroy a VDUSE device. Make sure there are no more references + * to the char device (/dev/vduse/$NAME). + */ +#define VDUSE_DESTROY_DEV _IOW(VDUSE_BASE, 0x03, char[VDUSE_NAME_MAX]) + +/* The ioctls for VDUSE device (/dev/vduse/$NAME) */ + +/** + * struct vduse_iotlb_entry - entry of IOTLB to describe one IOVA region [start, last] + * @offset: the mmap offset on returned file descriptor + * @start: start of the IOVA region + * @last: last of the IOVA region + * @perm: access permission of the IOVA region + * + * Structure used by VDUSE_IOTLB_GET_FD ioctl to find an overlapped IOVA region. + */ +struct vduse_iotlb_entry { + __u64 offset; + __u64 start; + __u64 last; +#define VDUSE_ACCESS_RO 0x1 +#define VDUSE_ACCESS_WO 0x2 +#define VDUSE_ACCESS_RW 0x3 + __u8 perm; +}; + +/* + * Find the first IOVA region that overlaps with the range [start, last] + * and return the corresponding file descriptor. 
Return -EINVAL means the + * IOVA region doesn't exist. Caller should set start and last fields. + */ +#define VDUSE_IOTLB_GET_FD _IOWR(VDUSE_BASE, 0x10, struct vduse_iotlb_entry) + +/* + * Get the negotiated virtio features. It's a subset of the features in + * struct vduse_dev_config which can be accepted by virtio driver. It's + * only valid after FEATURES_OK status bit is set. + */ +#define VDUSE_DEV_GET_FEATURES _IOR(VDUSE_BASE, 0x11, __u64) + +/** + * struct vduse_config_data - data used to update configuration space + * @offset: the offset from the beginning of configuration space + * @length: the length to write to configuration space + * @buffer: the buffer used to write from + * + * Structure used by VDUSE_DEV_SET_CONFIG ioctl to update device + * configuration space. + */ +struct vduse_config_data { + __u32 offset; + __u32 length; + __u8 buffer[]; +}; + +/* Set device configuration space */ +#define VDUSE_DEV_SET_CONFIG _IOW(VDUSE_BASE, 0x12, struct vduse_config_data) + +/* + * Inject a config interrupt. It's usually used to notify virtio driver + * that device configuration space has changed. + */ +#define VDUSE_DEV_INJECT_CONFIG_IRQ _IO(VDUSE_BASE, 0x13) + +/** + * struct vduse_vq_config - basic configuration of a virtqueue + * @index: virtqueue index + * @max_size: the max size of virtqueue + * @reserved: for future use, needs to be initialized to zero + * + * Structure used by VDUSE_VQ_SETUP ioctl to setup a virtqueue. + */ +struct vduse_vq_config { + __u32 index; + __u16 max_size; + __u16 reserved[13]; +}; + +/* + * Setup the specified virtqueue. Make sure all virtqueues have been + * configured before the device is attached to vDPA bus. 
+ */ +#define VDUSE_VQ_SETUP _IOW(VDUSE_BASE, 0x14, struct vduse_vq_config) + +/** + * struct vduse_vq_state_split - split virtqueue state + * @avail_index: available index + */ +struct vduse_vq_state_split { + __u16 avail_index; +}; + +/** + * struct vduse_vq_state_packed - packed virtqueue state + * @last_avail_counter: last driver ring wrap counter observed by device + * @last_avail_idx: device available index + * @last_used_counter: device ring wrap counter + * @last_used_idx: used index + */ +struct vduse_vq_state_packed { + __u16 last_avail_counter; + __u16 last_avail_idx; + __u16 last_used_counter; + __u16 last_used_idx; +}; + +/** + * struct vduse_vq_info - information of a virtqueue + * @index: virtqueue index + * @num: the size of virtqueue + * @desc_addr: address of desc area + * @driver_addr: address of driver area + * @device_addr: address of device area + * @split: split virtqueue state + * @packed: packed virtqueue state + * @ready: ready status of virtqueue + * + * Structure used by VDUSE_VQ_GET_INFO ioctl to get virtqueue's information. + */ +struct vduse_vq_info { + __u32 index; + __u32 num; + __u64 desc_addr; + __u64 driver_addr; + __u64 device_addr; + union { + struct vduse_vq_state_split split; + struct vduse_vq_state_packed packed; + }; + __u8 ready; +}; + +/* Get the specified virtqueue's information. Caller should set index field. */ +#define VDUSE_VQ_GET_INFO _IOWR(VDUSE_BASE, 0x15, struct vduse_vq_info) + +/** + * struct vduse_vq_eventfd - eventfd configuration for a virtqueue + * @index: virtqueue index + * @fd: eventfd, -1 means de-assigning the eventfd + * + * Structure used by VDUSE_VQ_SETUP_KICKFD ioctl to setup kick eventfd. + */ +struct vduse_vq_eventfd { + __u32 index; +#define VDUSE_EVENTFD_DEASSIGN -1 + int fd; +}; + +/* + * Setup kick eventfd for specified virtqueue. The kick eventfd is used + * by VDUSE kernel module to notify userspace to consume the avail vring. 
+ */ +#define VDUSE_VQ_SETUP_KICKFD _IOW(VDUSE_BASE, 0x16, struct vduse_vq_eventfd) + +/* + * Inject an interrupt for specific virtqueue. It's used to notify virtio driver + * to consume the used vring. + */ +#define VDUSE_VQ_INJECT_IRQ _IOW(VDUSE_BASE, 0x17, __u32) + +/** + * struct vduse_iova_umem - userspace memory configuration for one IOVA region + * @uaddr: start address of userspace memory, it must be aligned to page size + * @iova: start of the IOVA region + * @size: size of the IOVA region + * @reserved: for future use, needs to be initialized to zero + * + * Structure used by VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM + * ioctls to register/de-register userspace memory for IOVA regions + */ +struct vduse_iova_umem { + __u64 uaddr; + __u64 iova; + __u64 size; + __u64 reserved[3]; +}; + +/* Register userspace memory for IOVA regions */ +#define VDUSE_IOTLB_REG_UMEM _IOW(VDUSE_BASE, 0x18, struct vduse_iova_umem) + +/* De-register the userspace memory. Caller should set iova and size field. */ +#define VDUSE_IOTLB_DEREG_UMEM _IOW(VDUSE_BASE, 0x19, struct vduse_iova_umem) + +/** + * struct vduse_iova_info - information of one IOVA region + * @start: start of the IOVA region + * @last: last of the IOVA region + * @capability: capability of the IOVA region + * @reserved: for future use, needs to be initialized to zero + * + * Structure used by VDUSE_IOTLB_GET_INFO ioctl to get information of + * one IOVA region. + */ +struct vduse_iova_info { + __u64 start; + __u64 last; +#define VDUSE_IOVA_CAP_UMEM (1 << 0) + __u64 capability; + __u64 reserved[3]; +}; + +/* + * Find the first IOVA region that overlaps with the range [start, last] + * and return some information on it. Caller should set start and last fields. 
+ */ +#define VDUSE_IOTLB_GET_INFO _IOWR(VDUSE_BASE, 0x1a, struct vduse_iova_info) + +/* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */ + +/** + * enum vduse_req_type - request type + * @VDUSE_GET_VQ_STATE: get the state for specified virtqueue from userspace + * @VDUSE_SET_STATUS: set the device status + * @VDUSE_UPDATE_IOTLB: Notify userspace to update the memory mapping for + * specified IOVA range via VDUSE_IOTLB_GET_FD ioctl + */ +enum vduse_req_type { + VDUSE_GET_VQ_STATE, + VDUSE_SET_STATUS, + VDUSE_UPDATE_IOTLB, +}; + +/** + * struct vduse_vq_state - virtqueue state + * @index: virtqueue index + * @split: split virtqueue state + * @packed: packed virtqueue state + */ +struct vduse_vq_state { + __u32 index; + union { + struct vduse_vq_state_split split; + struct vduse_vq_state_packed packed; + }; +}; + +/** + * struct vduse_dev_status - device status + * @status: device status + */ +struct vduse_dev_status { + __u8 status; +}; + +/** + * struct vduse_iova_range - IOVA range [start, last] + * @start: start of the IOVA range + * @last: last of the IOVA range + */ +struct vduse_iova_range { + __u64 start; + __u64 last; +}; + +/** + * struct vduse_dev_request - control request + * @type: request type + * @request_id: request id + * @reserved: for future use + * @vq_state: virtqueue state, only index field is available + * @s: device status + * @iova: IOVA range for updating + * @padding: padding + * + * Structure used by read(2) on /dev/vduse/$NAME. 
+ */ +struct vduse_dev_request { + __u32 type; + __u32 request_id; + __u32 reserved[4]; + union { + struct vduse_vq_state vq_state; + struct vduse_dev_status s; + struct vduse_iova_range iova; + __u32 padding[32]; + }; +}; + +/** + * struct vduse_dev_response - response to control request + * @request_id: corresponding request id + * @result: the result of request + * @reserved: for future use, needs to be initialized to zero + * @vq_state: virtqueue state + * @padding: padding + * + * Structure used by write(2) on /dev/vduse/$NAME. + */ +struct vduse_dev_response { + __u32 request_id; +#define VDUSE_REQ_RESULT_OK 0x00 +#define VDUSE_REQ_RESULT_FAILED 0x01 + __u32 result; + __u32 reserved[4]; + union { + struct vduse_vq_state vq_state; + __u32 padding[32]; + }; +}; + +#endif /* _UAPI_VDUSE_H_ */ diff --git a/dpdk/lib/vhost/meson.build b/dpdk/lib/vhost/meson.build index 51bcf17244..0004f283bb 100644 --- a/dpdk/lib/vhost/meson.build +++ b/dpdk/lib/vhost/meson.build @@ -26,16 +26,13 @@ sources = files( 'iotlb.c', 'socket.c', 'vdpa.c', + 'vduse.c', 'vhost.c', 'vhost_crypto.c', 'vhost_user.c', 'virtio_net.c', 'virtio_net_ctrl.c', ) -if cc.has_header('linux/vduse.h') - sources += files('vduse.c') - cflags += '-DVHOST_HAS_VDUSE' -endif headers = files( 'rte_vdpa.h', 'rte_vhost.h', diff --git a/dpdk/lib/vhost/vduse.c b/dpdk/lib/vhost/vduse.c index 8ba58555f9..eaf3146b95 100644 --- a/dpdk/lib/vhost/vduse.c +++ b/dpdk/lib/vhost/vduse.c @@ -8,7 +8,7 @@ #include -#include +#include #include #include diff --git a/dpdk/lib/vhost/vduse.h b/dpdk/lib/vhost/vduse.h index 0d8f3f1205..47ca97a064 100644 --- a/dpdk/lib/vhost/vduse.h +++ b/dpdk/lib/vhost/vduse.h @@ -9,29 +9,7 @@ #define VDUSE_NET_SUPPORTED_FEATURES VIRTIO_NET_SUPPORTED_FEATURES -#ifdef VHOST_HAS_VDUSE - int vduse_device_create(const char *path, bool compliant_ol_flags); int vduse_device_destroy(const char *path); -#else - -static inline int -vduse_device_create(const char *path, bool compliant_ol_flags) -{ - 
RTE_SET_USED(compliant_ol_flags); - - VHOST_CONFIG_LOG(path, ERR, "VDUSE support disabled at build time"); - return -1; -} - -static inline int -vduse_device_destroy(const char *path) -{ - VHOST_CONFIG_LOG(path, ERR, "VDUSE support disabled at build time"); - return -1; -} - -#endif /* VHOST_HAS_VDUSE */ - #endif /* _VDUSE_H */ diff --git a/dpdk/lib/vhost/virtio_net.c b/dpdk/lib/vhost/virtio_net.c index d764d4bc6a..a340e5a772 100644 --- a/dpdk/lib/vhost/virtio_net.c +++ b/dpdk/lib/vhost/virtio_net.c @@ -2823,6 +2823,9 @@ vhost_dequeue_offload(struct virtio_net *dev, struct virtio_net_hdr *hdr, */ uint16_t csum = 0, off; + if (hdr->csum_start >= rte_pktmbuf_pkt_len(m)) + return; + if (rte_raw_cksum_mbuf(m, hdr->csum_start, rte_pktmbuf_pkt_len(m) - hdr->csum_start, &csum) < 0) return; @@ -3626,6 +3629,8 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, rte_rwlock_read_unlock(&vq->access_lock); virtio_dev_vring_translate(dev, vq); + + count = 0; goto out_no_unlock; }