From a6040af14d8ac0647242bc26a78117c64359c103 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Jun 26 2018 18:20:40 +0000 Subject: import dpdk-17.11-11.el7 --- diff --git a/SOURCES/0001-eal-abstract-away-the-auxiliary-vector.patch b/SOURCES/0001-eal-abstract-away-the-auxiliary-vector.patch new file mode 100644 index 0000000..dd6d053 --- /dev/null +++ b/SOURCES/0001-eal-abstract-away-the-auxiliary-vector.patch @@ -0,0 +1,209 @@ +From 2ed9bf330709e75c2f066f6ec13ece1ecdb4e9db Mon Sep 17 00:00:00 2001 +From: Aaron Conole +Date: Mon, 2 Apr 2018 14:24:34 -0400 +Subject: [PATCH] eal: abstract away the auxiliary vector + +Rather than attempting to load the contents of the auxv directly, +prefer to use an exposed API - and if that doesn't exist then attempt +to load the vector. This is because on some systems, when a user +is downgraded, the /proc/self/auxv file retains the old ownership +and permissions. The original method of /proc/self/auxv is retained. + +This also removes a potential abort() in the code when compiled with +NDEBUG. A quick parse of the code shows that many (if not all) of +the CPU flag parsing isn't used internally, so it should be okay. 
+ +Signed-off-by: Aaron Conole +Signed-off-by: Timothy Redaelli +--- + lib/librte_eal/common/arch/arm/rte_cpuflags.c | 20 +---- + .../common/arch/ppc_64/rte_cpuflags.c | 15 +--- + lib/librte_eal/common/eal_common_cpuflags.c | 79 +++++++++++++++++++ + .../common/include/generic/rte_cpuflags.h | 21 +++++ + 4 files changed, 106 insertions(+), 29 deletions(-) + +diff --git a/lib/librte_eal/common/arch/arm/rte_cpuflags.c b/lib/librte_eal/common/arch/arm/rte_cpuflags.c +index 390a19a26..caf3dc83a 100644 +--- a/lib/librte_eal/common/arch/arm/rte_cpuflags.c ++++ b/lib/librte_eal/common/arch/arm/rte_cpuflags.c +@@ -105,22 +105,10 @@ const struct feature_entry rte_cpu_feature_table[] = { + static void + rte_cpu_get_features(hwcap_registers_t out) + { +- int auxv_fd; +- _Elfx_auxv_t auxv; +- +- auxv_fd = open("/proc/self/auxv", O_RDONLY); +- assert(auxv_fd != -1); +- while (read(auxv_fd, &auxv, sizeof(auxv)) == sizeof(auxv)) { +- if (auxv.a_type == AT_HWCAP) { +- out[REG_HWCAP] = auxv.a_un.a_val; +- } else if (auxv.a_type == AT_HWCAP2) { +- out[REG_HWCAP2] = auxv.a_un.a_val; +- } else if (auxv.a_type == AT_PLATFORM) { +- if (!strcmp((const char *)auxv.a_un.a_val, PLATFORM_STR)) +- out[REG_PLATFORM] = 0x0001; +- } +- } +- close(auxv_fd); ++ out[REG_HWCAP] = rte_cpu_getauxval(AT_HWCAP); ++ out[REG_HWCAP2] = rte_cpu_getauxval(AT_HWCAP2); ++ if (!rte_cpu_strcmp_auxval(AT_PLATFORM, PLATFORM_STR)) ++ out[REG_PLATFORM] = 0x0001; + } + + /* +diff --git a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c +index 970a61c5e..e7a82452b 100644 +--- a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c ++++ b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c +@@ -104,19 +104,8 @@ const struct feature_entry rte_cpu_feature_table[] = { + static void + rte_cpu_get_features(hwcap_registers_t out) + { +- int auxv_fd; +- Elf64_auxv_t auxv; +- +- auxv_fd = open("/proc/self/auxv", O_RDONLY); +- assert(auxv_fd != -1); +- while (read(auxv_fd, &auxv, +- 
sizeof(Elf64_auxv_t)) == sizeof(Elf64_auxv_t)) { +- if (auxv.a_type == AT_HWCAP) +- out[REG_HWCAP] = auxv.a_un.a_val; +- else if (auxv.a_type == AT_HWCAP2) +- out[REG_HWCAP2] = auxv.a_un.a_val; +- } +- close(auxv_fd); ++ out[REG_HWCAP] = rte_cpu_getauxval(AT_HWCAP); ++ out[REG_HWCAP2] = rte_cpu_getauxval(AT_HWCAP2); + } + + /* +diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c +index 3a055f7c7..a09667563 100644 +--- a/lib/librte_eal/common/eal_common_cpuflags.c ++++ b/lib/librte_eal/common/eal_common_cpuflags.c +@@ -2,11 +2,90 @@ + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + ++#include ++#include + #include ++#include ++#include ++#include ++#include ++ ++#if defined(__GLIBC__) && defined(__GLIBC_PREREQ) ++#if __GLIBC_PREREQ(2, 16) ++#include ++#define HAS_AUXV 1 ++#endif ++#endif + + #include + #include + ++#ifndef HAS_AUXV ++static unsigned long ++getauxval(unsigned long type) ++{ ++ errno = ENOTSUP; ++ return 0; ++} ++#endif ++ ++#ifdef RTE_ARCH_64 ++typedef Elf64_auxv_t Internal_Elfx_auxv_t; ++#else ++typedef Elf32_auxv_t Internal_Elfx_auxv_t; ++#endif ++ ++ ++/** ++ * Provides a method for retrieving values from the auxiliary vector and ++ * possibly running a string comparison. ++ * ++ * @return Always returns a result. When the result is 0, check errno ++ * to see if an error occurred during processing. 
++ */ ++static unsigned long ++_rte_cpu_getauxval(unsigned long type, const char *str) ++{ ++ unsigned long val; ++ ++ errno = 0; ++ val = getauxval(type); ++ ++ if (!val && (errno == ENOTSUP || errno == ENOENT)) { ++ int auxv_fd = open("/proc/self/auxv", O_RDONLY); ++ Internal_Elfx_auxv_t auxv; ++ ++ if (auxv_fd == -1) ++ return 0; ++ ++ errno = ENOENT; ++ while (read(auxv_fd, &auxv, sizeof(auxv)) == sizeof(auxv)) { ++ if (auxv.a_type == type) { ++ errno = 0; ++ val = auxv.a_un.a_val; ++ if (str) ++ val = strcmp((const char *)val, str); ++ break; ++ } ++ } ++ close(auxv_fd); ++ } ++ ++ return val; ++} ++ ++unsigned long ++rte_cpu_getauxval(unsigned long type) ++{ ++ return _rte_cpu_getauxval(type, NULL); ++} ++ ++int ++rte_cpu_strcmp_auxval(unsigned long type, const char *str) ++{ ++ return _rte_cpu_getauxval(type, str); ++} ++ + /** + * Checks if the machine is adequate for running the binary. If it is not, the + * program exits with status 1. +diff --git a/lib/librte_eal/common/include/generic/rte_cpuflags.h b/lib/librte_eal/common/include/generic/rte_cpuflags.h +index 8d31687d8..156ea0029 100644 +--- a/lib/librte_eal/common/include/generic/rte_cpuflags.h ++++ b/lib/librte_eal/common/include/generic/rte_cpuflags.h +@@ -64,4 +64,25 @@ rte_cpu_check_supported(void); + int + rte_cpu_is_supported(void); + ++/** ++ * This function attempts to retrieve a value from the auxiliary vector. ++ * If it is unsuccessful, the result will be 0, and errno will be set. ++ * ++ * @return A value from the auxiliary vector. When the value is 0, check ++ * errno to determine if an error occurred. ++ */ ++unsigned long ++rte_cpu_getauxval(unsigned long type); ++ ++/** ++ * This function retrieves a value from the auxiliary vector, and compares it ++ * as a string against the value retrieved. ++ * ++ * @return The result of calling strcmp() against the value retrieved from ++ * the auxiliary vector. 
When the value is 0 (meaning a match is found), ++ * check errno to determine if an error occurred. ++ */ ++int ++rte_cpu_strcmp_auxval(unsigned long type, const char *str); ++ + #endif /* _RTE_CPUFLAGS_H_ */ +-- +2.17.0 + diff --git a/SOURCES/0001-eal-fix-build-with-glibc-2.16.patch b/SOURCES/0001-eal-fix-build-with-glibc-2.16.patch new file mode 100644 index 0000000..bb09440 --- /dev/null +++ b/SOURCES/0001-eal-fix-build-with-glibc-2.16.patch @@ -0,0 +1,34 @@ +From 8ddd6a90ea3cbd3724191984039c71de8b58d41b Mon Sep 17 00:00:00 2001 +From: Thomas Monjalon +Date: Fri, 27 Apr 2018 02:54:00 +0200 +Subject: [PATCH 1/2] eal: fix build with glibc < 2.16 + +The fake getauxval function does not use its parameter. +So the compiler raised this error: + lib/librte_eal/common/eal_common_cpuflags.c:25:25: error: + unused parameter 'type' + +Fixes: 2ed9bf330709 ("eal: abstract away the auxiliary vector") + +Signed-off-by: Thomas Monjalon +Acked-by: Maxime Coquelin +--- + lib/librte_eal/common/eal_common_cpuflags.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c +index a09667563..6a9dbaeb1 100644 +--- a/lib/librte_eal/common/eal_common_cpuflags.c ++++ b/lib/librte_eal/common/eal_common_cpuflags.c +@@ -22,7 +22,7 @@ + + #ifndef HAS_AUXV + static unsigned long +-getauxval(unsigned long type) ++getauxval(unsigned long type __rte_unused) + { + errno = ENOTSUP; + return 0; +-- +2.17.0 + diff --git a/SOURCES/0001-net-bnxt-fix-link-speed-setting-with-autoneg-off.patch b/SOURCES/0001-net-bnxt-fix-link-speed-setting-with-autoneg-off.patch new file mode 100644 index 0000000..e5da593 --- /dev/null +++ b/SOURCES/0001-net-bnxt-fix-link-speed-setting-with-autoneg-off.patch @@ -0,0 +1,96 @@ +From 3bdf79031ac9991210af55902aa632adde32d938 Mon Sep 17 00:00:00 2001 +From: Ajit Khaparde +Date: Sat, 24 Feb 2018 21:02:16 -0800 +Subject: [PATCH] net/bnxt: fix link speed setting with autoneg 
off + +[ backported from upstream commit 90cc14d77359bb3f8e48f4ef966ee77a52703949 ] + +When Autoneg is turned off, especially on the remote side, +the link does not come up. This patch fixes that. + +Fixes: 7bc8e9a227cc ("net/bnxt: support async link notification") + +Signed-off-by: Ajit Khaparde +--- + drivers/net/bnxt/bnxt.h | 1 + + drivers/net/bnxt/bnxt_hwrm.c | 25 ++++++++++++++++++++++--- + 2 files changed, 23 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h +index 8ab1c7f85..3bc2b9379 100644 +--- a/drivers/net/bnxt/bnxt.h ++++ b/drivers/net/bnxt/bnxt.h +@@ -162,6 +162,7 @@ struct bnxt_link_info { + uint16_t link_speed; + uint16_t support_speeds; + uint16_t auto_link_speed; ++ uint16_t force_link_speed; + uint16_t auto_link_speed_mask; + uint32_t preemphasis; + uint8_t phy_type; +diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c +index 51b0056cd..ce214d7cb 100644 +--- a/drivers/net/bnxt/bnxt_hwrm.c ++++ b/drivers/net/bnxt/bnxt_hwrm.c +@@ -738,7 +738,8 @@ static int bnxt_hwrm_port_phy_cfg(struct bnxt *bp, struct bnxt_link_info *conf) + HWRM_PORT_PHY_CFG_INPUT_AUTO_MODE_ALL_SPEEDS; + } + /* AutoNeg - Advertise speeds specified. 
*/ +- if (conf->auto_link_speed_mask) { ++ if (conf->auto_link_speed_mask && ++ !(conf->phy_flags & HWRM_PORT_PHY_CFG_INPUT_FLAGS_FORCE)) { + req.auto_mode = + HWRM_PORT_PHY_CFG_INPUT_AUTO_MODE_SPEED_MASK; + req.auto_link_speed_mask = +@@ -801,12 +802,22 @@ static int bnxt_hwrm_port_phy_qcfg(struct bnxt *bp, + link_info->support_speeds = rte_le_to_cpu_16(resp->support_speeds); + link_info->auto_link_speed = rte_le_to_cpu_16(resp->auto_link_speed); + link_info->preemphasis = rte_le_to_cpu_32(resp->preemphasis); ++ link_info->force_link_speed = rte_le_to_cpu_16(resp->force_link_speed); + link_info->phy_ver[0] = resp->phy_maj; + link_info->phy_ver[1] = resp->phy_min; + link_info->phy_ver[2] = resp->phy_bld; + + HWRM_UNLOCK(); + ++ RTE_LOG(DEBUG, PMD, "Link Speed %d\n", link_info->link_speed); ++ RTE_LOG(DEBUG, PMD, "Auto Mode %d\n", link_info->auto_mode); ++ RTE_LOG(DEBUG, PMD, "Support Speeds %x\n", link_info->support_speeds); ++ RTE_LOG(DEBUG, PMD, "Auto Link Speed %x\n", link_info->auto_link_speed); ++ RTE_LOG(DEBUG, PMD, "Auto Link Speed Mask %x\n", ++ link_info->auto_link_speed_mask); ++ RTE_LOG(DEBUG, PMD, "Forced Link Speed %x\n", ++ link_info->force_link_speed); ++ + return rc; + } + +@@ -2124,7 +2135,9 @@ int bnxt_set_hwrm_link_config(struct bnxt *bp, bool link_up) + autoneg = bnxt_check_eth_link_autoneg(dev_conf->link_speeds); + speed = bnxt_parse_eth_link_speed(dev_conf->link_speeds); + link_req.phy_flags = HWRM_PORT_PHY_CFG_INPUT_FLAGS_RESET_PHY; +- if (autoneg == 1) { ++ /* Autoneg can be done only when the FW allows */ ++ if (autoneg == 1 && !(bp->link_info.auto_link_speed || ++ bp->link_info.force_link_speed)) { + link_req.phy_flags |= + HWRM_PORT_PHY_CFG_INPUT_FLAGS_RESTART_AUTONEG; + link_req.auto_link_speed_mask = +@@ -2142,7 +2155,13 @@ int bnxt_set_hwrm_link_config(struct bnxt *bp, bool link_up) + } + + link_req.phy_flags |= HWRM_PORT_PHY_CFG_INPUT_FLAGS_FORCE; +- link_req.link_speed = speed; ++ /* If user wants a particular speed try that first. 
*/ ++ if (speed) ++ link_req.link_speed = speed; ++ else if (bp->link_info.force_link_speed) ++ link_req.link_speed = bp->link_info.force_link_speed; ++ else ++ link_req.link_speed = bp->link_info.auto_link_speed; + } + link_req.duplex = bnxt_parse_eth_link_duplex(dev_conf->link_speeds); + link_req.auto_pause = bp->link_info.auto_pause; +-- +2.14.3 + diff --git a/SOURCES/0001-net-enic-allocate-stats-DMA-buffer-upfront-during-pr.patch b/SOURCES/0001-net-enic-allocate-stats-DMA-buffer-upfront-during-pr.patch new file mode 100644 index 0000000..fcabc06 --- /dev/null +++ b/SOURCES/0001-net-enic-allocate-stats-DMA-buffer-upfront-during-pr.patch @@ -0,0 +1,114 @@ +From 8d782f3f89e1dcd0c8af1c3c93501d7a06159d66 Mon Sep 17 00:00:00 2001 +From: Hyong Youb Kim +Date: Wed, 7 Mar 2018 18:46:58 -0800 +Subject: [PATCH] net/enic: allocate stats DMA buffer upfront during probe + +The driver provides a DMA buffer to the firmware when it requests port +stats. The NIC then fills that buffer with latest stats. Currently, +the driver allocates the DMA buffer the first time it requests stats +and saves it for later use. This can lead to crashes when +primary/secondary processes are involved. For example, the following +sequence crashes the secondary process. + +1. Start a primary app that does not call rte_eth_stats_get() +2. dpdk-procinfo -- --stats + +dpdk-procinfo crashes while trying to allocate the stats DMA buffer +because the alloc function pointer (vdev.alloc_consistent) is valid +only in the primary process, not in the secondary process. + +Overwriting the alloc function pointer in the secondary process is not +an option, as it will simply make the pointer invalid in the primary +process. Instead, allocate the DMA buffer during probe so that only +the primary process does both allocate and free. This allows the +secondary process to dump stats as well. 
+ +Fixes: 9913fbb91df0 ("enic/base: common code") +Cc: stable@dpdk.org + +Signed-off-by: Hyong Youb Kim +Reviewed-by: John Daley +--- + drivers/net/enic/base/vnic_dev.c | 24 ++++++++++++++---------- + drivers/net/enic/base/vnic_dev.h | 1 + + drivers/net/enic/enic_main.c | 9 +++++++++ + 3 files changed, 24 insertions(+), 10 deletions(-) + +diff --git a/drivers/net/enic/base/vnic_dev.c b/drivers/net/enic/base/vnic_dev.c +index 05b595eb8..1f8d222fc 100644 +--- a/drivers/net/enic/base/vnic_dev.c ++++ b/drivers/net/enic/base/vnic_dev.c +@@ -587,17 +587,9 @@ int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats) + { + u64 a0, a1; + int wait = 1000; +- static u32 instance; +- char name[NAME_MAX]; + +- if (!vdev->stats) { +- snprintf((char *)name, sizeof(name), +- "vnic_stats-%u", instance++); +- vdev->stats = vdev->alloc_consistent(vdev->priv, +- sizeof(struct vnic_stats), &vdev->stats_pa, (u8 *)name); +- if (!vdev->stats) +- return -ENOMEM; +- } ++ if (!vdev->stats) ++ return -ENOMEM; + + *stats = vdev->stats; + a0 = vdev->stats_pa; +@@ -922,6 +914,18 @@ u32 vnic_dev_get_intr_coal_timer_max(struct vnic_dev *vdev) + return vdev->intr_coal_timer_info.max_usec; + } + ++int vnic_dev_alloc_stats_mem(struct vnic_dev *vdev) ++{ ++ char name[NAME_MAX]; ++ static u32 instance; ++ ++ snprintf((char *)name, sizeof(name), "vnic_stats-%u", instance++); ++ vdev->stats = vdev->alloc_consistent(vdev->priv, ++ sizeof(struct vnic_stats), ++ &vdev->stats_pa, (u8 *)name); ++ return vdev->stats == NULL ? 
-ENOMEM : 0; ++} ++ + void vnic_dev_unregister(struct vnic_dev *vdev) + { + if (vdev) { +diff --git a/drivers/net/enic/base/vnic_dev.h b/drivers/net/enic/base/vnic_dev.h +index 8c0992063..7e5736b4d 100644 +--- a/drivers/net/enic/base/vnic_dev.h ++++ b/drivers/net/enic/base/vnic_dev.h +@@ -165,6 +165,7 @@ struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev, + void *priv, struct rte_pci_device *pdev, struct vnic_dev_bar *bar, + unsigned int num_bars); + struct rte_pci_device *vnic_dev_get_pdev(struct vnic_dev *vdev); ++int vnic_dev_alloc_stats_mem(struct vnic_dev *vdev); + int vnic_dev_cmd_init(struct vnic_dev *vdev, int fallback); + int vnic_dev_get_size(void); + int vnic_dev_int13(struct vnic_dev *vdev, u64 arg, u32 op); +diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c +index c3796c543..235ef5940 100644 +--- a/drivers/net/enic/enic_main.c ++++ b/drivers/net/enic/enic_main.c +@@ -1478,6 +1478,15 @@ int enic_probe(struct enic *enic) + enic_alloc_consistent, + enic_free_consistent); + ++ /* ++ * Allocate the consistent memory for stats upfront so both primary and ++ * secondary processes can dump stats. 
++ */ ++ err = vnic_dev_alloc_stats_mem(enic->vdev); ++ if (err) { ++ dev_err(enic, "Failed to allocate cmd memory, aborting\n"); ++ goto err_out_unregister; ++ } + /* Issue device open to get device in known state */ + err = enic_dev_open(enic); + if (err) { +-- +2.14.3 + diff --git a/SOURCES/0001-net-enic-fix-L4-Rx-ptype-comparison.patch b/SOURCES/0001-net-enic-fix-L4-Rx-ptype-comparison.patch new file mode 100644 index 0000000..a6ae804 --- /dev/null +++ b/SOURCES/0001-net-enic-fix-L4-Rx-ptype-comparison.patch @@ -0,0 +1,43 @@ +From f596cb198e65ff6839d35763d824399eb407adab Mon Sep 17 00:00:00 2001 +From: Hyong Youb Kim +Date: Wed, 10 Jan 2018 01:17:04 -0800 +Subject: [PATCH] net/enic: fix L4 Rx ptype comparison + +[ upstream commit 5dbff3af25a4a68980992f5040246e1d7f20b4cd ] + +For non-UDP/TCP packets, enic may wrongly set PKT_RX_L4_CKSUM_BAD in +ol_flags. The comparison that checks if a packet is UDP or TCP assumes +that RTE_PTYPE_L4 values are bit flags, but they are not. For example, +the following evaluates to true because NONFRAG is 0x600 and UDP is +0x200, and causes the current code to think the packet is UDP. + +!!(RTE_PTYPE_L4_NONFRAG & RTE_PTYPE_L4_UDP) + +So, fix this by comparing the packet type against UDP and TCP +individually. 
+ +Fixes: 453d15059b58 ("net/enic: use new Rx checksum flags") + +Signed-off-by: Hyong Youb Kim +Reviewed-by: John Daley +--- + drivers/net/enic/enic_rxtx.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/enic/enic_rxtx.c b/drivers/net/enic/enic_rxtx.c +index a3663d516..831c90a1c 100644 +--- a/drivers/net/enic/enic_rxtx.c ++++ b/drivers/net/enic/enic_rxtx.c +@@ -285,7 +285,8 @@ enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf) + else + pkt_flags |= PKT_RX_IP_CKSUM_BAD; + +- if (l4_flags & (RTE_PTYPE_L4_UDP | RTE_PTYPE_L4_TCP)) { ++ if (l4_flags == RTE_PTYPE_L4_UDP || ++ l4_flags == RTE_PTYPE_L4_TCP) { + if (enic_cq_rx_desc_tcp_udp_csum_ok(cqrd)) + pkt_flags |= PKT_RX_L4_CKSUM_GOOD; + else +-- +2.14.3 + diff --git a/SOURCES/0001-net-enic-fix-crash-due-to-static-max-number-of-queue.patch b/SOURCES/0001-net-enic-fix-crash-due-to-static-max-number-of-queue.patch new file mode 100644 index 0000000..0ebb64b --- /dev/null +++ b/SOURCES/0001-net-enic-fix-crash-due-to-static-max-number-of-queue.patch @@ -0,0 +1,194 @@ +From acc4c80cf3b5fb3c0f87bcb7c4eb68958f60ef15 Mon Sep 17 00:00:00 2001 +From: Hyong Youb Kim +Date: Mon, 22 Jan 2018 17:05:28 -0800 +Subject: [PATCH] net/enic: fix crash due to static max number of queues + +[ upstream commit 6c45c330589d334c4f7b729e61ae30a6acfcc119 ] + +ENIC_CQ_MAX, ENIC_WQ_MAX and others are arbitrary values that +prevent the app from using more queues when they are available on +hardware. Remove them and dynamically allocate vnic_cq and such +arrays to accommodate all available hardware queues. + +As a side effect of removing ENIC_CQ_MAX, this commit fixes a segfault +that would happen when the app requests more than 16 CQs, because +enic_set_vnic_res() does not consider ENIC_CQ_MAX. For example, the +following command causes a crash. 
+ +testpmd -- --rxq=16 --txq=16 + +Fixes: ce93d3c36db0 ("net/enic: fix resource check failures when bonding devices") + +Signed-off-by: Hyong Youb Kim +Reviewed-by: John Daley +--- + drivers/net/enic/enic.h | 25 +++++++++--------------- + drivers/net/enic/enic_ethdev.c | 20 ++------------------ + drivers/net/enic/enic_main.c | 43 ++++++++++++++++++++++++++++++++---------- + 3 files changed, 44 insertions(+), 44 deletions(-) + +diff --git a/drivers/net/enic/enic.h b/drivers/net/enic/enic.h +index e36ec385c..a43fddc5f 100644 +--- a/drivers/net/enic/enic.h ++++ b/drivers/net/enic/enic.h +@@ -53,13 +53,6 @@ + #define DRV_DESCRIPTION "Cisco VIC Ethernet NIC Poll-mode Driver" + #define DRV_COPYRIGHT "Copyright 2008-2015 Cisco Systems, Inc" + +-#define ENIC_WQ_MAX 8 +-/* With Rx scatter support, we use two RQs on VIC per RQ used by app. Both +- * RQs use the same CQ. +- */ +-#define ENIC_RQ_MAX 16 +-#define ENIC_CQ_MAX (ENIC_WQ_MAX + (ENIC_RQ_MAX / 2)) +-#define ENIC_INTR_MAX (ENIC_CQ_MAX + 2) + #define ENIC_MAX_MAC_ADDR 64 + + #define VLAN_ETH_HLEN 18 +@@ -150,17 +143,17 @@ struct enic { + unsigned int flags; + unsigned int priv_flags; + +- /* work queue */ +- struct vnic_wq wq[ENIC_WQ_MAX]; +- unsigned int wq_count; ++ /* work queue (len = conf_wq_count) */ ++ struct vnic_wq *wq; ++ unsigned int wq_count; /* equals eth_dev nb_tx_queues */ + +- /* receive queue */ +- struct vnic_rq rq[ENIC_RQ_MAX]; +- unsigned int rq_count; ++ /* receive queue (len = conf_rq_count) */ ++ struct vnic_rq *rq; ++ unsigned int rq_count; /* equals eth_dev nb_rx_queues */ + +- /* completion queue */ +- struct vnic_cq cq[ENIC_CQ_MAX]; +- unsigned int cq_count; ++ /* completion queue (len = conf_cq_count) */ ++ struct vnic_cq *cq; ++ unsigned int cq_count; /* equals rq_count + wq_count */ + + /* interrupt resource */ + struct vnic_intr intr; +diff --git a/drivers/net/enic/enic_ethdev.c b/drivers/net/enic/enic_ethdev.c +index 669dbf336..98391b008 100644 +--- a/drivers/net/enic/enic_ethdev.c ++++ 
b/drivers/net/enic/enic_ethdev.c +@@ -205,13 +205,7 @@ static int enicpmd_dev_tx_queue_setup(struct rte_eth_dev *eth_dev, + return -E_RTE_SECONDARY; + + ENICPMD_FUNC_TRACE(); +- if (queue_idx >= ENIC_WQ_MAX) { +- dev_err(enic, +- "Max number of TX queues exceeded. Max is %d\n", +- ENIC_WQ_MAX); +- return -EINVAL; +- } +- ++ RTE_ASSERT(queue_idx < enic->conf_wq_count); + eth_dev->data->tx_queues[queue_idx] = (void *)&enic->wq[queue_idx]; + + ret = enic_alloc_wq(enic, queue_idx, socket_id, nb_desc); +@@ -325,17 +319,7 @@ static int enicpmd_dev_rx_queue_setup(struct rte_eth_dev *eth_dev, + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return -E_RTE_SECONDARY; +- +- /* With Rx scatter support, two RQs are now used on VIC per RQ used +- * by the application. +- */ +- if (queue_idx * 2 >= ENIC_RQ_MAX) { +- dev_err(enic, +- "Max number of RX queues exceeded. Max is %d. This PMD uses 2 RQs on VIC per RQ used by DPDK.\n", +- ENIC_RQ_MAX); +- return -EINVAL; +- } +- ++ RTE_ASSERT(enic_rte_rq_idx_to_sop_idx(queue_idx) < enic->conf_rq_count); + eth_dev->data->rx_queues[queue_idx] = + (void *)&enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)]; + +diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c +index 8af0ccd3c..1694aed12 100644 +--- a/drivers/net/enic/enic_main.c ++++ b/drivers/net/enic/enic_main.c +@@ -1075,6 +1075,9 @@ static void enic_dev_deinit(struct enic *enic) + vnic_dev_notify_unset(enic->vdev); + + rte_free(eth_dev->data->mac_addrs); ++ rte_free(enic->cq); ++ rte_free(enic->rq); ++ rte_free(enic->wq); + } + + +@@ -1082,27 +1085,28 @@ int enic_set_vnic_res(struct enic *enic) + { + struct rte_eth_dev *eth_dev = enic->rte_dev; + int rc = 0; ++ unsigned int required_rq, required_wq, required_cq; + +- /* With Rx scatter support, two RQs are now used per RQ used by +- * the application. +- */ +- if (enic->conf_rq_count < eth_dev->data->nb_rx_queues) { ++ /* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. 
*/ ++ required_rq = eth_dev->data->nb_rx_queues * 2; ++ required_wq = eth_dev->data->nb_tx_queues; ++ required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues; ++ ++ if (enic->conf_rq_count < required_rq) { + dev_err(dev, "Not enough Receive queues. Requested:%u which uses %d RQs on VIC, Configured:%u\n", + eth_dev->data->nb_rx_queues, +- eth_dev->data->nb_rx_queues * 2, enic->conf_rq_count); ++ required_rq, enic->conf_rq_count); + rc = -EINVAL; + } +- if (enic->conf_wq_count < eth_dev->data->nb_tx_queues) { ++ if (enic->conf_wq_count < required_wq) { + dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n", + eth_dev->data->nb_tx_queues, enic->conf_wq_count); + rc = -EINVAL; + } + +- if (enic->conf_cq_count < (eth_dev->data->nb_rx_queues + +- eth_dev->data->nb_tx_queues)) { ++ if (enic->conf_cq_count < required_cq) { + dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n", +- (eth_dev->data->nb_rx_queues + +- eth_dev->data->nb_tx_queues), enic->conf_cq_count); ++ required_cq, enic->conf_cq_count); + rc = -EINVAL; + } + +@@ -1307,6 +1311,25 @@ static int enic_dev_init(struct enic *enic) + dev_err(enic, "See the ENIC PMD guide for more information.\n"); + return -EINVAL; + } ++ /* Queue counts may be zeros. rte_zmalloc returns NULL in that case. 
*/ ++ enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) * ++ enic->conf_cq_count, 8); ++ enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) * ++ enic->conf_rq_count, 8); ++ enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) * ++ enic->conf_wq_count, 8); ++ if (enic->conf_cq_count > 0 && enic->cq == NULL) { ++ dev_err(enic, "failed to allocate vnic_cq, aborting.\n"); ++ return -1; ++ } ++ if (enic->conf_rq_count > 0 && enic->rq == NULL) { ++ dev_err(enic, "failed to allocate vnic_rq, aborting.\n"); ++ return -1; ++ } ++ if (enic->conf_wq_count > 0 && enic->wq == NULL) { ++ dev_err(enic, "failed to allocate vnic_wq, aborting.\n"); ++ return -1; ++ } + + /* Get the supported filters */ + enic_fdir_info(enic); +-- +2.14.3 + diff --git a/SOURCES/0001-net-enic-fix-crash-on-MTU-update-with-non-setup-queu.patch b/SOURCES/0001-net-enic-fix-crash-on-MTU-update-with-non-setup-queu.patch new file mode 100644 index 0000000..782da03 --- /dev/null +++ b/SOURCES/0001-net-enic-fix-crash-on-MTU-update-with-non-setup-queu.patch @@ -0,0 +1,35 @@ +From 33a2d6594945eb3278bfa441f79ae9a38ea538a0 Mon Sep 17 00:00:00 2001 +From: John Daley +Date: Wed, 4 Apr 2018 16:54:53 -0700 +Subject: [PATCH] net/enic: fix crash on MTU update with non-setup queues + +The enic code called from rte_eth_dev_set_mtu() was assuming that the +Rx queues are already set up via a call to rte_eth_rx_queue_setup(). +OVS calls rte_eth_dev_set_mtu() before rte_eth_rx_queue_setup() and +a null pointer was dereferenced. 
+ +Fixes: c3e09182bcd6 ("net/enic: support scatter Rx in MTU update") +Cc: stable@dpdk.org + +Signed-off-by: John Daley +Reviewed-by: Hyong Youb Kim +--- + drivers/net/enic/enic_main.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c +index d15d236f7..98d47752c 100644 +--- a/drivers/net/enic/enic_main.c ++++ b/drivers/net/enic/enic_main.c +@@ -1440,6 +1440,8 @@ int enic_set_mtu(struct enic *enic, uint16_t new_mtu) + /* free and reallocate RQs with the new MTU */ + for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) { + rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)]; ++ if (!rq->in_use) ++ continue; + + enic_free_rq(rq); + rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp, +-- +2.14.3 + diff --git a/SOURCES/0001-net-mlx-fix-rdma-core-glue-path-with-EAL-plugins.patch b/SOURCES/0001-net-mlx-fix-rdma-core-glue-path-with-EAL-plugins.patch new file mode 100644 index 0000000..070e910 --- /dev/null +++ b/SOURCES/0001-net-mlx-fix-rdma-core-glue-path-with-EAL-plugins.patch @@ -0,0 +1,217 @@ +From 08c028d08c42c1a4cd26aff1ed9c6438ddfd1206 Mon Sep 17 00:00:00 2001 +From: Adrien Mazarguil +Date: Fri, 2 Mar 2018 15:15:17 +0100 +Subject: [PATCH] net/mlx: fix rdma-core glue path with EAL plugins + +Glue object files are looked up in RTE_EAL_PMD_PATH by default when set and +should be installed in this directory. + +During startup, EAL attempts to load them automatically like other plug-ins +found there. While normally harmless, dlopen() fails when rdma-core is not +installed, EAL interprets this as a fatal error and terminates the +application. + +This patch requests glue objects to be installed in a different directory +to prevent their automatic loading by EAL since they are PMD helpers, not +actual DPDK plug-ins. 
+ +Fixes: f6242d0655cd ("net/mlx: make rdma-core glue path configurable") +Cc: stable@dpdk.org + +Reported-by: Timothy Redaelli +Signed-off-by: Adrien Mazarguil +Tested-by: Timothy Redaelli +--- + doc/guides/nics/mlx4.rst | 7 +++--- + doc/guides/nics/mlx5.rst | 7 +++--- + drivers/net/mlx4/mlx4.c | 50 +++++++++++++++++++++++++++++++++++++++- + drivers/net/mlx5/mlx5.c | 50 +++++++++++++++++++++++++++++++++++++++- + 4 files changed, 106 insertions(+), 8 deletions(-) + +diff --git a/doc/guides/nics/mlx4.rst b/doc/guides/nics/mlx4.rst +index 98b971667..afeff276e 100644 +--- a/doc/guides/nics/mlx4.rst ++++ b/doc/guides/nics/mlx4.rst +@@ -98,9 +98,10 @@ These options can be modified in the ``.config`` file. + missing with ``ldd(1)``. + + It works by moving these dependencies to a purpose-built rdma-core "glue" +- plug-in, which must either be installed in ``CONFIG_RTE_EAL_PMD_PATH`` if +- set, or in a standard location for the dynamic linker (e.g. ``/lib``) if +- left to the default empty string (``""``). ++ plug-in which must either be installed in a directory whose name is based ++ on ``CONFIG_RTE_EAL_PMD_PATH`` suffixed with ``-glue`` if set, or in a ++ standard location for the dynamic linker (e.g. ``/lib``) if left to the ++ default empty string (``""``). + + This option has no performance impact. + +diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst +index 0e6e525c9..46d26e4c8 100644 +--- a/doc/guides/nics/mlx5.rst ++++ b/doc/guides/nics/mlx5.rst +@@ -171,9 +171,10 @@ These options can be modified in the ``.config`` file. + missing with ``ldd(1)``. + + It works by moving these dependencies to a purpose-built rdma-core "glue" +- plug-in, which must either be installed in ``CONFIG_RTE_EAL_PMD_PATH`` if +- set, or in a standard location for the dynamic linker (e.g. ``/lib``) if +- left to the default empty string (``""``). 
++ plug-in which must either be installed in a directory whose name is based ++ on ``CONFIG_RTE_EAL_PMD_PATH`` suffixed with ``-glue`` if set, or in a ++ standard location for the dynamic linker (e.g. ``/lib``) if left to the ++ default empty string (``""``). + + This option has no performance impact. + +diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c +index ee93dafe6..fb8a8b848 100644 +--- a/drivers/net/mlx4/mlx4.c ++++ b/drivers/net/mlx4/mlx4.c +@@ -707,12 +707,54 @@ static struct rte_pci_driver mlx4_driver = { + + #ifdef RTE_LIBRTE_MLX4_DLOPEN_DEPS + ++/** ++ * Suffix RTE_EAL_PMD_PATH with "-glue". ++ * ++ * This function performs a sanity check on RTE_EAL_PMD_PATH before ++ * suffixing its last component. ++ * ++ * @param buf[out] ++ * Output buffer, should be large enough otherwise NULL is returned. ++ * @param size ++ * Size of @p out. ++ * ++ * @return ++ * Pointer to @p buf or @p NULL in case suffix cannot be appended. ++ */ ++static char * ++mlx4_glue_path(char *buf, size_t size) ++{ ++ static const char *const bad[] = { "/", ".", "..", NULL }; ++ const char *path = RTE_EAL_PMD_PATH; ++ size_t len = strlen(path); ++ size_t off; ++ int i; ++ ++ while (len && path[len - 1] == '/') ++ --len; ++ for (off = len; off && path[off - 1] != '/'; --off) ++ ; ++ for (i = 0; bad[i]; ++i) ++ if (!strncmp(path + off, bad[i], (int)(len - off))) ++ goto error; ++ i = snprintf(buf, size, "%.*s-glue", (int)len, path); ++ if (i == -1 || (size_t)i >= size) ++ goto error; ++ return buf; ++error: ++ ERROR("unable to append \"-glue\" to last component of" ++ " RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\")," ++ " please re-configure DPDK"); ++ return NULL; ++} ++ + /** + * Initialization routine for run-time dependency on rdma-core. 
+ */ + static int + mlx4_glue_init(void) + { ++ char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")]; + const char *path[] = { + /* + * A basic security check is necessary before trusting +@@ -720,7 +762,13 @@ mlx4_glue_init(void) + */ + (geteuid() == getuid() && getegid() == getgid() ? + getenv("MLX4_GLUE_PATH") : NULL), +- RTE_EAL_PMD_PATH, ++ /* ++ * When RTE_EAL_PMD_PATH is set, use its glue-suffixed ++ * variant, otherwise let dlopen() look up libraries on its ++ * own. ++ */ ++ (*RTE_EAL_PMD_PATH ? ++ mlx4_glue_path(glue_path, sizeof(glue_path)) : ""), + }; + unsigned int i = 0; + void *handle = NULL; +diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c +index 61cb93101..0ca16d11d 100644 +--- a/drivers/net/mlx5/mlx5.c ++++ b/drivers/net/mlx5/mlx5.c +@@ -1042,12 +1042,54 @@ static struct rte_pci_driver mlx5_driver = { + + #ifdef RTE_LIBRTE_MLX5_DLOPEN_DEPS + ++/** ++ * Suffix RTE_EAL_PMD_PATH with "-glue". ++ * ++ * This function performs a sanity check on RTE_EAL_PMD_PATH before ++ * suffixing its last component. ++ * ++ * @param buf[out] ++ * Output buffer, should be large enough otherwise NULL is returned. ++ * @param size ++ * Size of @p out. ++ * ++ * @return ++ * Pointer to @p buf or @p NULL in case suffix cannot be appended. 
++ */ ++static char * ++mlx5_glue_path(char *buf, size_t size) ++{ ++ static const char *const bad[] = { "/", ".", "..", NULL }; ++ const char *path = RTE_EAL_PMD_PATH; ++ size_t len = strlen(path); ++ size_t off; ++ int i; ++ ++ while (len && path[len - 1] == '/') ++ --len; ++ for (off = len; off && path[off - 1] != '/'; --off) ++ ; ++ for (i = 0; bad[i]; ++i) ++ if (!strncmp(path + off, bad[i], (int)(len - off))) ++ goto error; ++ i = snprintf(buf, size, "%.*s-glue", (int)len, path); ++ if (i == -1 || (size_t)i >= size) ++ goto error; ++ return buf; ++error: ++ ERROR("unable to append \"-glue\" to last component of" ++ " RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\")," ++ " please re-configure DPDK"); ++ return NULL; ++} ++ + /** + * Initialization routine for run-time dependency on rdma-core. + */ + static int + mlx5_glue_init(void) + { ++ char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")]; + const char *path[] = { + /* + * A basic security check is necessary before trusting +@@ -1055,7 +1097,13 @@ mlx5_glue_init(void) + */ + (geteuid() == getuid() && getegid() == getgid() ? + getenv("MLX5_GLUE_PATH") : NULL), +- RTE_EAL_PMD_PATH, ++ /* ++ * When RTE_EAL_PMD_PATH is set, use its glue-suffixed ++ * variant, otherwise let dlopen() look up libraries on its ++ * own. ++ */ ++ (*RTE_EAL_PMD_PATH ? ++ mlx5_glue_path(glue_path, sizeof(glue_path)) : ""), + }; + unsigned int i = 0; + void *handle = NULL; +-- +2.17.0 + diff --git a/SOURCES/0001-net-mlx4-fix-broadcast-Rx.patch b/SOURCES/0001-net-mlx4-fix-broadcast-Rx.patch new file mode 100644 index 0000000..d77c7d0 --- /dev/null +++ b/SOURCES/0001-net-mlx4-fix-broadcast-Rx.patch @@ -0,0 +1,68 @@ +From 643958cf910f9d2b6a22307a331834a93722884d Mon Sep 17 00:00:00 2001 +From: Moti Haimovsky +Date: Sun, 28 Jan 2018 11:43:37 +0200 +Subject: [PATCH] net/mlx4: fix broadcast Rx + +This patch fixes the issue of mlx4 not receiving broadcast packets +when configured to work promiscuous or allmulticast modes. 
+ +Fixes: eacaac7bae36 ("net/mlx4: restore promisc and allmulti support") +Cc: stable@dpdk.org + +Signed-off-by: Moti Haimovsky +Acked-by: Adrien Mazarguil +--- + drivers/net/mlx4/mlx4_flow.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c +index fb84060db..2c6710505 100644 +--- a/drivers/net/mlx4/mlx4_flow.c ++++ b/drivers/net/mlx4/mlx4_flow.c +@@ -1223,9 +1223,12 @@ mlx4_flow_internal_next_vlan(struct priv *priv, uint16_t vlan) + * + * Various flow rules are created depending on the mode the device is in: + * +- * 1. Promiscuous: port MAC + catch-all (VLAN filtering is ignored). +- * 2. All multicast: port MAC/VLAN + catch-all multicast. +- * 3. Otherwise: port MAC/VLAN + broadcast MAC/VLAN. ++ * 1. Promiscuous: ++ * port MAC + broadcast + catch-all (VLAN filtering is ignored). ++ * 2. All multicast: ++ * port MAC/VLAN + broadcast + catch-all multicast. ++ * 3. Otherwise: ++ * port MAC/VLAN + broadcast MAC/VLAN. + * + * About MAC flow rules: + * +@@ -1305,9 +1308,6 @@ mlx4_flow_internal(struct priv *priv, struct rte_flow_error *error) + !priv->dev->data->promiscuous ? + &vlan_spec.tci : + NULL; +- int broadcast = +- !priv->dev->data->promiscuous && +- !priv->dev->data->all_multicast; + uint16_t vlan = 0; + struct rte_flow *flow; + unsigned int i; +@@ -1341,7 +1341,7 @@ mlx4_flow_internal(struct priv *priv, struct rte_flow_error *error) + rule_vlan = NULL; + } + } +- for (i = 0; i != RTE_DIM(priv->mac) + broadcast; ++i) { ++ for (i = 0; i != RTE_DIM(priv->mac) + 1; ++i) { + const struct ether_addr *mac; + + /* Broadcasts are handled by an extra iteration. */ +@@ -1405,7 +1405,7 @@ mlx4_flow_internal(struct priv *priv, struct rte_flow_error *error) + goto next_vlan; + } + /* Take care of promiscuous and all multicast flow rules. 
*/ +- if (!broadcast) { ++ if (priv->dev->data->promiscuous || priv->dev->data->all_multicast) { + for (flow = LIST_FIRST(&priv->flows); + flow && flow->internal; + flow = LIST_NEXT(flow, next)) { +-- +2.17.0 + diff --git a/SOURCES/0001-net-mlx5-fix-memory-region-boundary-checks.patch b/SOURCES/0001-net-mlx5-fix-memory-region-boundary-checks.patch new file mode 100644 index 0000000..355a294 --- /dev/null +++ b/SOURCES/0001-net-mlx5-fix-memory-region-boundary-checks.patch @@ -0,0 +1,47 @@ +From d01327d95624923003b6e984040c8c17c06b5361 Mon Sep 17 00:00:00 2001 +From: Shahaf Shuler +Date: Thu, 25 Jan 2018 18:18:03 +0200 +Subject: [PATCH] net/mlx5: fix memory region boundary checks + +[ upstream commit 25f28d9d294d00a1c539d47a13c14ce282e31b09 ] + +Since commit f81ec748434b ("net/mlx5: fix memory region lookup") the +Memory Regions (MRs) no longer overlap. + +Comparing the end address of the MR should be exclusive, otherwise two +contiguous MRs may cause wrong matching. + +Fixes: f81ec748434b ("net/mlx5: fix memory region lookup") + +Signed-off-by: Xueming Li +Signed-off-by: Shahaf Shuler +Acked-by: Nelio Laranjeiro +--- + drivers/net/mlx5/mlx5_rxtx.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h +index 604f30849..de5b769ef 100644 +--- a/drivers/net/mlx5/mlx5_rxtx.h ++++ b/drivers/net/mlx5/mlx5_rxtx.h +@@ -547,7 +547,7 @@ mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb) + struct mlx5_mr *mr; + + assert(i < RTE_DIM(txq->mp2mr)); +- if (likely(txq->mp2mr[i]->start <= addr && txq->mp2mr[i]->end >= addr)) ++ if (likely(txq->mp2mr[i]->start <= addr && txq->mp2mr[i]->end > addr)) + return txq->mp2mr[i]->lkey; + for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) { + if (unlikely(txq->mp2mr[i] == NULL || +@@ -556,7 +556,7 @@ mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb) + break; + } + if (txq->mp2mr[i]->start <= addr && +- txq->mp2mr[i]->end >= addr) { ++ 
txq->mp2mr[i]->end > addr) { + assert(txq->mp2mr[i]->lkey != (uint32_t)-1); + assert(rte_cpu_to_be_32(txq->mp2mr[i]->mr->lkey) == + txq->mp2mr[i]->lkey); +-- +2.14.3 + diff --git a/SOURCES/0001-net-mlx5-fix-memory-region-cache-lookup.patch b/SOURCES/0001-net-mlx5-fix-memory-region-cache-lookup.patch new file mode 100644 index 0000000..cf26ece --- /dev/null +++ b/SOURCES/0001-net-mlx5-fix-memory-region-cache-lookup.patch @@ -0,0 +1,37 @@ +From 95086c654322861a10f9835233a333d7c2188df3 Mon Sep 17 00:00:00 2001 +From: Shahaf Shuler +Date: Thu, 25 Jan 2018 18:17:58 +0200 +Subject: [PATCH] net/mlx5: fix memory region cache lookup + +The Memory Region (MR) cache contains pointers to mlx5_mr. +The MR cache indexes are filled when a new MR is created. As it is +possible for an MR to be created on the fly, an extra validation must be +added to avoid a segmentation fault. + +Fixes: b0b093845793 ("net/mlx5: use buffer address for LKEY search") +Cc: stable@dpdk.org + +Signed-off-by: Xueming Li +Signed-off-by: Shahaf Shuler +Acked-by: Nelio Laranjeiro +--- + drivers/net/mlx5/mlx5_rxtx.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h +index 9d9844365..f92eb33fb 100644 +--- a/drivers/net/mlx5/mlx5_rxtx.h ++++ b/drivers/net/mlx5/mlx5_rxtx.h +@@ -555,7 +555,8 @@ mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb) + if (likely(txq->mp2mr[i]->start <= addr && txq->mp2mr[i]->end >= addr)) + return txq->mp2mr[i]->lkey; + for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) { +- if (unlikely(txq->mp2mr[i]->mr == NULL)) { ++ if (unlikely(txq->mp2mr[i] == NULL || ++ txq->mp2mr[i]->mr == NULL)) { + /* Unknown MP, add a new MR for it. 
*/ + break; + } +-- +2.14.3 + diff --git a/SOURCES/0001-net-mlx5-use-PCI-address-as-port-name.patch b/SOURCES/0001-net-mlx5-use-PCI-address-as-port-name.patch new file mode 100644 index 0000000..f0da9c6 --- /dev/null +++ b/SOURCES/0001-net-mlx5-use-PCI-address-as-port-name.patch @@ -0,0 +1,73 @@ +From 1092b5cd70af89b5000f793e4e326ccb04871552 Mon Sep 17 00:00:00 2001 +From: Yuanhan Liu +Date: Mon, 22 Jan 2018 17:30:06 +0800 +Subject: [PATCH] net/mlx5: use PCI address as port name + +It is suggested to use PCI BDF to identify a port for port addition +in OVS-DPDK, while mlx5 has its own naming style: it names ports by ib dev +name. This breaks the typical OVS-DPDK use case and causes more confusion +for end users. + +To fix it, this patch changes it to use PCI BDF as the name, too. +Also, a postfix " port %u" is added, just in case there might be more +than 1 port associated with a PCI device. + +Signed-off-by: Yuanhan Liu +Acked-by: Nelio Laranjeiro +--- + drivers/net/mlx5/mlx5.c | 22 +++++++++------------- + 1 file changed, 9 insertions(+), 13 deletions(-) + +diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c +index a5eb3fdc5..ec3ddcf28 100644 +--- a/drivers/net/mlx5/mlx5.c ++++ b/drivers/net/mlx5/mlx5.c +@@ -662,6 +662,8 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) + INFO("%u port(s) detected", device_attr.orig_attr.phys_port_cnt); + + for (i = 0; i < device_attr.orig_attr.phys_port_cnt; i++) { ++ char name[RTE_ETH_NAME_MAX_LEN]; ++ int len; + uint32_t port = i + 1; /* ports are indexed from one */ + uint32_t test = (1 << i); + struct ibv_context *ctx = NULL; +@@ -685,14 +687,15 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) + .rx_vec_en = MLX5_ARG_UNSET, + }; + ++ len = snprintf(name, sizeof(name), PCI_PRI_FMT, ++ pci_dev->addr.domain, pci_dev->addr.bus, ++ pci_dev->addr.devid, pci_dev->addr.function); ++ if (device_attr.orig_attr.phys_port_cnt > 1) ++ snprintf(name + len, sizeof(name), 
" port %u", i); ++ + mlx5_dev[idx].ports |= test; + + if (mlx5_is_secondary()) { +- /* from rte_ethdev.c */ +- char name[RTE_ETH_NAME_MAX_LEN]; +- +- snprintf(name, sizeof(name), "%s port %u", +- ibv_get_device_name(ibv_dev), port); + eth_dev = rte_eth_dev_attach_secondary(name); + if (eth_dev == NULL) { + ERROR("can not attach rte ethdev"); +@@ -902,14 +905,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) + priv_get_mtu(priv, &priv->mtu); + DEBUG("port %u MTU is %u", priv->port, priv->mtu); + +- /* from rte_ethdev.c */ +- { +- char name[RTE_ETH_NAME_MAX_LEN]; +- +- snprintf(name, sizeof(name), "%s port %u", +- ibv_get_device_name(ibv_dev), port); +- eth_dev = rte_eth_dev_allocate(name); +- } ++ eth_dev = rte_eth_dev_allocate(name); + if (eth_dev == NULL) { + ERROR("can not allocate rte ethdev"); + err = ENOMEM; +-- +2.14.3 + diff --git a/SOURCES/0001-net-nfp-configure-default-RSS-reta-table.patch b/SOURCES/0001-net-nfp-configure-default-RSS-reta-table.patch new file mode 100644 index 0000000..e0b30fa --- /dev/null +++ b/SOURCES/0001-net-nfp-configure-default-RSS-reta-table.patch @@ -0,0 +1,256 @@ +From 82d2ba939b9bbd99ed25ea06d5a690cf96489ea9 Mon Sep 17 00:00:00 2001 +From: Alejandro Lucero +Date: Fri, 24 Nov 2017 15:31:49 +0000 +Subject: [PATCH] net/nfp: configure default RSS reta table + +Some apps can enable RSS but not update the reta table nor the hash. +This patch adds a default reta table setup based on total number of +configured rx queues. The hash key is dependent on how the app +configures the rx_conf struct. 
+ +Signed-off-by: Alejandro Lucero +(cherry picked from commit f92e94478803c2307e68bf1023e6b49106bc843d) +--- + drivers/net/nfp/nfp_net.c | 154 +++++++++++++++++++++++++++++++------- + 1 file changed, 125 insertions(+), 29 deletions(-) + +diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c +index 3a63b1ca3..559230ab6 100644 +--- a/drivers/net/nfp/nfp_net.c ++++ b/drivers/net/nfp/nfp_net.c +@@ -101,6 +101,15 @@ static void nfp_net_stop(struct rte_eth_dev *dev); + static uint16_t nfp_net_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + ++static int nfp_net_rss_config_default(struct rte_eth_dev *dev); ++static int nfp_net_rss_hash_update(struct rte_eth_dev *dev, ++ struct rte_eth_rss_conf *rss_conf); ++static int nfp_net_rss_reta_write(struct rte_eth_dev *dev, ++ struct rte_eth_rss_reta_entry64 *reta_conf, ++ uint16_t reta_size); ++static int nfp_net_rss_hash_write(struct rte_eth_dev *dev, ++ struct rte_eth_rss_conf *rss_conf); ++ + /* The offset of the queue controller queues in the PCIe Target */ + #define NFP_PCIE_QUEUE(_q) (0x80000 + (NFP_QCP_QUEUE_ADDR_SZ * ((_q) & 0xff))) + +@@ -721,6 +730,8 @@ nfp_net_start(struct rte_eth_dev *dev) + { + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; ++ struct rte_eth_conf *dev_conf; ++ struct rte_eth_rxmode *rxmode; + uint32_t new_ctrl, update = 0; + struct nfp_net_hw *hw; + uint32_t intr_vector; +@@ -770,6 +781,19 @@ nfp_net_start(struct rte_eth_dev *dev) + + rte_intr_enable(intr_handle); + ++ dev_conf = &dev->data->dev_conf; ++ rxmode = &dev_conf->rxmode; ++ ++ /* Checking RX mode */ ++ if (rxmode->mq_mode & ETH_MQ_RX_RSS) { ++ if (hw->cap & NFP_NET_CFG_CTRL_RSS) { ++ if (!nfp_net_rss_config_default(dev)) ++ update |= NFP_NET_CFG_UPDATE_RSS; ++ } else { ++ PMD_INIT_LOG(INFO, "RSS not supported"); ++ return -EINVAL; ++ } ++ } + /* Enable device */ + new_ctrl = hw->ctrl | NFP_NET_CFG_CTRL_ENABLE; + +@@ -2329,22 
+2353,17 @@ nfp_net_vlan_offload_set(struct rte_eth_dev *dev, int mask) + return ret; + } + +-/* Update Redirection Table(RETA) of Receive Side Scaling of Ethernet device */ + static int +-nfp_net_reta_update(struct rte_eth_dev *dev, ++nfp_net_rss_reta_write(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) + { + uint32_t reta, mask; + int i, j; + int idx, shift; +- uint32_t update; + struct nfp_net_hw *hw = + NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); + +- if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS)) +- return -EINVAL; +- + if (reta_size != NFP_NET_CFG_RSS_ITBL_SZ) { + RTE_LOG(ERR, PMD, "The size of hash lookup table configured " + "(%d) doesn't match the number hardware can supported " +@@ -2381,6 +2400,26 @@ nfp_net_reta_update(struct rte_eth_dev *dev, + nn_cfg_writel(hw, NFP_NET_CFG_RSS_ITBL + (idx * 64) + shift, + reta); + } ++ return 0; ++} ++ ++/* Update Redirection Table(RETA) of Receive Side Scaling of Ethernet device */ ++static int ++nfp_net_reta_update(struct rte_eth_dev *dev, ++ struct rte_eth_rss_reta_entry64 *reta_conf, ++ uint16_t reta_size) ++{ ++ struct nfp_net_hw *hw = ++ NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ uint32_t update; ++ int ret; ++ ++ if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS)) ++ return -EINVAL; ++ ++ ret = nfp_net_rss_reta_write(dev, reta_conf, reta_size); ++ if (ret != 0) ++ return ret; + + update = NFP_NET_CFG_UPDATE_RSS; + +@@ -2439,33 +2478,24 @@ nfp_net_reta_query(struct rte_eth_dev *dev, + } + + static int +-nfp_net_rss_hash_update(struct rte_eth_dev *dev, ++nfp_net_rss_hash_write(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf) + { +- uint32_t update; ++ struct nfp_net_hw *hw; ++ uint64_t rss_hf; + uint32_t cfg_rss_ctrl = 0; + uint8_t key; +- uint64_t rss_hf; + int i; +- struct nfp_net_hw *hw; + + hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); + +- rss_hf = rss_conf->rss_hf; +- +- /* Checking if RSS is enabled */ +- if (!(hw->ctrl & 
NFP_NET_CFG_CTRL_RSS)) { +- if (rss_hf != 0) { /* Enable RSS? */ +- RTE_LOG(ERR, PMD, "RSS unsupported\n"); +- return -EINVAL; +- } +- return 0; /* Nothing to do */ ++ /* Writing the key byte a byte */ ++ for (i = 0; i < rss_conf->rss_key_len; i++) { ++ memcpy(&key, &rss_conf->rss_key[i], 1); ++ nn_cfg_writeb(hw, NFP_NET_CFG_RSS_KEY + i, key); + } + +- if (rss_conf->rss_key_len > NFP_NET_CFG_RSS_KEY_SZ) { +- RTE_LOG(ERR, PMD, "hash key too long\n"); +- return -EINVAL; +- } ++ rss_hf = rss_conf->rss_hf; + + if (rss_hf & ETH_RSS_IPV4) + cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV4 | +@@ -2483,15 +2513,40 @@ nfp_net_rss_hash_update(struct rte_eth_dev *dev, + /* configuring where to apply the RSS hash */ + nn_cfg_writel(hw, NFP_NET_CFG_RSS_CTRL, cfg_rss_ctrl); + +- /* Writing the key byte a byte */ +- for (i = 0; i < rss_conf->rss_key_len; i++) { +- memcpy(&key, &rss_conf->rss_key[i], 1); +- nn_cfg_writeb(hw, NFP_NET_CFG_RSS_KEY + i, key); +- } +- + /* Writing the key size */ + nn_cfg_writeb(hw, NFP_NET_CFG_RSS_KEY_SZ, rss_conf->rss_key_len); + ++ return 0; ++} ++ ++static int ++nfp_net_rss_hash_update(struct rte_eth_dev *dev, ++ struct rte_eth_rss_conf *rss_conf) ++{ ++ uint32_t update; ++ uint64_t rss_hf; ++ struct nfp_net_hw *hw; ++ ++ hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ ++ rss_hf = rss_conf->rss_hf; ++ ++ /* Checking if RSS is enabled */ ++ if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS)) { ++ if (rss_hf != 0) { /* Enable RSS? 
*/ ++ RTE_LOG(ERR, PMD, "RSS unsupported\n"); ++ return -EINVAL; ++ } ++ return 0; /* Nothing to do */ ++ } ++ ++ if (rss_conf->rss_key_len > NFP_NET_CFG_RSS_KEY_SZ) { ++ RTE_LOG(ERR, PMD, "hash key too long\n"); ++ return -EINVAL; ++ } ++ ++ nfp_net_rss_hash_write(dev, rss_conf); ++ + update = NFP_NET_CFG_UPDATE_RSS; + + if (nfp_net_reconfig(hw, hw->ctrl, update) < 0) +@@ -2548,6 +2603,47 @@ nfp_net_rss_hash_conf_get(struct rte_eth_dev *dev, + return 0; + } + ++static int ++nfp_net_rss_config_default(struct rte_eth_dev *dev) ++{ ++ struct rte_eth_conf *dev_conf; ++ struct rte_eth_rss_conf rss_conf; ++ struct rte_eth_rss_reta_entry64 nfp_reta_conf[2]; ++ uint16_t rx_queues = dev->data->nb_rx_queues; ++ uint16_t queue; ++ int i, j, ret; ++ ++ RTE_LOG(INFO, PMD, "setting default RSS conf for %u queues\n", ++ rx_queues); ++ ++ nfp_reta_conf[0].mask = ~0x0; ++ nfp_reta_conf[1].mask = ~0x0; ++ ++ queue = 0; ++ for (i = 0; i < 0x40; i += 8) { ++ for (j = i; j < (i + 8); j++) { ++ nfp_reta_conf[0].reta[j] = queue; ++ nfp_reta_conf[1].reta[j] = queue++; ++ queue %= rx_queues; ++ } ++ } ++ ret = nfp_net_rss_reta_write(dev, nfp_reta_conf, 0x80); ++ if (ret != 0) ++ return ret; ++ ++ dev_conf = &dev->data->dev_conf; ++ if (!dev_conf) { ++ RTE_LOG(INFO, PMD, "wrong rss conf"); ++ return -EINVAL; ++ } ++ rss_conf = dev_conf->rx_adv_conf.rss_conf; ++ ++ ret = nfp_net_rss_hash_write(dev, &rss_conf); ++ ++ return ret; ++} ++ ++ + /* Initialise and register driver with DPDK Application */ + static const struct eth_dev_ops nfp_net_eth_dev_ops = { + .dev_configure = nfp_net_configure, +-- +2.17.0 + diff --git a/SOURCES/0001-net-nfp-fix-lock-file-usage.patch b/SOURCES/0001-net-nfp-fix-lock-file-usage.patch new file mode 100644 index 0000000..cd6d170 --- /dev/null +++ b/SOURCES/0001-net-nfp-fix-lock-file-usage.patch @@ -0,0 +1,138 @@ +From 552ff0a9afc2d41685ed0295e6080df0c05382b4 Mon Sep 17 00:00:00 2001 +From: Alejandro Lucero +Date: Wed, 23 May 2018 13:28:56 +0100 +Subject: [PATCH] 
net/nfp: fix lock file usage + +DPDK apps can be executed as non-root users but current NFP lock +file for avoiding concurrent accesses to CPP interface is precluding +this option or requires to modify system file permissions. + +When the NFP device is bound to VFIO, this driver does not allow this +concurrent access, so the lock file is not required at all. + +OVS-DPDK as executed in RedHat distributions is the main NFP user +needing this fix. + +Fixes: c7e9729da6b5 ("net/nfp: support CPP") + +Signed-off-by: Alejandro Lucero +(cherry picked from commit 5f6ed2f4e0cb79580c0bedba2bd764a39923d7ec) +--- + drivers/net/nfp/nfp_net.c | 13 ++++++++++++- + drivers/net/nfp/nfpcore/nfp_cpp.h | 5 ++++- + drivers/net/nfp/nfpcore/nfp_cpp_pcie_ops.c | 11 +++++++---- + drivers/net/nfp/nfpcore/nfp_cppcore.c | 7 ++++--- + 4 files changed, 27 insertions(+), 9 deletions(-) + +diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c +index 559230ab6..71249572d 100644 +--- a/drivers/net/nfp/nfp_net.c ++++ b/drivers/net/nfp/nfp_net.c +@@ -3146,7 +3146,18 @@ static int nfp_pf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + if (!dev) + return ret; + +- cpp = nfp_cpp_from_device_name(dev->device.name); ++ /* ++ * When device bound to UIO, the device could be used, by mistake, ++ * by two DPDK apps, and the UIO driver does not avoid it. This ++ * could lead to a serious problem when configuring the NFP CPP ++ * interface. Here we avoid this telling to the CPP init code to ++ * use a lock file if UIO is being used. 
++ */ ++ if (dev->kdrv == RTE_KDRV_VFIO) ++ cpp = nfp_cpp_from_device_name(dev->device.name, 0); ++ else ++ cpp = nfp_cpp_from_device_name(dev->device.name, 1); ++ + if (!cpp) { + RTE_LOG(ERR, PMD, "A CPP handle can not be obtained"); + ret = -EIO; +diff --git a/drivers/net/nfp/nfpcore/nfp_cpp.h b/drivers/net/nfp/nfpcore/nfp_cpp.h +index 7e862145c..de2ff84e9 100644 +--- a/drivers/net/nfp/nfpcore/nfp_cpp.h ++++ b/drivers/net/nfp/nfpcore/nfp_cpp.h +@@ -31,6 +31,8 @@ struct nfp_cpp { + * island XPB CSRs. + */ + uint32_t imb_cat_table[16]; ++ ++ int driver_lock_needed; + }; + + /* +@@ -179,7 +181,8 @@ uint32_t __nfp_cpp_model_autodetect(struct nfp_cpp *cpp); + * + * @return NFP CPP handle, or NULL on failure (and set errno accordingly). + */ +-struct nfp_cpp *nfp_cpp_from_device_name(const char *devname); ++struct nfp_cpp *nfp_cpp_from_device_name(const char *devname, ++ int driver_lock_needed); + + /* + * Free a NFP CPP handle +diff --git a/drivers/net/nfp/nfpcore/nfp_cpp_pcie_ops.c b/drivers/net/nfp/nfpcore/nfp_cpp_pcie_ops.c +index ad6ce72fe..e46dbc7d7 100644 +--- a/drivers/net/nfp/nfpcore/nfp_cpp_pcie_ops.c ++++ b/drivers/net/nfp/nfpcore/nfp_cpp_pcie_ops.c +@@ -848,9 +848,11 @@ nfp6000_init(struct nfp_cpp *cpp, const char *devname) + memset(desc->busdev, 0, BUSDEV_SZ); + strncpy(desc->busdev, devname, strlen(devname)); + +- ret = nfp_acquire_process_lock(desc); +- if (ret) +- return -1; ++ if (cpp->driver_lock_needed) { ++ ret = nfp_acquire_process_lock(desc); ++ if (ret) ++ return -1; ++ } + + snprintf(tmp_str, sizeof(tmp_str), "%s/%s/driver", PCI_DEVICES, + desc->busdev); +@@ -910,7 +912,8 @@ nfp6000_free(struct nfp_cpp *cpp) + if (desc->bar[x - 1].iomem) + munmap(desc->bar[x - 1].iomem, 1 << (desc->barsz - 3)); + } +- close(desc->lock); ++ if (cpp->driver_lock_needed) ++ close(desc->lock); + close(desc->device); + free(desc); + } +diff --git a/drivers/net/nfp/nfpcore/nfp_cppcore.c b/drivers/net/nfp/nfpcore/nfp_cppcore.c +index 94d4a0b6b..f61143f7e 100644 +--- 
a/drivers/net/nfp/nfpcore/nfp_cppcore.c ++++ b/drivers/net/nfp/nfpcore/nfp_cppcore.c +@@ -542,7 +542,7 @@ nfp_xpb_readl(struct nfp_cpp *cpp, uint32_t xpb_addr, uint32_t *value) + } + + static struct nfp_cpp * +-nfp_cpp_alloc(const char *devname) ++nfp_cpp_alloc(const char *devname, int driver_lock_needed) + { + const struct nfp_cpp_operations *ops; + struct nfp_cpp *cpp; +@@ -558,6 +558,7 @@ nfp_cpp_alloc(const char *devname) + return NULL; + + cpp->op = ops; ++ cpp->driver_lock_needed = driver_lock_needed; + + if (cpp->op->init) { + err = cpp->op->init(cpp, devname); +@@ -603,9 +604,9 @@ nfp_cpp_free(struct nfp_cpp *cpp) + } + + struct nfp_cpp * +-nfp_cpp_from_device_name(const char *devname) ++nfp_cpp_from_device_name(const char *devname, int driver_lock_needed) + { +- return nfp_cpp_alloc(devname); ++ return nfp_cpp_alloc(devname, driver_lock_needed); + } + + /* +-- +2.17.0 + diff --git a/SOURCES/0001-net-nfp-fix-mbufs-releasing-when-stop-or-close.patch b/SOURCES/0001-net-nfp-fix-mbufs-releasing-when-stop-or-close.patch new file mode 100644 index 0000000..91cfe9c --- /dev/null +++ b/SOURCES/0001-net-nfp-fix-mbufs-releasing-when-stop-or-close.patch @@ -0,0 +1,46 @@ +From 0c0e46c36bcc5dfe9d2aa605e1a5f714d45e0b7f Mon Sep 17 00:00:00 2001 +From: Alejandro Lucero +Date: Mon, 23 Apr 2018 12:23:58 +0100 +Subject: [PATCH] net/nfp: fix mbufs releasing when stop or close + +PMDs have the responsibility of releasing mbufs sent through xmit burst +function. NFP PMD attaches those sent mbufs to the TX ring structure, +and it is at the next time a specific ring descriptor is going to be +used when the previous linked mbuf, already transmitted at that point, +is released. Those mbufs belonging to a chained mbuf got its own link +to a ring descriptor, and they are released independently of the mbuf +head of that chain. + +The problem is how those mbufs are released when the PMD is stopped or +closed. 
Instead of releasing those mbufs as the xmit functions does, +this is independently of being in a mbuf chain, the code calls +rte_pktmbuf_free which will release not just the mbuf head in that +chain but all the chained mbufs. The loop will try to release those +mbufs which have already been released again when chained mbufs exist. + +This patch fixes the problem using rte_pktmbuf_free_seg instead. + +Fixes: b812daadad0d ("nfp: add Rx and Tx") +Cc: stable@dpdk.org + +Signed-off-by: Alejandro Lucero +--- + drivers/net/nfp/nfp_net.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c +index 2a4b006e0..a5875f230 100644 +--- a/drivers/net/nfp/nfp_net.c ++++ b/drivers/net/nfp/nfp_net.c +@@ -263,7 +263,7 @@ nfp_net_tx_queue_release_mbufs(struct nfp_net_txq *txq) + + for (i = 0; i < txq->tx_count; i++) { + if (txq->txbufs[i].mbuf) { +- rte_pktmbuf_free(txq->txbufs[i].mbuf); ++ rte_pktmbuf_free_seg(txq->txbufs[i].mbuf); + txq->txbufs[i].mbuf = NULL; + } + } +-- +2.17.0 + diff --git a/SOURCES/0001-net-nfp-support-CPP.patch b/SOURCES/0001-net-nfp-support-CPP.patch new file mode 100644 index 0000000..dc8a7d7 --- /dev/null +++ b/SOURCES/0001-net-nfp-support-CPP.patch @@ -0,0 +1,8282 @@ +From bf9429fcf4ded868270561a3469bf933021304a4 Mon Sep 17 00:00:00 2001 +From: Alejandro Lucero +Date: Thu, 5 Apr 2018 15:42:44 +0100 +Subject: [PATCH 1/3] net/nfp: support CPP + +CPP refers to the internal NFP Command Push Pull bus. This patch allows +to create CPP commands from user space allowing to access any single +part of the chip. + +This CPP interface is the base for having other functionalities like +mutexes when accessing specific chip components, chip resources management, +firmware upload or using the NSP, an embedded arm processor which can +perform tasks on demand. 
+ +NSP was the previous only way for doing things in the chip by the PMD, +where a NSPU interface was used for commands like firmware upload or +port link configuration. CPP interface supersedes NSPU, but it is still +possible to use NSP through CPP. + +CPP interface adds a great flexibility for doing things like extended +stats or firmware debugging. + +Signed-off-by: Alejandro Lucero +(cherry picked from commit c7e9729da6b521ee8a1f8bb3a1fa4c156f059ced) +--- + drivers/net/nfp/nfpcore/nfp-common/nfp_cppat.h | 722 +++++++++++++++++ + drivers/net/nfp/nfpcore/nfp-common/nfp_platform.h | 36 + + drivers/net/nfp/nfpcore/nfp-common/nfp_resid.h | 592 ++++++++++++++ + drivers/net/nfp/nfpcore/nfp6000/nfp6000.h | 40 + + drivers/net/nfp/nfpcore/nfp6000/nfp_xpb.h | 26 + + drivers/net/nfp/nfpcore/nfp_cpp.h | 776 ++++++++++++++++++ + drivers/net/nfp/nfpcore/nfp_cpp_pcie_ops.c | 936 ++++++++++++++++++++++ + drivers/net/nfp/nfpcore/nfp_cppcore.c | 856 ++++++++++++++++++++ + drivers/net/nfp/nfpcore/nfp_crc.c | 49 ++ + drivers/net/nfp/nfpcore/nfp_crc.h | 19 + + drivers/net/nfp/nfpcore/nfp_hwinfo.c | 199 +++++ + drivers/net/nfp/nfpcore/nfp_hwinfo.h | 85 ++ + drivers/net/nfp/nfpcore/nfp_mip.c | 154 ++++ + drivers/net/nfp/nfpcore/nfp_mip.h | 21 + + drivers/net/nfp/nfpcore/nfp_mutex.c | 424 ++++++++++ + drivers/net/nfp/nfpcore/nfp_nffw.c | 235 ++++++ + drivers/net/nfp/nfpcore/nfp_nffw.h | 86 ++ + drivers/net/nfp/nfpcore/nfp_nsp.c | 427 ++++++++++ + drivers/net/nfp/nfpcore/nfp_nsp.h | 304 +++++++ + drivers/net/nfp/nfpcore/nfp_nsp_cmds.c | 109 +++ + drivers/net/nfp/nfpcore/nfp_nsp_eth.c | 665 +++++++++++++++ + drivers/net/nfp/nfpcore/nfp_resource.c | 264 ++++++ + drivers/net/nfp/nfpcore/nfp_resource.h | 52 ++ + drivers/net/nfp/nfpcore/nfp_rtsym.c | 327 ++++++++ + drivers/net/nfp/nfpcore/nfp_rtsym.h | 61 ++ + drivers/net/nfp/nfpcore/nfp_target.h | 579 +++++++++++++ + 26 files changed, 8044 insertions(+) + create mode 100644 drivers/net/nfp/nfpcore/nfp-common/nfp_cppat.h + create mode 100644 
drivers/net/nfp/nfpcore/nfp-common/nfp_platform.h + create mode 100644 drivers/net/nfp/nfpcore/nfp-common/nfp_resid.h + create mode 100644 drivers/net/nfp/nfpcore/nfp6000/nfp6000.h + create mode 100644 drivers/net/nfp/nfpcore/nfp6000/nfp_xpb.h + create mode 100644 drivers/net/nfp/nfpcore/nfp_cpp.h + create mode 100644 drivers/net/nfp/nfpcore/nfp_cpp_pcie_ops.c + create mode 100644 drivers/net/nfp/nfpcore/nfp_cppcore.c + create mode 100644 drivers/net/nfp/nfpcore/nfp_crc.c + create mode 100644 drivers/net/nfp/nfpcore/nfp_crc.h + create mode 100644 drivers/net/nfp/nfpcore/nfp_hwinfo.c + create mode 100644 drivers/net/nfp/nfpcore/nfp_hwinfo.h + create mode 100644 drivers/net/nfp/nfpcore/nfp_mip.c + create mode 100644 drivers/net/nfp/nfpcore/nfp_mip.h + create mode 100644 drivers/net/nfp/nfpcore/nfp_mutex.c + create mode 100644 drivers/net/nfp/nfpcore/nfp_nffw.c + create mode 100644 drivers/net/nfp/nfpcore/nfp_nffw.h + create mode 100644 drivers/net/nfp/nfpcore/nfp_nsp.c + create mode 100644 drivers/net/nfp/nfpcore/nfp_nsp.h + create mode 100644 drivers/net/nfp/nfpcore/nfp_nsp_cmds.c + create mode 100644 drivers/net/nfp/nfpcore/nfp_nsp_eth.c + create mode 100644 drivers/net/nfp/nfpcore/nfp_resource.c + create mode 100644 drivers/net/nfp/nfpcore/nfp_resource.h + create mode 100644 drivers/net/nfp/nfpcore/nfp_rtsym.c + create mode 100644 drivers/net/nfp/nfpcore/nfp_rtsym.h + create mode 100644 drivers/net/nfp/nfpcore/nfp_target.h + +diff --git a/drivers/net/nfp/nfpcore/nfp-common/nfp_cppat.h b/drivers/net/nfp/nfpcore/nfp-common/nfp_cppat.h +new file mode 100644 +index 000000000..6e380cca0 +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp-common/nfp_cppat.h +@@ -0,0 +1,722 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. 
++ */ ++ ++#ifndef __NFP_CPPAT_H__ ++#define __NFP_CPPAT_H__ ++ ++#include "nfp_platform.h" ++#include "nfp_resid.h" ++ ++/* This file contains helpers for creating CPP commands ++ * ++ * All magic NFP-6xxx IMB 'mode' numbers here are from: ++ * Databook (1 August 2013) ++ * - System Overview and Connectivity ++ * -- Internal Connectivity ++ * --- Distributed Switch Fabric - Command Push/Pull (DSF-CPP) Bus ++ * ---- CPP addressing ++ * ----- Table 3.6. CPP Address Translation Mode Commands ++ */ ++ ++#define _NIC_NFP6000_MU_LOCALITY_DIRECT 2 ++ ++static inline int ++_nfp6000_decode_basic(uint64_t addr, int *dest_island, int cpp_tgt, int mode, ++ int addr40, int isld1, int isld0); ++ ++static uint64_t ++_nic_mask64(int msb, int lsb, int at0) ++{ ++ uint64_t v; ++ int w = msb - lsb + 1; ++ ++ if (w == 64) ++ return ~(uint64_t)0; ++ ++ if ((lsb + w) > 64) ++ return 0; ++ ++ v = (UINT64_C(1) << w) - 1; ++ ++ if (at0) ++ return v; ++ ++ return v << lsb; ++} ++ ++/* For VQDR, we may not modify the Channel bits, which might overlap ++ * with the Index bit. When it does, we need to ensure that isld0 == isld1. ++ */ ++static inline int ++_nfp6000_encode_basic(uint64_t *addr, int dest_island, int cpp_tgt, int mode, ++ int addr40, int isld1, int isld0) ++{ ++ uint64_t _u64; ++ int iid_lsb, idx_lsb; ++ int i, v = 0; ++ int isld[2]; ++ ++ isld[0] = isld0; ++ isld[1] = isld1; ++ ++ switch (cpp_tgt) { ++ case NFP6000_CPPTGT_MU: ++ /* This function doesn't handle MU */ ++ return NFP_ERRNO(EINVAL); ++ case NFP6000_CPPTGT_CTXPB: ++ /* This function doesn't handle CTXPB */ ++ return NFP_ERRNO(EINVAL); ++ default: ++ break; ++ } ++ ++ switch (mode) { ++ case 0: ++ if (cpp_tgt == NFP6000_CPPTGT_VQDR && !addr40) { ++ /* ++ * In this specific mode we'd rather not modify the ++ * address but we can verify if the existing contents ++ * will point to a valid island. 
++ */ ++ i = _nfp6000_decode_basic(*addr, &v, cpp_tgt, mode, ++ addr40, isld1, ++ isld0); ++ if (i != 0) ++ /* Full Island ID and channel bits overlap */ ++ return i; ++ ++ /* ++ * If dest_island is invalid, the current address won't ++ * go where expected. ++ */ ++ if (dest_island != -1 && dest_island != v) ++ return NFP_ERRNO(EINVAL); ++ ++ /* If dest_island was -1, we don't care */ ++ return 0; ++ } ++ ++ iid_lsb = (addr40) ? 34 : 26; ++ ++ /* <39:34> or <31:26> */ ++ _u64 = _nic_mask64((iid_lsb + 5), iid_lsb, 0); ++ *addr &= ~_u64; ++ *addr |= (((uint64_t)dest_island) << iid_lsb) & _u64; ++ return 0; ++ case 1: ++ if (cpp_tgt == NFP6000_CPPTGT_VQDR && !addr40) { ++ i = _nfp6000_decode_basic(*addr, &v, cpp_tgt, mode, ++ addr40, isld1, isld0); ++ if (i != 0) ++ /* Full Island ID and channel bits overlap */ ++ return i; ++ ++ /* ++ * If dest_island is invalid, the current address won't ++ * go where expected. ++ */ ++ if (dest_island != -1 && dest_island != v) ++ return NFP_ERRNO(EINVAL); ++ ++ /* If dest_island was -1, we don't care */ ++ return 0; ++ } ++ ++ idx_lsb = (addr40) ? 39 : 31; ++ if (dest_island == isld0) { ++ /* Only need to clear the Index bit */ ++ *addr &= ~_nic_mask64(idx_lsb, idx_lsb, 0); ++ return 0; ++ } ++ ++ if (dest_island == isld1) { ++ /* Only need to set the Index bit */ ++ *addr |= (UINT64_C(1) << idx_lsb); ++ return 0; ++ } ++ ++ return NFP_ERRNO(ENODEV); ++ case 2: ++ if (cpp_tgt == NFP6000_CPPTGT_VQDR && !addr40) { ++ /* iid<0> = addr<30> = channel<0> */ ++ /* channel<1> = addr<31> = Index */ ++ ++ /* ++ * Special case where we allow channel bits to be set ++ * before hand and with them select an island. ++ * So we need to confirm that it's at least plausible. ++ */ ++ i = _nfp6000_decode_basic(*addr, &v, cpp_tgt, mode, ++ addr40, isld1, isld0); ++ if (i != 0) ++ /* Full Island ID and channel bits overlap */ ++ return i; ++ ++ /* ++ * If dest_island is invalid, the current address won't ++ * go where expected. 
++ */ ++ if (dest_island != -1 && dest_island != v) ++ return NFP_ERRNO(EINVAL); ++ ++ /* If dest_island was -1, we don't care */ ++ return 0; ++ } ++ ++ /* ++ * Make sure we compare against isldN values by clearing the ++ * LSB. This is what the silicon does. ++ **/ ++ isld[0] &= ~1; ++ isld[1] &= ~1; ++ ++ idx_lsb = (addr40) ? 39 : 31; ++ iid_lsb = idx_lsb - 1; ++ ++ /* ++ * Try each option, take first one that fits. Not sure if we ++ * would want to do some smarter searching and prefer 0 or non-0 ++ * island IDs. ++ */ ++ ++ for (i = 0; i < 2; i++) { ++ for (v = 0; v < 2; v++) { ++ if (dest_island != (isld[i] | v)) ++ continue; ++ *addr &= ~_nic_mask64(idx_lsb, iid_lsb, 0); ++ *addr |= (((uint64_t)i) << idx_lsb); ++ *addr |= (((uint64_t)v) << iid_lsb); ++ return 0; ++ } ++ } ++ ++ return NFP_ERRNO(ENODEV); ++ case 3: ++ if (cpp_tgt == NFP6000_CPPTGT_VQDR && !addr40) { ++ /* ++ * iid<0> = addr<29> = data ++ * iid<1> = addr<30> = channel<0> ++ * channel<1> = addr<31> = Index ++ */ ++ i = _nfp6000_decode_basic(*addr, &v, cpp_tgt, mode, ++ addr40, isld1, isld0); ++ if (i != 0) ++ /* Full Island ID and channel bits overlap */ ++ return i; ++ ++ if (dest_island != -1 && dest_island != v) ++ return NFP_ERRNO(EINVAL); ++ ++ /* If dest_island was -1, we don't care */ ++ return 0; ++ } ++ ++ isld[0] &= ~3; ++ isld[1] &= ~3; ++ ++ idx_lsb = (addr40) ? 
39 : 31; ++ iid_lsb = idx_lsb - 2; ++ ++ for (i = 0; i < 2; i++) { ++ for (v = 0; v < 4; v++) { ++ if (dest_island != (isld[i] | v)) ++ continue; ++ *addr &= ~_nic_mask64(idx_lsb, iid_lsb, 0); ++ *addr |= (((uint64_t)i) << idx_lsb); ++ *addr |= (((uint64_t)v) << iid_lsb); ++ return 0; ++ } ++ } ++ return NFP_ERRNO(ENODEV); ++ default: ++ break; ++ } ++ ++ return NFP_ERRNO(EINVAL); ++} ++ ++static inline int ++_nfp6000_decode_basic(uint64_t addr, int *dest_island, int cpp_tgt, int mode, ++ int addr40, int isld1, int isld0) ++{ ++ int iid_lsb, idx_lsb; ++ ++ switch (cpp_tgt) { ++ case NFP6000_CPPTGT_MU: ++ /* This function doesn't handle MU */ ++ return NFP_ERRNO(EINVAL); ++ case NFP6000_CPPTGT_CTXPB: ++ /* This function doesn't handle CTXPB */ ++ return NFP_ERRNO(EINVAL); ++ default: ++ break; ++ } ++ ++ switch (mode) { ++ case 0: ++ /* ++ * For VQDR, in this mode for 32-bit addressing it would be ++ * islands 0, 16, 32 and 48 depending on channel and upper ++ * address bits. Since those are not all valid islands, most ++ * decode cases would result in bad island IDs, but we do them ++ * anyway since this is decoding an address that is already ++ * assumed to be used as-is to get to sram. ++ */ ++ iid_lsb = (addr40) ? 34 : 26; ++ *dest_island = (int)(addr >> iid_lsb) & 0x3F; ++ return 0; ++ case 1: ++ /* ++ * For VQDR 32-bit, this would decode as: ++ * Channel 0: island#0 ++ * Channel 1: island#0 ++ * Channel 2: island#1 ++ * Channel 3: island#1 ++ * ++ * That would be valid as long as both islands have VQDR. ++ * Let's allow this. ++ */ ++ ++ idx_lsb = (addr40) ? 39 : 31; ++ if (addr & _nic_mask64(idx_lsb, idx_lsb, 0)) ++ *dest_island = isld1; ++ else ++ *dest_island = isld0; ++ ++ return 0; ++ case 2: ++ /* ++ * For VQDR 32-bit: ++ * Channel 0: (island#0 | 0) ++ * Channel 1: (island#0 | 1) ++ * Channel 2: (island#1 | 0) ++ * Channel 3: (island#1 | 1) ++ * ++ * Make sure we compare against isldN values by clearing the ++ * LSB. This is what the silicon does. 
++ */ ++ isld0 &= ~1; ++ isld1 &= ~1; ++ ++ idx_lsb = (addr40) ? 39 : 31; ++ iid_lsb = idx_lsb - 1; ++ ++ if (addr & _nic_mask64(idx_lsb, idx_lsb, 0)) ++ *dest_island = isld1 | (int)((addr >> iid_lsb) & 1); ++ else ++ *dest_island = isld0 | (int)((addr >> iid_lsb) & 1); ++ ++ return 0; ++ case 3: ++ /* ++ * In this mode the data address starts to affect the island ID ++ * so rather not allow it. In some really specific case one ++ * could use this to send the upper half of the VQDR channel to ++ * another MU, but this is getting very specific. However, as ++ * above for mode 0, this is the decoder and the caller should ++ * validate the resulting IID. This blindly does what the ++ * silicon would do. ++ */ ++ ++ isld0 &= ~3; ++ isld1 &= ~3; ++ ++ idx_lsb = (addr40) ? 39 : 31; ++ iid_lsb = idx_lsb - 2; ++ ++ if (addr & _nic_mask64(idx_lsb, idx_lsb, 0)) ++ *dest_island = isld1 | (int)((addr >> iid_lsb) & 3); ++ else ++ *dest_island = isld0 | (int)((addr >> iid_lsb) & 3); ++ ++ return 0; ++ default: ++ break; ++ } ++ ++ return NFP_ERRNO(EINVAL); ++} ++ ++static inline int ++_nfp6000_cppat_mu_locality_lsb(int mode, int addr40) ++{ ++ switch (mode) { ++ case 0: ++ case 1: ++ case 2: ++ case 3: ++ return (addr40) ? 38 : 30; ++ default: ++ break; ++ } ++ return NFP_ERRNO(EINVAL); ++} ++ ++static inline int ++_nfp6000_encode_mu(uint64_t *addr, int dest_island, int mode, int addr40, ++ int isld1, int isld0) ++{ ++ uint64_t _u64; ++ int iid_lsb, idx_lsb, locality_lsb; ++ int i, v; ++ int isld[2]; ++ int da; ++ ++ isld[0] = isld0; ++ isld[1] = isld1; ++ locality_lsb = _nfp6000_cppat_mu_locality_lsb(mode, addr40); ++ ++ if (((*addr >> locality_lsb) & 3) == _NIC_NFP6000_MU_LOCALITY_DIRECT) ++ da = 1; ++ else ++ da = 0; ++ ++ switch (mode) { ++ case 0: ++ iid_lsb = (addr40) ? 32 : 24; ++ _u64 = _nic_mask64((iid_lsb + 5), iid_lsb, 0); ++ *addr &= ~_u64; ++ *addr |= (((uint64_t)dest_island) << iid_lsb) & _u64; ++ return 0; ++ case 1: ++ if (da) { ++ iid_lsb = (addr40) ? 
32 : 24; ++ _u64 = _nic_mask64((iid_lsb + 5), iid_lsb, 0); ++ *addr &= ~_u64; ++ *addr |= (((uint64_t)dest_island) << iid_lsb) & _u64; ++ return 0; ++ } ++ ++ idx_lsb = (addr40) ? 37 : 29; ++ if (dest_island == isld0) { ++ *addr &= ~_nic_mask64(idx_lsb, idx_lsb, 0); ++ return 0; ++ } ++ ++ if (dest_island == isld1) { ++ *addr |= (UINT64_C(1) << idx_lsb); ++ return 0; ++ } ++ ++ return NFP_ERRNO(ENODEV); ++ case 2: ++ if (da) { ++ iid_lsb = (addr40) ? 32 : 24; ++ _u64 = _nic_mask64((iid_lsb + 5), iid_lsb, 0); ++ *addr &= ~_u64; ++ *addr |= (((uint64_t)dest_island) << iid_lsb) & _u64; ++ return 0; ++ } ++ ++ /* ++ * Make sure we compare against isldN values by clearing the ++ * LSB. This is what the silicon does. ++ */ ++ isld[0] &= ~1; ++ isld[1] &= ~1; ++ ++ idx_lsb = (addr40) ? 37 : 29; ++ iid_lsb = idx_lsb - 1; ++ ++ /* ++ * Try each option, take first one that fits. Not sure if we ++ * would want to do some smarter searching and prefer 0 or ++ * non-0 island IDs. ++ */ ++ ++ for (i = 0; i < 2; i++) { ++ for (v = 0; v < 2; v++) { ++ if (dest_island != (isld[i] | v)) ++ continue; ++ *addr &= ~_nic_mask64(idx_lsb, iid_lsb, 0); ++ *addr |= (((uint64_t)i) << idx_lsb); ++ *addr |= (((uint64_t)v) << iid_lsb); ++ return 0; ++ } ++ } ++ return NFP_ERRNO(ENODEV); ++ case 3: ++ /* ++ * Only the EMU will use 40 bit addressing. Silently set the ++ * direct locality bit for everyone else. The SDK toolchain ++ * uses dest_island <= 0 to test for atypical address encodings ++ * to support access to local-island CTM with a 32-bit address ++ * (high-locality is effectively ignored and just used for ++ * routing to island #0). ++ */ ++ if (dest_island > 0 && ++ (dest_island < 24 || dest_island > 26)) { ++ *addr |= ((uint64_t)_NIC_NFP6000_MU_LOCALITY_DIRECT) ++ << locality_lsb; ++ da = 1; ++ } ++ ++ if (da) { ++ iid_lsb = (addr40) ? 
32 : 24; ++ _u64 = _nic_mask64((iid_lsb + 5), iid_lsb, 0); ++ *addr &= ~_u64; ++ *addr |= (((uint64_t)dest_island) << iid_lsb) & _u64; ++ return 0; ++ } ++ ++ isld[0] &= ~3; ++ isld[1] &= ~3; ++ ++ idx_lsb = (addr40) ? 37 : 29; ++ iid_lsb = idx_lsb - 2; ++ ++ for (i = 0; i < 2; i++) { ++ for (v = 0; v < 4; v++) { ++ if (dest_island != (isld[i] | v)) ++ continue; ++ *addr &= ~_nic_mask64(idx_lsb, iid_lsb, 0); ++ *addr |= (((uint64_t)i) << idx_lsb); ++ *addr |= (((uint64_t)v) << iid_lsb); ++ return 0; ++ } ++ } ++ ++ return NFP_ERRNO(ENODEV); ++ default: ++ break; ++ } ++ ++ return NFP_ERRNO(EINVAL); ++} ++ ++static inline int ++_nfp6000_decode_mu(uint64_t addr, int *dest_island, int mode, int addr40, ++ int isld1, int isld0) ++{ ++ int iid_lsb, idx_lsb, locality_lsb; ++ int da; ++ ++ locality_lsb = _nfp6000_cppat_mu_locality_lsb(mode, addr40); ++ ++ if (((addr >> locality_lsb) & 3) == _NIC_NFP6000_MU_LOCALITY_DIRECT) ++ da = 1; ++ else ++ da = 0; ++ ++ switch (mode) { ++ case 0: ++ iid_lsb = (addr40) ? 32 : 24; ++ *dest_island = (int)(addr >> iid_lsb) & 0x3F; ++ return 0; ++ case 1: ++ if (da) { ++ iid_lsb = (addr40) ? 32 : 24; ++ *dest_island = (int)(addr >> iid_lsb) & 0x3F; ++ return 0; ++ } ++ ++ idx_lsb = (addr40) ? 37 : 29; ++ ++ if (addr & _nic_mask64(idx_lsb, idx_lsb, 0)) ++ *dest_island = isld1; ++ else ++ *dest_island = isld0; ++ ++ return 0; ++ case 2: ++ if (da) { ++ iid_lsb = (addr40) ? 32 : 24; ++ *dest_island = (int)(addr >> iid_lsb) & 0x3F; ++ return 0; ++ } ++ /* ++ * Make sure we compare against isldN values by clearing the ++ * LSB. This is what the silicon does. ++ */ ++ isld0 &= ~1; ++ isld1 &= ~1; ++ ++ idx_lsb = (addr40) ? 37 : 29; ++ iid_lsb = idx_lsb - 1; ++ ++ if (addr & _nic_mask64(idx_lsb, idx_lsb, 0)) ++ *dest_island = isld1 | (int)((addr >> iid_lsb) & 1); ++ else ++ *dest_island = isld0 | (int)((addr >> iid_lsb) & 1); ++ ++ return 0; ++ case 3: ++ if (da) { ++ iid_lsb = (addr40) ? 
32 : 24; ++ *dest_island = (int)(addr >> iid_lsb) & 0x3F; ++ return 0; ++ } ++ ++ isld0 &= ~3; ++ isld1 &= ~3; ++ ++ idx_lsb = (addr40) ? 37 : 29; ++ iid_lsb = idx_lsb - 2; ++ ++ if (addr & _nic_mask64(idx_lsb, idx_lsb, 0)) ++ *dest_island = isld1 | (int)((addr >> iid_lsb) & 3); ++ else ++ *dest_island = isld0 | (int)((addr >> iid_lsb) & 3); ++ ++ return 0; ++ default: ++ break; ++ } ++ ++ return NFP_ERRNO(EINVAL); ++} ++ ++static inline int ++_nfp6000_cppat_addr_encode(uint64_t *addr, int dest_island, int cpp_tgt, ++ int mode, int addr40, int isld1, int isld0) ++{ ++ switch (cpp_tgt) { ++ case NFP6000_CPPTGT_NBI: ++ case NFP6000_CPPTGT_VQDR: ++ case NFP6000_CPPTGT_ILA: ++ case NFP6000_CPPTGT_PCIE: ++ case NFP6000_CPPTGT_ARM: ++ case NFP6000_CPPTGT_CRYPTO: ++ case NFP6000_CPPTGT_CLS: ++ return _nfp6000_encode_basic(addr, dest_island, cpp_tgt, mode, ++ addr40, isld1, isld0); ++ ++ case NFP6000_CPPTGT_MU: ++ return _nfp6000_encode_mu(addr, dest_island, mode, addr40, ++ isld1, isld0); ++ ++ case NFP6000_CPPTGT_CTXPB: ++ if (mode != 1 || addr40 != 0) ++ return NFP_ERRNO(EINVAL); ++ ++ *addr &= ~_nic_mask64(29, 24, 0); ++ *addr |= (((uint64_t)dest_island) << 24) & ++ _nic_mask64(29, 24, 0); ++ return 0; ++ default: ++ break; ++ } ++ ++ return NFP_ERRNO(EINVAL); ++} ++ ++static inline int ++_nfp6000_cppat_addr_decode(uint64_t addr, int *dest_island, int cpp_tgt, ++ int mode, int addr40, int isld1, int isld0) ++{ ++ switch (cpp_tgt) { ++ case NFP6000_CPPTGT_NBI: ++ case NFP6000_CPPTGT_VQDR: ++ case NFP6000_CPPTGT_ILA: ++ case NFP6000_CPPTGT_PCIE: ++ case NFP6000_CPPTGT_ARM: ++ case NFP6000_CPPTGT_CRYPTO: ++ case NFP6000_CPPTGT_CLS: ++ return _nfp6000_decode_basic(addr, dest_island, cpp_tgt, mode, ++ addr40, isld1, isld0); ++ ++ case NFP6000_CPPTGT_MU: ++ return _nfp6000_decode_mu(addr, dest_island, mode, addr40, ++ isld1, isld0); ++ ++ case NFP6000_CPPTGT_CTXPB: ++ if (mode != 1 || addr40 != 0) ++ return -EINVAL; ++ *dest_island = (int)(addr >> 24) & 0x3F; ++ return 0; ++ 
default: ++ break; ++ } ++ ++ return -EINVAL; ++} ++/* Zero the island-ID field of *addr; returns 0, or -1 with errno = EINVAL. */ ++static inline int ++_nfp6000_cppat_addr_iid_clear(uint64_t *addr, int cpp_tgt, int mode, int addr40) ++{ ++ int iid_lsb, locality_lsb, da; ++ ++ switch (cpp_tgt) { ++ case NFP6000_CPPTGT_NBI: ++ case NFP6000_CPPTGT_VQDR: ++ case NFP6000_CPPTGT_ILA: ++ case NFP6000_CPPTGT_PCIE: ++ case NFP6000_CPPTGT_ARM: ++ case NFP6000_CPPTGT_CRYPTO: ++ case NFP6000_CPPTGT_CLS: ++ switch (mode) { ++ case 0: ++ iid_lsb = (addr40) ? 34 : 26; ++ *addr &= ~(UINT64_C(0x3F) << iid_lsb); ++ return 0; ++ case 1: ++ iid_lsb = (addr40) ? 39 : 31; ++ *addr &= ~_nic_mask64(iid_lsb, iid_lsb, 0); ++ return 0; ++ case 2: ++ iid_lsb = (addr40) ? 38 : 30; ++ *addr &= ~_nic_mask64(iid_lsb + 1, iid_lsb, 0); ++ return 0; ++ case 3: ++ iid_lsb = (addr40) ? 37 : 29; ++ *addr &= ~_nic_mask64(iid_lsb + 2, iid_lsb, 0); ++ return 0; ++ default: ++ return NFP_ERRNO(EINVAL); /* bad mode: must not fall into MU */ ++ } ++ case NFP6000_CPPTGT_MU: /* locality_lsb is -1 for a bad mode; guard the shift */ ++ locality_lsb = _nfp6000_cppat_mu_locality_lsb(mode, addr40); ++ da = (locality_lsb >= 0 && ((*addr >> locality_lsb) & 3) == ++ _NIC_NFP6000_MU_LOCALITY_DIRECT); ++ switch (mode) { ++ case 0: ++ iid_lsb = (addr40) ? 32 : 24; ++ *addr &= ~(UINT64_C(0x3F) << iid_lsb); ++ return 0; ++ case 1: ++ if (da) { ++ iid_lsb = (addr40) ? 32 : 24; ++ *addr &= ~(UINT64_C(0x3F) << iid_lsb); ++ return 0; ++ } ++ iid_lsb = (addr40) ? 37 : 29; ++ *addr &= ~_nic_mask64(iid_lsb, iid_lsb, 0); ++ return 0; ++ case 2: ++ if (da) { ++ iid_lsb = (addr40) ? 32 : 24; ++ *addr &= ~(UINT64_C(0x3F) << iid_lsb); ++ return 0; ++ } ++ ++ iid_lsb = (addr40) ? 36 : 28; ++ *addr &= ~_nic_mask64(iid_lsb + 1, iid_lsb, 0); ++ return 0; ++ case 3: ++ if (da) { ++ iid_lsb = (addr40) ? 32 : 24; ++ *addr &= ~(UINT64_C(0x3F) << iid_lsb); ++ return 0; ++ } ++ ++ iid_lsb = (addr40) ? 
35 : 27; ++ *addr &= ~_nic_mask64(iid_lsb + 2, iid_lsb, 0); ++ return 0; ++ default: ++ return NFP_ERRNO(EINVAL); /* bad mode: must not fall into CTXPB */ ++ } ++ case NFP6000_CPPTGT_CTXPB: ++ if (mode != 1 || addr40 != 0) ++ return 0; ++ *addr &= ~(UINT64_C(0x3F) << 24); ++ return 0; ++ default: ++ break; ++ } ++ ++ return NFP_ERRNO(EINVAL); ++} ++ ++#endif /* __NFP_CPPAT_H__ */ +diff --git a/drivers/net/nfp/nfpcore/nfp-common/nfp_platform.h b/drivers/net/nfp/nfpcore/nfp-common/nfp_platform.h +new file mode 100644 +index 000000000..b8541c593 +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp-common/nfp_platform.h +@@ -0,0 +1,36 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. ++ */ ++ ++#ifndef __NFP_PLATFORM_H__ ++#define __NFP_PLATFORM_H__ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifndef BIT_ULL ++#define BIT(x) (1 << (x)) ++#define BIT_ULL(x) (1ULL << (x)) ++#endif ++ ++#ifndef ARRAY_SIZE ++#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) ++#endif ++ ++#define NFP_ERRNO(err) (errno = (err), -1) ++#define NFP_ERRNO_RET(err, ret) (errno = (err), (ret)) ++#define NFP_NOERR(errv) (errno) ++#define NFP_ERRPTR(err) (errno = (err), NULL) ++#define NFP_PTRERR(errv) (errno) ++ ++#endif /* __NFP_PLATFORM_H__ */ +diff --git a/drivers/net/nfp/nfpcore/nfp-common/nfp_resid.h b/drivers/net/nfp/nfpcore/nfp-common/nfp_resid.h +new file mode 100644 +index 000000000..0e03948ec +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp-common/nfp_resid.h +@@ -0,0 +1,592 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. 
++ */ ++ ++#ifndef __NFP_RESID_H__ ++#define __NFP_RESID_H__ ++ ++#if (!defined(_NFP_RESID_NO_C_FUNC) && \ ++ (defined(__NFP_TOOL_NFCC) || defined(__NFP_TOOL_NFAS))) ++#define _NFP_RESID_NO_C_FUNC ++#endif ++ ++#ifndef _NFP_RESID_NO_C_FUNC ++#include "nfp_platform.h" ++#endif ++ ++/* ++ * NFP Chip Architectures ++ * ++ * These are semi-arbitrary values to indicate an NFP architecture. ++ * They serve as a software view of a group of chip families, not necessarily a ++ * direct mapping to actual hardware design. ++ */ ++#define NFP_CHIP_ARCH_YD 1 ++#define NFP_CHIP_ARCH_TH 2 ++ ++/* ++ * NFP Chip Families. ++ * ++ * These are not enums, because they need to be microcode compatible. ++ * They are also not maskable. ++ * ++ * Note: The NFP-4xxx family is handled as NFP-6xxx in most software ++ * components. ++ * ++ */ ++#define NFP_CHIP_FAMILY_NFP6000 0x6000 /* ARCH_TH */ ++ ++/* NFP Microengine/Flow Processing Core Versions */ ++#define NFP_CHIP_ME_VERSION_2_7 0x0207 ++#define NFP_CHIP_ME_VERSION_2_8 0x0208 ++#define NFP_CHIP_ME_VERSION_2_9 0x0209 ++ ++/* NFP Chip Base Revisions. Minor stepping can just be added to these */ ++#define NFP_CHIP_REVISION_A0 0x00 ++#define NFP_CHIP_REVISION_B0 0x10 ++#define NFP_CHIP_REVISION_C0 0x20 ++#define NFP_CHIP_REVISION_PF 0xff /* Maximum possible revision */ ++ ++/* CPP Targets for each chip architecture */ ++#define NFP6000_CPPTGT_NBI 1 ++#define NFP6000_CPPTGT_VQDR 2 ++#define NFP6000_CPPTGT_ILA 6 ++#define NFP6000_CPPTGT_MU 7 ++#define NFP6000_CPPTGT_PCIE 9 ++#define NFP6000_CPPTGT_ARM 10 ++#define NFP6000_CPPTGT_CRYPTO 12 ++#define NFP6000_CPPTGT_CTXPB 14 ++#define NFP6000_CPPTGT_CLS 15 ++ ++/* ++ * Wildcard indicating a CPP read or write action ++ * ++ * The action used will be either read or write depending on whether a read or ++ * write instruction/call is performed on the NFP_CPP_ID. 
It is recommended that ++ * the RW action is used even if all actions to be performed on a NFP_CPP_ID are ++ * known to be only reads or writes. Doing so will in many cases save NFP CPP ++ * internal software resources. ++ */ ++#define NFP_CPP_ACTION_RW 32 ++ ++#define NFP_CPP_TARGET_ID_MASK 0x1f ++ ++/* ++ * NFP_CPP_ID - pack target, token, and action into a CPP ID. ++ * ++ * Create a 32-bit CPP identifier representing the access to be made. ++ * These identifiers are used as parameters to other NFP CPP functions. Some ++ * CPP devices may allow wildcard identifiers to be specified. ++ * ++ * @param[in] target NFP CPP target id ++ * @param[in] action NFP CPP action id ++ * @param[in] token NFP CPP token id ++ * @return NFP CPP ID ++ */ ++#define NFP_CPP_ID(target, action, token) \ ++ ((((target) & 0x7f) << 24) | (((token) & 0xff) << 16) | \ ++ (((action) & 0xff) << 8)) ++ ++#define NFP_CPP_ISLAND_ID(target, action, token, island) \ ++ ((((target) & 0x7f) << 24) | (((token) & 0xff) << 16) | \ ++ (((action) & 0xff) << 8) | (((island) & 0xff) << 0)) ++ ++#ifndef _NFP_RESID_NO_C_FUNC ++ ++/** ++ * Return the NFP CPP target of a NFP CPP ID ++ * @param[in] id NFP CPP ID ++ * @return NFP CPP target ++ */ ++static inline uint8_t ++NFP_CPP_ID_TARGET_of(uint32_t id) ++{ ++ return (id >> 24) & NFP_CPP_TARGET_ID_MASK; ++} ++ ++/* ++ * Return the NFP CPP token of a NFP CPP ID ++ * @param[in] id NFP CPP ID ++ * @return NFP CPP token ++ */ ++static inline uint8_t ++NFP_CPP_ID_TOKEN_of(uint32_t id) ++{ ++ return (id >> 16) & 0xff; ++} ++ ++/* ++ * Return the NFP CPP action of a NFP CPP ID ++ * @param[in] id NFP CPP ID ++ * @return NFP CPP action ++ */ ++static inline uint8_t ++NFP_CPP_ID_ACTION_of(uint32_t id) ++{ ++ return (id >> 8) & 0xff; ++} ++ ++/* ++ * Return the NFP CPP island of a NFP CPP ID ++ * @param[in] id NFP CPP ID ++ * @return NFP CPP island ++ */ ++static inline uint8_t ++NFP_CPP_ID_ISLAND_of(uint32_t id) ++{ ++ return (id) & 0xff; ++} ++ ++#endif /* 
_NFP_RESID_NO_C_FUNC */ ++ ++/* ++ * Check if @p chip_family is an ARCH_TH chip. ++ * @param chip_family One of NFP_CHIP_FAMILY_* ++ */ ++#define NFP_FAMILY_IS_ARCH_TH(chip_family) \ ++ ((int)(chip_family) == (int)NFP_CHIP_FAMILY_NFP6000) ++ ++/* ++ * Get the NFP_CHIP_ARCH_* of @p chip_family. ++ * @param chip_family One of NFP_CHIP_FAMILY_* ++ */ ++#define NFP_FAMILY_ARCH(x) \ ++ (__extension__ ({ \ ++ typeof(x) _x = (x); \ ++ (NFP_FAMILY_IS_ARCH_TH(_x) ? NFP_CHIP_ARCH_TH : \ ++ NFP_FAMILY_IS_ARCH_YD(_x) ? NFP_CHIP_ARCH_YD : -1) \ ++ })) ++ ++/* ++ * Check if @p chip_family is an NFP-6xxx chip. ++ * @param chip_family One of NFP_CHIP_FAMILY_* ++ */ ++#define NFP_FAMILY_IS_NFP6000(chip_family) \ ++ ((int)(chip_family) == (int)NFP_CHIP_FAMILY_NFP6000) ++ ++/* ++ * Make microengine ID for NFP-6xxx. ++ * @param island_id Island ID. ++ * @param menum ME number, 0 based, within island. ++ * ++ * NOTE: menum should really be unsigned - MSC compiler throws error (not ++ * warning) if a clause is always true i.e. menum >= 0 if cluster_num is type ++ * unsigned int hence the cast of the menum to an int in that particular clause ++ */ ++#define NFP6000_MEID(a, b) \ ++ (__extension__ ({ \ ++ typeof(a) _a = (a); \ ++ typeof(b) _b = (b); \ ++ (((((int)(_a) & 0x3F) == (int)(_a)) && \ ++ (((int)(_b) >= 0) && ((int)(_b) < 12))) ? \ ++ (int)(((_a) << 4) | ((_b) + 4)) : -1) \ ++ })) ++ ++/* ++ * Do a general sanity check on the ME ID. ++ * The check is on the highest possible island ID for the chip family and the ++ * microengine number must be a master ID. ++ * @param meid ME ID as created by NFP6000_MEID ++ */ ++#define NFP6000_MEID_IS_VALID(meid) \ ++ (__extension__ ({ \ ++ typeof(meid) _a = (meid); \ ++ ((((_a) >> 4) < 64) && (((_a) >> 4) >= 0) && \ ++ (((_a) & 0xF) >= 4)) \ ++ })) ++ ++/* ++ * Extract island ID from ME ID. 
++ * @param meid ME ID as created by NFP6000_MEID ++ */ ++#define NFP6000_MEID_ISLAND_of(meid) (((meid) >> 4) & 0x3F) ++ ++/* ++ * Extract microengine number (0 based) from ME ID. ++ * @param meid ME ID as created by NFP6000_MEID ++ */ ++#define NFP6000_MEID_MENUM_of(meid) (((meid) & 0xF) - 4) ++ ++/* ++ * Extract microengine group number (0 based) from ME ID. ++ * The group is two code-sharing microengines, so group 0 refers to MEs 0,1, ++ * group 1 refers to MEs 2,3 etc. ++ * @param meid ME ID as created by NFP6000_MEID ++ */ ++#define NFP6000_MEID_MEGRP_of(meid) (NFP6000_MEID_MENUM_of(meid) >> 1) ++ ++#ifndef _NFP_RESID_NO_C_FUNC ++ ++/* ++ * Convert a string to an ME ID. ++ * ++ * @param s A string of format iX.meY ++ * @param endptr If non-NULL, *endptr will point to the trailing string ++ * after the ME ID part of the string, which is either ++ * an empty string or the first character after the separating ++ * period. ++ * @return ME ID on success, -1 on error. ++ */ ++int nfp6000_idstr2meid(const char *s, const char **endptr); ++ ++/* ++ * Extract island ID from string. ++ * ++ * Example: ++ * char *c; ++ * int val = nfp6000_idstr2island("i32.me5", &c); ++ * // val == 32, c == "me5" ++ * val = nfp6000_idstr2island("i32", &c); ++ * // val == 32, c == "" ++ * ++ * @param s A string of format "iX.anything" or "iX" ++ * @param endptr If non-NULL, *endptr will point to the trailing string ++ * after the island part of the string, which is either ++ * an empty string or the first character after the separating ++ * period. ++ * @return If successful, the island ID, -1 on error. ++ */ ++int nfp6000_idstr2island(const char *s, const char **endptr); ++ ++/* ++ * Extract microengine number from string. 
++ * ++ * Example: ++ * char *c; ++ * int menum = nfp6000_idstr2menum("me5.anything", &c); ++ * // menum == 5, c == "anything" ++ * menum = nfp6000_idstr2menum("me5", &c); ++ * // menum == 5, c == "" ++ * ++ * @param s A string of format "meX.anything" or "meX" ++ * @param endptr If non-NULL, *endptr will point to the trailing string ++ * after the ME number part of the string, which is either ++ * an empty string or the first character after the separating ++ * period. ++ * @return If successful, the ME number, -1 on error. ++ */ ++int nfp6000_idstr2menum(const char *s, const char **endptr); ++ ++/* ++ * Extract context number from string. ++ * ++ * Example: ++ * char *c; ++ * int val = nfp6000_idstr2ctxnum("ctx5.anything", &c); ++ * // val == 5, c == "anything" ++ * val = nfp6000_idstr2ctxnum("ctx5", &c); ++ * // val == 5, c == "" ++ * ++ * @param s A string of format "ctxN.anything" or "ctxN" ++ * @param endptr If non-NULL, *endptr will point to the trailing string ++ * after the context number part of the string, which is either ++ * an empty string or the first character after the separating ++ * period. ++ * @return If successful, the context number, -1 on error. ++ */ ++int nfp6000_idstr2ctxnum(const char *s, const char **endptr); ++ ++/* ++ * Extract microengine group number from string. ++ * ++ * Example: ++ * char *c; ++ * int val = nfp6000_idstr2megrp("tg2.anything", &c); ++ * // val == 2, c == "anything" ++ * val = nfp6000_idstr2megrp("tg5", &c); ++ * // val == 5, c == "" ++ * ++ * @param s A string of format "tgX.anything" or "tgX" ++ * @param endptr If non-NULL, *endptr will point to the trailing string ++ * after the ME group part of the string, which is either ++ * an empty string or the first character after the separating ++ * period. ++ * @return If successful, the ME group number, -1 on error. ++ */ ++int nfp6000_idstr2megrp(const char *s, const char **endptr); ++ ++/* ++ * Create ME ID string of format "iX[.meY]". 
++ * ++ * @param s Pointer to char buffer of size NFP_MEID_STR_SZ. ++ * The resulting string is output here. ++ * @param meid Microengine ID. ++ * @return Pointer to "s" on success, NULL on error. ++ */ ++const char *nfp6000_meid2str(char *s, int meid); ++ ++/* ++ * Create ME ID string of format "name[.meY]" or "iX[.meY]". ++ * ++ * @param s Pointer to char buffer of size NFP_MEID_STR_SZ. ++ * The resulting string is output here. ++ * @param meid Microengine ID. ++ * @return Pointer to "s" on success, NULL on error. ++ * ++ * Similar to nfp6000_meid2str() except use an alias instead of "iX" ++ * if one exists for the island. ++ */ ++const char *nfp6000_meid2altstr(char *s, int meid); ++ ++/* ++ * Create string of format "iX". ++ * ++ * @param s Pointer to char buffer of size NFP_MEID_STR_SZ. ++ * The resulting string is output here. ++ * @param island_id Island ID. ++ * @return Pointer to "s" on success, NULL on error. ++ */ ++const char *nfp6000_island2str(char *s, int island_id); ++ ++/* ++ * Create string of format "name", an island alias. ++ * ++ * @param s Pointer to char buffer of size NFP_MEID_STR_SZ. ++ * The resulting string is output here. ++ * @param island_id Island ID. ++ * @return Pointer to "s" on success, NULL on error. ++ */ ++const char *nfp6000_island2altstr(char *s, int island_id); ++ ++/* ++ * Create string of format "meY". ++ * ++ * @param s Pointer to char buffer of size NFP_MEID_STR_SZ. ++ * The resulting string is output here. ++ * @param menum Microengine number within island. ++ * @return Pointer to "s" on success, NULL on error. ++ */ ++const char *nfp6000_menum2str(char *s, int menum); ++ ++/* ++ * Create string of format "ctxY". ++ * ++ * @param s Pointer to char buffer of size NFP_MEID_STR_SZ. ++ * The resulting string is output here. ++ * @param ctxnum Context number within microengine. ++ * @return Pointer to "s" on success, NULL on error. 
++ */ ++const char *nfp6000_ctxnum2str(char *s, int ctxnum); ++ ++/* ++ * Create string of format "tgY". ++ * ++ * @param s Pointer to char buffer of size NFP_MEID_STR_SZ. ++ * The resulting string is output here. ++ * @param megrp Microengine group number within cluster. ++ * @return Pointer to "s" on success, NULL on error. ++ */ ++const char *nfp6000_megrp2str(char *s, int megrp); ++ ++/* ++ * Convert a string to an ME ID. ++ * ++ * @param chip_family Chip family ID ++ * @param s A string of format iX.meY (or clX.meY) ++ * @param endptr If non-NULL, *endptr will point to the trailing ++ * string after the ME ID part of the string, which ++ * is either an empty string or the first character ++ * after the separating period. ++ * @return ME ID on success, -1 on error. ++ */ ++int nfp_idstr2meid(int chip_family, const char *s, const char **endptr); ++ ++/* ++ * Extract island ID from string. ++ * ++ * Example: ++ * char *c; ++ * int val = nfp_idstr2island(chip, "i32.me5", &c); ++ * // val == 32, c == "me5" ++ * val = nfp_idstr2island(chip, "i32", &c); ++ * // val == 32, c == "" ++ * ++ * @param chip_family Chip family ID ++ * @param s A string of format "iX.anything" or "iX" ++ * @param endptr If non-NULL, *endptr will point to the trailing ++ * string after the island part of the string, which ++ * is either an empty string or the first character ++ * after the separating period. ++ * @return The island ID on success, -1 on error. ++ */ ++int nfp_idstr2island(int chip_family, const char *s, const char **endptr); ++ ++/* ++ * Extract microengine number from string. 
++ * ++ * Example: ++ * char *c; ++ * int menum = nfp_idstr2menum(chip, "me5.anything", &c); ++ * // menum == 5, c == "anything" ++ * menum = nfp_idstr2menum(chip, "me5", &c); ++ * // menum == 5, c == "" ++ * ++ * @param chip_family Chip family ID ++ * @param s A string of format "meX.anything" or "meX" ++ * @param endptr If non-NULL, *endptr will point to the trailing ++ * string after the ME number part of the string, which ++ * is either an empty string or the first character ++ * after the separating period. ++ * @return The ME number on success, -1 on error. ++ */ ++int nfp_idstr2menum(int chip_family, const char *s, const char **endptr); ++ ++/* ++ * Extract context number from string. ++ * ++ * Example: ++ * char *c; ++ * int val = nfp_idstr2ctxnum(chip, "ctx5.anything", &c); ++ * // val == 5, c == "anything" ++ * val = nfp_idstr2ctxnum(chip, "ctx5", &c); ++ * // val == 5, c == "" ++ * ++ * @param s A string of format "ctxN.anything" or "ctxN" ++ * @param endptr If non-NULL, *endptr will point to the trailing string ++ * after the context number part of the string, which is either ++ * an empty string or the first character after the separating ++ * period. ++ * @return If successful, the context number, -1 on error. ++ */ ++int nfp_idstr2ctxnum(int chip_family, const char *s, const char **endptr); ++ ++/* ++ * Extract microengine group number from string. ++ * ++ * Example: ++ * char *c; ++ * int val = nfp_idstr2megrp(chip, "tg2.anything", &c); ++ * // val == 2, c == "anything" ++ * val = nfp_idstr2megrp(chip, "tg5", &c); ++ * // val == 5, c == "" ++ * ++ * @param s A string of format "tgX.anything" or "tgX" ++ * @param endptr If non-NULL, *endptr will point to the trailing string ++ * after the ME group part of the string, which is either ++ * an empty string or the first character after the separating ++ * period. ++ * @return If successful, the ME group number, -1 on error. 
++ */ ++int nfp_idstr2megrp(int chip_family, const char *s, const char **endptr); ++ ++/* ++ * Create ME ID string of format "iX[.meY]". ++ * ++ * @param chip_family Chip family ID ++ * @param s Pointer to char buffer of size NFP_MEID_STR_SZ. ++ * The resulting string is output here. ++ * @param meid Microengine ID. ++ * @return Pointer to "s" on success, NULL on error. ++ */ ++const char *nfp_meid2str(int chip_family, char *s, int meid); ++ ++/* ++ * Create ME ID string of format "name[.meY]" or "iX[.meY]". ++ * ++ * @param chip_family Chip family ID ++ * @param s Pointer to char buffer of size NFP_MEID_STR_SZ. ++ * The resulting string is output here. ++ * @param meid Microengine ID. ++ * @return Pointer to "s" on success, NULL on error. ++ * ++ * Similar to nfp_meid2str() except use an alias instead of "iX" ++ * if one exists for the island. ++ */ ++const char *nfp_meid2altstr(int chip_family, char *s, int meid); ++ ++/* ++ * Create string of format "iX". ++ * ++ * @param chip_family Chip family ID ++ * @param s Pointer to char buffer of size NFP_MEID_STR_SZ. ++ * The resulting string is output here. ++ * @param island_id Island ID. ++ * @return Pointer to "s" on success, NULL on error. ++ */ ++const char *nfp_island2str(int chip_family, char *s, int island_id); ++ ++/* ++ * Create string of format "name", an island alias. ++ * ++ * @param chip_family Chip family ID ++ * @param s Pointer to char buffer of size NFP_MEID_STR_SZ. ++ * The resulting string is output here. ++ * @param island_id Island ID. ++ * @return Pointer to "s" on success, NULL on error. ++ */ ++const char *nfp_island2altstr(int chip_family, char *s, int island_id); ++ ++/* ++ * Create string of format "meY". ++ * ++ * @param chip_family Chip family ID ++ * @param s Pointer to char buffer of size NFP_MEID_STR_SZ. ++ * The resulting string is output here. ++ * @param menum Microengine number within island. ++ * @return Pointer to "s" on success, NULL on error. 
++ */ ++const char *nfp_menum2str(int chip_family, char *s, int menum); ++ ++/* ++ * Create string of format "ctxY". ++ * ++ * @param s Pointer to char buffer of size NFP_MEID_STR_SZ. ++ * The resulting string is output here. ++ * @param ctxnum Context number within microengine. ++ * @return Pointer to "s" on success, NULL on error. ++ */ ++const char *nfp_ctxnum2str(int chip_family, char *s, int ctxnum); ++ ++/* ++ * Create string of format "tgY". ++ * ++ * @param s Pointer to char buffer of size NFP_MEID_STR_SZ. ++ * The resulting string is output here. ++ * @param megrp Microengine group number within cluster. ++ * @return Pointer to "s" on success, NULL on error. ++ */ ++const char *nfp_megrp2str(int chip_family, char *s, int megrp); ++ ++/* ++ * Convert a two character string to revision number. ++ * ++ * Revision integer is 0x00 for A0, 0x11 for B1 etc. ++ * ++ * @param s Two character string. ++ * @return Revision number, -1 on error ++ */ ++int nfp_idstr2rev(const char *s); ++ ++/* ++ * Create string from revision number. ++ * ++ * String will be upper case. ++ * ++ * @param s Pointer to char buffer with size of at least 3 ++ * for 2 characters and string terminator. ++ * @param rev Revision number. ++ * @return Pointer to "s" on success, NULL on error. 
++ */ ++const char *nfp_rev2str(char *s, int rev); ++ ++/* ++ * Get the NFP CPP address from a string ++ * ++ * String is in the format [island@]target[:[action:[token:]]address] ++ * ++ * @param chip_family Chip family ID ++ * @param tid Pointer to string to parse ++ * @param cpp_idp Pointer to CPP ID ++ * @param cpp_addrp Pointer to CPP address ++ * @return 0 on success, or -1 and errno ++ */ ++int nfp_str2cpp(int chip_family, ++ const char *tid, ++ uint32_t *cpp_idp, ++ uint64_t *cpp_addrp); ++ ++ ++#endif /* _NFP_RESID_NO_C_FUNC */ ++ ++#endif /* __NFP_RESID_H__ */ +diff --git a/drivers/net/nfp/nfpcore/nfp6000/nfp6000.h b/drivers/net/nfp/nfpcore/nfp6000/nfp6000.h +new file mode 100644 +index 000000000..47e1ddaee +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp6000/nfp6000.h +@@ -0,0 +1,40 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. ++ */ ++ ++#ifndef __NFP_NFP6000_H__ ++#define __NFP_NFP6000_H__ ++ ++/* CPP Target IDs */ ++#define NFP_CPP_TARGET_INVALID 0 ++#define NFP_CPP_TARGET_NBI 1 ++#define NFP_CPP_TARGET_QDR 2 ++#define NFP_CPP_TARGET_ILA 6 ++#define NFP_CPP_TARGET_MU 7 ++#define NFP_CPP_TARGET_PCIE 9 ++#define NFP_CPP_TARGET_ARM 10 ++#define NFP_CPP_TARGET_CRYPTO 12 ++#define NFP_CPP_TARGET_ISLAND_XPB 14 /* Shared with CAP */ ++#define NFP_CPP_TARGET_ISLAND_CAP 14 /* Shared with XPB */ ++#define NFP_CPP_TARGET_CT_XPB 14 ++#define NFP_CPP_TARGET_LOCAL_SCRATCH 15 ++#define NFP_CPP_TARGET_CLS NFP_CPP_TARGET_LOCAL_SCRATCH ++ ++#define NFP_ISL_EMEM0 24 ++ ++#define NFP_MU_ADDR_ACCESS_TYPE_MASK 3ULL ++#define NFP_MU_ADDR_ACCESS_TYPE_DIRECT 2ULL ++ ++static inline int ++nfp_cppat_mu_locality_lsb(int mode, int addr40) ++{ ++ switch (mode) { ++ case 0 ... 3: ++ return addr40 ? 
38 : 30; ++ default: ++ return -EINVAL; ++ } ++} ++ ++#endif /* __NFP_NFP6000_H__ */ +diff --git a/drivers/net/nfp/nfpcore/nfp6000/nfp_xpb.h b/drivers/net/nfp/nfpcore/nfp6000/nfp_xpb.h +new file mode 100644 +index 000000000..7ada1bb2f +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp6000/nfp_xpb.h +@@ -0,0 +1,26 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. ++ */ ++ ++#ifndef __NFP_XPB_H__ ++#define __NFP_XPB_H__ ++ ++/* ++ * For use with NFP6000 Databook "XPB Addressing" section ++ */ ++#define NFP_XPB_OVERLAY(island) (((island) & 0x3f) << 24) ++ ++#define NFP_XPB_ISLAND(island) (NFP_XPB_OVERLAY(island) + 0x60000) ++ ++#define NFP_XPB_ISLAND_of(offset) (((offset) >> 24) & 0x3F) ++ ++/* ++ * For use with NFP6000 Databook "XPB Island and Device IDs" chapter ++ */ ++#define NFP_XPB_DEVICE(island, slave, device) \ ++ (NFP_XPB_OVERLAY(island) | \ ++ (((slave) & 3) << 22) | \ ++ (((device) & 0x3f) << 16)) ++ ++#endif /* __NFP_XPB_H__ */ +diff --git a/drivers/net/nfp/nfpcore/nfp_cpp.h b/drivers/net/nfp/nfpcore/nfp_cpp.h +new file mode 100644 +index 000000000..7e862145c +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp_cpp.h +@@ -0,0 +1,776 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. ++ */ ++ ++#ifndef __NFP_CPP_H__ ++#define __NFP_CPP_H__ ++ ++#include "nfp-common/nfp_platform.h" ++#include "nfp-common/nfp_resid.h" ++ ++struct nfp_cpp_mutex; ++ ++/* ++ * NFP CPP handle ++ */ ++struct nfp_cpp { ++ uint32_t model; ++ uint32_t interface; ++ uint8_t *serial; ++ int serial_len; ++ void *priv; ++ ++ /* Mutex cache */ ++ struct nfp_cpp_mutex *mutex_cache; ++ const struct nfp_cpp_operations *op; ++ ++ /* ++ * NFP-6xxx originating island IMB CPP Address Translation. CPP Target ++ * ID is index into array. Values are obtained at runtime from local ++ * island XPB CSRs.
++ */ ++ uint32_t imb_cat_table[16]; ++}; ++ ++/* ++ * NFP CPP device area handle ++ */ ++struct nfp_cpp_area { ++ struct nfp_cpp *cpp; ++ char *name; ++ unsigned long long offset; ++ unsigned long size; ++ /* Here follows the 'priv' part of nfp_cpp_area. */ ++}; ++ ++/* ++ * NFP CPP operations structure ++ */ ++struct nfp_cpp_operations { ++ /* Size of priv area in struct nfp_cpp_area */ ++ size_t area_priv_size; ++ ++ /* Instance an NFP CPP */ ++ int (*init)(struct nfp_cpp *cpp, const char *devname); ++ ++ /* ++ * Free the bus. ++ * Called only once, during nfp_cpp_unregister() ++ */ ++ void (*free)(struct nfp_cpp *cpp); ++ ++ /* ++ * Initialize a new NFP CPP area ++ * NOTE: This is _not_ serialized ++ */ ++ int (*area_init)(struct nfp_cpp_area *area, ++ uint32_t dest, ++ unsigned long long address, ++ unsigned long size); ++ /* ++ * Clean up a NFP CPP area before it is freed ++ * NOTE: This is _not_ serialized ++ */ ++ void (*area_cleanup)(struct nfp_cpp_area *area); ++ ++ /* ++ * Acquire resources for a NFP CPP area ++ * Serialized ++ */ ++ int (*area_acquire)(struct nfp_cpp_area *area); ++ /* ++ * Release resources for a NFP CPP area ++ * Serialized ++ */ ++ void (*area_release)(struct nfp_cpp_area *area); ++ /* ++ * Return a void IO pointer to a NFP CPP area ++ * NOTE: This is _not_ serialized ++ */ ++ ++ void *(*area_iomem)(struct nfp_cpp_area *area); ++ ++ void *(*area_mapped)(struct nfp_cpp_area *area); ++ /* ++ * Perform a read from a NFP CPP area ++ * Serialized ++ */ ++ int (*area_read)(struct nfp_cpp_area *area, ++ void *kernel_vaddr, ++ unsigned long offset, ++ unsigned int length); ++ /* ++ * Perform a write to a NFP CPP area ++ * Serialized ++ */ ++ int (*area_write)(struct nfp_cpp_area *area, ++ const void *kernel_vaddr, ++ unsigned long offset, ++ unsigned int length); ++}; ++ ++/* ++ * This should be the only external function the transport ++ * module supplies ++ */ ++const struct nfp_cpp_operations *nfp_cpp_transport_operations(void); ++ ++/*
++ * Set the model id ++ * ++ * @param cpp NFP CPP handle ++ * @param model Model ID ++ */ ++void nfp_cpp_model_set(struct nfp_cpp *cpp, uint32_t model); ++ ++/* ++ * Set the interface id of a nfp_cpp struct ++ * ++ * @param cpp NFP CPP handle ++ * @param interface Interface ID ++ */ ++void nfp_cpp_interface_set(struct nfp_cpp *cpp, uint32_t interface); ++ ++/* ++ * Set the serial number of a nfp_cpp struct ++ * ++ * @param cpp NFP CPP handle ++ * @param serial NFP serial byte array ++ * @param serial_len Length of the serial byte array ++ */ ++int nfp_cpp_serial_set(struct nfp_cpp *cpp, const uint8_t *serial, ++ size_t serial_len); ++ ++/* ++ * Set the private data of the nfp_cpp instance ++ * ++ * @param cpp NFP CPP handle ++ * @param priv Opaque device pointer ++ */ ++void nfp_cpp_priv_set(struct nfp_cpp *cpp, void *priv); ++ ++/* ++ * Return the private data of the nfp_cpp instance ++ * ++ * @param cpp NFP CPP handle ++ * @return Opaque device pointer ++ */ ++void *nfp_cpp_priv(struct nfp_cpp *cpp); ++ ++/* ++ * Get the privately allocated portion of a NFP CPP area handle ++ * ++ * @param cpp_area NFP CPP area handle ++ * @return Pointer to the private area, or NULL on failure ++ */ ++void *nfp_cpp_area_priv(struct nfp_cpp_area *cpp_area); ++ ++uint32_t __nfp_cpp_model_autodetect(struct nfp_cpp *cpp); ++ ++/* ++ * NFP CPP core interface for CPP clients. ++ */ ++ ++/* ++ * Open a NFP CPP handle to a CPP device ++ * ++ * @param[in] id 0-based ID for the CPP interface to use ++ * ++ * @return NFP CPP handle, or NULL on failure (and set errno accordingly).
++ */ ++struct nfp_cpp *nfp_cpp_from_device_name(const char *devname); ++ ++/* ++ * Free a NFP CPP handle ++ * ++ * @param[in] cpp NFP CPP handle ++ */ ++void nfp_cpp_free(struct nfp_cpp *cpp); ++ ++#define NFP_CPP_MODEL_INVALID 0xffffffff ++ ++/* ++ * NFP_CPP_MODEL_CHIP_of - retrieve the chip ID from the model ID ++ * ++ * The chip ID is a 16-bit BCD+A-F encoding for the chip type. ++ * ++ * @param[in] model NFP CPP model id ++ * @return NFP CPP chip id ++ */ ++#define NFP_CPP_MODEL_CHIP_of(model) (((model) >> 16) & 0xffff) ++ ++/* ++ * NFP_CPP_MODEL_IS_6000 - Check for the NFP6000 family of devices ++ * ++ * NOTE: The NFP4000 series is considered as a NFP6000 series variant. ++ * ++ * @param[in] model NFP CPP model id ++ * @return true if model is in the NFP6000 family, false otherwise. ++ */ ++#define NFP_CPP_MODEL_IS_6000(model) \ ++ ((NFP_CPP_MODEL_CHIP_of(model) >= 0x4000) && \ ++ (NFP_CPP_MODEL_CHIP_of(model) < 0x7000)) ++ ++/* ++ * nfp_cpp_model - Retrieve the Model ID of the NFP ++ * ++ * @param[in] cpp NFP CPP handle ++ * @return NFP CPP Model ID ++ */ ++uint32_t nfp_cpp_model(struct nfp_cpp *cpp); ++ ++/* ++ * NFP Interface types - logical interface for this CPP connection 4 bits are ++ * reserved for interface type. ++ */ ++#define NFP_CPP_INTERFACE_TYPE_INVALID 0x0 ++#define NFP_CPP_INTERFACE_TYPE_PCI 0x1 ++#define NFP_CPP_INTERFACE_TYPE_ARM 0x2 ++#define NFP_CPP_INTERFACE_TYPE_RPC 0x3 ++#define NFP_CPP_INTERFACE_TYPE_ILA 0x4 ++ ++/* ++ * Construct a 16-bit NFP Interface ID ++ * ++ * Interface IDs consists of 4 bits of interface type, 4 bits of unit ++ * identifier, and 8 bits of channel identifier. ++ * ++ * The NFP Interface ID is used in the implementation of NFP CPP API mutexes, ++ * which use the MU Atomic CompareAndWrite operation - hence the limit to 16 ++ * bits to be able to use the NFP Interface ID as a lock owner. 
++ * ++ * @param[in] type NFP Interface Type ++ * @param[in] unit Unit identifier for the interface type ++ * @param[in] channel Channel identifier for the interface unit ++ * @return Interface ID ++ */ ++#define NFP_CPP_INTERFACE(type, unit, channel) \ ++ ((((type) & 0xf) << 12) | \ ++ (((unit) & 0xf) << 8) | \ ++ (((channel) & 0xff) << 0)) ++ ++/* ++ * Get the interface type of a NFP Interface ID ++ * @param[in] interface NFP Interface ID ++ * @return NFP Interface ID's type ++ */ ++#define NFP_CPP_INTERFACE_TYPE_of(interface) (((interface) >> 12) & 0xf) ++ ++/* ++ * Get the interface unit of a NFP Interface ID ++ * @param[in] interface NFP Interface ID ++ * @return NFP Interface ID's unit ++ */ ++#define NFP_CPP_INTERFACE_UNIT_of(interface) (((interface) >> 8) & 0xf) ++ ++/* ++ * Get the interface channel of a NFP Interface ID ++ * @param[in] interface NFP Interface ID ++ * @return NFP Interface ID's channel ++ */ ++#define NFP_CPP_INTERFACE_CHANNEL_of(interface) (((interface) >> 0) & 0xff) ++ ++/* ++ * Retrieve the Interface ID of the NFP ++ * @param[in] cpp NFP CPP handle ++ * @return NFP CPP Interface ID ++ */ ++uint16_t nfp_cpp_interface(struct nfp_cpp *cpp); ++ ++/* ++ * Retrieve the NFP Serial Number (unique per NFP) ++ * @param[in] cpp NFP CPP handle ++ * @param[out] serial Pointer to reference the serial number array ++ * ++ * @return size of the NFP6000 serial number, in bytes ++ */ ++int nfp_cpp_serial(struct nfp_cpp *cpp, const uint8_t **serial); ++ ++/* ++ * Allocate a NFP CPP area handle, as an offset into a CPP ID ++ * @param[in] cpp NFP CPP handle ++ * @param[in] cpp_id NFP CPP ID ++ * @param[in] address Offset into the NFP CPP ID address space ++ * @param[in] size Size of the area to reserve ++ * ++ * @return NFP CPP handle, or NULL on failure (and set errno accordingly). 
++ */ ++struct nfp_cpp_area *nfp_cpp_area_alloc(struct nfp_cpp *cpp, uint32_t cpp_id, ++ unsigned long long address, ++ unsigned long size); ++ ++/* ++ * Allocate a NFP CPP area handle, as an offset into a CPP ID, by a named owner ++ * @param[in] cpp NFP CPP handle ++ * @param[in] cpp_id NFP CPP ID ++ * @param[in] name Name of owner of the area ++ * @param[in] address Offset into the NFP CPP ID address space ++ * @param[in] size Size of the area to reserve ++ * ++ * @return NFP CPP handle, or NULL on failure (and set errno accordingly). ++ */ ++struct nfp_cpp_area *nfp_cpp_area_alloc_with_name(struct nfp_cpp *cpp, ++ uint32_t cpp_id, ++ const char *name, ++ unsigned long long address, ++ unsigned long size); ++ ++/* ++ * Free an allocated NFP CPP area handle ++ * @param[in] area NFP CPP area handle ++ */ ++void nfp_cpp_area_free(struct nfp_cpp_area *area); ++ ++/* ++ * Acquire the resources needed to access the NFP CPP area handle ++ * ++ * @param[in] area NFP CPP area handle ++ * ++ * @return 0 on success, -1 on failure (and set errno accordingly). ++ */ ++int nfp_cpp_area_acquire(struct nfp_cpp_area *area); ++ ++/* ++ * Release the resources needed to access the NFP CPP area handle ++ * ++ * @param[in] area NFP CPP area handle ++ */ ++void nfp_cpp_area_release(struct nfp_cpp_area *area); ++ ++/* ++ * Allocate, then acquire the resources needed to access the NFP CPP area handle ++ * @param[in] cpp NFP CPP handle ++ * @param[in] cpp_id NFP CPP ID ++ * @param[in] address Offset into the NFP CPP ID address space ++ * @param[in] size Size of the area to reserve ++ * ++ * @return NFP CPP handle, or NULL on failure (and set errno accordingly). 
++ */ ++struct nfp_cpp_area *nfp_cpp_area_alloc_acquire(struct nfp_cpp *cpp, ++ uint32_t cpp_id, ++ unsigned long long address, ++ unsigned long size); ++ ++/* ++ * Release the resources, then free the NFP CPP area handle ++ * @param[in] area NFP CPP area handle ++ */ ++void nfp_cpp_area_release_free(struct nfp_cpp_area *area); ++ ++uint8_t *nfp_cpp_map_area(struct nfp_cpp *cpp, int domain, int target, ++ uint64_t addr, unsigned long size, ++ struct nfp_cpp_area **area); ++/* ++ * Return an IO pointer to the beginning of the NFP CPP area handle. The area ++ * must be acquired with 'nfp_cpp_area_acquire()' before calling this operation. ++ * ++ * @param[in] area NFP CPP area handle ++ * ++ * @return Pointer to IO memory, or NULL on failure (and set errno accordingly). ++ */ ++void *nfp_cpp_area_mapped(struct nfp_cpp_area *area); ++ ++/* ++ * Read from a NFP CPP area handle into a buffer. The area must be acquired with ++ * 'nfp_cpp_area_acquire()' before calling this operation. ++ * ++ * @param[in] area NFP CPP area handle ++ * @param[in] offset Offset into the area ++ * @param[in] buffer Location of buffer to receive the data ++ * @param[in] length Length of the data to read ++ * ++ * @return bytes read on success, -1 on failure (and set errno accordingly). ++ * ++ */ ++int nfp_cpp_area_read(struct nfp_cpp_area *area, unsigned long offset, ++ void *buffer, size_t length); ++ ++/* ++ * Write to a NFP CPP area handle from a buffer. The area must be acquired with ++ * 'nfp_cpp_area_acquire()' before calling this operation. ++ * ++ * @param[in] area NFP CPP area handle ++ * @param[in] offset Offset into the area ++ * @param[in] buffer Location of buffer that holds the data ++ * @param[in] length Length of the data to read ++ * ++ * @return bytes written on success, -1 on failure (and set errno accordingly). 
++ */ ++int nfp_cpp_area_write(struct nfp_cpp_area *area, unsigned long offset, ++ const void *buffer, size_t length); ++ ++/* ++ * nfp_cpp_area_iomem() - get IOMEM region for CPP area ++ * @area: CPP area handle ++ * ++ * Returns an iomem pointer for use with readl()/writel() style operations. ++ * ++ * NOTE: Area must have been locked down with an 'acquire'. ++ * ++ * Return: pointer to the area, or NULL ++ */ ++void *nfp_cpp_area_iomem(struct nfp_cpp_area *area); ++ ++/* ++ * Verify that IO can be performed on an offset in an area ++ * ++ * @param[in] area NFP CPP area handle ++ * @param[in] offset Offset into the area ++ * @param[in] size Size of region to validate ++ * ++ * @return 0 on success, -1 on failure (and set errno accordingly). ++ */ ++int nfp_cpp_area_check_range(struct nfp_cpp_area *area, ++ unsigned long long offset, unsigned long size); ++ ++/* ++ * Get the NFP CPP handle that is the parent of a NFP CPP area handle ++ * ++ * @param cpp_area NFP CPP area handle ++ * @return NFP CPP handle ++ */ ++struct nfp_cpp *nfp_cpp_area_cpp(struct nfp_cpp_area *cpp_area); ++ ++/* ++ * Get the name passed during allocation of the NFP CPP area handle ++ * ++ * @param cpp_area NFP CPP area handle ++ * @return Pointer to the area's name ++ */ ++const char *nfp_cpp_area_name(struct nfp_cpp_area *cpp_area); ++ ++/* ++ * Read a block of data from a NFP CPP ID ++ * ++ * @param[in] cpp NFP CPP handle ++ * @param[in] cpp_id NFP CPP ID ++ * @param[in] address Offset into the NFP CPP ID address space ++ * @param[in] kernel_vaddr Buffer to copy read data to ++ * @param[in] length Size of the area to reserve ++ * ++ * @return bytes read on success, -1 on failure (and set errno accordingly). 
++ */ ++int nfp_cpp_read(struct nfp_cpp *cpp, uint32_t cpp_id, ++ unsigned long long address, void *kernel_vaddr, size_t length); ++ ++/* ++ * Write a block of data to a NFP CPP ID ++ * ++ * @param[in] cpp NFP CPP handle ++ * @param[in] cpp_id NFP CPP ID ++ * @param[in] address Offset into the NFP CPP ID address space ++ * @param[in] kernel_vaddr Buffer to copy write data from ++ * @param[in] length Size of the area to reserve ++ * ++ * @return bytes written on success, -1 on failure (and set errno accordingly). ++ */ ++int nfp_cpp_write(struct nfp_cpp *cpp, uint32_t cpp_id, ++ unsigned long long address, const void *kernel_vaddr, ++ size_t length); ++ ++ ++ ++/* ++ * Fill a NFP CPP area handle and offset with a value ++ * ++ * @param[in] area NFP CPP area handle ++ * @param[in] offset Offset into the NFP CPP ID address space ++ * @param[in] value 32-bit value to fill area with ++ * @param[in] length Size of the area to reserve ++ * ++ * @return bytes written on success, -1 on failure (and set errno accordingly). ++ */ ++int nfp_cpp_area_fill(struct nfp_cpp_area *area, unsigned long offset, ++ uint32_t value, size_t length); ++ ++/* ++ * Read a single 32-bit value from a NFP CPP area handle ++ * ++ * @param area NFP CPP area handle ++ * @param offset offset into NFP CPP area handle ++ * @param value output value ++ * ++ * The area must be acquired with 'nfp_cpp_area_acquire()' before calling this ++ * operation. ++ * ++ * NOTE: offset must be 32-bit aligned. ++ * ++ * @return 0 on success, or -1 on error (and set errno accordingly). ++ */ ++int nfp_cpp_area_readl(struct nfp_cpp_area *area, unsigned long offset, ++ uint32_t *value); ++ ++/* ++ * Write a single 32-bit value to a NFP CPP area handle ++ * ++ * @param area NFP CPP area handle ++ * @param offset offset into NFP CPP area handle ++ * @param value value to write ++ * ++ * The area must be acquired with 'nfp_cpp_area_acquire()' before calling this ++ * operation. 
++ * ++ * NOTE: offset must be 32-bit aligned. ++ * ++ * @return 0 on success, or -1 on error (and set errno accordingly). ++ */ ++int nfp_cpp_area_writel(struct nfp_cpp_area *area, unsigned long offset, ++ uint32_t value); ++ ++/* ++ * Read a single 64-bit value from a NFP CPP area handle ++ * ++ * @param area NFP CPP area handle ++ * @param offset offset into NFP CPP area handle ++ * @param value output value ++ * ++ * The area must be acquired with 'nfp_cpp_area_acquire()' before calling this ++ * operation. ++ * ++ * NOTE: offset must be 64-bit aligned. ++ * ++ * @return 0 on success, or -1 on error (and set errno accordingly). ++ */ ++int nfp_cpp_area_readq(struct nfp_cpp_area *area, unsigned long offset, ++ uint64_t *value); ++ ++/* ++ * Write a single 64-bit value to a NFP CPP area handle ++ * ++ * @param area NFP CPP area handle ++ * @param offset offset into NFP CPP area handle ++ * @param value value to write ++ * ++ * The area must be acquired with 'nfp_cpp_area_acquire()' before calling this ++ * operation. ++ * ++ * NOTE: offset must be 64-bit aligned. ++ * ++ * @return 0 on success, or -1 on error (and set errno accordingly). ++ */ ++int nfp_cpp_area_writeq(struct nfp_cpp_area *area, unsigned long offset, ++ uint64_t value); ++ ++/* ++ * Write a single 32-bit value on the XPB bus ++ * ++ * @param cpp NFP CPP device handle ++ * @param xpb_tgt XPB target and address ++ * @param value value to write ++ * ++ * @return 0 on success, or -1 on failure (and set errno accordingly). ++ */ ++int nfp_xpb_writel(struct nfp_cpp *cpp, uint32_t xpb_tgt, uint32_t value); ++ ++/* ++ * Read a single 32-bit value from the XPB bus ++ * ++ * @param cpp NFP CPP device handle ++ * @param xpb_tgt XPB target and address ++ * @param value output value ++ * ++ * @return 0 on success, or -1 on failure (and set errno accordingly). 
++ */ ++int nfp_xpb_readl(struct nfp_cpp *cpp, uint32_t xpb_tgt, uint32_t *value); ++ ++/* ++ * Modify bits of a 32-bit value on the XPB bus ++ * ++ * @param cpp NFP CPP device handle ++ * @param xpb_tgt XPB target and address ++ * @param mask mask of bits to alter ++ * @param value value to modify ++ * ++ * @return 0 on success, or -1 on failure (and set errno accordingly). ++ */ ++int nfp_xpb_writelm(struct nfp_cpp *cpp, uint32_t xpb_tgt, uint32_t mask, ++ uint32_t value); ++ ++/* ++ * Wait for masked bits of a 32-bit value on the XPB bus to match a value ++ * ++ * @param cpp NFP CPP device handle ++ * @param xpb_tgt XPB target and address ++ * @param mask mask of bits to monitor ++ * @param value value to monitor for ++ * @param timeout_us maximum number of us to wait (-1 for forever) ++ * ++ * @return >= 0 on success, or -1 on failure (and set errno accordingly). ++ */ ++int nfp_xpb_waitlm(struct nfp_cpp *cpp, uint32_t xpb_tgt, uint32_t mask, ++ uint32_t value, int timeout_us); ++ ++/* ++ * Read a 32-bit word from a NFP CPP ID ++ * ++ * @param cpp NFP CPP handle ++ * @param cpp_id NFP CPP ID ++ * @param address offset into the NFP CPP ID address space ++ * @param value output value ++ * ++ * @return 0 on success, or -1 on failure (and set errno accordingly). ++ */ ++int nfp_cpp_readl(struct nfp_cpp *cpp, uint32_t cpp_id, ++ unsigned long long address, uint32_t *value); ++ ++/* ++ * Write a 32-bit value to a NFP CPP ID ++ * ++ * @param cpp NFP CPP handle ++ * @param cpp_id NFP CPP ID ++ * @param address offset into the NFP CPP ID address space ++ * @param value value to write ++ * ++ * @return 0 on success, or -1 on failure (and set errno accordingly).
++ * ++ */ ++int nfp_cpp_writel(struct nfp_cpp *cpp, uint32_t cpp_id, ++ unsigned long long address, uint32_t value); ++ ++/* ++ * Read a 64-bit word from a NFP CPP ID ++ * ++ * @param cpp NFP CPP handle ++ * @param cpp_id NFP CPP ID ++ * @param address offset into the NFP CPP ID address space ++ * @param value output value ++ * ++ * @return 0 on success, or -1 on failure (and set errno accordingly). ++ */ ++int nfp_cpp_readq(struct nfp_cpp *cpp, uint32_t cpp_id, ++ unsigned long long address, uint64_t *value); ++ ++/* ++ * Write a 64-bit value to a NFP CPP ID ++ * ++ * @param cpp NFP CPP handle ++ * @param cpp_id NFP CPP ID ++ * @param address offset into the NFP CPP ID address space ++ * @param value value to write ++ * ++ * @return 0 on success, or -1 on failure (and set errno accordingly). ++ */ ++int nfp_cpp_writeq(struct nfp_cpp *cpp, uint32_t cpp_id, ++ unsigned long long address, uint64_t value); ++ ++/* ++ * Initialize a mutex location ++ * ++ * The CPP target:address must point to a 64-bit aligned location, and will ++ * initialize 64 bits of data at the location. ++ * ++ * This creates the initial mutex state, as locked by this nfp_cpp_interface(). ++ * ++ * This function should only be called when setting up the initial lock state ++ * upon boot-up of the system. ++ * ++ * @param cpp NFP CPP handle ++ * @param target NFP CPP target ID ++ * @param address Offset into the address space of the NFP CPP target ID ++ * @param key_id Unique 32-bit value for this mutex ++ * ++ * @return 0 on success, or -1 on failure (and set errno accordingly). ++ */ ++int nfp_cpp_mutex_init(struct nfp_cpp *cpp, int target, ++ unsigned long long address, uint32_t key_id); ++ ++/* ++ * Create a mutex handle from an address controlled by a MU Atomic engine ++ * ++ * The CPP target:address must point to a 64-bit aligned location, and reserve ++ * 64 bits of data at the location for use by the handle.
++ * ++ * Only target/address pairs that point to entities that support the MU Atomic ++ * Engine's CmpAndSwap32 command are supported. ++ * ++ * @param cpp NFP CPP handle ++ * @param target NFP CPP target ID ++ * @param address Offset into the address space of the NFP CPP target ID ++ * @param key_id 32-bit unique key (must match the key at this location) ++ * ++ * @return A non-NULL struct nfp_cpp_mutex * on success, NULL on ++ * failure. ++ */ ++struct nfp_cpp_mutex *nfp_cpp_mutex_alloc(struct nfp_cpp *cpp, int target, ++ unsigned long long address, ++ uint32_t key_id); ++ ++/* ++ * Get the NFP CPP handle the mutex was created with ++ * ++ * @param mutex NFP mutex handle ++ * @return NFP CPP handle ++ */ ++struct nfp_cpp *nfp_cpp_mutex_cpp(struct nfp_cpp_mutex *mutex); ++ ++/* ++ * Get the mutex key ++ * ++ * @param mutex NFP mutex handle ++ * @return Mutex key ++ */ ++uint32_t nfp_cpp_mutex_key(struct nfp_cpp_mutex *mutex); ++ ++/* ++ * Get the mutex owner ++ * ++ * @param mutex NFP mutex handle ++ * @return Interface ID of the mutex owner ++ * ++ * NOTE: This is for debug purposes ONLY - the owner may change at any time, ++ * unless it has been locked by this NFP CPP handle. ++ */ ++uint16_t nfp_cpp_mutex_owner(struct nfp_cpp_mutex *mutex); ++ ++/* ++ * Get the mutex target ++ * ++ * @param mutex NFP mutex handle ++ * @return Mutex CPP target (ie NFP_CPP_TARGET_MU) ++ */ ++int nfp_cpp_mutex_target(struct nfp_cpp_mutex *mutex); ++ ++/* ++ * Get the mutex address ++ * ++ * @param mutex NFP mutex handle ++ * @return Mutex CPP address ++ */ ++uint64_t nfp_cpp_mutex_address(struct nfp_cpp_mutex *mutex); ++ ++/* ++ * Free a mutex handle - does not alter the lock state ++ * ++ * @param mutex NFP CPP Mutex handle ++ */ ++void nfp_cpp_mutex_free(struct nfp_cpp_mutex *mutex); ++ ++/* ++ * Lock a mutex handle, using the NFP MU Atomic Engine ++ * ++ * @param mutex NFP CPP Mutex handle ++ * ++ * @return 0 on success, or -1 on failure (and set errno accordingly). 
++ */ ++int nfp_cpp_mutex_lock(struct nfp_cpp_mutex *mutex); ++ ++/* ++ * Unlock a mutex handle, using the NFP MU Atomic Engine ++ * ++ * @param mutex NFP CPP Mutex handle ++ * ++ * @return 0 on success, or -1 on failure (and set errno accordingly). ++ */ ++int nfp_cpp_mutex_unlock(struct nfp_cpp_mutex *mutex); ++ ++/* ++ * Attempt to lock a mutex handle, using the NFP MU Atomic Engine ++ * ++ * @param mutex NFP CPP Mutex handle ++ * @return 0 if the lock succeeded, -1 on failure (and errno set ++ * appropriately). ++ */ ++int nfp_cpp_mutex_trylock(struct nfp_cpp_mutex *mutex); ++ ++#endif /* !__NFP_CPP_H__ */ +diff --git a/drivers/net/nfp/nfpcore/nfp_cpp_pcie_ops.c b/drivers/net/nfp/nfpcore/nfp_cpp_pcie_ops.c +new file mode 100644 +index 000000000..ad6ce72fe +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp_cpp_pcie_ops.c +@@ -0,0 +1,936 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. ++ */ ++ ++/* ++ * nfp_cpp_pcie_ops.c ++ * Authors: Vinayak Tammineedi ++ * ++ * Multiplexes the NFP BARs between NFP internal resources and ++ * implements the PCIe specific interface for generic CPP bus access. ++ * ++ * The BARs are managed and allocated if they are available. ++ * The generic CPP bus abstraction builds upon this BAR interface. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include "nfp_cpp.h" ++#include "nfp_target.h" ++#include "nfp6000/nfp6000.h" ++ ++#define NFP_PCIE_BAR(_pf) (0x30000 + ((_pf) & 7) * 0xc0) ++ ++#define NFP_PCIE_BAR_PCIE2CPP_ACTION_BASEADDRESS(_x) (((_x) & 0x1f) << 16) ++#define NFP_PCIE_BAR_PCIE2CPP_BASEADDRESS(_x) (((_x) & 0xffff) << 0) ++#define NFP_PCIE_BAR_PCIE2CPP_LENGTHSELECT(_x) (((_x) & 0x3) << 27) ++#define NFP_PCIE_BAR_PCIE2CPP_LENGTHSELECT_32BIT 0 ++#define NFP_PCIE_BAR_PCIE2CPP_LENGTHSELECT_64BIT 1 ++#define NFP_PCIE_BAR_PCIE2CPP_LENGTHSELECT_0BYTE 3 ++#define NFP_PCIE_BAR_PCIE2CPP_MAPTYPE(_x) (((_x) & 0x7) << 29) ++#define NFP_PCIE_BAR_PCIE2CPP_MAPTYPE_OF(_x) (((_x) >> 29) & 0x7) ++#define NFP_PCIE_BAR_PCIE2CPP_MAPTYPE_FIXED 0 ++#define NFP_PCIE_BAR_PCIE2CPP_MAPTYPE_BULK 1 ++#define NFP_PCIE_BAR_PCIE2CPP_MAPTYPE_TARGET 2 ++#define NFP_PCIE_BAR_PCIE2CPP_MAPTYPE_GENERAL 3 ++#define NFP_PCIE_BAR_PCIE2CPP_TARGET_BASEADDRESS(_x) (((_x) & 0xf) << 23) ++#define NFP_PCIE_BAR_PCIE2CPP_TOKEN_BASEADDRESS(_x) (((_x) & 0x3) << 21) ++ ++/* ++ * Minimal size of the PCIe cfg memory we depend on being mapped, ++ * queue controller and DMA controller don't have to be covered. ++ */ ++#define NFP_PCI_MIN_MAP_SIZE 0x080000 ++ ++#define NFP_PCIE_P2C_FIXED_SIZE(bar) (1 << (bar)->bitsize) ++#define NFP_PCIE_P2C_BULK_SIZE(bar) (1 << (bar)->bitsize) ++#define NFP_PCIE_P2C_GENERAL_TARGET_OFFSET(bar, x) ((x) << ((bar)->bitsize - 2)) ++#define NFP_PCIE_P2C_GENERAL_TOKEN_OFFSET(bar, x) ((x) << ((bar)->bitsize - 4)) ++#define NFP_PCIE_P2C_GENERAL_SIZE(bar) (1 << ((bar)->bitsize - 4)) ++ ++#define NFP_PCIE_CFG_BAR_PCIETOCPPEXPBAR(bar, slot) \ ++ (NFP_PCIE_BAR(0) + ((bar) * 8 + (slot)) * 4) ++ ++#define NFP_PCIE_CPP_BAR_PCIETOCPPEXPBAR(bar, slot) \ ++ (((bar) * 8 + (slot)) * 4) ++ ++/* ++ * Define to enable a bit more verbose debug output. 
++ * Set to 1 to enable a bit more verbose debug output. ++ */ ++struct nfp_pcie_user; ++struct nfp6000_area_priv; ++ ++/* ++ * struct nfp_bar - describes BAR configuration and usage ++ * @nfp: backlink to owner ++ * @barcfg: cached contents of BAR config CSR ++ * @base: the BAR's base CPP offset ++ * @mask: mask for the BAR aperture (read only) ++ * @bitsize: bitsize of BAR aperture (read only) ++ * @index: index of the BAR ++ * @lock: lock to specify if bar is in use ++ * @csr: address of this BAR's config CSR within the mapped cfg area ++ * @iomem: mapped IO memory ++ */ ++#define NFP_BAR_MAX 7 ++struct nfp_bar { ++ struct nfp_pcie_user *nfp; ++ uint32_t barcfg; ++ uint64_t base; /* CPP address base */ ++ uint64_t mask; /* Bit mask of the bar */ ++ uint32_t bitsize; /* Bit size of the bar */ ++ int index; ++ int lock; ++ ++ char *csr; ++ char *iomem; ++}; ++ ++#define BUSDEV_SZ 13 ++struct nfp_pcie_user { ++ struct nfp_bar bar[NFP_BAR_MAX]; ++ ++ int device; ++ int lock; ++ char busdev[BUSDEV_SZ]; ++ int barsz; ++ char *cfg; ++}; ++ ++static uint32_t ++nfp_bar_maptype(struct nfp_bar *bar) ++{ ++ return NFP_PCIE_BAR_PCIE2CPP_MAPTYPE_OF(bar->barcfg); ++} ++ ++#define TARGET_WIDTH_32 4 ++#define TARGET_WIDTH_64 8 ++ ++static int ++nfp_compute_bar(const struct nfp_bar *bar, uint32_t *bar_config, ++ uint64_t *bar_base, int tgt, int act, int tok, ++ uint64_t offset, size_t size, int width) ++{ ++ uint32_t bitsize; ++ uint32_t newcfg; ++ uint64_t mask; ++ ++ if (tgt >= 16) ++ return -EINVAL; ++ ++ switch (width) { ++ case 8: ++ newcfg = ++ NFP_PCIE_BAR_PCIE2CPP_LENGTHSELECT ++ (NFP_PCIE_BAR_PCIE2CPP_LENGTHSELECT_64BIT); ++ break; ++ case 4: ++ newcfg = ++ NFP_PCIE_BAR_PCIE2CPP_LENGTHSELECT ++ (NFP_PCIE_BAR_PCIE2CPP_LENGTHSELECT_32BIT); ++ break; ++ case 0: ++ newcfg = ++ NFP_PCIE_BAR_PCIE2CPP_LENGTHSELECT ++ (NFP_PCIE_BAR_PCIE2CPP_LENGTHSELECT_0BYTE); ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ if (act != NFP_CPP_ACTION_RW && act != 0) { ++ /* Fixed CPP mapping with specific action */ ++ mask
= ~(NFP_PCIE_P2C_FIXED_SIZE(bar) - 1); ++ ++ newcfg |= ++ NFP_PCIE_BAR_PCIE2CPP_MAPTYPE ++ (NFP_PCIE_BAR_PCIE2CPP_MAPTYPE_FIXED); ++ newcfg |= NFP_PCIE_BAR_PCIE2CPP_TARGET_BASEADDRESS(tgt); ++ newcfg |= NFP_PCIE_BAR_PCIE2CPP_ACTION_BASEADDRESS(act); ++ newcfg |= NFP_PCIE_BAR_PCIE2CPP_TOKEN_BASEADDRESS(tok); ++ ++ if ((offset & mask) != ((offset + size - 1) & mask)) { ++ printf("BAR%d: Won't use for Fixed mapping\n", ++ bar->index); ++ printf("\t<%#llx,%#llx>, action=%d\n", ++ (unsigned long long)offset, ++ (unsigned long long)(offset + size), act); ++ printf("\tBAR too small (0x%llx).\n", ++ (unsigned long long)mask); ++ return -EINVAL; ++ } ++ offset &= mask; ++ ++#ifdef DEBUG ++ printf("BAR%d: Created Fixed mapping\n", bar->index); ++ printf("\t%d:%d:%d:0x%#llx-0x%#llx>\n", tgt, act, tok, ++ (unsigned long long)offset, ++ (unsigned long long)(offset + mask)); ++#endif ++ ++ bitsize = 40 - 16; ++ } else { ++ mask = ~(NFP_PCIE_P2C_BULK_SIZE(bar) - 1); ++ ++ /* Bulk mapping */ ++ newcfg |= ++ NFP_PCIE_BAR_PCIE2CPP_MAPTYPE ++ (NFP_PCIE_BAR_PCIE2CPP_MAPTYPE_BULK); ++ ++ newcfg |= NFP_PCIE_BAR_PCIE2CPP_TARGET_BASEADDRESS(tgt); ++ newcfg |= NFP_PCIE_BAR_PCIE2CPP_TOKEN_BASEADDRESS(tok); ++ ++ if ((offset & mask) != ((offset + size - 1) & mask)) { ++ printf("BAR%d: Won't use for bulk mapping\n", ++ bar->index); ++ printf("\t<%#llx,%#llx>\n", (unsigned long long)offset, ++ (unsigned long long)(offset + size)); ++ printf("\ttarget=%d, token=%d\n", tgt, tok); ++ printf("\tBAR too small (%#llx) - (%#llx != %#llx).\n", ++ (unsigned long long)mask, ++ (unsigned long long)(offset & mask), ++ (unsigned long long)(offset + size - 1) & mask); ++ ++ return -EINVAL; ++ } ++ ++ offset &= mask; ++ ++#ifdef DEBUG ++ printf("BAR%d: Created bulk mapping %d:x:%d:%#llx-%#llx\n", ++ bar->index, tgt, tok, (unsigned long long)offset, ++ (unsigned long long)(offset + ~mask)); ++#endif ++ ++ bitsize = 40 - 21; ++ } ++ ++ if (bar->bitsize < bitsize) { ++ printf("BAR%d: Too small for %d:%d:%d\n", 
bar->index, tgt, tok, ++ act); ++ return -EINVAL; ++ } ++ ++ newcfg |= offset >> bitsize; ++ ++ if (bar_base) ++ *bar_base = offset; ++ ++ if (bar_config) ++ *bar_config = newcfg; ++ ++ return 0; ++} ++ ++static int ++nfp_bar_write(struct nfp_pcie_user *nfp, struct nfp_bar *bar, ++ uint32_t newcfg) ++{ ++ int base, slot; ++ ++ base = bar->index >> 3; ++ slot = bar->index & 7; ++ ++ if (!nfp->cfg) ++ return (-ENOMEM); ++ ++ bar->csr = nfp->cfg + ++ NFP_PCIE_CFG_BAR_PCIETOCPPEXPBAR(base, slot); ++ ++ *(uint32_t *)(bar->csr) = newcfg; ++ ++ bar->barcfg = newcfg; ++#ifdef DEBUG ++ printf("BAR%d: updated to 0x%08x\n", bar->index, newcfg); ++#endif ++ ++ return 0; ++} ++ ++static int ++nfp_reconfigure_bar(struct nfp_pcie_user *nfp, struct nfp_bar *bar, int tgt, ++ int act, int tok, uint64_t offset, size_t size, int width) ++{ ++ uint64_t newbase; ++ uint32_t newcfg; ++ int err; ++ ++ err = nfp_compute_bar(bar, &newcfg, &newbase, tgt, act, tok, offset, ++ size, width); ++ if (err) ++ return err; ++ ++ bar->base = newbase; ++ ++ return nfp_bar_write(nfp, bar, newcfg); ++} ++ ++/* ++ * Map all PCI bars. We assume that the BAR with the PCIe config block is ++ * already mapped. 
++ * ++ * BAR0.0: Reserved for General Mapping (for MSI-X access to PCIe SRAM) ++ */ ++static int ++nfp_enable_bars(struct nfp_pcie_user *nfp) ++{ ++ struct nfp_bar *bar; ++ int x; ++ ++ for (x = ARRAY_SIZE(nfp->bar); x > 0; x--) { ++ bar = &nfp->bar[x - 1]; ++ bar->barcfg = 0; ++ bar->nfp = nfp; ++ bar->index = x; ++ bar->mask = (1 << (nfp->barsz - 3)) - 1; ++ bar->bitsize = nfp->barsz - 3; ++ bar->base = 0; ++ bar->iomem = NULL; ++ bar->lock = 0; ++ bar->csr = nfp->cfg + ++ NFP_PCIE_CFG_BAR_PCIETOCPPEXPBAR(bar->index >> 3, ++ bar->index & 7); ++ bar->iomem = ++ (char *)mmap(0, 1 << bar->bitsize, PROT_READ | PROT_WRITE, ++ MAP_SHARED, nfp->device, ++ bar->index << bar->bitsize); ++ ++ if (bar->iomem == MAP_FAILED) ++ return (-ENOMEM); ++ } ++ return 0; ++} ++ ++static struct nfp_bar * ++nfp_alloc_bar(struct nfp_pcie_user *nfp) ++{ ++ struct nfp_bar *bar; ++ int x; ++ ++ for (x = ARRAY_SIZE(nfp->bar); x > 0; x--) { ++ bar = &nfp->bar[x - 1]; ++ if (!bar->lock) { ++ bar->lock = 1; ++ return bar; ++ } ++ } ++ return NULL; ++} ++ ++static void ++nfp_disable_bars(struct nfp_pcie_user *nfp) ++{ ++ struct nfp_bar *bar; ++ int x; ++ ++ for (x = ARRAY_SIZE(nfp->bar); x > 0; x--) { ++ bar = &nfp->bar[x - 1]; ++ if (bar->iomem) { ++ munmap(bar->iomem, 1 << (nfp->barsz - 3)); ++ bar->iomem = NULL; ++ bar->lock = 0; ++ } ++ } ++} ++ ++/* ++ * Generic CPP bus access interface. 
++ */ ++ ++struct nfp6000_area_priv { ++ struct nfp_bar *bar; ++ uint32_t bar_offset; ++ ++ uint32_t target; ++ uint32_t action; ++ uint32_t token; ++ uint64_t offset; ++ struct { ++ int read; ++ int write; ++ int bar; ++ } width; ++ size_t size; ++ char *iomem; ++}; ++ ++static int ++nfp6000_area_init(struct nfp_cpp_area *area, uint32_t dest, ++ unsigned long long address, unsigned long size) ++{ ++ struct nfp_pcie_user *nfp = nfp_cpp_priv(nfp_cpp_area_cpp(area)); ++ struct nfp6000_area_priv *priv = nfp_cpp_area_priv(area); ++ uint32_t target = NFP_CPP_ID_TARGET_of(dest); ++ uint32_t action = NFP_CPP_ID_ACTION_of(dest); ++ uint32_t token = NFP_CPP_ID_TOKEN_of(dest); ++ int pp, ret = 0; ++ ++ pp = nfp6000_target_pushpull(NFP_CPP_ID(target, action, token), ++ address); ++ if (pp < 0) ++ return pp; ++ ++ priv->width.read = PUSH_WIDTH(pp); ++ priv->width.write = PULL_WIDTH(pp); ++ ++ if (priv->width.read > 0 && ++ priv->width.write > 0 && priv->width.read != priv->width.write) ++ return -EINVAL; ++ ++ if (priv->width.read > 0) ++ priv->width.bar = priv->width.read; ++ else ++ priv->width.bar = priv->width.write; ++ ++ priv->bar = nfp_alloc_bar(nfp); ++ if (priv->bar == NULL) ++ return -ENOMEM; ++ ++ priv->target = target; ++ priv->action = action; ++ priv->token = token; ++ priv->offset = address; ++ priv->size = size; ++ ++ ret = nfp_reconfigure_bar(nfp, priv->bar, priv->target, priv->action, ++ priv->token, priv->offset, priv->size, ++ priv->width.bar); ++ ++ return ret; ++} ++ ++static int ++nfp6000_area_acquire(struct nfp_cpp_area *area) ++{ ++ struct nfp6000_area_priv *priv = nfp_cpp_area_priv(area); ++ ++ /* Calculate offset into BAR. 
*/ ++ if (nfp_bar_maptype(priv->bar) == ++ NFP_PCIE_BAR_PCIE2CPP_MAPTYPE_GENERAL) { ++ priv->bar_offset = priv->offset & ++ (NFP_PCIE_P2C_GENERAL_SIZE(priv->bar) - 1); ++ priv->bar_offset += ++ NFP_PCIE_P2C_GENERAL_TARGET_OFFSET(priv->bar, ++ priv->target); ++ priv->bar_offset += ++ NFP_PCIE_P2C_GENERAL_TOKEN_OFFSET(priv->bar, priv->token); ++ } else { ++ priv->bar_offset = priv->offset & priv->bar->mask; ++ } ++ ++ /* Must have been too big. Sub-allocate. */ ++ if (!priv->bar->iomem) ++ return (-ENOMEM); ++ ++ priv->iomem = priv->bar->iomem + priv->bar_offset; ++ ++ return 0; ++} ++ ++static void * ++nfp6000_area_mapped(struct nfp_cpp_area *area) ++{ ++ struct nfp6000_area_priv *area_priv = nfp_cpp_area_priv(area); ++ ++ if (!area_priv->iomem) ++ return NULL; ++ ++ return area_priv->iomem; ++} ++ ++static void ++nfp6000_area_release(struct nfp_cpp_area *area) ++{ ++ struct nfp6000_area_priv *priv = nfp_cpp_area_priv(area); ++ priv->bar->lock = 0; ++ priv->bar = NULL; ++ priv->iomem = NULL; ++} ++ ++static void * ++nfp6000_area_iomem(struct nfp_cpp_area *area) ++{ ++ struct nfp6000_area_priv *priv = nfp_cpp_area_priv(area); ++ return priv->iomem; ++} ++ ++static int ++nfp6000_area_read(struct nfp_cpp_area *area, void *kernel_vaddr, ++ unsigned long offset, unsigned int length) ++{ ++ uint64_t *wrptr64 = kernel_vaddr; ++ const volatile uint64_t *rdptr64; ++ struct nfp6000_area_priv *priv; ++ uint32_t *wrptr32 = kernel_vaddr; ++ const volatile uint32_t *rdptr32; ++ int width; ++ unsigned int n; ++ bool is_64; ++ ++ priv = nfp_cpp_area_priv(area); ++ rdptr64 = (uint64_t *)(priv->iomem + offset); ++ rdptr32 = (uint32_t *)(priv->iomem + offset); ++ ++ if (offset + length > priv->size) ++ return -EFAULT; ++ ++ width = priv->width.read; ++ ++ if (width <= 0) ++ return -EINVAL; ++ ++ /* Unaligned? 
Translate to an explicit access */ ++ if ((priv->offset + offset) & (width - 1)) { ++ printf("aread_read unaligned!!!\n"); ++ return -EINVAL; ++ } ++ ++ is_64 = width == TARGET_WIDTH_64; ++ ++ /* MU reads via a PCIe2CPP BAR supports 32bit (and other) lengths */ ++ if (priv->target == (NFP_CPP_TARGET_ID_MASK & NFP_CPP_TARGET_MU) && ++ priv->action == NFP_CPP_ACTION_RW) { ++ is_64 = false; ++ } ++ ++ if (is_64) { ++ if (offset % sizeof(uint64_t) != 0 || ++ length % sizeof(uint64_t) != 0) ++ return -EINVAL; ++ } else { ++ if (offset % sizeof(uint32_t) != 0 || ++ length % sizeof(uint32_t) != 0) ++ return -EINVAL; ++ } ++ ++ if (!priv->bar) ++ return -EFAULT; ++ ++ if (is_64) ++ for (n = 0; n < length; n += sizeof(uint64_t)) { ++ *wrptr64 = *rdptr64; ++ wrptr64++; ++ rdptr64++; ++ } ++ else ++ for (n = 0; n < length; n += sizeof(uint32_t)) { ++ *wrptr32 = *rdptr32; ++ wrptr32++; ++ rdptr32++; ++ } ++ ++ return n; ++} ++ ++static int ++nfp6000_area_write(struct nfp_cpp_area *area, const void *kernel_vaddr, ++ unsigned long offset, unsigned int length) ++{ ++ const uint64_t *rdptr64 = kernel_vaddr; ++ uint64_t *wrptr64; ++ const uint32_t *rdptr32 = kernel_vaddr; ++ struct nfp6000_area_priv *priv; ++ uint32_t *wrptr32; ++ int width; ++ unsigned int n; ++ bool is_64; ++ ++ priv = nfp_cpp_area_priv(area); ++ wrptr64 = (uint64_t *)(priv->iomem + offset); ++ wrptr32 = (uint32_t *)(priv->iomem + offset); ++ ++ if (offset + length > priv->size) ++ return -EFAULT; ++ ++ width = priv->width.write; ++ ++ if (width <= 0) ++ return -EINVAL; ++ ++ /* Unaligned? 
Translate to an explicit access */ ++ if ((priv->offset + offset) & (width - 1)) ++ return -EINVAL; ++ ++ is_64 = width == TARGET_WIDTH_64; ++ ++ /* MU writes via a PCIe2CPP BAR supports 32bit (and other) lengths */ ++ if (priv->target == (NFP_CPP_TARGET_ID_MASK & NFP_CPP_TARGET_MU) && ++ priv->action == NFP_CPP_ACTION_RW) ++ is_64 = false; ++ ++ if (is_64) { ++ if (offset % sizeof(uint64_t) != 0 || ++ length % sizeof(uint64_t) != 0) ++ return -EINVAL; ++ } else { ++ if (offset % sizeof(uint32_t) != 0 || ++ length % sizeof(uint32_t) != 0) ++ return -EINVAL; ++ } ++ ++ if (!priv->bar) ++ return -EFAULT; ++ ++ if (is_64) ++ for (n = 0; n < length; n += sizeof(uint64_t)) { ++ *wrptr64 = *rdptr64; ++ wrptr64++; ++ rdptr64++; ++ } ++ else ++ for (n = 0; n < length; n += sizeof(uint32_t)) { ++ *wrptr32 = *rdptr32; ++ wrptr32++; ++ rdptr32++; ++ } ++ ++ return n; ++} ++ ++#define PCI_DEVICES "/sys/bus/pci/devices" ++ ++static int ++nfp_acquire_process_lock(struct nfp_pcie_user *desc) ++{ ++ int rc; ++ struct flock lock; ++ char lockname[30]; ++ ++ memset(&lock, 0, sizeof(lock)); ++ ++ snprintf(lockname, sizeof(lockname), "/var/lock/nfp_%s", desc->busdev); ++ desc->lock = open(lockname, O_RDWR | O_CREAT, 0666); ++ if (desc->lock < 0) ++ return desc->lock; ++ ++ lock.l_type = F_WRLCK; ++ lock.l_whence = SEEK_SET; ++ rc = -1; ++ while (rc != 0) { ++ rc = fcntl(desc->lock, F_SETLKW, &lock); ++ if (rc < 0) { ++ if (errno != EAGAIN && errno != EACCES) { ++ close(desc->lock); ++ return rc; ++ } ++ } ++ } ++ ++ return 0; ++} ++ ++static int ++nfp6000_set_model(struct nfp_pcie_user *desc, struct nfp_cpp *cpp) ++{ ++ char tmp_str[80]; ++ uint32_t tmp; ++ int fp; ++ ++ snprintf(tmp_str, sizeof(tmp_str), "%s/%s/config", PCI_DEVICES, ++ desc->busdev); ++ ++ fp = open(tmp_str, O_RDONLY); ++ if (!fp) ++ return -1; ++ ++ lseek(fp, 0x2e, SEEK_SET); ++ ++ if (read(fp, &tmp, sizeof(tmp)) != sizeof(tmp)) { ++ printf("Error reading config file for model\n"); ++ return -1; ++ } ++ ++ tmp = tmp 
<< 16; ++ ++ if (close(fp) == -1) ++ return -1; ++ ++ nfp_cpp_model_set(cpp, tmp); ++ ++ return 0; ++} ++ ++static int ++nfp6000_set_interface(struct nfp_pcie_user *desc, struct nfp_cpp *cpp) ++{ ++ char tmp_str[80]; ++ uint16_t tmp; ++ int fp; ++ ++ snprintf(tmp_str, sizeof(tmp_str), "%s/%s/config", PCI_DEVICES, ++ desc->busdev); ++ ++ fp = open(tmp_str, O_RDONLY); ++ if (!fp) ++ return -1; ++ ++ lseek(fp, 0x154, SEEK_SET); ++ ++ if (read(fp, &tmp, sizeof(tmp)) != sizeof(tmp)) { ++ printf("error reading config file for interface\n"); ++ return -1; ++ } ++ ++ if (close(fp) == -1) ++ return -1; ++ ++ nfp_cpp_interface_set(cpp, tmp); ++ ++ return 0; ++} ++ ++#define PCI_CFG_SPACE_SIZE 256 ++#define PCI_CFG_SPACE_EXP_SIZE 4096 ++#define PCI_EXT_CAP_ID(header) (int)(header & 0x0000ffff) ++#define PCI_EXT_CAP_NEXT(header) ((header >> 20) & 0xffc) ++#define PCI_EXT_CAP_ID_DSN 0x03 ++static int ++nfp_pci_find_next_ext_capability(int fp, int cap) ++{ ++ uint32_t header; ++ int ttl; ++ int pos = PCI_CFG_SPACE_SIZE; ++ ++ /* minimum 8 bytes per capability */ ++ ttl = (PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) / 8; ++ ++ lseek(fp, pos, SEEK_SET); ++ if (read(fp, &header, sizeof(header)) != sizeof(header)) { ++ printf("error reading config file for serial\n"); ++ return -1; ++ } ++ ++ /* ++ * If we have no capabilities, this is indicated by cap ID, ++ * cap version and next pointer all being 0. 
++ */ ++ if (header == 0) ++ return 0; ++ ++ while (ttl-- > 0) { ++ if (PCI_EXT_CAP_ID(header) == cap) ++ return pos; ++ ++ pos = PCI_EXT_CAP_NEXT(header); ++ if (pos < PCI_CFG_SPACE_SIZE) ++ break; ++ ++ lseek(fp, pos, SEEK_SET); ++ if (read(fp, &header, sizeof(header)) != sizeof(header)) { ++ printf("error reading config file for serial\n"); ++ return -1; ++ } ++ } ++ ++ return 0; ++} ++ ++static int ++nfp6000_set_serial(struct nfp_pcie_user *desc, struct nfp_cpp *cpp) ++{ ++ char tmp_str[80]; ++ uint16_t tmp; ++ uint8_t serial[6]; ++ int serial_len = 6; ++ int fp, pos; ++ ++ snprintf(tmp_str, sizeof(tmp_str), "%s/%s/config", PCI_DEVICES, ++ desc->busdev); ++ ++ fp = open(tmp_str, O_RDONLY); ++ if (!fp) ++ return -1; ++ ++ pos = nfp_pci_find_next_ext_capability(fp, PCI_EXT_CAP_ID_DSN); ++ if (pos <= 0) { ++ printf("PCI_EXT_CAP_ID_DSN not found. Using default offset\n"); ++ lseek(fp, 0x156, SEEK_SET); ++ } else { ++ lseek(fp, pos + 6, SEEK_SET); ++ } ++ ++ if (read(fp, &tmp, sizeof(tmp)) != sizeof(tmp)) { ++ printf("error reading config file for serial\n"); ++ return -1; ++ } ++ ++ serial[4] = (uint8_t)((tmp >> 8) & 0xff); ++ serial[5] = (uint8_t)(tmp & 0xff); ++ ++ if (read(fp, &tmp, sizeof(tmp)) != sizeof(tmp)) { ++ printf("error reading config file for serial\n"); ++ return -1; ++ } ++ ++ serial[2] = (uint8_t)((tmp >> 8) & 0xff); ++ serial[3] = (uint8_t)(tmp & 0xff); ++ ++ if (read(fp, &tmp, sizeof(tmp)) != sizeof(tmp)) { ++ printf("error reading config file for serial\n"); ++ return -1; ++ } ++ ++ serial[0] = (uint8_t)((tmp >> 8) & 0xff); ++ serial[1] = (uint8_t)(tmp & 0xff); ++ ++ if (close(fp) == -1) ++ return -1; ++ ++ nfp_cpp_serial_set(cpp, serial, serial_len); ++ ++ return 0; ++} ++ ++static int ++nfp6000_set_barsz(struct nfp_pcie_user *desc) ++{ ++ char tmp_str[80]; ++ unsigned long start, end, flags, tmp; ++ int i; ++ FILE *fp; ++ ++ snprintf(tmp_str, sizeof(tmp_str), "%s/%s/resource", PCI_DEVICES, ++ desc->busdev); ++ ++ fp = fopen(tmp_str, "r"); ++ 
if (!fp) ++ return -1; ++ ++ if (fscanf(fp, "0x%lx 0x%lx 0x%lx", &start, &end, &flags) == 0) { ++ printf("error reading resource file for bar size\n"); ++ return -1; ++ } ++ ++ if (fclose(fp) == -1) ++ return -1; ++ ++ tmp = (end - start) + 1; ++ i = 0; ++ while (tmp >>= 1) ++ i++; ++ desc->barsz = i; ++ return 0; ++} ++ ++static int ++nfp6000_init(struct nfp_cpp *cpp, const char *devname) ++{ ++ char link[120]; ++ char tmp_str[80]; ++ ssize_t size; ++ int ret = 0; ++ uint32_t model; ++ struct nfp_pcie_user *desc; ++ ++ desc = malloc(sizeof(*desc)); ++ if (!desc) ++ return -1; ++ ++ ++ memset(desc->busdev, 0, BUSDEV_SZ); ++ strncpy(desc->busdev, devname, strlen(devname)); ++ ++ ret = nfp_acquire_process_lock(desc); ++ if (ret) ++ return -1; ++ ++ snprintf(tmp_str, sizeof(tmp_str), "%s/%s/driver", PCI_DEVICES, ++ desc->busdev); ++ ++ size = readlink(tmp_str, link, sizeof(link)); ++ ++ if (size == -1) ++ tmp_str[0] = '\0'; ++ ++ if (size == sizeof(link)) ++ tmp_str[0] = '\0'; ++ ++ snprintf(tmp_str, sizeof(tmp_str), "%s/%s/resource0", PCI_DEVICES, ++ desc->busdev); ++ ++ desc->device = open(tmp_str, O_RDWR); ++ if (desc->device == -1) ++ return -1; ++ ++ if (nfp6000_set_model(desc, cpp) < 0) ++ return -1; ++ if (nfp6000_set_interface(desc, cpp) < 0) ++ return -1; ++ if (nfp6000_set_serial(desc, cpp) < 0) ++ return -1; ++ if (nfp6000_set_barsz(desc) < 0) ++ return -1; ++ ++ desc->cfg = (char *)mmap(0, 1 << (desc->barsz - 3), ++ PROT_READ | PROT_WRITE, ++ MAP_SHARED, desc->device, 0); ++ ++ if (desc->cfg == MAP_FAILED) ++ return -1; ++ ++ nfp_enable_bars(desc); ++ ++ nfp_cpp_priv_set(cpp, desc); ++ ++ model = __nfp_cpp_model_autodetect(cpp); ++ nfp_cpp_model_set(cpp, model); ++ ++ return ret; ++} ++ ++static void ++nfp6000_free(struct nfp_cpp *cpp) ++{ ++ struct nfp_pcie_user *desc = nfp_cpp_priv(cpp); ++ int x; ++ ++ /* Unmap may cause if there are any pending transaxctions */ ++ nfp_disable_bars(desc); ++ munmap(desc->cfg, 1 << (desc->barsz - 3)); ++ ++ for (x = 
ARRAY_SIZE(desc->bar); x > 0; x--) { ++ if (desc->bar[x - 1].iomem) ++ munmap(desc->bar[x - 1].iomem, 1 << (desc->barsz - 3)); ++ } ++ close(desc->lock); ++ close(desc->device); ++ free(desc); ++} ++ ++static const struct nfp_cpp_operations nfp6000_pcie_ops = { ++ .init = nfp6000_init, ++ .free = nfp6000_free, ++ ++ .area_priv_size = sizeof(struct nfp6000_area_priv), ++ .area_init = nfp6000_area_init, ++ .area_acquire = nfp6000_area_acquire, ++ .area_release = nfp6000_area_release, ++ .area_mapped = nfp6000_area_mapped, ++ .area_read = nfp6000_area_read, ++ .area_write = nfp6000_area_write, ++ .area_iomem = nfp6000_area_iomem, ++}; ++ ++const struct ++nfp_cpp_operations *nfp_cpp_transport_operations(void) ++{ ++ return &nfp6000_pcie_ops; ++} +diff --git a/drivers/net/nfp/nfpcore/nfp_cppcore.c b/drivers/net/nfp/nfpcore/nfp_cppcore.c +new file mode 100644 +index 000000000..94d4a0b6b +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp_cppcore.c +@@ -0,0 +1,856 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "nfp_cpp.h" ++#include "nfp_target.h" ++#include "nfp6000/nfp6000.h" ++#include "nfp6000/nfp_xpb.h" ++#include "nfp_nffw.h" ++ ++#define NFP_PL_DEVICE_ID 0x00000004 ++#define NFP_PL_DEVICE_ID_MASK 0xff ++ ++#define NFP6000_ARM_GCSR_SOFTMODEL0 0x00400144 ++ ++void ++nfp_cpp_priv_set(struct nfp_cpp *cpp, void *priv) ++{ ++ cpp->priv = priv; ++} ++ ++void * ++nfp_cpp_priv(struct nfp_cpp *cpp) ++{ ++ return cpp->priv; ++} ++ ++void ++nfp_cpp_model_set(struct nfp_cpp *cpp, uint32_t model) ++{ ++ cpp->model = model; ++} ++ ++uint32_t ++nfp_cpp_model(struct nfp_cpp *cpp) ++{ ++ if (!cpp) ++ return NFP_CPP_MODEL_INVALID; ++ ++ if (cpp->model == 0) ++ cpp->model = __nfp_cpp_model_autodetect(cpp); ++ ++ return cpp->model; ++} ++ ++void ++nfp_cpp_interface_set(struct nfp_cpp *cpp, uint32_t interface) ++{ ++ cpp->interface = interface; ++} ++ ++int ++nfp_cpp_serial(struct nfp_cpp *cpp, const uint8_t **serial) ++{ ++ *serial = cpp->serial; ++ return cpp->serial_len; ++} ++ ++int ++nfp_cpp_serial_set(struct nfp_cpp *cpp, const uint8_t *serial, ++ size_t serial_len) ++{ ++ if (cpp->serial_len) ++ free(cpp->serial); ++ ++ cpp->serial = malloc(serial_len); ++ if (!cpp->serial) ++ return -1; ++ ++ memcpy(cpp->serial, serial, serial_len); ++ cpp->serial_len = serial_len; ++ ++ return 0; ++} ++ ++uint16_t ++nfp_cpp_interface(struct nfp_cpp *cpp) ++{ ++ if (!cpp) ++ return NFP_CPP_INTERFACE(NFP_CPP_INTERFACE_TYPE_INVALID, 0, 0); ++ ++ return cpp->interface; ++} ++ ++void * ++nfp_cpp_area_priv(struct nfp_cpp_area *cpp_area) ++{ ++ return &cpp_area[1]; ++} ++ ++struct nfp_cpp * ++nfp_cpp_area_cpp(struct nfp_cpp_area *cpp_area) ++{ ++ return cpp_area->cpp; ++} ++ ++const char * ++nfp_cpp_area_name(struct nfp_cpp_area *cpp_area) ++{ ++ return cpp_area->name; ++} ++ ++/* ++ * nfp_cpp_area_alloc - allocate a new CPP area ++ * @cpp: CPP handle ++ * @dest: CPP id ++ * @address: 
start address on CPP target ++ * @size: size of area in bytes ++ * ++ * Allocate and initialize a CPP area structure. The area must later ++ * be locked down with an 'acquire' before it can be safely accessed. ++ * ++ * NOTE: @address and @size must be 32-bit aligned values. ++ */ ++struct nfp_cpp_area * ++nfp_cpp_area_alloc_with_name(struct nfp_cpp *cpp, uint32_t dest, ++ const char *name, unsigned long long address, ++ unsigned long size) ++{ ++ struct nfp_cpp_area *area; ++ uint64_t tmp64 = (uint64_t)address; ++ int tmp, err; ++ ++ if (!cpp) ++ return NULL; ++ ++ /* CPP bus uses only a 40-bit address */ ++ if ((address + size) > (1ULL << 40)) ++ return NFP_ERRPTR(EFAULT); ++ ++ /* Remap from cpp_island to cpp_target */ ++ err = nfp_target_cpp(dest, tmp64, &dest, &tmp64, cpp->imb_cat_table); ++ if (err < 0) ++ return NULL; ++ ++ address = (unsigned long long)tmp64; ++ ++ if (!name) ++ name = ""; ++ ++ area = calloc(1, sizeof(*area) + cpp->op->area_priv_size + ++ strlen(name) + 1); ++ if (!area) ++ return NULL; ++ ++ area->cpp = cpp; ++ area->name = ((char *)area) + sizeof(*area) + cpp->op->area_priv_size; ++ memcpy(area->name, name, strlen(name) + 1); ++ ++ /* ++ * Preserve errno around the call to area_init, since most ++ * implementations will blindly call nfp_target_action_width()for both ++ * read or write modes, and that will set errno to EINVAL. 
++ */ ++ tmp = errno; ++ ++ err = cpp->op->area_init(area, dest, address, size); ++ if (err < 0) { ++ free(area); ++ return NULL; ++ } ++ ++ /* Restore errno */ ++ errno = tmp; ++ ++ area->offset = address; ++ area->size = size; ++ ++ return area; ++} ++ ++struct nfp_cpp_area * ++nfp_cpp_area_alloc(struct nfp_cpp *cpp, uint32_t dest, ++ unsigned long long address, unsigned long size) ++{ ++ return nfp_cpp_area_alloc_with_name(cpp, dest, NULL, address, size); ++} ++ ++/* ++ * nfp_cpp_area_alloc_acquire - allocate a new CPP area and lock it down ++ * ++ * @cpp: CPP handle ++ * @dest: CPP id ++ * @address: start address on CPP target ++ * @size: size of area ++ * ++ * Allocate and initilizae a CPP area structure, and lock it down so ++ * that it can be accessed directly. ++ * ++ * NOTE: @address and @size must be 32-bit aligned values. ++ * ++ * NOTE: The area must also be 'released' when the structure is freed. ++ */ ++struct nfp_cpp_area * ++nfp_cpp_area_alloc_acquire(struct nfp_cpp *cpp, uint32_t destination, ++ unsigned long long address, unsigned long size) ++{ ++ struct nfp_cpp_area *area; ++ ++ area = nfp_cpp_area_alloc(cpp, destination, address, size); ++ if (!area) ++ return NULL; ++ ++ if (nfp_cpp_area_acquire(area)) { ++ nfp_cpp_area_free(area); ++ return NULL; ++ } ++ ++ return area; ++} ++ ++/* ++ * nfp_cpp_area_free - free up the CPP area ++ * area: CPP area handle ++ * ++ * Frees up memory resources held by the CPP area. ++ */ ++void ++nfp_cpp_area_free(struct nfp_cpp_area *area) ++{ ++ if (area->cpp->op->area_cleanup) ++ area->cpp->op->area_cleanup(area); ++ free(area); ++} ++ ++/* ++ * nfp_cpp_area_release_free - release CPP area and free it ++ * area: CPP area handle ++ * ++ * Releases CPP area and frees up memory resources held by the it. 
++ */ ++void ++nfp_cpp_area_release_free(struct nfp_cpp_area *area) ++{ ++ nfp_cpp_area_release(area); ++ nfp_cpp_area_free(area); ++} ++ ++/* ++ * nfp_cpp_area_acquire - lock down a CPP area for access ++ * @area: CPP area handle ++ * ++ * Locks down the CPP area for a potential long term activity. Area ++ * must always be locked down before being accessed. ++ */ ++int ++nfp_cpp_area_acquire(struct nfp_cpp_area *area) ++{ ++ if (area->cpp->op->area_acquire) { ++ int err = area->cpp->op->area_acquire(area); ++ ++ if (err < 0) ++ return -1; ++ } ++ ++ return 0; ++} ++ ++/* ++ * nfp_cpp_area_release - release a locked down CPP area ++ * @area: CPP area handle ++ * ++ * Releases a previously locked down CPP area. ++ */ ++void ++nfp_cpp_area_release(struct nfp_cpp_area *area) ++{ ++ if (area->cpp->op->area_release) ++ area->cpp->op->area_release(area); ++} ++ ++/* ++ * nfp_cpp_area_iomem() - get IOMEM region for CPP area ++ * ++ * @area: CPP area handle ++ * ++ * Returns an iomem pointer for use with readl()/writel() style operations. ++ * ++ * NOTE: Area must have been locked down with an 'acquire'. ++ * ++ * Return: pointer to the area, or NULL ++ */ ++void * ++nfp_cpp_area_iomem(struct nfp_cpp_area *area) ++{ ++ void *iomem = NULL; ++ ++ if (area->cpp->op->area_iomem) ++ iomem = area->cpp->op->area_iomem(area); ++ ++ return iomem; ++} ++ ++/* ++ * nfp_cpp_area_read - read data from CPP area ++ * ++ * @area: CPP area handle ++ * @offset: offset into CPP area ++ * @kernel_vaddr: kernel address to put data into ++ * @length: number of bytes to read ++ * ++ * Read data from indicated CPP region. ++ * ++ * NOTE: @offset and @length must be 32-bit aligned values. ++ * ++ * NOTE: Area must have been locked down with an 'acquire'. 
++ */ ++int ++nfp_cpp_area_read(struct nfp_cpp_area *area, unsigned long offset, ++ void *kernel_vaddr, size_t length) ++{ ++ if ((offset + length) > area->size) ++ return NFP_ERRNO(EFAULT); ++ ++ return area->cpp->op->area_read(area, kernel_vaddr, offset, length); ++} ++ ++/* ++ * nfp_cpp_area_write - write data to CPP area ++ * ++ * @area: CPP area handle ++ * @offset: offset into CPP area ++ * @kernel_vaddr: kernel address to read data from ++ * @length: number of bytes to write ++ * ++ * Write data to indicated CPP region. ++ * ++ * NOTE: @offset and @length must be 32-bit aligned values. ++ * ++ * NOTE: Area must have been locked down with an 'acquire'. ++ */ ++int ++nfp_cpp_area_write(struct nfp_cpp_area *area, unsigned long offset, ++ const void *kernel_vaddr, size_t length) ++{ ++ if ((offset + length) > area->size) ++ return NFP_ERRNO(EFAULT); ++ ++ return area->cpp->op->area_write(area, kernel_vaddr, offset, length); ++} ++ ++void * ++nfp_cpp_area_mapped(struct nfp_cpp_area *area) ++{ ++ if (area->cpp->op->area_mapped) ++ return area->cpp->op->area_mapped(area); ++ return NULL; ++} ++ ++/* ++ * nfp_cpp_area_check_range - check if address range fits in CPP area ++ * ++ * @area: CPP area handle ++ * @offset: offset into CPP area ++ * @length: size of address range in bytes ++ * ++ * Check if address range fits within CPP area. Return 0 if area fits ++ * or -1 on error. ++ */ ++int ++nfp_cpp_area_check_range(struct nfp_cpp_area *area, unsigned long long offset, ++ unsigned long length) ++{ ++ if (((offset + length) > area->size)) ++ return NFP_ERRNO(EFAULT); ++ ++ return 0; ++} ++ ++/* ++ * Return the correct CPP address, and fixup xpb_addr as needed, ++ * based upon NFP model. 
++ */ ++static uint32_t ++nfp_xpb_to_cpp(struct nfp_cpp *cpp, uint32_t *xpb_addr) ++{ ++ uint32_t xpb; ++ int island; ++ ++ if (!NFP_CPP_MODEL_IS_6000(cpp->model)) ++ return 0; ++ ++ xpb = NFP_CPP_ID(14, NFP_CPP_ACTION_RW, 0); ++ ++ /* ++ * Ensure that non-local XPB accesses go out through the ++ * global XPBM bus. ++ */ ++ island = ((*xpb_addr) >> 24) & 0x3f; ++ ++ if (!island) ++ return xpb; ++ ++ if (island == 1) { ++ /* ++ * Accesses to the ARM Island overlay uses Island 0 ++ * Global Bit ++ */ ++ (*xpb_addr) &= ~0x7f000000; ++ if (*xpb_addr < 0x60000) ++ *xpb_addr |= (1 << 30); ++ else ++ /* And only non-ARM interfaces use island id = 1 */ ++ if (NFP_CPP_INTERFACE_TYPE_of(nfp_cpp_interface(cpp)) != ++ NFP_CPP_INTERFACE_TYPE_ARM) ++ *xpb_addr |= (1 << 24); ++ } else { ++ (*xpb_addr) |= (1 << 30); ++ } ++ ++ return xpb; ++} ++ ++int ++nfp_cpp_area_readl(struct nfp_cpp_area *area, unsigned long offset, ++ uint32_t *value) ++{ ++ int sz; ++ uint32_t tmp = 0; ++ ++ sz = nfp_cpp_area_read(area, offset, &tmp, sizeof(tmp)); ++ *value = rte_le_to_cpu_32(tmp); ++ ++ return (sz == sizeof(*value)) ? 0 : -1; ++} ++ ++int ++nfp_cpp_area_writel(struct nfp_cpp_area *area, unsigned long offset, ++ uint32_t value) ++{ ++ int sz; ++ ++ value = rte_cpu_to_le_32(value); ++ sz = nfp_cpp_area_write(area, offset, &value, sizeof(value)); ++ return (sz == sizeof(value)) ? 0 : -1; ++} ++ ++int ++nfp_cpp_area_readq(struct nfp_cpp_area *area, unsigned long offset, ++ uint64_t *value) ++{ ++ int sz; ++ uint64_t tmp = 0; ++ ++ sz = nfp_cpp_area_read(area, offset, &tmp, sizeof(tmp)); ++ *value = rte_le_to_cpu_64(tmp); ++ ++ return (sz == sizeof(*value)) ? 0 : -1; ++} ++ ++int ++nfp_cpp_area_writeq(struct nfp_cpp_area *area, unsigned long offset, ++ uint64_t value) ++{ ++ int sz; ++ ++ value = rte_cpu_to_le_64(value); ++ sz = nfp_cpp_area_write(area, offset, &value, sizeof(value)); ++ ++ return (sz == sizeof(value)) ? 
0 : -1; ++} ++ ++int ++nfp_cpp_readl(struct nfp_cpp *cpp, uint32_t cpp_id, unsigned long long address, ++ uint32_t *value) ++{ ++ int sz; ++ uint32_t tmp; ++ ++ sz = nfp_cpp_read(cpp, cpp_id, address, &tmp, sizeof(tmp)); ++ *value = rte_le_to_cpu_32(tmp); ++ ++ return (sz == sizeof(*value)) ? 0 : -1; ++} ++ ++int ++nfp_cpp_writel(struct nfp_cpp *cpp, uint32_t cpp_id, unsigned long long address, ++ uint32_t value) ++{ ++ int sz; ++ ++ value = rte_cpu_to_le_32(value); ++ sz = nfp_cpp_write(cpp, cpp_id, address, &value, sizeof(value)); ++ ++ return (sz == sizeof(value)) ? 0 : -1; ++} ++ ++int ++nfp_cpp_readq(struct nfp_cpp *cpp, uint32_t cpp_id, unsigned long long address, ++ uint64_t *value) ++{ ++ int sz; ++ uint64_t tmp; ++ ++ sz = nfp_cpp_read(cpp, cpp_id, address, &tmp, sizeof(tmp)); ++ *value = rte_le_to_cpu_64(tmp); ++ ++ return (sz == sizeof(*value)) ? 0 : -1; ++} ++ ++int ++nfp_cpp_writeq(struct nfp_cpp *cpp, uint32_t cpp_id, unsigned long long address, ++ uint64_t value) ++{ ++ int sz; ++ ++ value = rte_cpu_to_le_64(value); ++ sz = nfp_cpp_write(cpp, cpp_id, address, &value, sizeof(value)); ++ ++ return (sz == sizeof(value)) ? 
0 : -1; ++} ++ ++int ++nfp_xpb_writel(struct nfp_cpp *cpp, uint32_t xpb_addr, uint32_t value) ++{ ++ uint32_t cpp_dest; ++ ++ cpp_dest = nfp_xpb_to_cpp(cpp, &xpb_addr); ++ ++ return nfp_cpp_writel(cpp, cpp_dest, xpb_addr, value); ++} ++ ++int ++nfp_xpb_readl(struct nfp_cpp *cpp, uint32_t xpb_addr, uint32_t *value) ++{ ++ uint32_t cpp_dest; ++ ++ cpp_dest = nfp_xpb_to_cpp(cpp, &xpb_addr); ++ ++ return nfp_cpp_readl(cpp, cpp_dest, xpb_addr, value); ++} ++ ++static struct nfp_cpp * ++nfp_cpp_alloc(const char *devname) ++{ ++ const struct nfp_cpp_operations *ops; ++ struct nfp_cpp *cpp; ++ int err; ++ ++ ops = nfp_cpp_transport_operations(); ++ ++ if (!ops || !ops->init) ++ return NFP_ERRPTR(EINVAL); ++ ++ cpp = calloc(1, sizeof(*cpp)); ++ if (!cpp) ++ return NULL; ++ ++ cpp->op = ops; ++ ++ if (cpp->op->init) { ++ err = cpp->op->init(cpp, devname); ++ if (err < 0) { ++ free(cpp); ++ return NULL; ++ } ++ } ++ ++ if (NFP_CPP_MODEL_IS_6000(nfp_cpp_model(cpp))) { ++ uint32_t xpbaddr; ++ size_t tgt; ++ ++ for (tgt = 0; tgt < ARRAY_SIZE(cpp->imb_cat_table); tgt++) { ++ /* Hardcoded XPB IMB Base, island 0 */ ++ xpbaddr = 0x000a0000 + (tgt * 4); ++ err = nfp_xpb_readl(cpp, xpbaddr, ++ (uint32_t *)&cpp->imb_cat_table[tgt]); ++ if (err < 0) { ++ free(cpp); ++ return NULL; ++ } ++ } ++ } ++ ++ return cpp; ++} ++ ++/* ++ * nfp_cpp_free - free the CPP handle ++ * @cpp: CPP handle ++ */ ++void ++nfp_cpp_free(struct nfp_cpp *cpp) ++{ ++ if (cpp->op && cpp->op->free) ++ cpp->op->free(cpp); ++ ++ if (cpp->serial_len) ++ free(cpp->serial); ++ ++ free(cpp); ++} ++ ++struct nfp_cpp * ++nfp_cpp_from_device_name(const char *devname) ++{ ++ return nfp_cpp_alloc(devname); ++} ++ ++/* ++ * Modify bits of a 32-bit value from the XPB bus ++ * ++ * @param cpp NFP CPP device handle ++ * @param xpb_tgt XPB target and address ++ * @param mask mask of bits to alter ++ * @param value value to modify ++ * ++ * @return 0 on success, or -1 on failure (and set errno accordingly). 
++ */ ++int ++nfp_xpb_writelm(struct nfp_cpp *cpp, uint32_t xpb_tgt, uint32_t mask, ++ uint32_t value) ++{ ++ int err; ++ uint32_t tmp; ++ ++ err = nfp_xpb_readl(cpp, xpb_tgt, &tmp); ++ if (err < 0) ++ return err; ++ ++ tmp &= ~mask; ++ tmp |= (mask & value); ++ return nfp_xpb_writel(cpp, xpb_tgt, tmp); ++} ++ ++/* ++ * Modify bits of a 32-bit value from the XPB bus ++ * ++ * @param cpp NFP CPP device handle ++ * @param xpb_tgt XPB target and address ++ * @param mask mask of bits to alter ++ * @param value value to monitor for ++ * @param timeout_us maximum number of us to wait (-1 for forever) ++ * ++ * @return >= 0 on success, or -1 on failure (and set errno accordingly). ++ */ ++int ++nfp_xpb_waitlm(struct nfp_cpp *cpp, uint32_t xpb_tgt, uint32_t mask, ++ uint32_t value, int timeout_us) ++{ ++ uint32_t tmp; ++ int err; ++ ++ do { ++ err = nfp_xpb_readl(cpp, xpb_tgt, &tmp); ++ if (err < 0) ++ goto exit; ++ ++ if ((tmp & mask) == (value & mask)) { ++ if (timeout_us < 0) ++ timeout_us = 0; ++ break; ++ } ++ ++ if (timeout_us < 0) ++ continue; ++ ++ timeout_us -= 100; ++ usleep(100); ++ } while (timeout_us >= 0); ++ ++ if (timeout_us < 0) ++ err = NFP_ERRNO(ETIMEDOUT); ++ else ++ err = timeout_us; ++ ++exit: ++ return err; ++} ++ ++/* ++ * nfp_cpp_read - read from CPP target ++ * @cpp: CPP handle ++ * @destination: CPP id ++ * @address: offset into CPP target ++ * @kernel_vaddr: kernel buffer for result ++ * @length: number of bytes to read ++ */ ++int ++nfp_cpp_read(struct nfp_cpp *cpp, uint32_t destination, ++ unsigned long long address, void *kernel_vaddr, size_t length) ++{ ++ struct nfp_cpp_area *area; ++ int err; ++ ++ area = nfp_cpp_area_alloc_acquire(cpp, destination, address, length); ++ if (!area) { ++ printf("Area allocation/acquire failed\n"); ++ return -1; ++ } ++ ++ err = nfp_cpp_area_read(area, 0, kernel_vaddr, length); ++ ++ nfp_cpp_area_release_free(area); ++ return err; ++} ++ ++/* ++ * nfp_cpp_write - write to CPP target ++ * @cpp: CPP handle ++ * 
@destination: CPP id ++ * @address: offset into CPP target ++ * @kernel_vaddr: kernel buffer to read from ++ * @length: number of bytes to write ++ */ ++int ++nfp_cpp_write(struct nfp_cpp *cpp, uint32_t destination, ++ unsigned long long address, const void *kernel_vaddr, ++ size_t length) ++{ ++ struct nfp_cpp_area *area; ++ int err; ++ ++ area = nfp_cpp_area_alloc_acquire(cpp, destination, address, length); ++ if (!area) ++ return -1; ++ ++ err = nfp_cpp_area_write(area, 0, kernel_vaddr, length); ++ ++ nfp_cpp_area_release_free(area); ++ return err; ++} ++ ++/* ++ * nfp_cpp_area_fill - fill a CPP area with a value ++ * @area: CPP area ++ * @offset: offset into CPP area ++ * @value: value to fill with ++ * @length: length of area to fill ++ */ ++int ++nfp_cpp_area_fill(struct nfp_cpp_area *area, unsigned long offset, ++ uint32_t value, size_t length) ++{ ++ int err; ++ size_t i; ++ uint64_t value64; ++ ++ value = rte_cpu_to_le_32(value); ++ value64 = ((uint64_t)value << 32) | value; ++ ++ if ((offset + length) > area->size) ++ return NFP_ERRNO(EINVAL); ++ ++ if ((area->offset + offset) & 3) ++ return NFP_ERRNO(EINVAL); ++ ++ if (((area->offset + offset) & 7) == 4 && length >= 4) { ++ err = nfp_cpp_area_write(area, offset, &value, sizeof(value)); ++ if (err < 0) ++ return err; ++ if (err != sizeof(value)) ++ return NFP_ERRNO(ENOSPC); ++ offset += sizeof(value); ++ length -= sizeof(value); ++ } ++ ++ for (i = 0; (i + sizeof(value)) < length; i += sizeof(value64)) { ++ err = ++ nfp_cpp_area_write(area, offset + i, &value64, ++ sizeof(value64)); ++ if (err < 0) ++ return err; ++ if (err != sizeof(value64)) ++ return NFP_ERRNO(ENOSPC); ++ } ++ ++ if ((i + sizeof(value)) <= length) { ++ err = ++ nfp_cpp_area_write(area, offset + i, &value, sizeof(value)); ++ if (err < 0) ++ return err; ++ if (err != sizeof(value)) ++ return NFP_ERRNO(ENOSPC); ++ i += sizeof(value); ++ } ++ ++ return (int)i; ++} ++ ++/* ++ * NOTE: This code should not use nfp_xpb_* functions, ++ * as 
those are model-specific ++ */ ++uint32_t ++__nfp_cpp_model_autodetect(struct nfp_cpp *cpp) ++{ ++ uint32_t arm_id = NFP_CPP_ID(NFP_CPP_TARGET_ARM, 0, 0); ++ uint32_t model = 0; ++ ++ nfp_cpp_readl(cpp, arm_id, NFP6000_ARM_GCSR_SOFTMODEL0, &model); ++ ++ if (NFP_CPP_MODEL_IS_6000(model)) { ++ uint32_t tmp; /* NOTE(review): read result below is unchecked; tmp may be indeterminate on failure */ ++ ++ nfp_cpp_model_set(cpp, model); ++ ++ /* The PL's PluDeviceID revision code is authoritative */ ++ model &= ~0xff; ++ nfp_xpb_readl(cpp, NFP_XPB_DEVICE(1, 1, 16) + ++ NFP_PL_DEVICE_ID, &tmp); ++ model |= (NFP_PL_DEVICE_ID_MASK & tmp) - 0x10; ++ } ++ ++ return model; ++} ++ ++/* ++ * nfp_cpp_map_area() - Helper function to map an area ++ * @cpp: NFP CPP handler ++ * @domain: CPP domain ++ * @target: CPP target ++ * @addr: CPP address ++ * @size: Size of the area ++ * @area: Area handle (output) ++ * ++ * Map an area of IOMEM access. To undo the effect of this function call ++ * @nfp_cpp_area_release_free(*area). ++ * ++ * Return: Pointer to memory mapped area, or NULL on failure ++ */ ++uint8_t * ++nfp_cpp_map_area(struct nfp_cpp *cpp, int domain, int target, uint64_t addr, ++ unsigned long size, struct nfp_cpp_area **area) ++{ ++ uint8_t *res; ++ uint32_t dest; ++ ++ dest = NFP_CPP_ISLAND_ID(target, NFP_CPP_ACTION_RW, 0, domain); ++ ++ *area = nfp_cpp_area_alloc_acquire(cpp, dest, addr, size); ++ if (!*area) ++ goto err_eio; ++ ++ res = nfp_cpp_area_iomem(*area); ++ if (!res) ++ goto err_release_free; ++ ++ return res; ++ ++err_release_free: ++ nfp_cpp_area_release_free(*area); ++err_eio: ++ return NULL; ++} +diff --git a/drivers/net/nfp/nfpcore/nfp_crc.c b/drivers/net/nfp/nfpcore/nfp_crc.c +new file mode 100644 +index 000000000..20431bf84 +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp_crc.c +@@ -0,0 +1,49 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. 
++ */ ++ ++#include ++#include ++ ++#include "nfp_crc.h" ++ ++static inline uint32_t ++nfp_crc32_be_generic(uint32_t crc, unsigned char const *p, size_t len, ++ uint32_t polynomial) ++{ ++ int i; ++ while (len--) { ++ crc ^= (uint32_t)*p++ << 24; /* cast first: shifting a promoted int into its sign bit is undefined */ ++ for (i = 0; i < 8; i++) ++ crc = (crc << 1) ^ ((crc & 0x80000000) ? polynomial : ++ 0); ++ } ++ return crc; ++} ++ ++static inline uint32_t ++nfp_crc32_be(uint32_t crc, unsigned char const *p, size_t len) ++{ ++ return nfp_crc32_be_generic(crc, p, len, CRCPOLY_BE); ++} ++ ++static uint32_t ++nfp_crc32_posix_end(uint32_t crc, size_t total_len) ++{ ++ /* Extend with the length of the string. */ ++ while (total_len != 0) { ++ uint8_t c = total_len & 0xff; ++ ++ crc = nfp_crc32_be(crc, &c, 1); ++ total_len >>= 8; ++ } ++ ++ return ~crc; ++} ++ ++uint32_t ++nfp_crc32_posix(const void *buff, size_t len) ++{ ++ return nfp_crc32_posix_end(nfp_crc32_be(0, buff, len), len); ++} +diff --git a/drivers/net/nfp/nfpcore/nfp_crc.h b/drivers/net/nfp/nfpcore/nfp_crc.h +new file mode 100644 +index 000000000..f99c89fca +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp_crc.h +@@ -0,0 +1,19 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. ++ */ ++ ++#ifndef __NFP_CRC_H__ ++#define __NFP_CRC_H__ ++ ++/* ++ * There are multiple 16-bit CRC polynomials in common use, but this is ++ * *the* standard CRC-32 polynomial, first popularized by Ethernet. ++ * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 ++ */ ++#define CRCPOLY_LE 0xedb88320 ++#define CRCPOLY_BE 0x04c11db7 ++ ++uint32_t nfp_crc32_posix(const void *buff, size_t len); ++ ++#endif +diff --git a/drivers/net/nfp/nfpcore/nfp_hwinfo.c b/drivers/net/nfp/nfpcore/nfp_hwinfo.c +new file mode 100644 +index 000000000..c0516bf8e +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp_hwinfo.c +@@ -0,0 +1,199 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. 
++ */ ++ ++/* Parse the hwinfo table that the ARM firmware builds in the ARM scratch SRAM ++ * after chip reset. ++ * ++ * Examples of the fields: ++ * me.count = 40 ++ * me.mask = 0x7f_ffff_ffff ++ * ++ * me.count is the total number of MEs on the system. ++ * me.mask is the bitmask of MEs that are available for application usage. ++ * ++ * (ie, in this example, ME 39 has been reserved by boardconfig.) ++ */ ++ ++#include ++#include ++ ++#include "nfp_cpp.h" ++#include "nfp6000/nfp6000.h" ++#include "nfp_resource.h" ++#include "nfp_hwinfo.h" ++#include "nfp_crc.h" ++ ++static int ++nfp_hwinfo_is_updating(struct nfp_hwinfo *hwinfo) ++{ ++ return hwinfo->version & NFP_HWINFO_VERSION_UPDATING; ++} ++ ++static int ++nfp_hwinfo_db_walk(struct nfp_hwinfo *hwinfo, uint32_t size) ++{ ++ const char *key, *val, *end = hwinfo->data + size; ++ ++ for (key = hwinfo->data; key < end && *key; ++ key = val + strlen(val) + 1) { ++ val = key + strlen(key) + 1; ++ if (val >= end) { ++ printf("Bad HWINFO - overflowing key\n"); ++ return -EINVAL; ++ } ++ ++ if (val + strlen(val) + 1 > end) { ++ printf("Bad HWINFO - overflowing value\n"); ++ return -EINVAL; ++ } ++ } ++ return 0; ++} ++ ++static int ++nfp_hwinfo_db_validate(struct nfp_hwinfo *db, uint32_t len) ++{ ++ uint32_t size, new_crc, *crc; ++ ++ size = db->size; ++ if (size < sizeof(uint32_t) || size > len) { ++ printf("Unsupported hwinfo size %u (limit %u)\n", size, len); ++ return -EINVAL; ++ } ++ ++ size -= sizeof(uint32_t); ++ new_crc = nfp_crc32_posix((char *)db, size); ++ crc = (uint32_t *)(db->start + size); ++ if (new_crc != *crc) { ++ printf("Corrupt hwinfo table (CRC mismatch)\n"); ++ printf("\tcalculated 0x%x, expected 0x%x\n", new_crc, *crc); ++ return -EINVAL; ++ } ++ ++ return nfp_hwinfo_db_walk(db, size); ++} ++ ++static struct nfp_hwinfo * ++nfp_hwinfo_try_fetch(struct nfp_cpp *cpp, size_t *cpp_size) ++{ ++ struct nfp_hwinfo *header; ++ void *res; ++ uint64_t cpp_addr; ++ uint32_t cpp_id; ++ int err; ++ uint8_t *db; ++ ++ res = 
nfp_resource_acquire(cpp, NFP_RESOURCE_NFP_HWINFO); ++ if (res) { ++ cpp_id = nfp_resource_cpp_id(res); ++ cpp_addr = nfp_resource_address(res); ++ *cpp_size = nfp_resource_size(res); ++ ++ nfp_resource_release(res); ++ ++ if (*cpp_size < HWINFO_SIZE_MIN) ++ return NULL; ++ } else { ++ return NULL; ++ } ++ ++ db = malloc(*cpp_size + 1); ++ if (!db) ++ return NULL; ++ ++ err = nfp_cpp_read(cpp, cpp_id, cpp_addr, db, *cpp_size); ++ if (err != (int)*cpp_size) ++ goto exit_free; ++ ++ header = (void *)db; ++ printf("NFP HWINFO header: %08x\n", *(uint32_t *)header); ++ if (nfp_hwinfo_is_updating(header)) ++ goto exit_free; ++ ++ if (header->version != NFP_HWINFO_VERSION_2) { ++ printf("Unknown HWInfo version: 0x%08x\n", ++ header->version); ++ goto exit_free; ++ } ++ ++ /* NULL-terminate for safety */ ++ db[*cpp_size] = '\0'; ++ ++ return (void *)db; ++exit_free: ++ free(db); ++ return NULL; ++} ++ ++static struct nfp_hwinfo * ++nfp_hwinfo_fetch(struct nfp_cpp *cpp, size_t *hwdb_size) ++{ ++ struct timespec wait; ++ struct nfp_hwinfo *db; ++ int count; ++ ++ wait.tv_sec = 0; ++ wait.tv_nsec = 10000000; ++ count = 0; ++ ++ for (;;) { ++ db = nfp_hwinfo_try_fetch(cpp, hwdb_size); ++ if (db) ++ return db; ++ ++ nanosleep(&wait, NULL); ++ if (count++ > 200) { ++ printf("NFP access error\n"); ++ return NULL; ++ } ++ } ++} ++ ++struct nfp_hwinfo * ++nfp_hwinfo_read(struct nfp_cpp *cpp) ++{ ++ struct nfp_hwinfo *db; ++ size_t hwdb_size = 0; ++ int err; ++ ++ db = nfp_hwinfo_fetch(cpp, &hwdb_size); ++ if (!db) ++ return NULL; ++ ++ err = nfp_hwinfo_db_validate(db, hwdb_size); ++ if (err) { ++ free(db); ++ return NULL; ++ } ++ return db; ++} ++ ++/* ++ * nfp_hwinfo_lookup() - Find a value in the HWInfo table by name ++ * @hwinfo: NFP HWinfo table ++ * @lookup: HWInfo name to search for ++ * ++ * Return: Value of the HWInfo name, or NULL ++ */ ++const char * ++nfp_hwinfo_lookup(struct nfp_hwinfo *hwinfo, const char *lookup) ++{ ++ const char *key, *val, *end; ++ ++ if (!hwinfo || 
!lookup) ++ return NULL; ++ ++ end = hwinfo->data + hwinfo->size - sizeof(uint32_t); ++ ++ for (key = hwinfo->data; key < end && *key; ++ key = val + strlen(val) + 1) { ++ val = key + strlen(key) + 1; ++ ++ if (strcmp(key, lookup) == 0) ++ return val; ++ } ++ ++ return NULL; ++} +diff --git a/drivers/net/nfp/nfpcore/nfp_hwinfo.h b/drivers/net/nfp/nfpcore/nfp_hwinfo.h +new file mode 100644 +index 000000000..ccc616321 +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp_hwinfo.h +@@ -0,0 +1,85 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. ++ */ ++ ++#ifndef __NFP_HWINFO_H__ ++#define __NFP_HWINFO_H__ ++ ++#include ++ ++#define HWINFO_SIZE_MIN 0x100 ++ ++/* ++ * The Hardware Info Table defines the properties of the system. ++ * ++ * HWInfo v1 Table (fixed size) ++ * ++ * 0x0000: uint32_t version Hardware Info Table version (1.0) ++ * 0x0004: uint32_t size Total size of the table, including the ++ * CRC32 (IEEE 802.3) ++ * 0x0008: uint32_t jumptab Offset of key/value table ++ * 0x000c: uint32_t keys Total number of keys in the key/value ++ * table ++ * NNNNNN: Key/value jump table and string data ++ * (size - 4): uint32_t crc32 CRC32 (same as IEEE 802.3, POSIX csum, etc) ++ * CRC32("",0) = ~0, CRC32("a",1) = 0x48C279FE ++ * ++ * HWInfo v2 Table (variable size) ++ * ++ * 0x0000: uint32_t version Hardware Info Table version (2.0) ++ * 0x0004: uint32_t size Current size of the data area, excluding ++ * CRC32 ++ * 0x0008: uint32_t limit Maximum size of the table ++ * 0x000c: uint32_t reserved Unused, set to zero ++ * NNNNNN: Key/value data ++ * (size - 4): uint32_t crc32 CRC32 (same as IEEE 802.3, POSIX csum, etc) ++ * CRC32("",0) = ~0, CRC32("a",1) = 0x48C279FE ++ * ++ * If the HWInfo table is in the process of being updated, the low bit of ++ * version will be set. 
++ * ++ * HWInfo v1 Key/Value Table ++ * ------------------------- ++ * ++ * The key/value table is a set of offsets to ASCIIZ strings which have ++ * been strcmp(3) sorted (yes, please use bsearch(3) on the table). ++ * ++ * All keys are guaranteed to be unique. ++ * ++ * N+0: uint32_t key_1 Offset to the first key ++ * N+4: uint32_t val_1 Offset to the first value ++ * N+8: uint32_t key_2 Offset to the second key ++ * N+c: uint32_t val_2 Offset to the second value ++ * ... ++ * ++ * HWInfo v2 Key/Value Table ++ * ------------------------- ++ * ++ * Packed UTF8Z strings, ie 'key1\000value1\000key2\000value2\000' ++ * ++ * Unsorted. ++ */ ++ ++#define NFP_HWINFO_VERSION_1 ('H' << 24 | 'I' << 16 | 1 << 8 | 0 << 1 | 0) ++#define NFP_HWINFO_VERSION_2 ('H' << 24 | 'I' << 16 | 2 << 8 | 0 << 1 | 0) ++#define NFP_HWINFO_VERSION_UPDATING BIT(0) ++ ++struct nfp_hwinfo { ++ uint8_t start[0]; ++ ++ uint32_t version; ++ uint32_t size; ++ ++ /* v2 specific fields */ ++ uint32_t limit; ++ uint32_t resv; ++ ++ char data[]; ++}; ++ ++struct nfp_hwinfo *nfp_hwinfo_read(struct nfp_cpp *cpp); ++ ++const char *nfp_hwinfo_lookup(struct nfp_hwinfo *hwinfo, const char *lookup); ++ ++#endif +diff --git a/drivers/net/nfp/nfpcore/nfp_mip.c b/drivers/net/nfp/nfpcore/nfp_mip.c +new file mode 100644 +index 000000000..c86966df8 +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp_mip.c +@@ -0,0 +1,154 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. 
++ */ ++ ++#include ++#include ++ ++#include "nfp_cpp.h" ++#include "nfp_mip.h" ++#include "nfp_nffw.h" ++ ++#define NFP_MIP_SIGNATURE rte_cpu_to_le_32(0x0050494d) /* "MIP\0" */ ++#define NFP_MIP_VERSION rte_cpu_to_le_32(1) ++#define NFP_MIP_MAX_OFFSET (256 * 1024) ++ ++struct nfp_mip { ++ uint32_t signature; ++ uint32_t mip_version; ++ uint32_t mip_size; ++ uint32_t first_entry; ++ ++ uint32_t version; ++ uint32_t buildnum; ++ uint32_t buildtime; ++ uint32_t loadtime; ++ ++ uint32_t symtab_addr; ++ uint32_t symtab_size; ++ uint32_t strtab_addr; ++ uint32_t strtab_size; ++ ++ char name[16]; ++ char toolchain[32]; ++}; ++ ++/* Read memory and check if it could be a valid MIP */ ++static int ++nfp_mip_try_read(struct nfp_cpp *cpp, uint32_t cpp_id, uint64_t addr, ++ struct nfp_mip *mip) ++{ ++ int ret; ++ ++ ret = nfp_cpp_read(cpp, cpp_id, addr, mip, sizeof(*mip)); ++ if (ret != sizeof(*mip)) { ++ printf("Failed to read MIP data (%d, %zu)\n", ++ ret, sizeof(*mip)); ++ return -EIO; ++ } ++ if (mip->signature != NFP_MIP_SIGNATURE) { ++ printf("Incorrect MIP signature (0x%08x)\n", ++ rte_le_to_cpu_32(mip->signature)); ++ return -EINVAL; ++ } ++ if (mip->mip_version != NFP_MIP_VERSION) { ++ printf("Unsupported MIP version (%d)\n", ++ rte_le_to_cpu_32(mip->mip_version)); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++/* Try to locate MIP using the resource table */ ++static int ++nfp_mip_read_resource(struct nfp_cpp *cpp, struct nfp_mip *mip) ++{ ++ struct nfp_nffw_info *nffw_info; ++ uint32_t cpp_id; ++ uint64_t addr; ++ int err; ++ ++ nffw_info = nfp_nffw_info_open(cpp); ++ if (!nffw_info) ++ return -ENODEV; ++ ++ err = nfp_nffw_info_mip_first(nffw_info, &cpp_id, &addr); ++ if (err) ++ goto exit_close_nffw; ++ ++ err = nfp_mip_try_read(cpp, cpp_id, addr, mip); ++exit_close_nffw: ++ nfp_nffw_info_close(nffw_info); ++ return err; ++} ++ ++/* ++ * nfp_mip_open() - Get device MIP structure ++ * @cpp: NFP CPP Handle ++ * ++ * Copy MIP structure from NFP device and return 
it. The returned ++ * structure is handled internally by the library and should be ++ * freed by calling nfp_mip_close(). ++ * ++ * Return: pointer to mip, NULL on failure. ++ */ ++struct nfp_mip * ++nfp_mip_open(struct nfp_cpp *cpp) ++{ ++ struct nfp_mip *mip; ++ int err; ++ ++ mip = malloc(sizeof(*mip)); ++ if (!mip) ++ return NULL; ++ ++ err = nfp_mip_read_resource(cpp, mip); ++ if (err) { ++ free(mip); ++ return NULL; ++ } ++ ++ mip->name[sizeof(mip->name) - 1] = 0; ++ ++ return mip; ++} ++ ++void ++nfp_mip_close(struct nfp_mip *mip) ++{ ++ free(mip); ++} ++ ++const char * ++nfp_mip_name(const struct nfp_mip *mip) ++{ ++ return mip->name; ++} ++ ++/* ++ * nfp_mip_symtab() - Get the address and size of the MIP symbol table ++ * @mip: MIP handle ++ * @addr: Location for NFP DDR address of MIP symbol table ++ * @size: Location for size of MIP symbol table ++ */ ++void ++nfp_mip_symtab(const struct nfp_mip *mip, uint32_t *addr, uint32_t *size) ++{ ++ *addr = rte_le_to_cpu_32(mip->symtab_addr); ++ *size = rte_le_to_cpu_32(mip->symtab_size); ++} ++ ++/* ++ * nfp_mip_strtab() - Get the address and size of the MIP symbol name table ++ * @mip: MIP handle ++ * @addr: Location for NFP DDR address of MIP symbol name table ++ * @size: Location for size of MIP symbol name table ++ */ ++void ++nfp_mip_strtab(const struct nfp_mip *mip, uint32_t *addr, uint32_t *size) ++{ ++ *addr = rte_le_to_cpu_32(mip->strtab_addr); ++ *size = rte_le_to_cpu_32(mip->strtab_size); ++} +diff --git a/drivers/net/nfp/nfpcore/nfp_mip.h b/drivers/net/nfp/nfpcore/nfp_mip.h +new file mode 100644 +index 000000000..d0919b58f +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp_mip.h +@@ -0,0 +1,21 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. 
++ */ ++ ++#ifndef __NFP_MIP_H__ ++#define __NFP_MIP_H__ ++ ++#include "nfp_nffw.h" ++ ++struct nfp_mip; ++ ++struct nfp_mip *nfp_mip_open(struct nfp_cpp *cpp); ++void nfp_mip_close(struct nfp_mip *mip); ++ ++const char *nfp_mip_name(const struct nfp_mip *mip); ++void nfp_mip_symtab(const struct nfp_mip *mip, uint32_t *addr, uint32_t *size); ++void nfp_mip_strtab(const struct nfp_mip *mip, uint32_t *addr, uint32_t *size); ++int nfp_nffw_info_mip_first(struct nfp_nffw_info *state, uint32_t *cpp_id, ++ uint64_t *off); ++#endif +diff --git a/drivers/net/nfp/nfpcore/nfp_mutex.c b/drivers/net/nfp/nfpcore/nfp_mutex.c +new file mode 100644 +index 000000000..318c5800d +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp_mutex.c +@@ -0,0 +1,424 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. ++ */ ++ ++#include ++ ++#include ++#include ++#include ++ ++#include "nfp_cpp.h" ++#include "nfp6000/nfp6000.h" ++ ++#define MUTEX_LOCKED(interface) ((((uint32_t)(interface)) << 16) | 0x000f) ++#define MUTEX_UNLOCK(interface) (0 | 0x0000) ++ ++#define MUTEX_IS_LOCKED(value) (((value) & 0xffff) == 0x000f) ++#define MUTEX_IS_UNLOCKED(value) (((value) & 0xffff) == 0x0000) ++#define MUTEX_INTERFACE(value) (((value) >> 16) & 0xffff) ++ ++/* ++ * If you need more than 65536 recursive locks, please ++ * rethink your code. 
++ */ ++#define MUTEX_DEPTH_MAX 0xffff ++ ++struct nfp_cpp_mutex { ++ struct nfp_cpp *cpp; ++ uint8_t target; ++ uint16_t depth; ++ unsigned long long address; ++ uint32_t key; ++ unsigned int usage; ++ struct nfp_cpp_mutex *prev, *next; ++}; ++ ++static int ++_nfp_cpp_mutex_validate(uint32_t model, int *target, unsigned long long address) ++{ ++ /* Address must be 64-bit aligned */ ++ if (address & 7) ++ return NFP_ERRNO(EINVAL); ++ ++ if (NFP_CPP_MODEL_IS_6000(model)) { ++ if (*target != NFP_CPP_TARGET_MU) ++ return NFP_ERRNO(EINVAL); ++ } else { ++ return NFP_ERRNO(EINVAL); ++ } ++ ++ return 0; ++} ++ ++/* ++ * Initialize a mutex location ++ * ++ * The CPP target:address must point to a 64-bit aligned location, and ++ * will initialize 64 bits of data at the location. ++ * ++ * This creates the initial mutex state, as locked by this ++ * nfp_cpp_interface(). ++ * ++ * This function should only be called when setting up ++ * the initial lock state upon boot-up of the system. ++ * ++ * @param cpp NFP CPP handle ++ * @param target NFP CPP target ID (must be NFP_CPP_TARGET_MU, the ++ * only target the validation accepts) ++ * @param address Offset into the address space of the NFP CPP target ID ++ * @param key Unique 32-bit value for this mutex ++ * ++ * @return 0 on success, or -1 on failure (and set errno accordingly). 
++ */ ++int ++nfp_cpp_mutex_init(struct nfp_cpp *cpp, int target, unsigned long long address, ++ uint32_t key) ++{ ++ uint32_t model = nfp_cpp_model(cpp); ++ uint32_t muw = NFP_CPP_ID(target, 4, 0); /* atomic_write */ ++ int err; ++ ++ err = _nfp_cpp_mutex_validate(model, &target, address); ++ if (err < 0) ++ return err; ++ ++ err = nfp_cpp_writel(cpp, muw, address + 4, key); ++ if (err < 0) ++ return err; ++ ++ err = ++ nfp_cpp_writel(cpp, muw, address + 0, ++ MUTEX_LOCKED(nfp_cpp_interface(cpp))); ++ if (err < 0) ++ return err; ++ ++ return 0; ++} ++ ++/* ++ * Create a mutex handle from an address controlled by a MU Atomic engine ++ * ++ * The CPP target:address must point to a 64-bit aligned location, and ++ * reserve 64 bits of data at the location for use by the handle. ++ * ++ * Only target/address pairs that point to entities that support the ++ * MU Atomic Engine are supported. ++ * ++ * @param cpp NFP CPP handle ++ * @param target NFP CPP target ID (ie NFP_CPP_TARGET_CLS or ++ * NFP_CPP_TARGET_MU) ++ * @param address Offset into the address space of the NFP CPP target ID ++ * @param key 32-bit unique key (must match the key at this location) ++ * ++ * @return A non-NULL struct nfp_cpp_mutex * on success, NULL on failure. ++ */ ++struct nfp_cpp_mutex * ++nfp_cpp_mutex_alloc(struct nfp_cpp *cpp, int target, ++ unsigned long long address, uint32_t key) ++{ ++ uint32_t model = nfp_cpp_model(cpp); ++ struct nfp_cpp_mutex *mutex; ++ uint32_t mur = NFP_CPP_ID(target, 3, 0); /* atomic_read */ ++ int err; ++ uint32_t tmp; ++ ++ /* Look for cached mutex */ ++ for (mutex = cpp->mutex_cache; mutex; mutex = mutex->next) { ++ if (mutex->target == target && mutex->address == address) ++ break; ++ } ++ ++ if (mutex) { ++ if (mutex->key == key) { ++ mutex->usage++; ++ return mutex; ++ } ++ ++ /* If the key doesn't match... 
*/ ++ return NFP_ERRPTR(EEXIST); ++ } ++ ++ err = _nfp_cpp_mutex_validate(model, &target, address); ++ if (err < 0) ++ return NULL; ++ ++ err = nfp_cpp_readl(cpp, mur, address + 4, &tmp); ++ if (err < 0) ++ return NULL; ++ ++ if (tmp != key) ++ return NFP_ERRPTR(EEXIST); ++ ++ mutex = calloc(sizeof(*mutex), 1); ++ if (!mutex) ++ return NFP_ERRPTR(ENOMEM); ++ ++ mutex->cpp = cpp; ++ mutex->target = target; ++ mutex->address = address; ++ mutex->key = key; ++ mutex->depth = 0; ++ mutex->usage = 1; ++ ++ /* Add mutex to the cache */ ++ if (cpp->mutex_cache) { ++ cpp->mutex_cache->prev = mutex; ++ mutex->next = cpp->mutex_cache; ++ cpp->mutex_cache = mutex; ++ } else { ++ cpp->mutex_cache = mutex; ++ } ++ ++ return mutex; ++} ++ ++struct nfp_cpp * ++nfp_cpp_mutex_cpp(struct nfp_cpp_mutex *mutex) ++{ ++ return mutex->cpp; ++} ++ ++uint32_t ++nfp_cpp_mutex_key(struct nfp_cpp_mutex *mutex) ++{ ++ return mutex->key; ++} ++ ++uint16_t ++nfp_cpp_mutex_owner(struct nfp_cpp_mutex *mutex) ++{ ++ uint32_t mur = NFP_CPP_ID(mutex->target, 3, 0); /* atomic_read */ ++ uint32_t value, key; ++ int err; ++ ++ err = nfp_cpp_readl(mutex->cpp, mur, mutex->address, &value); ++ if (err < 0) ++ return err; ++ ++ err = nfp_cpp_readl(mutex->cpp, mur, mutex->address + 4, &key); ++ if (err < 0) ++ return err; ++ ++ if (key != mutex->key) ++ return NFP_ERRNO(EPERM); ++ ++ if (!MUTEX_IS_LOCKED(value)) ++ return 0; ++ ++ return MUTEX_INTERFACE(value); ++} ++ ++int ++nfp_cpp_mutex_target(struct nfp_cpp_mutex *mutex) ++{ ++ return mutex->target; ++} ++ ++uint64_t ++nfp_cpp_mutex_address(struct nfp_cpp_mutex *mutex) ++{ ++ return mutex->address; ++} ++ ++/* ++ * Free a mutex handle - does not alter the lock state ++ * ++ * @param mutex NFP CPP Mutex handle ++ */ ++void ++nfp_cpp_mutex_free(struct nfp_cpp_mutex *mutex) ++{ ++ mutex->usage--; ++ if (mutex->usage > 0) ++ return; ++ ++ /* Remove mutex from the cache */ ++ if (mutex->next) ++ mutex->next->prev = mutex->prev; ++ if (mutex->prev) ++ 
mutex->prev->next = mutex->next; ++ ++ /* If mutex->cpp == NULL, something broke */ ++ if (mutex->cpp && mutex == mutex->cpp->mutex_cache) ++ mutex->cpp->mutex_cache = mutex->next; ++ ++ free(mutex); ++} ++ ++/* ++ * Lock a mutex handle, using the NFP MU Atomic Engine ++ * ++ * @param mutex NFP CPP Mutex handle ++ * ++ * @return 0 on success, or -1 on failure (and set errno accordingly). ++ */ ++int ++nfp_cpp_mutex_lock(struct nfp_cpp_mutex *mutex) ++{ ++ int err; ++ time_t warn_at = time(NULL) + 15; ++ ++ while ((err = nfp_cpp_mutex_trylock(mutex)) != 0) { ++ /* If errno != EBUSY, then the lock was damaged */ ++ if (err < 0 && errno != EBUSY) ++ return err; ++ if (time(NULL) >= warn_at) { ++ printf("Warning: waiting for NFP mutex\n"); ++ printf("\tusage:%u\n", mutex->usage); ++ printf("\tdepth:%hu]\n", mutex->depth); ++ printf("\ttarget:%d\n", mutex->target); ++ printf("\taddr:%llx\n", mutex->address); ++ printf("\tkey:%08x]\n", mutex->key); ++ warn_at = time(NULL) + 60; ++ } ++ sched_yield(); ++ } ++ return 0; ++} ++ ++/* ++ * Unlock a mutex handle, using the NFP MU Atomic Engine ++ * ++ * @param mutex NFP CPP Mutex handle ++ * ++ * @return 0 on success, or -1 on failure (and set errno accordingly). 
++ */ ++int ++nfp_cpp_mutex_unlock(struct nfp_cpp_mutex *mutex) ++{ ++ uint32_t muw = NFP_CPP_ID(mutex->target, 4, 0); /* atomic_write */ ++ uint32_t mur = NFP_CPP_ID(mutex->target, 3, 0); /* atomic_read */ ++ struct nfp_cpp *cpp = mutex->cpp; ++ uint32_t key, value; ++ uint16_t interface = nfp_cpp_interface(cpp); ++ int err; ++ ++ if (mutex->depth > 1) { ++ mutex->depth--; ++ return 0; ++ } ++ ++ err = nfp_cpp_readl(mutex->cpp, mur, mutex->address, &value); ++ if (err < 0) ++ goto exit; ++ ++ err = nfp_cpp_readl(mutex->cpp, mur, mutex->address + 4, &key); ++ if (err < 0) ++ goto exit; ++ ++ if (key != mutex->key) { ++ err = NFP_ERRNO(EPERM); ++ goto exit; ++ } ++ ++ if (value != MUTEX_LOCKED(interface)) { ++ err = NFP_ERRNO(EACCES); ++ goto exit; ++ } ++ ++ err = nfp_cpp_writel(cpp, muw, mutex->address, MUTEX_UNLOCK(interface)); ++ if (err < 0) ++ goto exit; ++ ++ mutex->depth = 0; ++ ++exit: ++ return err; ++} ++ ++/* ++ * Attempt to lock a mutex handle, using the NFP MU Atomic Engine ++ * ++ * Valid lock states: ++ * ++ * 0x....0000 - Unlocked ++ * 0x....000f - Locked ++ * ++ * @param mutex NFP CPP Mutex handle ++ * @return 0 if the lock succeeded, -1 on failure (and errno set ++ * appropriately). 
++ */ ++int ++nfp_cpp_mutex_trylock(struct nfp_cpp_mutex *mutex) ++{ ++ uint32_t mur = NFP_CPP_ID(mutex->target, 3, 0); /* atomic_read */ ++ uint32_t muw = NFP_CPP_ID(mutex->target, 4, 0); /* atomic_write */ ++ uint32_t mus = NFP_CPP_ID(mutex->target, 5, 3); /* test_set_imm */ ++ uint32_t key, value, tmp; ++ struct nfp_cpp *cpp = mutex->cpp; ++ int err; ++ ++ if (mutex->depth > 0) { ++ if (mutex->depth == MUTEX_DEPTH_MAX) ++ return NFP_ERRNO(E2BIG); ++ ++ mutex->depth++; ++ return 0; ++ } ++ ++ /* Verify that the lock marker is not damaged */ ++ err = nfp_cpp_readl(cpp, mur, mutex->address + 4, &key); ++ if (err < 0) ++ goto exit; ++ ++ if (key != mutex->key) { ++ err = NFP_ERRNO(EPERM); ++ goto exit; ++ } ++ ++ /* ++ * Compare against the unlocked state, and if true, ++ * write the interface id into the top 16 bits, and ++ * mark as locked. ++ */ ++ value = MUTEX_LOCKED(nfp_cpp_interface(cpp)); ++ ++ /* ++ * We use test_set_imm here, as it implies a read ++ * of the current state, and sets the bits in the ++ * bytemask of the command to 1s. Since the mutex ++ * is guaranteed to be 64-bit aligned, the bytemask ++ * of this 32-bit command is ensured to be 8'b00001111, ++ * which implies that the lower 4 bits will be set to ++ * ones regardless of the initial state. ++ * ++ * Since this is a 'Readback' operation, with no Pull ++ * data, we can treat this as a normal Push (read) ++ * atomic, which returns the original value. ++ */ ++ err = nfp_cpp_readl(cpp, mus, mutex->address, &tmp); ++ if (err < 0) ++ goto exit; ++ ++ /* Was it unlocked? */ ++ if (MUTEX_IS_UNLOCKED(tmp)) { ++ /* ++ * The read value can only be 0x....0000 in the unlocked state. ++ * If there was another contending for this lock, then ++ * the lock state would be 0x....000f ++ * ++ * Write our owner ID into the lock ++ * While not strictly necessary, this helps with ++ * debug and bookkeeping. 
++ */ ++ err = nfp_cpp_writel(cpp, muw, mutex->address, value); ++ if (err < 0) ++ goto exit; ++ ++ mutex->depth = 1; ++ goto exit; ++ } ++ ++ /* Already locked by us? Success! */ ++ if (tmp == value) { ++ mutex->depth = 1; ++ goto exit; ++ } ++ ++ err = NFP_ERRNO(MUTEX_IS_LOCKED(tmp) ? EBUSY : EINVAL); ++ ++exit: ++ return err; ++} +diff --git a/drivers/net/nfp/nfpcore/nfp_nffw.c b/drivers/net/nfp/nfpcore/nfp_nffw.c +new file mode 100644 +index 000000000..8bec0e3c9 +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp_nffw.c +@@ -0,0 +1,235 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. ++ */ ++ ++#include "nfp_cpp.h" ++#include "nfp_nffw.h" ++#include "nfp_mip.h" ++#include "nfp6000/nfp6000.h" ++#include "nfp_resource.h" ++ ++/* ++ * flg_info_version = flags[0]<27:16> ++ * This is a small version counter intended only to detect if the current ++ * implementation can read the current struct. Struct changes should be very ++ * rare and as such a 12-bit counter should cover large spans of time. By the ++ * time it wraps around, we don't expect to have 4096 versions of this struct ++ * to be in use at the same time. 
++ */ ++static uint32_t ++nffw_res_info_version_get(const struct nfp_nffw_info_data *res) ++{ ++ return (res->flags[0] >> 16) & 0xfff; ++} ++ ++/* flg_init = flags[0]<0> */ ++static uint32_t ++nffw_res_flg_init_get(const struct nfp_nffw_info_data *res) ++{ ++ return (res->flags[0] >> 0) & 1; ++} ++ ++/* loaded = loaded__mu_da__mip_off_hi<31:31> */ ++static uint32_t ++nffw_fwinfo_loaded_get(const struct nffw_fwinfo *fi) ++{ ++ return (fi->loaded__mu_da__mip_off_hi >> 31) & 1; ++} ++ ++/* mip_cppid = mip_cppid */ ++static uint32_t ++nffw_fwinfo_mip_cppid_get(const struct nffw_fwinfo *fi) ++{ ++ return fi->mip_cppid; ++} ++ ++/* mu_da = loaded__mu_da__mip_off_hi<8:8> */ ++static uint32_t ++nffw_fwinfo_mip_mu_da_get(const struct nffw_fwinfo *fi) ++{ ++ return (fi->loaded__mu_da__mip_off_hi >> 8) & 1; ++} ++ ++/* mip_offset = (loaded__mu_da__mip_off_hi<7:0> << 32) | mip_offset_lo */ ++static uint64_t ++nffw_fwinfo_mip_offset_get(const struct nffw_fwinfo *fi) ++{ ++ uint64_t mip_off_hi = fi->loaded__mu_da__mip_off_hi; ++ ++ return (mip_off_hi & 0xFF) << 32 | fi->mip_offset_lo; ++} ++ ++#define NFP_IMB_TGTADDRESSMODECFG_MODE_of(_x) (((_x) >> 13) & 0x7) ++#define NFP_IMB_TGTADDRESSMODECFG_ADDRMODE BIT(12) ++#define NFP_IMB_TGTADDRESSMODECFG_ADDRMODE_32_BIT 0 ++#define NFP_IMB_TGTADDRESSMODECFG_ADDRMODE_40_BIT BIT(12) ++ ++static int ++nfp_mip_mu_locality_lsb(struct nfp_cpp *cpp) ++{ ++ unsigned int mode, addr40; ++ uint32_t xpbaddr, imbcppat; ++ int err; ++ ++ /* Hardcoded XPB IMB Base, island 0 */ ++ xpbaddr = 0x000a0000 + NFP_CPP_TARGET_MU * 4; ++ err = nfp_xpb_readl(cpp, xpbaddr, &imbcppat); ++ if (err < 0) ++ return err; ++ ++ mode = NFP_IMB_TGTADDRESSMODECFG_MODE_of(imbcppat); ++ addr40 = !!(imbcppat & NFP_IMB_TGTADDRESSMODECFG_ADDRMODE); ++ ++ return nfp_cppat_mu_locality_lsb(mode, addr40); ++} ++ ++static unsigned int ++nffw_res_fwinfos(struct nfp_nffw_info_data *fwinf, struct nffw_fwinfo **arr) ++{ ++ /* ++ * For this code, version 0 is most likely to be 
version 1 in this ++ * case. Since the kernel driver does not take responsibility for ++ * initialising the nfp.nffw resource, any previous code (CA firmware or ++ * userspace) that left the version 0 and did set the init flag is going ++ * to be version 1. ++ */ ++ switch (nffw_res_info_version_get(fwinf)) { ++ case 0: ++ case 1: ++ *arr = &fwinf->info.v1.fwinfo[0]; ++ return NFFW_FWINFO_CNT_V1; ++ case 2: ++ *arr = &fwinf->info.v2.fwinfo[0]; ++ return NFFW_FWINFO_CNT_V2; ++ default: ++ *arr = NULL; ++ return 0; ++ } ++} ++ ++/* ++ * nfp_nffw_info_open() - Acquire the lock on the NFFW table ++ * @cpp: NFP CPP handle ++ * ++ * Return: NFFW info state handle, or NULL on failure ++ */ ++struct nfp_nffw_info * ++nfp_nffw_info_open(struct nfp_cpp *cpp) ++{ ++ struct nfp_nffw_info_data *fwinf; ++ struct nfp_nffw_info *state; ++ uint32_t info_ver; ++ int err; ++ ++ state = malloc(sizeof(*state)); ++ if (!state) ++ return NULL; ++ ++ memset(state, 0, sizeof(*state)); ++ ++ state->res = nfp_resource_acquire(cpp, NFP_RESOURCE_NFP_NFFW); ++ if (!state->res) ++ goto err_free; ++ ++ fwinf = &state->fwinf; ++ ++ if (sizeof(*fwinf) > nfp_resource_size(state->res)) ++ goto err_release; ++ ++ err = nfp_cpp_read(cpp, nfp_resource_cpp_id(state->res), ++ nfp_resource_address(state->res), ++ fwinf, sizeof(*fwinf)); ++ if (err < (int)sizeof(*fwinf)) ++ goto err_release; ++ ++ if (!nffw_res_flg_init_get(fwinf)) ++ goto err_release; ++ ++ info_ver = nffw_res_info_version_get(fwinf); ++ if (info_ver > NFFW_INFO_VERSION_CURRENT) ++ goto err_release; ++ ++ state->cpp = cpp; ++ return state; ++ ++err_release: ++ nfp_resource_release(state->res); ++err_free: ++ free(state); ++ return NULL; ++} ++ ++/* ++ * nfp_nffw_info_close() - Release the lock on the NFFW table ++ * @state: NFP FW info state ++ * ++ * Releases the NFFW resource and frees @state; nothing is returned. ++ */ ++void ++nfp_nffw_info_close(struct nfp_nffw_info *state) ++{ ++ nfp_resource_release(state->res); ++ free(state); ++} ++ ++/* ++ * nfp_nffw_info_fwid_first() - Return the first firmware ID in 
the NFFW ++ * @state: NFP FW info state ++ * ++ * Return: First NFFW firmware info, NULL on failure ++ */ ++static struct nffw_fwinfo * ++nfp_nffw_info_fwid_first(struct nfp_nffw_info *state) ++{ ++ struct nffw_fwinfo *fwinfo; ++ unsigned int cnt, i; ++ ++ cnt = nffw_res_fwinfos(&state->fwinf, &fwinfo); ++ if (!cnt) ++ return NULL; ++ ++ for (i = 0; i < cnt; i++) ++ if (nffw_fwinfo_loaded_get(&fwinfo[i])) ++ return &fwinfo[i]; ++ ++ return NULL; ++} ++ ++/* ++ * nfp_nffw_info_mip_first() - Retrieve the location of the first FW's MIP ++ * @state: NFP FW info state ++ * @cpp_id: Pointer to the CPP ID of the MIP ++ * @off: Pointer to the CPP Address of the MIP ++ * ++ * Return: 0, or -ERRNO ++ */ ++int ++nfp_nffw_info_mip_first(struct nfp_nffw_info *state, uint32_t *cpp_id, ++ uint64_t *off) ++{ ++ struct nffw_fwinfo *fwinfo; ++ ++ fwinfo = nfp_nffw_info_fwid_first(state); ++ if (!fwinfo) ++ return -EINVAL; ++ ++ *cpp_id = nffw_fwinfo_mip_cppid_get(fwinfo); ++ *off = nffw_fwinfo_mip_offset_get(fwinfo); ++ ++ if (nffw_fwinfo_mip_mu_da_get(fwinfo)) { ++ int locality_off; ++ ++ if (NFP_CPP_ID_TARGET_of(*cpp_id) != NFP_CPP_TARGET_MU) ++ return 0; ++ ++ locality_off = nfp_mip_mu_locality_lsb(state->cpp); ++ if (locality_off < 0) ++ return locality_off; ++ ++ *off &= ~(NFP_MU_ADDR_ACCESS_TYPE_MASK << locality_off); ++ *off |= NFP_MU_ADDR_ACCESS_TYPE_DIRECT << locality_off; ++ } ++ ++ return 0; ++} +diff --git a/drivers/net/nfp/nfpcore/nfp_nffw.h b/drivers/net/nfp/nfpcore/nfp_nffw.h +new file mode 100644 +index 000000000..3bbdf1c13 +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp_nffw.h +@@ -0,0 +1,86 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. ++ */ ++ ++#ifndef __NFP_NFFW_H__ ++#define __NFP_NFFW_H__ ++ ++#include "nfp-common/nfp_platform.h" ++#include "nfp_cpp.h" ++ ++/* ++ * Init-CSR owner IDs for firmware map to firmware IDs which start at 4. 
++ * Lower IDs are reserved for target and loader IDs. ++ */ ++#define NFFW_FWID_EXT 3 /* For active MEs that we didn't load. */ ++#define NFFW_FWID_BASE 4 ++ ++#define NFFW_FWID_ALL 255 ++ ++/* NOTE(review): the three defines below repeat the group just above with ++ * identical values; the repetition is redundant and could be dropped. ++ */ ++#define NFFW_FWID_EXT 3 /* For active MEs that we didn't load. */ ++#define NFFW_FWID_BASE 4 ++ ++#define NFFW_FWID_ALL 255 ++ ++/** ++ * NFFW_INFO_VERSION history: ++ * 0: This was never actually used (before versioning), but it refers to ++ * the previous struct which had FWINFO_CNT = MEINFO_CNT = 120 that later ++ * changed to 200. ++ * 1: First versioned struct, with ++ * FWINFO_CNT = 120 ++ * MEINFO_CNT = 120 ++ * 2: FWINFO_CNT = 200 ++ * MEINFO_CNT = 200 ++ */ ++#define NFFW_INFO_VERSION_CURRENT 2 ++ ++/* Enough for all current chip families */ ++#define NFFW_MEINFO_CNT_V1 120 ++#define NFFW_FWINFO_CNT_V1 120 ++#define NFFW_MEINFO_CNT_V2 200 ++#define NFFW_FWINFO_CNT_V2 200 ++ ++struct nffw_meinfo { ++ uint32_t ctxmask__fwid__meid; ++}; ++ ++struct nffw_fwinfo { ++ uint32_t loaded__mu_da__mip_off_hi; ++ uint32_t mip_cppid; /* 0 means no MIP */ ++ uint32_t mip_offset_lo; ++}; ++ ++struct nfp_nffw_info_v1 { ++ struct nffw_meinfo meinfo[NFFW_MEINFO_CNT_V1]; ++ struct nffw_fwinfo fwinfo[NFFW_FWINFO_CNT_V1]; ++}; ++ ++struct nfp_nffw_info_v2 { ++ struct nffw_meinfo meinfo[NFFW_MEINFO_CNT_V2]; ++ struct nffw_fwinfo fwinfo[NFFW_FWINFO_CNT_V2]; ++}; ++ ++struct nfp_nffw_info_data { ++ uint32_t flags[2]; ++ union { ++ struct nfp_nffw_info_v1 v1; ++ struct nfp_nffw_info_v2 v2; ++ } info; ++}; ++ ++struct nfp_nffw_info { ++ struct nfp_cpp *cpp; ++ struct nfp_resource *res; ++ ++ struct nfp_nffw_info_data fwinf; ++}; ++ ++struct nfp_nffw_info *nfp_nffw_info_open(struct nfp_cpp *cpp); ++void nfp_nffw_info_close(struct nfp_nffw_info *state); ++ ++#endif +diff --git a/drivers/net/nfp/nfpcore/nfp_nsp.c b/drivers/net/nfp/nfpcore/nfp_nsp.c +new file mode
100644 +index 000000000..876a4017c +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp_nsp.c +@@ -0,0 +1,427 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. ++ */ ++ ++#define NFP_SUBSYS "nfp_nsp" ++ ++#include ++#include ++ ++#include ++ ++#include "nfp_cpp.h" ++#include "nfp_nsp.h" ++#include "nfp_resource.h" ++ ++int ++nfp_nsp_config_modified(struct nfp_nsp *state) ++{ ++ return state->modified; ++} ++ ++void ++nfp_nsp_config_set_modified(struct nfp_nsp *state, int modified) ++{ ++ state->modified = modified; ++} ++ ++void * ++nfp_nsp_config_entries(struct nfp_nsp *state) ++{ ++ return state->entries; ++} ++ ++unsigned int ++nfp_nsp_config_idx(struct nfp_nsp *state) ++{ ++ return state->idx; ++} ++ ++void ++nfp_nsp_config_set_state(struct nfp_nsp *state, void *entries, unsigned int idx) ++{ ++ state->entries = entries; ++ state->idx = idx; ++} ++ ++void ++nfp_nsp_config_clear_state(struct nfp_nsp *state) ++{ ++ state->entries = NULL; ++ state->idx = 0; ++} ++ ++static void ++nfp_nsp_print_extended_error(uint32_t ret_val) ++{ ++ int i; ++ ++ if (!ret_val) ++ return; ++ ++ for (i = 0; i < (int)ARRAY_SIZE(nsp_errors); i++) ++ if (ret_val == (uint32_t)nsp_errors[i].code) ++ printf("err msg: %s\n", nsp_errors[i].msg); ++} ++ ++static int ++nfp_nsp_check(struct nfp_nsp *state) ++{ ++ struct nfp_cpp *cpp = state->cpp; ++ uint64_t nsp_status, reg; ++ uint32_t nsp_cpp; ++ int err; ++ ++ nsp_cpp = nfp_resource_cpp_id(state->res); ++ nsp_status = nfp_resource_address(state->res) + NSP_STATUS; ++ ++ err = nfp_cpp_readq(cpp, nsp_cpp, nsp_status, &reg); ++ if (err < 0) ++ return err; ++ ++ if (FIELD_GET(NSP_STATUS_MAGIC, reg) != NSP_MAGIC) { ++ printf("Cannot detect NFP Service Processor\n"); ++ return -ENODEV; ++ } ++ ++ state->ver.major = FIELD_GET(NSP_STATUS_MAJOR, reg); ++ state->ver.minor = FIELD_GET(NSP_STATUS_MINOR, reg); ++ ++ if (state->ver.major != NSP_MAJOR || state->ver.minor < NSP_MINOR) { ++
printf("Unsupported ABI %hu.%hu\n", state->ver.major, ++ state->ver.minor); ++ return -EINVAL; ++ } ++ ++ if (reg & NSP_STATUS_BUSY) { ++ printf("Service processor busy!\n"); ++ return -EBUSY; ++ } ++ ++ return 0; ++} ++ ++/* ++ * nfp_nsp_open() - Prepare for communication and lock the NSP resource. ++ * @cpp: NFP CPP Handle ++ */ ++struct nfp_nsp * ++nfp_nsp_open(struct nfp_cpp *cpp) ++{ ++ struct nfp_resource *res; ++ struct nfp_nsp *state; ++ int err; ++ ++ res = nfp_resource_acquire(cpp, NFP_RESOURCE_NSP); ++ if (!res) ++ return NULL; ++ ++ state = malloc(sizeof(*state)); ++ if (!state) { ++ nfp_resource_release(res); ++ return NULL; ++ } ++ memset(state, 0, sizeof(*state)); ++ state->cpp = cpp; ++ state->res = res; ++ ++ err = nfp_nsp_check(state); ++ if (err) { ++ nfp_nsp_close(state); ++ return NULL; ++ } ++ ++ return state; ++} ++ ++/* ++ * nfp_nsp_close() - Clean up and unlock the NSP resource. ++ * @state: NFP SP state ++ */ ++void ++nfp_nsp_close(struct nfp_nsp *state) ++{ ++ nfp_resource_release(state->res); ++ free(state); ++} ++ ++uint16_t ++nfp_nsp_get_abi_ver_major(struct nfp_nsp *state) ++{ ++ return state->ver.major; ++} ++ ++uint16_t ++nfp_nsp_get_abi_ver_minor(struct nfp_nsp *state) ++{ ++ return state->ver.minor; ++} ++ ++static int ++nfp_nsp_wait_reg(struct nfp_cpp *cpp, uint64_t *reg, uint32_t nsp_cpp, ++ uint64_t addr, uint64_t mask, uint64_t val) ++{ ++ struct timespec wait; ++ int count; ++ int err; ++ ++ wait.tv_sec = 0; ++ wait.tv_nsec = 25000000; ++ count = 0; ++ ++ for (;;) { ++ err = nfp_cpp_readq(cpp, nsp_cpp, addr, reg); ++ if (err < 0) ++ return err; ++ ++ if ((*reg & mask) == val) ++ return 0; ++ ++ nanosleep(&wait, 0); ++ if (count++ > 1000) ++ return -ETIMEDOUT; ++ } ++} ++ ++/* ++ * nfp_nsp_command() - Execute a command on the NFP Service Processor ++ * @state: NFP SP state ++ * @code: NFP SP Command Code ++ * @option: NFP SP Command Argument ++ * @buff_cpp: NFP SP Buffer CPP Address info ++ * @buff_addr: NFP SP Buffer Host 
address ++ * ++ * Return: 0 for success with no result ++ * ++ * positive value for NSP completion with a result code ++ * ++ * -EAGAIN if the NSP is not yet present ++ * -ENODEV if the NSP is not a supported model ++ * -EBUSY if the NSP is stuck ++ * -EINTR if interrupted while waiting for completion ++ * -ETIMEDOUT if a wait stage exceeds ~25 s (about 1000 polls of 25 ms) ++ */ ++static int ++nfp_nsp_command(struct nfp_nsp *state, uint16_t code, uint32_t option, ++ uint32_t buff_cpp, uint64_t buff_addr) ++{ ++ uint64_t reg, ret_val, nsp_base, nsp_buffer, nsp_status, nsp_command; ++ struct nfp_cpp *cpp = state->cpp; ++ uint32_t nsp_cpp; ++ int err; ++ ++ nsp_cpp = nfp_resource_cpp_id(state->res); ++ nsp_base = nfp_resource_address(state->res); ++ nsp_status = nsp_base + NSP_STATUS; ++ nsp_command = nsp_base + NSP_COMMAND; ++ nsp_buffer = nsp_base + NSP_BUFFER; ++ ++ err = nfp_nsp_check(state); ++ if (err) ++ return err; ++ ++ if (!FIELD_FIT(NSP_BUFFER_CPP, buff_cpp >> 8) || ++ !FIELD_FIT(NSP_BUFFER_ADDRESS, buff_addr)) { ++ printf("Host buffer out of reach %08x %" PRIx64 "\n", ++ buff_cpp, buff_addr); ++ return -EINVAL; ++ } ++ ++ err = nfp_cpp_writeq(cpp, nsp_cpp, nsp_buffer, ++ FIELD_PREP(NSP_BUFFER_CPP, buff_cpp >> 8) | ++ FIELD_PREP(NSP_BUFFER_ADDRESS, buff_addr)); ++ if (err < 0) ++ return err; ++ ++ err = nfp_cpp_writeq(cpp, nsp_cpp, nsp_command, ++ FIELD_PREP(NSP_COMMAND_OPTION, option) | ++ FIELD_PREP(NSP_COMMAND_CODE, code) | ++ FIELD_PREP(NSP_COMMAND_START, 1)); ++ if (err < 0) ++ return err; ++ ++ /* Wait for NSP_COMMAND_START to go to 0 */ ++ err = nfp_nsp_wait_reg(cpp, &reg, nsp_cpp, nsp_command, ++ NSP_COMMAND_START, 0); ++ if (err) { ++ printf("Error %d waiting for code 0x%04x to start\n", ++ err, code); ++ return err; ++ } ++ ++ /* Wait for NSP_STATUS_BUSY to go to 0 */ ++ err = nfp_nsp_wait_reg(cpp, &reg, nsp_cpp, nsp_status, NSP_STATUS_BUSY, ++ 0); ++ if (err) { ++ printf("Error %d waiting for code 0x%04x to complete\n", ++ err, code); ++ return err; ++ } ++
++ err = nfp_cpp_readq(cpp, nsp_cpp, nsp_command, &ret_val); ++ if (err < 0) ++ return err; ++ ret_val = FIELD_GET(NSP_COMMAND_OPTION, ret_val); ++ ++ err = FIELD_GET(NSP_STATUS_RESULT, reg); ++ if (err) { ++ printf("Result (error) code set: %d (%d) command: %d\n", ++ -err, (int)ret_val, code); ++ nfp_nsp_print_extended_error(ret_val); ++ return -err; ++ } ++ ++ return ret_val; ++} ++ ++#define SZ_1M 0x00100000 ++ ++static int ++nfp_nsp_command_buf(struct nfp_nsp *nsp, uint16_t code, uint32_t option, ++ const void *in_buf, unsigned int in_size, void *out_buf, ++ unsigned int out_size) ++{ ++ struct nfp_cpp *cpp = nsp->cpp; ++ unsigned int max_size; ++ uint64_t reg, cpp_buf; ++ int ret, err; ++ uint32_t cpp_id; ++ ++ if (nsp->ver.minor < 13) { ++ printf("NSP: Code 0x%04x with buffer not supported\n", code); ++ printf("\t(ABI %hu.%hu)\n", nsp->ver.major, nsp->ver.minor); ++ return -EOPNOTSUPP; ++ } ++ ++ err = nfp_cpp_readq(cpp, nfp_resource_cpp_id(nsp->res), ++ nfp_resource_address(nsp->res) + ++ NSP_DFLT_BUFFER_CONFIG, ++ &reg); ++ if (err < 0) ++ return err; ++ ++ max_size = RTE_MAX(in_size, out_size); ++ if (FIELD_GET(NSP_DFLT_BUFFER_SIZE_MB, reg) * SZ_1M < max_size) { ++ printf("NSP: default buffer too small for command 0x%04x\n", ++ code); ++ printf("\t(%llu < %u)\n", ++ FIELD_GET(NSP_DFLT_BUFFER_SIZE_MB, reg) * SZ_1M, ++ max_size); ++ return -EINVAL; ++ } ++ ++ err = nfp_cpp_readq(cpp, nfp_resource_cpp_id(nsp->res), ++ nfp_resource_address(nsp->res) + ++ NSP_DFLT_BUFFER, ++ &reg); ++ if (err < 0) ++ return err; ++ ++ cpp_id = FIELD_GET(NSP_BUFFER_CPP, reg) << 8; ++ cpp_buf = FIELD_GET(NSP_BUFFER_ADDRESS, reg); ++ ++ if (in_buf && in_size) { ++ err = nfp_cpp_write(cpp, cpp_id, cpp_buf, in_buf, in_size); ++ if (err < 0) ++ return err; ++ } ++ /* Zero out remaining part of the buffer */ ++ if (out_buf && out_size && out_size > in_size) { ++ memset(out_buf, 0, out_size - in_size); ++ err = nfp_cpp_write(cpp, cpp_id, cpp_buf + in_size, out_buf, ++ out_size - in_size); ++
if (err < 0) ++ return err; ++ } ++ ++ ret = nfp_nsp_command(nsp, code, option, cpp_id, cpp_buf); ++ if (ret < 0) ++ return ret; ++ ++ if (out_buf && out_size) { ++ err = nfp_cpp_read(cpp, cpp_id, cpp_buf, out_buf, out_size); ++ if (err < 0) ++ return err; ++ } ++ ++ return ret; ++} ++ ++int ++nfp_nsp_wait(struct nfp_nsp *state) ++{ ++ struct timespec wait; ++ int count; ++ int err; ++ ++ wait.tv_sec = 0; ++ wait.tv_nsec = 25000000; ++ count = 0; ++ ++ for (;;) { ++ err = nfp_nsp_command(state, SPCODE_NOOP, 0, 0, 0); ++ if (err != -EAGAIN) ++ break; ++ ++ nanosleep(&wait, 0); ++ ++ if (count++ > 1000) { ++ err = -ETIMEDOUT; ++ break; ++ } ++ } ++ if (err) ++ printf("NSP failed to respond %d\n", err); ++ ++ return err; ++} ++ ++int ++nfp_nsp_device_soft_reset(struct nfp_nsp *state) ++{ ++ return nfp_nsp_command(state, SPCODE_SOFT_RESET, 0, 0, 0); ++} ++ ++int ++nfp_nsp_mac_reinit(struct nfp_nsp *state) ++{ ++ return nfp_nsp_command(state, SPCODE_MAC_INIT, 0, 0, 0); ++} ++ ++int ++nfp_nsp_load_fw(struct nfp_nsp *state, void *buf, unsigned int size) ++{ ++ return nfp_nsp_command_buf(state, SPCODE_FW_LOAD, size, buf, size, ++ NULL, 0); ++} ++ ++int ++nfp_nsp_read_eth_table(struct nfp_nsp *state, void *buf, unsigned int size) ++{ ++ return nfp_nsp_command_buf(state, SPCODE_ETH_RESCAN, size, NULL, 0, ++ buf, size); ++} ++ ++int ++nfp_nsp_write_eth_table(struct nfp_nsp *state, const void *buf, ++ unsigned int size) ++{ ++ return nfp_nsp_command_buf(state, SPCODE_ETH_CONTROL, size, buf, size, ++ NULL, 0); ++} ++ ++int ++nfp_nsp_read_identify(struct nfp_nsp *state, void *buf, unsigned int size) ++{ ++ return nfp_nsp_command_buf(state, SPCODE_NSP_IDENTIFY, size, NULL, 0, ++ buf, size); ++} ++ ++int ++nfp_nsp_read_sensors(struct nfp_nsp *state, unsigned int sensor_mask, void *buf, ++ unsigned int size) ++{ ++ return nfp_nsp_command_buf(state, SPCODE_NSP_SENSORS, sensor_mask, NULL, ++ 0, buf, size); ++} +diff --git a/drivers/net/nfp/nfpcore/nfp_nsp.h 
b/drivers/net/nfp/nfpcore/nfp_nsp.h +new file mode 100644 +index 000000000..c9c7b0d0f +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp_nsp.h +@@ -0,0 +1,304 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. ++ */ ++ ++#ifndef NSP_NSP_H ++#define NSP_NSP_H 1 ++ ++#include "nfp_cpp.h" ++#include "nfp_nsp.h" ++ ++#define GENMASK_ULL(h, l) \ ++ (((~0ULL) - (1ULL << (l)) + 1) & \ ++ (~0ULL >> (64 - 1 - (h)))) ++ ++#define __bf_shf(x) (__builtin_ffsll(x) - 1) ++ ++#define FIELD_GET(_mask, _reg) \ ++ (__extension__ ({ \ ++ typeof(_mask) _x = (_mask); \ ++ (typeof(_x))(((_reg) & (_x)) >> __bf_shf(_x)); \ ++ })) ++ ++#define FIELD_FIT(_mask, _val) \ ++ (__extension__ ({ \ ++ typeof(_mask) _x = (_mask); \ ++ !((((typeof(_x))_val) << __bf_shf(_x)) & ~(_x)); \ ++ })) ++ ++#define FIELD_PREP(_mask, _val) \ ++ (__extension__ ({ \ ++ typeof(_mask) _x = (_mask); \ ++ ((typeof(_x))(_val) << __bf_shf(_x)) & (_x); \ ++ })) ++ ++/* Offsets relative to the CSR base */ ++#define NSP_STATUS 0x00 ++#define NSP_STATUS_MAGIC GENMASK_ULL(63, 48) ++#define NSP_STATUS_MAJOR GENMASK_ULL(47, 44) ++#define NSP_STATUS_MINOR GENMASK_ULL(43, 32) ++#define NSP_STATUS_CODE GENMASK_ULL(31, 16) ++#define NSP_STATUS_RESULT GENMASK_ULL(15, 8) ++#define NSP_STATUS_BUSY BIT_ULL(0) ++ ++#define NSP_COMMAND 0x08 ++#define NSP_COMMAND_OPTION GENMASK_ULL(63, 32) ++#define NSP_COMMAND_CODE GENMASK_ULL(31, 16) ++#define NSP_COMMAND_START BIT_ULL(0) ++ ++/* CPP address to retrieve the data from */ ++#define NSP_BUFFER 0x10 ++#define NSP_BUFFER_CPP GENMASK_ULL(63, 40) ++#define NSP_BUFFER_PCIE GENMASK_ULL(39, 38) ++#define NSP_BUFFER_ADDRESS GENMASK_ULL(37, 0) ++ ++#define NSP_DFLT_BUFFER 0x18 ++ ++#define NSP_DFLT_BUFFER_CONFIG 0x20 ++#define NSP_DFLT_BUFFER_SIZE_MB GENMASK_ULL(7, 0) ++ ++#define NSP_MAGIC 0xab10 ++#define NSP_MAJOR 0 ++#define NSP_MINOR 8 ++ ++#define NSP_CODE_MAJOR GENMASK(15, 12) ++#define NSP_CODE_MINOR GENMASK(11, 0) ++ 
++enum nfp_nsp_cmd { ++ SPCODE_NOOP = 0, /* No operation */ ++ SPCODE_SOFT_RESET = 1, /* Soft reset the NFP */ ++ SPCODE_FW_DEFAULT = 2, /* Load default (UNDI) FW */ ++ SPCODE_PHY_INIT = 3, /* Initialize the PHY */ ++ SPCODE_MAC_INIT = 4, /* Initialize the MAC */ ++ SPCODE_PHY_RXADAPT = 5, /* Re-run PHY RX Adaptation */ ++ SPCODE_FW_LOAD = 6, /* Load fw from buffer, len in option */ ++ SPCODE_ETH_RESCAN = 7, /* Rescan ETHs, write ETH_TABLE to buf */ ++ SPCODE_ETH_CONTROL = 8, /* Update media config from buffer */ ++ SPCODE_NSP_SENSORS = 12, /* Read NSP sensor(s) */ ++ SPCODE_NSP_IDENTIFY = 13, /* Read NSP version */ ++}; ++ ++static const struct { ++ int code; ++ const char *msg; ++} nsp_errors[] = { ++ { 6010, "could not map to phy for port" }, ++ { 6011, "not an allowed rate/lanes for port" }, ++ { 6012, "not an allowed rate/lanes for port" }, ++ { 6013, "high/low error, change other port first" }, ++ { 6014, "config not found in flash" }, ++}; ++ ++struct nfp_nsp { ++ struct nfp_cpp *cpp; ++ struct nfp_resource *res; ++ struct { ++ uint16_t major; ++ uint16_t minor; ++ } ver; ++ ++ /* Eth table config state */ ++ int modified; ++ unsigned int idx; ++ void *entries; ++}; ++ ++struct nfp_nsp *nfp_nsp_open(struct nfp_cpp *cpp); ++void nfp_nsp_close(struct nfp_nsp *state); ++uint16_t nfp_nsp_get_abi_ver_major(struct nfp_nsp *state); ++uint16_t nfp_nsp_get_abi_ver_minor(struct nfp_nsp *state); ++int nfp_nsp_wait(struct nfp_nsp *state); ++int nfp_nsp_device_soft_reset(struct nfp_nsp *state); ++int nfp_nsp_load_fw(struct nfp_nsp *state, void *buf, unsigned int size); ++int nfp_nsp_mac_reinit(struct nfp_nsp *state); ++int nfp_nsp_read_identify(struct nfp_nsp *state, void *buf, unsigned int size); ++int nfp_nsp_read_sensors(struct nfp_nsp *state, unsigned int sensor_mask, ++ void *buf, unsigned int size); ++ ++static inline int nfp_nsp_has_mac_reinit(struct nfp_nsp *state) ++{ ++ return nfp_nsp_get_abi_ver_minor(state) > 20; ++} ++ ++enum nfp_eth_interface { ++ 
NFP_INTERFACE_NONE = 0, ++ NFP_INTERFACE_SFP = 1, ++ NFP_INTERFACE_SFPP = 10, ++ NFP_INTERFACE_SFP28 = 28, ++ NFP_INTERFACE_QSFP = 40, ++ NFP_INTERFACE_CXP = 100, ++ NFP_INTERFACE_QSFP28 = 112, ++}; ++ ++enum nfp_eth_media { ++ NFP_MEDIA_DAC_PASSIVE = 0, ++ NFP_MEDIA_DAC_ACTIVE, ++ NFP_MEDIA_FIBRE, ++}; ++ ++enum nfp_eth_aneg { ++ NFP_ANEG_AUTO = 0, ++ NFP_ANEG_SEARCH, ++ NFP_ANEG_25G_CONSORTIUM, ++ NFP_ANEG_25G_IEEE, ++ NFP_ANEG_DISABLED, ++}; ++ ++enum nfp_eth_fec { ++ NFP_FEC_AUTO_BIT = 0, ++ NFP_FEC_BASER_BIT, ++ NFP_FEC_REED_SOLOMON_BIT, ++ NFP_FEC_DISABLED_BIT, ++}; ++ ++#define NFP_FEC_AUTO BIT(NFP_FEC_AUTO_BIT) ++#define NFP_FEC_BASER BIT(NFP_FEC_BASER_BIT) ++#define NFP_FEC_REED_SOLOMON BIT(NFP_FEC_REED_SOLOMON_BIT) ++#define NFP_FEC_DISABLED BIT(NFP_FEC_DISABLED_BIT) ++ ++#define ETH_ALEN 6 ++ ++/** ++ * struct nfp_eth_table - ETH table information ++ * @count: number of table entries ++ * @max_index: max of @index fields of all @ports ++ * @ports: table of ports ++ * ++ * @eth_index: port index according to legacy ethX numbering ++ * @index: chip-wide first channel index ++ * @nbi: NBI index ++ * @base: first channel index (within NBI) ++ * @lanes: number of channels ++ * @speed: interface speed (in Mbps) ++ * @interface: interface (module) plugged in ++ * @media: media type of the @interface ++ * @fec: forward error correction mode ++ * @aneg: auto negotiation mode ++ * @mac_addr: interface MAC address ++ * @label_port: port id ++ * @label_subport: id of interface within port (for split ports) ++ * @enabled: is enabled? ++ * @tx_enabled: is TX enabled? ++ * @rx_enabled: is RX enabled? ++ * @override_changed: is media reconfig pending? 
++ * ++ * @port_type: one of %PORT_* defines for ethtool ++ * @port_lanes: total number of lanes on the port (sum of lanes of all subports) ++ * @is_split: is interface part of a split port ++ * @fec_modes_supported: bitmap of FEC modes supported ++ */ ++struct nfp_eth_table { ++ unsigned int count; ++ unsigned int max_index; ++ struct nfp_eth_table_port { ++ unsigned int eth_index; ++ unsigned int index; ++ unsigned int nbi; ++ unsigned int base; ++ unsigned int lanes; ++ unsigned int speed; ++ ++ unsigned int interface; ++ enum nfp_eth_media media; ++ ++ enum nfp_eth_fec fec; ++ enum nfp_eth_aneg aneg; ++ ++ uint8_t mac_addr[ETH_ALEN]; ++ ++ uint8_t label_port; ++ uint8_t label_subport; ++ ++ int enabled; ++ int tx_enabled; ++ int rx_enabled; ++ ++ int override_changed; ++ ++ /* Computed fields */ ++ uint8_t port_type; ++ ++ unsigned int port_lanes; ++ ++ int is_split; ++ ++ unsigned int fec_modes_supported; ++ } ports[0]; ++}; ++ ++struct nfp_eth_table *nfp_eth_read_ports(struct nfp_cpp *cpp); ++ ++int nfp_eth_set_mod_enable(struct nfp_cpp *cpp, unsigned int idx, int enable); ++int nfp_eth_set_configured(struct nfp_cpp *cpp, unsigned int idx, ++ int configed); ++int ++nfp_eth_set_fec(struct nfp_cpp *cpp, unsigned int idx, enum nfp_eth_fec mode); ++ ++int nfp_nsp_read_eth_table(struct nfp_nsp *state, void *buf, unsigned int size); ++int nfp_nsp_write_eth_table(struct nfp_nsp *state, const void *buf, ++ unsigned int size); ++void nfp_nsp_config_set_state(struct nfp_nsp *state, void *entries, ++ unsigned int idx); ++void nfp_nsp_config_clear_state(struct nfp_nsp *state); ++void nfp_nsp_config_set_modified(struct nfp_nsp *state, int modified); ++void *nfp_nsp_config_entries(struct nfp_nsp *state); ++int nfp_nsp_config_modified(struct nfp_nsp *state); ++unsigned int nfp_nsp_config_idx(struct nfp_nsp *state); ++ ++static inline int nfp_eth_can_support_fec(struct nfp_eth_table_port *eth_port) ++{ ++ return !!eth_port->fec_modes_supported; ++} ++ ++static inline 
unsigned int ++nfp_eth_supported_fec_modes(struct nfp_eth_table_port *eth_port) ++{ ++ return eth_port->fec_modes_supported; ++} ++ ++struct nfp_nsp *nfp_eth_config_start(struct nfp_cpp *cpp, unsigned int idx); ++int nfp_eth_config_commit_end(struct nfp_nsp *nsp); ++void nfp_eth_config_cleanup_end(struct nfp_nsp *nsp); ++ ++int __nfp_eth_set_aneg(struct nfp_nsp *nsp, enum nfp_eth_aneg mode); ++int __nfp_eth_set_speed(struct nfp_nsp *nsp, unsigned int speed); ++int __nfp_eth_set_split(struct nfp_nsp *nsp, unsigned int lanes); ++ ++/** ++ * struct nfp_nsp_identify - NSP static information ++ * @version: opaque version string ++ * @flags: version flags ++ * @br_primary: branch id of primary bootloader ++ * @br_secondary: branch id of secondary bootloader ++ * @br_nsp: branch id of NSP ++ * @primary: version id of primary bootloader ++ * @secondary: version id of secondary bootloader ++ * @nsp: version id of NSP ++ * @sensor_mask: mask of present sensors available on NIC ++ */ ++struct nfp_nsp_identify { ++ char version[40]; ++ uint8_t flags; ++ uint8_t br_primary; ++ uint8_t br_secondary; ++ uint8_t br_nsp; ++ uint16_t primary; ++ uint16_t secondary; ++ uint16_t nsp; ++ uint64_t sensor_mask; ++}; ++ ++struct nfp_nsp_identify *__nfp_nsp_identify(struct nfp_nsp *nsp); ++ ++enum nfp_nsp_sensor_id { ++ NFP_SENSOR_CHIP_TEMPERATURE, ++ NFP_SENSOR_ASSEMBLY_POWER, ++ NFP_SENSOR_ASSEMBLY_12V_POWER, ++ NFP_SENSOR_ASSEMBLY_3V3_POWER, ++}; ++ ++int nfp_hwmon_read_sensor(struct nfp_cpp *cpp, enum nfp_nsp_sensor_id id, ++ long *val); ++ ++#endif +diff --git a/drivers/net/nfp/nfpcore/nfp_nsp_cmds.c b/drivers/net/nfp/nfpcore/nfp_nsp_cmds.c +new file mode 100644 +index 000000000..bfd1eddb3 +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp_nsp_cmds.c +@@ -0,0 +1,109 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved.
++ */ ++ ++#include ++#include ++#include "nfp_cpp.h" ++#include "nfp_nsp.h" ++#include "nfp_nffw.h" ++ ++struct nsp_identify { ++ uint8_t version[40]; ++ uint8_t flags; ++ uint8_t br_primary; ++ uint8_t br_secondary; ++ uint8_t br_nsp; ++ uint16_t primary; ++ uint16_t secondary; ++ uint16_t nsp; ++ uint8_t reserved[6]; ++ uint64_t sensor_mask; ++}; ++ ++struct nfp_nsp_identify * ++__nfp_nsp_identify(struct nfp_nsp *nsp) ++{ ++ struct nfp_nsp_identify *nspi = NULL; ++ struct nsp_identify *ni; ++ int ret; ++ ++ if (nfp_nsp_get_abi_ver_minor(nsp) < 15) ++ return NULL; ++ ++ ni = malloc(sizeof(*ni)); ++ if (!ni) ++ return NULL; ++ ++ memset(ni, 0, sizeof(*ni)); ++ ret = nfp_nsp_read_identify(nsp, ni, sizeof(*ni)); ++ if (ret < 0) { ++ printf("reading bsp version failed %d\n", ++ ret); ++ goto exit_free; ++ } ++ ++ nspi = malloc(sizeof(*nspi)); ++ if (!nspi) ++ goto exit_free; ++ ++ memset(nspi, 0, sizeof(*nspi)); ++ memcpy(nspi->version, ni->version, sizeof(nspi->version)); ++ nspi->version[sizeof(nspi->version) - 1] = '\0'; ++ nspi->flags = ni->flags; ++ nspi->br_primary = ni->br_primary; ++ nspi->br_secondary = ni->br_secondary; ++ nspi->br_nsp = ni->br_nsp; ++ nspi->primary = rte_le_to_cpu_16(ni->primary); ++ nspi->secondary = rte_le_to_cpu_16(ni->secondary); ++ nspi->nsp = rte_le_to_cpu_16(ni->nsp); ++ nspi->sensor_mask = rte_le_to_cpu_64(ni->sensor_mask); ++ ++exit_free: ++ free(ni); ++ return nspi; ++} ++ ++struct nfp_sensors { ++ uint32_t chip_temp; ++ uint32_t assembly_power; ++ uint32_t assembly_12v_power; ++ uint32_t assembly_3v3_power; ++}; ++ ++int ++nfp_hwmon_read_sensor(struct nfp_cpp *cpp, enum nfp_nsp_sensor_id id, long *val) ++{ ++ struct nfp_sensors s; ++ struct nfp_nsp *nsp; ++ int ret; ++ ++ nsp = nfp_nsp_open(cpp); ++ if (!nsp) ++ return -EIO; ++ ++ ret = nfp_nsp_read_sensors(nsp, BIT(id), &s, sizeof(s)); ++ nfp_nsp_close(nsp); ++ ++ if (ret < 0) ++ return ret; ++ ++ switch (id) { ++ case NFP_SENSOR_CHIP_TEMPERATURE: ++ *val = 
rte_le_to_cpu_32(s.chip_temp); ++ break; ++ case NFP_SENSOR_ASSEMBLY_POWER: ++ *val = rte_le_to_cpu_32(s.assembly_power); ++ break; ++ case NFP_SENSOR_ASSEMBLY_12V_POWER: ++ *val = rte_le_to_cpu_32(s.assembly_12v_power); ++ break; ++ case NFP_SENSOR_ASSEMBLY_3V3_POWER: ++ *val = rte_le_to_cpu_32(s.assembly_3v3_power); ++ break; ++ default: ++ return -EINVAL; ++ } ++ return 0; ++} +diff --git a/drivers/net/nfp/nfpcore/nfp_nsp_eth.c b/drivers/net/nfp/nfpcore/nfp_nsp_eth.c +new file mode 100644 +index 000000000..67946891a +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp_nsp_eth.c +@@ -0,0 +1,665 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include "nfp_cpp.h" ++#include "nfp_nsp.h" ++#include "nfp6000/nfp6000.h" ++ ++#define GENMASK_ULL(h, l) \ ++ (((~0ULL) - (1ULL << (l)) + 1) & \ ++ (~0ULL >> (64 - 1 - (h)))) ++ ++#define __bf_shf(x) (__builtin_ffsll(x) - 1) ++ ++#define FIELD_GET(_mask, _reg) \ ++ (__extension__ ({ \ ++ typeof(_mask) _x = (_mask); \ ++ (typeof(_x))(((_reg) & (_x)) >> __bf_shf(_x)); \ ++ })) ++ ++#define FIELD_FIT(_mask, _val) \ ++ (__extension__ ({ \ ++ typeof(_mask) _x = (_mask); \ ++ !((((typeof(_x))_val) << __bf_shf(_x)) & ~(_x)); \ ++ })) ++ ++#define FIELD_PREP(_mask, _val) \ ++ (__extension__ ({ \ ++ typeof(_mask) _x = (_mask); \ ++ ((typeof(_x))(_val) << __bf_shf(_x)) & (_x); \ ++ })) ++ ++#define NSP_ETH_NBI_PORT_COUNT 24 ++#define NSP_ETH_MAX_COUNT (2 * NSP_ETH_NBI_PORT_COUNT) ++#define NSP_ETH_TABLE_SIZE (NSP_ETH_MAX_COUNT * \ ++ sizeof(union eth_table_entry)) ++ ++#define NSP_ETH_PORT_LANES GENMASK_ULL(3, 0) ++#define NSP_ETH_PORT_INDEX GENMASK_ULL(15, 8) ++#define NSP_ETH_PORT_LABEL GENMASK_ULL(53, 48) ++#define NSP_ETH_PORT_PHYLABEL GENMASK_ULL(59, 54) ++#define NSP_ETH_PORT_FEC_SUPP_BASER BIT_ULL(60) ++#define NSP_ETH_PORT_FEC_SUPP_RS BIT_ULL(61) ++ ++#define NSP_ETH_PORT_LANES_MASK 
rte_cpu_to_le_64(NSP_ETH_PORT_LANES) ++ ++#define NSP_ETH_STATE_CONFIGURED BIT_ULL(0) ++#define NSP_ETH_STATE_ENABLED BIT_ULL(1) ++#define NSP_ETH_STATE_TX_ENABLED BIT_ULL(2) ++#define NSP_ETH_STATE_RX_ENABLED BIT_ULL(3) ++#define NSP_ETH_STATE_RATE GENMASK_ULL(11, 8) ++#define NSP_ETH_STATE_INTERFACE GENMASK_ULL(19, 12) ++#define NSP_ETH_STATE_MEDIA GENMASK_ULL(21, 20) ++#define NSP_ETH_STATE_OVRD_CHNG BIT_ULL(22) ++#define NSP_ETH_STATE_ANEG GENMASK_ULL(25, 23) ++#define NSP_ETH_STATE_FEC GENMASK_ULL(27, 26) ++ ++#define NSP_ETH_CTRL_CONFIGURED BIT_ULL(0) ++#define NSP_ETH_CTRL_ENABLED BIT_ULL(1) ++#define NSP_ETH_CTRL_TX_ENABLED BIT_ULL(2) ++#define NSP_ETH_CTRL_RX_ENABLED BIT_ULL(3) ++#define NSP_ETH_CTRL_SET_RATE BIT_ULL(4) ++#define NSP_ETH_CTRL_SET_LANES BIT_ULL(5) ++#define NSP_ETH_CTRL_SET_ANEG BIT_ULL(6) ++#define NSP_ETH_CTRL_SET_FEC BIT_ULL(7) ++ ++/* Which connector port. */ ++#define PORT_TP 0x00 ++#define PORT_AUI 0x01 ++#define PORT_MII 0x02 ++#define PORT_FIBRE 0x03 ++#define PORT_BNC 0x04 ++#define PORT_DA 0x05 ++#define PORT_NONE 0xef ++#define PORT_OTHER 0xff ++ ++#define SPEED_10 10 ++#define SPEED_100 100 ++#define SPEED_1000 1000 ++#define SPEED_2500 2500 ++#define SPEED_5000 5000 ++#define SPEED_10000 10000 ++#define SPEED_14000 14000 ++#define SPEED_20000 20000 ++#define SPEED_25000 25000 ++#define SPEED_40000 40000 ++#define SPEED_50000 50000 ++#define SPEED_56000 56000 ++#define SPEED_100000 100000 ++ ++enum nfp_eth_raw { ++ NSP_ETH_RAW_PORT = 0, ++ NSP_ETH_RAW_STATE, ++ NSP_ETH_RAW_MAC, ++ NSP_ETH_RAW_CONTROL, ++ ++ NSP_ETH_NUM_RAW ++}; ++ ++enum nfp_eth_rate { ++ RATE_INVALID = 0, ++ RATE_10M, ++ RATE_100M, ++ RATE_1G, ++ RATE_10G, ++ RATE_25G, ++}; ++ ++union eth_table_entry { ++ struct { ++ uint64_t port; ++ uint64_t state; ++ uint8_t mac_addr[6]; ++ uint8_t resv[2]; ++ uint64_t control; ++ }; ++ uint64_t raw[NSP_ETH_NUM_RAW]; ++}; ++ ++static const struct { ++ enum nfp_eth_rate rate; ++ unsigned int speed; ++} nsp_eth_rate_tbl[] = { 
++ { RATE_INVALID, 0, }, ++ { RATE_10M, SPEED_10, }, ++ { RATE_100M, SPEED_100, }, ++ { RATE_1G, SPEED_1000, }, ++ { RATE_10G, SPEED_10000, }, ++ { RATE_25G, SPEED_25000, }, ++}; ++ ++static unsigned int ++nfp_eth_rate2speed(enum nfp_eth_rate rate) ++{ ++ int i; ++ ++ for (i = 0; i < (int)ARRAY_SIZE(nsp_eth_rate_tbl); i++) ++ if (nsp_eth_rate_tbl[i].rate == rate) ++ return nsp_eth_rate_tbl[i].speed; ++ ++ return 0; ++} ++ ++static unsigned int ++nfp_eth_speed2rate(unsigned int speed) ++{ ++ int i; ++ ++ for (i = 0; i < (int)ARRAY_SIZE(nsp_eth_rate_tbl); i++) ++ if (nsp_eth_rate_tbl[i].speed == speed) ++ return nsp_eth_rate_tbl[i].rate; ++ ++ return RATE_INVALID; ++} ++ ++static void ++nfp_eth_copy_mac_reverse(uint8_t *dst, const uint8_t *src) ++{ ++ int i; ++ ++ for (i = 0; i < (int)ETH_ALEN; i++) ++ dst[ETH_ALEN - i - 1] = src[i]; ++} ++ ++static void ++nfp_eth_port_translate(struct nfp_nsp *nsp, const union eth_table_entry *src, ++ unsigned int index, struct nfp_eth_table_port *dst) ++{ ++ unsigned int rate; ++ unsigned int fec; ++ uint64_t port, state; ++ ++ port = rte_le_to_cpu_64(src->port); ++ state = rte_le_to_cpu_64(src->state); ++ ++ dst->eth_index = FIELD_GET(NSP_ETH_PORT_INDEX, port); ++ dst->index = index; ++ dst->nbi = index / NSP_ETH_NBI_PORT_COUNT; ++ dst->base = index % NSP_ETH_NBI_PORT_COUNT; ++ dst->lanes = FIELD_GET(NSP_ETH_PORT_LANES, port); ++ ++ dst->enabled = FIELD_GET(NSP_ETH_STATE_ENABLED, state); ++ dst->tx_enabled = FIELD_GET(NSP_ETH_STATE_TX_ENABLED, state); ++ dst->rx_enabled = FIELD_GET(NSP_ETH_STATE_RX_ENABLED, state); ++ ++ rate = nfp_eth_rate2speed(FIELD_GET(NSP_ETH_STATE_RATE, state)); ++ dst->speed = dst->lanes * rate; ++ ++ dst->interface = FIELD_GET(NSP_ETH_STATE_INTERFACE, state); ++ dst->media = FIELD_GET(NSP_ETH_STATE_MEDIA, state); ++ ++ nfp_eth_copy_mac_reverse(dst->mac_addr, src->mac_addr); ++ ++ dst->label_port = FIELD_GET(NSP_ETH_PORT_PHYLABEL, port); ++ dst->label_subport = FIELD_GET(NSP_ETH_PORT_LABEL, port); ++ ++ if 
(nfp_nsp_get_abi_ver_minor(nsp) < 17) ++ return; ++ ++ dst->override_changed = FIELD_GET(NSP_ETH_STATE_OVRD_CHNG, state); ++ dst->aneg = FIELD_GET(NSP_ETH_STATE_ANEG, state); ++ ++ if (nfp_nsp_get_abi_ver_minor(nsp) < 22) ++ return; ++ ++ fec = FIELD_GET(NSP_ETH_PORT_FEC_SUPP_BASER, port); ++ dst->fec_modes_supported |= fec << NFP_FEC_BASER_BIT; ++ fec = FIELD_GET(NSP_ETH_PORT_FEC_SUPP_RS, port); ++ dst->fec_modes_supported |= fec << NFP_FEC_REED_SOLOMON_BIT; ++ if (dst->fec_modes_supported) ++ dst->fec_modes_supported |= NFP_FEC_AUTO | NFP_FEC_DISABLED; ++ ++ dst->fec = 1 << FIELD_GET(NSP_ETH_STATE_FEC, state); ++} ++ ++static void ++nfp_eth_calc_port_geometry(struct nfp_eth_table *table) ++{ ++ unsigned int i, j; ++ ++ for (i = 0; i < table->count; i++) { ++ table->max_index = RTE_MAX(table->max_index, ++ table->ports[i].index); ++ ++ for (j = 0; j < table->count; j++) { ++ if (table->ports[i].label_port != ++ table->ports[j].label_port) ++ continue; ++ table->ports[i].port_lanes += table->ports[j].lanes; ++ ++ if (i == j) ++ continue; ++ if (table->ports[i].label_subport == ++ table->ports[j].label_subport) ++ printf("Port %d subport %d is a duplicate\n", ++ table->ports[i].label_port, ++ table->ports[i].label_subport); ++ ++ table->ports[i].is_split = 1; ++ } ++ } ++} ++ ++static void ++nfp_eth_calc_port_type(struct nfp_eth_table_port *entry) ++{ ++ if (entry->interface == NFP_INTERFACE_NONE) { ++ entry->port_type = PORT_NONE; ++ return; ++ } ++ ++ if (entry->media == NFP_MEDIA_FIBRE) ++ entry->port_type = PORT_FIBRE; ++ else ++ entry->port_type = PORT_DA; ++} ++ ++static struct nfp_eth_table * ++__nfp_eth_read_ports(struct nfp_nsp *nsp) ++{ ++ union eth_table_entry *entries; ++ struct nfp_eth_table *table; ++ uint32_t table_sz; ++ int i, j, ret, cnt = 0; ++ ++ entries = malloc(NSP_ETH_TABLE_SIZE); ++ if (!entries) ++ return NULL; ++ ++ memset(entries, 0, NSP_ETH_TABLE_SIZE); ++ ret = nfp_nsp_read_eth_table(nsp, entries, NSP_ETH_TABLE_SIZE); ++ if (ret < 0) { 
++ printf("reading port table failed %d\n", ret); ++ goto err; ++ } ++ ++ for (i = 0; i < NSP_ETH_MAX_COUNT; i++) ++ if (entries[i].port & NSP_ETH_PORT_LANES_MASK) ++ cnt++; ++ ++ /* Some versions of flash will give us 0 instead of port count. For ++ * those that give a port count, verify it against the value calculated ++ * above. ++ */ ++ if (ret && ret != cnt) { ++ printf("table entry count (%d) unmatch entries present (%d)\n", ++ ret, cnt); ++ goto err; ++ } ++ ++ table_sz = sizeof(*table) + sizeof(struct nfp_eth_table_port) * cnt; ++ table = malloc(table_sz); ++ if (!table) ++ goto err; ++ ++ memset(table, 0, table_sz); ++ table->count = cnt; ++ for (i = 0, j = 0; i < NSP_ETH_MAX_COUNT; i++) ++ if (entries[i].port & NSP_ETH_PORT_LANES_MASK) ++ nfp_eth_port_translate(nsp, &entries[i], i, ++ &table->ports[j++]); ++ ++ nfp_eth_calc_port_geometry(table); ++ for (i = 0; i < (int)table->count; i++) ++ nfp_eth_calc_port_type(&table->ports[i]); ++ ++ free(entries); ++ ++ return table; ++ ++err: ++ free(entries); ++ return NULL; ++} ++ ++/* ++ * nfp_eth_read_ports() - retrieve port information ++ * @cpp: NFP CPP handle ++ * ++ * Read the port information from the device. The returned structure is ++ * allocated with malloc() and should be released with free() once no ++ * longer needed. ++ * ++ * Return: populated ETH table or NULL on error. 
++ */
++struct nfp_eth_table *
++nfp_eth_read_ports(struct nfp_cpp *cpp)
++{
++	struct nfp_eth_table *ret;
++	struct nfp_nsp *nsp;
++
++	nsp = nfp_nsp_open(cpp);
++	if (!nsp)
++		return NULL;
++
++	/* The NSP handle is only held for the duration of the table read. */
++	ret = __nfp_eth_read_ports(nsp);
++	nfp_nsp_close(nsp);
++
++	return ret;
++}
++
++/*
++ * nfp_eth_config_start() - begin a configuration session for one port
++ * @cpp: NFP CPP handle
++ * @idx: NFP chip-wide port index
++ *
++ * Opens the NSP, reads the ETH table into a freshly allocated buffer and
++ * records the buffer and @idx on the NSP handle through
++ * nfp_nsp_config_set_state(). A session that was started successfully must
++ * be ended with nfp_eth_config_commit_end() or nfp_eth_config_cleanup_end(),
++ * which close the NSP handle and free the entries it carries.
++ *
++ * Return: NSP handle, or NULL when @idx is out of range, memory cannot be
++ * allocated, the NSP cannot be opened, the table cannot be read, or the
++ * selected port has no lanes configured.
++ */
++struct nfp_nsp *
++nfp_eth_config_start(struct nfp_cpp *cpp, unsigned int idx)
++{
++	union eth_table_entry *entries;
++	struct nfp_nsp *nsp;
++	int ret;
++
++	/*
++	 * The table buffer is walked up to NSP_ETH_MAX_COUNT entries elsewhere
++	 * in this file; an index at or past that bound would read outside the
++	 * NSP_ETH_TABLE_SIZE allocation below.
++	 */
++	if (idx >= NSP_ETH_MAX_COUNT) {
++		printf("port index %u out of range\n", idx);
++		return NULL;
++	}
++
++	entries = malloc(NSP_ETH_TABLE_SIZE);
++	if (!entries)
++		return NULL;
++
++	memset(entries, 0, NSP_ETH_TABLE_SIZE);
++	nsp = nfp_nsp_open(cpp);
++	if (!nsp) {
++		free(entries);
++		return NULL;
++	}
++
++	ret = nfp_nsp_read_eth_table(nsp, entries, NSP_ETH_TABLE_SIZE);
++	if (ret < 0) {
++		printf("reading port table failed %d\n", ret);
++		goto err;
++	}
++
++	/* A port with no lane bits set is treated as disabled. */
++	if (!(entries[idx].port & NSP_ETH_PORT_LANES_MASK)) {
++		printf("trying to set port state on disabled port %u\n", idx);
++		goto err;
++	}
++
++	/* From here on the entries buffer travels with the NSP handle. */
++	nfp_nsp_config_set_state(nsp, entries, idx);
++	return nsp;
++
++err:
++	nfp_nsp_close(nsp);
++	free(entries);
++	return NULL;
++}
++
++/*
++ * nfp_eth_config_cleanup_end() - end a configuration session without writing
++ * @nsp: NFP NSP handle returned from nfp_eth_config_start()
++ *
++ * Clears the modified flag and the recorded config state, closes @nsp and
++ * frees the entries buffer obtained from nfp_nsp_config_entries(). @nsp must
++ * not be used after this call.
++ */
++void
++nfp_eth_config_cleanup_end(struct nfp_nsp *nsp)
++{
++	/* Fetch the buffer before the state that references it is cleared. */
++	union eth_table_entry *entries = nfp_nsp_config_entries(nsp);
++
++	nfp_nsp_config_set_modified(nsp, 0);
++	nfp_nsp_config_clear_state(nsp);
++	nfp_nsp_close(nsp);
++	free(entries);
++}
++
++/*
++ * nfp_eth_config_commit_end() - perform recorded configuration changes
++ * @nsp: NFP NSP handle returned from nfp_eth_config_start()
++ *
++ * Perform the configuration which was requested with __nfp_eth_set_*()
++ * helpers and recorded in @nsp state. If device was already configured
++ * as requested or no __nfp_eth_set_*() operations were made no NSP command
++ * will be performed.
++ *
++ * Return:
++ * 0 - configuration successful;
++ * 1 - no changes were needed;
++ * -ERRNO - configuration failed.
++ */
++int
++nfp_eth_config_commit_end(struct nfp_nsp *nsp)
++{
++	union eth_table_entry *entries = nfp_nsp_config_entries(nsp);
++	int ret = 1;	/* "no changes were needed" unless a write happens */
++
++	if (nfp_nsp_config_modified(nsp)) {
++		ret = nfp_nsp_write_eth_table(nsp, entries, NSP_ETH_TABLE_SIZE);
++		/* Collapse any non-negative result to plain success. */
++		ret = ret < 0 ? ret : 0;
++	}
++
++	/* Always closes the NSP handle and frees @entries. */
++	nfp_eth_config_cleanup_end(nsp);
++
++	return ret;
++}
++
++/*
++ * nfp_eth_set_mod_enable() - set PHY module enable control bit
++ * @cpp: NFP CPP handle
++ * @idx: NFP chip-wide port index
++ * @enable: Desired state (zero disables, any non-zero value enables)
++ *
++ * Enable or disable PHY module (this usually means setting the TX lanes
++ * disable bits).
++ *
++ * Return:
++ * 0 - configuration successful;
++ * 1 - no changes were needed;
++ * -ERRNO - configuration failed (-EIO when the config session cannot be
++ *          started).
++ */
++int
++nfp_eth_set_mod_enable(struct nfp_cpp *cpp, unsigned int idx, int enable)
++{
++	union eth_table_entry *entries;
++	struct nfp_nsp *nsp;
++	uint64_t reg;
++
++	nsp = nfp_eth_config_start(cpp, idx);
++	if (!nsp)
++		return -EIO;
++
++	/*
++	 * Normalise to 0/1 so that a truthy value other than 1 is neither
++	 * always seen as "different" nor masked to zero by FIELD_PREP().
++	 */
++	enable = !!enable;
++
++	entries = nfp_nsp_config_entries(nsp);
++
++	/*
++	 * Check if we are already in requested state.
++	 * NOTE(review): the state word is tested with the CTRL_ENABLED mask;
++	 * this presumably relies on the state and control bits sharing a
++	 * position - confirm against the register definitions.
++	 */
++	reg = rte_le_to_cpu_64(entries[idx].state);
++	if (enable != (int)FIELD_GET(NSP_ETH_CTRL_ENABLED, reg)) {
++		reg = rte_le_to_cpu_64(entries[idx].control);
++		reg &= ~NSP_ETH_CTRL_ENABLED;
++		reg |= FIELD_PREP(NSP_ETH_CTRL_ENABLED, enable);
++		entries[idx].control = rte_cpu_to_le_64(reg);
++
++		nfp_nsp_config_set_modified(nsp, 1);
++	}
++
++	return nfp_eth_config_commit_end(nsp);
++}
++
++/*
++ * nfp_eth_set_configured() - set PHY module configured control bit
++ * @cpp: NFP CPP handle
++ * @idx: NFP chip-wide port index
++ * @configed: Desired state
++ *
++ * Set the ifup/ifdown state on the PHY.
++ *
++ * Return:
++ * 0 - configuration successful;
++ * 1 - no changes were needed;
++ * -ERRNO - configuration failed.
++ */ ++int ++nfp_eth_set_configured(struct nfp_cpp *cpp, unsigned int idx, int configed) ++{ ++ union eth_table_entry *entries; ++ struct nfp_nsp *nsp; ++ uint64_t reg; ++ ++ nsp = nfp_eth_config_start(cpp, idx); ++ if (!nsp) ++ return -EIO; ++ ++ /* ++ * Older ABI versions did support this feature, however this has only ++ * been reliable since ABI 20. ++ */ ++ if (nfp_nsp_get_abi_ver_minor(nsp) < 20) { ++ nfp_eth_config_cleanup_end(nsp); ++ return -EOPNOTSUPP; ++ } ++ ++ entries = nfp_nsp_config_entries(nsp); ++ ++ /* Check if we are already in requested state */ ++ reg = rte_le_to_cpu_64(entries[idx].state); ++ if (configed != (int)FIELD_GET(NSP_ETH_STATE_CONFIGURED, reg)) { ++ reg = rte_le_to_cpu_64(entries[idx].control); ++ reg &= ~NSP_ETH_CTRL_CONFIGURED; ++ reg |= FIELD_PREP(NSP_ETH_CTRL_CONFIGURED, configed); ++ entries[idx].control = rte_cpu_to_le_64(reg); ++ ++ nfp_nsp_config_set_modified(nsp, 1); ++ } ++ ++ return nfp_eth_config_commit_end(nsp); ++} ++ ++static int ++nfp_eth_set_bit_config(struct nfp_nsp *nsp, unsigned int raw_idx, ++ const uint64_t mask, const unsigned int shift, ++ unsigned int val, const uint64_t ctrl_bit) ++{ ++ union eth_table_entry *entries = nfp_nsp_config_entries(nsp); ++ unsigned int idx = nfp_nsp_config_idx(nsp); ++ uint64_t reg; ++ ++ /* ++ * Note: set features were added in ABI 0.14 but the error ++ * codes were initially not populated correctly. 
++ */ ++ if (nfp_nsp_get_abi_ver_minor(nsp) < 17) { ++ printf("set operations not supported, please update flash\n"); ++ return -EOPNOTSUPP; ++ } ++ ++ /* Check if we are already in requested state */ ++ reg = rte_le_to_cpu_64(entries[idx].raw[raw_idx]); ++ if (val == (reg & mask) >> shift) ++ return 0; ++ ++ reg &= ~mask; ++ reg |= (val << shift) & mask; ++ entries[idx].raw[raw_idx] = rte_cpu_to_le_64(reg); ++ ++ entries[idx].control |= rte_cpu_to_le_64(ctrl_bit); ++ ++ nfp_nsp_config_set_modified(nsp, 1); ++ ++ return 0; ++} ++ ++#define NFP_ETH_SET_BIT_CONFIG(nsp, raw_idx, mask, val, ctrl_bit) \ ++ (__extension__ ({ \ ++ typeof(mask) _x = (mask); \ ++ nfp_eth_set_bit_config(nsp, raw_idx, _x, __bf_shf(_x), \ ++ val, ctrl_bit); \ ++ })) ++ ++/* ++ * __nfp_eth_set_aneg() - set PHY autonegotiation control bit ++ * @nsp: NFP NSP handle returned from nfp_eth_config_start() ++ * @mode: Desired autonegotiation mode ++ * ++ * Allow/disallow PHY module to advertise/perform autonegotiation. ++ * Will write to hwinfo overrides in the flash (persistent config). ++ * ++ * Return: 0 or -ERRNO. ++ */ ++int ++__nfp_eth_set_aneg(struct nfp_nsp *nsp, enum nfp_eth_aneg mode) ++{ ++ return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_STATE, ++ NSP_ETH_STATE_ANEG, mode, ++ NSP_ETH_CTRL_SET_ANEG); ++} ++ ++/* ++ * __nfp_eth_set_fec() - set PHY forward error correction control bit ++ * @nsp: NFP NSP handle returned from nfp_eth_config_start() ++ * @mode: Desired fec mode ++ * ++ * Set the PHY module forward error correction mode. ++ * Will write to hwinfo overrides in the flash (persistent config). ++ * ++ * Return: 0 or -ERRNO. 
++ */ ++static int ++__nfp_eth_set_fec(struct nfp_nsp *nsp, enum nfp_eth_fec mode) ++{ ++ return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_STATE, ++ NSP_ETH_STATE_FEC, mode, ++ NSP_ETH_CTRL_SET_FEC); ++} ++ ++/* ++ * nfp_eth_set_fec() - set PHY forward error correction control mode ++ * @cpp: NFP CPP handle ++ * @idx: NFP chip-wide port index ++ * @mode: Desired fec mode ++ * ++ * Return: ++ * 0 - configuration successful; ++ * 1 - no changes were needed; ++ * -ERRNO - configuration failed. ++ */ ++int ++nfp_eth_set_fec(struct nfp_cpp *cpp, unsigned int idx, enum nfp_eth_fec mode) ++{ ++ struct nfp_nsp *nsp; ++ int err; ++ ++ nsp = nfp_eth_config_start(cpp, idx); ++ if (!nsp) ++ return -EIO; ++ ++ err = __nfp_eth_set_fec(nsp, mode); ++ if (err) { ++ nfp_eth_config_cleanup_end(nsp); ++ return err; ++ } ++ ++ return nfp_eth_config_commit_end(nsp); ++} ++ ++/* ++ * __nfp_eth_set_speed() - set interface speed/rate ++ * @nsp: NFP NSP handle returned from nfp_eth_config_start() ++ * @speed: Desired speed (per lane) ++ * ++ * Set lane speed. Provided @speed value should be subport speed divided ++ * by number of lanes this subport is spanning (i.e. 10000 for 40G, 25000 for ++ * 50G, etc.) ++ * Will write to hwinfo overrides in the flash (persistent config). ++ * ++ * Return: 0 or -ERRNO. ++ */ ++int ++__nfp_eth_set_speed(struct nfp_nsp *nsp, unsigned int speed) ++{ ++ enum nfp_eth_rate rate; ++ ++ rate = nfp_eth_speed2rate(speed); ++ if (rate == RATE_INVALID) { ++ printf("could not find matching lane rate for speed %u\n", ++ speed); ++ return -EINVAL; ++ } ++ ++ return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_STATE, ++ NSP_ETH_STATE_RATE, rate, ++ NSP_ETH_CTRL_SET_RATE); ++} ++ ++/* ++ * __nfp_eth_set_split() - set interface lane split ++ * @nsp: NFP NSP handle returned from nfp_eth_config_start() ++ * @lanes: Desired lanes per port ++ * ++ * Set number of lanes in the port. ++ * Will write to hwinfo overrides in the flash (persistent config). ++ * ++ * Return: 0 or -ERRNO. 
++ */ ++int ++__nfp_eth_set_split(struct nfp_nsp *nsp, unsigned int lanes) ++{ ++ return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_PORT, NSP_ETH_PORT_LANES, ++ lanes, NSP_ETH_CTRL_SET_LANES); ++} +diff --git a/drivers/net/nfp/nfpcore/nfp_resource.c b/drivers/net/nfp/nfpcore/nfp_resource.c +new file mode 100644 +index 000000000..e1df2b2e1 +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp_resource.c +@@ -0,0 +1,264 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. ++ */ ++ ++#include ++#include ++#include ++ ++#include "nfp_cpp.h" ++#include "nfp6000/nfp6000.h" ++#include "nfp_resource.h" ++#include "nfp_crc.h" ++ ++#define NFP_RESOURCE_TBL_TARGET NFP_CPP_TARGET_MU ++#define NFP_RESOURCE_TBL_BASE 0x8100000000ULL ++ ++/* NFP Resource Table self-identifier */ ++#define NFP_RESOURCE_TBL_NAME "nfp.res" ++#define NFP_RESOURCE_TBL_KEY 0x00000000 /* Special key for entry 0 */ ++ ++#define NFP_RESOURCE_ENTRY_NAME_SZ 8 ++ ++/* ++ * struct nfp_resource_entry - Resource table entry ++ * @owner: NFP CPP Lock, interface owner ++ * @key: NFP CPP Lock, posix_crc32(name, 8) ++ * @region: Memory region descriptor ++ * @name: ASCII, zero padded name ++ * @reserved ++ * @cpp_action: CPP Action ++ * @cpp_token: CPP Token ++ * @cpp_target: CPP Target ID ++ * @page_offset: 256-byte page offset into target's CPP address ++ * @page_size: size, in 256-byte pages ++ */ ++struct nfp_resource_entry { ++ struct nfp_resource_entry_mutex { ++ uint32_t owner; ++ uint32_t key; ++ } mutex; ++ struct nfp_resource_entry_region { ++ uint8_t name[NFP_RESOURCE_ENTRY_NAME_SZ]; ++ uint8_t reserved[5]; ++ uint8_t cpp_action; ++ uint8_t cpp_token; ++ uint8_t cpp_target; ++ uint32_t page_offset; ++ uint32_t page_size; ++ } region; ++}; ++ ++#define NFP_RESOURCE_TBL_SIZE 4096 ++#define NFP_RESOURCE_TBL_ENTRIES (int)(NFP_RESOURCE_TBL_SIZE / \ ++ sizeof(struct nfp_resource_entry)) ++ ++struct nfp_resource { ++ char name[NFP_RESOURCE_ENTRY_NAME_SZ 
+ 1];
++	uint32_t cpp_id;
++	uint64_t addr;
++	uint64_t size;
++	struct nfp_cpp_mutex *mutex;
++};
++
++/*
++ * nfp_cpp_resource_find() - locate a resource table entry by name
++ * @cpp: NFP CPP handle
++ * @res: resource handle; @res->name selects the entry, the remaining fields
++ *       are filled in on success
++ *
++ * Scans the resource table for the entry whose key equals the POSIX CRC32
++ * of the zero-padded name and, on a match, allocates the entry mutex and
++ * records the region's CPP ID, address and size in @res.
++ *
++ * Return: 0 on success, -EOPNOTSUPP for the table's own name, -EIO on a
++ * short or failed table read, -ENOMEM if the entry mutex cannot be
++ * allocated, -ENOENT if no entry matches.
++ */
++static int
++nfp_cpp_resource_find(struct nfp_cpp *cpp, struct nfp_resource *res)
++{
++	char name_pad[NFP_RESOURCE_ENTRY_NAME_SZ] = {0};
++	struct nfp_resource_entry entry;
++	uint32_t cpp_id, key;
++	int ret, i;
++
++	cpp_id = NFP_CPP_ID(NFP_RESOURCE_TBL_TARGET, 3, 0);  /* Atomic read */
++
++	/*
++	 * The key is computed over a fixed-width, zero-padded name, so a
++	 * missing NUL terminator for full-width names is intentional here.
++	 */
++	strncpy(name_pad, res->name, sizeof(name_pad));
++
++	/* Search for a matching entry */
++	if (!memcmp(name_pad, NFP_RESOURCE_TBL_NAME "\0\0\0\0\0\0\0\0", 8)) {
++		printf("Grabbing device lock not supported\n");
++		return -EOPNOTSUPP;
++	}
++	key = nfp_crc32_posix(name_pad, sizeof(name_pad));
++
++	for (i = 0; i < NFP_RESOURCE_TBL_ENTRIES; i++) {
++		uint64_t addr = NFP_RESOURCE_TBL_BASE +
++			sizeof(struct nfp_resource_entry) * i;
++
++		ret = nfp_cpp_read(cpp, cpp_id, addr, &entry, sizeof(entry));
++		if (ret != (int)sizeof(entry))
++			return -EIO;
++
++		if (entry.mutex.key != key)
++			continue;
++
++		/* Found key! */
++		res->mutex =
++			nfp_cpp_mutex_alloc(cpp,
++					    NFP_RESOURCE_TBL_TARGET, addr, key);
++		/* The caller goes on to trylock this mutex; never hand it NULL. */
++		if (!res->mutex)
++			return -ENOMEM;
++
++		res->cpp_id = NFP_CPP_ID(entry.region.cpp_target,
++					 entry.region.cpp_action,
++					 entry.region.cpp_token);
++		/* Offsets and sizes are stored in 256-byte pages. */
++		res->addr = ((uint64_t)entry.region.page_offset) << 8;
++		res->size = (uint64_t)entry.region.page_size << 8;
++		return 0;
++	}
++
++	return -ENOENT;
++}
++
++/*
++ * nfp_resource_try_acquire() - single attempt at locking a resource
++ * @cpp: NFP CPP handle
++ * @res: resource handle with @res->name set
++ * @dev_mutex: mutex guarding the resource table, held for the lookup
++ *
++ * Return: 0 with @res->mutex locked, -EINVAL if @dev_mutex cannot be taken,
++ * otherwise the error from the lookup or from the trylock (the caller treats
++ * -EBUSY as "retry"). On any failure @res->mutex is left NULL or unset.
++ */
++static int
++nfp_resource_try_acquire(struct nfp_cpp *cpp, struct nfp_resource *res,
++			 struct nfp_cpp_mutex *dev_mutex)
++{
++	int err;
++
++	if (nfp_cpp_mutex_lock(dev_mutex))
++		return -EINVAL;
++
++	err = nfp_cpp_resource_find(cpp, res);
++	if (err)
++		goto err_unlock_dev;
++
++	err = nfp_cpp_mutex_trylock(res->mutex);
++	if (err)
++		goto err_res_mutex_free;
++
++	nfp_cpp_mutex_unlock(dev_mutex);
++
++	return 0;
++
++err_res_mutex_free:
++	nfp_cpp_mutex_free(res->mutex);
++	/* A retry allocates a new mutex; do not keep the stale pointer. */
++	res->mutex = NULL;
++err_unlock_dev:
++	nfp_cpp_mutex_unlock(dev_mutex);
++
++	return err;
++}
++
++/*
++ * nfp_resource_acquire() - Acquire a resource handle
++ * @cpp: NFP CPP handle
++ * @name: Name of the resource
++ *
++ * NOTE: This function locks the acquired resource
++ *
++ * Retries about once per millisecond while the resource is busy and gives
++ * up after roughly 1000 retries.
++ *
++ * Return: NFP Resource handle, or NULL on failure
++ */
++struct nfp_resource *
++nfp_resource_acquire(struct nfp_cpp *cpp, const char *name)
++{
++	struct nfp_cpp_mutex *dev_mutex;
++	struct nfp_resource *res;
++	int err;
++	struct timespec wait;
++	int count;
++
++	res = malloc(sizeof(*res));
++	if (!res)
++		return NULL;
++
++	memset(res, 0, sizeof(*res));
++
++	/*
++	 * res->name has one byte more than is copied and was zeroed above, so
++	 * the stored name is always NUL-terminated.
++	 */
++	strncpy(res->name, name, NFP_RESOURCE_ENTRY_NAME_SZ);
++
++	dev_mutex = nfp_cpp_mutex_alloc(cpp, NFP_RESOURCE_TBL_TARGET,
++					NFP_RESOURCE_TBL_BASE,
++					NFP_RESOURCE_TBL_KEY);
++	if (!dev_mutex) {
++		free(res);
++		return NULL;
++	}
++
++	wait.tv_sec = 0;
++	wait.tv_nsec = 1000000;
++	count = 0;
++
++	for (;;) {
++		err = nfp_resource_try_acquire(cpp, res, dev_mutex);
++		if (!err)
++			break;
++		if (err != -EBUSY)
++			goto err_free;
++
++		if (count++ > 1000) {
++			printf("Error: resource %s timed out\n", name);
++			err = -EBUSY;
++			goto
err_free; ++ } ++ ++ nanosleep(&wait, NULL); ++ } ++ ++ nfp_cpp_mutex_free(dev_mutex); ++ ++ return res; ++ ++err_free: ++ nfp_cpp_mutex_free(dev_mutex); ++ free(res); ++ return NULL; ++} ++ ++/* ++ * nfp_resource_release() - Release a NFP Resource handle ++ * @res: NFP Resource handle ++ * ++ * NOTE: This function implicitly unlocks the resource handle, frees its ++ * mutex and frees @res itself; @res must not be used after this call. ++ */ ++void ++nfp_resource_release(struct nfp_resource *res) ++{ ++ nfp_cpp_mutex_unlock(res->mutex); ++ nfp_cpp_mutex_free(res->mutex); ++ free(res); ++} ++ ++/* ++ * nfp_resource_cpp_id() - Return the cpp_id of a resource handle ++ * @res: NFP Resource handle ++ * ++ * Return: NFP CPP ID ++ */ ++uint32_t ++nfp_resource_cpp_id(const struct nfp_resource *res) ++{ ++ return res->cpp_id; ++} ++ ++/* ++ * nfp_resource_name() - Return the name of a resource handle ++ * @res: NFP Resource handle ++ * ++ * Return: const char pointer to the name of the resource; the string is ++ * stored inside @res, so it is only valid until @res is released ++ */ ++const char ++*nfp_resource_name(const struct nfp_resource *res) ++{ ++ return res->name; ++} ++ ++/* ++ * nfp_resource_address() - Return the address of a resource handle ++ * @res: NFP Resource handle ++ * ++ * Return: Address of the resource ++ */ ++uint64_t ++nfp_resource_address(const struct nfp_resource *res) ++{ ++ return res->addr; ++} ++ ++/* ++ * nfp_resource_size() - Return the size in bytes of a resource handle ++ * @res: NFP Resource handle ++ * ++ * Return: Size of the resource in bytes ++ */ ++uint64_t ++nfp_resource_size(const struct nfp_resource *res) ++{ ++ return res->size; ++} +diff --git a/drivers/net/nfp/nfpcore/nfp_resource.h b/drivers/net/nfp/nfpcore/nfp_resource.h +new file mode 100644 +index 000000000..06cc6f74f +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp_resource.h +@@ -0,0 +1,52 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. 
++ */ ++ ++#ifndef NFP_RESOURCE_H ++#define NFP_RESOURCE_H ++ ++#include "nfp_cpp.h" ++ ++#define NFP_RESOURCE_NFP_NFFW "nfp.nffw" ++#define NFP_RESOURCE_NFP_HWINFO "nfp.info" ++#define NFP_RESOURCE_NSP "nfp.sp" ++ ++/** ++ * Opaque handle to a NFP Resource ++ */ ++struct nfp_resource; ++ ++/** ++ * Acquire and lock a NFP Resource by name ++ * @param[in] cpp NFP CPP handle ++ * @param[in] name Name of the resource ++ * @return NFP Resource handle, or NULL on failure ++ */ ++struct nfp_resource *nfp_resource_acquire(struct nfp_cpp *cpp, ++ const char *name); ++ ++/** ++ * Release a NFP Resource, and free the handle ++ * @param[in] res NFP Resource handle ++ */ ++void nfp_resource_release(struct nfp_resource *res); ++ ++/** ++ * Return the CPP ID of a NFP Resource ++ * @param[in] res NFP Resource handle ++ * @return CPP ID of the NFP Resource ++ */ ++uint32_t nfp_resource_cpp_id(const struct nfp_resource *res); ++ ++/** ++ * Return the name of a NFP Resource ++ * @param[in] res NFP Resource handle ++ * @return Name of the NFP Resource ++ */ ++const char *nfp_resource_name(const struct nfp_resource *res); ++ ++/** ++ * Return the target address of a NFP Resource ++ * @param[in] res NFP Resource handle ++ * @return Address of the NFP Resource ++ */ ++uint64_t nfp_resource_address(const struct nfp_resource *res); ++ ++/** ++ * Return the size in bytes of a NFP Resource ++ * @param[in] res NFP Resource handle ++ * @return Size of the NFP Resource in bytes ++ */ ++uint64_t nfp_resource_size(const struct nfp_resource *res); ++ ++#endif /* NFP_RESOURCE_H */ +diff --git a/drivers/net/nfp/nfpcore/nfp_rtsym.c b/drivers/net/nfp/nfpcore/nfp_rtsym.c +new file mode 100644 +index 000000000..cb7d83db5 +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp_rtsym.c +@@ -0,0 +1,327 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. 
++ */ ++ ++/* ++ * nfp_rtsym.c ++ * Interface for accessing run-time symbol table ++ */ ++ ++#include ++#include ++#include "nfp_cpp.h" ++#include "nfp_mip.h" ++#include "nfp_rtsym.h" ++#include "nfp6000/nfp6000.h" ++ ++/* These need to match the linker */ ++#define SYM_TGT_LMEM 0 ++#define SYM_TGT_EMU_CACHE 0x17 ++ ++struct nfp_rtsym_entry { ++ uint8_t type; ++ uint8_t target; ++ uint8_t island; ++ uint8_t addr_hi; ++ uint32_t addr_lo; ++ uint16_t name; ++ uint8_t menum; ++ uint8_t size_hi; ++ uint32_t size_lo; ++}; ++ ++struct nfp_rtsym_table { ++ struct nfp_cpp *cpp; ++ int num; ++ char *strtab; ++ struct nfp_rtsym symtab[]; ++}; ++ ++static int ++nfp_meid(uint8_t island_id, uint8_t menum) ++{ ++ return (island_id & 0x3F) == island_id && menum < 12 ? ++ (island_id << 4) | (menum + 4) : -1; ++} ++ ++static void ++nfp_rtsym_sw_entry_init(struct nfp_rtsym_table *cache, uint32_t strtab_size, ++ struct nfp_rtsym *sw, struct nfp_rtsym_entry *fw) ++{ ++ sw->type = fw->type; ++ sw->name = cache->strtab + rte_le_to_cpu_16(fw->name) % strtab_size; ++ sw->addr = ((uint64_t)fw->addr_hi << 32) | ++ rte_le_to_cpu_32(fw->addr_lo); ++ sw->size = ((uint64_t)fw->size_hi << 32) | ++ rte_le_to_cpu_32(fw->size_lo); ++ ++#ifdef DEBUG ++ printf("rtsym_entry_init\n"); ++ printf("\tname=%s, addr=%" PRIx64 ", size=%" PRIu64 ",target=%d\n", ++ sw->name, sw->addr, sw->size, sw->target); ++#endif ++ switch (fw->target) { ++ case SYM_TGT_LMEM: ++ sw->target = NFP_RTSYM_TARGET_LMEM; ++ break; ++ case SYM_TGT_EMU_CACHE: ++ sw->target = NFP_RTSYM_TARGET_EMU_CACHE; ++ break; ++ default: ++ sw->target = fw->target; ++ break; ++ } ++ ++ if (fw->menum != 0xff) ++ sw->domain = nfp_meid(fw->island, fw->menum); ++ else if (fw->island != 0xff) ++ sw->domain = fw->island; ++ else ++ sw->domain = -1; ++} ++ ++struct nfp_rtsym_table * ++nfp_rtsym_table_read(struct nfp_cpp *cpp) ++{ ++ struct nfp_rtsym_table *rtbl; ++ struct nfp_mip *mip; ++ ++ mip = nfp_mip_open(cpp); ++ rtbl = __nfp_rtsym_table_read(cpp, 
mip); ++ nfp_mip_close(mip); ++ ++ return rtbl; ++} ++ ++/* ++ * This looks more complex than it should be. But we need to get the type for ++ * the ~ right in round_down (it needs to be as wide as the result!), and we ++ * want to evaluate the macro arguments just once each. ++ */ ++#define __round_mask(x, y) ((__typeof__(x))((y) - 1)) ++ ++#define round_up(x, y) \ ++ (__extension__ ({ \ ++ typeof(x) _x = (x); \ ++ ((((_x) - 1) | __round_mask(_x, y)) + 1); \ ++ })) ++ ++#define round_down(x, y) \ ++ (__extension__ ({ \ ++ typeof(x) _x = (x); \ ++ ((_x) & ~__round_mask(_x, y)); \ ++ })) ++ ++struct nfp_rtsym_table * ++__nfp_rtsym_table_read(struct nfp_cpp *cpp, const struct nfp_mip *mip) ++{ ++ uint32_t strtab_addr, symtab_addr, strtab_size, symtab_size; ++ struct nfp_rtsym_entry *rtsymtab; ++ struct nfp_rtsym_table *cache; ++ const uint32_t dram = ++ NFP_CPP_ID(NFP_CPP_TARGET_MU, NFP_CPP_ACTION_RW, 0) | ++ NFP_ISL_EMEM0; ++ int err, n, size; ++ ++ if (!mip) ++ return NULL; ++ ++ nfp_mip_strtab(mip, &strtab_addr, &strtab_size); ++ nfp_mip_symtab(mip, &symtab_addr, &symtab_size); ++ ++ if (!symtab_size || !strtab_size || symtab_size % sizeof(*rtsymtab)) ++ return NULL; ++ ++ /* Align to 64 bits */ ++ symtab_size = round_up(symtab_size, 8); ++ strtab_size = round_up(strtab_size, 8); ++ ++ rtsymtab = malloc(symtab_size); ++ if (!rtsymtab) ++ return NULL; ++ ++ size = sizeof(*cache); ++ size += symtab_size / sizeof(*rtsymtab) * sizeof(struct nfp_rtsym); ++ size += strtab_size + 1; ++ cache = malloc(size); ++ if (!cache) ++ goto exit_free_rtsym_raw; ++ ++ cache->cpp = cpp; ++ cache->num = symtab_size / sizeof(*rtsymtab); ++ cache->strtab = (void *)&cache->symtab[cache->num]; ++ ++ err = nfp_cpp_read(cpp, dram, symtab_addr, rtsymtab, symtab_size); ++ if (err != (int)symtab_size) ++ goto exit_free_cache; ++ ++ err = nfp_cpp_read(cpp, dram, strtab_addr, cache->strtab, strtab_size); ++ if (err != (int)strtab_size) ++ goto exit_free_cache; ++ cache->strtab[strtab_size] = 
'\0';
++
++	for (n = 0; n < cache->num; n++)
++		nfp_rtsym_sw_entry_init(cache, strtab_size,
++					&cache->symtab[n], &rtsymtab[n]);
++
++	free(rtsymtab);
++
++	return cache;
++
++exit_free_cache:
++	free(cache);
++exit_free_rtsym_raw:
++	free(rtsymtab);
++	return NULL;
++}
++
++/*
++ * nfp_rtsym_count() - Get the number of RTSYM descriptors
++ * @rtbl: NFP RTsym table
++ *
++ * Return: Number of RTSYM descriptors, or -EINVAL if @rtbl is NULL
++ */
++int
++nfp_rtsym_count(struct nfp_rtsym_table *rtbl)
++{
++	if (!rtbl)
++		return -EINVAL;
++
++	return rtbl->num;
++}
++
++/*
++ * nfp_rtsym_get() - Get the Nth RTSYM descriptor
++ * @rtbl: NFP RTsym table
++ * @idx: Index (0-based) of the RTSYM descriptor
++ *
++ * Return: const pointer to a struct nfp_rtsym descriptor, or NULL if @rtbl
++ * is NULL or @idx is outside [0, number of descriptors)
++ */
++const struct nfp_rtsym *
++nfp_rtsym_get(struct nfp_rtsym_table *rtbl, int idx)
++{
++	if (!rtbl)
++		return NULL;
++
++	/* @idx is signed: a negative value must not index before symtab[]. */
++	if (idx < 0 || idx >= rtbl->num)
++		return NULL;
++
++	return &rtbl->symtab[idx];
++}
++
++/*
++ * nfp_rtsym_lookup() - Return the RTSYM descriptor for a symbol name
++ * @rtbl: NFP RTsym table
++ * @name: Symbol name
++ *
++ * Performs a linear search and returns the first descriptor whose name
++ * matches @name exactly.
++ *
++ * Return: const pointer to a struct nfp_rtsym descriptor, or NULL if @rtbl
++ * or @name is NULL or no symbol matches
++ */
++const struct nfp_rtsym *
++nfp_rtsym_lookup(struct nfp_rtsym_table *rtbl, const char *name)
++{
++	int n;
++
++	/* strcmp() on a NULL name is undefined; treat it as "not found". */
++	if (!rtbl || !name)
++		return NULL;
++
++	for (n = 0; n < rtbl->num; n++)
++		if (strcmp(name, rtbl->symtab[n].name) == 0)
++			return &rtbl->symtab[n];
++
++	return NULL;
++}
++
++/*
++ * nfp_rtsym_read_le() - Read a simple unsigned scalar value from symbol
++ * @rtbl: NFP RTsym table
++ * @name: Symbol name
++ * @error: Pointer to error code (optional)
++ *
++ * Lookup a symbol, map, read it and return its value. Value of the symbol
++ * will be interpreted as a simple little-endian unsigned value. Symbol can
++ * be 4 or 8 bytes in size.
++ *
++ * Return: value read, on error sets the error and returns ~0ULL.
++ */
++uint64_t
++nfp_rtsym_read_le(struct nfp_rtsym_table *rtbl, const char *name, int *error)
++{
++	const struct nfp_rtsym *sym;
++	uint32_t val32 = 0, id;
++	uint64_t val = 0;
++	int err;
++
++	sym = nfp_rtsym_lookup(rtbl, name);
++	if (!sym) {
++		err = -ENOENT;
++		goto exit;
++	}
++
++	id = NFP_CPP_ISLAND_ID(sym->target, NFP_CPP_ACTION_RW, 0, sym->domain);
++
++#ifdef DEBUG
++	printf("Reading symbol %s with size %" PRIu64 " at %" PRIx64 "\n",
++	       name, sym->size, sym->addr);
++#endif
++	switch (sym->size) {
++	case 4:
++		err = nfp_cpp_readl(rtbl->cpp, id, sym->addr, &val32);
++		val = val32;
++		break;
++	case 8:
++		err = nfp_cpp_readq(rtbl->cpp, id, sym->addr, &val);
++		break;
++	default:
++		printf("rtsym '%s' unsupported size: %" PRIu64 "\n",
++		       name, sym->size);
++		/* Skip the -EIO mapping below: nothing was read. */
++		err = -EINVAL;
++		goto exit;
++	}
++
++	/* Any failure reported by the CPP read helpers is reported as -EIO. */
++	if (err)
++		err = -EIO;
++exit:
++	if (error)
++		*error = err;
++
++	if (err)
++		return ~0ULL;
++
++	return val;
++}
++
++/*
++ * nfp_rtsym_map() - Map the memory region of a run-time symbol
++ * @rtbl: NFP RTsym table
++ * @name: Symbol name
++ * @min_size: Minimum size in bytes the symbol must have
++ * @area: Passed through to nfp_cpp_map_area() for the mapped CPP area
++ *
++ * Looks the symbol up, rejects it if it is smaller than @min_size and maps
++ * its whole size through nfp_cpp_map_area().
++ *
++ * Return: pointer returned by nfp_cpp_map_area(), or NULL if the symbol is
++ * not found, is too small or cannot be mapped.
++ */
++uint8_t *
++nfp_rtsym_map(struct nfp_rtsym_table *rtbl, const char *name,
++	      unsigned int min_size, struct nfp_cpp_area **area)
++{
++	const struct nfp_rtsym *sym;
++	uint8_t *mem;
++
++#ifdef DEBUG
++	printf("mapping symbol %s\n", name);
++#endif
++	sym = nfp_rtsym_lookup(rtbl, name);
++	if (!sym) {
++		printf("symbol lookup fails for %s\n", name);
++		return NULL;
++	}
++
++	if (sym->size < min_size) {
++		printf("Symbol %s too small (%" PRIu64 " < %u)\n", name,
++		       sym->size, min_size);
++		return NULL;
++	}
++
++	mem = nfp_cpp_map_area(rtbl->cpp, sym->domain, sym->target, sym->addr,
++			       sym->size, area);
++	if (!mem) {
++		printf("Failed to map symbol %s\n", name);
++		return NULL;
++	}
++#ifdef DEBUG
++	/* %p requires a void pointer argument. */
++	printf("symbol %s with address %p\n", name, (void *)mem);
++#endif
++
++	return mem;
++}
+diff --git a/drivers/net/nfp/nfpcore/nfp_rtsym.h b/drivers/net/nfp/nfpcore/nfp_rtsym.h
+new file mode 100644
+index 000000000..8b494211b
+--- /dev/null
++++ b/drivers/net/nfp/nfpcore/nfp_rtsym.h
+@@ -0,0 +1,61 @@
++/* SPDX-License-Identifier: BSD-3-Clause
++ * Copyright(c)
2018 Netronome Systems, Inc. ++ * All rights reserved. ++ */ ++ ++#ifndef __NFP_RTSYM_H__ ++#define __NFP_RTSYM_H__ ++ ++#define NFP_RTSYM_TYPE_NONE 0 ++#define NFP_RTSYM_TYPE_OBJECT 1 ++#define NFP_RTSYM_TYPE_FUNCTION 2 ++#define NFP_RTSYM_TYPE_ABS 3 ++ ++#define NFP_RTSYM_TARGET_NONE 0 ++#define NFP_RTSYM_TARGET_LMEM -1 ++#define NFP_RTSYM_TARGET_EMU_CACHE -7 ++ ++/* ++ * Structure describing a run-time NFP symbol. ++ * ++ * The memory target of the symbol is generally the CPP target number and can be ++ * used directly by the nfp_cpp API calls. However, in some cases (i.e., for ++ * local memory or control store) the target is encoded using a negative number. ++ * ++ * When the target type can not be used to fully describe the location of a ++ * symbol the domain field is used to further specify the location (i.e., the ++ * specific ME or island number). ++ * ++ * For ME target resources, 'domain' is an MEID. ++ * For Island target resources, 'domain' is an island ID, with the one exception ++ * of "sram" symbols for backward compatibility, which are viewed as global. 
++ */ ++struct nfp_rtsym { ++ const char *name; ++ uint64_t addr; ++ uint64_t size; ++ int type; ++ int target; ++ int domain; ++}; ++ ++struct nfp_rtsym_table; ++ ++struct nfp_rtsym_table *nfp_rtsym_table_read(struct nfp_cpp *cpp); ++ ++struct nfp_rtsym_table * ++__nfp_rtsym_table_read(struct nfp_cpp *cpp, const struct nfp_mip *mip); ++ ++int nfp_rtsym_count(struct nfp_rtsym_table *rtbl); ++ ++const struct nfp_rtsym *nfp_rtsym_get(struct nfp_rtsym_table *rtbl, int idx); ++ ++const struct nfp_rtsym * ++nfp_rtsym_lookup(struct nfp_rtsym_table *rtbl, const char *name); ++ ++uint64_t nfp_rtsym_read_le(struct nfp_rtsym_table *rtbl, const char *name, ++ int *error); ++uint8_t * ++nfp_rtsym_map(struct nfp_rtsym_table *rtbl, const char *name, ++ unsigned int min_size, struct nfp_cpp_area **area); ++#endif +diff --git a/drivers/net/nfp/nfpcore/nfp_target.h b/drivers/net/nfp/nfpcore/nfp_target.h +new file mode 100644 +index 000000000..2884a0034 +--- /dev/null ++++ b/drivers/net/nfp/nfpcore/nfp_target.h +@@ -0,0 +1,579 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2018 Netronome Systems, Inc. ++ * All rights reserved. 
++ */ ++ ++#ifndef NFP_TARGET_H ++#define NFP_TARGET_H ++ ++#include "nfp-common/nfp_resid.h" ++#include "nfp-common/nfp_cppat.h" ++#include "nfp-common/nfp_platform.h" ++#include "nfp_cpp.h" ++ ++#define P32 1 ++#define P64 2 ++ ++#define PUSHPULL(_pull, _push) (((_pull) << 4) | ((_push) << 0)) ++ ++#ifndef NFP_ERRNO ++#include ++#define NFP_ERRNO(x) (errno = (x), -1) ++#endif ++ ++static inline int ++pushpull_width(int pp) ++{ ++ pp &= 0xf; ++ ++ if (pp == 0) ++ return NFP_ERRNO(EINVAL); ++ return (2 << pp); ++} ++ ++#define PUSH_WIDTH(_pushpull) pushpull_width((_pushpull) >> 0) ++#define PULL_WIDTH(_pushpull) pushpull_width((_pushpull) >> 4) ++ ++static inline int ++target_rw(uint32_t cpp_id, int pp, int start, int len) ++{ ++ int island = NFP_CPP_ID_ISLAND_of(cpp_id); ++ ++ if (island && (island < start || island > (start + len))) ++ return NFP_ERRNO(EINVAL); ++ ++ switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { ++ case NFP_CPP_ID(0, 0, 0): ++ return PUSHPULL(0, pp); ++ case NFP_CPP_ID(0, 1, 0): ++ return PUSHPULL(pp, 0); ++ case NFP_CPP_ID(0, NFP_CPP_ACTION_RW, 0): ++ return PUSHPULL(pp, pp); ++ default: ++ return NFP_ERRNO(EINVAL); ++ } ++} ++ ++static inline int ++nfp6000_nbi_dma(uint32_t cpp_id) ++{ ++ switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { ++ case NFP_CPP_ID(0, 0, 0): /* ReadNbiDma */ ++ return PUSHPULL(0, P64); ++ case NFP_CPP_ID(0, 1, 0): /* WriteNbiDma */ ++ return PUSHPULL(P64, 0); ++ case NFP_CPP_ID(0, NFP_CPP_ACTION_RW, 0): ++ return PUSHPULL(P64, P64); ++ default: ++ return NFP_ERRNO(EINVAL); ++ } ++} ++ ++static inline int ++nfp6000_nbi_stats(uint32_t cpp_id) ++{ ++ switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { ++ case NFP_CPP_ID(0, 0, 0): /* ReadNbiStats */ ++ return PUSHPULL(0, P64); ++ case NFP_CPP_ID(0, 1, 0): /* WriteNbiStats */ ++ return PUSHPULL(P64, 0); ++ case NFP_CPP_ID(0, NFP_CPP_ACTION_RW, 0): ++ return PUSHPULL(P64, P64); ++ default: ++ return NFP_ERRNO(EINVAL); ++ } ++} ++ ++static inline int ++nfp6000_nbi_tm(uint32_t cpp_id) ++{ ++ switch 
(cpp_id & NFP_CPP_ID(0, ~0, ~0)) { ++ case NFP_CPP_ID(0, 0, 0): /* ReadNbiTM */ ++ return PUSHPULL(0, P64); ++ case NFP_CPP_ID(0, 1, 0): /* WriteNbiTM */ ++ return PUSHPULL(P64, 0); ++ case NFP_CPP_ID(0, NFP_CPP_ACTION_RW, 0): ++ return PUSHPULL(P64, P64); ++ default: ++ return NFP_ERRNO(EINVAL); ++ } ++} ++ ++static inline int ++nfp6000_nbi_ppc(uint32_t cpp_id) ++{ ++ switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { ++ case NFP_CPP_ID(0, 0, 0): /* ReadNbiPreclassifier */ ++ return PUSHPULL(0, P64); ++ case NFP_CPP_ID(0, 1, 0): /* WriteNbiPreclassifier */ ++ return PUSHPULL(P64, 0); ++ case NFP_CPP_ID(0, NFP_CPP_ACTION_RW, 0): ++ return PUSHPULL(P64, P64); ++ default: ++ return NFP_ERRNO(EINVAL); ++ } ++} ++ ++static inline int ++nfp6000_nbi(uint32_t cpp_id, uint64_t address) ++{ ++ int island = NFP_CPP_ID_ISLAND_of(cpp_id); ++ uint64_t rel_addr = address & 0x3fFFFF; ++ ++ if (island && (island < 8 || island > 9)) ++ return NFP_ERRNO(EINVAL); ++ ++ if (rel_addr < (1 << 20)) ++ return nfp6000_nbi_dma(cpp_id); ++ if (rel_addr < (2 << 20)) ++ return nfp6000_nbi_stats(cpp_id); ++ if (rel_addr < (3 << 20)) ++ return nfp6000_nbi_tm(cpp_id); ++ return nfp6000_nbi_ppc(cpp_id); ++} ++ ++/* ++ * This structure ONLY includes items that can be done with a read or write of ++ * 32-bit or 64-bit words. All others are not listed. 
++ */ ++static inline int ++nfp6000_mu_common(uint32_t cpp_id) ++{ ++ switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { ++ case NFP_CPP_ID(0, NFP_CPP_ACTION_RW, 0): /* read_be/write_be */ ++ return PUSHPULL(P64, P64); ++ case NFP_CPP_ID(0, NFP_CPP_ACTION_RW, 1): /* read_le/write_le */ ++ return PUSHPULL(P64, P64); ++ case NFP_CPP_ID(0, NFP_CPP_ACTION_RW, 2): /* {read/write}_swap_be */ ++ return PUSHPULL(P64, P64); ++ case NFP_CPP_ID(0, NFP_CPP_ACTION_RW, 3): /* {read/write}_swap_le */ ++ return PUSHPULL(P64, P64); ++ case NFP_CPP_ID(0, 0, 0): /* read_be */ ++ return PUSHPULL(0, P64); ++ case NFP_CPP_ID(0, 0, 1): /* read_le */ ++ return PUSHPULL(0, P64); ++ case NFP_CPP_ID(0, 0, 2): /* read_swap_be */ ++ return PUSHPULL(0, P64); ++ case NFP_CPP_ID(0, 0, 3): /* read_swap_le */ ++ return PUSHPULL(0, P64); ++ case NFP_CPP_ID(0, 1, 0): /* write_be */ ++ return PUSHPULL(P64, 0); ++ case NFP_CPP_ID(0, 1, 1): /* write_le */ ++ return PUSHPULL(P64, 0); ++ case NFP_CPP_ID(0, 1, 2): /* write_swap_be */ ++ return PUSHPULL(P64, 0); ++ case NFP_CPP_ID(0, 1, 3): /* write_swap_le */ ++ return PUSHPULL(P64, 0); ++ case NFP_CPP_ID(0, 3, 0): /* atomic_read */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 3, 2): /* mask_compare_write */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 4, 0): /* atomic_write */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 4, 2): /* atomic_write_imm */ ++ return PUSHPULL(0, 0); ++ case NFP_CPP_ID(0, 4, 3): /* swap_imm */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 5, 0): /* set */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 5, 3): /* test_set_imm */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 6, 0): /* clr */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 6, 3): /* test_clr_imm */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 7, 0): /* add */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 7, 3): /* test_add_imm */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 8, 0): /* addsat */ ++ return PUSHPULL(P32, 0); ++ case 
NFP_CPP_ID(0, 8, 3): /* test_subsat_imm */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 9, 0): /* sub */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 9, 3): /* test_sub_imm */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 10, 0): /* subsat */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 10, 3): /* test_subsat_imm */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 13, 0): /* microq128_get */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 13, 1): /* microq128_pop */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 13, 2): /* microq128_put */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 15, 0): /* xor */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 15, 3): /* test_xor_imm */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 28, 0): /* read32_be */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 28, 1): /* read32_le */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 28, 2): /* read32_swap_be */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 28, 3): /* read32_swap_le */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 31, 0): /* write32_be */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 31, 1): /* write32_le */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 31, 2): /* write32_swap_be */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 31, 3): /* write32_swap_le */ ++ return PUSHPULL(P32, 0); ++ default: ++ return NFP_ERRNO(EINVAL); ++ } ++} ++ ++static inline int ++nfp6000_mu_ctm(uint32_t cpp_id) ++{ ++ switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { ++ case NFP_CPP_ID(0, 16, 1): /* packet_read_packet_status */ ++ return PUSHPULL(0, P32); ++ default: ++ return nfp6000_mu_common(cpp_id); ++ } ++} ++ ++static inline int ++nfp6000_mu_emu(uint32_t cpp_id) ++{ ++ switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { ++ case NFP_CPP_ID(0, 18, 0): /* read_queue */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 18, 1): /* read_queue_ring */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 18, 2): /* write_queue */ ++ return PUSHPULL(P32, 
0); ++ case NFP_CPP_ID(0, 18, 3): /* write_queue_ring */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 20, 2): /* journal */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 21, 0): /* get */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 21, 1): /* get_eop */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 21, 2): /* get_freely */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 22, 0): /* pop */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 22, 1): /* pop_eop */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 22, 2): /* pop_freely */ ++ return PUSHPULL(0, P32); ++ default: ++ return nfp6000_mu_common(cpp_id); ++ } ++} ++ ++static inline int ++nfp6000_mu_imu(uint32_t cpp_id) ++{ ++ return nfp6000_mu_common(cpp_id); ++} ++ ++static inline int ++nfp6000_mu(uint32_t cpp_id, uint64_t address) ++{ ++ int pp; ++ int island = NFP_CPP_ID_ISLAND_of(cpp_id); ++ ++ if (island == 0) { ++ if (address < 0x2000000000ULL) ++ pp = nfp6000_mu_ctm(cpp_id); ++ else if (address < 0x8000000000ULL) ++ pp = nfp6000_mu_emu(cpp_id); ++ else if (address < 0x9800000000ULL) ++ pp = nfp6000_mu_ctm(cpp_id); ++ else if (address < 0x9C00000000ULL) ++ pp = nfp6000_mu_emu(cpp_id); ++ else if (address < 0xA000000000ULL) ++ pp = nfp6000_mu_imu(cpp_id); ++ else ++ pp = nfp6000_mu_ctm(cpp_id); ++ } else if (island >= 24 && island <= 27) { ++ pp = nfp6000_mu_emu(cpp_id); ++ } else if (island >= 28 && island <= 31) { ++ pp = nfp6000_mu_imu(cpp_id); ++ } else if (island == 1 || ++ (island >= 4 && island <= 7) || ++ (island >= 12 && island <= 13) || ++ (island >= 32 && island <= 47) || ++ (island >= 48 && island <= 51)) { ++ pp = nfp6000_mu_ctm(cpp_id); ++ } else { ++ pp = NFP_ERRNO(EINVAL); ++ } ++ ++ return pp; ++} ++ ++static inline int ++nfp6000_ila(uint32_t cpp_id) ++{ ++ int island = NFP_CPP_ID_ISLAND_of(cpp_id); ++ ++ if (island && (island < 48 || island > 51)) ++ return NFP_ERRNO(EINVAL); ++ ++ switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { ++ case NFP_CPP_ID(0, 0, 1): /* 
read_check_error */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 2, 0): /* read_int */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 3, 0): /* write_int */ ++ return PUSHPULL(P32, 0); ++ default: ++ return target_rw(cpp_id, P32, 48, 4); ++ } ++} ++ ++static inline int ++nfp6000_pci(uint32_t cpp_id) ++{ ++ int island = NFP_CPP_ID_ISLAND_of(cpp_id); ++ ++ if (island && (island < 4 || island > 7)) ++ return NFP_ERRNO(EINVAL); ++ ++ switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { ++ case NFP_CPP_ID(0, 2, 0): ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 3, 0): ++ return PUSHPULL(P32, 0); ++ default: ++ return target_rw(cpp_id, P32, 4, 4); ++ } ++} ++ ++static inline int ++nfp6000_crypto(uint32_t cpp_id) ++{ ++ int island = NFP_CPP_ID_ISLAND_of(cpp_id); ++ ++ if (island && (island < 12 || island > 15)) ++ return NFP_ERRNO(EINVAL); ++ ++ switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { ++ case NFP_CPP_ID(0, 2, 0): ++ return PUSHPULL(P64, 0); ++ default: ++ return target_rw(cpp_id, P64, 12, 4); ++ } ++} ++ ++static inline int ++nfp6000_cap_xpb(uint32_t cpp_id) ++{ ++ int island = NFP_CPP_ID_ISLAND_of(cpp_id); ++ ++ if (island && (island < 1 || island > 63)) ++ return NFP_ERRNO(EINVAL); ++ ++ switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { ++ case NFP_CPP_ID(0, 0, 1): /* RingGet */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 0, 2): /* Interthread Signal */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 1, 1): /* RingPut */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 1, 2): /* CTNNWr */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 2, 0): /* ReflectRd, signal none */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 2, 1): /* ReflectRd, signal self */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 2, 2): /* ReflectRd, signal remote */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 2, 3): /* ReflectRd, signal both */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 3, 0): /* ReflectWr, signal none */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 3, 
1): /* ReflectWr, signal self */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 3, 2): /* ReflectWr, signal remote */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 3, 3): /* ReflectWr, signal both */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, NFP_CPP_ACTION_RW, 1): ++ return PUSHPULL(P32, P32); ++ default: ++ return target_rw(cpp_id, P32, 1, 63); ++ } ++} ++ ++static inline int ++nfp6000_cls(uint32_t cpp_id) ++{ ++ int island = NFP_CPP_ID_ISLAND_of(cpp_id); ++ ++ if (island && (island < 1 || island > 63)) ++ return NFP_ERRNO(EINVAL); ++ ++ switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { ++ case NFP_CPP_ID(0, 0, 3): /* xor */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 2, 0): /* set */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 2, 1): /* clr */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 4, 0): /* add */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 4, 1): /* add64 */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 6, 0): /* sub */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 6, 1): /* sub64 */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 6, 2): /* subsat */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 8, 2): /* hash_mask */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 8, 3): /* hash_clear */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 9, 0): /* ring_get */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 9, 1): /* ring_pop */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 9, 2): /* ring_get_freely */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 9, 3): /* ring_pop_freely */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 10, 0): /* ring_put */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 10, 2): /* ring_journal */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 14, 0): /* reflect_write_sig_local */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 15, 1): /* reflect_read_sig_local */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 17, 2): /* statistic */ ++ return PUSHPULL(P32, 0); ++ 
case NFP_CPP_ID(0, 24, 0): /* ring_read */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 24, 1): /* ring_write */ ++ return PUSHPULL(P32, 0); ++ case NFP_CPP_ID(0, 25, 0): /* ring_workq_add_thread */ ++ return PUSHPULL(0, P32); ++ case NFP_CPP_ID(0, 25, 1): /* ring_workq_add_work */ ++ return PUSHPULL(P32, 0); ++ default: ++ return target_rw(cpp_id, P32, 0, 64); ++ } ++} ++ ++static inline int ++nfp6000_target_pushpull(uint32_t cpp_id, uint64_t address) ++{ ++ switch (NFP_CPP_ID_TARGET_of(cpp_id)) { ++ case NFP6000_CPPTGT_NBI: ++ return nfp6000_nbi(cpp_id, address); ++ case NFP6000_CPPTGT_VQDR: ++ return target_rw(cpp_id, P32, 24, 4); ++ case NFP6000_CPPTGT_ILA: ++ return nfp6000_ila(cpp_id); ++ case NFP6000_CPPTGT_MU: ++ return nfp6000_mu(cpp_id, address); ++ case NFP6000_CPPTGT_PCIE: ++ return nfp6000_pci(cpp_id); ++ case NFP6000_CPPTGT_ARM: ++ if (address < 0x10000) ++ return target_rw(cpp_id, P64, 1, 1); ++ else ++ return target_rw(cpp_id, P32, 1, 1); ++ case NFP6000_CPPTGT_CRYPTO: ++ return nfp6000_crypto(cpp_id); ++ case NFP6000_CPPTGT_CTXPB: ++ return nfp6000_cap_xpb(cpp_id); ++ case NFP6000_CPPTGT_CLS: ++ return nfp6000_cls(cpp_id); ++ case 0: ++ return target_rw(cpp_id, P32, 4, 4); ++ default: ++ return NFP_ERRNO(EINVAL); ++ } ++} ++ ++static inline int ++nfp_target_pushpull_width(int pp, int write_not_read) ++{ ++ if (pp < 0) ++ return pp; ++ ++ if (write_not_read) ++ return PULL_WIDTH(pp); ++ else ++ return PUSH_WIDTH(pp); ++} ++ ++static inline int ++nfp6000_target_action_width(uint32_t cpp_id, uint64_t address, ++ int write_not_read) ++{ ++ int pp; ++ ++ pp = nfp6000_target_pushpull(cpp_id, address); ++ ++ return nfp_target_pushpull_width(pp, write_not_read); ++} ++ ++static inline int ++nfp_target_action_width(uint32_t model, uint32_t cpp_id, uint64_t address, ++ int write_not_read) ++{ ++ if (NFP_CPP_MODEL_IS_6000(model)) { ++ return nfp6000_target_action_width(cpp_id, address, ++ write_not_read); ++ } else { ++ return NFP_ERRNO(EINVAL); ++ } ++} 
++ ++static inline int ++nfp_target_cpp(uint32_t cpp_island_id, uint64_t cpp_island_address, ++ uint32_t *cpp_target_id, uint64_t *cpp_target_address, ++ const uint32_t *imb_table) ++{ ++ int err; ++ int island = NFP_CPP_ID_ISLAND_of(cpp_island_id); ++ int target = NFP_CPP_ID_TARGET_of(cpp_island_id); ++ uint32_t imb; ++ ++ if (target < 0 || target >= 16) ++ return NFP_ERRNO(EINVAL); ++ ++ if (island == 0) { ++ /* Already translated */ ++ *cpp_target_id = cpp_island_id; ++ *cpp_target_address = cpp_island_address; ++ return 0; ++ } ++ ++ if (!imb_table) { ++ /* CPP + Island only allowed on systems with IMB tables */ ++ return NFP_ERRNO(EINVAL); ++ } ++ ++ imb = imb_table[target]; ++ ++ *cpp_target_address = cpp_island_address; ++ err = _nfp6000_cppat_addr_encode(cpp_target_address, island, target, ++ ((imb >> 13) & 7), ++ ((imb >> 12) & 1), ++ ((imb >> 6) & 0x3f), ++ ((imb >> 0) & 0x3f)); ++ if (err == 0) { ++ *cpp_target_id = ++ NFP_CPP_ID(target, NFP_CPP_ID_ACTION_of(cpp_island_id), ++ NFP_CPP_ID_TOKEN_of(cpp_island_id)); ++ } ++ ++ return err; ++} ++ ++#endif /* NFP_TARGET_H */ +-- +2.14.3 + diff --git a/SOURCES/0001-net-qede-fix-L2-handles-used-for-RSS-hash-update.patch b/SOURCES/0001-net-qede-fix-L2-handles-used-for-RSS-hash-update.patch new file mode 100644 index 0000000..9b43641 --- /dev/null +++ b/SOURCES/0001-net-qede-fix-L2-handles-used-for-RSS-hash-update.patch @@ -0,0 +1,36 @@ +From 3a1a1f3f1344c8a3bc0b06cd23e97b3922a91a61 Mon Sep 17 00:00:00 2001 +Message-Id: <3a1a1f3f1344c8a3bc0b06cd23e97b3922a91a61.1528327303.git.tredaelli@redhat.com> +From: Rasesh Mody +Date: Tue, 5 Jun 2018 16:03:57 -0700 +Subject: [PATCH] net/qede: fix L2-handles used for RSS hash update + +Fix fast path array index which is used for passing L2 handles to RSS +indirection table, properly distribute rxq handles for indirection table. +Currently, it is using the local copy of indirection table. When the RX +queue configuration changes the local copy becomes invalid. 
+ +Fixes: 69d7ba88f1a1 ("net/qede/base: use L2-handles for RSS configuration") +Cc: stable@dpdk.org + +Signed-off-by: Rasesh Mody +Reviewed-by: Kevin Traynor +--- + drivers/net/qede/qede_ethdev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c +index 137f91060..cd9ec1070 100644 +--- a/drivers/net/qede/qede_ethdev.c ++++ b/drivers/net/qede/qede_ethdev.c +@@ -2251,7 +2251,7 @@ int qede_rss_hash_update(struct rte_eth_dev *eth_dev, + vport_update_params.vport_id = 0; + /* pass the L2 handles instead of qids */ + for (i = 0 ; i < ECORE_RSS_IND_TABLE_SIZE ; i++) { +- idx = qdev->rss_ind_table[i]; ++ idx = i % QEDE_RSS_COUNT(qdev); + rss_params.rss_ind_table[i] = qdev->fp_array[idx].rxq->handle; + } + vport_update_params.rss_params = &rss_params; +-- +2.17.0 + diff --git a/SOURCES/0001-net-qede-fix-MTU-set-and-max-Rx-length.patch b/SOURCES/0001-net-qede-fix-MTU-set-and-max-Rx-length.patch new file mode 100644 index 0000000..99f7e15 --- /dev/null +++ b/SOURCES/0001-net-qede-fix-MTU-set-and-max-Rx-length.patch @@ -0,0 +1,188 @@ +From 9e334305178fd3715c17088632544bf58e5836a9 Mon Sep 17 00:00:00 2001 +From: Rasesh Mody +Date: Sat, 27 Jan 2018 13:15:30 -0800 +Subject: [PATCH] net/qede: fix MTU set and max Rx length + +This patch fixes issues related to MTU set and max_rx_pkt_len usage. + - Adjust MTU during device configuration when jumbo is enabled + + - In qede_set_mtu(): + Return not supported for VF as currently we do not support it. + + Cache new mtu value in mtu_new for proper update. + + Add check for RXQ allocation before calculating RX buffer size + if not allocated defer RX buffer size calculation till RXQ setup. + + Add check for before performing device start/stop. 
+ + - Use max_rx_pkt_len appropriately + + - Change QEDE_ETH_OVERHEAD macro to adjust driver specifics + +Fixes: 4c4bdadfa9e7 ("net/qede: refactoring multi-queue implementation") +Fixes: 9a6d30ae6d46 ("net/qede: refactoring vport handling code") +Fixes: 1ef4c3a5c1f7 ("net/qede: prevent crash while changing MTU dynamically") +Cc: stable@dpdk.org + +Signed-off-by: Rasesh Mody +--- + drivers/net/qede/qede_ethdev.c | 63 ++++++++++++++++++++++++++++-------------- + drivers/net/qede/qede_rxtx.c | 6 ++-- + drivers/net/qede/qede_rxtx.h | 2 +- + 3 files changed, 47 insertions(+), 24 deletions(-) + +diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c +index 323e8ed3b..895a0da61 100644 +--- a/drivers/net/qede/qede_ethdev.c ++++ b/drivers/net/qede/qede_ethdev.c +@@ -1414,18 +1414,24 @@ static int qede_dev_configure(struct rte_eth_dev *eth_dev) + return -ENOMEM; + } + ++ /* If jumbo enabled adjust MTU */ ++ if (eth_dev->data->dev_conf.rxmode.jumbo_frame) ++ eth_dev->data->mtu = ++ eth_dev->data->dev_conf.rxmode.max_rx_pkt_len - ++ ETHER_HDR_LEN - ETHER_CRC_LEN; ++ + /* VF's MTU has to be set using vport-start where as + * PF's MTU can be updated via vport-update. 
+ */ + if (IS_VF(edev)) { +- if (qede_start_vport(qdev, rxmode->max_rx_pkt_len)) ++ if (qede_start_vport(qdev, eth_dev->data->mtu)) + return -1; + } else { +- if (qede_update_mtu(eth_dev, rxmode->max_rx_pkt_len)) ++ if (qede_update_mtu(eth_dev, eth_dev->data->mtu)) + return -1; + } + +- qdev->mtu = rxmode->max_rx_pkt_len; ++ qdev->mtu = eth_dev->data->mtu; + qdev->new_mtu = qdev->mtu; + + /* Enable VLAN offloads by default */ +@@ -2306,16 +2312,23 @@ static int qede_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) + struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); + struct rte_eth_dev_info dev_info = {0}; + struct qede_fastpath *fp; ++ uint32_t max_rx_pkt_len; + uint32_t frame_size; + uint16_t rx_buf_size; + uint16_t bufsz; ++ bool restart = false; + int i; + + PMD_INIT_FUNC_TRACE(edev); ++ if (IS_VF(edev)) ++ return -ENOTSUP; + qede_dev_info_get(dev, &dev_info); +- frame_size = mtu + QEDE_ETH_OVERHEAD; ++ max_rx_pkt_len = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; ++ frame_size = max_rx_pkt_len + QEDE_ETH_OVERHEAD; + if ((mtu < ETHER_MIN_MTU) || (frame_size > dev_info.max_rx_pktlen)) { +- DP_ERR(edev, "MTU %u out of range\n", mtu); ++ DP_ERR(edev, "MTU %u out of range, %u is maximum allowable\n", ++ mtu, dev_info.max_rx_pktlen - ETHER_HDR_LEN - ++ ETHER_CRC_LEN - QEDE_ETH_OVERHEAD); + return -EINVAL; + } + if (!dev->data->scattered_rx && +@@ -2329,29 +2342,39 @@ static int qede_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) + */ + dev->rx_pkt_burst = qede_rxtx_pkts_dummy; + dev->tx_pkt_burst = qede_rxtx_pkts_dummy; +- qede_dev_stop(dev); ++ if (dev->data->dev_started) { ++ dev->data->dev_started = 0; ++ qede_dev_stop(dev); ++ restart = true; ++ } + rte_delay_ms(1000); +- qdev->mtu = mtu; ++ qdev->new_mtu = mtu; + /* Fix up RX buf size for all queues of the port */ + for_each_rss(i) { + fp = &qdev->fp_array[i]; +- bufsz = (uint16_t)rte_pktmbuf_data_room_size( +- fp->rxq->mb_pool) - RTE_PKTMBUF_HEADROOM; +- if (dev->data->scattered_rx) +- rx_buf_size = bufsz + 
QEDE_ETH_OVERHEAD; +- else +- rx_buf_size = mtu + QEDE_ETH_OVERHEAD; +- rx_buf_size = QEDE_CEIL_TO_CACHE_LINE_SIZE(rx_buf_size); +- fp->rxq->rx_buf_size = rx_buf_size; +- DP_INFO(edev, "buf_size adjusted to %u\n", rx_buf_size); +- } +- qede_dev_start(dev); +- if (frame_size > ETHER_MAX_LEN) ++ if (fp->rxq != NULL) { ++ bufsz = (uint16_t)rte_pktmbuf_data_room_size( ++ fp->rxq->mb_pool) - RTE_PKTMBUF_HEADROOM; ++ if (dev->data->scattered_rx) ++ rx_buf_size = bufsz + ETHER_HDR_LEN + ++ ETHER_CRC_LEN + QEDE_ETH_OVERHEAD; ++ else ++ rx_buf_size = frame_size; ++ rx_buf_size = QEDE_CEIL_TO_CACHE_LINE_SIZE(rx_buf_size); ++ fp->rxq->rx_buf_size = rx_buf_size; ++ DP_INFO(edev, "buf_size adjusted to %u\n", rx_buf_size); ++ } ++ } ++ if (max_rx_pkt_len > ETHER_MAX_LEN) + dev->data->dev_conf.rxmode.jumbo_frame = 1; + else + dev->data->dev_conf.rxmode.jumbo_frame = 0; ++ if (!dev->data->dev_started && restart) { ++ qede_dev_start(dev); ++ dev->data->dev_started = 1; ++ } + /* update max frame size */ +- dev->data->dev_conf.rxmode.max_rx_pkt_len = frame_size; ++ dev->data->dev_conf.rxmode.max_rx_pkt_len = max_rx_pkt_len; + /* Reassign back */ + dev->rx_pkt_burst = qede_recv_pkts; + dev->tx_pkt_burst = qede_xmit_pkts; +diff --git a/drivers/net/qede/qede_rxtx.c b/drivers/net/qede/qede_rxtx.c +index df248cf7e..810f0f394 100644 +--- a/drivers/net/qede/qede_rxtx.c ++++ b/drivers/net/qede/qede_rxtx.c +@@ -84,7 +84,6 @@ qede_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, + rxq->port_id = dev->data->port_id; + + max_rx_pkt_len = (uint16_t)rxmode->max_rx_pkt_len; +- qdev->mtu = max_rx_pkt_len; + + /* Fix up RX buffer size */ + bufsz = (uint16_t)rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM; +@@ -97,9 +96,10 @@ qede_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, + } + + if (dev->data->scattered_rx) +- rxq->rx_buf_size = bufsz + QEDE_ETH_OVERHEAD; ++ rxq->rx_buf_size = bufsz + ETHER_HDR_LEN + ++ ETHER_CRC_LEN + QEDE_ETH_OVERHEAD; + else +- rxq->rx_buf_size 
= qdev->mtu + QEDE_ETH_OVERHEAD; ++ rxq->rx_buf_size = max_rx_pkt_len + QEDE_ETH_OVERHEAD; + /* Align to cache-line size if needed */ + rxq->rx_buf_size = QEDE_CEIL_TO_CACHE_LINE_SIZE(rxq->rx_buf_size); + +diff --git a/drivers/net/qede/qede_rxtx.h b/drivers/net/qede/qede_rxtx.h +index 6214c97f3..f1d366613 100644 +--- a/drivers/net/qede/qede_rxtx.h ++++ b/drivers/net/qede/qede_rxtx.h +@@ -64,7 +64,7 @@ + #define QEDE_CEIL_TO_CACHE_LINE_SIZE(n) (((n) + (QEDE_FW_RX_ALIGN_END - 1)) & \ + ~(QEDE_FW_RX_ALIGN_END - 1)) + /* Note: QEDE_LLC_SNAP_HDR_LEN is optional */ +-#define QEDE_ETH_OVERHEAD ((ETHER_HDR_LEN) + ((2 * QEDE_VLAN_TAG_SIZE)) \ ++#define QEDE_ETH_OVERHEAD (((2 * QEDE_VLAN_TAG_SIZE)) - (ETHER_CRC_LEN) \ + + (QEDE_LLC_SNAP_HDR_LEN)) + + #define QEDE_RSS_OFFLOAD_ALL (ETH_RSS_IPV4 |\ +-- +2.14.3 + diff --git a/SOURCES/0001-net-qede-fix-few-log-messages.patch b/SOURCES/0001-net-qede-fix-few-log-messages.patch new file mode 100644 index 0000000..4340acf --- /dev/null +++ b/SOURCES/0001-net-qede-fix-few-log-messages.patch @@ -0,0 +1,49 @@ +From 651f3d4d0f1329b1dcf933e6dc207be44ef51d01 Mon Sep 17 00:00:00 2001 +From: Rasesh Mody +Date: Fri, 2 Feb 2018 22:03:18 -0800 +Subject: [PATCH] net/qede: fix few log messages + +Fixes: 9e334305178f ("net/qede: fix MTU set and max Rx length") +Fixes: 22d07d939c3c ("net/qede/base: update") +Cc: stable@dpdk.org + +Signed-off-by: Rasesh Mody +--- + drivers/net/qede/base/ecore_dcbx.c | 7 +++---- + drivers/net/qede/qede_rxtx.c | 2 +- + 2 files changed, 4 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/qede/base/ecore_dcbx.c b/drivers/net/qede/base/ecore_dcbx.c +index 632297a78..21ddda92e 100644 +--- a/drivers/net/qede/base/ecore_dcbx.c ++++ b/drivers/net/qede/base/ecore_dcbx.c +@@ -216,10 +216,9 @@ ecore_dcbx_get_app_protocol_type(struct ecore_hwfn *p_hwfn, + *type = DCBX_PROTOCOL_ETH; + } else { + *type = DCBX_MAX_PROTOCOL_TYPE; +- DP_ERR(p_hwfn, +- "No action required, App TLV id = 0x%x" +- " app_prio_bitmap = 0x%x\n", +- 
id, app_prio_bitmap); ++ DP_VERBOSE(p_hwfn, ECORE_MSG_DCB, ++ "No action required, App TLV entry = 0x%x\n", ++ app_prio_bitmap); + return false; + } + +diff --git a/drivers/net/qede/qede_rxtx.c b/drivers/net/qede/qede_rxtx.c +index 169ede83a..0de7c6b8a 100644 +--- a/drivers/net/qede/qede_rxtx.c ++++ b/drivers/net/qede/qede_rxtx.c +@@ -158,7 +158,7 @@ qede_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, + qdev->fp_array[queue_idx].rxq = rxq; + + DP_INFO(edev, "rxq %d num_desc %u rx_buf_size=%u socket %u\n", +- queue_idx, nb_desc, qdev->mtu, socket_id); ++ queue_idx, nb_desc, rxq->rx_buf_size, socket_id); + + return 0; + } +-- +2.14.3 + diff --git a/SOURCES/0001-net-qede-fix-memory-alloc-for-multiple-port-reconfig.patch b/SOURCES/0001-net-qede-fix-memory-alloc-for-multiple-port-reconfig.patch new file mode 100755 index 0000000..cf015a1 --- /dev/null +++ b/SOURCES/0001-net-qede-fix-memory-alloc-for-multiple-port-reconfig.patch @@ -0,0 +1,107 @@ +From 36a62d0d627d4858a97768d5ff1d72b4bd0abd8e Mon Sep 17 00:00:00 2001 +From: Rasesh Mody +Date: Thu, 7 Jun 2018 09:30:20 -0700 +Subject: [PATCH] net/qede: fix memory alloc for multiple port reconfig + +Multiple port reconfigurations can lead to memory allocation failures +due to hitting RTE memzone limit or no more room in config while +reserving memzone. + +When freeing memzones, update the memzone mapping and the memzone count. +Release Rx and Tx queue rings allocated during queue setup. 
+ +Fixes: a39001d90dbd ("net/qede: fix DMA memory leak") +Cc: stable@dpdk.org + +Signed-off-by: Rasesh Mody +Reviewed-by: Kevin Traynor +--- + drivers/net/qede/base/bcm_osal.c | 5 +++++ + drivers/net/qede/qede_rxtx.c | 21 +++++++++++---------- + 2 files changed, 16 insertions(+), 10 deletions(-) + +diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c +index ca1c2b113..72627dfec 100644 +--- a/drivers/net/qede/base/bcm_osal.c ++++ b/drivers/net/qede/base/bcm_osal.c +@@ -201,6 +201,11 @@ void osal_dma_free_mem(struct ecore_dev *p_dev, dma_addr_t phys) + DP_VERBOSE(p_dev, ECORE_MSG_SP, + "Free memzone %s\n", ecore_mz_mapping[j]->name); + rte_memzone_free(ecore_mz_mapping[j]); ++ while (j < ecore_mz_count - 1) { ++ ecore_mz_mapping[j] = ecore_mz_mapping[j + 1]; ++ j++; ++ } ++ ecore_mz_count--; + return; + } + } +diff --git a/drivers/net/qede/qede_rxtx.c b/drivers/net/qede/qede_rxtx.c +index bdb5d6f11..4fa1c615b 100644 +--- a/drivers/net/qede/qede_rxtx.c ++++ b/drivers/net/qede/qede_rxtx.c +@@ -192,9 +192,15 @@ static void qede_rx_queue_release_mbufs(struct qede_rx_queue *rxq) + void qede_rx_queue_release(void *rx_queue) + { + struct qede_rx_queue *rxq = rx_queue; ++ struct qede_dev *qdev = rxq->qdev; ++ struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); ++ ++ PMD_INIT_FUNC_TRACE(edev); + + if (rxq) { + qede_rx_queue_release_mbufs(rxq); ++ qdev->ops->common->chain_free(edev, &rxq->rx_bd_ring); ++ qdev->ops->common->chain_free(edev, &rxq->rx_comp_ring); + rte_free(rxq->sw_rx_ring); + rte_free(rxq); + } +@@ -350,9 +356,14 @@ static void qede_tx_queue_release_mbufs(struct qede_tx_queue *txq) + void qede_tx_queue_release(void *tx_queue) + { + struct qede_tx_queue *txq = tx_queue; ++ struct qede_dev *qdev = txq->qdev; ++ struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); ++ ++ PMD_INIT_FUNC_TRACE(edev); + + if (txq) { + qede_tx_queue_release_mbufs(txq); ++ qdev->ops->common->chain_free(edev, &txq->tx_pbl); + rte_free(txq->sw_tx_ring); + rte_free(txq); + } 
+@@ -441,8 +452,6 @@ void qede_dealloc_fp_resc(struct rte_eth_dev *eth_dev) + struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev); + struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); + struct qede_fastpath *fp; +- struct qede_rx_queue *rxq; +- struct qede_tx_queue *txq; + uint16_t sb_idx; + uint8_t i; + +@@ -467,21 +476,13 @@ void qede_dealloc_fp_resc(struct rte_eth_dev *eth_dev) + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + if (eth_dev->data->rx_queues[i]) { + qede_rx_queue_release(eth_dev->data->rx_queues[i]); +- rxq = eth_dev->data->rx_queues[i]; +- qdev->ops->common->chain_free(edev, +- &rxq->rx_bd_ring); +- qdev->ops->common->chain_free(edev, +- &rxq->rx_comp_ring); + eth_dev->data->rx_queues[i] = NULL; + } + } + + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { + if (eth_dev->data->tx_queues[i]) { +- txq = eth_dev->data->tx_queues[i]; + qede_tx_queue_release(eth_dev->data->tx_queues[i]); +- qdev->ops->common->chain_free(edev, +- &txq->tx_pbl); + eth_dev->data->tx_queues[i] = NULL; + } + } +-- +2.17.0 + diff --git a/SOURCES/0001-net-qede-fix-unicast-filter-routine-return-code.patch b/SOURCES/0001-net-qede-fix-unicast-filter-routine-return-code.patch new file mode 100644 index 0000000..30ef90c --- /dev/null +++ b/SOURCES/0001-net-qede-fix-unicast-filter-routine-return-code.patch @@ -0,0 +1,69 @@ +From f8d2581ecbba056db08888d562eafe5181a009ba Mon Sep 17 00:00:00 2001 +Message-Id: +From: Shahed Shaikh +Date: Sat, 19 May 2018 17:15:46 -0700 +Subject: [PATCH] net/qede: fix unicast filter routine return code + +There is no need to return failure (-EEXIST) when the requested +filter is already configured. 
+ +Fixes: d6cb17535f88 ("net/qede: fix VLAN filters") +Fixes: 77fac1b54fc9 ("net/qede: fix filtering code") +Cc: stable@dpdk.org + +Signed-off-by: Shahed Shaikh +--- + drivers/net/qede/qede_ethdev.c | 14 +++++++------- + drivers/net/qede/qede_fdir.c | 4 ++-- + 2 files changed, 9 insertions(+), 9 deletions(-) + +diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c +index 3e1a62c9f..30b65195c 100644 +--- a/drivers/net/qede/qede_ethdev.c ++++ b/drivers/net/qede/qede_ethdev.c +@@ -857,10 +857,10 @@ qede_ucast_filter(struct rte_eth_dev *eth_dev, struct ecore_filter_ucast *ucast, + ETHER_ADDR_LEN) == 0) && + ucast->vni == tmp->vni && + ucast->vlan == tmp->vlan) { +- DP_ERR(edev, "Unicast MAC is already added" +- " with vlan = %u, vni = %u\n", +- ucast->vlan, ucast->vni); +- return -EEXIST; ++ DP_INFO(edev, "Unicast MAC is already added" ++ " with vlan = %u, vni = %u\n", ++ ucast->vlan, ucast->vni); ++ return 0; + } + } + u = rte_malloc(NULL, sizeof(struct qede_ucast_entry), +@@ -1117,9 +1117,9 @@ static int qede_vlan_filter_set(struct rte_eth_dev *eth_dev, + + SLIST_FOREACH(tmp, &qdev->vlan_list_head, list) { + if (tmp->vid == vlan_id) { +- DP_ERR(edev, "VLAN %u already configured\n", +- vlan_id); +- return -EEXIST; ++ DP_INFO(edev, "VLAN %u already configured\n", ++ vlan_id); ++ return 0; + } + } + +diff --git a/drivers/net/qede/qede_fdir.c b/drivers/net/qede/qede_fdir.c +index da6364ee5..153ef9640 100644 +--- a/drivers/net/qede/qede_fdir.c ++++ b/drivers/net/qede/qede_fdir.c +@@ -141,8 +141,8 @@ qede_config_cmn_fdir_filter(struct rte_eth_dev *eth_dev, + if (add) { + SLIST_FOREACH(tmp, &qdev->fdir_info.fdir_list_head, list) { + if (memcmp(tmp->mz->addr, pkt, pkt_len) == 0) { +- DP_ERR(edev, "flowdir filter exist\n"); +- rc = -EEXIST; ++ DP_INFO(edev, "flowdir filter exist\n"); ++ rc = 0; + goto err2; + } + } +-- +2.17.0 + diff --git a/SOURCES/0001-vhost-fix-IOTLB-pool-out-of-memory-handling.patch 
b/SOURCES/0001-vhost-fix-IOTLB-pool-out-of-memory-handling.patch new file mode 100644 index 0000000..09d8e98 --- /dev/null +++ b/SOURCES/0001-vhost-fix-IOTLB-pool-out-of-memory-handling.patch @@ -0,0 +1,74 @@ +From 37771844a05c7b0a7b039dcae1b4b0a69b4acced Mon Sep 17 00:00:00 2001 +From: Maxime Coquelin +Date: Mon, 5 Feb 2018 16:04:56 +0100 +Subject: [PATCH] vhost: fix IOTLB pool out-of-memory handling + +In the unlikely case the IOTLB memory pool runs out of memory, +an issue may happen if all entries are used by the IOTLB cache, +and an IOTLB miss happens. If the iotlb pending list is empty, +then no memory is freed and allocation fails a second time. + +This patch fixes this by doing an IOTLB cache random evict if +the IOTLB pending list is empty, ensuring the second allocation +try will succeed. + +In the same spirit, the opposite is done when inserting an +IOTLB entry in the IOTLB cache fails due to out of memory. In +this case, the IOTLB pending is flushed if the IOTLB cache is +empty to ensure the new entry can be inserted.
+ +Fixes: d012d1f293f4 ("vhost: add IOTLB helper functions") +Fixes: f72c2ad63aeb ("vhost: add pending IOTLB miss request list and helpers") +Cc: stable@dpdk.org + +Signed-off-by: Maxime Coquelin +--- + lib/librte_vhost/iotlb.c | 18 +++++++++++++----- + 1 file changed, 13 insertions(+), 5 deletions(-) + +diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c +index b74cc6a78..72cd27df8 100644 +--- a/lib/librte_vhost/iotlb.c ++++ b/lib/librte_vhost/iotlb.c +@@ -50,6 +50,9 @@ struct vhost_iotlb_entry { + + #define IOTLB_CACHE_SIZE 2048 + ++static void ++vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq); ++ + static void + vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq) + { +@@ -95,9 +98,11 @@ vhost_user_iotlb_pending_insert(struct vhost_virtqueue *vq, + + ret = rte_mempool_get(vq->iotlb_pool, (void **)&node); + if (ret) { +- RTE_LOG(INFO, VHOST_CONFIG, +- "IOTLB pool empty, clear pending misses\n"); +- vhost_user_iotlb_pending_remove_all(vq); ++ RTE_LOG(DEBUG, VHOST_CONFIG, "IOTLB pool empty, clear entries\n"); ++ if (!TAILQ_EMPTY(&vq->iotlb_pending_list)) ++ vhost_user_iotlb_pending_remove_all(vq); ++ else ++ vhost_user_iotlb_cache_random_evict(vq); + ret = rte_mempool_get(vq->iotlb_pool, (void **)&node); + if (ret) { + RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n"); +@@ -186,8 +191,11 @@ vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova, + + ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node); + if (ret) { +- RTE_LOG(DEBUG, VHOST_CONFIG, "IOTLB pool empty, evict one entry\n"); +- vhost_user_iotlb_cache_random_evict(vq); ++ RTE_LOG(DEBUG, VHOST_CONFIG, "IOTLB pool empty, clear entries\n"); ++ if (!TAILQ_EMPTY(&vq->iotlb_list)) ++ vhost_user_iotlb_cache_random_evict(vq); ++ else ++ vhost_user_iotlb_pending_remove_all(vq); + ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node); + if (ret) { + RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n"); +-- +2.14.3 + diff 
--git a/SOURCES/0001-vhost-fix-indirect-descriptors-table-translation-siz.patch b/SOURCES/0001-vhost-fix-indirect-descriptors-table-translation-siz.patch new file mode 100644 index 0000000..0d879e1 --- /dev/null +++ b/SOURCES/0001-vhost-fix-indirect-descriptors-table-translation-siz.patch @@ -0,0 +1,32 @@ +From c7903f9048e1eae871651f72f066393f88c54f55 Mon Sep 17 00:00:00 2001 +From: Maxime Coquelin +Date: Mon, 23 Apr 2018 11:33:38 +0200 +Subject: [PATCH 01/11] vhost: fix indirect descriptors table translation size + +This patch fixes the size passed at the indirect descriptor +table translation time, which is the len field of the descriptor, +and not a single descriptor. + +This issue has been assigned CVE-2018-1059. + +Fixes: 62fdb8255ae7 ("vhost: use the guest IOVA to host VA helper") + +Signed-off-by: Maxime Coquelin +--- + lib/librte_vhost/virtio_net.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c +index d347030..cb1d0cf 100644 +--- a/lib/librte_vhost/virtio_net.c ++++ b/lib/librte_vhost/virtio_net.c +@@ -1329,5 +1329,5 @@ + vhost_iova_to_vva(dev, vq, + vq->desc[desc_indexes[i]].addr, +- sizeof(*desc), ++ vq->desc[desc_indexes[i]].len, + VHOST_ACCESS_RO); + if (unlikely(!desc)) +-- +1.8.3.1 + diff --git a/SOURCES/0001-vhost-improve-dirty-pages-logging-performance.patch b/SOURCES/0001-vhost-improve-dirty-pages-logging-performance.patch new file mode 100644 index 0000000..e99bdc4 --- /dev/null +++ b/SOURCES/0001-vhost-improve-dirty-pages-logging-performance.patch @@ -0,0 +1,319 @@ +From 4d8b1e6aa5d7ecfc1d2ee606b4bd838b4f1ac9d2 Mon Sep 17 00:00:00 2001 +From: Maxime Coquelin +Date: Thu, 17 May 2018 13:44:47 +0200 +Subject: [PATCH] vhost: improve dirty pages logging performance + +[ upstream commit c16915b8710911a75f0fbdb1aa5243f4cdfaf26a ] + +This patch caches all dirty pages logging until the used ring index +is updated. 
+ +The goal of this optimization is to fix a performance regression +introduced when the vhost library started to use atomic operations +to set bits in the shared dirty log map. While the fix was valid +as previous implementation wasn't safe against concurrent accesses, +contention was induced. + +With this patch, during migration, we have: +1. Less atomic operations as only a single atomic OR operation +per 32 or 64 (depending on CPU) pages. +2. Less atomic operations as during a burst, the same page will +be marked dirty only once. +3. Less write memory barriers. + +Fixes: 897f13a1f726 ("vhost: make page logging atomic") +Cc: stable@dpdk.org + +Suggested-by: Michael S. Tsirkin +Signed-off-by: Maxime Coquelin +Reviewed-by: Tiwei Bie +--- + lib/librte_vhost/vhost.h | 119 +++++++++++++++++++++++++++++++++++++++++- + lib/librte_vhost/virtio_net.c | 29 ++++++---- + 2 files changed, 137 insertions(+), 11 deletions(-) + +diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h +index 16d6b8913..42c6a3a75 100644 +--- a/lib/librte_vhost/vhost.h ++++ b/lib/librte_vhost/vhost.h +@@ -59,6 +59,8 @@ + + #define BUF_VECTOR_MAX 256 + ++#define VHOST_LOG_CACHE_NR 32 ++ + /** + * Structure contains buffer address, length and descriptor index + * from vring to do scatter RX. +@@ -92,6 +94,14 @@ struct batch_copy_elem { + uint64_t log_addr; + }; + ++/* ++ * Structure that contains the info for batched dirty logging. ++ */ ++struct log_cache_entry { ++ uint32_t offset; ++ unsigned long val; ++}; ++ + /** + * Structure contains variables relevant to RX/TX virtqueues. 
+ */ +@@ -133,6 +143,9 @@ struct vhost_virtqueue { + struct batch_copy_elem *batch_copy_elems; + uint16_t batch_copy_nb_elems; + ++ struct log_cache_entry log_cache[VHOST_LOG_CACHE_NR]; ++ uint16_t log_cache_nb_elem; ++ + rte_rwlock_t iotlb_lock; + rte_rwlock_t iotlb_pending_lock; + struct rte_mempool *iotlb_pool; +@@ -266,7 +279,15 @@ struct virtio_net { + static __rte_always_inline void + vhost_set_bit(unsigned int nr, volatile uint8_t *addr) + { +- __sync_fetch_and_or_8(addr, (1U << nr)); ++#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100) ++ /* ++ * __sync_ built-ins are deprecated, but __atomic_ ones ++ * are sub-optimized in older GCC versions. ++ */ ++ __sync_fetch_and_or_1(addr, (1U << nr)); ++#else ++ __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED); ++#endif + } + + static __rte_always_inline void +@@ -297,6 +318,102 @@ vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len) + } + } + ++static __rte_always_inline void ++vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq) ++{ ++ unsigned long *log_base; ++ int i; ++ ++ if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) || ++ !dev->log_base)) ++ return; ++ ++ log_base = (unsigned long *)(uintptr_t)dev->log_base; ++ ++ /* ++ * It is expected a write memory barrier has been issued ++ * before this function is called. ++ */ ++ ++ for (i = 0; i < vq->log_cache_nb_elem; i++) { ++ struct log_cache_entry *elem = vq->log_cache + i; ++ ++#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100) ++ /* ++ * '__sync' builtins are deprecated, but '__atomic' ones ++ * are sub-optimized in older GCC versions. 
++ */ ++ __sync_fetch_and_or(log_base + elem->offset, elem->val); ++#else ++ __atomic_fetch_or(log_base + elem->offset, elem->val, ++ __ATOMIC_RELAXED); ++#endif ++ } ++ ++ rte_smp_wmb(); ++ ++ vq->log_cache_nb_elem = 0; ++} ++ ++static __rte_always_inline void ++vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq, ++ uint64_t page) ++{ ++ uint32_t bit_nr = page % (sizeof(unsigned long) << 3); ++ uint32_t offset = page / (sizeof(unsigned long) << 3); ++ int i; ++ ++ for (i = 0; i < vq->log_cache_nb_elem; i++) { ++ struct log_cache_entry *elem = vq->log_cache + i; ++ ++ if (elem->offset == offset) { ++ elem->val |= (1UL << bit_nr); ++ return; ++ } ++ } ++ ++ if (unlikely(i >= VHOST_LOG_CACHE_NR)) { ++ /* ++ * No more room for a new log cache entry, ++ * so write the dirty log map directly. ++ */ ++ rte_smp_wmb(); ++ vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page); ++ ++ return; ++ } ++ ++ vq->log_cache[i].offset = offset; ++ vq->log_cache[i].val = (1UL << bit_nr); ++} ++ ++static __rte_always_inline void ++vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq, ++ uint64_t addr, uint64_t len) ++{ ++ uint64_t page; ++ ++ if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) || ++ !dev->log_base || !len)) ++ return; ++ ++ if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8))) ++ return; ++ ++ page = addr / VHOST_LOG_PAGE; ++ while (page * VHOST_LOG_PAGE < addr + len) { ++ vhost_log_cache_page(dev, vq, page); ++ page += 1; ++ } ++} ++ ++static __rte_always_inline void ++vhost_log_cache_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, ++ uint64_t offset, uint64_t len) ++{ ++ vhost_log_cache_write(dev, vq, vq->log_guest_addr + offset, len); ++} ++ + static __rte_always_inline void + vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t offset, uint64_t len) +diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c +index 
a013c07b0..5f8763d3a 100644 +--- a/lib/librte_vhost/virtio_net.c ++++ b/lib/librte_vhost/virtio_net.c +@@ -107,7 +107,7 @@ do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq, + rte_memcpy(&vq->used->ring[to], + &vq->shadow_used_ring[from], + size * sizeof(struct vring_used_elem)); +- vhost_log_used_vring(dev, vq, ++ vhost_log_cache_used_vring(dev, vq, + offsetof(struct vring_used, ring[to]), + size * sizeof(struct vring_used_elem)); + } +@@ -135,6 +135,8 @@ flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq) + + rte_smp_wmb(); + ++ vhost_log_cache_sync(dev, vq); ++ + *(volatile uint16_t *)&vq->used->idx += vq->shadow_used_idx; + vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), + sizeof(vq->used->idx)); +@@ -159,7 +161,7 @@ do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq) + + for (i = 0; i < count; i++) { + rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); +- vhost_log_write(dev, elem[i].log_addr, elem[i].len); ++ vhost_log_cache_write(dev, vq, elem[i].log_addr, elem[i].len); + PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0); + } + } +@@ -275,7 +277,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, + virtio_enqueue_offload(m, + (struct virtio_net_hdr *)(uintptr_t)desc_addr); + PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0); +- vhost_log_write(dev, desc_gaddr, dev->vhost_hlen); ++ vhost_log_cache_write(dev, vq, desc_gaddr, dev->vhost_hlen); + } else { + struct virtio_net_hdr vnet_hdr; + uint64_t remain = dev->vhost_hlen; +@@ -298,7 +300,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, + (void *)(uintptr_t)src, len); + + PRINT_PACKET(dev, (uintptr_t)dst, len, 0); +- vhost_log_write(dev, guest_addr, len); ++ vhost_log_cache_write(dev, vq, guest_addr, len); + remain -= len; + guest_addr += len; + dst += len; +@@ -379,7 +381,8 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, + 
desc_offset)), + rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), + cpy_len); +- vhost_log_write(dev, desc_gaddr + desc_offset, cpy_len); ++ vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset, ++ cpy_len); + PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), + cpy_len, 0); + } else { +@@ -468,7 +471,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, + vq->used->ring[used_idx].id = desc_indexes[i]; + vq->used->ring[used_idx].len = pkts[i]->pkt_len + + dev->vhost_hlen; +- vhost_log_used_vring(dev, vq, ++ vhost_log_cache_used_vring(dev, vq, + offsetof(struct vring_used, ring[used_idx]), + sizeof(vq->used->ring[used_idx])); + } +@@ -528,6 +531,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, + + rte_smp_wmb(); + ++ vhost_log_cache_sync(dev, vq); ++ + *(volatile uint16_t *)&vq->used->idx += count; + vq->last_used_idx += count; + vhost_log_used_vring(dev, vq, +@@ -797,7 +802,8 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, + + PRINT_PACKET(dev, (uintptr_t)dst, + len, 0); +- vhost_log_write(dev, guest_addr, len); ++ vhost_log_cache_write(dev, vq, ++ guest_addr, len); + + remain -= len; + guest_addr += len; +@@ -806,7 +812,7 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, + } else { + PRINT_PACKET(dev, (uintptr_t)hdr_addr, + dev->vhost_hlen, 0); +- vhost_log_write(dev, hdr_phys_addr, ++ vhost_log_cache_write(dev, vq, hdr_phys_addr, + dev->vhost_hlen); + } + +@@ -820,7 +826,8 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, + desc_offset)), + rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), + cpy_len); +- vhost_log_write(dev, desc_gaddr + desc_offset, cpy_len); ++ vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset, ++ cpy_len); + PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), + cpy_len, 0); + } else { +@@ -1384,7 +1391,7 @@ update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq, + { + 
vq->used->ring[used_idx].id = desc_idx; + vq->used->ring[used_idx].len = 0; +- vhost_log_used_vring(dev, vq, ++ vhost_log_cache_used_vring(dev, vq, + offsetof(struct vring_used, ring[used_idx]), + sizeof(vq->used->ring[used_idx])); + } +@@ -1399,6 +1406,8 @@ update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq, + rte_smp_wmb(); + rte_smp_rmb(); + ++ vhost_log_cache_sync(dev, vq); ++ + vq->used->idx += count; + vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), + sizeof(vq->used->idx)); +-- +2.14.3 + diff --git a/SOURCES/0001-vhost-prevent-features-to-be-changed-while-device-is.patch b/SOURCES/0001-vhost-prevent-features-to-be-changed-while-device-is.patch new file mode 100644 index 0000000..a5b5993 --- /dev/null +++ b/SOURCES/0001-vhost-prevent-features-to-be-changed-while-device-is.patch @@ -0,0 +1,59 @@ +From fec618a3fdcc88fa50089edb5748a6554ac49070 Mon Sep 17 00:00:00 2001 +From: Maxime Coquelin +Date: Wed, 13 Dec 2017 09:51:06 +0100 +Subject: [PATCH 1/6] vhost: prevent features to be changed while device is + running + +As section 2.2 of the Virtio spec states about features +negotiation: +"During device initialization, the driver reads this and tells +the device the subset that it accepts. The only way to +renegotiate is to reset the device." + +This patch implements a check to prevent illegal features change +while the device is running. + +One exception is the VHOST_F_LOG_ALL feature bit, which is enabled +when live-migration is initiated. But this feature is not negotiated +with the Virtio driver, but directly with the Vhost master. 
+ +Signed-off-by: Maxime Coquelin +Acked-by: Laszlo Ersek +Acked-by: Yuanhan Liu +(cherry picked from commit 07f8db29b8833378dd506f3e197319f8b669aed9) +Signed-off-by: Maxime Coquelin +--- + dpdk-17.11/lib/librte_vhost/vhost_user.c | 17 ++++++++++++++++- + 1 file changed, 16 insertions(+), 1 deletion(-) + +diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c +index f4c7ce462..545dbcb2b 100644 +--- a/lib/librte_vhost/vhost_user.c ++++ b/lib/librte_vhost/vhost_user.c +@@ -183,7 +183,22 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features) + return -1; + } + +- if ((dev->flags & VIRTIO_DEV_RUNNING) && dev->features != features) { ++ if (dev->flags & VIRTIO_DEV_RUNNING) { ++ if (dev->features == features) ++ return 0; ++ ++ /* ++ * Error out if master tries to change features while device is ++ * in running state. The exception being VHOST_F_LOG_ALL, which ++ * is enabled when the live-migration starts. ++ */ ++ if ((dev->features ^ features) & ~(1ULL << VHOST_F_LOG_ALL)) { ++ RTE_LOG(ERR, VHOST_CONFIG, ++ "(%d) features changed while device is running.\n", ++ dev->vid); ++ return -1; ++ } ++ + if (dev->notify_ops->features_changed) + dev->notify_ops->features_changed(dev->vid, features); + } +-- +2.14.3 + diff --git a/SOURCES/0001-vhost-remove-pending-IOTLB-entry-if-miss-request-fai.patch b/SOURCES/0001-vhost-remove-pending-IOTLB-entry-if-miss-request-fai.patch new file mode 100644 index 0000000..5205ffd --- /dev/null +++ b/SOURCES/0001-vhost-remove-pending-IOTLB-entry-if-miss-request-fai.patch @@ -0,0 +1,81 @@ +From 82b9c1540348b6be7996203065e10421e953cea9 Mon Sep 17 00:00:00 2001 +From: Maxime Coquelin +Date: Mon, 5 Feb 2018 16:04:57 +0100 +Subject: [PATCH] vhost: remove pending IOTLB entry if miss request failed + +In case vhost_user_iotlb_miss returns an error, the pending IOTLB +entry has to be removed from the list as no IOTLB update will be +received. 
+ +Fixes: fed67a20ac94 ("vhost: introduce guest IOVA to backend VA helper") +Cc: stable@dpdk.org + +Suggested-by: Tiwei Bie +Signed-off-by: Maxime Coquelin +--- + lib/librte_vhost/iotlb.c | 2 +- + lib/librte_vhost/iotlb.h | 3 +++ + lib/librte_vhost/vhost.c | 13 ++++++++++--- + 3 files changed, 14 insertions(+), 4 deletions(-) + +diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c +index 72cd27df8..c11ebcaac 100644 +--- a/lib/librte_vhost/iotlb.c ++++ b/lib/librte_vhost/iotlb.c +@@ -120,7 +120,7 @@ vhost_user_iotlb_pending_insert(struct vhost_virtqueue *vq, + rte_rwlock_write_unlock(&vq->iotlb_pending_lock); + } + +-static void ++void + vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, + uint64_t iova, uint64_t size, uint8_t perm) + { +diff --git a/lib/librte_vhost/iotlb.h b/lib/librte_vhost/iotlb.h +index f1a050e44..e7083e37b 100644 +--- a/lib/librte_vhost/iotlb.h ++++ b/lib/librte_vhost/iotlb.h +@@ -71,6 +71,9 @@ bool vhost_user_iotlb_pending_miss(struct vhost_virtqueue *vq, uint64_t iova, + uint8_t perm); + void vhost_user_iotlb_pending_insert(struct vhost_virtqueue *vq, uint64_t iova, + uint8_t perm); ++void vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, uint64_t iova, ++ uint64_t size, uint8_t perm); ++ + int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index); + + #endif /* _VHOST_IOTLB_H_ */ +diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c +index a31ca5002..a407067e2 100644 +--- a/lib/librte_vhost/vhost.c ++++ b/lib/librte_vhost/vhost.c +@@ -42,7 +42,9 @@ __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, + if (tmp_size == size) + return vva; + +- if (!vhost_user_iotlb_pending_miss(vq, iova + tmp_size, perm)) { ++ iova += tmp_size; ++ ++ if (!vhost_user_iotlb_pending_miss(vq, iova, perm)) { + /* + * iotlb_lock is read-locked for a full burst, + * but it only protects the iotlb cache. 
+@@ -52,8 +54,13 @@ __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, + */ + vhost_user_iotlb_rd_unlock(vq); + +- vhost_user_iotlb_pending_insert(vq, iova + tmp_size, perm); +- vhost_user_iotlb_miss(dev, iova + tmp_size, perm); ++ vhost_user_iotlb_pending_insert(vq, iova, perm); ++ if (vhost_user_iotlb_miss(dev, iova, perm)) { ++ RTE_LOG(ERR, VHOST_CONFIG, ++ "IOTLB miss req failed for IOVA 0x%" PRIx64 "\n", ++ iova); ++ vhost_user_iotlb_pending_remove(vq, iova, 1, perm); ++ } + + vhost_user_iotlb_rd_lock(vq); + } +-- +2.14.3 + diff --git a/SOURCES/0001-vhost_user_protect_active_rings_from_async_ring_changes.patch b/SOURCES/0001-vhost_user_protect_active_rings_from_async_ring_changes.patch index 40222c0..f997709 100644 --- a/SOURCES/0001-vhost_user_protect_active_rings_from_async_ring_changes.patch +++ b/SOURCES/0001-vhost_user_protect_active_rings_from_async_ring_changes.patch @@ -7,7 +7,6 @@ Subject: [dpdk-dev, From: Victor Kaplansky X-Patchwork-Id: 33921 X-Patchwork-Delegate: yuanhan.liu@linux.intel.com -Message-Id: <20180117154925-mutt-send-email-victork@redhat.com> List-Id: dev.dpdk.org To: dev@dpdk.org Cc: stable@dpdk.org, Jens Freimann , diff --git a/SOURCES/0002-eal-fix-build-on-FreeBSD.patch b/SOURCES/0002-eal-fix-build-on-FreeBSD.patch new file mode 100644 index 0000000..b4d41b9 --- /dev/null +++ b/SOURCES/0002-eal-fix-build-on-FreeBSD.patch @@ -0,0 +1,268 @@ +From a5c9b9278cd4fa0b61db045ed19df449f07ab139 Mon Sep 17 00:00:00 2001 +From: Thomas Monjalon +Date: Fri, 27 Apr 2018 03:49:19 +0200 +Subject: [PATCH 2/2] eal: fix build on FreeBSD + +The auxiliary vector read is implemented only for Linux. +It could be done with procstat_getauxv() for FreeBSD. + +Since the commit below, the auxiliary vector functions +are compiled for every architectures, including x86 +which is tested with FreeBSD. + +This patch is moving the Linux implementation in Linux directory, +and adding a fake/empty implementation for FreeBSD. 
+ +Fixes: 2ed9bf330709 ("eal: abstract away the auxiliary vector") + +Signed-off-by: Thomas Monjalon +Acked-by: Maxime Coquelin +--- + lib/librte_eal/bsdapp/eal/Makefile | 1 + + lib/librte_eal/bsdapp/eal/eal_cpuflags.c | 21 ++++++ + lib/librte_eal/common/eal_common_cpuflags.c | 79 ------------------- + lib/librte_eal/linuxapp/eal/Makefile | 1 + + lib/librte_eal/linuxapp/eal/eal_cpuflags.c | 84 +++++++++++++++++++++ + 7 files changed, 109 insertions(+), 79 deletions(-) + create mode 100644 lib/librte_eal/bsdapp/eal/eal_cpuflags.c + create mode 100644 lib/librte_eal/linuxapp/eal/eal_cpuflags.c + +diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile +index 200285e01..3fd33f1e4 100644 +--- a/lib/librte_eal/bsdapp/eal/Makefile ++++ b/lib/librte_eal/bsdapp/eal/Makefile +@@ -25,6 +25,7 @@ LIBABIVER := 7 + + # specific to bsdapp exec-env + SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c ++SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_cpuflags.c + SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memory.c + SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_hugepage_info.c + SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_thread.c +diff --git a/lib/librte_eal/bsdapp/eal/eal_cpuflags.c b/lib/librte_eal/bsdapp/eal/eal_cpuflags.c +new file mode 100644 +index 000000000..69b161ea6 +--- /dev/null ++++ b/lib/librte_eal/bsdapp/eal/eal_cpuflags.c +@@ -0,0 +1,21 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright 2018 Mellanox Technologies, Ltd ++ */ ++ ++#include ++#include ++ ++unsigned long ++rte_cpu_getauxval(unsigned long type __rte_unused) ++{ ++ /* not implemented */ ++ return 0; ++} ++ ++int ++rte_cpu_strcmp_auxval(unsigned long type __rte_unused, ++ const char *str __rte_unused) ++{ ++ /* not implemented */ ++ return -1; ++} +diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c +index 6a9dbaeb1..3a055f7c7 100644 +--- a/lib/librte_eal/common/eal_common_cpuflags.c ++++ b/lib/librte_eal/common/eal_common_cpuflags.c +@@ 
-2,90 +2,11 @@ + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +-#include +-#include + #include +-#include +-#include +-#include +-#include +- +-#if defined(__GLIBC__) && defined(__GLIBC_PREREQ) +-#if __GLIBC_PREREQ(2, 16) +-#include +-#define HAS_AUXV 1 +-#endif +-#endif + + #include + #include + +-#ifndef HAS_AUXV +-static unsigned long +-getauxval(unsigned long type __rte_unused) +-{ +- errno = ENOTSUP; +- return 0; +-} +-#endif +- +-#ifdef RTE_ARCH_64 +-typedef Elf64_auxv_t Internal_Elfx_auxv_t; +-#else +-typedef Elf32_auxv_t Internal_Elfx_auxv_t; +-#endif +- +- +-/** +- * Provides a method for retrieving values from the auxiliary vector and +- * possibly running a string comparison. +- * +- * @return Always returns a result. When the result is 0, check errno +- * to see if an error occurred during processing. +- */ +-static unsigned long +-_rte_cpu_getauxval(unsigned long type, const char *str) +-{ +- unsigned long val; +- +- errno = 0; +- val = getauxval(type); +- +- if (!val && (errno == ENOTSUP || errno == ENOENT)) { +- int auxv_fd = open("/proc/self/auxv", O_RDONLY); +- Internal_Elfx_auxv_t auxv; +- +- if (auxv_fd == -1) +- return 0; +- +- errno = ENOENT; +- while (read(auxv_fd, &auxv, sizeof(auxv)) == sizeof(auxv)) { +- if (auxv.a_type == type) { +- errno = 0; +- val = auxv.a_un.a_val; +- if (str) +- val = strcmp((const char *)val, str); +- break; +- } +- } +- close(auxv_fd); +- } +- +- return val; +-} +- +-unsigned long +-rte_cpu_getauxval(unsigned long type) +-{ +- return _rte_cpu_getauxval(type, NULL); +-} +- +-int +-rte_cpu_strcmp_auxval(unsigned long type, const char *str) +-{ +- return _rte_cpu_getauxval(type, str); +-} +- + /** + * Checks if the machine is adequate for running the binary. If it is not, the + * program exits with status 1. 
+diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile +index 45517a27b..3719ec9d7 100644 +--- a/lib/librte_eal/linuxapp/eal/Makefile ++++ b/lib/librte_eal/linuxapp/eal/Makefile +@@ -30,6 +30,7 @@ endif + + # specific to linuxapp exec-env + SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) := eal.c ++SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_cpuflags.c + SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_hugepage_info.c + SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_memory.c + SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_thread.c +diff --git a/lib/librte_eal/linuxapp/eal/eal_cpuflags.c b/lib/librte_eal/linuxapp/eal/eal_cpuflags.c +new file mode 100644 +index 000000000..d38296e1e +--- /dev/null ++++ b/lib/librte_eal/linuxapp/eal/eal_cpuflags.c +@@ -0,0 +1,84 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright 2018 Red Hat, Inc. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#if defined(__GLIBC__) && defined(__GLIBC_PREREQ) ++#if __GLIBC_PREREQ(2, 16) ++#include ++#define HAS_AUXV 1 ++#endif ++#endif ++ ++#include ++ ++#ifndef HAS_AUXV ++static unsigned long ++getauxval(unsigned long type __rte_unused) ++{ ++ errno = ENOTSUP; ++ return 0; ++} ++#endif ++ ++#ifdef RTE_ARCH_64 ++typedef Elf64_auxv_t Internal_Elfx_auxv_t; ++#else ++typedef Elf32_auxv_t Internal_Elfx_auxv_t; ++#endif ++ ++/** ++ * Provides a method for retrieving values from the auxiliary vector and ++ * possibly running a string comparison. ++ * ++ * @return Always returns a result. When the result is 0, check errno ++ * to see if an error occurred during processing. 
++ */ ++static unsigned long ++_rte_cpu_getauxval(unsigned long type, const char *str) ++{ ++ unsigned long val; ++ ++ errno = 0; ++ val = getauxval(type); ++ ++ if (!val && (errno == ENOTSUP || errno == ENOENT)) { ++ int auxv_fd = open("/proc/self/auxv", O_RDONLY); ++ Internal_Elfx_auxv_t auxv; ++ ++ if (auxv_fd == -1) ++ return 0; ++ ++ errno = ENOENT; ++ while (read(auxv_fd, &auxv, sizeof(auxv)) == sizeof(auxv)) { ++ if (auxv.a_type == type) { ++ errno = 0; ++ val = auxv.a_un.a_val; ++ if (str) ++ val = strcmp((const char *)val, str); ++ break; ++ } ++ } ++ close(auxv_fd); ++ } ++ ++ return val; ++} ++ ++unsigned long ++rte_cpu_getauxval(unsigned long type) ++{ ++ return _rte_cpu_getauxval(type, NULL); ++} ++ ++int ++rte_cpu_strcmp_auxval(unsigned long type, const char *str) ++{ ++ return _rte_cpu_getauxval(type, str); ++} +-- +2.17.0 + diff --git a/SOURCES/0002-net-nfp-use-new-CPP-interface.patch b/SOURCES/0002-net-nfp-use-new-CPP-interface.patch new file mode 100644 index 0000000..d2695c9 --- /dev/null +++ b/SOURCES/0002-net-nfp-use-new-CPP-interface.patch @@ -0,0 +1,631 @@ +From 6a4e7e8918cd7f82e65f82219b1d1f936fe5895f Mon Sep 17 00:00:00 2001 +From: Alejandro Lucero +Date: Thu, 5 Apr 2018 15:42:45 +0100 +Subject: [PATCH 2/3] net/nfp: use new CPP interface + +PF PMD support was based on NSPU interface. This patch changes the +PMD for using the new CPP user space interface which gives more +flexibility for adding new functionalities. + +This change just affects initialization with the datapath being the +same than before. 
+ +Signed-off-by: Alejandro Lucero +(cherry picked from commit 896c265ef954ed22b61b1980b554b8425b300eeb) +--- + drivers/net/nfp/Makefile | 17 ++- + drivers/net/nfp/nfp_net.c | 342 +++++++++++++++++++++++++++++------------- + drivers/net/nfp/nfp_net_pmd.h | 16 +- + 3 files changed, 264 insertions(+), 111 deletions(-) + +diff --git a/drivers/net/nfp/Makefile b/drivers/net/nfp/Makefile +index 4ba066ac4..f71ecde6f 100644 +--- a/drivers/net/nfp/Makefile ++++ b/drivers/net/nfp/Makefile +@@ -48,11 +48,24 @@ EXPORT_MAP := rte_pmd_nfp_version.map + + LIBABIVER := 1 + ++VPATH += $(SRCDIR)/nfpcore ++ ++SRCS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp_cppcore.c ++SRCS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp_cpp_pcie_ops.c ++SRCS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp_mutex.c ++SRCS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp_resource.c ++SRCS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp_crc.c ++SRCS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp_mip.c ++SRCS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp_nffw.c ++SRCS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp_hwinfo.c ++SRCS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp_rtsym.c ++SRCS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp_nsp.c ++SRCS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp_nsp_cmds.c ++SRCS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp_nsp_eth.c ++ + # + # all source are stored in SRCS-y + # + SRCS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp_net.c +-SRCS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp_nfpu.c +-SRCS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp_nspu.c + + include $(RTE_SDK)/mk/rte.lib.mk +diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c +index 0501156ba..b923d1269 100644 +--- a/drivers/net/nfp/nfp_net.c ++++ b/drivers/net/nfp/nfp_net.c +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2015 Netronome Systems, Inc. ++ * Copyright (c) 2014-2018 Netronome Systems, Inc. + * All rights reserved. + * + * Small portions derived from code Copyright(c) 2010-2015 Intel Corporation. 
+@@ -55,7 +55,13 @@ + #include + #include + +-#include "nfp_nfpu.h" ++#include "nfpcore/nfp_cpp.h" ++#include "nfpcore/nfp_nffw.h" ++#include "nfpcore/nfp_hwinfo.h" ++#include "nfpcore/nfp_mip.h" ++#include "nfpcore/nfp_rtsym.h" ++#include "nfpcore/nfp_nsp.h" ++ + #include "nfp_net_pmd.h" + #include "nfp_net_logs.h" + #include "nfp_net_ctrl.h" +@@ -95,12 +101,8 @@ static void nfp_net_stop(struct rte_eth_dev *dev); + static uint16_t nfp_net_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + +-/* +- * The offset of the queue controller queues in the PCIe Target. These +- * happen to be at the same offset on the NFP6000 and the NFP3200 so +- * we use a single macro here. +- */ +-#define NFP_PCIE_QUEUE(_q) (0x800 * ((_q) & 0xff)) ++/* The offset of the queue controller queues in the PCIe Target */ ++#define NFP_PCIE_QUEUE(_q) (0x80000 + (NFP_QCP_QUEUE_ADDR_SZ * ((_q) & 0xff))) + + /* Maximum value which can be added to a queue with one transaction */ + #define NFP_QCP_MAX_ADD 0x7f +@@ -618,47 +620,29 @@ nfp_net_cfg_queue_setup(struct nfp_net_hw *hw) + #define ETH_ADDR_LEN 6 + + static void +-nfp_eth_copy_mac_reverse(uint8_t *dst, const uint8_t *src) ++nfp_eth_copy_mac(uint8_t *dst, const uint8_t *src) + { + int i; + + for (i = 0; i < ETH_ADDR_LEN; i++) +- dst[ETH_ADDR_LEN - i - 1] = src[i]; ++ dst[i] = src[i]; + } + + static int + nfp_net_pf_read_mac(struct nfp_net_hw *hw, int port) + { +- union eth_table_entry *entry; +- int idx, i; +- +- idx = port; +- entry = hw->eth_table; +- +- /* Reading NFP ethernet table obtained before */ +- for (i = 0; i < NSP_ETH_MAX_COUNT; i++) { +- if (!(entry->port & NSP_ETH_PORT_LANES_MASK)) { +- /* port not in use */ +- entry++; +- continue; +- } +- if (idx == 0) +- break; +- idx--; +- entry++; +- } +- +- if (i == NSP_ETH_MAX_COUNT) +- return -EINVAL; ++ struct nfp_eth_table *nfp_eth_table; + ++ nfp_eth_table = nfp_eth_read_ports(hw->cpp); + /* + * hw points to port0 private data. 
We need hw now pointing to + * right port. + */ + hw += port; +- nfp_eth_copy_mac_reverse((uint8_t *)&hw->mac_addr, +- (uint8_t *)&entry->mac_addr); ++ nfp_eth_copy_mac((uint8_t *)&hw->mac_addr, ++ (uint8_t *)&nfp_eth_table->ports[port].mac_addr); + ++ free(nfp_eth_table); + return 0; + } + +@@ -809,7 +793,7 @@ nfp_net_start(struct rte_eth_dev *dev) + + if (hw->is_pf) + /* Configure the physical port up */ +- nfp_nsp_eth_config(hw->nspu_desc, hw->pf_port_idx, 1); ++ nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 1); + + hw->ctrl = new_ctrl; + +@@ -860,7 +844,7 @@ nfp_net_stop(struct rte_eth_dev *dev) + + if (hw->is_pf) + /* Configure the physical port down */ +- nfp_nsp_eth_config(hw->nspu_desc, hw->pf_port_idx, 0); ++ nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 0); + } + + /* Reset and stop device. The device can not be restarted. */ +@@ -2633,10 +2617,8 @@ nfp_net_init(struct rte_eth_dev *eth_dev) + uint64_t tx_bar_off = 0, rx_bar_off = 0; + uint32_t start_q; + int stride = 4; +- +- nspu_desc_t *nspu_desc = NULL; +- uint64_t bar_offset; + int port = 0; ++ int err; + + PMD_INIT_FUNC_TRACE(); + +@@ -2657,7 +2639,6 @@ nfp_net_init(struct rte_eth_dev *eth_dev) + + /* This points to the specific port private data */ + hw = &hwport0[port]; +- hw->pf_port_idx = port; + } else { + hw = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + hwport0 = 0; +@@ -2691,19 +2672,14 @@ nfp_net_init(struct rte_eth_dev *eth_dev) + } + + if (hw->is_pf && port == 0) { +- nspu_desc = hw->nspu_desc; +- +- if (nfp_nsp_map_ctrl_bar(nspu_desc, &bar_offset) != 0) { +- /* +- * A firmware should be there after PF probe so this +- * should not happen. 
+- */ +- RTE_LOG(ERR, PMD, "PF BAR symbol resolution failed\n"); +- return -ENODEV; ++ hw->ctrl_bar = nfp_rtsym_map(hw->sym_tbl, "_pf0_net_bar0", ++ hw->total_ports * 32768, ++ &hw->ctrl_area); ++ if (!hw->ctrl_bar) { ++ printf("nfp_rtsym_map fails for _pf0_net_ctrl_bar\n"); ++ return -EIO; + } + +- /* vNIC PF control BAR is a subset of PF PCI device BAR */ +- hw->ctrl_bar += bar_offset; + PMD_INIT_LOG(DEBUG, "ctrl bar: %p\n", hw->ctrl_bar); + } + +@@ -2727,13 +2703,14 @@ nfp_net_init(struct rte_eth_dev *eth_dev) + case PCI_DEVICE_ID_NFP6000_PF_NIC: + case PCI_DEVICE_ID_NFP6000_VF_NIC: + start_q = nn_cfg_readl(hw, NFP_NET_CFG_START_TXQ); +- tx_bar_off = NFP_PCIE_QUEUE(start_q); ++ tx_bar_off = start_q * NFP_QCP_QUEUE_ADDR_SZ; + start_q = nn_cfg_readl(hw, NFP_NET_CFG_START_RXQ); +- rx_bar_off = NFP_PCIE_QUEUE(start_q); ++ rx_bar_off = start_q * NFP_QCP_QUEUE_ADDR_SZ; + break; + default: + RTE_LOG(ERR, PMD, "nfp_net: no device ID matching\n"); +- return -ENODEV; ++ err = -ENODEV; ++ goto dev_err_ctrl_map; + } + + PMD_INIT_LOG(DEBUG, "tx_bar_off: 0x%" PRIx64 "\n", tx_bar_off); +@@ -2741,17 +2718,19 @@ nfp_net_init(struct rte_eth_dev *eth_dev) + + if (hw->is_pf && port == 0) { + /* configure access to tx/rx vNIC BARs */ +- nfp_nsp_map_queues_bar(nspu_desc, &bar_offset); +- PMD_INIT_LOG(DEBUG, "tx/rx bar_offset: %" PRIx64 "\n", +- bar_offset); +- hwport0->hw_queues = (uint8_t *)pci_dev->mem_resource[0].addr; +- +- /* vNIC PF tx/rx BARs are a subset of PF PCI device */ +- hwport0->hw_queues += bar_offset; ++ hwport0->hw_queues = nfp_cpp_map_area(hw->cpp, 0, 0, ++ NFP_PCIE_QUEUE(0), ++ NFP_QCP_QUEUE_AREA_SZ, ++ &hw->hwqueues_area); ++ ++ if (!hwport0->hw_queues) { ++ printf("nfp_rtsym_map fails for net.qc\n"); ++ err = -EIO; ++ goto dev_err_ctrl_map; ++ } + +- /* Lets seize the chance to read eth table from hw */ +- if (nfp_nsp_eth_read_table(nspu_desc, &hw->eth_table)) +- return -ENODEV; ++ PMD_INIT_LOG(DEBUG, "tx/rx bar address: 0x%p\n", ++ hwport0->hw_queues); + } + + 
if (hw->is_pf) { +@@ -2811,7 +2790,8 @@ nfp_net_init(struct rte_eth_dev *eth_dev) + eth_dev->data->mac_addrs = rte_zmalloc("mac_addr", ETHER_ADDR_LEN, 0); + if (eth_dev->data->mac_addrs == NULL) { + PMD_INIT_LOG(ERR, "Failed to space for MAC address"); +- return -ENOMEM; ++ err = -ENOMEM; ++ goto dev_err_queues_map; + } + + if (hw->is_pf) { +@@ -2822,6 +2802,8 @@ nfp_net_init(struct rte_eth_dev *eth_dev) + } + + if (!is_valid_assigned_ether_addr((struct ether_addr *)&hw->mac_addr)) { ++ PMD_INIT_LOG(INFO, "Using random mac address for port %d\n", ++ port); + /* Using random mac addresses for VFs */ + eth_random_addr(&hw->mac_addr[0]); + nfp_net_write_mac(hw, (uint8_t *)&hw->mac_addr); +@@ -2850,11 +2832,19 @@ nfp_net_init(struct rte_eth_dev *eth_dev) + nfp_net_stats_reset(eth_dev); + + return 0; ++ ++dev_err_queues_map: ++ nfp_cpp_area_free(hw->hwqueues_area); ++dev_err_ctrl_map: ++ nfp_cpp_area_free(hw->ctrl_area); ++ ++ return err; + } + + static int + nfp_pf_create_dev(struct rte_pci_device *dev, int port, int ports, +- nfpu_desc_t *nfpu_desc, void **priv) ++ struct nfp_cpp *cpp, struct nfp_hwinfo *hwinfo, ++ int phys_port, struct nfp_rtsym_table *sym_tbl, void **priv) + { + struct rte_eth_dev *eth_dev; + struct nfp_net_hw *hw; +@@ -2892,12 +2882,16 @@ nfp_pf_create_dev(struct rte_pci_device *dev, int port, int ports, + * Then dev_private is adjusted per port. 
+ */ + hw = (struct nfp_net_hw *)(eth_dev->data->dev_private) + port; +- hw->nspu_desc = nfpu_desc->nspu; +- hw->nfpu_desc = nfpu_desc; ++ hw->cpp = cpp; ++ hw->hwinfo = hwinfo; ++ hw->sym_tbl = sym_tbl; ++ hw->pf_port_idx = phys_port; + hw->is_pf = 1; + if (ports > 1) + hw->pf_multiport_enabled = 1; + ++ hw->total_ports = ports; ++ + eth_dev->device = &dev->device; + rte_eth_copy_pci_info(eth_dev, dev); + +@@ -2911,55 +2905,191 @@ nfp_pf_create_dev(struct rte_pci_device *dev, int port, int ports, + return ret; + } + ++#define DEFAULT_FW_PATH "/lib/firmware/netronome" ++ ++static int ++nfp_fw_upload(struct rte_pci_device *dev, struct nfp_nsp *nsp, char *card) ++{ ++ struct nfp_cpp *cpp = nsp->cpp; ++ int fw_f; ++ char *fw_buf; ++ char fw_name[100]; ++ char serial[100]; ++ struct stat file_stat; ++ off_t fsize, bytes; ++ ++ /* Looking for firmware file in order of priority */ ++ ++ /* First try to find a firmware image specific for this device */ ++ sprintf(serial, "serial-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x", ++ cpp->serial[0], cpp->serial[1], cpp->serial[2], cpp->serial[3], ++ cpp->serial[4], cpp->serial[5], cpp->interface >> 8, ++ cpp->interface & 0xff); ++ ++ sprintf(fw_name, "%s/%s.nffw", DEFAULT_FW_PATH, serial); ++ ++ RTE_LOG(DEBUG, PMD, "Trying with fw file: %s\n", fw_name); ++ fw_f = open(fw_name, O_RDONLY); ++ if (fw_f > 0) ++ goto read_fw; ++ ++ /* Then try the PCI name */ ++ sprintf(fw_name, "%s/pci-%s.nffw", DEFAULT_FW_PATH, dev->device.name); ++ ++ RTE_LOG(DEBUG, PMD, "Trying with fw file: %s\n", fw_name); ++ fw_f = open(fw_name, O_RDONLY); ++ if (fw_f > 0) ++ goto read_fw; ++ ++ /* Finally try the card type and media */ ++ sprintf(fw_name, "%s/%s", DEFAULT_FW_PATH, card); ++ RTE_LOG(DEBUG, PMD, "Trying with fw file: %s\n", fw_name); ++ fw_f = open(fw_name, O_RDONLY); ++ if (fw_f < 0) { ++ RTE_LOG(INFO, PMD, "Firmware file %s not found.", fw_name); ++ return -ENOENT; ++ } ++ ++read_fw: ++ if (fstat(fw_f, &file_stat) < 0) { ++ RTE_LOG(INFO, PMD, 
"Firmware file %s size is unknown", fw_name); ++ close(fw_f); ++ return -ENOENT; ++ } ++ ++ fsize = file_stat.st_size; ++ RTE_LOG(INFO, PMD, "Firmware file found at %s with size: %" PRIu64 "\n", ++ fw_name, (uint64_t)fsize); ++ ++ fw_buf = malloc((size_t)fsize); ++ if (!fw_buf) { ++ RTE_LOG(INFO, PMD, "malloc failed for fw buffer"); ++ close(fw_f); ++ return -ENOMEM; ++ } ++ memset(fw_buf, 0, fsize); ++ ++ bytes = read(fw_f, fw_buf, fsize); ++ if (bytes != fsize) { ++ RTE_LOG(INFO, PMD, "Reading fw to buffer failed.\n" ++ "Just %" PRIu64 " of %" PRIu64 " bytes read", ++ (uint64_t)bytes, (uint64_t)fsize); ++ free(fw_buf); ++ close(fw_f); ++ return -EIO; ++ } ++ ++ RTE_LOG(INFO, PMD, "Uploading the firmware ..."); ++ nfp_nsp_load_fw(nsp, fw_buf, bytes); ++ RTE_LOG(INFO, PMD, "Done"); ++ ++ free(fw_buf); ++ close(fw_f); ++ ++ return 0; ++} ++ ++static int ++nfp_fw_setup(struct rte_pci_device *dev, struct nfp_cpp *cpp, ++ struct nfp_eth_table *nfp_eth_table, struct nfp_hwinfo *hwinfo) ++{ ++ struct nfp_nsp *nsp; ++ const char *nfp_fw_model; ++ char card_desc[100]; ++ int err = 0; ++ ++ nfp_fw_model = nfp_hwinfo_lookup(hwinfo, "assembly.partno"); ++ ++ if (nfp_fw_model) { ++ RTE_LOG(INFO, PMD, "firmware model found: %s\n", nfp_fw_model); ++ } else { ++ RTE_LOG(ERR, PMD, "firmware model NOT found\n"); ++ return -EIO; ++ } ++ ++ if (nfp_eth_table->count == 0 || nfp_eth_table->count > 8) { ++ RTE_LOG(ERR, PMD, "NFP ethernet table reports wrong ports: %u\n", ++ nfp_eth_table->count); ++ return -EIO; ++ } ++ ++ RTE_LOG(INFO, PMD, "NFP ethernet port table reports %u ports\n", ++ nfp_eth_table->count); ++ ++ RTE_LOG(INFO, PMD, "Port speed: %u\n", nfp_eth_table->ports[0].speed); ++ ++ sprintf(card_desc, "nic_%s_%dx%d.nffw", nfp_fw_model, ++ nfp_eth_table->count, nfp_eth_table->ports[0].speed / 1000); ++ ++ nsp = nfp_nsp_open(cpp); ++ if (!nsp) { ++ RTE_LOG(ERR, PMD, "NFP error when obtaining NSP handle\n"); ++ return -EIO; ++ } ++ ++ nfp_nsp_device_soft_reset(nsp); ++ err = 
nfp_fw_upload(dev, nsp, card_desc); ++ ++ nfp_nsp_close(nsp); ++ return err; ++} ++ + static int nfp_pf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + struct rte_pci_device *dev) + { +- nfpu_desc_t *nfpu_desc; +- nspu_desc_t *nspu_desc; +- uint64_t offset_symbol; +- uint8_t *bar_offset; +- int major, minor; ++ struct nfp_cpp *cpp; ++ struct nfp_hwinfo *hwinfo; ++ struct nfp_rtsym_table *sym_tbl; ++ struct nfp_eth_table *nfp_eth_table = NULL; + int total_ports; + void *priv = 0; + int ret = -ENODEV; ++ int err; + int i; + + if (!dev) + return ret; + +- nfpu_desc = rte_malloc("nfp nfpu", sizeof(nfpu_desc_t), 0); +- if (!nfpu_desc) +- return -ENOMEM; +- +- if (nfpu_open(dev, nfpu_desc, 0) < 0) { +- RTE_LOG(ERR, PMD, +- "nfpu_open failed\n"); +- goto nfpu_error; ++ cpp = nfp_cpp_from_device_name(dev->device.name); ++ if (!cpp) { ++ RTE_LOG(ERR, PMD, "A CPP handle can not be obtained"); ++ ret = -EIO; ++ goto error; + } + +- nspu_desc = nfpu_desc->nspu; ++ hwinfo = nfp_hwinfo_read(cpp); ++ if (!hwinfo) { ++ RTE_LOG(ERR, PMD, "Error reading hwinfo table"); ++ return -EIO; ++ } + ++ nfp_eth_table = nfp_eth_read_ports(cpp); ++ if (!nfp_eth_table) { ++ RTE_LOG(ERR, PMD, "Error reading NFP ethernet table\n"); ++ return -EIO; ++ } + +- /* Check NSP ABI version */ +- if (nfp_nsp_get_abi_version(nspu_desc, &major, &minor) < 0) { +- RTE_LOG(INFO, PMD, "NFP NSP not present\n"); ++ if (nfp_fw_setup(dev, cpp, nfp_eth_table, hwinfo)) { ++ RTE_LOG(INFO, PMD, "Error when uploading firmware\n"); ++ ret = -EIO; + goto error; + } +- PMD_INIT_LOG(INFO, "nspu ABI version: %d.%d\n", major, minor); + +- if ((major == 0) && (minor < 20)) { +- RTE_LOG(INFO, PMD, "NFP NSP ABI version too old. 
Required 0.20 or higher\n"); ++ /* Now the symbol table should be there */ ++ sym_tbl = nfp_rtsym_table_read(cpp); ++ if (!sym_tbl) { ++ RTE_LOG(ERR, PMD, "Something is wrong with the firmware" ++ " symbol table"); ++ ret = -EIO; + goto error; + } + +- ret = nfp_nsp_fw_setup(nspu_desc, "nfd_cfg_pf0_num_ports", +- &offset_symbol); +- if (ret) ++ total_ports = nfp_rtsym_read_le(sym_tbl, "nfd_cfg_pf0_num_ports", &err); ++ if (total_ports != (int)nfp_eth_table->count) { ++ RTE_LOG(ERR, PMD, "Inconsistent number of ports\n"); ++ ret = -EIO; + goto error; +- +- bar_offset = (uint8_t *)dev->mem_resource[0].addr; +- bar_offset += offset_symbol; +- total_ports = (uint32_t)*bar_offset; ++ } + PMD_INIT_LOG(INFO, "Total pf ports: %d\n", total_ports); + + if (total_ports <= 0 || total_ports > 8) { +@@ -2969,18 +3099,15 @@ static int nfp_pf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + } + + for (i = 0; i < total_ports; i++) { +- ret = nfp_pf_create_dev(dev, i, total_ports, nfpu_desc, &priv); ++ ret = nfp_pf_create_dev(dev, i, total_ports, cpp, hwinfo, ++ nfp_eth_table->ports[i].index, ++ sym_tbl, &priv); + if (ret) +- goto error; ++ break; + } + +- return 0; +- + error: +- nfpu_close(nfpu_desc); +-nfpu_error: +- rte_free(nfpu_desc); +- ++ free(nfp_eth_table); + return ret; + } + +@@ -3025,8 +3152,19 @@ static int eth_nfp_pci_remove(struct rte_pci_device *pci_dev) + if ((pci_dev->id.device_id == PCI_DEVICE_ID_NFP4000_PF_NIC) || + (pci_dev->id.device_id == PCI_DEVICE_ID_NFP6000_PF_NIC)) { + port = get_pf_port_number(eth_dev->data->name); ++ /* ++ * hotplug is not possible with multiport PF although freeing ++ * data structures can be done for first port. 
++ */ ++ if (port != 0) ++ return -ENOTSUP; + hwport0 = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + hw = &hwport0[port]; ++ nfp_cpp_area_free(hw->ctrl_area); ++ nfp_cpp_area_free(hw->hwqueues_area); ++ free(hw->hwinfo); ++ free(hw->sym_tbl); ++ nfp_cpp_free(hw->cpp); + } else { + hw = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + } +diff --git a/drivers/net/nfp/nfp_net_pmd.h b/drivers/net/nfp/nfp_net_pmd.h +index 1ae0ea626..097c871b5 100644 +--- a/drivers/net/nfp/nfp_net_pmd.h ++++ b/drivers/net/nfp/nfp_net_pmd.h +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2015 Netronome Systems, Inc. ++ * Copyright (c) 2014-2018 Netronome Systems, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without +@@ -63,6 +63,7 @@ struct nfp_net_adapter; + #define NFP_NET_CRTL_BAR 0 + #define NFP_NET_TX_BAR 2 + #define NFP_NET_RX_BAR 2 ++#define NFP_QCP_QUEUE_AREA_SZ 0x80000 + + /* Macros for accessing the Queue Controller Peripheral 'CSRs' */ + #define NFP_QCP_QUEUE_OFF(_x) ((_x) * 0x800) +@@ -430,20 +431,21 @@ struct nfp_net_hw { + /* Records starting point for counters */ + struct rte_eth_stats eth_stats_base; + +-#ifdef NFP_NET_LIBNFP + struct nfp_cpp *cpp; + struct nfp_cpp_area *ctrl_area; +- struct nfp_cpp_area *tx_area; +- struct nfp_cpp_area *rx_area; ++ struct nfp_cpp_area *hwqueues_area; + struct nfp_cpp_area *msix_area; +-#endif ++ + uint8_t *hw_queues; + uint8_t is_pf; + uint8_t pf_port_idx; + uint8_t pf_multiport_enabled; ++ uint8_t total_ports; ++ + union eth_table_entry *eth_table; +- nspu_desc_t *nspu_desc; +- nfpu_desc_t *nfpu_desc; ++ ++ struct nfp_hwinfo *hwinfo; ++ struct nfp_rtsym_table *sym_tbl; + }; + + struct nfp_net_adapter { +-- +2.14.3 + diff --git a/SOURCES/0002-vhost-check-all-range-is-mapped-when-translating-GPA.patch b/SOURCES/0002-vhost-check-all-range-is-mapped-when-translating-GPA.patch new file mode 100644 index 0000000..6d7c421 --- /dev/null +++ 
b/SOURCES/0002-vhost-check-all-range-is-mapped-when-translating-GPA.patch @@ -0,0 +1,299 @@ +From 575ed8c576daebf38494aa3a10ef95ab806ea97a Mon Sep 17 00:00:00 2001 +From: Maxime Coquelin +Date: Mon, 23 Apr 2018 11:33:39 +0200 +Subject: [PATCH 02/11] vhost: check all range is mapped when translating GPAs + +There is currently no check done on the length when translating +guest addresses into host virtual addresses. Also, there is no +guarantee that the guest addresses range is contiguous in +the host virtual address space. + +This patch prepares vhost_iova_to_vva() and its callers to +return and check the mapped size. If the mapped size is smaller +than the requested size, the caller handles it as an error. + +This issue has been assigned CVE-2018-1059. + +Reported-by: Yongji Xie +Signed-off-by: Maxime Coquelin +--- + lib/librte_vhost/vhost.c | 39 +++++++++++++++----------- + lib/librte_vhost/vhost.h | 6 ++-- + lib/librte_vhost/virtio_net.c | 64 +++++++++++++++++++++++++++---------------- + 3 files changed, 67 insertions(+), 42 deletions(-) + +diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c +index 51ea720..a8ed40b 100644 +--- a/lib/librte_vhost/vhost.c ++++ b/lib/librte_vhost/vhost.c +@@ -59,15 +59,15 @@ + uint64_t + __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, +- uint64_t iova, uint64_t size, uint8_t perm) ++ uint64_t iova, uint64_t *size, uint8_t perm) + { + uint64_t vva, tmp_size; + +- if (unlikely(!size)) ++ if (unlikely(!*size)) + return 0; + +- tmp_size = size; ++ tmp_size = *size; + + vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm); +- if (tmp_size == size) ++ if (tmp_size == *size) + return vva; + +@@ -159,30 +159,37 @@ struct virtio_net * + vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq) + { +- uint64_t size; ++ uint64_t req_size, size; + + if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) + goto out; + +- size = sizeof(struct vring_desc) * vq->size; ++ req_size = 
sizeof(struct vring_desc) * vq->size; ++ size = req_size; + vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq, + vq->ring_addrs.desc_user_addr, +- size, VHOST_ACCESS_RW); +- if (!vq->desc) ++ &size, VHOST_ACCESS_RW); ++ if (!vq->desc || size != req_size) + return -1; + +- size = sizeof(struct vring_avail); +- size += sizeof(uint16_t) * vq->size; ++ req_size = sizeof(struct vring_avail); ++ req_size += sizeof(uint16_t) * vq->size; ++ if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) ++ req_size += sizeof(uint16_t); ++ size = req_size; + vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq, + vq->ring_addrs.avail_user_addr, +- size, VHOST_ACCESS_RW); +- if (!vq->avail) ++ &size, VHOST_ACCESS_RW); ++ if (!vq->avail || size != req_size) + return -1; + +- size = sizeof(struct vring_used); +- size += sizeof(struct vring_used_elem) * vq->size; ++ req_size = sizeof(struct vring_used); ++ req_size += sizeof(struct vring_used_elem) * vq->size; ++ if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) ++ req_size += sizeof(uint16_t); ++ size = req_size; + vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq, + vq->ring_addrs.used_user_addr, +- size, VHOST_ACCESS_RW); +- if (!vq->used) ++ &size, VHOST_ACCESS_RW); ++ if (!vq->used || size != req_size) + return -1; + +diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h +index c8f2a81..de300c1 100644 +--- a/lib/librte_vhost/vhost.h ++++ b/lib/librte_vhost/vhost.h +@@ -382,5 +382,5 @@ struct virtio_net { + + uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, +- uint64_t iova, uint64_t size, uint8_t perm); ++ uint64_t iova, uint64_t *len, uint8_t perm); + int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq); + void vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq); +@@ -388,10 +388,10 @@ uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, + static __rte_always_inline 
uint64_t + vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, +- uint64_t iova, uint64_t size, uint8_t perm) ++ uint64_t iova, uint64_t *len, uint8_t perm) + { + if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) + return rte_vhost_gpa_to_vva(dev->mem, iova); + +- return __vhost_iova_to_vva(dev, vq, iova, size, perm); ++ return __vhost_iova_to_vva(dev, vq, iova, len, perm); + } + +diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c +index cb1d0cf..79bac59 100644 +--- a/lib/librte_vhost/virtio_net.c ++++ b/lib/librte_vhost/virtio_net.c +@@ -205,4 +205,5 @@ + uint32_t mbuf_avail, mbuf_offset; + uint32_t cpy_len; ++ uint64_t dlen; + struct vring_desc *desc; + uint64_t desc_addr; +@@ -214,6 +215,7 @@ + + desc = &descs[desc_idx]; ++ dlen = desc->len; + desc_addr = vhost_iova_to_vva(dev, vq, desc->addr, +- desc->len, VHOST_ACCESS_RW); ++ &dlen, VHOST_ACCESS_RW); + /* + * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid +@@ -221,5 +223,6 @@ + * otherwise stores offset on the stack instead of in a register. 
+ */ +- if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr) { ++ if (unlikely(dlen != desc->len || desc->len < dev->vhost_hlen) || ++ !desc_addr) { + error = -1; + goto out; +@@ -259,8 +262,9 @@ + + desc = &descs[desc->next]; ++ dlen = desc->len; + desc_addr = vhost_iova_to_vva(dev, vq, desc->addr, +- desc->len, ++ &dlen, + VHOST_ACCESS_RW); +- if (unlikely(!desc_addr)) { ++ if (unlikely(!desc_addr || dlen != desc->len)) { + error = -1; + goto out; +@@ -376,10 +380,11 @@ + + if (vq->desc[desc_idx].flags & VRING_DESC_F_INDIRECT) { ++ uint64_t dlen = vq->desc[desc_idx].len; + descs = (struct vring_desc *)(uintptr_t) + vhost_iova_to_vva(dev, + vq, vq->desc[desc_idx].addr, +- vq->desc[desc_idx].len, +- VHOST_ACCESS_RO); +- if (unlikely(!descs)) { ++ &dlen, VHOST_ACCESS_RO); ++ if (unlikely(!descs || ++ dlen != vq->desc[desc_idx].len)) { + count = i; + break; +@@ -439,4 +444,5 @@ + uint32_t vec_id = *vec_idx; + uint32_t len = 0; ++ uint64_t dlen; + struct vring_desc *descs = vq->desc; + +@@ -444,9 +450,10 @@ + + if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) { ++ dlen = vq->desc[idx].len; + descs = (struct vring_desc *)(uintptr_t) + vhost_iova_to_vva(dev, vq, vq->desc[idx].addr, +- vq->desc[idx].len, ++ &dlen, + VHOST_ACCESS_RO); +- if (unlikely(!descs)) ++ if (unlikely(!descs || dlen != vq->desc[idx].len)) + return -1; + +@@ -531,4 +538,5 @@ + uint32_t desc_offset, desc_avail; + uint32_t cpy_len; ++ uint64_t dlen; + uint64_t hdr_addr, hdr_phys_addr; + struct rte_mbuf *hdr_mbuf; +@@ -542,8 +550,10 @@ + } + ++ dlen = buf_vec[vec_idx].buf_len; + desc_addr = vhost_iova_to_vva(dev, vq, buf_vec[vec_idx].buf_addr, +- buf_vec[vec_idx].buf_len, +- VHOST_ACCESS_RW); +- if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr) { ++ &dlen, VHOST_ACCESS_RW); ++ if (dlen != buf_vec[vec_idx].buf_len || ++ buf_vec[vec_idx].buf_len < dev->vhost_hlen || ++ !desc_addr) { + error = -1; + goto out; +@@ -567,10 +577,12 @@ + if (desc_avail == 0) { + vec_idx++; ++ dlen = 
buf_vec[vec_idx].buf_len; + desc_addr = + vhost_iova_to_vva(dev, vq, + buf_vec[vec_idx].buf_addr, +- buf_vec[vec_idx].buf_len, ++ &dlen, + VHOST_ACCESS_RW); +- if (unlikely(!desc_addr)) { ++ if (unlikely(!desc_addr || ++ dlen != buf_vec[vec_idx].buf_len)) { + error = -1; + goto out; +@@ -912,4 +924,5 @@ + uint32_t mbuf_avail, mbuf_offset; + uint32_t cpy_len; ++ uint64_t dlen; + struct rte_mbuf *cur = m, *prev = m; + struct virtio_net_hdr *hdr = NULL; +@@ -927,9 +940,10 @@ + } + ++ dlen = desc->len; + desc_addr = vhost_iova_to_vva(dev, + vq, desc->addr, +- desc->len, ++ &dlen, + VHOST_ACCESS_RO); +- if (unlikely(!desc_addr)) { ++ if (unlikely(!desc_addr || dlen != desc->len)) { + error = -1; + goto out; +@@ -954,9 +968,10 @@ + } + ++ dlen = desc->len; + desc_addr = vhost_iova_to_vva(dev, + vq, desc->addr, +- desc->len, ++ &dlen, + VHOST_ACCESS_RO); +- if (unlikely(!desc_addr)) { ++ if (unlikely(!desc_addr || dlen != desc->len)) { + error = -1; + goto out; +@@ -1042,9 +1057,9 @@ + } + ++ dlen = desc->len; + desc_addr = vhost_iova_to_vva(dev, + vq, desc->addr, +- desc->len, +- VHOST_ACCESS_RO); +- if (unlikely(!desc_addr)) { ++ &dlen, VHOST_ACCESS_RO); ++ if (unlikely(!desc_addr || dlen != desc->len)) { + error = -1; + goto out; +@@ -1320,4 +1335,5 @@ + struct vring_desc *desc; + uint16_t sz, idx; ++ uint64_t dlen; + int err; + +@@ -1326,10 +1342,12 @@ + + if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) { ++ dlen = vq->desc[desc_indexes[i]].len; + desc = (struct vring_desc *)(uintptr_t) + vhost_iova_to_vva(dev, vq, + vq->desc[desc_indexes[i]].addr, +- vq->desc[desc_indexes[i]].len, ++ &dlen, + VHOST_ACCESS_RO); +- if (unlikely(!desc)) ++ if (unlikely(!desc || ++ dlen != vq->desc[desc_indexes[i]].len)) + break; + +-- +1.8.3.1 + diff --git a/SOURCES/0002-vhost-propagate-set-features-handling-error.patch b/SOURCES/0002-vhost-propagate-set-features-handling-error.patch new file mode 100644 index 0000000..eb9b5a1 --- /dev/null +++ 
b/SOURCES/0002-vhost-propagate-set-features-handling-error.patch @@ -0,0 +1,40 @@ +From d7f0078e3a3d838b4ec6a87dca62771246e53db6 Mon Sep 17 00:00:00 2001 +From: Maxime Coquelin +Date: Wed, 13 Dec 2017 09:51:07 +0100 +Subject: [PATCH 2/6] vhost: propagate set features handling error + +Not propagating VHOST_USER_SET_FEATURES request handling +error may result in unpredictable behavior, as host and +guest features may no longer be synchronized. + +This patch fixes this by reporting the error to the upper +layer, which would result in the device being destroyed +and the connection with the master being closed. + +Signed-off-by: Maxime Coquelin +Acked-by: Laszlo Ersek +Acked-by: Yuanhan Liu +(cherry picked from commit 59fe5e17d9308b008ffa22ea250ddd363c84c3b5) +Signed-off-by: Maxime Coquelin +--- + dpdk-17.11/lib/librte_vhost/vhost_user.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c +index 545dbcb2b..471b1612c 100644 +--- a/lib/librte_vhost/vhost_user.c ++++ b/lib/librte_vhost/vhost_user.c +@@ -1263,7 +1263,9 @@ vhost_user_msg_handler(int vid, int fd) + send_vhost_reply(fd, &msg); + break; + case VHOST_USER_SET_FEATURES: +- vhost_user_set_features(dev, msg.payload.u64); ++ ret = vhost_user_set_features(dev, msg.payload.u64); ++ if (ret) ++ return -1; + break; + + case VHOST_USER_GET_PROTOCOL_FEATURES: +-- +2.14.3 + diff --git a/SOURCES/0003-net-nfp-remove-files.patch b/SOURCES/0003-net-nfp-remove-files.patch new file mode 100644 index 0000000..9c34f6f --- /dev/null +++ b/SOURCES/0003-net-nfp-remove-files.patch @@ -0,0 +1,1026 @@ +From 3d61e23ad1f24e53358f445cd55784e8cf2b08e9 Mon Sep 17 00:00:00 2001 +From: Alejandro Lucero +Date: Thu, 5 Apr 2018 15:42:47 +0100 +Subject: [PATCH 3/3] net/nfp: remove files + +New CPP interface makes NSPU interface obsolete. These files are +not needed anymore. 
+ +Signed-off-by: Alejandro Lucero +(cherry picked from commit 70217f1d8dc1f87d0b1d70b08cd3199a46f49a47) +--- + drivers/net/nfp/nfp_net_eth.h | 82 ------ + drivers/net/nfp/nfp_nfpu.c | 108 ------- + drivers/net/nfp/nfp_nfpu.h | 55 ---- + drivers/net/nfp/nfp_nspu.c | 642 ------------------------------------------ + drivers/net/nfp/nfp_nspu.h | 83 ------ + 5 files changed, 970 deletions(-) + delete mode 100644 drivers/net/nfp/nfp_net_eth.h + delete mode 100644 drivers/net/nfp/nfp_nfpu.c + delete mode 100644 drivers/net/nfp/nfp_nfpu.h + delete mode 100644 drivers/net/nfp/nfp_nspu.c + delete mode 100644 drivers/net/nfp/nfp_nspu.h + +diff --git a/drivers/net/nfp/nfp_net_eth.h b/drivers/net/nfp/nfp_net_eth.h +deleted file mode 100644 +index af57f03c5..000000000 +--- a/drivers/net/nfp/nfp_net_eth.h ++++ /dev/null +@@ -1,82 +0,0 @@ +-/* +- * Copyright (c) 2017 Netronome Systems, Inc. +- * All rights reserved. +- * +- * Redistribution and use in source and binary forms, with or without +- * modification, are permitted provided that the following conditions are met: +- * +- * 1. Redistributions of source code must retain the above copyright notice, +- * this list of conditions and the following disclaimer. +- * +- * 2. Redistributions in binary form must reproduce the above copyright +- * notice, this list of conditions and the following disclaimer in the +- * documentation and/or other materials provided with the distribution +- * +- * 3. Neither the name of the copyright holder nor the names of its +- * contributors may be used to endorse or promote products derived from this +- * software without specific prior written permission. +- * +- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +- * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +- * POSSIBILITY OF SUCH DAMAGE. +- */ +- +-/* +- * vim:shiftwidth=8:noexpandtab +- * +- * @file dpdk/pmd/nfp_net_eth.h +- * +- * Netronome NFP_NET PDM driver +- */ +- +-union eth_table_entry { +- struct { +- uint64_t port; +- uint64_t state; +- uint8_t mac_addr[6]; +- uint8_t resv[2]; +- uint64_t control; +- }; +- uint64_t raw[4]; +-}; +- +-#ifndef BIT_ULL +-#define BIT_ULL(a) (1ULL << (a)) +-#endif +- +-#define NSP_ETH_NBI_PORT_COUNT 24 +-#define NSP_ETH_MAX_COUNT (2 * NSP_ETH_NBI_PORT_COUNT) +-#define NSP_ETH_TABLE_SIZE (NSP_ETH_MAX_COUNT * sizeof(union eth_table_entry)) +- +-#define NSP_ETH_PORT_LANES 0xf +-#define NSP_ETH_PORT_INDEX 0xff00 +-#define NSP_ETH_PORT_LABEL 0x3f000000000000 +-#define NSP_ETH_PORT_PHYLABEL 0xfc0000000000000 +- +-#define NSP_ETH_PORT_LANES_MASK rte_cpu_to_le_64(NSP_ETH_PORT_LANES) +- +-#define NSP_ETH_STATE_CONFIGURED BIT_ULL(0) +-#define NSP_ETH_STATE_ENABLED BIT_ULL(1) +-#define NSP_ETH_STATE_TX_ENABLED BIT_ULL(2) +-#define NSP_ETH_STATE_RX_ENABLED BIT_ULL(3) +-#define NSP_ETH_STATE_RATE 0xf00 +-#define NSP_ETH_STATE_INTERFACE 0xff000 +-#define NSP_ETH_STATE_MEDIA 0x300000 +-#define NSP_ETH_STATE_OVRD_CHNG BIT_ULL(22) +-#define NSP_ETH_STATE_ANEG 0x3800000 +- +-#define NSP_ETH_CTRL_CONFIGURED BIT_ULL(0) +-#define NSP_ETH_CTRL_ENABLED BIT_ULL(1) +-#define NSP_ETH_CTRL_TX_ENABLED BIT_ULL(2) +-#define NSP_ETH_CTRL_RX_ENABLED BIT_ULL(3) +-#define NSP_ETH_CTRL_SET_RATE BIT_ULL(4) +-#define NSP_ETH_CTRL_SET_LANES BIT_ULL(5) 
+-#define NSP_ETH_CTRL_SET_ANEG BIT_ULL(6) +diff --git a/drivers/net/nfp/nfp_nfpu.c b/drivers/net/nfp/nfp_nfpu.c +deleted file mode 100644 +index f11afef35..000000000 +--- a/drivers/net/nfp/nfp_nfpu.c ++++ /dev/null +@@ -1,108 +0,0 @@ +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +- +-#include "nfp_nfpu.h" +- +-/* PF BAR and expansion BAR for the NSP interface */ +-#define NFP_CFG_PCIE_BAR 0 +-#define NFP_CFG_EXP_BAR 7 +- +-#define NFP_CFG_EXP_BAR_CFG_BASE 0x30000 +- +-/* There could be other NFP userspace tools using the NSP interface. +- * Make sure there is no other process using it and locking the access for +- * avoiding problems. +- */ +-static int +-nspv_aquire_process_lock(nfpu_desc_t *desc) +-{ +- int rc; +- struct flock lock; +- char lockname[30]; +- +- memset(&lock, 0, sizeof(lock)); +- +- snprintf(lockname, sizeof(lockname), "/var/lock/nfp%d", desc->nfp); +- +- /* Using S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH */ +- desc->lock = open(lockname, O_RDWR | O_CREAT, 0666); +- +- if (desc->lock < 0) +- return desc->lock; +- +- lock.l_type = F_WRLCK; +- lock.l_whence = SEEK_SET; +- rc = -1; +- while (rc != 0) { +- rc = fcntl(desc->lock, F_SETLK, &lock); +- if (rc < 0) { +- if ((errno != EAGAIN) && (errno != EACCES)) { +- close(desc->lock); +- return rc; +- } +- } +- } +- +- return 0; +-} +- +-int +-nfpu_open(struct rte_pci_device *pci_dev, nfpu_desc_t *desc, int nfp) +-{ +- void *cfg_base, *mem_base; +- size_t barsz; +- int ret = 0; +- int i = 0; +- +- desc->nfp = nfp; +- +- ret = nspv_aquire_process_lock(desc); +- if (ret) +- return -1; +- +- barsz = pci_dev->mem_resource[0].len; +- +- /* barsz in log2 */ +- while (barsz >>= 1) +- i++; +- +- barsz = i; +- +- /* Sanity check: we can assume any bar size less than 1MB an error */ +- if (barsz < 20) +- return -1; +- +- /* Getting address for NFP expansion BAR registers */ +- cfg_base = pci_dev->mem_resource[0].addr; +- 
cfg_base = (uint8_t *)cfg_base + NFP_CFG_EXP_BAR_CFG_BASE; +- +- /* Getting address for NFP NSP interface registers */ +- mem_base = pci_dev->mem_resource[0].addr; +- mem_base = (uint8_t *)mem_base + (NFP_CFG_EXP_BAR << (barsz - 3)); +- +- +- desc->nspu = rte_malloc("nfp nspu", sizeof(nspu_desc_t), 0); +- nfp_nspu_init(desc->nspu, desc->nfp, NFP_CFG_PCIE_BAR, barsz, +- NFP_CFG_EXP_BAR, cfg_base, mem_base); +- +- return ret; +-} +- +-int +-nfpu_close(nfpu_desc_t *desc) +-{ +- rte_free(desc->nspu); +- close(desc->lock); +- unlink("/var/lock/nfp0"); +- return 0; +-} +diff --git a/drivers/net/nfp/nfp_nfpu.h b/drivers/net/nfp/nfp_nfpu.h +deleted file mode 100644 +index e56fa099d..000000000 +--- a/drivers/net/nfp/nfp_nfpu.h ++++ /dev/null +@@ -1,55 +0,0 @@ +-/* +- * Copyright (c) 2017 Netronome Systems, Inc. +- * All rights reserved. +- * +- * Redistribution and use in source and binary forms, with or without +- * modification, are permitted provided that the following conditions are met: +- * +- * 1. Redistributions of source code must retain the above copyright notice, +- * this list of conditions and the following disclaimer. +- * +- * 2. Redistributions in binary form must reproduce the above copyright +- * notice, this list of conditions and the following disclaimer in the +- * documentation and/or other materials provided with the distribution +- * +- * 3. Neither the name of the copyright holder nor the names of its +- * contributors may be used to endorse or promote products derived from this +- * software without specific prior written permission. +- * +- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +- * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +- * POSSIBILITY OF SUCH DAMAGE. +- */ +- +-/* +- * vim:shiftwidth=8:noexpandtab +- * +- * @file dpdk/pmd/nfp_nfpu.h +- * +- * Netronome NFP_NET PDM driver +- */ +- +-/* +- * NFP User interface creates a window for talking with NFP NSP processor +- */ +- +- +-#include +-#include "nfp_nspu.h" +- +-typedef struct { +- int nfp; +- int lock; +- nspu_desc_t *nspu; +-} nfpu_desc_t; +- +-int nfpu_open(struct rte_pci_device *pci_dev, nfpu_desc_t *desc, int nfp); +-int nfpu_close(nfpu_desc_t *desc); +diff --git a/drivers/net/nfp/nfp_nspu.c b/drivers/net/nfp/nfp_nspu.c +deleted file mode 100644 +index f90898321..000000000 +--- a/drivers/net/nfp/nfp_nspu.c ++++ /dev/null +@@ -1,642 +0,0 @@ +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +- +-#include "nfp_nfpu.h" +- +-#define CFG_EXP_BAR_ADDR_SZ 1 +-#define CFG_EXP_BAR_MAP_TYPE 1 +- +-#define EXP_BAR_TARGET_SHIFT 23 +-#define EXP_BAR_LENGTH_SHIFT 27 /* 0=32, 1=64 bit increment */ +-#define EXP_BAR_MAP_TYPE_SHIFT 29 /* Bulk BAR map */ +- +-/* NFP target for NSP access */ +-#define NFP_NSP_TARGET 7 +- +-/* Expansion BARs for mapping PF vnic BARs */ +-#define NFP_NET_PF_CFG_EXP_BAR 6 +-#define NFP_NET_PF_HW_QUEUES_EXP_BAR 5 +- +-/* +- * This is an NFP internal address used for configuring properly an NFP +- * expansion BAR. 
+- */ +-#define MEM_CMD_BASE_ADDR 0x8100000000 +- +-/* NSP interface registers */ +-#define NSP_BASE (MEM_CMD_BASE_ADDR + 0x22100) +-#define NSP_STATUS 0x00 +-#define NSP_COMMAND 0x08 +-#define NSP_BUFFER 0x10 +-#define NSP_DEFAULT_BUF 0x18 +-#define NSP_DEFAULT_BUF_CFG 0x20 +- +-#define NSP_MAGIC 0xab10 +-#define NSP_STATUS_MAGIC(x) (((x) >> 48) & 0xffff) +-#define NSP_STATUS_MAJOR(x) (int)(((x) >> 44) & 0xf) +-#define NSP_STATUS_MINOR(x) (int)(((x) >> 32) & 0xfff) +- +-/* NSP commands */ +-#define NSP_CMD_RESET 1 +-#define NSP_CMD_FW_LOAD 6 +-#define NSP_CMD_READ_ETH_TABLE 7 +-#define NSP_CMD_WRITE_ETH_TABLE 8 +-#define NSP_CMD_GET_SYMBOL 14 +- +-#define NSP_BUFFER_CFG_SIZE_MASK (0xff) +- +-#define NSP_REG_ADDR(d, off, reg) ((uint8_t *)(d)->mem_base + (off) + (reg)) +-#define NSP_REG_VAL(p) (*(uint64_t *)(p)) +- +-/* +- * An NFP expansion BAR is configured for allowing access to a specific NFP +- * target: +- * +- * IN: +- * desc: struct with basic NSP addresses to work with +- * expbar: NFP PF expansion BAR index to configure +- * tgt: NFP target to configure access +- * addr: NFP target address +- * +- * OUT: +- * pcie_offset: NFP PCI BAR offset to work with +- */ +-static void +-nfp_nspu_mem_bar_cfg(nspu_desc_t *desc, int expbar, int tgt, +- uint64_t addr, uint64_t *pcie_offset) +-{ +- uint64_t x, y, barsz; +- uint32_t *expbar_ptr; +- +- barsz = desc->barsz; +- +- /* +- * NFP CPP address to configure. This comes from NFP 6000 +- * datasheet document based on Bulk mapping. 
+- */ +- x = (addr >> (barsz - 3)) << (21 - (40 - (barsz - 3))); +- x |= CFG_EXP_BAR_MAP_TYPE << EXP_BAR_MAP_TYPE_SHIFT; +- x |= CFG_EXP_BAR_ADDR_SZ << EXP_BAR_LENGTH_SHIFT; +- x |= tgt << EXP_BAR_TARGET_SHIFT; +- +- /* Getting expansion bar configuration register address */ +- expbar_ptr = (uint32_t *)desc->cfg_base; +- /* Each physical PCI BAR has 8 NFP expansion BARs */ +- expbar_ptr += (desc->pcie_bar * 8) + expbar; +- +- /* Writing to the expansion BAR register */ +- *expbar_ptr = (uint32_t)x; +- +- /* Getting the pcie offset to work with from userspace */ +- y = addr & ((uint64_t)(1 << (barsz - 3)) - 1); +- *pcie_offset = y; +-} +- +-/* +- * Configuring an expansion bar for accessing NSP userspace interface. This +- * function configures always the same expansion bar, which implies access to +- * previously configured NFP target is lost. +- */ +-static void +-nspu_xlate(nspu_desc_t *desc, uint64_t addr, uint64_t *pcie_offset) +-{ +- nfp_nspu_mem_bar_cfg(desc, desc->exp_bar, NFP_NSP_TARGET, addr, +- pcie_offset); +-} +- +-int +-nfp_nsp_get_abi_version(nspu_desc_t *desc, int *major, int *minor) +-{ +- uint64_t pcie_offset; +- uint64_t nsp_reg; +- +- nspu_xlate(desc, NSP_BASE, &pcie_offset); +- nsp_reg = NSP_REG_VAL(NSP_REG_ADDR(desc, pcie_offset, NSP_STATUS)); +- +- if (NSP_STATUS_MAGIC(nsp_reg) != NSP_MAGIC) +- return -1; +- +- *major = NSP_STATUS_MAJOR(nsp_reg); +- *minor = NSP_STATUS_MINOR(nsp_reg); +- +- return 0; +-} +- +-int +-nfp_nspu_init(nspu_desc_t *desc, int nfp, int pcie_bar, size_t pcie_barsz, +- int exp_bar, void *exp_bar_cfg_base, void *exp_bar_mmap) +-{ +- uint64_t offset, buffaddr; +- uint64_t nsp_reg; +- +- desc->nfp = nfp; +- desc->pcie_bar = pcie_bar; +- desc->exp_bar = exp_bar; +- desc->barsz = pcie_barsz; +- desc->windowsz = 1 << (desc->barsz - 3); +- desc->cfg_base = exp_bar_cfg_base; +- desc->mem_base = exp_bar_mmap; +- +- nspu_xlate(desc, NSP_BASE, &offset); +- +- /* +- * Other NSPU clients can use other buffers. 
Let's tell NSPU we use the +- * default buffer. +- */ +- buffaddr = NSP_REG_VAL(NSP_REG_ADDR(desc, offset, NSP_DEFAULT_BUF)); +- NSP_REG_VAL(NSP_REG_ADDR(desc, offset, NSP_BUFFER)) = buffaddr; +- +- /* NFP internal addresses are 40 bits. Clean all other bits here */ +- buffaddr = buffaddr & (((uint64_t)1 << 40) - 1); +- desc->bufaddr = buffaddr; +- +- /* Lets get information about the buffer */ +- nsp_reg = NSP_REG_VAL(NSP_REG_ADDR(desc, offset, NSP_DEFAULT_BUF_CFG)); +- +- /* Buffer size comes in MBs. Coversion to bytes */ +- desc->buf_size = ((size_t)nsp_reg & NSP_BUFFER_CFG_SIZE_MASK) << 20; +- +- return 0; +-} +- +-#define NSPU_NFP_BUF(addr, base, off) \ +- (*(uint64_t *)((uint8_t *)(addr)->mem_base + ((base) | (off)))) +- +-#define NSPU_HOST_BUF(base, off) (*(uint64_t *)((uint8_t *)(base) + (off))) +- +-static int +-nspu_buff_write(nspu_desc_t *desc, void *buffer, size_t size) +-{ +- uint64_t pcie_offset, pcie_window_base, pcie_window_offset; +- uint64_t windowsz = desc->windowsz; +- uint64_t buffaddr, j, i = 0; +- int ret = 0; +- +- if (size > desc->buf_size) +- return -1; +- +- buffaddr = desc->bufaddr; +- windowsz = desc->windowsz; +- +- while (i < size) { +- /* Expansion bar reconfiguration per window size */ +- nspu_xlate(desc, buffaddr + i, &pcie_offset); +- pcie_window_base = pcie_offset & (~(windowsz - 1)); +- pcie_window_offset = pcie_offset & (windowsz - 1); +- for (j = pcie_window_offset; ((j < windowsz) && (i < size)); +- j += 8) { +- NSPU_NFP_BUF(desc, pcie_window_base, j) = +- NSPU_HOST_BUF(buffer, i); +- i += 8; +- } +- } +- +- return ret; +-} +- +-static int +-nspu_buff_read(nspu_desc_t *desc, void *buffer, size_t size) +-{ +- uint64_t pcie_offset, pcie_window_base, pcie_window_offset; +- uint64_t windowsz, i = 0, j; +- uint64_t buffaddr; +- int ret = 0; +- +- if (size > desc->buf_size) +- return -1; +- +- buffaddr = desc->bufaddr; +- windowsz = desc->windowsz; +- +- while (i < size) { +- /* Expansion bar reconfiguration per window size */ +- 
nspu_xlate(desc, buffaddr + i, &pcie_offset); +- pcie_window_base = pcie_offset & (~(windowsz - 1)); +- pcie_window_offset = pcie_offset & (windowsz - 1); +- for (j = pcie_window_offset; ((j < windowsz) && (i < size)); +- j += 8) { +- NSPU_HOST_BUF(buffer, i) = +- NSPU_NFP_BUF(desc, pcie_window_base, j); +- i += 8; +- } +- } +- +- return ret; +-} +- +-static int +-nspu_command(nspu_desc_t *desc, uint16_t cmd, int read, int write, +- void *buffer, size_t rsize, size_t wsize) +-{ +- uint64_t status, cmd_reg; +- uint64_t offset; +- int retry = 0; +- int retries = 120; +- int ret = 0; +- +- /* Same expansion BAR is used for different things */ +- nspu_xlate(desc, NSP_BASE, &offset); +- +- status = NSP_REG_VAL(NSP_REG_ADDR(desc, offset, NSP_STATUS)); +- +- while ((status & 0x1) && (retry < retries)) { +- status = NSP_REG_VAL(NSP_REG_ADDR(desc, offset, NSP_STATUS)); +- retry++; +- sleep(1); +- } +- +- if (retry == retries) +- return -1; +- +- if (write) { +- ret = nspu_buff_write(desc, buffer, wsize); +- if (ret) +- return ret; +- +- /* Expansion BAR changes when writing the buffer */ +- nspu_xlate(desc, NSP_BASE, &offset); +- } +- +- NSP_REG_VAL(NSP_REG_ADDR(desc, offset, NSP_COMMAND)) = +- (uint64_t)wsize << 32 | (uint64_t)cmd << 16 | 1; +- +- retry = 0; +- +- cmd_reg = NSP_REG_VAL(NSP_REG_ADDR(desc, offset, NSP_COMMAND)); +- while ((cmd_reg & 0x1) && (retry < retries)) { +- cmd_reg = NSP_REG_VAL(NSP_REG_ADDR(desc, offset, NSP_COMMAND)); +- retry++; +- sleep(1); +- } +- if (retry == retries) +- return -1; +- +- retry = 0; +- status = NSP_REG_VAL(NSP_REG_ADDR(desc, offset, NSP_STATUS)); +- while ((status & 0x1) && (retry < retries)) { +- status = NSP_REG_VAL(NSP_REG_ADDR(desc, offset, NSP_STATUS)); +- retry++; +- sleep(1); +- } +- +- if (retry == retries) +- return -1; +- +- ret = status & (0xff << 8); +- if (ret) +- return ret; +- +- if (read) { +- ret = nspu_buff_read(desc, buffer, rsize); +- if (ret) +- return ret; +- } +- +- return ret; +-} +- +-static int 
+-nfp_fw_reset(nspu_desc_t *nspu_desc) +-{ +- int res; +- +- res = nspu_command(nspu_desc, NSP_CMD_RESET, 0, 0, 0, 0, 0); +- +- if (res < 0) +- RTE_LOG(INFO, PMD, "fw reset failed: error %d", res); +- +- return res; +-} +- +-#define DEFAULT_FW_PATH "/lib/firmware/netronome" +-#define DEFAULT_FW_FILENAME "nic_dpdk_default.nffw" +- +-static int +-nfp_fw_upload(nspu_desc_t *nspu_desc) +-{ +- int fw_f; +- char *fw_buf; +- char filename[100]; +- struct stat file_stat; +- off_t fsize, bytes; +- ssize_t size; +- int ret; +- +- size = nspu_desc->buf_size; +- +- sprintf(filename, "%s/%s", DEFAULT_FW_PATH, DEFAULT_FW_FILENAME); +- fw_f = open(filename, O_RDONLY); +- if (fw_f < 0) { +- RTE_LOG(INFO, PMD, "Firmware file %s/%s not found.", +- DEFAULT_FW_PATH, DEFAULT_FW_FILENAME); +- return -ENOENT; +- } +- +- if (fstat(fw_f, &file_stat) < 0) { +- RTE_LOG(INFO, PMD, "Firmware file %s/%s size is unknown", +- DEFAULT_FW_PATH, DEFAULT_FW_FILENAME); +- close(fw_f); +- return -ENOENT; +- } +- +- fsize = file_stat.st_size; +- RTE_LOG(DEBUG, PMD, "Firmware file with size: %" PRIu64 "\n", +- (uint64_t)fsize); +- +- if (fsize > (off_t)size) { +- RTE_LOG(INFO, PMD, "fw file too big: %" PRIu64 +- " bytes (%" PRIu64 " max)", +- (uint64_t)fsize, (uint64_t)size); +- close(fw_f); +- return -EINVAL; +- } +- +- fw_buf = malloc((size_t)size); +- if (!fw_buf) { +- RTE_LOG(INFO, PMD, "malloc failed for fw buffer"); +- close(fw_f); +- return -ENOMEM; +- } +- memset(fw_buf, 0, size); +- +- bytes = read(fw_f, fw_buf, fsize); +- if (bytes != fsize) { +- RTE_LOG(INFO, PMD, "Reading fw to buffer failed.\n" +- "Just %" PRIu64 " of %" PRIu64 " bytes read.", +- (uint64_t)bytes, (uint64_t)fsize); +- free(fw_buf); +- close(fw_f); +- return -EIO; +- } +- +- ret = nspu_command(nspu_desc, NSP_CMD_FW_LOAD, 0, 1, fw_buf, 0, bytes); +- +- free(fw_buf); +- close(fw_f); +- +- return ret; +-} +- +-/* Firmware symbol descriptor size */ +-#define NFP_SYM_DESC_LEN 40 +- +-#define SYMBOL_DATA(b, off) (*(int64_t *)((b) + 
(off))) +-#define SYMBOL_UDATA(b, off) (*(uint64_t *)((b) + (off))) +- +-/* Firmware symbols contain information about how to access what they +- * represent. It can be as simple as an numeric variable declared at a +- * specific NFP memory, but it can also be more complex structures and +- * related to specific hardware functionalities or components. Target, +- * domain and address allow to create the BAR window for accessing such +- * hw object and size defines the length to map. +- * +- * A vNIC is a network interface implemented inside the NFP and using a +- * subset of device PCI BARs. Specific firmware symbols allow to map those +- * vNIC bars by host drivers like the NFP PMD. +- * +- * Accessing what the symbol represents implies to map the access through +- * a PCI BAR window. NFP expansion BARs are used in this regard through +- * the NSPU interface. +- */ +-static int +-nfp_nspu_set_bar_from_symbl(nspu_desc_t *desc, const char *symbl, +- uint32_t expbar, uint64_t *pcie_offset, +- ssize_t *size) +-{ +- int64_t type; +- int64_t target; +- int64_t domain; +- uint64_t addr; +- char *sym_buf; +- int ret = 0; +- +- sym_buf = malloc(desc->buf_size); +- if (!sym_buf) +- return -ENOMEM; +- +- strncpy(sym_buf, symbl, strlen(symbl)); +- ret = nspu_command(desc, NSP_CMD_GET_SYMBOL, 1, 1, sym_buf, +- NFP_SYM_DESC_LEN, strlen(symbl)); +- if (ret) { +- RTE_LOG(DEBUG, PMD, "symbol resolution (%s) failed\n", symbl); +- goto clean; +- } +- +- /* Reading symbol information */ +- type = SYMBOL_DATA(sym_buf, 0); +- target = SYMBOL_DATA(sym_buf, 8); +- domain = SYMBOL_DATA(sym_buf, 16); +- addr = SYMBOL_UDATA(sym_buf, 24); +- *size = (ssize_t)SYMBOL_UDATA(sym_buf, 32); +- +- if (type != 1) { +- RTE_LOG(INFO, PMD, "wrong symbol type\n"); +- ret = -EINVAL; +- goto clean; +- } +- if (!(target == 7 || target == -7)) { +- RTE_LOG(INFO, PMD, "wrong symbol target\n"); +- ret = -EINVAL; +- goto clean; +- } +- if (domain == 8 || domain == 9) { +- RTE_LOG(INFO, PMD, "wrong symbol 
domain\n"); +- ret = -EINVAL; +- goto clean; +- } +- +- /* Adjusting address based on symbol location */ +- if ((domain >= 24) && (domain < 28) && (target == 7)) { +- addr = 1ULL << 37 | addr | ((uint64_t)domain & 0x3) << 35; +- } else { +- addr = 1ULL << 39 | addr | ((uint64_t)domain & 0x3f) << 32; +- if (target == -7) +- target = 7; +- } +- +- /* Configuring NFP expansion bar for mapping specific PCI BAR window */ +- nfp_nspu_mem_bar_cfg(desc, expbar, target, addr, pcie_offset); +- +- /* This is the PCI BAR offset to use by the host */ +- *pcie_offset |= ((expbar & 0x7) << (desc->barsz - 3)); +- +-clean: +- free(sym_buf); +- return ret; +-} +- +-int +-nfp_nsp_fw_setup(nspu_desc_t *desc, const char *sym, uint64_t *pcie_offset) +-{ +- ssize_t bar0_sym_size; +- +- /* If the symbol resolution works, it implies a firmware app +- * is already there. +- */ +- if (!nfp_nspu_set_bar_from_symbl(desc, sym, NFP_NET_PF_CFG_EXP_BAR, +- pcie_offset, &bar0_sym_size)) +- return 0; +- +- /* No firmware app detected or not the right one */ +- RTE_LOG(INFO, PMD, "No firmware detected. 
Resetting NFP...\n"); +- if (nfp_fw_reset(desc) < 0) { +- RTE_LOG(ERR, PMD, "nfp fw reset failed\n"); +- return -ENODEV; +- } +- +- RTE_LOG(INFO, PMD, "Reset done.\n"); +- RTE_LOG(INFO, PMD, "Uploading firmware...\n"); +- +- if (nfp_fw_upload(desc) < 0) { +- RTE_LOG(ERR, PMD, "nfp fw upload failed\n"); +- return -ENODEV; +- } +- +- RTE_LOG(INFO, PMD, "Done.\n"); +- +- /* Now the symbol should be there */ +- if (nfp_nspu_set_bar_from_symbl(desc, sym, NFP_NET_PF_CFG_EXP_BAR, +- pcie_offset, &bar0_sym_size)) { +- RTE_LOG(ERR, PMD, "nfp PF BAR symbol resolution failed\n"); +- return -ENODEV; +- } +- +- return 0; +-} +- +-int +-nfp_nsp_map_ctrl_bar(nspu_desc_t *desc, uint64_t *pcie_offset) +-{ +- ssize_t bar0_sym_size; +- +- if (nfp_nspu_set_bar_from_symbl(desc, "_pf0_net_bar0", +- NFP_NET_PF_CFG_EXP_BAR, +- pcie_offset, &bar0_sym_size)) +- return -ENODEV; +- +- return 0; +-} +- +-/* +- * This is a hardcoded fixed NFP internal CPP bus address for the hw queues unit +- * inside the PCIE island. 
+- */ +-#define NFP_CPP_PCIE_QUEUES ((uint64_t)(1ULL << 39) | 0x80000 | \ +- ((uint64_t)0x4 & 0x3f) << 32) +- +-/* Configure a specific NFP expansion bar for accessing the vNIC rx/tx BARs */ +-void +-nfp_nsp_map_queues_bar(nspu_desc_t *desc, uint64_t *pcie_offset) +-{ +- nfp_nspu_mem_bar_cfg(desc, NFP_NET_PF_HW_QUEUES_EXP_BAR, 0, +- NFP_CPP_PCIE_QUEUES, pcie_offset); +- +- /* This is the pcie offset to use by the host */ +- *pcie_offset |= ((NFP_NET_PF_HW_QUEUES_EXP_BAR & 0x7) << (27 - 3)); +-} +- +-int +-nfp_nsp_eth_config(nspu_desc_t *desc, int port, int up) +-{ +- union eth_table_entry *entries, *entry; +- int modified; +- int ret, idx; +- int i; +- +- idx = port; +- +- RTE_LOG(INFO, PMD, "Hw ethernet port %d configure...\n", port); +- rte_spinlock_lock(&desc->nsp_lock); +- entries = malloc(NSP_ETH_TABLE_SIZE); +- if (!entries) { +- rte_spinlock_unlock(&desc->nsp_lock); +- return -ENOMEM; +- } +- +- ret = nspu_command(desc, NSP_CMD_READ_ETH_TABLE, 1, 0, entries, +- NSP_ETH_TABLE_SIZE, 0); +- if (ret) { +- rte_spinlock_unlock(&desc->nsp_lock); +- free(entries); +- return ret; +- } +- +- entry = entries; +- +- for (i = 0; i < NSP_ETH_MAX_COUNT; i++) { +- /* ports in use do not appear sequentially in the table */ +- if (!(entry->port & NSP_ETH_PORT_LANES_MASK)) { +- /* entry not in use */ +- entry++; +- continue; +- } +- if (idx == 0) +- break; +- idx--; +- entry++; +- } +- +- if (i == NSP_ETH_MAX_COUNT) { +- rte_spinlock_unlock(&desc->nsp_lock); +- free(entries); +- return -EINVAL; +- } +- +- if (up && !(entry->state & NSP_ETH_STATE_CONFIGURED)) { +- entry->control |= NSP_ETH_STATE_CONFIGURED; +- modified = 1; +- } +- +- if (!up && (entry->state & NSP_ETH_STATE_CONFIGURED)) { +- entry->control &= ~NSP_ETH_STATE_CONFIGURED; +- modified = 1; +- } +- +- if (modified) { +- ret = nspu_command(desc, NSP_CMD_WRITE_ETH_TABLE, 0, 1, entries, +- 0, NSP_ETH_TABLE_SIZE); +- if (!ret) +- RTE_LOG(INFO, PMD, +- "Hw ethernet port %d configure done\n", port); +- else +- 
RTE_LOG(INFO, PMD, +- "Hw ethernet port %d configure failed\n", port); +- } +- rte_spinlock_unlock(&desc->nsp_lock); +- free(entries); +- return ret; +-} +- +-int +-nfp_nsp_eth_read_table(nspu_desc_t *desc, union eth_table_entry **table) +-{ +- int ret; +- +- if (!table) +- return -EINVAL; +- +- RTE_LOG(INFO, PMD, "Reading hw ethernet table...\n"); +- +- /* port 0 allocates the eth table and read it using NSPU */ +- *table = malloc(NSP_ETH_TABLE_SIZE); +- if (!*table) +- return -ENOMEM; +- +- ret = nspu_command(desc, NSP_CMD_READ_ETH_TABLE, 1, 0, *table, +- NSP_ETH_TABLE_SIZE, 0); +- if (ret) +- return ret; +- +- RTE_LOG(INFO, PMD, "Done\n"); +- +- return 0; +-} +diff --git a/drivers/net/nfp/nfp_nspu.h b/drivers/net/nfp/nfp_nspu.h +deleted file mode 100644 +index 8c33835e5..000000000 +--- a/drivers/net/nfp/nfp_nspu.h ++++ /dev/null +@@ -1,83 +0,0 @@ +-/* +- * Copyright (c) 2017 Netronome Systems, Inc. +- * All rights reserved. +- * +- * Redistribution and use in source and binary forms, with or without +- * modification, are permitted provided that the following conditions are met: +- * +- * 1. Redistributions of source code must retain the above copyright notice, +- * this list of conditions and the following disclaimer. +- * +- * 2. Redistributions in binary form must reproduce the above copyright +- * notice, this list of conditions and the following disclaimer in the +- * documentation and/or other materials provided with the distribution +- * +- * 3. Neither the name of the copyright holder nor the names of its +- * contributors may be used to endorse or promote products derived from this +- * software without specific prior written permission. +- * +- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +- * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +- * POSSIBILITY OF SUCH DAMAGE. +- */ +- +-/* +- * vim:shiftwidth=8:noexpandtab +- * +- * @file dpdk/pmd/nfp_nspu.h +- * +- * Netronome NFP_NET PDM driver +- */ +- +-/* +- * NSP is the NFP Service Processor. NSPU is NSP Userspace interface. +- * +- * NFP NSP helps with firmware/hardware configuration. NSP is another component +- * in NFP programmable processor and accessing it from host requires to firstly +- * configure a specific NFP PCI expansion BAR. +- * +- * Once access is ready, configuration can be done reading and writing +- * from/to a specific PF PCI BAR window. This same interface will allow to +- * create other PCI BAR windows for accessing other NFP components. +- * +- * This file includes low-level functions, using the NSPU interface, and high +- * level functions, invoked by the PMD for using NSP services. This allows +- * firmware upload, vNIC PCI BARs mapping and other low-level configurations +- * like link setup. +- * +- * NSP access is done during initialization and it is not involved at all with +- * the fast path. 
+- */ +- +-#include +-#include "nfp_net_eth.h" +- +-typedef struct { +- int nfp; /* NFP device */ +- int pcie_bar; /* PF PCI BAR to work with */ +- int exp_bar; /* Expansion BAR number used by NSPU */ +- int barsz; /* PCIE BAR log2 size */ +- uint64_t bufaddr; /* commands buffer address */ +- size_t buf_size; /* commands buffer size */ +- uint64_t windowsz; /* NSPU BAR window size */ +- void *cfg_base; /* Expansion BARs address */ +- void *mem_base; /* NSP interface */ +- rte_spinlock_t nsp_lock; +-} nspu_desc_t; +- +-int nfp_nspu_init(nspu_desc_t *desc, int nfp, int pcie_bar, size_t pcie_barsz, +- int exp_bar, void *exp_bar_cfg_base, void *exp_bar_mmap); +-int nfp_nsp_get_abi_version(nspu_desc_t *desc, int *major, int *minor); +-int nfp_nsp_fw_setup(nspu_desc_t *desc, const char *sym, uint64_t *pcie_offset); +-int nfp_nsp_map_ctrl_bar(nspu_desc_t *desc, uint64_t *pcie_offset); +-void nfp_nsp_map_queues_bar(nspu_desc_t *desc, uint64_t *pcie_offset); +-int nfp_nsp_eth_config(nspu_desc_t *desc, int port, int up); +-int nfp_nsp_eth_read_table(nspu_desc_t *desc, union eth_table_entry **table); +-- +2.14.3 + diff --git a/SOURCES/0003-vhost-extract-virtqueue-cleaning-and-freeing-functio.patch b/SOURCES/0003-vhost-extract-virtqueue-cleaning-and-freeing-functio.patch new file mode 100644 index 0000000..4921ba2 --- /dev/null +++ b/SOURCES/0003-vhost-extract-virtqueue-cleaning-and-freeing-functio.patch @@ -0,0 +1,83 @@ +From 297fcc013877e57c387e444bf7323fbfd77e4b3f Mon Sep 17 00:00:00 2001 +From: Maxime Coquelin +Date: Wed, 13 Dec 2017 09:51:08 +0100 +Subject: [PATCH 3/6] vhost: extract virtqueue cleaning and freeing functions + +This patch extracts needed code for vhost_user.c to be able +to clean and free virtqueues unitary. 
+ +Signed-off-by: Maxime Coquelin +Acked-by: Laszlo Ersek +Acked-by: Yuanhan Liu +(cherry picked from commit 467fe22df94b85d2df67b9be3ccbfb3dd72cdd6d) +Signed-off-by: Maxime Coquelin +--- + dpdk-17.11/lib/librte_vhost/vhost.c | 22 ++++++++++++---------- + dpdk-17.11/lib/librte_vhost/vhost.h | 3 +++ + 2 files changed, 15 insertions(+), 10 deletions(-) + +diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c +index 4f8b73a09..df528a4ea 100644 +--- a/lib/librte_vhost/vhost.c ++++ b/lib/librte_vhost/vhost.c +@@ -103,7 +103,7 @@ get_device(int vid) + return dev; + } + +-static void ++void + cleanup_vq(struct vhost_virtqueue *vq, int destroy) + { + if ((vq->callfd >= 0) && (destroy != 0)) +@@ -127,6 +127,15 @@ cleanup_device(struct virtio_net *dev, int destroy) + cleanup_vq(dev->virtqueue[i], destroy); + } + ++void ++free_vq(struct vhost_virtqueue *vq) ++{ ++ rte_free(vq->shadow_used_ring); ++ rte_free(vq->batch_copy_elems); ++ rte_mempool_free(vq->iotlb_pool); ++ rte_free(vq); ++} ++ + /* + * Release virtqueues and device memory. 
+ */ +@@ -134,16 +143,9 @@ static void + free_device(struct virtio_net *dev) + { + uint32_t i; +- struct vhost_virtqueue *vq; +- +- for (i = 0; i < dev->nr_vring; i++) { +- vq = dev->virtqueue[i]; + +- rte_free(vq->shadow_used_ring); +- rte_free(vq->batch_copy_elems); +- rte_mempool_free(vq->iotlb_pool); +- rte_free(vq); +- } ++ for (i = 0; i < dev->nr_vring; i++) ++ free_vq(dev->virtqueue[i]); + + rte_free(dev); + } +diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h +index 1cc81c17c..9cad1bb3c 100644 +--- a/lib/librte_vhost/vhost.h ++++ b/lib/librte_vhost/vhost.h +@@ -364,6 +364,9 @@ void cleanup_device(struct virtio_net *dev, int destroy); + void reset_device(struct virtio_net *dev); + void vhost_destroy_device(int); + ++void cleanup_vq(struct vhost_virtqueue *vq, int destroy); ++void free_vq(struct vhost_virtqueue *vq); ++ + int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx); + + void vhost_set_ifname(int, const char *if_name, unsigned int if_len); +-- +2.14.3 + diff --git a/SOURCES/0003-vhost-introduce-safe-API-for-GPA-translation.patch b/SOURCES/0003-vhost-introduce-safe-API-for-GPA-translation.patch new file mode 100644 index 0000000..30378db --- /dev/null +++ b/SOURCES/0003-vhost-introduce-safe-API-for-GPA-translation.patch @@ -0,0 +1,95 @@ +From 1fd1b4807bdabc32953e2591e06ac9331edee76e Mon Sep 17 00:00:00 2001 +From: Maxime Coquelin +Date: Mon, 23 Apr 2018 11:33:40 +0200 +Subject: [PATCH 03/11] vhost: introduce safe API for GPA translation + +This new rte_vhost_va_from_guest_pa API takes an extra len +parameter, used to specify the size of the range to be mapped. +Effective mapped range is returned via len parameter. + +This issue has been assigned CVE-2018-1059. 
+ +Reported-by: Yongji Xie +Signed-off-by: Maxime Coquelin +--- + lib/librte_vhost/rte_vhost.h | 40 ++++++++++++++++++++++++++++++++++ + lib/librte_vhost/rte_vhost_version.map | 6 +++++ + lib/librte_vhost/vhost.h | 2 +- + 3 files changed, 47 insertions(+), 1 deletion(-) + +diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h +index f653644..f2d6c95 100644 +--- a/lib/librte_vhost/rte_vhost.h ++++ b/lib/librte_vhost/rte_vhost.h +@@ -143,4 +143,44 @@ struct vhost_device_ops { + } + ++/** ++ * Convert guest physical address to host virtual address safely ++ * ++ * This variant of rte_vhost_gpa_to_vva() takes care all the ++ * requested length is mapped and contiguous in process address ++ * space. ++ * ++ * @param mem ++ * the guest memory regions ++ * @param gpa ++ * the guest physical address for querying ++ * @param len ++ * the size of the requested area to map, updated with actual size mapped ++ * @return ++ * the host virtual address on success, 0 on failure ++ */ ++static __rte_always_inline uint64_t ++rte_vhost_va_from_guest_pa(struct rte_vhost_memory *mem, ++ uint64_t gpa, uint64_t *len) ++{ ++ struct rte_vhost_mem_region *r; ++ uint32_t i; ++ ++ for (i = 0; i < mem->nregions; i++) { ++ r = &mem->regions[i]; ++ if (gpa >= r->guest_phys_addr && ++ gpa < r->guest_phys_addr + r->size) { ++ ++ if (unlikely(*len > r->guest_phys_addr + r->size - gpa)) ++ *len = r->guest_phys_addr + r->size - gpa; ++ ++ return gpa - r->guest_phys_addr + ++ r->host_user_addr; ++ } ++ } ++ *len = 0; ++ ++ return 0; ++} ++ + #define RTE_VHOST_NEED_LOG(features) ((features) & (1ULL << VHOST_F_LOG_ALL)) + +diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map +index 1e70495..9cb1d8c 100644 +--- a/lib/librte_vhost/rte_vhost_version.map ++++ b/lib/librte_vhost/rte_vhost_version.map +@@ -53,2 +53,8 @@ DPDK_17.08 { + + } DPDK_17.05; ++ ++DPDK_17.11.2 { ++ global; ++ ++ rte_vhost_va_from_guest_pa; ++} DPDK_17.08; +diff --git 
a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h +index de300c1..16d6b89 100644 +--- a/lib/librte_vhost/vhost.h ++++ b/lib/librte_vhost/vhost.h +@@ -391,5 +391,5 @@ uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, + { + if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) +- return rte_vhost_gpa_to_vva(dev->mem, iova); ++ return rte_vhost_va_from_guest_pa(dev->mem, iova, len); + + return __vhost_iova_to_vva(dev, vq, iova, len, perm); +-- +1.8.3.1 + diff --git a/SOURCES/0004-vhost-destroy-unused-virtqueues-when-multiqueue-not-.patch b/SOURCES/0004-vhost-destroy-unused-virtqueues-when-multiqueue-not-.patch new file mode 100644 index 0000000..9e92eb4 --- /dev/null +++ b/SOURCES/0004-vhost-destroy-unused-virtqueues-when-multiqueue-not-.patch @@ -0,0 +1,63 @@ +From eb2b3b18edc3af42f52ca5b3f30aa8bfbd08206a Mon Sep 17 00:00:00 2001 +From: Maxime Coquelin +Date: Wed, 13 Dec 2017 09:51:09 +0100 +Subject: [PATCH 4/6] vhost: destroy unused virtqueues when multiqueue not + negotiated + +QEMU sends VHOST_USER_SET_VRING_CALL requests for all queues +declared in QEMU command line before the guest is started. +It has the effect in DPDK vhost-user backend to allocate vrings +for all queues declared by QEMU. + +If the first driver being used does not support multiqueue, +the device never changes to VIRTIO_DEV_RUNNING state as only +the first queue pair is initialized. One driver impacted by +this bug is virtio-net's iPXE driver which does not support +VIRTIO_NET_F_MQ feature. + +It is safe to destroy unused virtqueues in SET_FEATURES request +handler, as it is ensured the device is not in running state +at this stage, so virtqueues aren't being processed. 
+ +Signed-off-by: Maxime Coquelin +Acked-by: Laszlo Ersek +Acked-by: Yuanhan Liu +(cherry picked from commit e29109323595beb3884da58126ebb3b878cb66f5) +Signed-off-by: Maxime Coquelin +--- + dpdk-17.11/lib/librte_vhost/vhost_user.c | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c +index 471b1612c..1848c8de9 100644 +--- a/lib/librte_vhost/vhost_user.c ++++ b/lib/librte_vhost/vhost_user.c +@@ -216,6 +216,25 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features) + (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? "on" : "off", + (dev->features & (1ULL << VIRTIO_F_VERSION_1)) ? "on" : "off"); + ++ if (!(dev->features & (1ULL << VIRTIO_NET_F_MQ))) { ++ /* ++ * Remove all but first queue pair if MQ hasn't been ++ * negotiated. This is safe because the device is not ++ * running at this stage. ++ */ ++ while (dev->nr_vring > 2) { ++ struct vhost_virtqueue *vq; ++ ++ vq = dev->virtqueue[--dev->nr_vring]; ++ if (!vq) ++ continue; ++ ++ dev->virtqueue[dev->nr_vring] = NULL; ++ cleanup_vq(vq, 1); ++ free_vq(vq); ++ } ++ } ++ + return 0; + } + +-- +2.14.3 + diff --git a/SOURCES/0004-vhost-ensure-all-range-is-mapped-when-translating-QV.patch b/SOURCES/0004-vhost-ensure-all-range-is-mapped-when-translating-QV.patch new file mode 100644 index 0000000..4ac570a --- /dev/null +++ b/SOURCES/0004-vhost-ensure-all-range-is-mapped-when-translating-QV.patch @@ -0,0 +1,148 @@ +From 053e6774348c5a497a12b27d6120527c7af5e503 Mon Sep 17 00:00:00 2001 +From: Maxime Coquelin +Date: Mon, 23 Apr 2018 11:33:41 +0200 +Subject: [PATCH 04/11] vhost: ensure all range is mapped when translating QVAs + +This patch ensures that all the address range is mapped when +translating addresses from master's addresses (e.g. QEMU host +addressess) to process VAs. + +This issue has been assigned CVE-2018-1059. 
+ +Reported-by: Yongji Xie +Signed-off-by: Maxime Coquelin +--- + lib/librte_vhost/vhost_user.c | 58 +++++++++++++++++++++++++++---------------- + 1 file changed, 36 insertions(+), 22 deletions(-) + +diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c +index 3acaacf..50e654d 100644 +--- a/lib/librte_vhost/vhost_user.c ++++ b/lib/librte_vhost/vhost_user.c +@@ -330,19 +330,24 @@ + /* Converts QEMU virtual address to Vhost virtual address. */ + static uint64_t +-qva_to_vva(struct virtio_net *dev, uint64_t qva) ++qva_to_vva(struct virtio_net *dev, uint64_t qva, uint64_t *len) + { +- struct rte_vhost_mem_region *reg; ++ struct rte_vhost_mem_region *r; + uint32_t i; + + /* Find the region where the address lives. */ + for (i = 0; i < dev->mem->nregions; i++) { +- reg = &dev->mem->regions[i]; ++ r = &dev->mem->regions[i]; + +- if (qva >= reg->guest_user_addr && +- qva < reg->guest_user_addr + reg->size) { +- return qva - reg->guest_user_addr + +- reg->host_user_addr; ++ if (qva >= r->guest_user_addr && ++ qva < r->guest_user_addr + r->size) { ++ ++ if (unlikely(*len > r->guest_user_addr + r->size - qva)) ++ *len = r->guest_user_addr + r->size - qva; ++ ++ return qva - r->guest_user_addr + ++ r->host_user_addr; + } + } ++ *len = 0; + + return 0; +@@ -357,5 +362,5 @@ + static uint64_t + ring_addr_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, +- uint64_t ra, uint64_t size) ++ uint64_t ra, uint64_t *size) + { + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) { +@@ -363,5 +368,5 @@ + + vva = vhost_user_iotlb_cache_find(vq, ra, +- &size, VHOST_ACCESS_RW); ++ size, VHOST_ACCESS_RW); + if (!vva) + vhost_user_iotlb_miss(dev, ra, VHOST_ACCESS_RW); +@@ -370,5 +375,5 @@ + } + +- return qva_to_vva(dev, ra); ++ return qva_to_vva(dev, ra, size); + } + +@@ -378,4 +383,5 @@ + struct vhost_virtqueue *vq = dev->virtqueue[vq_index]; + struct vhost_vring_addr *addr = &vq->ring_addrs; ++ uint64_t len; + + /* The addresses are converted from QEMU 
virtual to Vhost virtual. */ +@@ -383,9 +389,10 @@ + return dev; + ++ len = sizeof(struct vring_desc) * vq->size; + vq->desc = (struct vring_desc *)(uintptr_t)ring_addr_to_vva(dev, +- vq, addr->desc_user_addr, sizeof(struct vring_desc)); +- if (vq->desc == 0) { ++ vq, addr->desc_user_addr, &len); ++ if (vq->desc == 0 || len != sizeof(struct vring_desc) * vq->size) { + RTE_LOG(DEBUG, VHOST_CONFIG, +- "(%d) failed to find desc ring address.\n", ++ "(%d) failed to map desc ring.\n", + dev->vid); + return dev; +@@ -396,18 +403,24 @@ + addr = &vq->ring_addrs; + ++ len = sizeof(struct vring_avail) + sizeof(uint16_t) * vq->size; + vq->avail = (struct vring_avail *)(uintptr_t)ring_addr_to_vva(dev, +- vq, addr->avail_user_addr, sizeof(struct vring_avail)); +- if (vq->avail == 0) { ++ vq, addr->avail_user_addr, &len); ++ if (vq->avail == 0 || ++ len != sizeof(struct vring_avail) + ++ sizeof(uint16_t) * vq->size) { + RTE_LOG(DEBUG, VHOST_CONFIG, +- "(%d) failed to find avail ring address.\n", ++ "(%d) failed to map avail ring.\n", + dev->vid); + return dev; + } + ++ len = sizeof(struct vring_used) + ++ sizeof(struct vring_used_elem) * vq->size; + vq->used = (struct vring_used *)(uintptr_t)ring_addr_to_vva(dev, +- vq, addr->used_user_addr, sizeof(struct vring_used)); +- if (vq->used == 0) { ++ vq, addr->used_user_addr, &len); ++ if (vq->used == 0 || len != sizeof(struct vring_used) + ++ sizeof(struct vring_used_elem) * vq->size) { + RTE_LOG(DEBUG, VHOST_CONFIG, +- "(%d) failed to find used ring address.\n", ++ "(%d) failed to map used ring.\n", + dev->vid); + return dev; +@@ -1095,9 +1108,10 @@ + struct vhost_iotlb_msg *imsg = &msg->payload.iotlb; + uint16_t i; +- uint64_t vva; ++ uint64_t vva, len; + + switch (imsg->type) { + case VHOST_IOTLB_UPDATE: +- vva = qva_to_vva(dev, imsg->uaddr); ++ len = imsg->size; ++ vva = qva_to_vva(dev, imsg->uaddr, &len); + if (!vva) + return -1; +@@ -1107,5 +1121,5 @@ + + vhost_user_iotlb_cache_insert(vq, imsg->iova, vva, +- imsg->size, 
imsg->perm); ++ len, imsg->perm); + + if (is_vring_iotlb_update(vq, imsg)) +-- +1.8.3.1 + diff --git a/SOURCES/0005-vhost-add-flag-for-built-in-virtio-driver.patch b/SOURCES/0005-vhost-add-flag-for-built-in-virtio-driver.patch new file mode 100644 index 0000000..15822ee --- /dev/null +++ b/SOURCES/0005-vhost-add-flag-for-built-in-virtio-driver.patch @@ -0,0 +1,188 @@ +From 8db980965f3d8cde1abbdb89eaecbc829460133e Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 31 Jan 2018 17:46:50 +0000 +Subject: [PATCH 5/6] vhost: add flag for built-in virtio driver + +The librte_vhost API is used in two ways: +1. As a vhost net device backend via rte_vhost_enqueue/dequeue_burst(). +2. As a library for implementing vhost device backends. + +There is no distinction between the two at the API level or in the +librte_vhost implementation. For example, device state is kept in +"struct virtio_net" regardless of whether this is actually a net device +backend or whether the built-in virtio_net.c driver is in use. + +The virtio_net.c driver should be a librte_vhost API client just like +the vhost-scsi code and have no special access to vhost.h internals. +Unfortunately, fixing this requires significant librte_vhost API +changes. + +This patch takes a different approach: keep the librte_vhost API +unchanged but track whether the built-in virtio_net.c driver is in use. +See the next patch for a bug fix that requires knowledge of whether +virtio_net.c is in use. 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Maxime Coquelin +Acked-by: Yuanhan Liu +(cherry picked from commit 1c717af4c699e60081feb1d645f86189551f9a9c) +Signed-off-by: Maxime Coquelin +--- + dpdk-17.11/lib/librte_vhost/socket.c | 15 +++++++++++++++ + dpdk-17.11/lib/librte_vhost/vhost.c | 17 ++++++++++++++++- + dpdk-17.11/lib/librte_vhost/vhost.h | 3 +++ + dpdk-17.11/lib/librte_vhost/virtio_net.c | 14 ++++++++++++++ + 4 files changed, 48 insertions(+), 1 deletion(-) + +diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c +index 422da002f..ceecc6149 100644 +--- a/lib/librte_vhost/socket.c ++++ b/lib/librte_vhost/socket.c +@@ -69,6 +69,7 @@ struct vhost_user_socket { + bool reconnect; + bool dequeue_zero_copy; + bool iommu_support; ++ bool use_builtin_virtio_net; + + /* + * The "supported_features" indicates the feature bits the +@@ -224,6 +225,8 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) + size = strnlen(vsocket->path, PATH_MAX); + vhost_set_ifname(vid, vsocket->path, size); + ++ vhost_set_builtin_virtio_net(vid, vsocket->use_builtin_virtio_net); ++ + if (vsocket->dequeue_zero_copy) + vhost_enable_dequeue_zero_copy(vid); + +@@ -547,6 +550,12 @@ rte_vhost_driver_disable_features(const char *path, uint64_t features) + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); ++ ++ /* Note that use_builtin_virtio_net is not affected by this function ++ * since callers may want to selectively disable features of the ++ * built-in vhost net device backend. ++ */ ++ + if (vsocket) + vsocket->features &= ~features; + pthread_mutex_unlock(&vhost_user.mutex); +@@ -587,6 +596,11 @@ rte_vhost_driver_set_features(const char *path, uint64_t features) + if (vsocket) { + vsocket->supported_features = features; + vsocket->features = features; ++ ++ /* Anyone setting feature bits is implementing their own vhost ++ * device backend. 
++ */ ++ vsocket->use_builtin_virtio_net = false; + } + pthread_mutex_unlock(&vhost_user.mutex); + +@@ -667,6 +681,7 @@ rte_vhost_driver_register(const char *path, uint64_t flags) + * rte_vhost_driver_set_features(), which will overwrite following + * two values. + */ ++ vsocket->use_builtin_virtio_net = true; + vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES; + vsocket->features = VIRTIO_NET_SUPPORTED_FEATURES; + +diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c +index df528a4ea..75deaa877 100644 +--- a/lib/librte_vhost/vhost.c ++++ b/lib/librte_vhost/vhost.c +@@ -279,7 +279,7 @@ reset_device(struct virtio_net *dev) + + dev->features = 0; + dev->protocol_features = 0; +- dev->flags = 0; ++ dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET; + + for (i = 0; i < dev->nr_vring; i++) + reset_vring_queue(dev, i); +@@ -315,6 +315,7 @@ vhost_new_device(void) + + vhost_devices[i] = dev; + dev->vid = i; ++ dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET; + dev->slave_req_fd = -1; + + return i; +@@ -371,6 +372,20 @@ vhost_enable_dequeue_zero_copy(int vid) + dev->dequeue_zero_copy = 1; + } + ++void ++vhost_set_builtin_virtio_net(int vid, bool enable) ++{ ++ struct virtio_net *dev = get_device(vid); ++ ++ if (dev == NULL) ++ return; ++ ++ if (enable) ++ dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET; ++ else ++ dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET; ++} ++ + int + rte_vhost_get_mtu(int vid, uint16_t *mtu) + { +diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h +index 9cad1bb3c..e06531e6b 100644 +--- a/lib/librte_vhost/vhost.h ++++ b/lib/librte_vhost/vhost.h +@@ -53,6 +53,8 @@ + #define VIRTIO_DEV_RUNNING 1 + /* Used to indicate that the device is ready to operate */ + #define VIRTIO_DEV_READY 2 ++/* Used to indicate that the built-in vhost net device backend is enabled */ ++#define VIRTIO_DEV_BUILTIN_VIRTIO_NET 4 + + /* Backend value set by guest. 
*/ + #define VIRTIO_DEV_STOPPED -1 +@@ -371,6 +373,7 @@ int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx); + + void vhost_set_ifname(int, const char *if_name, unsigned int if_len); + void vhost_enable_dequeue_zero_copy(int vid); ++void vhost_set_builtin_virtio_net(int vid, bool enable); + + struct vhost_device_ops const *vhost_driver_callback_get(const char *path); + +diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c +index 6fee16e55..3bfd71945 100644 +--- a/lib/librte_vhost/virtio_net.c ++++ b/lib/librte_vhost/virtio_net.c +@@ -727,6 +727,13 @@ rte_vhost_enqueue_burst(int vid, uint16_t queue_id, + if (!dev) + return 0; + ++ if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { ++ RTE_LOG(ERR, VHOST_DATA, ++ "(%d) %s: built-in vhost net backend is disabled.\n", ++ dev->vid, __func__); ++ return 0; ++ } ++ + if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) + return virtio_dev_merge_rx(dev, queue_id, pkts, count); + else +@@ -1173,6 +1180,13 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, + if (!dev) + return 0; + ++ if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { ++ RTE_LOG(ERR, VHOST_DATA, ++ "(%d) %s: built-in vhost net backend is disabled.\n", ++ dev->vid, __func__); ++ return 0; ++ } ++ + if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) { + RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", + dev->vid, __func__, queue_id); +-- +2.14.3 + diff --git a/SOURCES/0005-vhost-add-support-for-non-contiguous-indirect-descs-.patch b/SOURCES/0005-vhost-add-support-for-non-contiguous-indirect-descs-.patch new file mode 100644 index 0000000..49d3c82 --- /dev/null +++ b/SOURCES/0005-vhost-add-support-for-non-contiguous-indirect-descs-.patch @@ -0,0 +1,223 @@ +From 9fc3d1245bec49e29013b8120340e87adeaaf11a Mon Sep 17 00:00:00 2001 +From: Maxime Coquelin +Date: Mon, 23 Apr 2018 11:33:42 +0200 +Subject: [PATCH 05/11] vhost: add support for non-contiguous indirect descs + 
tables + +This patch adds support for non-contiguous indirect descriptor +tables in VA space. + +When it happens, which is unlikely, a table is allocated and the +non-contiguous content is copied into it. + +This issue has been assigned CVE-2018-1059. + +Reported-by: Yongji Xie +Signed-off-by: Maxime Coquelin +--- + lib/librte_vhost/virtio_net.c | 108 +++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 101 insertions(+), 7 deletions(-) + +diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c +index 79bac59..13252e6 100644 +--- a/lib/librte_vhost/virtio_net.c ++++ b/lib/librte_vhost/virtio_net.c +@@ -46,4 +46,5 @@ + #include + #include ++#include + + #include "iotlb.h" +@@ -60,4 +61,44 @@ + } + ++static __rte_always_inline struct vring_desc * ++alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq, ++ struct vring_desc *desc) ++{ ++ struct vring_desc *idesc; ++ uint64_t src, dst; ++ uint64_t len, remain = desc->len; ++ uint64_t desc_addr = desc->addr; ++ ++ idesc = rte_malloc(__func__, desc->len, 0); ++ if (unlikely(!idesc)) ++ return 0; ++ ++ dst = (uint64_t)(uintptr_t)idesc; ++ ++ while (remain) { ++ len = remain; ++ src = vhost_iova_to_vva(dev, vq, desc_addr, &len, ++ VHOST_ACCESS_RO); ++ if (unlikely(!src || !len)) { ++ rte_free(idesc); ++ return 0; ++ } ++ ++ rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len); ++ ++ remain -= len; ++ dst += len; ++ desc_addr += len; ++ } ++ ++ return idesc; ++} ++ ++static __rte_always_inline void ++free_ind_table(struct vring_desc *idesc) ++{ ++ rte_free(idesc); ++} ++ + static __rte_always_inline void + do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq, +@@ -376,4 +417,5 @@ + rte_prefetch0(&vq->desc[desc_indexes[0]]); + for (i = 0; i < count; i++) { ++ struct vring_desc *idesc = NULL; + uint16_t desc_idx = desc_indexes[i]; + int err; +@@ -385,10 +427,22 @@ + vq, vq->desc[desc_idx].addr, + &dlen, VHOST_ACCESS_RO); +- if (unlikely(!descs || 
+- dlen != vq->desc[desc_idx].len)) { ++ if (unlikely(!descs)) { + count = i; + break; + } + ++ if (unlikely(dlen < vq->desc[desc_idx].len)) { ++ /* ++ * The indirect desc table is not contiguous ++ * in process VA space, we have to copy it. ++ */ ++ idesc = alloc_copy_ind_table(dev, vq, ++ &vq->desc[desc_idx]); ++ if (unlikely(!idesc)) ++ break; ++ ++ descs = idesc; ++ } ++ + desc_idx = 0; + sz = vq->desc[desc_idx].len / sizeof(*descs); +@@ -401,4 +455,5 @@ + if (unlikely(err)) { + count = i; ++ free_ind_table(idesc); + break; + } +@@ -406,4 +461,7 @@ + if (i + 1 < count) + rte_prefetch0(&vq->desc[desc_indexes[i+1]]); ++ ++ if (unlikely(!!idesc)) ++ free_ind_table(idesc); + } + +@@ -446,4 +504,5 @@ + uint64_t dlen; + struct vring_desc *descs = vq->desc; ++ struct vring_desc *idesc = NULL; + + *desc_chain_head = idx; +@@ -455,13 +514,27 @@ + &dlen, + VHOST_ACCESS_RO); +- if (unlikely(!descs || dlen != vq->desc[idx].len)) ++ if (unlikely(!descs)) + return -1; + ++ if (unlikely(dlen < vq->desc[idx].len)) { ++ /* ++ * The indirect desc table is not contiguous ++ * in process VA space, we have to copy it. 
++ */ ++ idesc = alloc_copy_ind_table(dev, vq, &vq->desc[idx]); ++ if (unlikely(!idesc)) ++ return -1; ++ ++ descs = idesc; ++ } ++ + idx = 0; + } + + while (1) { +- if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size)) ++ if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size)) { ++ free_ind_table(idesc); + return -1; ++ } + + len += descs[idx].len; +@@ -480,4 +553,7 @@ + *vec_idx = vec_id; + ++ if (unlikely(!!idesc)) ++ free_ind_table(idesc); ++ + return 0; + } +@@ -1333,5 +1409,5 @@ + rte_prefetch0(&vq->desc[desc_indexes[0]]); + for (i = 0; i < count; i++) { +- struct vring_desc *desc; ++ struct vring_desc *desc, *idesc = NULL; + uint16_t sz, idx; + uint64_t dlen; +@@ -1348,8 +1424,20 @@ + &dlen, + VHOST_ACCESS_RO); +- if (unlikely(!desc || +- dlen != vq->desc[desc_indexes[i]].len)) ++ if (unlikely(!desc)) + break; + ++ if (unlikely(dlen < vq->desc[desc_indexes[i]].len)) { ++ /* ++ * The indirect desc table is not contiguous ++ * in process VA space, we have to copy it. ++ */ ++ idesc = alloc_copy_ind_table(dev, vq, ++ &vq->desc[desc_indexes[i]]); ++ if (unlikely(!idesc)) ++ break; ++ ++ desc = idesc; ++ } ++ + rte_prefetch0(desc); + sz = vq->desc[desc_indexes[i]].len / sizeof(*desc); +@@ -1365,4 +1453,5 @@ + RTE_LOG(ERR, VHOST_DATA, + "Failed to allocate memory for mbuf.\n"); ++ free_ind_table(idesc); + break; + } +@@ -1372,4 +1461,5 @@ + if (unlikely(err)) { + rte_pktmbuf_free(pkts[i]); ++ free_ind_table(idesc); + break; + } +@@ -1381,4 +1471,5 @@ + if (!zmbuf) { + rte_pktmbuf_free(pkts[i]); ++ free_ind_table(idesc); + break; + } +@@ -1397,4 +1488,7 @@ + TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next); + } ++ ++ if (unlikely(!!idesc)) ++ free_ind_table(idesc); + } + vq->last_avail_idx += i; +-- +1.8.3.1 + diff --git a/SOURCES/0006-vhost-drop-virtqueues-only-with-built-in-virtio-driv.patch b/SOURCES/0006-vhost-drop-virtqueues-only-with-built-in-virtio-driv.patch new file mode 100644 index 0000000..fd1869b --- /dev/null +++ 
b/SOURCES/0006-vhost-drop-virtqueues-only-with-built-in-virtio-driv.patch @@ -0,0 +1,42 @@ +From c18b2f65e0a3be55e30fc3df6062e00353dfdb26 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 31 Jan 2018 17:46:51 +0000 +Subject: [PATCH 6/6] vhost: drop virtqueues only with built-in virtio driver + +Commit e29109323595beb3884da58126ebb3b878cb66f5 ("vhost: destroy unused +virtqueues when multiqueue not negotiated") broke vhost-scsi by removing +virtqueues when the virtio-net-specific VIRTIO_NET_F_MQ feature bit is +missing. + +The vhost_user.c code shouldn't assume all devices are vhost net device +backends. Use the new VIRTIO_DEV_BUILTIN_VIRTIO_NET flag to check +whether virtio_net.c is being used. + +This fixes examples/vhost_scsi. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Maxime Coquelin +Acked-by: Yuanhan Liu +(cherry picked from commit 33adfbc805651f455dbf19f1e4b4b0878717a5e5) +Signed-off-by: Maxime Coquelin +--- + dpdk-17.11/lib/librte_vhost/vhost_user.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c +index 1848c8de9..f334497d4 100644 +--- a/lib/librte_vhost/vhost_user.c ++++ b/lib/librte_vhost/vhost_user.c +@@ -216,7 +216,8 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features) + (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? "on" : "off", + (dev->features & (1ULL << VIRTIO_F_VERSION_1)) ? "on" : "off"); + +- if (!(dev->features & (1ULL << VIRTIO_NET_F_MQ))) { ++ if ((dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET) && ++ !(dev->features & (1ULL << VIRTIO_NET_F_MQ))) { + /* + * Remove all but first queue pair if MQ hasn't been + * negotiated. 
This is safe because the device is not +-- +2.14.3 + diff --git a/SOURCES/0006-vhost-handle-virtually-non-contiguous-buffers-in-Tx.patch b/SOURCES/0006-vhost-handle-virtually-non-contiguous-buffers-in-Tx.patch new file mode 100644 index 0000000..54bb32a --- /dev/null +++ b/SOURCES/0006-vhost-handle-virtually-non-contiguous-buffers-in-Tx.patch @@ -0,0 +1,213 @@ +From ee0d896b3c3ba2dbf5a7a2598a2d8dbe242a0aa7 Mon Sep 17 00:00:00 2001 +From: Maxime Coquelin +Date: Mon, 23 Apr 2018 11:33:43 +0200 +Subject: [PATCH 06/11] vhost: handle virtually non-contiguous buffers in Tx + +This patch enables the handling of buffers non-contiguous in +process virtual address space in the dequeue path. + +When virtio-net header doesn't fit in a single chunck, it is +copied into a local variable before being processed. + +For packet content, the copy length is limited to the chunck +size, next chuncks VAs being fetched afterward. + +This issue has been assigned CVE-2018-1059. + +Signed-off-by: Maxime Coquelin +--- + lib/librte_vhost/virtio_net.c | 117 ++++++++++++++++++++++++++++++++++-------- + 1 file changed, 95 insertions(+), 22 deletions(-) + +diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c +index 13252e6..47717a1 100644 +--- a/lib/librte_vhost/virtio_net.c ++++ b/lib/librte_vhost/virtio_net.c +@@ -996,10 +996,11 @@ + { + struct vring_desc *desc; +- uint64_t desc_addr; ++ uint64_t desc_addr, desc_gaddr; + uint32_t desc_avail, desc_offset; + uint32_t mbuf_avail, mbuf_offset; + uint32_t cpy_len; +- uint64_t dlen; ++ uint64_t desc_chunck_len; + struct rte_mbuf *cur = m, *prev = m; ++ struct virtio_net_hdr tmp_hdr; + struct virtio_net_hdr *hdr = NULL; + /* A counter to avoid desc dead loop chain */ +@@ -1016,10 +1017,11 @@ + } + +- dlen = desc->len; ++ desc_chunck_len = desc->len; ++ desc_gaddr = desc->addr; + desc_addr = vhost_iova_to_vva(dev, +- vq, desc->addr, +- &dlen, ++ vq, desc_gaddr, ++ &desc_chunck_len, + VHOST_ACCESS_RO); +- if (unlikely(!desc_addr 
|| dlen != desc->len)) { ++ if (unlikely(!desc_addr)) { + error = -1; + goto out; +@@ -1027,6 +1029,38 @@ + + if (virtio_net_with_host_offload(dev)) { +- hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr); +- rte_prefetch0(hdr); ++ if (unlikely(desc_chunck_len < sizeof(struct virtio_net_hdr))) { ++ uint64_t len = desc_chunck_len; ++ uint64_t remain = sizeof(struct virtio_net_hdr); ++ uint64_t src = desc_addr; ++ uint64_t dst = (uint64_t)(uintptr_t)&tmp_hdr; ++ uint64_t guest_addr = desc_gaddr; ++ ++ /* ++ * No luck, the virtio-net header doesn't fit ++ * in a contiguous virtual area. ++ */ ++ while (remain) { ++ len = remain; ++ src = vhost_iova_to_vva(dev, vq, ++ guest_addr, &len, ++ VHOST_ACCESS_RO); ++ if (unlikely(!src || !len)) { ++ error = -1; ++ goto out; ++ } ++ ++ rte_memcpy((void *)(uintptr_t)dst, ++ (void *)(uintptr_t)src, len); ++ ++ guest_addr += len; ++ remain -= len; ++ dst += len; ++ } ++ ++ hdr = &tmp_hdr; ++ } else { ++ hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr); ++ rte_prefetch0(hdr); ++ } + } + +@@ -1044,10 +1078,11 @@ + } + +- dlen = desc->len; ++ desc_chunck_len = desc->len; ++ desc_gaddr = desc->addr; + desc_addr = vhost_iova_to_vva(dev, +- vq, desc->addr, +- &dlen, ++ vq, desc_gaddr, ++ &desc_chunck_len, + VHOST_ACCESS_RO); +- if (unlikely(!desc_addr || dlen != desc->len)) { ++ if (unlikely(!desc_addr)) { + error = -1; + goto out; +@@ -1059,10 +1094,28 @@ + } else { + desc_avail = desc->len - dev->vhost_hlen; +- desc_offset = dev->vhost_hlen; ++ ++ if (unlikely(desc_chunck_len < dev->vhost_hlen)) { ++ desc_chunck_len = desc_avail; ++ desc_gaddr += dev->vhost_hlen; ++ desc_addr = vhost_iova_to_vva(dev, ++ vq, desc_gaddr, ++ &desc_chunck_len, ++ VHOST_ACCESS_RO); ++ if (unlikely(!desc_addr)) { ++ error = -1; ++ goto out; ++ } ++ ++ desc_offset = 0; ++ } else { ++ desc_offset = dev->vhost_hlen; ++ desc_chunck_len -= dev->vhost_hlen; ++ } + } + + rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset)); + +- PRINT_PACKET(dev, 
(uintptr_t)(desc_addr + desc_offset), desc_avail, 0); ++ PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), ++ desc_chunck_len, 0); + + mbuf_offset = 0; +@@ -1071,5 +1124,5 @@ + uint64_t hpa; + +- cpy_len = RTE_MIN(desc_avail, mbuf_avail); ++ cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail); + + /* +@@ -1079,5 +1132,5 @@ + */ + if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev, +- desc->addr + desc_offset, cpy_len)))) { ++ desc_gaddr + desc_offset, cpy_len)))) { + cur->data_len = cpy_len; + cur->data_off = 0; +@@ -1094,5 +1147,6 @@ + if (likely(cpy_len > MAX_BATCH_LEN || + copy_nb >= vq->size || +- (hdr && cur == m))) { ++ (hdr && cur == m) || ++ desc->len != desc_chunck_len)) { + rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, + mbuf_offset), +@@ -1115,4 +1169,5 @@ + mbuf_offset += cpy_len; + desc_avail -= cpy_len; ++ desc_chunck_len -= cpy_len; + desc_offset += cpy_len; + +@@ -1133,9 +1188,11 @@ + } + +- dlen = desc->len; ++ desc_chunck_len = desc->len; ++ desc_gaddr = desc->addr; + desc_addr = vhost_iova_to_vva(dev, +- vq, desc->addr, +- &dlen, VHOST_ACCESS_RO); +- if (unlikely(!desc_addr || dlen != desc->len)) { ++ vq, desc_gaddr, ++ &desc_chunck_len, ++ VHOST_ACCESS_RO); ++ if (unlikely(!desc_addr)) { + error = -1; + goto out; +@@ -1147,5 +1204,21 @@ + desc_avail = desc->len; + +- PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0); ++ PRINT_PACKET(dev, (uintptr_t)desc_addr, ++ desc_chunck_len, 0); ++ } else if (unlikely(desc_chunck_len == 0)) { ++ desc_chunck_len = desc_avail; ++ desc_gaddr += desc_offset; ++ desc_addr = vhost_iova_to_vva(dev, vq, ++ desc_gaddr, ++ &desc_chunck_len, ++ VHOST_ACCESS_RO); ++ if (unlikely(!desc_addr)) { ++ error = -1; ++ goto out; ++ } ++ desc_offset = 0; ++ ++ PRINT_PACKET(dev, (uintptr_t)desc_addr, ++ desc_chunck_len, 0); + } + +-- +1.8.3.1 + diff --git a/SOURCES/0007-vhost-handle-virtually-non-contiguous-buffers-in-Rx.patch b/SOURCES/0007-vhost-handle-virtually-non-contiguous-buffers-in-Rx.patch new file mode 
100644 index 0000000..72c972d --- /dev/null +++ b/SOURCES/0007-vhost-handle-virtually-non-contiguous-buffers-in-Rx.patch @@ -0,0 +1,176 @@ +From 79587b5fcf7d09fbba9f05bbdc1edcc26060f80a Mon Sep 17 00:00:00 2001 +From: Maxime Coquelin +Date: Mon, 23 Apr 2018 11:33:44 +0200 +Subject: [PATCH 07/11] vhost: handle virtually non-contiguous buffers in Rx + +This patch enables the handling of buffers non-contiguous in +process virtual address space in the enqueue path when mergeable +buffers aren't used. + +When virtio-net header doesn't fit in a single chunck, it is +computed in a local variable and copied to the buffer chuncks +afterwards. + +For packet content, the copy length is limited to the chunck +size, next chuncks VAs being fetched afterward. + +This issue has been assigned CVE-2018-1059. + +Signed-off-by: Maxime Coquelin +--- + lib/librte_vhost/virtio_net.c | 95 +++++++++++++++++++++++++++++++++++-------- + 1 file changed, 77 insertions(+), 18 deletions(-) + +diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c +index 47717a1..5bd4e58 100644 +--- a/lib/librte_vhost/virtio_net.c ++++ b/lib/librte_vhost/virtio_net.c +@@ -246,7 +246,7 @@ + uint32_t mbuf_avail, mbuf_offset; + uint32_t cpy_len; +- uint64_t dlen; ++ uint64_t desc_chunck_len; + struct vring_desc *desc; +- uint64_t desc_addr; ++ uint64_t desc_addr, desc_gaddr; + /* A counter to avoid desc dead loop chain */ + uint16_t nr_desc = 1; +@@ -256,7 +256,8 @@ + + desc = &descs[desc_idx]; +- dlen = desc->len; +- desc_addr = vhost_iova_to_vva(dev, vq, desc->addr, +- &dlen, VHOST_ACCESS_RW); ++ desc_chunck_len = desc->len; ++ desc_gaddr = desc->addr; ++ desc_addr = vhost_iova_to_vva(dev, vq, desc_gaddr, ++ &desc_chunck_len, VHOST_ACCESS_RW); + /* + * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid +@@ -264,6 +265,5 @@ + * otherwise stores offset on the stack instead of in a register. 
+ */ +- if (unlikely(dlen != desc->len || desc->len < dev->vhost_hlen) || +- !desc_addr) { ++ if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr) { + error = -1; + goto out; +@@ -272,10 +272,56 @@ + rte_prefetch0((void *)(uintptr_t)desc_addr); + +- virtio_enqueue_offload(m, (struct virtio_net_hdr *)(uintptr_t)desc_addr); +- vhost_log_write(dev, desc->addr, dev->vhost_hlen); +- PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0); ++ if (likely(desc_chunck_len >= dev->vhost_hlen)) { ++ virtio_enqueue_offload(m, ++ (struct virtio_net_hdr *)(uintptr_t)desc_addr); ++ PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0); ++ vhost_log_write(dev, desc_gaddr, dev->vhost_hlen); ++ } else { ++ struct virtio_net_hdr vnet_hdr; ++ uint64_t remain = dev->vhost_hlen; ++ uint64_t len; ++ uint64_t src = (uint64_t)(uintptr_t)&vnet_hdr, dst; ++ uint64_t guest_addr = desc_gaddr; ++ ++ virtio_enqueue_offload(m, &vnet_hdr); ++ ++ while (remain) { ++ len = remain; ++ dst = vhost_iova_to_vva(dev, vq, guest_addr, ++ &len, VHOST_ACCESS_RW); ++ if (unlikely(!dst || !len)) { ++ error = -1; ++ goto out; ++ } ++ ++ rte_memcpy((void *)(uintptr_t)dst, ++ (void *)(uintptr_t)src, len); ++ ++ PRINT_PACKET(dev, (uintptr_t)dst, len, 0); ++ vhost_log_write(dev, guest_addr, len); ++ remain -= len; ++ guest_addr += len; ++ dst += len; ++ } ++ } + +- desc_offset = dev->vhost_hlen; + desc_avail = desc->len - dev->vhost_hlen; ++ if (unlikely(desc_chunck_len < dev->vhost_hlen)) { ++ desc_chunck_len = desc_avail; ++ desc_gaddr = desc->addr + dev->vhost_hlen; ++ desc_addr = vhost_iova_to_vva(dev, ++ vq, desc_gaddr, ++ &desc_chunck_len, ++ VHOST_ACCESS_RW); ++ if (unlikely(!desc_addr)) { ++ error = -1; ++ goto out; ++ } ++ ++ desc_offset = 0; ++ } else { ++ desc_offset = dev->vhost_hlen; ++ desc_chunck_len -= dev->vhost_hlen; ++ } + + mbuf_avail = rte_pktmbuf_data_len(m); +@@ -303,9 +349,10 @@ + + desc = &descs[desc->next]; +- dlen = desc->len; +- desc_addr = vhost_iova_to_vva(dev, vq, 
desc->addr, +- &dlen, ++ desc_chunck_len = desc->len; ++ desc_gaddr = desc->addr; ++ desc_addr = vhost_iova_to_vva(dev, vq, desc_gaddr, ++ &desc_chunck_len, + VHOST_ACCESS_RW); +- if (unlikely(!desc_addr || dlen != desc->len)) { ++ if (unlikely(!desc_addr)) { + error = -1; + goto out; +@@ -314,7 +361,18 @@ + desc_offset = 0; + desc_avail = desc->len; ++ } else if (unlikely(desc_chunck_len == 0)) { ++ desc_chunck_len = desc_avail; ++ desc_gaddr += desc_offset; ++ desc_addr = vhost_iova_to_vva(dev, ++ vq, desc_gaddr, ++ &desc_chunck_len, VHOST_ACCESS_RW); ++ if (unlikely(!desc_addr)) { ++ error = -1; ++ goto out; ++ } ++ desc_offset = 0; + } + +- cpy_len = RTE_MIN(desc_avail, mbuf_avail); ++ cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail); + if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) { + rte_memcpy((void *)((uintptr_t)(desc_addr + +@@ -322,5 +380,5 @@ + rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), + cpy_len); +- vhost_log_write(dev, desc->addr + desc_offset, cpy_len); ++ vhost_log_write(dev, desc_gaddr + desc_offset, cpy_len); + PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), + cpy_len, 0); +@@ -330,5 +388,5 @@ + batch_copy[copy_nb].src = + rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); +- batch_copy[copy_nb].log_addr = desc->addr + desc_offset; ++ batch_copy[copy_nb].log_addr = desc_gaddr + desc_offset; + batch_copy[copy_nb].len = cpy_len; + copy_nb++; +@@ -339,4 +397,5 @@ + desc_avail -= cpy_len; + desc_offset += cpy_len; ++ desc_chunck_len -= cpy_len; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0008-vhost-handle-virtually-non-contiguous-buffers-in-Rx-.patch b/SOURCES/0008-vhost-handle-virtually-non-contiguous-buffers-in-Rx-.patch new file mode 100644 index 0000000..1ac9a1b --- /dev/null +++ b/SOURCES/0008-vhost-handle-virtually-non-contiguous-buffers-in-Rx-.patch @@ -0,0 +1,212 @@ +From 93f522ef2aa3e61bd44d374c5fb92ede0ac1b58f Mon Sep 17 00:00:00 2001 +From: Maxime Coquelin +Date: Mon, 23 Apr 2018 11:33:45 +0200 +Subject: [PATCH 08/11] 
vhost: handle virtually non-contiguous buffers in + Rx-mrg + +This patch enables the handling of buffers non-contiguous in +process virtual address space in the enqueue path when mergeable +buffers are used. + +When virtio-net header doesn't fit in a single chunck, it is +computed in a local variable and copied to the buffer chuncks +afterwards. + +For packet content, the copy length is limited to the chunck +size, next chuncks VAs being fetched afterward. + +This issue has been assigned CVE-2018-1059. + +Signed-off-by: Maxime Coquelin +--- + lib/librte_vhost/virtio_net.c | 115 ++++++++++++++++++++++++++++++++---------- + 1 file changed, 87 insertions(+), 28 deletions(-) + +diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c +index 5bd4e58..a013c07 100644 +--- a/lib/librte_vhost/virtio_net.c ++++ b/lib/librte_vhost/virtio_net.c +@@ -669,12 +669,13 @@ + { + uint32_t vec_idx = 0; +- uint64_t desc_addr; ++ uint64_t desc_addr, desc_gaddr; + uint32_t mbuf_offset, mbuf_avail; + uint32_t desc_offset, desc_avail; + uint32_t cpy_len; +- uint64_t dlen; ++ uint64_t desc_chunck_len; + uint64_t hdr_addr, hdr_phys_addr; + struct rte_mbuf *hdr_mbuf; + struct batch_copy_elem *batch_copy = vq->batch_copy_elems; ++ struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL; + uint16_t copy_nb = vq->batch_copy_nb_elems; + int error = 0; +@@ -685,10 +686,11 @@ + } + +- dlen = buf_vec[vec_idx].buf_len; +- desc_addr = vhost_iova_to_vva(dev, vq, buf_vec[vec_idx].buf_addr, +- &dlen, VHOST_ACCESS_RW); +- if (dlen != buf_vec[vec_idx].buf_len || +- buf_vec[vec_idx].buf_len < dev->vhost_hlen || +- !desc_addr) { ++ desc_chunck_len = buf_vec[vec_idx].buf_len; ++ desc_gaddr = buf_vec[vec_idx].buf_addr; ++ desc_addr = vhost_iova_to_vva(dev, vq, ++ desc_gaddr, ++ &desc_chunck_len, ++ VHOST_ACCESS_RW); ++ if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr) { + error = -1; + goto out; +@@ -697,5 +699,9 @@ + hdr_mbuf = m; + hdr_addr = desc_addr; +- hdr_phys_addr = 
buf_vec[vec_idx].buf_addr; ++ if (unlikely(desc_chunck_len < dev->vhost_hlen)) ++ hdr = &tmp_hdr; ++ else ++ hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr; ++ hdr_phys_addr = desc_gaddr; + rte_prefetch0((void *)(uintptr_t)hdr_addr); + +@@ -704,5 +710,22 @@ + + desc_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen; +- desc_offset = dev->vhost_hlen; ++ if (unlikely(desc_chunck_len < dev->vhost_hlen)) { ++ desc_chunck_len = desc_avail; ++ desc_gaddr += dev->vhost_hlen; ++ desc_addr = vhost_iova_to_vva(dev, vq, ++ desc_gaddr, ++ &desc_chunck_len, ++ VHOST_ACCESS_RW); ++ if (unlikely(!desc_addr)) { ++ error = -1; ++ goto out; ++ } ++ ++ desc_offset = 0; ++ } else { ++ desc_offset = dev->vhost_hlen; ++ desc_chunck_len -= dev->vhost_hlen; ++ } ++ + + mbuf_avail = rte_pktmbuf_data_len(m); +@@ -712,12 +735,12 @@ + if (desc_avail == 0) { + vec_idx++; +- dlen = buf_vec[vec_idx].buf_len; ++ desc_chunck_len = buf_vec[vec_idx].buf_len; ++ desc_gaddr = buf_vec[vec_idx].buf_addr; + desc_addr = + vhost_iova_to_vva(dev, vq, +- buf_vec[vec_idx].buf_addr, +- &dlen, ++ desc_gaddr, ++ &desc_chunck_len, + VHOST_ACCESS_RW); +- if (unlikely(!desc_addr || +- dlen != buf_vec[vec_idx].buf_len)) { ++ if (unlikely(!desc_addr)) { + error = -1; + goto out; +@@ -728,4 +751,15 @@ + desc_offset = 0; + desc_avail = buf_vec[vec_idx].buf_len; ++ } else if (unlikely(desc_chunck_len == 0)) { ++ desc_chunck_len = desc_avail; ++ desc_gaddr += desc_offset; ++ desc_addr = vhost_iova_to_vva(dev, vq, ++ desc_gaddr, ++ &desc_chunck_len, VHOST_ACCESS_RW); ++ if (unlikely(!desc_addr)) { ++ error = -1; ++ goto out; ++ } ++ desc_offset = 0; + } + +@@ -739,19 +773,46 @@ + + if (hdr_addr) { +- struct virtio_net_hdr_mrg_rxbuf *hdr; +- +- hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t) +- hdr_addr; + virtio_enqueue_offload(hdr_mbuf, &hdr->hdr); + ASSIGN_UNLESS_EQUAL(hdr->num_buffers, num_buffers); + +- vhost_log_write(dev, hdr_phys_addr, dev->vhost_hlen); +- PRINT_PACKET(dev, (uintptr_t)hdr_addr, 
+- dev->vhost_hlen, 0); ++ if (unlikely(hdr == &tmp_hdr)) { ++ uint64_t len; ++ uint64_t remain = dev->vhost_hlen; ++ uint64_t src = (uint64_t)(uintptr_t)hdr, dst; ++ uint64_t guest_addr = hdr_phys_addr; ++ ++ while (remain) { ++ len = remain; ++ dst = vhost_iova_to_vva(dev, vq, ++ guest_addr, &len, ++ VHOST_ACCESS_RW); ++ if (unlikely(!dst || !len)) { ++ error = -1; ++ goto out; ++ } ++ ++ rte_memcpy((void *)(uintptr_t)dst, ++ (void *)(uintptr_t)src, ++ len); ++ ++ PRINT_PACKET(dev, (uintptr_t)dst, ++ len, 0); ++ vhost_log_write(dev, guest_addr, len); ++ ++ remain -= len; ++ guest_addr += len; ++ dst += len; ++ } ++ } else { ++ PRINT_PACKET(dev, (uintptr_t)hdr_addr, ++ dev->vhost_hlen, 0); ++ vhost_log_write(dev, hdr_phys_addr, ++ dev->vhost_hlen); ++ } + + hdr_addr = 0; + } + +- cpy_len = RTE_MIN(desc_avail, mbuf_avail); ++ cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail); + + if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) { +@@ -760,7 +821,5 @@ + rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), + cpy_len); +- vhost_log_write(dev, +- buf_vec[vec_idx].buf_addr + desc_offset, +- cpy_len); ++ vhost_log_write(dev, desc_gaddr + desc_offset, cpy_len); + PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), + cpy_len, 0); +@@ -770,6 +829,5 @@ + batch_copy[copy_nb].src = + rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); +- batch_copy[copy_nb].log_addr = +- buf_vec[vec_idx].buf_addr + desc_offset; ++ batch_copy[copy_nb].log_addr = desc_gaddr + desc_offset; + batch_copy[copy_nb].len = cpy_len; + copy_nb++; +@@ -780,4 +838,5 @@ + desc_avail -= cpy_len; + desc_offset += cpy_len; ++ desc_chunck_len -= cpy_len; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0009-examples-vhost-move-to-safe-GPA-translation-API.patch b/SOURCES/0009-examples-vhost-move-to-safe-GPA-translation-API.patch new file mode 100644 index 0000000..ef477cf --- /dev/null +++ b/SOURCES/0009-examples-vhost-move-to-safe-GPA-translation-API.patch @@ -0,0 +1,192 @@ +From 
cbf70816520746639e6db436f356ba0cd36e0bb3 Mon Sep 17 00:00:00 2001 +From: Maxime Coquelin +Date: Mon, 23 Apr 2018 11:33:46 +0200 +Subject: [PATCH 09/11] examples/vhost: move to safe GPA translation API + +This patch uses the new rte_vhost_va_from_guest_pa() API +to ensure the application doesn't perform out-of-bound +accesses either because of a malicious guest providing an +incorrect descriptor length, or because the buffer is +contiguous in guest physical address space but not in the +host process virtual address space. + +This issue has been assigned CVE-2018-1059. + +Signed-off-by: Maxime Coquelin +--- + examples/vhost/virtio_net.c | 94 +++++++++++++++++++++++++++++++++++++++------ + 1 file changed, 83 insertions(+), 11 deletions(-) + +diff --git a/examples/vhost/virtio_net.c b/examples/vhost/virtio_net.c +index 1ab57f5..31c3dd0 100644 +--- a/examples/vhost/virtio_net.c ++++ b/examples/vhost/virtio_net.c +@@ -86,8 +86,9 @@ + { + uint32_t desc_avail, desc_offset; ++ uint64_t desc_chunck_len; + uint32_t mbuf_avail, mbuf_offset; + uint32_t cpy_len; + struct vring_desc *desc; +- uint64_t desc_addr; ++ uint64_t desc_addr, desc_gaddr; + struct virtio_net_hdr virtio_hdr = {0, 0, 0, 0, 0, 0}; + /* A counter to avoid desc dead loop chain */ +@@ -95,5 +96,8 @@ + + desc = &vr->desc[desc_idx]; +- desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); ++ desc_chunck_len = desc->len; ++ desc_gaddr = desc->addr; ++ desc_addr = rte_vhost_va_from_guest_pa( ++ dev->mem, desc_gaddr, &desc_chunck_len); + /* + * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid +@@ -107,7 +111,40 @@ + + /* write virtio-net header */ +- *(struct virtio_net_hdr *)(uintptr_t)desc_addr = virtio_hdr; ++ if (likely(desc_chunck_len >= dev->hdr_len)) { ++ *(struct virtio_net_hdr *)(uintptr_t)desc_addr = virtio_hdr; ++ desc_offset = dev->hdr_len; ++ } else { ++ uint64_t len; ++ uint64_t remain = dev->hdr_len; ++ uint64_t src = (uint64_t)(uintptr_t)&virtio_hdr, dst; ++ uint64_t guest_addr = 
desc_gaddr; ++ ++ while (remain) { ++ len = remain; ++ dst = rte_vhost_va_from_guest_pa(dev->mem, ++ guest_addr, &len); ++ if (unlikely(!dst || !len)) ++ return -1; ++ ++ rte_memcpy((void *)(uintptr_t)dst, ++ (void *)(uintptr_t)src, ++ len); ++ ++ remain -= len; ++ guest_addr += len; ++ dst += len; ++ } ++ ++ desc_chunck_len = desc->len - dev->hdr_len; ++ desc_gaddr += dev->hdr_len; ++ desc_addr = rte_vhost_va_from_guest_pa( ++ dev->mem, desc_gaddr, ++ &desc_chunck_len); ++ if (unlikely(!desc_addr)) ++ return -1; ++ ++ desc_offset = 0; ++ } + +- desc_offset = dev->hdr_len; + desc_avail = desc->len - dev->hdr_len; + +@@ -134,5 +171,8 @@ + + desc = &vr->desc[desc->next]; +- desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); ++ desc_chunck_len = desc->len; ++ desc_gaddr = desc->addr; ++ desc_addr = rte_vhost_va_from_guest_pa( ++ dev->mem, desc_gaddr, &desc_chunck_len); + if (unlikely(!desc_addr)) + return -1; +@@ -140,7 +180,17 @@ + desc_offset = 0; + desc_avail = desc->len; ++ } else if (unlikely(desc_chunck_len == 0)) { ++ desc_chunck_len = desc_avail; ++ desc_gaddr += desc_offset; ++ desc_addr = rte_vhost_va_from_guest_pa(dev->mem, ++ desc_gaddr, ++ &desc_chunck_len); ++ if (unlikely(!desc_addr)) ++ return -1; ++ ++ desc_offset = 0; + } + +- cpy_len = RTE_MIN(desc_avail, mbuf_avail); ++ cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail); + rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)), + rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), +@@ -151,4 +201,5 @@ + desc_avail -= cpy_len; + desc_offset += cpy_len; ++ desc_chunck_len -= cpy_len; + } + +@@ -224,6 +275,7 @@ + { + struct vring_desc *desc; +- uint64_t desc_addr; ++ uint64_t desc_addr, desc_gaddr; + uint32_t desc_avail, desc_offset; ++ uint64_t desc_chunck_len; + uint32_t mbuf_avail, mbuf_offset; + uint32_t cpy_len; +@@ -237,5 +289,8 @@ + return -1; + +- desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); ++ desc_chunck_len = desc->len; ++ desc_gaddr = desc->addr; ++ desc_addr = 
rte_vhost_va_from_guest_pa( ++ dev->mem, desc_gaddr, &desc_chunck_len); + if (unlikely(!desc_addr)) + return -1; +@@ -251,5 +306,8 @@ + */ + desc = &vr->desc[desc->next]; +- desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); ++ desc_chunck_len = desc->len; ++ desc_gaddr = desc->addr; ++ desc_addr = rte_vhost_va_from_guest_pa( ++ dev->mem, desc_gaddr, &desc_chunck_len); + if (unlikely(!desc_addr)) + return -1; +@@ -263,5 +321,5 @@ + mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; + while (1) { +- cpy_len = RTE_MIN(desc_avail, mbuf_avail); ++ cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail); + rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, + mbuf_offset), +@@ -273,4 +331,5 @@ + desc_avail -= cpy_len; + desc_offset += cpy_len; ++ desc_chunck_len -= cpy_len; + + /* This desc reaches to its end, get the next one */ +@@ -284,5 +343,8 @@ + desc = &vr->desc[desc->next]; + +- desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr); ++ desc_chunck_len = desc->len; ++ desc_gaddr = desc->addr; ++ desc_addr = rte_vhost_va_from_guest_pa( ++ dev->mem, desc_gaddr, &desc_chunck_len); + if (unlikely(!desc_addr)) + return -1; +@@ -291,4 +353,14 @@ + desc_offset = 0; + desc_avail = desc->len; ++ } else if (unlikely(desc_chunck_len == 0)) { ++ desc_chunck_len = desc_avail; ++ desc_gaddr += desc_offset; ++ desc_addr = rte_vhost_va_from_guest_pa(dev->mem, ++ desc_gaddr, ++ &desc_chunck_len); ++ if (unlikely(!desc_addr)) ++ return -1; ++ ++ desc_offset = 0; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0010-examples-vhost_scsi-move-to-safe-GPA-translation-API.patch b/SOURCES/0010-examples-vhost_scsi-move-to-safe-GPA-translation-API.patch new file mode 100644 index 0000000..bb857ed --- /dev/null +++ b/SOURCES/0010-examples-vhost_scsi-move-to-safe-GPA-translation-API.patch @@ -0,0 +1,139 @@ +From d34212ffb5e333a515f87b2f828606bc5690b8b3 Mon Sep 17 00:00:00 2001 +From: Maxime Coquelin +Date: Mon, 23 Apr 2018 11:33:47 +0200 +Subject: [PATCH 10/11] examples/vhost_scsi: move to safe GPA 
translation API + +This patch uses the new rte_vhost_va_from_guest_pa() API +to ensure all the descriptor buffer is mapped contiguously +in the application virtual address space. + +As the application did not checked return of previous API, +this patch just print an error if the buffer address isn't in +the vhost memory regions or if it is scattered. Ideally, it +should handle scattered buffers gracefully. + +This issue has been assigned CVE-2018-1059. + +Signed-off-by: Maxime Coquelin +--- + examples/vhost_scsi/vhost_scsi.c | 56 +++++++++++++++++++++++++++++++++------- + 1 file changed, 47 insertions(+), 9 deletions(-) + +diff --git a/examples/vhost_scsi/vhost_scsi.c b/examples/vhost_scsi/vhost_scsi.c +index b4f1f8d..b40f993 100644 +--- a/examples/vhost_scsi/vhost_scsi.c ++++ b/examples/vhost_scsi/vhost_scsi.c +@@ -69,5 +69,5 @@ + } + +-static uint64_t gpa_to_vva(int vid, uint64_t gpa) ++static uint64_t gpa_to_vva(int vid, uint64_t gpa, uint64_t *len) + { + char path[PATH_MAX]; +@@ -89,5 +89,5 @@ static uint64_t gpa_to_vva(int vid, uint64_t gpa) + assert(ctrlr->mem != NULL); + +- return rte_vhost_gpa_to_vva(ctrlr->mem, gpa); ++ return rte_vhost_va_from_guest_pa(ctrlr->mem, gpa, len); + } + +@@ -139,13 +139,27 @@ static uint64_t gpa_to_vva(int vid, uint64_t gpa) + { + void *data; ++ uint64_t chunck_len; + + task->iovs_cnt = 0; ++ chunck_len = task->desc->len; + task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, +- task->desc->addr); ++ task->desc->addr, ++ &chunck_len); ++ if (!task->resp || chunck_len != task->desc->len) { ++ fprintf(stderr, "failed to translate desc address.\n"); ++ return; ++ } + + while (descriptor_has_next(task->desc)) { + task->desc = descriptor_get_next(task->vq->desc, task->desc); ++ chunck_len = task->desc->len; + data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, +- task->desc->addr); ++ task->desc->addr, ++ &chunck_len); ++ if (!data || chunck_len != task->desc->len) { ++ fprintf(stderr, "failed to translate desc 
address.\n"); ++ return; ++ } ++ + task->iovs[task->iovs_cnt].iov_base = data; + task->iovs[task->iovs_cnt].iov_len = task->desc->len; +@@ -159,10 +173,18 @@ static uint64_t gpa_to_vva(int vid, uint64_t gpa) + { + void *data; ++ uint64_t chunck_len; + + task->iovs_cnt = 0; + + do { ++ chunck_len = task->desc->len; + data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, +- task->desc->addr); ++ task->desc->addr, ++ &chunck_len); ++ if (!data || chunck_len != task->desc->len) { ++ fprintf(stderr, "failed to translate desc address.\n"); ++ return; ++ } ++ + task->iovs[task->iovs_cnt].iov_base = data; + task->iovs[task->iovs_cnt].iov_len = task->desc->len; +@@ -172,6 +194,10 @@ static uint64_t gpa_to_vva(int vid, uint64_t gpa) + } while (descriptor_has_next(task->desc)); + ++ chunck_len = task->desc->len; + task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, +- task->desc->addr); ++ task->desc->addr, ++ &chunck_len); ++ if (!task->resp || chunck_len != task->desc->len) ++ fprintf(stderr, "failed to translate desc address.\n"); + } + +@@ -219,4 +245,5 @@ static uint64_t gpa_to_vva(int vid, uint64_t gpa) + uint16_t last_idx; + struct vhost_scsi_task *task; ++ uint64_t chunck_len; + + last_idx = scsi_vq->last_used_idx & (vq->size - 1); +@@ -236,14 +263,25 @@ static uint64_t gpa_to_vva(int vid, uint64_t gpa) + scsi_vq->last_used_idx++; + ++ chunck_len = task->desc->len; + task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, +- task->desc->addr); ++ task->desc->addr, ++ &chunck_len); ++ if (!task->req || chunck_len != task->desc->len) { ++ fprintf(stderr, "failed to translate desc address.\n"); ++ return; ++ } + + task->desc = descriptor_get_next(task->vq->desc, task->desc); + if (!descriptor_has_next(task->desc)) { + task->dxfer_dir = SCSI_DIR_NONE; ++ chunck_len = task->desc->len; + task->resp = (void *)(uintptr_t) + gpa_to_vva(task->bdev->vid, +- task->desc->addr); +- ++ task->desc->addr, ++ &chunck_len); ++ if (!task->resp || chunck_len != task->desc->len) 
{ ++ fprintf(stderr, "failed to translate desc address.\n"); ++ return; ++ } + } else if (!descriptor_is_wr(task->desc)) { + task->dxfer_dir = SCSI_DIR_TO_DEV; +-- +1.8.3.1 + diff --git a/SOURCES/0011-vhost-deprecate-unsafe-GPA-translation-API.patch b/SOURCES/0011-vhost-deprecate-unsafe-GPA-translation-API.patch new file mode 100644 index 0000000..26a9b2e --- /dev/null +++ b/SOURCES/0011-vhost-deprecate-unsafe-GPA-translation-API.patch @@ -0,0 +1,40 @@ +From e4866aa27a33c6354c7086e608c0625a8c1a1ecc Mon Sep 17 00:00:00 2001 +From: Maxime Coquelin +Date: Mon, 23 Apr 2018 11:33:48 +0200 +Subject: [PATCH 11/11] vhost: deprecate unsafe GPA translation API + +This patch marks rte_vhost_gpa_to_vva() as deprecated because +it is unsafe. Application relying on this API should move +to the new rte_vhost_va_from_guest_pa() API, and check +returned length to avoid out-of-bound accesses. + +This issue has been assigned CVE-2018-1059. + +Signed-off-by: Maxime Coquelin +--- + lib/librte_vhost/rte_vhost.h | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h +index f2d6c95..3fc6034 100644 +--- a/lib/librte_vhost/rte_vhost.h ++++ b/lib/librte_vhost/rte_vhost.h +@@ -118,4 +118,9 @@ struct vhost_device_ops { + * Convert guest physical address to host virtual address + * ++ * This function is deprecated because unsafe. ++ * New rte_vhost_va_from_guest_pa() should be used instead to ensure ++ * guest physical ranges are fully and contiguously mapped into ++ * process virtual address space. 
++ * + * @param mem + * the guest memory regions +@@ -125,4 +130,5 @@ struct vhost_device_ops { + * the host virtual address on success, 0 on failure + */ ++__rte_deprecated + static __rte_always_inline uint64_t + rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa) +-- +1.8.3.1 + diff --git a/SOURCES/arm64-armv8a-linuxapp-gcc-config b/SOURCES/arm64-armv8a-linuxapp-gcc-config index 90863d5..4f0d212 100644 --- a/SOURCES/arm64-armv8a-linuxapp-gcc-config +++ b/SOURCES/arm64-armv8a-linuxapp-gcc-config @@ -1,4 +1,4 @@ -# -*- cfg-sha: 2543d3fdeee262a6a7fdcdd19e5c36cde5ae450d4cdf35a4a4af438710180e98 +# -*- cfg-sha: 0f73161964ec9ad68f1ea9715e8143248e5b244d51386ead6ce15d559c8bd4e1 # BSD LICENSE # Copyright (C) Cavium, Inc 2015. All rights reserved. # Redistribution and use in source and binary forms, with or without @@ -224,10 +224,12 @@ CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR=y CONFIG_RTE_LIBRTE_MLX4_PMD=n CONFIG_RTE_LIBRTE_MLX4_DEBUG=n CONFIG_RTE_LIBRTE_MLX4_DEBUG_BROKEN_VERBS=n +CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS=n CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE=8 # Compile burst-oriented Mellanox ConnectX-4 & ConnectX-5 (MLX5) PMD CONFIG_RTE_LIBRTE_MLX5_PMD=n CONFIG_RTE_LIBRTE_MLX5_DEBUG=n +CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS=n CONFIG_RTE_LIBRTE_MLX5_TX_MP_CACHE=8 # Compile burst-oriented Broadcom PMD driver CONFIG_RTE_LIBRTE_BNX2X_PMD=n diff --git a/SOURCES/bnxt-dpdk-0001-net-bnxt-cache-address-of-doorbell-to-subsequent-acc.patch b/SOURCES/bnxt-dpdk-0001-net-bnxt-cache-address-of-doorbell-to-subsequent-acc.patch new file mode 100644 index 0000000..3eec914 --- /dev/null +++ b/SOURCES/bnxt-dpdk-0001-net-bnxt-cache-address-of-doorbell-to-subsequent-acc.patch @@ -0,0 +1,129 @@ +From 02bd8182658600ebf2cbe61168e80c19ce4cdaa5 Mon Sep 17 00:00:00 2001 +Message-Id: <02bd8182658600ebf2cbe61168e80c19ce4cdaa5.1524241750.git.dcaratti@redhat.com> +From: Ajit Khaparde +Date: Fri, 20 Apr 2018 07:22:00 -0700 +Subject: [PATCH 1/3] net/bnxt: cache address of doorbell to subsequent access + 
+While creating TX, Rx, CQ rings use cached DB address instead of +getting it from the PCI memory resource. + +Signed-off-by: Ajit Khaparde +--- + drivers/net/bnxt/bnxt.h | 1 + + drivers/net/bnxt/bnxt_cpr.c | 2 +- + drivers/net/bnxt/bnxt_ethdev.c | 12 ++++++++++++ + drivers/net/bnxt/bnxt_ring.c | 17 +++++------------ + 4 files changed, 19 insertions(+), 13 deletions(-) + +backport notes: + - use RTE_LOG in place of PMD_DRV_LOG (missing upstream commit + 3e92fd4e4ec0 ("net/bnxt: use dynamic log type") + +--- a/drivers/net/bnxt/bnxt.h ++++ b/drivers/net/bnxt/bnxt.h +@@ -188,6 +188,7 @@ struct bnxt { + struct rte_eth_dev *eth_dev; + struct rte_eth_rss_conf rss_conf; + struct rte_pci_device *pdev; ++ void *doorbell_base; + + uint32_t flags; + #define BNXT_FLAG_REGISTERED (1 << 0) +--- a/drivers/net/bnxt/bnxt_cpr.c ++++ b/drivers/net/bnxt/bnxt_cpr.c +@@ -163,7 +163,7 @@ int bnxt_alloc_def_cp_ring(struct bnxt * + HWRM_NA_SIGNATURE); + if (rc) + goto err_out; +- cpr->cp_doorbell = bp->pdev->mem_resource[2].addr; ++ cpr->cp_doorbell = (char *)bp->doorbell_base; + B_CP_DIS_DB(cpr, cpr->cp_raw_cons); + bp->grp_info[0].cp_fw_ring_id = cp_ring->fw_ring_id; + if (BNXT_PF(bp)) +--- a/drivers/net/bnxt/bnxt_ethdev.c ++++ b/drivers/net/bnxt/bnxt_ethdev.c +@@ -2769,11 +2769,23 @@ static int bnxt_init_board(struct rte_et + rc = -ENOMEM; + goto init_err_release; + } ++ ++ if (!pci_dev->mem_resource[2].addr) { ++ RTE_LOG(ERR, PMD, ++ "Cannot find PCI device BAR 2 address, aborting\n"); ++ rc = -ENODEV; ++ goto init_err_release; ++ } else { ++ bp->doorbell_base = (void *)pci_dev->mem_resource[2].addr; ++ } ++ + return 0; + + init_err_release: + if (bp->bar0) + bp->bar0 = NULL; ++ if (bp->doorbell_base) ++ bp->doorbell_base = NULL; + + init_err_disable: + +--- a/drivers/net/bnxt/bnxt_ring.c ++++ b/drivers/net/bnxt/bnxt_ring.c +@@ -281,7 +281,6 @@ int bnxt_alloc_rings(struct bnxt *bp, ui + */ + int bnxt_alloc_hwrm_rings(struct bnxt *bp) + { +- struct rte_pci_device *pci_dev = bp->pdev; + 
unsigned int i; + int rc = 0; + +@@ -303,8 +302,7 @@ int bnxt_alloc_hwrm_rings(struct bnxt *b + HWRM_NA_SIGNATURE); + if (rc) + goto err_out; +- cpr->cp_doorbell = (char *)pci_dev->mem_resource[2].addr + +- idx * 0x80; ++ cpr->cp_doorbell = (char *)bp->doorbell_base + idx * 0x80; + bp->grp_info[i].cp_fw_ring_id = cp_ring->fw_ring_id; + B_CP_DIS_DB(cpr, cpr->cp_raw_cons); + +@@ -316,8 +314,7 @@ int bnxt_alloc_hwrm_rings(struct bnxt *b + if (rc) + goto err_out; + rxr->rx_prod = 0; +- rxr->rx_doorbell = (char *)pci_dev->mem_resource[2].addr + +- idx * 0x80; ++ rxr->rx_doorbell = (char *)bp->doorbell_base + idx * 0x80; + bp->grp_info[i].rx_fw_ring_id = ring->fw_ring_id; + B_RX_DB(rxr->rx_doorbell, rxr->rx_prod); + +@@ -336,9 +333,7 @@ int bnxt_alloc_hwrm_rings(struct bnxt *b + goto err_out; + RTE_LOG(DEBUG, PMD, "Alloc AGG Done!\n"); + rxr->ag_prod = 0; +- rxr->ag_doorbell = +- (char *)pci_dev->mem_resource[2].addr + +- map_idx * 0x80; ++ rxr->ag_doorbell = (char *)bp->doorbell_base + map_idx * 0x80; + bp->grp_info[i].ag_fw_ring_id = ring->fw_ring_id; + B_RX_DB(rxr->ag_doorbell, rxr->ag_prod); + +@@ -373,8 +368,7 @@ int bnxt_alloc_hwrm_rings(struct bnxt *b + if (rc) + goto err_out; + +- cpr->cp_doorbell = (char *)pci_dev->mem_resource[2].addr + +- idx * 0x80; ++ cpr->cp_doorbell = (char *)bp->doorbell_base + idx * 0x80; + B_CP_DIS_DB(cpr, cpr->cp_raw_cons); + + /* Tx ring */ +@@ -385,8 +379,7 @@ int bnxt_alloc_hwrm_rings(struct bnxt *b + if (rc) + goto err_out; + +- txr->tx_doorbell = (char *)pci_dev->mem_resource[2].addr + +- idx * 0x80; ++ txr->tx_doorbell = (char *)bp->doorbell_base + idx * 0x80; + txq->index = idx; + } + diff --git a/SOURCES/bnxt-dpdk-0002-net-bnxt-avoid-invalid-vnic-id-in-set-L2-Rx-mask.patch b/SOURCES/bnxt-dpdk-0002-net-bnxt-avoid-invalid-vnic-id-in-set-L2-Rx-mask.patch new file mode 100644 index 0000000..dae9ec9 --- /dev/null +++ b/SOURCES/bnxt-dpdk-0002-net-bnxt-avoid-invalid-vnic-id-in-set-L2-Rx-mask.patch @@ -0,0 +1,72 @@ +From 
52bececea4d2327d842ee40e6c99388b6b3d8f93 Mon Sep 17 00:00:00 2001 +Message-Id: <52bececea4d2327d842ee40e6c99388b6b3d8f93.1524241750.git.dcaratti@redhat.com> +In-Reply-To: <02bd8182658600ebf2cbe61168e80c19ce4cdaa5.1524241750.git.dcaratti@redhat.com> +References: <02bd8182658600ebf2cbe61168e80c19ce4cdaa5.1524241750.git.dcaratti@redhat.com> +From: Ajit Khaparde +Date: Fri, 20 Apr 2018 07:22:01 -0700 +Subject: [PATCH 2/3] net/bnxt: avoid invalid vnic id in set L2 Rx mask + +In some cases bnxt_hwrm_cfa_l2_set_rx_mask is being called before +VNICs are allocated. The FW returns an error in such cases. +Move bnxt_init_nic to bnxt_dev_init such that the ids are initialized +to an invalid id. +Prevent sending the command to the FW only with a valid vnic id. + +Fixes: 244bc98b0da7 ("net/bnxt: set L2 Rx mask") +Cc: stable@dpdk.org + +Signed-off-by: Ajit Khaparde +--- + drivers/net/bnxt/bnxt_ethdev.c | 7 ++----- + drivers/net/bnxt/bnxt_hwrm.c | 3 +++ + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c +index a133114a3..348129dad 100644 +--- a/drivers/net/bnxt/bnxt_ethdev.c ++++ b/drivers/net/bnxt/bnxt_ethdev.c +@@ -395,10 +395,6 @@ static int bnxt_init_nic(struct bnxt *bp) + bnxt_init_vnics(bp); + bnxt_init_filters(bp); + +- rc = bnxt_init_chip(bp); +- if (rc) +- return rc; +- + return 0; + } + +@@ -594,7 +590,7 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev) + } + bp->dev_stopped = 0; + +- rc = bnxt_init_nic(bp); ++ rc = bnxt_init_chip(bp); + if (rc) + goto error; + +@@ -3398,6 +3394,7 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev) + goto error_free_int; + + bnxt_enable_int(bp); ++ bnxt_init_nic(bp); + + return 0; + +diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c +index 11204bf42..bc8773509 100644 +--- a/drivers/net/bnxt/bnxt_hwrm.c ++++ b/drivers/net/bnxt/bnxt_hwrm.c +@@ -221,6 +221,9 @@ int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp, + struct 
hwrm_cfa_l2_set_rx_mask_output *resp = bp->hwrm_cmd_resp_addr; + uint32_t mask = 0; + ++ if (vnic->fw_vnic_id == INVALID_HW_RING_ID) ++ return rc; ++ + HWRM_PREP(req, CFA_L2_SET_RX_MASK); + req.vnic_id = rte_cpu_to_le_16(vnic->fw_vnic_id); + +-- +2.14.3 + diff --git a/SOURCES/bnxt-dpdk-0003-net-bnxt-fix-mbuf-data-offset-initialization.patch b/SOURCES/bnxt-dpdk-0003-net-bnxt-fix-mbuf-data-offset-initialization.patch new file mode 100644 index 0000000..c4db3a1 --- /dev/null +++ b/SOURCES/bnxt-dpdk-0003-net-bnxt-fix-mbuf-data-offset-initialization.patch @@ -0,0 +1,44 @@ +From e5c04b1d1bc83115a2cc28615a5d5c6645c66cd4 Mon Sep 17 00:00:00 2001 +Message-Id: +In-Reply-To: <02bd8182658600ebf2cbe61168e80c19ce4cdaa5.1524241750.git.dcaratti@redhat.com> +References: <02bd8182658600ebf2cbe61168e80c19ce4cdaa5.1524241750.git.dcaratti@redhat.com> +From: Ajit Khaparde +Date: Fri, 20 Apr 2018 07:22:02 -0700 +Subject: [PATCH 3/3] net/bnxt: fix mbuf data offset initialization + +Initialize mbuf->data_off to RTE_PKTMBUF_HEADROOM after allocation. +Without this, it might be possible that the DMA address provided +to the HW may not be in sync to what is indicated to the application +in bnxt_rx_pkt. 
+ +Fixes: 2eb53b134aae ("net/bnxt: add initial Rx code") +Cc: stable@dpdk.org + +Signed-off-by: Ajit Khaparde +--- + drivers/net/bnxt/bnxt_rxr.c | 2 ++ + 1 file changed, 2 insertions(+) + +backport notes: + - use data->data_off instead off mbuf->data_off, because we miss + upstream commit 42b883535804 ("net/bnxt: use new API to get IOVA + address") + +--- a/drivers/net/bnxt/bnxt_rxr.c ++++ b/drivers/net/bnxt/bnxt_rxr.c +@@ -74,6 +74,7 @@ static inline int bnxt_alloc_rx_data(str + } + + rx_buf->mbuf = data; ++ data->data_off = RTE_PKTMBUF_HEADROOM; + + rxbd->addr = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR(rx_buf->mbuf)); + +@@ -101,6 +102,7 @@ static inline int bnxt_alloc_ag_data(str + + + rx_buf->mbuf = data; ++ data->data_off = RTE_PKTMBUF_HEADROOM; + + rxbd->addr = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR(rx_buf->mbuf)); + diff --git a/SOURCES/dpdk-17.11-i40e-fix-link-status-timeout.patch b/SOURCES/dpdk-17.11-i40e-fix-link-status-timeout.patch new file mode 100644 index 0000000..414130f --- /dev/null +++ b/SOURCES/dpdk-17.11-i40e-fix-link-status-timeout.patch @@ -0,0 +1,209 @@ +From: Fan Zhang +Subject: [dpdk-dev,v2] drivers/i40e: fix link update no wait +Date: Thu, 8 Mar 2018 12:17:52 +0000 + +Fixes: 263333bbb7a9 ("i40e: fix link status timeout") +Cc: cunming.liang@intel.com +Cc: stable@dpdk.org + +In i40e_dev_link_update() the driver obtains the link status +info via admin queue command despite of "no_wait" flag. This +requires relatively long time and may be a problem to some +application such as ovs-dpdk +(https://bugzilla.redhat.com/show_bug.cgi?id=1551761). + +This patch aims to fix the problem by using a different +approach of obtaining link status for i40e NIC without waiting. +Instead of getting the link status via admin queue command, +this patch reads the link status registers to accelerate the +procedure. 
+ +Signed-off-by: Fan Zhang +Signed-off-by: Andrey Chilikin +Reviewed-by: Eelco Chaudron +Tested-by: Eelco Chaudron +--- +v2: +- add ccs after fixline + + drivers/net/i40e/i40e_ethdev.c | 128 ++++++++++++++++++++++++++++++----------- + 1 file changed, 95 insertions(+), 33 deletions(-) + +diff --git openvswitch-2.7.4/drivers/net/i40e/i40e_ethdev.c openvswitch-2.7.4/drivers/net/i40e/i40e_ethdev.c +index 508b4171c..968249ed1 100644 +--- openvswitch-2.7.4/drivers/net/i40e/i40e_ethdev.c ++++ openvswitch-2.7.4/drivers/net/i40e/i40e_ethdev.c +@@ -2437,77 +2437,140 @@ i40e_dev_set_link_down(struct rte_eth_dev *dev) + return i40e_phy_conf_link(hw, abilities, speed, false); + } + +-int +-i40e_dev_link_update(struct rte_eth_dev *dev, +- int wait_to_complete) ++#define __rte_always_inline inline __attribute__((always_inline)) ++static __rte_always_inline void ++update_link_no_wait(struct i40e_hw *hw, struct rte_eth_link *link) ++{ ++/* Link status registers and values*/ ++#define I40E_PRTMAC_LINKSTA 0x001E2420 ++#define I40E_REG_LINK_UP 0x40000080 ++#define I40E_PRTMAC_MACC 0x001E24E0 ++#define I40E_REG_MACC_25GB 0x00020000 ++#define I40E_REG_SPEED_MASK 0x38000000 ++#define I40E_REG_SPEED_100MB 0x00000000 ++#define I40E_REG_SPEED_1GB 0x08000000 ++#define I40E_REG_SPEED_10GB 0x10000000 ++#define I40E_REG_SPEED_20GB 0x20000000 ++#define I40E_REG_SPEED_25_40GB 0x18000000 ++ uint32_t link_speed; ++ uint32_t reg_val; ++ ++ reg_val = I40E_READ_REG(hw, I40E_PRTMAC_LINKSTA); ++ link_speed = reg_val & I40E_REG_SPEED_MASK; ++ reg_val &= I40E_REG_LINK_UP; ++ link->link_status = (reg_val == I40E_REG_LINK_UP) ? 
1 : 0; ++ ++ if (unlikely(link->link_status == 0)) ++ return; /* link down: no speed to report */ ++ ++ /* Parse the link status */ ++ switch (link_speed) { ++ case I40E_REG_SPEED_100MB: ++ link->link_speed = ETH_SPEED_NUM_100M; ++ break; ++ case I40E_REG_SPEED_1GB: ++ link->link_speed = ETH_SPEED_NUM_1G; ++ break; ++ case I40E_REG_SPEED_10GB: ++ link->link_speed = ETH_SPEED_NUM_10G; ++ break; ++ case I40E_REG_SPEED_20GB: ++ link->link_speed = ETH_SPEED_NUM_20G; ++ break; ++ case I40E_REG_SPEED_25_40GB: ++ reg_val = I40E_READ_REG(hw, I40E_PRTMAC_MACC); ++ ++ if (reg_val & I40E_REG_MACC_25GB) ++ link->link_speed = ETH_SPEED_NUM_25G; ++ else ++ link->link_speed = ETH_SPEED_NUM_40G; ++ ++ break; ++ default: ++ PMD_DRV_LOG(ERR, "Unknown link speed info %u", link_speed); ++ break; ++ } ++} ++ ++static __rte_always_inline void ++update_link_wait(struct i40e_hw *hw, struct rte_eth_link *link, ++ bool enable_lse) + { +-#define CHECK_INTERVAL 100 /* 100ms */ +-#define MAX_REPEAT_TIME 10 /* 1s (10 * 100ms) in total */ +- struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++#define CHECK_INTERVAL 100 /* 100ms */ ++#define MAX_REPEAT_TIME 10 /* 1s (10 * 100ms) in total */ ++ uint32_t rep_cnt = MAX_REPEAT_TIME; + struct i40e_link_status link_status; +- struct rte_eth_link link, old; + int status; +- unsigned rep_cnt = MAX_REPEAT_TIME; +- bool enable_lse = dev->data->dev_conf.intr_conf.lsc ?
true : false; + +- memset(&link, 0, sizeof(link)); +- memset(&old, 0, sizeof(old)); + memset(&link_status, 0, sizeof(link_status)); +- rte_i40e_dev_atomic_read_link_status(dev, &old); + + do { + /* Get link status information from hardware */ + status = i40e_aq_get_link_info(hw, enable_lse, + &link_status, NULL); +- if (status != I40E_SUCCESS) { +- link.link_speed = ETH_SPEED_NUM_100M; +- link.link_duplex = ETH_LINK_FULL_DUPLEX; ++ if (unlikely(status != I40E_SUCCESS)) { ++ link->link_speed = ETH_SPEED_NUM_100M; ++ link->link_duplex = ETH_LINK_FULL_DUPLEX; + PMD_DRV_LOG(ERR, "Failed to get link info"); +- goto out; ++ return; + } + +- link.link_status = link_status.link_info & I40E_AQ_LINK_UP; +- if (!wait_to_complete || link.link_status) +- break; ++ link->link_status = link_status.link_info & I40E_AQ_LINK_UP; ++ if (unlikely(link->link_status != 0)) ++ return; + + rte_delay_ms(CHECK_INTERVAL); + } while (--rep_cnt); + +- if (!link.link_status) +- goto out; +- +- /* i40e uses full duplex only */ +- link.link_duplex = ETH_LINK_FULL_DUPLEX; +- + /* Parse the link status */ + switch (link_status.link_speed) { + case I40E_LINK_SPEED_100MB: +- link.link_speed = ETH_SPEED_NUM_100M; ++ link->link_speed = ETH_SPEED_NUM_100M; + break; + case I40E_LINK_SPEED_1GB: +- link.link_speed = ETH_SPEED_NUM_1G; ++ link->link_speed = ETH_SPEED_NUM_1G; + break; + case I40E_LINK_SPEED_10GB: +- link.link_speed = ETH_SPEED_NUM_10G; ++ link->link_speed = ETH_SPEED_NUM_10G; + break; + case I40E_LINK_SPEED_20GB: +- link.link_speed = ETH_SPEED_NUM_20G; ++ link->link_speed = ETH_SPEED_NUM_20G; + break; + case I40E_LINK_SPEED_25GB: +- link.link_speed = ETH_SPEED_NUM_25G; ++ link->link_speed = ETH_SPEED_NUM_25G; + break; + case I40E_LINK_SPEED_40GB: +- link.link_speed = ETH_SPEED_NUM_40G; ++ link->link_speed = ETH_SPEED_NUM_40G; + break; + default: +- link.link_speed = ETH_SPEED_NUM_100M; ++ link->link_speed = ETH_SPEED_NUM_100M; + break; + } ++} ++ ++int ++i40e_dev_link_update(struct 
rte_eth_dev *dev, ++ int wait_to_complete) ++{ ++ struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ struct rte_eth_link link, old; ++ bool enable_lse = dev->data->dev_conf.intr_conf.lsc ? true : false; + ++ memset(&link, 0, sizeof(link)); ++ memset(&old, 0, sizeof(old)); ++ ++ rte_i40e_dev_atomic_read_link_status(dev, &old); ++ ++ /* i40e uses full duplex only */ ++ link.link_duplex = ETH_LINK_FULL_DUPLEX; + link.link_autoneg = !(dev->data->dev_conf.link_speeds & + ETH_LINK_SPEED_FIXED); + +-out: ++ if (!wait_to_complete) ++ update_link_no_wait(hw, &link); ++ else ++ update_link_wait(hw, &link, enable_lse); ++ + rte_i40e_dev_atomic_write_link_status(dev, &link); + if (link.link_status == old.link_status) + return -1; diff --git a/SOURCES/gen_config_group.sh b/SOURCES/gen_config_group.sh index eac8692..da6c225 100755 --- a/SOURCES/gen_config_group.sh +++ b/SOURCES/gen_config_group.sh @@ -104,7 +104,14 @@ do set_conf "${OUTDIR}" CONFIG_RTE_LIBRTE_I40E_PMD y case "${DPDK_CONF_MACH_ARCH[i]}" in x86_64) + set_conf "${OUTDIR}" CONFIG_RTE_LIBRTE_BNXT_PMD y set_conf "${OUTDIR}" CONFIG_RTE_LIBRTE_ENIC_PMD y + set_conf "${OUTDIR}" CONFIG_RTE_LIBRTE_MLX4_PMD y + set_conf "${OUTDIR}" CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS y + set_conf "${OUTDIR}" CONFIG_RTE_LIBRTE_MLX5_PMD y + set_conf "${OUTDIR}" CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS y + set_conf "${OUTDIR}" CONFIG_RTE_LIBRTE_NFP_PMD y + set_conf "${OUTDIR}" CONFIG_RTE_LIBRTE_QEDE_PMD y ;& arm64) set_conf "${OUTDIR}" CONFIG_RTE_LIBRTE_IXGBE_PMD y diff --git a/SOURCES/mlnx-dpdk-0001-net-mlx4-move-rdma-core-calls-to-separate-file.patch b/SOURCES/mlnx-dpdk-0001-net-mlx4-move-rdma-core-calls-to-separate-file.patch new file mode 100644 index 0000000..96046d2 --- /dev/null +++ b/SOURCES/mlnx-dpdk-0001-net-mlx4-move-rdma-core-calls-to-separate-file.patch @@ -0,0 +1,1002 @@ +From 064e2489caf7aeb9cd01dfcdc1b6a73b091b7f23 Mon Sep 17 00:00:00 2001 +From: Adrien Mazarguil +Date: Tue, 30 Jan 2018 16:34:52 +0100 +Subject: 
[PATCH 1/9] net/mlx4: move rdma-core calls to separate file + +This lays the groundwork for externalizing rdma-core as an optional +run-time dependency instead of a mandatory one. + +No functional change. + +Signed-off-by: Adrien Mazarguil +Acked-by: Nelio Laranjeiro +(cherry picked from commit 5f03b79b97d77804620e220a7715b696d8073b0e) +--- + drivers/net/mlx4/Makefile | 1 + + drivers/net/mlx4/mlx4.c | 33 ++--- + drivers/net/mlx4/mlx4_ethdev.c | 1 + + drivers/net/mlx4/mlx4_flow.c | 32 ++--- + drivers/net/mlx4/mlx4_glue.c | 275 +++++++++++++++++++++++++++++++++++++++++ + drivers/net/mlx4/mlx4_glue.h | 80 ++++++++++++ + drivers/net/mlx4/mlx4_intr.c | 10 +- + drivers/net/mlx4/mlx4_mr.c | 7 +- + drivers/net/mlx4/mlx4_rxq.c | 53 ++++---- + drivers/net/mlx4/mlx4_txq.c | 17 +-- + 10 files changed, 438 insertions(+), 71 deletions(-) + create mode 100644 drivers/net/mlx4/mlx4_glue.c + create mode 100644 drivers/net/mlx4/mlx4_glue.h + +diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile +index f1f47c2..7ba304b 100644 +--- a/drivers/net/mlx4/Makefile ++++ b/drivers/net/mlx4/Makefile +@@ -38,6 +38,7 @@ LIB = librte_pmd_mlx4.a + SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4.c + SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_ethdev.c + SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_flow.c ++SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_glue.c + SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_intr.c + SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_mr.c + SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_rxq.c +diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c +index f9e4f9d..7f58e26 100644 +--- a/drivers/net/mlx4/mlx4.c ++++ b/drivers/net/mlx4/mlx4.c +@@ -67,6 +67,7 @@ + #include + + #include "mlx4.h" ++#include "mlx4_glue.h" + #include "mlx4_flow.h" + #include "mlx4_rxtx.h" + #include "mlx4_utils.h" +@@ -218,8 +219,8 @@ struct mlx4_conf { + mlx4_tx_queue_release(dev->data->tx_queues[i]); + if (priv->pd != NULL) { + assert(priv->ctx != NULL); +- claim_zero(ibv_dealloc_pd(priv->pd)); 
+- claim_zero(ibv_close_device(priv->ctx)); ++ claim_zero(mlx4_glue->dealloc_pd(priv->pd)); ++ claim_zero(mlx4_glue->close_device(priv->ctx)); + } else + assert(priv->ctx == NULL); + mlx4_intr_uninstall(priv); +@@ -434,7 +435,7 @@ struct mlx4_conf { + + (void)pci_drv; + assert(pci_drv == &mlx4_driver); +- list = ibv_get_device_list(&i); ++ list = mlx4_glue->get_device_list(&i); + if (list == NULL) { + rte_errno = errno; + assert(rte_errno); +@@ -463,12 +464,12 @@ struct mlx4_conf { + PCI_DEVICE_ID_MELLANOX_CONNECTX3VF); + INFO("PCI information matches, using device \"%s\" (VF: %s)", + list[i]->name, (vf ? "true" : "false")); +- attr_ctx = ibv_open_device(list[i]); ++ attr_ctx = mlx4_glue->open_device(list[i]); + err = errno; + break; + } + if (attr_ctx == NULL) { +- ibv_free_device_list(list); ++ mlx4_glue->free_device_list(list); + switch (err) { + case 0: + rte_errno = ENODEV; +@@ -485,7 +486,7 @@ struct mlx4_conf { + } + ibv_dev = list[i]; + DEBUG("device opened"); +- if (ibv_query_device(attr_ctx, &device_attr)) { ++ if (mlx4_glue->query_device(attr_ctx, &device_attr)) { + rte_errno = ENODEV; + goto error; + } +@@ -512,13 +513,13 @@ struct mlx4_conf { + if (!(conf.ports.enabled & (1 << i))) + continue; + DEBUG("using port %u", port); +- ctx = ibv_open_device(ibv_dev); ++ ctx = mlx4_glue->open_device(ibv_dev); + if (ctx == NULL) { + rte_errno = ENODEV; + goto port_error; + } + /* Check port status. */ +- err = ibv_query_port(ctx, port, &port_attr); ++ err = mlx4_glue->query_port(ctx, port, &port_attr); + if (err) { + rte_errno = err; + ERROR("port query failed: %s", strerror(rte_errno)); +@@ -532,7 +533,7 @@ struct mlx4_conf { + } + if (port_attr.state != IBV_PORT_ACTIVE) + DEBUG("port %d is not active: \"%s\" (%d)", +- port, ibv_port_state_str(port_attr.state), ++ port, mlx4_glue->port_state_str(port_attr.state), + port_attr.state); + /* Make asynchronous FD non-blocking to handle interrupts. 
*/ + if (mlx4_fd_set_non_blocking(ctx->async_fd) < 0) { +@@ -541,7 +542,7 @@ struct mlx4_conf { + goto port_error; + } + /* Allocate protection domain. */ +- pd = ibv_alloc_pd(ctx); ++ pd = mlx4_glue->alloc_pd(ctx); + if (pd == NULL) { + rte_errno = ENOMEM; + ERROR("PD allocation failure"); +@@ -605,7 +606,7 @@ struct mlx4_conf { + char name[RTE_ETH_NAME_MAX_LEN]; + + snprintf(name, sizeof(name), "%s port %u", +- ibv_get_device_name(ibv_dev), port); ++ mlx4_glue->get_device_name(ibv_dev), port); + eth_dev = rte_eth_dev_allocate(name); + } + if (eth_dev == NULL) { +@@ -648,9 +649,9 @@ struct mlx4_conf { + port_error: + rte_free(priv); + if (pd) +- claim_zero(ibv_dealloc_pd(pd)); ++ claim_zero(mlx4_glue->dealloc_pd(pd)); + if (ctx) +- claim_zero(ibv_close_device(ctx)); ++ claim_zero(mlx4_glue->close_device(ctx)); + if (eth_dev) + rte_eth_dev_release_port(eth_dev); + break; +@@ -665,9 +666,9 @@ struct mlx4_conf { + */ + error: + if (attr_ctx) +- claim_zero(ibv_close_device(attr_ctx)); ++ claim_zero(mlx4_glue->close_device(attr_ctx)); + if (list) +- ibv_free_device_list(list); ++ mlx4_glue->free_device_list(list); + assert(rte_errno >= 0); + return -rte_errno; + } +@@ -714,7 +715,7 @@ struct mlx4_conf { + * using this PMD, which is not supported in forked processes. + */ + setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); +- ibv_fork_init(); ++ mlx4_glue->fork_init(); + rte_pci_register(&mlx4_driver); + } + +diff --git a/drivers/net/mlx4/mlx4_ethdev.c b/drivers/net/mlx4/mlx4_ethdev.c +index 2f69e7d..e2f9509 100644 +--- a/drivers/net/mlx4/mlx4_ethdev.c ++++ b/drivers/net/mlx4/mlx4_ethdev.c +@@ -70,6 +70,7 @@ + + #include "mlx4.h" + #include "mlx4_flow.h" ++#include "mlx4_glue.h" + #include "mlx4_rxtx.h" + #include "mlx4_utils.h" + +diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c +index 8b87b29..4a6b8a4 100644 +--- a/drivers/net/mlx4/mlx4_flow.c ++++ b/drivers/net/mlx4/mlx4_flow.c +@@ -65,6 +65,7 @@ + + /* PMD headers. 
*/ + #include "mlx4.h" ++#include "mlx4_glue.h" + #include "mlx4_flow.h" + #include "mlx4_rxtx.h" + #include "mlx4_utils.h" +@@ -914,24 +915,25 @@ struct mlx4_drop { + .priv = priv, + .refcnt = 1, + }; +- drop->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0); ++ drop->cq = mlx4_glue->create_cq(priv->ctx, 1, NULL, NULL, 0); + if (!drop->cq) + goto error; +- drop->qp = ibv_create_qp(priv->pd, +- &(struct ibv_qp_init_attr){ +- .send_cq = drop->cq, +- .recv_cq = drop->cq, +- .qp_type = IBV_QPT_RAW_PACKET, +- }); ++ drop->qp = mlx4_glue->create_qp ++ (priv->pd, ++ &(struct ibv_qp_init_attr){ ++ .send_cq = drop->cq, ++ .recv_cq = drop->cq, ++ .qp_type = IBV_QPT_RAW_PACKET, ++ }); + if (!drop->qp) + goto error; + priv->drop = drop; + return drop; + error: + if (drop->qp) +- claim_zero(ibv_destroy_qp(drop->qp)); ++ claim_zero(mlx4_glue->destroy_qp(drop->qp)); + if (drop->cq) +- claim_zero(ibv_destroy_cq(drop->cq)); ++ claim_zero(mlx4_glue->destroy_cq(drop->cq)); + if (drop) + rte_free(drop); + rte_errno = ENOMEM; +@@ -951,8 +953,8 @@ struct mlx4_drop { + if (--drop->refcnt) + return; + drop->priv->drop = NULL; +- claim_zero(ibv_destroy_qp(drop->qp)); +- claim_zero(ibv_destroy_cq(drop->cq)); ++ claim_zero(mlx4_glue->destroy_qp(drop->qp)); ++ claim_zero(mlx4_glue->destroy_cq(drop->cq)); + rte_free(drop); + } + +@@ -984,7 +986,7 @@ struct mlx4_drop { + if (!enable) { + if (!flow->ibv_flow) + return 0; +- claim_zero(ibv_destroy_flow(flow->ibv_flow)); ++ claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow)); + flow->ibv_flow = NULL; + if (flow->drop) + mlx4_drop_put(priv->drop); +@@ -997,7 +999,7 @@ struct mlx4_drop { + !priv->isolated && + flow->ibv_attr->priority == MLX4_FLOW_PRIORITY_LAST) { + if (flow->ibv_flow) { +- claim_zero(ibv_destroy_flow(flow->ibv_flow)); ++ claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow)); + flow->ibv_flow = NULL; + if (flow->drop) + mlx4_drop_put(priv->drop); +@@ -1027,7 +1029,7 @@ struct mlx4_drop { + if (missing ^ !flow->drop) + return 0; + /* 
Verbs flow needs updating. */ +- claim_zero(ibv_destroy_flow(flow->ibv_flow)); ++ claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow)); + flow->ibv_flow = NULL; + if (flow->drop) + mlx4_drop_put(priv->drop); +@@ -1059,7 +1061,7 @@ struct mlx4_drop { + assert(qp); + if (flow->ibv_flow) + return 0; +- flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr); ++ flow->ibv_flow = mlx4_glue->create_flow(qp, flow->ibv_attr); + if (flow->ibv_flow) + return 0; + if (flow->drop) +diff --git a/drivers/net/mlx4/mlx4_glue.c b/drivers/net/mlx4/mlx4_glue.c +new file mode 100644 +index 0000000..30797bd +--- /dev/null ++++ b/drivers/net/mlx4/mlx4_glue.c +@@ -0,0 +1,275 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright 2018 6WIND S.A. ++ * Copyright 2018 Mellanox ++ */ ++ ++/* Verbs headers do not support -pedantic. */ ++#ifdef PEDANTIC ++#pragma GCC diagnostic ignored "-Wpedantic" ++#endif ++#include ++#include ++#ifdef PEDANTIC ++#pragma GCC diagnostic error "-Wpedantic" ++#endif ++ ++#include "mlx4_glue.h" ++ ++static int ++mlx4_glue_fork_init(void) ++{ ++ return ibv_fork_init(); ++} ++ ++static int ++mlx4_glue_get_async_event(struct ibv_context *context, ++ struct ibv_async_event *event) ++{ ++ return ibv_get_async_event(context, event); ++} ++ ++static void ++mlx4_glue_ack_async_event(struct ibv_async_event *event) ++{ ++ ibv_ack_async_event(event); ++} ++ ++static struct ibv_pd * ++mlx4_glue_alloc_pd(struct ibv_context *context) ++{ ++ return ibv_alloc_pd(context); ++} ++ ++static int ++mlx4_glue_dealloc_pd(struct ibv_pd *pd) ++{ ++ return ibv_dealloc_pd(pd); ++} ++ ++static struct ibv_device ** ++mlx4_glue_get_device_list(int *num_devices) ++{ ++ return ibv_get_device_list(num_devices); ++} ++ ++static void ++mlx4_glue_free_device_list(struct ibv_device **list) ++{ ++ ibv_free_device_list(list); ++} ++ ++static struct ibv_context * ++mlx4_glue_open_device(struct ibv_device *device) ++{ ++ return ibv_open_device(device); ++} ++ ++static int 
++mlx4_glue_close_device(struct ibv_context *context) ++{ ++ return ibv_close_device(context); ++} ++ ++static const char * ++mlx4_glue_get_device_name(struct ibv_device *device) ++{ ++ return ibv_get_device_name(device); ++} ++ ++static int ++mlx4_glue_query_device(struct ibv_context *context, ++ struct ibv_device_attr *device_attr) ++{ ++ return ibv_query_device(context, device_attr); ++} ++ ++static int ++mlx4_glue_query_device_ex(struct ibv_context *context, ++ const struct ibv_query_device_ex_input *input, ++ struct ibv_device_attr_ex *attr) ++{ ++ return ibv_query_device_ex(context, input, attr); ++} ++ ++static int ++mlx4_glue_query_port(struct ibv_context *context, uint8_t port_num, ++ struct ibv_port_attr *port_attr) ++{ ++ return ibv_query_port(context, port_num, port_attr); ++} ++ ++static const char * ++mlx4_glue_port_state_str(enum ibv_port_state port_state) ++{ ++ return ibv_port_state_str(port_state); ++} ++ ++static struct ibv_comp_channel * ++mlx4_glue_create_comp_channel(struct ibv_context *context) ++{ ++ return ibv_create_comp_channel(context); ++} ++ ++static int ++mlx4_glue_destroy_comp_channel(struct ibv_comp_channel *channel) ++{ ++ return ibv_destroy_comp_channel(channel); ++} ++ ++static struct ibv_cq * ++mlx4_glue_create_cq(struct ibv_context *context, int cqe, void *cq_context, ++ struct ibv_comp_channel *channel, int comp_vector) ++{ ++ return ibv_create_cq(context, cqe, cq_context, channel, comp_vector); ++} ++ ++static int ++mlx4_glue_destroy_cq(struct ibv_cq *cq) ++{ ++ return ibv_destroy_cq(cq); ++} ++ ++static int ++mlx4_glue_get_cq_event(struct ibv_comp_channel *channel, struct ibv_cq **cq, ++ void **cq_context) ++{ ++ return ibv_get_cq_event(channel, cq, cq_context); ++} ++ ++static void ++mlx4_glue_ack_cq_events(struct ibv_cq *cq, unsigned int nevents) ++{ ++ ibv_ack_cq_events(cq, nevents); ++} ++ ++static struct ibv_flow * ++mlx4_glue_create_flow(struct ibv_qp *qp, struct ibv_flow_attr *flow) ++{ ++ return ibv_create_flow(qp, 
flow); ++} ++ ++static int ++mlx4_glue_destroy_flow(struct ibv_flow *flow_id) ++{ ++ return ibv_destroy_flow(flow_id); ++} ++ ++static struct ibv_qp * ++mlx4_glue_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr) ++{ ++ return ibv_create_qp(pd, qp_init_attr); ++} ++ ++static struct ibv_qp * ++mlx4_glue_create_qp_ex(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *qp_init_attr_ex) ++{ ++ return ibv_create_qp_ex(context, qp_init_attr_ex); ++} ++ ++static int ++mlx4_glue_destroy_qp(struct ibv_qp *qp) ++{ ++ return ibv_destroy_qp(qp); ++} ++ ++static int ++mlx4_glue_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask) ++{ ++ return ibv_modify_qp(qp, attr, attr_mask); ++} ++ ++static struct ibv_mr * ++mlx4_glue_reg_mr(struct ibv_pd *pd, void *addr, size_t length, int access) ++{ ++ return ibv_reg_mr(pd, addr, length, access); ++} ++ ++static int ++mlx4_glue_dereg_mr(struct ibv_mr *mr) ++{ ++ return ibv_dereg_mr(mr); ++} ++ ++static struct ibv_rwq_ind_table * ++mlx4_glue_create_rwq_ind_table(struct ibv_context *context, ++ struct ibv_rwq_ind_table_init_attr *init_attr) ++{ ++ return ibv_create_rwq_ind_table(context, init_attr); ++} ++ ++static int ++mlx4_glue_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table) ++{ ++ return ibv_destroy_rwq_ind_table(rwq_ind_table); ++} ++ ++static struct ibv_wq * ++mlx4_glue_create_wq(struct ibv_context *context, ++ struct ibv_wq_init_attr *wq_init_attr) ++{ ++ return ibv_create_wq(context, wq_init_attr); ++} ++ ++static int ++mlx4_glue_destroy_wq(struct ibv_wq *wq) ++{ ++ return ibv_destroy_wq(wq); ++} ++static int ++mlx4_glue_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *wq_attr) ++{ ++ return ibv_modify_wq(wq, wq_attr); ++} ++ ++static int ++mlx4_glue_dv_init_obj(struct mlx4dv_obj *obj, uint64_t obj_type) ++{ ++ return mlx4dv_init_obj(obj, obj_type); ++} ++ ++static int ++mlx4_glue_dv_set_context_attr(struct ibv_context *context, ++ enum mlx4dv_set_ctx_attr_type attr_type, ++ 
void *attr) ++{ ++ return mlx4dv_set_context_attr(context, attr_type, attr); ++} ++ ++const struct mlx4_glue *mlx4_glue = &(const struct mlx4_glue){ ++ .fork_init = mlx4_glue_fork_init, ++ .get_async_event = mlx4_glue_get_async_event, ++ .ack_async_event = mlx4_glue_ack_async_event, ++ .alloc_pd = mlx4_glue_alloc_pd, ++ .dealloc_pd = mlx4_glue_dealloc_pd, ++ .get_device_list = mlx4_glue_get_device_list, ++ .free_device_list = mlx4_glue_free_device_list, ++ .open_device = mlx4_glue_open_device, ++ .close_device = mlx4_glue_close_device, ++ .get_device_name = mlx4_glue_get_device_name, ++ .query_device = mlx4_glue_query_device, ++ .query_device_ex = mlx4_glue_query_device_ex, ++ .query_port = mlx4_glue_query_port, ++ .port_state_str = mlx4_glue_port_state_str, ++ .create_comp_channel = mlx4_glue_create_comp_channel, ++ .destroy_comp_channel = mlx4_glue_destroy_comp_channel, ++ .create_cq = mlx4_glue_create_cq, ++ .destroy_cq = mlx4_glue_destroy_cq, ++ .get_cq_event = mlx4_glue_get_cq_event, ++ .ack_cq_events = mlx4_glue_ack_cq_events, ++ .create_flow = mlx4_glue_create_flow, ++ .destroy_flow = mlx4_glue_destroy_flow, ++ .create_qp = mlx4_glue_create_qp, ++ .create_qp_ex = mlx4_glue_create_qp_ex, ++ .destroy_qp = mlx4_glue_destroy_qp, ++ .modify_qp = mlx4_glue_modify_qp, ++ .reg_mr = mlx4_glue_reg_mr, ++ .dereg_mr = mlx4_glue_dereg_mr, ++ .create_rwq_ind_table = mlx4_glue_create_rwq_ind_table, ++ .destroy_rwq_ind_table = mlx4_glue_destroy_rwq_ind_table, ++ .create_wq = mlx4_glue_create_wq, ++ .destroy_wq = mlx4_glue_destroy_wq, ++ .modify_wq = mlx4_glue_modify_wq, ++ .dv_init_obj = mlx4_glue_dv_init_obj, ++ .dv_set_context_attr = mlx4_glue_dv_set_context_attr, ++}; +diff --git a/drivers/net/mlx4/mlx4_glue.h b/drivers/net/mlx4/mlx4_glue.h +new file mode 100644 +index 0000000..0623511 +--- /dev/null ++++ b/drivers/net/mlx4/mlx4_glue.h +@@ -0,0 +1,80 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright 2018 6WIND S.A. 
++ * Copyright 2018 Mellanox ++ */ ++ ++#ifndef MLX4_GLUE_H_ ++#define MLX4_GLUE_H_ ++ ++/* Verbs headers do not support -pedantic. */ ++#ifdef PEDANTIC ++#pragma GCC diagnostic ignored "-Wpedantic" ++#endif ++#include ++#include ++#ifdef PEDANTIC ++#pragma GCC diagnostic error "-Wpedantic" ++#endif ++ ++struct mlx4_glue { ++ int (*fork_init)(void); ++ int (*get_async_event)(struct ibv_context *context, ++ struct ibv_async_event *event); ++ void (*ack_async_event)(struct ibv_async_event *event); ++ struct ibv_pd *(*alloc_pd)(struct ibv_context *context); ++ int (*dealloc_pd)(struct ibv_pd *pd); ++ struct ibv_device **(*get_device_list)(int *num_devices); ++ void (*free_device_list)(struct ibv_device **list); ++ struct ibv_context *(*open_device)(struct ibv_device *device); ++ int (*close_device)(struct ibv_context *context); ++ const char *(*get_device_name)(struct ibv_device *device); ++ int (*query_device)(struct ibv_context *context, ++ struct ibv_device_attr *device_attr); ++ int (*query_device_ex)(struct ibv_context *context, ++ const struct ibv_query_device_ex_input *input, ++ struct ibv_device_attr_ex *attr); ++ int (*query_port)(struct ibv_context *context, uint8_t port_num, ++ struct ibv_port_attr *port_attr); ++ const char *(*port_state_str)(enum ibv_port_state port_state); ++ struct ibv_comp_channel *(*create_comp_channel) ++ (struct ibv_context *context); ++ int (*destroy_comp_channel)(struct ibv_comp_channel *channel); ++ struct ibv_cq *(*create_cq)(struct ibv_context *context, int cqe, ++ void *cq_context, ++ struct ibv_comp_channel *channel, ++ int comp_vector); ++ int (*destroy_cq)(struct ibv_cq *cq); ++ int (*get_cq_event)(struct ibv_comp_channel *channel, ++ struct ibv_cq **cq, void **cq_context); ++ void (*ack_cq_events)(struct ibv_cq *cq, unsigned int nevents); ++ struct ibv_flow *(*create_flow)(struct ibv_qp *qp, ++ struct ibv_flow_attr *flow); ++ int (*destroy_flow)(struct ibv_flow *flow_id); ++ struct ibv_qp *(*create_qp)(struct ibv_pd *pd, 
++ struct ibv_qp_init_attr *qp_init_attr); ++ struct ibv_qp *(*create_qp_ex) ++ (struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *qp_init_attr_ex); ++ int (*destroy_qp)(struct ibv_qp *qp); ++ int (*modify_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr, ++ int attr_mask); ++ struct ibv_mr *(*reg_mr)(struct ibv_pd *pd, void *addr, ++ size_t length, int access); ++ int (*dereg_mr)(struct ibv_mr *mr); ++ struct ibv_rwq_ind_table *(*create_rwq_ind_table) ++ (struct ibv_context *context, ++ struct ibv_rwq_ind_table_init_attr *init_attr); ++ int (*destroy_rwq_ind_table)(struct ibv_rwq_ind_table *rwq_ind_table); ++ struct ibv_wq *(*create_wq)(struct ibv_context *context, ++ struct ibv_wq_init_attr *wq_init_attr); ++ int (*destroy_wq)(struct ibv_wq *wq); ++ int (*modify_wq)(struct ibv_wq *wq, struct ibv_wq_attr *wq_attr); ++ int (*dv_init_obj)(struct mlx4dv_obj *obj, uint64_t obj_type); ++ int (*dv_set_context_attr)(struct ibv_context *context, ++ enum mlx4dv_set_ctx_attr_type attr_type, ++ void *attr); ++}; ++ ++const struct mlx4_glue *mlx4_glue; ++ ++#endif /* MLX4_GLUE_H_ */ +diff --git a/drivers/net/mlx4/mlx4_intr.c b/drivers/net/mlx4/mlx4_intr.c +index 50d1976..c63806d 100644 +--- a/drivers/net/mlx4/mlx4_intr.c ++++ b/drivers/net/mlx4/mlx4_intr.c +@@ -57,6 +57,7 @@ + #include + + #include "mlx4.h" ++#include "mlx4_glue.h" + #include "mlx4_rxtx.h" + #include "mlx4_utils.h" + +@@ -216,7 +217,7 @@ + unsigned int i; + + /* Read all message and acknowledge them. 
*/ +- while (!ibv_get_async_event(priv->ctx, &event)) { ++ while (!mlx4_glue->get_async_event(priv->ctx, &event)) { + switch (event.event_type) { + case IBV_EVENT_PORT_ACTIVE: + case IBV_EVENT_PORT_ERR: +@@ -231,7 +232,7 @@ + DEBUG("event type %d on physical port %d not handled", + event.event_type, event.element.port_num); + } +- ibv_ack_async_event(&event); ++ mlx4_glue->ack_async_event(&event); + } + for (i = 0; i != RTE_DIM(caught); ++i) + if (caught[i]) +@@ -354,7 +355,8 @@ + if (!rxq || !rxq->channel) { + ret = EINVAL; + } else { +- ret = ibv_get_cq_event(rxq->cq->channel, &ev_cq, &ev_ctx); ++ ret = mlx4_glue->get_cq_event(rxq->cq->channel, &ev_cq, ++ &ev_ctx); + if (ret || ev_cq != rxq->cq) + ret = EINVAL; + } +@@ -364,7 +366,7 @@ + idx); + } else { + rxq->mcq.arm_sn++; +- ibv_ack_cq_events(rxq->cq, 1); ++ mlx4_glue->ack_cq_events(rxq->cq, 1); + } + return -ret; + } +diff --git a/drivers/net/mlx4/mlx4_mr.c b/drivers/net/mlx4/mlx4_mr.c +index 2a3e269..493c008 100644 +--- a/drivers/net/mlx4/mlx4_mr.c ++++ b/drivers/net/mlx4/mlx4_mr.c +@@ -60,6 +60,7 @@ + #include + #include + ++#include "mlx4_glue.h" + #include "mlx4_rxtx.h" + #include "mlx4_utils.h" + +@@ -200,8 +201,8 @@ struct mlx4_mr * + .end = end, + .refcnt = 1, + .priv = priv, +- .mr = ibv_reg_mr(priv->pd, (void *)start, end - start, +- IBV_ACCESS_LOCAL_WRITE), ++ .mr = mlx4_glue->reg_mr(priv->pd, (void *)start, end - start, ++ IBV_ACCESS_LOCAL_WRITE), + .mp = mp, + }; + if (mr->mr) { +@@ -240,7 +241,7 @@ struct mlx4_mr * + if (--mr->refcnt) + goto release; + LIST_REMOVE(mr, next); +- claim_zero(ibv_dereg_mr(mr->mr)); ++ claim_zero(mlx4_glue->dereg_mr(mr->mr)); + rte_free(mr); + release: + rte_spinlock_unlock(&priv->mr_lock); +diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c +index 53313c5..7ab3a46 100644 +--- a/drivers/net/mlx4/mlx4_rxq.c ++++ b/drivers/net/mlx4/mlx4_rxq.c +@@ -62,6 +62,7 @@ + #include + + #include "mlx4.h" ++#include "mlx4_glue.h" + #include "mlx4_flow.h" + 
#include "mlx4_rxtx.h" + #include "mlx4_utils.h" +@@ -231,7 +232,7 @@ struct mlx4_rss * + } + ind_tbl[i] = rxq->wq; + } +- rss->ind = ibv_create_rwq_ind_table ++ rss->ind = mlx4_glue->create_rwq_ind_table + (priv->ctx, + &(struct ibv_rwq_ind_table_init_attr){ + .log_ind_tbl_size = rte_log2_u32(RTE_DIM(ind_tbl)), +@@ -243,7 +244,7 @@ struct mlx4_rss * + msg = "RSS indirection table creation failure"; + goto error; + } +- rss->qp = ibv_create_qp_ex ++ rss->qp = mlx4_glue->create_qp_ex + (priv->ctx, + &(struct ibv_qp_init_attr_ex){ + .comp_mask = (IBV_QP_INIT_ATTR_PD | +@@ -264,7 +265,7 @@ struct mlx4_rss * + msg = "RSS hash QP creation failure"; + goto error; + } +- ret = ibv_modify_qp ++ ret = mlx4_glue->modify_qp + (rss->qp, + &(struct ibv_qp_attr){ + .qp_state = IBV_QPS_INIT, +@@ -275,7 +276,7 @@ struct mlx4_rss * + msg = "failed to switch RSS hash QP to INIT state"; + goto error; + } +- ret = ibv_modify_qp ++ ret = mlx4_glue->modify_qp + (rss->qp, + &(struct ibv_qp_attr){ + .qp_state = IBV_QPS_RTR, +@@ -288,11 +289,11 @@ struct mlx4_rss * + return 0; + error: + if (rss->qp) { +- claim_zero(ibv_destroy_qp(rss->qp)); ++ claim_zero(mlx4_glue->destroy_qp(rss->qp)); + rss->qp = NULL; + } + if (rss->ind) { +- claim_zero(ibv_destroy_rwq_ind_table(rss->ind)); ++ claim_zero(mlx4_glue->destroy_rwq_ind_table(rss->ind)); + rss->ind = NULL; + } + while (i--) +@@ -325,9 +326,9 @@ struct mlx4_rss * + assert(rss->ind); + if (--rss->usecnt) + return; +- claim_zero(ibv_destroy_qp(rss->qp)); ++ claim_zero(mlx4_glue->destroy_qp(rss->qp)); + rss->qp = NULL; +- claim_zero(ibv_destroy_rwq_ind_table(rss->ind)); ++ claim_zero(mlx4_glue->destroy_rwq_ind_table(rss->ind)); + rss->ind = NULL; + for (i = 0; i != rss->queues; ++i) + mlx4_rxq_detach(priv->dev->data->rx_queues[rss->queue_id[i]]); +@@ -364,9 +365,10 @@ struct mlx4_rss * + int ret; + + /* Prepare range for RSS contexts before creating the first WQ. 
*/ +- ret = mlx4dv_set_context_attr(priv->ctx, +- MLX4DV_SET_CTX_ATTR_LOG_WQS_RANGE_SZ, +- &log2_range); ++ ret = mlx4_glue->dv_set_context_attr ++ (priv->ctx, ++ MLX4DV_SET_CTX_ATTR_LOG_WQS_RANGE_SZ, ++ &log2_range); + if (ret) { + ERROR("cannot set up range size for RSS context to %u" + " (for %u Rx queues), error: %s", +@@ -402,13 +404,13 @@ struct mlx4_rss * + * sequentially and are guaranteed to never be reused in the + * same context by the underlying implementation. + */ +- cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0); ++ cq = mlx4_glue->create_cq(priv->ctx, 1, NULL, NULL, 0); + if (!cq) { + ret = ENOMEM; + msg = "placeholder CQ creation failure"; + goto error; + } +- wq = ibv_create_wq ++ wq = mlx4_glue->create_wq + (priv->ctx, + &(struct ibv_wq_init_attr){ + .wq_type = IBV_WQT_RQ, +@@ -419,11 +421,11 @@ struct mlx4_rss * + }); + if (wq) { + wq_num = wq->wq_num; +- claim_zero(ibv_destroy_wq(wq)); ++ claim_zero(mlx4_glue->destroy_wq(wq)); + } else { + wq_num = 0; /* Shut up GCC 4.8 warnings. 
*/ + } +- claim_zero(ibv_destroy_cq(cq)); ++ claim_zero(mlx4_glue->destroy_cq(cq)); + if (!wq) { + ret = ENOMEM; + msg = "placeholder WQ creation failure"; +@@ -522,13 +524,14 @@ struct mlx4_rss * + int ret; + + assert(rte_is_power_of_2(elts_n)); +- cq = ibv_create_cq(priv->ctx, elts_n / sges_n, NULL, rxq->channel, 0); ++ cq = mlx4_glue->create_cq(priv->ctx, elts_n / sges_n, NULL, ++ rxq->channel, 0); + if (!cq) { + ret = ENOMEM; + msg = "CQ creation failure"; + goto error; + } +- wq = ibv_create_wq ++ wq = mlx4_glue->create_wq + (priv->ctx, + &(struct ibv_wq_init_attr){ + .wq_type = IBV_WQT_RQ, +@@ -542,7 +545,7 @@ struct mlx4_rss * + msg = "WQ creation failure"; + goto error; + } +- ret = ibv_modify_wq ++ ret = mlx4_glue->modify_wq + (wq, + &(struct ibv_wq_attr){ + .attr_mask = IBV_WQ_ATTR_STATE, +@@ -557,7 +560,7 @@ struct mlx4_rss * + mlxdv.cq.out = &dv_cq; + mlxdv.rwq.in = wq; + mlxdv.rwq.out = &dv_rwq; +- ret = mlx4dv_init_obj(&mlxdv, MLX4DV_OBJ_RWQ | MLX4DV_OBJ_CQ); ++ ret = mlx4_glue->dv_init_obj(&mlxdv, MLX4DV_OBJ_RWQ | MLX4DV_OBJ_CQ); + if (ret) { + msg = "failed to obtain device information from WQ/CQ objects"; + goto error; +@@ -619,9 +622,9 @@ struct mlx4_rss * + return 0; + error: + if (wq) +- claim_zero(ibv_destroy_wq(wq)); ++ claim_zero(mlx4_glue->destroy_wq(wq)); + if (cq) +- claim_zero(ibv_destroy_cq(cq)); ++ claim_zero(mlx4_glue->destroy_cq(cq)); + rte_errno = ret; + ERROR("error while attaching Rx queue %p: %s: %s", + (void *)rxq, msg, strerror(ret)); +@@ -649,9 +652,9 @@ struct mlx4_rss * + memset(&rxq->mcq, 0, sizeof(rxq->mcq)); + rxq->rq_db = NULL; + rxq->wqes = NULL; +- claim_zero(ibv_destroy_wq(rxq->wq)); ++ claim_zero(mlx4_glue->destroy_wq(rxq->wq)); + rxq->wq = NULL; +- claim_zero(ibv_destroy_cq(rxq->cq)); ++ claim_zero(mlx4_glue->destroy_cq(rxq->cq)); + rxq->cq = NULL; + DEBUG("%p: freeing Rx queue elements", (void *)rxq); + for (i = 0; (i != RTE_DIM(*elts)); ++i) { +@@ -812,7 +815,7 @@ struct mlx4_rss * + goto error; + } + if 
(dev->data->dev_conf.intr_conf.rxq) { +- rxq->channel = ibv_create_comp_channel(priv->ctx); ++ rxq->channel = mlx4_glue->create_comp_channel(priv->ctx); + if (rxq->channel == NULL) { + rte_errno = ENOMEM; + ERROR("%p: Rx interrupt completion channel creation" +@@ -867,7 +870,7 @@ struct mlx4_rss * + assert(!rxq->wqes); + assert(!rxq->rq_db); + if (rxq->channel) +- claim_zero(ibv_destroy_comp_channel(rxq->channel)); ++ claim_zero(mlx4_glue->destroy_comp_channel(rxq->channel)); + if (rxq->mr) + mlx4_mr_put(rxq->mr); + rte_free(rxq); +diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c +index 7882a4d..d97221b 100644 +--- a/drivers/net/mlx4/mlx4_txq.c ++++ b/drivers/net/mlx4/mlx4_txq.c +@@ -60,6 +60,7 @@ + + #include "mlx4.h" + #include "mlx4_autoconf.h" ++#include "mlx4_glue.h" + #include "mlx4_prm.h" + #include "mlx4_rxtx.h" + #include "mlx4_utils.h" +@@ -284,7 +285,7 @@ struct txq_mp2mr_mbuf_check_data { + .lb = !!priv->vf, + .bounce_buf = bounce_buf, + }; +- txq->cq = ibv_create_cq(priv->ctx, desc, NULL, NULL, 0); ++ txq->cq = mlx4_glue->create_cq(priv->ctx, desc, NULL, NULL, 0); + if (!txq->cq) { + rte_errno = ENOMEM; + ERROR("%p: CQ creation failure: %s", +@@ -304,7 +305,7 @@ struct txq_mp2mr_mbuf_check_data { + /* No completion events must occur by default. */ + .sq_sig_all = 0, + }; +- txq->qp = ibv_create_qp(priv->pd, &qp_init_attr); ++ txq->qp = mlx4_glue->create_qp(priv->pd, &qp_init_attr); + if (!txq->qp) { + rte_errno = errno ? 
errno : EINVAL; + ERROR("%p: QP creation failure: %s", +@@ -312,7 +313,7 @@ struct txq_mp2mr_mbuf_check_data { + goto error; + } + txq->max_inline = qp_init_attr.cap.max_inline_data; +- ret = ibv_modify_qp ++ ret = mlx4_glue->modify_qp + (txq->qp, + &(struct ibv_qp_attr){ + .qp_state = IBV_QPS_INIT, +@@ -325,7 +326,7 @@ struct txq_mp2mr_mbuf_check_data { + (void *)dev, strerror(rte_errno)); + goto error; + } +- ret = ibv_modify_qp ++ ret = mlx4_glue->modify_qp + (txq->qp, + &(struct ibv_qp_attr){ + .qp_state = IBV_QPS_RTR, +@@ -337,7 +338,7 @@ struct txq_mp2mr_mbuf_check_data { + (void *)dev, strerror(rte_errno)); + goto error; + } +- ret = ibv_modify_qp ++ ret = mlx4_glue->modify_qp + (txq->qp, + &(struct ibv_qp_attr){ + .qp_state = IBV_QPS_RTS, +@@ -354,7 +355,7 @@ struct txq_mp2mr_mbuf_check_data { + mlxdv.cq.out = &dv_cq; + mlxdv.qp.in = txq->qp; + mlxdv.qp.out = &dv_qp; +- ret = mlx4dv_init_obj(&mlxdv, MLX4DV_OBJ_QP | MLX4DV_OBJ_CQ); ++ ret = mlx4_glue->dv_init_obj(&mlxdv, MLX4DV_OBJ_QP | MLX4DV_OBJ_CQ); + if (ret) { + rte_errno = EINVAL; + ERROR("%p: failed to obtain information needed for" +@@ -401,9 +402,9 @@ struct txq_mp2mr_mbuf_check_data { + } + mlx4_txq_free_elts(txq); + if (txq->qp) +- claim_zero(ibv_destroy_qp(txq->qp)); ++ claim_zero(mlx4_glue->destroy_qp(txq->qp)); + if (txq->cq) +- claim_zero(ibv_destroy_cq(txq->cq)); ++ claim_zero(mlx4_glue->destroy_cq(txq->cq)); + for (i = 0; i != RTE_DIM(txq->mp2mr); ++i) { + if (!txq->mp2mr[i].mp) + break; +-- +1.8.3.1 + diff --git a/SOURCES/mlnx-dpdk-0002-net-mlx4-spawn-rdma-core-dependency-plug-in.patch b/SOURCES/mlnx-dpdk-0002-net-mlx4-spawn-rdma-core-dependency-plug-in.patch new file mode 100644 index 0000000..8c17778 --- /dev/null +++ b/SOURCES/mlnx-dpdk-0002-net-mlx4-spawn-rdma-core-dependency-plug-in.patch @@ -0,0 +1,245 @@ +From 1e6ab669d81e73ae7f4dead275d03e7f77ad76f7 Mon Sep 17 00:00:00 2001 +From: Adrien Mazarguil +Date: Tue, 30 Jan 2018 16:34:54 +0100 +Subject: [PATCH 2/9] net/mlx4: spawn rdma-core 
dependency plug-in + +When mlx4 is not compiled directly as an independent shared object (e.g. +CONFIG_RTE_BUILD_SHARED_LIB not enabled for performance reasons), DPDK +applications inherit its dependencies on libibverbs and libmlx4 through +rte.app.mk. + +This is an issue both when DPDK is delivered as a binary package (Linux +distributions) and for end users because rdma-core then propagates as a +mandatory dependency for everything. + +Application writers relying on binary DPDK packages are not necessarily +aware of this fact and may end up delivering packages with broken +dependencies. + +This patch therefore introduces an intermediate internal plug-in +hard-linked with rdma-core (to preserve symbol versioning) loaded by the +PMD through dlopen(), so that a missing rdma-core does not cause unresolved +symbols, allowing applications to start normally. + +Signed-off-by: Adrien Mazarguil +(cherry picked from commit 9e3391b98b6374e2796e0ae38dcce314efdc4f37) +--- + config/common_base | 1 + + doc/guides/nics/mlx4.rst | 13 +++++++++++++ + drivers/net/mlx4/Makefile | 29 ++++++++++++++++++++++++++++ + drivers/net/mlx4/mlx4.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++ + mk/rte.app.mk | 4 ++++ + 5 files changed, 96 insertions(+) + +diff --git a/config/common_base b/config/common_base +index e74febe..71a764c 100644 +--- a/config/common_base ++++ b/config/common_base +@@ -231,6 +231,7 @@ CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR=y + CONFIG_RTE_LIBRTE_MLX4_PMD=n + CONFIG_RTE_LIBRTE_MLX4_DEBUG=n + CONFIG_RTE_LIBRTE_MLX4_DEBUG_BROKEN_VERBS=n ++CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS=n + CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE=8 + + # +diff --git a/doc/guides/nics/mlx4.rst b/doc/guides/nics/mlx4.rst +index 22341b9..5912722 100644 +--- a/doc/guides/nics/mlx4.rst ++++ b/doc/guides/nics/mlx4.rst +@@ -86,6 +86,19 @@ These options can be modified in the ``.config`` file. + + Toggle compilation of librte_pmd_mlx4 itself. 
+ ++- ``CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS`` (default **n**) ++ ++ Build PMD with additional code to make it loadable without hard ++ dependencies on **libibverbs** nor **libmlx4**, which may not be installed ++ on the target system. ++ ++ In this mode, their presence is still required for it to run properly, ++ however their absence won't prevent a DPDK application from starting (with ++ ``CONFIG_RTE_BUILD_SHARED_LIB`` disabled) and they won't show up as ++ missing with ``ldd(1)``. ++ ++ This option has no performance impact. ++ + - ``CONFIG_RTE_LIBRTE_MLX4_DEBUG`` (default **n**) + + Toggle debugging code and stricter compilation flags. Enabling this option +diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile +index 7ba304b..60ee120 100644 +--- a/drivers/net/mlx4/Makefile ++++ b/drivers/net/mlx4/Makefile +@@ -33,12 +33,15 @@ include $(RTE_SDK)/mk/rte.vars.mk + + # Library name. + LIB = librte_pmd_mlx4.a ++LIB_GLUE = librte_pmd_mlx4_glue.so + + # Sources. + SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4.c + SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_ethdev.c + SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_flow.c ++ifneq ($(CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS),y) + SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_glue.c ++endif + SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_intr.c + SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_mr.c + SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_rxq.c +@@ -46,6 +49,10 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_rxtx.c + SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_txq.c + SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_utils.c + ++ifeq ($(CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS),y) ++INSTALL-$(CONFIG_RTE_LIBRTE_MLX4_PMD)-lib += $(LIB_GLUE) ++endif ++ + # Basic CFLAGS. 
+ CFLAGS += -O3 + CFLAGS += -std=c11 -Wall -Wextra +@@ -55,7 +62,13 @@ CFLAGS += -D_BSD_SOURCE + CFLAGS += -D_DEFAULT_SOURCE + CFLAGS += -D_XOPEN_SOURCE=600 + CFLAGS += $(WERROR_FLAGS) ++ifeq ($(CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS),y) ++CFLAGS += -DMLX4_GLUE='"$(LIB_GLUE)"' ++CFLAGS_mlx4_glue.o += -fPIC ++LDLIBS += -ldl ++else + LDLIBS += -libverbs -lmlx4 ++endif + LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring + LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs + LDLIBS += -lrte_bus_pci +@@ -113,7 +126,23 @@ mlx4_autoconf.h: mlx4_autoconf.h.new + + $(SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD):.c=.o): mlx4_autoconf.h + ++# Generate dependency plug-in for rdma-core when the PMD must not be linked ++# directly, so that applications do not inherit this dependency. ++ ++ifeq ($(CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS),y) ++ ++$(LIB): $(LIB_GLUE) ++ ++$(LIB_GLUE): mlx4_glue.o ++ $Q $(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) \ ++ -s -shared -o $@ $< -libverbs -lmlx4 ++ ++mlx4_glue.o: mlx4_autoconf.h ++ ++endif ++ + clean_mlx4: FORCE + $Q rm -f -- mlx4_autoconf.h mlx4_autoconf.h.new ++ $Q rm -f -- mlx4_glue.o $(LIB_GLUE) + + clean: clean_mlx4 +diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c +index 7f58e26..d12b00c 100644 +--- a/drivers/net/mlx4/mlx4.c ++++ b/drivers/net/mlx4/mlx4.c +@@ -37,6 +37,7 @@ + */ + + #include ++#include + #include + #include + #include +@@ -44,6 +45,7 @@ + #include + #include + #include ++#include + + /* Verbs headers do not support -pedantic. */ + #ifdef PEDANTIC +@@ -55,6 +57,7 @@ + #endif + + #include ++#include + #include + #include + #include +@@ -701,6 +704,47 @@ struct mlx4_conf { + RTE_PCI_DRV_INTR_RMV, + }; + ++#ifdef RTE_LIBRTE_MLX4_DLOPEN_DEPS ++ ++/** ++ * Initialization routine for run-time dependency on rdma-core. 
++ */ ++static int ++mlx4_glue_init(void) ++{ ++ void *handle = NULL; ++ void **sym; ++ const char *dlmsg; ++ ++ handle = dlopen(MLX4_GLUE, RTLD_LAZY); ++ if (!handle) { ++ rte_errno = EINVAL; ++ dlmsg = dlerror(); ++ if (dlmsg) ++ WARN("cannot load glue library: %s", dlmsg); ++ goto glue_error; ++ } ++ sym = dlsym(handle, "mlx4_glue"); ++ if (!sym || !*sym) { ++ rte_errno = EINVAL; ++ dlmsg = dlerror(); ++ if (dlmsg) ++ ERROR("cannot resolve glue symbol: %s", dlmsg); ++ goto glue_error; ++ } ++ mlx4_glue = *sym; ++ return 0; ++glue_error: ++ if (handle) ++ dlclose(handle); ++ WARN("cannot initialize PMD due to missing run-time" ++ " dependency on rdma-core libraries (libibverbs," ++ " libmlx4)"); ++ return -rte_errno; ++} ++ ++#endif ++ + /** + * Driver initialization routine. + */ +@@ -715,6 +759,11 @@ struct mlx4_conf { + * using this PMD, which is not supported in forked processes. + */ + setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); ++#ifdef RTE_LIBRTE_MLX4_DLOPEN_DEPS ++ if (mlx4_glue_init()) ++ return; ++ assert(mlx4_glue); ++#endif + mlx4_glue->fork_init(); + rte_pci_register(&mlx4_driver); + } +diff --git a/mk/rte.app.mk b/mk/rte.app.mk +index 6a6a745..6ececfe 100644 +--- a/mk/rte.app.mk ++++ b/mk/rte.app.mk +@@ -141,7 +141,11 @@ ifeq ($(CONFIG_RTE_LIBRTE_KNI),y) + _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += -lrte_pmd_kni + endif + _LDLIBS-$(CONFIG_RTE_LIBRTE_LIO_PMD) += -lrte_pmd_lio ++ifeq ($(CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS),y) ++_LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += -lrte_pmd_mlx4 -ldl ++else + _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += -lrte_pmd_mlx4 -libverbs -lmlx4 ++endif + _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += -lrte_pmd_mlx5 -libverbs -lmlx5 + _LDLIBS-$(CONFIG_RTE_LIBRTE_MRVL_PMD) += -lrte_pmd_mrvl -L$(LIBMUSDK_PATH)/lib -lmusdk + _LDLIBS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += -lrte_pmd_nfp +-- +1.8.3.1 + diff --git a/SOURCES/mlnx-dpdk-0003-net-mlx5-move-rdma-core-calls-to-separate-file.patch 
b/SOURCES/mlnx-dpdk-0003-net-mlx5-move-rdma-core-calls-to-separate-file.patch new file mode 100644 index 0000000..6aa229b --- /dev/null +++ b/SOURCES/mlnx-dpdk-0003-net-mlx5-move-rdma-core-calls-to-separate-file.patch @@ -0,0 +1,1318 @@ +From 04b49e536cf78ec05203c96e8f5d4c5d9ceb6183 Mon Sep 17 00:00:00 2001 +From: Nelio Laranjeiro +Date: Tue, 30 Jan 2018 16:34:56 +0100 +Subject: [PATCH 3/9] net/mlx5: move rdma-core calls to separate file + +This lays the groundwork for externalizing rdma-core as an optional +run-time dependency instead of a mandatory one. + +No functional change. + +Signed-off-by: Nelio Laranjeiro +Signed-off-by: Adrien Mazarguil +(cherry picked from commit c89f0e24d4f0c775dcbfcaa964e9c8f1de815ce5) +--- + drivers/net/mlx5/Makefile | 1 + + drivers/net/mlx5/mlx5.c | 48 +++--- + drivers/net/mlx5/mlx5_ethdev.c | 5 +- + drivers/net/mlx5/mlx5_flow.c | 96 ++++++----- + drivers/net/mlx5/mlx5_glue.c | 359 +++++++++++++++++++++++++++++++++++++++++ + drivers/net/mlx5/mlx5_glue.h | 107 ++++++++++++ + drivers/net/mlx5/mlx5_mr.c | 7 +- + drivers/net/mlx5/mlx5_rxq.c | 54 ++++--- + drivers/net/mlx5/mlx5_txq.c | 22 +-- + drivers/net/mlx5/mlx5_vlan.c | 13 +- + 10 files changed, 598 insertions(+), 114 deletions(-) + create mode 100644 drivers/net/mlx5/mlx5_glue.c + create mode 100644 drivers/net/mlx5/mlx5_glue.h + +diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile +index a3984eb..bdec306 100644 +--- a/drivers/net/mlx5/Makefile ++++ b/drivers/net/mlx5/Makefile +@@ -53,6 +53,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c + SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c + SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c + SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c ++SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_glue.c + + # Basic CFLAGS. 
+ CFLAGS += -O3 +diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c +index 0548d17..f77bdda 100644 +--- a/drivers/net/mlx5/mlx5.c ++++ b/drivers/net/mlx5/mlx5.c +@@ -63,6 +63,7 @@ + #include "mlx5_rxtx.h" + #include "mlx5_autoconf.h" + #include "mlx5_defs.h" ++#include "mlx5_glue.h" + + /* Device parameter to enable RX completion queue compression. */ + #define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en" +@@ -225,8 +226,8 @@ struct mlx5_args { + } + if (priv->pd != NULL) { + assert(priv->ctx != NULL); +- claim_zero(ibv_dealloc_pd(priv->pd)); +- claim_zero(ibv_close_device(priv->ctx)); ++ claim_zero(mlx5_glue->dealloc_pd(priv->pd)); ++ claim_zero(mlx5_glue->close_device(priv->ctx)); + } else + assert(priv->ctx == NULL); + if (priv->rss_conf.rss_key != NULL) +@@ -565,7 +566,7 @@ struct mlx5_args { + + /* Save PCI address. */ + mlx5_dev[idx].pci_addr = pci_dev->addr; +- list = ibv_get_device_list(&i); ++ list = mlx5_glue->get_device_list(&i); + if (list == NULL) { + assert(errno); + if (errno == ENOSYS) +@@ -615,12 +616,12 @@ struct mlx5_args { + " (SR-IOV: %s)", + list[i]->name, + sriov ? "true" : "false"); +- attr_ctx = ibv_open_device(list[i]); ++ attr_ctx = mlx5_glue->open_device(list[i]); + err = errno; + break; + } + if (attr_ctx == NULL) { +- ibv_free_device_list(list); ++ mlx5_glue->free_device_list(list); + switch (err) { + case 0: + ERROR("cannot access device, is mlx5_ib loaded?"); +@@ -639,7 +640,7 @@ struct mlx5_args { + * Multi-packet send is supported by ConnectX-4 Lx PF as well + * as all ConnectX-5 devices. 
+ */ +- mlx5dv_query_device(attr_ctx, &attrs_out); ++ mlx5_glue->dv_query_device(attr_ctx, &attrs_out); + if (attrs_out.flags & MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED) { + if (attrs_out.flags & MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW) { + DEBUG("Enhanced MPW is supported"); +@@ -657,7 +658,7 @@ struct mlx5_args { + cqe_comp = 0; + else + cqe_comp = 1; +- if (ibv_query_device_ex(attr_ctx, NULL, &device_attr)) ++ if (mlx5_glue->query_device_ex(attr_ctx, NULL, &device_attr)) + goto error; + INFO("%u port(s) detected", device_attr.orig_attr.phys_port_cnt); + +@@ -721,15 +722,15 @@ struct mlx5_args { + + DEBUG("using port %u (%08" PRIx32 ")", port, test); + +- ctx = ibv_open_device(ibv_dev); ++ ctx = mlx5_glue->open_device(ibv_dev); + if (ctx == NULL) { + err = ENODEV; + goto port_error; + } + +- ibv_query_device_ex(ctx, NULL, &device_attr); ++ mlx5_glue->query_device_ex(ctx, NULL, &device_attr); + /* Check port status. */ +- err = ibv_query_port(ctx, port, &port_attr); ++ err = mlx5_glue->query_port(ctx, port, &port_attr); + if (err) { + ERROR("port query failed: %s", strerror(err)); + goto port_error; +@@ -744,11 +745,11 @@ struct mlx5_args { + + if (port_attr.state != IBV_PORT_ACTIVE) + DEBUG("port %d is not active: \"%s\" (%d)", +- port, ibv_port_state_str(port_attr.state), ++ port, mlx5_glue->port_state_str(port_attr.state), + port_attr.state); + + /* Allocate protection domain. 
*/ +- pd = ibv_alloc_pd(ctx); ++ pd = mlx5_glue->alloc_pd(ctx); + if (pd == NULL) { + ERROR("PD allocation failure"); + err = ENOMEM; +@@ -787,7 +788,7 @@ struct mlx5_args { + goto port_error; + } + mlx5_args_assign(priv, &args); +- if (ibv_query_device_ex(ctx, NULL, &device_attr_ex)) { ++ if (mlx5_glue->query_device_ex(ctx, NULL, &device_attr_ex)) { + ERROR("ibv_query_device_ex() failed"); + goto port_error; + } +@@ -807,7 +808,7 @@ struct mlx5_args { + + #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT + priv->counter_set_supported = !!(device_attr.max_counter_sets); +- ibv_describe_counter_set(ctx, 0, &cs_desc); ++ mlx5_glue->describe_counter_set(ctx, 0, &cs_desc); + DEBUG("counter type = %d, num of cs = %ld, attributes = %d", + cs_desc.counter_type, cs_desc.num_of_cs, + cs_desc.attributes); +@@ -933,8 +934,9 @@ struct mlx5_args { + .free = &mlx5_free_verbs_buf, + .data = priv, + }; +- mlx5dv_set_context_attr(ctx, MLX5DV_CTX_ATTR_BUF_ALLOCATORS, +- (void *)((uintptr_t)&alctr)); ++ mlx5_glue->dv_set_context_attr(ctx, ++ MLX5DV_CTX_ATTR_BUF_ALLOCATORS, ++ (void *)((uintptr_t)&alctr)); + + /* Bring Ethernet device up. */ + DEBUG("forcing Ethernet interface up"); +@@ -946,9 +948,9 @@ struct mlx5_args { + if (priv) + rte_free(priv); + if (pd) +- claim_zero(ibv_dealloc_pd(pd)); ++ claim_zero(mlx5_glue->dealloc_pd(pd)); + if (ctx) +- claim_zero(ibv_close_device(ctx)); ++ claim_zero(mlx5_glue->close_device(ctx)); + break; + } + +@@ -967,9 +969,9 @@ struct mlx5_args { + + error: + if (attr_ctx) +- claim_zero(ibv_close_device(attr_ctx)); ++ claim_zero(mlx5_glue->close_device(attr_ctx)); + if (list) +- ibv_free_device_list(list); ++ mlx5_glue->free_device_list(list); + assert(err >= 0); + return -err; + } +@@ -1040,7 +1042,7 @@ struct mlx5_args { + /* Match the size of Rx completion entry to the size of a cacheline. 
*/ + if (RTE_CACHE_LINE_SIZE == 128) + setenv("MLX5_CQE_SIZE", "128", 0); +- ibv_fork_init(); ++ mlx5_glue->fork_init(); + rte_pci_register(&mlx5_driver); + } + +diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c +index a3cef68..5620cce 100644 +--- a/drivers/net/mlx5/mlx5_ethdev.c ++++ b/drivers/net/mlx5/mlx5_ethdev.c +@@ -64,6 +64,7 @@ + #include + + #include "mlx5.h" ++#include "mlx5_glue.h" + #include "mlx5_rxtx.h" + #include "mlx5_utils.h" + +@@ -1191,7 +1192,7 @@ struct priv * + + /* Read all message and acknowledge them. */ + for (;;) { +- if (ibv_get_async_event(priv->ctx, &event)) ++ if (mlx5_glue->get_async_event(priv->ctx, &event)) + break; + if ((event.event_type == IBV_EVENT_PORT_ACTIVE || + event.event_type == IBV_EVENT_PORT_ERR) && +@@ -1203,7 +1204,7 @@ struct priv * + else + DEBUG("event type %d on port %d not handled", + event.event_type, event.element.port_num); +- ibv_ack_async_event(&event); ++ mlx5_glue->ack_async_event(&event); + } + if (ret & (1 << RTE_ETH_EVENT_INTR_LSC)) + if (priv_link_status_update(priv)) +diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c +index f32dfdd..fb85877 100644 +--- a/drivers/net/mlx5/mlx5_flow.c ++++ b/drivers/net/mlx5/mlx5_flow.c +@@ -51,6 +51,7 @@ + + #include "mlx5.h" + #include "mlx5_prm.h" ++#include "mlx5_glue.h" + + /* Define minimal priority for control plane flows. 
*/ + #define MLX5_CTRL_FLOW_PRIORITY 4 +@@ -60,22 +61,9 @@ + #define MLX5_IPV6 6 + + #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT +-struct ibv_counter_set_init_attr { +- int dummy; +-}; + struct ibv_flow_spec_counter_action { + int dummy; + }; +-struct ibv_counter_set { +- int dummy; +-}; +- +-static inline int +-ibv_destroy_counter_set(struct ibv_counter_set *cs) +-{ +- (void)cs; +- return -ENOTSUP; +-} + #endif + + /* Dev ops structure defined in mlx5.c */ +@@ -1664,7 +1652,7 @@ struct ibv_spec_header { + }; + + init_attr.counter_set_id = 0; +- parser->cs = ibv_create_counter_set(priv->ctx, &init_attr); ++ parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr); + if (!parser->cs) + return EINVAL; + counter.counter_set_handle = parser->cs->handle; +@@ -1715,8 +1703,8 @@ struct ibv_spec_header { + if (!priv->dev->data->dev_started) + return 0; + parser->drop_q.ibv_attr = NULL; +- flow->drxq.ibv_flow = ibv_create_flow(priv->flow_drop_queue->qp, +- flow->drxq.ibv_attr); ++ flow->drxq.ibv_flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp, ++ flow->drxq.ibv_attr); + if (!flow->drxq.ibv_flow) { + rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "flow rule creation failure"); +@@ -1727,7 +1715,7 @@ struct ibv_spec_header { + error: + assert(flow); + if (flow->drxq.ibv_flow) { +- claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow)); ++ claim_zero(mlx5_glue->destroy_flow(flow->drxq.ibv_flow)); + flow->drxq.ibv_flow = NULL; + } + if (flow->drxq.ibv_attr) { +@@ -1735,7 +1723,7 @@ struct ibv_spec_header { + flow->drxq.ibv_attr = NULL; + } + if (flow->cs) { +- claim_zero(ibv_destroy_counter_set(flow->cs)); ++ claim_zero(mlx5_glue->destroy_counter_set(flow->cs)); + flow->cs = NULL; + parser->cs = NULL; + } +@@ -1839,8 +1827,8 @@ struct ibv_spec_header { + if (!flow->frxq[i].hrxq) + continue; + flow->frxq[i].ibv_flow = +- ibv_create_flow(flow->frxq[i].hrxq->qp, +- flow->frxq[i].ibv_attr); ++ mlx5_glue->create_flow(flow->frxq[i].hrxq->qp, ++ 
flow->frxq[i].ibv_attr); + if (!flow->frxq[i].ibv_flow) { + rte_flow_error_set(error, ENOMEM, + RTE_FLOW_ERROR_TYPE_HANDLE, +@@ -1866,7 +1854,7 @@ struct ibv_spec_header { + if (flow->frxq[i].ibv_flow) { + struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow; + +- claim_zero(ibv_destroy_flow(ibv_flow)); ++ claim_zero(mlx5_glue->destroy_flow(ibv_flow)); + } + if (flow->frxq[i].hrxq) + mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq); +@@ -1874,7 +1862,7 @@ struct ibv_spec_header { + rte_free(flow->frxq[i].ibv_attr); + } + if (flow->cs) { +- claim_zero(ibv_destroy_counter_set(flow->cs)); ++ claim_zero(mlx5_glue->destroy_counter_set(flow->cs)); + flow->cs = NULL; + parser->cs = NULL; + } +@@ -2056,14 +2044,16 @@ struct rte_flow * + free: + if (flow->drop) { + if (flow->drxq.ibv_flow) +- claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow)); ++ claim_zero(mlx5_glue->destroy_flow ++ (flow->drxq.ibv_flow)); + rte_free(flow->drxq.ibv_attr); + } else { + for (i = 0; i != hash_rxq_init_n; ++i) { + struct mlx5_flow *frxq = &flow->frxq[i]; + + if (frxq->ibv_flow) +- claim_zero(ibv_destroy_flow(frxq->ibv_flow)); ++ claim_zero(mlx5_glue->destroy_flow ++ (frxq->ibv_flow)); + if (frxq->hrxq) + mlx5_priv_hrxq_release(priv, frxq->hrxq); + if (frxq->ibv_attr) +@@ -2071,7 +2061,7 @@ struct rte_flow * + } + } + if (flow->cs) { +- claim_zero(ibv_destroy_counter_set(flow->cs)); ++ claim_zero(mlx5_glue->destroy_counter_set(flow->cs)); + flow->cs = NULL; + } + TAILQ_REMOVE(list, flow, next); +@@ -2119,35 +2109,38 @@ struct rte_flow * + WARN("cannot allocate memory for drop queue"); + goto error; + } +- fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0); ++ fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0); + if (!fdq->cq) { + WARN("cannot allocate CQ for drop queue"); + goto error; + } +- fdq->wq = ibv_create_wq(priv->ctx, +- &(struct ibv_wq_init_attr){ ++ fdq->wq = mlx5_glue->create_wq ++ (priv->ctx, ++ &(struct ibv_wq_init_attr){ + .wq_type = IBV_WQT_RQ, + .max_wr = 1, + .max_sge 
= 1, + .pd = priv->pd, + .cq = fdq->cq, +- }); ++ }); + if (!fdq->wq) { + WARN("cannot allocate WQ for drop queue"); + goto error; + } +- fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx, +- &(struct ibv_rwq_ind_table_init_attr){ ++ fdq->ind_table = mlx5_glue->create_rwq_ind_table ++ (priv->ctx, ++ &(struct ibv_rwq_ind_table_init_attr){ + .log_ind_tbl_size = 0, + .ind_tbl = &fdq->wq, + .comp_mask = 0, +- }); ++ }); + if (!fdq->ind_table) { + WARN("cannot allocate indirection table for drop queue"); + goto error; + } +- fdq->qp = ibv_create_qp_ex(priv->ctx, +- &(struct ibv_qp_init_attr_ex){ ++ fdq->qp = mlx5_glue->create_qp_ex ++ (priv->ctx, ++ &(struct ibv_qp_init_attr_ex){ + .qp_type = IBV_QPT_RAW_PACKET, + .comp_mask = + IBV_QP_INIT_ATTR_PD | +@@ -2162,7 +2155,7 @@ struct rte_flow * + }, + .rwq_ind_tbl = fdq->ind_table, + .pd = priv->pd +- }); ++ }); + if (!fdq->qp) { + WARN("cannot allocate QP for drop queue"); + goto error; +@@ -2171,13 +2164,13 @@ struct rte_flow * + return 0; + error: + if (fdq->qp) +- claim_zero(ibv_destroy_qp(fdq->qp)); ++ claim_zero(mlx5_glue->destroy_qp(fdq->qp)); + if (fdq->ind_table) +- claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table)); ++ claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table)); + if (fdq->wq) +- claim_zero(ibv_destroy_wq(fdq->wq)); ++ claim_zero(mlx5_glue->destroy_wq(fdq->wq)); + if (fdq->cq) +- claim_zero(ibv_destroy_cq(fdq->cq)); ++ claim_zero(mlx5_glue->destroy_cq(fdq->cq)); + if (fdq) + rte_free(fdq); + priv->flow_drop_queue = NULL; +@@ -2198,13 +2191,13 @@ struct rte_flow * + if (!fdq) + return; + if (fdq->qp) +- claim_zero(ibv_destroy_qp(fdq->qp)); ++ claim_zero(mlx5_glue->destroy_qp(fdq->qp)); + if (fdq->ind_table) +- claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table)); ++ claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table)); + if (fdq->wq) +- claim_zero(ibv_destroy_wq(fdq->wq)); ++ claim_zero(mlx5_glue->destroy_wq(fdq->wq)); + if (fdq->cq) +- claim_zero(ibv_destroy_cq(fdq->cq)); ++ 
claim_zero(mlx5_glue->destroy_cq(fdq->cq)); + rte_free(fdq); + priv->flow_drop_queue = NULL; + } +@@ -2228,7 +2221,8 @@ struct rte_flow * + if (flow->drop) { + if (!flow->drxq.ibv_flow) + continue; +- claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow)); ++ claim_zero(mlx5_glue->destroy_flow ++ (flow->drxq.ibv_flow)); + flow->drxq.ibv_flow = NULL; + /* Next flow. */ + continue; +@@ -2248,7 +2242,8 @@ struct rte_flow * + for (i = 0; i != hash_rxq_init_n; ++i) { + if (!flow->frxq[i].ibv_flow) + continue; +- claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow)); ++ claim_zero(mlx5_glue->destroy_flow ++ (flow->frxq[i].ibv_flow)); + flow->frxq[i].ibv_flow = NULL; + mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq); + flow->frxq[i].hrxq = NULL; +@@ -2278,8 +2273,9 @@ struct rte_flow * + + if (flow->drop) { + flow->drxq.ibv_flow = +- ibv_create_flow(priv->flow_drop_queue->qp, +- flow->drxq.ibv_attr); ++ mlx5_glue->create_flow ++ (priv->flow_drop_queue->qp, ++ flow->drxq.ibv_attr); + if (!flow->drxq.ibv_flow) { + DEBUG("Flow %p cannot be applied", + (void *)flow); +@@ -2315,8 +2311,8 @@ struct rte_flow * + } + flow_create: + flow->frxq[i].ibv_flow = +- ibv_create_flow(flow->frxq[i].hrxq->qp, +- flow->frxq[i].ibv_attr); ++ mlx5_glue->create_flow(flow->frxq[i].hrxq->qp, ++ flow->frxq[i].ibv_attr); + if (!flow->frxq[i].ibv_flow) { + DEBUG("Flow %p cannot be applied", + (void *)flow); +@@ -2523,7 +2519,7 @@ struct rte_flow * + .out = counters, + .outlen = 2 * sizeof(uint64_t), + }; +- int res = ibv_query_counter_set(&query_cs_attr, &query_out); ++ int res = mlx5_glue->query_counter_set(&query_cs_attr, &query_out); + + if (res) { + rte_flow_error_set(error, -res, +diff --git a/drivers/net/mlx5/mlx5_glue.c b/drivers/net/mlx5/mlx5_glue.c +new file mode 100644 +index 0000000..ff48c1e +--- /dev/null ++++ b/drivers/net/mlx5/mlx5_glue.c +@@ -0,0 +1,359 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright 2018 6WIND S.A. ++ * Copyright 2018 Mellanox Technologies, Ltd. 
++ */ ++ ++#include <errno.h> ++#include <stddef.h> ++#include <stdint.h> ++ ++/* Verbs headers do not support -pedantic. */ ++#ifdef PEDANTIC ++#pragma GCC diagnostic ignored "-Wpedantic" ++#endif ++#include <infiniband/mlx5dv.h> ++#include <infiniband/verbs.h> ++#ifdef PEDANTIC ++#pragma GCC diagnostic error "-Wpedantic" ++#endif ++ ++#include "mlx5_autoconf.h" ++#include "mlx5_glue.h" ++ ++static int ++mlx5_glue_fork_init(void) ++{ ++ return ibv_fork_init(); ++} ++ ++static struct ibv_pd * ++mlx5_glue_alloc_pd(struct ibv_context *context) ++{ ++ return ibv_alloc_pd(context); ++} ++ ++static int ++mlx5_glue_dealloc_pd(struct ibv_pd *pd) ++{ ++ return ibv_dealloc_pd(pd); ++} ++ ++static struct ibv_device ** ++mlx5_glue_get_device_list(int *num_devices) ++{ ++ return ibv_get_device_list(num_devices); ++} ++ ++static void ++mlx5_glue_free_device_list(struct ibv_device **list) ++{ ++ ibv_free_device_list(list); ++} ++ ++static struct ibv_context * ++mlx5_glue_open_device(struct ibv_device *device) ++{ ++ return ibv_open_device(device); ++} ++ ++static int ++mlx5_glue_close_device(struct ibv_context *context) ++{ ++ return ibv_close_device(context); ++} ++ ++static const char * ++mlx5_glue_get_device_name(struct ibv_device *device) ++{ ++ return ibv_get_device_name(device); ++} ++ ++static int ++mlx5_glue_query_device(struct ibv_context *context, ++ struct ibv_device_attr *device_attr) ++{ ++ return ibv_query_device(context, device_attr); ++} ++ ++static int ++mlx5_glue_query_device_ex(struct ibv_context *context, ++ const struct ibv_query_device_ex_input *input, ++ struct ibv_device_attr_ex *attr) ++{ ++ return ibv_query_device_ex(context, input, attr); ++} ++ ++static int ++mlx5_glue_query_port(struct ibv_context *context, uint8_t port_num, ++ struct ibv_port_attr *port_attr) ++{ ++ return ibv_query_port(context, port_num, port_attr); ++} ++ ++static struct ibv_comp_channel * ++mlx5_glue_create_comp_channel(struct ibv_context *context) ++{ ++ return ibv_create_comp_channel(context); ++} ++ ++static int
++mlx5_glue_destroy_comp_channel(struct ibv_comp_channel *channel) ++{ ++ return ibv_destroy_comp_channel(channel); ++} ++ ++static struct ibv_cq * ++mlx5_glue_create_cq(struct ibv_context *context, int cqe, void *cq_context, ++ struct ibv_comp_channel *channel, int comp_vector) ++{ ++ return ibv_create_cq(context, cqe, cq_context, channel, comp_vector); ++} ++ ++static int ++mlx5_glue_destroy_cq(struct ibv_cq *cq) ++{ ++ return ibv_destroy_cq(cq); ++} ++ ++static int ++mlx5_glue_get_cq_event(struct ibv_comp_channel *channel, struct ibv_cq **cq, ++ void **cq_context) ++{ ++ return ibv_get_cq_event(channel, cq, cq_context); ++} ++ ++static void ++mlx5_glue_ack_cq_events(struct ibv_cq *cq, unsigned int nevents) ++{ ++ ibv_ack_cq_events(cq, nevents); ++} ++ ++static struct ibv_rwq_ind_table * ++mlx5_glue_create_rwq_ind_table(struct ibv_context *context, ++ struct ibv_rwq_ind_table_init_attr *init_attr) ++{ ++ return ibv_create_rwq_ind_table(context, init_attr); ++} ++ ++static int ++mlx5_glue_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table) ++{ ++ return ibv_destroy_rwq_ind_table(rwq_ind_table); ++} ++ ++static struct ibv_wq * ++mlx5_glue_create_wq(struct ibv_context *context, ++ struct ibv_wq_init_attr *wq_init_attr) ++{ ++ return ibv_create_wq(context, wq_init_attr); ++} ++ ++static int ++mlx5_glue_destroy_wq(struct ibv_wq *wq) ++{ ++ return ibv_destroy_wq(wq); ++} ++static int ++mlx5_glue_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *wq_attr) ++{ ++ return ibv_modify_wq(wq, wq_attr); ++} ++ ++static struct ibv_flow * ++mlx5_glue_create_flow(struct ibv_qp *qp, struct ibv_flow_attr *flow) ++{ ++ return ibv_create_flow(qp, flow); ++} ++ ++static int ++mlx5_glue_destroy_flow(struct ibv_flow *flow_id) ++{ ++ return ibv_destroy_flow(flow_id); ++} ++ ++static struct ibv_qp * ++mlx5_glue_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr) ++{ ++ return ibv_create_qp(pd, qp_init_attr); ++} ++ ++static struct ibv_qp * 
++mlx5_glue_create_qp_ex(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *qp_init_attr_ex) ++{ ++ return ibv_create_qp_ex(context, qp_init_attr_ex); ++} ++ ++static int ++mlx5_glue_destroy_qp(struct ibv_qp *qp) ++{ ++ return ibv_destroy_qp(qp); ++} ++ ++static int ++mlx5_glue_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask) ++{ ++ return ibv_modify_qp(qp, attr, attr_mask); ++} ++ ++static struct ibv_mr * ++mlx5_glue_reg_mr(struct ibv_pd *pd, void *addr, size_t length, int access) ++{ ++ return ibv_reg_mr(pd, addr, length, access); ++} ++ ++static int ++mlx5_glue_dereg_mr(struct ibv_mr *mr) ++{ ++ return ibv_dereg_mr(mr); ++} ++ ++static struct ibv_counter_set * ++mlx5_glue_create_counter_set(struct ibv_context *context, ++ struct ibv_counter_set_init_attr *init_attr) ++{ ++#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT ++ (void)context; ++ (void)init_attr; ++ return NULL; ++#else ++ return ibv_create_counter_set(context, init_attr); ++#endif ++} ++ ++static int ++mlx5_glue_destroy_counter_set(struct ibv_counter_set *cs) ++{ ++#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT ++ (void)cs; ++ return ENOTSUP; ++#else ++ return ibv_destroy_counter_set(cs); ++#endif ++} ++ ++static int ++mlx5_glue_describe_counter_set(struct ibv_context *context, ++ uint16_t counter_set_id, ++ struct ibv_counter_set_description *cs_desc) ++{ ++#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT ++ (void)context; ++ (void)counter_set_id; ++ (void)cs_desc; ++ return ENOTSUP; ++#else ++ return ibv_describe_counter_set(context, counter_set_id, cs_desc); ++#endif ++} ++ ++static int ++mlx5_glue_query_counter_set(struct ibv_query_counter_set_attr *query_attr, ++ struct ibv_counter_set_data *cs_data) ++{ ++#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT ++ (void)query_attr; ++ (void)cs_data; ++ return ENOTSUP; ++#else ++ return ibv_query_counter_set(query_attr, cs_data); ++#endif ++} ++ ++static void ++mlx5_glue_ack_async_event(struct ibv_async_event *event) ++{ ++ 
ibv_ack_async_event(event); ++} ++ ++static int ++mlx5_glue_get_async_event(struct ibv_context *context, ++ struct ibv_async_event *event) ++{ ++ return ibv_get_async_event(context, event); ++} ++ ++static const char * ++mlx5_glue_port_state_str(enum ibv_port_state port_state) ++{ ++ return ibv_port_state_str(port_state); ++} ++ ++static struct ibv_cq * ++mlx5_glue_cq_ex_to_cq(struct ibv_cq_ex *cq) ++{ ++ return ibv_cq_ex_to_cq(cq); ++} ++ ++static struct ibv_cq_ex * ++mlx5_glue_dv_create_cq(struct ibv_context *context, ++ struct ibv_cq_init_attr_ex *cq_attr, ++ struct mlx5dv_cq_init_attr *mlx5_cq_attr) ++{ ++ return mlx5dv_create_cq(context, cq_attr, mlx5_cq_attr); ++} ++ ++static int ++mlx5_glue_dv_query_device(struct ibv_context *ctx, ++ struct mlx5dv_context *attrs_out) ++{ ++ return mlx5dv_query_device(ctx, attrs_out); ++} ++ ++static int ++mlx5_glue_dv_set_context_attr(struct ibv_context *ibv_ctx, ++ enum mlx5dv_set_ctx_attr_type type, void *attr) ++{ ++ return mlx5dv_set_context_attr(ibv_ctx, type, attr); ++} ++ ++static int ++mlx5_glue_dv_init_obj(struct mlx5dv_obj *obj, uint64_t obj_type) ++{ ++ return mlx5dv_init_obj(obj, obj_type); ++} ++ ++const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){ ++ .fork_init = mlx5_glue_fork_init, ++ .alloc_pd = mlx5_glue_alloc_pd, ++ .dealloc_pd = mlx5_glue_dealloc_pd, ++ .get_device_list = mlx5_glue_get_device_list, ++ .free_device_list = mlx5_glue_free_device_list, ++ .open_device = mlx5_glue_open_device, ++ .close_device = mlx5_glue_close_device, ++ .get_device_name = mlx5_glue_get_device_name, ++ .query_device = mlx5_glue_query_device, ++ .query_device_ex = mlx5_glue_query_device_ex, ++ .query_port = mlx5_glue_query_port, ++ .create_comp_channel = mlx5_glue_create_comp_channel, ++ .destroy_comp_channel = mlx5_glue_destroy_comp_channel, ++ .create_cq = mlx5_glue_create_cq, ++ .destroy_cq = mlx5_glue_destroy_cq, ++ .get_cq_event = mlx5_glue_get_cq_event, ++ .ack_cq_events = mlx5_glue_ack_cq_events, ++ 
.create_rwq_ind_table = mlx5_glue_create_rwq_ind_table, ++ .destroy_rwq_ind_table = mlx5_glue_destroy_rwq_ind_table, ++ .create_wq = mlx5_glue_create_wq, ++ .destroy_wq = mlx5_glue_destroy_wq, ++ .modify_wq = mlx5_glue_modify_wq, ++ .create_flow = mlx5_glue_create_flow, ++ .destroy_flow = mlx5_glue_destroy_flow, ++ .create_qp = mlx5_glue_create_qp, ++ .create_qp_ex = mlx5_glue_create_qp_ex, ++ .destroy_qp = mlx5_glue_destroy_qp, ++ .modify_qp = mlx5_glue_modify_qp, ++ .reg_mr = mlx5_glue_reg_mr, ++ .dereg_mr = mlx5_glue_dereg_mr, ++ .create_counter_set = mlx5_glue_create_counter_set, ++ .destroy_counter_set = mlx5_glue_destroy_counter_set, ++ .describe_counter_set = mlx5_glue_describe_counter_set, ++ .query_counter_set = mlx5_glue_query_counter_set, ++ .ack_async_event = mlx5_glue_ack_async_event, ++ .get_async_event = mlx5_glue_get_async_event, ++ .port_state_str = mlx5_glue_port_state_str, ++ .cq_ex_to_cq = mlx5_glue_cq_ex_to_cq, ++ .dv_create_cq = mlx5_glue_dv_create_cq, ++ .dv_query_device = mlx5_glue_dv_query_device, ++ .dv_set_context_attr = mlx5_glue_dv_set_context_attr, ++ .dv_init_obj = mlx5_glue_dv_init_obj, ++}; +diff --git a/drivers/net/mlx5/mlx5_glue.h b/drivers/net/mlx5/mlx5_glue.h +new file mode 100644 +index 0000000..67bd8d0 +--- /dev/null ++++ b/drivers/net/mlx5/mlx5_glue.h +@@ -0,0 +1,107 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright 2018 6WIND S.A. ++ * Copyright 2018 Mellanox Technologies, Ltd. ++ */ ++ ++#ifndef MLX5_GLUE_H_ ++#define MLX5_GLUE_H_ ++ ++#include <stdint.h> ++ ++/* Verbs headers do not support -pedantic.
*/ ++#ifdef PEDANTIC ++#pragma GCC diagnostic ignored "-Wpedantic" ++#endif ++#include <infiniband/mlx5dv.h> ++#include <infiniband/verbs.h> ++#ifdef PEDANTIC ++#pragma GCC diagnostic error "-Wpedantic" ++#endif ++ ++#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT ++struct ibv_counter_set; ++struct ibv_counter_set_data; ++struct ibv_counter_set_description; ++struct ibv_counter_set_init_attr; ++struct ibv_query_counter_set_attr; ++#endif ++ ++struct mlx5_glue { ++ int (*fork_init)(void); ++ struct ibv_pd *(*alloc_pd)(struct ibv_context *context); ++ int (*dealloc_pd)(struct ibv_pd *pd); ++ struct ibv_device **(*get_device_list)(int *num_devices); ++ void (*free_device_list)(struct ibv_device **list); ++ struct ibv_context *(*open_device)(struct ibv_device *device); ++ int (*close_device)(struct ibv_context *context); ++ const char *(*get_device_name)(struct ibv_device *device); ++ int (*query_device)(struct ibv_context *context, ++ struct ibv_device_attr *device_attr); ++ int (*query_device_ex)(struct ibv_context *context, ++ const struct ibv_query_device_ex_input *input, ++ struct ibv_device_attr_ex *attr); ++ int (*query_port)(struct ibv_context *context, uint8_t port_num, ++ struct ibv_port_attr *port_attr); ++ struct ibv_comp_channel *(*create_comp_channel) ++ (struct ibv_context *context); ++ int (*destroy_comp_channel)(struct ibv_comp_channel *channel); ++ struct ibv_cq *(*create_cq)(struct ibv_context *context, int cqe, ++ void *cq_context, ++ struct ibv_comp_channel *channel, ++ int comp_vector); ++ int (*destroy_cq)(struct ibv_cq *cq); ++ int (*get_cq_event)(struct ibv_comp_channel *channel, ++ struct ibv_cq **cq, void **cq_context); ++ void (*ack_cq_events)(struct ibv_cq *cq, unsigned int nevents); ++ struct ibv_rwq_ind_table *(*create_rwq_ind_table) ++ (struct ibv_context *context, ++ struct ibv_rwq_ind_table_init_attr *init_attr); ++ int (*destroy_rwq_ind_table)(struct ibv_rwq_ind_table *rwq_ind_table); ++ struct ibv_wq *(*create_wq)(struct ibv_context *context, ++ struct ibv_wq_init_attr
*wq_init_attr); ++ int (*destroy_wq)(struct ibv_wq *wq); ++ int (*modify_wq)(struct ibv_wq *wq, struct ibv_wq_attr *wq_attr); ++ struct ibv_flow *(*create_flow)(struct ibv_qp *qp, ++ struct ibv_flow_attr *flow); ++ int (*destroy_flow)(struct ibv_flow *flow_id); ++ struct ibv_qp *(*create_qp)(struct ibv_pd *pd, ++ struct ibv_qp_init_attr *qp_init_attr); ++ struct ibv_qp *(*create_qp_ex) ++ (struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *qp_init_attr_ex); ++ int (*destroy_qp)(struct ibv_qp *qp); ++ int (*modify_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr, ++ int attr_mask); ++ struct ibv_mr *(*reg_mr)(struct ibv_pd *pd, void *addr, ++ size_t length, int access); ++ int (*dereg_mr)(struct ibv_mr *mr); ++ struct ibv_counter_set *(*create_counter_set) ++ (struct ibv_context *context, ++ struct ibv_counter_set_init_attr *init_attr); ++ int (*destroy_counter_set)(struct ibv_counter_set *cs); ++ int (*describe_counter_set) ++ (struct ibv_context *context, ++ uint16_t counter_set_id, ++ struct ibv_counter_set_description *cs_desc); ++ int (*query_counter_set)(struct ibv_query_counter_set_attr *query_attr, ++ struct ibv_counter_set_data *cs_data); ++ void (*ack_async_event)(struct ibv_async_event *event); ++ int (*get_async_event)(struct ibv_context *context, ++ struct ibv_async_event *event); ++ const char *(*port_state_str)(enum ibv_port_state port_state); ++ struct ibv_cq *(*cq_ex_to_cq)(struct ibv_cq_ex *cq); ++ struct ibv_cq_ex *(*dv_create_cq) ++ (struct ibv_context *context, ++ struct ibv_cq_init_attr_ex *cq_attr, ++ struct mlx5dv_cq_init_attr *mlx5_cq_attr); ++ int (*dv_query_device)(struct ibv_context *ctx_in, ++ struct mlx5dv_context *attrs_out); ++ int (*dv_set_context_attr)(struct ibv_context *ibv_ctx, ++ enum mlx5dv_set_ctx_attr_type type, ++ void *attr); ++ int (*dv_init_obj)(struct mlx5dv_obj *obj, uint64_t obj_type); ++}; ++ ++const struct mlx5_glue *mlx5_glue; ++ ++#endif /* MLX5_GLUE_H_ */ +diff --git a/drivers/net/mlx5/mlx5_mr.c 
b/drivers/net/mlx5/mlx5_mr.c +index 6b29eed..dea540a 100644 +--- a/drivers/net/mlx5/mlx5_mr.c ++++ b/drivers/net/mlx5/mlx5_mr.c +@@ -46,6 +46,7 @@ + + #include "mlx5.h" + #include "mlx5_rxtx.h" ++#include "mlx5_glue.h" + + struct mlx5_check_mempool_data { + int ret; +@@ -305,8 +306,8 @@ struct mlx5_mr* + DEBUG("mempool %p using start=%p end=%p size=%zu for MR", + (void *)mp, (void *)start, (void *)end, + (size_t)(end - start)); +- mr->mr = ibv_reg_mr(priv->pd, (void *)start, end - start, +- IBV_ACCESS_LOCAL_WRITE); ++ mr->mr = mlx5_glue->reg_mr(priv->pd, (void *)start, end - start, ++ IBV_ACCESS_LOCAL_WRITE); + mr->mp = mp; + mr->lkey = rte_cpu_to_be_32(mr->mr->lkey); + mr->start = start; +@@ -364,7 +365,7 @@ struct mlx5_mr* + DEBUG("Memory Region %p refcnt: %d", + (void *)mr, rte_atomic32_read(&mr->refcnt)); + if (rte_atomic32_dec_and_test(&mr->refcnt)) { +- claim_zero(ibv_dereg_mr(mr->mr)); ++ claim_zero(mlx5_glue->dereg_mr(mr->mr)); + LIST_REMOVE(mr, next); + rte_free(mr); + return 0; +diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c +index 85399ef..f5778b7 100644 +--- a/drivers/net/mlx5/mlx5_rxq.c ++++ b/drivers/net/mlx5/mlx5_rxq.c +@@ -63,6 +63,7 @@ + #include "mlx5_utils.h" + #include "mlx5_autoconf.h" + #include "mlx5_defs.h" ++#include "mlx5_glue.h" + + /* Default RSS hash key also used for ConnectX-3. 
*/ + uint8_t rss_hash_default_key[] = { +@@ -526,13 +527,13 @@ + ret = EINVAL; + goto exit; + } +- ret = ibv_get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx); ++ ret = mlx5_glue->get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx); + if (ret || ev_cq != rxq_ibv->cq) { + ret = EINVAL; + goto exit; + } + rxq_data->cq_arm_sn++; +- ibv_ack_cq_events(rxq_ibv->cq, 1); ++ mlx5_glue->ack_cq_events(rxq_ibv->cq, 1); + exit: + if (rxq_ibv) + mlx5_priv_rxq_ibv_release(priv, rxq_ibv); +@@ -597,7 +598,7 @@ struct mlx5_rxq_ibv* + } + } + if (rxq_ctrl->irq) { +- tmpl->channel = ibv_create_comp_channel(priv->ctx); ++ tmpl->channel = mlx5_glue->create_comp_channel(priv->ctx); + if (!tmpl->channel) { + ERROR("%p: Comp Channel creation failure", + (void *)rxq_ctrl); +@@ -625,8 +626,9 @@ struct mlx5_rxq_ibv* + } else if (priv->cqe_comp && rxq_data->hw_timestamp) { + DEBUG("Rx CQE compression is disabled for HW timestamp"); + } +- tmpl->cq = ibv_cq_ex_to_cq(mlx5dv_create_cq(priv->ctx, &attr.cq.ibv, +- &attr.cq.mlx5)); ++ tmpl->cq = mlx5_glue->cq_ex_to_cq ++ (mlx5_glue->dv_create_cq(priv->ctx, &attr.cq.ibv, ++ &attr.cq.mlx5)); + if (tmpl->cq == NULL) { + ERROR("%p: CQ creation failure", (void *)rxq_ctrl); + goto error; +@@ -662,7 +664,7 @@ struct mlx5_rxq_ibv* + attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; + } + #endif +- tmpl->wq = ibv_create_wq(priv->ctx, &attr.wq); ++ tmpl->wq = mlx5_glue->create_wq(priv->ctx, &attr.wq); + if (tmpl->wq == NULL) { + ERROR("%p: WQ creation failure", (void *)rxq_ctrl); + goto error; +@@ -686,7 +688,7 @@ struct mlx5_rxq_ibv* + .attr_mask = IBV_WQ_ATTR_STATE, + .wq_state = IBV_WQS_RDY, + }; +- ret = ibv_modify_wq(tmpl->wq, &mod); ++ ret = mlx5_glue->modify_wq(tmpl->wq, &mod); + if (ret) { + ERROR("%p: WQ state to IBV_WQS_RDY failed", + (void *)rxq_ctrl); +@@ -696,7 +698,7 @@ struct mlx5_rxq_ibv* + obj.cq.out = &cq_info; + obj.rwq.in = tmpl->wq; + obj.rwq.out = &rwq; +- ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ); ++ ret = 
mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ); + if (ret != 0) + goto error; + if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) { +@@ -745,11 +747,11 @@ struct mlx5_rxq_ibv* + return tmpl; + error: + if (tmpl->wq) +- claim_zero(ibv_destroy_wq(tmpl->wq)); ++ claim_zero(mlx5_glue->destroy_wq(tmpl->wq)); + if (tmpl->cq) +- claim_zero(ibv_destroy_cq(tmpl->cq)); ++ claim_zero(mlx5_glue->destroy_cq(tmpl->cq)); + if (tmpl->channel) +- claim_zero(ibv_destroy_comp_channel(tmpl->channel)); ++ claim_zero(mlx5_glue->destroy_comp_channel(tmpl->channel)); + if (tmpl->mr) + priv_mr_release(priv, tmpl->mr); + return NULL; +@@ -814,10 +816,11 @@ struct mlx5_rxq_ibv* + (void *)rxq_ibv, rte_atomic32_read(&rxq_ibv->refcnt)); + if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) { + rxq_free_elts(rxq_ibv->rxq_ctrl); +- claim_zero(ibv_destroy_wq(rxq_ibv->wq)); +- claim_zero(ibv_destroy_cq(rxq_ibv->cq)); ++ claim_zero(mlx5_glue->destroy_wq(rxq_ibv->wq)); ++ claim_zero(mlx5_glue->destroy_cq(rxq_ibv->cq)); + if (rxq_ibv->channel) +- claim_zero(ibv_destroy_comp_channel(rxq_ibv->channel)); ++ claim_zero(mlx5_glue->destroy_comp_channel ++ (rxq_ibv->channel)); + LIST_REMOVE(rxq_ibv, next); + rte_free(rxq_ibv); + return 0; +@@ -1143,13 +1146,13 @@ struct mlx5_ind_table_ibv* + /* Finalise indirection table. 
*/ + for (j = 0; i != (unsigned int)(1 << wq_n); ++i, ++j) + wq[i] = wq[j]; +- ind_tbl->ind_table = ibv_create_rwq_ind_table( +- priv->ctx, +- &(struct ibv_rwq_ind_table_init_attr){ ++ ind_tbl->ind_table = mlx5_glue->create_rwq_ind_table ++ (priv->ctx, ++ &(struct ibv_rwq_ind_table_init_attr){ + .log_ind_tbl_size = wq_n, + .ind_tbl = wq, + .comp_mask = 0, +- }); ++ }); + if (!ind_tbl->ind_table) + goto error; + rte_atomic32_inc(&ind_tbl->refcnt); +@@ -1221,7 +1224,8 @@ struct mlx5_ind_table_ibv* + DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv, + (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt)); + if (rte_atomic32_dec_and_test(&ind_tbl->refcnt)) +- claim_zero(ibv_destroy_rwq_ind_table(ind_tbl->ind_table)); ++ claim_zero(mlx5_glue->destroy_rwq_ind_table ++ (ind_tbl->ind_table)); + for (i = 0; i != ind_tbl->queues_n; ++i) + claim_nonzero(mlx5_priv_rxq_release(priv, ind_tbl->queues[i])); + if (!rte_atomic32_read(&ind_tbl->refcnt)) { +@@ -1288,9 +1292,9 @@ struct mlx5_hrxq* + ind_tbl = mlx5_priv_ind_table_ibv_new(priv, queues, queues_n); + if (!ind_tbl) + return NULL; +- qp = ibv_create_qp_ex( +- priv->ctx, +- &(struct ibv_qp_init_attr_ex){ ++ qp = mlx5_glue->create_qp_ex ++ (priv->ctx, ++ &(struct ibv_qp_init_attr_ex){ + .qp_type = IBV_QPT_RAW_PACKET, + .comp_mask = + IBV_QP_INIT_ATTR_PD | +@@ -1304,7 +1308,7 @@ struct mlx5_hrxq* + }, + .rwq_ind_tbl = ind_tbl->ind_table, + .pd = priv->pd, +- }); ++ }); + if (!qp) + goto error; + hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0); +@@ -1323,7 +1327,7 @@ struct mlx5_hrxq* + error: + mlx5_priv_ind_table_ibv_release(priv, ind_tbl); + if (qp) +- claim_zero(ibv_destroy_qp(qp)); ++ claim_zero(mlx5_glue->destroy_qp(qp)); + return NULL; + } + +@@ -1391,7 +1395,7 @@ struct mlx5_hrxq* + DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv, + (void *)hrxq, rte_atomic32_read(&hrxq->refcnt)); + if (rte_atomic32_dec_and_test(&hrxq->refcnt)) { +- claim_zero(ibv_destroy_qp(hrxq->qp)); ++ 
claim_zero(mlx5_glue->destroy_qp(hrxq->qp)); + mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table); + LIST_REMOVE(hrxq, next); + rte_free(hrxq); +diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c +index 9c5860f..52cf005 100644 +--- a/drivers/net/mlx5/mlx5_txq.c ++++ b/drivers/net/mlx5/mlx5_txq.c +@@ -59,6 +59,7 @@ + #include "mlx5.h" + #include "mlx5_rxtx.h" + #include "mlx5_autoconf.h" ++#include "mlx5_glue.h" + + /** + * Allocate TX queue elements. +@@ -324,7 +325,7 @@ struct mlx5_txq_ibv* + ((desc / MLX5_TX_COMP_THRESH) - 1) : 1; + if (priv->mps == MLX5_MPW_ENHANCED) + cqe_n += MLX5_TX_COMP_THRESH_INLINE_DIV; +- tmpl.cq = ibv_create_cq(priv->ctx, cqe_n, NULL, NULL, 0); ++ tmpl.cq = mlx5_glue->create_cq(priv->ctx, cqe_n, NULL, NULL, 0); + if (tmpl.cq == NULL) { + ERROR("%p: CQ creation failure", (void *)txq_ctrl); + goto error; +@@ -365,7 +366,7 @@ struct mlx5_txq_ibv* + attr.init.max_tso_header = txq_ctrl->max_tso_header; + attr.init.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER; + } +- tmpl.qp = ibv_create_qp_ex(priv->ctx, &attr.init); ++ tmpl.qp = mlx5_glue->create_qp_ex(priv->ctx, &attr.init); + if (tmpl.qp == NULL) { + ERROR("%p: QP creation failure", (void *)txq_ctrl); + goto error; +@@ -376,7 +377,8 @@ struct mlx5_txq_ibv* + /* Primary port number. 
*/ + .port_num = priv->port + }; +- ret = ibv_modify_qp(tmpl.qp, &attr.mod, (IBV_QP_STATE | IBV_QP_PORT)); ++ ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod, ++ (IBV_QP_STATE | IBV_QP_PORT)); + if (ret) { + ERROR("%p: QP state to IBV_QPS_INIT failed", (void *)txq_ctrl); + goto error; +@@ -384,13 +386,13 @@ struct mlx5_txq_ibv* + attr.mod = (struct ibv_qp_attr){ + .qp_state = IBV_QPS_RTR + }; +- ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE); ++ ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE); + if (ret) { + ERROR("%p: QP state to IBV_QPS_RTR failed", (void *)txq_ctrl); + goto error; + } + attr.mod.qp_state = IBV_QPS_RTS; +- ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE); ++ ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE); + if (ret) { + ERROR("%p: QP state to IBV_QPS_RTS failed", (void *)txq_ctrl); + goto error; +@@ -405,7 +407,7 @@ struct mlx5_txq_ibv* + obj.cq.out = &cq_info; + obj.qp.in = tmpl.qp; + obj.qp.out = &qp; +- ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP); ++ ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP); + if (ret != 0) + goto error; + if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) { +@@ -442,9 +444,9 @@ struct mlx5_txq_ibv* + return txq_ibv; + error: + if (tmpl.cq) +- claim_zero(ibv_destroy_cq(tmpl.cq)); ++ claim_zero(mlx5_glue->destroy_cq(tmpl.cq)); + if (tmpl.qp) +- claim_zero(ibv_destroy_qp(tmpl.qp)); ++ claim_zero(mlx5_glue->destroy_qp(tmpl.qp)); + return NULL; + } + +@@ -497,8 +499,8 @@ struct mlx5_txq_ibv* + DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv, + (void *)txq_ibv, rte_atomic32_read(&txq_ibv->refcnt)); + if (rte_atomic32_dec_and_test(&txq_ibv->refcnt)) { +- claim_zero(ibv_destroy_qp(txq_ibv->qp)); +- claim_zero(ibv_destroy_cq(txq_ibv->cq)); ++ claim_zero(mlx5_glue->destroy_qp(txq_ibv->qp)); ++ claim_zero(mlx5_glue->destroy_cq(txq_ibv->cq)); + LIST_REMOVE(txq_ibv, next); + rte_free(txq_ibv); + return 0; +diff --git a/drivers/net/mlx5/mlx5_vlan.c 
b/drivers/net/mlx5/mlx5_vlan.c +index 6fc315e..841f238 100644 +--- a/drivers/net/mlx5/mlx5_vlan.c ++++ b/drivers/net/mlx5/mlx5_vlan.c +@@ -36,12 +36,23 @@ + #include + #include + ++/* Verbs headers do not support -pedantic. */ ++#ifdef PEDANTIC ++#pragma GCC diagnostic ignored "-Wpedantic" ++#endif ++#include ++#include ++#ifdef PEDANTIC ++#pragma GCC diagnostic error "-Wpedantic" ++#endif ++ + #include + #include + + #include "mlx5_utils.h" + #include "mlx5.h" + #include "mlx5_autoconf.h" ++#include "mlx5_glue.h" + + /** + * DPDK callback to configure a VLAN filter. +@@ -133,7 +144,7 @@ + .flags = vlan_offloads, + }; + +- err = ibv_modify_wq(rxq_ctrl->ibv->wq, &mod); ++ err = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod); + if (err) { + ERROR("%p: failed to modified stripping mode: %s", + (void *)priv, strerror(err)); +-- +1.8.3.1 + diff --git a/SOURCES/mlnx-dpdk-0004-net-mlx5-spawn-rdma-core-dependency-plug-in.patch b/SOURCES/mlnx-dpdk-0004-net-mlx5-spawn-rdma-core-dependency-plug-in.patch new file mode 100644 index 0000000..485a11d --- /dev/null +++ b/SOURCES/mlnx-dpdk-0004-net-mlx5-spawn-rdma-core-dependency-plug-in.patch @@ -0,0 +1,236 @@ +From bf3bf80e901e5d47803c5ffc53f00077a7c72ac6 Mon Sep 17 00:00:00 2001 +From: Adrien Mazarguil +Date: Tue, 30 Jan 2018 16:34:58 +0100 +Subject: [PATCH 4/9] net/mlx5: spawn rdma-core dependency plug-in + +When mlx5 is not compiled directly as an independent shared object (e.g. +CONFIG_RTE_BUILD_SHARED_LIB not enabled for performance reasons), DPDK +applications inherit its dependencies on libibverbs and libmlx5 through +rte.app.mk. + +This is an issue both when DPDK is delivered as a binary package (Linux +distributions) and for end users because rdma-core then propagates as a +mandatory dependency for everything. + +Application writers relying on binary DPDK packages are not necessarily +aware of this fact and may end up delivering packages with broken +dependencies. 
+ +This patch therefore introduces an intermediate internal plug-in +hard-linked with rdma-core (to preserve symbol versioning) loaded by the +PMD through dlopen(), so that a missing rdma-core does not cause unresolved +symbols, allowing applications to start normally. + +Signed-off-by: Adrien Mazarguil +(cherry picked from commit fdf5165b0c44d0b7fd33c78d7a5b8ead6cca6329) +--- + config/common_base | 1 + + doc/guides/nics/mlx5.rst | 13 +++++++++++++ + drivers/net/mlx5/Makefile | 31 +++++++++++++++++++++++++++++- + drivers/net/mlx5/mlx5.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++ + mk/rte.app.mk | 4 ++++ + 5 files changed, 96 insertions(+), 1 deletion(-) + +diff --git a/config/common_base b/config/common_base +index 71a764c..9da57bd 100644 +--- a/config/common_base ++++ b/config/common_base +@@ -239,6 +239,7 @@ CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE=8 + # + CONFIG_RTE_LIBRTE_MLX5_PMD=n + CONFIG_RTE_LIBRTE_MLX5_DEBUG=n ++CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS=n + CONFIG_RTE_LIBRTE_MLX5_TX_MP_CACHE=8 + + # +diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst +index f9558da..6ee4a47 100644 +--- a/doc/guides/nics/mlx5.rst ++++ b/doc/guides/nics/mlx5.rst +@@ -146,6 +146,19 @@ These options can be modified in the ``.config`` file. + + Toggle compilation of librte_pmd_mlx5 itself. + ++- ``CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS`` (default **n**) ++ ++ Build PMD with additional code to make it loadable without hard ++ dependencies on **libibverbs** nor **libmlx5**, which may not be installed ++ on the target system. ++ ++ In this mode, their presence is still required for it to run properly, ++ however their absence won't prevent a DPDK application from starting (with ++ ``CONFIG_RTE_BUILD_SHARED_LIB`` disabled) and they won't show up as ++ missing with ``ldd(1)``. ++ ++ This option has no performance impact. ++ + - ``CONFIG_RTE_LIBRTE_MLX5_DEBUG`` (default **n**) + + Toggle debugging code and stricter compilation flags. 
Enabling this option +diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile +index bdec306..4b20d71 100644 +--- a/drivers/net/mlx5/Makefile ++++ b/drivers/net/mlx5/Makefile +@@ -33,9 +33,13 @@ include $(RTE_SDK)/mk/rte.vars.mk + + # Library name. + LIB = librte_pmd_mlx5.a ++LIB_GLUE = librte_pmd_mlx5_glue.so + + # Sources. + SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5.c ++ifneq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y) ++SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_glue.c ++endif + SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxq.c + SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_txq.c + SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxtx.c +@@ -53,7 +57,10 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c + SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c + SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c + SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c +-SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_glue.c ++ ++ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y) ++INSTALL-$(CONFIG_RTE_LIBRTE_MLX5_PMD)-lib += $(LIB_GLUE) ++endif + + # Basic CFLAGS. + CFLAGS += -O3 +@@ -65,7 +72,13 @@ CFLAGS += -D_DEFAULT_SOURCE + CFLAGS += -D_XOPEN_SOURCE=600 + CFLAGS += $(WERROR_FLAGS) + CFLAGS += -Wno-strict-prototypes ++ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y) ++CFLAGS += -DMLX5_GLUE='"$(LIB_GLUE)"' ++CFLAGS_mlx5_glue.o += -fPIC ++LDLIBS += -ldl ++else + LDLIBS += -libverbs -lmlx5 ++endif + LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring + LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs + LDLIBS += -lrte_bus_pci +@@ -158,7 +171,23 @@ mlx5_autoconf.h: mlx5_autoconf.h.new + + $(SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD):.c=.o): mlx5_autoconf.h + ++# Generate dependency plug-in for rdma-core when the PMD must not be linked ++# directly, so that applications do not inherit this dependency. 
++ ++ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y) ++ ++$(LIB): $(LIB_GLUE) ++ ++$(LIB_GLUE): mlx5_glue.o ++ $Q $(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) \ ++ -s -shared -o $@ $< -libverbs -lmlx5 ++ ++mlx5_glue.o: mlx5_autoconf.h ++ ++endif ++ + clean_mlx5: FORCE + $Q rm -f -- mlx5_autoconf.h mlx5_autoconf.h.new ++ $Q rm -f -- mlx5_glue.o $(LIB_GLUE) + + clean: clean_mlx5 +diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c +index f77bdda..71ebdce 100644 +--- a/drivers/net/mlx5/mlx5.c ++++ b/drivers/net/mlx5/mlx5.c +@@ -35,6 +35,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -56,6 +57,7 @@ + #include + #include + #include ++#include + #include + + #include "mlx5.h" +@@ -1023,6 +1025,47 @@ struct mlx5_args { + .drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV, + }; + ++#ifdef RTE_LIBRTE_MLX5_DLOPEN_DEPS ++ ++/** ++ * Initialization routine for run-time dependency on rdma-core. ++ */ ++static int ++mlx5_glue_init(void) ++{ ++ void *handle = NULL; ++ void **sym; ++ const char *dlmsg; ++ ++ handle = dlopen(MLX5_GLUE, RTLD_LAZY); ++ if (!handle) { ++ rte_errno = EINVAL; ++ dlmsg = dlerror(); ++ if (dlmsg) ++ WARN("cannot load glue library: %s", dlmsg); ++ goto glue_error; ++ } ++ sym = dlsym(handle, "mlx5_glue"); ++ if (!sym || !*sym) { ++ rte_errno = EINVAL; ++ dlmsg = dlerror(); ++ if (dlmsg) ++ ERROR("cannot resolve glue symbol: %s", dlmsg); ++ goto glue_error; ++ } ++ mlx5_glue = *sym; ++ return 0; ++glue_error: ++ if (handle) ++ dlclose(handle); ++ WARN("cannot initialize PMD due to missing run-time" ++ " dependency on rdma-core libraries (libibverbs," ++ " libmlx5)"); ++ return -rte_errno; ++} ++ ++#endif ++ + /** + * Driver initialization routine. + */ +@@ -1042,6 +1085,11 @@ struct mlx5_args { + /* Match the size of Rx completion entry to the size of a cacheline. 
*/ + if (RTE_CACHE_LINE_SIZE == 128) + setenv("MLX5_CQE_SIZE", "128", 0); ++#ifdef RTE_LIBRTE_MLX5_DLOPEN_DEPS ++ if (mlx5_glue_init()) ++ return; ++ assert(mlx5_glue); ++#endif + mlx5_glue->fork_init(); + rte_pci_register(&mlx5_driver); + } +diff --git a/mk/rte.app.mk b/mk/rte.app.mk +index 6ececfe..200fa40 100644 +--- a/mk/rte.app.mk ++++ b/mk/rte.app.mk +@@ -146,7 +146,11 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += -lrte_pmd_mlx4 -ldl + else + _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += -lrte_pmd_mlx4 -libverbs -lmlx4 + endif ++ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y) ++_LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += -lrte_pmd_mlx5 -ldl ++else + _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += -lrte_pmd_mlx5 -libverbs -lmlx5 ++endif + _LDLIBS-$(CONFIG_RTE_LIBRTE_MRVL_PMD) += -lrte_pmd_mrvl -L$(LIBMUSDK_PATH)/lib -lmusdk + _LDLIBS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += -lrte_pmd_nfp + _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_NULL) += -lrte_pmd_null +-- +1.8.3.1 + diff --git a/SOURCES/mlnx-dpdk-0005-net-mlx-add-debug-checks-to-glue-structure.patch b/SOURCES/mlnx-dpdk-0005-net-mlx-add-debug-checks-to-glue-structure.patch new file mode 100644 index 0000000..6a399ca --- /dev/null +++ b/SOURCES/mlnx-dpdk-0005-net-mlx-add-debug-checks-to-glue-structure.patch @@ -0,0 +1,58 @@ +From bb12b6e32bafdba8836000d8515e5864f65f75be Mon Sep 17 00:00:00 2001 +From: Adrien Mazarguil +Date: Thu, 1 Feb 2018 14:30:57 +0100 +Subject: [PATCH 5/9] net/mlx: add debug checks to glue structure + +This code should catch mistakes early if a glue structure member is added +without a corresponding implementation in the library. 
+ +Signed-off-by: Adrien Mazarguil +(cherry picked from commit 1b176cec4973a410eaeff00f33c6d49b795aa867) +--- + drivers/net/mlx4/mlx4.c | 9 +++++++++ + drivers/net/mlx5/mlx5.c | 9 +++++++++ + 2 files changed, 18 insertions(+) + +diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c +index d12b00c..43d1b0a 100644 +--- a/drivers/net/mlx4/mlx4.c ++++ b/drivers/net/mlx4/mlx4.c +@@ -764,6 +764,15 @@ struct mlx4_conf { + return; + assert(mlx4_glue); + #endif ++#ifndef NDEBUG ++ /* Glue structure must not contain any NULL pointers. */ ++ { ++ unsigned int i; ++ ++ for (i = 0; i != sizeof(*mlx4_glue) / sizeof(void *); ++i) ++ assert(((const void *const *)mlx4_glue)[i]); ++ } ++#endif + mlx4_glue->fork_init(); + rte_pci_register(&mlx4_driver); + } +diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c +index 71ebdce..6a3746d 100644 +--- a/drivers/net/mlx5/mlx5.c ++++ b/drivers/net/mlx5/mlx5.c +@@ -1090,6 +1090,15 @@ struct mlx5_args { + return; + assert(mlx5_glue); + #endif ++#ifndef NDEBUG ++ /* Glue structure must not contain any NULL pointers. */ ++ { ++ unsigned int i; ++ ++ for (i = 0; i != sizeof(*mlx5_glue) / sizeof(void *); ++i) ++ assert(((const void *const *)mlx5_glue)[i]); ++ } ++#endif + mlx5_glue->fork_init(); + rte_pci_register(&mlx5_driver); + } +-- +1.8.3.1 + diff --git a/SOURCES/mlnx-dpdk-0006-net-mlx-fix-missing-includes-for-rdma-core-glue.patch b/SOURCES/mlnx-dpdk-0006-net-mlx-fix-missing-includes-for-rdma-core-glue.patch new file mode 100644 index 0000000..95d52af --- /dev/null +++ b/SOURCES/mlnx-dpdk-0006-net-mlx-fix-missing-includes-for-rdma-core-glue.patch @@ -0,0 +1,61 @@ +From 406d0de6f87f3d300ab09fb79ed821d772d829ab Mon Sep 17 00:00:00 2001 +From: Adrien Mazarguil +Date: Thu, 1 Feb 2018 14:30:57 +0100 +Subject: [PATCH 6/9] net/mlx: fix missing includes for rdma-core glue + +For consistency since these includes are already pulled by others. 
+ +Fixes: 6aca97d310 ("net/mlx4: move rdma-core calls to separate file") +Fixes: 7202118686 ("net/mlx5: move rdma-core calls to separate file") + +Signed-off-by: Adrien Mazarguil +(cherry picked from commit 0138edd60cb13612bc7ed638a4d549bcc60008b2) +--- + drivers/net/mlx4/mlx4_glue.c | 3 +++ + drivers/net/mlx4/mlx4_glue.h | 3 +++ + drivers/net/mlx5/mlx5_glue.h | 1 + + 3 files changed, 7 insertions(+) + +diff --git a/drivers/net/mlx4/mlx4_glue.c b/drivers/net/mlx4/mlx4_glue.c +index 30797bd..47ae7ad 100644 +--- a/drivers/net/mlx4/mlx4_glue.c ++++ b/drivers/net/mlx4/mlx4_glue.c +@@ -3,6 +3,9 @@ + * Copyright 2018 Mellanox + */ + ++#include ++#include ++ + /* Verbs headers do not support -pedantic. */ + #ifdef PEDANTIC + #pragma GCC diagnostic ignored "-Wpedantic" +diff --git a/drivers/net/mlx4/mlx4_glue.h b/drivers/net/mlx4/mlx4_glue.h +index 0623511..de251c6 100644 +--- a/drivers/net/mlx4/mlx4_glue.h ++++ b/drivers/net/mlx4/mlx4_glue.h +@@ -6,6 +6,9 @@ + #ifndef MLX4_GLUE_H_ + #define MLX4_GLUE_H_ + ++#include ++#include ++ + /* Verbs headers do not support -pedantic. */ + #ifdef PEDANTIC + #pragma GCC diagnostic ignored "-Wpedantic" +diff --git a/drivers/net/mlx5/mlx5_glue.h b/drivers/net/mlx5/mlx5_glue.h +index 67bd8d0..a2cd18e 100644 +--- a/drivers/net/mlx5/mlx5_glue.h ++++ b/drivers/net/mlx5/mlx5_glue.h +@@ -6,6 +6,7 @@ + #ifndef MLX5_GLUE_H_ + #define MLX5_GLUE_H_ + ++#include + #include + + /* Verbs headers do not support -pedantic. 
*/ +-- +1.8.3.1 + diff --git a/SOURCES/mlnx-dpdk-0007-net-mlx-version-rdma-core-glue-libraries.patch b/SOURCES/mlnx-dpdk-0007-net-mlx-version-rdma-core-glue-libraries.patch new file mode 100644 index 0000000..a430b7e --- /dev/null +++ b/SOURCES/mlnx-dpdk-0007-net-mlx-version-rdma-core-glue-libraries.patch @@ -0,0 +1,206 @@ +From 4e2ccd92941f2865efa749169e7f77064f501121 Mon Sep 17 00:00:00 2001 +From: Adrien Mazarguil +Date: Thu, 1 Feb 2018 14:30:57 +0100 +Subject: [PATCH 7/9] net/mlx: version rdma-core glue libraries + +When built as separate objects, these libraries do not have unique names. +Since they do not maintain a stable ABI, loading an incompatible library +may result in a crash (e.g. in case multiple versions are installed). + +This patch addresses the above by versioning glue libraries, both on the +file system (version suffix) and by comparing a dedicated version field +member in glue structures. + +Signed-off-by: Adrien Mazarguil +(cherry picked from commit a193e731131f460d3c2d59cac0b4f1a34eac92d2) +--- + drivers/net/mlx4/Makefile | 8 ++++++-- + drivers/net/mlx4/mlx4.c | 5 +++++ + drivers/net/mlx4/mlx4_glue.c | 1 + + drivers/net/mlx4/mlx4_glue.h | 6 ++++++ + drivers/net/mlx5/Makefile | 8 ++++++-- + drivers/net/mlx5/mlx5.c | 5 +++++ + drivers/net/mlx5/mlx5_glue.c | 1 + + drivers/net/mlx5/mlx5_glue.h | 6 ++++++ + 8 files changed, 36 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile +index 60ee120..d24565c 100644 +--- a/drivers/net/mlx4/Makefile ++++ b/drivers/net/mlx4/Makefile +@@ -33,7 +33,9 @@ include $(RTE_SDK)/mk/rte.vars.mk + + # Library name. + LIB = librte_pmd_mlx4.a +-LIB_GLUE = librte_pmd_mlx4_glue.so ++LIB_GLUE = $(LIB_GLUE_BASE).$(LIB_GLUE_VERSION) ++LIB_GLUE_BASE = librte_pmd_mlx4_glue.so ++LIB_GLUE_VERSION = 17.11.1 + + # Sources. 
+ SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4.c +@@ -64,6 +66,7 @@ CFLAGS += -D_XOPEN_SOURCE=600 + CFLAGS += $(WERROR_FLAGS) + ifeq ($(CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS),y) + CFLAGS += -DMLX4_GLUE='"$(LIB_GLUE)"' ++CFLAGS += -DMLX4_GLUE_VERSION='"$(LIB_GLUE_VERSION)"' + CFLAGS_mlx4_glue.o += -fPIC + LDLIBS += -ldl + else +@@ -135,6 +138,7 @@ $(LIB): $(LIB_GLUE) + + $(LIB_GLUE): mlx4_glue.o + $Q $(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) \ ++ -Wl,-h,$(LIB_GLUE) \ + -s -shared -o $@ $< -libverbs -lmlx4 + + mlx4_glue.o: mlx4_autoconf.h +@@ -143,6 +147,6 @@ endif + + clean_mlx4: FORCE + $Q rm -f -- mlx4_autoconf.h mlx4_autoconf.h.new +- $Q rm -f -- mlx4_glue.o $(LIB_GLUE) ++ $Q rm -f -- mlx4_glue.o $(LIB_GLUE_BASE)* + + clean: clean_mlx4 +diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c +index 43d1b0a..e41acf1 100644 +--- a/drivers/net/mlx4/mlx4.c ++++ b/drivers/net/mlx4/mlx4.c +@@ -773,6 +773,11 @@ struct mlx4_conf { + assert(((const void *const *)mlx4_glue)[i]); + } + #endif ++ if (strcmp(mlx4_glue->version, MLX4_GLUE_VERSION)) { ++ ERROR("rdma-core glue \"%s\" mismatch: \"%s\" is required", ++ mlx4_glue->version, MLX4_GLUE_VERSION); ++ return; ++ } + mlx4_glue->fork_init(); + rte_pci_register(&mlx4_driver); + } +diff --git a/drivers/net/mlx4/mlx4_glue.c b/drivers/net/mlx4/mlx4_glue.c +index 47ae7ad..3b79d32 100644 +--- a/drivers/net/mlx4/mlx4_glue.c ++++ b/drivers/net/mlx4/mlx4_glue.c +@@ -240,6 +240,7 @@ + } + + const struct mlx4_glue *mlx4_glue = &(const struct mlx4_glue){ ++ .version = MLX4_GLUE_VERSION, + .fork_init = mlx4_glue_fork_init, + .get_async_event = mlx4_glue_get_async_event, + .ack_async_event = mlx4_glue_ack_async_event, +diff --git a/drivers/net/mlx4/mlx4_glue.h b/drivers/net/mlx4/mlx4_glue.h +index de251c6..368f906 100644 +--- a/drivers/net/mlx4/mlx4_glue.h ++++ b/drivers/net/mlx4/mlx4_glue.h +@@ -19,7 +19,13 @@ + #pragma GCC diagnostic error "-Wpedantic" + #endif + ++#ifndef MLX4_GLUE_VERSION ++#define MLX4_GLUE_VERSION "" ++#endif ++ ++/* 
LIB_GLUE_VERSION must be updated every time this structure is modified. */ + struct mlx4_glue { ++ const char *version; + int (*fork_init)(void); + int (*get_async_event)(struct ibv_context *context, + struct ibv_async_event *event); +diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile +index 4b20d71..e75190e 100644 +--- a/drivers/net/mlx5/Makefile ++++ b/drivers/net/mlx5/Makefile +@@ -33,7 +33,9 @@ include $(RTE_SDK)/mk/rte.vars.mk + + # Library name. + LIB = librte_pmd_mlx5.a +-LIB_GLUE = librte_pmd_mlx5_glue.so ++LIB_GLUE = $(LIB_GLUE_BASE).$(LIB_GLUE_VERSION) ++LIB_GLUE_BASE = librte_pmd_mlx5_glue.so ++LIB_GLUE_VERSION = 17.11.1 + + # Sources. + SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5.c +@@ -74,6 +76,7 @@ CFLAGS += $(WERROR_FLAGS) + CFLAGS += -Wno-strict-prototypes + ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y) + CFLAGS += -DMLX5_GLUE='"$(LIB_GLUE)"' ++CFLAGS += -DMLX5_GLUE_VERSION='"$(LIB_GLUE_VERSION)"' + CFLAGS_mlx5_glue.o += -fPIC + LDLIBS += -ldl + else +@@ -180,6 +183,7 @@ $(LIB): $(LIB_GLUE) + + $(LIB_GLUE): mlx5_glue.o + $Q $(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) \ ++ -Wl,-h,$(LIB_GLUE) \ + -s -shared -o $@ $< -libverbs -lmlx5 + + mlx5_glue.o: mlx5_autoconf.h +@@ -188,6 +192,6 @@ endif + + clean_mlx5: FORCE + $Q rm -f -- mlx5_autoconf.h mlx5_autoconf.h.new +- $Q rm -f -- mlx5_glue.o $(LIB_GLUE) ++ $Q rm -f -- mlx5_glue.o $(LIB_GLUE_BASE)* + + clean: clean_mlx5 +diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c +index 6a3746d..6618d2c 100644 +--- a/drivers/net/mlx5/mlx5.c ++++ b/drivers/net/mlx5/mlx5.c +@@ -1099,6 +1099,11 @@ struct mlx5_args { + assert(((const void *const *)mlx5_glue)[i]); + } + #endif ++ if (strcmp(mlx5_glue->version, MLX5_GLUE_VERSION)) { ++ ERROR("rdma-core glue \"%s\" mismatch: \"%s\" is required", ++ mlx5_glue->version, MLX5_GLUE_VERSION); ++ return; ++ } + mlx5_glue->fork_init(); + rte_pci_register(&mlx5_driver); + } +diff --git a/drivers/net/mlx5/mlx5_glue.c b/drivers/net/mlx5/mlx5_glue.c +index 
ff48c1e..eba0ca0 100644 +--- a/drivers/net/mlx5/mlx5_glue.c ++++ b/drivers/net/mlx5/mlx5_glue.c +@@ -314,6 +314,7 @@ + } + + const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){ ++ .version = MLX5_GLUE_VERSION, + .fork_init = mlx5_glue_fork_init, + .alloc_pd = mlx5_glue_alloc_pd, + .dealloc_pd = mlx5_glue_dealloc_pd, +diff --git a/drivers/net/mlx5/mlx5_glue.h b/drivers/net/mlx5/mlx5_glue.h +index a2cd18e..0e6e65f 100644 +--- a/drivers/net/mlx5/mlx5_glue.h ++++ b/drivers/net/mlx5/mlx5_glue.h +@@ -19,6 +19,10 @@ + #pragma GCC diagnostic error "-Wpedantic" + #endif + ++#ifndef MLX5_GLUE_VERSION ++#define MLX5_GLUE_VERSION "" ++#endif ++ + #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT + struct ibv_counter_set; + struct ibv_counter_set_data; +@@ -27,7 +31,9 @@ + struct ibv_query_counter_set_attr; + #endif + ++/* LIB_GLUE_VERSION must be updated every time this structure is modified. */ + struct mlx5_glue { ++ const char *version; + int (*fork_init)(void); + struct ibv_pd *(*alloc_pd)(struct ibv_context *context); + int (*dealloc_pd)(struct ibv_pd *pd); +-- +1.8.3.1 + diff --git a/SOURCES/mlnx-dpdk-0008-net-mlx-make-rdma-core-glue-path-configurable.patch b/SOURCES/mlnx-dpdk-0008-net-mlx-make-rdma-core-glue-path-configurable.patch new file mode 100644 index 0000000..f1d68e4 --- /dev/null +++ b/SOURCES/mlnx-dpdk-0008-net-mlx-make-rdma-core-glue-path-configurable.patch @@ -0,0 +1,209 @@ +From 3218c0fc3b87a634415114bc6ee84444d8399f43 Mon Sep 17 00:00:00 2001 +From: Adrien Mazarguil +Date: Thu, 1 Feb 2018 14:30:57 +0100 +Subject: [PATCH 8/9] net/mlx: make rdma-core glue path configurable + +Since rdma-core glue libraries are intrinsically tied to their respective +PMDs and used as internal plug-ins, their presence in the default search +path among other system libraries for the dynamic linker is not necessarily +desired. + +This commit enables their installation and subsequent look-up at run time +in RTE_EAL_PMD_PATH if configured to a nonempty string. 
This path can also +be overridden by environment variables MLX[45]_GLUE_PATH. + +Signed-off-by: Adrien Mazarguil +(cherry picked from commit 4143e796d50334d4d13e1b2c98139f2ac08c8178) +--- + doc/guides/nics/mlx4.rst | 17 +++++++++++++++++ + doc/guides/nics/mlx5.rst | 14 ++++++++++++++ + drivers/net/mlx4/mlx4.c | 43 ++++++++++++++++++++++++++++++++++++++++++- + drivers/net/mlx5/mlx5.c | 43 ++++++++++++++++++++++++++++++++++++++++++- + 4 files changed, 115 insertions(+), 2 deletions(-) + +diff --git a/doc/guides/nics/mlx4.rst b/doc/guides/nics/mlx4.rst +index 5912722..19ccf9c 100644 +--- a/doc/guides/nics/mlx4.rst ++++ b/doc/guides/nics/mlx4.rst +@@ -97,6 +97,11 @@ These options can be modified in the ``.config`` file. + ``CONFIG_RTE_BUILD_SHARED_LIB`` disabled) and they won't show up as + missing with ``ldd(1)``. + ++ It works by moving these dependencies to a purpose-built rdma-core "glue" ++ plug-in, which must either be installed in ``CONFIG_RTE_EAL_PMD_PATH`` if ++ set, or in a standard location for the dynamic linker (e.g. ``/lib``) if ++ left to the default empty string (``""``). ++ + This option has no performance impact. + + - ``CONFIG_RTE_LIBRTE_MLX4_DEBUG`` (default **n**) +@@ -121,6 +126,18 @@ These options can be modified in the ``.config`` file. + + This value is always 1 for RX queues since they use a single MP. + ++Environment variables ++~~~~~~~~~~~~~~~~~~~~~ ++ ++- ``MLX4_GLUE_PATH`` ++ ++ A list of directories in which to search for the rdma-core "glue" plug-in, ++ separated by colons or semi-colons. ++ ++ Only matters when compiled with ``CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS`` ++ enabled and most useful when ``CONFIG_RTE_EAL_PMD_PATH`` is also set, ++ since ``LD_LIBRARY_PATH`` has no effect in this case. 
++ + Run-time configuration + ~~~~~~~~~~~~~~~~~~~~~~ + +diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst +index 6ee4a47..5f53eed 100644 +--- a/doc/guides/nics/mlx5.rst ++++ b/doc/guides/nics/mlx5.rst +@@ -157,6 +157,11 @@ These options can be modified in the ``.config`` file. + ``CONFIG_RTE_BUILD_SHARED_LIB`` disabled) and they won't show up as + missing with ``ldd(1)``. + ++ It works by moving these dependencies to a purpose-built rdma-core "glue" ++ plug-in, which must either be installed in ``CONFIG_RTE_EAL_PMD_PATH`` if ++ set, or in a standard location for the dynamic linker (e.g. ``/lib``) if ++ left to the default empty string (``""``). ++ + This option has no performance impact. + + - ``CONFIG_RTE_LIBRTE_MLX5_DEBUG`` (default **n**) +@@ -176,6 +181,15 @@ These options can be modified in the ``.config`` file. + Environment variables + ~~~~~~~~~~~~~~~~~~~~~ + ++- ``MLX5_GLUE_PATH`` ++ ++ A list of directories in which to search for the rdma-core "glue" plug-in, ++ separated by colons or semi-colons. ++ ++ Only matters when compiled with ``CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS`` ++ enabled and most useful when ``CONFIG_RTE_EAL_PMD_PATH`` is also set, ++ since ``LD_LIBRARY_PATH`` has no effect in this case. ++ + - ``MLX5_PMD_ENABLE_PADDING`` + + Enables HW packet padding in PCI bus transactions. +diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c +index e41acf1..6b29a8b 100644 +--- a/drivers/net/mlx4/mlx4.c ++++ b/drivers/net/mlx4/mlx4.c +@@ -712,11 +712,52 @@ struct mlx4_conf { + static int + mlx4_glue_init(void) + { ++ const char *path[] = { ++ /* ++ * A basic security check is necessary before trusting ++ * MLX4_GLUE_PATH, which may override RTE_EAL_PMD_PATH. ++ */ ++ (geteuid() == getuid() && getegid() == getgid() ? 
++ getenv("MLX4_GLUE_PATH") : NULL), ++ RTE_EAL_PMD_PATH, ++ }; ++ unsigned int i = 0; + void *handle = NULL; + void **sym; + const char *dlmsg; + +- handle = dlopen(MLX4_GLUE, RTLD_LAZY); ++ while (!handle && i != RTE_DIM(path)) { ++ const char *end; ++ size_t len; ++ int ret; ++ ++ if (!path[i]) { ++ ++i; ++ continue; ++ } ++ end = strpbrk(path[i], ":;"); ++ if (!end) ++ end = path[i] + strlen(path[i]); ++ len = end - path[i]; ++ ret = 0; ++ do { ++ char name[ret + 1]; ++ ++ ret = snprintf(name, sizeof(name), "%.*s%s" MLX4_GLUE, ++ (int)len, path[i], ++ (!len || *(end - 1) == '/') ? "" : "/"); ++ if (ret == -1) ++ break; ++ if (sizeof(name) != (size_t)ret + 1) ++ continue; ++ DEBUG("looking for rdma-core glue as \"%s\"", name); ++ handle = dlopen(name, RTLD_LAZY); ++ break; ++ } while (1); ++ path[i] = end + 1; ++ if (!*end) ++ ++i; ++ } + if (!handle) { + rte_errno = EINVAL; + dlmsg = dlerror(); +diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c +index 6618d2c..403e26b 100644 +--- a/drivers/net/mlx5/mlx5.c ++++ b/drivers/net/mlx5/mlx5.c +@@ -1033,11 +1033,52 @@ struct mlx5_args { + static int + mlx5_glue_init(void) + { ++ const char *path[] = { ++ /* ++ * A basic security check is necessary before trusting ++ * MLX5_GLUE_PATH, which may override RTE_EAL_PMD_PATH. ++ */ ++ (geteuid() == getuid() && getegid() == getgid() ? ++ getenv("MLX5_GLUE_PATH") : NULL), ++ RTE_EAL_PMD_PATH, ++ }; ++ unsigned int i = 0; + void *handle = NULL; + void **sym; + const char *dlmsg; + +- handle = dlopen(MLX5_GLUE, RTLD_LAZY); ++ while (!handle && i != RTE_DIM(path)) { ++ const char *end; ++ size_t len; ++ int ret; ++ ++ if (!path[i]) { ++ ++i; ++ continue; ++ } ++ end = strpbrk(path[i], ":;"); ++ if (!end) ++ end = path[i] + strlen(path[i]); ++ len = end - path[i]; ++ ret = 0; ++ do { ++ char name[ret + 1]; ++ ++ ret = snprintf(name, sizeof(name), "%.*s%s" MLX5_GLUE, ++ (int)len, path[i], ++ (!len || *(end - 1) == '/') ? 
"" : "/"); ++ if (ret == -1) ++ break; ++ if (sizeof(name) != (size_t)ret + 1) ++ continue; ++ DEBUG("looking for rdma-core glue as \"%s\"", name); ++ handle = dlopen(name, RTLD_LAZY); ++ break; ++ } while (1); ++ path[i] = end + 1; ++ if (!*end) ++ ++i; ++ } + if (!handle) { + rte_errno = EINVAL; + dlmsg = dlerror(); +-- +1.8.3.1 + diff --git a/SOURCES/mlnx-dpdk-0009-net-mlx-control-netdevices-through-ioctl-only.patch b/SOURCES/mlnx-dpdk-0009-net-mlx-control-netdevices-through-ioctl-only.patch new file mode 100644 index 0000000..c636665 --- /dev/null +++ b/SOURCES/mlnx-dpdk-0009-net-mlx-control-netdevices-through-ioctl-only.patch @@ -0,0 +1,653 @@ +From 8f419225febbd2f02748fb142aab2c1b96fd3902 Mon Sep 17 00:00:00 2001 +From: Adrien Mazarguil +Date: Wed, 7 Feb 2018 11:45:03 +0100 +Subject: [PATCH 9/9] net/mlx: control netdevices through ioctl only + +Several control operations implemented by these PMDs affect netdevices +through sysfs, itself subject to file system permission checks enforced by +the kernel, which limits their use for most purposes to applications +running with root privileges. + +Since performing the same operations through ioctl() requires fewer +capabilities (only CAP_NET_ADMIN) and given the remaining operations are +already implemented this way, this patch standardizes on ioctl() and gets +rid of redundant code. + +Signed-off-by: Adrien Mazarguil +(cherry picked from commit 84c07af48024fc9d1027770e5143106e16ac49e7) +--- + drivers/net/mlx4/mlx4_ethdev.c | 192 ++----------------------------- + drivers/net/mlx5/mlx5.h | 2 - + drivers/net/mlx5/mlx5_ethdev.c | 255 +++++------------------------------------ + drivers/net/mlx5/mlx5_stats.c | 28 ++++- + 4 files changed, 63 insertions(+), 414 deletions(-) + +diff --git a/drivers/net/mlx4/mlx4_ethdev.c b/drivers/net/mlx4/mlx4_ethdev.c +index e2f9509..cf7afe3 100644 +--- a/drivers/net/mlx4/mlx4_ethdev.c ++++ b/drivers/net/mlx4/mlx4_ethdev.c +@@ -160,167 +160,6 @@ + } + + /** +- * Read from sysfs entry. 
+- * +- * @param[in] priv +- * Pointer to private structure. +- * @param[in] entry +- * Entry name relative to sysfs path. +- * @param[out] buf +- * Data output buffer. +- * @param size +- * Buffer size. +- * +- * @return +- * Number of bytes read on success, negative errno value otherwise and +- * rte_errno is set. +- */ +-static int +-mlx4_sysfs_read(const struct priv *priv, const char *entry, +- char *buf, size_t size) +-{ +- char ifname[IF_NAMESIZE]; +- FILE *file; +- int ret; +- +- ret = mlx4_get_ifname(priv, &ifname); +- if (ret) +- return ret; +- +- MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path, +- ifname, entry); +- +- file = fopen(path, "rb"); +- if (file == NULL) { +- rte_errno = errno; +- return -rte_errno; +- } +- ret = fread(buf, 1, size, file); +- if ((size_t)ret < size && ferror(file)) { +- rte_errno = EIO; +- ret = -rte_errno; +- } else { +- ret = size; +- } +- fclose(file); +- return ret; +-} +- +-/** +- * Write to sysfs entry. +- * +- * @param[in] priv +- * Pointer to private structure. +- * @param[in] entry +- * Entry name relative to sysfs path. +- * @param[in] buf +- * Data buffer. +- * @param size +- * Buffer size. +- * +- * @return +- * Number of bytes written on success, negative errno value otherwise and +- * rte_errno is set. +- */ +-static int +-mlx4_sysfs_write(const struct priv *priv, const char *entry, +- char *buf, size_t size) +-{ +- char ifname[IF_NAMESIZE]; +- FILE *file; +- int ret; +- +- ret = mlx4_get_ifname(priv, &ifname); +- if (ret) +- return ret; +- +- MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path, +- ifname, entry); +- +- file = fopen(path, "wb"); +- if (file == NULL) { +- rte_errno = errno; +- return -rte_errno; +- } +- ret = fwrite(buf, 1, size, file); +- if ((size_t)ret < size || ferror(file)) { +- rte_errno = EIO; +- ret = -rte_errno; +- } else { +- ret = size; +- } +- fclose(file); +- return ret; +-} +- +-/** +- * Get unsigned long sysfs property. 
+- * +- * @param priv +- * Pointer to private structure. +- * @param[in] name +- * Entry name relative to sysfs path. +- * @param[out] value +- * Value output buffer. +- * +- * @return +- * 0 on success, negative errno value otherwise and rte_errno is set. +- */ +-static int +-mlx4_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value) +-{ +- int ret; +- unsigned long value_ret; +- char value_str[32]; +- +- ret = mlx4_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1)); +- if (ret < 0) { +- DEBUG("cannot read %s value from sysfs: %s", +- name, strerror(rte_errno)); +- return ret; +- } +- value_str[ret] = '\0'; +- errno = 0; +- value_ret = strtoul(value_str, NULL, 0); +- if (errno) { +- rte_errno = errno; +- DEBUG("invalid %s value `%s': %s", name, value_str, +- strerror(rte_errno)); +- return -rte_errno; +- } +- *value = value_ret; +- return 0; +-} +- +-/** +- * Set unsigned long sysfs property. +- * +- * @param priv +- * Pointer to private structure. +- * @param[in] name +- * Entry name relative to sysfs path. +- * @param value +- * Value to set. +- * +- * @return +- * 0 on success, negative errno value otherwise and rte_errno is set. +- */ +-static int +-mlx4_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value) +-{ +- int ret; +- MKSTR(value_str, "%lu", value); +- +- ret = mlx4_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1)); +- if (ret < 0) { +- DEBUG("cannot write %s `%s' (%lu) to sysfs: %s", +- name, value_str, value, strerror(rte_errno)); +- return ret; +- } +- return 0; +-} +- +-/** + * Perform ifreq ioctl() on associated Ethernet device. 
+ * + * @param[in] priv +@@ -389,12 +228,12 @@ + int + mlx4_mtu_get(struct priv *priv, uint16_t *mtu) + { +- unsigned long ulong_mtu = 0; +- int ret = mlx4_get_sysfs_ulong(priv, "mtu", &ulong_mtu); ++ struct ifreq request; ++ int ret = mlx4_ifreq(priv, SIOCGIFMTU, &request); + + if (ret) + return ret; +- *mtu = ulong_mtu; ++ *mtu = request.ifr_mtu; + return 0; + } + +@@ -413,20 +252,13 @@ + mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) + { + struct priv *priv = dev->data->dev_private; +- uint16_t new_mtu; +- int ret = mlx4_set_sysfs_ulong(priv, "mtu", mtu); ++ struct ifreq request = { .ifr_mtu = mtu, }; ++ int ret = mlx4_ifreq(priv, SIOCSIFMTU, &request); + + if (ret) + return ret; +- ret = mlx4_mtu_get(priv, &new_mtu); +- if (ret) +- return ret; +- if (new_mtu == mtu) { +- priv->mtu = mtu; +- return 0; +- } +- rte_errno = EINVAL; +- return -rte_errno; ++ priv->mtu = mtu; ++ return 0; + } + + /** +@@ -445,14 +277,14 @@ + static int + mlx4_set_flags(struct priv *priv, unsigned int keep, unsigned int flags) + { +- unsigned long tmp = 0; +- int ret = mlx4_get_sysfs_ulong(priv, "flags", &tmp); ++ struct ifreq request; ++ int ret = mlx4_ifreq(priv, SIOCGIFFLAGS, &request); + + if (ret) + return ret; +- tmp &= keep; +- tmp |= (flags & (~keep)); +- return mlx4_set_sysfs_ulong(priv, "flags", tmp); ++ request.ifr_flags &= keep; ++ request.ifr_flags |= flags & ~keep; ++ return mlx4_ifreq(priv, SIOCSIFFLAGS, &request); + } + + /** +diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h +index e6a69b8..a34121c 100644 +--- a/drivers/net/mlx5/mlx5.h ++++ b/drivers/net/mlx5/mlx5.h +@@ -186,8 +186,6 @@ struct priv { + int mlx5_is_secondary(void); + int priv_get_ifname(const struct priv *, char (*)[IF_NAMESIZE]); + int priv_ifreq(const struct priv *, int req, struct ifreq *); +-int priv_is_ib_cntr(const char *); +-int priv_get_cntr_sysfs(struct priv *, const char *, uint64_t *); + int priv_get_num_vfs(struct priv *, uint16_t *); + int priv_get_mtu(struct priv *, 
uint16_t *); + int priv_set_flags(struct priv *, unsigned int, unsigned int); +diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c +index 5620cce..4dffa4f 100644 +--- a/drivers/net/mlx5/mlx5_ethdev.c ++++ b/drivers/net/mlx5/mlx5_ethdev.c +@@ -35,6 +35,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -228,181 +229,6 @@ struct priv * + } + + /** +- * Check if the counter is located on ib counters file. +- * +- * @param[in] cntr +- * Counter name. +- * +- * @return +- * 1 if counter is located on ib counters file , 0 otherwise. +- */ +-int +-priv_is_ib_cntr(const char *cntr) +-{ +- if (!strcmp(cntr, "out_of_buffer")) +- return 1; +- return 0; +-} +- +-/** +- * Read from sysfs entry. +- * +- * @param[in] priv +- * Pointer to private structure. +- * @param[in] entry +- * Entry name relative to sysfs path. +- * @param[out] buf +- * Data output buffer. +- * @param size +- * Buffer size. +- * +- * @return +- * 0 on success, -1 on failure and errno is set. +- */ +-static int +-priv_sysfs_read(const struct priv *priv, const char *entry, +- char *buf, size_t size) +-{ +- char ifname[IF_NAMESIZE]; +- FILE *file; +- int ret; +- int err; +- +- if (priv_get_ifname(priv, &ifname)) +- return -1; +- +- if (priv_is_ib_cntr(entry)) { +- MKSTR(path, "%s/ports/1/hw_counters/%s", +- priv->ibdev_path, entry); +- file = fopen(path, "rb"); +- } else { +- MKSTR(path, "%s/device/net/%s/%s", +- priv->ibdev_path, ifname, entry); +- file = fopen(path, "rb"); +- } +- if (file == NULL) +- return -1; +- ret = fread(buf, 1, size, file); +- err = errno; +- if (((size_t)ret < size) && (ferror(file))) +- ret = -1; +- else +- ret = size; +- fclose(file); +- errno = err; +- return ret; +-} +- +-/** +- * Write to sysfs entry. +- * +- * @param[in] priv +- * Pointer to private structure. +- * @param[in] entry +- * Entry name relative to sysfs path. +- * @param[in] buf +- * Data buffer. +- * @param size +- * Buffer size. 
+- * +- * @return +- * 0 on success, -1 on failure and errno is set. +- */ +-static int +-priv_sysfs_write(const struct priv *priv, const char *entry, +- char *buf, size_t size) +-{ +- char ifname[IF_NAMESIZE]; +- FILE *file; +- int ret; +- int err; +- +- if (priv_get_ifname(priv, &ifname)) +- return -1; +- +- MKSTR(path, "%s/device/net/%s/%s", priv->ibdev_path, ifname, entry); +- +- file = fopen(path, "wb"); +- if (file == NULL) +- return -1; +- ret = fwrite(buf, 1, size, file); +- err = errno; +- if (((size_t)ret < size) || (ferror(file))) +- ret = -1; +- else +- ret = size; +- fclose(file); +- errno = err; +- return ret; +-} +- +-/** +- * Get unsigned long sysfs property. +- * +- * @param priv +- * Pointer to private structure. +- * @param[in] name +- * Entry name relative to sysfs path. +- * @param[out] value +- * Value output buffer. +- * +- * @return +- * 0 on success, -1 on failure and errno is set. +- */ +-static int +-priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value) +-{ +- int ret; +- unsigned long value_ret; +- char value_str[32]; +- +- ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1)); +- if (ret == -1) { +- DEBUG("cannot read %s value from sysfs: %s", +- name, strerror(errno)); +- return -1; +- } +- value_str[ret] = '\0'; +- errno = 0; +- value_ret = strtoul(value_str, NULL, 0); +- if (errno) { +- DEBUG("invalid %s value `%s': %s", name, value_str, +- strerror(errno)); +- return -1; +- } +- *value = value_ret; +- return 0; +-} +- +-/** +- * Set unsigned long sysfs property. +- * +- * @param priv +- * Pointer to private structure. +- * @param[in] name +- * Entry name relative to sysfs path. +- * @param value +- * Value to set. +- * +- * @return +- * 0 on success, -1 on failure and errno is set. 
+- */ +-static int +-priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value) +-{ +- int ret; +- MKSTR(value_str, "%lu", value); +- +- ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1)); +- if (ret == -1) { +- DEBUG("cannot write %s `%s' (%lu) to sysfs: %s", +- name, value_str, value, strerror(errno)); +- return -1; +- } +- return 0; +-} +- +-/** + * Perform ifreq ioctl() on associated Ethernet device. + * + * @param[in] priv +@@ -445,20 +271,25 @@ struct priv * + { + /* The sysfs entry name depends on the operating system. */ + const char **name = (const char *[]){ +- "device/sriov_numvfs", +- "device/mlx5_num_vfs", ++ "sriov_numvfs", ++ "mlx5_num_vfs", + NULL, + }; +- int ret; + + do { +- unsigned long ulong_num_vfs; ++ int n; ++ FILE *file; ++ MKSTR(path, "%s/device/%s", priv->ibdev_path, *name); + +- ret = priv_get_sysfs_ulong(priv, *name, &ulong_num_vfs); +- if (!ret) +- *num_vfs = ulong_num_vfs; +- } while (*(++name) && ret); +- return ret; ++ file = fopen(path, "rb"); ++ if (!file) ++ continue; ++ n = fscanf(file, "%" SCNu16, num_vfs); ++ fclose(file); ++ if (n == 1) ++ return 0; ++ } while (*(++name)); ++ return -1; + } + + /** +@@ -475,35 +306,12 @@ struct priv * + int + priv_get_mtu(struct priv *priv, uint16_t *mtu) + { +- unsigned long ulong_mtu; ++ struct ifreq request; ++ int ret = priv_ifreq(priv, SIOCGIFMTU, &request); + +- if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1) +- return -1; +- *mtu = ulong_mtu; +- return 0; +-} +- +-/** +- * Read device counter from sysfs. +- * +- * @param priv +- * Pointer to private structure. +- * @param name +- * Counter name. +- * @param[out] cntr +- * Counter output buffer. +- * +- * @return +- * 0 on success, -1 on failure and errno is set. 
+- */ +-int +-priv_get_cntr_sysfs(struct priv *priv, const char *name, uint64_t *cntr) +-{ +- unsigned long ulong_ctr; +- +- if (priv_get_sysfs_ulong(priv, name, &ulong_ctr) == -1) +- return -1; +- *cntr = ulong_ctr; ++ if (ret) ++ return ret; ++ *mtu = request.ifr_mtu; + return 0; + } + +@@ -521,15 +329,9 @@ struct priv * + static int + priv_set_mtu(struct priv *priv, uint16_t mtu) + { +- uint16_t new_mtu; ++ struct ifreq request = { .ifr_mtu = mtu, }; + +- if (priv_set_sysfs_ulong(priv, "mtu", mtu) || +- priv_get_mtu(priv, &new_mtu)) +- return -1; +- if (new_mtu == mtu) +- return 0; +- errno = EINVAL; +- return -1; ++ return priv_ifreq(priv, SIOCSIFMTU, &request); + } + + /** +@@ -548,13 +350,14 @@ struct priv * + int + priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags) + { +- unsigned long tmp; ++ struct ifreq request; ++ int ret = priv_ifreq(priv, SIOCGIFFLAGS, &request); + +- if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1) +- return -1; +- tmp &= keep; +- tmp |= (flags & (~keep)); +- return priv_set_sysfs_ulong(priv, "flags", tmp); ++ if (ret) ++ return ret; ++ request.ifr_flags &= keep; ++ request.ifr_flags |= flags & ~keep; ++ return priv_ifreq(priv, SIOCSIFFLAGS, &request); + } + + /** +diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c +index 5e225d3..48422cc 100644 +--- a/drivers/net/mlx5/mlx5_stats.c ++++ b/drivers/net/mlx5/mlx5_stats.c +@@ -31,8 +31,11 @@ + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + ++#include + #include + #include ++#include ++#include + + #include + #include +@@ -47,6 +50,7 @@ struct mlx5_counter_ctrl { + char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE]; + /* Name of the counter on the device table. */ + char ctr_name[RTE_ETH_XSTATS_NAME_SIZE]; ++ uint32_t ib:1; /**< Nonzero for IB counters. 
*/ + }; + + static const struct mlx5_counter_ctrl mlx5_counters_init[] = { +@@ -121,6 +125,7 @@ struct mlx5_counter_ctrl { + { + .dpdk_name = "rx_out_of_buffer", + .ctr_name = "out_of_buffer", ++ .ib = 1, + }, + }; + +@@ -157,13 +162,24 @@ struct mlx5_counter_ctrl { + return -1; + } + for (i = 0; i != xstats_n; ++i) { +- if (priv_is_ib_cntr(mlx5_counters_init[i].ctr_name)) +- priv_get_cntr_sysfs(priv, +- mlx5_counters_init[i].ctr_name, +- &stats[i]); +- else ++ if (mlx5_counters_init[i].ib) { ++ FILE *file; ++ MKSTR(path, "%s/ports/1/hw_counters/%s", ++ priv->ibdev_path, ++ mlx5_counters_init[i].ctr_name); ++ ++ file = fopen(path, "rb"); ++ if (file) { ++ int n = fscanf(file, "%" SCNu64, &stats[i]); ++ ++ fclose(file); ++ if (n != 1) ++ stats[i] = 0; ++ } ++ } else { + stats[i] = (uint64_t) + et_stats->data[xstats_ctrl->dev_table_idx[i]]; ++ } + } + return 0; + } +@@ -246,7 +262,7 @@ struct mlx5_counter_ctrl { + } + } + for (j = 0; j != xstats_n; ++j) { +- if (priv_is_ib_cntr(mlx5_counters_init[j].ctr_name)) ++ if (mlx5_counters_init[j].ib) + continue; + if (xstats_ctrl->dev_table_idx[j] >= dev_stats_n) { + WARN("counter \"%s\" is not recognized", +-- +1.8.3.1 + diff --git a/SOURCES/ppc_64-power8-linuxapp-gcc-config b/SOURCES/ppc_64-power8-linuxapp-gcc-config index a8dcb9a..7ce33a1 100644 --- a/SOURCES/ppc_64-power8-linuxapp-gcc-config +++ b/SOURCES/ppc_64-power8-linuxapp-gcc-config @@ -1,4 +1,4 @@ -# -*- cfg-sha: 4d1578565c23e449d8e5c1c18e88181f05769b5132b7f22dcbed6bce900e9d0c +# -*- cfg-sha: 66873b0483ad6c1ddd2ac86b1c5a551b36bb44ee99fc23b2be6a5274e7a35efe # BSD LICENSE # Copyright (C) IBM Corporation 2014. 
# Redistribution and use in source and binary forms, with or without @@ -199,10 +199,12 @@ CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR=y CONFIG_RTE_LIBRTE_MLX4_PMD=n CONFIG_RTE_LIBRTE_MLX4_DEBUG=n CONFIG_RTE_LIBRTE_MLX4_DEBUG_BROKEN_VERBS=n +CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS=n CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE=8 # Compile burst-oriented Mellanox ConnectX-4 & ConnectX-5 (MLX5) PMD CONFIG_RTE_LIBRTE_MLX5_PMD=n CONFIG_RTE_LIBRTE_MLX5_DEBUG=n +CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS=n CONFIG_RTE_LIBRTE_MLX5_TX_MP_CACHE=8 # Compile burst-oriented Broadcom PMD driver CONFIG_RTE_LIBRTE_BNX2X_PMD=n diff --git a/SOURCES/x86_64-native-linuxapp-gcc-config b/SOURCES/x86_64-native-linuxapp-gcc-config index 9e18711..957b03a 100644 --- a/SOURCES/x86_64-native-linuxapp-gcc-config +++ b/SOURCES/x86_64-native-linuxapp-gcc-config @@ -1,4 +1,4 @@ -# -*- cfg-sha: 56176386deef83f9f1fd9d1c143a20be1294c8ed5e720aaef37e4b007ccbbde3 +# -*- cfg-sha: 0f230d691937d738d80b3e9ed66fd3e89feaf2268d62541b31569230fada3ea2 # BSD LICENSE # Copyright(c) 2010-2014 Intel Corporation. All rights reserved. # All rights reserved. 
@@ -197,13 +197,15 @@ CONFIG_RTE_LIBRTE_FM10K_DEBUG_DRIVER=n CONFIG_RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE=y CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR=y # Compile burst-oriented Mellanox ConnectX-3 (MLX4) PMD -CONFIG_RTE_LIBRTE_MLX4_PMD=n +CONFIG_RTE_LIBRTE_MLX4_PMD=y CONFIG_RTE_LIBRTE_MLX4_DEBUG=n CONFIG_RTE_LIBRTE_MLX4_DEBUG_BROKEN_VERBS=n +CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS=y CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE=8 # Compile burst-oriented Mellanox ConnectX-4 & ConnectX-5 (MLX5) PMD -CONFIG_RTE_LIBRTE_MLX5_PMD=n +CONFIG_RTE_LIBRTE_MLX5_PMD=y CONFIG_RTE_LIBRTE_MLX5_DEBUG=n +CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS=y CONFIG_RTE_LIBRTE_MLX5_TX_MP_CACHE=8 # Compile burst-oriented Broadcom PMD driver CONFIG_RTE_LIBRTE_BNX2X_PMD=n @@ -226,12 +228,12 @@ CONFIG_RTE_LIBRTE_ENIC_PMD=y CONFIG_RTE_LIBRTE_ENIC_DEBUG=n CONFIG_RTE_LIBRTE_ENIC_DEBUG_FLOW=n # Compile burst-oriented Netronome NFP PMD driver -CONFIG_RTE_LIBRTE_NFP_PMD=n +CONFIG_RTE_LIBRTE_NFP_PMD=y CONFIG_RTE_LIBRTE_NFP_DEBUG=n # Compile Marvell PMD driver CONFIG_RTE_LIBRTE_MRVL_PMD=n # Compile burst-oriented Broadcom BNXT PMD driver -CONFIG_RTE_LIBRTE_BNXT_PMD=n +CONFIG_RTE_LIBRTE_BNXT_PMD=y # Compile burst-oriented Solarflare libefx-based PMD CONFIG_RTE_LIBRTE_SFC_EFX_PMD=n CONFIG_RTE_LIBRTE_SFC_EFX_DEBUG=n @@ -308,7 +310,7 @@ CONFIG_RTE_LIBRTE_PMD_BOND=n CONFIG_RTE_LIBRTE_BOND_DEBUG_ALB=n CONFIG_RTE_LIBRTE_BOND_DEBUG_ALB_L1=n # QLogic 10G/25G/40G/50G/100G PMD -CONFIG_RTE_LIBRTE_QEDE_PMD=n +CONFIG_RTE_LIBRTE_QEDE_PMD=y CONFIG_RTE_LIBRTE_QEDE_DEBUG_INIT=n CONFIG_RTE_LIBRTE_QEDE_DEBUG_INFO=n CONFIG_RTE_LIBRTE_QEDE_DEBUG_DRIVER=n diff --git a/SPECS/dpdk.spec b/SPECS/dpdk.spec index 7f769d3..cc52ff4 100644 --- a/SPECS/dpdk.spec +++ b/SPECS/dpdk.spec @@ -5,7 +5,7 @@ # Dont edit Version: and Release: directly, only these: %define ver 17.11 -%define rel 7 +%define rel 11 %define srcname dpdk # Define when building git snapshots @@ -32,9 +32,111 @@ Source504: arm64-armv8a-linuxapp-gcc-config Source505: ppc_64-power8-linuxapp-gcc-config 
Source506: x86_64-native-linuxapp-gcc-config -Patch0: dpdk-dev-v2-1-4-net-virtio-fix-vector-Rx-break-caused-by-rxq-flushing.patch -Patch1: 0001-vhost_user_protect_active_rings_from_async_ring_changes.patch -Patch2: 0001-bus-pci-forbid-IOVA-mode-if-IOMMU-address-width-too-.patch +# Patches only in dpdk package +Patch0: dpdk-dev-v2-1-4-net-virtio-fix-vector-Rx-break-caused-by-rxq-flushing.patch +Patch2: 0001-bus-pci-forbid-IOVA-mode-if-IOMMU-address-width-too-.patch + +# Patches in common with the openvswitch package +Patch400: 0001-vhost_user_protect_active_rings_from_async_ring_changes.patch + +Patch410: 0001-net-enic-fix-crash-due-to-static-max-number-of-queue.patch +Patch411: 0001-net-enic-fix-L4-Rx-ptype-comparison.patch + +Patch420: 0001-vhost-prevent-features-to-be-changed-while-device-is.patch +Patch421: 0002-vhost-propagate-set-features-handling-error.patch +Patch422: 0003-vhost-extract-virtqueue-cleaning-and-freeing-functio.patch +Patch423: 0004-vhost-destroy-unused-virtqueues-when-multiqueue-not-.patch +Patch424: 0005-vhost-add-flag-for-built-in-virtio-driver.patch +Patch425: 0006-vhost-drop-virtqueues-only-with-built-in-virtio-driv.patch +Patch426: 0001-vhost-fix-IOTLB-pool-out-of-memory-handling.patch +Patch427: 0001-vhost-remove-pending-IOTLB-entry-if-miss-request-fai.patch + +Patch430: 0001-net-mlx5-use-PCI-address-as-port-name.patch +Patch435: 0001-net-mlx4-fix-broadcast-Rx.patch + +# Backport MLX patches to avoid runtime dependencies on rdma-core +Patch451: mlnx-dpdk-0001-net-mlx4-move-rdma-core-calls-to-separate-file.patch +Patch452: mlnx-dpdk-0002-net-mlx4-spawn-rdma-core-dependency-plug-in.patch +Patch453: mlnx-dpdk-0003-net-mlx5-move-rdma-core-calls-to-separate-file.patch +Patch454: mlnx-dpdk-0004-net-mlx5-spawn-rdma-core-dependency-plug-in.patch +Patch455: mlnx-dpdk-0005-net-mlx-add-debug-checks-to-glue-structure.patch +Patch456: mlnx-dpdk-0006-net-mlx-fix-missing-includes-for-rdma-core-glue.patch +Patch457: 
mlnx-dpdk-0007-net-mlx-version-rdma-core-glue-libraries.patch +Patch458: mlnx-dpdk-0008-net-mlx-make-rdma-core-glue-path-configurable.patch + +# Fixes for allowing to run as non-root +Patch459: mlnx-dpdk-0009-net-mlx-control-netdevices-through-ioctl-only.patch + +# Backport bnxt patch to fix link down issues when autonegotiation is turned off +Patch460: 0001-net-bnxt-fix-link-speed-setting-with-autoneg-off.patch + +# Bug 1559612 +Patch465: dpdk-17.11-i40e-fix-link-status-timeout.patch + +# QEDE fixes +Patch468: 0001-net-qede-fix-MTU-set-and-max-Rx-length.patch +Patch469: 0001-net-qede-fix-few-log-messages.patch + +# Bug 1566712 +Patch470: 0001-net-nfp-support-CPP.patch +Patch471: 0002-net-nfp-use-new-CPP-interface.patch +Patch472: 0003-net-nfp-remove-files.patch + +# Bug 1567634 +Patch475: bnxt-dpdk-0001-net-bnxt-cache-address-of-doorbell-to-subsequent-acc.patch +Patch476: bnxt-dpdk-0002-net-bnxt-avoid-invalid-vnic-id-in-set-L2-Rx-mask.patch +Patch477: bnxt-dpdk-0003-net-bnxt-fix-mbuf-data-offset-initialization.patch + +# Bug 1544298 +# DPDK CVE-2018-1059 : Information exposure in unchecked guest physical to host virtual address +Patch480: 0001-vhost-fix-indirect-descriptors-table-translation-siz.patch +Patch481: 0002-vhost-check-all-range-is-mapped-when-translating-GPA.patch +Patch482: 0003-vhost-introduce-safe-API-for-GPA-translation.patch +Patch483: 0004-vhost-ensure-all-range-is-mapped-when-translating-QV.patch +Patch484: 0005-vhost-add-support-for-non-contiguous-indirect-descs-.patch +Patch485: 0006-vhost-handle-virtually-non-contiguous-buffers-in-Tx.patch +Patch486: 0007-vhost-handle-virtually-non-contiguous-buffers-in-Rx.patch +Patch487: 0008-vhost-handle-virtually-non-contiguous-buffers-in-Rx-.patch +Patch488: 0009-examples-vhost-move-to-safe-GPA-translation-API.patch +Patch489: 0010-examples-vhost_scsi-move-to-safe-GPA-translation-API.patch +Patch490: 0011-vhost-deprecate-unsafe-GPA-translation-API.patch + +# enic fixes +Patch500: 
0001-net-enic-allocate-stats-DMA-buffer-upfront-during-pr.patch +Patch501: 0001-net-enic-fix-crash-on-MTU-update-with-non-setup-queu.patch + +# Bug 1575067 +Patch510: 0001-net-nfp-fix-mbufs-releasing-when-stop-or-close.patch + +# Bug 1560728 +Patch520: 0001-eal-abstract-away-the-auxiliary-vector.patch +Patch521: 0001-eal-fix-build-with-glibc-2.16.patch +Patch522: 0002-eal-fix-build-on-FreeBSD.patch + +# Bug 1552465 +Patch530: 0001-vhost-improve-dirty-pages-logging-performance.patch + +# Bug 1583161 +Patch540: 0001-net-nfp-configure-default-RSS-reta-table.patch + +# Bug 1583670 +Patch545: 0001-net-nfp-fix-lock-file-usage.patch + +# Bug 1578981 +Patch550: 0001-net-qede-fix-L2-handles-used-for-RSS-hash-update.patch + +# Bug 1578590 +Patch555: 0001-net-qede-fix-unicast-filter-routine-return-code.patch + +# Bug 1589866 +Patch560: 0001-net-qede-fix-memory-alloc-for-multiple-port-reconfig.patch + +# Bug 1581230 +Patch570: 0001-net-mlx5-fix-memory-region-cache-lookup.patch +Patch571: 0001-net-mlx5-fix-memory-region-boundary-checks.patch + +# Patches only in dpdk package +Patch700: 0001-net-mlx-fix-rdma-core-glue-path-with-EAL-plugins.patch Summary: Set of libraries and drivers for fast packet processing @@ -77,8 +179,12 @@ ExclusiveArch: x86_64 aarch64 ppc64le %define incdir %{_includedir}/%{name} %define pmddir %{_libdir}/%{name}-pmds -BuildRequires: kernel-headers, zlib-devel, numactl-devel +BuildRequires: gcc, kernel-headers, zlib-devel, numactl-devel BuildRequires: doxygen, python-sphinx +%ifarch x86_64 +BuildRequires: rdma-core-devel >= 15 +%global __requires_exclude_from ^%{_libdir}/librte_pmd_mlx[45]_glue\.so.*$ +%endif %description The Data Plane Development Kit is a set of libraries and drivers for @@ -120,10 +226,7 @@ as L2 and L3 forwarding. 
%endif %prep -%setup -q -n %{srcname}-%{srcver} -%patch0 -p1 -%patch1 -p1 -%patch2 -p1 +%autosetup -n %{srcname}-%{srcver} -p1 %build # In case dpdk-devel is installed @@ -164,6 +267,15 @@ unset RTE_SDK RTE_INCLUDE RTE_TARGET mkdir -p %{buildroot}/%{pmddir} for f in %{buildroot}/%{_libdir}/*_pmd_*.so.*; do bn=$(basename ${f}) +%ifarch x86_64 + case $bn in + librte_pmd_mlx[45]_glue.so.*) + mkdir -p %{buildroot}/%{pmddir}-glue + ln -s ../${bn} %{buildroot}%{pmddir}-glue/${bn} + continue + ;; + esac +%endif ln -s ../${bn} %{buildroot}%{pmddir}/${bn} done @@ -215,6 +327,10 @@ sed -i -e 's:-%{machine_tmpl}-:-%{machine}-:g' %{buildroot}/%{_sysconfdir}/profi %dir %{pmddir} %{_libdir}/*.so.* %{pmddir}/*.so.* +%ifarch x86_64 +%dir %{pmddir}-glue +%{pmddir}-glue/*.so.* +%endif %files doc #BSD @@ -249,6 +365,22 @@ sed -i -e 's:-%{machine_tmpl}-:-%{machine}-:g' %{buildroot}/%{_sysconfdir}/profi %endif %changelog +* Thu Jun 14 2018 Timothy Redaelli - 17.11-11 +- Re-align with DPDK patches inside OVS FDP 18.06 (#1591198) + +* Mon Jun 11 2018 Aaron Conole - 17.11-10 +- Fix mlx5 memory region boundary checks (#1581230) + +* Thu Jun 07 2018 Timothy Redaelli - 17.11-9 +- Add 2 missing QEDE patches +- Fix previous changelog date + +* Thu Jun 07 2018 Timothy Redaelli - 17.11-8 +- Align with DPDK patches inside OVS FDP 18.06 +- Enable BNXT, MLX4, MLX5, NFP and QEDE PMDs +- Backport "net/mlx: fix rdma-core glue path with EAL plugins" (only needed on + DPDK package) + * Wed Jan 31 2018 Kevin Traynor - 17.11-7 - Backport to forbid IOVA mode if IOMMU address width too small (#1530957)