linma / rpms / iproute

Forked from rpms/iproute 4 years ago
Clone

Blame SOURCES/0195-devlink-implement-shared-buffer-occupancy-control.patch

049c96
From fbd89ab4f4ffdcbda971a3a4ea6096cc4971d04e Mon Sep 17 00:00:00 2001
049c96
From: Phil Sutter <psutter@redhat.com>
049c96
Date: Sat, 9 Jul 2016 11:33:14 +0200
049c96
Subject: [PATCH] devlink: implement shared buffer occupancy control
049c96
049c96
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1342515
049c96
Upstream Status: iproute2.git commit a60ebcb6f34f4
049c96
049c96
commit a60ebcb6f34f4c43cba092f52b1150d7fb1deec5
049c96
Author: Jiri Pirko <jiri@mellanox.com>
049c96
Date:   Fri Apr 15 09:51:52 2016 +0200
049c96
049c96
    devlink: implement shared buffer occupancy control
049c96
049c96
    Use kernel shared buffer occupancy control commands to take a snapshot and
049c96
    clear occupancy watermarks. Also, allow showing occupancy values in a
049c96
    nice way.
049c96
049c96
    Signed-off-by: Jiri Pirko <jiri@mellanox.com>
049c96
---
049c96
 devlink/devlink.c       | 349 ++++++++++++++++++++++++++++++++++++++++++++++++
049c96
 include/linux/devlink.h |   6 +
049c96
 2 files changed, 355 insertions(+)
049c96
049c96
diff --git a/devlink/devlink.c b/devlink/devlink.c
049c96
index 228807f..ffefa86 100644
049c96
--- a/devlink/devlink.c
049c96
+++ b/devlink/devlink.c
049c96
@@ -27,6 +27,12 @@
049c96
 
049c96
 #define pr_err(args...) fprintf(stderr, ##args)
049c96
 #define pr_out(args...) fprintf(stdout, ##args)
049c96
+#define pr_out_sp(num, args...)					\
049c96
+	do {							\
049c96
+		int ret = fprintf(stdout, ##args);		\
049c96
+		if (ret < num)					\
049c96
+			fprintf(stdout, "%*s", num - ret, "");	\
049c96
+	} while (0)
049c96
 
049c96
 static int _mnlg_socket_recv_run(struct mnlg_socket *nlg,
049c96
 				 mnl_cb_t data_cb, void *data)
049c96
@@ -275,6 +281,12 @@ static int attr_cb(const struct nlattr *attr, void *data)
049c96
 	if (type == DEVLINK_ATTR_SB_TC_INDEX &&
049c96
 	    mnl_attr_validate(attr, MNL_TYPE_U16) < 0)
049c96
 		return MNL_CB_ERROR;
049c96
+	if (type == DEVLINK_ATTR_SB_OCC_CUR &&
049c96
+	    mnl_attr_validate(attr, MNL_TYPE_U32) < 0)
049c96
+		return MNL_CB_ERROR;
049c96
+	if (type == DEVLINK_ATTR_SB_OCC_MAX &&
049c96
+	    mnl_attr_validate(attr, MNL_TYPE_U32) < 0)
049c96
+		return MNL_CB_ERROR;
049c96
 	tb[type] = attr;
049c96
 	return MNL_CB_OK;
049c96
 }
049c96
@@ -864,6 +876,7 @@ static bool dl_dump_filter(struct dl *dl, struct nlattr **tb)
049c96
 	struct nlattr *attr_bus_name = tb[DEVLINK_ATTR_BUS_NAME];
049c96
 	struct nlattr *attr_dev_name = tb[DEVLINK_ATTR_DEV_NAME];
049c96
 	struct nlattr *attr_port_index = tb[DEVLINK_ATTR_PORT_INDEX];
049c96
+	struct nlattr *attr_sb_index = tb[DEVLINK_ATTR_SB_INDEX];
049c96
 
049c96
 	if (opts->present & DL_OPT_HANDLE &&
049c96
 	    attr_bus_name && attr_dev_name) {
049c96
@@ -885,6 +898,12 @@ static bool dl_dump_filter(struct dl *dl, struct nlattr **tb)
049c96
 		    port_index != opts->port_index)
049c96
 			return false;
049c96
 	}
049c96
+	if (opts->present & DL_OPT_SB && attr_sb_index) {
049c96
+		uint32_t sb_index = mnl_attr_get_u32(attr_sb_index);
049c96
+
049c96
+		if (sb_index != opts->sb_index)
049c96
+			return false;
049c96
+	}
049c96
 	return true;
049c96
 }
049c96
 
049c96
@@ -1168,6 +1187,9 @@ static void cmd_sb_help(void)
049c96
 	pr_out("       devlink sb tc bind set DEV/PORT_INDEX [ sb SB_INDEX ] tc TC_INDEX\n");
049c96
 	pr_out("                              type { ingress | egress } pool POOL_INDEX\n");
049c96
 	pr_out("                              th THRESHOLD\n");
049c96
+	pr_out("       devlink sb occupancy show { DEV | DEV/PORT_INDEX } [ sb SB_INDEX ]\n");
049c96
+	pr_out("       devlink sb occupancy snapshot DEV [ sb SB_INDEX ]\n");
049c96
+	pr_out("       devlink sb occupancy clearmax DEV [ sb SB_INDEX ]\n");
049c96
 }
049c96
 
049c96
 static void pr_out_sb(struct nlattr **tb)
049c96
@@ -1504,6 +1526,330 @@ static int cmd_sb_tc(struct dl *dl)
049c96
 	return -ENOENT;
049c96
 }
049c96
 
049c96
+struct occ_item {
049c96
+	struct list_head list;
049c96
+	uint32_t index;
049c96
+	uint32_t cur;
049c96
+	uint32_t max;
049c96
+	uint32_t bound_pool_index;
049c96
+};
049c96
+
049c96
+struct occ_port {
049c96
+	struct list_head list;
049c96
+	char *bus_name;
049c96
+	char *dev_name;
049c96
+	uint32_t port_index;
049c96
+	uint32_t sb_index;
049c96
+	struct list_head pool_list;
049c96
+	struct list_head ing_tc_list;
049c96
+	struct list_head eg_tc_list;
049c96
+};
049c96
+
049c96
+struct occ_show {
049c96
+	struct dl *dl;
049c96
+	int err;
049c96
+	struct list_head port_list;
049c96
+};
049c96
+
049c96
+static struct occ_item *occ_item_alloc(void)
049c96
+{
049c96
+	return calloc(1, sizeof(struct occ_item));
049c96
+}
049c96
+
049c96
+static void occ_item_free(struct occ_item *occ_item)
049c96
+{
049c96
+	free(occ_item);
049c96
+}
049c96
+
049c96
+static struct occ_port *occ_port_alloc(uint32_t port_index)
049c96
+{
049c96
+	struct occ_port *occ_port;
049c96
+
049c96
+	occ_port = calloc(1, sizeof(*occ_port));
049c96
+	if (!occ_port)
049c96
+		return NULL;
049c96
+	occ_port->port_index = port_index;
049c96
+	INIT_LIST_HEAD(&occ_port->pool_list);
049c96
+	INIT_LIST_HEAD(&occ_port->ing_tc_list);
049c96
+	INIT_LIST_HEAD(&occ_port->eg_tc_list);
049c96
+	return occ_port;
049c96
+}
049c96
+
049c96
+static void occ_port_free(struct occ_port *occ_port)
049c96
+{
049c96
+	struct occ_item *occ_item, *tmp;
049c96
+
049c96
+	list_for_each_entry_safe(occ_item, tmp, &occ_port->pool_list, list)
049c96
+		occ_item_free(occ_item);
049c96
+	list_for_each_entry_safe(occ_item, tmp, &occ_port->ing_tc_list, list)
049c96
+		occ_item_free(occ_item);
049c96
+	list_for_each_entry_safe(occ_item, tmp, &occ_port->eg_tc_list, list)
049c96
+		occ_item_free(occ_item);
049c96
+}
049c96
+
049c96
+static struct occ_show *occ_show_alloc(struct dl *dl)
049c96
+{
049c96
+	struct occ_show *occ_show;
049c96
+
049c96
+	occ_show = calloc(1, sizeof(*occ_show));
049c96
+	if (!occ_show)
049c96
+		return NULL;
049c96
+	occ_show->dl = dl;
049c96
+	INIT_LIST_HEAD(&occ_show->port_list);
049c96
+	return occ_show;
049c96
+}
049c96
+
049c96
+static void occ_show_free(struct occ_show *occ_show)
049c96
+{
049c96
+	struct occ_port *occ_port, *tmp;
049c96
+
049c96
+	list_for_each_entry_safe(occ_port, tmp, &occ_show->port_list, list)
049c96
+		occ_port_free(occ_port);
049c96
+}
049c96
+
049c96
+static struct occ_port *occ_port_get(struct occ_show *occ_show,
049c96
+				     struct nlattr **tb)
049c96
+{
049c96
+	struct occ_port *occ_port;
049c96
+	uint32_t port_index;
049c96
+
049c96
+	port_index = mnl_attr_get_u32(tb[DEVLINK_ATTR_PORT_INDEX]);
049c96
+
049c96
+	list_for_each_entry_reverse(occ_port, &occ_show->port_list, list) {
049c96
+		if (occ_port->port_index == port_index)
049c96
+			return occ_port;
049c96
+	}
049c96
+	occ_port = occ_port_alloc(port_index);
049c96
+	if (!occ_port)
049c96
+		return NULL;
049c96
+	list_add_tail(&occ_port->list, &occ_show->port_list);
049c96
+	return occ_port;
049c96
+}
049c96
+
049c96
+static void pr_out_occ_show_item_list(const char *label, struct list_head *list,
049c96
+				      bool bound_pool)
049c96
+{
049c96
+	struct occ_item *occ_item;
049c96
+	int i = 1;
049c96
+
049c96
+	pr_out_sp(7, "  %s:", label);
049c96
+	list_for_each_entry(occ_item, list, list) {
049c96
+		if ((i - 1) % 4 == 0 && i != 1)
049c96
+			pr_out_sp(7, " ");
049c96
+		if (bound_pool)
049c96
+			pr_out_sp(7, "%2u(%u):", occ_item->index,
049c96
+				  occ_item->bound_pool_index);
049c96
+		else
049c96
+			pr_out_sp(7, "%2u:", occ_item->index);
049c96
+		pr_out_sp(15, "%7u/%u", occ_item->cur, occ_item->max);
049c96
+		if (i++ % 4 == 0)
049c96
+			pr_out("\n");
049c96
+	}
049c96
+	if ((i - 1) % 4 != 0)
049c96
+		pr_out("\n");
049c96
+}
049c96
+
049c96
+static void pr_out_occ_show_port(struct occ_port *occ_port)
049c96
+{
049c96
+	pr_out_occ_show_item_list("pool", &occ_port->pool_list, false);
049c96
+	pr_out_occ_show_item_list("itc", &occ_port->ing_tc_list, true);
049c96
+	pr_out_occ_show_item_list("etc", &occ_port->eg_tc_list, true);
049c96
+}
049c96
+
049c96
+static void pr_out_occ_show(struct occ_show *occ_show)
049c96
+{
049c96
+	struct dl *dl = occ_show->dl;
049c96
+	struct dl_opts *opts = &dl->opts;
049c96
+	struct occ_port *occ_port;
049c96
+
049c96
+	list_for_each_entry(occ_port, &occ_show->port_list, list) {
049c96
+		__pr_out_port_handle_nice(dl, opts->bus_name, opts->dev_name,
049c96
+					  occ_port->port_index);
049c96
+		pr_out(":\n");
049c96
+		pr_out_occ_show_port(occ_port);
049c96
+	}
049c96
+}
049c96
+
049c96
+static void cmd_sb_occ_port_pool_process(struct occ_show *occ_show,
049c96
+					 struct nlattr **tb)
049c96
+{
049c96
+	struct occ_port *occ_port;
049c96
+	struct occ_item *occ_item;
049c96
+
049c96
+	if (occ_show->err || !dl_dump_filter(occ_show->dl, tb))
049c96
+		return;
049c96
+
049c96
+	occ_port = occ_port_get(occ_show, tb);
049c96
+	if (!occ_port) {
049c96
+		occ_show->err = -ENOMEM;
049c96
+		return;
049c96
+	}
049c96
+
049c96
+	occ_item = occ_item_alloc();
049c96
+	if (!occ_item) {
049c96
+		occ_show->err = -ENOMEM;
049c96
+		return;
049c96
+	}
049c96
+	occ_item->index = mnl_attr_get_u16(tb[DEVLINK_ATTR_SB_POOL_INDEX]);
049c96
+	occ_item->cur = mnl_attr_get_u32(tb[DEVLINK_ATTR_SB_OCC_CUR]);
049c96
+	occ_item->max = mnl_attr_get_u32(tb[DEVLINK_ATTR_SB_OCC_MAX]);
049c96
+	list_add_tail(&occ_item->list, &occ_port->pool_list);
049c96
+}
049c96
+
049c96
+static int cmd_sb_occ_port_pool_process_cb(const struct nlmsghdr *nlh, void *data)
049c96
+{
049c96
+	struct occ_show *occ_show = data;
049c96
+	struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {};
049c96
+	struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
049c96
+
049c96
+	mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb);
049c96
+	if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] ||
049c96
+	    !tb[DEVLINK_ATTR_PORT_INDEX] || !tb[DEVLINK_ATTR_SB_INDEX] ||
049c96
+	    !tb[DEVLINK_ATTR_SB_POOL_INDEX] ||
049c96
+	    !tb[DEVLINK_ATTR_SB_OCC_CUR] || !tb[DEVLINK_ATTR_SB_OCC_MAX])
049c96
+		return MNL_CB_ERROR;
049c96
+	cmd_sb_occ_port_pool_process(occ_show, tb);
049c96
+	return MNL_CB_OK;
049c96
+}
049c96
+
049c96
+static void cmd_sb_occ_tc_pool_process(struct occ_show *occ_show,
049c96
+				       struct nlattr **tb)
049c96
+{
049c96
+	struct occ_port *occ_port;
049c96
+	struct occ_item *occ_item;
049c96
+	uint8_t pool_type;
049c96
+
049c96
+	if (occ_show->err || !dl_dump_filter(occ_show->dl, tb))
049c96
+		return;
049c96
+
049c96
+	occ_port = occ_port_get(occ_show, tb);
049c96
+	if (!occ_port) {
049c96
+		occ_show->err = -ENOMEM;
049c96
+		return;
049c96
+	}
049c96
+
049c96
+	occ_item = occ_item_alloc();
049c96
+	if (!occ_item) {
049c96
+		occ_show->err = -ENOMEM;
049c96
+		return;
049c96
+	}
049c96
+	occ_item->index = mnl_attr_get_u16(tb[DEVLINK_ATTR_SB_TC_INDEX]);
049c96
+	occ_item->cur = mnl_attr_get_u32(tb[DEVLINK_ATTR_SB_OCC_CUR]);
049c96
+	occ_item->max = mnl_attr_get_u32(tb[DEVLINK_ATTR_SB_OCC_MAX]);
049c96
+	occ_item->bound_pool_index =
049c96
+			mnl_attr_get_u16(tb[DEVLINK_ATTR_SB_POOL_INDEX]);
049c96
+	pool_type = mnl_attr_get_u8(tb[DEVLINK_ATTR_SB_POOL_TYPE]);
049c96
+	if (pool_type == DEVLINK_SB_POOL_TYPE_INGRESS)
049c96
+		list_add_tail(&occ_item->list, &occ_port->ing_tc_list);
049c96
+	else if (pool_type == DEVLINK_SB_POOL_TYPE_EGRESS)
049c96
+		list_add_tail(&occ_item->list, &occ_port->eg_tc_list);
049c96
+	else
049c96
+		occ_item_free(occ_item);
049c96
+}
049c96
+
049c96
+static int cmd_sb_occ_tc_pool_process_cb(const struct nlmsghdr *nlh, void *data)
049c96
+{
049c96
+	struct occ_show *occ_show = data;
049c96
+	struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {};
049c96
+	struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
049c96
+
049c96
+	mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb);
049c96
+	if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] ||
049c96
+	    !tb[DEVLINK_ATTR_PORT_INDEX] || !tb[DEVLINK_ATTR_SB_INDEX] ||
049c96
+	    !tb[DEVLINK_ATTR_SB_TC_INDEX] || !tb[DEVLINK_ATTR_SB_POOL_TYPE] ||
049c96
+	    !tb[DEVLINK_ATTR_SB_POOL_INDEX] ||
049c96
+	    !tb[DEVLINK_ATTR_SB_OCC_CUR] || !tb[DEVLINK_ATTR_SB_OCC_MAX])
049c96
+		return MNL_CB_ERROR;
049c96
+	cmd_sb_occ_tc_pool_process(occ_show, tb);
049c96
+	return MNL_CB_OK;
049c96
+}
049c96
+
049c96
+static int cmd_sb_occ_show(struct dl *dl)
049c96
+{
049c96
+	struct nlmsghdr *nlh;
049c96
+	struct occ_show *occ_show;
049c96
+	uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP;
049c96
+	int err;
049c96
+
049c96
+	err = dl_argv_parse(dl, DL_OPT_HANDLE | DL_OPT_HANDLEP, DL_OPT_SB);
049c96
+	if (err)
049c96
+		return err;
049c96
+
049c96
+	occ_show = occ_show_alloc(dl);
049c96
+	if (!occ_show)
049c96
+		return -ENOMEM;
049c96
+
049c96
+	nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_SB_PORT_POOL_GET, flags);
049c96
+
049c96
+	err = _mnlg_socket_sndrcv(dl->nlg, nlh,
049c96
+				  cmd_sb_occ_port_pool_process_cb, occ_show);
049c96
+	if (err)
049c96
+		goto out;
049c96
+
049c96
+	nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_SB_TC_POOL_BIND_GET, flags);
049c96
+
049c96
+	err = _mnlg_socket_sndrcv(dl->nlg, nlh,
049c96
+				  cmd_sb_occ_tc_pool_process_cb, occ_show);
049c96
+	if (err)
049c96
+		goto out;
049c96
+
049c96
+	pr_out_occ_show(occ_show);
049c96
+
049c96
+out:
049c96
+	occ_show_free(occ_show);
049c96
+	return err;
049c96
+}
049c96
+
049c96
+static int cmd_sb_occ_snapshot(struct dl *dl)
049c96
+{
049c96
+	struct nlmsghdr *nlh;
049c96
+	int err;
049c96
+
049c96
+	nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_SB_OCC_SNAPSHOT,
049c96
+			       NLM_F_REQUEST | NLM_F_ACK);
049c96
+
049c96
+	err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE, DL_OPT_SB);
049c96
+	if (err)
049c96
+		return err;
049c96
+
049c96
+	return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL);
049c96
+}
049c96
+
049c96
+static int cmd_sb_occ_clearmax(struct dl *dl)
049c96
+{
049c96
+	struct nlmsghdr *nlh;
049c96
+	int err;
049c96
+
049c96
+	nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_SB_OCC_MAX_CLEAR,
049c96
+			       NLM_F_REQUEST | NLM_F_ACK);
049c96
+
049c96
+	err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE, DL_OPT_SB);
049c96
+	if (err)
049c96
+		return err;
049c96
+
049c96
+	return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL);
049c96
+}
049c96
+
049c96
+static int cmd_sb_occ(struct dl *dl)
049c96
+{
049c96
+	if (dl_argv_match(dl, "help") || dl_no_arg(dl)) {
049c96
+		cmd_sb_help();
049c96
+		return 0;
049c96
+	} else if (dl_argv_match(dl, "show") ||
049c96
+		   dl_argv_match(dl, "list")) {
049c96
+		dl_arg_inc(dl);
049c96
+		return cmd_sb_occ_show(dl);
049c96
+	} else if (dl_argv_match(dl, "snapshot")) {
049c96
+		dl_arg_inc(dl);
049c96
+		return cmd_sb_occ_snapshot(dl);
049c96
+	} else if (dl_argv_match(dl, "clearmax")) {
049c96
+		dl_arg_inc(dl);
049c96
+		return cmd_sb_occ_clearmax(dl);
049c96
+	}
049c96
+	pr_err("Command \"%s\" not found\n", dl_argv(dl));
049c96
+	return -ENOENT;
049c96
+}
049c96
+
049c96
 static int cmd_sb(struct dl *dl)
049c96
 {
049c96
 	if (dl_argv_match(dl, "help")) {
049c96
@@ -1522,6 +1868,9 @@ static int cmd_sb(struct dl *dl)
049c96
 	} else if (dl_argv_match(dl, "tc")) {
049c96
 		dl_arg_inc(dl);
049c96
 		return cmd_sb_tc(dl);
049c96
+	} else if (dl_argv_match(dl, "occupancy")) {
049c96
+		dl_arg_inc(dl);
049c96
+		return cmd_sb_occ(dl);
049c96
 	}
049c96
 	pr_err("Command \"%s\" not found\n", dl_argv(dl));
049c96
 	return -ENOENT;
049c96
diff --git a/include/linux/devlink.h b/include/linux/devlink.h
049c96
index d40699f..0e21d00 100644
049c96
--- a/include/linux/devlink.h
049c96
+++ b/include/linux/devlink.h
049c96
@@ -53,6 +53,10 @@ enum devlink_command {
049c96
 	DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
049c96
 	DEVLINK_CMD_SB_TC_POOL_BIND_DEL,
049c96
 
049c96
+	/* Shared buffer occupancy monitoring commands */
049c96
+	DEVLINK_CMD_SB_OCC_SNAPSHOT,
049c96
+	DEVLINK_CMD_SB_OCC_MAX_CLEAR,
049c96
+
049c96
 	/* add new commands above here */
049c96
 
049c96
 	__DEVLINK_CMD_MAX,
049c96
@@ -119,6 +123,8 @@ enum devlink_attr {
049c96
 	DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE,	/* u8 */
049c96
 	DEVLINK_ATTR_SB_THRESHOLD,		/* u32 */
049c96
 	DEVLINK_ATTR_SB_TC_INDEX,		/* u16 */
049c96
+	DEVLINK_ATTR_SB_OCC_CUR,		/* u32 */
049c96
+	DEVLINK_ATTR_SB_OCC_MAX,		/* u32 */
049c96
 
049c96
 	/* add new attributes above here, update the policy in devlink.c */
049c96
 
049c96
-- 
049c96
1.8.3.1
049c96