Blame SOURCES/0058-bpf-implement-bpf-to-bpf-calls-support.patch

8def76
From 80dcb40f8442f79a043c520ae9eef067519ee7ca Mon Sep 17 00:00:00 2001
8def76
From: Andrea Claudi <aclaudi@redhat.com>
8def76
Date: Thu, 13 Jun 2019 14:37:56 +0200
8def76
Subject: [PATCH] bpf: implement bpf to bpf calls support
8def76
8def76
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1716361
8def76
Upstream Status: iproute2.git commit b5cb33aec65cb
8def76
8def76
commit b5cb33aec65cb77183abbdfa5b61ecc9877ec776
8def76
Author: Daniel Borkmann <daniel@iogearbox.net>
8def76
Date:   Wed Jul 18 01:31:21 2018 +0200
8def76
8def76
    bpf: implement bpf to bpf calls support
8def76
8def76
    Implement missing bpf to bpf calls support. The loader will
8def76
    recognize .text section and handle relocation entries that
8def76
    are emitted by LLVM.
8def76
8def76
    First step is processing of map related relocation entries
8def76
    for .text section, and in a second step loader will copy .text
8def76
    section into program section and adjust call instruction
8def76
    offset accordingly.
8def76
8def76
    Example with test_xdp_noinline.o from kernel selftests:
8def76
8def76
     1) Every function as __attribute__ ((always_inline)), rest
8def76
        left unchanged:
8def76
8def76
      # ip -force link set dev lo xdp obj test_xdp_noinline.o sec xdp-test
8def76
      # ip a
8def76
      1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 xdpgeneric/id:233 qdisc noqueue state UNKNOWN group default qlen 1000
8def76
        link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
8def76
        inet 127.0.0.1/8 scope host lo
8def76
           valid_lft forever preferred_lft forever
8def76
        inet6 ::1/128 scope host
8def76
           valid_lft forever preferred_lft forever
8def76
      [...]
8def76
      # bpftool prog dump xlated id 233
8def76
      [...]
8def76
      1669: (2d) if r3 > r2 goto pc+4
8def76
      1670: (79) r2 = *(u64 *)(r10 -136)
8def76
      1671: (61) r2 = *(u32 *)(r2 +0)
8def76
      1672: (63) *(u32 *)(r1 +0) = r2
8def76
      1673: (b7) r0 = 1
8def76
      1674: (95) exit        <-- 1674 insns total
8def76
8def76
     2) Every function as __attribute__ ((noinline)), rest
8def76
        left unchanged:
8def76
8def76
      # ip -force link set dev lo xdp obj test_xdp_noinline.o sec xdp-test
8def76
      # ip a
8def76
      1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 xdpgeneric/id:236 qdisc noqueue state UNKNOWN group default qlen 1000
8def76
        link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
8def76
        inet 127.0.0.1/8 scope host lo
8def76
           valid_lft forever preferred_lft forever
8def76
        inet6 ::1/128 scope host
8def76
           valid_lft forever preferred_lft forever
8def76
      [...]
8def76
      # bpftool prog dump xlated id 236
8def76
      [...]
8def76
      1000: (bf) r1 = r6
8def76
      1001: (b7) r2 = 24
8def76
      1002: (85) call pc+3   <-- pc-relative call insns
8def76
      1003: (1f) r7 -= r0
8def76
      1004: (bf) r0 = r7
8def76
      1005: (95) exit
8def76
      1006: (bf) r0 = r1
8def76
      1007: (bf) r1 = r2
8def76
      1008: (67) r1 <<= 32
8def76
      1009: (77) r1 >>= 32
8def76
      1010: (bf) r3 = r0
8def76
      1011: (6f) r3 <<= r1
8def76
      1012: (87) r2 = -r2
8def76
      1013: (57) r2 &= 31
8def76
      1014: (67) r0 <<= 32
8def76
      1015: (77) r0 >>= 32
8def76
      1016: (7f) r0 >>= r2
8def76
      1017: (4f) r0 |= r3
8def76
      1018: (95) exit        <-- 1018 insns total
8def76
8def76
    Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
8def76
    Signed-off-by: David Ahern <dsahern@gmail.com>
8def76
---
8def76
 lib/bpf.c | 233 ++++++++++++++++++++++++++++++++++++------------------
8def76
 1 file changed, 157 insertions(+), 76 deletions(-)
8def76
8def76
diff --git a/lib/bpf.c b/lib/bpf.c
8def76
index ead8b5a7219f0..1b87490555050 100644
8def76
--- a/lib/bpf.c
8def76
+++ b/lib/bpf.c
8def76
@@ -1109,7 +1109,8 @@ int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
8def76
 #ifdef HAVE_ELF
8def76
 struct bpf_elf_prog {
8def76
 	enum bpf_prog_type	type;
8def76
-	const struct bpf_insn	*insns;
8def76
+	struct bpf_insn		*insns;
8def76
+	unsigned int		insns_num;
8def76
 	size_t			size;
8def76
 	const char		*license;
8def76
 };
8def76
@@ -1135,11 +1136,13 @@ struct bpf_elf_ctx {
8def76
 	int			map_fds[ELF_MAX_MAPS];
8def76
 	struct bpf_elf_map	maps[ELF_MAX_MAPS];
8def76
 	struct bpf_map_ext	maps_ext[ELF_MAX_MAPS];
8def76
+	struct bpf_elf_prog	prog_text;
8def76
 	int			sym_num;
8def76
 	int			map_num;
8def76
 	int			map_len;
8def76
 	bool			*sec_done;
8def76
 	int			sec_maps;
8def76
+	int			sec_text;
8def76
 	char			license[ELF_MAX_LICENSE_LEN];
8def76
 	enum bpf_prog_type	type;
8def76
 	__u32			ifindex;
8def76
@@ -1904,12 +1907,25 @@ static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section,
8def76
 	return 0;
8def76
 }
8def76
 
8def76
+static int bpf_fetch_text(struct bpf_elf_ctx *ctx, int section,
8def76
+			  struct bpf_elf_sec_data *data)
8def76
+{
8def76
+	ctx->sec_text = section;
8def76
+	ctx->sec_done[section] = true;
8def76
+	return 0;
8def76
+}
8def76
+
8def76
 static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx)
8def76
 {
8def76
 	return ctx->sym_tab && ctx->str_tab && ctx->sec_maps;
8def76
 }
8def76
 
8def76
-static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx)
8def76
+static bool bpf_has_call_data(const struct bpf_elf_ctx *ctx)
8def76
+{
8def76
+	return ctx->sec_text;
8def76
+}
8def76
+
8def76
+static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx, bool check_text_sec)
8def76
 {
8def76
 	struct bpf_elf_sec_data data;
8def76
 	int i, ret = -1;
8def76
@@ -1925,6 +1941,11 @@ static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx)
8def76
 		else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
8def76
 			 !strcmp(data.sec_name, ELF_SECTION_LICENSE))
8def76
 			ret = bpf_fetch_license(ctx, i, &data);
8def76
+		else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
8def76
+			 (data.sec_hdr.sh_flags & SHF_EXECINSTR) &&
8def76
+			 !strcmp(data.sec_name, ".text") &&
8def76
+			 check_text_sec)
8def76
+			ret = bpf_fetch_text(ctx, i, &data);
8def76
 		else if (data.sec_hdr.sh_type == SHT_SYMTAB &&
8def76
 			 !strcmp(data.sec_name, ".symtab"))
8def76
 			ret = bpf_fetch_symtab(ctx, i, &data);
8def76
@@ -1969,17 +1990,18 @@ static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section,
8def76
 		ret = bpf_fill_section_data(ctx, i, &data);
8def76
 		if (ret < 0 ||
8def76
 		    !(data.sec_hdr.sh_type == SHT_PROGBITS &&
8def76
-		      data.sec_hdr.sh_flags & SHF_EXECINSTR &&
8def76
+		      (data.sec_hdr.sh_flags & SHF_EXECINSTR) &&
8def76
 		      !strcmp(data.sec_name, section)))
8def76
 			continue;
8def76
 
8def76
 		*sseen = true;
8def76
 
8def76
 		memset(&prog, 0, sizeof(prog));
8def76
-		prog.type    = ctx->type;
8def76
-		prog.insns   = data.sec_data->d_buf;
8def76
-		prog.size    = data.sec_data->d_size;
8def76
-		prog.license = ctx->license;
8def76
+		prog.type      = ctx->type;
8def76
+		prog.license   = ctx->license;
8def76
+		prog.size      = data.sec_data->d_size;
8def76
+		prog.insns_num = prog.size / sizeof(struct bpf_insn);
8def76
+		prog.insns     = data.sec_data->d_buf;
8def76
 
8def76
 		fd = bpf_prog_attach(section, &prog, ctx);
8def76
 		if (fd < 0)
8def76
@@ -1992,84 +2014,120 @@ static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section,
8def76
 	return fd;
8def76
 }
8def76
 
8def76
-struct bpf_tail_call_props {
8def76
-	unsigned int total;
8def76
-	unsigned int jited;
8def76
+struct bpf_relo_props {
8def76
+	struct bpf_tail_call {
8def76
+		unsigned int total;
8def76
+		unsigned int jited;
8def76
+	} tc;
8def76
+	int main_num;
8def76
 };
8def76
 
8def76
+static int bpf_apply_relo_map(struct bpf_elf_ctx *ctx, struct bpf_elf_prog *prog,
8def76
+			      GElf_Rel *relo, GElf_Sym *sym,
8def76
+			      struct bpf_relo_props *props)
8def76
+{
8def76
+	unsigned int insn_off = relo->r_offset / sizeof(struct bpf_insn);
8def76
+	unsigned int map_idx = sym->st_value / ctx->map_len;
8def76
+
8def76
+	if (insn_off >= prog->insns_num)
8def76
+		return -EINVAL;
8def76
+	if (prog->insns[insn_off].code != (BPF_LD | BPF_IMM | BPF_DW)) {
8def76
+		fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n",
8def76
+			insn_off);
8def76
+		return -EINVAL;
8def76
+	}
8def76
+
8def76
+	if (map_idx >= ARRAY_SIZE(ctx->map_fds))
8def76
+		return -EINVAL;
8def76
+	if (!ctx->map_fds[map_idx])
8def76
+		return -EINVAL;
8def76
+	if (ctx->maps[map_idx].type == BPF_MAP_TYPE_PROG_ARRAY) {
8def76
+		props->tc.total++;
8def76
+		if (ctx->maps_ext[map_idx].owner.jited ||
8def76
+		    (ctx->maps_ext[map_idx].owner.type == 0 &&
8def76
+		     ctx->cfg.jit_enabled))
8def76
+			props->tc.jited++;
8def76
+	}
8def76
+
8def76
+	prog->insns[insn_off].src_reg = BPF_PSEUDO_MAP_FD;
8def76
+	prog->insns[insn_off].imm = ctx->map_fds[map_idx];
8def76
+	return 0;
8def76
+}
8def76
+
8def76
+static int bpf_apply_relo_call(struct bpf_elf_ctx *ctx, struct bpf_elf_prog *prog,
8def76
+			       GElf_Rel *relo, GElf_Sym *sym,
8def76
+			       struct bpf_relo_props *props)
8def76
+{
8def76
+	unsigned int insn_off = relo->r_offset / sizeof(struct bpf_insn);
8def76
+	struct bpf_elf_prog *prog_text = &ctx->prog_text;
8def76
+
8def76
+	if (insn_off >= prog->insns_num)
8def76
+		return -EINVAL;
8def76
+	if (prog->insns[insn_off].code != (BPF_JMP | BPF_CALL) &&
8def76
+	    prog->insns[insn_off].src_reg != BPF_PSEUDO_CALL) {
8def76
+		fprintf(stderr, "ELF contains relo data for non call instruction at offset %u! Compiler bug?!\n",
8def76
+			insn_off);
8def76
+		return -EINVAL;
8def76
+	}
8def76
+
8def76
+	if (!props->main_num) {
8def76
+		struct bpf_insn *insns = realloc(prog->insns,
8def76
+						 prog->size + prog_text->size);
8def76
+		if (!insns)
8def76
+			return -ENOMEM;
8def76
+
8def76
+		memcpy(insns + prog->insns_num, prog_text->insns,
8def76
+		       prog_text->size);
8def76
+		props->main_num = prog->insns_num;
8def76
+		prog->insns = insns;
8def76
+		prog->insns_num += prog_text->insns_num;
8def76
+		prog->size += prog_text->size;
8def76
+	}
8def76
+
8def76
+	prog->insns[insn_off].imm += props->main_num - insn_off;
8def76
+	return 0;
8def76
+}
8def76
+
8def76
 static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx,
8def76
 			       struct bpf_elf_sec_data *data_relo,
8def76
-			       struct bpf_elf_sec_data *data_insn,
8def76
-			       struct bpf_tail_call_props *props)
8def76
+			       struct bpf_elf_prog *prog,
8def76
+			       struct bpf_relo_props *props)
8def76
 {
8def76
-	Elf_Data *idata = data_insn->sec_data;
8def76
 	GElf_Shdr *rhdr = &data_relo->sec_hdr;
8def76
 	int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize;
8def76
-	struct bpf_insn *insns = idata->d_buf;
8def76
-	unsigned int num_insns = idata->d_size / sizeof(*insns);
8def76
 
8def76
 	for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
8def76
-		unsigned int ioff, rmap;
8def76
 		GElf_Rel relo;
8def76
 		GElf_Sym sym;
8def76
+		int ret = -EIO;
8def76
 
8def76
 		if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
8def76
 			return -EIO;
8def76
-
8def76
-		ioff = relo.r_offset / sizeof(struct bpf_insn);
8def76
-		if (ioff >= num_insns ||
8def76
-		    insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) {
8def76
-			fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n",
8def76
-				ioff);
8def76
-			fprintf(stderr, " - Current section: %s\n", data_relo->sec_name);
8def76
-			if (ioff < num_insns &&
8def76
-			    insns[ioff].code == (BPF_JMP | BPF_CALL))
8def76
-				fprintf(stderr, " - Try to annotate functions with always_inline attribute!\n");
8def76
-			return -EINVAL;
8def76
-		}
8def76
-
8def76
 		if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
8def76
 			return -EIO;
8def76
-		if (sym.st_shndx != ctx->sec_maps) {
8def76
-			fprintf(stderr, "ELF contains non-map related relo data in entry %u pointing to section %u! Compiler bug?!\n",
8def76
-				relo_ent, sym.st_shndx);
8def76
-			return -EIO;
8def76
-		}
8def76
 
8def76
-		rmap = sym.st_value / ctx->map_len;
8def76
-		if (rmap >= ARRAY_SIZE(ctx->map_fds))
8def76
-			return -EINVAL;
8def76
-		if (!ctx->map_fds[rmap])
8def76
-			return -EINVAL;
8def76
-		if (ctx->maps[rmap].type == BPF_MAP_TYPE_PROG_ARRAY) {
8def76
-			props->total++;
8def76
-			if (ctx->maps_ext[rmap].owner.jited ||
8def76
-			    (ctx->maps_ext[rmap].owner.type == 0 &&
8def76
-			     ctx->cfg.jit_enabled))
8def76
-				props->jited++;
8def76
-		}
8def76
-
8def76
-		if (ctx->verbose)
8def76
-			fprintf(stderr, "Map \'%s\' (%d) injected into prog section \'%s\' at offset %u!\n",
8def76
-				bpf_str_tab_name(ctx, &sym), ctx->map_fds[rmap],
8def76
-				data_insn->sec_name, ioff);
8def76
-
8def76
-		insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
8def76
-		insns[ioff].imm     = ctx->map_fds[rmap];
8def76
+		if (sym.st_shndx == ctx->sec_maps)
8def76
+			ret = bpf_apply_relo_map(ctx, prog, &relo, &sym, props);
8def76
+		else if (sym.st_shndx == ctx->sec_text)
8def76
+			ret = bpf_apply_relo_call(ctx, prog, &relo, &sym, props);
8def76
+		else
8def76
+			fprintf(stderr, "ELF contains non-{map,call} related relo data in entry %u pointing to section %u! Compiler bug?!\n",
8def76
+				relo_ent, sym.st_shndx);
8def76
+		if (ret < 0)
8def76
+			return ret;
8def76
 	}
8def76
 
8def76
 	return 0;
8def76
 }
8def76
 
8def76
 static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
8def76
-			       bool *lderr, bool *sseen)
8def76
+			       bool *lderr, bool *sseen, struct bpf_elf_prog *prog)
8def76
 {
8def76
 	struct bpf_elf_sec_data data_relo, data_insn;
8def76
-	struct bpf_elf_prog prog;
8def76
 	int ret, idx, i, fd = -1;
8def76
 
8def76
 	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
8def76
-		struct bpf_tail_call_props props = {};
8def76
+		struct bpf_relo_props props = {};
8def76
 
8def76
 		ret = bpf_fill_section_data(ctx, i, &data_relo);
8def76
 		if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
8def76
@@ -2080,40 +2138,54 @@ static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
8def76
 		ret = bpf_fill_section_data(ctx, idx, &data_insn);
8def76
 		if (ret < 0 ||
8def76
 		    !(data_insn.sec_hdr.sh_type == SHT_PROGBITS &&
8def76
-		      data_insn.sec_hdr.sh_flags & SHF_EXECINSTR &&
8def76
+		      (data_insn.sec_hdr.sh_flags & SHF_EXECINSTR) &&
8def76
 		      !strcmp(data_insn.sec_name, section)))
8def76
 			continue;
8def76
+		if (sseen)
8def76
+			*sseen = true;
8def76
+
8def76
+		memset(prog, 0, sizeof(*prog));
8def76
+		prog->type = ctx->type;
8def76
+		prog->license = ctx->license;
8def76
+		prog->size = data_insn.sec_data->d_size;
8def76
+		prog->insns_num = prog->size / sizeof(struct bpf_insn);
8def76
+		prog->insns = malloc(prog->size);
8def76
+		if (!prog->insns) {
8def76
+			*lderr = true;
8def76
+			return -ENOMEM;
8def76
+		}
8def76
 
8def76
-		*sseen = true;
8def76
+		memcpy(prog->insns, data_insn.sec_data->d_buf, prog->size);
8def76
 
8def76
-		ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn, &props);
8def76
+		ret = bpf_apply_relo_data(ctx, &data_relo, prog, &props);
8def76
 		if (ret < 0) {
8def76
 			*lderr = true;
8def76
+			if (ctx->sec_text != idx)
8def76
+				free(prog->insns);
8def76
 			return ret;
8def76
 		}
8def76
+		if (ctx->sec_text == idx) {
8def76
+			fd = 0;
8def76
+			goto out;
8def76
+		}
8def76
 
8def76
-		memset(&prog, 0, sizeof(prog));
8def76
-		prog.type    = ctx->type;
8def76
-		prog.insns   = data_insn.sec_data->d_buf;
8def76
-		prog.size    = data_insn.sec_data->d_size;
8def76
-		prog.license = ctx->license;
8def76
-
8def76
-		fd = bpf_prog_attach(section, &prog, ctx);
8def76
+		fd = bpf_prog_attach(section, prog, ctx);
8def76
+		free(prog->insns);
8def76
 		if (fd < 0) {
8def76
 			*lderr = true;
8def76
-			if (props.total) {
8def76
+			if (props.tc.total) {
8def76
 				if (ctx->cfg.jit_enabled &&
8def76
-				    props.total != props.jited)
8def76
+				    props.tc.total != props.tc.jited)
8def76
 					fprintf(stderr, "JIT enabled, but only %u/%u tail call maps in the program have JITed owner!\n",
8def76
-						props.jited, props.total);
8def76
+						props.tc.jited, props.tc.total);
8def76
 				if (!ctx->cfg.jit_enabled &&
8def76
-				    props.jited)
8def76
+				    props.tc.jited)
8def76
 					fprintf(stderr, "JIT disabled, but %u/%u tail call maps in the program have JITed owner!\n",
8def76
-						props.jited, props.total);
8def76
+						props.tc.jited, props.tc.total);
8def76
 			}
8def76
 			return fd;
8def76
 		}
8def76
-
8def76
+out:
8def76
 		ctx->sec_done[i]   = true;
8def76
 		ctx->sec_done[idx] = true;
8def76
 		break;
8def76
@@ -2125,10 +2197,18 @@ static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
8def76
 static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section)
8def76
 {
8def76
 	bool lderr = false, sseen = false;
8def76
+	struct bpf_elf_prog prog;
8def76
 	int ret = -1;
8def76
 
8def76
-	if (bpf_has_map_data(ctx))
8def76
-		ret = bpf_fetch_prog_relo(ctx, section, &lderr, &sseen);
8def76
+	if (bpf_has_call_data(ctx)) {
8def76
+		ret = bpf_fetch_prog_relo(ctx, ".text", &lderr, NULL,
8def76
+					  &ctx->prog_text);
8def76
+		if (ret < 0)
8def76
+			return ret;
8def76
+	}
8def76
+
8def76
+	if (bpf_has_map_data(ctx) || bpf_has_call_data(ctx))
8def76
+		ret = bpf_fetch_prog_relo(ctx, section, &lderr, &sseen, &prog);
8def76
 	if (ret < 0 && !lderr)
8def76
 		ret = bpf_fetch_prog(ctx, section, &sseen);
8def76
 	if (ret < 0 && !sseen)
8def76
@@ -2525,6 +2605,7 @@ static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure)
8def76
 
8def76
 	bpf_hash_destroy(ctx);
8def76
 
8def76
+	free(ctx->prog_text.insns);
8def76
 	free(ctx->sec_done);
8def76
 	free(ctx->log);
8def76
 
8def76
@@ -2546,7 +2627,7 @@ static int bpf_obj_open(const char *pathname, enum bpf_prog_type type,
8def76
 		return ret;
8def76
 	}
8def76
 
8def76
-	ret = bpf_fetch_ancillary(ctx);
8def76
+	ret = bpf_fetch_ancillary(ctx, strcmp(section, ".text"));
8def76
 	if (ret < 0) {
8def76
 		fprintf(stderr, "Error fetching ELF ancillary data!\n");
8def76
 		goto out;
8def76
-- 
8def76
2.20.1
8def76