Blob Blame History Raw
From acb0ee316d5ed776253b6d7bfccfb21e0005919b Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Thu, 16 May 2019 14:56:50 +0200
Subject: [PATCH] fix clang_parser for LLVM 8+

LLVM changed the behavior of clang_Cursor_isAnonymous in [1]. The old
behavior would return false for direct-access anonymous structs
within other structs, whereas LLVM 8 returns true. To circumvent this
behavior change among LLVM versions, we keep track of all structs
defined within a struct. We don't parse the substruct recursively (if we
do, it might be parsed multiple times, and since we don't know yet if
this is a direct or indirect structure, we might parse it incorrectly),
instead we keep the cursor saved in a map. If this substruct is later
declared as an attribute of the superstruct, that means we have a
direct-accessible struct. We remove it from our map, and we parse
recursively (parsing recursively the cursor pointing to the declaration
will effectively parse the struct definition).

After the first parse, any struct left in our map is an indirect struct.
Since we skipped the parsing stage for those, we need to run
`clang_visitChildren` again for each struct cursor we have saved. We
repeat this until there are no unvisited structs in the map. Keep in mind
that while visiting a new struct we might find more indirect structs.

Also add Travis jobs to test against LLVM and clang 8 on Ubuntu.

[1]: llvm/llvm-project@c05e6f4
---
 .travis.yml                    |  4 ++
 docker/Dockerfile.bionic-llvm8 | 36 ++++++++++++++
 src/bpforc.h                   |  9 ++++
 src/clang_parser.cpp           | 87 +++++++++++++++++++++++-----------
 4 files changed, 108 insertions(+), 28 deletions(-)
 create mode 100644 docker/Dockerfile.bionic-llvm8

diff --git a/.travis.yml b/.travis.yml
index 7fbff63..b539868 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,6 +20,10 @@ sudo: required
       env: BASE=fedora29 TYPE=Debug
     - name: "LLVM 7 Release"
       env: BASE=fedora29 TYPE=Release
+    - name: "LLVM 8 Debug"
+      env: BASE=bionic-llvm8 TYPE=Debug
+    - name: "LLVM 8 Release"
+      env: BASE=bionic-llvm8 TYPE=Release
   allow_failures:
     - name: "Static LLVM 5 Debug"
       env: BASE=alpine TYPE=Debug STATIC_LINKING=ON TEST_ARGS="--gtest_filter=-codegen.string_equal_comparison:codegen.string_not_equal_comparison"
diff --git a/docker/Dockerfile.bionic-llvm8 b/docker/Dockerfile.bionic-llvm8
new file mode 100644
index 0000000..ebf10d3
--- /dev/null
+++ b/docker/Dockerfile.bionic-llvm8
@@ -0,0 +1,36 @@
+FROM ubuntu:bionic
+
+ENV llvmVersion=8
+
+RUN apt-get update && apt-get install -y curl gnupg &&\
+    llvmRepository='\n\
+deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic main\n\
+deb-src http://apt.llvm.org/bionic/ llvm-toolchain-bionic main\n\
+# 8\n\
+deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main\n\
+deb-src http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main' && \
+    echo $llvmRepository >> /etc/apt/sources.list && \
+    curl -L https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
+    apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 4052245BD4284CDD && \
+    echo "deb https://repo.iovisor.org/apt/bionic bionic main" | tee /etc/apt/sources.list.d/iovisor.list
+
+RUN apt-get update && apt-get install -y \
+      bison \
+      cmake \
+      flex \
+      g++ \
+      git \
+      libelf-dev \
+      zlib1g-dev \
+      libbcc \
+      clang-${llvmVersion} \
+      libclang-${llvmVersion}-dev \
+      libclang-common-${llvmVersion}-dev \
+      libclang1-${llvmVersion} \
+      llvm-${llvmVersion} \
+      llvm-${llvmVersion}-dev \
+      llvm-${llvmVersion}-runtime \
+      libllvm${llvmVersion}
+
+COPY build.sh /build.sh
+ENTRYPOINT ["bash", "/build.sh"]
diff --git a/src/bpforc.h b/src/bpforc.h
index 1c134d0..8eede31 100644
--- a/src/bpforc.h
+++ b/src/bpforc.h
@@ -80,8 +80,13 @@ class BpfOrc
   ExecutionSession ES;
   std::unique_ptr<TargetMachine> TM;
   std::shared_ptr<SymbolResolver> Resolver;
+#if LLVM_VERSION_MAJOR >= 8
+  LegacyRTDyldObjectLinkingLayer ObjectLayer;
+  LegacyIRCompileLayer<decltype(ObjectLayer), SimpleCompiler> CompileLayer;
+#else
   RTDyldObjectLinkingLayer ObjectLayer;
   IRCompileLayer<decltype(ObjectLayer), SimpleCompiler> CompileLayer;
+#endif
 
 public:
   std::map<std::string, std::tuple<uint8_t *, uintptr_t>> sections_;
@@ -91,7 +96,11 @@ class BpfOrc
       Resolver(createLegacyLookupResolver(ES,
         [](const std::string &Name __attribute__((unused))) -> JITSymbol { return nullptr; },
         [](Error Err) { cantFail(std::move(Err), "lookup failed"); })),
+#if LLVM_VERSION_MAJOR >= 8
+      ObjectLayer(ES, [this](VModuleKey) { return LegacyRTDyldObjectLinkingLayer::Resources{std::make_shared<MemoryManager>(sections_), Resolver}; }),
+#else
       ObjectLayer(ES, [this](VModuleKey) { return RTDyldObjectLinkingLayer::Resources{std::make_shared<MemoryManager>(sections_), Resolver}; }),
+#endif
       CompileLayer(ObjectLayer, SimpleCompiler(*TM)) {}
 
   void compileModule(std::unique_ptr<Module> M) {
diff --git a/src/clang_parser.cpp b/src/clang_parser.cpp
index 4bb8f87..7bb8d6a 100644
--- a/src/clang_parser.cpp
+++ b/src/clang_parser.cpp
@@ -1,5 +1,7 @@
 #include <clang-c/Index.h>
 #include <iostream>
+#include <unordered_map>
+#include <unordered_set>
 #include <string.h>
 #include <sys/utsname.h>
 #include <sys/stat.h>
@@ -13,6 +15,9 @@
 
 namespace bpftrace {
 
+std::unordered_map<std::string, CXCursor> indirect_structs;
+std::unordered_set<std::string> unvisited_indirect_structs;
+
 static std::string get_clang_string(CXString string)
 {
   std::string str = clang_getCString(string);
@@ -30,8 +35,9 @@ static CXCursor get_indirect_field_parent_struct(CXCursor c)
 {
   CXCursor parent = clang_getCursorSemanticParent(c);
 
-  while (!clang_Cursor_isNull(parent) && clang_Cursor_isAnonymous(parent))
-     parent = clang_getCursorSemanticParent(parent);
+  while (!clang_Cursor_isNull(parent) && indirect_structs.count(get_clang_string(clang_getTypeSpelling(clang_getCanonicalType(clang_getCursorType(parent))))) > 0) {
+    parent = clang_getCursorSemanticParent(parent);
+  }
 
   return parent;
 }
@@ -253,44 +259,69 @@ void ClangParser::parse(ast::Program *program, BPFtrace &bpftrace)
     std::cerr << "Input (" << input.size() << "): " << input << std::endl;
   }
 
-  CXCursor cursor = clang_getTranslationUnitCursor(translation_unit);
+  indirect_structs.clear();
+  unvisited_indirect_structs.clear();
 
-  clang_visitChildren(
-      cursor,
-      [](CXCursor c, CXCursor parent, CXClientData client_data)
-      {
+  CXCursor cursor = clang_getTranslationUnitCursor(translation_unit);
 
-        if (clang_getCursorKind(parent) != CXCursor_StructDecl &&
-            clang_getCursorKind(parent) != CXCursor_UnionDecl)
-          return CXChildVisit_Recurse;
+  bool iterate = true;
 
-        if (clang_getCursorKind(c) == CXCursor_FieldDecl)
+  do {
+    clang_visitChildren(
+        cursor,
+        [](CXCursor c, CXCursor parent, CXClientData client_data)
         {
-          auto &structs = static_cast<BPFtrace*>(client_data)->structs_;
-          auto struct_name = get_parent_struct_name(c);
-          auto ident = get_clang_string(clang_getCursorSpelling(c));
-          auto offset = clang_Cursor_getOffsetOfField(c) / 8;
-          auto type = clang_getCanonicalType(clang_getCursorType(c));
+          if (clang_getCursorKind(parent) != CXCursor_StructDecl &&
+              clang_getCursorKind(parent) != CXCursor_UnionDecl)
+            return CXChildVisit_Recurse;
 
           auto ptype = clang_getCanonicalType(clang_getCursorType(parent));
           auto ptypestr = get_clang_string(clang_getTypeSpelling(ptype));
           auto ptypesize = clang_Type_getSizeOf(ptype);
 
-          if(clang_Cursor_isAnonymous(parent))
-            offset = get_indirect_field_offset(c);
+          if (clang_getCursorKind(c) == CXCursor_StructDecl ||
+              clang_getCursorKind(c) == CXCursor_UnionDecl) {
+            auto struct_name = get_clang_string(clang_getTypeSpelling(clang_getCanonicalType(clang_getCursorType(c))));
+            indirect_structs[struct_name] = c;
+            unvisited_indirect_structs.insert(struct_name);
 
-          if (struct_name == "")
-            struct_name = ptypestr;
-          remove_struct_prefix(struct_name);
+            return CXChildVisit_Continue;
+          }
 
-          structs[struct_name].fields[ident].offset = offset;
-          structs[struct_name].fields[ident].type = get_sized_type(type);
-          structs[struct_name].size = ptypesize;
-        }
+          if (clang_getCursorKind(c) == CXCursor_FieldDecl)
+          {
+            auto &structs = static_cast<BPFtrace*>(client_data)->structs_;
+            auto struct_name = get_parent_struct_name(c);
+            auto ident = get_clang_string(clang_getCursorSpelling(c));
+            auto offset = clang_Cursor_getOffsetOfField(c) / 8;
+            auto type = clang_getCanonicalType(clang_getCursorType(c));
+            auto typestr = get_clang_string(clang_getTypeSpelling(type));
 
-        return CXChildVisit_Recurse;
-      },
-      &bpftrace);
+            if (indirect_structs.count(typestr))
+              indirect_structs.erase(typestr);
+
+            if(indirect_structs.count(ptypestr))
+              offset = get_indirect_field_offset(c);
+
+            if (struct_name == "")
+              struct_name = ptypestr;
+            remove_struct_prefix(struct_name);
+
+            structs[struct_name].fields[ident].offset = offset;
+            structs[struct_name].fields[ident].type = get_sized_type(type);
+            structs[struct_name].size = ptypesize;
+          }
+
+          return CXChildVisit_Recurse;
+        },
+        &bpftrace);
+    if (unvisited_indirect_structs.size()) {
+      cursor = indirect_structs[*unvisited_indirect_structs.begin()];
+      unvisited_indirect_structs.erase(unvisited_indirect_structs.begin());
+    } else {
+      iterate = false;
+    }
+  } while (iterate);
 
   clang_disposeTranslationUnit(translation_unit);
   clang_disposeIndex(index);
-- 
2.20.1