Blame SOURCES/dyninst-8.1-findMain.patch

bdf803
commit 5934afb83c7908b313c48f70e71b84a8b9000b35
bdf803
Author: Josh Stone <jistone@redhat.com>
bdf803
Date:   Tue Jan 14 13:39:36 2014 -0800
bdf803
bdf803
    Dyninst 8.1 backport of image::findMain fixes
bdf803
    
bdf803
        commit 44205dbd2d9011d1cfc366e775f861bad2146c2b
bdf803
        Author: Josh Stone <jistone@redhat.com>
bdf803
        Date:   Tue Jan 14 13:39:36 2014 -0800
bdf803
    
bdf803
            dyninstAPI: Let image::findMain inspect PIE too
bdf803
    
bdf803
            Position-independent executables (PIE) are generally indistinguishable
bdf803
            from any other shared object, but we still need to find main() in these
bdf803
            when they're stripped.  One clue is the presence of INTERP, which PIEs
bdf803
            will always have.  (Some DSOs also have INTERP, but this is rare.)
bdf803
    
bdf803
        commit 335aeb15f09a951c536f60475cafc84882cbfc1c
bdf803
        Author: Josh Stone <jistone@redhat.com>
bdf803
        Date:   Tue Jan 14 12:34:10 2014 -0800
bdf803
    
bdf803
            dyninstAPI: Expand powerpc support in image::findMain
bdf803
    
bdf803
            First, this function needs to respect the ELF entry address, rather than
bdf803
            assuming that entry is at the start of the .text section.  This was
bdf803
            already handled in x86, so we just need the same in powerpc.
bdf803
    
bdf803
            Second, we need to add ppc64 support to the section which currently
bdf803
            handles ppc32.  This requires dereferencing .opd indirection, and ppc64
bdf803
            also uses a TOC-based load which the current dataflowAPI code can't
bdf803
            handle.  For this case, just use a simple InstructionAPI eval with the
bdf803
            known TOC r2 register.
bdf803
    
bdf803
        commit ed3ab720945492a42bf36d358e105ef1bd34c950
bdf803
        Author: Josh Stone <jistone@redhat.com>
bdf803
        Date:   Wed Jan 15 11:20:32 2014 -0800
bdf803
    
bdf803
            dyninstAPI: Set image::findMain's findings as LOCAL/INTERNAL
bdf803
    
bdf803
            When we look for new symbols in image::findMain, it's not really the
bdf803
            intent to make these global/exported symbols.  It doesn't matter so much
bdf803
            for live processes, but these shouldn't be exported in rewritten
bdf803
            binaries.  This patch sets them as LOCAL and INTERNAL instead.
bdf803
    
bdf803
            Note, this is only for Linux and FreeBSD, but Windows may want it too.
bdf803
    
bdf803
        commit 49606ee86625d2505112f55f77817981c498b606
bdf803
        Author: Josh Stone <jistone@redhat.com>
bdf803
        Date:   Mon Jan 27 17:45:32 2014 -0800
bdf803
    
bdf803
            common: Skip get_immediate_operand without any imm
bdf803
    
bdf803
            For x86 image::findMain, we blindly look for the first call from the
bdf803
            entry address, then grab the immediate operand from the preceding
bdf803
            instruction.  Sometimes that instruction may not even have any immediate
bdf803
            operands, and the heuristic fails with a message on stderr like:
bdf803
    
bdf803
              arch-x86.C[5602]:  invalid immediate size 0 in insn
bdf803
    
bdf803
            This now occurs with some ET_DYN binaries that are allowed by commit
bdf803
            44205dbd2d90, when those are really DSOs and not PIE.  However, this
bdf803
            could conceivably also happen for any executable that doesn't fit the
bdf803
            heuristic pattern.
bdf803
    
bdf803
            This patch quietly returns 0 from get_immediate_operand when there are
bdf803
            no immediate operands, and image::findMain will treat this as an invalid address.
bdf803
bdf803
diff --git a/common/src/arch-x86.C b/common/src/arch-x86.C
bdf803
index e805585bf22c..73efd3cd18df 100644
bdf803
--- a/common/src/arch-x86.C
bdf803
+++ b/common/src/arch-x86.C
bdf803
@@ -5581,6 +5581,9 @@ Address get_immediate_operand(instruction *instr)
bdf803
 
bdf803
     ia32_decode(IA32_FULL_DECODER,(const unsigned char *)(instr->ptr()),detail);
bdf803
 
bdf803
+    if (loc.imm_cnt < 1)
bdf803
+      return 0;
bdf803
+
bdf803
     // now find the immediate value in the locations
bdf803
     Address immediate = 0;
bdf803
 
bdf803
diff --git a/dyninstAPI/src/image.C b/dyninstAPI/src/image.C
bdf803
index dbe05a7cadd6..f2af21a15a8d 100644
bdf803
--- a/dyninstAPI/src/image.C
bdf803
+++ b/dyninstAPI/src/image.C
bdf803
@@ -269,6 +269,29 @@ namespace {
bdf803
         void * fini_addr;
bdf803
     };
bdf803
 
bdf803
+    void *get_raw_symtab_ptr(Symtab *linkedFile, Address addr)
bdf803
+    {
bdf803
+        Region *reg = linkedFile->findEnclosingRegion(addr);
bdf803
+        if (reg != NULL) {
bdf803
+            char *data = (char*)reg->getPtrToRawData();
bdf803
+            data += addr - reg->getMemOffset();
bdf803
+            return data;
bdf803
+        }
bdf803
+        return NULL;
bdf803
+    }
bdf803
+
bdf803
+    Address deref_opd(Symtab *linkedFile, Address addr)
bdf803
+    {
bdf803
+        Region *reg = linkedFile->findEnclosingRegion(addr);
bdf803
+        if (reg && reg->getRegionName() == ".opd") {
bdf803
+            // opd symbol needing dereference
bdf803
+            void *data = get_raw_symtab_ptr(linkedFile, addr);
bdf803
+            if (data)
bdf803
+                return *(Address*)data;
bdf803
+        }
bdf803
+        return addr;
bdf803
+    }
bdf803
+
bdf803
     /*
bdf803
      * b ends with a call to libc_start_main. We are looking for the
bdf803
      * value in GR8, which is the address of a structure that contains
bdf803
@@ -289,6 +312,7 @@ namespace {
bdf803
             b->end()-b->start(),
bdf803
             b->region()->getArch());
bdf803
 
bdf803
+        RegisterAST::Ptr r2( new RegisterAST(ppc32::r2) );
bdf803
         RegisterAST::Ptr r8( new RegisterAST(ppc32::r8) );
bdf803
 
bdf803
         Address cur_addr = b->start();
bdf803
@@ -302,50 +326,67 @@ namespace {
bdf803
         if(!r8_def)
bdf803
             return 0;
bdf803
 
bdf803
-        // Get all of the assignments that happen in this instruction
bdf803
-        AssignmentConverter conv(true);
bdf803
-        vector<Assignment::Ptr> assigns;
bdf803
-        conv.convert(r8_def,r8_def_addr,f,b,assigns);
bdf803
-
bdf803
-        // find the one we care about (r8)
bdf803
-        vector<Assignment::Ptr>::iterator ait = assigns.begin();
bdf803
-        for( ; ait != assigns.end(); ++ait) {
bdf803
-            AbsRegion & outReg = (*ait)->out();
bdf803
-            Absloc const& loc = outReg.absloc();
bdf803
-            if(loc.reg() == r8->getID())
bdf803
-                break;
bdf803
-        }
bdf803
-        if(ait == assigns.end()) {
bdf803
-            return 0;
bdf803
+        Address ss_addr = 0;
bdf803
+
bdf803
+        // Try a TOC-based lookup first
bdf803
+        if (r8_def->isRead(r2)) {
bdf803
+            set<Expression::Ptr> memReads;
bdf803
+            r8_def->getMemoryReadOperands(memReads);
bdf803
+            Address TOC = f->obj()->cs()->getTOC(r8_def_addr);
bdf803
+            if (TOC != 0 && memReads.size() == 1) {
bdf803
+                Expression::Ptr expr = *memReads.begin();
bdf803
+                expr->bind(r2.get(), Result(u64, TOC));
bdf803
+                const Result &res = expr->eval();
bdf803
+                if (res.defined) {
bdf803
+                    void *res_addr =
bdf803
+                        get_raw_symtab_ptr(linkedFile, res.convert<Address>());
bdf803
+                    if (res_addr)
bdf803
+                        ss_addr = *(Address*)res_addr;
bdf803
+                }
bdf803
+            }
bdf803
         }
bdf803
 
bdf803
-        // Slice back to the definition of R8, and, if possible, simplify
bdf803
-        // to a constant
bdf803
-        Slicer slc(*ait,b,f);
bdf803
-        Default_Predicates preds;
bdf803
-        Graph::Ptr slg = slc.backwardSlice(preds);
bdf803
-        DataflowAPI::Result_t sl_res;
bdf803
-        DataflowAPI::SymEval::expand(slg,sl_res);
bdf803
-        AST::Ptr calculation = sl_res[*ait];
bdf803
-        SimpleArithVisitor visit; 
bdf803
-        AST::Ptr simplified = calculation->accept(&visit);
bdf803
-        //printf("after simplification:\n%s\n",simplified->format().c_str());
bdf803
-        if(simplified->getID() == AST::V_ConstantAST) { 
bdf803
-            ConstantAST::Ptr cp = ConstantAST::convert(simplified);
bdf803
-            Address ss_addr = cp->val().val;
bdf803
-
bdf803
-            // need a pointer to the image data
bdf803
-            SymtabAPI::Region * dreg = linkedFile->findEnclosingRegion(ss_addr);
bdf803
-        
bdf803
-            if(dreg) {
bdf803
-                struct libc_startup_info * si =
bdf803
-                    (struct libc_startup_info *)(
bdf803
-                        ((Address)dreg->getPtrToRawData()) + 
bdf803
-                        ss_addr - (Address)dreg->getMemOffset());
bdf803
-                return (Address)si->main_addr;
bdf803
+        if (ss_addr == 0) {
bdf803
+            // Get all of the assignments that happen in this instruction
bdf803
+            AssignmentConverter conv(true);
bdf803
+            vector<Assignment::Ptr> assigns;
bdf803
+            conv.convert(r8_def,r8_def_addr,f,b,assigns);
bdf803
+
bdf803
+            // find the one we care about (r8)
bdf803
+            vector<Assignment::Ptr>::iterator ait = assigns.begin();
bdf803
+            for( ; ait != assigns.end(); ++ait) {
bdf803
+                AbsRegion & outReg = (*ait)->out();
bdf803
+                Absloc const& loc = outReg.absloc();
bdf803
+                if(loc.reg() == r8->getID())
bdf803
+                    break;
bdf803
+            }
bdf803
+            if(ait == assigns.end()) {
bdf803
+                return 0;
bdf803
+            }
bdf803
+
bdf803
+            // Slice back to the definition of R8, and, if possible, simplify
bdf803
+            // to a constant
bdf803
+            Slicer slc(*ait,b,f);
bdf803
+            Default_Predicates preds;
bdf803
+            Graph::Ptr slg = slc.backwardSlice(preds);
bdf803
+            DataflowAPI::Result_t sl_res;
bdf803
+            DataflowAPI::SymEval::expand(slg,sl_res);
bdf803
+            AST::Ptr calculation = sl_res[*ait];
bdf803
+            SimpleArithVisitor visit; 
bdf803
+            AST::Ptr simplified = calculation->accept(&visit);
bdf803
+            //printf("after simplification:\n%s\n",simplified->format().c_str());
bdf803
+            if(simplified->getID() == AST::V_ConstantAST) { 
bdf803
+                ConstantAST::Ptr cp = ConstantAST::convert(simplified);
bdf803
+                ss_addr = cp->val().val;
bdf803
             }
bdf803
         }
bdf803
 
bdf803
+        // need a pointer to the image data
bdf803
+        auto si = (struct libc_startup_info *)
bdf803
+            get_raw_symtab_ptr(linkedFile, ss_addr);
bdf803
+        if (si)
bdf803
+            return (Address)si->main_addr;
bdf803
+
bdf803
         return 0;
bdf803
     }
bdf803
 }
bdf803
@@ -358,10 +399,13 @@ namespace {
bdf803
  */
bdf803
 void image::findMain()
bdf803
 {
bdf803
-#if defined(ppc32_linux) || defined(ppc32_bgp)
bdf803
+#if defined(ppc32_linux) || defined(ppc32_bgp) || defined(ppc64_linux)
bdf803
     using namespace Dyninst::InstructionAPI;
bdf803
 
bdf803
-    if(!desc_.isSharedObject())
bdf803
+    // Only look for main in executables, but do allow position-independent
bdf803
+    // executables (PIE) which look like shared objects with an INTERP.
bdf803
+    // (Some strange DSOs also have INTERP, but this is rare.)
bdf803
+    if(!desc_.isSharedObject() || linkedFile->getInterpreterName() != NULL)
bdf803
     {
bdf803
     	bool foundMain = false;
bdf803
     	bool foundStart = false;
bdf803
@@ -381,22 +425,21 @@ void image::findMain()
bdf803
         if (foundText == false) {
bdf803
             return;
bdf803
         }
bdf803
-	
bdf803
-    	if( !foundMain )
bdf803
-    	{
bdf803
+
bdf803
+        if( !foundMain )
bdf803
+        {
bdf803
             logLine("No main symbol found: attempting to create symbol for main\n");
bdf803
-            const unsigned char* p;
bdf803
-            p = (( const unsigned char * ) eReg->getPtrToRawData());
bdf803
 
bdf803
-            Address mainAddress = 0;
bdf803
+            Address eAddr = linkedFile->getEntryOffset();
bdf803
+            eAddr = deref_opd(linkedFile, eAddr);
bdf803
 
bdf803
 	        bool parseInAllLoadableRegions = (BPatch_normalMode != mode_);
bdf803
 	        SymtabCodeSource scs(linkedFile, filt, parseInAllLoadableRegions);
bdf803
             CodeObject tco(&scs,NULL,NULL,false);
bdf803
 
bdf803
-            tco.parse(eReg->getMemOffset(),false);
bdf803
+            tco.parse(eAddr,false);
bdf803
             set<CodeRegion *> regions;
bdf803
-            scs.findRegions(eReg->getMemOffset(),regions);
bdf803
+            scs.findRegions(eAddr,regions);
bdf803
             if(regions.empty()) {
bdf803
                 // express puzzlement
bdf803
                 return;
bdf803
@@ -404,23 +447,31 @@ void image::findMain()
bdf803
             SymtabCodeRegion * reg = 
bdf803
                 static_cast<SymtabCodeRegion*>(*regions.begin());
bdf803
             Function * func = 
bdf803
-                tco.findFuncByEntry(reg,eReg->getMemOffset());
bdf803
+                tco.findFuncByEntry(reg,eAddr);
bdf803
             if(!func) {
bdf803
                 // again, puzzlement
bdf803
                 return;
bdf803
             }
bdf803
 
bdf803
+            Block * b = NULL;
bdf803
             const Function::edgelist & calls = func->callEdges();
bdf803
-            if(calls.size() != 1) {
bdf803
+            if (calls.empty()) {
bdf803
+                // when there are no calls, let's hope the entry block is it
bdf803
+                b = tco.findBlockByEntry(reg,eAddr);
bdf803
+            } else if(calls.size() == 1) {
bdf803
+                Function::edgelist::iterator cit = calls.begin();
bdf803
+                b = (*cit)->src();
bdf803
+            } else {
bdf803
                 startup_printf("%s[%d] _start has unexpected number (%d) of"
bdf803
                                " call edges, bailing on findMain()\n",
bdf803
                     FILE__,__LINE__,calls.size());
bdf803
-                return; 
bdf803
+                return;
bdf803
             }
bdf803
-            Function::edgelist::iterator cit = calls.begin();
bdf803
-            Block * b = (*cit)->src();
bdf803
+            if (!b) return;
bdf803
+
bdf803
+            Address mainAddress = evaluate_main_address(linkedFile,func,b);
bdf803
+            mainAddress = deref_opd(linkedFile, mainAddress);
bdf803
 
bdf803
-            mainAddress = evaluate_main_address(linkedFile,func,b);
bdf803
             if(0 == mainAddress || !scs.isValidAddress(mainAddress)) {
bdf803
                 startup_printf("%s[%d] failed to find main\n",FILE__,__LINE__);
bdf803
                 return;
bdf803
@@ -430,8 +481,8 @@ void image::findMain()
bdf803
             }
bdf803
            	Symbol *newSym= new Symbol( "main", 
bdf803
                                             Symbol::ST_FUNCTION,
bdf803
-                                            Symbol::SL_GLOBAL, 
bdf803
-                                            Symbol::SV_DEFAULT, 
bdf803
+                                            Symbol::SL_LOCAL,
bdf803
+                                            Symbol::SV_INTERNAL,
bdf803
                                             mainAddress,
bdf803
                                             linkedFile->getDefaultModule(),
bdf803
                                             eReg, 
bdf803
@@ -444,7 +495,10 @@ void image::findMain()
bdf803
 || defined(i386_unknown_solaris2_5) \
bdf803
 || (defined(os_freebsd) \
bdf803
     && (defined(arch_x86) || defined(arch_x86_64)))
bdf803
-    if(!desc_.isSharedObject())
bdf803
+    // Only look for main in executables, but do allow position-independent
bdf803
+    // executables (PIE) which look like shared objects with an INTERP.
bdf803
+    // (Some strange DSOs also have INTERP, but this is rare.)
bdf803
+    if(!desc_.isSharedObject() || linkedFile->getInterpreterName() != NULL)
bdf803
     {
bdf803
     	bool foundMain = false;
bdf803
     	bool foundStart = false;
bdf803
@@ -594,8 +648,8 @@ void image::findMain()
bdf803
             	//logLine( "No static symbol for function main\n" );
bdf803
                 Symbol *newSym = new Symbol("DYNINST_pltMain", 
bdf803
                                             Symbol::ST_FUNCTION, 
bdf803
-                                            Symbol::SL_GLOBAL,
bdf803
-                                            Symbol::SV_DEFAULT,
bdf803
+                                            Symbol::SL_LOCAL,
bdf803
+                                            Symbol::SV_INTERNAL,
bdf803
                                             mainAddress,
bdf803
                                             linkedFile->getDefaultModule(),
bdf803
                                             eReg, 
bdf803
@@ -606,8 +660,8 @@ void image::findMain()
bdf803
            {
bdf803
            	Symbol *newSym= new Symbol( "main", 
bdf803
                                             Symbol::ST_FUNCTION,
bdf803
-                                            Symbol::SL_GLOBAL, 
bdf803
-                                            Symbol::SV_DEFAULT, 
bdf803
+                                            Symbol::SL_LOCAL,
bdf803
+                                            Symbol::SV_INTERNAL,
bdf803
                                             mainAddress,
bdf803
                                             linkedFile->getDefaultModule(),
bdf803
                                             eReg, 
bdf803
@@ -619,8 +673,8 @@ void image::findMain()
bdf803
     	{
bdf803
             Symbol *startSym = new Symbol( "_start",
bdf803
                                            Symbol::ST_FUNCTION,
bdf803
-                                           Symbol::SL_GLOBAL,
bdf803
-                                           Symbol::SV_DEFAULT, 
bdf803
+                                           Symbol::SL_LOCAL,
bdf803
+                                           Symbol::SV_INTERNAL,
bdf803
                                            eReg->getMemOffset(),
bdf803
                                            linkedFile->getDefaultModule(),
bdf803
                                            eReg,
bdf803
@@ -635,8 +689,8 @@ void image::findMain()
bdf803
 	  if (linkedFile->findRegion(finisec,".fini")) {
bdf803
 	    Symbol *finiSym = new Symbol( "_fini",
bdf803
 					  Symbol::ST_FUNCTION,
bdf803
-					  Symbol::SL_GLOBAL, 
bdf803
-					  Symbol::SV_DEFAULT, 
bdf803
+					  Symbol::SL_LOCAL,
bdf803
+					  Symbol::SV_INTERNAL,
bdf803
 					  finisec->getMemOffset(),
bdf803
 					  linkedFile->getDefaultModule(),
bdf803
 					  finisec, 
bdf803
@@ -657,8 +711,8 @@ void image::findMain()
bdf803
         {
bdf803
 	    Symbol *newSym = new Symbol( "_DYNAMIC", 
bdf803
 					Symbol::ST_OBJECT, 
bdf803
-                                         Symbol::SL_GLOBAL, 
bdf803
-                                         Symbol::SV_DEFAULT,
bdf803
+                                         Symbol::SL_LOCAL,
bdf803
+                                         Symbol::SV_INTERNAL,
bdf803
                                          dynamicsec->getMemOffset(), 
bdf803
                                          linkedFile->getDefaultModule(),
bdf803
                                          dynamicsec,