commit 5934afb83c7908b313c48f70e71b84a8b9000b35
Author: Josh Stone <jistone@redhat.com>
Date:   Tue Jan 14 13:39:36 2014 -0800

    Dyninst 8.1 backport of image::findMain fixes
    
        commit 44205dbd2d9011d1cfc366e775f861bad2146c2b
        Author: Josh Stone <jistone@redhat.com>
        Date:   Tue Jan 14 13:39:36 2014 -0800
    
            dyninstAPI: Let image::findMain inspect PIE too
    
            Position-independent executables (PIE) are generally indistinguishable
            from any other shared object, but we still need to find main() in these
            when they're stripped.  One clue is the presence of INTERP, which PIEs
            will always have.  (Some DSOs also have INTERP, but this is rare.)
    
        commit 335aeb15f09a951c536f60475cafc84882cbfc1c
        Author: Josh Stone <jistone@redhat.com>
        Date:   Tue Jan 14 12:34:10 2014 -0800
    
            dyninstAPI: Expand powerpc support in image::findMain
    
            First, this function needs to respect the ELF entry address, rather than
            assuming that entry is at the start of the .text section.  This was
            already handled in x86, so we just need the same in powerpc.
    
            Second, we need to add ppc64 support to the section which currently
            handles ppc32.  This requires dereferencing .opd indirection, and ppc64
            also uses a TOC-based load which the current dataflowAPI code can't
            handle.  For this case, just use a simple InstructionAPI eval with the
            known TOC r2 register.
    
        commit ed3ab720945492a42bf36d358e105ef1bd34c950
        Author: Josh Stone <jistone@redhat.com>
        Date:   Wed Jan 15 11:20:32 2014 -0800
    
            dyninstAPI: Set image::findMain's findings as LOCAL/INTERNAL
    
            When we look for new symbols in image::findMain, it's not really the
            intent to make these global/exported symbols.  It doesn't matter so much
            for live processes, but these shouldn't be exported in rewritten
            binaries.  This patch sets them as LOCAL and INTERNAL instead.
    
            Note, this is only for Linux and FreeBSD, but Windows may want it too.
    
        commit 49606ee86625d2505112f55f77817981c498b606
        Author: Josh Stone <jistone@redhat.com>
        Date:   Mon Jan 27 17:45:32 2014 -0800
    
            common: Skip get_immediate_operand without any imm
    
            For x86 image::findMain, we blindly look for the first call from the
            entry address, then grab the immediate operand from the preceding
            instruction.  Sometimes that instruction may not even have any immediate
            operands, and the heuristic fails with a message on stderr like:
    
              arch-x86.C[5602]:  invalid immediate size 0 in insn
    
            This now occurs with some ET_DYN binaries that are allowed by commit
            44205dbd2d90, when those are really DSOs and not PIE.  However, this
            could conceivably also happen for any executable that doesn't fit the
            heuristic pattern.
    
            This patch quietly returns 0 from get_immediate_operand when the
            decoded instruction has no immediate operands, and image::findMain
            will treat this as an invalid address.

diff --git a/common/src/arch-x86.C b/common/src/arch-x86.C
index e805585bf22c..73efd3cd18df 100644
--- a/common/src/arch-x86.C
+++ b/common/src/arch-x86.C
@@ -5581,6 +5581,9 @@ Address get_immediate_operand(instruction *instr)
 
     ia32_decode(IA32_FULL_DECODER,(const unsigned char *)(instr->ptr()),detail);
 
+    if (loc.imm_cnt < 1)
+      return 0;
+
     // now find the immediate value in the locations
     Address immediate = 0;
 
diff --git a/dyninstAPI/src/image.C b/dyninstAPI/src/image.C
index dbe05a7cadd6..f2af21a15a8d 100644
--- a/dyninstAPI/src/image.C
+++ b/dyninstAPI/src/image.C
@@ -269,6 +269,29 @@ namespace {
         void * fini_addr;
     };
 
+    void *get_raw_symtab_ptr(Symtab *linkedFile, Address addr)
+    {
+        Region *reg = linkedFile->findEnclosingRegion(addr);
+        if (reg != NULL) {
+            char *data = (char*)reg->getPtrToRawData();
+            data += addr - reg->getMemOffset();
+            return data;
+        }
+        return NULL;
+    }
+
+    Address deref_opd(Symtab *linkedFile, Address addr)
+    {
+        Region *reg = linkedFile->findEnclosingRegion(addr);
+        if (reg && reg->getRegionName() == ".opd") {
+            // opd symbol needing dereference
+            void *data = get_raw_symtab_ptr(linkedFile, addr);
+            if (data)
+                return *(Address*)data;
+        }
+        return addr;
+    }
+
     /*
      * b ends with a call to libc_start_main. We are looking for the
      * value in GR8, which is the address of a structure that contains
@@ -289,6 +312,7 @@ namespace {
             b->end()-b->start(),
             b->region()->getArch());
 
+        RegisterAST::Ptr r2( new RegisterAST(ppc32::r2) );
         RegisterAST::Ptr r8( new RegisterAST(ppc32::r8) );
 
         Address cur_addr = b->start();
@@ -302,50 +326,67 @@ namespace {
         if(!r8_def)
             return 0;
 
-        // Get all of the assignments that happen in this instruction
-        AssignmentConverter conv(true);
-        vector<Assignment::Ptr> assigns;
-        conv.convert(r8_def,r8_def_addr,f,b,assigns);
-
-        // find the one we care about (r8)
-        vector<Assignment::Ptr>::iterator ait = assigns.begin();
-        for( ; ait != assigns.end(); ++ait) {
-            AbsRegion & outReg = (*ait)->out();
-            Absloc const& loc = outReg.absloc();
-            if(loc.reg() == r8->getID())
-                break;
-        }
-        if(ait == assigns.end()) {
-            return 0;
+        Address ss_addr = 0;
+
+        // Try a TOC-based lookup first
+        if (r8_def->isRead(r2)) {
+            set<Expression::Ptr> memReads;
+            r8_def->getMemoryReadOperands(memReads);
+            Address TOC = f->obj()->cs()->getTOC(r8_def_addr);
+            if (TOC != 0 && memReads.size() == 1) {
+                Expression::Ptr expr = *memReads.begin();
+                expr->bind(r2.get(), Result(u64, TOC));
+                const Result &res = expr->eval();
+                if (res.defined) {
+                    void *res_addr =
+                        get_raw_symtab_ptr(linkedFile, res.convert<Address>());
+                    if (res_addr)
+                        ss_addr = *(Address*)res_addr;
+                }
+            }
         }
 
-        // Slice back to the definition of R8, and, if possible, simplify
-        // to a constant
-        Slicer slc(*ait,b,f);
-        Default_Predicates preds;
-        Graph::Ptr slg = slc.backwardSlice(preds);
-        DataflowAPI::Result_t sl_res;
-        DataflowAPI::SymEval::expand(slg,sl_res);
-        AST::Ptr calculation = sl_res[*ait];
-        SimpleArithVisitor visit; 
-        AST::Ptr simplified = calculation->accept(&visit);
-        //printf("after simplification:\n%s\n",simplified->format().c_str());
-        if(simplified->getID() == AST::V_ConstantAST) { 
-            ConstantAST::Ptr cp = ConstantAST::convert(simplified);
-            Address ss_addr = cp->val().val;
-
-            // need a pointer to the image data
-            SymtabAPI::Region * dreg = linkedFile->findEnclosingRegion(ss_addr);
-        
-            if(dreg) {
-                struct libc_startup_info * si =
-                    (struct libc_startup_info *)(
-                        ((Address)dreg->getPtrToRawData()) + 
-                        ss_addr - (Address)dreg->getMemOffset());
-                return (Address)si->main_addr;
+        if (ss_addr == 0) {
+            // Get all of the assignments that happen in this instruction
+            AssignmentConverter conv(true);
+            vector<Assignment::Ptr> assigns;
+            conv.convert(r8_def,r8_def_addr,f,b,assigns);
+
+            // find the one we care about (r8)
+            vector<Assignment::Ptr>::iterator ait = assigns.begin();
+            for( ; ait != assigns.end(); ++ait) {
+                AbsRegion & outReg = (*ait)->out();
+                Absloc const& loc = outReg.absloc();
+                if(loc.reg() == r8->getID())
+                    break;
+            }
+            if(ait == assigns.end()) {
+                return 0;
+            }
+
+            // Slice back to the definition of R8, and, if possible, simplify
+            // to a constant
+            Slicer slc(*ait,b,f);
+            Default_Predicates preds;
+            Graph::Ptr slg = slc.backwardSlice(preds);
+            DataflowAPI::Result_t sl_res;
+            DataflowAPI::SymEval::expand(slg,sl_res);
+            AST::Ptr calculation = sl_res[*ait];
+            SimpleArithVisitor visit; 
+            AST::Ptr simplified = calculation->accept(&visit);
+            //printf("after simplification:\n%s\n",simplified->format().c_str());
+            if(simplified->getID() == AST::V_ConstantAST) { 
+                ConstantAST::Ptr cp = ConstantAST::convert(simplified);
+                ss_addr = cp->val().val;
             }
         }
 
+        // need a pointer to the image data
+        auto si = (struct libc_startup_info *)
+            get_raw_symtab_ptr(linkedFile, ss_addr);
+        if (si)
+            return (Address)si->main_addr;
+
         return 0;
     }
 }
@@ -358,10 +399,13 @@ namespace {
  */
 void image::findMain()
 {
-#if defined(ppc32_linux) || defined(ppc32_bgp)
+#if defined(ppc32_linux) || defined(ppc32_bgp) || defined(ppc64_linux)
     using namespace Dyninst::InstructionAPI;
 
-    if(!desc_.isSharedObject())
+    // Only look for main in executables, but do allow position-independent
+    // executables (PIE) which look like shared objects with an INTERP.
+    // (Some strange DSOs also have INTERP, but this is rare.)
+    if(!desc_.isSharedObject() || linkedFile->getInterpreterName() != NULL)
     {
     	bool foundMain = false;
     	bool foundStart = false;
@@ -381,22 +425,21 @@ void image::findMain()
         if (foundText == false) {
             return;
         }
-	
-    	if( !foundMain )
-    	{
+
+        if( !foundMain )
+        {
             logLine("No main symbol found: attempting to create symbol for main\n");
-            const unsigned char* p;
-            p = (( const unsigned char * ) eReg->getPtrToRawData());
 
-            Address mainAddress = 0;
+            Address eAddr = linkedFile->getEntryOffset();
+            eAddr = deref_opd(linkedFile, eAddr);
 
 	        bool parseInAllLoadableRegions = (BPatch_normalMode != mode_);
 	        SymtabCodeSource scs(linkedFile, filt, parseInAllLoadableRegions);
             CodeObject tco(&scs,NULL,NULL,false);
 
-            tco.parse(eReg->getMemOffset(),false);
+            tco.parse(eAddr,false);
             set<CodeRegion *> regions;
-            scs.findRegions(eReg->getMemOffset(),regions);
+            scs.findRegions(eAddr,regions);
             if(regions.empty()) {
                 // express puzzlement
                 return;
@@ -404,23 +447,31 @@ void image::findMain()
             SymtabCodeRegion * reg = 
                 static_cast<SymtabCodeRegion*>(*regions.begin());
             Function * func = 
-                tco.findFuncByEntry(reg,eReg->getMemOffset());
+                tco.findFuncByEntry(reg,eAddr);
             if(!func) {
                 // again, puzzlement
                 return;
             }
 
+            Block * b = NULL;
             const Function::edgelist & calls = func->callEdges();
-            if(calls.size() != 1) {
+            if (calls.empty()) {
+                // when there are no calls, let's hope the entry block is it
+                b = tco.findBlockByEntry(reg,eAddr);
+            } else if(calls.size() == 1) {
+                Function::edgelist::iterator cit = calls.begin();
+                b = (*cit)->src();
+            } else {
                 startup_printf("%s[%d] _start has unexpected number (%d) of"
                                " call edges, bailing on findMain()\n",
                     FILE__,__LINE__,calls.size());
-                return; 
+                return;
             }
-            Function::edgelist::iterator cit = calls.begin();
-            Block * b = (*cit)->src();
+            if (!b) return;
+
+            Address mainAddress = evaluate_main_address(linkedFile,func,b);
+            mainAddress = deref_opd(linkedFile, mainAddress);
 
-            mainAddress = evaluate_main_address(linkedFile,func,b);
             if(0 == mainAddress || !scs.isValidAddress(mainAddress)) {
                 startup_printf("%s[%d] failed to find main\n",FILE__,__LINE__);
                 return;
@@ -430,8 +481,8 @@ void image::findMain()
             }
            	Symbol *newSym= new Symbol( "main", 
                                             Symbol::ST_FUNCTION,
-                                            Symbol::SL_GLOBAL, 
-                                            Symbol::SV_DEFAULT, 
+                                            Symbol::SL_LOCAL,
+                                            Symbol::SV_INTERNAL,
                                             mainAddress,
                                             linkedFile->getDefaultModule(),
                                             eReg, 
@@ -444,7 +495,10 @@ void image::findMain()
 || defined(i386_unknown_solaris2_5) \
 || (defined(os_freebsd) \
     && (defined(arch_x86) || defined(arch_x86_64)))
-    if(!desc_.isSharedObject())
+    // Only look for main in executables, but do allow position-independent
+    // executables (PIE) which look like shared objects with an INTERP.
+    // (Some strange DSOs also have INTERP, but this is rare.)
+    if(!desc_.isSharedObject() || linkedFile->getInterpreterName() != NULL)
     {
     	bool foundMain = false;
     	bool foundStart = false;
@@ -594,8 +648,8 @@ void image::findMain()
             	//logLine( "No static symbol for function main\n" );
                 Symbol *newSym = new Symbol("DYNINST_pltMain", 
                                             Symbol::ST_FUNCTION, 
-                                            Symbol::SL_GLOBAL,
-                                            Symbol::SV_DEFAULT,
+                                            Symbol::SL_LOCAL,
+                                            Symbol::SV_INTERNAL,
                                             mainAddress,
                                             linkedFile->getDefaultModule(),
                                             eReg, 
@@ -606,8 +660,8 @@ void image::findMain()
            {
            	Symbol *newSym= new Symbol( "main", 
                                             Symbol::ST_FUNCTION,
-                                            Symbol::SL_GLOBAL, 
-                                            Symbol::SV_DEFAULT, 
+                                            Symbol::SL_LOCAL,
+                                            Symbol::SV_INTERNAL,
                                             mainAddress,
                                             linkedFile->getDefaultModule(),
                                             eReg, 
@@ -619,8 +673,8 @@ void image::findMain()
     	{
             Symbol *startSym = new Symbol( "_start",
                                            Symbol::ST_FUNCTION,
-                                           Symbol::SL_GLOBAL,
-                                           Symbol::SV_DEFAULT, 
+                                           Symbol::SL_LOCAL,
+                                           Symbol::SV_INTERNAL,
                                            eReg->getMemOffset(),
                                            linkedFile->getDefaultModule(),
                                            eReg,
@@ -635,8 +689,8 @@ void image::findMain()
 	  if (linkedFile->findRegion(finisec,".fini")) {
 	    Symbol *finiSym = new Symbol( "_fini",
 					  Symbol::ST_FUNCTION,
-					  Symbol::SL_GLOBAL, 
-					  Symbol::SV_DEFAULT, 
+					  Symbol::SL_LOCAL,
+					  Symbol::SV_INTERNAL,
 					  finisec->getMemOffset(),
 					  linkedFile->getDefaultModule(),
 					  finisec, 
@@ -657,8 +711,8 @@ void image::findMain()
         {
 	    Symbol *newSym = new Symbol( "_DYNAMIC", 
 					Symbol::ST_OBJECT, 
-                                         Symbol::SL_GLOBAL, 
-                                         Symbol::SV_DEFAULT,
+                                         Symbol::SL_LOCAL,
+                                         Symbol::SV_INTERNAL,
                                          dynamicsec->getMemOffset(), 
                                          linkedFile->getDefaultModule(),
                                          dynamicsec,