commit 5934afb83c7908b313c48f70e71b84a8b9000b35
Author: Josh Stone <jistone@redhat.com>
Date: Tue Jan 14 13:39:36 2014 -0800
Dyninst 8.1 backport of image::findMain fixes
commit 44205dbd2d9011d1cfc366e775f861bad2146c2b
Author: Josh Stone <jistone@redhat.com>
Date: Tue Jan 14 13:39:36 2014 -0800
dyninstAPI: Let image::findMain inspect PIE too
Position-independent executables (PIE) are generally indistinguishable
from any other shared object, but we still need to find main() in these
when they're stripped. One clue is the presence of INTERP, which PIEs
will always have. (Some DSOs also have INTERP, but this is rare.)
commit 335aeb15f09a951c536f60475cafc84882cbfc1c
Author: Josh Stone <jistone@redhat.com>
Date: Tue Jan 14 12:34:10 2014 -0800
dyninstAPI: Expand powerpc support in image::findMain
First, this function needs to respect the ELF entry address, rather than
assuming that entry is at the start of the .text section. This was
already handled in x86, so we just need the same in powerpc.
Second, we need to add ppc64 support to the section which currently
handles ppc32. This requires dereferencing the .opd indirection, and ppc64
also uses a TOC-based load which the current dataflowAPI code can't
handle. For this case, just use a simple InstructionAPI eval with the
known TOC value bound to the r2 register.
commit ed3ab720945492a42bf36d358e105ef1bd34c950
Author: Josh Stone <jistone@redhat.com>
Date: Wed Jan 15 11:20:32 2014 -0800
dyninstAPI: Set image::findMain's findings as LOCAL/INTERNAL
When we look for new symbols in image::findMain, it's not really the
intent to make these global/exported symbols. It doesn't matter so much
for live processes, but these shouldn't be exported in rewritten
binaries. This patch sets them as LOCAL and INTERNAL instead.
Note, this is only for Linux and FreeBSD, but Windows may want it too.
commit 49606ee86625d2505112f55f77817981c498b606
Author: Josh Stone <jistone@redhat.com>
Date: Mon Jan 27 17:45:32 2014 -0800
common: Skip get_immediate_operand without any imm
For x86 image::findMain, we blindly look for the first call from the
entry address, then grab the immediate operand from the preceding
instruction. Sometimes that instruction may not even have any immediate
operands, and the heuristic fails with a message on stderr like:
arch-x86.C[5602]: invalid immediate size 0 in insn
This now occurs with some ET_DYN binaries that are allowed by commit
44205dbd2d90, when those are really DSOs and not PIE. However, this
could conceivably also happen for any executable that doesn't fit the
heuristic pattern.
This patch quietly returns 0 from get_immediate_operand when there are
no immediate operands, and image::findMain will treat this as an invalid address.
diff --git a/common/src/arch-x86.C b/common/src/arch-x86.C
index e805585bf22c..73efd3cd18df 100644
--- a/common/src/arch-x86.C
+++ b/common/src/arch-x86.C
@@ -5581,6 +5581,9 @@ Address get_immediate_operand(instruction *instr)
ia32_decode(IA32_FULL_DECODER,(const unsigned char *)(instr->ptr()),detail);
+ if (loc.imm_cnt < 1)
+ return 0;
+
// now find the immediate value in the locations
Address immediate = 0;
diff --git a/dyninstAPI/src/image.C b/dyninstAPI/src/image.C
index dbe05a7cadd6..f2af21a15a8d 100644
--- a/dyninstAPI/src/image.C
+++ b/dyninstAPI/src/image.C
@@ -269,6 +269,29 @@ namespace {
void * fini_addr;
};
+ void *get_raw_symtab_ptr(Symtab *linkedFile, Address addr)
+ {
+ Region *reg = linkedFile->findEnclosingRegion(addr);
+ if (reg != NULL) {
+ char *data = (char*)reg->getPtrToRawData();
+ data += addr - reg->getMemOffset();
+ return data;
+ }
+ return NULL;
+ }
+
+ Address deref_opd(Symtab *linkedFile, Address addr)
+ {
+ Region *reg = linkedFile->findEnclosingRegion(addr);
+ if (reg && reg->getRegionName() == ".opd") {
+ // opd symbol needing dereference
+ void *data = get_raw_symtab_ptr(linkedFile, addr);
+ if (data)
+ return *(Address*)data;
+ }
+ return addr;
+ }
+
/*
* b ends with a call to libc_start_main. We are looking for the
* value in GR8, which is the address of a structure that contains
@@ -289,6 +312,7 @@ namespace {
b->end()-b->start(),
b->region()->getArch());
+ RegisterAST::Ptr r2( new RegisterAST(ppc32::r2) );
RegisterAST::Ptr r8( new RegisterAST(ppc32::r8) );
Address cur_addr = b->start();
@@ -302,50 +326,67 @@ namespace {
if(!r8_def)
return 0;
- // Get all of the assignments that happen in this instruction
- AssignmentConverter conv(true);
- vector<Assignment::Ptr> assigns;
- conv.convert(r8_def,r8_def_addr,f,b,assigns);
-
- // find the one we care about (r8)
- vector<Assignment::Ptr>::iterator ait = assigns.begin();
- for( ; ait != assigns.end(); ++ait) {
- AbsRegion & outReg = (*ait)->out();
- Absloc const& loc = outReg.absloc();
- if(loc.reg() == r8->getID())
- break;
- }
- if(ait == assigns.end()) {
- return 0;
+ Address ss_addr = 0;
+
+ // Try a TOC-based lookup first
+ if (r8_def->isRead(r2)) {
+ set<Expression::Ptr> memReads;
+ r8_def->getMemoryReadOperands(memReads);
+ Address TOC = f->obj()->cs()->getTOC(r8_def_addr);
+ if (TOC != 0 && memReads.size() == 1) {
+ Expression::Ptr expr = *memReads.begin();
+ expr->bind(r2.get(), Result(u64, TOC));
+ const Result &res = expr->eval();
+ if (res.defined) {
+ void *res_addr =
+ get_raw_symtab_ptr(linkedFile, res.convert<Address>());
+ if (res_addr)
+ ss_addr = *(Address*)res_addr;
+ }
+ }
}
- // Slice back to the definition of R8, and, if possible, simplify
- // to a constant
- Slicer slc(*ait,b,f);
- Default_Predicates preds;
- Graph::Ptr slg = slc.backwardSlice(preds);
- DataflowAPI::Result_t sl_res;
- DataflowAPI::SymEval::expand(slg,sl_res);
- AST::Ptr calculation = sl_res[*ait];
- SimpleArithVisitor visit;
- AST::Ptr simplified = calculation->accept(&visit);
- //printf("after simplification:\n%s\n",simplified->format().c_str());
- if(simplified->getID() == AST::V_ConstantAST) {
- ConstantAST::Ptr cp = ConstantAST::convert(simplified);
- Address ss_addr = cp->val().val;
-
- // need a pointer to the image data
- SymtabAPI::Region * dreg = linkedFile->findEnclosingRegion(ss_addr);
-
- if(dreg) {
- struct libc_startup_info * si =
- (struct libc_startup_info *)(
- ((Address)dreg->getPtrToRawData()) +
- ss_addr - (Address)dreg->getMemOffset());
- return (Address)si->main_addr;
+ if (ss_addr == 0) {
+ // Get all of the assignments that happen in this instruction
+ AssignmentConverter conv(true);
+ vector<Assignment::Ptr> assigns;
+ conv.convert(r8_def,r8_def_addr,f,b,assigns);
+
+ // find the one we care about (r8)
+ vector<Assignment::Ptr>::iterator ait = assigns.begin();
+ for( ; ait != assigns.end(); ++ait) {
+ AbsRegion & outReg = (*ait)->out();
+ Absloc const& loc = outReg.absloc();
+ if(loc.reg() == r8->getID())
+ break;
+ }
+ if(ait == assigns.end()) {
+ return 0;
+ }
+
+ // Slice back to the definition of R8, and, if possible, simplify
+ // to a constant
+ Slicer slc(*ait,b,f);
+ Default_Predicates preds;
+ Graph::Ptr slg = slc.backwardSlice(preds);
+ DataflowAPI::Result_t sl_res;
+ DataflowAPI::SymEval::expand(slg,sl_res);
+ AST::Ptr calculation = sl_res[*ait];
+ SimpleArithVisitor visit;
+ AST::Ptr simplified = calculation->accept(&visit);
+ //printf("after simplification:\n%s\n",simplified->format().c_str());
+ if(simplified->getID() == AST::V_ConstantAST) {
+ ConstantAST::Ptr cp = ConstantAST::convert(simplified);
+ ss_addr = cp->val().val;
}
}
+ // need a pointer to the image data
+ auto si = (struct libc_startup_info *)
+ get_raw_symtab_ptr(linkedFile, ss_addr);
+ if (si)
+ return (Address)si->main_addr;
+
return 0;
}
}
@@ -358,10 +399,13 @@ namespace {
*/
void image::findMain()
{
-#if defined(ppc32_linux) || defined(ppc32_bgp)
+#if defined(ppc32_linux) || defined(ppc32_bgp) || defined(ppc64_linux)
using namespace Dyninst::InstructionAPI;
- if(!desc_.isSharedObject())
+ // Only look for main in executables, but do allow position-independent
+ // executables (PIE) which look like shared objects with an INTERP.
+ // (Some strange DSOs also have INTERP, but this is rare.)
+ if(!desc_.isSharedObject() || linkedFile->getInterpreterName() != NULL)
{
bool foundMain = false;
bool foundStart = false;
@@ -381,22 +425,21 @@ void image::findMain()
if (foundText == false) {
return;
}
-
- if( !foundMain )
- {
+
+ if( !foundMain )
+ {
logLine("No main symbol found: attempting to create symbol for main\n");
- const unsigned char* p;
- p = (( const unsigned char * ) eReg->getPtrToRawData());
- Address mainAddress = 0;
+ Address eAddr = linkedFile->getEntryOffset();
+ eAddr = deref_opd(linkedFile, eAddr);
bool parseInAllLoadableRegions = (BPatch_normalMode != mode_);
SymtabCodeSource scs(linkedFile, filt, parseInAllLoadableRegions);
CodeObject tco(&scs,NULL,NULL,false);
- tco.parse(eReg->getMemOffset(),false);
+ tco.parse(eAddr,false);
set<CodeRegion *> regions;
- scs.findRegions(eReg->getMemOffset(),regions);
+ scs.findRegions(eAddr,regions);
if(regions.empty()) {
// express puzzlement
return;
@@ -404,23 +447,31 @@ void image::findMain()
SymtabCodeRegion * reg =
static_cast<SymtabCodeRegion*>(*regions.begin());
Function * func =
- tco.findFuncByEntry(reg,eReg->getMemOffset());
+ tco.findFuncByEntry(reg,eAddr);
if(!func) {
// again, puzzlement
return;
}
+ Block * b = NULL;
const Function::edgelist & calls = func->callEdges();
- if(calls.size() != 1) {
+ if (calls.empty()) {
+ // when there are no calls, let's hope the entry block is it
+ b = tco.findBlockByEntry(reg,eAddr);
+ } else if(calls.size() == 1) {
+ Function::edgelist::iterator cit = calls.begin();
+ b = (*cit)->src();
+ } else {
startup_printf("%s[%d] _start has unexpected number (%d) of"
" call edges, bailing on findMain()\n",
FILE__,__LINE__,calls.size());
- return;
+ return;
}
- Function::edgelist::iterator cit = calls.begin();
- Block * b = (*cit)->src();
+ if (!b) return;
+
+ Address mainAddress = evaluate_main_address(linkedFile,func,b);
+ mainAddress = deref_opd(linkedFile, mainAddress);
- mainAddress = evaluate_main_address(linkedFile,func,b);
if(0 == mainAddress || !scs.isValidAddress(mainAddress)) {
startup_printf("%s[%d] failed to find main\n",FILE__,__LINE__);
return;
@@ -430,8 +481,8 @@ void image::findMain()
}
Symbol *newSym= new Symbol( "main",
Symbol::ST_FUNCTION,
- Symbol::SL_GLOBAL,
- Symbol::SV_DEFAULT,
+ Symbol::SL_LOCAL,
+ Symbol::SV_INTERNAL,
mainAddress,
linkedFile->getDefaultModule(),
eReg,
@@ -444,7 +495,10 @@ void image::findMain()
|| defined(i386_unknown_solaris2_5) \
|| (defined(os_freebsd) \
&& (defined(arch_x86) || defined(arch_x86_64)))
- if(!desc_.isSharedObject())
+ // Only look for main in executables, but do allow position-independent
+ // executables (PIE) which look like shared objects with an INTERP.
+ // (Some strange DSOs also have INTERP, but this is rare.)
+ if(!desc_.isSharedObject() || linkedFile->getInterpreterName() != NULL)
{
bool foundMain = false;
bool foundStart = false;
@@ -594,8 +648,8 @@ void image::findMain()
//logLine( "No static symbol for function main\n" );
Symbol *newSym = new Symbol("DYNINST_pltMain",
Symbol::ST_FUNCTION,
- Symbol::SL_GLOBAL,
- Symbol::SV_DEFAULT,
+ Symbol::SL_LOCAL,
+ Symbol::SV_INTERNAL,
mainAddress,
linkedFile->getDefaultModule(),
eReg,
@@ -606,8 +660,8 @@ void image::findMain()
{
Symbol *newSym= new Symbol( "main",
Symbol::ST_FUNCTION,
- Symbol::SL_GLOBAL,
- Symbol::SV_DEFAULT,
+ Symbol::SL_LOCAL,
+ Symbol::SV_INTERNAL,
mainAddress,
linkedFile->getDefaultModule(),
eReg,
@@ -619,8 +673,8 @@ void image::findMain()
{
Symbol *startSym = new Symbol( "_start",
Symbol::ST_FUNCTION,
- Symbol::SL_GLOBAL,
- Symbol::SV_DEFAULT,
+ Symbol::SL_LOCAL,
+ Symbol::SV_INTERNAL,
eReg->getMemOffset(),
linkedFile->getDefaultModule(),
eReg,
@@ -635,8 +689,8 @@ void image::findMain()
if (linkedFile->findRegion(finisec,".fini")) {
Symbol *finiSym = new Symbol( "_fini",
Symbol::ST_FUNCTION,
- Symbol::SL_GLOBAL,
- Symbol::SV_DEFAULT,
+ Symbol::SL_LOCAL,
+ Symbol::SV_INTERNAL,
finisec->getMemOffset(),
linkedFile->getDefaultModule(),
finisec,
@@ -657,8 +711,8 @@ void image::findMain()
{
Symbol *newSym = new Symbol( "_DYNAMIC",
Symbol::ST_OBJECT,
- Symbol::SL_GLOBAL,
- Symbol::SV_DEFAULT,
+ Symbol::SL_LOCAL,
+ Symbol::SV_INTERNAL,
dynamicsec->getMemOffset(),
linkedFile->getDefaultModule(),
dynamicsec,