#!/usr/bin/python
# getasm.py - post-processor utility for objdump x86 disassembly
# Copyright (c) 2005 Paulo Matias
# Published under the terms of GPL2
# Partly based on LDasm Perl(eeek) code, by Ravemax
import psyco
import string
import sys
import os
import tempfile
import re
# The script takes exactly one argument (the executable to disassemble);
# anything else prints the usage line on stderr and exits with status 1.
if len(sys.argv[1:]) != 1:
    sys.stderr.write("usage: %s executable > outfile\n" % sys.argv[0])
    raise SystemExit(1)
def create_temp():
    """Run objdump on the executable named by sys.argv[1] into a temp file.

    objdump's standard output is written to a file created with
    tempfile.mkstemp().  objdump's exit status is ignored, as before.

    Returns the path of the temporary file; the caller must delete it.
    Raises OSError if objdump cannot be started (the temporary file is
    removed first).
    """
    # Local import so this function does not depend on the file header.
    import subprocess

    (fd, tmpf) = tempfile.mkstemp()
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact and prevents them from being interpreted as
    # shell syntax.
    cmd = ["objdump", "-d", "-T", "-x", "--prefix-addresses",
           "--show-raw-insn", "-C", sys.argv[1]]
    try:
        subprocess.call(cmd, stdout=fd)
    except OSError:
        # objdump could not be executed: do not leave the temp file behind.
        os.close(fd)
        os.unlink(tmpf)
        raise
    os.close(fd)
    return tmpf
def read_static_sections(stream=None):
    """Parse the section table of the objdump listing.

    Lines are consumed from `stream` until one starting with "Idx Name" is
    found.  After that, sections are read as pairs of lines: a line with
    seven whitespace-separated fields (index, name, size, VMA, LMA, file
    offset, alignment) followed by a line of flags.  Parsing stops at the
    first line that does not have exactly seven fields.

    stream -- file-like object positioned before the section table;
              defaults to the module-level `tmp` listing file.

    Returns a dict mapping section name to a dict with the keys "size",
    "vma", "vma_end", "lma", "fileofs" (integers parsed as hexadecimal),
    "type" ("data", "code" or "?") and "ro" (True when the flags line
    contains READONLY).  Returns an empty dict when no section table is
    present.
    """
    if stream is None:
        stream = tmp

    # Skip everything up to and including the table header.  Stop at EOF
    # instead of spinning forever when the header never appears.
    while True:
        header = stream.readline()
        if not header:
            return {}
        if header.startswith("Idx Name"):
            break

    sec = {}
    while True:
        secdet = stream.readline().split()
        if len(secdet) != 7:
            break
        flags = stream.readline()

        size = int(secdet[2], 16)
        vma = int(secdet[3], 16)

        # DATA takes precedence over CODE, matching the original lookup order.
        if "DATA" in flags:
            kind = "data"
        elif "CODE" in flags:
            kind = "code"
        else:
            kind = "?"

        sec[secdet[1]] = {
            "size": size,
            "vma": vma,
            "vma_end": vma + size,
            "lma": int(secdet[4], 16),
            "fileofs": int(secdet[5], 16),
            "type": kind,
            # A section is read-only when READONLY is present in its flags.
            "ro": "READONLY" in flags,
        }
    return sec
def read_strings():
    """Record the NUL-separated strings found in the .rodata section.

    The raw bytes of .rodata are read from the executable named by
    sys.argv[1], using the file offset and size stored in the module-level
    `sections` dict.  Each non-empty chunk between NUL bytes is stored in
    the module-level `refsaddr` dict under the key "%x" % address (its
    start address in lower-case hex without padding) as
    {"str": <chunk with surrounding whitespace stripped>}.

    Does nothing when the executable has no .rodata section.
    """
    rodata = sections.get(".rodata")
    if rodata is None:
        # No read-only data section: there are no strings to index.
        return

    # Binary mode: the section is raw bytes, not text.
    f = open(sys.argv[1], "rb")
    try:
        f.seek(rodata["fileofs"])
        strdata = f.read(rodata["size"])
    finally:
        f.close()

    strvma = rodata["vma"]
    for chunk in strdata.split("\x00"):
        if chunk:
            refsaddr["%x" % strvma] = {"str": chunk.strip()}
        # Advance past the chunk and its NUL terminator, including for empty
        # chunks, so later addresses stay aligned with the section contents.
        strvma += len(chunk) + 1
def read_symbol_table():
    """Collect imported and internal symbols from the objdump listing.

    Lines are read from the module-level `tmp` file until one starting with
    "Disassembly" is seen or the file ends.  Each line matching the symbol
    pattern yields an address, a one-character flag, a section name and a
    symbol name.  Whitespace-separated parts of the name are joined with
    "::".  Results go into the module-level `refsaddr` dict, replacing any
    existing entry for that address:

      section "*UND*" -> {"import": name}
      section ".text" -> {"internal": name}, plus "export": name when the
                         flag character is "F"

    Symbols whose address consists only of zeros are skipped.
    """
    exprdis = re.compile("^Disassembly")
    exprsym = re.compile("([^ ]+).{7}(.).(.+?)\t.+? (.+)")
    while True:
        raw = tmp.readline()
        if not raw:
            # EOF without a "Disassembly" line: stop instead of looping
            # forever on the empty strings readline() returns at EOF.
            break
        line = raw.strip()
        if exprdis.search(line):
            break
        m = exprsym.search(line)
        if not m:
            continue
        raddr, rexp, rtype, rname = m.groups()
        # Skip null addresses regardless of how many digits objdump prints
        # (the original only recognised the 8-digit form).
        if not raddr.strip("0"):
            continue
        rfct = "::".join(rname.split())
        if rtype == "*UND*":
            refsaddr[raddr] = {"import": rfct}
        elif rtype == ".text":
            refsaddr[raddr] = {"internal": rfct}
            if rexp == "F":
                refsaddr[raddr]["export"] = rfct
def search_calls_and_jumps():
    """Index every call and jump in the listing by its destination address.

    Reads the module-level `tmp` file to EOF.  Only lines that begin with
    "0" and contain either "call" or a run of two or more whitespace
    characters followed by "j" are examined.  For each such line that the
    operand pattern matches, the source address is appended to
    refsaddr[destination]["call"] for a `call`, or to
    refsaddr[destination]["jump"] otherwise, suffixed with "(U)" for `jmp`
    and "(C)" for any other mnemonic.  Missing dict entries and lists are
    created on demand.
    """
    starts_with_zero = re.compile("^0")
    branch_hint = re.compile("\s{2,}j|call")
    branch_parts = re.compile("(0x)?([^ ]+).+?\s{2,}([^ ]+)\s+\*?(0x)?([0-9a-f]+)( |\n)")

    # readline() returns "" only at EOF, which ends the iteration.
    for line in iter(tmp.readline, ""):
        if not (starts_with_zero.search(line) and branch_hint.search(line)):
            continue
        found = branch_parts.search(line)
        if found is None:
            continue
        source, mnemonic, target = found.group(2, 3, 5)

        entry = refsaddr.setdefault(target, {})
        if mnemonic == 'call':
            entry.setdefault("call", []).append(source)
            continue

        # Everything that is not a call is recorded as a jump; only a plain
        # `jmp` is marked unconditional.
        if mnemonic == 'jmp':
            marker = "(U)"
        else:
            marker = "(C)"
        entry.setdefault("jump", []).append(source + marker)
def add_referenced_by(type, list):
    """Print a "Referenced by" heading followed by the referencing addresses.

    type -- text inserted into the heading to name the kind of reference
    list -- iterable of address strings, printed five per row

    Output goes to standard output and always ends with a newline.
    """
    emit = sys.stdout.write
    emit("-- Referenced by a %s at Address:" % type)
    for position, source in enumerate(list):
        # Start a new row before the first entry and after every fifth one.
        if position % 5 == 0:
            emit("\n ")
        emit("%s " % source)
    print("")
def add_export_ref(funcname, addr):
    """Print the heading line for the function located at `addr`.

    funcname -- name to show in the heading
    addr     -- key into the module-level `refsaddr` dict; the entry must
                exist.  If it carries an "export" key the function is
                announced as exported, otherwise as a plain function.
    """
    if "export" in refsaddr[addr]:
        heading = "-- Exported fn(): %s"
    else:
        heading = "-- Function(): %s"
    print(heading % funcname)
def add_string_ref(str):
    """Print a two-line note about a possible string-data reference.

    str -- the referenced string; it is shown in repr() form so that
           non-printable characters stay visible.
    """
    rows = (
        "-- Possible StringData Ref from Code Obj",
        " %s" % repr(str),
    )
    for row in rows:
        print(row)
def add_ref_to_func(funcname, type):
    """Print a one-line note saying the instruction refers to a function.

    funcname -- name of the referenced function
    type     -- label shown in parentheses after the name
    """
    note = "-- Reference To: %s (%s)" % (funcname, type)
    print(note)
def generate_listing():
    """Write the annotated listing to standard output.

    Reads the module-level `tmp` file from its current position.

    1. Every line before the first one starting with "Disassembly" is
       copied through unchanged; that line itself is not printed.
    2. Each remaining line is stripped and printed, preceded by any
       annotations derived from the module-level `refsaddr` dict:
       - if the first space-separated token of the line is a known
         address: the calls and jumps that target it, and a function
         heading when the address has an "internal" name;
       - if the line contains `call <hex>` or `0x<hex>` (at least four hex
         digits) naming a known address: a string, import or internal
         function reference note, in that order of preference.
    """
    exprdis = re.compile("^Disassembly")
    expraddr = re.compile(r"(call\s+|0x)([0-9a-f]{4,})")

    # Phase 1: pass the header through.  Also stop at EOF, otherwise a
    # listing without a "Disassembly" line would loop forever here.
    while True:
        line = tmp.readline()
        if not line or exprdis.search(line):
            break
        sys.stdout.write(line)

    # Phase 2: annotate and print the disassembly.
    while True:
        line = tmp.readline()
        if not line:
            break
        line = line.strip()

        # Annotations about what refers TO this line's address.
        addr = line.split(" ", 1)[0]
        if addr in refsaddr:
            here = refsaddr[addr]
            if "call" in here:
                add_referenced_by("Call", here["call"])
            if "jump" in here:
                add_referenced_by("(U)nconditional or (C)onditional Jump", here["jump"])
            if "internal" in here:
                add_export_ref(here["internal"], addr)

        # Annotations about what this line refers to.
        found = expraddr.search(line)
        if found:
            target = found.group(2)
            if target in refsaddr:
                rfa = refsaddr[target]
                if "str" in rfa:
                    add_string_ref(rfa["str"])
                elif "import" in rfa:
                    add_ref_to_func(rfa["import"], "import")
                elif "internal" in rfa:
                    add_ref_to_func(rfa["internal"], "internal")
        print(line)
# Script driver: disassemble the executable into a temporary file, make
# two passes over that listing, and write the annotated result to stdout.
# Progress messages go to stderr so they do not mix with the listing.
psyco.core.full()

sys.stderr.write("Disassembling...\n")
tempfn = create_temp()
try:
    # `tmp`, `sections` and `refsaddr` are module-level names that the
    # functions above read and update.
    tmp = open(tempfn)
    try:
        sys.stderr.write("Reading sections...\n")
        sections = read_static_sections()
        refsaddr = {}
        sys.stderr.write("Reading strings...\n")
        read_strings()
        sys.stderr.write("Reading symbol table...\n")
        read_symbol_table()
        sys.stderr.write("Searching calls and jumps...\n")
        search_calls_and_jumps()
        # Second pass: rewind so the listing is generated from the start.
        tmp.seek(0)
        sys.stderr.write("Generating listing...\n")
        generate_listing()
    finally:
        tmp.close()
finally:
    # Always remove the temporary listing, even when a stage above fails.
    os.unlink(tempfn)