Processamento de Assembly em Python

Python
Enviado por thotypous em Ter, 27/12/2005 - 09:22. Python
Um exemplo de código para todos. Trata-se de um pós-processador para a saída do objdump. Ele inclui referências de string, de calls e de jumps, entre outras coisas, na listagem do código Assembly. Muito bom para analisar os códigos.
  1. #!/usr/bin/python
  2. # getasm.py - post-processor utility for objdump x86 disassembly
  3. # Copyright (c) 2005 Paulo Matias
  4. # Published under the terms of GPL2
  5. # Partly based on LDasm Perl(eeek) code, by Ravemax
  6.  
  7. import psyco
  8. import string
  9. import sys
  10. import os
  11. import tempfile
  12. import re
  13.  
  14. if len(sys.argv) != 2:
  15.   sys.stderr.write("usage: %s executable > outfile\n" % sys.argv[0])
  16.   sys.exit(1)
  17.  
  18. def create_temp():
  19.   (fd, tmpf) = tempfile.mkstemp()
  20.   os.close(fd)
  21.   os.system("objdump -d -T -x --prefix-addresses --show-raw-insn -C %s > %s" % (sys.argv[1], tmpf))
  22.   return tmpf
  23.  
  24. def read_static_sections():
  25.   sec = {}
  26.   expr = re.compile("^Idx Name")
  27.   while not expr.search(tmp.readline()):
  28.     pass
  29.   while True:
  30.     secdet = string.split(tmp.readline())
  31.     if len(secdet) != 7:
  32.       break
  33.     line = tmp.readline()
  34.     sec[secdet[1]] = {
  35.       "size":    string.atoi(secdet[2], 16),
  36.       "vma":     string.atoi(secdet[3], 16),
  37.       "vma_end": string.atoi(secdet[3], 16)+string.atoi(secdet[2], 16),
  38.       "lma":     string.atoi(secdet[4], 16),
  39.       "fileofs": string.atoi(secdet[5], 16)
  40.     }
  41.     if string.find(line, "DATA") == -1:
  42.       if string.find(line, "CODE") == -1:
  43.         sec[secdet[1]]["type"] = "?"
  44.       else:
  45.         sec[secdet[1]]["type"] = "code"
  46.     else:
  47.       sec[secdet[1]]["type"] = "data"
  48.     if string.find(line, "READONLY") == -1:
  49.       sec[secdet[1]]["ro"] = True
  50.     else:
  51.       sec[secdet[1]]["ro"] = False
  52.   return sec
  53.  
  54. def read_strings():
  55.   strvma = sections[".rodata"]["vma"]
  56.   f = file(sys.argv[1])
  57.   f.seek(sections[".rodata"]["fileofs"])
  58.   strdata = f.read(sections[".rodata"]["size"])
  59.   f.close()
  60.   for x in string.split(strdata, "\x00"):
  61.     if len(x):
  62.       rcvma = "%x" % strvma
  63.       refsaddr[rcvma] = { "str": string.strip(x) }
  64.     strvma += len(x)+1
  65.  
  66. def read_symbol_table():
  67.   exprdis = re.compile("^Disassembly")
  68.   exprsym = re.compile("([^ ]+).{7}(.).(.+?)\t.+? (.+)")
  69.   while True:
  70.     line = string.strip(tmp.readline())
  71.     if exprdis.search(line):
  72.       break
  73.     m = exprsym.search(line)
  74.     if not m:
  75.       continue
  76.     m = m.groups()
  77.     if m[0] == "00000000":
  78.       continue
  79.     raddr, rexp, rtype = m[0], m[1], m[2]
  80.     rfct  = string.join(string.split(m[3]), "::")
  81.     if rtype == "*UND*":
  82.       refsaddr[raddr] = { "import": rfct }
  83.     elif rtype == ".text":
  84.       refsaddr[raddr] = { "internal": rfct }
  85.       if rexp == "F":
  86.         refsaddr[raddr]["export"] = rfct
  87.  
  88. def search_calls_and_jumps():
  89.   expr1 = re.compile("^0")
  90.   expr2 = re.compile("\s{2,}j|call")
  91.   expr3 = re.compile("(0x)?([^ ]+).+?\s{2,}([^ ]+)\s+\*?(0x)?([0-9a-f]+)( |\n)")
  92.   while True:
  93.     line = tmp.readline()
  94.     if not line:
  95.       break
  96.     if (not expr1.search(line)) or (not expr2.search(line)):
  97.       continue
  98.     m = expr3.search(line)
  99.     if not m:
  100.       continue
  101.     m = m.groups()
  102.     sjaddr, op, daddr = m[1], m[2], m[4]
  103.     if not refsaddr.has_key(daddr):
  104.       refsaddr[daddr] = {}
  105.     if op == 'call':
  106.       if not refsaddr[daddr].has_key("call"):
  107.         refsaddr[daddr]["call"] = []
  108.       refsaddr[daddr]["call"].append(sjaddr)
  109.     else:
  110.       if not refsaddr[daddr].has_key("jump"):
  111.         refsaddr[daddr]["jump"] = []
  112.       if op == 'jmp':
  113.         sjaddr += "(U)"
  114.       else:
  115.         sjaddr += "(C)"
  116.       refsaddr[daddr]["jump"].append(sjaddr)
  117.  
  118. def add_referenced_by(type, list):
  119.   sys.stdout.write("-- Referenced by a %s at Address:" % type)
  120.   i = 0
  121.   for x in list:
  122.     if i % 5 == 0:
  123.       sys.stdout.write("\n     ")
  124.     sys.stdout.write("%s " % x)
  125.     i+=1
  126.   print ""
  127.  
  128. def add_export_ref(funcname, addr):
  129.   if refsaddr[addr].has_key("export"):
  130.     print "-- Exported fn(): %s" % funcname
  131.   else:
  132.     print "-- Function(): %s" % funcname
  133.  
  134. def add_string_ref(str):
  135.   print "-- Possible StringData Ref from Code Obj"
  136.   print "     %s" % repr(str)
  137.  
  138. def add_ref_to_func(funcname, type):
  139.   print "-- Reference To: %s (%s)" % (funcname, type)
  140.  
  141. def generate_listing():
  142.   exprdis = re.compile("^Disassembly")
  143.   expraddr = re.compile("(call\s+|0x)([0-9a-f]{4,})")
  144.   while True:
  145.     line = tmp.readline()
  146.     if exprdis.search(line):
  147.       break
  148.     sys.stdout.write(line)
  149.   while True:
  150.     line = tmp.readline()
  151.     if not line:
  152.       break
  153.     line = string.strip(line)
  154.     addr = string.split(line, " ", 1)[0]
  155.     if refsaddr.has_key(addr):
  156.       if refsaddr[addr].has_key("call"):
  157.         add_referenced_by("Call", refsaddr[addr]["call"])
  158.       if refsaddr[addr].has_key("jump"):
  159.         add_referenced_by("(U)nconditional or (C)onditional Jump", refsaddr[addr]["jump"])
  160.       if refsaddr[addr].has_key("internal"):
  161.         add_export_ref(refsaddr[addr]["internal"], addr)
  162.     addr = expraddr.search(line)
  163.     if addr:
  164.       addr = addr.groups()[1]
  165.       if refsaddr.has_key(addr):
  166.         rfa = refsaddr[addr]
  167.         if rfa.has_key("str"):
  168.           add_string_ref(rfa["str"])
  169.         elif rfa.has_key("import"):
  170.           add_ref_to_func(rfa["import"], "import")
  171.         elif rfa.has_key("internal"):
  172.           add_ref_to_func(rfa["internal"], "internal")
  173.     print line
  174.  
  175. psyco.core.full()
  176.  
  177. sys.stderr.write("Disassembling...\n")
  178. tempfn = create_temp()
  179. tmp = file(tempfn)
  180.  
  181. sys.stderr.write("Reading sections...\n")
  182. sections = read_static_sections()
  183.  
  184. refsaddr = {}
  185.  
  186. sys.stderr.write("Reading strings...\n")
  187. read_strings()
  188.  
  189. sys.stderr.write("Reading symbol table...\n")
  190. read_symbol_table()
  191.  
  192. sys.stderr.write("Searching calls and jumps...\n")
  193. search_calls_and_jumps()
  194.  
  195. tmp.seek(0)
  196.  
  197. sys.stderr.write("Generating listing...\n")
  198. generate_listing()
  199.  
  200. tmp.close()
  201. os.unlink(tempfn)