|
Package Libs ::
Module librecognition
|
|
1 """
2 (c) Immunity, Inc. 2004-2007
3
4
5 U{Immunity Inc.<http://www.immunityinc.com>}
6
7
8 Library for function recognizing
9
10 """
11 __VERSION__ = '1.2'
12
13
14 from libanalyze import *
15 from libdatatype import *
16 from libstackanalyze import *
17 import binascii
18 import struct
19 import hashlib
20 import re
21 import string
22 import debugger
23 import csv
24 import os
25
28 if not isinstance(dictionaries, list):
29 dictionaries = [ dictionaries ]
30
31 self.iterators = []
32 self.fds = []
33 self.idx = 0
34 for d in dictionaries:
35 try:
36 fd = open(d, "rb")
37 except:
38 fd = open(d, "w+b")
39 self.iterators.append(csv.reader(fd))
40 self.fds.append(fd)
42 for i in range(0, self.idx+1):
43 self.fds[i].seek(0)
44 self.idx = 0
45 return self
46
48 while self.iterators:
49 self.iterators.pop()
50 for fd in self.fds:
51 fd.close()
52 del self.fds
53
55 try:
56 data = self.iterators[self.idx].next()
57 except StopIteration:
58 if len(self.iterators) > self.idx+1:
59 self.idx += 1
60 return self.next()
61 else:
62 raise StopIteration
63
64 data.append(self.fds[self.idx].name)
65 return data
66
68 - def __init__(self, imm, dictionaryfiles=None):
69 """
70 This class try to recognize a function using different methods
71 (address/signature/heuristic).
72
73 @type imm: Debbuger OBJECT
74 @param imm: Debbuger instance
75
76 @type dictionaryfiles: STRING|LIST
77 @param dictionaryfiles: Name, or list of names, of .dat files inside the Data folder, where're stored the function
78 patterns. Use an empty string to use all .dat files in Data folder.
79 """
80 self.imm = imm
81 self.heuristicReferencesCache = {}
82 self.heuristicCache = {}
83 self.resolvCache = {}
84
85 if not dictionaryfiles:
86 dictionaryfiles = []
87 for file in os.listdir("Data"):
88 if file[-4:] == ".dat":
89 dictionaryfiles.append(os.path.join("Data", file))
90 self.dictionaries = MultiCSVIterator(dictionaryfiles)
91
93 """
94 Look up into our dictionaries to find a function match.
95
96 @type address: DWORD
97 @param address: Address of the function to search
98
99 @type heuristic: INTEGER
100 @param heuristic: heuristic threasold to consider a real function match
101
102 @rtype: STRING
103 @return: a STRING with the function's real name or the given address if there's no match
104 """
105
106
107 if self.resolvCache.has_key(address):
108 return self.resolvCache[address]
109
110
111 exact = self.makeFunctionHashExact(address)
112 for data in self.dictionaries:
113 if exact == data[4]:
114 self.resolvCache[address] = data[0]
115 break
116
117
118 if not self.resolvCache.has_key(address):
119 ref = self.selectBasicBlock(address)
120 posThreshold = 0
121 posName = ""
122 for data in self.dictionaries:
123
124
125 if ref == data[1]:
126 perc = self.checkHeuristic(address, data[2], data[3])
127
128 if perc >= heuristic and perc > posThreshold:
129 posThreshold = perc
130 posName = data[0]
131 if posName:
132 self.resolvCache[address] = posName
133
134
135 if not self.resolvCache.has_key(address):
136 self.resolvCache[address] = "%08X" % address
137
138 return self.resolvCache[address]
139
141 """
142 Check a given address with a precomputed hash of a function.
143 Return a percentage of match (you can use a threasold to consider a real match)
144
145 @type address: DWORD
146 @param address: Address of the function to compare
147
148 @type reference: STRING
149 @param reference: base64 representation of the compressed information about the function
150
151 @type refFirstCall: STRING
152 @param refFirstCall: the same, but following the function pointed by the first call in the first BB.
153 (OPTIONAL)
154
155 @rtype: INTEGER
156 @return: heuristic threasold to consider a real function match
157 """
158
159
160
161
162 if self.heuristicCache.has_key(address):
163 cfg = self.heuristicCache[address]
164 else:
165 cfg = self.makeFunctionHashHeuristic(address)
166 self.heuristicCache[address] = cfg
167
168
169 sha1 = hashlib.sha1(reference+refFirstCall).digest()
170 if self.heuristicReferencesCache.has_key(sha1):
171 refcfg = self.heuristicReferencesCache[sha1]
172 else:
173
174
175 refcfg = []
176 refcfg.append([])
177 refcfg.append([])
178 data = binascii.a2b_base64(reference)
179 for o in range(0,len(data),12):
180 (start, left, right) = struct.unpack("LLL",data[o:o+12])
181 refcfg[0].append([ start, left, right ])
182 if refFirstCall:
183 data = binascii.a2b_base64(refFirstCall)
184 for o in range(0,len(data),12):
185 (start, left, right) = struct.unpack("LLL",data[o:o+12])
186 refcfg[1].append([ start, left, right ])
187 self.heuristicReferencesCache[sha1] = refcfg
188
189 perc1 = self.compareHeuristic(cfg[0][:], refcfg[0][:])
190 if cfg[1] or refcfg[1]:
191 perc2 = self.compareHeuristic(cfg[1][:], refcfg[1][:])
192
193 perc = (perc1 + perc2) / 2
194 else:
195 perc = perc1
196
197 return perc
198
200
201
202
203
204
205
206 diff = eq = 0
207 checked = []
208
209 for info in cfg:
210 bbeq = value = 0
211 for rinfo in refcfg:
212 tmp = 0
213 if info[0] == rinfo[0]: tmp += 1
214 if info[1] == rinfo[1]: tmp += 1
215 if info[2] == rinfo[2]: tmp += 1
216 if tmp > bbeq:
217 bbeq = tmp
218 value = rinfo
219 if bbeq == 3: break
220 try:
221 idx=refcfg.index(value)
222 refcfg.pop(idx)
223 except ValueError:
224 pass
225
226 eq += bbeq
227 diff += 3 - bbeq
228
229
230 for rinfo in refcfg:
231 bbeq = value = 0
232 for info in cfg:
233 tmp = 0
234 if info[0] == rinfo[0]: tmp += 1
235 if info[1] == rinfo[1]: tmp += 1
236 if info[2] == rinfo[2]: tmp += 1
237 if tmp > bbeq:
238 bbeq = tmp
239 value = rinfo
240 if bbeq == 3: break
241 try:
242 idx=cfg.index(value)
243 cfg.pop(idx)
244 except ValueError:
245 pass
246
247 eq += bbeq
248 diff += 3 - bbeq
249
250
251 return eq * 100 / (eq + diff)
252
254 """
255 Consider:
256 - Control Flow Graph
257 - generalized instructions that:
258 access memory/write memory/use registers/use constant/call/jmp/jmc
259 and all his combinations.
260 - special case of functions with just 1 BB and a couple of calls (follow the first call)
261
262 @type address: DWORD
263 @param address: address of the function to hash
264
265 @type compressed: Boolean
266 @param compressed: return a compressed base64 representation or the raw data
267
268 @type followCalls: Boolean
269 @param followCalls: follow the first call in a single basic block function
270
271 @rtype: LIST
272 @return: the first element is described below and the second is the result of this same function but over the first
273 call of a single basic block function (if applies), each element is like this:
274 a base64 representation of the compressed version of each bb hash:
275 [4 bytes BB(i) start][4 bytes BB(i) 1st edge][4 bytes BB(i) 2nd edge]
276 0 <= i < BB count
277 or the same but like a LIST with raw data.
278 """
279
280 f = self.imm.getFunction(address)
281 bbs = f.getBasicBlocks()
282 bbmap = {}
283 cfg = {}
284
285
286 for bb in bbs:
287 cfg[bb.getStart()] = bb.getEdges()
288
289
290 for bb in bbs:
291 bbhash_data = []
292 for op in bb.getInstructions(self.imm):
293
294 instr = []
295 instr.append(op.getMemType())
296 instr.append(op.indexed)
297 instr.append(op.getCmdType())
298 instr.append(op.optype[0])
299 instr.append(op.optype[1])
300 instr.append(op.optype[2])
301 instr.append(op.getSize())
302 bbhash_data.append(self.hash_a_list(instr))
303 bbhash = self.hash_a_list(bbhash_data)
304 bbmap[bb.getStart()] = bbhash
305
306
307 rcfg = []
308 for start,edges in cfg.iteritems():
309 rstart = 0
310 redges = [0, 0]
311 rstart = bbmap[start]
312 if bbmap.has_key(edges[0]):
313 redges[0] = bbmap[edges[0]]
314 if bbmap.has_key(edges[1]):
315 redges[1] = bbmap[edges[1]]
316 rcfg.append([ rstart,redges[0],redges[1] ])
317
318
319 firstcall = []
320 if followCalls and len(bbs) == 1 and len(bbs[0].getCalls()) > 0:
321
322
323 op = self.imm.Disasm(bbs[0].getCalls()[0])
324 if op.getJmpConst():
325 firstcall = self.makeFunctionHashHeuristic(op.getJmpConst(), compressed, followCalls=False)[0]
326
327 del op
328
329 del bbs
330 del f
331 rcfg.sort()
332
333 if compressed:
334
335 fhash = ""
336 for data in rcfg:
337
338 fhash += struct.pack("LLL", data[0], data[1], data[2])
339 return [ binascii.b2a_base64(fhash)[:-1], firstcall ]
340 else:
341 return [ rcfg, firstcall ]
342
344 """
345 Take a list and return a binary representation of his CRC32.
346
347 @type data: LIST
348 @param data: a list of elements to make the hash
349
350 @rtype: UNSIGNED LONG
351 @return: a hash of the given values
352 """
353
354 ret = 0
355 for elem in data:
356 ret = binascii.crc32(str(elem), ret)
357 return struct.unpack("L", struct.pack("l",ret))[0]
358
360 """
361 Search memory to find a function that fullfit the options.
362
363 @type csvline: STRING
364 @param csvline: A line of a Data CSV file. This's a simple support for copy 'n paste from a CSV file.
365
366 @type heuristic: INTEGER
367 @param heuristic: heuristic threasold to consider a real function match
368
369 @type module: STRING
370 @param module: name of a module to restrict the search
371
372 @rtype: LIST
373 @return: a list of tuples with possible function's addresses and the heauristic match percentage
374 """
375
376 line = csv.reader([csvline]).next()
377 if len(line) < 9: line[7] = ""
378 return self._searchFunctionByHeuristic(line[1], line[2], line[3], line[4], heuristic, module, string.split(line[7],"|"))
379
380 - def _searchFunctionByHeuristic(self, search, functionhash=None, firstcallhash=None, exact=None, heuristic = 90, module = None, firstbb = None):
381 """
382 Search memory to find a function that fullfit the options.
383
384 @type search: STRING
385 @param search: searchCommand string to make the first selection
386
387 @type functionhash: STRING
388 @param functionhash: the primary function hash (use makeFunctionHash to generate this value)
389
390 @type firstcallhash: STRING
391 @param firstcallhash: the hash of the first call on single BB functions (use makeFunctionHash to generate this value)
392
393 @type exact: STRING
394 @param exact: an exact function hash, this's a binary byte-per-byte hash (use makeFunctionHash to generate this value)
395
396 @type heuristic: INTEGER
397 @param heuristic: heuristic threasold to consider a real function match
398
399 @type module: STRING
400 @param module: name of a module to restrict the search
401
402 @type firstbb: STRING
403 @param firstbb: generalized assembler of the first BB (to search function begin)
404
405 @rtype: LIST
406 @return: a list of tuples with possible function's addresses and the heauristic match percentage
407 """
408
409
410
411 if isinstance(search, list):
412 search.reverse()
413 tmp = search[:]
414 if tmp: search = tmp.pop()
415 if tmp: functionhash = tmp.pop()
416 if tmp: firstcallhash = tmp.pop()
417 if tmp: exact = tmp.pop()
418 if tmp: version = tmp.pop()
419 if tmp: file = tmp.pop()
420 if tmp: firstbb = tmp.pop()
421
422
423 if not search or not functionhash:
424 return None
425
426 if not firstcallhash:
427 firstcallhash = ""
428
429 heu_addy = None
430 heu_perc = 0
431 poss_functions = []
432 poss_return = []
433 search = string.replace(search, "\\n","\n")
434 if search:
435 if module:
436
437 for key,mod in debugger.Getallmodules().iteritems():
438 if module.lower() in key.lower():
439 poss_functions += self.imm.searchCommandsOnModule(mod[0], search)
440 else:
441 poss_functions = self.imm.searchCommands(search)
442 if poss_functions:
443 for poss in poss_functions:
444
445 addy = self.imm.getFunctionBegin(poss[0])
446 if not addy:
447
448 for mod in self.imm.getAllModules().values():
449 if mod.getMainentry():
450
451 f = StackFunction(self.imm, mod.getMainentry())
452 if f.isInsideFunction(poss[0]):
453 addy = mod.getMainentry()
454 break
455 if not addy and firstbb:
456