type_dict.py (3787B)
###################################################################
# Format of input file:
# <filename> <func or global> <line> <var or call to free>
#
# Only lines whose fifth space-separated field is "=" are used.
###################################################################

###################################################################
# Format of dictionary:
# ('filename:line') |-> ('type')
###################################################################

import errno
import json
import os
import re

import gdb

# Splits a field-access expression such as `a->b.c` into its components.
_ACCESS_RE = re.compile(r"->|\.")
# Matches an array subscript such as `[i]` or `[10]` (non-greedy).
_SUBSCRIPT_RE = re.compile(r"\[.*?\]")


def delfile(name):
    """Remove the file *name*, ignoring the case where it does not exist.

    Raises:
        OSError: for any removal failure other than ENOENT.
    """
    try:
        os.remove(name)
    except OSError as err:
        if err.errno != errno.ENOENT:
            raise


class CodeDict():
    """Build a 'filename:line' -> type dictionary with the help of gdb.

    Instantiating the class reads the input file `in_n`, asks gdb for the
    type of the variable named on each usable line, and writes the resulting
    dictionary as JSON to `out_n`.
    """

    in_n = ".funcs"
    out_n = ".dict"

    inf = None
    outf = None

    # Kept as a class attribute for compatibility; __init__ rebinds a fresh
    # dictionary on every instance so instances never share entries.
    dict = {}

    def __init__(self):
        """Run the whole pipeline: open files, parse, dump JSON, close files."""
        self.dict = {}
        self.setup()
        try:
            print("Creating dictionary, this can take 5 minutes or more..")
            self.parse()
            print("..done!")

            self.outf.write(json.dumps(self.dict))
        finally:
            # Closing flushes the JSON to disk and releases both handles even
            # when parsing fails part-way through.
            self.inf.close()
            self.outf.close()

    def setup(self):
        """Open the input file for reading and a fresh output file for writing.

        Raises:
            OSError: if either file cannot be opened (a hint is printed first
                when the input file is the one that failed).
        """
        try:
            self.inf = open(self.in_n, "r")
        except OSError:
            print(f"{self.in_n} file not found, run `occ.sh` first")
            raise

        try:
            delfile(self.out_n)
            self.outf = open(self.out_n, "w+")
        except OSError:
            # Do not leak the already-opened input file.
            self.inf.close()
            raise

    def parse(self):
        """Fill `self.dict` from the lines of the input file.

        Lines that do not describe an assignment, and variables whose type
        gdb cannot resolve, are skipped.
        """
        for line in self.inf:
            # Insert ./ to reflect the frame representation of source file in gdb
            fields = ("./" + line).split(" ")

            if len(fields) < 5 or fields[4] != "=":
                continue

            src, fn, lnr, var = fields[:4]
            key = f"{src}:{lnr}"

            # ugly, but necessary since gdb does not like $ whatis 'dup_task_struct'::tsk
            # and task_struct is too important to give up
            if fn == "dup_task_struct":
                self.dict[key] = "type = struct task_struct *"
                continue

            # Break `a->b.c` into ['a', 'b', 'c'] and drop any array
            # subscript from the base variable before asking gdb about it.
            chain = _ACCESS_RE.split(var)
            chain[0] = _SUBSCRIPT_RE.sub("", chain[0])

            # Globals are looked up relative to their source file, locals
            # relative to the enclosing function.
            scope = src if fn == "<global>" else fn
            try:
                type_info = gdb.execute(
                    f"whatis '{scope}'::{chain[0]}", to_string=True
                )
            except gdb.error:
                # gdb does not know this symbol; best effort, move on.
                continue

            if len(chain) > 1:
                # Strip the leading "type = " before walking the field chain.
                type_info = self.parse_chain(type_info[7:], chain, 1)

            if type_info is not None:
                self.dict[key] = type_info.replace("\n", "")

    def parse_chain(self, next_type, chain, index):
        """Resolve the type of the field access `chain[index:]` on *next_type*.

        Args:
            next_type: type expression (without the "type = " prefix) of the
                object whose field `chain[index]` is accessed.
            chain: list of names making up the field-access expression.
            index: position in *chain* of the field to resolve next.

        Returns:
            "type = <type>" for the last field of the chain, or None when a
            field cannot be found or gdb cannot print the compound type.
        """
        # we're at the final field access, return its type
        if index >= len(chain):
            return "type = " + next_type.strip()

        # we need to look for the type of the next field in the field access chain
        field = _SUBSCRIPT_RE.sub("", chain[index])

        # obtain the fields of the compound type to search through; the first
        # line and the last two lines of the ptype output are not members
        try:
            output = gdb.execute(f"ptype {next_type.strip()}", to_string=True)
        except gdb.error:
            return None
        members = output.split("\n")[1:-2]

        # The field name must be delimited by non-identifier characters on
        # both sides so that it does not match inside a longer identifier.
        pattern = re.compile(
            rf"[^_A-Za-z0-9]({re.escape(field)})[^_A-Za-z0-9]"
        )

        # loop over the compound type's fields, attempt to match field we're looking for
        for member in members:
            # account for possible bit field
            bitfield = member.rfind(":")
            if bitfield > 0:
                member = member[:bitfield]

            # account for possible array
            member = _SUBSCRIPT_RE.sub("", member)

            # match on field name, everything preceding it is its type
            match = pattern.search(member)

            # field name was found, extract type and recurse if necessary
            if match:
                return self.parse_chain(
                    member[:match.start(1)], chain, index + 1
                )

        # field not found
        return None


CodeDict()