linux-rootkit

Feature-rich interactive rootkit that targets Linux kernel 4.19, accompanied by a dynamic kernel memory analysis GDB plugin for in vivo introspection (e.g. using QEMU)
git clone git://git.deurzen.net/linux-rootkit
Log | Files | Refs

commit cc48337599c5d9eef110b8be3fec2f7af3bbb6fe
parent 835f88964ee3cb6e03f0309e0b640af0ccbbf9b8
Author: deurzen <m.deurzen@tum.de>
Date:   Sun,  7 Feb 2021 23:26:15 +0100

Merge branch 'master' of github.com:deurzen/rootkit-programming-dev

Diffstat:
A project/README.md | 37 +++++++++++++++++++++++++++++++++++++
M project/extract_sizeret.py | 39 ++++++++++++++++++++++++++++++---------
M project/occ.sh | 4 +++-
M project/type_dict.py | 16 ++++++++++------
4 files changed, 80 insertions(+), 16 deletions(-)

diff --git a/project/README.md b/project/README.md @@ -0,0 +1,37 @@ +# LiveDM - Proof of Concept + +This is a small user's guide to supplement the slides. + + + +## Usage + +### Up-front Setup + +1. Clone the kernel sources. The version has to match that of the running kernel (including the sublevel)! +2. Run `occ.sh`. It takes the path to the kernel sources as its argument and generates all occurrences of our functions of interest. +3. Fire up the VM, attach GDB, and source `type_dict.py`. This will, based on the results of `occ.sh`, generate the dictionary. (Note: this will take ~5 min) +4. The setup is now ready + +### Memory Tracing + +Simply source `extract_sizeret.py` from within GDB. All memory allocations / frees for selected defined functions will now be tracked. + +Commands available: + +```c +rk-print-mem Prints the currently allocated memory +rk-debug Toggle between different output levels* +rk-data <addr> Output the data inside a buffer/struct; argument is address of rk-print-mem output +``` + + + +Output levels:* + +``` +WARN # warn when critical fields (in this case task_struct->cred.uid) change to suspicious values +INFO # also print tracepoint additions +TRACE # also print every memory allocation +``` + diff --git a/project/extract_sizeret.py b/project/extract_sizeret.py @@ -20,6 +20,8 @@ break_arg = { "vmalloc_32_user": "rdi", } +# when the size is hidden in a struct, things get more complicated +# allocator |-> (register with struct pointer, struct type, struct member that holds size) break_arg_access = { "kmem_cache_alloc_node": ("rdi", "struct kmem_cache *", "object_size"), } @@ -37,7 +39,10 @@ watch_write_access_chain = { ] } +# this is limited by the amount of debug registers.. 
avail_hw_breakpoints = 4 + +# store watchpoints so we can delete them later on (i.e., once the corresponding struct is freed) watchpoints = {} n_watchpoints = 0 @@ -52,20 +57,22 @@ entries = set() exits = set() types = {} -# Maps address to tuples of (type, size, caller) +# Address |-> (type, size, caller) mem_map = {} size_at_entry = None class DebugLevel(IntEnum): __order__ = 'WARN INFO TRACE' - WARN = 0 - INFO = 1 - TRACE = 2 + WARN = 0 # warn when critical fields (in this case task_struct->cred.uid) change to suspicious values + INFO = 1 # show tracepoint additions + TRACE = 2 # show every memory allocation debug_level = DebugLevel.INFO class RkPrintMem(gdb.Command): + """Print currently allocated memory""" + def __init__(self): super(RkPrintMem, self).__init__("rk-print-mem", gdb.COMMAND_DATA) @@ -81,6 +88,8 @@ class RkPrintMem(gdb.Command): RkPrintMem() class RkDebug(gdb.Command): + """Toggle between different modes of memory logging""" + def __init__(self): super(RkDebug, self).__init__("rk-debug", gdb.COMMAND_USER) @@ -115,7 +124,7 @@ class RkPrintData(gdb.Command): RkPrintData() - +# this breakpoint can react to function entry and exit class EntryExitBreakpoint(gdb.Breakpoint): def __init__(self, b): gdb.Breakpoint.__init__(self, b) @@ -150,7 +159,7 @@ class EntryExitBreakpoint(gdb.Breakpoint): (size, address) = extret mem_map[address] = (type, size, caller) - + if type[7:] in watch_write_access_chain: access_chains = watch_write_access_chain[type[7:]] for access_chain, critical_value in access_chains: @@ -214,10 +223,24 @@ class EntryExitBreakpoint(gdb.Breakpoint): if symtab is None: break + # https://stackoverflow.com/a/15550907/11069175 + # https://stackoverflow.com/questions/41565105/gdb-breakpoint-gets-hit-in-the-wrong-line-number + # in rare cases, our lines don't match up due to optimizations + # therefore, we go one step in each direction (up to 10 times) until we find our type key = f"{symtab.filename}:{sym.line}" if key in types: return 
(types[key], key) + else: + for i in range(10): + key_pos = f"{symtab.filename}:{sym.line + i}" + key_neg = f"{symtab.filename}:{sym.line - i}" + + if key_neg in types: + return (types[key_neg], key_neg) + + if key_pos in types: + return (types[key_pos], key_pos) f_iter = f_iter.older() @@ -348,14 +371,12 @@ class Stage3(): # system can hang when pagination is on gdb.execute("set pagination off") - # for rk-data + # for printing structs with rk-data gdb.execute("set print pretty on") with open(self.dictfile, 'r') as dct: types = json.load(dct) - types["./kernel/fork.c:812"] = "type = struct task_struct *" - for b in (break_arg.keys() | break_arg_access.keys()): # set breakpoint at function entry, to extract size b_entry = EntryExitBreakpoint(b) diff --git a/project/occ.sh b/project/occ.sh @@ -1,5 +1,7 @@ #!/bin/bash -#Extract all occurences of function calls and the assigned variables from kernel sources +#extract all occurences of function calls and the assigned variables from kernel sources + +#these are (more or less) wrappers for the functions we use in extract_sizeret.py funcs=("kmalloc" "kzalloc" "vmalloc" "vzalloc" "alloc_task_struct_node") out=".funcs" diff --git a/project/type_dict.py b/project/type_dict.py @@ -1,9 +1,5 @@ ################################################################### # Format of input file: -# First line: -# directory prefix to prune -# -# Rest of lines: # <filename> <func or global> <line> <var or call to free> ################################################################### @@ -35,7 +31,9 @@ class CodeDict(): def __init__(self): self.setup() + print("Creating dictionary, this can take 5 minutes or more..") self.parse() + print("..done!") self.outf.write(json.dumps(self.dict)) @@ -62,6 +60,14 @@ class CodeDict(): lnr = l[2] var = l[3] + # ugly, but necessary since gdb does not like $ whatis 'dup_task_struct'::tsk + # and task_struct is too important to give up + if fn == "dup_task_struct": + key = f"{src}:{lnr}" + val = "type = 
struct task_struct *" + self.dict[key] = val + continue + var = re.split('\-\>|\.', var) var[0] = re.sub('[.*?]', '', var[0]) @@ -77,9 +83,7 @@ class CodeDict(): continue if len(var) > 1: - print("looking in", type_info[7:].strip(), "for", var[1:]) type_info = self.parse_chain(type_info[7:], var, 1) - print("FOUND:", type_info) if type_info is not None: key = f"{src}:{lnr}"