Merge branch 'master' of github.com:deurzen/rootkit-programming-dev - linux-rootkit - Feature-rich interactive rootkit that targets Linux kernel 4.19, accompanied by a dynamic kernel memory analysis GDB plugin for in vivo introspection (e.g. using QEMU)

commit cc48337599c5d9eef110b8be3fec2f7af3bbb6fe
parent 835f88964ee3cb6e03f0309e0b640af0ccbbf9b8
Author: deurzen <m.deurzen@tum.de>
Date:   Sun,  7 Feb 2021 23:26:15 +0100

Merge branch 'master' of github.com:deurzen/rootkit-programming-dev

Diffstat:
A project/README.md  | 37 +++++++++++++++++++++++++++++++++++++
M project/extract_sizeret.py  | 39 ++++++++++++++++++++++++++++++---------
M project/occ.sh  | 4 +++-
M project/type_dict.py  | 16 ++++++++++------

4 files changed, 80 insertions(+), 16 deletions(-)
diff --git a/project/README.md b/project/README.md
@@ -0,0 +1,37 @@
+# LiveDM - Proof of Concept
+
+This is a small user's guide to supplement the slides.
+
+
+
+## Usage
+
+### Up-front Setup
+
+1. Clone the kernel sources. The version has to match that of the running kernel (including the sublevel)!
+2. Run `occ.sh`. It takes the path to the kernel sources as its argument and generates all occurrences of our functions of interest.
+3. Fire up the VM, attach GDB, and source `type_dict.py`. This will, based on the results of `occ.sh`, generate the dictionary. (Note: this will take ~5 min)
+4. The setup is now ready
+
+### Memory Tracing
+
+Simply source `extract_sizeret.py` from within GDB. All memory allocations / frees for selected defined functions will now be tracked.
+
+Commands available:
+
+```c
+rk-print-mem		Prints the currently allocated memory
+rk-debug			Toggle between different output levels*
+rk-data <addr>		Output the data inside a buffer/struct; argument is address of rk-print-mem output
+```
+
+
+
+Output levels:*
+
+```
+WARN 	# warn when critical fields (in this case task_struct->cred.uid) change to suspicious values
+INFO 	# also print tracepoint additions
+TRACE 	# also print every memory allocation
+```
+
diff --git a/project/extract_sizeret.py b/project/extract_sizeret.py
@@ -20,6 +20,8 @@ break_arg = {
     "vmalloc_32_user": "rdi",
 }
 
+# when the size is hidden in a struct, things get more complicated
+# allocator |-> (register with struct pointer, struct type, struct member that holds size)
 break_arg_access = {
     "kmem_cache_alloc_node": ("rdi", "struct kmem_cache *", "object_size"),
 }
@@ -37,7 +39,10 @@ watch_write_access_chain = {
     ]
 }
 
+# this is limited by the number of hardware debug registers
 avail_hw_breakpoints = 4
+
+# store watchpoints so we can delete them later on (i.e., once the corresponding struct is freed)
 watchpoints = {}
 n_watchpoints = 0
 
@@ -52,20 +57,22 @@ entries = set()
 exits = set()
 types = {}
 
-# Maps address to tuples of (type, size, caller)
+# Address |-> (type, size, caller)
 mem_map = {}
 
 size_at_entry = None
 
 class DebugLevel(IntEnum):
     __order__ = 'WARN INFO TRACE'
-    WARN = 0
-    INFO = 1
-    TRACE = 2
+    WARN = 0 # warn when critical fields (in this case task_struct->cred.uid) change to suspicious values
+    INFO = 1 # show tracepoint additions
+    TRACE = 2 # show every memory allocation
 
 debug_level = DebugLevel.INFO
 
 class RkPrintMem(gdb.Command):
+    """Print currently allocated memory"""
+
     def __init__(self):
         super(RkPrintMem, self).__init__("rk-print-mem", gdb.COMMAND_DATA)
 
@@ -81,6 +88,8 @@ class RkPrintMem(gdb.Command):
 RkPrintMem()
 
 class RkDebug(gdb.Command):
+    """Toggle between different modes of memory logging"""
+
     def __init__(self):
         super(RkDebug, self).__init__("rk-debug", gdb.COMMAND_USER)
 
@@ -115,7 +124,7 @@ class RkPrintData(gdb.Command):
 
 RkPrintData()
 
-
+# this breakpoint can react to function entry and exit
 class EntryExitBreakpoint(gdb.Breakpoint):
     def __init__(self, b):
         gdb.Breakpoint.__init__(self, b)
@@ -150,7 +159,7 @@ class EntryExitBreakpoint(gdb.Breakpoint):
         (size, address) = extret
 
         mem_map[address] = (type, size, caller)
-
+        
         if type[7:] in watch_write_access_chain:
             access_chains = watch_write_access_chain[type[7:]]
             for access_chain, critical_value in access_chains:
@@ -214,10 +223,24 @@ class EntryExitBreakpoint(gdb.Breakpoint):
             if symtab is None:
                 break
 
+            # https://stackoverflow.com/a/15550907/11069175
+            # https://stackoverflow.com/questions/41565105/gdb-breakpoint-gets-hit-in-the-wrong-line-number
+            # in rare cases, our lines don't match up due to optimizations
+            # therefore, we go one step in each direction (up to 10 times) until we find our type
             key = f"{symtab.filename}:{sym.line}"
 
             if key in types:
                 return (types[key], key)
+            else:
+                for i in range(10):
+                    key_pos = f"{symtab.filename}:{sym.line + i}"
+                    key_neg = f"{symtab.filename}:{sym.line - i}"
+                    
+                    if key_neg in types:
+                        return (types[key_neg], key_neg)
+                    
+                    if key_pos in types:
+                        return (types[key_pos], key_pos) 
 
             f_iter = f_iter.older()
 
@@ -348,14 +371,12 @@ class Stage3():
         # system can hang when pagination is on
         gdb.execute("set pagination off")
 
-        # for rk-data
+        # for printing structs with rk-data
         gdb.execute("set print pretty on")
 
         with open(self.dictfile, 'r') as dct:
             types = json.load(dct)
 
-        types["./kernel/fork.c:812"] = "type = struct task_struct *"
-
         for b in (break_arg.keys() | break_arg_access.keys()):
             # set breakpoint at function entry, to extract size
             b_entry = EntryExitBreakpoint(b)
diff --git a/project/occ.sh b/project/occ.sh
@@ -1,5 +1,7 @@
 #!/bin/bash
-#Extract all occurences of function calls and the assigned variables from kernel sources
+#extract all occurrences of function calls and the assigned variables from kernel sources
+
+#these are (more or less) wrappers for the functions we use in extract_sizeret.py
 funcs=("kmalloc" "kzalloc" "vmalloc" "vzalloc" "alloc_task_struct_node")
 out=".funcs"
 
diff --git a/project/type_dict.py b/project/type_dict.py
@@ -1,9 +1,5 @@
 ###################################################################
 # Format of input file:
-# First line:
-#   directory prefix to prune
-#
-# Rest of lines:
 #    <filename> <func or global> <line> <var or call to free>
 ###################################################################
 
@@ -35,7 +31,9 @@ class CodeDict():
 
     def __init__(self):
         self.setup()
+        print("Creating dictionary, this can take 5 minutes or more..")
         self.parse()
+        print("..done!")
 
         self.outf.write(json.dumps(self.dict))
 
@@ -62,6 +60,14 @@ class CodeDict():
             lnr = l[2]
             var = l[3]
 
+            # ugly, but necessary since gdb does not like $ whatis 'dup_task_struct'::tsk
+            # and task_struct is too important to give up
+            if fn == "dup_task_struct":
+                key = f"{src}:{lnr}"
+                val = "type = struct task_struct *"
+                self.dict[key] = val
+                continue
+
             var = re.split('\-\>|\.', var)
             var[0] = re.sub('[.*?]', '', var[0])
 
@@ -77,9 +83,7 @@ class CodeDict():
                     continue
 
             if len(var) > 1:
-                print("looking in", type_info[7:].strip(), "for", var[1:])
                 type_info = self.parse_chain(type_info[7:], var, 1)
-                print("FOUND:", type_info)
 
             if type_info is not None:
                 key = f"{src}:{lnr}"

	linux-rootkit Feature-rich interactive rootkit that targets Linux kernel 4.19, accompanied by a dynamic kernel memory analysis GDB plugin for in vivo introspection (e.g. using QEMU)
	git clone git://git.deurzen.net/linux-rootkit
	Log \| Files \| Refs

A	project/README.md	\|	37	+++++++++++++++++++++++++++++++++++++
M	project/extract_sizeret.py	\|	39	++++++++++++++++++++++++++++++---------
M	project/occ.sh	\|	4	+++-
M	project/type_dict.py	\|	16	++++++++++------