updates presentation - linux-rootkit - Feature-rich interactive rootkit that targets Linux kernel 4.19, accompanied by a dynamic kernel memory analysis GDB plugin for in vivo introspection (e.g. using QEMU)

commit 835f88964ee3cb6e03f0309e0b640af0ccbbf9b8
parent 49010fb2cf779b6d2d49875b2d120ba35f40a0c7
Author: deurzen <m.deurzen@tum.de>
Date:   Sun,  7 Feb 2021 23:26:08 +0100

updates presentation

Diffstat:
M project/slides/beamerthemetum.sty  | 4 ----
M project/slides/i20lecture.cls  | 6 +++---
M project/slides/presentation.tex  | 189 +++++++++++++++++++++++++++++++++++++++++++++++--------------------------------

3 files changed, 115 insertions(+), 84 deletions(-)
diff --git a/project/slides/beamerthemetum.sty b/project/slides/beamerthemetum.sty
@@ -32,11 +32,7 @@
 \newlength{\headmargin} %% Left and right margin for title text line
 \setlength{\headmargin}{.1cm}
 
-\ifyourpkgprefix@enoption
 \usepackage{babel}
-\else
-\usepackage[ngerman]{babel}
-\fi
 
 \ifyourpkgprefix@wideoption
 \geometry{paperwidth=170.66mm,paperheight=96mm}  % 16:9
diff --git a/project/slides/i20lecture.cls b/project/slides/i20lecture.cls
@@ -142,8 +142,8 @@
 % META
 % =========================================================
 \title[None]{Rootkit Programming - Final Presentation}
-\author[Max v. D., Tizian L.]{Max van Deurzen, Tizian Leonhardt}
-\semester{Wintersemester 2020/21}
+\author[Tizian L., Max v. D.]{Tizian Leonhardt, Max van Deurzen}
+\semester{Wintersemester 20/21}
 \institute{}
-\date{10. February, 2021}
+\date{February 8, 2021}
 % =========================================================
diff --git a/project/slides/presentation.tex b/project/slides/presentation.tex
@@ -1,7 +1,7 @@
 \documentclass{i20lecture}
 \usepackage{listings}
 
-\subtitle{LiveDM - Proof of Concept}
+\subtitle{LiveDM --- Proof of Concept}
 
 \begin{document}
 
@@ -28,16 +28,16 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}{\insertsection}
   \framesubtitle{Dynamic Kernel Memory}
-  
+
   \begin{itemize}
-    \item Dynamic kernel memory is..
+    \item Dynamic kernel memory is...
     \begin{itemize}
 \pause
-    \item ..hard to make sense of -- usually, no type information is available
+    \item ...hard to make sense of -- usually, no type information is available
 \pause
-    \item ..changing \textit{very} frequently
+	\item ...constantly changing -- it's dynamic, after all
 \pause
-    \item ..difficult to analyze!
+    \item ...difficult to analyze!
     \end{itemize}
 \pause
     \item How can we make analysis easier?
@@ -45,76 +45,86 @@
 \end{frame}
 
 \begin{frame}{\insertsection}
-  \framesubtitle{LiveDM - Overview}
-  
+  \framesubtitle{LiveDM --- Overview}
+
   \begin{itemize}
-    \item LiveDM seeks to overcome the previous issues through Virtual Machine Introspection
+	\item LiveDM seeks to overcome these issues through Virtual Machine Introspection (VMI)
+	\pause
+	\begin{itemize}
+		\item Monitor the runtime state of a system-level VM
+		\pause
+		\item Without altering the guest OS
+	\end{itemize}
 \pause
     \item Memory allocation events can be intercepted from a virtual machine monitor (VMM)
 \pause
     \item Going from there, LiveDM is able to create a memory map
-\pause
-    \item This map even includes type information!
+	\begin{itemize}
+		\item Monitor the runtime state of a system-level VM
+		\pause
+		\item This map includes type information!
+	\end{itemize}
   \end{itemize}
 \end{frame}
 
 \begin{frame}{\insertsection}
-  \framesubtitle{LiveDM - Overview}
-  
+  \framesubtitle{LiveDM --- Overview}
+
   \begin{itemize}
-    \item Three phases \footnote{The word 'phase' is not the best pick here} exist to create the mapping:
+    \item Three distinct stages to create the mapping:
     \begin{enumerate}
 \pause
      \item Gathering of necessary values
 \pause
      \item Determining scope of interpretation
 \pause
-     \item Data type interpretation
+     \item Performing type interpretation
     \end{enumerate}
   \end{itemize}
 \end{frame}
 
 \begin{frame}{\insertsection}
-  \framesubtitle{LiveDM - Phase 1}
-  
+  \framesubtitle{LiveDM --- Stage 1}
+
   \begin{itemize}
-    \item Phase 1 is comprised of..
+    \item Stage 1 is comprised of...
     \begin{itemize}
 \pause
-     \item ..intercepting a set of memory allocation/freeing functions
+     \item ...intercepting a set of memory allocation/deallocation functions
 \pause
-     \item ..retrieving the requested size, as well as the return value
+     \item ...retrieving the requested allocation size, as well as the return value
 \pause
-     \item ..identifying the caller through the stack's return address
+	 \item ...identifying the caller (call site) through the stack's return address
     \end{itemize}
   \end{itemize}
 \end{frame}
 
 \begin{frame}{\insertsection}
-  \framesubtitle{LiveDM - Phase 2}
-  
+  \framesubtitle{LiveDM --- Stage 2}
+
   \begin{itemize}
-    \item In phase 2, the scope of memory monitoring is chosen
+    \item In stage 2, the scope of memory monitoring is chosen
     \begin{itemize}
 \pause
      \item Offer snapshots of the memory map (containing type and size for allocated memory)
      \begin{itemize}
-      \item We offer this in our PoC
+		 \item We offer this in our PoC (\lstinline|rk-print-mem| and \lstinline|rk-data <address>|)
      \end{itemize}
 \pause
-     \item Trace every memory access on known memory blocks
+	 \item Trace every memory (write) access on known (vulnerable) memory blocks
      \begin{itemize}
-      \item We are able to showcase that on a small example
+      \item Warn when critical values are written to traced blocks
+      \item We are able to showcase this in a small demo
      \end{itemize}
     \end{itemize}
   \end{itemize}
 \end{frame}
 
 \begin{frame}{\insertsection}
-  \framesubtitle{LiveDM - Phase 3}
-  
+  \framesubtitle{LiveDM --- Stage 3}
+
   \begin{itemize}
-    \item In phase 3, the caller's address is translated into a type
+    \item In stage 3, the caller's address is translated into a type
 \pause
     \begin{itemize}
      \item Relies on instrumenting GCC to retrieve abstract syntax tree (AST)
@@ -123,10 +133,10 @@
 \end{frame}
 
 \begin{frame}{\insertsection}
-  \framesubtitle{LiveDM - Motivation}
-  
+  \framesubtitle{LiveDM --- Motivation}
+
   \begin{itemize}
-    \item Why do we need this information? Possible answers include..
+    \item Why do we need this information? Possible answers include...
     \begin{enumerate}
 \pause
      \item To make dynamic memory more transparent
@@ -142,25 +152,40 @@
 \section{Approach}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}{\insertsection}
-  \framesubtitle{VMM}
-  
+  \begin{enumerate}
+   \item Background
+    \item Approach
+    \begin{itemize}
+        \item Tools
+        \item Implementing stage 1
+        \item Implementing stage 2
+        \item Implementing stage 3
+    \end{itemize}
+    \item Results
+    \item Discussion / Questions
+  \end{enumerate}
+\end{frame}
+
+\begin{frame}{\insertsection}
+  \framesubtitle{Tools}
+
   \begin{itemize}
    \item Since introspection techniques are required, we need a VMM
 \pause
     \begin{itemize}
     \item Xen
     \item KVM
-    \item QEMU (our pick; introspection done with GDB)
-    \item ..
+	\item \textcolor{yellow}{QEMU} (in vivo introspection using GDB)
+    \item ...
     \end{itemize}
   \end{itemize}
 \end{frame}
 
 \begin{frame}{\insertsection}
-  \framesubtitle{Implementing Phase 1}
-    
+  \framesubtitle{Implementing stage 1}
+
     \begin{itemize}
-     \item Intercepting allocation functions is easy: breakpoints
+	\item Intercepting allocations is easy: non-blocking \textbf{breakpoints}
      \begin{itemize}
 \pause
       \item Has a significant performance overhead, but system is still usable
@@ -168,19 +193,19 @@
       \item Possible improvement: hardware breakpoints
 \pause
         \begin{itemize}
-            \item Limited to a small number
+			\item Limited to a small number
 \pause
-            \item Only part of GDB's Python API since 21st January 2021..
+            \item Only part of GDB's Python API since January 21st, 2021...
         \end{itemize}
      \end{itemize}
     \end{itemize}
 \end{frame}
 
 \begin{frame}[fragile]{\insertsection}
-  \framesubtitle{Implementing Phase 1}
-    
+  \framesubtitle{Implementing stage 1}
+
     \begin{itemize}
-     \item To retrieve the size parameter, we can rely on the System V calling convention
+     \item To retrieve the size of the allocation, we can rely on the System V calling convention
 \pause
      \begin{itemize}
       \item As the size is not always the first argument, we build a dictionary:
@@ -194,59 +219,63 @@
     }
     \end{lstlisting}
 \pause
-    \begin{itemize}
-     \item Return values are gathered by additionally breaking on return instructions
-     \begin{itemize}
-\pause
-     \item Only one will be generated per function
-     \end{itemize}
-    \end{itemize}
+	\begin{itemize}
+		\item Return values are gathered by additionally breaking on return instructions
+		\pause
+		\begin{itemize}
+			\item Look for \lstinline|ret{,q}| instruction's offset from function entry in the disassembly
+			\pause
+			\item Break on \lstinline|<function entry> + <ret offset>|
+			\pause
+			\item Retrieve return value from \lstinline|$rax|
+		\end{itemize}
+	\end{itemize}
 \end{frame}
 
 \begin{frame}{\insertsection}
-  \framesubtitle{Implementing Phase 1}
-    
+  \framesubtitle{Implementing stage 1}
+
     \begin{itemize}
      \item LiveDM relies on the return address on the stack
 \pause
-     \item Instead, we walk the unwinded stack and retrieve the \texttt{file:line} information
+     \item Instead, we walk frame by frame up the stack and retrieve the file name and line number at the assignment location
 \pause
      \begin{itemize}
-      \item More on that later..
+      \item More on that later...
      \end{itemize}
     \end{itemize}
 \end{frame}
 
 \begin{frame}[fragile]{\insertsection}
-  \framesubtitle{Implementing Phase 2}
-    
+  \framesubtitle{Implementing stage 2}
+
     \begin{enumerate}
      \item Snapshot-based approach
 \pause
      \begin{itemize}
       \item Since we already store everything gathered, this is readily available
 \pause
-      \item Currently allocated memory can be listed with \texttt{rk-print-mem}:
+      \item Currently allocated memory can be listed with \lstinline|rk-print-mem|:
      \end{itemize}
     \end{enumerate}
     \begin{lstlisting}
-    > rk-print-mem 
+    > rk-print-mem
       type: struct task_struct *, size: 3776 B, addr: 0xffff9e65bb961d80, caller: ./kernel/fork.c:812
       type: struct fdtable *, size: 56 B, addr: 0xffff9e65bc7d7280, caller: ./fs/file.c:111
     \end{lstlisting}
 \end{frame}
 
 \begin{frame}[fragile]{\insertsection}
-  \framesubtitle{Implementing Phase 2}
-    
+  \framesubtitle{Implementing stage 2}
+
     \begin{enumerate}
      \setcounter{enumi}{1}
      \item Memory-access tracing
 \pause
     \begin{itemize}
-     \item Would require some advanced techniques (ex.: page unmapping) for full coverage
+     \item Would require some advanced techniques (e.g., page unmapping) for full coverage
 \pause
-    \item Not feasible for the given time frame
+    \item Not feasible within the given time frame
 \pause
     \item Instead, we will demonstrate a small example later based on \textit{hardware} watchpoints
     \end{itemize}
@@ -254,37 +283,43 @@
 \end{frame}
 
 \begin{frame}[fragile]{\insertsection}
-  \framesubtitle{Implementing Phase 3}
+  \framesubtitle{Implementing stage 3}
     \begin{itemize}
-     \item Translation of call sites to types; possible approaches:
+     \item Translation of call sites to types
+	 \pause
+     \item Possible approaches:
      \begin{itemize}
 \pause
       \item Instrumenting \texttt{gcc} to extract AST (LiveDM)
 \pause
-      \item Using \texttt{clang} to generate an AST without instrumentation
+      \item Use \texttt{clang} to generate an AST without instrumentation
 \pause
-      \item Abusing GDB's \texttt{whatis} command to statically pre-compute type dictionary (Our pick)
+	  \item \textcolor{yellow}{Utilize GDB's \texttt{whatis} command to statically pre-compute type dictionary}
      \end{itemize}
     \end{itemize}
 \end{frame}
 
 \begin{frame}[fragile]{\insertsection}
-  \framesubtitle{Implementing Phase 3}
+  \framesubtitle{Implementing stage 3}
     \begin{itemize}
-     \item Process for generating the type dictionary: \footnote{Fully automated, since very specific to kernel  code version}
+     \item Process for generating the type dictionary: \footnote{Fully automated, since specific to kernel sources version, build options, and compiler optimizations}
 \pause
      \begin{enumerate}
       \item Find all occurrences of function calls we are interested in using \texttt{cscope}
 \pause
-      \item Iterate the generated occurences in python; execute \texttt{whatis} on every variable
+      \item Iterate the generated occurrences using Python; execute \texttt{whatis} on every assigned-to symbol
       \begin{itemize}
 \pause
-       \item Assumption: kernel symbols are loaded
+       \item Assumption: debug symbols for current kernel sources are available
 \pause
-       \item Compound types (example: \lstinline|desc->inbuf|) have to be resolved incrementally by us
+       \item Compound type access chains (e.g., \lstinline|desc->inbuf|) have to be recursively resolved
+	\pause
+	   \item We only require the type of the last dereferenced field, as that is what's being assigned to
       \end{itemize}
 \pause
-      \item Place the results in a dictionary; can be loaded by the 'main' python script
+      \item Store the results in a dictionary
+\pause
+      \item Use this precompiled type dictionary in our runtime script
 \pause
      \end{enumerate}
     \end{itemize}
@@ -295,9 +330,9 @@
 \end{frame}
 
 \begin{frame}[fragile]{\insertsection}
-  \framesubtitle{Implementing Phase 3}
+  \framesubtitle{Implementing stage 3}
     \begin{itemize}
-     \item Once a breakpoint is encountered, we can walk the stack with gdb..
+     \item Once a breakpoint is encountered, we can walk the stack with gdb...
     \end{itemize}
 \pause
     \begin{lstlisting}
@@ -307,7 +342,7 @@
     \end{lstlisting}
 \pause
     \begin{itemize}
-     \item ..and match the \texttt{file:line} descriptor to a type without expensive computations
+     \item ...and match the \texttt{file:line} descriptor to a type without expensive computations
     \end{itemize}
 \end{frame}

	linux-rootkit Feature-rich interactive rootkit that targets Linux kernel 4.19, accompanied by a dynamic kernel memory analysis GDB plugin for in vivo introspection (e.g. using QEMU)
	git clone git://git.deurzen.net/linux-rootkit
	Log \| Files \| Refs

M	project/slides/beamerthemetum.sty	\|	4	----
M	project/slides/i20lecture.cls	\|	6	+++---
M	project/slides/presentation.tex	\|	189	+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------