context/rnd: paging/stack/heap/virtualization

This commit is contained in:
steveej 2017-09-21 21:53:48 +02:00
parent 12b71b3744
commit 83c5540a42
8 changed files with 972 additions and 382 deletions

View file

@ -39,10 +39,27 @@ static void caller(void) {
printer(&a, &b);
}
/*
 * Replace the current process image with /bin/sh.
 *
 * execve() requires argv (and, portably, envp) to be arrays terminated by a
 * NULL pointer. The shell is started with argv[0] = "/bin/sh" and an empty
 * environment. The function only returns if execve() fails, in which case
 * the reason is reported on stderr.
 */
static void shell(void) {
  char path[] = "/bin/sh";
  char *argv[] = {path, NULL};
  /* An explicit empty list instead of a NULL envp, which is non-portable. */
  char *envp[] = {NULL};

  execve(path, argv, envp);
  /* Only reached when the exec failed. */
  perror("execve");
}
/* Write a fixed marker message to stderr (no trailing newline). */
static void simple_printer(void) {
  fputs("I wonder who called me?", stderr);
}
/*
 * Deliberately stores the address of simple_printer into the two
 * pointer-sized slots that follow the local variable p in memory.
 *
 * p itself is never initialised or read; only its address is used as an
 * anchor for the pointer arithmetic. Both stores are executed
 * unconditionally. Writing through &p + 1 and &p + 2 is outside the bounds
 * of p and therefore undefined behaviour in C.
 *
 * NOTE(review): presumably the intent is to overwrite the saved return
 * address so that returning from this function transfers control to
 * simple_printer. Which slot (if any) holds the return address depends on
 * the compiler, optimisation level and frame-pointer settings - confirm
 * against the generated assembly.
 */
static void modifier(void) {
uint64_t *p;
// without frame-pointer: the slot directly after p is presumably the
// saved return address (TODO confirm for the build flags in use)
*(&p + 1) = (uint64_t *)simple_printer;
// with frame-pointer: presumably the saved frame pointer sits directly
// after p, so the return address is one slot further (TODO confirm)
*(&p + 2) = (uint64_t *)simple_printer;
}
/*
 * Program entry point.
 *
 * Calls caller(), then many_args() with seven 64-bit constants
 * (0x...f0, 0x...f1, 0x...f3 through 0x...f7; note that 0x...f2 is not
 * passed), then modifier(), and finally returns 0.
 *
 * NOTE(review): modifier() writes outside its own local variable; if that
 * write hits the saved return address, control may not come back here and
 * the `return 0` may never be reached - confirm on the target build.
 */
int main(void) {
caller();
many_args(0xfffffffffffffff0, 0xfffffffffffffff1, 0xfffffffffffffff3,
0xfffffffffffffff4, 0xfffffffffffffff5, 0xfffffffffffffff6,
0xfffffffffffffff7);
// The commented-out lines below repeat the two calls above.
// caller();
// many_args(0xfffffffffffffff0, 0xfffffffffffffff1, 0xfffffffffffffff3,
// 0xfffffffffffffff4, 0xfffffffffffffff5, 0xfffffffffffffff6,
// 0xfffffffffffffff7);
modifier();
return 0;
}

View file

@ -1,9 +1,18 @@
% // vim: set ft=tex:
\newglossaryentry{bbox} {
name = {busybox},
long = {BusyBox: The Swiss Army Knife of Embedded \gls{LX}},
description = {%
BusyBox combines tiny versions of many common UNIX utilities into a single small executable%
},
first = {\glsentrylong{bbox}}
}
\newglossaryentry{Rust} {
name = {Rust},
long = {the Rust programming language},
description = {
description = {%
Statically typed programming language that uses a new concept of variable ownership and reference tracking. Largely explained in \cref{context::rust}.
},
first = {\glsentrylong{Rust}}
@ -11,7 +20,7 @@
\newglossaryentry{proglang} {
name = {programming language},
description = {
description = {%
A well-defined language used to write software. Hundreds of languages exist, each with a focus on different aspects like comfort for humans, size, speed, safety, etc.
},
}
@ -20,7 +29,7 @@
\newglossaryentry{compiler}{
name = compiler,
long = {source- to machine-code compiler},
description = {
description = {%
A program that can transform software source code to executable machine code.
Typically targeted at a \glsentryname{proglang} or a family of \glspl{proglang}.
},
@ -30,7 +39,7 @@
\newglossaryentry{addrspace}{
name = address space,
long = bound address range in memory,
description = {
description = {%
A logical entity that represents a section of memory, specified by a start address and either an end address or a length given in a standardized unit
},
first = {\glsentrylong{addrspace}}
@ -38,14 +47,21 @@
\newglossaryentry{stack}{
name = stack,
description = {
description = {%
TODO
},
}
\newglossaryentry{sf}{
name = stack-frame,
description = {%
Procedure data and meta-data (see \cref{lst:amd64-stack-frame-components})%
},
}
\newglossaryentry{heap}{
name = heap,
description = {
description = {%
TODO
},
}
@ -53,67 +69,67 @@
\newglossaryentry{api}{
name = API,
long = {Application Programming Interface},
description = {
description = {%
},
first = {\glsentrylong{api}}
}
\newglossaryentry{OS}{
\newglossaryentry{os}{
name = OS,
long = Operating System,
description = {
description = {%
The software that manages the system's hardware resources.
Other \glspl{app} can access the ressources only through the interface provided by the \gls{OS}.
Other \glspl{app} can access the resources only through the interface provided by the \gls{os}.
},
first = {\glsentrylong{OS}}
first = {\glsentrylong{os}}
}
\newglossaryentry{fs}{
name = filesystem,
description = {
description = {%
TODO
},
}
\newglossaryentry{virt}{
name = virtualization,
description = {
description = {%
TODO
},
}
\newglossaryentry{OSS}{
name = Open-Source Software,
description = {
description = {%
TODO
},
}
\newglossaryentry{osvirt}{
name = Operating System-Level Virtualization,
description = {
description = {%
TODO
},
}
\newglossaryentry{hypervisor}{
name = Hypervisor,
description = {
description = {%
TODO
},
}
\newglossaryentry{VM}{
name = Virtual Machine,
description = {
description = {%
TODO
},
}
\newglossaryentry{LX}{
name = Linux,
description = {
description = {%
is a generic term referring to the family of Unix-like
computer operating systems that use the Linux kernel
},
@ -122,108 +138,95 @@
\newglossaryentry{android}{
name = Android,
description = {a mobile \gls{OS} based on \gls{LX}},
description = {a mobile \gls{os} based on \gls{LX}},
first = {\glsentryname{android}, \glsentrydesc{android}},
}
\newglossaryentry{imezzos}{
name = intermezzOS,
description = {
description = {%
TODO
},
}
\newglossaryentry{redoxos}{
name = Redox OS,
description = {
description = {%
TODO
},
}
\newglossaryentry{blogos}{
name = Blog OS,
description = {
description = {%
TODO
},
}
\newglossaryentry{tockos}{
name = Tock OS,
description = {
description = {%
TODO
},
}
\newglossaryentry{rootfs}{
name = RootFS,
description = {
description = {%
% TODO
},
}
\newglossaryentry{lxns}{
name = Linux Namespace,
description = {
description = {%
entity that holds a specific set of process attributes and can be set per process
},
}
\newglossaryentry{lxcap}{
name = Linux Capability,
description = {
entitiy that holds a specific set of process attributes and can be set per process, mainly to establish a relationship between processes and \gls{OS} resources
description = {%
entity that holds a specific set of process attributes and can be set per process, mainly to establish a relationship between processes and \gls{os} resources
},
plural = Linux Capabilities,
}
\newglossaryentry{lxvfs}{
name = Linux VFS,
description = {
description = {%
Virtual Filesystem Switch, a filesystem abstraction layer in \gls{LX}.
},
}
\newglossaryentry{BSD}{
name = BSD,
description = {
description = {%
TODO
}
}
\newglossaryentry{computer}{
name = Computer,
description = {
description = {%
is a programmable machine that receives input,
stores and manipulates data, and provides
output in a useful format
}
}
\newglossaryentry{app}{
name=software-application,
description={
TODO
}
}
\newglossaryentry{program} {
name = {program},
description = {
A set of logically grouped instructions.
},
}
\newglossaryentry{pm}{
name=package manager,
description={
name = package manager,
description = {%
TODO
}
}
\newglossaryentry{sac}{
name=Software Application Container,
description={
name = Software Application Container,
description = {%
The broad term for the technology used to build, package, distribute and run an application program in isolation from the underlying and co-existing systems, wherein the level or technique of isolation can be different depending on the \gls{sacr}.
The term is nuanced from \gls{appc} defined by the \gls{appcorg}.
The \gls{appcorg} is a community driven effort to create an open, standardized specification for developers and users of \gls{sac} technology.
@ -232,57 +235,57 @@
}
}
\newglossaryentry{saci}{
name=Software Application Container Image,
description={
name = Software Application Container Image,
description = {%
An archive file that contains all of the necessary binaries that are needed to execute an application and a manifest file that contains metadata about the application. As an alternative to containing all the required binary files, the manifest file can declare dependencies on other application container images, which must then be available at runtime to execute the contained application.
}
}
\newglossaryentry{sacr}{
name=Software Application Container Runtime,
description={
name = Software Application Container Runtime,
description = {%
An application program (suite) that understands how to run the software inside an \gls{saci}.
}
}
\newglossaryentry{LXC}{
name=LXC,
description={
name = LXC,
description = {%
TODO
}
}
\newglossaryentry{Docker}{
name=Docker,
description={
name = Docker,
description = {%
A very popular \gls{sac} platform and application suite, providing functionality to build and deploy Docker specific \glspl{saci}.
}
}
\newglossaryentry{systemd-nspawn}{
name=systemd-nspawn,
description={
name = systemd-nspawn,
description = {%
TODO
}
}
\newglossaryentry{rkt}{
name=rkt,
description={
name = rkt,
description = {%
TODO
}
}
\newglossaryentry{appcorg}{
name=App Container Organisation,
description={
name = App Container Organisation,
description = {%
Organisation for the App Container specification, including the schema and associated tooling.
}
}
\newglossaryentry{appc}{
name=App Container,
description={
name = App Container,
description = {%
Specific variant of an \glsentrytext{sac} defined by the \glsentrytext{appcorg}.
}
}
@ -315,22 +318,22 @@
}
\newglossaryentry{C}{
name=C,
, description={
name = C,
, description = {%
TODO C programming language,
}
}
\newglossaryentry{C++}{
name=C++,
, description={
name = C++,
, description = {%
A \glsentrytext{proglang} based on \glsentrytext{C}, enhanced by features like object-orientation, lambdas, and much more.
}
}
\newglossaryentry{asm}{
name=Assembly programming language,
description={
name = Assembly programming language,
description = {%
TODO ASM
}
}
@ -338,41 +341,110 @@
\newglossaryentry{amd64}{
name = AMD64,
long = AMD64,
description={
description = {%
TODO AMD64
},
first = {\glsentrylong{amd64}},
}
\newglossaryentry{CPU}{
\newglossaryentry{cpu}{
name = CPU,
long = Central Processing Unit,
description={
TODO CPU
description = {%
TODO cpu
},
first = {\glsentrylong{CPU}},
first = {\glsentrylong{cpu}},
}
\newglossaryentry{tlb}{
name = TLB,
long = Translation Lookaside Buffer,
description = {%
TODO tlb
},
first = {\glsentrylong{tlb}},
}
\newglossaryentry{MMU}{
name = MMU,
long = Memory Management Unit,
description={
description = {%
TODO MMU
},
first = {\glsentrylong{MMU}},
}
\newglossaryentry{sysadmin}{
name=System Administrator
, description={
name = System Administrator
, description = {%
TODO sysadmin
}
}
\newglossaryentry{realtime}{
name=System Administrator
, description={
name = realtime
, description = {%
TODO realtime
}
}
\newglossaryentry{app}{
name = software-application,
description = {%
A bundle of one or multiple \glspl{program} intended to solve a specific use-case.
}
}
\newglossaryentry{task}{
name = task
, description = {%
Generic term for any unit of work.
In the context of this thesis, it may be used for any of \glsentrytext{program}, \glsentrytext{process}, \glsentrytext{thread}, \glsentrytext{app}.
}
}
\newglossaryentry{program}{
name = program
, description = {%
A group of instructions that can be executed by the \glsentryname{cpu}.
}
}
\newglossaryentry{process}{
name = process
, description = {%
A \glsentrytext{program} in execution.
}
}
\newglossaryentry{thread}{
name = thread
, description = {%
A defined path of instructions within a process.
It can span from a part of a procedure up to the whole program of the process.
Threads can be identified in the program code by hypothetical execution paths.
The thread can only be executed by spawning a process in such a way that the specific thread in the program will be executed, e.g. by invoking specific arguments.
}
}
\newglossaryentry{procedure}{
name = procedure
, description = {%
An addressable subgroup of instructions in a program that contains specific functionality.
}
}
\newglossaryentry{function}{
name = function
, description = {%
See Procedure.
}
}
\newglossaryentry{job}{
name = job
, description = {%
A specific unit of work, specifying one or multiple programs to execute along with the arguments to be passed to them.
}
}

View file

@ -3,9 +3,9 @@
\label{context::introduction}
This thesis studies the feasibility of using compile-time code analysis, as found in \gls{Rust}'s \gls{compiler}, for ensuring memory-safety within an \gls{OS} kernel.
This study could be applied to all \glspl{app}, but the focus is on the implementation of \glspl{OS} which is the \gls{app} that is responsible for managing the system's resources and provide abstractions for all other \glspl{app}.
For this the \gls{OS} is the only \gls{app} that required unrestricted access to these resources, with the responsibility of managing them safely according to the rules that are either hard-coded or set up by the \gls{sysadmin}.
This thesis studies the feasibility of using compile-time code analysis, as found in \gls{Rust}'s \gls{compiler}, for ensuring memory-safety within an \gls{os} kernel.
This study could be applied to all \glspl{app}, but the focus is on the implementation of \glspl{os}, which is the \gls{app} that is responsible for managing the system's resources and providing abstractions for all other \glspl{app}.
For this, the \gls{os} is the only \gls{app} that requires unrestricted access to these resources, with the responsibility of managing them safely according to the rules that are either hard-coded or set up by the \gls{sysadmin}.
The increasing number of vulnerabilities based on memory-safety issues in \glspl{app}, as presented in \cref{context::common-mem-safety-mistakes::cwe::statistics}, is a major motivator for working on this topic.
@ -15,16 +15,13 @@ The increasing number of vulnerabilities based on memory-safety issues in \glspl
% Hypotheses
% A hypothesis is a testable prediction for an observed phenomenon, namely, the gap in the knowledge. Each research question will have both a null and an alternative hypothesis in a quantitative study. Qualitative studies do not have hypotheses. The two hypotheses should follow the research question upon which they are based. Hypotheses are testable predictions to the gap in the knowledge. In a qualitative study the hypotheses are replaced with the primary research questions.
%TODO: mention paper's by tockos team
%TODO: mention electrolyte, formal verification for Rust
According to my best-effort literature research in Q1/2017, the hypothesis that \textit{Rust's static code analysis can guarantee memory safety in the \gls{OS}} has not been studied explicitly.
This is to my surprise, because as explained in \cref{context::introduction::memory-safety}, memory-safety in \gls{OS} development is critical, and \gls{Rust} offers attractive features that might bring improvements, which is covered in \cref{context::rust}.
According to my best-effort literature research in Q1/2017, the hypothesis that \textit{Rust's static code analysis can guarantee memory safety in the \gls{os}} has not been studied explicitly.
This is to my surprise, because as explained in \cref{context::introduction::memory-safety}, memory-safety in \gls{os} development is critical, and \gls{Rust} offers attractive features that might bring improvements, which is covered in \cref{context::rust}.
The hypothesis cannot be trivially approved or denied, which drives the research efforts for my final thesis project.
Besides this specific hypothesis, many implementations of \glspl{OS} with \gls{Rust} have appeared in public.
Besides this specific hypothesis, many implementations of \glspl{os} with \gls{Rust} have appeared in public.
Their purposes range from proof-of-concept and educational work like \gls{imezzos} and \gls{blogos}, to implementations that aim to be production grade software like \gls{redoxos} and \gls{tockos} \cite{Levy2015a}.
These implementations are subject to evaluation in \cref{rnd::existing-os-in-rust}
These implementations are subject to evaluation in \cref{rnd::existing-os-dev-with-rust}.
The final results will be of a qualitative nature, captured by analyzing the existing \gls{Rust}-implementations of popular memory management techniques as well as a self-developed one.
In addition to the sole analysis of \gls{Rust}-implementations, comparisons will be made, discerning the level of memory safety guarantees gained over similarly intended implementations in \gls{C}.
@ -34,23 +31,23 @@ In addition to the sole analysis of \gls{Rust}-implementations, comparisons will
Memory-safety is a term that is only vaguely defined in general, thus a definition is given for the context of this thesis.
For a thorough understanding of the issues discussed further in this document, it might be helpful to review the basics of how memory is used in current computer systems.
For decades computer systems, more specifically their \glspl{CPU}, were designed to execute instructions that were previously loaded into volatile main memory, typically from a secondary, persistent memory.
For decades computer systems, more specifically their \glspl{cpu}, were designed to execute instructions that were previously loaded into volatile main memory, typically from a secondary, persistent memory.
These instructions are themselves able to alter the very main memory they are stored at, which allows for great flexibility but also involves the risk of corrupting a consistent chain of instructions or other memory content like data.
As any other \gls{app}, the \gls{OS} is loaded and executed in form of one or multiple sets of logically grouped instructions, called \glspl{program}.
Loading the \gls{OS}'s program into memory is not the responsibility of the \gls{OS}, it belongs to the components earlier in the boot process, namely the boot loader and system firmware.
The \gls{OS} takes over the responsibility to protect the main and secondary memory as soon as the bootloader has loaded the \gls{OS} and has jumped to its first instruction.
From this point, loading further programs into main memory is done by the \gls{OS}, either according to scheduled jobs set up by the \gls{sysadmin}, or based on well-defined events which can be triggered by any form of input via the system's interfaces.
For example, the \gls{OS} can load and execute a program stored on the hard-disk, after the user has given the appropriate instructions via a terminal.
As any other \gls{app}, the \gls{os} is loaded and executed in form of one or multiple sets of logically grouped instructions, called \glspl{program}.
Loading the \gls{os}'s program into memory is not the responsibility of the \gls{os}, it belongs to the components earlier in the boot process, namely the boot loader and system firmware.
The \gls{os} takes over the responsibility to protect the main and secondary memory as soon as the bootloader has loaded the \gls{os} and has jumped to its first instruction.
From this point, loading further programs into main memory is done by the \gls{os}, either according to scheduled jobs set up by the \gls{sysadmin}, or based on well-defined events which can be triggered by any form of input via the system's interfaces.
For example, the \gls{os} can load and execute a program stored on the hard-disk, after the user has given the appropriate instructions via a terminal.
The execution of other programs is potentially dangerous, because they might attempt to access the memory content of other programs and their data.
It is the responsibility of the \gls{OS} to prevent such executed programs from being able to mutually interfere with memory content that is not theirs, keeping the memory in a safe state at all times \footnote{This does not include memory-safety \textit{within} each of these executed programs, as the \gls{OS} has no pertinent knowledge of the program's intentions.}.
This requires an extensive amount of care and foresight from the developers of the \gls{OS}, to ensure memory consistency in any of the various events and combinations thereof that might possibly occur at runtime.
It is the responsibility of the \gls{os} to prevent such executed programs from being able to mutually interfere with memory content that is not theirs, keeping the memory in a safe state at all times \footnote{This does not include memory-safety \textit{within} each of these executed programs, as the \gls{os} has no pertinent knowledge of the program's intentions.}.
This requires an extensive amount of care and foresight from the developers of the \gls{os}, to ensure memory consistency in any of the various events and combinations thereof that might possibly occur at runtime.
\subsection{A Definition Of Memory-Safety For \glsentryplural{OS}}
\subsection{A Definition Of Memory-Safety For \glsentryplural{os}}
\label{context::introduction::memory-safety::def}
If the \gls{OS} is memory-safe, any program, whether it is part of the \gls{OS} or any installed \gls{app}, is only able to access its allocated memory regions.
Additionally, if the \gls{OS} supports shared memory regions, each shared memory region may only be accessible by programs that have been granted access to it.
If the \gls{os} is memory-safe, any program, whether it is part of the \gls{os} or any installed \gls{app}, is only able to access its allocated memory regions.
Additionally, if the \gls{os} supports shared memory regions, each shared memory region may only be accessible by programs that have been granted access to it.
\section{Memory-Safety Violation in Software}
\label{context::introduction::memory-safety-violation-in-sw}
@ -71,7 +68,7 @@ This aspect is relevant to assessing the origins of memory-safety related errors
The following assumptions are made based on common sense
\begin{itemize}
\item{No human is born as a flawless software engineer.}
\item{Beginners will start writing programs before they master this skill in perfection.}
\item{Beginners will start writing production software before they master programming to perfection.}
\item{With each generation of humans there will always be new beginners that will start learning from scratch.}
\item{Capabilities and motivation vary significantly between individuals.}
\item{Less capable or motivated individuals will eventually write software for production use.}
@ -81,7 +78,7 @@ Combining these assumptions, it cannot generally be assumed that every beginner
From my personal experience with software developers and students of software engineering, I have received the impression that many do not prioritize safety in their software.
The most severe example for this in my personal career is a former team partner in one of our \gls{C}/\gls{C++} programming courses.
One severe example of this in my personal career so far is a former team partner in one of our \gls{C}/\gls{C++} programming courses.
Despite the fact that the professor instructed us to use valgrind\footnote{a runtime memory analyzer and debugger} to verify our programs, my partner was satisfied with the result after writing the algorithms to his best understanding and correcting all errors detected by the \gls{compiler}.
Discussing the topic with him did not lead to any understanding on his side, and even after verifying that his program had easily detectable memory issues, he insisted on the correct result of the algorithm and pointed out the lack of time.
I observed a similar mindset in some of the other teams.
@ -90,7 +87,7 @@ This personal experience is no scientific proof nor is it statistically signific
It does create a feeling of insecurity, because if their software is distributed widely a few of these people are enough to risk the security of thousands of systems.
A professor and co-author of \citetitle{Arpaci-Dusseau2015} gives the following warning about this issue:
\textit{"Just because a program compiled(!) or even ran once or many times correctly does not mean the program is correct. Many events may have conspired to get you to a point where you believe it works, but then some- thing changes and it stops. A common student reaction is to say (or yell) “But it worked before!” and then blame the compiler, operating system, hardware, or even (dare we say it) the professor. But the problem is usually right where you think it would be, in your code. Get to work and debug it before you blame those other components."}\cite[p.~127]{Arpaci-Dusseau2015}
\textit{"Just because a program compiled(!) or even ran once or many times correctly does not mean the program is correct. Many events may have conspired to get you to a point where you believe it works, but then something changes and it stops. A common student reaction is to say (or yell) “But it worked before!” and then blame the compiler, operating system, hardware, or even (dare we say it) the professor. But the problem is usually right where you think it would be, in your code. Get to work and debug it before you blame those other components."}\cite[p.~127]{Arpaci-Dusseau2015}
Plenty of educational, economical or methodological solutions are imaginable for this problem.
Higher focus on safety and testing in education, enforced internal company guidelines, or industry wide third party software certification requirements can be attempted.
@ -99,80 +96,247 @@ For this thesis such constraints are out of scope, and the focus is on examining
\subsection{Technical Aspect}
The problem on the technical side is that the \gls{compiler} is not able to detect all errors that are in the source code and the human was able to produce an executable program.
The resulting executable program might merely serve its purpose, and can contain severe technical mistakes that are not considered an error by the \gls{compiler}.
This is especially likely using low-abstraction languages like \gls{C} and \gls{C++} for \gls{OS} development, where technical mistakes and intended behavior are very difficult to distinguish.
This is especially likely using low-abstraction languages like \gls{C} and \gls{C++} for \gls{os} development, where technical mistakes and intended behavior are very difficult to distinguish.
The goal of this thesis is to find out if the \gls{Rust} \gls{compiler} is able to mitigate this specific problem.
\chapter{OS Development Concepts}
This chapter explains concepts used in \gls{OS} development today, and is a direct preparation for the upcoming \cref{context::common-mem-safety-mistakes}, which explains specific weaknesses that result from made memory-management mistakes in the attempt to implement these concepts.
Since the \gls{OS} manages the system's hardware directly, some of the implementation and design choices depend on the underlying hardware architecture.
For a full understanding the hardware implications are also outlined in this document.
To bound the extent of this and the following chapters, the explanations are limited to one contemporary architecture, \gls{amd64}, and further narrowed down by focusing on the operation in 64-Bit Long Mode\cite[p.~18]{AMD64Vol2}.
\label{context::os-dev-concepts}
In order to protect the memory of each executed program according to \cref{context::introduction::memory-safety::def}, the \gls{os} must be designed, developed, and tested carefully.
This chapter explains concepts used in \gls{os} development and points out memory-safety critical operations without getting into great detail.
This is done in preparation for the next \cref{context::common-mem-safety-mistakes}, which explains weaknesses that result from memory management mistakes that were made in the attempt to implement the following concepts.
\section{Resource Management by Virtualization}
Resource management in \gls{OS} development is different than in generic \glspl{app} development.
The \gls{OS} - typically the lowest software layer - must know the very details of the system's hardware and perform raw access to it.
Since the \gls{os} manages the system's hardware directly, many of the implementation and design choices depend on hardware design and architecture.
To bound the extent of this document, the explanations are limited to one contemporary architecture, \gls{amd64}, and further narrowed down by focusing on the operation in 64-Bit Long Mode\cite[p.~18]{AMD64Vol2}.
For a full understanding of the \gls{os}'s design, some hardware implications are included although held to a minimum in this chapter.
More hardware-specific details are explained where required throughout \cnameref{rnd} along with concrete \gls{os} development examples.
\subsection{Layers}
The \gls{OS} creates a virtualization\footnote{The term \textit{virtualization} within the \gls{OS} the jargon can be understood as abstraction} layer on top of architecture specific code and abstracts it in form of an internal \gls{api}.
This layer abstracts at least the \gls{CPU} and memory\cite{Arpaci-Dusseau2015}.
Higher-level, complex management algorithms can then implement hardware-independent on top of this \gls{api}, making it reusable across different architectures.
The \gls{OS} then provides an \gls{api} through which \glspl{app} can request access to these virtualized resources.
This allows \gls{app} developers to develop and run different programs easily and presumably safely on the \gls{OS}, agnostic of the architecture.
\section{Virtualization}
The \gls{os} is considered the lowest software layer on the system and must know the very details of the system's hardware resources and perform raw access to it.
The goal is to make the \gls{os} the only software on a system that is required to have this particular knowledge, so that other \glspl{app} can run on virtually any system.
\subsection{Task Models}
TODO shortly describe these and give a reference model
\begin{itemize}
\item Task
\item Program
\item Procedure
\item Process
\item Thread
\end{itemize}
\subsection{Abstraction Layers}
The \gls{os} uses virtualization\footnote{The term \textit{virtualization} within \gls{os} jargon can be understood as abstraction} techniques to create layers on top of architecture specific code and abstracts it in form of an \gls{api}.
This layer abstracts at least the \gls{cpu} and memory\cite[p.~5-7]{Arpaci-Dusseau2015}.
Higher-level complex management algorithms can be implemented hardware-independently on top of this \gls{api}, making it reusable across different architectures.
The \gls{os} then provides an \gls{api} through which \glspl{app} can request access to these virtualized resources.
This allows \gls{app} developers to develop and run different programs easily and presumably safely on the \gls{os}, agnostic of the architecture.
\Cref{fig:system-abstraction-layers} shows a top-down model of the abstraction layers in the system.
\usetikzlibrary{shapes.geometric, arrows}
\begin{figure}[ht]
\usetikzlibrary{chains,arrows.meta,decorations.pathmorphing,quotes}
\begin{tikzpicture}
[start chain=main going below, every on chain/.append style={align=center, text width=35ex, minimum height=7ex, fill=blue!20}, >={LaTeX[]}, node distance=7ex]
% \node[on chain] {\gls{os} Abstraction Layers};
\foreach \i/\itext [remember=\i as \iprior] in {
1/Software Applications,
2/System Libraries,
3/OS API,
4/OS Drivers,
5/{CPU, Memory, I/O, ...}
}
{
\node (block \i) [on chain] {\itext};
\ifnum\i>1
\draw
(block \iprior.west) +(0ex,-1ex) coordinate (b\i)
edge["Synchronous API Calls" {left,text width=15ex,anchor=east, align=right}, bend right=90, ->]
(block \i.east -| b\i) +(0ex,+1ex);
\draw
(block \iprior.east) +(0ex,-1ex) coordinate (k\i)
edge["Asynchronous Events" {right,text width=15ex,anchor=west, align=left}, bend left=90, <-]
(block \i.east -| k\i) +(0ex,1ex);
\fi
}
\draw[dashed,thick,red]
($(block 2)!0.5!(block 3)$) ++(-20ex,0) edge["Userspace", "Kernelspace"'] ++(40ex,0);
\draw[dashed,black,thick]
($(block 4)!0.5!(block 5)$) ++(-20ex,0) edge["Software", "Hardware"'] ++(40ex,0);
\end{tikzpicture}
\caption{System Abstraction Layers. TODO: fill user/kernel space with colors}
\label{fig:system-abstraction-layers}
\end{figure}
\FloatBarrier
\subsection{Resource Specifics}
Virtualization has different technical implications for different resources types, depending on their nature and available count.
To give an example, the \gls{CPU} is not explicitly requested, because any instruction by the program implicitly requires the \gls{CPU} to execute it.
In contrary, a program could ask the \gls{OS} for a specific amount of memory or to write text on the display output on behalf of it.
Virtualization has different technical implications for different resource types, depending on their nature and available count.
Programs that are executed need at least the \gls{cpu} and a certain amount of memory.
\section{Hardware-supported Memory-Management}
Memory is a resource that a program must explicitly ask the \gls{os} for, providing a specific amount for each request.
The \gls{cpu} is generally not explicitly requested, because any instruction by the program implicitly requires the \gls{cpu} for being executed on the system at all.
\subsection{Task Model}
\label{context::os-dev-concepts::virtualization::task}
To better understand the \gls{os}'s problem of managing executed programs and their resources there needs to be a model that can be applied to this situation.
% TODO: remove or complete this graph
%\begin{tikzpicture}[->,node distance=5ex,on grid,
% every state/.style={fill=red,draw=none,circular drop shadow,text=white}]
%
% \node[state] (A) {Task};
% \node[state] (B) {Job};
% \node[state] (C) {Process};
% \node[state] (D) {Program};
% \node[state] (E) {Procedure};
% \node[state] (F) {Thread};
%
% \path (A) edge (B);
%\end{tikzpicture}
As explained in this document it should be understood that a program consists of instructions that can be executed by the \gls{cpu}.
When the \gls{os} loads a program into memory and begins executing its instructions it is called a process.\cite[p.~25]{Arpaci-Dusseau2015}
A process can begin to exist before its execution, when the \gls{os} has internally created an entry for the process that at least contains a reference to the program and the arguments to be passed.
Processes that use the same program are not to be treated differently by the \gls{os} than any other process in terms of memory-safety, and must be prevented from mutual memory access.
These processes can differ in the arguments that are passed to their program, so that their runtime behavior can differ significantly.
\subsubsection{Demo: Process $\neq$ Program}
A great example for demonstrating this difference is \textit{"\gls{bbox}. \glsentrydesc{bbox}"}.
Line 1 in \cref{shell::context::os-dev-concepts::program-process} shows a command that instructs the \gls{os} - \gls{LX} in this example - to execute the program \gls{bbox} three times, with different arguments each time.
The purpose is to demonstrate that the same program \gls{bbox} is instantiated thrice with completely different functionality each time, even existing simultaneously in the \gls{os}'s process list.
\begin{listing}[ht]
\begin{minted}[escapeinside=??,linenos,autogobble,breaklines=true]{shell}
$ busybox sh -c "busybox ps -Ao pid,args | busybox grep busybox"
9441 busybox sh -c busybox ps -Ao pid,args | busybox grep busybox
9442 busybox ps -Ao pid,args
9443 busybox grep busybox
\end{minted}
\caption{Multiple Processes From the Same Program}
\label{shell::context::os-dev-concepts::program-process}
\end{listing}
It invokes \gls{bbox} with the "sh" (a shell utility) argument which in turn receives the "-c" (command execute) argument and another argument containing the expected command.
This command consists of subsequent calls to \gls{bbox} invoking its builtin "ps" (a utility to print the process list) and "grep" (a tool to find text) utilities.
The process list is retrieved by passing "ps" as the argument to \gls{bbox} in the second execution.
Lines 2 through 4 show the three \glspl{process} of the \gls{bbox} \gls{program} with different process IDs and their respective arguments.
\subsection{Terminology}
These terms are used ambiguously in various documents, manuals, and websites, and shall be assigned a role for the model in this document.
\Cref{tab:os-dev-concepts:task-terms} defines these terms and their relationships for the scope of this document.
\begin{table}
\begin{tabularx}{\textwidth}{@{}lX@{}}
\toprule
\Gls{task} & \glsentrydesc{task} \\
\Gls{program} & \glsentrydesc{program} \\
\Gls{process} & \glsentrydesc{process} \\
\Gls{thread} & \glsentrydesc{thread} \\
\Gls{procedure} & \glsentrydesc{procedure} \\
\Gls{function} & \glsentrydesc{function} \\
\Gls{job} & \glsentrydesc{job} \\
\bottomrule
\end{tabularx}
\caption{Definition of commonly used terms for executable code}
\label{tab:os-dev-concepts:task-terms}
\end{table}
\FloatBarrier
\section{Preemptive Multitasking}
To extend the terminology given in \cnameref{context::os-dev-concepts::virtualization::task}, the term multitasking is also known in the \gls{os} jargon.
In this document, it is the \gls{os}'s capability of switching tasks without terminating them, effectively keeping its runtime state in a place that persists during the period of time in which the task is not actively executed on the system's \gls{cpu}.
The preemptive attribute adds the notion of switching tasks without relying on their cooperation, but instead being able to do this at any time the \gls{os} intends to do so.
Preemptive multitasking enables a form of \gls{cpu} virtualization, as a task is not aware of being preempted and resumed.
As explained above, the \gls{cpu} resource doesn't have to be explicitly requested, as the request to execute a program implies a dependency on the \gls{cpu}.
The previously explained virtualization is the foundation for the \gls{os} to perform preemptive multitasking inconspicuously towards the \glspl{app}.
This means that when a task is preempted and continued later, it observes no side-effects other than an elapse of time.
Preemptive multitasking need not be considered during the development of single-threaded \glspl{app}.
\subsection{Concurrent Resource Usage}
Switching tasks has different technical implications for different resource types, depending on their nature and quantity.
A single \gls{cpu} system can not execute more than one program at the same time, as it runs instructions through the \gls{cpu} one-by-one, implicitly holding the program state in form of the \gls{cpu} registers, which are preserved in between the instructions, and preserved between preemptive task switches.
While it doesn't make sense for any instruction to request the \gls{cpu} per-se, there are valid use-cases for programs to request a specific amount of \gls{cpu} bandwidth within a specific amount of time to guarantee a certain amount of computing speed.
Other use-cases emerge when concurrent programs access the exact same resources and are intermixed by the \gls{os}, creating non-sequential resource usage patterns which may put the resource in an inconsistent state and lead to unexpected results for the application.
Within this document these technicalities are considered part of the application semantics and shouldn't affect the \gls{os} developer.
Therefore it is sufficient to recognize the \gls{os}'s responsibility of cleanly switching the program in execution periodically.
In contrast to the \gls{cpu}, the main memory resource is available in limited but huge quantities.
Replacing the content of the memory is not necessary on a preemptive task switch, as long as the memory is not exhausted.
This has the effect that tasks that are currently not in execution on the \gls{cpu} still own a region of main memory.
The \gls{os} must ensure that switching tasks is done properly for all resources to prevent interference and unintended behavior.
To ensure memory safety in this scenario, all data in the memory must be protected from unintended access, according to the definition of memory safety in \cref{context::introduction::memory-safety::def}.
\subsection{Context Switching}
The context switch is the core functionality of the multitasking as it effectively switches to a different task, possibly by preempting the one that is currently running.
When the \gls{os} preempts a task, it needs to store and preserve the current task's context.
The context consists of all volatile resources that can possibly be overwritten by another task.
This is at minimum a set of \gls{cpu} registers depending on the specific architecture.
The \gls{os} stores the preempted context in a well-known and protected memory location, so that it can be restored when this task is resumed.
In preemptive multitasking, context switches are not considered voluntary, but rather by force.
This works by using the \gls{cpu}'s interrupt mechanism which has the ability to jump to an \gls{os} function in the event of an interrupt.
Interrupts for this use-case are usually triggered by programmed timer interrupts, occurring continuously and regularly.
The interrupt mechanism itself is part of the \gls{cpu} which is why the lowest level of the task switching mechanism in the \gls{os} is hardware dependent.
Safety could be increased if the \gls{compiler} or in a more general sense the \gls{proglang} could assist in architecture specific code.
More details on this mechanism are given in \cnameref{rnd::sysprog-conventions::ir-driven-preemptive-cs-amd64}.
\section{Hardware-supported Memory-Paging}
\label{context::introduction::hw-supported-mm}
To improve the efficiency and safety of memory management, developers of hardware and software have been collaborating to offload virtual memory address lookup and caching from the \gls{os} software to the hardware, the \gls{cpu}'s \gls{MMU} to be specific.
A hardware-implementation of the lookup algorithm is fast, and allows rudimentary memory permission runtime-checks to protect pages by leveraging \gls{cpu}'s security rings\cite[p.~117,~p.~145]{AMD64Vol2}.
Activating the 64-Bit long mode on \gls{amd64} makes the system rely primarily on paging memory management, thus the technique of memory segmentation can be neglected in this context.
This section provides information about hardware-supported memory paging and protection techniques.
To improve the efficiency and safety of memory-management, developers of hardware and software have been collaborating to offload virtual memory address lookup and caching from the \gls{OS} software to the hardware, namely the \gls{CPU}'s \gls{MMU}.
A hardware-implementation of the lookup algorithm is fast, and allows rudimentary memory permission runtime-checks to protect pages by leveraging \gls{CPU}'s security rings\cite[p.~117,~p.~145]{AMD64Vol2}.
\subsection{Virtual Address Translation and Paging}
Paging with virtual addresses is one method of virtualizing memory and in this way transparently share the system's memory among running tasks and the \gls{OS} itself, presumably in a safe way.
Paging with virtual addresses is one method of virtualizing memory and in this way transparently share the system's memory among running tasks and the \gls{os} itself, presumably in a safe way.
Even when using a language that supports direct memory addressing, \gls{app} developers don't have to consider paging and address translation in the logic of their programs, because all addresses in their program are virtual and are translated at runtime by the \gls{MMU}.
The translation itself is performed by the \gls{MMU} according to a map that is called page table, which is a structure maintained by the \gls{OS} in the main memory.
This memory structure can be stored anywhere in memory, and the address is handed to the \gls{MMU} via a specific \gls{CPU} register, \textit{CR3} on \gls{amd64}.
The \gls{OS} can maintain multiple page table structures, and can create different virtual address spaces by changing \gls{MMU}'s page-table pointer - the \textit{CR3} register.
As mentioned above the hardware caches provides caches for repeated lookups of the same virtual addresses.
Controlling the validity of these cache entries is in the \gls{OS} responsibility.
The translation itself is performed by the \gls{MMU} according to a map that is called page table, which is a structure maintained by the \gls{os} in the main memory.
This memory structure can be stored anywhere in memory, and the address is handed to the \gls{MMU} via a specific \gls{cpu} register, \textit{CR3} on \gls{amd64}.
The \gls{os} can maintain multiple page table structures, and can create different virtual address spaces by changing the \gls{MMU}'s page-table pointer register.
To avoid the need for storing a translation mapping for every possible address, mappings are grouped into fixed-size pieces, the \textit{page}s.
\subsubsection{Translation Caching}
The hardware caches the translation results for subsequent lookups in the \gls{tlb} \cite[p.~142-143]{AMD64Vol2}.
This greatly improves the speed for repeated access to the same virtual addresses, but is certainly dangerous for memory-safety.
Controlling the validity of these cache entries is the responsibility of the \gls{os}.
This is critical for memory-safety, as the cached virtual to physical address lookup results are different for each address space and shouldn't leak into other address spaces.
If any lookup yields a cached result which originates from a different virtual address space, the physical address is likely to belong to a memory region to which the current task shouldn't have access.
What makes it more difficult to manage is that there are exceptions to this, e.g. when memory is intentionally shared between two processes or threads, which must be set up by the \gls{os} according to the processes' requests.
\subsubsection{Page: Chunks of Single Addresses}
To avoid the need for storing a translation mapping for every single physical address, mappings are grouped into equisized regions, called \textit{page}s.
This works by encoding the offset within the page in the virtual address, together with the page's index in the page table.
The offset size depends on the chosen page-size, and can be calculated with the following formula, given page-size in bytes as $p$:
\begin{equation}
\textrm{offset\_bits(p)} = log_2(p), \{ p \in N, p: n^2 \}
\textrm{vaddr\_offset}(p) = \log_2(p), \quad p = 2^n, \; n \in N
\end{equation}
For example, the \gls{amd64} default page-size of 4 KiB has a 12-bit offset, which leaves $64-12 = 52$ bits for page-table indexing.
For example, the \gls{amd64} default page-size of 4 KiB has a 12-bit offset, which theoretically leaves the other $64-12 = 52$ bits of the virtual address for page-table indexing.
Practically there's an architectural limit of 48 bits for the virtual address.
The resulting paging hierarchy is presented in \cnameref{rnd::sysprog-conventions::paging-amd64}.
\paragraph{Page-Faults}
If an instruction uses a virtual address that indexes a page which is not present in memory, the \gls{CPU} will generate page-fault exception to give control back to the \gls{OS}.
The \gls{OS} must then react accordingly by e.g. finding free physical memory and map it to the page my modifying the page's page-table entry.
\subsection{Page-Faults}
The page-fault is a hardware-triggered, memory-safety critical event that must be handled by the \gls{os}.
It is triggered by the \gls{cpu}'s \gls{MMU} during the virtual address lookup algorithm, when an instruction uses a virtual address for which the target page is not available.
This happens for example if the indexed page is not present in main memory or has not been allocated at all.
It also happens when an instruction violates a page protection, of which four exist and can be configured by the \gls{os} on \gls{amd64} \cite[p.~145-148]{AMD64Vol2}:
\begin{itemize}
\item (1) Protect supervisor pages from user access
\item (2) Prevent writes into read-only pages
\item Prevent the \gls{cpu} from executing (3) non-executable pages and (4) user pages
\end{itemize}
\subsubsection{Swapping}
The finite primary memory can only hold a finite number of virtual pages, and the \gls{OS} is responsible for having the required pages present.
The \gls{os} must implement the page-fault handler to deal with it accordingly, e.g. by finding free physical memory and mapping it to the page by modifying the virtual address's page-table entry, or by indicating denied access.
\subsection{Swapping}
The finite primary memory can only hold a finite number of virtual pages, and the \gls{os} is responsible for having the required pages present.
Besides the pages that contain the page-table itself, the pages that aren't required by the current instruction might be moved to secondary memory.
Swapping pages in and out of primary memory is risky as it requires to transfer large amounts of raw memory content, but these safety analyses exceed the scope of this thesis.
\section{Multi-Level Paging}
\subsection{Multi-Level Paging}
\label{context::introduction::hw-supported-mm::multilevel-paging}
If only one page-table per virtual address space was used that consists of $2^{52}$ page-table entries, which must at minimum store the physical address, it would require $\frac{52 * 2^{52} [Bit]}{8*1024^4 [Bit/Byte]} = 26624$ TiB of memory for each virtual address space.
Even if only a handful of additional pages were allocated and mapped, the \gls{OS} would still have to allocate this huge page-table.
Even if only a handful of additional pages were allocated and mapped, the \gls{os} would still have to allocate this huge page-table.
This vast consumption of main memory is impractical and impossible for average systems, which rarely surpass 100 GiB of main memory.
Therefore most systems use a hierarchy of page tables.
@ -262,92 +426,142 @@ Using a hierarchical translation structure allows to save significant amounts of
\caption{Hierarchical Virtual Paging}
\label{fig:paging-hierarchy-abstract}
\end{figure}
\FloatBarrier
The details of how this is implemented on \gls{amd64} can be found in \cnameref{rnd::sysprog-conventions::paging-amd64}.
The details of the 4-level-hierarchy paging implemented on \gls{amd64} can be found in \cnamepref{rnd::sysprog-conventions::paging-amd64}.
\subsection{The Concepts of Stack And Heap}
\label{context::introduction::hw-supported-mm::stackheap}
In \gls{proglang} and \gls{OS} design and literature, the terms \gls{stack} and \gls{heap} are ubiquitous. A research for their definition wasn't conclusive, indicating that they are rather concepts than absolutely defined terms, and might be implemented and used differently on various architectures, \glspl{proglang} and \glspl{OS}.
\section{Stack And Heap}
\label{context::introduction::os-dev-concepts::stackheap}
In \gls{proglang} and \gls{os} design and literature, the terms \gls{stack} and \gls{heap} are ubiquitous.
Research into their original definition was inconclusive, indicating that they are to be taken as concepts rather than absolutely defined methods.
They might be implemented and used differently on various architectures, \glspl{proglang} and \glspl{os}.
This part focuses on the basic concepts, already limiting the scope to the \gls{amd64} architecture, the \glspl{proglang} \gls{C} and \gls{Rust} and their usage on either bare-metal or \gls{LX}.
A detailed continuation is found in \cnameref{rnd::mm-conventions}.
This section explains the basic concepts with a tendency towards the \gls{amd64} architecture, the \gls{C} and \gls{Rust} \glspl{proglang} and their usage for bare-metal \gls{os} and \gls{LX} \gls{app} development.
\subsubsection{Stack: Hardware-Backed Abstract Type}
\label{context::introduction::hw-supported-mm::stackheap::stack}
\subsection{Stack: Hardware-Backed Abstract Type}
\label{context::introduction::os-dev-concepts::stackheap::stack}
In summary, the \gls{stack} is a memory model for structuring contiguous memory.
It grows by adding new data entries on top of each other.
According to the \gls{stack} analogy, only the topmost element can be accessed and removed, thus it behaves like a Last-In-First-Out data structure.
The hardware manuals \citetitle{AMD64Vol1} and \citetitle{AMD64Vol2} have no mention of the word \textit{heap}, but use \textit{stack} hundreds of times, indicating that \gls{stack} is implemented in hardware to some extend.
The hardware manuals \citetitle{AMD64Vol1} and \citetitle{AMD64Vol2} have no mention of the word \textit{heap}, but use \textit{stack} hundreds of times, indicating that \gls{stack} is implemented in hardware to some extent.
The \gls{amd64} manuals jointly describe how the \gls{stack} is used and influenced by various instructions on this architecture.
Here it grows from numerically higher to numerically lower addresses, whereas the numerically highest address is called the stack bottom, and the current numerically lowest address is the stack top.
In 64-Bit long mode \gls{amd64} doesn't consider the stack to be sized.
The \gls{stack} is allocated per procedure and typically stores only procedure-local data, which is simply forgotten once the procedure has completed.
To achieve memory-safety with regards to \gls{stack} management inside \gls{OS}, each procedure must only access its own particular \gls{stack}.
Additionally, \glspl {stack} must be prevented from growing into memory regions that might belong to other procedures.
This needs to be considered by \gls{OS} developers when implementing memory-management for multitasking \gls{OS}, as further investigated in \cref{rnd::existing-os-dev-wity-rust,rnd::imezzos-preemptive-multitasking}.
The \gls{stack} is typically allocated per process or thread and stores the \glspl{sf} for each procedure.
The \gls{sf} is automatically cleaned up or simply forgotten once the procedure has completed.
On chains of procedure calls, each preceding \gls{sf} remains on the stack.
\cref{TODO-Callstacklayout}\footnote{By R. S. Shaw - Own work, Public Domain, \url{https://commons.wikimedia.org/w/index.php?curid=1956587}} displays a \gls{stack} that contains two \glspl{sf} from different procedures.
\subsubsection{Heap: Organized Chaos}
\begin{figure}[ht]
\centering
%\begin{wrapfigure}{R}{0.5\textwidth}
\includegraphics[width=0.49\textwidth]{gfx/TODO-Callstacklayout}
\caption{TODO-Redraw-Callstacklayout}
\label{TODO-Callstacklayout}
%\end{wrapfigure}
\end{figure}
% TODO: draw stack with multiple stack-frames
\FloatBarrier
\subsubsection{Safety Concerns}
To achieve memory-safe \gls{stack} management in the \gls{os}, each procedure must only be able to access its own particular \gls{stack} and possible references via its arguments.
This is not the case in a regular \gls{C} program, where the called procedure is able to modify the previous \gls{sf}, which is demonstrated in \cnameref{context::common-mem-safety-mistakes::manifestations::ret-addr-manipulation}.
Additionally, the \glspl{stack} must be prevented from growing into other memory zones like the \gls{heap}.
Since \gls{stack} management is memory-safety critical for \gls{os} developers when implementing memory management for multitasking within the \gls{os}, it is one of the main subjects in \cnameref{rnd}, \cref{rnd::existing-os-dev-with-rust,rnd::imezzos-preemptive-multitasking}.
\subsection{Heap: Organized Chaos}
\label{context::introduction::hw-supported-mm::stackheap::heap}
\Gls{heap} is an ambiguous term that names a data structure in more theoretical computer science and a memory zone in system programming.
In this document \gls{heap} refers to the latter.
\Gls{heap} is an ambiguous term that names a data structure in more theoretical computer science and a memory model in system programming.
This document refers to the latter.
\subsection{Combining Stack And Heap}
% TODO: figure that shows stack and heap?
The \gls{heap} is managed by the \gls{os} to keep track of allocated memory on behalf of all \glspl{app} on the system.
The algorithms within the \gls{os} to manage the \gls{heap} can be arbitrarily complex, and the choice is based on the trade-offs between complexity, efficiency and speed.
Safety is explicitly omitted from the trade-offs list, as it should never be traded against anything else.
\section{Preemptive Multitasking}
The previously explained virtualization is the foundation for the \gls{OS} to perform preemptive multitasking inconspicuously towards the \glspl{app}.
This means that when a task is preempted and continued later, it observes no side-effects other than an elapse of time.
Preemptive multitasking needs not be considered during development of single-threaded \gls{app}.
Multi-threading and
\Gls{app} developers make use of dynamic \gls{heap} allocation requests via the \gls{os}'s \gls{api} if the memory usage for their program is not predictable at the time of development.
The requests will be processed by the \gls{os} and granted or denied according to the ratio of requested memory and available system memory.
The \gls{app} is responsible for returning no-longer required memory to the \gls{os}, which makes it available to other \glspl{app} in the system.
\subsection{Resource Characteristics}
Switching tasks has different technical implications for different resources types, depending on their nature and quantity.
For example, a single \gls{CPU} system can not be utilized by more than one program at the same time, as it runs instructions one-by-one and implicitly holding the program state in form of the \gls{CPU} registers, which are preserved in between the instructions.
In contrast, main memory resources are only limited by their capacity and can otherwise be shared by several programs simultaneously, so that tasks that are not executed by \gls{CPU} can still have data stored in memory.
Memory which is not cleaned up properly is blocked until the \gls{app} is terminated.
If \gls{heap} allocations within the \gls{os} are not cleaned up properly, the allocated memory is lost until the whole system is restarted.
Between properly cleaning up and losing memory allocations is a whole range of possible memory-safety issues, which are explained in \cref{context::introduction::memory-safety-violation-in-sw}.
The \gls{OS} must ensure that switching tasks is done properly for all resources to prevent interference and unintended behavior.
To ensure memory safety in this scenario, all data in the memory must be protected from unintended access, according to the definition of memory safety in \cref{context::introduction::memory-safety::def}.
\section{Stack And Heap: Combined Usage}
\Glspl{program} combine the usage of \gls{stack} and \gls{heap} and use them for different purposes.
\subsection{Context Switching}
When the \gls{OS} preempts a task, it needs to store and preserve the current task's context.
The context consists of all volatile resources that can possibly be overwritten by another task.
This is at minimum a set of \gls{CPU} registers depending on the specific architecture.
A description for \gls{amd64} is given in \cref{tab:task-minimum-context-registers}.
Depending on the \gls{proglang}'s \gls{compiler} and the target system, the responsibility of writing the memory management code falls either on the developer, the \gls{compiler}, or both.
\Cref{tab:stack-heap-usage-responsbility} describes the use cases and responsibilities for \gls{stack} and \gls{heap} with regard to user-space and \gls{os} development.
The \gls{OS} stores the preempted context in a well-known and protected memory location, so that it can be restored when this task is resumed.
\begin{table}[ht!]
\begin{tabularx}{0.99\textwidth}{XX}
\toprule
\multicolumn{2}{c}{Stack and Heap Usage Overview} \\
\toprule
\multicolumn{1}{c}{\Gls{stack}} & \multicolumn{1}{c}{Responsibility} \\
\hhline{--}
\Gls{sf} (return address, frame pointer, see \cref{rnd::sysprog-conventions::stackframe-amd64,lst:amd64-stack-frame-components}).
\begin{table}
\begin{tabularx}{\textwidth}{| c | X | X |}
\hline
\textbf{descriptive name} &
\textbf{register names on amd64} &
\textbf{description} \\
\hline
the instruction pointer register & RIP & address of the next instruction to be fetched \\
\hline
the stack pointer register & RSP & address of current position in stack \\
\hline
the flags register & RFLAGS & various attributes, e.g. the interrupt flag \\
\hline
all general-purpose registers & RAX, RBX, RCX, RDX, RDI, RSI, RBP, RSP, R8--R15 & arbitrary data \\
\hline
Procedure-local fixed-sized variables (primitive types, custom fixed-size structures, references, fixed-length arrays, etc.)
Procedure arguments (allocated as and copied to local variables)
&
In user space development the \gls{stack} management code is fully generated \tikzmarkcircle{1} by the \gls{compiler} in \gls{C} and \gls{Rust}, using a standardized calling convention for the platform.
The \gls{os} must implement support for any standardized calling conventions used by the \gls{compiler} for user-space \glspl{program}.
\\
\bottomrule
\multicolumn{1}{c}{\Gls{heap}} & \multicolumn{1}{c}{Responsibility} \\
\toprule
Dynamically sized data structures like linked-lists, extendable arrays, queues, trees
Can be used for unstructured arbitrary data.
&
Manual calls for allocation and freeing memory are written by the programmer in \gls{C} using the \gls{os} \gls{api}.
\gls{Rust} has support of generating these calls on behalf of the programmer on supported platforms via its standard library \tikzmarkcircle{2}.
\\
\bottomrule
\end{tabularx}
\caption{Minimum Context Registers on amd64\cite[p.~28]{AMD64Vol2}}
\label{tab:task-minimum-context-registers}
\begin{tabularx}{\textwidth}{l@{ }X}
\tikzmarkcircle{1} &
The \gls{app} programmer is in control of the source code that triggers the auto generated content.
E.g. defining a function with local variables in the code will yield generated instructions to handle the \gls{sf}. \\
\tikzmarkcircle{2} &
The origin of the auto generated content remains the source code, written by the programmer.
Therefore it is the choice of the programmer whether to place the variables on the \gls{stack} or on the \gls{heap}.
\end{tabularx}
\caption{Stack and Heap Usage and Responsibility}
\label{tab:stack-heap-usage-responsbility}
\end{table}
\FloatBarrier
\subsubsection{Using Hardware Induced Interrupts}
In preemptive multitasking, context switches are not considered voluntary, but rather by force.
This works by using the \gls{CPU}'s interrupt mechanism which has the ability to jump to an \gls{OS} function in the event of an interrupt.
Interrupts for this use-case are usually triggered by programmed timer interrupts, occurring continuously and regularly.
\subsection{Arrangement}
Both zones must be organized separately and arranged within the virtual address space which is assigned to the process or thread.
\Cref{TODO-heap-stack-example-program} shows a \gls{C} \gls{program} and a simplified model of the hypothetical address space that would result on execution.
The interrupt mechanism itself is part of the \gls{CPU} which is why the lowest level of the task switching mechanism in the \gls{OS} is hardware dependent.
Safety could be increased if the \gls{compiler} or in a more general sense the \gls{proglang} could assist in architecture specific code.
\begin{figure}[ht!]
\includegraphics[width=\textwidth]{gfx/TODO-heap-stack-example-program}
\caption{TODO-heap-stack-example-program}
\label{TODO-heap-stack-example-program}
\end{figure}
% TODO: improve figure that shows stack and heap?
\FloatBarrier
More details on this mechanism are given in \cnameref{rnd::sysprog-conventions::ir-driven-preemptive-cs-amd64}.
The entries above \textit{"Heap"} are the different parts of the \gls{compiler} output for this program, and are loaded by the \gls{os} before the execution.
The colors in the code correspond to the entries in the memory model.
Text, which corresponds to the red box, contains the program instructions.
RoData is read-only memory content, in this case the string literal \textit{"string"}.
BSS contains the variable \textit{iSize}.
Lastly the \gls{stack} holds the pointer variable \textit{p}, which will reference the result of the \textit{malloc(iSize)} memory allocation.
\subsection{Programming Language Support}
In many \glspl{proglang} that are commonly used for \gls{app} development, the code for allocation and cleanup of \gls{heap} memory is generated by the \gls{compiler} on behalf of the programmer.
Such languages rely on the \gls{os} memory management \gls{api} and are thus not suited for developing the \gls{os} itself.
Vice versa, languages which are suited for \gls{os} development usually don't generate \gls{heap} management code and therefore don't ensure memory-safety.
\gls{Rust} might be an exception to this by adding static safety checks suited for \gls{os} development.
To what extent and by which means this is true has to be confirmed or denied by the end of this work.
\chapter{Common Memory-Safety Mistakes}
\label{context::common-mem-safety-mistakes}
@ -383,7 +597,7 @@ This can happen on certain languages, which "allow direct addressing of memory l
This formulation of languages prone to this weakness is incorrect, as it doesn't conform with the earlier statement of languages that "allow direct addressing of memory locations".
Direct memory addressing support doesn't imply a lack of memory management support.
There are languages that provide memory management support and still allow direct memory addressing, which is interesting for \gls{OS} development.
There are languages that provide memory management support and still allow direct memory addressing, which is interesting for \gls{os} development.
\gls{Rust} is one of these languages, although it requires the developer to explicitly acknowledge all direct memory access operations with the \textit{unsafe} keyword.
More information on \gls{Rust} follows in \cref{context::rust}.
@ -396,7 +610,7 @@ The only data available is based on publicly available sources, thus the complet
The data and visualizations are supplied by the \gls{NVD}, which collects the data based on the umbrella weakness CWE-635\footnote{http://cwe.mitre.org/data/definitions/635.html} that was specifically created for the \gls{NVD}.
The numbers of these selected weaknesses are detailed in the following figures, the rest is grouped as \textit{other}.
\cref{fig:vulnerability-ratio-history} and \cref{fig:vulnerability-counts-history} display a decade of data on vulnerabilities grouped by their \gls{CWE} category.
\Cref{fig:vulnerability-ratio-history,fig:vulnerability-counts-history} display a decade of data on vulnerabilities grouped by their \gls{CWE} category.
The category called \textit{buffer\footnote{A bounded chunk of memory used by programs to store and exchange data} errors} represents \autocite{MITRE-CWE-119}.
In \cref{fig:vulnerability-ratio-history} it has the color light blue, 2nd from the bottom in the legend, and in \cref{fig:vulnerability-counts-history} it has the color blue, 2nd from the top in the legend.
@ -438,11 +652,11 @@ With 16.34 percent of all vulnerabilities known by 2016, and an average of 12.92
\subsection{Vulnerable APIs in Linux and C/C++}
\label{context::common-mem-safety-mistakes::vuln-apis-linux-c}
\glspl{api} are a ubiquitous for programmers to access all kinds of functionality, serving as interfaces to network services, providing existing algorithms in form of libraries and frameworks, or interfacing with the local \gls{OS}.
\Glspl{api} are a ubiquitous means for programmers to access all kinds of functionality, serving as interfaces to network services, providing existing algorithms in form of libraries and frameworks, or interfacing with the local \gls{os}.
It is inherently dangerous to expose any sort of functionality through an \gls{api}, as it might contain bugs that will be spread widely with rising popularity.
Every \gls{OS} needs to provide an \gls{api} for it's core functionality to be useful and extendable.
Every \gls{os} needs to provide an \gls{api} for its core functionality to be useful and extendable.
A very popular and widely supported \gls{OS} is \gls{LX}.
A very popular and widely supported \gls{os} is \gls{LX}.
The system libraries and the kernel are written in \gls{C}, the latter containing some hardware specific \gls{asm} code.
\gls{LX} is very popular for embedded systems, network servers and large-scale computers. % TODO: reference
Through \gls{android}, \gls{LX} has been distributed to a huge amount of mobile devices within the last decade. % TODO: reference
@ -457,16 +671,180 @@ The manifestations of memory-safety related vulnerabilities in the \gls{LX} ecos
\label{context::common-mem-safety-mistakes::manifestations}
% Significance of the Study
% The significance is a statement of why it is important to determine the answer to the gap in the knowledge, and is related to improving the human condition. The contribution to the body of knowledge is described, and summarizes who will be able to use the knowledge to make better decisions, improve policy, advance science, or other uses of the new information. The “new” data is the information used to fill the gap in the knowledge.
This section contains real-world and \textit{re}constructed example manifestations of memory-safety related weaknesses.
This section contains real-world manifestations and \textit{re}constructed experiments of memory-safety related weaknesses.
% TODO
\subsection{The Stack Clash}
A recent and high impact vulnerability named \textit{Stack Clash}\footnote{https://blog.qualys.com/securitylabs/2017/06/19/the-stack-clash}, is briefly described as \textit{"a vulnerability in the memory management of several operating systems. It affects Linux, OpenBSD, NetBSD, FreeBSD and Solaris, on i386 and amd64. It can be exploited by attackers to corrupt memory and execute arbitrary code."}
The \gls{LX} specific vulnerability is listed as CVE-2017-1000364\footnote{http://www.cvedetails.com/cve/CVE-2017-1000364/}, where \textit{"an issue was discovered in the size of the stack guard page on Linux, specifically a 4k stack guard page is not sufficiently large and can be "jumped" over (the stack guard page is bypassed)"}.
It is assigned to the \autocite{MITRE-CWE-119} explained in \cref{context::common-mem-safety-mistakes::cwe::119}.
\subsection{CWE-119 Examples}
These examples are officially listed under the \autocite{MITRE-CWE-119} website.
\subsubsection{Example 1}
This example takes an IP address from a user, verifies that it is well formed and then looks up the hostname and copies it into a buffer.
\begin{listing}[htb]
\begin{minted}[autogobble,linenos,breaklines=true]{c}
void host_lookup(char *user_supplied_addr){
struct hostent *hp;
in_addr_t *addr;
char hostname[64];
in_addr_t inet_addr(const char *cp);
/*routine that ensures user_supplied_addr is in the right format for conversion */
validate_addr_form(user_supplied_addr);
addr = inet_addr(user_supplied_addr);
hp = gethostbyaddr( addr, sizeof(struct in_addr), AF_INET);
strcpy(hostname, hp->h_name);
}
\end{minted}
\caption{Bad Code}
%\label{code::context::examples::func-callee-c}
\end{listing}
This function allocates a buffer of 64 bytes to store the hostname, however there is no guarantee that the hostname will not be larger than 64 bytes. If an attacker specifies an address which resolves to a very large hostname, then we may overwrite sensitive data or even relinquish control flow to the attacker.
Note that this example also contains an unchecked return value (CWE-252) that can lead to a NULL pointer dereference (CWE-476).
\subsubsection{Example 2}
This example applies an encoding procedure to an input string and stores it into a buffer.
\begin{listing}[htb]
\begin{minted}[autogobble,linenos,breaklines=true]{c}
char * copy_input(char *user_supplied_string){
int i, dst_index;
char *dst_buf = (char*)malloc(4*sizeof(char) * MAX_SIZE);
if ( MAX_SIZE <= strlen(user_supplied_string) ){
die("user string too long, die evil hacker!");
}
dst_index = 0;
for ( i = 0; i < strlen(user_supplied_string); i++ ){
if( '&' == user_supplied_string[i] ){
dst_buf[dst_index++] = '&';
dst_buf[dst_index++] = 'a';
dst_buf[dst_index++] = 'm';
dst_buf[dst_index++] = 'p';
dst_buf[dst_index++] = ';';
}
else if ('<' == user_supplied_string[i] ){
/* encode to &lt; */
} else dst_buf[dst_index++] = user_supplied_string[i];
}
return dst_buf;
}
\end{minted}
\caption{Bad Code}
%\label{code::context::examples::func-callee-c}
\end{listing}
The programmer attempts to encode the ampersand character in the user-controlled string, however the length of the string is validated before the encoding procedure is applied. Furthermore, the programmer assumes encoding expansion will only expand a given character by a factor of 4, while the encoding of the ampersand expands by 5. As a result, when the encoding procedure expands the string it is possible to overflow the destination buffer if the attacker provides a string of many ampersands.
\subsubsection{Example 3}
The following example asks a user for an offset into an array to select an item.
\begin{listing}[htb]
\begin{minted}[autogobble,linenos,breaklines=true]{c}
int main (int argc, char **argv) {
char *items[] = {"boat", "car", "truck", "train"};
int index = GetUntrustedOffset();
printf("You selected %s\n", items[index-1]);
}
\end{minted}
\caption{Bad Code}
%\label{code::context::examples::func-callee-c}
\end{listing}
The programmer allows the user to specify which element in the list to select, however an attacker can provide an out-of-bounds offset, resulting in a buffer over-read (CWE-126).
\subsubsection{Example 4}
In the following code, the method retrieves a value from an array at a specific array index location that is given as an input parameter to the method.
\begin{listing}[htb]
\begin{minted}[autogobble,linenos,breaklines=true]{c}
int getValueFromArray(int *array, int len, int index) {
int value;
// check that the array index is less than the maximum
// length of the array
if (index < len) {
// get the value at the specified index of the array
value = array[index];
}
// if array index is invalid then output error message
// and return value indicating error
else {
printf("Value is: %d\n", array[index]);
value = -1;
}
return value;
}
\end{minted}
\caption{Bad Code}
%\label{code::context::examples::func-callee-c}
\end{listing}
However, this method only verifies that the given array index is less than the maximum length of the array but does not check for the minimum value (CWE-839). This will allow a negative value to be accepted as the input array index, which will result in an out-of-bounds read (CWE-125) and may allow access to sensitive memory. The input array index should be checked to verify that it is within the maximum and minimum range required for the array (CWE-129). In this example the if statement should be modified to include a minimum range check, as shown below.
\begin{listing}[htb]
\begin{minted}[autogobble,linenos,breaklines=true]{c}
...
// check that the array index is within the correct
// range of values for the array
if (index >= 0 && index < len) {
...
\end{minted}
\caption{Good Code}
%\label{code::context::examples::func-callee-c}
\end{listing}
\subsection{Return Address Manipulation}
\label{context::common-mem-safety-mistakes::manifestations::ret-addr-manipulation}
\Cref{code::context::examples::sf-modification-simple} is a little example program in \gls{C}, which manipulates the return function address stored on the \gls{stack}.
This is done by simple and legal pointer arithmetic.
It abuses the address of the first local variable to create references into the \gls{sf} below on the \gls{stack}.
Since the first variable is on the bottom of the \gls{sf} in the function, any higher address is part of the previous \gls{sf}.
Depending on the \gls{compiler}, the return address is stored either one or two data entries below the first local variable.
In a brute-force manner the program simply overwrites both entries with a different function address.
By simply writing a different function address at these entries, the \mintinline{c}{ret} will jump there, since the original return address has been overwritten.
\begin{figure}[ht!]
\begin{subfigure}[T]{0.60\textwidth}
\centering
\begin{minted}[linenos,breaklines]{c}
void modifier(void) {
uint64_t *p;
*(&p + 1) =
(uint64_t *)simple_printer;
*(&p + 2) =
(uint64_t *)simple_printer;
}
\end{minted}
\subcaption{C code}
\end{subfigure}
\begin{subfigure}[T]{0.39\textwidth}
\centering
\begin{minted}[linenos,breaklines]{objdump}
movabs rax,0x400690
mov QWORD PTR [rsp],rax
mov QWORD PTR [rsp+0x8],rax
ret
\end{minted}
\subcaption{ASM code}
\end{subfigure}
\caption{Stack-Frame Modification}
\label{code::context::examples::sf-modification-simple}
\end{figure}
% TODO: port to rust
\Cref{TODO-callstack-manipulation} is an attempt to visualize what happens in memory and with the \gls{stack} and the \gls{cpu}'s RIP (64-Bit Instruction Pointer) register.
\begin{figure}
\includegraphics[width=\textwidth]{gfx/TODO-callstack-manipulation}
\caption{TODO-callstack-manipulation}
\label{TODO-callstack-manipulation}
\end{figure}
\FloatBarrier
% TODO explain that this CWE-119 vulnerability is also "Execute Code"
% TODO: more references and deeper explanation of what happens: see introduction in https://www.qualys.com/2017/06/19/stack-clash/stack-clash.txt
\subsection{Uninitialized Pointers}
@ -500,12 +878,21 @@ if (ptr == NULL) {
}
\end{lstlisting}
\subsection{The Stack Clash}
A recent and high impact vulnerability named \textit{Stack Clash}\footnote{https://blog.qualys.com/securitylabs/2017/06/19/the-stack-clash}, is briefly described as \textit{"a vulnerability in the memory management of several operating systems. It affects Linux, OpenBSD, NetBSD, FreeBSD and Solaris, on i386 and amd64. It can be exploited by attackers to corrupt memory and execute arbitrary code."}
The \gls{LX} specific vulnerability is listed as CVE-2017-1000364\footnote{http://www.cvedetails.com/cve/CVE-2017-1000364/}, where \textit{"an issue was discovered in the size of the stack guard page on Linux, specifically a 4k stack guard page is not sufficiently large and can be "jumped" over (the stack guard page is bypassed)"}.
It is assigned to the \citetitle{MITRE-CWE-119}\autocite{MITRE-CWE-119} presented in \cref{context::common-mem-safety-mistakes::cwe::119}.
\cref{context::introduction::hw-supported-mm::multilevel-paging}
% TODO explain that this CWE-119 vulnerability is also "Execute Code"
% TODO more references and deeper explanation of what happens: see introduction in https://www.qualys.com/2017/06/19/stack-clash/stack-clash.txt
\chapter{Weakness Mitigation}
\label{context::weakness-mitigation}
\chapter{Safe OS Development}
\label{context::introduciton::safe-os-dev}
This section gives a brief summary of relevant concepts of \gls{OS} development on common hardware platforms, focusing on memory management and its risks.
In order to protect the memory of each executed program according to \cref{context::introduction::memory-safety::def}, the \gls{OS} must be designed, developed, and tested carefully.
\section{Detecting Memory-Safety Violations ASAP}
\label{context::safe-os-dev::detecting-safety-violations-asap}
@ -521,15 +908,15 @@ In addition to the presence and quality of tests, their timing in the software l
The earliest tests can be as soon as the process of software development itself, and the latest ones can be at the time of execution on the production system of the end-user.
It is desirable to place tests as early as possible in the software life cycle, to prevent them from compromising running systems that hold sensitive data and offer important services.
This suggests that since the \gls{OS} is lower in the hierarchy of system components at runtime, testing of the \gls{OS} must happen regardless of specific \glspl{app} and development time.
Especially testing the \gls{OS}'s internal states which can not be directly mutated via the \gls{api} exposed to the \glspl{app}.
To explain this from the \gls{app} perspective, testing the \gls{OS} at runtime states is not plausible , because the \gls{app} can not freely mutate the system's state.
This suggests that since the \gls{os} is lower in the hierarchy of system components at runtime, testing of the \gls{os} must happen regardless of specific \glspl{app} and development time.
Especially testing the \gls{os}'s internal states which can not be directly mutated via the \gls{api} exposed to the \glspl{app}.
To explain this from the \gls{app} perspective, testing the \gls{os} at runtime states is not plausible, because the \gls{app} can not freely mutate the system's state.
Even if it could, testing all possible permutations of system state in every possible \gls{app} would be highly redundant and nonetheless leaves the risk for untested edge cases that happen only under specific system circumstances, possibly influenced by other components on the system as described in the beginning of \cref{context::introduction::memory-safety}.
The \gls{app} developer is forced to trust the underlying \gls{OS}.
This puts high importance on the safety of the \gls{OS} design and implementation.
The \gls{app} developer is forced to trust the underlying \gls{os}.
This puts high importance on the safety of the \gls{os} design and implementation.
\subsection{The Effects Of Programming Languages on Memory-Safety}
There are dozens of \glspl{proglang} used by humans to write \glspl{app}, but only a few are used to write \glspl{OS}.
There are dozens of \glspl{proglang} used by humans to write \glspl{app}, but only a few are used to write \glspl{os}.
\subsubsection{Abstraction: Safety vs. Functionality}
\label{context::introduction::memory-safety::abstr-safety-function}
@ -545,8 +932,7 @@ By defining an abstraction layer in form of a programming language, the language
\label{context::introduction::language-compilers-analyzers}
In \cref{context::introduction::memory-safety}, specifically in TODO "reference detection" was explained that programming languages have direct impact on the memory-safety.
This section gives an example of how severe this impact is and explains the requirements on a \gls{OS} language.
This section gives an example of how severe this impact is and explains the requirements on a \gls{os} language.
\chapter{Mitigation Attempts}
@ -556,25 +942,24 @@ With the growing number of vulnerabilities, various solutions have been proposed
Static analyses are not very effective on a language that has not been designed to be safety-analyzed. TODO? reference?
For this reason there have been attempts to define subsets of the \gls{C} language that can be safety checked, TODO: references of Cyclone, CCured, etc.
Safety checks that are performed at runtime introduce a high degree of overhead, which makes it a nonviable option in the domain of \gls{OS} development, where many code paths must be very fast to ensure the operation of high speed I/O devices\cite{Balasubramanian2017} or tasks with \gls{realtime} requirements. (TODO: explain realtime requirements)
This has been forcing \gls{OS} developers to prioritize performance over safety. (TODO: reference)
Safety checks that are performed at runtime introduce a high degree of overhead, which makes them a nonviable option in the domain of \gls{os} development, where many code paths must be very fast to ensure the operation of high speed I/O devices\cite{Balasubramanian2017} or tasks with \gls{realtime} requirements. (TODO: explain realtime requirements)
This has been forcing \gls{os} developers to prioritize performance over safety. (TODO: reference)
Details about the challenge of writing code that does memory management safely, and related vulnerabilities are given further along in \cref{context::common-mem-safety-mistakes}.
\section{Choice of Programming Language}
Criteria for the choice of programming language are much different from choosing a language for other types of \glspl{app}.
This is a list of what is required for implementing an \glspl{OS}
This is a list of what is required for implementing an \gls{os}:
\begin{itemize}
\item{Raw access to \gls{CPU} instructions}
\item{Raw access to \gls{cpu} instructions}
\item{Deterministic temporal behavior}
\end{itemize}
% TODO: put in some scientific background about static checks
% * affine types
\chapter{Memory-Safety Analysis Techniques}
As per the previous \cref{context::common-mem-safety-mistakes} there is general awareness of the problems, and there has been ongoing effort to develop and improve techniques that assist the programmer to detect and avoid such mistakes first- or secondhand.
@ -598,6 +983,7 @@ Described by the maintainers, it is a "systems programming language that runs bl
- TODO: BSYS SS17 GITHUB IO Rust Memory Layout - 4
\subsection{Static Analyser}
%TODO: mention electrolyte, formal verification for Rust
\section{Language Features}
- TODO: How does static typing help with preventing programming errors

View file

@ -18,7 +18,7 @@ Memory management mechanisms are partially implemented in the target system's ha
\chapter{Final Conclusion}
Safety - or security for that matter - is not something that can be achieved absolutely.
It grows successively and gives the \gls{OS} developers and the end-users a \emph{feeling} of safety, until another vulnerability is found and disclosed.
It grows successively and gives the \gls{os} developers and the end-users a \emph{feeling} of safety, until another vulnerability is found and disclosed.
% TODO: repeat that rust *can* be used to increase safety in the OS,
% TODO: how?
@ -120,3 +120,55 @@ It grows successively and gives the \gls{OS} developers and the end-users a \emp
\fill[yellow] (pic cs:example2) circle (0.1cm);
\end{tikzpicture}
\end{listing}
\begin{tikzpicture}[node distance=2cm,
startstop/.style={rectangle, rounded corners, minimum width=3cm, minimum height=1cm,text centered, draw=black, fill=red!30},
io/.style = {trapezium, trapezium left angle=70, trapezium right angle=110, minimum width=3cm, minimum height=1cm, text centered, draw=black, fill=blue!30},
process/.style = {rectangle, minimum width=1cm, minimum height=1cm, text centered, text width=3cm, draw=black, fill=orange!30},
decision/.style = {diamond, minimum width=3cm, minimum height=1cm, text centered, draw=black, fill=green!30},
arrow/.style = {thick,->,>=stealth}
]
%\node (start) [startstop] {Start};
%\node (in1) [io, below of=start] {Input};
%\node (pro1) [process, below of=in1] {Process 1};
%\node (dec1) [decision, below of=pro1, yshift=-0.5cm] {Decision 1};
%\node (pro2a) [process, below of=dec1, yshift=-0.5cm] {Process 2a text text text text text text text text text text};
%\node (pro2b) [process, right of=dec1, xshift=2cm] {Process 2b};
%\node (out1) [io, below of=pro2a] {Output};
%\node (stop) [startstop, below of=out1] {Stop};
%
%\draw [arrow] (start) -- (in1);
%\draw [arrow] (in1) -- (pro1);
%\draw [arrow] (pro1) -- (dec1);
%\draw [arrow] (dec1) -- node[anchor=east] {yes} (pro2a);
%\draw [arrow] (dec1) -- node[anchor=south] {no} (pro2b);
%\draw [arrow] (pro2b) |- (pro1);
%\draw [arrow] (pro2a) -- (out1);
%\draw [arrow] (out1) -- (stop);
\node[process,xshift=0ex,yshift=-0ex] (ua_back) {User Applications};
\node[process,xshift=0ex,yshift=-1ex] at (ua_back) {User Applications};
\node[process,xshift=0ex,yshift=-2ex] (ua) at (ua_back) {User Applications};
\node[process,xshift=0ex,yshift=-0ex,below of=ua] (sl_back) {System Libraries};
\node[process,xshift=0ex,yshift=-1ex] at (sl_back) {System Libraries};
\node[process,xshift=0ex,yshift=-2ex] (sl) at (sl_back) {System Libraries};
\node[process,xshift=0ex,yshift=-0ex,below of=sl] (os_back) {OS};
\node[process,xshift=0ex,yshift=-1ex] at (os_back) {OS API};
\node[process,xshift=0ex,yshift=-2ex] (os) at (os_back) {OS};
\node[process,xshift=0ex,yshift=-0ex,left of=mem, below of=os] (cpu) {CPU};
\node[process,xshift=0ex,yshift=-0ex,right of=cpu] (mem) {Memory};
\node[process,xshift=0ex,yshift=-0ex,right of=mem] (otherhw) {Other HW};
\draw [arrow] (ua) -- (sl);
\draw [arrow] (sl) -- (os);
\draw [arrow] (os) -- (cpu);
\draw [arrow] (os) -- (mem);
\draw [arrow] (os) -- (otherhw);
TODO: improve
\end{tikzpicture}

View file

@ -1,6 +1,32 @@
% // vim: set ft=tex:
\chapter{Topic Refinement}
% TODO: is this chapter required?
\chapter{Refined Research Questions}
\section{Software Tests}
% TODO: describe that tests are mostly semantics as opposed to static checks being mostly syntactical and technical
% TODO: Are they necessary in addition to static checks to cover the well-known use-cases and edge-cases.
% TODO: example?
\section{Definition Of Additional Analysis Rules To Extend Safety Checks}
% TODO: How can Business Logical
% Examples:
% TLB needs to be reset on Task Change
% Registers need to be
\subsection{Paging}
Setting up and maintaining the paging-structure, as well as allocating physical memory for the virtual pages, is a complex task in the \gls{os}.
Developing this part of the \gls{os} is error-prone, and is not well-supported by mainstream \glspl{proglang}.
\section{Software Fault Isolation}
% TODO: content from \cite{Balasubramanian2017}
% TODO Which language items help with managing memory?
% TODO How generic can the memory allocators be written?
% TODO Guarantees to be statically checked:
% TODO * Control access to duplicates in page tables
% TODO * Tasks can't access unallocated (physical) memory
% TODO * Tasks can't access other tasks memory
\chapter{System Programming Conventions}
\label{rnd::sysprog-conventions}
@ -17,7 +43,7 @@ PUSH takes value operand which is to be pushed onto the stack.
The address in RSP moves towards numerically lower addresses with every PUSH instruction, which stores a new data entry on top.
The order is to first change the RSP and then copy the value at its new address.
POP takes a storage reference operand - \gls{CPU} register or memory address.
POP takes a storage reference operand - \gls{cpu} register or memory address.
It works in the opposite direction to PUSH.
First, consuming the top-most data entry and storing it on the operand location, then moving the RSP address towards the numerically higher RBP address.
@ -37,13 +63,14 @@ For example, PUSHing some value onto the stack before the end of the function wo
\paragraph{Called Procedure Setup} \emph{not} with ENTER and LEAVE.
When a procedure is called the stack is set up with the following four components
When a procedure is called, the stack is set up with the \gls{sf}, which consists of the four components listed in \cref{lst:amd64-stack-frame-components}\cite[p.~48]{AMD64Vol1}.
\begin{listing}[h]
\begin{enumerate}
\item{%
Parameters passed to the called procedure (created by the calling procedure). \\
\textit{Only if parameters don't fit the \gls{CPU} registers}
\textit{Only if parameters don't fit the \gls{cpu} registers}
}
\item{%
Return address (created by the CALL instruction). \\
@ -55,10 +82,13 @@ For example, PUSHing some value onto the stack before the end of the function wo
}
\item{%
Local variables used by the called procedure. \\
\textit{This includes the variables passed via \gls{CPU} registers}
\textit{This includes the variables passed via \gls{cpu} registers}
}
\end{enumerate}
only necessary when there aren't enough \gls{CPU} to pass the parameters.
\caption{\glsentrytext{amd64} Stack-Frame Components}
\label{lst:amd64-stack-frame-components}
\end{listing}
Item 1 is only necessary when there aren't enough \gls{cpu} registers to pass the parameters.
Item 3 is only necessary when
The \gls{amd64} manual also lists ENTER and LEAVE as instructions to \textit{"provide support for procedure calls, and are mainly used in high-level languages."}\cite[p.~48]{AMD64Vol1}.
@ -70,75 +100,13 @@ These instruction groups within the called procedure are called prologue and epi
\subsection{Full Procedure Call Example}
\label{context::introduction::hw-supported-mm::procedure-call-example}
This section combines the separate categories into one complete example that shows how the \gls{stack} is used by various \gls{CPU} instructions to perform procedure calls.
This section combines the separate categories into one complete example that shows how the \gls{stack} is used by various \gls{cpu} instructions to perform procedure calls.
The following code samples are extracted from a disassembled binary which was originally created using \gls{Rust}.
The Assembler that's shown uses Intel Mnemonic, which generally operates from right to left.
For example, \mint{nasm}{mov a, b} copies b to a.
\cref{code::context::examples::func-callee} shows the \gls{Rust} source code of the function \textit{sum}.
\cref{code::context::examples::func-callee-rust} shows the \gls{Rust} source code of the function \textit{sum}.
\section{4-Level Paging Hierarchy on \glsentrytext{amd64}}
\label{rnd::sysprog-conventions::paging-amd64}
On \gls{amd64} "a four-level page-translation data structure is provided to allow long-mode operating systems to translate a 64-Bit virtual-address space into a 52-Bit physical-address space."\cite[p.~18]{AMD64Vol2}.
This allows the system to only hold the \textit{PML4} table, the which is currently referenced by the \textit{Page Map Base Register (CR3)}, available in main memory.
\cref{fig:virtual-addr-transl} shows the 64-Bit virtual address composition on \gls{amd64}, which uses four-levels of page tables.
Counterintuitively the page-tables are not called level-\textit{n}-page-table, but the levels received distinct names in \citetitle{AMD64Vol2}.
The most-significant Bits labelled as \textit{Sign Extend} are not used for addressing purposes, but must adhere the canonical address form and simply repeat the value of the most-significant implemented Bit \cite[p.~130]{AMD64Vol2}.
The least significant Bits represent the offset within the physical page.
The four groups in between are used to index the page-table at their respective level.
\begin{figure}
\centering
\includegraphics[width=\textwidth]{gfx/Virtual-to-Physical-Address-Translation-Long-Mode.png}
\caption{Virtual to Physical Address in Long Mode\cite{AMD64Vol2}}
\label{fig:virtual-addr-transl}
\end{figure}
\subsubsection{Translation Scheme 4 KiB and 2 MiB Pages}
The \gls{amd64} architecture allows configuring the page-size, two of which will be introduced in this section.
\cref{tab:page-transl-vaddr-composition} displays the virtual address composition for the 4KiB and 2MiB page-size modes on \gls{amd64}.
The direction from top to bottom in the table corresponds to most significant to least significant - left to right - in the virtual address.
The \textit{sign extension} Bits cannot be used for actual information but act as a reservation for future architectural changes.
\begin{table}
\begin{tabular}{l | c | c}
Description & Bits in 4 KiB Pages & Bits in 2 MiB Pages \\
\hline
Sign Extend & 12 & 12 \\
Page-Map-Level-4 Offeset & 9 & 9 \\
Page-Directory-Pointer Offeset & 9 & 9 \\
Page-Directory Offeset & 9 & 9 \\
Page-Table Offeset & 9 & - \\
Physical Page Offset & 9 & 21 \\
\end{tabular}
\caption{Paging on \gls{amd64}: Virtual Address Composition 4KiB/2MiB pagesizes}
\label{tab:page-transl-vaddr-composition}
\end{table}
\begin{figure}
\centering
\includegraphics[width=\textwidth]{gfx/amd64-4kb-page-translation-long-mode}
\caption{4-Kbyte Page Translation—Long Mode\cite{AMD64Vol2}}
\label{fig:4kb-page-transl}
\end{figure}
\cref{fig:4kb-page-transl} shows the detailed virtual address composition for 4 KiB pages, using four levels of page-tables.
It uses four sets of 9-Bit indices in the virtual address, one per hierarchy level, followed by the 9 Bit page-internal offset.
An alternative approach is displayed in \cref{fig:2mb-page-transl}, using 2 MiB sized pages.
It uses three sets of 9-Bit indices for the page-tables, and a 21-Bit page-internal offset.
Increasing the page-size improves speed and memory-usage and decreases the granularity.
In this specific example the hierarchy is reduced by one level of page-tables.
This reduces the amount of storage required for the page-tables in overall and causes the lookup algorithm to finish faster.
\begin{figure}
\centering
\includegraphics[width=\textwidth]{gfx/amd64-2mb-page-translation-long-mode}
\caption{2-Mbyte Page Translation—Long Mode\cite{AMD64Vol2}}
\label{fig:2mb-page-transl}
\end{figure}
The other supported page sizes, 4 MiB and 1 GiB, as well as intermixing page sizes through the different levels don't add new insight into the mechanism and don't need to be detailed here.
% \subsubsection{Top-Level Page Table Self-Reference}
% \subsubsection{Caching Lookups}
@ -149,27 +117,30 @@ The other supported page sizes, 4 MiB and 1 GiB, as well as intermixing page siz
\begin{listing}[htb]
\tikzset{/minted/basename=callee-c}
\begin{minted}[autogobble,linenos,breaklines=true]{rust}
TODO
\end{minted}
\caption{The called function in \gls{Rust}}
\label{code::context::examples::func-callee-c}
\label{code::context::examples::func-callee-rust}
\end{listing}
\cref{code::context::examples::func-call} shows a snippet snippet of the calling function.
\cref{code::context::examples::func-call-asm} shows a snippet of the calling function.
It stores the arguments within the registers according to the System V X86\_64 calling convention. %TODO REFERENCE
The caller doesn't alter the stack-frame pointer (RBP) or the stack pointer (RSP) registers before call, hence the called function must restore these if it alters them.
\begin{listing}
\begin{minted}[escapeinside=??,highlightlines={},autogobble,linenos,breaklines=true]{rust}
TODO
\end{minted}
\caption{Procedure Call Example: Caller Rust}
\label{code::context::examples::func-call}
\label{code::context::examples::func-call-rust}
\end{listing}
\begin{listing}
\begin{minted}[escapeinside=??,highlightlines={},autogobble,linenos,breaklines=true]{nasm}
\end{minted}
TODO
\caption{Procedure Call Example: Caller Assembly}
\label{code::context::examples::func-call}
\label{code::context::examples::func-call-asm}
\end{listing}
% \balloon{comment}{
@ -250,18 +221,110 @@ $74f7: ret ; return to the caller, following the add
\caption{Memory Layout Throughout The Procedure Call Steps}
\label{fig:proc-call-example-mem}
\end{figure}
\FloatBarrier
\section{4-Level Paging Hierarchy on \glsentrytext{amd64}}
\label{rnd::sysprog-conventions::paging-amd64}
On \gls{amd64} "a four-level page-translation data structure is provided to allow long-mode operating systems to translate a 64-Bit virtual-address space into a 52-Bit physical-address space."\cite[p.~18]{AMD64Vol2}.
This allows the system to keep only the \textit{PML4} table that is currently referenced by the \textit{Page Map Base Register (CR3)} available in main memory.
\cref{fig:virtual-addr-transl} shows the 64-Bit virtual address composition on \gls{amd64}, which uses four-levels of page tables.
Counterintuitively the page-tables are not called level-\textit{n}-page-table, but the levels received distinct names in \citetitle{AMD64Vol2}.
The most-significant Bits labelled as \textit{Sign Extend} are not used for addressing purposes, but must adhere to the canonical address form and simply repeat the value of the most-significant implemented Bit \cite[p.~130]{AMD64Vol2}.
The least significant Bits represent the offset within the physical page.
The four groups in between are used to index the page-table at their respective level.
\begin{figure}
\centering
\includegraphics[width=\textwidth]{gfx/Virtual-to-Physical-Address-Translation-Long-Mode.png}
\caption{Virtual to Physical Address in Long Mode\cite{AMD64Vol2}}
\label{fig:virtual-addr-transl}
\end{figure}
\subsubsection{Translation Scheme 4 KiB and 2 MiB Pages}
The \gls{amd64} architecture allows configuring the page-size, two of which will be introduced in this section.
\cref{tab:page-transl-vaddr-composition} displays the virtual address composition for the 4KiB and 2MiB page-size modes on \gls{amd64}.
The direction from top to bottom in the table corresponds to most significant to least significant - left to right - in the virtual address.
The \textit{sign extension} Bits cannot be used for actual information but act as a reservation for future architectural changes.
\begin{table}
\begin{tabular}{l | c | c}
Description & Bits in 4 KiB Pages & Bits in 2 MiB Pages \\
\hline
Sign Extend & 16 & 16 \\
Page-Map-Level-4 Offset & 9 & 9 \\
Page-Directory-Pointer Offset & 9 & 9 \\
Page-Directory Offset & 9 & 9 \\
Page-Table Offset & 9 & - \\
Physical Page Offset & 12 & 21 \\
\end{tabular}
\caption{Paging on \gls{amd64}: Virtual Address Composition 4KiB/2MiB pagesizes}
\label{tab:page-transl-vaddr-composition}
\end{table}
\begin{figure}
\centering
\includegraphics[width=\textwidth]{gfx/amd64-4kb-page-translation-long-mode}
\caption{4-Kbyte Page Translation—Long Mode\cite{AMD64Vol2}}
\label{fig:4kb-page-transl}
\end{figure}
\cref{fig:4kb-page-transl} shows the detailed virtual address composition for 4 KiB pages, using four levels of page-tables.
It uses four sets of 9-Bit indices in the virtual address, one per hierarchy level, followed by the 12-Bit page-internal offset.
An alternative approach is displayed in \cref{fig:2mb-page-transl}, using 2 MiB sized pages.
It uses three sets of 9-Bit indices for the page-tables, and a 21-Bit page-internal offset.
Increasing the page-size improves speed and memory-usage and decreases the granularity.
In this specific example the hierarchy is reduced by one level of page-tables.
This reduces the amount of storage required for the page-tables overall and causes the lookup algorithm to finish faster.
\begin{figure}
\centering
\includegraphics[width=\textwidth]{gfx/amd64-2mb-page-translation-long-mode}
\caption{2-Mbyte Page Translation—Long Mode\cite{AMD64Vol2}}
\label{fig:2mb-page-transl}
\end{figure}
The other supported page sizes, 4 MiB and 1 GiB, as well as intermixing page sizes through the different levels don't add new insight into the mechanism and don't need to be detailed here.
\section{Interrupt Driven Preemptive Context Switches on \glsentrytext{amd64}}
\label{rnd::sysprog-conventions::ir-driven-preemptive-cs-amd64}
On \gls{amd64}, the \gls{CPU}'s interrupt mechanism does not switch the full context described previously, but only handles the registers that are necessary to successfully jump to the interrupt function: RFLAGS, RSP, RBP, RIP\footnote{Segment registers are neglected}.
On \gls{amd64}, the \gls{cpu}'s interrupt mechanism does not switch the full context described previously, but only handles the registers that are necessary to successfully jump to the interrupt function: RFLAGS, RSP, RBP, RIP\footnote{Segment registers are neglected}.
\subsection{Interrupts}
% TODO https://software.intel.com/sites/default/files/managed/39/c5/325462-sdm-vol-1-2abcd-3abcd.pdf p. 2848
\subsection{Context Content}
A description for \gls{amd64} is given in \cref{tab:task-minimum-context-registers}.
\begin{table}
\begin{tabularx}{\textwidth}{| c | X | X |}
\hline
\textbf{descriptive name} &
\textbf{register names on amd64} &
\textbf{description} \\
\hline
the instruction pointer register & RIP & address of the next instruction to be fetched \\
\hline
the stack pointer register & RSP & address of current position in stack \\
\hline
the flags register & RFLAGS & various attributes, e.g. the interrupt flag \\
\hline
all general-purpose registers & RAX, RBX, RCX, RDX, RDI, RSI, RBP, RSP, R8--R15 & arbitrary data \\
\hline
\end{tabularx}
\caption{Minimum Context Registers on amd64\cite[p.~28]{AMD64Vol2}}
\label{tab:task-minimum-context-registers}
\end{table}
\subsection{Storing The Context On The Stack}
In this scenario, the context is stored on the \gls{stack} of the function that is interrupted.
\Cref{fig:amd64-long-mode-interrupt-stac} pictures the \gls{stack} layout on interrupt entry.
In order to leverage an interrupt for a context switch, the interrupt function needs to replace these values on the \gls{stack} with values for the new context.
CS (Code-Segment) and SS (Stack-Segment) have no effect in \gls{amd64} 64-Bit mode\cite[p.~20]{AMD64Vol1} and can remain unchanged.
The \gls{OS} developer needs to know the exact address where on the \gls{stack} this data structure has been pushed by the \gls{CPU}, and must then manipulate these addresses directly.
The \gls{os} developer needs to know the exact address where on the \gls{stack} this data structure has been pushed by the \gls{cpu}, and must then manipulate these addresses directly.
This type of manipulation is inherently dangerous and can not be easily checked by the \gls{compiler}.
The function that handles the interrupt must then use the instruction \textit{iretq}\cite[p.~252]{AMD64Vol2}, to make the \gls{CPU} restore the partial context from the \gls{stack} and continue to function pointed to by the RIP.
The function that handles the interrupt must then use the instruction \textit{iretq}\cite[p.~252]{AMD64Vol2} to make the \gls{cpu} restore the partial context from the \gls{stack} and continue with the function pointed to by the RIP.
\begin{figure}
@ -271,40 +334,7 @@ The function that handles the interrupt must then use the instruction \textit{ir
\label{fig:amd64-long-mode-interrupt-stac}
\end{figure}
For a full context-switch, the other registers that are part of the context need to be handled by the \gls{OS}'s interrupt function.
\chapter{Research Questions}
Setting up and maintaining the paging-structure, as well as allocating physical memory for the virtual pages is a complex task in the \gls{OS}.
Developing this part of the \gls{OS} is error-prone, and is not well-supported by mainstream \glspl{proglang}.
\section{Definition Of Additional Analysis Rules To Extend Safety Checks}
% TODO: How can Business Logical
% Examples:
% TLB needs to be reset on Task Change
% Registers need to be
\subsubsection{Software Fault Isolation}
% TODO: content from \cite{Balasubramanian2017}
\subsection{More Detailed Research Questions}
% TODO Which language items help with managing memory?
% TODO How generic can the memory allocators be written?
% TODO Guarantees to be statically checked:
% TODO * Control access to duplicates in page tables
% TODO * Tasks can't access unallocated (physical) memory
% TODO * Tasks can't access other tasks memory
\subsection{Interrupts}
% TODO https://software.intel.com/sites/default/files/managed/39/c5/325462-sdm-vol-1-2abcd-3abcd.pdf p. 2848
\section{Software Tests}
% TODO: describe that tests are mostly semantics as opposed to static checks being mostly syntactical and technical
% TODO: They necessary in addition to static checks to cover the well-known use-cases and edge-cases.
% TODO: example?
For a full context-switch, the other registers that are part of the context need to be handled by the \gls{os}'s interrupt function.
\chapter{Porting \glsentrytext{C} Vulnerabilities}
\label{rnd::porting-c-vulns}
@ -312,8 +342,8 @@ In this chapter, the weakness manifestations from \cref{context::common-mem-safe
\chapter{\glsentrytext{LX} Modules Written In \glsentrytext{Rust}}
\chapter{Existing \glsentrytext{OS}-Development Projects Based On Rust}
\label{rnd::existing-os-dev-wity-rust}
\chapter{Existing \glsentrytext{os}-Development Projects Based On Rust}
\label{rnd::existing-os-dev-with-rust}
\section{Libraries}
@ -326,8 +356,9 @@ In this chapter, the weakness manifestations from \cref{context::common-mem-safe
\subsection{Blog OS}
\subsection{Redox}
\subsection{Tock}
%TODO: mention papers by tockos team
\chapter{\glsentrytext{imezzos}: Adding Preemptive \glsentrytext{OS}-Level Multitasking}
\chapter{\glsentrytext{imezzos}: Adding Preemptive \glsentrytext{os}-Level Multitasking}
\label{rnd::imezzos-preemptive-multitasking}
\section{Timed Interrupts For Scheduling and Dispatching}

View file

@ -3,6 +3,11 @@ Any changes to this file will be lost if it is regenerated by Mendeley.
BibTeX export options can be customized via Options -> BibTeX in Mendeley Desktop
@article{Junker,
author = {Junker, Stefan},
file = {:home/steveej/src/steveej/msc-thesis/src/docs/thesis.pdf:pdf},
title = {{Guarantees On In-Kernel Memory-Safety Using Rust's Static Code Analysis}}
}
@article{Lattner2005,
abstract = {The LLVM Compiler Infrastructure (http://llvm.cs. uiuc.edu) is a$\backslash$nrobust system that is well suited for a wide variety of research$\backslash$nand development work. This brief paper introduces the LLVM system$\backslash$nand provides pointers to more extensive documentation, complementing$\backslash$nthe tutorial presented at LCPC.},
archivePrefix = {arXiv},

View file

@ -11,10 +11,13 @@
\geometry{a4paper, top=25mm, left=30mm, right=35mm, bottom=35mm, headsep=10mm, footskip=12mm}
\usepackage{multirow,tabularx,tabu}
\usepackage{booktabs}
\usepackage{spreadtab}
\usepackage{hhline}
\renewcommand{\arraystretch}{1.2}
\usepackage{colortbl}
\usepackage[dvipsnames]{xcolor}
\usepackage{hhline}
\usepackage[backend=biber,style=numeric,citestyle=numeric,url=true]{biblatex}
\addbibresource{thesis.bib}
@ -39,23 +42,20 @@
\usetikzlibrary{tikzmark,mindmap}
\usetikzlibrary{chains,shapes.arrows, arrows, positioning,decorations.pathreplacing,bending}
\usetikzlibrary{calc}
\usetikzlibrary{matrix,shapes,arrows,positioning}
\usepackage{smartdiagram}
\usepackage{color}
\usepackage{caption}
\usepackage{subcaption}
\tikzset{/minted/basename/.initial=minted}
\appto\theFancyVerbLine{\tikzmark{\pgfkeysvalueof{/minted/basename}\arabic{FancyVerbLine}}}
\usepackage{amsmath}
\usepackage{caption}
\usepackage{subcaption}
\usepackage{wrapfig}
\usepackage[parfill]{parskip}
\usepackage{amsmath}
\usepackage{etoolbox}
\newcommand{\topic}{Guarantees On In-Kernel Memory-Safety Using Rust's Static Code Analysis}
\newcommand{\authorOne}{Stefan Junker}
\newcommand{\authorOneInit}{SJ}
\newcommand{\authorOnestreet}{Alemannenstr. 7}
@ -68,10 +68,10 @@
\newcommand{\studies}{Information Technology - Embedded And Mobile Systems}
\newcommand{\startdate}{2017/4/1}
\newcommand{\submitdate}{2017/9/29}
\newcommand{\buzzwords}{memory-safety, operating system, rust}
\newcommand{\buzzwords}{memory-safety, operating system development, rust, static software analysis, software vulnerability}
% Numbered Subsubsections
\setcounter{secnumdepth}{3}
\setcounter{secnumdepth}{5}
\date{Summersemester 2017}
\title{\topic}
@ -127,14 +127,17 @@
\newcommand{\cnameref}[1]{\cref{#1} \textit{(\nameref{#1})}}
\newcommand{\Cnameref}[1]{\Cref{#1} \textit{(\nameref{#1})}}
\newcommand{\cnamepref}[1]{\cref{#1} \textit{(\nameref{#1}, \cpageref{#1})}}
\newcommand{\Cnamepref}[1]{\Cref{#1} \textit{(\nameref{#1}, \cpageref{#1})}}
%\renewcommand\paragraph{\startsection{paragraph}{4}{\z}%
% {-3.25ex\plus -1ex \minus -.2ex}%
% {0.0001pt \plus 0.2ex}%
% {\normalfont\normalsize\bfseries}}
\renewcommand\subparagraph{\startsection{subparagraph}{5}{\z}%
{-3.25ex\plus -1ex \minus -.2ex}%
{0.0001pt \plus 0.2ex}%
{\normalfont\normalsize\bfseries}}
%\renewcommand\subparagraph{\startsection{subparagraph}{5}{\z}%
% {-3.25ex\plus -1ex \minus -.2ex}%
% {0.0001pt \plus 0.2ex}%
% {\normalfont\normalsize\bfseries}}
\newcommand{\iitemA}{\setlength\itemindent{0pt}\item}
\newcommand{\iitemB}{\setlength\itemindent{25pt}\item}
@ -163,12 +166,12 @@
\tikzset{east above/.code=\tikz@lib@place@handle@{#1}{south east}{0}{1}{north east}{1}}
\tikzset{east below/.code=\tikz@lib@place@handle@{#1}{north east}{0}{-1}{south east}{1}}
% Tikzmark code helpers {
% Tikzmark code helpers
\newcommand{\tikzmarkprefix}{\pgfkeysvalueof{/tikz/tikzmark prefix}}
\newcommand{\tikzmarkcountprep}[1]{%
\tikzset{tikzmark prefix=#1}%
%\tikzset{tikzmark prefixes/#1/counter/.initial=0}%
\newcounter{Tikzcounter#1}%
\setcounter{Tikzcounter#1}{0}%
}
\newcommand{\tikzmarkcount}[1][\tikzmarkprefix]{%
\stepcounter{Tikzcounter#1}%
@ -178,7 +181,13 @@
\expandafter\arabic\expandafter{Tikzcounter#1}%
}
\newcommand{\tikzmarkcircle}[1]{%
\tikz[baseline=-0.77ex]\fill circle[fill=black,radius=1.1ex] node[font=\small,color=white]{#1};
\tikz[baseline=-0.77ex]\fill circle[fill=black,radius=1.1ex] node[font=\small,color=white]{#1};%
}
\newcommand{\tikzmarkdrawcirclesarg}[1]{%
\begin{tikzpicture}[remember picture,overlay]
\foreach \x in {1,...,\expandafter\arabic{Tikzcounter#1}}
\fill (pic cs:\x)+(1.3ex,0.5ex) circle[fill=black,radius=1.1ex,anchor=west] node[font=\small,color=white]{$\x$};
\end{tikzpicture}%
}
\newcommand{\tikzmarkdrawcircles}{%
\begin{tikzpicture}[remember picture,overlay]
@ -187,7 +196,25 @@
\end{tikzpicture}%
}
% }
% capitalize the first letter of every word
\let\oldmakefirstuc\makefirstuc
\renewcommand*{\makefirstuc}[1]{%
\def\gls@add@space{}%
\mfu@capitalisewords#1 \@nil\mfu@endcap
}
\def\mfu@capitalisewords#1 #2\mfu@endcap{%
\def\mfu@cap@first{#1}%
\def\mfu@cap@second{#2}%
\gls@add@space
\oldmakefirstuc{#1}%
\def\gls@add@space{ }%
\ifx\mfu@cap@second\@nnil
\let\next@mfu@cap\mfu@noop
\else
\let\next@mfu@cap\mfu@capitalisewords
\fi
\next@mfu@cap#2\mfu@endcap
}
\makeatother
\include{glossary}

View file

@ -21,7 +21,7 @@
\begin{tabular}{p{3cm}p{10cm}}
Topic: & \textbf{\large \topic} \\[10ex]
Applicant: & \authorOne, \authorOnestreet, \authorOnezip$ $ \authorOnecity, \authorOneCountry \\
& Student Identification Number: \authorOneId\\[10ex]
Matr-Nr.: & \authorOneId\\[10ex]
1st Supervisor: & \supervisorOne\\
2nd Supervisor: & \supervisorTwo\\[10ex]
Start Date: & \startdate\\