diff --git a/.gitignore b/.gitignore index fa17137..80dded1 100644 --- a/.gitignore +++ b/.gitignore @@ -5,10 +5,10 @@ *.fls *.lof *.log -*.lol *.lot *.synctex.gz *.toc +src/docs/*.pdf *.dvi *.glo *.ist @@ -16,13 +16,4 @@ .dot/* *.out *.bbl -*-blx.bib -*.bcf *.blg -*.run.xml - -.vscode/ - -/src/docs/thesis.pdf -/docs/ -_minted*/ diff --git a/.vimrc b/.vimrc deleted file mode 100644 index 4044dab..0000000 --- a/.vimrc +++ /dev/null @@ -1,120 +0,0 @@ -set nocompatible - -" leader -let mapleader = '\' - -" save on ctrl-s -nnoremap :w -inoremap :w - -set hidden -syntax on -set hlsearch -set number -set relativenumber - -" mappings to stop insert mode -imap jjj -imap kkk -imap lll -imap hhh -set scroll=11 - -" new scroll mappings -noremap -noremap - -noremap :tabn -noremap :tabp -" TODO: get tab movement working with ctrl-alt-{n,p} -noremap :tabm +1 -noremap :tabm -1 -let g:ctrlp_map = '' -let g:ctrlp_custom_ignore = { -\ 'dir': '\v[\/]\.(git|hg|svn|)$$', -\ 'file': '\v\.(exe|so|dll|so|swp|zip|aux|log|fdb_latexmk|fdb|dvi|lof|lot|pdf|fls|toc|gz|latexmain)$$', -\ } - -" allways show status line -set ls=2 -set tabstop=4 -set shiftwidth=4 -set softtabstop=4 -set expandtab -"set textwidth=80 - -" sync default register to clipboard { -if has('unnamedplus') -set clipboard=unnamedplus -else -set clipboard=unnamed -endif -" } - -" colored brackets { -let g:rbpt_colorpairs = [ - \ ['brown', 'RoyalBlue3'], - \ ['Darkblue', 'SeaGreen3'], - \ ['darkgray', 'DarkOrchid3'], - \ ['darkgreen', 'firebrick3'], - \ ['darkcyan', 'RoyalBlue3'], - \ ['darkred', 'SeaGreen3'], - \ ['darkmagenta', 'DarkOrchid3'], - \ ['brown', 'firebrick3'], - \ ['gray', 'RoyalBlue3'], - \ ['black', 'SeaGreen3'], - \ ['darkmagenta', 'DarkOrchid3'], - \ ['Darkblue', 'firebrick3'], - \ ['darkgreen', 'RoyalBlue3'], - \ ['darkcyan', 'SeaGreen3'], - \ ['darkred', 'DarkOrchid3'], - \ ['red', 'firebrick3'], - \ ] -let g:rbpt_max = 16 -let g:rbpt_loadcmd_toggle = 0 - -au VimEnter * RainbowParenthesesToggle -au Syntax * 
RainbowParenthesesLoadRound -au Syntax * RainbowParenthesesLoadSquare -au Syntax * RainbowParenthesesLoadBraces -" } -set backspace=indent,eol,start -colorscheme PaperColor - -" Latex Related {{{ -au BufRead,BufNewFile *.tex,*.md,*.markdown setlocal spell spelllang=en_us - -let g:vimtex_view_method = 'zathura' -let g:vimtex_complete_enabled = 1 -let g:vimtex_complete_close_braces = 1 -let g:vimtex_complete_recursive_bib = 1 -let g:vimtex_indent_enabled = 1 -let g:vimtex_indent_bib_enabled = 1 -let g:vimtex_fold_enabled = 1 -let g:vimtex_fold_comments = 1 -let g:vimtex_fold_preamble = 1 - -let g:vimtex_latexmk_options = '-verbose -pdf -shell-escape -file-line-error -synctex=1 -interaction=nonstopmode' - -if !exists('g:ycm_semantic_triggers') - let g:ycm_semantic_triggers = {} -endif -let g:ycm_semantic_triggers.tex = [ - \ 're!\\[A-Za-z]*cite[A-Za-z]*(\[[^]]*\]){0,2}{[^}]*', - \ 're!\\[A-Za-z]*ref({[^}]*|range{([^,{}]*(}{)?))', - \ 're!\\hyperref\[[^]]*', - \ 're!\\includegraphics\*?(\[[^]]*\]){0,2}{[^}]*', - \ 're!\\(include(only)?|input){[^}]*', - \ 're!\\\a*(gls|Gls|GLS)(pl)?\a*(\s*\[[^]]*\]){0,2}\s*\{[^}]*', - \ 're!\\includepdf(\s*\[[^]]*\])?\s*\{[^}]*', - \ 're!\\includestandalone(\s*\[[^]]*\])?\s*\{[^}]*', - \ ] - -function! ViewerCallback() dict -call self.forward_search(self.out) -endfunction -let g:vimtex_view_zathura_hook_callback = 'ViewerCallback' -" }}} - -" hack to copy mendeley bibliography -autocmd BufWritePost * execute ':silent ! 
cp /home/steveej/src/mendeley/Static-Code-Analysis-Kernel-Memory-Saftey.bib /home/steveej/src/steveej/msc-thesis/src/docs/thesis.bib >/dev/null 2>&1' diff --git a/README.md b/README.md index 7a34020..113aedf 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,168 @@ -# Using Static Code Analysis To Improve Kernel-Task Memory-Safety +# Package-Centric Source-Based Container Build System -# Simple Motivational Golden Circle -* Describes what this project is about very briefly -* Helps to keep track of the goals +## Motivation +* There's no standardized way to create container images that include + applications built from upstream sources. +* There's no standardized way to share artifacts for containers instead of whole + containers. +* Application dependencies, which are mostly libraries, are typically + neglected when calculating container dependencies. As a result, it is unknown + which libraries are installed. -## Who? -* Primariliy: OS Software Developers -* Secondary: End Users +## Features +### Package-based container builds +This project aims to find a solution for creating and managing containers in a package-centric manner. +In this context a package is a definition of how to build a piece of software (e.g. a Gentoo ebuild or Debian control file). +Every container will be able to specify its dependencies, which can be self-built packages or exact versions of already existing packages. +This way the user will always know exactly which libraries, files, etc. are available to their container at runtime. +The specified container depends on packages, each of which can itself be packaged separately into its own container. -## Why? -* The Linux/C/C++ software is full of vulnerabilities -* A human solution is not reliable: - - Developers make mistakes anyway! 
-* C/C++ Compilers allow too many mistakes -* Hardware-supported memory-management with stack/heap is hard to implement safely -* Rust is a promising language +### Source-based builds +To allow maximum flexibility for users, the package files must be able to describe source builds, which allows the user to make changes to the source before the package is built and integrated into the target container image. -## How? -* Look for a *technical* solution for these issues by first analyzing the status quo: - - Which popular systems are most vulnerable? - - What exact software situations cause these vulnerabilities? -* Look through CVE database +### Reproducibility +Builds of packages, as well as builds of container images, must be reproducible in a way that the same input always yields the same output. +Because every change to a package results in a different identity, the system allows multiple versions of one software application to coexist. +These can differ in properties like source code patches, build configuration options, target architecture, etc. -## What? -* Find out if Rust can help with memory safety within the OS +### Portability of the whole system +As a portable system, builds can be processed locally on a user's workstation or distributed across multiple servers. + +### Shareable source and binary files +The build system will ship tools that make it easy to discover and share source and binary files that are respectively consumed and produced by the build system. +This will allow for very fast setups of containers that involve only downloading from trusted repositories. 
+ +# Usage +## Buildit configuration +**.buildit-config.yaml** +``` +--- +repository: + name: mysuperbinhost + upload-type: ssh + upload-path: containers@mysuperbinhost.org/containers + download-type: https + download-path: mysuperbinhost.org/containers +``` + +## Sysadmin needs patched nginx + +### Sysadmin +In case a sysadmin needs a patched and specifically configured version of its favorite webserver nginx. + +1. Put directories and files in place + + --- + Directory layout + ``` + ├── nginx-prod + │   ├── container.yaml + │   ├── files + │   │   └── nginx.conf + │   └── pkgs + │   └── nginx + │   ├── patches + │   │   └── https-only.patch + │   └── pkg.yaml + ``` + --- + **pkg.yaml** + ``` + --- + base: www-servers/nginx-1.7.6 + author: Sysadmin42 + patches: + patches/https-only.patch: "This patch denies all plain http requests" + https://github.com/nginx/nginx/commit/52e4dc2f74fd032dace01acbe5eb29ddf7c1ad96.patch: + "Fix buffer overruns" + use: + with: + - ipv6 + - selinux + + ``` + --- + **container.yaml** + ``` + --- + - vars: + author: Sysadmin42 + name: nginx-production + version: 1.7.6-p1 + os: linux + arch: amd64 + + - package: + type: embedded + path: ./pkgs/nginx + + - sync: + src: ./files/nginx.conf + dest: /etc/nginx/nginx.conf + recursive: True + chmod: 0644 + + - image: + type: aci + content: | + { + "acKind": "ImageManifest", + "acVersion": "0.6.1", + "name": "{{ name }}-{{ version }}", + "labels": [ + {"name": "os", "value": "{{ os }}"}, + {"name": "arch", "value": "{{ arch }}"} + ], + "app": { + "exec": [ + "/sbin/nginx" + ], + "user": "0", + "group": "0" + } + } + ``` + +2. Build the container + ``` + $ buildit nginx-prod/ --discover=github.com/sysadmin42/containers,push=True + Building Sysadmin42/nginx-production-1.7.6-p1 + Processing package from './pkgs/nginx' for linux/amd64. + HASH: 86c8ef43-f4a4-49ba-a0ee-92900211c7b6 + Can't find HASH in any known location... + Defaulting to local build... 
[OK] + Uploading packages to 'mysuperbinhost' [OK] + Packaging Sysadmin42/nginx-production-1.7.6-p1 as ACI... [OK] + Uploading container spec and image(s) to 'mysuperbinhost' [OK] + ``` + +# Implementation +## Resources +### CoreOS related tools + * https://github.com/derekchiang/acbuild + * https://github.com/coreos/manifest + +### Similar Projects + * Previous work of mine: https://embedux.github.io/documentation/usage/rootfs/configuration.yml/index.html + +### Operating Systems and Package managers + * http://nixos.org/nixos/about.html + * https://gitweb.gentoo.org/proj/releng.git/tree/releases/weekly/specs/amd64?id=HEAD + * https://github.com/zefhemel/nix-docker + * [nix build farm + paper](http://www.researchgate.net/publication/228629017_The_Nix_Build_Farm_A_declarative_approach_to_continuous_integration) + * https://blogs.gentoo.org/zmedico/2015/07/06/tardelta-generate-a-tarball-of-differences-between-two-tarballs/ + * https://github.com/jordansissel/fpm/wiki + +## Outlook +The completion of the described container build system will greatly improve how container images can be shared and deployed. + +### Trusted Containers by reproducibility +Trusting container images has been hard. Being able to reproduce and verify the builds improves this. + +### Obsolete Container-Vulnerabilities Scans +Vulnerability scans are only necessary if it's unknown what the container image contains. With the new build system the build specification makes it possible to inspect the included container images much more efficiently. Image vendors can directly track contained packages and their CVEs instead of relying on a posteriori scans. + +### Automatic Container Updates +Once identified, regular and security updates to 3rd-party packages can trigger rebuilds, as can changed source files of 1st-party applications. The update cycle can be closed by automatically deploying new containers triggered by the updated images. 
+Complete automation might be difficult in real-world deployments because software updates sometimes require configuration changes. diff --git a/build/texlive-url-mirror.patch b/build/texlive-url-mirror.patch deleted file mode 100644 index 04c4f10..0000000 --- a/build/texlive-url-mirror.patch +++ /dev/null @@ -1,27 +0,0 @@ -diff --git a/pkgs/tools/typesetting/tex/texlive/bin.nix b/pkgs/tools/typesetting/tex/texlive/bin.nix -index 5591f64cee..2bdbfd50a7 100644 ---- a/pkgs/tools/typesetting/tex/texlive/bin.nix -+++ b/pkgs/tools/typesetting/tex/texlive/bin.nix -@@ -20,7 +20,7 @@ let - common = rec { - src = fetchurl { - url = # "ftp://tug.org/historic/systems/texlive/${year}/" -- "http://lipa.ms.mff.cuni.cz/~cunav5am/nix/texlive-2016" # FIXME: a proper mirror -+ "http://146.185.144.154/texlive-2016/" - + "/texlive-${year}0523b-source.tar.xz"; - sha256 = "1v91vahxlxkdra0qz3f132vvx5d9cx2jy84yl1hkch0agyj2rcx8"; - }; -diff --git a/pkgs/tools/typesetting/tex/texlive/default.nix b/pkgs/tools/typesetting/tex/texlive/default.nix -index 692f6b1772..21463dc367 100644 ---- a/pkgs/tools/typesetting/tex/texlive/default.nix -+++ b/pkgs/tools/typesetting/tex/texlive/default.nix -@@ -109,8 +109,7 @@ let - fixedHash = fixedHashes.${tlName} or null; # be graceful about missing hashes - - url = args.url or "${urlPrefix}/${urlName}.tar.xz"; -- urlPrefix = args.urlPrefix or -- http://lipa.ms.mff.cuni.cz/~cunav5am/nix/texlive-2016; -+ urlPrefix = http://146.185.144.154/texlive-2016; - # XXX XXX XXX FIXME: mirror the snapshot XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX - # ("${mirror}/pub/tex/historic/systems/texlive/${bin.texliveYear}/tlnet-final/archive"); - #mirror = "http://ftp.math.utah.edu"; diff --git a/build/update-minted-2.5.patch b/build/update-minted-2.5.patch deleted file mode 100644 index 699fc80..0000000 --- a/build/update-minted-2.5.patch +++ /dev/null @@ -1,41 +0,0 @@ -diff --git a/pkgs/tools/typesetting/tex/texlive/pkgs.nix 
b/pkgs/tools/typesetting/tex/texlive/pkgs.nix -index 4d891c0758..09c413559f 100644 ---- a/pkgs/tools/typesetting/tex/texlive/pkgs.nix -+++ b/pkgs/tools/typesetting/tex/texlive/pkgs.nix -@@ -5713,6 +5713,7 @@ tl: { # no indentation - deps."fundus-calligra" = tl."fundus-calligra"; - deps."fundus-cyr" = tl."fundus-cyr"; - deps."fundus-sueterlin" = tl."fundus-sueterlin"; -+ deps."fvextra" = tl."fvextra"; - deps."fwlw" = tl."fwlw"; - deps."g-brief" = tl."g-brief"; - deps."gauss" = tl."gauss"; -@@ -16203,13 +16204,24 @@ tl: { # no indentation - hasRunfiles = true; - version = "1.1"; - }; -+"fvextra" = { -+ stripPrefix = 0; -+ sha512.run = "3526c3656124b15217161792f14f914dad2ec84af7ffd85f95654701c82e3be6bde304df0ca1ba8b4eb943122e731fe7c059a0d8ba7554f49a5401be6b709d8a"; -+ sha512.doc = "919cff94f930cb624723fdca66033df39fee4dfe391df4ec592ded2a5c9ae59a9f29594ebdb09c9d9e54b5afc9dd77591258155530aa7c8fc45361b8dc31e8c3"; -+ sha512.source = "934d419ca8e4c748f0d7d41f520abd07df4682295c6eef3390b9d2d628469d0d84ff7ffb707f1ee70b83b0fdf42b16ad9ad350c4b5166b28117543dde9d7d049"; -+ urlPrefix = "http://213.136.89.14/texlive-mirror"; -+ hasRunfiles = true; -+ version = "1.3.1"; -+}; - "minted" = { -+ deps."fvextra" = tl."fvextra"; - stripPrefix = 0; -- sha512.run = ""; -- sha512.doc = ""; -- sha512.source = ""; -+ sha512.run = "6bfff77120a70214f492f34b31bc7354f75173e742b0d40b76116e78f18d471946175e7cae1d51d0f2b948bf566c8e44c755f1d98a338b8a0b1b0cfb6b77f462"; -+ sha512.doc = "013bab6a7abfff35d5316ec335ddb13e8c91e918ef78d9e49bc393be3cc6e71f22c740ff862045a5d20bbfc1a508bbc272f0dbc668db3ffc657dafb0ddfbc45c"; -+ sha512.source = "3ae1e00adeff4ff117be3c35120d0c94569f62c7e4b6c8640236fbd52a9ceb20fce24b87faf0d5cc2b9295fd5a3689cda5eb01dc2c3897e8a2f228326ae6cdb6"; -+ urlPrefix = "http://213.136.89.14/texlive-mirror"; - hasRunfiles = true; -- version = "2.2.1"; -+ version = "2.5"; - }; - "mintspirit" = { - stripPrefix = 0; diff --git a/nginx-prod/container.yaml b/nginx-prod/container.yaml new file mode 
100644 index 0000000..9615e63 --- /dev/null +++ b/nginx-prod/container.yaml @@ -0,0 +1,35 @@ +--- +- vars: + author: Sysadmin42 + name: nginx-production + version: 1.7.6-p1 + +- package: + type: embedded + path: ./pkgs/nginx + +- sync: + src: ./files/nginx.conf + dest: /etc/nginx/nginx.conf + recursive: True + chmod: 0644 + +- image: + type: aci + content: | + { + "acKind": "ImageManifest", + "acVersion": "0.6.1", + "name": "{{ name }}-{{ version }}", + "labels": [ + {"name": "os", "value": "linux"}, + {"name": "arch", "value": "amd64"} + ], + "app": { + "exec": [ + "/sbin/nginx" + ], + "user": "0", + "group": "0" + } + } diff --git a/nginx-prod/files/nginx.conf b/nginx-prod/files/nginx.conf new file mode 100644 index 0000000..f747925 --- /dev/null +++ b/nginx-prod/files/nginx.conf @@ -0,0 +1 @@ +DUMMY diff --git a/nginx-prod/pkgs/nginx/patches/https-only.patch b/nginx-prod/pkgs/nginx/patches/https-only.patch new file mode 100644 index 0000000..f747925 --- /dev/null +++ b/nginx-prod/pkgs/nginx/patches/https-only.patch @@ -0,0 +1 @@ +DUMMY diff --git a/nginx-prod/pkgs/nginx/pkg.yaml b/nginx-prod/pkgs/nginx/pkg.yaml new file mode 100644 index 0000000..fc66cf7 --- /dev/null +++ b/nginx-prod/pkgs/nginx/pkg.yaml @@ -0,0 +1,10 @@ +--- +base: www-servers/nginx-1.7.6 +author: Sysadmin42 +patches: + patches/https-only.patch: "This patch denies all plain http requests" + https://github.com/nginx/nginx/commit/52e4dc2f74fd032dace01acbe5eb29ddf7c1ad96.patch: "Fix buffer overruns" +use: + with: + - ipv6 + - selinux diff --git a/shell.nix b/shell.nix index e376613..f1b379d 100644 --- a/shell.nix +++ b/shell.nix @@ -1,30 +1,49 @@ let nixpkgs = import {}; - patchedPkgsSrc = nixpkgs.stdenv.mkDerivation { - name = "patchedPkgsSrc"; - src = nixpkgs.pkgs.fetchFromGitHub { - owner = "nixos"; - repo = "nixpkgs-channels"; - rev = "81fceb255448415e70b9e7775d590b6def45f861"; - sha256 = "0sfx21b9rb6qxjm7li3krk6ik0xxph1il7r5l69n8b9agp72yjfx"; -# rev = 
"1b1fc6550559f9d73ddf7cea611c387a847bf03b"; -# sha256 = "0gipwxghvwnv2n7csp8ks3l2g1z7hwqn96bljikkm7p8jjpfb5ds"; - }; - patches = [ - #./build/texlive-url-mirror.patch - ./build/update-minted-2.5.patch - ]; - buildPhase = ""; - installPhase = '' - cp -a . $out - ''; - fixupPhase = "true"; - postFixup = "true"; + pkgsSrc = nixpkgs.pkgs.fetchFromGitHub { + owner = "nixos"; + repo = "nixpkgs-channels"; + rev = "adfcc2d9531e78bf6a9e3b56e2f4fc873cb3d87b"; + sha256 = "101mh6gb9wlx0lq5pw2m0n1fi3h2pag3ndg184rrqzvn9ynp28iw"; }; - pkgs = import patchedPkgsSrc {}; + pkgs = import pkgsSrc {}; + + eqexam.pkgs = [ + (pkgs.stdenv.mkDerivation rec { + version = "2012-05-17"; + pname = "eqexam"; + name = "${pname}-${version}"; + tlType = "run"; + + src = pkgs.fetchurl { + url = "http://www.math.uakron.edu/~dpstory/eqexam/eqexam_pack.zip"; + sha256 = "0g4w9ma6cr277li5b8dps9fm9jkjlwzwjc6ix01mw4kva4h5fdrj"; + }; + + buildInputs = [ pkgs.unzip pkgs.texlive.combined.scheme-basic ]; + + buildPhase = " + latex eqexam.ins + "; + + installPhase = " + mkdir -p $out/tex/latex/eqexam + cp -va *.sty *.cfg *.def $out/tex/latex/eqexam + + mkdir -p $out/doc/latex/eqexam + cp -va doc/* $out/doc/latex/eqexam + "; + + meta = { + branch = "3"; + platforms = pkgs.stdenv.lib.platforms.unix; + }; + }) + ]; mytexlive = (pkgs.texlive.combine { - inherit (pkgs.texlive) scheme-full minted pygmentex fvextra; + inherit (pkgs.texlive) scheme-full; + inherit eqexam; }); in pkgs.stdenv.mkDerivation { @@ -34,8 +53,7 @@ in pkgs.stdenv.mkDerivation { $PWD/src/docs/thesis.tex \ $PWD/src/docs/glossary.tex \ $PWD/src/docs/parts/context/context.tex \ - $PWD/src/docs/parts/research_and_development/research_and_development.tex \ - $PWD/src/docs/parts/eval_and_conclusion/eval_and_conclusion.tex \ + $PWD/src/docs/parts/research/research.tex \ " ''; buildInputs = [ @@ -43,9 +61,109 @@ in pkgs.stdenv.mkDerivation { (pkgs.vim_configurable.customize { name = "vim"; vimrcConfig = { - - customRC = '' - source .vimrc + # add custom 
.vimrc lines like this: + customRC = '' + set nocompatible + + " leader + let mapleader = ',' + + set hidden + syntax on + set hlsearch + set number + + " mappings to stop insert mode + imap jjj + imap kkk + imap lll + imap hhh + set scroll=11 + + noremap :tabn + noremap :tabp + let g:ctrlp_map = '' + let g:ctrlp_custom_ignore = { + \ 'dir': '\v[\/]\.(git|hg|svn|)$$', + \ 'file': '\v\.(exe|so|dll|so|swp|zip|aux|log|fdb_latexmk|fdb|dvi|lof|lot|pdf|fls|toc|gz|latexmain)$$', + \ } + + " allways show status line + set ls=2 + set tabstop=4 + set shiftwidth=4 + set softtabstop=4 + set expandtab + "set textwidth=80 + + " sync default register to clipboard { + if has('unnamedplus') + set clipboard=unnamedplus + else + set clipboard=unnamed + endif + " } + + " colored brackets { + let g:rbpt_colorpairs = [ + \ ['brown', 'RoyalBlue3'], + \ ['Darkblue', 'SeaGreen3'], + \ ['darkgray', 'DarkOrchid3'], + \ ['darkgreen', 'firebrick3'], + \ ['darkcyan', 'RoyalBlue3'], + \ ['darkred', 'SeaGreen3'], + \ ['darkmagenta', 'DarkOrchid3'], + \ ['brown', 'firebrick3'], + \ ['gray', 'RoyalBlue3'], + \ ['black', 'SeaGreen3'], + \ ['darkmagenta', 'DarkOrchid3'], + \ ['Darkblue', 'firebrick3'], + \ ['darkgreen', 'RoyalBlue3'], + \ ['darkcyan', 'SeaGreen3'], + \ ['darkred', 'DarkOrchid3'], + \ ['red', 'firebrick3'], + \ ] + let g:rbpt_max = 16 + let g:rbpt_loadcmd_toggle = 0 + + au VimEnter * RainbowParenthesesToggle + au Syntax * RainbowParenthesesLoadRound + au Syntax * RainbowParenthesesLoadSquare + au Syntax * RainbowParenthesesLoadBraces + " } + set backspace=indent,eol,start + colorscheme PaperColor + + " Latex Related {{{ + au BufRead,BufNewFile *.tex,*.md,*.markdown setlocal spell spelllang=en_us + + let g:vimtex_view_method = 'zathura' + + let g:vimtex_complete_enabled = 1 + let g:vimtex_complete_close_braces = 1 + let g:vimtex_complete_recursive_bib = 1 + let g:vimtex_indent_enabled = 1 + let g:vimtex_indent_bib_enabled = 1 + + if !exists('g:ycm_semantic_triggers') + let 
g:ycm_semantic_triggers = {} + endif + let g:ycm_semantic_triggers.tex = [ + \ 're!\\[A-Za-z]*cite[A-Za-z]*(\[[^]]*\]){0,2}{[^}]*', + \ 're!\\[A-Za-z]*ref({[^}]*|range{([^,{}]*(}{)?))', + \ 're!\\hyperref\[[^]]*', + \ 're!\\includegraphics\*?(\[[^]]*\]){0,2}{[^}]*', + \ 're!\\(include(only)?|input){[^}]*', + \ 're!\\\a*(gls|Gls|GLS)(pl)?\a*(\s*\[[^]]*\]){0,2}\s*\{[^}]*', + \ 're!\\includepdf(\s*\[[^]]*\])?\s*\{[^}]*', + \ 're!\\includestandalone(\s*\[[^]]*\])?\s*\{[^}]*', + \ ] + + function! ViewerCallback() dict + call self.forward_search(self.out) + endfunction + let g:vimtex_view_zathura_hook_callback = 'ViewerCallback' + " }}} ''; vam.knownPlugins = pkgs.vimPlugins; @@ -61,13 +179,7 @@ in pkgs.stdenv.mkDerivation { ]; }; }) - pkgs.bashInteractive - mytexlive - pkgs.biber - pkgs.pygmentex - pkgs.python27Packages.pygments-markdown-lexer - pkgs.zathura ]; } diff --git a/src/code/llc-arglist/arglist.cc b/src/code/llc-arglist/arglist.cc deleted file mode 100644 index 8780ca4..0000000 --- a/src/code/llc-arglist/arglist.cc +++ /dev/null @@ -1,12 +0,0 @@ -#include "llvm/Support/CommandLine.h" - -using namespace llvm; - -/* - * Call with `--help-list-hidden` as argument to get a full list - */ -int main(int argc, char** argv) { - cl::ParseCommandLineOptions(argc, argv, ""); - - return 0; -} diff --git a/src/code/llc-arglist/run.sh b/src/code/llc-arglist/run.sh deleted file mode 100755 index 62c098e..0000000 --- a/src/code/llc-arglist/run.sh +++ /dev/null @@ -1,2 +0,0 @@ -g++ arglist.cc -o arglist -std=gnu++11 -lLLVM-4.0.1 -./arglist --help-list-hidden diff --git a/src/code/stack_handling.c/makefile b/src/code/stack_handling.c/makefile deleted file mode 100644 index d9a25ad..0000000 --- a/src/code/stack_handling.c/makefile +++ /dev/null @@ -1,27 +0,0 @@ -TARGET = stack_handling -LIBS = -CC = gcc -CFLAGS = -Wall -fomit-frame-pointer -fstack-check #-fsanitize=address - -.PHONY: default all clean - -default: $(TARGET) -all: default objdump - -OBJECTS = $(patsubst %.c, %.o, 
$(wildcard *.c)) -HEADERS = $(wildcard *.h)GG - -%.o: %.c $(HEADERS) - $(CC) $(CFLAGS) -c $< -o $@ - -.PRECIOUS: $(TARGET) $(OBJECTS) - -$(TARGET): $(OBJECTS) - $(CC) $(OBJECTS) $(CFLAGS) $(LIBS) -o $@ - -clean: - -rm -f *.o - -rm -f $(TARGET) - -objdump: $(TARGET) - objdump --no-show-raw-insn --disassembler-options=intel-nmemonic -d $(TARGET) > $@ \ No newline at end of file diff --git a/src/code/stack_handling.c/stack_handling.c b/src/code/stack_handling.c/stack_handling.c deleted file mode 100644 index 8a3f7d9..0000000 --- a/src/code/stack_handling.c/stack_handling.c +++ /dev/null @@ -1,84 +0,0 @@ -#include -#include - -static int64_t passthrough(int64_t a) { return a; } - -static int64_t neg(int64_t a) { return -a; } - -static int64_t neg_extravar(int64_t a) { - int64_t neg = -a; - return neg; -} - -static int64_t rec_many_args(int64_t rdi, int64_t rsi, int64_t rdx, int64_t rcx, - int64_t r8, int64_t r9, int64_t s1) { - if (s1 == INT64_MIN) { - return INT64_MIN; - } else { - return rec_many_args(rdi, rsi, rdx, rcx, r8, r9, s1 - 1); - } -} - -static int64_t many_args(int64_t rdi, int64_t rsi, int64_t rdx, int64_t rcx, - int64_t r8, int64_t r9, int64_t s1) { - - int64_t s2 = - rec_many_args(0xfffffffffffffff0, 0xfffffffffffffff1, 0xfffffffffffffff3, - 0xfffffffffffffff4, 0xfffffffffffffff5, 0xfffffffffffffff6, - INT64_MIN + 80000); - return s2; -} - -static void printer(int64_t *a, int64_t *b) { - fprintf(stdout, "%" PRId64 " = -%" PRId64 "\n", *a, *b); -} - -static void caller(void) { - int64_t a = passthrough(INT64_MAX); - int64_t b = neg(a); - printer(&a, &b); -} - -static int *grow_stack(int size) { - int a[size]; - for (int i = 0; i < size; i++) { - a[i] = 0; - } - return a; -} - -static int64_t *large_stack() { - uint64_t a[100000000]; - return a; -} - -static void simple_printer(void) { fprintf(stderr, "I wonder who called me?"); } - -static void modifier(void) { - uint64_t *p; - // without frame-pointer - // *(&p + 1) = (uint64_t *)simple_printer; - - // 
with frame-pointer - *(&p + 2) = (uint64_t *)simple_printer; -} - -static void modifier_indexed(uint64_t *p) { - // without frame-pointer - (&p)[1] = (uint64_t *)simple_printer; - - // with frame-pointer - (&p)[2] = (uint64_t *)simple_printer; -} - -int main(void) { - // caller(); - // many_args(0xfffffffffffffff0, 0xfffffffffffffff1, 0xfffffffffffffff3, - // 0xfffffffffffffff4, 0xfffffffffffffff5, 0xfffffffffffffff6, - // 0xfffffffffffffff7); - // modifier_indexed(NULL); - // modifier(); - large_stack(); - fprintf(stderr, "main exiting"); - return 0; -} \ No newline at end of file diff --git a/src/code/stack_handling.rs/Cargo.lock b/src/code/stack_handling.rs/Cargo.lock deleted file mode 100644 index ace96fe..0000000 --- a/src/code/stack_handling.rs/Cargo.lock +++ /dev/null @@ -1,4 +0,0 @@ -[root] -name = "stack_handling" -version = "0.1.0" - diff --git a/src/code/stack_handling.rs/Cargo.toml b/src/code/stack_handling.rs/Cargo.toml deleted file mode 100644 index 025cb7f..0000000 --- a/src/code/stack_handling.rs/Cargo.toml +++ /dev/null @@ -1,13 +0,0 @@ -[package] -name = "stack_handling" -version = "0.1.0" -authors = ["Stefan Junker "] -build = "build.rs" - -[dependencies] - -[profile.dev] -panic = "abort" - -[profile.release] -panic = "abort" \ No newline at end of file diff --git a/src/code/stack_handling.rs/build.rs b/src/code/stack_handling.rs/build.rs deleted file mode 100644 index 220a697..0000000 --- a/src/code/stack_handling.rs/build.rs +++ /dev/null @@ -1,10 +0,0 @@ -// build.rs - -use std::env; -use std::fs::File; -use std::io::Write; -use std::path::Path; - -fn main() { - println!("build.rs ran"); -} \ No newline at end of file diff --git a/src/code/stack_handling.rs/src/main.rs b/src/code/stack_handling.rs/src/main.rs deleted file mode 100644 index d47839e..0000000 --- a/src/code/stack_handling.rs/src/main.rs +++ /dev/null @@ -1,84 +0,0 @@ -#![feature(naked_functions)] -#![deny(unconditional_recursion)] - -// #[derive(Debug)] -// struct Stat { -// sum: 
isize, -// count: isize, -// avg: isize, -// min: isize, -// max: isize, -// } -// -// macro_rules! sum { -// () => (0); -// ($e:expr) => ( $e ); -// ($head:expr, $($tail:expr),*) => { $head + sum!($($tail),*)}; -// } -// -// macro_rules! count { -// () => (0); -// ($e:expr) => ( 1 ); -// ($head:expr, $($tail:expr),*) => { 1 + count!($($tail),*) }; -// } -// -// #[inline(never)] -// fn push(a: isize, b: isize, c: isize) -> isize { -// sum!(a, b, c) -// } -// -// macro_rules! avg{ -// ($($all:expr),+) => { -// sum!($($all),+) / count!($($all),+) -// }; -// } -// -// macro_rules! max { -// ($x:expr) => ( $x ); -// ($x:expr, $($xs:expr),+) => { -// { -// use std::cmp::max; -// max($x, max!( $($xs),+ )) -// } -// }; -// } -// -// macro_rules! min { -// ($x:expr) => ( $x ); -// ($x:expr, $($xs:expr),+) => { -// { -// use std::cmp::min; -// min($x, min!( $($xs),+ )) -// } -// }; -// } -// #[inline(never)] -// fn stats(a: isize, b: isize, c: isize) -> Stat { -// let sum = sum(a, b, c); -// let count = count!(a, b, c); -// let avg = avg!(a, b, c); -// let min = min!(a, b, c); -// let max = max!(a, b, c); -// Stat { -// sum, -// count, -// avg, -// min, -// max, -// } -// } -// -// #[inline(never)] -// fn sum(a: isize, b: isize, c: isize) -> isize { -// sum!(a, b, c) -// } -#[inline(never)] -fn passthrough(a: isize) -> isize { - let b = a; - b -} - -#[inline(never)] -fn r(); - println!("main exiting") -} \ No newline at end of file diff --git a/src/docs/abstract.tex b/src/docs/abstract.tex index 0e705fb..1e5f7ca 100644 --- a/src/docs/abstract.tex +++ b/src/docs/abstract.tex @@ -1,4 +1,3 @@ -% // vim: set ft=tex: \begin{center} {\Large \textbf{Abstract}} \end{center} @@ -14,9 +13,9 @@ Supervisors: & \supervisorOne \\[.5ex] & \supervisorTwo\\ & \\ - Submission: & \submitdate \\ + Submition: & \submitdate \\ & \\ - Categories: & \buzzwords \\ + Buzzwords: & \buzzwords \\ & \\ \end{tabular} \end{center} @@ -24,4 +23,4 @@ \bigskip \noindent -\abstract +TODO: abstract goes here 
diff --git a/src/docs/affidavit.tex b/src/docs/affidavit.tex deleted file mode 100644 index 3700552..0000000 --- a/src/docs/affidavit.tex +++ /dev/null @@ -1,26 +0,0 @@ -% // vim: set ft=tex: - -\chapter*{Ehrenwörtliche Erklärung} - -Hiermit erkläre ich, {\authorOne}, geboren am {\authorOneBirthDate} in {\authorOneBirthCity}, - -\begin{enumerate} - \item{ - dass ich meine Masterarbeit mit dem Titel: - - {"\topic"} - - in der Fakultät Informatik unter Anleitung von Professor {\supervisorOne} und ohne fremde Hilfe angefertigt habe und keine anderen als die angeführten Hilfen benutzt habe; - } - \item{ - dass ich die Übernahme wörtlicher Zitate, von Tabellen, Zeichnungen, Bildern und Programmen aus der Literatur oder anderen Quellen (Internet) sowie die Verwendung der Gedanken anderer Autoren an den entsprechenden Stellen innerhalb der Arbeit gekennzeichnet habe; - } - \item{ - dass die eingereichten Abgabe-Exemplare in Papierform und im PDF-Format vollständig übereinstimmen. - } -\end{enumerate} - -Ich bin mir bewusst, dass eine falsche Erklärung rechtliche Folgen haben wird. 
- -\vspace{4cm} -Konstanz, 29.9.2017\hspace{5cm} \authorOne diff --git a/src/docs/cover.tex b/src/docs/cover.tex deleted file mode 100644 index a4e0825..0000000 --- a/src/docs/cover.tex +++ /dev/null @@ -1,26 +0,0 @@ -\begin{titlepage} - -\vspace*{-1.0cm} - -\begin{centering} - \includegraphics[width=\textwidth]{gfx/htwg-logo.pdf} -\end{centering} - -\vspace{1.5cm} - -\begin{center} - \huge{ - \textbf{\topic} \\[4cm] - } - \Large{ - \textbf{\authorOne}} \\[5.5cm] - \large{ - \textbf{Konstanz, \submitdate} \\[2.3cm] - } - - \Huge{ - \textbf{{\textsf Masterarbeit}} - } -\end{center} - -\end{titlepage} diff --git a/src/docs/gfx/Relative-Vulnerability-Type-Totals-By-Year-Legend.png b/src/docs/gfx/Relative-Vulnerability-Type-Totals-By-Year-Legend.png deleted file mode 100644 index 761fcae..0000000 Binary files a/src/docs/gfx/Relative-Vulnerability-Type-Totals-By-Year-Legend.png and /dev/null differ diff --git a/src/docs/gfx/Relative-Vulnerability-Type-Totals-By-Year.png b/src/docs/gfx/Relative-Vulnerability-Type-Totals-By-Year.png deleted file mode 100644 index a4d9536..0000000 Binary files a/src/docs/gfx/Relative-Vulnerability-Type-Totals-By-Year.png and /dev/null differ diff --git a/src/docs/gfx/TODO-Callstacklayout.png b/src/docs/gfx/TODO-Callstacklayout.png deleted file mode 100644 index 5004c03..0000000 Binary files a/src/docs/gfx/TODO-Callstacklayout.png and /dev/null differ diff --git a/src/docs/gfx/TODO-Callstacklayout.svg b/src/docs/gfx/TODO-Callstacklayout.svg deleted file mode 100644 index dcfa2d4..0000000 --- a/src/docs/gfx/TODO-Callstacklayout.svg +++ /dev/null @@ -1,61 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Parameters for - DrawSquare - Locals of - DrawSquare - Return Address - Parameters for - DrawLine - Locals of - DrawLine - Return Address - - - stack frame - for - DrawLine - subroutine - - - stack frame - for - DrawSquare - subroutine - - Frame Pointer - Stack Pointer - top of stack - - diff --git 
a/src/docs/gfx/TODO-callstack-manipulation.png b/src/docs/gfx/TODO-callstack-manipulation.png deleted file mode 100644 index 0425904..0000000 Binary files a/src/docs/gfx/TODO-callstack-manipulation.png and /dev/null differ diff --git a/src/docs/gfx/TODO-heap-stack-example-program.png b/src/docs/gfx/TODO-heap-stack-example-program.png deleted file mode 100644 index d88cf3f..0000000 Binary files a/src/docs/gfx/TODO-heap-stack-example-program.png and /dev/null differ diff --git a/src/docs/gfx/TODO-nlevel-paging.jpg b/src/docs/gfx/TODO-nlevel-paging.jpg deleted file mode 100644 index ae263d5..0000000 Binary files a/src/docs/gfx/TODO-nlevel-paging.jpg and /dev/null differ diff --git a/src/docs/gfx/Virtual-to-Physical-Address-Translation-Long-Mode.png b/src/docs/gfx/Virtual-to-Physical-Address-Translation-Long-Mode.png deleted file mode 100644 index 4d62b79..0000000 Binary files a/src/docs/gfx/Virtual-to-Physical-Address-Translation-Long-Mode.png and /dev/null differ diff --git a/src/docs/gfx/Vulnerability-Type-Change-by-Year-Legend.png b/src/docs/gfx/Vulnerability-Type-Change-by-Year-Legend.png deleted file mode 100644 index 421bfb9..0000000 Binary files a/src/docs/gfx/Vulnerability-Type-Change-by-Year-Legend.png and /dev/null differ diff --git a/src/docs/gfx/Vulnerability-Type-Change-by-Year.png b/src/docs/gfx/Vulnerability-Type-Change-by-Year.png deleted file mode 100644 index c4e0a33..0000000 Binary files a/src/docs/gfx/Vulnerability-Type-Change-by-Year.png and /dev/null differ diff --git a/src/docs/gfx/amd64-2mb-page-translation-long-mode.png b/src/docs/gfx/amd64-2mb-page-translation-long-mode.png deleted file mode 100644 index ea47e78..0000000 Binary files a/src/docs/gfx/amd64-2mb-page-translation-long-mode.png and /dev/null differ diff --git a/src/docs/gfx/amd64-4kb-page-translation-long-mode.png b/src/docs/gfx/amd64-4kb-page-translation-long-mode.png deleted file mode 100644 index 63c8a7d..0000000 Binary files 
a/src/docs/gfx/amd64-4kb-page-translation-long-mode.png and /dev/null differ diff --git a/src/docs/gfx/amd64-long-mode-stack-after-interrupt.png b/src/docs/gfx/amd64-long-mode-stack-after-interrupt.png deleted file mode 100644 index e5f30ef..0000000 Binary files a/src/docs/gfx/amd64-long-mode-stack-after-interrupt.png and /dev/null differ diff --git a/src/docs/gfx/call-procedure-memory-content.png b/src/docs/gfx/call-procedure-memory-content.png deleted file mode 100644 index 58f764d..0000000 Binary files a/src/docs/gfx/call-procedure-memory-content.png and /dev/null differ diff --git a/src/docs/gfx/htwg-logo.pdf b/src/docs/gfx/htwg-logo.pdf deleted file mode 100644 index e7dfc76..0000000 Binary files a/src/docs/gfx/htwg-logo.pdf and /dev/null differ diff --git a/src/docs/gfx/llvm-number-paper-pa.png b/src/docs/gfx/llvm-number-paper-pa.png deleted file mode 100644 index 54d3b61..0000000 Binary files a/src/docs/gfx/llvm-number-paper-pa.png and /dev/null differ diff --git a/src/docs/gfx/qemu-stack-overflow.png b/src/docs/gfx/qemu-stack-overflow.png deleted file mode 100644 index ae76dc5..0000000 Binary files a/src/docs/gfx/qemu-stack-overflow.png and /dev/null differ diff --git a/src/docs/gfx/rust-compiler-flow.png b/src/docs/gfx/rust-compiler-flow.png deleted file mode 100644 index cb0d140..0000000 Binary files a/src/docs/gfx/rust-compiler-flow.png and /dev/null differ diff --git a/src/docs/glossary.tex b/src/docs/glossary.tex index 42a24dd..e0d379c 100644 --- a/src/docs/glossary.tex +++ b/src/docs/glossary.tex @@ -1,284 +1,133 @@ % // vim: set ft=tex: -\newglossaryentry{bbox} { - name = {busybox}, - long = {BusyBox: The Swiss Army Knife of Embedded \gls{LX}}, - description = {% - BusyBox combines tiny versions of many common UNIX utilities into a single small executable% - }, - first = {\glsentrylong{bbox}} -} - -\newglossaryentry{Rust} { - name = {Rust}, - long = {The Rust programming language}, - description = {% - Statically typed programming language that uses 
a new concept of variable ownership and reference tracking. Largely explain in \cref{rnd::rust}. - }, - first = {\glsentrylong{Rust}} -} - -\newglossaryentry{rustc}{ - name = rustc, - long = {The Rust compiler}, - description = {% - This program is a compiler for the Rust language (man rustc). - }, - first = {\glsentrylong{rustc}}, -} - -\newglossaryentry{cargo} { - name = {cargo}, - long = {The Rust package manager}, - description = {% - This program is a package manager for the Rust language (man cargo). - It is also a wrapper for the Rust compiler. - It is also a management tool for Rust source code projects, and simplifies the initialization of project directories, the build of the source code and the run of the compiled binary. - }, - first = {\glsentrylong{Rust}} -} - -\newglossaryentry{xargo} { - name = {xcargo}, - long = {The Cross-Compilation wrapper for cargo}, - description = {% - Wrapper for cargo to simplify cross-compilation. - }, - first = {\glsentrylong{Rust}} -} - -\newglossaryentry{proglang} { - name = {programming language}, - description = {% - A well-defined language used to write software. Hundreds of language exists, each with focus on different aspects like comfort for humans, size, speed, safety, etc. - }, -} - - -\newglossaryentry{compiler}{ - name = compiler, - long = {source- to machine-code compiler}, - description = {% - A program that can transform software source code to executable machine code. - Typically targetted for a \glsentryname{proglang} or a family of \glspl{proglang}. - }, - first = {\glsentrylong{compiler}} -} - -\newglossaryentry{GCC}{ - name = GCC, - long = {GNU Compiler Collection}, - description = {% - GCC is an integrated distribution of compilers for several major programming languages. - These languages currently include C, C++, Objective-C, Objective-C++, Java, Fortran, Ada, and Go. 
- }, - first = {\glsentrylong{GCC}} -} - -\newglossaryentry{llvm}{ - name = LLVM, - long = {Low Level Virtual Machine}, - description = {% - A Virtual Instruction Set and Compilation Framework. - The key idea in LLVM is to use a rich virtual instruction set (instead of raw machine code) as the object code representation manipulated by link-time and post-link optimizers and code generators.\cite{Kowshik2002}. - }, - first = {\glsentrylong{clang}} -} - - -\newglossaryentry{clang}{ - name = Clang, - long = {C Language frontendend for LLVM}, - description = {% - The goal of the Clang project is to create a new C based language front-end: C, C++, Objective C/C++, OpenCL C and others for the LLVM compiler. You can get and build the source today. - \url{http://clang.llvm.org/} - }, - first = {\glsentrylong{clang}} -} - -\newglossaryentry{addrspace}{ - name = address space, - long = bound address range in memory, - description = {% - A logical entity that represents a section of memory, specified with a start address and either by end address or length given in a standardize unit - }, - first = {\glsentrylong{addrspace}} -} - -\newglossaryentry{stack}{ - name = stack, - description = {% - }, -} - -\newglossaryentry{sf}{ - name = stack-frame, - description = {% - Procedure data and meta-data (see \cref{lst:amd64-stack-frame-components})% - }, -} - -\newglossaryentry{heap}{ - name = heap, - description = {% - }, -} - -\newglossaryentry{api}{ +\newglossaryentry{API}{ name = API, - long = {Application Programming Interface}, - description = {% + description = { + Application Programming Interface }, - first = {\glsentrylong{api}} } -\newglossaryentry{os}{ - name = OS, - long = Operating System, - description = {% +\newglossaryentry{OS}{ + name = Operating System, + description = { The software that manages the system's hardware ressources. - Other \glspl{app} can access the ressources only through the interface provided by the \gls{os}. 
+ Other \glspl{app} can access the ressources only through the interface provided by the \gls{OS}. }, - first = {\glsentrylong{os}} } - \newglossaryentry{fs}{ name = filesystem, - description = {% + description = { + TODO }, } \newglossaryentry{virt}{ name = virtualization, - description = {% + description = { + TODO }, } \newglossaryentry{OSS}{ name = Open-Source Software, - description = {% + description = { + TODO }, } \newglossaryentry{osvirt}{ name = Operating System-Level Virtualization, - description = {% + description = { + TODO }, } \newglossaryentry{hypervisor}{ name = Hypervisor, - description = {% + description = { + TODO }, } \newglossaryentry{VM}{ name = Virtual Machine, - description = {% + description = { + TODO }, } -\newglossaryentry{LX}{ +\newglossaryentry{Linux}{ name = Linux, - description = {% + description = { is a generic term referring to the family of Unix-like computer operating systems that use the Linux kernel }, plural=Linuces } -\newglossaryentry{microkernel}{ - name = microkernel, - description = {% - Kernel design that operates most drivers in userland, and only provides bare minimum functionality in kernel mode for glueing the drivers together. 
- }, -} - -\newglossaryentry{android}{ - name = Android, - description = {an open-source mobile \gls{os} based on \gls{LX}}, - first = {\glsentryname{android}, \glsentrydesc{android}}, -} - - -\newglossaryentry{imezzos}{ - name = intermezzOS, - description = {% - }, -} - -\newglossaryentry{redoxos}{ - name = Redox OS, - description = {% - }, -} - -\newglossaryentry{blogos}{ - name = Blog OS, - description = {% - }, -} - -\newglossaryentry{tockos}{ - name = Tock OS, - description = {% - }, -} - \newglossaryentry{rootfs}{ name = RootFS, - description = {% + description = { + % TODO }, } \newglossaryentry{lxns}{ name = Linux Namespace, - description = {% + description = { entitiy that holds a specific set of process attributes and can be set per process }, } \newglossaryentry{lxcap}{ name = Linux Capability, - description = {% - entitiy that holds a specific set of process attributes and can be set per process, mainly to establish a relationship between processes and \gls{os} resources + description = { + entitiy that holds a specific set of process attributes and can be set per process, mainly to establish a relationship between processes and \gls{OS} resources }, plural = Linux Capabilities, } \newglossaryentry{lxvfs}{ name = Linux VFS, - description = {% - Virtual Filesystem Switch, a filesystem abstraction layer in \gls{LX}. + description = { + Virtual Filesystem Switch, a filesystem abstraction layer in \gls{Linux}. 
}, } \newglossaryentry{BSD}{ name = BSD, - description = {% + description = { + TODO } } \newglossaryentry{computer}{ name = Computer, - description = {% + description = { is a programmable machine that receives input, stores and manipulates data, and provides output in a useful format } } +\newglossaryentry{app}{ + name=software-application, + description={ + TODO + } +} \newglossaryentry{pm}{ - name = package manager, - description = {% + name=package manager, + description={ + TODO } } \newglossaryentry{sac}{ - name = Software Application Container, - description = {% + name=Software Application Container, + description={ The broad term for the technology used to build, package, distribute and run an application program in isolation from the underlying and co-existing systems, wherein the level or technique of isolation can be different depending on the \gls{sacr}. The term is nuanced from \gls{appc} defined by the \gls{appcorg}. The \gls{appcorg} is a community driven effort to create an open, standardized specification for developers and users of \gls{sac} technology. @@ -287,242 +136,57 @@ } } \newglossaryentry{saci}{ - name = Software Application Container Image, - description = {% + name=Software Application Container Image, + description={ An archive file that contains all of the necessary binaries that are needed to execute an application and a manifest file that that contains metadata about the application. Alternatively to containing all the required binary files, the manifest file can declare dependencies to other application container images, which must then be available at runtime to execute the contained application. } } \newglossaryentry{sacr}{ - name = Software Application Container Runtime, - description = {% + name=Software Application Container Runtime, + description={ An application program (suite) that understands how to run the software inside an \gls{saci}. 
} } \newglossaryentry{LXC}{ - name = LXC, - description = {% + name=LXC, + description={ + TODO } } \newglossaryentry{Docker}{ - name = Docker, - description = {% + name=Docker, + description={ A very popular \gls{sac} platform and application suite, providing functionality to build and deploy Docker specific \glspl{saci}. } } \newglossaryentry{systemd-nspawn}{ - name = systemd-nspawn, - description = {% + name=systemd-nspawn, + description={ + TODO } } \newglossaryentry{rkt}{ - name = rkt, - description = {% + name=rkt, + description={ + TODO } } \newglossaryentry{appcorg}{ - name = App Container Organisation, - description = {% + name=App Container Organisation, + description={ Organisation for the App Container specification, including the schema and associated tooling. } } \newglossaryentry{appc}{ - name = App Container, - description = {% - Specific variant of an \glsentrytext{sac} defined by the \glsentrytext{appcorg}. - } -} - -\newglossaryentry{NVD}{ - name = {NVD}, - description = { - The NVD is the U.S. government repository of standards based vulnerability management data represented using the Security Content Automation Protocol (SCAP). This data enables automation of vulnerability management, security measurement, and compliance. The NVD includes databases of security checklist references, security related software flaws, misconfigurations, product names, and impact metrics.\cite{NVD} - }, - long = {\citetitle{NVD}}, - first = {\glsentrylong{NVD}} -} - -\newglossaryentry{CWE}{ - name = {CWE™}, - long = Common Weakness Enumeration, - description = {a community-developed list of common software security weaknesses. 
It serves as a common language, a measuring stick for software security tools, and as a baseline for weakness identification, mitigation, and prevention efforts}, - first = {\glsentrylong{CWE}, "\glsentrydesc{CWE}"\cite{MITRE-CWE}} -} - -\newglossaryentry{CWE-633}{ - name = CWE-633, - description = {Weaknesses in this category affect memory resources}, - first = {CWE-633: \glsentrydesc{CWE-633}\cite{MITRE-CWE-633}} -} - -\newglossaryentry{CWE-119}{ - name = CWE-119, - long = {CWE-119: \glsentrydesc{CWE-119}}, - description = {Improper Restriction of Operations within the Bounds of a Memory Buffer}, - first = {\glsentrytext{CWE-119}\cite{MITRE-CWE-119}} -} - -\newglossaryentry{CWE-635}{ - name = CWE-635, - long = {\glsentrydesc{CWE-635}}, - description = {\citetitle{MITRE-CWE-635}}, - first = {\glsentrytext{CWE-635}\cite{MITRE-CWE-635}} -} - -\newglossaryentry{CWE-122}{ - name = CWE-122, - long = {\glsentrydesc{CWE-122}}, - description = {\citetitle{MITRE-CWE-122}}, - first = {\glsentrytext{CWE-122}\cite{MITRE-CWE-122}} -} - -\newglossaryentry{CWE-134}{ - name = CWE-134, - long = {\glsentrydesc{CWE-134}}, - description = {\citetitle{MITRE-CWE-134}}, - first = {\glsentrytext{CWE-134}\cite{MITRE-CWE-134}} -} - -\newglossaryentry{C}{ - name = C, - , description = {% - C programming language, - } -} - -\newglossaryentry{C++}{ - name = C++, - , description = {% - A \glsentrytext {proglag} based on \glsentrytext{C}, enahnced by features like object-orientation, lambdas, and much more. 
- } -} - -\newglossaryentry{asm}{ - name = ASM, - long = Assembly programming language, - description = {% - } -} - -\newglossaryentry{amd64}{ - name = AMD64, - long = AMD64, - description = {% - Contemporary Hardware Architecture\cite{AMD64Vol1,AMD64Vol2} - }, - first = {\glsentrylong{amd64}}, -} - -\newglossaryentry{cpu}{ - name = CPU, - long = Central Processing Unit, - description = {% - Central Haddware Unit that executes machine code - }, - first = {\glsentrylong{cpu}}, -} - - -\newglossaryentry{tlb}{ - name = TLB, - long = Translation Lookaside Buffer, - description = {% - }, - first = {\glsentrylong{tlb}}, -} - -\newglossaryentry{mmu}{ - name = MMU, - long = Memory Management Unit, - description = {% - Physical part of the \gls{cpu} equipped for managing the system's memory. - }, - first = {\glsentrylong{MMU}}, -} - -\newglossaryentry{vaddr}{ - name = virtual address, - plural = virtual addresses, - description = {% - Memory Addresses that does not reference physical memory directly, but is part of a memory virtualization scheme. - }, -} - -\newglossaryentry{sysadmin}{ - name = System Administrator - , description = {% - } -} - -\newglossaryentry{realtime}{ - name = realtime - , description = {% - In computer science realtime refers to guaranateed execution within specified time boundaries. - } -} - -\newglossaryentry{app}{ - name = software-application, - description = {% - A bundle of one or multiple programs with a common use-case that can be run on an \gls{os}. - } -} - -\newglossaryentry{task}{ - name = task - , description = {% - Generic term for any unit of work to be executed on the. - In the context of this study, it may be used for any of \glsentrytext{program}, \glsentrytext{process}, \glsentrytext{thread}. - } -} - -\newglossaryentry{program}{ - name = program - , description = {% - A group of instructions that can be executed by the \glsentryname{cpu}. 
- } -} - -\newglossaryentry{process}{ - name = process - , description = {% - A Program in execution. - } -} - -\newglossaryentry{thread}{ - name = thread - , description = {% - A defined path of instructions within a process. - It can span from a part of a procedure up to the whole program of the process. - Threads can be identified in the program code by hypothetical execution paths. - The thread can only be executed by spawning a process in such a way that the specific thread in the program will be executed, e.g. by invoking specific arguments. - } -} - -\newglossaryentry{procedure}{ - name = procedure - , description = {% - An addressable subgroup of instructions in a program that contains specific functionality. - } -} - -\newglossaryentry{function}{ - name = function - , description = {% - See Procedure. - } -} - -\newglossaryentry{job}{ - name = job - , description = {% - A specific unit of work, specifying one or multiple programs to execute along with the arguments to be passed to them. + name=App Container, + description={ + Specific variant of an \gls{sac} defined by the \gls{appcorg}. } } diff --git a/src/docs/parts/context/context.tex b/src/docs/parts/context/context.tex index fb3144e..5e2559f 100644 --- a/src/docs/parts/context/context.tex +++ b/src/docs/parts/context/context.tex @@ -1,1470 +1,322 @@ % // vim: set ft=tex: + \chapter{Introduction} -\label{context::introduction} -This document contains a study on the feasibility of using compile-time code analysis, as found in \gls{Rust}'s \gls{compiler}, for ensuring memory-safety within an \gls{os} kernel. -This study could be applied to all \glspl{app}, but the focus is on the implementation of \glspl{os} which is the \gls{app} that is responsible for managing the system's resources and provide abstractions for all other \glspl{app}. 
-For this the \gls{os} is the only \gls{app} that required unrestricted access to these resources, with the responsibility of managing them safely according to the rules that are either hard-coded or set up by the \gls{sysadmin}. +This thesis is a scientific approach to analyze and solve the practical problems of packaging and deploying \glspl{app} in the context of \gls{sac} technology. +For a lack of an official definition and common understanding what this technology is, the term \gls{sac} is defined in this chapter as a reference for the rest of the thesis. +The technology combines \gls{virt} techniques with new approaches to \gls{app} development and deployment. -The increasing number of vulnerabilities based on memory-safety issues in \glspl{app}, as presented in \cref{context::weaknesses-mem-safety::cwe::statistics}, is a major motivator for working on this topic. +The two main drivers for this technology have been long standing problems in information technology; optimal utilization of hardware and correct deployment of software to said hardware, both without sacrificing security. -\section{Motivational Hypothesis} +The optimal utilization of hardware is done by collocating and running multiple \glspl{app} simultaneously on the same hardware. +In order to increase security, these applications are separated by applying \gls{virt} techniques. +By developing \gls{virt} techniques into the \gls{OS} and thereby providing low-level security mechanisms, the foundation of \glspl{sac} was formed. +In the next chapter an introduction to \gls{virt} is given, as it is important to understand this aspect of the \gls{sac} technology. + +The correctness of software deployment is not easily measurable by quantity like hardware utilization, but is of qualitative nature. +The \gls{sac} approach is to deploy self-contained bundles for \glspl{app} in form of \glspl{saci}. 
+The creation of these \glspl{saci} is the main concern of this thesis and will be the subject to research and development in part \ref{part:research}. +The underlying problems related to conventional \gls{app} deployment are covered in chapter \ref{chap:sdd}. +State of the art attempts to solve these problems using \glspl{saci} are introduced and critically discussed in chapter \ref{chap:saci}, detailing the problem statement for this thesis. + +\chapter{\Gls{virt}} +\label{chap:virt} +Since the first \gls{VM} \gls{OS} \cite[p.~217-218]{Sarton1975} was created, \gls{virt} has been an important field in computer sciences, both in academic research and in the industry, and has been subject to continuous development, improvement, and adoption. +The very basic understanding of virtualizing a system or an application is to isolate and to control it. +In greater detail it is the principle of controlling and monitoring the availability and the access to soft- and hardware resources for users, their applications or whole virtual systems running on top of existing systems. +Virtualization techniques can be grouped by two categories: \glspl{hypervisor} and \gls{osvirt}. + +\section{\glspl{hypervisor}} +The term \gls{hypervisor} is synonymous to the more self-explanatory terms control program \cite[p. 217]{Sarton1975} and \gls{VM} Monitor. +The \gls{hypervisor} operates on a host machine and can control multiple \glspl{VM}. +The principle is easy to understand, because one can simply picture one or many virtual computers running on a real computer. +\glspl{VM} are presented with a set of virtual hardware resources. +These don't necessarily exist in the presented form on the underlying hardware machine. + +\subsection{Guest \glspl{OS}} +In order to be able to boot the virtual hardware and run \glspl{app}, \glspl{VM} need an \gls{OS} to run applications. 
+Specific to the environment and features of the \gls{hypervisor}, there are different storage formats for the file(s) that contain the \gls{OS} for the \gls{VM}. +More insight on this is given in section \ref{sect:vm-fs-isolation}. +In general, these files contain the \gls{OS} itself, as well as the installed applications in order to run the desired services within the \gls{VM}. + +\subsection{Flexible Guest \gls{OS} Choice} +Because each \gls{VM} has to store and run a separate \gls{OS}, it can run whatever the \gls{hypervisor} supports, and is not tied to running the same \gls{OS}, or even the same platform, as the host machine. +This allows creating heterogeneous scenarios like running an ARM \gls{VM} using \gls{BSD} on an x86 \gls{Linux} host, or vice versa. + +\subsection{\Gls{fs} Storage Isolation} +\label{sect:vm-fs-isolation} +The \gls{OS} running inside a \gls{VM} is typically presented with a virtual disk drive. +The guest \gls{OS} has to implement the driver for this virtual disk drive. +The drive is backed by a file on the host system, which can either be a file on a \gls{fs} or point to a block device. +By exclusively assigning one virtual disk drive per \gls{VM}, a \gls{VM} cannot access the data of other \glspl{VM}. + +As a result, the \gls{hypervisor} features full isolation between the guests' \glspl{fs} and also prevents them from accessing the host's files. + +\subsection{Overhead In \Gls{app} \Gls{virt}} +\label{sect:virt-overhead-app-virt} +Compared to running services directly on the host machine, one obvious overhead is that an additional \gls{OS} is required to be installed and configured once upfront, and virtually booted to enable execution of applications within. +In case that multiple \glspl{VM} are supposed to run the same application, e.g. with different configuration, each of them will have a separate copy of the \gls{OS} and the application itself, differing only by configuration and runtime data.
+For this thesis, the use-case in which solely the applications running on top of the virtualized \gls{OS} are the required subject of \gls{virt} is of highest interest. +Furthermore, the focus lies on applications that can run on \gls{Linux}. + +Running a separate virtualized \gls{OS} in the use-case of virtualizing \gls{Linux} applications involves considerable and unnecessary overhead. +In addition, there are several performance aspects that are slowed down when running software inside \Glspl{VM}\cite{Felter2014}. + +\section{\gls{osvirt}} +\label{sect:sac-osvirt} +Virtualizing \glspl{app}, as described in section \ref{sect:virt-overhead-app-virt}, is the primary use-case for \gls{osvirt}. +Compared to a \gls{hypervisor}, \gls{osvirt} doesn't require another virtualized, full-fledged \gls{OS} to virtualize an application. +Instead, the application's processes run in the same \gls{OS} in isolated and controlled virtual environments, called containers\cite{Reshetova2014}. + +The technology of \gls{osvirt} has been under active development for about a decade now\cite{Reshetova2014}, with the purpose of supporting virtualized applications rather than virtualizing whole machines. +The popularity of the technology burgeoned with the release of one specific user-facing implementation named \gls{Docker}, which was originally exclusively available for the \gls{Linux}-platform. + +Section \ref{sect:virt-advent-sac} contains an overview of \gls{Docker}'s features that presumably helped to make it very popular in a relatively short amount of time, compared to how long the underlying technology and similar tools have existed. + +To form a complete view it is useful to learn the leveraged low-level mechanisms implemented in the \gls{Linux} kernel. + +\subsection{\gls{Linux} Process Separation and (Resource) Access Control} +\label{sect:linux-process-isolation} +\gls{osvirt} on \gls{Linux} allows virtualizing applications on the process level instead of the machine level.
+It's based on the \gls{OS} primitive of processes, providing low-overhead isolation and resource-control for user-space processes, creating a form of \gls{virt}. +Every \gls{app} instance, virtualized or not, runs in the context of a separate process known to and managed by the very same host \gls{OS}. +With \gls{osvirt}, the control as well as the responsibility for the virtualized application belongs to the host \gls{OS}. +This is in contrast to running an application inside a \gls{VM}, where the host \gls{OS} doesn't know anything beyond the border of the process that runs the \gls{VM} and has no direct control over the application processes inside the \gls{VM}. + +On \gls{Linux}, there are many different mechanisms to regulate a process's access or its view on resources of all kinds. +It is important to understand that not all of these mechanisms contribute to security, and that \gls{osvirt} is more difficult to secure compared to \Glspl{VM}. + +\subsubsection{The chroot System Functionality} +\label{sect:lpc-chroot} +The oldest mechanism contributing a core functionality to realizing containers is the \textit{change root} functionality\cite{Reshetova2014}. +It allows a privileged process to change its effective root \gls{fs}, which can be any directory on the host. +Effectively, every file path accessed after changing the root \gls{fs} will automatically be prefixed with the new root path by the \gls{OS}. +As an example, when a process changes its root \gls{fs} to \textit{/newroot} and then requests to open the file \textit{/file}, the application will transparently access \textit{/newroot/file}. +As a usage example, this makes it easy to have a completely separate userspace file structure under \textit{/newroot}. +Hence, the chroot-path could contain an application and different libraries installed, which would otherwise be in conflict with other libraries if they were installed in the \gls{rootfs} that's in use by the host \gls{OS}.
+ +Note that \textit{chroot} has not been designed as a security feature, so if no countermeasures are taken, privileged \textit{chroot}'ed applications can still access files outside of the chroot-path if they intend to. + + +\subsubsection{Namespaces} +\label{sect:lpc-ns} +\Glspl{lxns} were designed in 2007 and described as lightweight in-kernel virtualization/isolation\cite{Menage2007}. +The authors chose to invent a new name instead of using the descriptive term in order to clarify the distinction from the more heavyweight technology of \gls{VM} \glspl{hypervisor}. +The various \Glspl{lxns} all represent different attributes related to the process and resource model on \gls{Linux}. +Each namespace can contain one or more processes, allowing for arbitrary grouping of processes that share a namespace. +Table \ref{tab:lxns} shows 7 different \Glspl{lxns} that are available at the time of writing. +Collectively, they represent the context of a process, and changes to resources within the respective namespace will only affect processes that share the same namespace.
+ +\ctable[ + cap = \Glspl{lxns}, + caption = \Glspl{lxns}\footnote{from \textit{NAMESPACES(7)} and \textit{UNSHARE(2)}}, + maxwidth = \textwidth, + label = tab:lxns, + ]{l | X}{}{ +\FL + Namespace & Resources +\ML + UTS & + Hostname, NIS domain name +\NN + PID & + Process IDs (each namespace will start counting from 1) +\NN + Network & + Complete network stack: + Network interfaces, addresses, sockets, ports, routes, filter rules, \textit{/proc/net} and \textit{/sys/class/net} +\NN + IPC & + System V/POSIX message queues, semaphore sets, shared memory segments +\NN + Cgroup & + cgroup root directory \textit{/proc/self/cgroup} +\NN + User & + UIDs, GIDs, capabilities +\NN + \textbf{Mount} & + Mount points, /proc/self/mountinfo +} + +To continue the example given in section \ref{sect:lpc-chroot}, assuming that a \textit{chroot()} call took place, an application that lies within this chroot-path is going to be executed. +The resulting application, executed through the process that also called \textit{chroot()}, could be further isolated from the outer system by moving the process to a new set of \glspl{lxns} before executing the application. +Particularly interesting for this example is the perspective of a new \textbf{Mount}-Namespace, since it further contributes to the aspect of filesystem isolation. +Manipulations to the mount points made within the new namespace will not affect other processes on the system. +This is effective for protecting against error cases like accidental file deletions, and can also be considered an initial level of security, as the mounts are not propagated to the parent process, allowing the management and usage of specific resources only within a specific set of processes. + +As an example, an application could mount a secured file-resource within the new mount-namespace, and no other process on the system would be able to find this resource, either by simply searching its view of the filesystem or by examining the mount table.
+If another process is then moved to this mount-namespace, it could automatically access the secured filesystem, without needing the secrets for the mount procedure and without having to perform it. + +\subsubsection{Capabilities} +\label{sect:lpc-caps} +\Glspl{lxcap} provide a mechanism for fine-grained permission control for \gls{Linux} processes and program files\cite{Hallyn2008}. +Conventionally, applications that require elevated privileges are set up to run as \textit{root\footnote{the administrator account on \gls{Linux}}} and therefore have \textbf{full} system privileges. +By being able to drop specific unneeded capabilities, the risk in running applications that need some, but not all, of the \textit{root} privileges can be heavily reduced. + +\ctable[ + cap = \Glspl{lxcap}, + caption = \Glspl{lxcap}\footnote{from \textit{CAPABILITIES(7)}}, + maxwidth = \textwidth, + label = tab:lxcap, + ]{X}{}{ +\FL AUDIT\_CONTROL, AUDIT\_READ, AUDIT\_WRITE, +\NN BLOCK\_SUSPEND, CHOWN, DAC\_OVERRIDE, +\NN DAC\_READ\_SEARCH, FOWNER, FSETID +\NN IPC\_LOCK, IPC\_OWNER, KILL +\NN LEASE, LINUX\_IMMUTABLE, MAC\_ADMIN +\NN MAC\_OVERRIDE, MKNOD, NET\_ADMIN +\NN NET\_BIND\_SERVICE, NET\_BROADCAST, NET\_RAW +\NN SETGID, SETFCAP, SETPCAP +\NN SETUID, SYS\_ADMIN, SYS\_BOOT +\NN \textbf{SYS\_CHROOT}, SYS\_MODULE, SYS\_NICE +\NN SYS\_PACCT, SYS\_PTRACE, SYS\_RAWIO +\NN SYS\_RESOURCE, SYS\_TIME, SYS\_TTY\_CONFIG +\NN SYSLOG, WAKE\_ALARM +} + +At the time of writing, the 38 capabilities that are currently available on \gls{Linux} are listed in table \ref{tab:lxcap}. +They are listed explicitly for the sake of completeness, and as a demonstration of how many different privileges are distinguished today on \gls{Linux}. + +As the focus for this project is not \gls{appc} per-se, but only the method of creation and form of distribution, it is not important to examine every listed \gls{lxcap}, but rather look at an interesting example.
+The highlighted \textit{CAP\_SYS\_CHROOT} serves well here for demonstrating the security potential and further extending the example that has been used in sections \ref{sect:lpc-chroot} and \ref{sect:lpc-ns}. + +After a successful \textit{chroot()}-call and switch to a new mount-namespace, there is still the chance that the process has kept or retrieves a reference to a file on the outside. +This reference can be used to escape the chroot by issuing another \textit{chroot()} syscall to the topmost parent of said file, which is likely the host's root. +When \textit{CAP\_SYS\_CHROOT} is dropped after executing the \textit{chroot()} syscall, the kernel will not permit the executed application to call \textit{chroot()} again, and can thereby effectively prevent the escape. + +\subsubsection{Control Groups} +% TODO + +\subsubsection{Security Modules} +% TODO explain that LSM is a framework +% TODO AppArmor +% TODO SELinux + +\subsubsection{Initialization And Combination Of The Above} +When \gls{Linux} is booted, an initial set of namespaces, cgroups, and security contexts is created to contain the first userspace process, typically called init. + +Depending on the program and configuration of the init process, other \glspl{app} are created with a new set of namespaces and cgroups, or inherit all or only a selected set from the init process. +The two concepts can be mingled since the mechanisms allow for a flexible configuration per process. +This functionality is exposed via the \gls{Linux} systemcall \gls{API}. + +The systemcall \gls{API} can be used by any \gls{app} running as a privileged process, not only by the init process. +Hence, any vendor or organization might create libraries and programs that make use of these functions, in order to provide lightweight virtualization functionality to other application developers, system administrators and end-users in an abstracted and easily usable fashion.
+ +\subsection{\Gls{fs} Storage Isolation And \Gls{app} Deployment} +With the help of the different \ref{sect:linux-process-isolation} techniques ... TODO + +\subsection{\Gls{app} \Gls{virt}} +% TODO compare app productivity and deployment by example of chroot vs VM + +\section{The Advent of \glspl{sac} with \gls{Docker}} +\label{sect:virt-advent-sac} +Even though the underlying technology \gls{osvirt} had been available for a relatively long time, \gls{Docker}\cite{Fink2014}, since its release in 2014\footnote{http://blog.docker.com/2014/06/its-here-docker-1-0}, has brought \glspl{sac} to the attention and hands of the masses in the \gls{OSS} community. +From a psychological standpoint this is not surprising, as it has abstracted most complexities of the technology, adding ease of deployment, a platform for hosting the \gls{saci} in a Docker specific format, as well as a very convenient way for building the like using Dockerfiles(TODO reference). +Its popularity has come to a point that the term \textit{Docker} is being used interchangeably with the \gls{sac} technology itself. +% TODO: references for this claim +% TODO introduce \gls{LXC}, \gls{systemd-nspawn}, \gls{Docker}, and \gls{rkt}. + +The first part of this section analyzes the \gls{sacr} aspects of the implementations, while the second part demonstrates currently popular approaches to assemble \glspl{saci}. + +\section{Virtualization Summary} +% TODO: graphic to show the differences + +\chapter{Software Development \& Deployment} +\label{chap:sdd} + +\section{Bringing Together Developers \& Operators} +The technology that is currently available and combined to form \glspl{sac} reuses different patterns and techniques to solve a combination of problems that either software developers or system operators have faced separately. +These problems are all related to software deployment and system operation and can be represented by the following questions. 
+\begin{enumerate} +\item How do we maximize the utilization of our hardware systems without compromising security? + + This question is about isolating system resources and fine-grained resource control, which is not in the research scope of this thesis. + It is nonetheless briefly explained under the section \ref{sect:sac-osvirt}, to form a complete view on the scope of \glspl{sac} technology. + +\item How do we guarantee that the application works on every target machine the same as on the developer machine? +\item How do we build multiple variants and/or versions of an application? +\item How do we install, configure and run multiple instances simultaneously on the same target machine without exhibiting version conflicts and without sacrificing application security? +\item How do we verify that an application that runs on the target system has not been altered maliciously at some point in the deployment chain? +\end{enumerate} +Only a subset of these problems and attempted solutions will be subject to research for this thesis. +Questions 2--4 are in the scope of this thesis, while the concern of question 5 is declared as an optional question. +All of these questions are very important to the ideology of \glspl{sac}, and they have their origin in the conventional methods of software deployment. +A more thorough examination of the questions and their presented problems is found in section \ref{sect:sd-challenges} of this chapter. + +\section{Challenges of Software Development \& Deployment} +\label{sect:sd-challenges} +Software is typically developed on a workstation or laptop, translated to a format that is understood by the target machine to which it must be transferred and where it is configured and finally executed to serve its purpose. + +In order to be executable on the target machine, the software needs to be translated into the target-platform specific format before it can be executed on the target system.
+If the software is changed and updated, the cycle has to be repeated. +This represents a first challenge: software updates, or deployments in general, are not supposed to be negatively influenced by any previous version or state that exists on the target machine. +On the technical level, this process starts with software developers who write software source code. +This code is then transformed and stored in executable binary files that contain specific platform-dependent machine-code. +The translation is done by processing the source code with a compiler toolchain. +The binary files are then made available as software packages that can be downloaded and installed on the target machine's operating system. +Typically and ideally, this is done with the help of a software package manager, which itself is a piece of software that is included in most modern operating systems. +The location where the files of the package are installed on the target machine depends on how it is configured at packaging time, but in most popular software package approaches this location is agnostic of software application version changes. + +Another challenge is to be able to verify that the software hasn't been altered, either accidentally or maliciously, and does not differ from its intended behavior at any point of the deployment process. + +\chapter{\glspl{saci}} +\label{chap:saci} + +\section{No truly declarative method to create \gls{saci}} + +\section{No Independent Verification Of The Content} + +\section{Customization} + +\chapter{Scope} + +\section{Goals} +The primary concern is to find a viable method for declaring and assembling \glspl{saci} deterministically. +As a highly anticipated option, the solution should also be able to reproduce these builds on different computers and at different points in time and yield the exact same results.
+The secondary concern is to abstract enough of the solution's complexity in order to make it attractive for application developers while still allowing them to specify the exact contents alongside their software within the resulting \gls{saci}. + +\section{Motivation} +The choice for this topic was made due to my personal dissatisfaction with currently available methods for building \glspl{saci}, and seeing both the need and the potential of a scientifically substantiated approach. + + +\section{Purpose of the Study} +% Purpose of the Study +%The Purpose of the Study is a statement contained within one or two paragraphs that identifies the research design, such as qualitative, quantitative, mixed methods, ethnographic, or another design. The research variables, if a quantitative study, are identified, for instance, independent, dependent, comparisons, relationships, or other variables. The population that will be used is identified, whether it will be randomly or purposively chosen, and the location of the study is summarized. Most of these factors will be discussed in detail in Chapter 3. + +\section{Significance of the Study} +% Significance of the Study +% The significance is a statement of why it is important to determine the answer to the gap in the knowledge, and is related to improving the human condition. The contribution to the body of knowledge is described, and summarizes who will be able to use the knowledge to make better decisions, improve policy, advance science, or other uses of the new information. The “new” data is the information used to fill the gap in the knowledge. + +\section{Primary Research Questions} % Primary Research Questions % The primary research question is the basis for data collection and arises from the Purpose of the Study. There may be one, or there may be several. When the research is finished, the contribution to the knowledge will be the answer to these questions. 
Do not confuse the primary research questions with interview questions in a qualitative study, or survey questions in a quantitative study. The research questions in a qualitative study are followed by both a null and an alternate hypothesis. + +\section{Hypotheses} % Hypotheses % A hypothesis is a testable prediction for an observed phenomenon, namely, the gap in the knowledge. Each research question will have both a null and an alternative hypothesis in a quantitative study. Qualitative studies do not have hypotheses. The two hypotheses should follow the research question upon which they are based. Hypotheses are testable predictions to the gap in the knowledge. In a qualitative study the hypotheses are replaced with the primary research questions. +The goals described above should be possible to achieve with the help of a package manager that supports source-based packages, by specifying the container image content by referencing packages in a declarative manner. +%TODO why a package manager? +%TODO why source-based? +%TODO why is declarative? -According to my best-effort literature research in Q1/2017, the hypothesis that \textit{Rust's static code analysis can guarantee memory safety in the \gls{os}} has not been studied explicitly. -This is to my surprise, because as explained in \cref{context::introduction::memory-safety}, memory-safety in \gls{os} development is critical, and \gls{Rust} offers attractive features that might bring improvements, which is covered in \cref{rnd::rust}. -The hypothesis cannot be trivially approved or denied, which drives the research efforts for my final study project.
+\section{Research Design} +% In Chapter 1 this is a summary of the methodology and contains a brief outline of three things: (a) the participants in a qualitative study or the subjects of a quantitative study (human participants are referred tyo as participants, non-human subjects are referred to as subjects), (b) the instrumentation used to collect data, and (c) the procedure that will be followed. All of these elements will be reported in detail in Chapter 3. In a quantitative study, the instrumentation will be validated in Chapter 3 in detail. In a qualitative study, if it is a researcher-created questionnaire, validating the correctness of the interview protocol is usually accomplished with a pilot study. For either a quantitative or a qualitative study, using an already validated survey instrument is easier to defend and does not require a pilot study; however, Chapter 3 must contain a careful review of the instrument and how it was validated by the creator. +% In a qualitative study, which usually involves interviews, the instrumentation is an interview protocol – a pre-determined set of questions that every participant is asked that are based on the primary research questions. A qualitative interview should contain no less than 10 open-ended questions and take no less than 1 hour to administer to qualify as “robust” research. +% In the humanities, a demographic survey should be circulated with most quantitative and qualitative studies to establish the parameters of the participant pool. Demographic surveys are nearly identical in most dissertations. In the sciences, a demographic survey is rarely needed. -Besides this specific hypothesis, many implementations of \glspl{os} with \gls{Rust} have appeared in public. -Their purposes range from proof-of-concept and educational work like \gls{imezzos} and \gls{blogos}, to implementations that aim to be production grade software like \gls{redoxos} and \gls{tockos} \cite{Levy2015a}. 
-These implementations are subject to evaluation in \cref{rnd::existing-os-dev-with-rust}. +\section{Theoretical Framework} +% The theoretical framework is the foundational theory that is used to provide a perspective upon which the study is based. There are hundreds of theories in the literature. For instance, if a study in the social sciences is about stress that may be causing teachers to quit, Apple’s Intensification Theory could be cited as the theory was that stress is cumulative and the result of continuing overlapping, progressively stringent responsibilities for teachers that eventually leads to the desire to quit. In the sciences, research about new species that may have evolved from older, extinct species would be based on the theory of evolution pioneered by Darwin. +% Some departments put the theoretical framework explanation in Chapter 1; some put it in Chapter 2. -The final results will be of qualitative nature, captured by analyzing the existing and a self-developed \gls{Rust}-implementations of popular memory management techniques. -In addition to the sole analysis of \gls{Rust}-implementations, comparisons will be made, discerning the level of memory safety guarantees gained over similarly intending implementations in \gls{C}. +\section{Assumptions, Limitations, and Scope (Delimitations)} +% Assumptions are self-evident truths. In a qualitative study, it may be assumed that participants be highly qualified in the study is about administrators. It can be assumed that participants will answer truthfully and accurately to the interview questions based on their personal experience, and that participants will respond honestly and to the best of their individual abilities. +% Limitations of a study are those things over which the research has no control. Evident limitations are potential weaknesses of a study. 
Researcher biases and perceptual misrepresentations are potential limitations in a qualitative study; in a quantitative study, a limitation may be the capability of an instrument to accurately record data. -\section{What is Memory-Safety?} -\label{context::introduction::memory-safety} -Memory-safety is a term that is only vaguely defined in general, thus a definition is given for the context of this study. -For a thorough understanding of the issues discussed further in this document, it might be helpful to review the basics of how memory is used in current computer systems. +% Scope is the extent of the study and contains measurements. In a qualitative study this would include the number of participants, the geographical location, and other pertinent numerical data. In a quantitative study the size of the elements of the experiment are cited. The generalizability of the study may be cited. The word generalizability, which is not in the Word 2007 dictionary, means the extent to which the data are applicable in places other than where the study took place, or under what conditions the study took place. +% Delimitations are limitations on the research design imposed deliberately by the researcher. Delimitations in a social sciences study would be such things as the specific school district where a study took place, or in a scientific study, the number of repetitions. -For decades computer systems, more specifically their \glspl{cpu}, were designed to execute instructions that were previously loaded into volatile main memory, typically from a secondary, persistent memory. -These instructions are themselves able to alter the very main memory they are stored at, which allows for great flexibility but also involves the risk of corrupting a consistent chain of instructions or other memory content like data. 
+\section{Definition of Terms} +% The definition of terms is written for knowledgeable peers, not people from other disciplines As such, it is not the place to fill pages with definitions that knowledgeable peers would know at a glance. Instead, define terms that may have more than one meaning among knowledgeable peers. -As any other \gls{app}, the \gls{os} is loaded and executed in form of one or multiple sets of logically grouped instructions, called \glspl{program}. -Loading the \gls{os}'s program into memory is not the responsibility of the \gls{os}, it belongs to the components earlier in the boot process, namely the boot loader and system firmware. -The \gls{os} takes over the responsibility to protect the main and secondary memory as soon as the bootloader has loaded the \gls{os} and has jumped to its first instruction. -From this point, loading further programs into main memory is done by the \gls{os}, either according to scheduled jobs set up by the \gls{sysadmin}, or based on well-defined events which can be triggered by any form of input via the system's interfaces. -For example, the \gls{os} can load and execute a program stored on the hard-disk, after the user has given the appropriate instructions via a terminal. - -The execution of other programs is potentially dangerous, because they might attempt to access the memory content of other programs and their data. -It is the responsibility of the \gls{os} to prevent such executed programs from being able to mutually interfere with memory content that is not theirs, keeping the memory in a safe state at all times \footnote{This does not include memory-safety \textit{within} each of these executed programs, as the \gls{os} has no pertinent knowledge of the program's intentions.}. -This requires an extensive amount of care and foresight from the developers of the \gls{os}, to ensure memory consistency in any of the various events and combinations thereof that might possibly occur at runtime. 
- -\subsection{A Definition Of Memory-Safety For \glsentryplural{os}} -\label{context::introduction::memory-safety::def} -If the \gls{os} is memory-safe, any program, whether it is part of the \gls{os} or any installed \gls{app}, is only able to access its allocated memory regions. -Additionally, if the \gls{os} supports shared memory regions, each shared memory region may only be accessible by programs that have been granted access to it. - -\section{Memory-Safety Violation in Software} -\label{context::introduction::memory-safety-violation-in-sw} -Software that has memory-safety violations is vulnerable to random crashes and intentional attacks. -This is why information on safety related mistakes in software shouldn't be publicly available immediately. -Ideally, before the vulnerability is publicly known, all systems that run the erroneous software in question should have the chance to update the software is question, so that any potential attackers can't leverage the known vulnerability. -This introduces a dilemma, because software updates usually contain publicly known information, at least in the open-source sector. - -Any existing or hypothetical solution to this dilemma is not in scope of this study, but two conclusions can be made. -First, public statistics in the area of software vulnerabilities are questionable with regard to their completeness. -Second, and more importantly, memory-safety related software mistakes should be detected as early as possible, ideally before the software is released and installed anywhere. - -\subsection{Human Aspect} -\label{context::introduction::human-aspect} -To detect software mistakes early, it is helpful to analyze where they originate. -This section emphasizes the fact that software - even if software-generators are interleaved - is ultimately produced by humans. 
-This aspect is relevant to assessing the origins of memory-safety related errors, as only errors made by humans during any stage of the development process can lead to unsafe memory access at runtime. -The following assumptions are made based on common sense -\begin{itemize} -\item{No human is born as a flawless software engineer.} -\item{Beginners will start writing production software before they master programming in perfection.} -\item{With each generation of humans there will always be new beginners that will start learning from scratch.} -\item{Capabilities and motivation vary significantly between individuals.} -\item{Less capable or motivated individuals will eventually write software for production use.} -\item{Education is not ideal.} -\end{itemize} -Combining these assumptions, it cannot generally be assumed that every beginner that writes software has learned about the involved risks, and is determined and capable to ensure memory-safety and other high quality standards in their software. - -From my personal experience with software developers and students of software engineering, I have received the impression that many do not prioritize safety in their software. - -One severe example for this in my personal career so far is a former team partner in one of our \gls{C}/\gls{C++} programming courses. -Despite the fact that the professor instructed us to use valgrind\footnote{a runtime memory analyzer and debugger} to verify our programs, my partner was satisfied with the result after writing the algorithms to his best understanding and correcting all errors detected by the \gls{compiler}. -Discussing the topic with him did not lead to any understanding on his side, and even after verifying that his program had easily detectable memory issues, he insisted on the correct result of the algorithm and pointed out the lack of time. -I realized similar mindset in some of the other teams. 
- -This personal experience is no scientific proof nor is it statistically significant. -It does create a feeling of insecurity, because if their software is distributed widely a few of these people are enough to risk the security of thousands of systems. - -A professor and co-author of \citetitle{Arpaci-Dusseau2015} gives the following warning about this issue: -\textit{"Just because a program compiled(!) or even ran once or many times correctly does not mean the program is correct. Many events may have conspired to get you to a point where you believe it works, but then something changes and it stops. A common student reaction is to say (or yell) “But it worked before!” and then blame the compiler, operating system, hardware, or even (dare we say it) the professor. But the problem is usually right where you think it would be, in your code. Get to work and debug it before you blame those other components."}\cite[p.~127]{Arpaci-Dusseau2015} - -Plenty of educational, economical or methodological solutions are imaginable for this problem. -Higher focus on safety and testing in education, enforced internal company guidelines, or industry wide third party software certification requirements can be attempted. -For this study such constraints are out of scope, and the focus is on examining technical methods that detect and indicate mistakes as early as possible. - -\subsection{Technical Aspect} -\label{context::introduction::technical-aspect} -As established in the \nameref{context::introduction::human-aspect}, it can not be prevented for individuals to type erroneous code into their code editors, thus a technical solution must be found. -The problem on the technical side is that the \gls{compiler} is not able to detect all errors that are in the source code and humans are able to produce an executable program from problematic source code. 
-The resulting executable program might merely serve its purpose, and can contain severe technical mistakes that are not considered an error by the \gls{compiler}. -This is especially likely using low-abstraction languages like \gls{C} and \gls{C++} for \gls{os} development, where technical mistakes and intended behavior are very difficult to distinguish. - -The ultimate goal beyond this project must be to prevent any kind of vulnerable or erroneous software from being distributed to production systems. -This project's contribution towards that goal is to evaluate if \gls{Rust}'s \gls{compiler} is able to guarantee safe memory-management in the \gls{os}. - -\subsection{Time Aspect} -\label{context::introduction::memory-safety::time-aspect} -Tests are important for the functionality and safety of a \gls{app}. -In addition to the presence and quality of tests, their timing in the software life cycle plays an important role. -It is desirable to place software tests as early as possible in the software life cycle and detect mistakes then, rather than compromising production-systems that hold sensitive data and offer important services. 
- -\begin{figure}[ht] -\centering -\smartdiagramset{ - back arrow disabled=true, - module minimum width=5cm, - text width=4cm, - additions={ - additional item offset=1cm, - additional item width=4cm, - additional item border color=gray, - additional item text width=4cm, - additional arrow tip=to, - additional arrow line width=2pt, - additional arrow color=black, - }, -} -\smartdiagramadd[flow diagram]{ - Design, - Programming, - Compilation, - Distribution, - Execution - }{% - right of module2/When vulnerabilities are created, - right of module3/When they should be detected, - right of module5/When they create real problems -} -\smartdiagramconnect{to-}{module2/additional-module1} -\smartdiagramconnect{to-}{module3/additional-module2} -\smartdiagramconnect{to-}{module5/additional-module3} -\caption{Simple Software Life Cycle} -\label{fig:simple-software-lifecycle} -\end{figure} -A simple software life cycle model is visualized in \cref{fig:simple-software-lifecycle}. - -Assuming that there is a software design, even a flawless one, the next step is to translate this flawless design into flawless source code. -The is a manual translation step from one representation into another, leaving much room for mistakes. -The next component in the chain is the \gls{compiler} which processes the source code by translating it to the next representation, machine code. -This is the step that where source code analysis can be technically enforced. -Ideally, as previously explained, the \gls{compiler} should be able to detect the programmers mistakes, especially the ones that have a negative impact on memory-safety. -Advanced programmers can profit too, as everybody makes mistakes from time to time, depending on the level of focus, which is not a constant. - -When programmers talk about tests, they typically mean little functions they have to write to test the actual functions of their programs. 
-This kind of testing requires the execution of the program after its compilation, and is likely to neglect severe technical mistakes. -The method of testing the source code directly has much more potential, because relations between source code objects can be analyzed. -Various \textit{static code analyzer} tools have become popular for such analysis. -However, these are always optional and the developer cannot be forced to use them. -Much different is the situation when the analysis is part of the \gls{compiler}, which is the case in \gls{Rust}. - -\chapter{OS Development: Concepts, Conventions, Risks} -\label{context::os-dev-concepts} -In order to protect the memory of each executed program according to \cref{context::introduction::memory-safety::def}, the \gls{os} must be designed developed, and tested carefully. -This chapter explains concepts used in \gls{os} development and points out memory-safety critical operations in necessary detail. -This is done in preparation for the next \cref{context::weaknesses-mem-safety}, which explains weaknesses that result from memory management mistakes that were made in the attempt to implement the following concepts. - -Since the \gls{os} manages the system's hardware directly, many of the implementation and design choices depend on hardware design and architecture. -To bound the extent of this document, the explanations are limited to one contemporary architecture, \gls{amd64}, and further narrowed down by focusing on the operation in 64-Bit Long Mode\cite[p.~18]{AMD64Vol2}. - -\section{Virtualization} -\label{context::os-dev-concepts::virtualization} -The \gls{os} is considered the lowest software layer on the system and must know the very details of the system's hardware resources and perform raw access to it. -The goal is to make \gls{os} the only software on the system that is required to have this particular knowledge, so that other \glspl{app} could run on virtually any system. 
- -\subsection{Abstraction Layers} -The first step of \gls{virt}\footnote{The term \textit{virtualization} within the \gls{os} the jargon can be understood as abstraction} in the \gls{os} is to create a on top of architecture specific code and abstract it in form of an internal \gls{api}. -This layer abstracts at least the \gls{cpu} and memory\cite[p.~5-7]{Arpaci-Dusseau2015}. -Higher-level complex management algorithms can be implemented hardware-independently on top of this \gls{api}, making it reusable across different architectures. -The \gls{os} then provides an \gls{api} through which \glspl{app} can request access to these virtualized resources. -Higher-level \glspl{proglang} allow the \gls{app} developers to develop and run different programs comfortably, concurrently and presumably safely on any hardware which is supported by the \gls{os}. -\Cref{fig:system-abstraction-layers} shows a top-down model of the abstractions layers in the system. - -\begin{figure}[ht] -\begin{tikzpicture} - [start chain=main going below, every on chain/.append style={align=center, text width=35ex, minimum height=7ex, fill=blue!20}, >={LaTeX[]}, node distance=7ex] -% \node[on chain] {\gls{os} Abstraction Layers}; - \foreach \i/\itext [remember=\i as \iprior] in { - 1/Software Applications, - 2/System Libraries, - 3/OS API, - 4/OS {Core \& Drivers}, - 5/{CPU, Memory, I/O, ...} - } - { - \node (block \i) [on chain] {\itext}; - \ifnum\i>1 - \draw - (block \iprior.west) +(0ex,-1ex) coordinate (b\i) - edge["Synchronous API Calls" {left,text width=15ex,anchor=east, align=right}, bend right=90, ->] - (block \i.east -| b\i) +(0ex,+1ex); - \draw - (block \iprior.east) +(0ex,-1ex) coordinate (k\i) - edge["Asynchronous Events" {right,text width=15ex,anchor=west, align=left}, bend left=90, <-] - (block \i.east -| k\i) +(0ex,1ex); - \fi - } - - \draw[dashed,thick,red] - ($(block 2)!0.5!(block 3)$) ++(-20ex,0) edge["Userspace", "Kernelspace"'] ++(40ex,0); - \draw[dashed,black,thick] - ($(block 
4)!0.5!(block 5)$) ++(-20ex,0) edge["Software", "Hardware"'] ++(40ex,0); -\end{tikzpicture} -\caption{System Abstraction Layers} -% TODO: fill user/kernel space with colors -\label{fig:system-abstraction-layers} -\end{figure} - -Virtualization has different technical implications for different resource types, depending on their nature and available count. -Programs that are executed need at least the \gls{cpu} and a certain amount of memory. - -\subsection{Programming Languages} -High-level \glspl{proglang} that support or include the \gls{os} \gls{api} allow the programmer to develop \glspl{app} without knowing \gls{os} or hardware internals. -This requires conventions to be defined which must then be adopted by all involved software components including and above the "OS API" layer in \cref{fig:system-abstraction-layers}. -For \gls{amd64}, the most popular calling convention is defined in the \citetitle{Matz2009}\cite{Matz2009}, which is supported by the major \gls{C} \glspl{compiler} GCC and CLANG, as well as by \gls{Rust}'s compiler. -\FloatBarrier - -\subsection{Memory Virtualization: Paging} -Memory is a resource that is available in large quantities, made up from units reachable by single memory address. -The size of the smallest unit depends on the hardware and is ambiguously referred to as a \textit{memory word}, which is a fixed amount of Bytes depending on the architecture. -The whole memory can be divided into arbitrary groups of addressable units. -The \gls{os} supervises this structure, and programs must request memory from the \gls{os} to gain access to memory. -A method that combines virtualization and structuring is called paging and is explained in \cref{context::os-dev-concepts::hw-supported-mm}. - -The goal of memory virtualization is to use \glspl{vaddr} in \glspl{program}, which can be dynamically mapped to physical memory addresses at system runtime. 
- -\subsection{CPU Virtualization} -The \gls{cpu} is generally not explicitly requested by \glspl{program}, because any of the program's instructions implicitly requires the \gls{cpu} for being executed on the system at all. -Thus, the \gls{os} must chose at which time it executes which \gls{program} on the \gls{cpu}. - -The goal of \gls{cpu} virtualization is to seamlessly share a single \gls{cpu} among all \glspl{program} and the \gls{os} itself, without changing any of the \glspl{program}. -How this can be achieved is explained in \cnameref{context::os-dev-concepts::preemptive-multitasking}. - -\subsection{Program Execution} -\label{context::os-dev-concepts::virtualization::task} -As explained in this document it should be understood that a program consists of instructions that can be executed by the \gls{cpu}. -When the \gls{os} loads a program into memory and begins executing its instructions it is called a process.\cite[p.~25]{Arpaci-Dusseau2015} -A process can begin to exist before its execution, when the \gls{os} has internally created an entry for the process that at least contains a reference to the program and the arguments to be passed. -Processes that use the same program are not to be treated differently by the \gls{os} than any other process in terms of memory-safety, and must be prevented from mutual memory access. -These processes can differ in arguments that are passed to the program their, so that their runtime behavior can differ significantly. - -\subsection{Terminology} -Terms that are often used to describe various forms of executed code are often used ambiguously in various documents, manuals, and websites. -\Cref{tab:os-dev-concepts:task-terms} defines such terms and their relationships for the scope of this document. 
- -\begin{table} - \begin{tabularx}{\textwidth}{@{}lX@{}} - \toprule - \Gls{task} & \glsentrydesc{task} \\ - \Gls{program} & \glsentrydesc{program} \\ - \Gls{process} & \glsentrydesc{process} \\ - \Gls{thread} & \glsentrydesc{thread} \\ - \Gls{procedure} & \glsentrydesc{procedure} \\ - \Gls{function} & \glsentrydesc{function} \\ - \Gls{job} & \glsentrydesc{job} \\ - \Gls{app} & \glsentrydesc{app} \\ - \bottomrule -\end{tabularx} -\caption{Definition of commonly used terms for executable code} -\label{tab:os-dev-concepts:task-terms} -\end{table} -\FloatBarrier - - -\subsubsection{Demo: Process $\neq$ Program} -A great example for demonstrating this difference is the program \textit{"\gls{bbox}. \glsentrydesc{bbox}"}. - -Line 1 in \cref{shell::context::os-dev-concepts::program-process} shows a command that instructs the \gls{os} - \gls{LX} in this example - to execute the program \gls{bbox} three times, with different arguments each time. -The purpose is to demonstrate that the same program \gls{bbox} is instantiated thrice with completely different functionality each time, even existing simultaneously in the \gls{os}'s process list. - -\begin{listing}[ht] - \begin{minted}[escapeinside=??,linenos,autogobble,breaklines=true]{shell} - $ busybox sh -c "busybox ps -Ao pid,args | busybox grep busybox" - 9441 busybox sh -c busybox ps -Ao pid,args | busybox grep busybox - 9442 busybox ps -Ao pid,args - 9443 busybox grep busybox - \end{minted} - \caption{Multiple Processes From Same Program} - \label{shell::context::os-dev-concepts::program-process} -\end{listing} - -It invokes \gls{bbox} with the \code{sh} (a shell utility) argument which in turn receives the \code{-c} (command execute) argument and another argument containing the expected command. -This command consists of subsequent calls to \gls{bbox} invoking its builtin \code{ps} (a utility to print the process list) and \code{grep} (a tool to find text) utilities. 
-The process list is received by passing \code{ps} as the argument to \gls{bbox} in the second execution. -Lines 2 through 4 show the three \glspl{process} of the \gls{bbox} \gls{program} with different process ids and their respective arguments. - -\section{Preemptive Multitasking Concepts} -\label{context::os-dev-concepts::preemptive-multitasking} -Multitasking is another vague term used in the \gls{os} jargon. -In this document, it is the \gls{os}'s capability of switching processes without terminating them, effectively keeping their runtime state in a place that persists while the task is not actively executed on the system's \gls{cpu}. -The term \textit{preemptive} adds to multitasking the ability of switching tasks without relying on their cooperation, but instead being able to do this at any time the \gls{os} intends to do so. - -Preemptive multitasking enables a form of \gls{cpu} virtualization, as a task is not aware of being preempted and resumed. -As explained above, the \gls{cpu} resource doesn't have to be explicitly requested, as the request to execute a program implies a dependency on the \gls{cpu}. - -The previously explained virtualization is the foundation for the \gls{os} to perform preemptive multitasking inconspicuously towards the \glspl{app}. -This means that when a task is preempted and continued later, it observes no side-effects other than an elapse of time. -Preemptive multitasking need not be considered during development of single-threaded \glspl{app}. - -\subsection{Concurrent Resource Usage} -Switching tasks has different technical implications for different resource types, depending on their nature and quantity. - -A single \gls{cpu} system cannot execute more than one program at the same time, as it runs instructions through the \gls{cpu} one-by-one, implicitly holding the program state in form of the \gls{cpu} registers, which are preserved in between the instructions, and preserved between preemptive task switches. 
- -While it doesn't make sense for any instruction to request the \gls{cpu} per-se, there are valid use-cases for programs to request a specific amount of \gls{cpu} bandwidth within a specific amount of time to guarantee a certain amount of computing speed. -Other use-cases emerge when concurrent programs access the exact same resources and are intermixed by the \gls{os}, creating non-sequential resource usage pattern which may put the resource in an inconsistent state and lead to unexpected results for the application. -Within this document these technicalities are considered part of the application semantics and shouldn't affect the \gls{os} development. - -Hence for this document it is sufficient to recognize the \gls{os}'s responsibility of cleanly switching the program in execution periodically. - -In contrast to the \gls{cpu}, the main memory resource is available in limited but huge quantities. -Replacing the content of the memory is not necessary on a preemptive task switch, as long as the memory is not exhausted. -This has the effect that tasks which are currently not in execution on the \gls{cpu} still own a region of main memory. - -The \gls{os} must ensure that switching tasks is done properly for all resources to prevent interference and unintended behavior. -To ensure memory safety in this scenario, all data in the memory must be protected from unintended access, according to the definition of memory safety in \cref{context::introduction::memory-safety::def}. - -\subsection{Context Switching} -The context switch is the core functionality of the multitasking as it effectively switches to a different process, possibly by preempting the one that is currently running. -When the \gls{os} preempts a process, it needs to store and preserve the current process's context. -The context consists of all volatile resources that can possibly be overwritten by another process. -This is at minimum a set of \gls{cpu} registers depending on the specific architecture. 
- -The \gls{os} stores the preempted context in a well-known and protected memory location, so that it can be restored when this process is resumed. - -In preemptive multitasking, context switches are not considered voluntary, but rather by force. -This works by using the \gls{cpu}'s interrupt mechanism which has the ability to jump to an \gls{os} function in the event of an interrupt. -Interrupts for this use-case are usually triggered by programmed timer interrupts, occurring continuously and regularly. -The interrupt is signaled to the \gls{os} by the \gls{cpu}, so the lowest level of the process switching mechanism in the \gls{os} takes place in a hardware specific interrupt handler. - -Safety could be increased if the \gls{compiler} or in a more general sense the \gls{proglang} could assist in architecture specific code. -More details on this mechanism are given in \cref{context::os-dev-concepts::preemptive-multitasking-amd64}, but first it is necessary to understand the involved memory management mechanisms which are explained in \cref{context::os-dev-concepts::hw-supported-mm,context::os-dev-concepts::stackheap,context::os-dev-concepts::sf-handling-amd64,context::os-dev-concepts::stackheap-combined}. - -\section{Hardware-supported Memory-Paging} -\label{context::os-dev-concepts::hw-supported-mm} -To improve the efficiency and safety of memory management, developers of hardware and software have been collaborating to offload virtual memory address lookup and caching from the \gls{os} software to the hardware, the \gls{cpu}'s \gls{mmu} to be specific. -A hardware-implementation of the lookup algorithm is fast, and allows rudimentary memory permission runtime-checks to protect pages by leveraging the \gls{cpu}'s security rings\cite[p.~117,~p.~145]{AMD64Vol2}. - -Activating the 64-Bit long mode on \gls{amd64} makes the system rely primarily on paging memory management, thus the technique of memory segmentation can be neglected in this context. 
-This section provides information about hardware-supported memory paging and protection techniques. - -\subsection{Virtual Address Translation and Paging} -Paging with \glspl{vaddr} is one method of virtualizing memory and in this way transparently share the system's memory among running tasks and the \gls{os} itself, presumably in a safe way. -Even when using a language that supports direct memory addressing, \gls{app} developers don't have to consider paging and address translation in the logic of their programs, because all addresses in their program are virtual and are translated at runtime by the \gls{mmu}. -The translation itself is performed by the \gls{mmu} according to a map that is called page table, which is a structure maintained by the \gls{os} in the main memory. -This memory structure can be stored anywhere in memory, and the address is handed to the \gls{mmu} via a specific \gls{cpu} register, \textit{CR3} on \gls{amd64}. -The \gls{os} can maintain multiple page table structures, and can create different \gls{vaddr} spaces by changing the \gls{mmu}'s page-table pointer register. - -\subsubsection{Translation Caching} -The hardware caches the translation results for subsequent lookups in the \gls{tlb} \cite[p.~142-143]{AMD64Vol2}. -This greatly improves the speed for repeated access to the same \glspl{vaddr}, but is certainly dangerous for memory-safety. -Controlling the validity of these cache entries is the responsibility of the \gls{os} . -This is critical for memory-safety, as the cached virtual to physical address lookup results are different for each address space and shouldn't leak into other address spaces. - -If any lookup yields a cached result which originates from a different \gls{vaddr} space, the physical address is likely to belong to a memory region to which the current task shouldn't have access to. -What makes it more difficult to manage is that there are exceptions to this, e.g. 
when memory is intentionally shared between two processes or threads, which must be set up by the \gls{os} according to the processes' requests. - -\subsubsection{Pages: Chunks of Smallest Addressable Unit} -To avoid the need for storing a translation mapping for every single \gls{vaddr}, mappings are grouped into equisized regions, called \textit{page}s. -This works by encoding a page-offset in the \gls{vaddr}, together with the page's index in the page table. -The offset size depends on the chosen page-size, and can be calculated with the following formula, given page-size $p$, a power of two given in Byte: -\begin{equation} - f(p) = \log_2(p) -\end{equation} -For example, the \gls{amd64} default page-size of 4 KiB has a 12-bit offset, which theoretically leaves the other $64-12 = 52$ bits of the \gls{vaddr} for page-table indexing. -On \gls{amd64} there's an architectural limit of 48 bits for the \gls{vaddr}, thus the address constellation is different than explained here. -Details on this concrete implementation follow in \cnameref{context::os-dev-concepts::hw-supported-mm::multilevel-paging-amd64}. - -\subsection{Page-Faults} -\label{context::os-dev-concepts::hw-supported-mm::page-fault} -To improve the efficiency and safety of memory management, developers of hardware and software have been collaborating to offload virtual memory address lookup and caching from the \gls{os} software to the hardware, the \gls{cpu}'s \gls{mmu} to be specific. -The page-fault is a hardware-triggered, memory-safety critical event that must be handled by the \gls{os}. -It is triggered by the \gls{cpu}'s \gls{mmu} during the \gls{vaddr} lookup algorithm, when an instruction uses a \gls{vaddr} for which the target page is not available. -This happens for example if the indexed page is not present in main memory or has not been allocated at all. 
-It also happens when an instruction violates a page protection, of which four exist and can be configured by the \gls{os} on \gls{amd64} \cite[p.~145-148]{AMD64Vol2}: -\begin{itemize} - \item (1) Protect supervisor pages from user access - \item (2) Prevent writes into read-only pages - \item Prevent the \gls{cpu} from executing (3) non-executable pages and (4) user pages -\end{itemize} - -The \gls{os} must implement the page-fault handler to deal with it accordingly. -For example, the case of a non-existing mapping requires finding and allocating free physical memory and mapping it to the page by modifying the \gls{vaddr}'s page-table entry. -Or in case of protection violation it would simply indicate denied access. - -\subsection{Swapping} -The finite primary memory can only hold a finite number of virtual pages, and the \gls{os} is responsible for having the required pages present. -Besides the pages that contain the page-table itself, the pages that aren't required by the current instruction might be moved to secondary memory. -Swapping pages in and out of primary memory is risky as it requires transferring large amounts of raw memory content, but these safety analyses exceed the scope of this study. - -\subsection{Multi-Level Paging Concept} -\label{context::os-dev-concepts::hw-supported-mm::multilevel-paging-concept} -If only one page-table per \gls{vaddr} space was used that consists of $2^{52}$ page-table entries, which must at minimum store the physical address, it would require $\frac{52 * 2^{52} [Bit]}{8*1024^4 [Bit/Byte]} = 26624$ TiB of memory for each \gls{vaddr} space. -Even if only a handful of additional pages were allocated and mapped, the \gls{os} would still have to allocate this huge page-table. -This vast consumption of main memory is impractical and impossible for average systems, which rarely surpass 100 GiB of main memory. - -Therefore most systems use a hierarchy of page tables. 
-Using a hierarchical translation structure allows to save significant amounts of memory, as not every page-table of every level in the address space has to be allocated and present in main memory. - -% TODO picture this -% \begin{figure} -% \centering -% \includegraphics[width=\textwidth]{gfx/TODO-nlevel-paging} -% \begin{tikzpicture} -% \def\x{9ex} -% % memory cells -% \path[draw,font=\small] -% % cells -% (0,0) -% rectangle ++(\x, 1) -% rectangle ++(\x,-1) -% rectangle ++(\x, 1) -% rectangle ++(\x,-1) -% rectangle ++(\x, 1) -% % cell text -% (0.5*\x,0.5) -% node(text-a){idx$_n$} ++(\x,0) -% node(text-b){idx$_{n-1}$} ++(\x,0) -% node(text-c){...} ++(\x,0) -% node(text-d){idx$_1$} ++(\x,0) -% node(text-e){offset$_{page}$}++(\x,0) -% % bit numbers -% (0,1) -% node[anchor=south]{63} -% (5*\x,1) -% node[anchor=south]{0} -% ; -% % braces -% \foreach \y in {1,...,5} { -% \pgfmathparse{\y-1} -% \draw[decorate,decoration={brace,mirror}] -% ($(\pgfmathresult*\x,-1ex)!0.1!(\y*\x,-1ex)$) -- node[shape=coordinate](brace-\y){} -% ($(\pgfmathresult*\x,-1ex)!0.9!(\y*\x,-1ex)$); -% } -% -% \draw -% % cells -% (0*\x,-1*\x) -% rectangle ++( \x, 0.5*-1) -% rectangle ++(-\x,-1) -% rectangle ++( \x, 0.5*-1) -% (1*\x,-3*\x) -% rectangle ++( \x, 0.5*-1) -% rectangle ++(-\x,-1) -% rectangle ++( \x, 0.5*-1) -% (2*\x,-5*\x) -% rectangle ++( \x, 0.5*-1) -% rectangle ++(-\x,-1) -% rectangle ++( \x, 0.5*-1) -% ; -% -% \end{tikzpicture} -% \caption{Hierarchical Virtual Paging} -% \label{fig:paging-hierarchy-abstract} -% \end{figure} -% \FloatBarrier - -\subsection{Multi-Level Paging on AMD64} -\label{context::os-dev-concepts::hw-supported-mm::multilevel-paging-amd64} -On \gls{amd64} "a four-level page-translation data structure is provided to allow long-mode operating systems to translate a 64-Bit virtual-address space into a 52-Bit physical-address space."\cite[p.~18]{AMD64Vol2}. 
-This allows the system to only hold the \textit{PML4} table, the one which is currently referenced by the \textit{Page Map Base Register (CR3)}, available in main memory. - -\Cref{fig:virtual-addr-transl} shows the 64-Bit \gls{vaddr} composition on \gls{amd64}, which uses four levels of page tables. -Counterintuitively, the page-tables are not called level-\textit{n}-page-table, but the levels received distinct names in \citetitle{AMD64Vol2}. -The most-significant Bits labelled as \textit{Sign Extend} are not used for addressing purposes, but must adhere to the canonical address form and simply repeat the value of the most-significant implemented Bit \cite[p.~130]{AMD64Vol2}. -The least significant Bits represent the offset within the physical page. -The four groups in between are used to index the page-table at their respective level. - -\begin{figure} -\centering -\includegraphics[width=\textwidth]{gfx/Virtual-to-Physical-Address-Translation-Long-Mode.png} -\caption{Virtual to Physical Address in Long Mode\cite{AMD64Vol2}} -\label{fig:virtual-addr-transl} -\end{figure} -\subsubsection{Translation Scheme 4 KiB and 2 MiB Pages} -The \gls{amd64} architecture allows configuring the page-size, two of which will be introduced in this section. -\Cref{tab:page-transl-vaddr-composition} displays the \gls{vaddr} composition for the 4KiB and 2MiB page-size modes on \gls{amd64}. -The direction from top to bottom in the table corresponds to most significant to least significant - left to right - in the \gls{vaddr}. -The \textit{sign extension} Bits cannot be used for actual information but act as a reservation for future architectural changes. 
- -\begin{table} - \begin{tabular}{l | c | c} - Description & Bits in 4 KiB Pages & Bits in 2 MiB Pages \\ - \hline - Sign Extend & 16 & 16 \\ - Page-Map-Level-4 Offset & 9 & 9 \\ - Page-Directory-Pointer Offset & 9 & 9 \\ - Page-Directory Offset & 9 & 9 \\ - Page-Table Offset & 9 & - \\ - Physical Page Offset & 12 & 21 \\ - \end{tabular} - \caption{Paging on \gls{amd64}: Virtual Address Composition 4KiB/2MiB pagesizes} - \label{tab:page-transl-vaddr-composition} -\end{table} - -\begin{figure} -\centering -\includegraphics[width=\textwidth]{gfx/amd64-4kb-page-translation-long-mode} -\caption{4-Kbyte Page Translation—Long Mode\cite{AMD64Vol2}} -\label{fig:4kb-page-transl} -\end{figure} - -\Cref{fig:4kb-page-transl} shows the detailed \gls{vaddr} composition for 4 KiB pages, using four levels of page-tables. -It uses four sets of 9-Bit indices in the \gls{vaddr}, one per hierarchy level, followed by the 12-Bit page-internal offset. - -An alternative approach is displayed in \cref{fig:2mb-page-transl}, using 2 MiB sized pages. -It uses three sets of 9-Bit indices for the page-tables, and a 21-Bit page-internal offset. -Increasing the page-size improves speed and memory-usage and decreases the granularity. -In this specific example the hierarchy is reduced by one level of page-tables. -This reduces the overall amount of storage required for the page-tables and causes the lookup algorithm to finish faster. - -\begin{figure} -\centering -\includegraphics[width=\textwidth]{gfx/amd64-2mb-page-translation-long-mode} -\caption{2-Mbyte Page Translation—Long Mode\cite{AMD64Vol2}} -\label{fig:2mb-page-transl} -\end{figure} - -The other supported page sizes, 4 MiB and 1 GiB, as well as intermixing page sizes through the different levels don't add new insight into the mechanism and don't need to be detailed here. 
- -\section{Stack And Heap: Basic Concepts} -\label{context::os-dev-concepts::stackheap} -In \gls{proglang} and \gls{os} literature, the terms \gls{stack} and \gls{heap} are ubiquitous. -Research into their original definition wasn't conclusive, indicating that they are to be taken as concepts rather than absolutely defined methods. -They might be implemented and used differently on various architectures, \glspl{proglang} and \glspl{os}. - -The hardware manuals \citetitle{AMD64Vol1} and \citetitle{AMD64Vol2} have no mention of the word \textit{heap}, but use \textit{stack} hundreds of times, indicating that only the \gls{stack} concept is implemented in hardware. -It is comparable to computers that had physical rewindable memory tapes, which is conceptually similar to the \gls{stack} implementations in today's memory management. - -Likely the first mention of the term \gls{heap} with regard to memory management is found in \citetitle{Mailloux1969}. -It was introduced featuring "dynamic storage allocation", further described to have "the ability to create and manipulate objects whose lifetimes are not so restricted. This ability implies the use of an additional area of storage, the “heap”, in which garbage-collection techniques must be used."\cite[p.~8]{Mailloux1969} -Since then, the \gls{heap} has been implemented in all major \glspl{proglang}. - -This section explains both concepts with a tendency towards the \gls{amd64} architecture, the \gls{C} and \gls{Rust} \glspl{proglang} and their usage for bare-metal \gls{os} and \gls{LX} \gls{app} development. - -\subsection{Stack: Hardware-Backed Abstract Type} -\label{context::os-dev-concepts::stackheap::stack} -In summary, the \gls{stack} is a memory model for structuring a contiguous region of memory. -It grows by adding new data entries on top of each other. -According to the \gls{stack} analogy, only the topmost element can be accessed and removed, thus it behaves like a Last-In-First-Out data structure. 
- -The \gls{amd64} manuals conjunctionally describe how the \gls{stack} is used and influenced by various instructions on this architecture. -Here it grows from numerically higher to numerically lower addresses, whereas the numerically highest address is called the stack bottom, and the current numerically lowest address is the stack top. -In 64-Bit long mode on \gls{amd64}, the \gls{cpu} doesn't consider the stack to be sized or explicitly bounded, which is highly safety critical. - -The \gls{stack} is typically allocated per process or thread and is used to store each procedure's \gls{sf}. -Each \gls{sf} is automatically cleaned up or simply forgotten once the procedure has completed. -When multiple procedure calls are nested, each preceding \gls{sf} remains on the stack in front of the next one. -\cref{fig:stack-with-two-frames}\footnote{By R. S. Shaw - Own work, Public Domain, \url{https://commons.wikimedia.org/w/index.php?curid=1956587}} displays a \gls{stack} that contains two \glspl{sf} from different procedures. -In this picture, the \gls{stack} grows upwards. - -\begin{figure}[ht] -\centering -%\begin{wrapfigure}{R}{0.5\textwidth} - \includegraphics[width=0.49\textwidth]{gfx/TODO-Callstacklayout} - \caption{An upwards growing stack with two stack-frames} - \label{fig:stack-with-two-frames} -%\end{wrapfigure} -\end{figure} -% TODO: draw stack with multiple stack-frames -\FloatBarrier - -\subsubsection{Safety Concerns} -To achieve memory-safe \gls{stack} management in the \gls{os}, each procedure must only be able to access its own particular \gls{stack} and possible references via its arguments. -This is not the case in a regular \gls{C} program, where the called procedure is able to modify the previous \gls{sf}, which is demonstrated in \cnameref{rnd::weakness-mitig-prev::stack-protection}. -Additionally, the \glspl{stack} must be prevented from growing into other memory zones like the \gls{heap}. 
-\Gls{stack} management is memory-safety critical for \gls{os} developers when implementing memory management for multitasking within the \gls{os}; this is one of the main subjects in \cref{rnd::existing-os-dev-with-rust,rnd::imezzos-preemptive-multitasking}. - -\subsection{Heap: Organized Chaos} -\label{context::os-dev-concepts::stackheap::heap} -\Gls{heap} is an ambiguous term that names two computer related models: -a data structure in theoretical computer science, and a memory model in system resource management. -This document refers to the latter. - -The \gls{heap} is managed by the \gls{os} to keep track of allocated memory on behalf of all \glspl{app} on the system. -The algorithms within the \gls{os} to manage the \gls{heap} can be arbitrarily complex, and the choice is based on the trade-offs between complexity, efficiency and speed. -Safety is explicitly omitted from the trade-offs list, as it should never be traded against anything else. - -\Gls{app} developers make use of dynamic \gls{heap} allocation requests via the \gls{os}'s \gls{api} if the memory usage for their program is not predictable at the time of development. -The requests will be processed by the \gls{os} and granted or denied according to the ratio of requested memory and available system memory. -The \gls{app} is responsible for returning no-longer required memory to the \gls{os}, which makes it available to other \glspl{app} in the system. - -Memory which is not cleaned up properly is blocked until the \gls{app} is terminated. -If \gls{heap} allocations within the \gls{os} are not cleaned up properly, the allocated memory is lost until the whole system is restarted. -Between properly cleaning up and losing memory allocations is a whole range of possible memory-safety issues, which are explained in \cref{context::introduction::memory-safety-violation-in-sw}. 
- -\section{Stack Frame Handling on AMD64} -\label{context::os-dev-concepts::sf-handling-amd64} -The usage of the \gls{stack} is tightly coupled with control flow instructions in conjunction with two registers, the Stack-Frame Base Pointer (RBP) and the Stack Pointer (RSP). -The instructions that use these registers and explicitly or implicitly work with the stack\cite[p.~83]{AMD64Vol1} can be grouped into the following categories. -Together they can be used to perform \gls{stack} based procedure calls, as described in \cref{context::stack-frame-management-instructions}. - -\subsection{Direct Stack Data Management Instructions} -\code{push} takes a value operand which is to be pushed onto the stack. -The address in RSP moves towards numerically lower addresses with every \code{push} instruction, which stores a new data entry on top. -The order is to first change the RSP and then copy the value to its new address. - -\code{pop} takes a storage reference operand - \gls{cpu} register or memory address. -It works in the opposite direction to \code{push}. -It first consumes the top-most data entry and stores it at the operand location, then moves the RSP address towards the numerically higher RBP address. - -When RBP and RSP point to the same address, the stack is considered empty. - -\subsection{Procedure Call Instructions} -The \code{call} and \code{ret} instructions control the instruction flow by calling another instruction procedure\footnote{loosely synonymous with function}. - - -The \code{call} instruction takes the address of the instruction that is to be called. -Before jumping to the instruction at the given address, it \code{push}es the current RIP (instruction pointer) register onto the \gls{stack}. - -\code{ret} takes no operand, but instead \code{pop}s the \gls{stack}'s top entry. -The consumed value is used as a jump address. 
- -As \code{push} and \code{pop} use the RSP register, the called procedure is responsible for finishing with the RSP at the same position as when it was entered. -For example, \code{push}ing some value onto the stack before the end of the function would cause the \code{ret} to jump to that address instead of returning to the caller. - -\subsection{Stack Frame Management Instructions} -\label{context::stack-frame-management-instructions} -When a procedure is called, the stack is set up with the \gls{sf}, which consists of the four components listed in \cref{lst:amd64-stack-frame-components} -\cite[p.~48]{AMD64Vol1}: - -\begin{listing}[h] -\begin{enumerate} - \item{% - Parameters passed to the called procedure (created by the calling procedure). \\ - \textit{Only if parameters don't fit the \gls{cpu} registers} - } - \item{% - Return address (created by the \code{call} instruction). \\ - \textit{Always used by \code{call}} - } - \item{% - Array of stack-frame pointers (pointers to stack frames of procedures with smaller nesting-level depth) which are used to access the local variables of such procedures. \\ - \textit{Depends on support and implementation of nested functions in the \gls{compiler}} - } - \item{% - Local variables used by the called procedure. \\ - \textit{This includes the variables passed via \gls{cpu} registers} - } -\end{enumerate} -\caption{\glsentrytext{amd64} Stack-Frame Components} -\label{lst:amd64-stack-frame-components} -\end{listing} - -The \gls{amd64} manual also lists \code{enter} and \code{leave} as instructions to \textit{"provide support for procedure calls, and are mainly used in high-level languages."}\cite[p.~48]{AMD64Vol1}. -The latter claim could not be verified by inspecting binaries produced by the \gls{C} and \gls{Rust} \glspl{compiler}. - -Instead, these \glspl{compiler} generate a sequence of \code{push}, \code{mov} and \code{sub} instructions to set up the \gls{stack}. 
-There are instructions before and after the procedure's logic, taking care of the technicalities of \gls{stack} management. -These instruction groups within the called procedure are called prologue and epilogue. - -% \subsection{Full Procedure Call Example} -% \label{context::os-dev-concepts::sf-handling-amd64::procedure-call-example} -% This section combines the separate categories into one complete example that shows how the \gls{stack} is used by various \gls{cpu} instructions to perform procedure calls. -% The following code samples are extracted from a disassembled binary which was originally created using \gls{Rust}. -% The Assembler that's shown uses Intel Mnemonic, which generally operates from right to left. -% For example, \code{mov a, b} copies b to a. -% -% \cref{code::context::examples::func-callee-rust} shows the \gls{Rust} source code of the function \textit{sum}. -% -% \begin{listing}[htb] -% \tikzset{/minted/basename=callee-rust} -% \begin{minted}[autogobble,linenos,breaklines=true]{rust} -% TODO -% \end{minted} -% \caption{Procedure Call Example: Callee in Rust} -% \label{code::context::examples::func-callee-rust} -% \end{listing} -% -% \begin{listing}[htb] -% \tikzset{/minted/basename=callee-rust} -% \begin{minted}[autogobble,linenos,breaklines=true]{nasm} -% TODO -% \end{minted} -% \caption{Procedure Call Example: Callee in Assembly} -% \label{code::context::examples::func-callee-assembly} -% \end{listing} -% -% \Cref{code::context::examples::func-caller-asm} shows a snippet of the calling function. -% It stores the arguments within the registers according to the calling convention. -% The caller doesn't alter the stack-frame pointer (RBP) or the stack pointer (RSP) registers before call, hence the called function must restore these if it alters them. 
-% -% \begin{listing} -% \begin{minted}[escapeinside=??,highlightlines={},autogobble,linenos,breaklines=true]{nasm} -% TODO -% \end{minted} -% \caption{Procedure Call Example: Caller Assembly} -% \label{code::context::examples::func-caller-asm} -% \end{listing} -% -% \begin{listing} -% \begin{minted}[escapeinside=??,highlightlines={},autogobble,linenos,breaklines=true]{rust} -% \end{minted} -% TODO -% \caption{Procedure Call Example: Caller in Rust} -% \label{code::context::examples::func-caller-rust} -% \end{listing} -% -% % \balloon{comment}{ -% -% % RDI, RSI, RDX, RCX, R8, R9, XMM0–7 -% -% \begin{table}[ht!] -% \centering -% \begin{tabular}{ r | >{\columncolor{YellowGreen}}c | l } -% \multicolumn{1}{r}{RBP offset} & \multicolumn{1}{c}{Content} & \\ -% $\uparrow$ & \cellcolor{white} & \\ -% & \cellcolor{white} \dots \textit{beyond current stack} \dots & \\ -% \hhline{~-~} -% 0 & \textit{Previous RSP} & $\leftarrow$ RBP \\ -% \hhline{~-~} -% \vdots & \dots~~\textit{local variables}~~\dots & \\ -% \hhline{~-~} -% -0x30 & 3rd arg & \\ -% \hhline{~|-|~} -% -0x38 & 2nd arg & \\ -% \hhline{~-~} -% -0x40 & 1st arg & \\ -% \hhline{~-~} -% \vdots & \dots~~\textit{local variables}~~\dots & \\ -% \hhline{~-~} -% -0x60 & rdi & \\ -% \hhline{~-~} -% & \dots~~\textit{local variables}~~\dots & \\ -% \hhline{~-~} -% $RBP-RSP$ & \textit{unknown} & $\leftarrow$ RSP \\ -% \hhline{~-~} -% & \cellcolor{white} & \\ -% $\downarrow$ & \cellcolor{white} & \\ -% \end{tabular} -% \end{table} -% -% \cref{code::context::examples::func-prologue} shows \textit{sum}'s prologue. -% The corresponding epilogue is displayed in \cref{code::context::examples::func-epilogue}. -% The comments explain the code line by line, please read them to understand what exactly happens at each instruction. - -\Cref{code::context::examples::func-prologue,code::context::examples::func-epilogue} show assembly code of a callee's procedure pro- and epilogue. 
-Respectively, they show how the arguments are copied from the CPU registers onto the stack on entry, and how the return value is copied from the stack to the CPU register before return. - -\begin{listing}[ht!] -\begin{minted}[escapeinside=??,linenos=false,breaklines=true]{nasm} -$7490: push rbp ; save the stack-frame pointer on the stack -$7491: mov rbp,rsp ; set the stack-frame base pointer from the stack pointer -$7494: sub rsp,0x50 ; allocate 0x50 Bytes for arguments and local variables -$7498: mov QWORD PTR [rbp-0x30],rdi ; copy 1st arg onto stack -$749c: mov QWORD PTR [rbp-0x28],rsi ; copy 2nd arg onto stack -$74a0: mov QWORD PTR [rbp-0x20],rdx ; copy 3rd arg onto stack -\end{minted} -\caption{Function Prologue with three Arguments} -\label{code::context::examples::func-prologue} -\end{listing} - -\begin{listing}[ht!] -\begin{minted}[linenos=true,breaklines=true]{nasm} -$74ee: mov rax,QWORD PTR [rbp-0x48] ; store return value in RAX -$74f2: add rsp,0x50 ; set stack pointer to where stack-frame pointer was stored -$74f6: pop rbp ; restore the stack-frame pointer -$74f7: ret ; return to the caller, following the address on the stack -\end{minted} -\caption{Function Epilogue} -\label{code::context::examples::func-epilogue} -\end{listing} -\FloatBarrier - -%\cref{fig:proc-call-example-mem} displays -% -%\begin{figure} -%\centering -%\includegraphics[width=0.95\textwidth]{gfx/call-procedure-memory-content.png} -%\caption{Memory Layout Throughout The Procedure Call Steps} -%\label{fig:proc-call-example-mem} -%\end{figure} -%\FloatBarrier - -\section{Stack And Heap: Combined Usage} -\label{context::os-dev-concepts::stackheap-combined} -\Glspl{program} combine the usage of \gls{stack} and \gls{heap} and use them for different purposes. - -Depending on the \gls{proglang}'s \gls{compiler} and the target system, the responsibility of writing the memory management code falls either on the developer, the \gls{compiler}, or both.
-\Cref{tab:stack-heap-usage-responsbility} describes the usage-cases and responsibilities for \gls{stack} and \gls{heap} with regard to user-space and \gls{os} development. - -\begin{table}[ht!] - \begin{tabularx}{0.99\textwidth}{XX} - \toprule - \multicolumn{2}{c}{Stack and Heap Usage Overview} \\ - \toprule - \multicolumn{1}{c}{\Gls{stack}} & \multicolumn{1}{c}{Responsibility} \\ - \hhline{--} - \Gls{sf} (return address, frame pointer, see \cref{context::os-dev-concepts::stackheap::stack}). - - Procedure-local fixed-sized variables (primitive types, custom fixed-size structures, references, fixed-length arrays, etc.) - - Procedure arguments (allocated as and copied to local variables) - & - In user space development the \gls{stack} management code is fully generated \tikzmarkcircle{1} by the \gls{compiler} in \gls{C} and \gls{Rust}, using a standardized calling convention for the platform. - The \gls{os} must implement support for any standardized calling conventions used by the \gls{compiler} for user-space \glspl{program}. - \\ - \bottomrule - \multicolumn{1}{c}{\Gls{heap}} & \multicolumn{1}{c}{Responsibility} \\ - \toprule - Dynamically sized data structures like linked-lists, extendable arrays, queues, trees - - Can be used for unstructured arbitrary data. - & - Manual calls for allocation and freeing memory are written by the programmer in \gls{C} using the \gls{os} \gls{api}. - \gls{Rust} has support of generating these calls on behalf of the programmer on supported platforms via its standard library \tikzmarkcircle{2}. - \\ - \bottomrule - \end{tabularx} - \begin{tabularx}{\textwidth}{l@{ }X} - \tikzmarkcircle{1} & - The \gls{app} programmer is in control of the source code that triggers the auto generated content. - E.g. defining a function with local variables in the code will yield generated instructions to handle the \gls{sf}. \\ - \tikzmarkcircle{2} & - The origin of the auto generated content remains the source code, written by the programmer. 
- Therefore it is the choice of the programmer whether to place the variables on the \gls{stack} or on the \gls{heap}. - \end{tabularx} -\caption{Stack and Heap Usage and Responsibility} -\label{tab:stack-heap-usage-responsbility} -\end{table} -\FloatBarrier - -\subsection{Arrangement} -\label{context::os-dev-concepts::stackheap-combined::arrangement} -Both zones must be organized separately and arranged within the \gls{vaddr} space which is assigned to the process or thread. -\Cref{fig:heap-malloc-stack-example-program} shows a \gls{C} \gls{program} and a simplified model of the hypothetical address space that would result on execution. -In this example, the \gls{stack} and \gls{heap} are placed on opposite sides of the \gls{vaddr} space, and will grow towards each other. - -\begin{figure}[ht!] - \centering - \includegraphics[width=0.6\textwidth]{gfx/TODO-heap-stack-example-program} - %\caption{Stack/Heap Arrangement And Dynamic Allocation \footnote{Prof. Jennifer Rexford, \url{http://slideplayer.com/slide/3288060/}}} - \label{fig:heap-malloc-stack-example-program} - % TODO: redraw -\end{figure} -\FloatBarrier - -The entries above \textit{"Heap"} are the different parts of the \gls{compiler} output for this program, and are loaded by the \gls{os} before the execution. -The colors in the code correspond to the entries in the memory model. -Text, which corresponds to the red box, contains the program instructions. -RoData is read-only memory content, in this case the string literal \textit{"string"}. -BSS contains the variable \textit{iSize}. -Lastly the \gls{stack} holds the pointer variable \textit{p}, which will reference the result of the \textit{malloc(iSize)} memory allocation. - -\subsection{Safety Concerns} -\label{context::os-dev-concepts::stackheap-combined::safety-concerns} -Even though \gls{vaddr} spaces are huge on \gls{amd64}, there is a slight chance that the \gls{stack} and \gls{heap} will interfere.
-This could be due to direct collision, or more subtly by not detecting invalid mutual references. - - -\subsection{Programming Language Support} -In many \glspl{proglang} that are commonly used for \gls{app} development, the code for allocation and cleanup of \gls{heap} memory is generated by the \gls{compiler} on behalf of the programmer. -Such languages rely on the \gls{os} memory management \gls{api} and are thus not suited for developing the \gls{os} itself. -Vice versa, languages which are suited for \gls{os} development usually don't generate \gls{heap} management code and therefore don't ensure memory-safety on the \gls{heap}. - -\gls{Rust} might be an exception to this, which has to be confirmed or denied by the end of this work. - -\section{Preemptive Multitasking on \glsentrytext{amd64}} -\label{context::os-dev-concepts::preemptive-multitasking-amd64} -On \gls{amd64}, the \gls{cpu}'s interrupt mechanism does not switch the full content of the context, but only handles the registers that are necessary to successfully jump to the interrupt function: RFLAGS, RSP, RBP, RIP\footnote{Segment registers are neglected}. - -\subsection{The Process Context} -The content of a process's context on \gls{amd64} is given in \cref{tab:task-minimum-context-registers}. -All these registers need to be stored and restored by the \gls{os}'s interrupt handler for process preemption. - -\begin{table} - \begin{tabularx}{\textwidth}{| c | X | X |} - \hline - \textbf{descriptive name} & - \textbf{register names on amd64} & - \textbf{description} \\ - \hline - the instruction pointer register & RIP & address of the next instruction to be fetched \\ - \hline - the stack pointer register & RSP & address of current position in stack \\ - \hline - the flags register & RFLAGS & various attributes, e.g.
the interrupt flag \\ - \hline - all general-purpose registers & RAX, RBX, RCX, RDX, RDI, RSI, RBP, RSP, R8–R15 & arbitrary data \\ - \hline - \end{tabularx} - \caption{Minimum Context Registers on amd64\cite[p.~28]{AMD64Vol2}} - \label{tab:task-minimum-context-registers} -\end{table} - -\subsection{Storing The Context On The Stack} -In this scenario, the context is stored on the \gls{stack} of the function that is interrupted. -\Cref{fig:amd64-long-mode-interrupt-stac} pictures the \gls{stack} layout on interrupt entry. -In order to leverage an interrupt for a context switch, the interrupt function needs to replace these values on the \gls{stack} with values for the new context. -CS (Code-Segment) and SS (Stack-Segment) have no effect in \gls{amd64} 64-Bit mode\cite[p.~20]{AMD64Vol1} and can remain unchanged. -The \gls{os} developer needs to know the exact address where on the \gls{stack} this data structure has been pushed by the \gls{cpu}, and must then manipulate these addresses directly. -This type of manipulation is inherently dangerous and can not be easily checked by the \gls{compiler}. -The function that handles the interrupt must then use the instruction \textit{iretq}\cite[p.~252]{AMD64Vol2}, to make the \gls{cpu} restore the partial context from the \gls{stack} and continue to function pointed to by the RIP. - -\begin{figure} -\centering -\includegraphics[width=0.8\textwidth]{gfx/amd64-long-mode-stack-after-interrupt.png} -\caption{Long-Mode Stack After Interrupt\cite[p.~252]{AMD64Vol2}} -\label{fig:amd64-long-mode-interrupt-stac} -\end{figure} - -For a full context-switch, the other registers that are part of the context need to be handled by the \gls{os}'s interrupt function. - -\chapter{Weaknesses That Affect Memory-Safety} -\label{context::weaknesses-mem-safety} -The previous \cref{context::introduction,context::os-dev-concepts} describe the concepts of memory management on \gls{amd64} and how mistakes might come into existence. 
-This chapter describes the related software weaknesses which are too commonly found. -The underlying weakness classes are explained alongside real-world and exemplary manifestations in \gls{C}. -The latter are ported and compared to functionally equivalent versions written with \gls{Rust} in \cref{rnd::weakness-mitig-prev::porting-c-vulns}. - -%\section{\glsentrylong{CWE}} -%\label{context::weaknesses-mem-safety::cwe} -Ongoing effort of collecting, analyzing and classifying vulnerabilities and their underlying weaknesses has been expended by \textit{The MITRE Corporation} in the form of the \gls{CWE}. -It has grown to a large relational database of typed weaknesses. -The following information is provided for enumerations of the type weakness class: -\begin{markdown} -1. Description -1. Applicable Platforms -1. Common Consequences -1. Likelihood of Exploit -1. Demonstrative Examples -1. Potential Mitigations -1. Relationships -\end{markdown} - -The relevant weaknesses for this study are children of the umbrella weakness \citetitle{MITRE-CWE-633}. -Their information about demonstrative examples and potential mitigations is relevant for this study. -This and the following \cref{context::weakness-mitigation} present this information for \gls{CWE-633} and selected children. - -% TODO test the autocite command with footnotes -\section{\glsentrylong{CWE-119}} -\label{context::weaknesses-mem-safety::cwe::119} -One of its child weaknesses, \gls{CWE-119}, is particularly interesting. -For a manifestation of this weakness, a direct violation of the memory-safety defined in \cref{context::introduction::memory-safety::def} must have occurred, which "can cause read or write operations to be performed on memory locations that may be associated with other variables, data structures, or internal program data.
-As a result, an attacker may be able to execute arbitrary code, alter the intended control flow, read sensitive information, or cause the system to crash"\cite{MITRE-CWE-119}. -This can happen on certain languages, which "allow direct addressing of memory locations and do not automatically ensure that these locations are valid for the memory buffer that is being referenced. -\gls{C}, \gls{C++}, \gls{asm} and languages without memory management support"\autocite{MITRE-CWE-119}. -This formulation of languages prone to this weakness is incorrect, as it doesn't conform with the earlier statement of languages that "allow direct addressing of memory locations". -Direct memory addressing support doesn't imply a lack of memory management support. - -There are languages that provide memory management support and still allow direct memory addressing, which is interesting for \gls{os} development. -\gls{Rust} is one of these languages, although it requires the developer to explicitly acknowledge all direct memory access operations with the \textit{unsafe} keyword. -More information on \gls{Rust} follows in \cref{rnd::rust}. - -\section{Statistics} -\label{context::weaknesses-mem-safety::cwe::statistics} -One of the main reasons for me to work on this topic is the increasing number of vulnerabilities based on memory-safety issues. - -This section is intended to express the weakness's severity in real-world software based on available statistics. -The only data available are based on publicly available sources, thus the completeness of it is questionable, because many organizations might choose to not disclose their vulnerabilities, either to protect their reputation or for security reasons as explained in \cref{context::introduction::memory-safety-violation-in-sw}. -The data and visualizations are supplied by the \gls{NVD}, which collects the data based on the umbrella weakness \gls{CWE-635} that was specifically created for the \gls{NVD}. 
-The numbers of these selected weaknesses are detailed in the following figures, the rest is grouped as \textit{other}. - -\Cref{fig:vulnerability-ratio-history,fig:vulnerability-counts-history} display a decade of data on vulnerabilities grouped by their \gls{CWE} category. -The category called \textit{buffer\footnote{A bounded chunk of memory used by programs to store and exchange data} errors} \gls{CWE-119}. -In \cref{fig:vulnerability-ratio-history} it has the color light blue, 2nd from the bottom in the legend, and in \cref{fig:vulnerability-counts-history} it has the color blue, 2nd from the top in the legend. - -\begin{figure} -\centering -\includegraphics[width=\textwidth]{gfx/Relative-Vulnerability-Type-Totals-By-Year} -\caption{Vulnerability Relative Counts History\cite{NVD}} -\label{fig:vulnerability-ratio-history} -\includegraphics[width=\textwidth]{gfx/Vulnerability-Type-Change-by-Year} -\caption{Vulnerability Absolute Counts History\cite{NVD}} -\label{fig:vulnerability-counts-history} -\end{figure} - -\begin{table} - \centering - \begin{spreadtab}{{tabular}{ c | c | c }} - @ Year & @ \% & @ count \\ - \hline - @ 2007 & 6.75 & 490 \\ - @ 2008 & 10.01 & 550 \\ - @ 2009 & 9.84 & 530 \\ - @ 2010 & 11.58 & 530 \\ - @ 2011 & 15.95 & 600 \\ - @ 2012 & 13.67 & 650 \\ - @ 2013 & 14.63 & 670 \\ - @ 2014 & 9.69 & 800 \\ - @ 2015 & 15.18 & 1050 \\ - @ 2016 & 18.46 & 1150 \\ - @ 2017 & 16.34 & @ - \\ - \hline - @ Average & :={round(sum([0,-11]:[0,-1])/11, 2)} & @- \\ - \end{spreadtab} - \caption{Vulnerability \textit{"buffer error"} Counts History} - \label{tab:vulnerability-buffer-error-by-history} -\end{table} - -In \cref{tab:vulnerability-buffer-error-by-history}, the column \textit{relative count} represents \cref{fig:vulnerability-ratio-history}, and the column \textit{absolute count} represents \cref{fig:vulnerability-counts-history}. 
-With 16.34 percent of all vulnerabilities in 2017, and an average of 12.92 percent over the eleven years from 2007 to 2017, \gls{CWE-119} makes up a significant part of real-world weaknesses. - -\subsection{Vulnerable APIs in Linux and C/C++} -\label{context::weaknesses-mem-safety::vuln-apis-linux-c} -\glspl{api} are a ubiquitous means for programmers to access all kinds of functionality, serving as interfaces to network services, providing existing algorithms in form of libraries and frameworks, or interfacing with the local \gls{os}. -It is inherently dangerous to expose any sort of functionality through an \gls{api}, as it might contain bugs that will be spread widely with rising popularity. -Every \gls{os} needs to provide an \gls{api} for its core functionality to be useful and extendable. - -A very popular and widely supported \gls{os} is \gls{LX}. -The system libraries and the kernel are written in \gls{C}, the latter containing some hardware specific \gls{asm} code. -\gls{LX} is very popular for embedded systems, network servers and large-scale computers. % TODO: reference -Through \gls{android}, \gls{LX} has been distributed to a huge number of mobile devices within the last decade. % TODO: reference -The list of vulnerabilities that are found in \gls{LX} device drivers which were written by \gls{android} device vendors is very concerning. -This is the case even though device drivers are not necessarily complex per se, as they essentially just copy data to and from the hardware they target. -The difficulty is to perform these transfers only under safe circumstances, which are not always straightforward to identify or are simply forgotten. - -\gls{LX} has a huge ecosystem with existing libraries for any imaginable use-case from cryptography to artificial intelligence to give random examples. -It is necessary to investigate some of the weakness manifestations in detail in order to analyze if these might be prevented by using \gls{Rust}.
- -\section{Manifestations} -\label{context::weaknesses-mem-safety::manifestations} -This section contains real-world manifestations and \textit{re}constructed experiments of memory-safety related weaknesses. -It requires common understanding of the \gls{C} language and knowledge from the previous chapters. - -\subsection{Official CWE Examples} -\label{context::weaknesses-mem-safety::manifestations::cwe-ex} -The following examples are officially listed under various children of \citetitle{MITRE-CWE-633}\cite{MITRE-CWE-633}. -The code and descriptions are copied literally and are used as references throughout the document. -The numbering in the example names aren't contiguous because only a subset was selected, and the naming is supposed to match the \gls{CWE} website. -References to the respective \gls{Rust} version are generally found in \cpnameref{rnd::weakness-mitig-prev::porting-c-vulns}, and referenced under each specific example for easier navigation. - -\subsubsection{\glsentrylong{CWE-119}} - -\paragraph{Example 1} -The following code asks the user to enter their last name and then attempts to store the value entered in the last\_name array. - -\begin{listing}[htb] - \begin{minted}[autogobble,linenos,breaklines=true]{c} - ... - char last_name[20]; - printf ("Enter your last name: "); - scanf ("%s", last_name); - ... - \end{minted} - \caption{CWE-120: Example 1 Bad Code C} - \label{code::CWE-120::Example1BadCCode} -\end{listing} -\FloatBarrier - -The problem with the code above is that it does not restrict or limit the size of the name entered by the user.If the user enters "Very\_very\_long\_last\_name" which is 24 characters long, then a buffer overflow will occur since the array can only hold 20 characters total. - -% \paragraph{Example 1} -% This example takes an IP address from a user, verifies that it is well formed and then looks up the hostname and copies it into a buffer. 
-% -% \begin{listing}[htb] -% \begin{minted}[autogobble,linenos,breaklines=true]{c} -% void host_lookup(char *user_supplied_addr){ -% struct hostent *hp; -% in_addr_t *addr; -% char hostname[64]; -% in_addr_t inet_addr(const char *cp); -% -% /*routine that ensures user_supplied_addr is in the right format for conversion */ -% validate_addr_form(user_supplied_addr); -% addr = inet_addr(user_supplied_addr); -% hp = gethostbyaddr( addr, sizeof(struct in_addr), AF_INET); -% strcpy(hostname, hp->h_name); -% } -% \end{minted} -% \caption{CWE-119: Example 1 Bad Code C} -% \label{code::CWE-119::Example1BadCCode} -% \end{listing} -% -% This function allocates a buffer of 64 bytes to store the hostname, however there is no guarantee that the hostname will not be larger than 64 bytes. If an attacker specifies an address which resolves to a very large hostname, then we may overwrite sensitive data or even relinquish control flow to the attacker. -% -% Note that this example also contains an unchecked return value (CWE-252) that can lead to a NULL pointer dereference (CWE-476). - -\paragraph{Example 2} -This example applies an encoding procedure to an input string and stores it into a buffer. 
- -\begin{listing}[htb] - \begin{minted}[autogobble,linenos,breaklines=true]{c} - char * copy_input(char *user_supplied_string){ - int i, dst_index; - char *dst_buf = (char*)malloc(4*sizeof(char) * MAX_SIZE); - if ( MAX_SIZE <= strlen(user_supplied_string) ){ - die("user string too long, die evil hacker!"); - } - dst_index = 0; - for ( i = 0; i < strlen(user_supplied_string); i++ ){ - if( '&' == user_supplied_string[i] ){ - dst_buf[dst_index++] = '&'; - dst_buf[dst_index++] = 'a'; - dst_buf[dst_index++] = 'm'; - dst_buf[dst_index++] = 'p'; - dst_buf[dst_index++] = ';'; - } - else if ('<' == user_supplied_string[i] ){ - /* encode to < */ - } else dst_buf[dst_index++] = user_supplied_string[i]; - } - return dst_buf; - } - \end{minted} - \caption{CWE-119: Example 2 Bad Code C} - \label{code::CWE-119::Example2BadCodeC} -\end{listing} -\FloatBarrier -The programmer attempts to encode the ampersand character in the user-controlled string, however the length of the string is validated before the encoding procedure is applied. Furthermore, the programmer assumes encoding expansion will only expand a given character by a factor of 4, while the encoding of the ampersand expands by 5. As a result, when the encoding procedure expands the string it is possible to overflow the destination buffer if the attacker provides a string of many ampersands. - -\paragraph{Example 3} -The following example asks a user for an offset into an array to select an item. 
- -\begin{listing}[htb] - \begin{minted}[autogobble,linenos,breaklines=true]{c} - int main (int argc, char **argv) { - char *items[] = {"boat", "car", "truck", "train"}; - int index = GetUntrustedOffset(); - printf("You selected %s\n", items[index-1]); - } - \end{minted} - \caption{CWE-119: Example 3 Bad Code C} - \label{code::CWE-119::Example3BadCodeC} -\end{listing} -\FloatBarrier - The programmer allows the user to specify which element in the list to select, however an attacker can provide an out-of-bounds offset, resulting in a buffer over-read (CWE-126). - -\paragraph{Example 4} -In the following code, the method retrieves a value from an array at a specific array index location that is given as an input parameter to the method -\begin{listing}[htb] - \begin{minted}[autogobble,linenos,breaklines=true]{c} - int getValueFromArray(int *array, int len, int index) { - - int value; - - // check that the array index is less than the maximum - // length of the array - if (index < len) { - - // get the value at the specified index of the array - value = array[index]; - } - // if array index is invalid then output error message - // and return value indicating error - else { - printf("Value is: %d\n", array[index]); - value = -1; - } - - return value; - } - \end{minted} - \caption{CWE-119: Example 4 Bad Code C} - \label{code::CWE-119::Example4BadCodeC} -\end{listing} -\FloatBarrier - -However, this method only verifies that the given array index is less than the maximum length of the array but does not check for the minimum value (CWE-839). This will allow a negative value to be accepted as the input array index, which will result in a out of bounds read (CWE-125) and may allow access to sensitive memory. The input array index should be checked to verify that is within the maximum and minimum range required for the array (CWE-129). In this example the if statement should be modified to include a minimum range check, as shown below. 
- -\begin{listing}[htb] - \begin{minted}[autogobble,linenos,breaklines=true]{c} - ... - - // check that the array index is within the correct - // range of values for the array - if (index >= 0 && index < len) { - - ... - \end{minted} - \caption{CWE-119: Example 4 Good Code C} - \label{code::CWE-119::Example4GoodCodeC} -\end{listing} -\FloatBarrier - - -\subsubsection{\glsentrylong{CWE-122}} - -\paragraph{Example 1} - -While buffer overflow examples can be rather complex, it is possible to have very simple, yet still exploitable, heap-based buffer overflows: -\begin{listing}[htb] - \begin{minted}[autogobble,linenos,breaklines=true]{c} - #define BUFSIZE 256 - int main(int argc, char **argv) { - char *buf; - buf = (char *)malloc(sizeof(char)*BUFSIZE); - strcpy(buf, argv[1]); - } - \end{minted} - \caption{CWE-120: Example 1 Bad Code C} - \label{code::CWE-122::Example1BadodeC} -\end{listing} -\FloatBarrier -The buffer is allocated heap memory with a fixed size, but there is no guarantee the string in argv[1] will not exceed this size and cause an overflow. - -\subsubsection{\glsentrylong{CWE-134}} - -\paragraph{Example 1} -The following program prints a string provided as an argument. -\begin{listing}[htb] - \begin{minted}[autogobble,linenos,breaklines=true]{c} - #include - - void printWrapper(char *string) { - printf(string); - } - - int main(int argc, char **argv) { - char buf[5012]; - memcpy(buf, argv[1], 5012); - printWrapper(argv[1]); - return (0); - } - \end{minted} - \caption{CWE-134: Example 1 Bad Code C} - \label{code::CWE-134::Example1BadodeC} -\end{listing} -\FloatBarrier -The example is exploitable, because of the call to printf() in the printWrapper() function. Note: The stack buffer was added to make exploitation more simple. - -% \subsection{Heartbleed} -% TODO: paper about hearbleed with Rust - -\subsection{BlueBorne on Linux} -Implementation of complex protocols are extremely dangerous in C as there is no notion of iterating or collections. 
-\gls{LX}, which is written in C, suffers extensively from buffer related errors and is likely to do so in the foreseeable future. -Many hardware drivers are implementers of such protocols by exchanging data with the hardware and verifying the content. -Often protocols aren't ideal either which makes their implementation even harder. - -A very recent and high impact vulnerability group is code named BlueBorne\cite{Seri2017}. -It has multiple manifestations on various \gls{os}, including Android and \gls{LX}, e.g. CVE-2017-1000251\cite[p.12]{Seri2017} which is explained here. - -The vulnerable code is quite long and staged over three functions, though the following should be enough to indicate the issue: - -\begin{minted}[breaklines]{c} -... - char buf [64]; - len = l2cap_parse_conf_rsp (chan, rsp -> data , len, buf , & result); -... -\end{minted} -\code{buf}, which is subject to overflow in this vulnerability, is a stack array with a fixed size, and its pointer is passed into a function. -Inside the function is a loop which writes \code{len} messages from \code{rsp->data} to \code{buf}, while the last two are both of the "type" \code{void *}. -The length to \code{buf} is not known in the function, thus there is no possibility of intentionally respecting its boundaries. - -\citeauthor{Seri2017} has additional criticism about \gls{LX}'s choice of having this code in the kernel:\footnote{italicized text was added by me} -"L2CAP\textit{, which can be seen as Bluetooth’s equivalent of TCP,} is included as part of the core Linux kernel code. -This is a rather dangerous choice. -Combining a fully exposed communication protocol, arcane features like EFS and a kernel space implementation is a recipe for trouble. -This vulnerability is a classic stack overflow occurring in the context of a kernel thread. -This provides an attacker with a full and reliable kernel-level exploit for any Bluetooth enabled device running Linux, requiring no additional steps. 
-Moreover, each compromised host can be used to launch secondary attacks, making this vulnerability wormable." - -\subsection{The Stack Clash} -\label{context::weaknesses-mem-safety::manifestations::stack-clash} -A recent high severity vulnerability named \textit{Stack Clash}\cite{TheStackClash} is briefly described as \textit{"a vulnerability in the memory management of several operating systems. It affects Linux, OpenBSD, NetBSD, FreeBSD and Solaris, on i386 and amd64. It can be exploited by attackers to corrupt memory and execute arbitrary code."} -The \gls{LX} specific vulnerability is listed as CVE-2017-1000364\footnote{\url{http://www.cvedetails.com/cve/CVE-2017-1000364/}}, where \textit{"an issue was discovered in the size of the stack guard page on Linux, specifically a 4k stack guard page is not sufficiently large and can be "jumped" over (the stack guard page is bypassed)"}. -The vulnerability is assigned to the \citetitle{MITRE-CWE-119}\autocite{MITRE-CWE-119} presented in \cref{context::weaknesses-mem-safety::cwe::119}. - -\subsubsection{Affecting Multiple OSs} -The vulnerability is extremely interesting for several reasons. -The issue was recognized in 2005 and partially fixed in \gls{LX} in 2010 by introducing a guard page, but it wasn't considered to be a high risk. -Several years later, it was found to affect the memory management of several \glspl{os} which don't share the same code base. - -The two affected architectures are closely related and share the same memory-paging concepts, most of which are explained in \cref{context::os-dev-concepts::hw-supported-mm::multilevel-paging-concept,context::os-dev-concepts::hw-supported-mm::multilevel-paging-amd64,context::os-dev-concepts::hw-supported-mm::page-fault}. -This simply answers the question of how this vulnerability can be present in multiple \glspl{os}; they all implemented the same concept when this vulnerability was not yet widely known.
- -\subsubsection{Unguarded Stack Growth} -The guard page concept has been introduced to prevent the stack from growing further than it should.\footnote{Related Linux CVE-2010-2240} - -If the stack grows contiguously and thus accesses \glspl{vaddr} that are not mapped to a page, the \gls{os}'s page-fault handler has the chance to inspect the situation. -The \gls{os} simply allocates a new page for the unmapped \gls{vaddr} and allows the process to grow its stack. -When the accessed \gls{vaddr} lies within the defined guard page, the \gls{os} denies the operation and the process will be notified by a segmentation fault. -This works as long as one of these conditions is true: -\begin{itemize} - \item The guard page spans a \gls{vaddr} range that is larger than the largest stack increment - \item The area behind the stack page is unmapped and will also cause a page-fault -\end{itemize} -As the existence of the vulnerability proves, these two conditions aren't always met. - -\subsubsection{The Stack and Heap Overlap} -The reason why the existing guard page didn't work as expected is that there are cases where the stack can indeed increment by a large enough amount to leap over the guard page. -And somewhere beyond the guard page is the heap, and the stack can happen to grow large enough to reach it. -This effectively overlaps the stack and the heap and makes it possible to access one through variables of the other. - -The best case scenario is mere memory corruption and the crash of the application. -In the worst case there's possible execution of arbitrary code, which can be used by an attacker to gain control of the application and possibly the whole system. - -\subsubsection{Proposed Solutions} -\label{context::weaknesses-mem-safety::manifestations::stack-clash::proposals} - -The researchers at \textit{qualys} suggest two solutions\cite[III]{TheStackClash}. - -The first proposed solution is to increase the guard page to 1MB or larger within the \gls{os}.
-This doesn't guarantee absolute protection as it would still be possible to have a large enough stack growth that surpasses the 1MB guard. - -The second solution doesn't involve the \gls{os} but is about the userspace programs. -The suggestion is to compile all \glspl{app} on the system with the \gls{GCC}\cite{GCC540} with the \code{-fstack-check} option. -This allegedly "prevents the stack-pointer from moving into another memory region without accessing the stack guard-page (it writes one word to every 4KB page allocated on the stack)." -Besides the mention of this option, there is no explanation of what the exact technical outcome is. -This is further investigated in \cref{rnd::weakness-mitig-prev::stack-protection}, as stack protection is also something procedures within the \gls{os} could make use of. - -\chapter{Weakness Mitigation And Prevention Strategies} -\label{context::weakness-mitigation} -This chapter explains what can be done to mitigate and prevent software weaknesses, focusing on actions that can be taken by the developer. -The first step is to leverage the \gls{CWE}'s database with its suggestions. - -\section{CWE-119 Mitigation Suggestions} -\label{context::weakness-mitigation::cwe-119-suggestions} -The \gls{CWE-119} lists mitigation attempts for these software life cycle phases: Requirements, Architecture and Design, Implementation, and Operation. - -\paragraph{Requirements Phase - Choose Language That Avoids Weaknesses} -For the requirements phase, \gls{CWE-119} suggests to "use a language that does not allow this weakness to occur or provides constructs that make this weakness easier to avoid". -Such languages, which "perform their own memory management", "are not subject to buffer overflows". - -This is followed by various other suggestions, of which the most relevant ones have been extracted into the following list. -This list serves as additional test criteria against \gls{Rust}, evaluated in \cref{rnd}.
- -\paragraph{Architecture and Design - Use Libraries or Frameworks} -Use a vetted library or framework that does not allow this weakness to occur or provides constructs that make this weakness easier to avoid. - -These libraries provide safer versions of overflow-prone string-handling functions: Safe C String Library (SafeStr) - -\paragraph{Build and Compilation - Hardened Compilation} -Run or compile the software using features or extensions that automatically provide a protection mechanism that mitigates or eliminates buffer overflows. - -Examples: FORTIFY\_SOURCE GCC flag - -\paragraph{Implementation} -This refers to the phase during which the programmer writes code. - -\subparagraph{Careful Buffer Handling} -\begin{itemize} -\item Double check that your buffer is as large as you specify. -\item When using functions that accept a number of bytes to copy, such as strncpy(), be aware that if the destination buffer size is equal to the source buffer size, it may not NULL-terminate the string. -\item Check buffer boundaries if accessing the buffer in a loop and make sure you are not in danger of writing past the allocated space. -\item If necessary, truncate all input strings to a reasonable length before passing them to the copy and concatenation functions. -\end{itemize} - -\subparagraph{Use Functions That Support Bounds} -\begin{itemize} - \item Replace unbounded copy functions with analogous functions that support length arguments, such as strcpy with strncpy. - \item Create these if they are not available. -\end{itemize} - -\paragraph{Operation - Address Randomization} -Run or compile the software using features or extensions that randomly arrange the positions of a program's executable and libraries in memory. Because this makes the addresses unpredictable, it can prevent an attacker from reliably jumping to exploitable code. 
- -Examples: Address Space Layout Randomization (ASLR), Position-Independent Executables (PIE) - -\section{Choice of Language} -All of the suggestions provided by the \gls{CWE} are in one way or another related to the \gls{proglang}. -Depending on the stage of the software development cycle, the suggestion is to either choose a stronger -- in the sense of less prone to weaknesses -- or avoid dangerous elements of a weaker \gls{proglang}. -Combining this with the knowledge from previous chapters, it can be concluded that the choice of \gls{proglang} is significant for memory-safety in \gls{os} as well as \glspl{app}. -This study does not have to work around the weaknesses of any language but sets out to evaluate a potentially stronger one. -Thus, the above information is another motivator for this study's topic. - -The next \cnameref{context::os-dev-lang-choice} further drives this evaluation. - -\chapter{OS Development: Choice of Programming Language} -\label{context::os-dev-lang-choice} -There are dozens of \glspl{proglang} to write \glspl{app}, but only a few are viable for writing \glspl{os}. - -\section{Abstraction Trade Offs: Safety vs. Functionality} -\label{context::os-dev-lang-choice::abstraction-safety-functionality} -In computer systems, safety and functionality are inversely proportional to each other, because complexity grows with increased functionality, and error cases become more difficult to find. -Applying this analogy to \glspl{proglang} might be misleading at first. -It might seem that the more abstraction is provided by a language, the higher the available functionality is. -In fact, the opposite is the case. - -Abstraction can be used to impose limits on what the programmer can instruct the system to do, which can reduce functionality and increase safety. -By defining an abstraction layer in the form of a \gls{proglang}, the language defines which of the underlying functionality will be exposed through it.
-The \gls{proglang} can be designed with obligated rules, that make the written source code easier to analyze in an automated fashion before it gets compiled to the underlying representation. - -\section{Requirements} -\label{context::os-dev-lang-choice::requirements} -Criteria for the choice of \gls{proglang} are much different for an \gls{os} than for other types of \glspl{app}. - -\subsection{Technical} -\begin{itemize} - \item{Compiler that generates machine code} - \item{Inline \gls{cpu} instruction calls} - \item{Direct memory addressing} - \item{No requirement on an internal runtime or virtual machine} - \item{No dependency on platform functionality} -\end{itemize} - -The above features are technical requirements to be able to produce code for the bare machine. - -\subsection{Safety-Critical} -In addition, this study defines more theoretical requirements that help prevent memory-safety critical mistakes. - -\begin{itemize} - \item{Static analysis as part of the language design} - \item{Memory-safety as a 1st-class feature} - \item{Object oriented or similar type system that allows modeling of complex relations between types} -\end{itemize} -These features often come at the cost of a more complex language design, which results in a steep learning curve for beginners and might not seem worth it. - -As explained in \cnameref{context::introduction::memory-safety::time-aspect}, mistakes need to be identified as soon as possible, and static analysis is the technical method for this. -The static analyzer can be designed to target different aspects and the language can be designed to offer these kind of aspects to the static analyzer. -For this study the primary aspect is memory-safety, which is assisted by the type system. -For a simple example, a static analyzer should detect casts between incompatible pointer types, as it will eventually lead to undesired effects. 
- -\section{Why not \glsentrytext{C}} -\gls{C} fulfills all these requirements, and since its development it has replaced Assembly as the de-facto standard \gls{proglang} for writing \glspl{os}. -As discussed in \cref{context::weaknesses-mem-safety} it has dangerous weaknesses. -With the growing number of vulnerabilities, various solutions have been proposed to increase the safety of C, either with static code analysis or via \gls{compiler}-generated code that imposes runtime checks. - -Checks that are performed at runtime introduce a high degree of overhead, which makes them a nonviable option in the domain of \gls{os} development, where many code paths must be very fast to ensure the operation of high speed I/O devices\cite[p.~1]{Balasubramanian2017} or tasks with \gls{realtime} requirements. -This has been forcing many developers to prioritize performance over safety, and others to look for alternatives. - -C allows direct access to memory via pointers and arithmetic thereof. -Static source code analysis is difficult on \gls{C}, as it has not been designed for this purpose and has too many ambiguities.\cite{Kowshik2002} -C has no notion of objects beyond structs, which makes more complex memory structures, e.g. double-ended queues or trees, a hurdle to write and maintain safely. -As recorded by the \gls{CWE} and shown in \cref{context::weaknesses-mem-safety::manifestations::cwe-ex}, even access to simple arrays is often mishandled. -These criteria rule out \gls{C} for the purpose of this study. - -There have been attempts to define subsets of the \gls{C} language that can be safety checked, e.g. Cyclone\cite{Jim2002}, Control-C\cite{Kowshik2002}, but none has managed to take C's position. -\paragraph{Control-C} -Control-C is created on top of \gls{llvm} as a statically provable memory-safe strict subset of C, purposed for real-time control systems.\cite{Kowshik2002}
-It has hardware requirements for handling safe runtime errors, which are known as \gls{cpu} exceptions on \gls{amd64}. -In Control-C they could be caused by stack and heap overflow, stack overflow (e.g., due to infinite recursion) and heap overflow due to dynamic allocation. -The latter sounds similar to the concept of guard pages and page-faults described in \cref{context::weaknesses-mem-safety::manifestations::stack-clash,rnd::weakness-mitig-prev::stack-protection::stack-clash}. -Control-C and Rust have certain similarities, e.g. both aim to guarantee memory-safety, use static type checks and make use of LLVM as the backend. -Control-C uses affine transformation rules to statically guarantee bounds on array operations, which Rust does not do. -It can only be speculated why Control-C hasn't gained popularity since its invention. -It may be possible that it was mislabeled as a real-time only system language and was neglected in more generic language research. - -From today's perspective it appears to be clearly inferior to younger \gls{os} language candidates like Rust, e.g. as it is a subset of C and thus doesn't support object oriented paradigms. - -\section{LLVM: C and Rust} -The \glsentrydesc{llvm} has become popular since it was introduced. -To demonstrate this, \cref{fig:llvm-research-papers-since-2002}\footnote{\url{https://llvm.org/pubs/}} shows a statistic on how many research papers have been published yearly since 2002. - -\begin{figure}[ht!] - \centering - \includegraphics[width=0.6\textwidth]{gfx/llvm-number-paper-pa.png} - \caption{Research Papers About LLVM since 2002} - \label{fig:llvm-research-papers-since-2002} -\end{figure} -\FloatBarrier - -It has since become a viable backend for the \gls{clang} \gls{compiler} and is also used by \gls{Rust}. - -The frontend \glspl{compiler} produce code for the \gls{llvm} Intermediate-Representation, which is hardware-independent.
-\gls{llvm} can produce machine code for supported architectures, a list of which is found on the official website\footnote{\url{https://llvm.org/docs/CompilerWriterInfo.html\#hardware}}. -It contains all contemporary architectures, including \gls{amd64}. - -\section{Why Investigate Rust} -Rust has gained popularity among hobby \gls{os} developers. -Reasons for this vary from simply challenging the possibility to being convinced about its claimed features to guarantee memory-safety. -These officially advertised features are\footnote{\url{https://www.rust-lang.org/}} -\begin{itemize} - \item zero-cost abstractions - \item move semantics - \item guaranteed memory safety - \item threads without data races - \item trait-based generics - \item pattern matching - \item type inference - \item minimal runtime - \item efficient C bindings. -\end{itemize} - -Research articles and existing projects that have appeared in recent months and years have contributed motivation and information to this study. -Some of them are listed in \cref{rnd::existing-os-dev-with-rust::papers} next to the existing projects. -Others are referenced throughout the various chapters of \cref{rnd}. -They have all been motivators and plenty of justification for this study to pay closer attention to Rust. - -\section{Next Steps} -At this point, the assumption is made that \gls{Rust} can increase memory-safety in the \gls{os} in comparison to using \gls{C}. -In the next \cnameref{rnd} it is studied which of the above features contribute to memory-safety in the \gls{os}, and the corresponding language items are identified. +\section{Summary} +% Summarize the content of Chapter 1 and preview of content of Chapter 2.
diff --git a/src/docs/parts/eval_and_conclusion/eval_and_conclusion.tex b/src/docs/parts/eval_and_conclusion/eval_and_conclusion.tex deleted file mode 100644 index 606e433..0000000 --- a/src/docs/parts/eval_and_conclusion/eval_and_conclusion.tex +++ /dev/null @@ -1,246 +0,0 @@ -% // vim: set ft=tex: -\chapter{Evaluation} -This chapter summarizes the findings of the previous parts. -The summary is then evaluated against the hypothesis, to create the foundation of a concise conclusion. - -\section{Summary} -After an exact definition for memory-safety within the OS was found in \cref{context::introduction::memory-safety::def}, various aspects of software vulnerability origin were discussed. -The human was identified to be an error prone weak spot in the process of OS development. -It was found that technical solutions that can detect these errors are to be used as early in the development process as possible. -This point in time was declared the time of software compilation. - -OS development concepts were introduced for the AMD64 architecture, to lay out the knowledge needed to allow an understanding and implementation of OS concepts on AMD64, which was set out for the development process. - -Common Weaknesses in software were identified, and it was demonstrated how these lead to concrete vulnerabilities. -The stack clash was explained as an architectural and design issue, which requires changes in stack overflow detection in userspace software. -The origin of many of the weaknesses was identified to be based on weak languages, and Rust was verified to be a good alternative to C. -Research was conducted on these common weaknesses through other scientific studies. -This found weaknesses based on -\begin{itemize} - \item use-after-free - \item indexing out of bounds - \item iterator invalidation - \item data races -\end{itemize} -to be prevented by Rust's ownership system\cite{Beingessner2015}.
-Stack protection experiments were conducted and found Rust to be less vulnerable to return address manipulation. -These were found to be effectively prevented by static analysis under normal circumstances, since it required multiple explicit features to intentionally force the manipulation to succeed. -Stack overflow could not be statically detected by trying various tweaks to the compiler. -Information that would be required for this static detection was evaluated, and was found to be not completely available in the present compiler architecture. - -A practical introduction to Rust was given, overviewing the encountered language features and the ones that were explicitly investigated. -Rust was found to have extension features only limited by the complexity of its usage, demonstrated by an implementation of pure-software information flow control. - -Existing OS development efforts were investigated to serve as a codebase for the development and to evaluate their usage of Rust for achieving memory-safety. -Redox OS was found to not be vulnerable to the Stack Clash due to design decisions in the OS. -Blog OS was found to demonstrate extensive usage of Rust's type system to model underlying hardware and prevent mistakes in the paging implementation. - -Implementation of preemptive multitasking was chosen to be based on intermezzOS for its simplicity. -After initial problems with the build and debugging tools were solved, the development could proceed quickly. -Based on the state of intermezzOS that allowed the system to boot, a working preemptive multitasking was implemented successfully. -The implementation only supports static memory allocation and no dynamic memory management. -Writing a hardware-driver for the Programmable-Interrupt-Timer was well supported by the module and type system, which allowed an accurate modeling of the underlying hardware. 
-Global OS state variables can be protected by requiring Rust's unsafe keyword and disallowing the same within additional defined tasks. -Extensive usage of the unsafe keyword was required to perform raw hardware access, but could be limited to well-defined functions. -Inline machine-instructions were found to be well designed and in-line with the rest of the language. -One occurrence of a cast from an untyped pointer was necessary, within the context-switching interrupt handler, to manipulate data on the stack. -A stack overflow on user defined tasks could not be prevented by static analysis, only detected by the OS at runtime. - -\subsection{Thesis Evaluation} -Rust's static analysis lacks the ability of static stack overflow detection, which is a significant counter-indication to the hypothesis. -Using Rust's static memory analysis does not fully guarantee In-Kernel memory-safety. - -\chapter{Conclusions} -While the hypothesis was not proven, Rust is still considered to be a significant improvement over C for OS development purposes. - -\paragraph{Rust detects many errors early} -It prevents many errors at compile-time, where they are harmless. -The language is fully extendable via language extensions that allow the insertion of new language features that can be hooked into the static analysis. -The process of making Rust suitable for OS development is driven by many hobby and a few production intended projects. - -\section{Hardware is still hard, but Rust is worth learning} -Even though Rust is understood as a memory-safe language, following the hardware specification is still a memory-safety critical requirement. -OS developers must use the unsafe keyword when performing raw hardware access, which is designed to make them think twice when using it. -If the chance is presented, Rust should be chosen any time over C for implementing software that is close to hardware.
-This might be difficult in the first place, but should pay off long-term, as less vulnerabilities will be detected throughout the extended life-cycle of the software. - -\section{Next Step} -Further investigation is required to propose a solution for the lack of static stack size estimation in \gls{Rust}. -The immediate next step is to bring this issue up for a discussion in the Rust community. - -% \chapter{Scratchpad} -% -% \begin{figure}[ht!] -% \centering -% \begin{subfigure}[T]{0.50\textwidth} -% \tikzmarkcountprep{callee} -% \begin{compactminted}[ -% escapeinside=??,linenos,autogobble,highlightlines={} -% ]{nasm} -% mov rax,QWORD PTR [rbp-0x48]?\tikzmarkcount? -% add rsp,0x50?\tikzmarkcount? -% pop rbp?\tikzmarkcount? -% ret?\tikzmarkcount? -% \end{compactminted} -% \tikzmarkdrawcircles -% \caption{Subfig A} -% \end{subfigure} -% \begin{subfigure}[T]{0.45\textwidth} -% \foreach \x/\xtext in { -% 1/{ -% this is going to be a really long sentence with line wraps -% }, -% 2/{ -% second -% } -% } {\tikzmarkcircle{\x}\xtext\\} -% \caption{Subfig B} -% \end{subfigure} -% \caption{Whadup} -% \label{Whadup} -% \end{figure} -% -% \begin{listing} -% \tikzmarkcountprep{example1} -% \begin{minted}[ -% label=example1,labelposition=all,escapeinside=??,linenos,autogobble,highlightlines={} -% ]{nasm} -% mov rax,QWORD PTR [rbp-0x48]?\tikzmarkcount? ?\tikzmark{brace1upper}? -% add rsp,0x50?\tikzmarkcount? -% pop rbp?\tikzmarkcount? -% ret?\tikzmarkcount? ?\tikzmark{brace1lower}? -% \end{minted} -% \begin{minted}[ -% escapeinside=??,linenos,autogobble,highlightlines={} -% ]{nasm} -% mov rax,QWORD PTR [rbp-0x48]?\tikzmarkcount? -% add rsp,0x50 ?\tikzmarkcount? -% pop rbp ?\tikzmarkcount? -% ret ?\tikzmarkcount? 
-% \end{minted} -% \begin{tikzpicture}[remember picture,overlay] -% \draw[thick,decorate,decoration={brace,raise=1ex}] -% (pic cs:brace1upper)+(0,1.5ex) -- node[shape=coordinate][right=1.5ex] (a) {} (pic cs:brace1lower); -% \fill (a)+(2ex,0) circle[opacity=1,radius=1.1ex] node[white,font=\small]{a}; -% \end{tikzpicture} -% \tikzmarkdrawcircles -% \caption{Minted Listing A} -% % -% \foreach \x/\xtext in { -% 1/{ -% this is going to be a really long sentence with line wraps -% \\} -% ,2/{ -% second -% \\} -% ,5/{},6/{ -% hi -% \\} -% ,a/{ -% hi -% \\} -% } {\tikzmarkcircle{\x}\xtext} -% % -% \end{listing} -% \FloatBarrier -% -% \begin{listing} -% \tikzset{/minted/basename=example} -% \begin{minted}[label=caller,labelposition=topline,escapeinside=??,highlightlines={},autogobble,linenos,breaklines=true]{nasm} -% mov rcx,QWORD PTR [rbp-0x40] ; copy 1st arg to rcx -% mov rsi,QWORD PTR [rbp-0x38] ; copy 2nd arg to rsi -% mov rdx,QWORD PTR [rbp-0x30] ; copy 3rd arg to rdx -% mov QWORD PTR [rbp-0x60],rdi ; save rdi to make it available -% mov rdi,rcx ; copy 1st arg to rdi -% mov QWORD PTR [rbp-0x68],rax ; save rax to make it available -% call 7490?\tikzmark{exampleprecallfrom}? 
<_ZN14stack_handling3sum17h8f12d2383e075691E> ; push '756e' onto the stack and jump to the first instruction of sum -% mov QWORD PTR [rbp-0x28],rax ; save return value -% \end{minted} -% \caption{Function Call with Three Arguments} -% \begin{tikzpicture}[remember picture,overlay] -% \draw[red,thick] (pic cs:exampleprecallfrom) ellipse (0.7cm and 12pt) node { \textbf{1} }; -% \fill[blue] (pic cs:example1) circle (0.1cm); -% \fill[yellow] (pic cs:example2) circle (0.1cm); -% \end{tikzpicture} -% \end{listing} -% -% \begin{tikzpicture}[node distance=2cm, -% startstop/.style={rectangle, rounded corners, minimum width=3cm, minimum height=1cm,text centered, draw=black, fill=red!30}, -% io/.style = {trapezium, trapezium left angle=70, trapezium right angle=110, minimum width=3cm, minimum height=1cm, text centered, draw=black, fill=blue!30}, -% process/.style = {rectangle, minimum width=1cm, minimum height=1cm, text centered, text width=3cm, draw=black, fill=orange!30}, -% decision/.style = {diamond, minimum width=3cm, minimum height=1cm, text centered, draw=black, fill=green!30}, -% arrow/.style = {thick,->,>=stealth} -% ] -% -% %\node (start) [startstop] {Start}; -% %\node (in1) [io, below of=start] {Input}; -% %\node (pro1) [process, below of=in1] {Process 1}; -% %\node (dec1) [decision, below of=pro1, yshift=-0.5cm] {Decision 1}; -% %\node (pro2a) [process, below of=dec1, yshift=-0.5cm] {Process 2a text text text text text text text text text text}; -% %\node (pro2b) [process, right of=dec1, xshift=2cm] {Process 2b}; -% %\node (out1) [io, below of=pro2a] {Output}; -% %\node (stop) [startstop, below of=out1] {Stop}; -% % -% %\draw [arrow] (start) -- (in1); -% %\draw [arrow] (in1) -- (pro1); -% %\draw [arrow] (pro1) -- (dec1); -% %\draw [arrow] (dec1) -- node[anchor=east] {yes} (pro2a); -% %\draw [arrow] (dec1) -- node[anchor=south] {no} (pro2b); -% %\draw [arrow] (pro2b) |- (pro1); -% %\draw [arrow] (pro2a) -- (out1); -% %\draw [arrow] (out1) -- (stop); -% -% 
\node[process,xshift=0ex,yshift=-0ex] (ua_back) {User Applications}; -% \node[process,xshift=0ex,yshift=-1ex] at (ua_back) {User Applications}; -% \node[process,xshift=0ex,yshift=-2ex] (ua) at (ua_back) {User Applications}; -% -% \node[process,xshift=0ex,yshift=-0ex,below of=ua] (sl_back) {System Libraries}; -% \node[process,xshift=0ex,yshift=-1ex] at (sl_back) {System Libraries}; -% \node[process,xshift=0ex,yshift=-2ex] (sl) at (sl_back) {System Libraries}; -% -% \node[process,xshift=0ex,yshift=-0ex,below of=sl] (os_back) {OS}; -% \node[process,xshift=0ex,yshift=-1ex] at (os_back) {OS API}; -% \node[process,xshift=0ex,yshift=-2ex] (os) at (os_back) {OS}; -% -% \node[process,xshift=0ex,yshift=-0ex,left of=mem, below of=os] (cpu) {CPU}; -% \node[process,xshift=0ex,yshift=-0ex,right of=cpu] (mem) {Memory}; -% \node[process,xshift=0ex,yshift=-0ex,right of=mem] (otherhw) {Other HW}; -% -% \draw [arrow] (ua) -- (sl); -% \draw [arrow] (sl) -- (os); -% \draw [arrow] (os) -- (cpu); -% \draw [arrow] (os) -- (mem); -% \draw [arrow] (os) -- (otherhw); -% -% TODO: improve -% -% \end{tikzpicture} -% -% \begin{markdown} -% # Flow of Reasoning -% * How to mitigate distributed weaknesses -% - Don't distribute vulnerable software -% - Produce less vulnerabilities -% OR -% - Detect vulnerabilities -% * How to prevent vulnerabilities distribution? -% - Human to make less mistakes; NOT VIABLE, see human aspect. -% - Detect them before the \gls{app} is installed; see time aspect -% * How to detect vulnerabilities -% - Write runtime tests for the program -% - Analyze the source code -% * Runtime Tests -% - Runs on every execution, thus wastes \gls{cpu} resources -% - Program needs to handle it -% -> Slow and too late in the software life cycle! 
-% * Source Code Analysis -% - Difficult for low-level code, would require hardware knowledge -% - Compilers are source code analysers by nature -% - Additional tools can help, but this takes more effort -% -> chose a compiler with high analysis standards -% * Choice of Compiler: Language Dependent -% - C: Safe C, Cyclone, etc.: define sub language that is analyzable. MEH -% - Rust: designed to be analyzable. WIN! -% * Rust -% - Can the analyzes be extended to suite OS dev? -% \end{markdown} diff --git a/src/docs/parts/research/research.tex b/src/docs/parts/research/research.tex new file mode 100644 index 0000000..a9410f0 --- /dev/null +++ b/src/docs/parts/research/research.tex @@ -0,0 +1,69 @@ +% // vim: set ft=tex: +\chapter{Topic Refinement} + +\chapter{Criteria} +This chapter explains the criteria for evaluating available \glspl{pm} for their potential to be part of the developed solution. + +\section{Specification Of Complete} + +\section{Binary- and Source Package Support} + +\section{Installation and Update Mechanism} + +\subsection{Security Updates} + +\section{Usability} + +\section{Support} + +\section{Package Repositories} + +\section{Efficiency} + +\chapter{Available Package Managers (and Build Managers)} + +\section{Portage} + +\section{Nix} + +\section{Guix} +\textbf{Feature} +\begin{description} + \item [Automatic Updates of Sources] { + `guix refresh` can update package sources according to defined updaters + } + \item [Native CI] { https://notabug.org/mthl/cuirass } +\end{description} + + +\section{Spack} +Spack (\url{https://github.com/LLNL/spack}) is a package manager written in Python. + +\begin{description} + \item [Automatic Updates of Sources] { + } + \item [Closure] { + Explained on the website \url{http://software.llnl.gov/spack/basic_usage.html\#filesystem-views} + } +\end{description} + + +\section{bazel} + +In case the dependency tree includes different versions of the same project, only one of them can be included in the WORKSPACE. 
+This potentially breaks compatibility for the projects that depend on a different version of the dependency. + +% TODO: cite source https://www.bazel.io/versions/master/docs/external.html#transitive-dependencies + +\section{0install} + +\chapter{Languages Used For Packaging} + +\section{Guile Scheme} +Guile implements Scheme and extends it with new features. + +\chapter{Abstraction of \gls{sacr} and \gls{saci}} +\label{chap:research-abstract-acpr-saci} + + +UX: \url{https://logicgrimoire.wordpress.com/2012/08/25/a-first-guile-script} diff --git a/src/docs/parts/research_and_development/research_and_development.tex b/src/docs/parts/research_and_development/research_and_development.tex deleted file mode 100644 index 4732c63..0000000 --- a/src/docs/parts/research_and_development/research_and_development.tex +++ /dev/null @@ -1,1574 +0,0 @@ -% // vim: set ft=tex: -\chapter{Rust} -\label{rnd::rust} -As described by the maintainers, \gls{Rust} is a "systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety."\footnote{\url{www.rust-lang.org}}. -During early development it had a runtime-dependent garbage collector which has since been dropped from the language, making it a viable candidate for \gls{os} development. -It meets all requirements listed in \cref{context::os-dev-lang-choice::requirements} which has enabled many developers to create \gls{os} related projects. -These are the subject of \cnameref{rnd::existing-os-dev-with-rust::systems::blog-os::mm}. - -This chapter gives an introduction to \gls{Rust} from the specific perspective, as a beginner in \gls{Rust} and only academical \gls{os} development experience. -The specific interest according to the topic, is which language aspects help with memory-safety in the latter. -In addition to existing functionality, potentialities are also taken into account, as well as the ability to extend the language for the specific use-case. 
-The introduction found here is a summary of features that have been encountered throughout this study. -As a more generic introduction to the language, the suggestion is to study at least the introduction in \citetitle{Beingessner2015}\cite{Beingessner2015}, or simply visit the official Rust website which has a complete and beginner-friendly documentation by now.\footnote{\url{https://doc.rust-lang.org}} -Another note is that this study relies heavily on features that are only available in the nightly version of \gls{Rust}, which is a necessity for \gls{os} development. -These features will be highlighted throughout the various chapters. - -\section{Compiler Architecture} -Detailed information about Rust's compiler architecture seems to be spread over any Rust related development website, including the Rust forums and GitHub, but also blog posts by Rust's developers. - -\Cref{fig:rust-compiler-architecture} shows \gls{Rust}'s chain of compilation. -\begin{figure}[ht!] - \centering - \includegraphics[width=0.7\textwidth]{gfx/rust-compiler-flow.png} - \caption{Rust's current and future compiler architecture} - \label{fig:rust-compiler-architecture} -\end{figure} - -In one of these blog posts \url{https://blog.rust-lang.org/2016/04/19/MIR.html}, one of the maintainers describes the compiler architecture, including planned changes and their improvements. - -Rust, in particular its compiler \code{rustc}, parses and "desugars" the source code. -What is called "desugaring", is the expansion of syntax that exists for the mere purpose of being simple and comfortable to use, which is called syntactical sugar in \gls{proglang} slang. -This step also handles the expansion of Rust's hygienic macros, which work differently than C macros, and produces the High-Intermediate-Representation -- comparable to an Abstract-Syntax-Tree -- on success. - -The HIR is then type- and borrow-checked, Rust's most unique safety features.
-On success, Rust's compiler delivers an Intermediate-Representation to \gls{llvm}, which is then optimized and compiled into machine specific code, instructions for the physical \gls{cpu}. - -Note that the new \emph{MIR} layer has not been fully completed as of today, but it has been activated in the compiler since October 2015\footnote{\url{https://github.com/rust-lang/rust/pull/28748}}. -Its development is based on the assumption that "Rust’s rich type system should provide fertile ground for going beyond \gls{llvm}’s optimizations." -Among other improvements, it allows Rust to perform optimizations before monomorphizing the code for \gls{llvm}, which breaks down (loses) all Rust specific type system information, e.g. trait implementations, into a flat model. - -\subsection{Zero-Cost Abstraction} -The optimization on the HIR and MIR is the origin of the term zero-cost abstractions. -By analyzing control flow and evaluation of constant expressions, code paths can be eliminated and the resulting \gls{llvm} IR can be reduced extensively. - -It will also allow type checking to be more effective, as the MIR is simpler than the HIR. -An example of a potential optimization is given in the borrow checker explanation along with the \cnameref{rnd::rust::feat::own-borrow::mir-improvement}. - -\section{Macros And Extensions} -In order to extend Rust for specific use-cases in \gls{os} development with macros or other means, the syntax and its extension mechanisms need to be understood. -In \gls{os} development, macros are often preferred over functions because they are processed at compile time and induce no runtime overhead. -In \gls{Rust}, they are deeply integrated into the language and are not intuitively usable. -This is because they require the developer to write pattern matches on token trees and understand the abstract syntax tree creation.
- -The most comprehensive literature on Rust's Macro system, including a thorough explanation of available language is provided as a digital book in the rustdoc format\footnote{\url{https://danielkeep.github.io/tlborm/book}}. - -\subsection{Macro Rules} -A simple macro is presented in \cpnameref{rnd::imezzos-preemptive-multitasking::timer-interrupt-scheduling::macro}, where it is used as a template for interrupt handler definitions. - -% \subsubsection{Macro Recursion Limit} -% Macro recursion can be limited via the attribute: -% -% \mintinline{rust}{#![recursion_limit="10"]} - -\subsection{Language Extensions Example: Software Fault Isolation} -Language extensions allow the addition of almost arbitrary functionality to the language. -They can be used for the definition of additional analysis rules to extend safety checks. - -Developing these is difficult, but the effort is justified under conditions where regular macros are insufficient. -The mechanics of language extensions are beyond the scope of this study, instead an example is presented demonstrating the results one can achieve with them. - -\citeauthor{Balasubramanian2017} have achieved an implementation of Information Flow Control (IFC) analysis in \gls{Rust}, which lets the programmer annotate variables with security contexts.\cite{Balasubramanian2017} - -IFC enables the enforcement of security contexts in information flow without hardware support and detects a violation at compile time. - -The following buffer implementation is initialized with the first data element it receives. -\begin{minted}[breaklines,highlightlines={6,7}]{rust} -struct Buffer{data: Option<Vec<u8>>} -impl Buffer { - fn new () -> Buffer {Buffer{data: None }} - fn append (& mut self , mut v: Vec<u8>) { - match self.data { - None => self.data = Some(v), - Some(ref mut d) => d.append (& mut v) } - } } -\end{minted} - -In this sample, variables from two different security contexts are stored in the buffer.
-The \code{println!} macro is considered \code{non-secret} and may not read the data since it contain \code{secret} items. -\begin{minted}[breaklines,highlightlines={2-3,5-6,9}]{rust} -let mut buf = Buffer::new(); -#[ label(non-secret )] // security annotation for IFC -let nonsec = vec![1,2,3]; - -#[ label(secret )] // security annotation for IFC -let sec = vec![4,5,6]; -buf.append(nonsec); -buf.append(sec); // buf now contains secret data -println!("{:?}" , buf.data ); // ERROR : leaks secret data -\end{minted} - -% TODO: Business Logic Checks -% Examples: -% TLB needs to be reset on Task Change -% ISR-Stack-Frame needs to be updated on context-switch - -% TODO How generic can the memory allocators be written? - -% TODO Guarantees to be statically checked: -% TODO * Control access to duplicates in page tables -% TODO * Tasks can't access unallocated (physical) memory -% TODO * Tasks can't access other tasks memory - -% \subsection{Compiler Plugins} -% The Rust Unstable Book \url{https://doc.rust-lang.org/unstable-book/language-features/plugin.html} -% has a section on compiler plugins, which are user-provided libraries that extend the compiler's behavior with new syntax extensions, lint checks, etc. -% This is - -\subsection{Cargo} -\glsentrydesc{cargo}. - -\subsubsection{Tweaking LLVM Compiler Options} -\label{rnd::rust::cargo::tweak-llvm} -Using \gls{cargo}, arguments for the \gls{llvm} \gls{compiler} can be passed all the way down to by creating the \code{$PROJECT_DIR/.cargo/config} file. - The following is an example which has been used to experiment with stack protection in \cref{rnd::weakness-mitig-prev::stack-protection::stack-clash::user-space}. - -\begin{minted}[breaklines]{yaml} -[build] -rustflags = [ - "-C", "llvm-args=-safe-stack-layout -enable-stackovf-sanitizer -asan-stack -warn-stack-size=1000", -] -\end{minted} - -To enable this configuration, \code{cargo rustc} needs to be invoked for this project to respect the configured rustc options. 
-The config file shows other stack related options too that were enabled for experimentation purposes. -A full list of supported options can be retrieved with the following C++ program: - -\begin{minted}{cpp} -// Call with `--help-list-hidden` as argument to get a full list -#include "llvm/Support/CommandLine.h" -using namespace llvm; -int main(int argc, char** argv) { - cl::ParseCommandLineOptions(argc, argv, ""); - return 0; -} -\end{minted} -The reason why this is required is that it uses the same \gls{api} as \gls{Rust} to invoke \gls{llvm}, and should give accurate results on what options are supported by \gls{Rust}. -Standalone tools of \gls{llvm} might not expose the same functionality as the \gls{api} used here. - -\section{Investigated Language Features} -The following sentence is placed here according to the Don't-Repeat-Yourself principle as it would have otherwise been in almost every subsection: -Developers unfamiliar with this concept are likely to take a while to get used to it, but safety-gains are well worth the effort. - -% \subsection{Memory Management} -% - TODO: Static Variables on Stack, handled by compiler -% -% - TODO: Heap requires implemented allocator -% -% - TODO: BSYS SS17 GITHUB IO Rust Memory Layout - 4 -% - TODO: How can memory be dynamically allocated and still safety checked? - -% \subsubsection{Custom Allocators} -% - TODO: mention ralloc by redox -% - TODO: simple allocator by Blog OS -% - TODO: Who owns global 'static variables? - -\subsection{Ownership And Borrows} -\citeauthor{Beingessner2015} explores the ownership model in relation to some of the weaknesses explained in \cref{context::weaknesses-mem-safety}. -The ownership model is described as "a system for expressing where and when data lives, and where and when data can be mutated." 
- -\paragraph{Effectiveness} -The ownership model was found to effectively eliminate vulnerabilities of the following weakness types: -\begin{itemize} - \item use-after-free - \item indexing out of bounds - \item iterator invalidation - \item data races -\end{itemize} - -\paragraph{Not Fully Effective Against Memory Leaks} -It was found that the problem of memory leaks cannot be sufficiently solved by ownership, due to lack of proper linear typing. -It was described that leaked memory is not a direct memory-safety violation because the \gls{os} cleans up leaked memory after the \gls{process}'s termination. - -Note: The suffering \gls{app} will prevent leaked memory from being used by other \glspl{app} until its termination. -However, this should not happen in the \gls{os} as there is no underlying instance that can simply reclaim the leaked memory, thus it will be lost until system reboot. - -\subsubsection{Potential MIR improvements} -\label{rnd::rust::feat::own-borrow::mir-improvement} -An example of a potential change is \emph{vector patterns}, taken from the MIR-RFC\footnote{\url{https://github.com/rust-lang/rfcs/blob/master/text/1211-mir.md}}. - -The following match shows a vector pattern borrow in a match expression. -While this is legal today -- -\begin{minted}{rust} -let mut vec = [1, 2]; -match vec { - [ref mut p, ref mut q] => { ... } -} -\end{minted} --- one would intuitively expect it to be the same as: -\begin{minted}{rust} -p = &mut vec[0], q = &mut vec[1] -\end{minted} - -In the latter case, the borrow checker would complain. -This is because it does not consider the two constant indices to borrow different items from the vector, but considers the whole vector to be borrowed by the first statement, causing an error for the second borrow attempt of the vector. - -\subsection{Static Analyser} -The static analyser has been studied extensively throughout this part.
-Specifically, see \cref{rnd::weakness-mitig-prev::stack-protection::stack-clash::user-space}, which tests the capability of static detection of obvious stack overflow scenarios. - -%- TODO: How does the Rust's static analysis work, theoretically and practically -%- TODO: mention electrolyte, formal verification for Rust -%- TODO: How does static typing help with preventing programming errors -% -%- TODO: explain lints - -\subsection{Inline Assembly} -Inline assembly is explored in two examples within this study: -inside the scheduler to instruct the compiler's register clobbering: \cpnameref{rnd::imezzos-preemptive-multitasking::timer-interrupt-scheduling}, -and in the redirection of the boot task shown in \cpnameref{rnd::imezzos-preemptive-multitasking::tasks-stacks::unsafe::jmp}. - -A more formal and helpful tutorial, which is recommended, has been found in the form of a web article.\footnote{\url{http://embed.rs/articles/2016/arm-inline-assembly-rust/}} - -% \subsection{Lifetimes} -% Lifetimes were not used intensively - -\subsection{Type Safety} - -% - TODO: demonstrate casts -% -% - TODO: demonstrate raw pointers: -% % https://rustbyexample.com/flow_control/match/destructuring/destructure_pointers.html -% -% - TODO: discuss the equivalents of void*? - -\subsubsection{Single Field Structs} -Structs with a single field can be used to wrap a type under a different type name, and make it distinguishable for the type system. -This is different from a type alias, which wouldn't prevent the example situation given below. -This extended example\footnote{\url{https://aturon.github.io/features/types/newtype.html}} shows one way of preventing the mix-up of common length units. -Both new types wrap \code{f64} but are not interchangeable. - -%\begin{figure}[ht!]
-\begin{minted}[linenos,breaklines]{c} -struct Miles(pub f64); -struct Kilometers(pub f64); - -impl Miles { - fn as_kilometers(&self) -> Kilometers { Kilometers { 0: self.0 * 1.6 } } -} -impl Kilometers { - fn as_miles(&self) -> Miles { Miles { 0: self.0 / 1.6 } } -} - -struct Route { distance: Miles } - -impl Route { - fn are_we_there_yet(&self, distance_travelled: Miles) -> bool { - self.distance.0 <= distance_travelled.0 - } -} - -fn main() { - let distance = Miles { 0: 100.0 }; - let route_miles = Route{ distance } - let travelled = Kilometers { 0: 100.0 }; - let arrived = route_miles.are_we_there_yet( travelled ); - println!("Are we there yet? {}", arrived); -} -\end{minted} -%\caption{} -%\label{code::} -%\end{figure} - -The compiler rightfully rejects the code with the following error, and even gives a suggestion to use the \code{.as_miles()} method. - -\begin{minted}[breaklines]{md} -error[E0308]: mismatched types - --> src/main.rs:33:49 - | -33 | let arrived = route_miles.are_we_there_yet( travelled ); - | ^^^^^^^^^ expected struct `Miles`, found struct `Kilometers` - | - = note: expected type `Miles` - found type `Kilometers` - = help: here are some functions which might fulfill your needs: - - .as_miles() -\end{minted} - -\subsubsection{Uninstantiable Types} -They can be used to statically prevent certain code paths or mark other impossible conditions in the code. 
-The simplest example is a function that is defined to never return: - -\begin{minted}[linenos,breaklines]{rust} -enum CanNeverExist {} -fn never_returns() -> CanNeverExist { - loop {} -} -\end{minted} - -If line 3 was removed, the compiler would regard it as an error: - -\begin{minted}[breaklines]{md} -error[E0308]: mismatched types - --> src/main.rs:2:37 - | -2 | fn never_returns() -> CanNeverExist { - | _____________________________________^ -3 | | // loop {} -4 | | } - | |_^ expected enum `CanNeverExist`, found () - | - = note: expected type `CanNeverExist` - found type `()` -\end{minted} - -If no value is explicitly given at the end of the function, the compiler implies \code{()} which is \emph{something}, unlike the empty enum which is \emph{nothing} and cannot actually be instantiated and returned. -\code{loop{}} among others evaluates to \emph{nothing} as it will never stop and return, that is why the compiler was satisfied with it. -Trying to pass an instance of \code{CanNeverExist} yields the following: - -\begin{minted}{md} -error[E0574]: expected struct, variant or union type, found enum `CanNeverExist` - --> src/main.rs:3:5 - | -3 | CanNeverExist {} - | ^^^^^^^^^^^^^ not a struct, variant or union type -\end{minted} - -This demonstrates that the empty enum cannot be instantiated, and is merely a symbolic type. -Rust includes the \code{!} type for this purpose, and the function could've been written as \mintinline{rust}{fn never_returns() -> ! { loop{} }}. -This pattern can be used in \gls{os} development for the \gls{os}'s function that runs the main loop, and is not supposed to return. - -\paragraph{In Combination With Traits And PhantomData} -Empty enums can be used for more advanced use-cases in combination with traits, as shown in \cref{rnd::existing-os-dev-with-rust::systems::blog-os::mm}, where the lowest level of the page hierarchy is prevented from calling the \code{next_table()} method.
- -\subsection{Inner- and Outer Mutability} -Some types in \gls{Rust} provide interior mutability, so that their \emph{value} can be mutated even though they have not been declared using \code{mut}. - -This study has two usages of types with interior mutability: -\code{AtomicUsize}, used in \cpnameref{code::imezzos-preemptive-multitasking::clock::tick} and \code{spin::Mutex} used in \cpnameref{rnd::example::mutex}. - -Other types which are not covered in this study include \code{Rc}, \code{Arc}, and \code{RefCell}. - -\chapter{Weakness Mitigation And Prevention} -\label{rnd::weakness-mitig-prev} -The term \textit{mitigation} used by the \gls{CWE} literally expresses that the suggested measures are not fully preventive. -This chapter practically explores the weaknesses and their mitigation suggestions presented in \cref{context::weaknesses-mem-safety,context::weakness-mitigation}. -As this study is looking for weakness \emph{prevention}, which might be achieved through static analysis, mitigation and prevention are explored side-by-side within this chapter. -The results are summarized in \cref{enc}. - -\section{Porting \glsentrytext{C} Vulnerabilities} -\label{rnd::weakness-mitig-prev::porting-c-vulns} - -Unlike originally anticipated, the vulnerabilities were not manually ported to Rust. -Instead, the investigation of \cnameref{rnd::weakness-mitig-prev::stack-protection} and \cnameref{rnd::imezzos-preemptive-multitasking} was extended. - -However, \citeauthor{Beingessner2015} was able to identify an underlying pattern of the violation of implicitly trusted invariants as a common cause for same-type weaknesses as listed in \cnameref{context::weaknesses-mem-safety::manifestations::cwe-ex}. - - -\section{Stack Protection} -\label{rnd::weakness-mitig-prev::stack-protection} -The goal of this section is to learn about \gls{Rust}'s stack protection mechanisms in comparison to C.
- -\subsection{Return Address Manipulation Experiments} -\label{rnd::weakness-mitig-prev::stack-protection::ret-addr-experiments} -Return address manipulation is a dangerous stack manipulation as it changes control flow of the program without explicit function calls. -First a \gls{C} example demonstrates the issue, then a \gls{Rust} port is attempted. - -\subsubsection{Example in C} -\label{rnd::weakness-mitig-prev::stack-protection::ret-addr-experiments::c} - -\begin{figure}[ht!] -\begin{minted}[linenos,breaklines]{c} -static void simple_printer(void) { fprintf(stderr, "I wonder who called me?"); } -void modifier(void) { - uint64_t *p; - *(&p + 1) = (uint64_t *)simple_printer; - *(&p + 2) = (uint64_t *)simple_printer; -} -int main(void) { - modifier(); - fprintf(stderr, "main exiting"); - return 0; -} -\end{minted} -\caption{Stack-Frame Modification in C} -\label{code::context::examples::sf-modification-simple-c} -\end{figure} - -\Cref{code::context::examples::sf-modification-simple-c} is a little example program in \gls{C}, which manipulates the return function address stored on the \gls{stack}. -This is done by simple and legal in \gls{C} pointer arithmetic. -It (ab)uses the address of the first local variable to create references into the \gls{sf} below on the \gls{stack}. -Since the first variable is in the beginning of the \gls{sf} of the called function, it can be used to guess the position of the return address on the \gls{stack}. -Depending on the \gls{compiler} settings, the return address is stored either one or two stack entries in front of the first local variable for a function with no arguments. -In a brute-force manner the program simply overwrites both entries with the address of \code{simple_printer}. -By writing a different function address at these entries, the \code{ret} instruction will jump there, since the original return address has been overwritten. 
- -The output of running this program is -\begin{minted}{md} -I wonder who called me?Segmentation fault -\end{minted} - -\Cref{code::context::examples::sf-modification-simple-c-asm} shows the Assembly code of the \code{modifier()} function from two different compilation runs. -One version makes use of the RBP register as the \gls{sf} Base-Pointer, and the other relies solely on the Stack-Pointer (RSP) for referencing \gls{sf} variables. -The RBP register is pushed onto the \gls{stack} in the function-prologue and restored in the function-epilogue, which takes up one \gls{stack} entry. - -\begin{figure}[ht!] -\begin{subfigure}[T]{0.49\textwidth} -\centering -\begin{minted}[linenos,breaklines]{objdump} -<modifier>: -push rbp -mov rbp,rsp -movabs rax,0x400690 -mov QWORD PTR [rbp+0x0],rax -mov QWORD PTR [rbp+0x8],rax -pop rbp -ret -nop DWORD PTR [rax+rax*1+0x0] -\end{minted} -\caption{Compiled with \code{-fno-omit-frame-pointer}} -\end{subfigure} -\begin{subfigure}[T]{0.49\textwidth} -\centering -\begin{minted}[linenos,breaklines]{objdump} -<modifier>: -movabs rax,0x400690 -mov QWORD PTR [rsp],rax -mov QWORD PTR [rsp+0x8],rax -ret -\end{minted} -\subcaption{Compiled with \code{-fomit-frame-pointer}} -\end{subfigure} -\caption{Stack-Frame Modification in C: Assembly} -\label{code::context::examples::sf-modification-simple-c-asm} -\end{figure} - -% \Cref{fig:callstack-manipulation} is an attempt to visualize what happens in memory and with the \gls{stack} and the \gls{cpu}'s RIP {64-Bit Instruction Pointer} register. -% -% \begin{figure} -% TODO -% \includegraphics[width=\textwidth]{gfx/TODO-callstack-manipulation} -% \caption{fig:callstack-manipulation} -% \label{fig:callstack-manipulation} -% \end{figure} -% \FloatBarrier - -\paragraph{Compiler Hardening - Placing A Canary Value} -The manipulation can be mitigated in \gls{C} using the \code{-fstack-protector-all} option with the \gls{clang}. - -\begin{figure}[ht!]
-\begin{minted}[linenos,breaklines,highlightlines={3,8-9,13}]{nasm} -<modifier>: -sub rsp,0x18 -mov rax,QWORD PTR fs:0x28 -mov QWORD PTR [rsp+0x10],rax -mov QWORD PTR [rsp+0x10],0x400770 -mov QWORD PTR [rsp+0x18],0x400770 -mov rax,QWORD PTR fs:0x28 -mov rcx,QWORD PTR [rsp+0x10] -cmp rax,rcx -jne 400760 -add rsp,0x18 -ret -call 4005a0 <__stack_chk_fail@plt> -data16 nop WORD PTR cs:[rax+rax*1+0x0] -\end{minted} -\caption{Stack Frame Modification C/ASM - clang stack protection} -\label{code::examples::sf-modification-clang-protection} -\end{figure} - -The highlighted lines in \cref{code::examples::sf-modification-clang-protection} show the instructions that are part of the protection mechanism. -On \gls{LX}/\gls{amd64}, it inserts checks into the function prologues and epilogues that make use of the \gls{cpu}'s FS register, which can only be modified by the \gls{os}. -The highlighted lines are part of the stack protection. - -First, the value is written on the \gls{stack} and later checked for equality; this is called a \textit{canary value}. -Inequality indicates a write operation to the stack-frame, so it jumps to the error handler. -This causes the program to quit with the message: \mint{md}{*** stack smashing detected ***: ./stack_handling terminated} - -The following issues can be identified about this detection: -\begin{enumerate} - \item It's not effective in all cases. - If line 5 is omitted, which overwrites the canary value, the check doesn't detect any changes but the return address is manipulated nonetheless by line 6. - \item Checks happen at runtime. - This study is searching for compile-time checks. -\end{enumerate} - -\subsubsection{Porting to Rust} -\label{rnd::weakness-mitig-prev::stack-protection::ret-addr-experiments::rust} -\Cref{code::examples::sf-modification-simple-rust} shows the complete code for the return address modification attempt in Rust. - -\begin{figure}[ht!]
-\begin{minted}[linenos,breaklines]{rust} -#![feature(naked_functions)] - -#[inline(never)] -fn modifier() { - let v: usize = 0; - let v_addr = (&v as *const usize) as usize; - unsafe { - *((v_addr + 1 * 8) as *mut usize) = simple_printer as usize; - } -} - -#[naked] -fn simple_printer() { - println!("I wonder who called me?"); -} - -fn main() { - modifier(); - println!("main exiting") -} -\end{minted} -\caption{Stack-Frame Modification \emph{attempt} in Rust} -\label{code::examples::sf-modification-simple-rust} -\end{figure} -\FloatBarrier -The output of running this program is \textit{I wonder who called me?Segmentation fault}, exactly the same as with the C version. - -The \code{unsafe} keyword is required here for writing to the calculated raw pointer. -Removing it will cause the compilation to error as follows: -\begin{minted}[breaklines]{md} -error[E0133]: dereference of raw pointer requires unsafe function or block - --> src/main.rs:96:5 - | -96 | *((v_addr + 1 * 8) as *mut usize) = simple_printer as usize; - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ dereference of raw pointer -\end{minted} -Without \code{unsafe}, \gls{Rust} doesn't compile the program and stack manipulation in this manner is not possible. - -In addition, two annotations had to be added to the code. -The function \code{simple_printer()} requires \code{\#[naked]}, which prevents the compiler from generating pro- and epilogues for it, which would have made assumptions about the stack that the constructed attack didn't satisfy. -The function \code{modifier()} requires \code{\#[inline(never)]}, which prevents the compiler from copying the function's instructions into the caller; otherwise no actual return would be made. - -\subsection{Stack Clash} -\label{rnd::weakness-mitig-prev::stack-protection::stack-clash} -This subsection investigates the vulnerability described in \cpnameref{context::weaknesses-mem-safety::manifestations::stack-clash} in detail, from userspace and \gls{os} perspectives.
-Current \gls{C} and \gls{Rust} compiler options need to be explored to find mitigation and prevention methods for the issue. -The primary focus is on \gls{Rust}'s static analyzer, while the secondary focus lies on \gls{llvm}, as it is currently the backend used in \gls{Rust}. - -\subsubsection{Inside a hypothetical OS on AMD64} -\label{rnd::weakness-mitig::stack-protection::rust-stack-clash::in-os} -Despite its name, this section is about solving the stack clash that occurs in userspace by code in the \gls{os}. -As described in \cref{context::os-dev-concepts::hw-supported-mm::multilevel-paging-concept,context::os-dev-concepts::hw-supported-mm::multilevel-paging-amd64}, the \gls{os} works with the \gls{mmu} to implement paging. -The \gls{os} gains control only when a page-fault is triggered, either due to an unmapped \gls{vaddr} or a page protection violation. -The latter is also caused by accessing the guard page behind the \gls{stack}. -The \gls{os} proposal mentioned in \cref{context::weaknesses-mem-safety::manifestations::stack-clash::proposals} suggests increasing this guard page to a bigger guard area. - -\paragraph{Problematic Deferred Page Mapping} -The reason for this mechanism is that some \glspl{os}, including \gls{LX}, perform deferred mapping of pages for the \gls{stack}, i.e. they map the \glspl{vaddr} only when they are accessed by the userspace \gls{app}. -The \gls{stack} can grow by accessing unmapped \glspl{vaddr} until it reaches the guard area. -The issue here is that a dynamic variable, e.g. a string, could instantly grow large enough to skip the guard area. -If the address at the end of this string were mapped, e.g. to the heap, the \gls{os} would not even notice that this happened, as the memory access is transparent to the \gls{os}. - -If the \gls{os} forced the \glspl{app} to explicitly request memory instead of mapping on-access, preventing such large growth would be simple.
-The trade-offs of this approach are beyond the scope of this study, as it is purely an \gls{os} design decision. -It is also not obvious that \gls{Rust} -- or any compiler for that matter -- could solve this specific problem in the \gls{os}, so there is no more investigation to be done. - -\paragraph{Increasing the Guard Area} -The \gls{os} can reserve a sufficiently large area of guard pages behind the process's stack, which are protected so that the process can't access them without causing a page-fault exception. -A sufficiently large stack-based buffer might still allow jumping over this area\cite{TheStackClash}, so an \gls{os}-only solution is not possible. - -The patch\footnote{\url{https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1be7107fbe18eed3e319a6c3e83c78254b693acb}} for the specific vulnerability in \gls{LX} was examined. -The patch doesn't contain any fixes for defects that could have been prevented by \gls{Rust}'s static analyzer in the first place. - -\subsubsection{In Userspace} -\label{rnd::weakness-mitig-prev::stack-protection::stack-clash::user-space} -The userspace proposal mentioned in \cref{context::weaknesses-mem-safety::manifestations::stack-clash::proposals} suggests recompiling all \glspl{app} with \code{-fstack-check} enabled in \gls{GCC}, which introduces a certain type of runtime checks. -The search for compile-time checks is documented after explaining this suggestion. - -\paragraph{Runtime Checks} -More specifically, it causes the \gls{compiler} to -"generate code to verify that you do not go beyond the boundary of the stack. ... Note that this switch does not actually cause checking to be done; the operating system or the language runtime must do that. The switch causes generation of code to ensure that they see the stack being extended."\cite[p.~349]{GCC540}. -The note unveils that this mechanism relies on the guard page being available.
- -\gls{Rust} has a similar feature called stack probing, which is turned on by default. -The implementation displayed in \cref{code::context::examples::rust-stackprobe-asm} is extracted from the binary file\footnote{produced with rustc 1.21.0-nightly (aac223f4f 2017-07-30)} compiled from the code shown in \cref{code::examples::huge-stack-rust}. - -\begin{figure}[ht!] -\begin{subfigure}[T]{0.39\textwidth} -\centering -\begin{minted}[linenos=false,breaklines]{nasm} -huge_stack>: -movabs rax,0x100000078 -call 3f4e0 <__rust_probestack> -sub rsp,rax -... -\end{minted} -\caption{Function Prologue} -\end{subfigure} -\begin{subfigure}[T]{0.6\textwidth} -\centering -\begin{minted}[highlightlines={2-7,9},linenos=false,breaklines]{nasm} -__rust_probestack>: -$3f4e0: mov r11,rax -$3f4e3: sub rsp,0x1000 -$3f4ea: test QWORD PTR [rsp+0x8],rsp -$3f4ef: sub r11,0x1000 -$3f4f6: cmp r11,0x1000 -$3f4fd: ja 3f4e3 <__rust_probestack+0x3> -$3f4ff: sub rsp,r11 -$3f502: test QWORD PTR [rsp+0x8],rsp -$3f507: add rsp,rax -$3f50a: ret -$3f50b: nop DWORD PTR [rax+rax*1+0x0] -\end{minted} -\subcaption{Probestack Implementation} -\end{subfigure} -\caption{Rust's Stack Probe Function Assembly} -\label{code::context::examples::rust-stackprobe-asm} -\end{figure} - -On the left, it shows \code{huge_stack}'s function prologue, which has a call to the \code{__rust_probestack} implementation. -It passes the estimated stack size to the probestack via the `rax' register. - -On the right side is the probestack implementation. -It is a loop (first highlighted section) which iterates from the stack pointer down to the estimated stack end address ($rsp - r11$) in steps of 0x1000 bytes. -Not coincidentally, this size -- 4 KiB -- is the default page size on \gls{amd64}; this means the loop iterates over every page within the estimated stack. -It calls \mintinline{nasm}{test} on each calculated page address, which acts as an unmodifying access.
-This is enough to trigger a page fault in the \gls{mmu}, thus notifying the \gls{os} about the stack growth. -The \gls{os} can then check if the guard page was accessed or if the stack is permitted to grow this far. - -As this code was extracted from a binary, the estimated stack size must have been calculated at compile-time. -This is fortunate and drives the investigation further towards the question of whether this check could be performed entirely at compile-time. - -\paragraph{Attempted Compile-Time Prevention} -\label{rnd::weakness-mitig-prev::stack-protection::stack-clash::user-space::compile-time} -%The compile-time prevention of the stack clash depends on the ability to predict the stack size and its boundaries accurately. -%This investigation justifies a separate chapter, please see \cref{rnd::stack-size-estimation}. -% -%\chapter{Compile-Time Stack-Size Estimation} -%\label{rnd::stack-size-estimation} -By estimating the stack size at compile-time, the stack clash -- covered in \cref{rnd::weakness-mitig-prev::stack-protection::stack-clash} -- and other undesired stack scenarios could be predicted without running into them. -In theory, this analysis requires a prediction of the worst-case stack growth for each procedure based on source code information. -This maximum stack growth size must then be compared to the stack size limit, as well as the distance and the size of the guard area; it must be less than or equal to all given limits. -This could effectively prevent the stack from overflowing and from touching or leaping over the guard area. - -The following simplified inequalities must be true: -\begin{equation} - sum~of~all~procedure~stacks \leq max~stack~size - \label{equ:size-all-stack-procedures} -\end{equation} -\begin{equation} - each~procedure~stack \leq max~procedure~stack~size - \label{equ:size-stack-procedure} -\end{equation} - -The calculation of the above values requires the following variable sizes to be known at the time of calculation: -\begin{listing}[ht!]
-\begin{enumerate} - \item{Prologue space allocation, depends on local variables and arguments} - \item{Stack limit: the maximum stack size is not equal on all \glspl{os}, and can even change per process} - \item{The page size and guard area size: not equal on all \glspl{os}} - \item{Dynamically sized stack variables have no static upper boundary} - \item{Cyclic procedure calls cause endless stack growth, including recursion} -\end{enumerate} -\caption{Variables Required for Stack Overflow Prediction} -\label{lst:variables-stack-overflow} -\end{listing} - -The following paragraph -The maximum stack size and the guard area size must be supplied to the compiler. -Dynamically sized stack variables and circular prodecure calls are more difficult to solve. - - -\subparagraph{Rust's State} -\gls{llvm} has an option called \code{-warn-stack-size=}, and has been enabled for this investigation. -How this option can be configured is explained in \cref{rnd::rust::cargo::tweak-llvm}. -Various combinations of the following configuration options have been tried: - -\begin{minted}[breaklines]{markdown} --asan-stack - Handle stack memory --safe-stack-layout - enable safe stack layout --warn-stack-size= - Warn for stack size bigger than the given number -\end{minted} -The first two options are not expected to have an effect on the static analysis yet curious whether they have an additonal effect on runtime overflow detection. - -\begin{figure}[ht!] 
-\begin{minted}[breaklines,highlightlines={3,4}]{rust} -#[inline(never)] -fn huge_stack() { - const slice_length: usize = 0x100_000_000; - let slice: [u64; slice_length] = [0xdeadbeef; slice_length]; - let slice_start_addr = &slice[0] as *const u64; - let slice_end_addr = &slice[slice_length - 1] as *const u64; - println!("{:?} - {:?} = {:?}", - slice_start_addr, - slice_end_addr, - (slice_end_addr as usize - slice_start_addr as usize) / std::mem::size_of::<u64>()); -} - -fn main() { - huge_stack(); - println!("main exiting") -} -\end{minted} -\caption{Program that allocates a huge slice on the stack} -\label{code::examples::huge-stack-rust} -\end{figure} -\FloatBarrier - -The highlighted lines in \cref{code::examples::huge-stack-rust} construct a slice on the stack with the size of $8 * 0x100000000 = 0x800000000 = 34,359,738,368$ Bytes (32\,GiB), which would exceed the addressable memory of any 32-Bit system and should definitely be enough to trigger the configured stack warning. - -Unexpectedly, this program compiled without a warning. -It was expected that the \gls{compiler} detects this huge statically allocated stack slice, compares it to the configured maximum allowed size and reports the violation. -At runtime it crashes with this message: - -\begin{minted}{md} -thread 'main' has overflowed its stack -fatal runtime error: stack overflow -Aborted -\end{minted} -The various options had no effect on the runtime output. - -One part of this message is even more unexpected: it is said to have overflowed the \code{main} stack although it is known that \code{huge_stack} is the function that allocates too much space on the \gls{stack}. - -\paragraph{Available Size Information} -Taking a look at the function prologue reminds one that an estimation of the stack size is in fact calculated, passed to \gls{Rust}'s probestack implementation, and then subtracted from the stack pointer (RSP) to reserve this space on the \gls{sf}.
- -\begin{minted}{nasm} -huge_stack: - movabs rax,0x800000078 - call 3e120 <__rust_probestack> - sub rsp,rax -\end{minted} - -Out of the five variables required (\cpnameref{lst:variables-stack-overflow}), this serves the first and simplest one: prologue-allocated space. -A source code investigation of \gls{rustc} and \gls{llvm} has yielded the information that the function prologue is emitted by \gls{llvm}, and the Rust compiler has no knowledge about the \gls{sf} size. - -\paragraph{Cyclic Procedure Calls} -Cyclic procedure calls are currently undetected, and the following code compiles fine: -\begin{minted}{rust} -fn a(i: usize) { b(i+3); } -fn b(i: usize) { a(i+5); } -fn main() { a(0); } -\end{minted} -Naturally this program causes a stack overflow at runtime, as it grows its stack with every function call and eventually hits the \gls{os} guard page or the maximum allowed stack size, depending on which is more restrictive. - -\subparagraph{Unconditional Recursion Detection} -Unconditional recursion is a special case of cyclic procedure calls and is detected in Rust. -The following is a minimal example of such a situation: - -\begin{minted}{rust} -#![deny(unconditional_recursion)] - -fn a() { a(); } -fn main() { a(); } -\end{minted} - -By default, the compiler merely warns upon detection, but due to the \code{deny} attribute in the first line of the example, it aborts compilation with the following error instead: - -\begin{minted}[breaklines]{md} -error: function cannot return without recurring - --> src/main.rs:123:1 - | -123 | fn a() { a(); } - | ^^^^^^^^^^^^^^^ - | -note: lint level defined here - --> src/main.rs:2:9 - | -2 | #![deny(unconditional_recursion)] - | ^^^^^^^^^^^^^^^^^^^^^^^ -note: recursive call site - --> src/main.rs:123:10 - | -123 | fn a() { a(); } - | ^^^ - = help: a `loop` may express intention better if this is on purpose -\end{minted} -The error is very explicit about the finding, including the fact that the denial of unconditional recursion is user-intended.
- -% TODO: https://gcc.gnu.org/onlinedocs/gnat_ugn/Static-Stack-Usage-Analysis.html - -\paragraph{State Summary and Suggestions} -Not all required information is available at compilation-time. -\Cref{lst:amd64-stack-frame-components} is an extended version of the earlier determined list \cref{lst:variables-stack-overflow}. -This one includes the previous findings and suggestions on how this information could be retrieved. - -\begin{table} - \begin{tabularx}{\textwidth}{@{}lX@{}} - \toprule - Information & Information Availability \\ - \hline - Prologue space allocation & Available in \gls{llvm} \\ - Stack limit & Not available. Suggestions: heuristics in compiler, or provided by user. This must match the target system not necessarily the compiler system. \\ - Page size & see above \\ - Guard area & see above \\ - Recursiv procedure calls & Available. \\ - Cyclic procedure calls & Not available. \\ - \bottomrule - \end{tabularx} -\caption{Result: Variables Required for Stack Overflow Prediction} -\label{lst:result-variables-stack-overflow} -\end{table} -\FloatBarrier - -Dynamically sized stack variables have been omitted from the table since they are irrelevant. -On stack variable-length-arrays and variadic arguments are not supported by \gls{Rust}, and there is no indication of other use-cases. - -% \chapter{\glsentrytext{LX} Modules Written In \glsentrytext{Rust}} -% The numerous \gls{LX} vulnerabilities are a great motivator for using \gls{Rust} for \gls{LX} drivers. -% This chapter presents the attempt to use \gls{Rust} for a simple buffer that is presented to userspace as a character device. -% -% - TODO: explain the difficulty to use the Kernel's C Macros, which are required to expose a character device - -\chapter{Existing \glsentrytext{os}-Development Projects Based On Rust} -\label{rnd::existing-os-dev-with-rust} -This chapter presents research papers and existing projects that are related to this study. 
-In addition to presenting their content, the authors' tangible influence on the Rust language is determined. - -\section{Research Papers} -\label{rnd::existing-os-dev-with-rust::papers} -As Rust is a relatively young language, the selection of research papers relevant for this study is limited. -This is likely due to the fact that Rust was not stabilized until May 15, 2015\footnote{\url{https://blog.rust-lang.org/2015/05/15/Rust-1.0.html}}, and relied on a runtime garbage collector for much of its pre-stable existence. - -It was decided not to summarize the references, but rather explain the gained insight in particular detail at each relevant occurrence. - - -% \subsection{ -% \citetitle{Levy2015a} -% \cite{Levy2015a} -% } - -% \paragraph{ -% \citetitle{Beingessner2015} -% \cite{Beingessner2015} -% } -% -% \paragraph{ -% \citetitle{Reed2015} -% \cite{Reed2015} -% } -% \paragraph{ -% \citetitle{Getreu2016} -% \cite{Getreu2016} -% } -% \paragraph{ -% \citetitle{Balasubramanian2017} -% \cite{Balasubramanian2017} -% } -% \paragraph{ -% \citetitle{Nilsson2017} -% \cite{Nilsson2017} -% } - - -% \section{Libraries} -% -% \subsection{Libfringe} -% % TODO: https://github.com/edef1c/libfringe -% -% -\section{Systems} -Most of the presented systems target the \gls{amd64} architecture; Tock OS, which is targeted towards an ARM variant, is the only exception. -The interesting parts of each \gls{os} are their origin, intentions, their current state, the level of memory-safety, and what design or language features made this level possible. - -\subsection{Blog OS} -\label{rnd::existing-os-dev-with-rust::systems::blog-os} -Blog OS is a hobby project about writing an OS in \gls{Rust}. -It is well documented by the author through insightful blog posts\footnote{\url{https://os.phil-opp.com/}}. - -\subsubsection{General State} -Blog OS has a working memory allocator which allows them to use Rust's heap-based features. -Exception handlers are stubbed and there are no notions of tasks yet.
-The focus lies on a Rust-idiomatic implementation of the \gls{os} features. - -\subsubsection{Paging With Type Safety} -\label{rnd::existing-os-dev-with-rust::systems::blog-os::mm} -Blog OS uses Rust's type system to model the hierarchical page tables (\cref{context::os-dev-concepts::hw-supported-mm::multilevel-paging-amd64}) in code in a safe way. -This is explained in on one of his blog posts\footnote{\url{https://os.phil-opp.com/page-tables/}}, and demonstrates how Rust can help to prevent mistakes. - -Please note that the example has been rewritten for a 2-level page table hierarchy simply to save space in this document. -The methodology is the same for all levels above 1, so it is sufficient to have only one level above for demonstration. -The code example includes comments which are relevant for the understanding. - -Starting with the result is the fastest way to explain this. -The highlighted line in the following code is supposed to fail in this test, as the lowest page table hierarchy is not followed by another one. -\begin{minted}[breaklines,highlightlines=5]{rust} -pub const P2: *mut Table = 0xffffffff_fffff000 as *mut _; - -fn test() { - let p2 = unsafe { &*p2 }; - p2.next_table(42) - .and_then(|p1| p1.next_table(0xcafebabe)) -} -\end{minted} - -The \code{P2} pointer is a static memory location, to which the page table has hypothetically been written by the \gls{os}. -It doesn't matter for testing purposes, because this test fails compilation successfully and is not able to run. 
-The following error occurs on compilation: - -\begin{minted}[breaklines]{md} -error: no method named `next_table` found for type - `&memory::paging::table::Table` - in the current scope -\end{minted} - -This is achievied by defining the types accordingly: - -\begin{minted}[breaklines,highlightlines={}]{rust} -// Empty enum provide distinct type for each level -pub enum Level2 {} -pub enum Level1 {} - -// Trait for the lowest level -pub trait TableLevel {} - -// Trait for all above levels, need a nested type to indicate what type follows after them -trait HierarchicalLevel: TableLevel { - type NextLevel: TableLevel; -} - -// All levels above 1 are hierarchical and statically define what level comes next, e.g. -// ... Level4 { type NextLevel = Level3; } -// ... Level3 { type NextLevel = Level2; } -impl HierarchicalLevel for Level2 { type NextLevel = Level1; } -impl TableLevel for Level1 {} - -// Use PhantomData to consume the TableLevel as it is detected as unused and won't compile -use core::marker::PhantomData; -pub struct Table { - entries: [Entry; ENTRY_COUNT], - level: PhantomData, -} - -// Unified next_table method for all levels! -impl Table where L: HierarchicalLevel -{ - pub fn next_table(&self, index: usize) -> Option<&Table> {...} -} -\end{minted} - -\subsubsection{Influences on Rust} -\label{rnd::existing-os-dev-with-rust::systems::blog-os::influence} -The author filed a pull-request\footnote{\url{https://github.com/rust-lang/rust/pull/39832}} against Rust that enabled the x86-interrupt calling convention for \gls{Rust}, which is supported by the underlying \gls{llvm}. -The change was accepted by the maintainers. - -The pull-request describes the motivations for the change, two of which are: interface safety and increased performce. 
-In detail: -\begin{enumerate} - \item Safer interfaces: We can write a \code{set_handler} function that takes a \code{extern "x86-interrupt" fn(&ExceptionStackFrame)} and the compiler ensures that we always use the right function type for all handler functions. This isn't possible with the \code{#[naked]} attribute. - \item Higher performance: A naked wrapper function always saves all registers before calling the Rust function. This isn't needed for a compiler supported calling convention, since the compiler knows which registers are clobbered by the interrupt handler. -\end{enumerate} - -Argument 1 is a way to prevent mistakes made by the \gls{os} developer when working on the interrupt handlers, thus increasing safety. -This is similar to the type safety explained in \cref{rnd::existing-os-dev-with-rust::systems::blog-os::mm}. -It could be strengthened even more, as one could define a type for each specific interrupt handler and entry which are forced to match by the \gls{compiler}'s type checks. - -Argument 2 explains that context switches to and from interrupt handlers can be sped up, as the \gls{compiler} can now examine the interrupt handler and only store and restore those \gls{cpu} registers that are actually used by the function. - -\subsection{Redox OS} -The Redox OS has a "hybrid kernel that supports X86\_64 systems and provides Unix-like syscalls for primarily Rust applications"\footnote{\url{https://doc.redox-os.org/kernel/kernel/}}. -It is entirely written in Rust (with necessary inline ASM) and supports\footnote{\url{https://www.redox-os.org/}} the Rust standard library. - -\paragraph{General State} -Its state has far surpassed being a hobby project, featuring multitasking on multiple CPUs, user- and kernel-space threads, a file system, rudimentary networking support and graphical output. - -The userland of Redox OS provides a package manager, a graphical desktop environment, and due to its \gls{microkernel} aspect also the device drivers.
- -\paragraph{Page Management inspired by Blog OS inspired} -A comment in the Redox kernel\footnote{\url{https://github.com/redox-os/kernel/blob/b364d052f20f1aa8bf4c756a0a1ea9caa6a8f381/src/arch/x86_64/paging/mod.rs\#L2}} explitly states to include code taken from the Blog OS paging implementation \cref{rnd::existing-os-dev-with-rust::systems::blog-os::mm}. - -\subsubsection{Stack Clash Invulnerable} -The page-fault handler in Redox OS is as simple as\footnote{\url{https://github.com/redox-os/kernel/blob/b364d052f20f1aa8bf4c756a0a1ea9caa6a8f381/src/arch/x86_64/interrupt/exception.rs\#L81}} - -\begin{minted}[autogobble,breaklines,highlightlines=6]{rust} -interrupt_error!(page, stack, { - let cr2: usize; - asm!("mov rax, cr2" : "={rax}"(cr2) : : : "intel", "volatile"); - println!("Page fault: {:>02X}:{:>016X} at {:>02X}:{:>016X}", stack.code, cr2, stack.cs, stack.rip); - stack_trace(); - ksignal(SIGSEGV); -}); -\end{minted} -On Redox OS \emph{every} page-fault unconditionally sends the \textit{SIGSEGV} signal (line highlighted) to the process that caused the page-fault. -It does not use deferred page mapping described in \cref{rnd::weakness-mitig::stack-protection::rust-stack-clash::in-os}, and is therefore not vulnerable to the stack clash. -This is based on a design decision and has little to do with \gls{Rust}. - -\subsection{Influences on Rust} -The main author of Redox OS has become an active contributor to the Rust language, likely with the main motivation of making Rust more suitable for \gls{os} development. - -The biggest achievement from the perspective of this study is the successful integration into Rust's libstd, which happened continuously and cannot be referenced easily. -This allows programmers to use Rust with all it's features to develop programs for Redox OS. 
- -\subsection{Tock OS} -Tock OS is "an embedded operating system designed for running multiple concurrent, mutually distrustful applications on low-memory and low-power microcontrollers."\cite{TockOS} - -\citeauthor{Levy2015a} have been using Rust to develop a new embedded system \gls{os} for microcontrollers called Tock. -They describe Rust's ownership model as restrictive, because it prevents safe resource sharing in embedded-typical event-based scenarios. -They made suggestions to extend the language with Execution Contexts, which would "allow programs to mutably borrow values multiple times as long as those borrows are never shared between threads. Execution contexts allow the compiler to distinguish such sharing from actual errors using only local analysis." - -On their website the authors recently made the following statement: -"After feedback from the Rust developers and the community, we were able to overcome those challenges without modifications to the language. We also learned that we understated how disruptive some of the changes we proposed would be to the language and do not believe they are worthwhile. This has been discussed extensively now in the Rust community. You should read this paper critically, not as conclusive scientific findings, but as the perspectives of the authors during a particular point in the development of Tock."\cite[/papers]{TockOS} -% -% \subsubsection{Task Model} -% \subsubsection{Memory Management} - -\subsection{intermezzOS} -"intermezzOS is a teaching operating system, specifically focused on introducing systems programming concepts to experienced developers from other areas of programming."\footnote{\url{https://intermezzos.github.io/}} - -The project consists of two source code repositories and an accompanying book. -It has been inspired by the Blog OS, the author of which is also a contributor to intermezzOS.
- -The "bare-bones" contains only rudimentary machinery, from which the book walks the developer step-by-step to a successful boot of the kernel within a virtual machine emulator. - -The "kernel" contains more advanced development and even surpasses the books latest chapters. -This code base has been chosen as the foundation for the \gls{os} developments for this studies. -Starting with this code base, preemptive multitasking is implemented, with the goal to learn as much as possible about the languages memory-safety aspects. -This development is documented in \cref{rnd::imezzos-preemptive-multitasking}. - -\subsection{Others} -This section gives an overview over the many projects I have stumbled upon that I think are worth mentioning. -All these projects are undertakings to write \gls{os}s in Rust, and interested readers might want to take a look around. -While it's fortunate to see that Rust has gained popularity among \gls{os} development interested programmers, the effort of investigating each cannot be spent in the course of this work. - -\paragraph{Tifflin} -Experimental Kernel (and eventually Operating System). -\url{https://github.com/thepowersgang/rust_os} - -\paragraph{Rust Bare-Bones Kernel} -This is designed to be a rust equivalent of the \url{OSDev.org} Bare\_Bones article, presenting the bare minimum you need to get started. -\url{https://github.com/thepowersgang/rust-barebones-kernel} - -\paragraph{Bare Metal Rust: Building kernels in Rust} -A blog series that advances Blog OS (\cref{rnd::existing-os-dev-with-rust::systems::blog-os}). -\url{http://www.randomhacks.net/bare-metal-rust/} - -\paragraph{The Stupid Operating System} -SOS is a simple, tiny toy OS implemented in Rust. 
-\url{https://github.com/hawkw/sos-kernel/} - -\chapter{\glsentrytext{imezzos}: Adding Preemptive \glsentrytext{os}-Level Multitasking} -\label{rnd::imezzos-preemptive-multitasking} -Development on intermezzOS -- or any other \gls{os} -- requires features that are only available in Rust's nightly version. -This version is under very active development, and at the time I started development on intermezzOS, the project was not compatible with the current version. -Debugging a system that does not work for someone who never experienced the working state is hard, and the initial learning curve for the required tools, in addition to learning a new language, was very steep. -This chapter assumes basic knowledge on how binaries are compiled and linked by a chain of tools. - -\paragraph{On Code Length in this Chapter} -I am aware that the code takes up much space, but I have decided to keep it as is, as this study is about \gls{os} \emph{development}, which is the combination of theoretical knowledge and practical implementation. -To keep this document self-contained and allow a comfortable reading experience, the findings and code are tightly coupled. - -It shows that \gls{GCC} is still required in the development process, at least for this specific \gls{os}. -However, it is only used for linking and not for actual compilation. - -\begin{minted}[breaklines]{diff} ---- a/x86_64-unknown-intermezzos-gnu.json -+++ b/x86_64-unknown-intermezzos-gnu.json -@@ -3,7 +3,8 @@ - "cpu": "x86-64", - "data-layout": "e-m:e-i64:64-f80:128-n8:16:32:64-S128", - "executables": true, -- "linker-flavor": "gcc", -+ "linker": "gcc", -+ "linker-flavor": "gcc", - "llvm-target": "x86_64-unknown-none-gnu", - "no-compiler-rt": true, - "os": "intermezzos", -\end{minted} - -The changes are not very interesting, but the file per se is. -It is used to teach \gls{rustc} about the target system, so that it can produce compatible code.
- -\paragraph{Toolchain} -The following tools form the toolchain required to work on intermezzOS: -\begin{itemize} - \item rustc -- \glsentrylong{rustc} - \item cargo -- \glsentrylong{cargo} - \item xargo -- \glsentrylong{xargo} - \item nasm -- Assembly compiler - \item ld -- Linker - \item qemu-system-x86\_64 - \item grub2-mkrescue -- GRUB2 Bootloader image builder - \item xorriso -- ISO file writer - \item gdb -- Debugger - \item make -- Because Makefile is (still) being used. -\end{itemize} - -\paragraph{Build Process} -The build process gives an impression of what is required to build an \gls{os} executable with \gls{Rust}. -\begin{enumerate} - \item \code{make} manages the inter-dependencies of the build process. - \item \code{nasm} compiles the assembly code that bootstraps the system from the multiboot stage for 32-Bit mode. - \item \code{rustc} compiles the Rust program, which contains the 64-Bit code. - \item \code{ld} is used to link these two together and form a multiboot compliant kernel binary. - \item \code{grub2-mkrescue} is used to generate a multiboot-compliant bootloader. It will load the kernel binary. - \item \code{xorriso} combines the kernel binary and bootloader into a bootable ISO. - \item \code{qemu-system-x86_64} can be used to boot the ISO. -\end{enumerate} - -\section{Development State} -The anticipated goal of preemptive multitasking has been reached. -Tasks are represented by plain \code{fn() -> !} instances. -The tasks and the task table are statically defined in the \gls{os} source code. -Task switches are driven by the Programmable-Interrupt-Timer, for which a driver has been implemented. -The task scheduler works in a round-robin fashion and detects stack overflows. -Any stack overflowing task is not scheduled anymore. -The stack size is statically defined and is allocated globally by the compiler.
- -The implementation uses no dynamic memory allocations, thus there was no experience gathered with managing dynamic memory within the \gls{os}. -The global state references might be accessed by any defined task, and require prohibition of unwanted access by at least making them \code{unsafe} mandatory. - - -\section{System Clock Driver} -This section will walk through the creation of a simple clock driver. - -The first usage of traits was the definition and implementation of the \code{Clock} trait for the \code{Pic} type. -The trait defines the properties of a driver that implements a Clock, and the Pit is the hardware specific implementation for this trait. - -\subsection{Trait and Pit Implementation} -\paragraph{Trait Definition} -The trait defines a clean interface for any system clock. -The highlighted lines show the \code{unsafe} functions within this trait. -This is used to force the caller to use \code{unsafe}, which must only be done with care and never from a regular task. - -\begin{minted}[breaklines,highlightlines={4,18}]{rust} -/// The Clock trait is for each clock type. -pub trait Clock { - /// Start the clock - unsafe fn start(&self); - - /// Receive the frequency the clock is set for - fn frequency(& self) -> SimpleResult; - - /// Update the internal clock counter by one. - /// The time of one tick is `1/self.frequency()`s. - unsafe fn tick(&self); - - /// Receive the current tick counter - fn ticks(&self) -> SimpleResult<(u64, Duration)>; - - /// Returns the uptime as `Duration`. - /// This assumes that **all** fired clock interrupts have successfully called `self.tick()`. - fn uptime(& self) -> SimpleResult; -} -\end{minted} - -\paragraph{Implementing Clock for Pit} -This code lives in the same file as the Clock trait, but it shows how code can be structured by modules. -The \code{use} statements are required for using anything defined outside of the module, even for parent constants as can been. 
-This is a clean way of handling hardware-specific constants. - -\begin{minted}[breaklines,highlightlines={4,6}]{rust} -/// This module implements a system clock using the Programmable Interrupt Timer -/// ... -pub mod pit { - ... - use super::consts::NSEC_MULTIPLIER; - ... - - /// Constants definitions for the pit module - pub mod consts { - pub const BASE_FREQUENCY: u32 = 1193182; - ... - pub const CHANNEL_IO_PORTS: [u16; 3] = - [CHANNEL0_IO_PORT, CHANNEL1_IO_PORT, CHANNEL2_IO_PORT]; - } - - ... - - /// Type for the Programmable-Interrupt-Timer - pub struct Pit { - pub frequency: Frequency, - divisor: u16, - pub resolution: u64, - channel: u8, - ticks_atomic: AtomicUsize, - } - ... -\end{minted} -Some of the fields of \code{Pit} are made \code{pub}lic, notice that the counter is not one of them. - -\subsection{Global CLOCK State} -\paragraph{Initialization} -The state of the clock is held globally, though it must be initialized with non-static code. -This is possible in Rust lazy initialization, which works by defining a static reference, implementing the singleton pattern. - -The dereference method for this reference has been automatically generated to call the initialization code on first reference. -The compiler is aware of this and reserves memory at compile time. -This functionality is implemented by an external crate called \emph{lazy\_static}. - -\begin{minted}[breaklines]{rust} -/// Initialization of these references happens on first deref -lazy_static! { - static ref CONTEXT: intermezzos::kernel::Context = intermezzos::kernel::Context::new(); - static ref CLOCK: clock::pit::Pit = clock::pit::new(0, (0x71ae) as u16); - ... -} - -... - -/// Task0 starts the clock, enables interrupts and goes to sleep. -fn task0() { - // This will trigger clock::pit::new(...) and then call .start() - unsafe { CLOCK.start() }; - kprintln!(CONTEXT, - "System clock set up. 
Frequency: {} / Resolution: {}ns", - // pub fields can be accessed - CLOCK.frequency, - CLOCK.resolution); - ... -} -\end{minted} - -As can be seen in the code snippet, the \gls{os}'s \code{CONTEXT} reference is stored next to it. -The task0 can then simply access the public fields. - -\paragraph{Starting The Clock} -The driver uses an atomic integer type that is part of the core library, which is predestined as a clock counter. -It doesn't require a lock even when shared via multiple tasks. - -\begin{minted}[breaklines]{rust} -pub mod pit { - use x86::shared::io::outb; - use core::sync::atomic::{AtomicUsize, Ordering, ATOMIC_USIZE_INIT}; - ... -} -... -impl Clock for Pit { - unsafe fn start(&self) { - let lobyte = (self.divisor & 0xFF) as u8; - let hibyte = ((self.divisor >> 8) & 0xFF) as u8; - unsafe { - outb(consts::COMMAND_PORT, gen_command(self.channel)); - outb(consts::CHANNEL_IO_PORTS[self.channel as usize], lobyte); - outb(consts::CHANNEL_IO_PORTS[self.channel as usize], hibyte); - }; - } - ... - unsafe fn tick(&self) { - self.ticks_atomic.fetch_add(1, Ordering::SeqCst); - } - ... -} -\end{minted} -The \code{start} method is the first occurrence of \code{unsafe}, which is required to perform raw I/O port access using \code{outb}. -\code{tick} is extremely simple, it uses a method to atomically add one, requesting a specific ordering: \textit{SeqCstr: Like AcqRel with the additional guarantee that all threads see all sequentially consistent operations in the same order}.\footnote{\url{https://doc.rust-lang.org/core/sync/atomic/enum.Ordering.html}} -This method is called in the \gls{os} timer interrupt handler, as indicated in \cref{code::imezzos-preemptive-multitasking::clock::tick}. - -\begin{listing}[ht!] -\begin{minted}[breaklines]{rust} - let timer = make_idt_entry!(isr32, esf: &mut ExceptionStackFrame, true, { - ... - unsafe { CLOCK.tick() }; - ... 
-\end{minted} -\caption{Ticking The Clock} -\label{code::imezzos-preemptive-multitasking::clock::tick} -\end{listing} -\FloatBarrier - -\section{Timer Interrupt For Scheduling and Dispatching} -\label{rnd::imezzos-preemptive-multitasking::timer-interrupt-scheduling} -The timer interrupt will trigger according to the frequency that was set for the \code{Pit} clock driver previously explained. - -\subsection{Macro For Interrupt-Handler Setup} -\label{rnd::imezzos-preemptive-multitasking::timer-interrupt-scheduling::macro} -The handler definition is assisted by the a macro rule that had existed in the codebase but was significantly changed. -It showcases macro and also inline assembly functionality. - -\subsubsection{Macro Semantics} -The macro matches one pattern with five language items. -This means that it cannot be invoked with more or less items. -Additionally, they have different language item types, which make them match only certain token trees. -The passed \code{name} will be the name of the defined function. -\code{$esf:ident: $esfty:ty} at usage, looks like a normal variable definition with a name a and a type. -It is used as the parameter for the interrupt handler. -The \code{$body} is the function body of the interrupt handler, defined by the macro caller. -The last important aspect of the macro semantics is that the last line within the emitted code does not have a semicolon, which means the macro expression will evaluate to this value, namely an instance of \code{IdtEntry}. - -\begin{figure}[ht!] -\begin{minted}[breaklines,linenos,highlightlines={3,6,12,17,21}]{rust} -#[macro_export] -macro_rules! make_idt_entry { - ($name:ident, $esf:ident: $esfty:ty, $ir_gate:expr, $body:expr) => {{ - ... 
- - extern "x86-interrupt" fn $name($esf: $esfty) { - unsafe { - asm!("" - : // output operands - : // input operands - : // clobbers - "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "rbp" - : // options - "intel" "volatile" - ); - } - $body - }; - ... - let handler = VAddr::from_usize($name as usize); - IdtEntry::new(handler, 0x8, PrivilegeLevel::Ring0, $ir_gate) - }}; -} -\end{minted} -\caption{intermezzOS: Macro for defining Interrupt Handlers} -\label{code::imezzos::ir-handler-macro} -\end{figure} -\FloatBarrier - -\subsubsection{OS Semantics} -It is worth explaining how the macro semantics are used to model \gls{os} semantics in \cref{code::imezzos::ir-handler-macro} - -\paragraph{Interrupt ABI Function Type} -For each defined handler, the macro allows to set the argument type for the handler function. -The \emph{value} will be passed at runtime by the \gls{cpu} for each interrupt, and not each interrupt uses the same layout for this argument. -This type must match the exception \gls{sf} layout introduced in \cpnameref{fig:amd64-long-mode-interrupt-stac}, which can be either with or without the error field. -This decision is made by the macro caller, as the interrupt type is not known within the macro, and can only be known by the developer later. - -Thanks to the pull-request described in \cpnameref{rnd::existing-os-dev-with-rust::systems::blog-os::influence}, the \code{extern "x86-interrupt"} can be used for defining the interrupt types. -It enables the proper handling of the first argument, and in combination with the \emph{clobber} registers shown in \cref{code::imezzos::ir-handler-macro} line 11, enables the compiler to generate a functoin pro- and epilogue to automatically \code{PUSH/POP} all named registers from the stack. -As a result, the inline assembly string provided by the programmer is empty, which alleviates the necessatiy of \code{unsafe}. 
- -\section{Tasks and Stacks} -\label{rnd::imezzos-preemptive-multitasking::tasks-stacks} -The implementation of the tasks has been kept straight forward, using global static variables and simple struct types instead of traits. -Tasks are defined globally with the simple \code{fn() -> !} type. - -\subsection{Initial Unsafe JMP} -\label{rnd::imezzos-preemptive-multitasking::tasks-stacks::unsafe::jmp} -To redirect the codepath of the boot task, the solution in \cref{code::imezzos::jump-to-task0} has been implemented. -The function \code{schedule_and_dispatch()} is called from the \gls{os} \code{main() -> !} method, which will never return. -It shows the combination of Rust high-level code, which leverages the highlighted lifetime to seamlessly lock and drop the \code{TSI} variable. - -The numbered circle show another interesting usage of the language. -The variables are declare immutable (no \code{mut}), and are initialized within the nested scope. -Another write to these variables would result in a compilation error. -They end up being passed to the inline assembly, where they are further processed. - -\begin{listing}[ht!] -\begin{minted}[escapeinside=??,breaklines,highlightlines={7-14,16-30}]{rust} -#[naked] -fn schedule_and_dispatch() { - let rbp; ?\tikzmarkcircle{1}? - let rsp; ?\tikzmarkcircle{2}? - let rip; ?\tikzmarkcircle{3}? - - { - let tsi = TSI.lock(); - let te = tsi.get_current_task(); - - rbp = te.stack.top; ?\tikzmarkcircle{1}? - rsp = te.esf.stack_pointer; ?\tikzmarkcircle{2}? - rip = te.esf.instruction_pointer; ?\tikzmarkcircle{3}? - }; - - unsafe { - asm!(" - mov rbp, $0 "?\tikzmarkcircle{1}?" - jmp $1 "?\tikzmarkcircle{2}?" - " - : // output operands - : // input operands - "r"(rbp) ?\tikzmarkcircle{1}? - "r"(rip) ?\tikzmarkcircle{3}? - "{rsp}="(rsp) ?\tikzmarkcircle{2}? 
- : // clobbers - : // options - "intel" "volatile" - ); - }; -} -\end{minted} -\caption{intermezzOS: Initial Jump To Task 0} -\label{code::imezzos::jump-to-task0} -\end{listing} - -\subsection{Declaration and Intantiation} -\paragraph{Global Stacks} -\label{rnd::imezzos-preemptive-multitasking::tasks-stacks::dni} -\Cref{code::imezzos::stack-and-tasks-1} defines a \code{Stack} with a top and a bottom address based which are offset by a constant. -Subsequent stacks grow the multiplier by 10, which keeps space between the stacks. - -\begin{listing}[ht!] -\begin{minted}[breaklines,highlightlines={4-7}]{rust} -const STACKS_TOP: usize = 0x1_000_000; // 15.7MiB -const STACK_SIZE: usize = 0x_002_000; // 64KiB -use tasks::stack::Stack; -const TASK0_STACK: Stack = Stack { - top: STACKS_TOP - 10 * STACK_SIZE, - bottom: STACKS_TOP - (10 + 1) * STACK_SIZE, -}; -\end{minted} -\caption{intermezzOS: Stack and Task Definition - 1} -\label{code::imezzos::stack-and-tasks-1} -\end{listing} - -\paragraph{Global TaskList} -\Cref{code::imezzos::stack-and-tasks-2} defines a \code{TaskEntry} in a static slice of the same. -The highlighted lines are unique to each task. -In the given order, they represent their first instruction, their initial top of stack, and their initial set of \gls{cpu} registers. -Except for the instruction pointer, these variables have their own type and cannot easily be mixed up. - -\begin{listing}[ht!] -\begin{minted}[breaklines,highlightlines={7,9,12}]{rust} - let tasklist = [ - tasks::TaskEntry { - name: "Task 0", - esf: interrupts::ExceptionStackFrame{ - code_segment: 0x8, - stack_segment: 0x10, - instruction_pointer: task0 as usize, - cpu_flags: 0x200202, - stack_pointer: TASK0_STACK.top, - }, - stack: TASK0_STACK, - registers: tasks::TaskRegisters::empty(), - blocked: false, - }, - ... - tasks::TaskEntry { - ... 
- }, - ]; -\end{minted} -\caption{intermezzOS: Stack and Task Definition - 2} -\label{code::imezzos::stack-and-tasks-2} -\end{listing} - -\paragraph{Mutex With Interior Mutability} -\label{rnd::example::mutex} -\Cref{code::imezzos::stack-and-tasks-3} wraps this slice in a \code{spin::Mutex}, which is returned by the expression and stored as a \code{lazy_static} reference as explained in the previous section. -The \code{Mutex} type is interesting, as it provides \emph{interior mutability}. -This explains how the tasklist can be mutated at runtime, even though it is not declared as \code{mut}. - -\begin{listing}[ht!] -\begin{minted}[breaklines,highlightlines={7}]{rust} -lazy_static! { -... -static ref TSI: Mutex = { - let tasklist = [ - ... - ]; - Mutex::new(tasks::TaskStateInformation::new(tasklist)) - }; - ... -}; -\end{minted} -\caption{intermezzOS: Stack and Task Definition - 3} -\label{code::imezzos::stack-and-tasks-3} -\end{listing} - -\subsection{Preemptive Task Switches} -What follows in \cref{code::imezzos::taskswitch-1,} is the most low-level part in this study, the actual context switch within the interrupt handler. - -It does these things: (* marks unsafe actions) -\begin{enumerate} - \item * Read the \gls{sf} Base-Pointer (line 2) - \item Calculate the offset to the Registers on the \gls{sf} - \item * Cast this address to a type that contains all registers (line 5-6) - \item ? Pass the Exception\gls{sf} and the registers along to \code{manage_tasks} - \item Acknowledge the interrupt -\end{enumerate} - -Looking at this code in retrospection suggests that \code{manage_tasks} could be marked as \code{unsafe} too because it does \code{unsafe} things inside. -Through the values it consumed, it is able to directly modify the stack contents. -Arguably it is done through modeled types, but it is not the way the \gls{stack} was designed to be used by a programmer. - -\begin{listing}[ht!] 
-\begin{minted}[breaklines,highlightlines={2,5-6},linenos]{rust} - let timer = make_idt_entry!(isr32, esf: &mut ExceptionStackFrame, true, { - let rbp_on_stack: *mut usize = unsafe { (get_register!("rbp") as *mut usize) }; - let rax_offset = 1 - (mem::size_of::() as isize / 8); - let rax_on_stack: *mut usize = unsafe { rbp_on_stack.offset(rax_offset) }; - let registers_on_stack: &mut tasks::TaskRegisters = - unsafe { mem::transmute::<*mut usize, &mut tasks::TaskRegisters>(rax_on_stack) }; - - ... - manage_tasks(esf, registers_on_stack); - pic::eoi_for(32); - }; -\end{minted} -\caption{intermezzOS: Taskswitch - 1} -\label{code::imezzos::taskswitch-1} -\end{listing} - -\subsection{Task Definitions} -The task definitions are straight forward as explained. - -\paragraph{Idle Task} -\Cref{code::imezzos::idel-task} shows the system's idle task, which infinitively calls \code{hlt()} after finishing the boot process. - -\begin{listing}[ht!] -\begin{minted}[breaklines,highlightlines={5}]{rust} -#[deny(unsafe_code)] -fn task0() -> !{ - unsafe { CLOCK.start() }; - - kprintln!(CONTEXT, - "System clock set up. Frequency: {} / Resolution: {}ns", - CLOCK.frequency, - CLOCK.resolution); - - kprintln!(CONTEXT, - "Kernel initialized, final step: enabling interrupts"); - CONTEXT.idt.enable_interrupts(); - - loop { - hlt(); - } -} -\end{minted} -\caption{intermezzOS: Idle Task} -\label{code::imezzos::idel-task} -\end{listing} - - -\section{Safety Concerns} - -\subsection{Protecting Static Resources} -With this straight forward task implementation any task has access to the globally defined reference variables which hold the system state. -For this reason, the \code{Clock} trait makes use of the \code{unsafe} keyword. - -As seen in this code snippet, the tasks can be prevented from accessing any \code{unsafe} by adding the appropriate annotation. -Of course, this does not make sense for the system's idle task, but it is suitable for an example. - -\begin{listing}[ht!] 
-\begin{minted}[breaklines,highlightlines={5}]{rust} -#[deny(unsafe_code)] -fn task0() { - unsafe { CLOCK.start() }; - ... -} -\end{minted} -\end{listing} - -This causes the compiler to abort compilation with the following error: - -\begin{minted}{md} -error: usage of an `unsafe` block - --> src/main.rs:499:5 - | -499 | unsafe { CLOCK.start() }; - | ^^^^^^^^^^^^^^^^^^^^^^^^ - | -note: lint level defined here - --> src/main.rs:497:8 - | -497 | #[deny(unsafe_code)] - | -\end{minted} - -\subsection{Vulnerable To In-Kernel Stack Overflow} -As investigated in \cpnameref{rnd::weakness-mitig-prev::stack-protection::stack-clash::user-space::compile-time}, \gls{Rust} does not detect stack overflows at compile-time. -Without a paging implementation that sets up a guard area for each task, there is no guarantees on memory-safety within this \gls{os}. - -The trivial runtime mitigation is to employ a boundary check in the task management function, as shown in \cref{code::imezzos:stack-of-detect}, and in addition place the task's stacks far apart. -If the stack pointer of the preempted task is not within it's known stack boundaries, the task is blocked from further scheduling. -This solution leaves each task enough time to overflow it's stack by enough space to reach a memory area of another task or the \gls{os}. -This may happen within one scheduling period before it can be detected. - -\begin{listing}[ht!] -\begin{minted}[breaklines,highlightlines={5}]{rust} -fn manage_tasks(esf: &mut ExceptionStackFrame, registers: &mut tasks::TaskRegisters) { -... - if let Some(mut tsi) = TSI.try_lock() { - ... - if !tsi.get_current_task().stack.contains(esf.stack_pointer) { - kprintln_try!(CONTEXT, - "Stack overflow in task {}!\nStack: {:x}\nESF: {:x}\nREGS: {:x}", - tsi.current_task, - tsi.get_current_task().stack, - esf, - registers); - tsi.get_current_task_mut().blocked = true; - } - } -... 
-} -\end{minted} -\caption{intermezzOS: Runtime Stack Overflow Detection} -\label{code::imezzos:stack-of-detect} -\end{listing} diff --git a/src/docs/thesis.bib b/src/docs/thesis.bib index 7301b8b..a14b856 100644 --- a/src/docs/thesis.bib +++ b/src/docs/thesis.bib @@ -1,361 +1,94 @@ -Automatically generated by Mendeley Desktop 1.17.8 +Automatically generated by Mendeley Desktop 1.16.3 Any changes to this file will be lost if it is regenerated by Mendeley. BibTeX export options can be customized via Options -> BibTeX in Mendeley Desktop -@article{Beingessner2015, -author = {Beingessner, Alexis}, -file = {:home/steveej/src/steveej/msc-thesis/docs/You can't spell trust without Rust.pdf:pdf}, -title = {{YOU CAN'T SPELL TRUST WITHOUT RUST}}, -year = {2015} +@inproceedings{Hallyn2008, +author = {Hallyn, S.E. and Morgan, A.G.}, +booktitle = {Linux Symposium}, +file = {:home/steveej/src/github/steveej/msc-thesis/papers/Linux Capabilities$\backslash$: making them work.pdf:pdf}, +issn = {1440-1746}, +keywords = {Animals,Gastroenterology,Gastrointestinal Diseases,Humans}, +pmid = {21751466}, +title = {{Linux Capabilities: making them work}}, +url = {http://kernel.org/doc/mirror/ols2008v1.pdf{\#}page=163}, +volume = {1}, +year = {2008} } -@misc{OsPhilOpp, -author = {Oppermann, Philipp}, -title = {{Writing an OS in Rust}}, -url = {https://os.phil-opp.com/} +@book{Utrecht2006, +abstract = {Software deployment is the set of activities related to getting$\backslash$r$\backslash$nsoftware components to work on the machines of end users. It includes$\backslash$r$\backslash$nactivities such as installation, upgrading, uninstallation, and so on.$\backslash$r$\backslash$nMany tools have been developed to support deployment, but they all$\backslash$r$\backslash$nhave serious limitations with respect to correctness. 
For instance,$\backslash$r$\backslash$nthe installation of a component can lead to the failure of previously$\backslash$r$\backslash$ninstalled components; a component might require other components that$\backslash$r$\backslash$nare not present; and it is generally difficult to undo deployment$\backslash$r$\backslash$nactions. The fundamental causes of these problems are a lack of$\backslash$r$\backslash$nisolation between components, the difficulty in identifying the$\backslash$r$\backslash$ndependencies between components, and incompatibilities between$\backslash$r$\backslash$nversions and variants of components.$\backslash$r$\backslash$n $\backslash$r$\backslash$nThis thesis describes a better approach based on a purely functional$\backslash$r$\backslash$ndeployment model, implemented in a deployment system called Nix.$\backslash$r$\backslash$nComponents are stored in isolation from each other in a Nix store.$\backslash$r$\backslash$nEach component has a name that contains a cryptographic hash of all$\backslash$r$\backslash$ninputs that contributed to its build process, and the content of a$\backslash$r$\backslash$ncomponent never changes after it has been built. Hence the model is$\backslash$r$\backslash$npurely functional.$\backslash$r$\backslash$n $\backslash$r$\backslash$nThis storage scheme provides several important advantages. First, it$\backslash$r$\backslash$nensures isolation between components: if two components differ in any$\backslash$r$\backslash$nway, they will be stored in different locations and will not overwrite$\backslash$r$\backslash$neach other. 
Second, it allows us to identify component dependencies.$\backslash$r$\backslash$nUndeclared build time dependencies are prevented due to the absence of$\backslash$r$\backslash$n"global" component directories used in other deployment systems.$\backslash$r$\backslash$nRuntime dependencies can be found by scanning for cryptographic hashes$\backslash$r$\backslash$nin the binary contents of components, a technique analogous to$\backslash$r$\backslash$nconservative garbage collection in programming language$\backslash$r$\backslash$nimplementation. Since dependency information is complete, complete$\backslash$r$\backslash$ndeployment can be performed by copying closures of components under$\backslash$r$\backslash$nthe dependency relation.$\backslash$r$\backslash$n $\backslash$r$\backslash$nDevelopers and users are not confronted with components' cryptographic$\backslash$r$\backslash$nhashes directly. Components are built automatically from Nix$\backslash$r$\backslash$nexpressions, which describe how to build and compose arbitrary$\backslash$r$\backslash$nsoftware components; hashes are computed as part of this process.$\backslash$r$\backslash$nComponents are automatically made available to users through "user$\backslash$r$\backslash$nenvironments", which are synthesised sets of activated components.$\backslash$r$\backslash$nUser environments enable atomic upgrades and rollbacks, as well as$\backslash$r$\backslash$ndifferent sets of activated components for different users.$\backslash$r$\backslash$n $\backslash$r$\backslash$nNix expressions provide a source-based deployment model. However,$\backslash$r$\backslash$nsource-based deployment can be transparently optimised into binary$\backslash$r$\backslash$ndeployment by making pre-built binaries (keyed on their cryptographic$\backslash$r$\backslash$nhashes) available in a shared location such as a network server. 
This$\backslash$r$\backslash$nis referred to as transparent source/binary deployment.$\backslash$r$\backslash$n $\backslash$r$\backslash$nThe purely functional deployment model has been validated by applying$\backslash$r$\backslash$nit to the deployment of more than 278 existing Unix packages. In$\backslash$r$\backslash$naddition, this thesis shows that the model can be applied naturally to$\backslash$r$\backslash$nthe related activities of continuous integration using build farms,$\backslash$r$\backslash$nservice deployment and build management.}, +author = {Utrecht, Universiteit and Magnificus, Rector}, +booktitle = {Utrecht University}, +doi = {10.1007/s12630-009-9179-6}, +file = {:home/steveej/src/github/steveej/msc-thesis/papers/nix-phd-thesis.pdf:pdf}, +isbn = {9039341303}, +issn = {14968975}, +number = {12}, +pages = {0--281}, +pmid = {19728000}, +title = {{The Purely Functional Software Deployment Model}}, +url = {http://www.st.ewi.tudelft.nl/{~}dolstra/pubs/phd-thesis.pdf}, +volume = {56}, +year = {2006} } -@article{Balasubramanian2017, -abstract = {Rust is a new system programming language that offers a practical and safe alternative to C. Rust is unique in that it enforces safety without runtime overhead, most importantly, without the overhead of garbage collection. While zero-cost safety is remarkable on its own, we argue that the super-powers of Rust go beyond safety. In particular, Rust's linear type system enables capabilities that cannot be implemented efficiently in traditional languages, both safe and unsafe, and that dramatically improve security and reliability of system software. We show three examples of such capabilities: zero-copy software fault isolation, efficient static information flow analysis, and automatic checkpointing. While these capabilities have been in the spotlight of systems research for a long time, their practical use is hindered by high cost and complexity. 
We argue that with the adoption of Rust these mechanisms will become commoditized.}, -author = {Balasubramanian, Abhiram and Baranowski, Marek S and Burtsev, Anton and Irvine, Uc and Rakamari, Zvonimir and Ryzhyk, Leonid and Research, Vmware}, -file = {:home/steveej/src/github/steveej/msc-thesis/docs/DRAFT$\backslash$: System Programming in Rust$\backslash$: Beyond Safety.pdf:pdf}, -title = {{DRAFT: System Programming in Rust: Beyond Safety}}, -year = {2017} -} -@inproceedings{Ma2013, -abstract = {—Aiming at the problem of higher memory consumption and lower execution efficiency during the dynamic detecting to C/C++ programs memory vulnerabilities, this paper presents a dynamic detection method called ISC. The ISC improves the Safe-C using pointer analysis technology. Firstly, the ISC defines a simple and efficient fat pointer representation instead of the safe pointer in the Safe-C. Furthermore, the ISC uses the unification-based analysis algorithm with one level flow static pointer. This identification reduces the number of pointers that need to be converted to fat pointers. Then in the process of program running, the ISC detects memory vulnerabilities through constantly inspecting the attributes of fat pointers. Experimental results indicate that the ISC could detect memory vulnerabilities such as buffer overflows and dangling pointers. 
Comparing with the Safe-C, the ISC dramatically reduces the memory consumption and lightly improves the execution efficiency.}, -author = {Ma, Rui and Chen, Lingkui and Hu, Changzhen and Xue, Jingfeng and Zhao, Xiaolin}, -booktitle = {Proceedings - 2013 IEEE 11th International Conference on Dependable, Autonomic and Secure Computing, DASC 2013}, -doi = {10.1109/DASC.2013.37}, -file = {:home/steveej/src/github/steveej/msc-thesis/docs/A Dynamic Detection Method to C-C++ Programs Memory Vulnerabilities Based on Pointer Analysis.pdf:pdf}, -isbn = {9781479933815}, -keywords = {dynamic detecting,fat pointer,improved Safe-C,memory vulnerability,pointer analysis}, -pages = {52--57}, -title = {{A dynamic detection method to C/C++ programs memory vulnerabilities based on pointer analysis}}, -year = {2013} -} -@article{Dhurjati2003, -abstract = {Traditional approaches to enforcing memory safety of programs rely heavily on runtime checks of memory accesses and on garbage collection, both of which are unattractive for embedded applications. The long-term goal of our work is to enable 100{\%} static enforcement of memory safety for embedded programs through advanced compiler techniques and minimal semantic restrictions on programs. The key result of this paper is a compiler technique that ensures memory safety of dynamically allocated memory without programmer annotations, runtime checks, or garbage collection, and works for a large subclass of type-safe C programs. The technique is based on a fully automatic pool allocation (i.e., region-inference) algorithm for C programs we developed previously, and it ensures safety of dynamically allocated memory while retaining explicit deallocation of individual objects within regions (to avoid garbage collection). For a diverse set of embedded C programs (and using a previous technique to avoid null pointer checks), we show that we are able to statically ensure the safety of pointer and dynamic memory usage in all these programs. 
We also describe some improvements over our previous work in static checking of array accesses. Overall, we achieve 100{\%} static enforcement of memory safety without new language syntax for a significant subclass of embedded C programs, and the subclass is much broader if array bounds checks are ignored.}, -author = {Dhurjati, D and Kowshik, S and Adve, V and Lattner, C}, -doi = {10.1145/780742.780743}, -file = {:home/steveej/src/github/steveej/msc-thesis/docs/Memory Safety Without Runtime Checks or Garbage.pdf:pdf}, -isbn = {0362-1340}, -issn = {03621340}, -journal = {Acm Sigplan Notices}, -keywords = {automatic pool allocation,compilers,embedded systems,languages,programming languages,region management,security,static analysis}, -number = {7}, -pages = {69--80}, -title = {{Memory safety without runtime checks or garbage collection}}, -volume = {38}, -year = {2003} -} -@article{Junker, -author = {Junker, Stefan}, -file = {:home/steveej/src/steveej/msc-thesis/src/docs/thesis.pdf:pdf}, -title = {{Guarantees On In-Kernel Memory-Safety Using Rust's Static Code Analysis}} -} -@misc{Endler, -author = {Endler, Matthias}, -title = {{A curated list of static analysis tools, linters and code quality checkers for various programming languages}}, -url = {https://github.com/mre/awesome-static-analysis} -} -@article{Corporation2011, -abstract = {The Intel{\{}$\backslash$textregistered{\}} 64 and IA-32 Architectures Software Developer's Manual, Volume 1, describes the basic architecture and programming environment of Intel 64 and IA-32 processors. The Intel{\{}$\backslash$textregistered{\}} 64 and IA-32 Architectures Software Developer's Manual, Volumes 2A {\&} 2B, describe the instruction set of the processor and the opcode struc- ture. These volumes apply to application programmers and to programmers who write operating systems or executives. 
The Intel{\{}$\backslash$textregistered{\}} 64 and IA-32 Architectures Software Developer's Manual, Volumes 3A {\&} 3B, describe the operating-system support environment of Intel 64 and IA-32 processors. These volumes target operating- system and BIOS designers. In addition, the Intel{\{}$\backslash$textregistered{\}} 64 and IA-32 Architectures Software Developer's Manual, Volume 3B, addresses the programming environment for classes of software that host operating systems.}, -author = {Corporation, Intel}, -doi = {10.1109/MAHC.2010.22}, -file = {:home/steveej/src/github/steveej/msc-thesis/docs/64-ia-32-architectures-software-developer-system-programming-manual-325384.pdf:pdf}, -isbn = {253665-057US}, -issn = {15222594}, -journal = {System}, -keywords = {253665,IA-32 architecture,Intel 64}, -number = {253665}, -title = {{Intel {\textregistered} 64 and IA-32 Architectures Software Developer ' s Manual Volume 3}}, -volume = {3}, -year = {2011} -} -@article{Matz2009, -author = {Matz, M and Hubicka, J and Jaeger, a and Mitchell, M}, -file = {:home/steveej/src/steveej/msc-thesis/docs/System V Application Binary Interface AMD64 Architecture Processor Supplement Draft Version 0.99.7.pdf:pdf}, -isbn = {013877630X}, -pages = {1--128}, -pmid = {2477614}, -title = {{System V Application Binary Interface AMD64 Architecture Processor Supplement}}, -url = {papers2://publication/uuid/CD8D5668-B1F5-4FE3-BAD8-25F1E589A9E5}, -year = {2009} -} -@misc{MITRE-CWE-134, -author = {MITRE}, -title = {{CWE-134: Use of Externally-Controlled Format String}}, -url = {http://cwe.mitre.org/data/definitions/134.html}, -urldate = {2017-09-20} -} -@article{Seri2017, -abstract = {The dangers of Bluetooth implementations: Unveiling zero day vulnerabilities and security flaws in modern Bluetooth stacks.}, -author = {Seri, Ben and Vishnepolsky, Gregory}, -file = {:home/steveej/src/steveej/msc-thesis/docs/BlueBorne Technical White Paper.pdf:pdf}, -title = {{BlueBorne}}, -url = 
{http://go.armis.com/blueborne-technical-paper}, -year = {2017} -} -@article{Reed2015, -abstract = {Rust is a new systems language that uses some advanced type system features, specifically affine types and regions, to statically guarantee memory safety and eliminate the need for a garbage collector. While each individual addition to the type system is well understood in isolation and are known to be sound, the combined system is not known to be sound. Furthermore, Rust uses a novel checking scheme for its regions, known as the Borrow Checker, that is not known to be correct. Since Rust's goal is to be a safer alternative to C/C++, we should ensure that this safety scheme actually works. We present a formal semantics that captures the key features relevant to memory safety, unique pointers and borrowed references, specifies how they guarantee memory safety, and describes the operation of the Borrow Checker. We use this model to prove the soudness of some core operations and justify the conjecture that the model, as a whole, is sound. Additionally, our model provides a syntactic version of the Borrow Checker, which may be more understandable than the non-syntactic version in Rust.}, -author = {Reed, Eric}, -file = {:home/steveej/src/github/steveej/msc-thesis/docs/Patina$\backslash$: A Formalization of the Rust Programming Language.pdf:pdf}, -number = {February}, -pages = {1--37}, -title = {{Patina: A Formalization of the Rust Programming Language}}, -year = {2015} -} -@misc{IEEEspectrum-proglangs, -author = {IEEE}, -title = {{Interactive: The Top Programming Languages 2017}}, -url = {https://spectrum.ieee.org/static/interactive-the-top-programming-languages-2017}, -urldate = {2017-09-08}, -year = {2017} -} -@article{Mailloux1969, -author = {Mailloux, B. J. and Peck, J. E L and Koster, C. 
H A}, -doi = {10.1007/BF02163002}, -file = {:home/steveej/src/steveej/msc-thesis/docs/Algol68-RevisedReport.pdf:pdf}, -isbn = {978-3-662-38646-0}, -issn = {0029599X}, -journal = {Numerische Mathematik}, -number = {2}, -pages = {79--218}, -title = {{Report on the Algorithmic Language ALGOL 68}}, -volume = {14}, -year = {1969} -} -@misc{TockOS, -title = {{Tock OS}}, -url = {https://www.tockos.org/}, -urldate = {2017-09-22} -} -@article{Xu2015, -abstract = {Since vulnerabilities in Linux kernel are on the increase, attackers have turned their interests into related exploitation techniques. However, compared with numerous researches on exploiting use-after-free vulnerabilities in the user applications, few efforts studied how to exploit use-after-free vulnerabilities in Linux kernel due to the difficulties that mainly come from the uncertainty of the kernel memory layout. Without specific information leakage, attackers could only conduct a blind memory overwriting strategy trying to corrupt the critical part of the kernel, for which the success rate is negligible. In this work, we present a novel memory collision strategy to exploit the use-after-free vulnerabilities in Linux kernel reliably. The insight of our exploit strategy is that a probabilistic memory collision can be constructed according to the widely deployed kernel memory reuse mechanisms, which significantly increases the success rate of the attack. Based on this insight, we present two practical memory collision attacks: An object-based attack that leverages the memory recycling mechanism of the kernel allocator to achieve freed vulnerable object covering, and a physmap-based attack that takes advantage of the overlap between the physmap and the SLAB caches to achieve a more flexible memory manipulation. Our proposed attacks are universal for various Linux kernels of different architectures and could successfully exploit systems with use-after-free vulnerabilities in kernel. 
Particularly, we achieve privilege escalation on various popular Android devices (kernel version{\textgreater}=4.3) including those with 64-bit processors by exploiting the CVE-2015-3636 use-after-free vulnerability in Linux kernel. To our knowledge, this is the first generic kernel exploit for the latest version of Android. Finally, to defend this kind of memory collision, we propose two corresponding mitigation schemes.}, -author = {Xu, Wen and Li, Juanru and Shu, Junliang and Yang, Wenbo and Xie, Tianyi and Zhang, Yuanyuan and Gu, Dawu}, -doi = {10.1145/2810103.2813637}, -file = {:home/steveej/src/github/steveej/msc-thesis/docs/From Collision To Exploitation$\backslash$: Unleashing Use-After-Free Vulnerabilities in Linux Kernel.pdf:pdf}, -isbn = {978-1-4503-3832-5}, -issn = {15437221}, -journal = {Ccs}, -keywords = {linux kernel exploit,memory collision,user-after-free vulnerability}, -pages = {414--425}, -title = {{From Collision To Exploitation: Unleashing Use-After-Free Vulnerabilities in Linux Kernel}}, -url = {http://dl.acm.org/citation.cfm?doid=2810103.2813637}, -year = {2015} -} -@misc{MITRE-CWE-635, -author = {MITRE}, -title = {{CWE-635: Weaknesses Used by NVD}}, -url = {http://cwe.mitre.org/data/definitions/635.html}, -urldate = {2017-08-05} -} -@misc{NVD, -title = {{National Vulnerability Database}}, -url = {https://nvd.nist.gov/}, -urldate = {2017-08-05} -} -@article{Jim2002, -abstract = {Cyclone is a safe dialect of C. It has been designed from the ground up to prevent the buer overflows, format string attacks, and memory management errors that are common in C programs, while retaining C's syntax and semantics. 
This paper examines safety violations enabled by C's design, and shows how Cyclone avoids them, without giving up C's hallmark control over low-level details such as data representation and memory management.}, -author = {Jim, Trevor and Morrisett, Greg and Grossman, Dan and Hicks, Michael and Cheney, James and Wang, Yanling}, -isbn = {1-880446-00-6}, -journal = {USENIX Annual Technical Conference}, -pages = {275--288}, -title = {{Cyclone: A safe dialect of C}}, -url = {http://www.usenix.org/events/usenix02/full{\_}papers/jim/jim{\_}html/}, -year = {2002} -} -@misc{MITRE-CWE-633, -author = {MITRE}, -title = {{CWE-633: Weaknesses that Affect Memory}}, -url = {http://cwe.mitre.org/data/definitions/633.html}, -urldate = {2017-08-31}, -year = {2017} -} -@misc{MITRE-CWE-119, -author = {MITRE}, -booktitle = {2.11}, -title = {{CWE-119: Improper Restriction of Operations within the Bounds of a Memory Buffer}}, -url = {http://cwe.mitre.org/data/definitions/119.html}, -urldate = {2017-08-31}, -year = {2017} -} -@misc{MITRE-CWE-122, -author = {MITRE}, -title = {{CWE-122: Heap-based Buffer Overflow}}, -url = {http://cwe.mitre.org/data/definitions/122.html}, -urldate = {2017-09-26} -} -@inproceedings{Kuznetsov2014, -abstract = {Systems code is often written in low-level languages like C/C++, which offer many benefits but also dele- gate memory management to programmers. This invites memory safety bugs that attackers can exploit to divert control flow and compromise the system. Deployed de- fense mechanisms (e.g., ASLR, DEP) are incomplete, and stronger defense mechanisms (e.g., CFI) often have high overhead and limited guarantees [19, 15, 9]. We introduce code-pointer integrity (CPI), a new de- sign point that guarantees the integrity of all code point- ers in a program (e.g., function pointers, saved return ad- dresses) and thereby prevents all control-flow hijack at- tacks, including return-oriented programming. 
We also introduce code-pointer separation (CPS), a relaxation of CPI with better performance properties. CPI and CPS offer substantially better security-to-overhead ratios than the state of the art, they are practical (we protect a complete FreeBSD system and over 100 packages like apache and postgresql), effective (prevent all attacks in the RIPE benchmark), and efficient: on SPEC CPU2006, CPS averages 1.2{\%} overhead for C and 1.9{\%} for C/C++, while CPI's overhead is 2.9{\%} for C and 8.4{\%} for C/C++. A prototype implementation of CPI and CPS can be obtained from http://levee.epfl.ch. 1}, -author = {Kuznetsov, Volodymyr and Szekeres, L{\'{a}}szl{\'{o}} and Payer, Mathias}, -booktitle = {Proceedings of the 11th USENIX Symposium on Operating Systems Design and Implementation}, -isbn = {9781931971164}, -pages = {147--163}, -title = {{Code-pointer integrity}}, -url = {https://www.usenix.org/conference/osdi14/technical-sessions/presentation/kuznetsov{\%}5Cnhttps://www.usenix.org/system/files/conference/osdi14/osdi14-paper-kuznetsov.pdf?utm{\_}source=dlvr.it{\&}utm{\_}medium=tumblr}, +@article{Fink2014, +abstract = {Docker is a relatively new method of virtualization available natively for 64-bit Linux. Compared to more traditional virtualization techniques, Docker is lighter on system resources, offers a git-like system of commits and tags, and can be scaled from your laptop to the cloud.}, +author = {Fink, John}, +file = {:home/steveej/src/github/steveej/msc-thesis/papers/Docker - a Software as a Service, Operating System-Level Virtualization Framework.pdf:pdf}, +journal = {Code4Lib}, +number = {25}, +pages = {3--5}, +title = {{Docker: a Software as a Service, Operating System-Level Virtualization Framework}}, +url = {http://journal.code4lib.org/articles/9669}, +volume = {1}, year = {2014} } -@article{Lattner2005, -abstract = {The LLVM Compiler Infrastructure (http://llvm.cs. 
uiuc.edu) is a$\backslash$nrobust system that is well suited for a wide variety of research$\backslash$nand development work. This brief paper introduces the LLVM system$\backslash$nand provides pointers to more extensive documentation, complementing$\backslash$nthe tutorial presented at LCPC.}, +@book{Sarton1975, +author = {Sarton, George}, +doi = {10.1007/978-3-319-33138-6}, +file = {:home/steveej/src/github/steveej/msc-thesis/papers/A Computing History Primer.pdf:pdf}, +isbn = {0882751727 (o.c.)}, +pages = {145}, +title = {{Introduction to the history of science.}}, +year = {1975} +} +@article{Menage2007, +abstract = {While Linux provides copious monitoring and control options for individual processes, it has less support for applying the same operations efficiently to related groups of processes. This has led to multiple proposals for subtly different mechanisms for process aggregation for resource control and isolation. Even though some of these efforts could conceptually operate well together, merging each of them in their current states would lead to duplication in core kernel data structures/routines. The Containers framework, based on the existing cpusets mechanism, provides the generic process group- ing features required by the various different resource controllers and other process-affecting subsystems. The result is to reduce the code (and kernel impact) required for such subsystems, and provide a common interface with greater scope for co-operation. This paper looks at the challenges in meeting the needs of all the stakeholders, which include low overhead, feature richness, completeness and flexible groupings. 
We demonstrate how to extend containers by writing resource control and monitoring components, we also look at how to implement namespaces and cpusets on top of the framework.}, +author = {Menage, Paul B}, +file = {:home/steveej/src/github/steveej/msc-thesis/papers/Adding Generic Process Containers to the Linux Kernel.pdf:pdf}, +journal = {Proceedings of the Ottawa Linux Symposium}, +pages = {45--58}, +title = {{Adding Generic Process Containers to the Linux Kernel}}, +url = {http://www.kernel.org/doc/ols/2007/ols2007v2-pages-45-58.pdf}, +year = {2007} +} +@inproceedings{Reshetova2014, +abstract = {The need for flexible, low-overhead virtualization is evident on many fronts ranging from high-density cloud servers to mobile devices. During the past decade OS-level virtualization has emerged as a new, efficient approach for virtualization, with implementations in multiple different Unix-based systems. Despite its popularity, there has been no systematic study of OS-level virtualization from the point of view of security. 
In this report, we conduct a comparative study of several OS-level virtualization systems, discuss their security and identify some gaps in current solutions.}, archivePrefix = {arXiv}, -arxivId = {9780201398298}, -author = {Lattner, Chris and Adve, Vikram}, -doi = {10.1007/11532378_2}, -eprint = {9780201398298}, -file = {:home/steveej/src/github/steveej/msc-thesis/docs/The LLVM Compiler Framework and Infrastructure Tutorial.pdf:pdf}, -isbn = {978-3-540-28009-5}, -issn = {03029743}, -journal = {Languages and Compilers for High Performance Computing}, -number = {Part 1}, -pages = {15--16}, -pmid = {4520227}, -title = {{The LLVM Compiler Framework and Infrastructure Tutorial}}, -url = {http://dx.doi.org/10.1007/11532378{\_}2}, -year = {2005} +arxivId = {1407.4245}, +author = {Reshetova, Elena and Karhunen, Janne and Nyman, Thomas and Asokan, N}, +booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, +doi = {10.1007/978-3-319-11599-3_5}, +eprint = {1407.4245}, +file = {:home/steveej/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Reshetova et al. - 2014 - Security of OS-level virtualization technologies(5).pdf:pdf}, +isbn = {9783319115986}, +issn = {16113349}, +pages = {77--93}, +title = {{Security of OS-level virtualization technologies}}, +volume = {8788}, +year = {2014} } -@article{Nilsson2017, -author = {Nilsson, Fredrik}, -file = {:home/steveej/src/github/steveej/msc-thesis/docs/A Rust-based Runtime for the Internet of Things.pdf:pdf}, -title = {{A Rust-based Runtime for the Internet of Things}}, -year = {2017} -} -@misc{MITRE-CWE, -author = {MITRE}, -title = {{CWE - Common Weakness Enumeration}}, -url = {http://cwe.mitre.org}, -urldate = {2017-08-31}, -year = {2017} -} -@article{Szekeres2013, -abstract = {Memory corruption bugs in software written in low-level languages like C or C++ are one of the oldest problems in computer security. 
The lack of safety in these languages allows attackers to alter the program's behavior or take full control over it by hijacking its control flow. This problem has existed for more than 30 years and a vast number of potential solutions have been proposed, yet memory corruption attacks continue to pose a serious threat. Real world exploits show that all currently deployed protections can be defeated. This paper sheds light on the primary reasons for this by describing attacks that succeed on today's systems. We systematize the current knowledge about various protection techniques by setting up a general model for memory corrup- tion attacks. Using this model we show what policies can stop which attacks. The model identifies weaknesses of currently deployed techniques, as well as other proposed protections enforcing stricter policies. We analyze the reasons why protection mechanisms imple- menting stricter polices are not deployed. To achieve wide adoption, protection mechanisms must support a multitude of features and must satisfy a host of requirements. Especially important is performance, as experience shows that only solutions whose overhead is in reasonable bounds get deployed. A comparison of different enforceable policies helps de- signers of new protection mechanisms in finding the balance between effectiveness (security) and efficiency.We identify some open research problems, and provide suggestions on improving the adoption of newer techniques.}, -author = {Szekeres, L??szl?? 
and Payer, Mathias and Wei, Tao and Song, Dawn}, -doi = {10.1109/SP.2013.13}, -file = {:home/steveej/src/github/steveej/msc-thesis/docs/SoK$\backslash$: Eternal War in Memory.pdf:pdf}, -isbn = {9780769549774}, -issn = {10816011}, -journal = {Proceedings - IEEE Symposium on Security and Privacy}, -pages = {48--62}, -title = {{SoK: Eternal war in memory}}, -year = {2013} -} -@article{Arpaci-Dusseau2015, -abstract = {A book covering the fundamentals of operating systems, including virtualization of the CPU and memory, threads and concurrency, and file and storage systems. Written by professors active in the field for 20 years, this text has been developed in the classrooms of the University of Wisconsin-Madison, and has been used in the instruction of thousands of students.}, -author = {{Arpaci-Dusseau Remzi}, Arpaci-Dusseau Andrea}, -file = {:home/steveej/src/github/steveej/msc-thesis/docs/operating{\_}systems{\_}{\_}three{\_}easy{\_}pieces{\_}{\_}electronic{\_}version{\_}0{\_}91{\_}.pdf:pdf}, -journal = {Arpaci-Dusseau}, -number = {0.91}, -pages = {665}, -title = {{Operating Systems: Three Easy Pieces}}, -volume = {Electronic}, -year = {2015} -} -@article{Kowshik2002, -abstract = {This paper considers the problem of providing safe programming support and enabling secure online software upgrades for control software in real-time control systems. In such systems, offline techniques for ensuring code safety are greatly preferable to online techniques. We propose a language called Control-C that is essentially a subset of C, but with key restrictions designed to ensure that memory safety of code can be verified entirely by static checking, under certain system assumptions. The language permits pointer-based data structures, restricted dynamic memory allocation, and restricted array operations, without requiring any runtime checks on memory operations and without garbage collection. 
The language restrictions have been chosen based on an understanding of both compiler technology and the needs of real-time control systems. The paper describes the language design and a compiler implementation for Control-C. We use control codes from three different experimental control systems to evaluate the suitability of the language for these codes, the effort required to port them to Control-C, and the effectiveness of the compiler in detecting a wide range of potential security violations for one of the systems.}, -author = {Kowshik, Sumant and Dhurjati, Dinakar and Adve, Vikram}, -doi = {10.1145/581677.581678}, -file = {:home/steveej/src/steveej/msc-thesis/docs/Ensuring Code Safety Without Runtime Checks for Real-Time Control Systems.pdf:pdf}, -isbn = {1581135750}, -journal = {Proceedings of the international conference on Compilers, architecture, and synthesis for embedded systems - CASES '02}, -keywords = {compiler,control,programming language,real-time,static analy-}, -pages = {288}, -title = {{Ensuring code safety without runtime checks for real-time control systems}}, -url = {http://portal.acm.org/citation.cfm?doid=581630.581678}, -year = {2002} -} -@misc{TheStackClash, -author = {Advisory, Qualys Security}, -file = {:home/steveej/src/steveej/msc-thesis/docs/stack-clash.txt:txt}, -title = {{The Stack Clash}}, -url = {https://www.qualys.com/2017/06/19/stack-clash/stack-clash.txt} -} -@book{AMD64Vol1, -author = {AMD}, -file = {:home/steveej/src/github/steveej/msc-thesis/docs/AMD64 Architecture Programmer's Manual Volume 1$\backslash$: Application Programming.pdf:pdf}, -keywords = {AMD64,SIMD,extended media instructions,legacy m}, -number = {26568}, -title = {{AMD64 Architecture Programmer's Manual Volume 1: Application Programming}}, -volume = {4}, -year = {2012} -} -@article{Chisnall2015, -abstract = {We propose a new memory-safe interpretation of the C ab-stract machine that provides stronger protection to benefit security and debugging. 
Despite ambiguities in the specifi-cation intended to provide implementation flexibility, con-temporary implementations of C have converged on a mem-ory model similar to the PDP-11, the original target for C. This model lacks support for memory safety despite well-documented impacts on security and reliability. Attempts to change this model are often hampered by as-sumptions embedded in a large body of existing C code, dat-ing back to the memory model exposed by the original C compiler for the PDP-11. Our experience with attempting to implement a memory-safe variant of C on the CHERI ex-perimental microprocessor led us to identify a number of problematic idioms. We describe these as well as their in-teraction with existing memory safety schemes and the as-sumptions that they make beyond the requirements of the C specification. Finally, we refine the CHERI ISA and abstract model for C, by combining elements of the CHERI capabil-ity model and fat pointers, and present a softcore CPU that implements a C abstract machine that can run legacy C code with strong memory protection guarantees.}, -author = {Chisnall, David and Rothwell, Colin and Watson, Robert N M and Woodruff, Jonathan and Vadera, Munraj and Moore, Simon W and Roe, Michael and Davis, Brooks and Neumann, Peter G}, -doi = {10.1145/2694344.2694367}, -file = {:home/steveej/src/github/steveej/msc-thesis/docs/Beyond the PDP-11$\backslash$: Architectural support for a memory-safe C abstract machine.pdf:pdf}, -isbn = {9781450328357}, -issn = {01635964}, -journal = {Proceedings of the Twentieth International Conference on Architectural Support for Programming Languages and Operating Systems}, -pages = {117--130}, -title = {{Beyond the PDP-11 : Architectural support for a memory-safe C abstract machine}}, -url = {http://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/201503-asplos2015-cheri-cmachine.pdf}, -year = {2015} -} -@article{GCC540, -abstract = {This manual documents how to use the GNU compilers, as well as 
their features and incom- patibilities, and how to report bugs. It corresponds to the compilers (GCC) version 5.4.0. The internals of the GNU compilers, including how to port them to new targets and some information about how to write front ends for new languages, are documented in a separate manual. See Section Introduction in GNU Compiler Collection (GCC) Internals.}, -author = {Stallman, Richard M}, -file = {:home/steveej/src/steveej/msc-thesis/docs/gcc-5.4.0.pdf:pdf}, -isbn = {188211437X}, -journal = {Development}, -title = {{Using the GNU Compiler Collection}}, -url = {https://gcc.gnu.org/onlinedocs/gcc-5.4.0/gcc.pdf} -} -@article{Getreu2016, -annote = {- runtime checkis are expensive - -- critical with energy restriction on the target device}, -author = {Getreu, Jens}, -file = {:home/steveej/src/github/steveej/msc-thesis/docs/Embedded System Security with Rust - Case Study of Heartbleed.pdf:pdf}, -pages = {1--24}, -title = {{Embedded System Security with Rust}}, -year = {2016} -} -@book{AMD64Vol2, -author = {AMD}, -file = {:home/steveej/src/github/steveej/msc-thesis/docs/AMD64 Architecture Programmer's Manual Volume 2$\backslash$: System Programming.pdf:pdf}, -keywords = {24593,AMD64 Architecture Programmer's Manual Volume 2: S}, -number = {24592}, -title = {{AMD64 Architecture Programmer's Manual Volume 2: System Programming}}, -volume = {1}, -year = {2012} -} -@article{Caballero2012, -abstract = {Use-after-free vulnerabilities are rapidly growing in popularity, especially for exploiting web browsers. Use-after-free (and double-free) vulnerabilities are caused by a program operating on a dangling pointer. In this work we propose early detection, a novel runtime approach for finding and diagnosing use-after-free and double-free vulnerabilities. While previous work focuses on the creation of the vulnerability (i.e., the use of a dangling pointer), early detection shifts the focus to the creation of the dangling pointer(s) at the root of the vulnerability. 
Early detection increases the effectiveness of testing by identifying unsafe dangling pointers in executions where they are created but not used. It also accelerates vulnerability analysis and minimizes the risk of incomplete fixes, by automatically collecting information about all dangling pointers involved in the vulnerability. We implement our early detection technique in a tool called Undangle. We evaluate Undangle for vulnerability analysis on 8 real-world vulnerabilities. The analysis uncovers that two separate vulnerabilities in Firefox had a common root cause and that their patches did not completely fix the underlying bug. We also evaluate Undangle for testing on the Firefox web browser identifying a potential vulnerability.}, -author = {Caballero, Juan and Grieco, Gustavo and Marron, Mark and Nappa, Antonio}, -doi = {10.1145/2338965.2336769}, -isbn = {9781450314541}, -issn = {1450314546}, -journal = {ISSTA}, -keywords = {automated testing,binary analysis,debugging,dynamic analysis}, -pages = {133}, -title = {{Undangle: early detection of dangling pointers in use-after-free and double-free vulnerabilities}}, -url = {http://dl.acm.org/citation.cfm?doid=2338965.2336769}, -year = {2012} -} -@article{Levy2015a, -abstract = {Rust, a new systems programming language, provides compile-time memory safety checks to help eliminate runtime bugs that manifest from improper memory management. This feature is advantageous for operating system development, and especially for embedded OS development, where recovery and debugging are particularly challenging. However, embedded platforms are highly event-based, and Rust's memory safety mechanisms largely presume threads. In our experience developing an operating system for embedded systems in Rust, we have found that Rust's ownership model prevents otherwise safe resource sharing common in the embedded domain, conflicts with the reality of hardware resources, and hinders using closures for programming asynchronously. 
We describe these experiences and how they relate to memory safety as well as illustrate our workarounds that preserve the safety guarantees to the largest extent possible. In addition, we draw from our experience to propose a new language extension to Rust that would enable it to provide better memory safety tools for event-driven platforms.}, -author = {Levy, Amit and Andersen, Michael P. and Campbell, Bradford and Culler, David and Dutta, Prabal and Ghena, Branden and Levis, Philip and Pannuto, Pat}, -doi = {10.1145/2818302.2818306}, -file = {:home/steveej/src/github/steveej/msc-thesis/docs/tock-plos2015.pdf:pdf}, -isbn = {9781450339421}, -journal = {PLOS: Workshop on Programming Languages and Operating Systems}, -keywords = {embedded operating systems,linear types,ownership,rust}, -pages = {21--26}, -title = {{Ownership is Theft: Experiences Building an Embedded OS in Rust}}, -url = {http://dl.acm.org/citation.cfm?id=2818302.2818306}, -year = {2015} -} -@article{Corporation2011a, -abstract = {The Intel{\{}$\backslash$textregistered{\}} 64 and IA-32 Architectures Software Developer's Manual, Volume 1, describes the basic architecture and programming environment of Intel 64 and IA-32 processors. The Intel{\{}$\backslash$textregistered{\}} 64 and IA-32 Architectures Software Developer's Manual, Volumes 2A {\&} 2B, describe the instruction set of the processor and the opcode struc- ture. These volumes apply to application programmers and to programmers who write operating systems or executives. The Intel{\{}$\backslash$textregistered{\}} 64 and IA-32 Architectures Software Developer's Manual, Volumes 3A {\&} 3B, describe the operating-system support environment of Intel 64 and IA-32 processors. These volumes target operating- system and BIOS designers. 
In addition, the Intel{\{}$\backslash$textregistered{\}} 64 and IA-32 Architectures Software Developer's Manual, Volume 3B, addresses the programming environment for classes of software that host operating systems.}, -author = {Corporation, Intel}, -doi = {10.1109/MAHC.2010.22}, -file = {:home/steveej/src/github/steveej/msc-thesis/docs/64-ia-32-architectures-software-developer-vol-1-manual.pdf:pdf}, -isbn = {253665-057US}, -issn = {15222594}, -journal = {System}, -keywords = {253665,64,ia 32 architecture}, -number = {253665}, -title = {{Intel {\textregistered} 64 and IA-32 Architectures Software Developer ' s Manual Volume 1}}, -volume = {1}, -year = {2011} +@article{Felter2014, +abstract = {IBM Research Report Isolation and resource control for cloud applications has traditionally been achieve through the use of virtual machines. Deploying applications in a VM results in reduced performance due to the extra levels of abstraction. In a cloud environment, this results in loss efficiency for the infrastructure. Newer advances in container-based virtualization simplifies the deployment of applications while isolating them from one another. In this paper, we explore the performance of traditional virtual machine deployments, and contrast them with the use of Linux containers. We use a suite of workloads that stress the CPU, memory, storage and networking resources. Our results show that containers result in equal or better performance than VM in almost all cases. Both VMs and containers require tuning to support I/O-intensive applicaions. We also discuss the implications of our performance results for future cloud architecture.}, +author = {Felter, Wes and Ferreira, Alexandre and Rajamony, Ram and Rubio, Juan}, +doi = {10.1109/ISPASS.2015.7095802}, +file = {:home/steveej/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Felter et al. 
- 2014 - An Updated Performance Comparison of Virtual Machines and Linux Containers(3).pdf:pdf}, +isbn = {9781479919574}, +journal = {Technology}, +keywords = {cloud computing,performance,virtualization}, +pages = {171--172}, +title = {{An Updated Performance Comparison of Virtual Machines and Linux Containers}}, +url = {http://domino.research.ibm.com/library/CyberDig.nsf/papers/0929052195DD819C85257D2300681E7B/{\$}File/rc25482.pdf}, +volume = {25482}, +year = {2014} } diff --git a/src/docs/thesis.tex b/src/docs/thesis.tex index b19e3f5..508c339 100644 --- a/src/docs/thesis.tex +++ b/src/docs/thesis.tex @@ -1,72 +1,28 @@ -% // vim: set ft=tex: \documentclass[12pt,a4paper]{report} -%\overfullrule=1cm - \usepackage[utf8]{inputenc} \usepackage{titlesec} % titleformat \usepackage{blindtext,color,fancyhdr} \usepackage{geometry} -\geometry{a4paper, top=25mm, left=35mm, right=30mm, bottom=35mm, headsep=10mm, footskip=12mm} +\geometry{a4paper, top=25mm, left=30mm, right=35mm, bottom=35mm, headsep=10mm, footskip=12mm} -\usepackage{multirow,tabularx,tabu} -\usepackage{booktabs} -\usepackage{spreadtab} -\usepackage{hhline} -\renewcommand{\arraystretch}{1.2} +%\usepackage{multirow,tabularx,tabu} +\usepackage{ctable,multirow} -\usepackage{colortbl} -\usepackage[dvipsnames]{xcolor} +\usepackage{cite} +\bibliographystyle{plain} -\usepackage[backend=biber,style=numeric,citestyle=numeric,url=true]{biblatex} -\addbibresource{thesis.bib} - -%\usepackage[hyphens]{url} +\usepackage[hyphens]{url} \Urlmuskip = 0mu plus 1mu -%\hyphenpenalty=1 -\pretolerance=5000 -\tolerance=5000 -%\exhyphenpenalty=1 - \usepackage[numberedsection,toc,numberline,nopostdot]{glossaries} \makenoidxglossaries -\usepackage{listings} -\providecommand*{\listingautorefname}{Listing} -\usepackage{minted} -\usepackage{graphicx} -\usepackage{placeins} -\usepackage{tikz} -\usetikzlibrary{tikzmark,mindmap} -\usetikzlibrary{chains,shapes.arrows, arrows, positioning,decorations.pathreplacing,bending} 
-\usetikzlibrary{calc} -\usetikzlibrary{matrix,shapes,arrows,positioning} -\usetikzlibrary{shapes.geometric, arrows} -\usetikzlibrary{chains,arrows.meta,decorations.pathmorphing,quotes} -\usepackage{smartdiagram} -\usesmartdiagramlibrary{additions} -\usepackage{color} -\usepackage[definitionLists,hashEnumerators,smartEllipses,hybrid]{markdown} -%\usepackage[fencedCode,inlineFootnotes,citations,definitionLists,hashEnumerators,smartEllipses,hybrid]{markdown} +\newcommand{\topic}{A Declarative And Reproducible Approach To The Creation Of Software-Application Container Images} - -\tikzset{/minted/basename/.initial=minted} -\appto\theFancyVerbLine{\tikzmark{\pgfkeysvalueof{/minted/basename}\arabic{FancyVerbLine}}} - -\usepackage{amsmath} -\usepackage{amssymb} -\usepackage{caption} -\usepackage{subcaption} -\usepackage{wrapfig} -\usepackage[parfill]{parskip} - -\newcommand{\topic}{Guarantees On In-Kernel Memory-Safety Using Rust's Static Code Analysis} \newcommand{\authorOne}{Stefan Junker} -\newcommand{\authorOneBirthDate}{23.12.1986} -\newcommand{\authorOneBirthCity}{Rottweil} \newcommand{\authorOneInit}{SJ} \newcommand{\authorOnestreet}{Alemannenstr. 7} \newcommand{\authorOnezip}{78467} @@ -74,28 +30,16 @@ \newcommand{\authorOneCountry}{Germany} \newcommand{\authorOneId}{283751} \newcommand{\supervisorOne}{Prof. Dr. Michael Mächtel} -\newcommand{\supervisorTwo}{Felix Schuckert} -\newcommand{\studies}{Information Technology - Embedded And Mobile Systems} -\newcommand{\startdate}{2017/4/1} -\newcommand{\submitdate}{2017/9/29} -\newcommand{\buzzwords}{memory-safety, operating system development, rust, static software analysis, software vulnerability} -\renewcommand{\abstract}{% -This study evaluated Rust's guarantees on memory safety in the OS through static analysis. -Static analysis was identified as a requirement due to the assumption that humans tend to make mistakes and C was found to be an error prone language. 
-Rust, as an affine-typed borrow- and lifetime-checked systems language that is equipped with an ownership model, is considered a viable candidate for replacing C for today's OS development. -After identifying common weaknesses of memory vulnerabilities and their manifestations, the choice of programming language was verified as the most effective mitigation attempt. -Rust was chosen to act as a new candidate, and was found to be effective against common errors in buffer handling due to its ownership model and strong type system. -The language shown to be less error prone in memory intensive tasks like buffer handling, which has been identified as a common cause in software vulnerabilities. -After experimentation with various stack protection scenarios and implementing preemptive multitasking on top of interemezzOS, the downside was discovered that Rust cannot statically detect stack overflows of any type. -Implementing this detection was considered beyond scope, although requirements could be identified for future reference. -It was concluded that Rust's static checks of all kinds are a big improvement in OS development, where object orientation and other paradigms are not simple to manage. -Despite the failed hypotheses of guaranteeing full memory-safety in OS development, Rust is suggested as the language for today's and future OS development. 
-} +\newcommand{\supervisorTwo}{Jürgen Keppler} +\newcommand{\studies}{TODO studies} +\newcommand{\startdate}{2016/9/1} +\newcommand{\submitdate}{2017/2/28} +\newcommand{\buzzwords}{TODO buzzwords} % Numbered Subsubsections \setcounter{secnumdepth}{3} -\date{} +\date{Wintersemester 2016/2017} \title{\topic} \author{authorOne} @@ -141,120 +85,30 @@ Despite the failed hypotheses of guaranteeing full memory-safety in OS developme \titlespacing*{\chapter}{0cm}{-1cm}{0.75cm} \titleformat{\chapter}[hang]{\normalfont\Large\bfseries}{\thechapter}{0.5cm}{} -\usepackage{hyperref} -\usepackage{cleveref} - \makeatletter -\newcommand{\cnameref}[1]{\cref{#1} \textit{(\nameref{#1})}} -\newcommand{\Cnameref}[1]{\Cref{#1} \textit{(\nameref{#1})}} - -\newcommand{\cpnameref}[1]{\cref{#1} \textit{(\nameref{#1}, \cpageref{#1})}} -\newcommand{\Cpnameref}[1]{\Cref{#1} \textit{(\nameref{#1}, \cpageref{#1})}} - -\newcommand{\cnamepref}[1]{\cref{#1} \textit{(\nameref{#1}, \cpageref{#1})}} -\newcommand{\Cnamepref}[1]{\Cref{#1} \textit{(\nameref{#1}, \cpageref{#1})}} - %\renewcommand\paragraph{\startsection{paragraph}{4}{\z}% % {-3.25ex\plus -1ex \minus -.2ex}% % {0.0001pt \plus 0.2ex}% % {\normalfont\normalsize\bfseries}} -%\renewcommand\subparagraph{\startsection{subparagraph}{5}{\z}% -% {-3.25ex\plus -1ex \minus -.2ex}% -% {0.0001pt \plus 0.2ex}% -% {\normalfont\normalsize\bfseries}} - -\newcommand{\iitemA}{\setlength\itemindent{0pt}\item} -\newcommand{\iitemB}{\setlength\itemindent{25pt}\item} -\newcommand{\iitemC}{\setlength\itemindent{50pt}\item} - -\let\Partmark\partmark -\def\partmark#1{\def\Partname{#1}\Partmark{#1}} -\let\Chaptermark\chaptermark -\def\chaptermark#1{\def\Chaptername{#1}\Chaptermark{#1}} -\let\Sectionmark\sectionmark -\def\sectionmark#1{\def\Sectionname{#1}\Sectionmark{#1}} -\let\Subsectionmark\subsectionmark -\def\subsectionmark#1{\def\Subsectionname{#1}\Subsectionmark{#1}} -\let\Subsubsectionmark\subsubsectionmark 
-\def\subsubsectionmark#1{\def\Subsubsectionname{#1}\Subsubsectionmark{#1}} - - -\newenvironment{compactminted}{% - \VerbatimEnvironment - \let\FV@ListVSpace\relax - \begin{minted}}% - {\end{minted}} - -\tikzset{west above/.code=\tikz@lib@place@handle@{#1}{south west}{0}{1}{north west}{1}} -\tikzset{west below/.code=\tikz@lib@place@handle@{#1}{north west}{0}{-1}{south west}{1}} -\tikzset{east above/.code=\tikz@lib@place@handle@{#1}{south east}{0}{1}{north east}{1}} -\tikzset{east below/.code=\tikz@lib@place@handle@{#1}{north east}{0}{-1}{south east}{1}} - -% Tikzmark code helpers -\newcommand{\tikzmarkprefix}{\pgfkeysvalueof{/tikz/tikzmark prefix}} -\newcommand{\tikzmarkcountprep}[1]{% -\tikzset{tikzmark prefix=#1}% -\newcounter{Tikzcounter#1}% -\setcounter{Tikzcounter#1}{0}% -} -\newcommand{\tikzmarkcount}[1][\tikzmarkprefix]{% -\stepcounter{Tikzcounter#1}% -\tikzmark{\arabic{Tikzcounter#1}}% -} -\newcommand{\tikzmarkgetcount}[1][\tikzmarkprefix]{% -\expandafter\arabic\expandafter{Tikzcounter#1}% -} -\newcommand{\tikzmarkcircle}[1]{% -\tikz[baseline=-0.77ex]\fill circle[fill=black,radius=1.1ex] node[font=\small,color=white]{#1};% -} -\newcommand{\tikzmarkdrawcirclesarg}[1]{% - \begin{tikzpicture}[remember picture,overlay] - \foreach \x in {1,...,\expandafter\arabic{Tikzcounter#1}} - \fill (pic cs:\x)+(1.3ex,0.5ex) circle[fill=black,radius=1.1ex,anchor=west] node[font=\small,color=white]{$\x$}; - \end{tikzpicture}% -} -\newcommand{\tikzmarkdrawcircles}{% - \begin{tikzpicture}[remember picture,overlay] - \foreach \x in {1,...,\expandafter\arabic\expandafter{Tikzcounter\expandafter\tikzmarkprefix}} - \fill (pic cs:\x)+(1.3ex,0.5ex) circle[fill=black,radius=1.1ex,anchor=west] node[font=\small,color=white]{$\x$}; - \end{tikzpicture}% -} - -% capitablize every First Letter -\let\oldmakefirstuc\makefirstuc -\renewcommand*{\makefirstuc}[1]{% - \def\gls@add@space{}% - \mfu@capitalisewords#1 \@nil\mfu@endcap -} -\def\mfu@capitalisewords#1 #2\mfu@endcap{% - 
\def\mfu@cap@first{#1}% - \def\mfu@cap@second{#2}% - \gls@add@space - \oldmakefirstuc{#1}% - \def\gls@add@space{ }% - \ifx\mfu@cap@second\@nnil - \let\next@mfu@cap\mfu@noop - \else - \let\next@mfu@cap\mfu@capitalisewords - \fi - \next@mfu@cap#2\mfu@endcap -} -\newcommand{\code}[2][md]{\mintinline{#1}{`#2`}} - +\renewcommand\subparagraph{\startsection{subparagraph}{5}{\z}% + {-3.25ex\plus -1ex \minus -.2ex}% + {0.0001pt \plus 0.2ex}% + {\normalfont\normalsize\bfseries}} \makeatother \include{glossary} +\overfullrule=1cm \begin{document} - \include{cover} + %TODO: \include{cover} \pagestyle{front} \include{title} \pagestyle{preamble} \include{abstract} - \include{affidavit} + %TODO: \include{affidavit} \cleardoublepage \newcounter{roman_pagenumbers} % save page number \setcounter{roman_pagenumbers}{\value{page}} @@ -262,28 +116,32 @@ Despite the failed hypotheses of guaranteeing full memory-safety in OS developme \pagestyle{main} %TODO \include{acknowledgments} + \chapter*{Preface} + This thesis is original, unpublished, independent work by the author, \authorOne. + I strongly believe in openness and collaboration in the development of new technology; therefore, the development will be based solely on open-source software. + The results of this project will be freely available on my personal GitHub site\footnote{https://github.com/steveeJ/msc-thesis} once the academic process of this project is complete.
+ + \tableofcontents \part{Context} - \label{context} \printnoidxglossary \include{parts/context/context} - \part{Research And Development} - \label{rnd} - \include{parts/research_and_development/research_and_development} + \part{Research} + \label{part:research} + \include{parts/research/research} - \part{Evaluation And Conclusion} - \label{enc} - \include{parts/eval_and_conclusion/eval_and_conclusion} + \part{Development} + \part{Conclusion} \newpage %TODO \listofmyequations \listoftables %TODO \lstlistoflistings - \listoffigures - \printbibliography + \listoffigures + \bibliography{thesis} \end{document} diff --git a/src/docs/title.tex b/src/docs/title.tex index d259adc..979a79c 100644 --- a/src/docs/title.tex +++ b/src/docs/title.tex @@ -1,7 +1,7 @@ { \setlength{\parskip}{0.5cm} \begin{center} - \textbf{\huge Thesis} + \textbf{\huge Master's Thesis} \textbf{for achieving the academic degree} @@ -12,7 +12,7 @@ [10ex] \textsf{\Large Faculty For Information Technology}\\ - Studies: \studies + Studies \studies \end{center} } @@ -21,7 +21,7 @@ \begin{tabular}{p{3cm}p{10cm}} Topic: & \textbf{\large \topic} \\[10ex] Applicant: & \authorOne, \authorOnestreet, \authorOnezip$ $ \authorOnecity, \authorOneCountry \\ - Matr-Nr.: & \authorOneId\\[10ex] + & Student Identification Number: \authorOneId\\[10ex] 1st Supervisor: & \supervisorOne\\ 2nd Supervisor: & \supervisorTwo\\[10ex] Start Date: & \startdate\\ diff --git a/src/examples/guile/hello_world/hello_world.scm b/src/examples/guile/hello_world/hello_world.scm new file mode 100755 index 0000000..46a26b7 --- /dev/null +++ b/src/examples/guile/hello_world/hello_world.scm @@ -0,0 +1,3 @@ +#!/usr/bin/env guile +!# +(display (+ 3 7)) diff --git a/static/pandoc-preview.html b/static/pandoc-preview.html deleted file mode 100644 index 66c9289..0000000 --- a/static/pandoc-preview.html +++ /dev/null @@ -1,423 +0,0 @@ - - - - - Markdown Preview - - - - - -
-
-
-
- - -