要学习基于IO_FILE的堆利用就得了解它的本质,以下会介绍几个主要的IO函数,结合源码和动态调试去学习。调试环境的搭建可参考《从零开始配置pwn环境:优化pwn虚拟机配置支持libc等指令》(CSDN博客)。
前面分析了系统如何为FILE结构体分配内存并将其链接进_IO_list_all,那么这里则是讲述创建文件FILE之后,fread如何实现从文件中读取数据的。fread的大致流程如下。
整体流程为fread调用vtable中的_IO_file_xsgetn,其中_IO_file_xsgetn是fread的核心函数,它的流程大致如下:
-
判断fp->_IO_buf_base输入缓冲区是否为空,如果为空则调用_IO_doallocbuf去初始化输入缓冲区。
-
在分配完输入缓冲区或输入缓冲区不为空的情况下,判断输入缓冲区是否存在数据。
-
如果输入缓冲区有数据则直接拷贝至用户缓冲区,如果没有或不够则调用__underflow函数执行系统调用读取数据到输入缓冲区,再拷贝到用户缓冲区。
fread的函数原型是:
size_t fread ( void * ptr, size_t size, size_t count, FILE * stream );
其中,ptr:指向保存结果的缓冲区的指针;size:每个数据项的大小;count:数据项的个数;stream:文件指针。函数返回成功读取的数据项的个数。
1.首先是编写一个简单的调用fread函数的C程序
#include<stdio.h>   /* FILE, fopen, fread */
#include<stdlib.h>  /* malloc: without this header malloc is implicitly declared as returning int */

int main(){         /* Minimal fread demo: open "test", read 20 bytes into a heap buffer. */

    FILE* fp = fopen("test","rb");  /* NOTE: NULL if "test" does not exist; checks omitted so lines 6-8 match the gdb listing */
    char *ptr = malloc(0x20);       /* 0x20-byte destination buffer for the 20 bytes requested below */
    fread(ptr, 1, 20, fp);          /* size = 1, count = 20 */
    return 0;
}
2.调试fread程序
2.1 获得可执行程序
gcc -g fread.c -o fread
2.2 调试程序
编译完成后用gdb进行调试。
断点下在fread,在开始之前先查看下FILE结构体fp的内容。从下面的图里可以看到此时_IO_read_ptr和_IO_buf_base等指针都是空的,后面的分析一个很重要的步骤就是看这些指针是如何被赋值以及发挥作用的。
pwndbg> r
Starting program: /ctf/work/wolf/iofile/fread Breakpoint 1, main () at fread.c:6
6 FILE* fp = fopen("test","rb");
LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA
───────────────────────────────────────────────────────────────────────────────────────────────────[ REGISTERS ]───────────────────────────────────────────────────────────────────────────────────────────────────RAX 0x4005b6 (main) ◂— push rbpRBX 0x0RCX 0x0RDX 0x7fffffffe6b8 —▸ 0x7fffffffe8fa ◂— 'LESSOPEN=| /usr/bin/lesspipe %s'RDI 0x1RSI 0x7fffffffe6a8 —▸ 0x7fffffffe8de ◂— '/ctf/work/wolf/iofile/fread'R8 0x400680 (__libc_csu_fini) ◂— ret R9 0x7ffff7de7ac0 (_dl_fini) ◂— push rbpR10 0x846R11 0x7ffff7a2d740 (__libc_start_main) ◂— push r14R12 0x4004c0 (_start) ◂— xor ebp, ebpR13 0x7fffffffe6a0 ◂— 0x1R14 0x0R15 0x0RBP 0x7fffffffe5c0 —▸ 0x400610 (__libc_csu_init) ◂— push r15RSP 0x7fffffffe5b0 —▸ 0x7fffffffe6a0 ◂— 0x1RIP 0x4005be (main+8) ◂— mov esi, 0x400694
────────────────────────────────────────────────────────────────────────────────────────────────────[ DISASM ]─────────────────────────────────────────────────────────────────────────────────────────────────────► 0x4005be <main+8> mov esi, 0x4006940x4005c3 <main+13> mov edi, 0x4006970x4005c8 <main+18> call fopen@plt <0x4004a0>0x4005cd <main+23> mov qword ptr [rbp - 0x10], rax0x4005d1 <main+27> mov edi, 0x200x4005d6 <main+32> call malloc@plt <0x400490>0x4005db <main+37> mov qword ptr [rbp - 8], rax0x4005df <main+41> mov rdx, qword ptr [rbp - 0x10]0x4005e3 <main+45> mov rax, qword ptr [rbp - 8]0x4005e7 <main+49> mov rcx, rdx0x4005ea <main+52> mov edx, 0x14
─────────────────────────────────────────────────────────────────────────────────────────────────[ SOURCE (CODE) ]─────────────────────────────────────────────────────────────────────────────────────────────────
In file: /ctf/work/wolf/iofile/fread.c1 #include<stdio.h>2 #include<stdlib.h>3 4 int main(){5 ► 6 FILE* fp = fopen("test","rb");7 char *ptr = malloc(0x20);8 fread(ptr, 1, 20, fp);9 return 0;10 }
─────────────────────────────────────────────────────────────────────────────────────────────────────[ STACK ]─────────────────────────────────────────────────────────────────────────────────────────────────────
00:0000│ rsp 0x7fffffffe5b0 —▸ 0x7fffffffe6a0 ◂— 0x1
01:0008│ 0x7fffffffe5b8 ◂— 0x0
02:0010│ rbp 0x7fffffffe5c0 —▸ 0x400610 (__libc_csu_init) ◂— push r15
03:0018│ 0x7fffffffe5c8 —▸ 0x7ffff7a2d830 (__libc_start_main+240) ◂— mov edi, eax
04:0020│ 0x7fffffffe5d0 —▸ 0x7fffffffe6a8 —▸ 0x7fffffffe8de ◂— '/ctf/work/wolf/iofile/fread'
... ↓
06:0030│ 0x7fffffffe5e0 ◂— 0x1f7b99608
07:0038│ 0x7fffffffe5e8 —▸ 0x4005b6 (main) ◂— push rbp
───────────────────────────────────────────────────────────────────────────────────────────────────[ BACKTRACE ]───────────────────────────────────────────────────────────────────────────────────────────────────► f 0 4005be main+8f 1 7ffff7a2d830 __libc_start_main+240
Breakpoint main
pwndbg> n
7 char *ptr = malloc(0x20);
LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA
───────────────────────────────────────────────────────────────────────────────────────────────────[ REGISTERS ]───────────────────────────────────────────────────────────────────────────────────────────────────RAX 0x602010 ◂— 0xfbad2488RBX 0x0RCX 0x7ffff7b04040 (__open_nocancel+7) ◂— cmp rax, -0xfffRDX 0x0RDI 0x602010 ◂— 0xfbad2488RSI 0x7ffff7b9ab9f ◂— sub al, 0x63 /* ',ccs=' */R8 0x0R9 0x1R10 0x0R11 0x246R12 0x4004c0 (_start) ◂— xor ebp, ebpR13 0x7fffffffe6a0 ◂— 0x1R14 0x0R15 0x0RBP 0x7fffffffe5c0 —▸ 0x400610 (__libc_csu_init) ◂— push r15RSP 0x7fffffffe5b0 —▸ 0x602010 ◂— 0xfbad2488RIP 0x4005d1 (main+27) ◂— mov edi, 0x20
────────────────────────────────────────────────────────────────────────────────────────────────────[ DISASM ]─────────────────────────────────────────────────────────────────────────────────────────────────────0x4005be <main+8> mov esi, 0x4006940x4005c3 <main+13> mov edi, 0x4006970x4005c8 <main+18> call fopen@plt <0x4004a0>0x4005cd <main+23> mov qword ptr [rbp - 0x10], rax► 0x4005d1 <main+27> mov edi, 0x200x4005d6 <main+32> call malloc@plt <0x400490>0x4005db <main+37> mov qword ptr [rbp - 8], rax0x4005df <main+41> mov rdx, qword ptr [rbp - 0x10]0x4005e3 <main+45> mov rax, qword ptr [rbp - 8]0x4005e7 <main+49> mov rcx, rdx0x4005ea <main+52> mov edx, 0x14
─────────────────────────────────────────────────────────────────────────────────────────────────[ SOURCE (CODE) ]─────────────────────────────────────────────────────────────────────────────────────────────────
In file: /ctf/work/wolf/iofile/fread.c2 #include<stdlib.h>3 4 int main(){5 6 FILE* fp = fopen("test","rb");► 7 char *ptr = malloc(0x20);8 fread(ptr, 1, 20, fp);9 return 0;10 }
─────────────────────────────────────────────────────────────────────────────────────────────────────[ STACK ]─────────────────────────────────────────────────────────────────────────────────────────────────────
00:0000│ rsp 0x7fffffffe5b0 —▸ 0x602010 ◂— 0xfbad2488
01:0008│ 0x7fffffffe5b8 ◂— 0x0
02:0010│ rbp 0x7fffffffe5c0 —▸ 0x400610 (__libc_csu_init) ◂— push r15
03:0018│ 0x7fffffffe5c8 —▸ 0x7ffff7a2d830 (__libc_start_main+240) ◂— mov edi, eax
04:0020│ 0x7fffffffe5d0 —▸ 0x7fffffffe6a8 —▸ 0x7fffffffe8de ◂— '/ctf/work/wolf/iofile/fread'
... ↓
06:0030│ 0x7fffffffe5e0 ◂— 0x1f7b99608
07:0038│ 0x7fffffffe5e8 —▸ 0x4005b6 (main) ◂— push rbp
───────────────────────────────────────────────────────────────────────────────────────────────────[ BACKTRACE ]───────────────────────────────────────────────────────────────────────────────────────────────────► f 0 4005d1 main+27f 1 7ffff7a2d830 __libc_start_main+240
pwndbg> n
8 fread(ptr, 1, 20, fp);
LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA
───────────────────────────────────────────────────────────────────────────────────────────────────[ REGISTERS ]───────────────────────────────────────────────────────────────────────────────────────────────────RAX 0x602240 ◂— 0x0RBX 0x0RCX 0x7ffff7dd1b20 (main_arena) ◂— 0x100000000RDX 0x602240 ◂— 0x0RDI 0x0RSI 0x602260 ◂— 0x0R8 0x0R9 0x1R10 0x4a1R11 0x7ffff7a91130 (malloc) ◂— push rbpR12 0x4004c0 (_start) ◂— xor ebp, ebpR13 0x7fffffffe6a0 ◂— 0x1R14 0x0R15 0x0RBP 0x7fffffffe5c0 —▸ 0x400610 (__libc_csu_init) ◂— push r15RSP 0x7fffffffe5b0 —▸ 0x602010 ◂— 0xfbad2488RIP 0x4005df (main+41) ◂— mov rdx, qword ptr [rbp - 0x10]
────────────────────────────────────────────────────────────────────────────────────────────────────[ DISASM ]─────────────────────────────────────────────────────────────────────────────────────────────────────0x4005c8 <main+18> call fopen@plt <0x4004a0>0x4005cd <main+23> mov qword ptr [rbp - 0x10], rax0x4005d1 <main+27> mov edi, 0x200x4005d6 <main+32> call malloc@plt <0x400490>0x4005db <main+37> mov qword ptr [rbp - 8], rax► 0x4005df <main+41> mov rdx, qword ptr [rbp - 0x10]0x4005e3 <main+45> mov rax, qword ptr [rbp - 8]0x4005e7 <main+49> mov rcx, rdx0x4005ea <main+52> mov edx, 0x140x4005ef <main+57> mov esi, 10x4005f4 <main+62> mov rdi, rax
─────────────────────────────────────────────────────────────────────────────────────────────────[ SOURCE (CODE) ]─────────────────────────────────────────────────────────────────────────────────────────────────
In file: /ctf/work/wolf/iofile/fread.c3 4 int main(){5 6 FILE* fp = fopen("test","rb");7 char *ptr = malloc(0x20);► 8 fread(ptr, 1, 20, fp);9 return 0;10 }
─────────────────────────────────────────────────────────────────────────────────────────────────────[ STACK ]─────────────────────────────────────────────────────────────────────────────────────────────────────
00:0000│ rsp 0x7fffffffe5b0 —▸ 0x602010 ◂— 0xfbad2488
01:0008│ 0x7fffffffe5b8 —▸ 0x602240 ◂— 0x0
02:0010│ rbp 0x7fffffffe5c0 —▸ 0x400610 (__libc_csu_init) ◂— push r15
03:0018│ 0x7fffffffe5c8 —▸ 0x7ffff7a2d830 (__libc_start_main+240) ◂— mov edi, eax
04:0020│ 0x7fffffffe5d0 —▸ 0x7fffffffe6a8 —▸ 0x7fffffffe8de ◂— '/ctf/work/wolf/iofile/fread'
... ↓
06:0030│ 0x7fffffffe5e0 ◂— 0x1f7b99608
07:0038│ 0x7fffffffe5e8 —▸ 0x4005b6 (main) ◂— push rbp
───────────────────────────────────────────────────────────────────────────────────────────────────[ BACKTRACE ]───────────────────────────────────────────────────────────────────────────────────────────────────► f 0 4005df main+41f 1 7ffff7a2d830 __libc_start_main+240
pwndbg> s
__GI__IO_fread (buf=0x602240, size=1, count=20, fp=0x602010) at iofread.c:31
warning: Source file is more recent than executable.
31 {
LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA
───────────────────────────────────────────────────────────────────────────────────────────────────[ REGISTERS ]───────────────────────────────────────────────────────────────────────────────────────────────────RAX 0x602240 ◂— 0x0RBX 0x0RCX 0x602010 ◂— 0xfbad2488RDX 0x14RDI 0x602240 ◂— 0x0RSI 0x1R8 0x0R9 0x1R10 0x632R11 0x7ffff7a7b1a0 (fread) ◂— push r13R12 0x4004c0 (_start) ◂— xor ebp, ebpR13 0x7fffffffe6a0 ◂— 0x1R14 0x0R15 0x0RBP 0x7fffffffe5c0 —▸ 0x400610 (__libc_csu_init) ◂— push r15RSP 0x7fffffffe5a8 —▸ 0x4005fc (main+70) ◂— mov eax, 0RIP 0x7ffff7a7b1a0 (fread) ◂— push r13
────────────────────────────────────────────────────────────────────────────────────────────────────[ DISASM ]─────────────────────────────────────────────────────────────────────────────────────────────────────► 0x7ffff7a7b1a0 <fread> push r130x7ffff7a7b1a2 <fread+2> push r120x7ffff7a7b1a4 <fread+4> mov r12, rsi0x7ffff7a7b1a7 <fread+7> imul r12, rdx0x7ffff7a7b1ab <fread+11> push rbp0x7ffff7a7b1ac <fread+12> push rbx0x7ffff7a7b1ad <fread+13> sub rsp, 80x7ffff7a7b1b1 <fread+17> test r12, r120x7ffff7a7b1b4 <fread+20> je fread+256 <0x7ffff7a7b2a0>0x7ffff7a7b1ba <fread+26> mov eax, dword ptr [rcx]0x7ffff7a7b1bc <fread+28> mov r9, rdi
─────────────────────────────────────────────────────────────────────────────────────────────────[ SOURCE (CODE) ]─────────────────────────────────────────────────────────────────────────────────────────────────
In file: /ctf/work/wolf/iofile/iofread.c26 27 #include "libioP.h"28 29 _IO_size_t30 _IO_fread (void *buf, _IO_size_t size, _IO_size_t count, _IO_FILE *fp)► 31 {32 _IO_size_t bytes_requested = size * count;33 _IO_size_t bytes_read;34 CHECK_FILE (fp, 0);35 if (bytes_requested == 0)36 return 0;
─────────────────────────────────────────────────────────────────────────────────────────────────────[ STACK ]─────────────────────────────────────────────────────────────────────────────────────────────────────
00:0000│ rsp 0x7fffffffe5a8 —▸ 0x4005fc (main+70) ◂— mov eax, 0
01:0008│ 0x7fffffffe5b0 —▸ 0x602010 ◂— 0xfbad2488
02:0010│ 0x7fffffffe5b8 —▸ 0x602240 ◂— 0x0
03:0018│ rbp 0x7fffffffe5c0 —▸ 0x400610 (__libc_csu_init) ◂— push r15
04:0020│ 0x7fffffffe5c8 —▸ 0x7ffff7a2d830 (__libc_start_main+240) ◂— mov edi, eax
05:0028│ 0x7fffffffe5d0 —▸ 0x7fffffffe6a8 —▸ 0x7fffffffe8de ◂— '/ctf/work/wolf/iofile/fread'
... ↓
07:0038│ 0x7fffffffe5e0 ◂— 0x1f7b99608
───────────────────────────────────────────────────────────────────────────────────────────────────[ BACKTRACE ]───────────────────────────────────────────────────────────────────────────────────────────────────► f 0 7ffff7a7b1a0 freadf 1 4005fc main+70f 2 7ffff7a2d830 __libc_start_main+240
pwndbg> p *_IO_list_all
$1 = {file = {_flags = -72539000, _IO_read_ptr = 0x0, _IO_read_end = 0x0, _IO_read_base = 0x0, _IO_write_base = 0x0, _IO_write_ptr = 0x0, _IO_write_end = 0x0, _IO_buf_base = 0x0, _IO_buf_end = 0x0, _IO_save_base = 0x0, _IO_backup_base = 0x0, _IO_save_end = 0x0, _markers = 0x0, _chain = 0x7ffff7dd2540 <_IO_2_1_stderr_>, _fileno = 3, _flags2 = 0, _old_offset = 0, _cur_column = 0, _vtable_offset = 0 '\000', _shortbuf = "", _lock = 0x6020f0, _offset = -1, _codecvt = 0x0, _wide_data = 0x602100, _freeres_list = 0x0, _freeres_buf = 0x0, __pad5 = 0, _mode = 0, _unused2 = '\000' <repeats 19 times>}, vtable = 0x7ffff7dd06e0 <_IO_file_jumps>
}
pwndbg>
2.3 vtable中的指针内容如下
pwndbg> p *_IO_list_all->vtable
$2 = {__dummy = 0, __dummy2 = 0, __finish = 0x7ffff7a869c0 <_IO_new_file_finish>, __overflow = 0x7ffff7a87730 <_IO_new_file_overflow>, __underflow = 0x7ffff7a874a0 <_IO_new_file_underflow>, __uflow = 0x7ffff7a88600 <__GI__IO_default_uflow>, __pbackfail = 0x7ffff7a89980 <__GI__IO_default_pbackfail>, __xsputn = 0x7ffff7a861e0 <_IO_new_file_xsputn>, __xsgetn = 0x7ffff7a85ec0 <__GI__IO_file_xsgetn>, __seekoff = 0x7ffff7a854c0 <_IO_new_file_seekoff>, __seekpos = 0x7ffff7a88a00 <_IO_default_seekpos>, __setbuf = 0x7ffff7a85430 <_IO_new_file_setbuf>, __sync = 0x7ffff7a85370 <_IO_new_file_sync>, __doallocate = 0x7ffff7a7a180 <__GI__IO_file_doallocate>, __read = 0x7ffff7a861a0 <__GI__IO_file_read>, __write = 0x7ffff7a85b70 <_IO_new_file_write>, __seek = 0x7ffff7a85970 <__GI__IO_file_seek>, __close = 0x7ffff7a85340 <__GI__IO_file_close>, __stat = 0x7ffff7a85b60 <__GI__IO_file_stat>, __showmanyc = 0x7ffff7a89af0 <_IO_default_showmanyc>, __imbue = 0x7ffff7a89b00 <_IO_default_imbue>
}
pwndbg>
2.4 fread实际上是_IO_fread函数,文件目录为/libio/iofread.c
可到这里下载复制到相应目录 https://github.com/balexios/glibc2.23/blob/master/libio/iofread.c
/* Read COUNT items of SIZE bytes each from FP into BUF.
   Returns COUNT when every requested byte was obtained, otherwise the
   number of whole items covered by the bytes actually read.  A request
   of zero bytes returns 0 without taking the lock.  */
_IO_size_t
_IO_fread (void *buf, _IO_size_t size, _IO_size_t count, _IO_FILE *fp)
{
  _IO_size_t bytes_requested = size * count;
  _IO_size_t bytes_read;
  CHECK_FILE (fp, 0);
  if (bytes_requested == 0)
    return 0;
  _IO_acquire_lock (fp);
  /* Call _IO_sgetn to do the actual read.  */
  bytes_read = _IO_sgetn (fp, (char *) buf, bytes_requested);
  _IO_release_lock (fp);
  /* Convert the byte count back into an item count.  */
  return bytes_requested == bytes_read ? count : bytes_read / size;
}
libc_hidden_def (_IO_fread)
2.5 _IO_fread函数调用了_IO_sgetn函数,跟进该函数。
/* Forward FP, DATA and N to the _IO_XSGETN macro and return its result
   unchanged.  */
_IO_size_t
_IO_sgetn (_IO_FILE *fp, void *data, _IO_size_t n)
{/* FIXME handle putback buffer here! */return _IO_XSGETN (fp, data, n);
}
libc_hidden_def (_IO_sgetn)
2.6 又看到其调用了_IO_XSGETN函数,查看其定义。
/* Expands to JUMP2 on the __xsgetn slot, passing FP, DATA and N through.  */
#define _IO_XSGETN(FP, DATA, N) JUMP2 (__xsgetn, FP, DATA, N)
实际上就是FILE结构体中vtable的__xsgetn函数,跟进去/libio/fileops.c。
_IO_file_xsgetn是处理fread读入数据的核心函数,分为以下几个部分:
-
fp->_IO_buf_base为空时,表明此时的FILE结构体中的指针未被初始化,输入缓冲区未建立,则调用_IO_doallocbuf去初始化指针,建立输入缓冲区。
-
输入缓冲区有输入,即fp->_IO_read_ptr小于fp->_IO_read_end,此时将缓冲区里的数据直接拷贝到目标buff。
-
输入缓冲区里的数据为空或者不能满足全部的需求,则调用__underflow,通过系统调用读入数据。
_IO_size_t
_IO_file_xsgetn (_IO_FILE *fp, void *data, _IO_size_t n)
{_IO_size_t want, have;_IO_ssize_t count;char *s = data;want = n;if (fp->_IO_buf_base == NULL){...# 1、如果fp->_IO_buf_base为空的话则调用_IO_doallocbuf_IO_doallocbuf (fp);}while (want > 0){have = fp->_IO_read_end - fp->_IO_read_ptr;if (want <= have) # 2、输入缓冲区里已经有足够的字符,则直接把缓冲区里的字符给目标buff{memcpy (s, fp->_IO_read_ptr, want);fp->_IO_read_ptr += want;want = 0;}else{if (have > 0) # 3、输入缓冲区里有部分字符,但是没有达到fread的size需求,先把已有的拷贝至目标buff{...memcpy (s, fp->_IO_read_ptr, have);s += have;want -= have;fp->_IO_read_ptr += have;}if (fp->_IO_buf_base&& want < (size_t) (fp->_IO_buf_end - fp->_IO_buf_base)){if (__underflow (fp) == EOF) # 4、输入缓冲区里不能满足需求,调用__underflow读入数据break;continue;}...return n - want;
}
libc_hidden_def (_IO_file_xsgetn)
2.7 接下来对_IO_file_xsgetn这三部分进行跟进并分析。
首先在fp->_IO_buf_base为空时,也就是输入缓冲区未建立时,代码调用_IO_doallocbuf去建立输入缓冲区。跟进_IO_doallocbuf函数,看下它是如何初始化缓冲区的,为输入缓冲区分配空间的,文件在/libio/genops.c中。
/* Make sure FP has a buffer.  Does nothing when one is already set;
   otherwise tries _IO_DOALLOCATE and, if that path is not taken or
   returns EOF, falls back to the one-byte fp->_shortbuf.  */
void
_IO_doallocbuf (_IO_FILE *fp)
{
  /* A buffer is already installed: return immediately.  */
  if (fp->_IO_buf_base)
    return;
  /* Flag check: _IO_UNBUFFERED is NOT set, or _mode > 0.  */
  if (!(fp->_flags & _IO_UNBUFFERED) || fp->_mode > 0)
    /* Dispatch through the vtable.  */
    if (_IO_DOALLOCATE (fp) != EOF)
      return;
  /* Fallback: use the single byte of _shortbuf as the buffer.  */
  _IO_setb (fp, fp->_shortbuf, fp->_shortbuf+1, 0);
}
libc_hidden_def (_IO_doallocbuf)
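函数首先检查fp->_IO_buf_base是否为空,如果不为空表明该输入缓冲区已被初始化,那么直接返回。如果为空,则检查fp->_flags中是否没有设置_IO_UNBUFFERED标志,或者fp->_mode是否大于0,只要满足其中一个条件就通过_IO_DOALLOCATE调用FILE的vtable中的__doallocate函数,即_IO_file_doallocate;如果条件不满足或该调用返回EOF,则退而使用fp->_shortbuf作为1字节的缓冲区。跟进_IO_file_doallocate函数,在/libio/filedoalloc.c中。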
/* Allocate a buffer for FP (abridged excerpt).  The size starts at
   _IO_BUFSIZ and is replaced by st.st_blksize when _IO_SYSSTAT succeeds
   and reports a positive block size.  Returns 1 on success and EOF when
   malloc fails.  Passages marked with an ellipsis comment are elided.  */
int
_IO_file_doallocate (_IO_FILE *fp)
{
  _IO_size_t size;
  char *p;
  struct stat64 st;
  /* ... */
  size = _IO_BUFSIZ;
  /* ... */
  /* Call _IO_SYSSTAT to fetch information about the file.  */
  if (fp->_fileno >= 0 && __builtin_expect (_IO_SYSSTAT (fp, &st), 0) >= 0)
    {
      /* ... */
      if (st.st_blksize > 0)
        size = st.st_blksize;
      /* ... */
    }
  p = malloc (size);
  if (__glibc_unlikely (p == NULL))
    return EOF;
  /* Call _IO_setb to install [p, p + size) as the FILE buffer.  */
  _IO_setb (fp, p, p + size, 1);
  return 1;
}
libc_hidden_def (_IO_file_doallocate)
可以看到_IO_file_doallocate函数是分配输入缓冲区的实现函数,首先调用_IO_SYSSTAT去获取文件信息,_IO_SYSSTAT函数是vtable中的__stat函数。获取文件信息,修改相应需要申请的size。可以看到在执行完_IO_SYSSTAT函数后,st结构体的值为下图所示。
pwndbg> n
112 if (S_ISCHR (st.st_mode))
LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA
───────────────────────────────────────────────────────────────────────────────────────────────────[ REGISTERS ]───────────────────────────────────────────────────────────────────────────────────────────────────RAX 0x0RBX 0x602010 ◂— 0xfbad2488RCX 0x7ffff7b03c34 (__fxstat64+20) ◂— cmp rax, -0x1000 /* 'H=' */RDX 0x7fffffffe480 ◂— 0x801RDI 0x3RSI 0x7fffffffe480 ◂— 0x801R8 0x6020f0 ◂— 0x100000001R9 0x602240 ◂— 0x0R10 0x7ffff7feb700 ◂— 0x7ffff7feb700R11 0x246R12 0x14R13 0x14R14 0x602240 ◂— 0x0R15 0x0RBP 0x602010 ◂— 0xfbad2488RSP 0x7fffffffe480 ◂— 0x801RIP 0x7ffff7a7a1a9 (_IO_file_doallocate+41) ◂— mov eax, dword ptr [rsp + 0x18]
────────────────────────────────────────────────────────────────────────────────────────────────────[ DISASM ]─────────────────────────────────────────────────────────────────────────────────────────────────────► 0x7ffff7a7a1a9 <_IO_file_doallocate+41> mov eax, dword ptr [rsp + 0x18]0x7ffff7a7a1ad <_IO_file_doallocate+45> and eax, 0xf0000x7ffff7a7a1b2 <_IO_file_doallocate+50> cmp eax, 0x20000x7ffff7a7a1b7 <_IO_file_doallocate+55> je _IO_file_doallocate+136 <0x7ffff7a7a208>0x7ffff7a7a1b9 <_IO_file_doallocate+57> mov rbx, qword ptr [rsp + 0x38]0x7ffff7a7a1be <_IO_file_doallocate+62> test rbx, rbx0x7ffff7a7a1c1 <_IO_file_doallocate+65> jg _IO_file_doallocate+77 <0x7ffff7a7a1cd>↓0x7ffff7a7a1cd <_IO_file_doallocate+77> mov rdi, rbx0x7ffff7a7a1d0 <_IO_file_doallocate+80> call 0x7ffff7a2c8a00x7ffff7a7a1d5 <_IO_file_doallocate+85> mov rsi, rax0x7ffff7a7a1d8 <_IO_file_doallocate+88> mov eax, 0xffffffff
─────────────────────────────────────────────────────────────────────────────────────────────────[ SOURCE (CODE) ]─────────────────────────────────────────────────────────────────────────────────────────────────
In file: /ctf/work/wolf/iofile/filedoalloc.c107 #endif108 109 size = _IO_BUFSIZ;110 if (fp->_fileno >= 0 && __builtin_expect (_IO_SYSSTAT (fp, &st), 0) >= 0)111 {► 112 if (S_ISCHR (st.st_mode))113 {114 /* Possibly a tty. */115 if (116 #ifdef DEV_TTY_P117 DEV_TTY_P (&st) ||
─────────────────────────────────────────────────────────────────────────────────────────────────────[ STACK ]─────────────────────────────────────────────────────────────────────────────────────────────────────
00:0000│ rdx rsi rsp 0x7fffffffe480 ◂— 0x801
01:0008│ 0x7fffffffe488 ◂— 0x2a06c9
02:0010│ 0x7fffffffe490 ◂— 0x1
03:0018│ 0x7fffffffe498 ◂— 0x81a4
04:0020│ 0x7fffffffe4a0 ◂— 0x0
... ↓
07:0038│ 0x7fffffffe4b8 ◂— 0x1000
───────────────────────────────────────────────────────────────────────────────────────────────────[ BACKTRACE ]───────────────────────────────────────────────────────────────────────────────────────────────────► f 0 7ffff7a7a1a9 _IO_file_doallocate+41f 1 7ffff7a88594 _IO_doallocbuf+52f 2 7ffff7a8609c __GI__IO_file_xsgetn+476f 3 7ffff7a7b236 fread+150f 4 4005fc main+70f 5 7ffff7a2d830 __libc_start_main+240
pwndbg> p st
$3 = {st_dev = 2049, st_ino = 2754249, st_nlink = 1, st_mode = 33188, st_uid = 0, st_gid = 0, __pad0 = 0, st_rdev = 0, st_size = 0, st_blksize = 4096, st_blocks = 0, st_atim = {tv_sec = 1708698179, tv_nsec = 787840012}, st_mtim = {tv_sec = 1708612374, tv_nsec = 364342316}, st_ctim = {tv_sec = 1708612374, tv_nsec = 364342316}, __glibc_reserved = {0, 0, 0}
}
pwndbg>
因此size被修改为st.st_blksize所对应大小的4096即0x1000,接着调用malloc去申请内存,申请出来的堆块如下图所示。
空间申请出来后,调用_IO_setb,跟进去看它干了些啥,文件在/libio/genops.c中。
/* Record [B, EB) as the buffer of F (abridged excerpt; passages marked
   with an ellipsis comment, including every use of A, are elided).  */
void
_IO_setb (_IO_FILE *f, char *b, char *eb, int a)
{
  /* ... */
  f->_IO_buf_base = b;  /* Set _IO_buf_base.  */
  f->_IO_buf_end = eb;  /* Set _IO_buf_end.  */
  /* ... */
}
libc_hidden_def (_IO_setb)
2.8 函数逻辑比较简单,就是设置了_IO_buf_base和_IO_buf_end,那么在_IO_setb执行完之后,fp的这两个指针被赋上了值。
pwndbg> n
131 return 1;
LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA
───────────────────────────────────────────────────────────────────────────────────────────────────[ REGISTERS ]───────────────────────────────────────────────────────────────────────────────────────────────────RAX 0xfbad2488RBX 0x1000RCX 0x1RDX 0xfbad2488RDI 0x0RSI 0x602270 ◂— 0x0R8 0x6020f0 ◂— 0x100000001R9 0x602240 ◂— 0x0R10 0x7ffff7feb700 ◂— 0x7ffff7feb700R11 0x246R12 0x14R13 0x14R14 0x602240 ◂— 0x0R15 0x0RBP 0x602010 ◂— 0xfbad2488RSP 0x7fffffffe480 ◂— 0x801RIP 0x7ffff7a7a1f3 (_IO_file_doallocate+115) ◂— mov eax, 1
────────────────────────────────────────────────────────────────────────────────────────────────────[ DISASM ]─────────────────────────────────────────────────────────────────────────────────────────────────────0x7ffff7a7a1e0 <_IO_file_doallocate+96> je _IO_file_doallocate+120 <0x7ffff7a7a1f8>0x7ffff7a7a1e2 <_IO_file_doallocate+98> lea rdx, [rsi + rbx]0x7ffff7a7a1e6 <_IO_file_doallocate+102> mov ecx, 10x7ffff7a7a1eb <_IO_file_doallocate+107> mov rdi, rbp0x7ffff7a7a1ee <_IO_file_doallocate+110> call _IO_setb <0x7ffff7a88500>► 0x7ffff7a7a1f3 <_IO_file_doallocate+115> mov eax, 10x7ffff7a7a1f8 <_IO_file_doallocate+120> add rsp, 0x900x7ffff7a7a1ff <_IO_file_doallocate+127> pop rbx0x7ffff7a7a200 <_IO_file_doallocate+128> pop rbp0x7ffff7a7a201 <_IO_file_doallocate+129> pop r120x7ffff7a7a203 <_IO_file_doallocate+131> ret
─────────────────────────────────────────────────────────────────────────────────────────────────[ SOURCE (CODE) ]─────────────────────────────────────────────────────────────────────────────────────────────────
In file: /ctf/work/wolf/iofile/filedoalloc.c126 }127 p = malloc (size);128 if (__glibc_unlikely (p == NULL))129 return EOF;130 _IO_setb (fp, p, p + size, 1);► 131 return 1;132 }133 libc_hidden_def (_IO_file_doallocate)
─────────────────────────────────────────────────────────────────────────────────────────────────────[ STACK ]─────────────────────────────────────────────────────────────────────────────────────────────────────
00:0000│ rsp 0x7fffffffe480 ◂— 0x801
01:0008│ 0x7fffffffe488 ◂— 0x2a06c9
02:0010│ 0x7fffffffe490 ◂— 0x1
03:0018│ 0x7fffffffe498 ◂— 0x81a4
04:0020│ 0x7fffffffe4a0 ◂— 0x0
... ↓
07:0038│ 0x7fffffffe4b8 ◂— 0x1000
───────────────────────────────────────────────────────────────────────────────────────────────────[ BACKTRACE ]───────────────────────────────────────────────────────────────────────────────────────────────────► f 0 7ffff7a7a1f3 _IO_file_doallocate+115f 1 7ffff7a88594 _IO_doallocbuf+52f 2 7ffff7a8609c __GI__IO_file_xsgetn+476f 3 7ffff7a7b236 fread+150f 4 4005fc main+70f 5 7ffff7a2d830 __libc_start_main+240
pwndbg> p *_IO_list_al
No symbol "_IO_list_al" in current context.
pwndbg> p *_IO_list_all
$4 = {file = {_flags = -72539000, _IO_read_ptr = 0x0, _IO_read_end = 0x0, _IO_read_base = 0x0, _IO_write_base = 0x0, _IO_write_ptr = 0x0, _IO_write_end = 0x0, _IO_buf_base = 0x602270 "", _IO_buf_end = 0x603270 "", _IO_save_base = 0x0, _IO_backup_base = 0x0, _IO_save_end = 0x0, _markers = 0x0, _chain = 0x7ffff7dd2540 <_IO_2_1_stderr_>, _fileno = 3, _flags2 = 0, _old_offset = 0, _cur_column = 0, _vtable_offset = 0 '\000', _shortbuf = "", _lock = 0x6020f0, _offset = -1, _codecvt = 0x0, _wide_data = 0x602100, _freeres_list = 0x0, _freeres_buf = 0x0, __pad5 = 0, _mode = 0, _unused2 = '\000' <repeats 19 times>}, vtable = 0x7ffff7dd06e0 <_IO_file_jumps>
}
pwndbg>
到此,初始化缓冲区就完成了,_IO_setb返回到_IO_file_doallocate后,_IO_file_doallocate和_IO_doallocbuf也依次返回,回到_IO_file_xsgetn函数中。
接下来程序也就进入到了第二部分,拷贝输入缓冲区数据,如果输入缓冲区存在已输入的数据,则把它直接拷贝到目标缓冲区里。
需要说明的是,从这里可以看出fp->_IO_read_ptr指向输入缓冲区中尚未被读取的数据的起始地址,fp->_IO_read_end指向输入缓冲区中有效数据的结束地址,两者之差(代码中的have)就是缓冲区里当前可用的字节数。
将fp->_IO_read_ptr到fp->_IO_read_end之间的数据通过memcpy拷贝到目标缓冲区中。
2.9 在输入缓冲区中没有数据或者数据不能满足需求的时候,则会执行到最后一步,调用__underflow执行系统调用read读取数据,并放入到输入缓冲区里。因为我们的这个示例程序是第一次读取数据,此时的fp->_IO_read_end和fp->_IO_read_ptr都是0,因此会进入到__underflow,跟进去细看,文件在/libio/genops.c中。
/* Abridged excerpt: return the next byte when the read area of FP still
   holds data, otherwise hand over to _IO_UNDERFLOW.  Passages marked
   with an ellipsis comment are elided.  */
int
__underflow (_IO_FILE *fp)
{
  /* Additional checks.  */
  /* ... */
  if (fp->_IO_read_ptr < fp->_IO_read_end)
    return *(unsigned char *) fp->_IO_read_ptr;
  /* ... */
  /* Call _IO_UNDERFLOW.  */
  return _IO_UNDERFLOW (fp);
}
libc_hidden_def (__underflow)
2.10 函数稍微做一些检查就会调用_IO_UNDERFLOW函数,其中一个检查是如果fp->_IO_read_ptr小于fp->_IO_read_end则表明输入缓冲区里存在数据,可直接返回,否则表示需要继续读入数据。该函数是FILE结构体vtable里的_IO_new_file_underflow,跟进去看文件在/libio/fileops.c。
/* Abridged excerpt: refill the buffer of FP through _IO_SYSREAD and
   return the byte at fp->_IO_read_ptr.  Returns EOF with errno set to
   EBADF when _IO_NO_READS is set.  Passages marked with an ellipsis
   comment are elided.  */
int
_IO_new_file_underflow (_IO_FILE *fp)
{
  _IO_ssize_t count;
  /* ... */
  /* If the _IO_NO_READS flag is set, fail immediately.  */
  if (fp->_flags & _IO_NO_READS)
    {
      fp->_flags |= _IO_ERR_SEEN;
      __set_errno (EBADF);
      return EOF;
    }
  /* If the input buffer already holds data, return right away.  */
  if (fp->_IO_read_ptr < fp->_IO_read_end)
    return *(unsigned char *) fp->_IO_read_ptr;
  /* ... */
  /* If there is no input buffer yet, call _IO_doallocbuf to allocate one.  */
  if (fp->_IO_buf_base == NULL)
    {
      /* ... */
      _IO_doallocbuf (fp);
    }
  /* ... */
  /* Point every read and write pointer at the start of the buffer.  */
  fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_buf_base;
  fp->_IO_read_end = fp->_IO_buf_base;
  fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end
    = fp->_IO_buf_base;
  /* Call _IO_SYSREAD, requesting up to the whole buffer size.  */
  count = _IO_SYSREAD (fp, fp->_IO_buf_base,
                       fp->_IO_buf_end - fp->_IO_buf_base);
  /* ... */
  /* Advance _IO_read_end by the number of bytes obtained.  */
  fp->_IO_read_end += count;
  /* ... */
  return *(unsigned char *) fp->_IO_read_ptr;
}
libc_hidden_ver (_IO_new_file_underflow, _IO_file_underflow)
_IO_new_file_underflow函数是最终执行系统调用的地方,在最终执行系统调用之前,仍然有一些检查,整个流程为:
-
检查FILE结构体的_flags标志位是否包含_IO_NO_READS,如果存在这个标志位则直接返回EOF,其中_IO_NO_READS标志位的定义是#define _IO_NO_READS 4 /* Reading not allowed */。
-
如果fp->_IO_buf_base为NULL,则调用_IO_doallocbuf分配输入缓冲区。
-
接着初始化设置FILE结构体指针,将他们都设置成fp->_IO_buf_base
-
调用_IO_SYSREAD(vtable中的_IO_file_read函数),该函数最终执行系统调用read,读取文件数据,数据读入到fp->_IO_buf_base中,读入大小为输入缓冲区的大小fp->_IO_buf_end - fp->_IO_buf_base。
-
设置输入缓冲区已有数据的size,即执行fp->_IO_read_end += count,使fp->_IO_read_end指向已读入数据的末尾。
其中第二步:如果fp->_IO_buf_base为NULL,则调用_IO_doallocbuf分配输入缓冲区,该函数在前面已经分析过。
其中第四步的_IO_SYSREAD(vtable中的_IO_file_read函数)的源码比较简单,就是执行系统调用函数read去读取文件数据,文件在libio/fileops.c,源码如下:
/* Read up to SIZE bytes from fp->_fileno into BUF and return the result
   of the underlying call: read_not_cancel when _IO_FLAGS2_NOTCANCEL is
   set in fp->_flags2, plain read otherwise.  */
_IO_ssize_t
_IO_file_read (_IO_FILE *fp, void *buf, _IO_ssize_t size)
{return (__builtin_expect (fp->_flags2 & _IO_FLAGS2_NOTCANCEL, 0)? read_not_cancel (fp->_fileno, buf, size): read (fp->_fileno, buf, size));}
2.11 _IO_file_underflow函数执行完毕以后,FILE结构体中各个指针已被赋值,且文件数据已读入,输入缓冲区里已经有数据,结构体值如下,其中fp->_IO_read_ptr指向输入缓冲区数据的开始位置,fp->_IO_read_end指向输入缓冲区数据结束的位置:
192 return 0;
LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA
───────────────────────────────────────────────────────────────────────────────────────────────────[ REGISTERS ]───────────────────────────────────────────────────────────────────────────────────────────────────RAX 0x0RBX 0x602010 ◂— 0xfbad2488RCX 0x602270 ◂— 0x0RDX 0xfbad2488RDI 0x602010 ◂— 0xfbad2488RSI 0x0R8 0x6020f0 ◂— 0x100000001R9 0x602240 ◂— 0x0R10 0x7ffff7feb700 ◂— 0x7ffff7feb700R11 0x346R12 0x14R13 0x14R14 0x602240 ◂— 0x0R15 0x0RBP 0x0RSP 0x7fffffffe520 —▸ 0x602010 ◂— 0xfbad2488RIP 0x7ffff7a881a8 (_IO_switch_to_get_mode+56) ◂— xor eax, eax
────────────────────────────────────────────────────────────────────────────────────────────────────[ DISASM ]─────────────────────────────────────────────────────────────────────────────────────────────────────0x7ffff7a88197 <_IO_switch_to_get_mode+39> and dh, 0xf70x7ffff7a8819a <_IO_switch_to_get_mode+42> mov qword ptr [rbx + 8], rax0x7ffff7a8819e <_IO_switch_to_get_mode+46> mov qword ptr [rbx + 0x30], rax0x7ffff7a881a2 <_IO_switch_to_get_mode+50> mov qword ptr [rbx + 0x20], rax0x7ffff7a881a6 <_IO_switch_to_get_mode+54> mov dword ptr [rbx], edx► 0x7ffff7a881a8 <_IO_switch_to_get_mode+56> xor eax, eax0x7ffff7a881aa <_IO_switch_to_get_mode+58> pop rbx0x7ffff7a881ab <_IO_switch_to_get_mode+59> ret ↓0x7ffff7a875b8 <_IO_file_underflow+280> mov rsi, qword ptr [rbx + 0x38]0x7ffff7a875bc <_IO_file_underflow+284> mov rdx, qword ptr [rbx + 0x40]0x7ffff7a875c0 <_IO_file_underflow+288> mov rdi, rbx
─────────────────────────────────────────────────────────────────────────────────────────────────[ SOURCE (CODE) ]─────────────────────────────────────────────────────────────────────────────────────────────────
In file: /ctf/work/wolf/iofile/genops.c187 fp->_IO_read_ptr = fp->_IO_write_ptr;188 189 fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end = fp->_IO_read_ptr;190 191 fp->_flags &= ~_IO_CURRENTLY_PUTTING;► 192 return 0;193 }194 libc_hidden_def (_IO_switch_to_get_mode)195 196 void197 _IO_free_backup_area (_IO_FILE *fp)
─────────────────────────────────────────────────────────────────────────────────────────────────────[ STACK ]─────────────────────────────────────────────────────────────────────────────────────────────────────
00:0000│ rsp 0x7fffffffe520 —▸ 0x602010 ◂— 0xfbad2488
01:0008│ 0x7fffffffe528 —▸ 0x7ffff7a875b8 (_IO_file_underflow+280) ◂— mov rsi, qword ptr [rbx + 0x38]
02:0010│ 0x7fffffffe530 —▸ 0x602010 ◂— 0xfbad2488
... ↓
04:0020│ 0x7fffffffe540 ◂— 0x0
05:0028│ 0x7fffffffe548 —▸ 0x7ffff7a86058 (__GI__IO_file_xsgetn+408) ◂— cmp eax, -1
06:0030│ 0x7fffffffe550 —▸ 0x602010 ◂— 0xfbad2488
07:0038│ 0x7fffffffe558 ◂— 0x1
───────────────────────────────────────────────────────────────────────────────────────────────────[ BACKTRACE ]───────────────────────────────────────────────────────────────────────────────────────────────────► f 0 7ffff7a881a8 _IO_switch_to_get_mode+56f 1 7ffff7a875b8 _IO_file_underflow+280f 2 7ffff7a86058 __GI__IO_file_xsgetn+408f 3 7ffff7a7b236 fread+150f 4 4005fc main+70f 5 7ffff7a2d830 __libc_start_main+240
pwndbg> p *_IO_list_all
$21 = {file = {_flags = -72539000, _IO_read_ptr = 0x0, _IO_read_end = 0x0, _IO_read_base = 0x602270 "", _IO_write_base = 0x0, _IO_write_ptr = 0x0, _IO_write_end = 0x0, _IO_buf_base = 0x602270 "", _IO_buf_end = 0x603270 "", _IO_save_base = 0x0, _IO_backup_base = 0x0, _IO_save_end = 0x0, _markers = 0x0, _chain = 0x7ffff7dd2540 <_IO_2_1_stderr_>, _fileno = 3, _flags2 = 0, _old_offset = 0, _cur_column = 0, _vtable_offset = 0 '\000', _shortbuf = "", _lock = 0x6020f0, _offset = -1, _codecvt = 0x0, _wide_data = 0x602100, _freeres_list = 0x0, _freeres_buf = 0x0, __pad5 = 0, _mode = -1, _unused2 = '\000' <repeats 19 times>}, vtable = 0x7ffff7dd06e0 <_IO_file_jumps>
}
pwndbg>
函数执行完,返回到_IO_file_xsgetn函数中,由于while循环的存在,重新执行第二部分,此时将输入缓冲区拷贝到目标缓冲区,最终返回。
至此,对于fread的源码分析结束。