平台
ARM64
Linux 6.10
作者
pengdonglin137@163.com
背景
最近在学习Linux的缺页异常时突然奇想,在不进行内存换出的情况下,如何让进程再次触发缺页?
基于对ARMv8的理解,它的MMU的页表项中有个AF位,当AF为0时,当访问到对应的虚拟页时,会触发缺页。
如果AF位为0,当访问到对应的虚拟页时,会触发MMU的Access flags fault。然后软件需要将这个AF位置1,之后再次访问时就不会触发这个异常了,而Linux中会使用下面的接口来清除和设置AF位:
// 清除
pmdp_test_and_clear_young
ptep_test_and_clear_young// 设置
pte_mkyoung
以ptep_test_and_clear_young
为例:
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,unsigned long address,pte_t *ptep)
{pte_t pte = ptep_get(ptep);int r = 1;if (!pte_young(pte))r = 0;elseset_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));return r;
}#define pte_young(pte) (!!(pte_val(pte) & PTE_AF))static inline pte_t pte_mkold(pte_t pte)
{return clear_pte_bit(pte, __pgprot(PTE_AF));
}
这个接口用于清除PTE页表项的AF位,当再次访问时,会在缺页处理中设置AF位:
static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
{pte_t entry;if (unlikely(pmd_none(*vmf->pmd))) {/** Leave __pte_alloc() until later: because vm_ops->fault may* want to allocate huge page, and if we expose page table* for an instant, it will be difficult to retract from* concurrent faults and from rmap lookups.*/vmf->pte = NULL;vmf->flags &= ~FAULT_FLAG_ORIG_PTE_VALID;} else {/** A regular pmd is established and it can't morph into a huge* pmd by anon khugepaged, since that takes mmap_lock in write* mode; but shmem or file collapse to THP could still morph* it into a huge pmd: just retry later if so.*/vmf->pte = pte_offset_map_nolock(vmf->vma->vm_mm, vmf->pmd,vmf->address, &vmf->ptl);if (unlikely(!vmf->pte))return 0;vmf->orig_pte = ptep_get_lockless(vmf->pte);vmf->flags |= FAULT_FLAG_ORIG_PTE_VALID;if (pte_none(vmf->orig_pte)) {pte_unmap(vmf->pte);vmf->pte = NULL;}}// 如果还没有映射物理页,其中在填充页表的时候会设置AF位,可以参考vm_get_page_protif (!vmf->pte)return do_pte_missing(vmf);// 如果已经被交换出去if (!pte_present(vmf->orig_pte))return do_swap_page(vmf);// 用于触发NUMA平衡,它会周期地把部分虚拟页对应PTE设置位PROT_NONE,读和写都会触发异常if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma))return do_numa_page(vmf);spin_lock(vmf->ptl);entry = vmf->orig_pte;// 通过其他路径已经设置了页表项if (unlikely(!pte_same(ptep_get(vmf->pte), entry))) {update_mmu_tlb(vmf->vma, vmf->address, vmf->pte);goto unlock;}if (vmf->flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) {if (!pte_write(entry)) // 写时复制return do_wp_page(vmf);else if (likely(vmf->flags & FAULT_FLAG_WRITE))entry = pte_mkdirty(entry);}// 对于AF位触发的缺页,上面地条件不会满足,会走这里,设置AF位entry = pte_mkyoung(entry);if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry,vmf->flags & FAULT_FLAG_WRITE)) {update_mmu_cache_range(vmf, vmf->vma, vmf->address,vmf->pte, 1);} else {/* Skip spurious TLB flush for retried page fault */if (vmf->flags & FAULT_FLAG_TRIED)goto unlock;/** This is needed only for protection faults but the arch code* is not yet telling us if this is a protection fault or not.* This still avoids useless tlb flushes for .text page faults* with threads.*/if (vmf->flags & FAULT_FLAG_WRITE)flush_tlb_fix_spurious_fault(vmf->vma, vmf->address,vmf->pte);}
unlock:pte_unmap_unlock(vmf->pte, vmf->ptl);return 0;
}
而clear_refs地实现就利用了这一点,这里是关于这个节点的用法:/proc/pid/clear_refs
# 清除进程所有虚拟区域的 Access/PG_reference
# DEFINE: CLEAR_REFS_ALL 1
echo 1 > /proc/PID/clear_refs# 清除进程所有匿名映射区域的 Access/PG_reference
# DEFINE: CLEAR_REFS_ANON 2
echo 2 > /proc/PID/clear_refs# 清除进程所有文件映射区域的 Access/PG_reference
# DEFINE: CLEAR_REFS_MAPPED 3
echo 3 > /proc/PID/clear_refs# 清除进程所有软脏页标志
# DEFINE: CLEAR_REFS_SOFT_DIRTY 4
echo 4 > /proc/PID/clear_refs# 重置进程的 Hiwater_rss
# DEFINE: CLEAR_REFS_MM_HIWATER_RSS 5
echo 5 > /proc/PID/clear_refs
实现
当向clear_refs写入数值时,函数clear_refs_write被回调,这个函数中会调用:
walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp);
其中walk_page_range负责遍历页表,在遍历的过程中会回调clear_refs_walk_ops中的函数:
static const struct mm_walk_ops clear_refs_walk_ops = {.pmd_entry = clear_refs_pte_range,.test_walk = clear_refs_test_walk,.walk_lock = PGWALK_WRLOCK,
};
- test_walk回调:用于判断是否跳过当前vma,返回0表示需要遍历当前vma,返回-1表示结束遍历,返回1表示跳过当前vma
- pmd_entry回调:处理一个非空的PMD entry
先看一下如何判断是否遍历当前vma的实现:
static int clear_refs_test_walk(unsigned long start, unsigned long end,struct mm_walk *walk)
{struct clear_refs_private *cp = walk->private;struct vm_area_struct *vma = walk->vma;// 不是通过struct page来映射的if (vma->vm_flags & VM_PFNMAP)return 1;/** Writing 1 to /proc/pid/clear_refs affects all pages.* Writing 2 to /proc/pid/clear_refs only affects anonymous pages.* Writing 3 to /proc/pid/clear_refs only affects file mapped pages.* Writing 4 to /proc/pid/clear_refs affects all pages.*/// 如果要清除的是匿名页,但是当前vma映射到的是文件,那么跳过当前vmaif (cp->type == CLEAR_REFS_ANON && vma->vm_file)return 1;// 如果要清除的是文件页,但是当前vma是匿名的,那么跳过当前vmaif (cp->type == CLEAR_REFS_MAPPED && !vma->vm_file)return 1;// 处理当前vmareturn 0;
}
接下来看看如何清除页表项的AF位:
static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,unsigned long end, struct mm_walk *walk)
{struct clear_refs_private *cp = walk->private;struct vm_area_struct *vma = walk->vma;pte_t *pte, ptent;spinlock_t *ptl;struct folio *folio;ptl = pmd_trans_huge_lock(pmd, vma);if (ptl) { // 如果是PMD映射的巨型页if (cp->type == CLEAR_REFS_SOFT_DIRTY) {clear_soft_dirty_pmd(vma, addr, pmd);goto out;}if (!pmd_present(*pmd)) // 如果被swap出去了,跳过goto out;folio = pmd_folio(*pmd);/* Clear accessed and referenced bits. */pmdp_test_and_clear_young(vma, addr, pmd); // 清除PMD页表项的AF位folio_test_clear_young(folio);folio_clear_referenced(folio);
out:spin_unlock(ptl);return 0;}pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);if (!pte) {walk->action = ACTION_AGAIN;return 0;}for (; addr != end; pte++, addr += PAGE_SIZE) {ptent = ptep_get(pte);if (cp->type == CLEAR_REFS_SOFT_DIRTY) {clear_soft_dirty(vma, addr, pte);continue;}if (!pte_present(ptent)) // 如果被swap出去了,跳过continue;folio = vm_normal_folio(vma, addr, ptent);if (!folio)continue;/* Clear accessed and referenced bits. */ptep_test_and_clear_young(vma, addr, pte); // 清除PTE页表项的AF位folio_test_clear_young(folio);folio_clear_referenced(folio);}pte_unmap_unlock(pte - 1, ptl);cond_resched();return 0;
}
实验
下面通过实验来观察和学习:
思路是:
- 进程通过malloc申请一块匿名内存,然后通过memset或者mlock等接口事先分配好物理页。接着反复去访问这段内存
- 通过向clear_refs写入2来清除匿名页的AF位
- 通过各种工具来观察缺页
测试程序
leak2.c
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/mman.h>char *addr;// 16MB
int size = 0x1000*0x1000;int func3(void)
{static int i = 0;char *access;int ret = 0;printf("%s enter.\n", __func__);access = addr + 0x1000*i;printf("p: %d, s: %p, e: %p, %s access addr: %p\n",getpid(), addr, addr + size,i&0x1 ? "write" : "read",access);if (i & 0x1)*access = 0x5a; // 触发写访问缺页elseret = *access; // 触发读访问缺页sleep(1);i++;if (i >= 0x1000)i = 0;return ret;
}int func2(void)
{printf("%s enter.\n", __func__);return func3();
}int func1(void)
{printf("%s enter.\n", __func__);return func2();
}int main(void)
{int ret;printf("%s enter.\n", __func__);addr = malloc(size);if (!addr) {printf("alloc buf failed\n");return -1;}/*为了测试方便,使更容易观察到缺页,不使用THP,即不使用透明巨型页映射。需要注意的是,不能把将THP的策略配置为always,否则总是会按照2MB的巨型页去映射root@arm64:/sys/kernel/mm/transparent_hugepage# cat enabledalways [madvise] never*/ret = madvise((void *)((unsigned long)addr & ~(0x1000 - 1)), size, MADV_NOHUGEPAGE);if (ret < 0) {perror("set nohugepage failed");return -1;}// 这个区域如果发生缺页的话,一次只映射一个page,由于下面用了mlockall,这步可以不做ret = madvise((void *)((unsigned long)addr & ~(0x1000 - 1)), size, MADV_RANDOM);if (ret < 0) {perror("set random failed\n");return -1;}// 预先给这片区域映射物理页// memset(addr, 0, size);if (mlockall(MCL_CURRENT | MCL_FUTURE) < 0) {perror("mlockall failed");return -1;}while (1)func1();return 0;
}
开始运行后,可以看到如下日志:
func1 enter.
func2 enter.
func3 enter.
p: 2058, s: 0xffff9a600010, e: 0xffff9b600010, write access addr: 0xffff9a611010
func1 enter.
func2 enter.
func3 enter.
p: 2058, s: 0xffff9a600010, e: 0xffff9b600010, read access addr: 0xffff9a612010
查看映射
root@arm64:/sys/kernel/mm/transparent_hugepage# pmap -x `pidof leak2`
2058: ./leak2
Address Kbytes RSS Dirty Mode Mapping
0000aaaab9690000 4 4 0 r-x-- leak2
0000aaaab96a1000 4 4 4 r---- leak2
0000aaaab96a2000 4 4 4 rw--- leak2
0000aaaae9238000 132 132 132 rw--- [ anon ]
> 0000ffff9a600000 16384 16384 16384 rw--- [ anon ]
0000ffff9b600000 4 4 4 rw--- [ anon ]
0000ffff9b796000 1388 1388 0 r-x-- libc-2.31.so
0000ffff9b8f1000 60 0 0 ----- libc-2.31.so
0000ffff9b900000 16 16 16 r---- libc-2.31.so
0000ffff9b904000 8 8 8 rw--- libc-2.31.so
0000ffff9b906000 12 12 12 rw--- [ anon ]
0000ffff9b909000 132 132 0 r-x-- ld-2.31.so
0000ffff9b92b000 8 8 8 rw--- [ anon ]
0000ffff9b937000 8 0 0 r---- [ anon ]
0000ffff9b939000 4 4 0 r-x-- [ anon ]
0000ffff9b93a000 4 4 4 r---- ld-2.31.so
0000ffff9b93b000 8 8 8 rw--- ld-2.31.so
0000ffffd0312000 132 132 132 rw--- [ stack ]
---------------- ------- ------- -------
total kB 18312 18244 16716
使用crash的vtop命令确认一下是否为按4KB的物理页映射的:
crash> vtop ffff9a600000
VIRTUAL PHYSICAL
ffff9a600000 138ae7000PAGE DIRECTORY: ffff0000d719b000PGD: ffff0000d719bff8 => 800000116e70003PUD: ffff0000d6e70ff0 => 800000116f6d003PMD: ffff0000d6f6d698 => 800000116b4d003PTE: ffff0000d6b4d000 => e8000138ae7f43PAGE: 138ae7000PTE PHYSICAL FLAGS
e8000138ae7f43 138ae7000 (VALID|USER|SHARED|AF|NG|PXN|UXN|DIRTY)VMA START END FLAGS FILE
ffff0000d739d768 ffff9a600000 ffff9b600000 40112073PAGE PHYSICAL MAPPING INDEX CNT FLAGS
fffffdffc3e2b9c0 138ae7000 ffff0000cc765cc9 ffff9a600 1 bfffe00001d0028 uptodate,lru,mappedtodisk,swapbacked,unevictable,mlocked
上面PTE这行就是虚拟地址ffff9a600000对用的PTE页表项的内容的解析。
上面指示的区域就是malloc申请的16MB的匿名页内存区域,RSS大小也是16MB,意味着这块虚拟内存已经全部映射到了物理页。
缺页次数
top - 14:59:55 up 28 min, 4 users, load average: 0.08, 0.33, 0.50
Tasks: 1 total, 0 running, 1 sleeping, 0 stopped, 0 zombie
%Cpu(s): 0.0 us, 2.3 sy, 0.0 ni, 97.7 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
MiB Mem : 3658.2 total, 2607.2 free, 612.3 used, 438.7 buff/cache
MiB Swap: 0.0 total, 0.0 free, 0.0 used. 2904.3 avail MemnMaj nMin PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND0 4271 2058 pengdl 20 0 18312 18100 1460 S 0.0 0.5 0:01.67 leak2
通过top命令统计leak2进程的发生的缺页次数,主要是nMin(次缺页)。
正常情况下,nMin是恒定的,当执行下面的命令后:
echo 2 > /proc/`pidof leak2`/clear_refs
然后可以看到nMin会每秒增加1。用pidstat也可以佐证:
root@arm64:~# pidstat -r 1 -p `pidof leak2`
Linux 6.10.0+ (arm64) 08/23/24 _aarch64_ (4 CPU)15:16:33 UID PID minflt/s majflt/s VSZ RSS %MEM Command
15:16:34 1000 2058 0.99 0.00 18312 18100 0.48 leak2
15:16:35 1000 2058 1.00 0.00 18312 18100 0.48 leak2
15:16:36 1000 2058 1.00 0.00 18312 18100 0.48 leak2
15:16:37 1000 2058 0.99 0.00 18312 18100 0.48 leak2
15:16:38 1000 2058 1.00 0.00 18312 18100 0.48 leak2
15:16:39 1000 2058 1.00 0.00 18312 18100 0.48 leak2
15:16:40 1000 2058 1.00 0.00 18312 18100 0.48 leak2
内核是如何统计nMaj和nMin的呢?可以参考mm_account_fault。nMaj表示在处理缺页的时候需要从后备存储(如文件、swap设备、块设备等)读取数据到page,然后进行映射。而nMin表示数据已经在内存里了,只需要修改一下页表映射,相比之下nMin的开销要比nMaj小很多。
使用perf观察缺页,并且记录调用栈
perf支持缺页事件:
# perf list
...major-faults [Software event]minor-faults [Software event]page-faults OR faults [Software event]
...
可以参考内核代码,其实上面两个事件也是在mm_account_fault中进行记录的:
/*** mm_account_fault - Do page fault accounting* @mm: mm from which memcg should be extracted. It can be NULL.* @regs: the pt_regs struct pointer. When set to NULL, will skip accounting* of perf event counters, but we'll still do the per-task accounting to* the task who triggered this page fault.* @address: the faulted address.* @flags: the fault flags.* @ret: the fault retcode.** This will take care of most of the page fault accounting. Meanwhile, it* will also include the PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN] perf counter* updates. However, note that the handling of PERF_COUNT_SW_PAGE_FAULTS should* still be in per-arch page fault handlers at the entry of page fault.*/
static inline void mm_account_fault(struct mm_struct *mm, struct pt_regs *regs,unsigned long address, unsigned int flags,vm_fault_t ret)
{bool major;/* Incomplete faults will be accounted upon completion. */if (ret & VM_FAULT_RETRY)return;/** To preserve the behavior of older kernels, PGFAULT counters record* both successful and failed faults, as opposed to perf counters,* which ignore failed cases.*/count_vm_event(PGFAULT);count_memcg_event_mm(mm, PGFAULT);/** Do not account for unsuccessful faults (e.g. when the address wasn't* valid). That includes arch_vma_access_permitted() failing before* reaching here. So this is not a "this many hardware page faults"* counter. We should use the hw profiling for that.*/if (ret & VM_FAULT_ERROR)return;/** We define the fault as a major fault when the final successful fault* is VM_FAULT_MAJOR, or if it retried (which implies that we couldn't* handle it immediately previously).*/major = (ret & VM_FAULT_MAJOR) || (flags & FAULT_FLAG_TRIED);if (major)current->maj_flt++;elsecurrent->min_flt++;/** If the fault is done for GUP, regs will be NULL. We only do the* accounting for the per thread fault counters who triggered the* fault, and we skip the perf event updates.*/if (!regs)return;if (major)perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);elseperf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
}
执行如下命令:
# perf record -e minor-faults -g -p `pidof leak2`
# perf script
root@arm64:~# perf script
leak2 2058 4140.293988: 1 minor-faults:aaaab9690a60 func3+0xe4 (/home/pengdl/demo/kmemleak/leak2)aaaab9690ae4 func2+0x20 (/home/pengdl/demo/kmemleak/leak2)aaaab9690b0c func1+0x20 (/home/pengdl/demo/kmemleak/leak2)aaaab9690c54 main+0x140 (/home/pengdl/demo/kmemleak/leak2)ffff9b7b6e10 __libc_start_main+0xe8 (/usr/lib/aarch64-linux-gnu/libc-2.31.so)aaaab96908a4 _start+0x34 (/home/pengdl/demo/kmemleak/leak2)leak2 2058 4141.307048: 1 minor-faults:aaaab9690a6c func3+0xf0 (/home/pengdl/demo/kmemleak/leak2)aaaab9690ae4 func2+0x20 (/home/pengdl/demo/kmemleak/leak2)aaaab9690b0c func1+0x20 (/home/pengdl/demo/kmemleak/leak2)aaaab9690c54 main+0x140 (/home/pengdl/demo/kmemleak/leak2)ffff9b7b6e10 __libc_start_main+0xe8 (/usr/lib/aarch64-linux-gnu/libc-2.31.so)aaaab96908a4 _start+0x34 (/home/pengdl/demo/kmemleak/leak2)
...
使用mem_abort事件
trace_event
内核导出了下面的trace point:
root@arm64:/sys/kernel/debug/tracing/events/exceptions# ls -l
total 0
-rw-r----- 1 root root 0 Aug 23 15:43 enable
-rw-r----- 1 root root 0 Aug 23 15:43 filter
drwxr-xr-x 1 root root 0 Aug 23 15:36 mem_abort_kernel
drwxr-xr-x 1 root root 0 Aug 23 15:36 mem_abort_user
从名字可以看到,当进程在用户态触发了mem abort,那么会触发mem_abort_user事件:
root@arm64:/sys/kernel/debug/tracing/events/exceptions/mem_abort_user# cat format
name: mem_abort_user
ID: 32
format:field:unsigned short common_type; offset:0; size:2; signed:0;field:unsigned char common_flags; offset:2; size:1; signed:0;field:unsigned char common_preempt_count; offset:3; size:1; signed:0;field:int common_pid; offset:4; size:4; signed:1;field:unsigned long address; offset:8; size:8; signed:0;field:unsigned long ip; offset:16; size:8; signed:0;field:unsigned long error_code; offset:24; size:8; signed:0;print fmt: "address=%ps ip=%ps error_code=0x%lx", (void *)REC->address, (void *)REC->ip, REC->error_code
可以使用这个事件进行测试:
# cd /sys/kernel/debug/tracing/events/exceptions/mem_abort_user
# echo 'comm ~ "leak2"' > filter
# echo 1 > enable
# echo 1 > /sys/kernel/tracing/tracing_on
可以看到如下日志:
root@arm64:/sys/kernel/debug/tracing/events/exceptions/mem_abort_user# cat /sys/kernel/tracing/trace_pipeleak2-2058 [001] ..... 5312.321453: mem_abort_user: address=0xffff9a71e010 ip=0xaaaab9690a6c error_code=0x9200000bleak2-2058 [001] ..... 5313.327391: mem_abort_user: address=0xffff9a71f010 ip=0xaaaab9690a60 error_code=0x9200004bleak2-2058 [001] ..... 5314.331440: mem_abort_user: address=0xffff9a720010 ip=0xaaaab9690a6c error_code=0x9200000bleak2-2058 [001] ..... 5315.337957: mem_abort_user: address=0xffff9a721010 ip=0xaaaab9690a60 error_code=0x9200004b
此外,也可以对trace event进行配置,当记录事件的时候把内核栈和用户栈也一并记录下来:
# cd /sys/kernel/debug/tracing/options
# echo 1 > userstacktrace
# echo 1 > stacktrace
# echo 1 > sym-userobj
此时看到的日志如下:
leak2-2058 [001] ..... 5537.394169: mem_abort_user: address=0xffff9a7fe010 ip=0xaaaab9690a6c error_code=0x9200000bleak2-2058 [001] ..... 5537.394650: <stack trace>=> do_mem_abort=> el0_da=> el0t_64_sync_handler=> el0t_64_syncleak2-2058 [001] ..... 5537.394672: <user stack trace>=> /home/pengdl/demo/kmemleak/leak2[+0xa6c]=> /home/pengdl/demo/kmemleak/leak2[+0xae4]=> /home/pengdl/demo/kmemleak/leak2[+0xb0c]=> /home/pengdl/demo/kmemleak/leak2[+0xc54]=> /usr/lib/aarch64-linux-gnu/libc-2.31.so[+0x20e10]=> /home/pengdl/demo/kmemleak/leak2[+0x8a4]leak2-2058 [001] ..... 5538.399253: mem_abort_user: address=0xffff9a7ff010 ip=0xaaaab9690a60 error_code=0x9200004bleak2-2058 [001] ..... 5538.401479: <stack trace>=> do_mem_abort=> el0_da=> el0t_64_sync_handler=> el0t_64_syncleak2-2058 [001] ..... 5538.401545: <user stack trace>=> /home/pengdl/demo/kmemleak/leak2[+0xa60]=> /home/pengdl/demo/kmemleak/leak2[+0xae4]=> /home/pengdl/demo/kmemleak/leak2[+0xb0c]=> /home/pengdl/demo/kmemleak/leak2[+0xc54]=> /usr/lib/aarch64-linux-gnu/libc-2.31.so[+0x20e10]=> /home/pengdl/demo/kmemleak/leak2[+0x8a4]
使用perf
内核导出了mem_abort事件:
root@arm64:~# perf list | grep mem_abortexceptions:mem_abort_kernel [Tracepoint event]exceptions:mem_abort_user [Tracepoint event]
然后使用下面的命令记录:
# perf record -e exceptions:mem_abort_user -g -p `pidof leak2`
解析抓到的数据:
root@arm64:~# perf script
leak2 2058 [000] 4855.336563: exceptions:mem_abort_user: address=0xffff9b557010 ip=0xaaaab9690a60 error_code=0x9200004bffff80008002aae0 do_mem_abort+0xc8 ([kernel.kallsyms])ffff80008002aae0 do_mem_abort+0xc8 ([kernel.kallsyms])ffff800080c73380 el0_da+0x38 ([kernel.kallsyms])ffff800080c74504 el0t_64_sync_handler+0xe4 ([kernel.kallsyms])ffff80008001150c el0t_64_sync+0x14c ([kernel.kallsyms])aaaab9690a60 func3+0xe4 (/home/pengdl/demo/kmemleak/leak2)aaaab9690ae4 func2+0x20 (/home/pengdl/demo/kmemleak/leak2)aaaab9690b0c func1+0x20 (/home/pengdl/demo/kmemleak/leak2)aaaab9690c54 main+0x140 (/home/pengdl/demo/kmemleak/leak2)ffff9b7b6e10 __libc_start_main+0xe8 (/usr/lib/aarch64-linux-gnu/libc-2.31.so)aaaab96908a4 _start+0x34 (/home/pengdl/demo/kmemleak/leak2)
此外,因为是基于trace point,所以还可以对数据进行筛选和过滤,比如:
root@arm64:~# perf record -e exceptions:mem_abort_user -g --filter 'address <= 0xffff9b600010 && address >= 0xffff9a600010 && common_pid == 2058 && comm ~ "leak2"'
完。