本文将基于一个简单的用户态段错误问题,简单梳理下arm64平台SegmentFault处理流程。
1. demo
1.1 运行环境
- Linux + arm64平台
- kernel 4.9
- gcc version 6.3.1 20170404 (Linaro GCC 6.3-2017.05)
1.2 测试程序(el0_da.c)
1 |
|
反汇编如下
1 | 0000000000000000 <main>: |
1.3 运行结果
1 | ./run.sh: line 9: 1131 Segmentation fault (core dumped) ./el0_da |
dmesg打印的kernel log如下
1 | $ dmesg -c |
2. 处理流程
2.1 page fault
- 用户态进程访问了非法地址后, CPU的MMU无法完成虚拟地址到物理地址的转换,从而产生page fault异常。
- 此后,由用户态切换到内核态。
2.2 异常向量表
- 源码位于arch/arm64/kernel/entry.S
- 用户态触发的访问内存异常, 最终会进入到异常向量表的el0_sync
el0_sync如下
1 | /* |
这里简单解释下
- kernel_entry: 构造pt_regs相关的数据(包括通用目的寄存器,sp, pc等),保存到当前内核栈
- esr_el1是异常诊断寄存器,用于存储跳转EL1的异常相关信息
高6位是exception class, 用于标识当前异常的类型
根据前面的测试用例,esr值为0x92000045,则exception class= esr >> 26 = 0x24, 对应ESR_ELx_EC_DABT_LOW
1 | #define ESR_ELx_EC_DABT_LOW (0x24) |
会跳到el0_da继续处理,el0_da的实现如下
1 | el0_da: |
el0_da的操作
do_mem_abort()
- far_el1是出错的内存地址,保存到x0
- x25是esr_el1,保存到x1
- sp是保存的struct pt_regs基地址,保存到x2
ret_to_user()
- 调用kernel_exit 0, 最终返回用户态。
2.3 do_mem_abort
源码位于arch/arm64/mm/fault.c
1 | /* |
esr_to_fault_info()函数用于从esr的低6bit取出错误状态码DFSC(Data Fault Status Code)
DFSC | 说明 |
---|---|
000000 | Address size fault, level 0 of translation or translation table base register |
000001 | Address size fault, level 1 |
000010 | Address size fault, level 2 |
000011 | Address size fault, level 3 |
000100 | Translation fault, level 0 |
000101 | Translation fault, level 1 |
000110 | Translation fault, level 2 |
000111 | Translation fault, level 3 |
001001 | Access flag fault, level 1 |
001010 | Access flag fault, level 2 |
001011 | Access flag fault, level 3 |
001101 | Permission fault, level 1 |
001110 | Permission fault, level 2 |
001111 | Permission fault, level 3 |
010000 | Synchronous External abort, not on translation table walk |
011000 | Synchronous parity or ECC error on memory access, not on translation table walk |
010100 | Synchronous External abort, on translation table walk, level 0 |
010101 | Synchronous External abort, on translation table walk, level 1 |
010110 | Synchronous External abort, on translation table walk, level 2 |
010111 | Synchronous External abort, on translation table walk, level 3 |
011100 | Synchronous parity or ECC error on memory access on translation table walk, level 0 |
011101 | Synchronous parity or ECC error on memory access on translation table walk, level 1 |
011110 | Synchronous parity or ECC error on memory access on translation table walk, level 2 |
011111 | Synchronous parity or ECC error on memory access on translation table walk, level 3 |
100001 | Alignment fault |
110000 | TLB conflict abort |
110001 | Unsupported atomic hardware update fault, if the implementation includes ARMv8.1-TTHM. Otherwise reserved. |
110100 | IMPLEMENTATION DEFINED fault (Lockdown) |
110101 | IMPLEMENTATION DEFINED fault (Unsupported Exclusive or Atomic access) |
111101 | Section Domain Fault, used only for faults reported in the PAR_EL1 |
111110 | Page Domain Fault, used only for faults reported in the PAR_EL1 |
而fault_info[]是一个struct fault_info结构体数组,对应这64种错误状态码的处理
1 | static const struct fault_info fault_info[] = { |
dfsc = esr & 0x3f = 0x92000045 & 0x3f = 0x5, 对应fault_info[]中下标为5的元素(即fault_info[5])"level 1 translation fault",下一步会跳到do_translation_fault()处理。
2.4 do_translation_fault
1 | /* |
这里会跳到do_page_fault()
2.5 do_page_fault
do_page_fault()主要会调用
- __do_page_fault()
- __do_user_fault()
__do_page_fault()的实现如下
1 | static int __do_page_fault(struct mm_struct *mm, unsigned long addr, |
在本例中,__do_page_fault()没有找到包含出错地址的vma,因此直接返回VM_FAULT_BADMAP。
前面的page fault无法处理后, 若是用户态page fault,最终会走到__do_user_fault()
2.6 __do_user_fault
1 | static void __do_user_fault(struct task_struct *tsk, unsigned long addr, |
__do_user_fault()主要做几件事:
2.6.1 打印出错进程信息
1 | el0_da[1131]: unhandled level 1 translation fault (11) at 0x00000000, esr 0x92000045 |
2.6.2 show_pte()
- 打印pgd/pud/pmd等信息
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39/*
* Dump out the page tables associated with 'addr' in mm 'mm'.
*/
void show_pte(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
if (!mm)
mm = &init_mm;
pr_alert("pgd = %p\n", mm->pgd);
pgd = pgd_offset(mm, addr);
pr_alert("[%08lx] *pgd=%016llx", addr, pgd_val(*pgd));
do {
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
if (pgd_none(*pgd) || pgd_bad(*pgd))
break;
pud = pud_offset(pgd, addr);
printk(", *pud=%016llx", pud_val(*pud));
if (pud_none(*pud) || pud_bad(*pud))
break;
pmd = pmd_offset(pud, addr);
printk(", *pmd=%016llx", pmd_val(*pmd));
if (pmd_none(*pmd) || pmd_bad(*pmd))
break;
pte = pte_offset_map(pmd, addr);
printk(", *pte=%016llx", pte_val(*pte));
pte_unmap(pte);
} while(0);
printk("\n");
}
2.6.3 show_regs()
- 源码位于arch/arm64/kernel/process.c
- 打印PC/LR/SP/通用目的寄存器等
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43void __show_regs(struct pt_regs *regs)
{
int i, top_reg;
u64 lr, sp;
if (compat_user_mode(regs)) {
lr = regs->compat_lr;
sp = regs->compat_sp;
top_reg = 12;
} else {
lr = regs->regs[30];
sp = regs->sp;
top_reg = 29;
}
show_regs_print_info(KERN_DEFAULT);
print_symbol("PC is at %s\n", instruction_pointer(regs));
print_symbol("LR is at %s\n", lr);
printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n",
regs->pc, lr, regs->pstate);
printk("sp : %016llx\n", sp);
i = top_reg;
while (i >= 0) {
printk("x%-2d: %016llx ", i, regs->regs[i]);
i--;
if (i % 2 == 0) {
pr_cont("x%-2d: %016llx ", i, regs->regs[i]);
i--;
}
pr_cont("\n");
}
printk("\n");
}
/*
 * Print an empty line to the kernel log, then dump the register state in
 * 'regs' by calling __show_regs().
 */
void show_regs(struct pt_regs * regs)
{
printk("\n");
__show_regs(regs);
}
show_regs_print_info()相关
- 源码位于kernel/printk/printk.c
- 用于打印通用的debug信息
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29void dump_stack_print_info(const char *log_lvl)
{
printk("%sCPU: %d PID: %d Comm: %.20s %s %s %.*s\n",
log_lvl, raw_smp_processor_id(), current->pid, current->comm,
print_tainted(), init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
if (dump_stack_arch_desc_str[0] != '\0')
printk("%sHardware name: %s\n",
log_lvl, dump_stack_arch_desc_str);
print_worker_info(log_lvl, current);
}
/**
 * show_regs_print_info - print generic debug info for show_regs()
 * @log_lvl: log level
 *
 * show_regs() implementations can use this function to print out generic
 * debug information.
 *
 * Calls dump_stack_print_info() and then prints one more line with the
 * current task pointer and the value of task_stack_page(current).
 */
void show_regs_print_info(const char *log_lvl)
{
dump_stack_print_info(log_lvl);
printk("%stask: %p task.stack: %p\n",
log_lvl, current, task_stack_page(current));
}
2.6.4 force_sig_info()
- 源码位于kernel/signal.c
- 用于向进程发送信号信息
1 | /* |
3. 总结
- 本文通过简单例子,分析SegmentFault的处理流程
- 针对SegmentFault问题,可以借助gdb在线调试进程,或离线分析core dump等方式,来定位具体出错的地方。