我们知道kasan是通过影子区域来映射内存的,本文主要通过代码的方式,详细说明一下kasan在我的机器上的内存布局情况。
首先可以通过 Documentation/arm64/memory.rst 查看内存布局情况,假设是4k+4levels的配置,如下
Start End Size Use ----------------------------------------------------------------------- 0000000000000000 0000ffffffffffff 256TB user ffff000000000000 ffff7fffffffffff 128TB kernel logical memory map ffff800000000000 ffff9fffffffffff 32TB kasan shadow region ffffa00000000000 ffffa00007ffffff 128MB bpf jit region ffffa00008000000 ffffa0000fffffff 128MB modules ffffa00010000000 fffffdffbffeffff ~93TB vmalloc fffffdffbfff0000 fffffdfffe5f8fff ~998MB [guard region] fffffdfffe5f9000 fffffdfffe9fffff 4124KB fixed mappings fffffdfffea00000 fffffdfffebfffff 2MB [guard region] fffffdfffec00000 fffffdffffbfffff 16MB PCI I/O space fffffdffffc00000 fffffdffffdfffff 2MB [guard region] fffffdffffe00000 ffffffffffdfffff 2TB vmemmap ffffffffffe00000 ffffffffffffffff 2MB [guard region]
然而实际上我的设备是3levels+4k的配置,地址空间是39bit。所以上述这个不适用我的实际情况。为了了解实际情况,如下两种方法能够得到信息
首先显而易见的是内核提供了打印pt信息的驱动,我们打开即可,如下
CONFIG_PTDUMP_CORE=y CONFIG_PTDUMP_DEBUGFS=y
此时我们可以获取当前使用情况下的布局,如下
# cat /sys/kernel/debug/kernel_page_tables 0x0000000000000000-0xffffff8000000000 17179868672G PGD 0xffffff8000000000-0xffffff8000200000 2M PMD 0xffffff8000200000-0xffffff8000400000 2M PTE RW NX SHD AF UXN MEM/NORMAL-TAGGED 0xffffff8200000000-0xffffffc000000000 248G PGD ---[ Linear Mapping end ]--- ---[ Kasan shadow start ]--- 0xffffffc000000000-0xffffffc000040000 256K PTE 0xffffffcfd7ffe000-0xffffffd000000000 655368K PTE ro NX SHD AF UXN MEM/NORMAL ---[ Kasan shadow end ]--- ---[ Modules start ]--- 0xffffffd000000000-0xffffffd003800000 56M PMD 0xffffffd003800000-0xffffffd003a00000 2M PTE 0xffffffd003a00000-0xffffffd008000000 70M PMD ---[ Modules end ]--- ---[ vmalloc() area ]--- 0xffffffd008000000-0xffffffd00b800000 56M PMD ro x SHD AF BLK UXN MEM/NORMAL 0xfffffffebfde0000-0xfffffffebfff0000 2112K PTE RW NX SHD AF UXN MEM/NORMAL ---[ vmalloc() end ]--- 0xfffffffebfff0000-0xfffffffec0000000 64K PTE 0xfffffffec0000000-0xfffffffefe400000 996M PMD 0xfffffffefe400000-0xfffffffefe5f9000 2020K PTE ---[ Fixmap start ]--- 0xfffffffefe5f9000-0xfffffffefe5fb000 8K PTE 0xfffffffefe600000-0xfffffffefe800000 2M PMD ro NX SHD AF BLK UXN MEM/NORMAL 0xfffffffefe800000-0xfffffffefea00000 2M PMD ---[ Fixmap end ]--- 0xfffffffefea00000-0xfffffffefec00000 2M PMD ---[ PCI I/O start ]--- 0xfffffffefec00000-0xfffffffeffc00000 16M PMD ---[ PCI I/O end ]--- 0xfffffffeffc00000-0xfffffffeffe00000 2M PMD ---[ vmemmap start ]--- 0xfffffffeffe00000-0xffffffff03a00000 60M PMD RW NX SHD AF BLK UXN MEM/NORMAL 0xffffffff40000000-0x0000000000000000 3G PGD
可以看到,这里将当前系统的内存布局打印出来了。这是获取其内存布局的第一个方法
第二个方法是恢复内核启动时的内存布局打印:早期的linux启动时会打印内存布局,但是如下这笔提交将其删掉了,我们恢复其内容即可
commit 071929dbdd865f779a89ba3f1e06ba8d17dd3743 Author: Laura Abbott <labbott@redhat.com> Date: Tue Dec 19 11:28:10 2017 -0800 arm64: Stop printing the virtual memory layout Printing kernel addresses should be done in limited circumstances, mostly for debugging purposes. Printing out the virtual memory layout at every kernel bootup doesn't really fall into this category so delete the prints. There are other ways to get the same information. Acked-by: Kees Cook <keescook@chromium.org> Signed-off-by: Laura Abbott <labbott@redhat.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
修改代码如下
# git diff arch/arm64/mm/init.c diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 82cdb35eda6b..e17c132f9b86 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -598,6 +598,49 @@ void __init mem_init(void) mem_init_print_info(NULL); +#define MLK(b, t) b, t, ((t) - (b)) >> 10 +#define MLM(b, t) b, t, ((t) - (b)) >> 20 +#define MLG(b, t) b, t, ((t) - (b)) >> 30 +#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K) + + pr_notice("Virtual kernel memory layout:\n"); +#ifdef CONFIG_KASAN + pr_notice(" kasan : 0x%16lx - 0x%16lx (%6ld GB)\n", + MLG(KASAN_SHADOW_START, KASAN_SHADOW_END)); +#endif + pr_notice(" modules : 0x%16lx - 0x%16lx (%6ld MB)\n", + MLM(MODULES_VADDR, MODULES_END)); + pr_notice(" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n", + MLG(VMALLOC_START, VMALLOC_END)); + pr_notice(" .text : 0x%px" " - 0x%px" " (%6ld KB)\n", + MLK_ROUNDUP(_text, _etext)); + pr_notice(" .rodata : 0x%px" " - 0x%px" " (%6ld KB)\n", + MLK_ROUNDUP(__start_rodata, __init_begin)); + pr_notice(" .init : 0x%px" " - 0x%px" " (%6ld KB)\n", + MLK_ROUNDUP(__init_begin, __init_end)); + pr_notice(" .data : 0x%px" " - 0x%px" " (%6ld KB)\n", + MLK_ROUNDUP(_sdata, _edata)); + pr_notice(" .bss : 0x%px" " - 0x%px" " (%6ld KB)\n", + MLK_ROUNDUP(__bss_start, __bss_stop)); + pr_notice(" fixed : 0x%16lx - 0x%16lx (%6ld KB)\n", + MLK(FIXADDR_START, FIXADDR_TOP)); + pr_notice(" PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n", + MLM(PCI_IO_START, PCI_IO_END)); +#ifdef CONFIG_SPARSEMEM_VMEMMAP + pr_notice(" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n", + MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE)); + pr_notice(" 0x%16lx - 0x%16lx (%6ld MB actual)\n", + MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()), + (unsigned long)virt_to_page(high_memory))); +#endif + pr_notice(" memory : 0x%16lx - 0x%16lx (%6ld MB)\n", + MLM(__phys_to_virt(memblock_start_of_DRAM()), + (unsigned long)high_memory)); + +#undef MLK +#undef MLM +#undef MLK_ROUNDUP + /* * Check boundaries twice: 
Some fundamental inconsistencies can be * detected at build time already.
此时启动的打印如下
[ 5.744182] Memory: 3093508K/4175872K available (57344K kernel code, 14170K rwdata, 16288K rodata, 20224K init, 1361K bss, 1074172K reserved, 8192K cma-reserved) [ 5.745514] Virtual kernel memory layout: [ 5.745894] kasan : 0xffffffc000000000 - 0xffffffd000000000 ( 64 GB) [ 5.746570] modules : 0xffffffd000000000 - 0xffffffd008000000 ( 128 MB) [ 5.747246] vmalloc : 0xffffffd008000000 - 0xfffffffebfff0000 ( 186 GB) [ 5.747922] .text : 0xffffffd008000000 - 0xffffffd00b810000 ( 57408 KB) [ 5.748598] .rodata : 0xffffffd00b810000 - 0xffffffd00c800000 ( 16320 KB) [ 5.749270] .init : 0xffffffd00c800000 - 0xffffffd00dbc0000 ( 20224 KB) [ 5.749945] .data : 0xffffffd00dbc0000 - 0xffffffd00e996a00 ( 14171 KB) [ 5.750620] .bss : 0xffffffd00e997000 - 0xffffffd00eaeb7e0 ( 1362 KB) [ 5.751296] fixed : 0xfffffffefe5f9000 - 0xfffffffefea00000 ( 4124 KB) [ 5.751972] PCI I/O : 0xfffffffefec00000 - 0xfffffffeffc00000 ( 16 MB) [ 5.752647] vmemmap : 0xfffffffeffe00000 - 0xffffffffffe00000 ( 4 GB maximum) [ 5.753385] 0xfffffffeffe08000 - 0xffffffff07e00000 ( 127 MB actual) [ 5.754116] memory : 0xffffff8000200000 - 0xffffff8200000000 ( 8190 MB)
这里也清晰地打印了内存的布局,可以看到的是其没有打印线性映射区的完整范围(memory一行只给出了实际物理内存在线性映射区中对应的地址),这没有关系。
根据上述信息,可以得出当前我的机器环境的内存布局(3 levels + 4k page + 39 va_bit)如下
Start             End               Size     Use
-----------------------------------------------------------------------
0000000000000000  0000007fffffffff  512GB    user
ffffff8000000000  ffffffc000000000  256GB    kernel logical memory map
ffffffc000000000  ffffffd000000000  64GB     kasan shadow region
ffffffd000000000  ffffffd008000000  128MB    modules
ffffffd008000000  ffffffd00eaf0000  ~106MB   kimage(inside vmalloc)
ffffffd008000000  fffffffebfff0000  ~186GB   vmalloc
fffffffebfff0000  fffffffefe5f9000  ~998MB   [guard region]
fffffffefe5f9000  fffffffefea00000  4124KB   fixed mappings
fffffffefea00000  fffffffefec00000  2MB      [guard region]
fffffffefec00000  fffffffeffc00000  16MB     PCI I/O space
fffffffeffc00000  fffffffeffe00000  2MB      [guard region]
fffffffeffe00000  ffffffffffe00000  4GB      vmemmap
ffffffffffe00000  ffffffffffffffff  2MB      [guard region]
为了了解kasan是如何划分影子区域的,我们需要阅读一下源码,流程如下
start_kernel setup_arch kasan_init kasan_init_shadow
重要函数是kasan_init_shadow,贴出来分析
/*
 * Replace the early KASAN shadow with the final shadow mappings.
 *
 * Steps, in order:
 *   1. compute the shadow addresses of the kernel image, the module area
 *      and VMALLOC_END;
 *   2. run from a temporary copy of swapper_pg_dir and clear the page-table
 *      entries covering the whole shadow region;
 *   3. call kasan_map_populate() for the kernel image shadow;
 *   4. call kasan_populate_early_shadow() for the shadow ranges selected
 *      below (which ranges depends on CONFIG_KASAN_VMALLOC);
 *   5. call kasan_map_populate() for the shadow of every memory range;
 *   6. re-point kasan_early_shadow_pte at the early shadow page read-only,
 *      fill that page with KASAN_SHADOW_INIT, and switch back to
 *      swapper_pg_dir.
 *
 * NOTE(review): kasan_map_populate() and kasan_populate_early_shadow() are
 * defined outside this excerpt; presumably the former backs a shadow range
 * with newly allocated pages and the latter maps a range onto the shared
 * early shadow page -- confirm against their definitions.
 */
static void __init kasan_init_shadow(void)
{
	u64 kimg_shadow_start, kimg_shadow_end;
	u64 mod_shadow_start, mod_shadow_end;
	u64 vmalloc_shadow_end;
	phys_addr_t pa_start, pa_end;
	u64 i;

	/*
	 * Shadow range of the kernel image, widened to page boundaries:
	 * the start is masked with PAGE_MASK, the end goes through PAGE_ALIGN().
	 */
	kimg_shadow_start = (u64)kasan_mem_to_shadow(KERNEL_START) & PAGE_MASK;
	kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(KERNEL_END));

	/* Shadow range of the module area [MODULES_VADDR, MODULES_END). */
	mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR);
	mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END);

	/* Shadow address corresponding to the end of the vmalloc area. */
	vmalloc_shadow_end = (u64)kasan_mem_to_shadow((void *)VMALLOC_END);

	/*
	 * We are going to perform proper setup of shadow memory.
	 * At first we should unmap early shadow (clear_pgds() call below).
	 * However, instrumented code couldn't execute without shadow memory.
	 * tmp_pg_dir used to keep early shadow mapped until full shadow
	 * setup will be finished.
	 */
	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
	dsb(ishst);
	cpu_replace_ttbr1(lm_alias(tmp_pg_dir));

	/* Drop the early shadow entries for the entire shadow region. */
	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);

	/*
	 * Kernel image shadow; the node id passed in is the one that owns the
	 * linear-map alias of KERNEL_START.
	 */
	kasan_map_populate(kimg_shadow_start, kimg_shadow_end,
			   early_pfn_to_nid(virt_to_pfn(lm_alias(KERNEL_START))));

	/* From the shadow of PAGE_END up to the start of the module shadow. */
	kasan_populate_early_shadow(kasan_mem_to_shadow((void *)PAGE_END),
				    (void *)mod_shadow_start);

	if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
		/* This branch relies on vmalloc starting right where modules end. */
		BUILD_BUG_ON(VMALLOC_START != MODULES_END);
		/* Only the range after the vmalloc shadow is handled here. */
		kasan_populate_early_shadow((void *)vmalloc_shadow_end,
					    (void *)KASAN_SHADOW_END);
	} else {
		/* Everything after the kernel image shadow ... */
		kasan_populate_early_shadow((void *)kimg_shadow_end,
					    (void *)KASAN_SHADOW_END);
		/* ... plus the gap between module and image shadow, if any. */
		if (kimg_shadow_start > mod_shadow_end)
			kasan_populate_early_shadow((void *)mod_shadow_end,
						    (void *)kimg_shadow_start);
	}

	/*
	 * Walk every memory range, convert its physical bounds to virtual
	 * addresses with __phys_to_virt(), and populate the matching shadow.
	 */
	for_each_mem_range(i, &pa_start, &pa_end) {
		void *start = (void *)__phys_to_virt(pa_start);
		void *end = (void *)__phys_to_virt(pa_end);

		/* Stop at the first range that is empty or inverted. */
		if (start >= end)
			break;

		kasan_map_populate((unsigned long)kasan_mem_to_shadow(start),
				   (unsigned long)kasan_mem_to_shadow(end),
				   early_pfn_to_nid(virt_to_pfn(start)));
	}

	/*
	 * KAsan may reuse the contents of kasan_early_shadow_pte directly,
	 * so we should make sure that it maps the zero page read-only.
	 */
	for (i = 0; i < PTRS_PER_PTE; i++)
		set_pte(&kasan_early_shadow_pte[i],
			pfn_pte(sym_to_pfn(kasan_early_shadow_page),
				PAGE_KERNEL_RO));

	/* Fill the shared early shadow page with the initial shadow value. */
	memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE);
	/* Shadow setup is complete: return to the real kernel page tables. */
	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
}
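主要行为如下几点:
1. 通过kasan_mem_to_shadow计算内核镜像(KERNEL_START~KERNEL_END,按页对齐)、模块区(MODULES_VADDR~MODULES_END)以及VMALLOC_END对应的影子地址。
2. 将swapper_pg_dir拷贝到tmp_pg_dir并把TTBR1切换过去,保证重建影子映射期间早期影子映射仍然可用,然后用clear_pgds清除KASAN_SHADOW_START~KASAN_SHADOW_END范围内的早期映射。
3. 调用kasan_map_populate为内核镜像对应的影子区域建立映射。
4. 调用kasan_populate_early_shadow处理不需要单独影子内存的区间:先是PAGE_END的影子地址到模块区影子起始地址;开启CONFIG_KASAN_VMALLOC时再处理vmalloc_shadow_end到KASAN_SHADOW_END,否则处理kimg_shadow_end到KASAN_SHADOW_END,以及模块区影子与内核镜像影子之间的空隙(如果存在)。
5. 通过for_each_mem_range遍历每一段物理内存,换算成线性映射区的虚拟地址后,调用kasan_map_populate为其影子区域建立映射。
6. 将kasan_early_shadow_pte的所有表项重新设置为以只读方式映射kasan_early_shadow_page,并用KASAN_SHADOW_INIT填充该页,最后把TTBR1切回swapper_pg_dir。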
可以看到,这里就是根据linux的内存布局进行了所有内存的映射。
最后,也可以在分析kasan代码实现的基础上插入一条printk来验证,如下
# git diff arch/arm64/mm/kasan_init.c diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 1cc2cde94e94..72ea811de19f 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -245,6 +245,8 @@ static void __init kasan_init_shadow(void) kasan_populate_early_shadow(kasan_mem_to_shadow((void *)PAGE_END), (void *)mod_shadow_start); + pr_info("tf: PAGE_OFFSET=%lx kimg_start=%llx kimg_end=%llx mod_start=%llx mod_end=%llx vmalloc_end=%llx shadow_start=%lx shadow_end=%lx\n", PAGE_OFFSET, kimg_shadow_start, kimg_shadow_end, mod_shadow_start, mod_shadow_end, vmalloc_shadow_end, KASAN_SHADOW_START, KASAN_SHADOW_END); + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { BUILD_BUG_ON(VMALLOC_START != MODULES_END); kasan_populate_early_shadow((void *)vmalloc_shadow_end,
此时的日志如下
[ 3.552907] kasan: tf: PAGE_OFFSET=ffffff8000000000 kimg_start=ffffffca01000000 kimg_end=ffffffca01d5e000 mod_start=ffffffca00000000 mod_end=ffffffca01000000 vmalloc_end=ffffffcfd7ffe000 shadow_start=ffffffc000000000 shadow_end=ffffffd000000000
我们知道影子内存的计算方法如下
/*
 * Translate a memory address into its shadow address: shift the address
 * right by KASAN_SHADOW_SCALE_SHIFT, then add KASAN_SHADOW_OFFSET.
 */
static inline void *kasan_mem_to_shadow(const void *addr)
{
	unsigned long scaled = (unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT;

	return (void *)scaled + KASAN_SHADOW_OFFSET;
}
借助文章《使用ASAN调试内存问题》的python脚本,为内核稍作修改,可以轻松转换,内容如下
# cat kasan_shadow.py
#!/usr/bin/python3
"""Convert a kernel virtual address into its KASAN shadow address.

Mirrors the kernel's kasan_mem_to_shadow():
    shadow = (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET
"""
import sys

# Shadow offset used in this article (arm64, 4k pages, 39-bit VA); with it the
# results match the shadow addresses printed by the kernel on that machine.
KASAN_SHADOW_OFFSET = 0xdfffffd000000000
# The address is shifted right by this many bits before the offset is added.
KASAN_SHADOW_SCALE_SHIFT = 3
# Results are truncated to 64 bits, like unsigned long arithmetic on arm64.
ADDR_MASK = 0xFFFFFFFFFFFFFFFF


def asan_shadow_addr(addr, offset=KASAN_SHADOW_OFFSET,
                     scale_shift=KASAN_SHADOW_SCALE_SHIFT):
    """Return the shadow address of ``addr`` as a hex string.

    Args:
        addr: virtual address as an int.
        offset: shadow offset to add; defaults to KASAN_SHADOW_OFFSET.
        scale_shift: right shift applied to ``addr``; defaults to
            KASAN_SHADOW_SCALE_SHIFT.

    Returns:
        The 64-bit shadow address formatted by ``hex()``, e.g.
        ``'0xffffffca01000000'``.
    """
    result = ((addr >> scale_shift) + offset) & ADDR_MASK
    return hex(result)


if __name__ == "__main__":
    # Exit with a usage hint instead of an IndexError traceback.
    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} <virtual_address>")
    addr_str = sys.argv[1]
    try:
        # Base 0 accepts 0x-prefixed hex as well as decimal input.
        addr = int(addr_str, 0)
    except ValueError:
        sys.exit(f"invalid address: {addr_str!r}")
    output = asan_shadow_addr(addr)
    print(f"shadow_addr:{output}")
现在我们根据printk的打印和linux内存布局对应关系解释
根据上面针对机器的内存布局情况,再结合这个printk的打印,以及这个python脚本,可以做一个换算,如下
# ./kasan_shadow.py 0xffffffd008000000 shadow_addr:0xffffffca01000000 # ./kasan_shadow.py 0xffffffd000000000 shadow_addr:0xffffffca00000000
符合打印mod_start=ffffffca00000000 mod_end=ffffffca01000000
# ./kasan_shadow.py 0xffffffd00eaf0000 shadow_addr:0xffffffca01d5e000 # ./kasan_shadow.py 0xffffffd008000000 shadow_addr:0xffffffca01000000
符合打印kimg_start=ffffffca01000000 kimg_end=ffffffca01d5e000
# ./kasan_shadow.py 0xfffffffebfff0000 shadow_addr:0xffffffcfd7ffe000 # ./kasan_shadow.py 0xffffffd008000000 shadow_addr:0xffffffca01000000
符合打印mod_end=ffffffca01000000 vmalloc_end=ffffffcfd7ffe000
其他的就不计算了,这里只计算这几个重要的内存映射位置。值得注意的是,slab的内存在线性映射区,所以其地址位于 ffffff8000000000 ~ ffffffc000000000 范围内,这点可以通过crash的 kmem -s 查看,这里就不演示了。其他mapped io的地址,也按照具体地址来计算即可。
本文根据linux内存布局的情况,解析了kasan是如何为影子内存建立映射的。并通过实战的办法演示了具体映射情况和地址范围。对于理解kasan的影子内存而言非常有意义。