On Wed, 18 Nov 2020 20:48:41 +0100 Thomas Gleixner wrote: > kmap_local() and related interfaces are NOOPs on 64bit and only create > temporary fixmaps for highmem pages on 32bit. That means the test coverage > for this code is pretty small. > > CONFIG_KMAP_LOCAL can be enabled independent from CONFIG_HIGHMEM, which > allows to provide support for enforced kmap_local() debugging even on > 64bit. > > For 32bit the support is unconditional, for 64bit it's only supported when > CONFIG_NR_CPUS <= 4096 as supporting it for 8192 CPUs would require to set > up yet another fixmap PGT. > > If CONFIG_KMAP_LOCAL_FORCE_DEBUG is enabled then kmap_local()/kmap_atomic() > will use the temporary fixmap mapping path. > > Signed-off-by: Thomas Gleixner > --- > V4: New patch > --- > arch/x86/Kconfig | 1 + > arch/x86/include/asm/fixmap.h | 12 +++++++++--- > arch/x86/include/asm/pgtable_64_types.h | 6 +++++- > 3 files changed, 15 insertions(+), 4 deletions(-) > > --- a/arch/x86/Kconfig > +++ b/arch/x86/Kconfig > @@ -93,6 +93,7 @@ config X86 > select ARCH_SUPPORTS_ACPI > select ARCH_SUPPORTS_ATOMIC_RMW > select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 I triggered the following crash on x86_32 by simply doing a: (ssh'ing into the box) # head -100 /tmp/output-file Where the /tmp/output-file was the output of a trace-cmd report. Even after rebooting and not running the tracing code, simply doing the head command still crashed. 
BUG: unable to handle page fault for address: fff58000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page *pdpt = 0000000006de9001 *pde = 0000000001968063 *pte = 0000000000000000 Oops: 0000 [#1] SMP PTI CPU: 3 PID: 3935 Comm: sshd Not tainted 5.11.0-rc2-test+ #2 Hardware name: MSI MS-7823/CSM-H87M-G43 (MS-7823), BIOS V1.6 02/22/2014 EIP: skb_copy_bits+0x10c/0x1b9 Code: 3b 5d e8 0f 47 5d e8 c7 45 e0 00 00 00 00 8b 7d e0 39 7d e8 76 3a 8b 45 d4 e8 a4 e4 ff ff 8b 55 e4 03 55 e0 89 d9 01 c6 89 d7 a4 e8 c9 e4 ff ff 01 5d e0 8b 5d e8 b8 00 10 00 00 2b 5d e0 83 EAX: fff57000 EBX: 000005a8 ECX: 000000f8 EDX: c77b9900 ESI: fff58000 EDI: c77b9db0 EBP: c6de39ec ESP: c6de39c0 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00210286 CR0: 80050033 CR2: fff58000 CR3: 06de6000 CR4: 001506f0 Call Trace: skb_segment+0x4a3/0x828 ? __tcp_mtu_to_mss+0x2d/0x6b tcp_gso_segment+0xf6/0x336 ? list_add+0x26/0x26 tcp4_gso_segment+0x77/0x7c ? tcp_gso_segment+0x336/0x336 inet_gso_segment+0x1a1/0x2df ? inet_unregister_protosw+0x5e/0x5e skb_mac_gso_segment+0xb9/0x107 __skb_gso_segment+0xdf/0x10f ? netif_skb_features+0x1ca/0x24a ? __qdisc_run+0x1e4/0x418 validate_xmit_skb.constprop.0+0x10f/0x1ad validate_xmit_skb_list+0x25/0x45 sch_direct_xmit+0x5c/0x19d __qdisc_run+0x3e3/0x418 ? qdisc_run_begin+0x53/0x5d qdisc_run+0x26/0x30 __dev_queue_xmit+0x2bd/0x524 ? mark_held_locks+0x40/0x51 dev_queue_xmit+0xf/0x11 ip_finish_output2+0x378/0x3d7 __ip_finish_output+0xd6/0xe2 ip_output+0x8c/0xbb ? ip_mc_output+0x18d/0x18d dst_output+0x27/0x2d ip_local_out+0x2b/0x30 __ip_queue_xmit+0x32e/0x38e ? __copy_skb_header+0x4b/0x98 ? __ip_queue_xmit+0x38e/0x38e ip_queue_xmit+0x16/0x1b __tcp_transmit_skb+0x731/0x794 tcp_transmit_skb+0x16/0x18 tcp_write_xmit+0x7b4/0xa90 __tcp_push_pending_frames+0x2c/0x6b tcp_push+0x8c/0xf1 tcp_sendmsg_locked+0x74a/0x7f2 ? tcp_sendmsg_locked+0x7f2/0x7f2 tcp_sendmsg+0x27/0x38 ? tcp_sendmsg_locked+0x7f2/0x7f2 inet_sendmsg+0x3c/0x5f ? 
inet_send_prepare+0x3b/0x3b sock_sendmsg_nosec+0x1a/0x2d sock_sendmsg+0x25/0x29 sock_write_iter+0x84/0xa7 vfs_write+0xf5/0x19b ksys_write+0x68/0xaa __ia32_sys_write+0x15/0x17 __do_fast_syscall_32+0x66/0x76 do_fast_syscall_32+0x29/0x5b do_SYSENTER_32+0x15/0x17 entry_SYSENTER_32+0x9f/0xf2 EIP: 0xb7ee3545 Code: c4 01 10 03 03 74 c0 01 10 05 03 74 b8 01 10 06 03 74 b4 01 10 07 03 74 b0 01 10 08 03 74 d8 01 00 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76 EAX: ffffffda EBX: 00000003 ECX: 01d12448 EDX: 00002028 ESI: 00002028 EDI: 01d12448 EBP: bff4e388 ESP: bff4e328 DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00200246 ? asm_exc_nmi+0xc5/0x2ab Modules linked in: ppdev parport_pc parport CR2: 00000000fff58000 ---[ end trace 3d4582614c9c2a0e ]--- EIP: skb_copy_bits+0x10c/0x1b9 Code: 3b 5d e8 0f 47 5d e8 c7 45 e0 00 00 00 00 8b 7d e0 39 7d e8 76 3a 8b 45 d4 e8 a4 e4 ff ff 8b 55 e4 03 55 e0 89 d9 01 c6 89 d7 a4 e8 c9 e4 ff ff 01 5d e0 8b 5d e8 b8 00 10 00 00 2b 5d e0 83 EAX: fff57000 EBX: 000005a8 ECX: 000000f8 EDX: c77b9900 ESI: fff58000 EDI: c77b9db0 EBP: c6de39ec ESP: c6de39c0 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00210286 CR0: 80050033 CR2: fff58000 CR3: 06de6000 CR4: 001506f0 Kernel panic - not syncing: Fatal exception in interrupt Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- This was against 5.11-rc2. I bisected it down to the commit that added this patch. > + select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096 If I remove the above line, it works fine. Attached is the config file. -- Steve > select ARCH_USE_BUILTIN_BSWAP > select ARCH_USE_QUEUED_RWLOCKS > select ARCH_USE_QUEUED_SPINLOCKS > --- a/arch/x86/include/asm/fixmap.h > +++ b/arch/x86/include/asm/fixmap.h > @@ -14,13 +14,20 @@ > #ifndef _ASM_X86_FIXMAP_H > #define _ASM_X86_FIXMAP_H > > +#include > + > /* > * Exposed to assembly code for setting up initial page tables. 
Cannot be > * calculated in assembly code (fixmap entries are an enum), but is sanity > * checked in the actual fixmap C code to make sure that the fixmap is > * covered fully. > */ > -#define FIXMAP_PMD_NUM 2 > +#ifndef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP > +# define FIXMAP_PMD_NUM 2 > +#else > +# define KM_PMDS (KM_MAX_IDX * ((CONFIG_NR_CPUS + 511) / 512)) > +# define FIXMAP_PMD_NUM (KM_PMDS + 2) > +#endif > /* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */ > #define FIXMAP_PMD_TOP 507 > > @@ -31,7 +38,6 @@ > #include <asm/pgtable_types.h> > #ifdef CONFIG_X86_32 > #include <linux/threads.h> > -#include <asm/kmap_size.h> > #else > #include <uapi/asm/vsyscall.h> > #endif > @@ -92,7 +98,7 @@ enum fixed_addresses { > FIX_IO_APIC_BASE_0, > FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, > #endif > -#ifdef CONFIG_X86_32 > +#ifdef CONFIG_KMAP_LOCAL > FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ > FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1, > #ifdef CONFIG_PCI_MMCONFIG > --- a/arch/x86/include/asm/pgtable_64_types.h > +++ b/arch/x86/include/asm/pgtable_64_types.h > @@ -143,7 +143,11 @@ extern unsigned int ptrs_per_p4d; > > #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) > /* The module sections ends with the start of the fixmap */ > -#define MODULES_END _AC(0xffffffffff000000, UL) > +#ifndef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP > +# define MODULES_END _AC(0xffffffffff000000, UL) > +#else > +# define MODULES_END _AC(0xfffffffffe000000, UL) > +#endif > #define MODULES_LEN (MODULES_END - MODULES_VADDR) > > #define ESPFIX_PGD_ENTRY _AC(-2, UL) > >