I went through switch_mm some more and came up with one more question: why don't we need to clear the prev cpumask in the else branch, between lines 59 and 60? Say: 1) Context is switched from process A to the kernel, so the kernel has active_mm -> A's mm. 2) Context is switched from the kernel back to A; in sched.c, oldmm = A's mm and mm = A's mm. 3) switch_mm then reaches arch/x86/include/asm/mmu_context.h:60, since prev == next. If another CPU flushes A's mm while this CPU has not cleared its bit in the CPU mask, this CPU might enter the IPI interrupt routine and also find that cpu_tlbstate.state is TLBSTATE_OK. Is this possible? kernel/sched.c 2999 context_switch(struct rq *rq, struct task_struct *prev, 3000 struct task_struct *next) 3001 { 3002 struct mm_struct *mm, *oldmm; 3003 3004 prepare_task_switch(rq, prev, next); 3005 trace_sched_switch(rq, prev, next); 3006 mm = next->mm; 3007 oldmm = prev->active_mm; 3008 /* 3009 * For paravirt, this is coupled with an exit in switch_to to 3010 * combine the page table reload and the switch backend into 3011 * one hypercall. 3012 */ 3013 arch_start_context_switch(prev); 3014 3015 if (unlikely(!mm)) { 3016 next->active_mm = oldmm; 3017 atomic_inc(&oldmm->mm_count); 3018 enter_lazy_tlb(oldmm, next); 3019 } else 3020 switch_mm(oldmm, mm, next); 3021 3022 if (unlikely(!prev->mm)) { 3023 prev->active_mm = NULL; 3024 rq->prev_mm = oldmm; 3025 } 33 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, 34 struct task_struct *tsk) 35 { 36 unsigned cpu = smp_processor_id(); 37 38 if (likely(prev != next)) { 39 /* stop flush ipis for the previous mm */ 40 cpumask_clear_cpu(cpu, mm_cpumask(prev)); 41 42 43 #ifdef CONFIG_SMP 44 percpu_write(cpu_tlbstate.state, TLBSTATE_OK); 45 percpu_write(cpu_tlbstate.active_mm, next); 46 #endif 47 cpumask_set_cpu(cpu, mm_cpumask(next)); 48 49 /* Re-load page tables */ 50 load_cr3(next->pgd); 51 52 /* 53 * load the LDT, if the LDT is different: 54 */ 55 if (unlikely(prev->context.ldt != next->context.ldt)) 56 load_LDT_nolock(&next->context); 57 } 58 #ifdef CONFIG_SMP 
59 else { 60 percpu_write(cpu_tlbstate.state, TLBSTATE_OK); 61 BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); 62 63 if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) { 64 /* We were in lazy tlb mode and leave_mm disabled 65 * tlb flush IPI delivery. We must reload CR3 66 * to make sure to use no freed page tables. 67 */ 68 load_cr3(next->pgd); 69 load_LDT_nolock(&next->context); 70 } 71 }