Hi Miaohe,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linux/master]
[also build test ERROR on linus/master hnaz-linux-mm/master v5.12-rc6 next-20210408]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Miaohe-Lin/close-various-race-windows-for-swap/20210408-211224
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 5e46d1b78a03d52306f21f77a4e4a144b6d31486
config: mips-randconfig-r016-20210408 (attached as .config)
compiler: mipsel-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/56e65e21c8c9858e36c3bca84006a15fe9b85efd
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Miaohe-Lin/close-various-race-windows-for-swap/20210408-211224
        git checkout 56e65e21c8c9858e36c3bca84006a15fe9b85efd
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=mips

If you fix the issue, kindly add the following tag as appropriate:
Reported-by: kernel test robot

All errors/warnings (new ones prefixed by >>):

   mm/memory.c: In function 'do_swap_page':
>> mm/memory.c:3300:7: error: implicit declaration of function 'get_swap_device'; did you mean 'get_cpu_device'? [-Werror=implicit-function-declaration]
    3300 |  si = get_swap_device(entry);
         |       ^~~~~~~~~~~~~~~
         |       get_cpu_device
>> mm/memory.c:3300:5: warning: assignment to 'struct swap_info_struct *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
    3300 |  si = get_swap_device(entry);
         |     ^
>> mm/memory.c:3483:3: error: implicit declaration of function 'put_swap_device'; did you mean 'put_swap_page'? [-Werror=implicit-function-declaration]
    3483 |   put_swap_device(si);
         |   ^~~~~~~~~~~~~~~
         |   put_swap_page
   cc1: some warnings being treated as errors
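Both implicit-declaration errors (and the -Wint-conversion warning, which is
just fallout from the first one) point at the same root cause: nothing in
scope declares get_swap_device()/put_swap_device() for this configuration.
In v5.12-era include/linux/swap.h these are only visible under
#ifdef CONFIG_SWAP, and a mips randconfig can easily end up with
CONFIG_SWAP=n while mm/memory.c is still built. A minimal sketch of the
kind of stubs that would cure the build — assuming CONFIG_SWAP=n really is
the trigger here; exact placement would need checking against the rest of
the series:

        /* Sketch for include/linux/swap.h, inside the existing
         * #else / !CONFIG_SWAP branch, alongside the other stubs there. */
        static inline struct swap_info_struct *get_swap_device(swp_entry_t entry)
        {
                /* NULL makes do_swap_page() take its new early
                 * "if (unlikely(!si)) goto out;" exit. */
                return NULL;
        }

        static inline void put_swap_device(struct swap_info_struct *si)
        {
        }

With a proper prototype visible at the call sites, the -Wint-conversion
warning on line 3300 would go away as well. This is only an illustration of
the missing-declaration problem, not a tested fix.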
vim +3300 mm/memory.c

  3258	
  3259	/*
  3260	 * We enter with non-exclusive mmap_lock (to exclude vma changes,
  3261	 * but allow concurrent faults), and pte mapped but not yet locked.
  3262	 * We return with pte unmapped and unlocked.
  3263	 *
  3264	 * We return with the mmap_lock locked or unlocked in the same cases
  3265	 * as does filemap_fault().
  3266	 */
  3267	vm_fault_t do_swap_page(struct vm_fault *vmf)
  3268	{
  3269		struct vm_area_struct *vma = vmf->vma;
  3270		struct page *page = NULL, *swapcache;
  3271		struct swap_info_struct *si = NULL;
  3272		swp_entry_t entry;
  3273		pte_t pte;
  3274		int locked;
  3275		int exclusive = 0;
  3276		vm_fault_t ret = 0;
  3277		void *shadow = NULL;
  3278	
  3279		if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
  3280			goto out;
  3281	
  3282		entry = pte_to_swp_entry(vmf->orig_pte);
  3283		if (unlikely(non_swap_entry(entry))) {
  3284			if (is_migration_entry(entry)) {
  3285				migration_entry_wait(vma->vm_mm, vmf->pmd,
  3286						     vmf->address);
  3287			} else if (is_device_private_entry(entry)) {
  3288				vmf->page = device_private_entry_to_page(entry);
  3289				ret = vmf->page->pgmap->ops->migrate_to_ram(vmf);
  3290			} else if (is_hwpoison_entry(entry)) {
  3291				ret = VM_FAULT_HWPOISON;
  3292			} else {
  3293				print_bad_pte(vma, vmf->address, vmf->orig_pte, NULL);
  3294				ret = VM_FAULT_SIGBUS;
  3295			}
  3296			goto out;
  3297		}
  3298	
  3299	
> 3300		si = get_swap_device(entry);
  3301		/* In case we raced with swapoff. */
  3302		if (unlikely(!si))
  3303			goto out;
  3304	
  3305		delayacct_set_flag(DELAYACCT_PF_SWAPIN);
  3306		page = lookup_swap_cache(entry, vma, vmf->address);
  3307		swapcache = page;
  3308	
  3309		if (!page) {
  3310			struct swap_info_struct *si = swp_swap_info(entry);
  3311	
  3312			if (data_race(si->flags & SWP_SYNCHRONOUS_IO) &&
  3313			    __swap_count(entry) == 1) {
  3314				/* skip swapcache */
  3315				page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
  3316						      vmf->address);
  3317				if (page) {
  3318					int err;
  3319	
  3320					__SetPageLocked(page);
  3321					__SetPageSwapBacked(page);
  3322					set_page_private(page, entry.val);
  3323	
  3324					/* Tell memcg to use swap ownership records */
  3325					SetPageSwapCache(page);
  3326					err = mem_cgroup_charge(page, vma->vm_mm,
  3327								GFP_KERNEL);
  3328					ClearPageSwapCache(page);
  3329					if (err) {
  3330						ret = VM_FAULT_OOM;
  3331						goto out_page;
  3332					}
  3333	
  3334					shadow = get_shadow_from_swap_cache(entry);
  3335					if (shadow)
  3336						workingset_refault(page, shadow);
  3337	
  3338					lru_cache_add(page);
  3339					swap_readpage(page, true);
  3340				}
  3341			} else {
  3342				page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
  3343						vmf);
  3344				swapcache = page;
  3345			}
  3346	
  3347			if (!page) {
  3348				/*
  3349				 * Back out if somebody else faulted in this pte
  3350				 * while we released the pte lock.
  3351				 */
  3352				vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
  3353						vmf->address, &vmf->ptl);
  3354				if (likely(pte_same(*vmf->pte, vmf->orig_pte)))
  3355					ret = VM_FAULT_OOM;
  3356				delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
  3357				goto unlock;
  3358			}
  3359	
  3360			/* Had to read the page from swap area: Major fault */
  3361			ret = VM_FAULT_MAJOR;
  3362			count_vm_event(PGMAJFAULT);
  3363			count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
  3364		} else if (PageHWPoison(page)) {
  3365			/*
  3366			 * hwpoisoned dirty swapcache pages are kept for killing
  3367			 * owner processes (which may be unknown at hwpoison time)
  3368			 */
  3369			ret = VM_FAULT_HWPOISON;
  3370			delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
  3371			goto out_release;
  3372		}
  3373	
  3374		locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags);
  3375	
  3376		delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
  3377		if (!locked) {
  3378			ret |= VM_FAULT_RETRY;
  3379			goto out_release;
  3380		}
  3381	
  3382		/*
  3383		 * Make sure try_to_free_swap or reuse_swap_page or swapoff did not
  3384		 * release the swapcache from under us. The page pin, and pte_same
  3385		 * test below, are not enough to exclude that. Even if it is still
  3386		 * swapcache, we need to check that the page's swap has not changed.
  3387		 */
  3388		if (unlikely((!PageSwapCache(page) ||
  3389				page_private(page) != entry.val)) && swapcache)
  3390			goto out_page;
  3391	
  3392		page = ksm_might_need_to_copy(page, vma, vmf->address);
  3393		if (unlikely(!page)) {
  3394			ret = VM_FAULT_OOM;
  3395			page = swapcache;
  3396			goto out_page;
  3397		}
  3398	
  3399		cgroup_throttle_swaprate(page, GFP_KERNEL);
  3400	
  3401		/*
  3402		 * Back out if somebody else already faulted in this pte.
  3403		 */
  3404		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
  3405				&vmf->ptl);
  3406		if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte)))
  3407			goto out_nomap;
  3408	
  3409		if (unlikely(!PageUptodate(page))) {
  3410			ret = VM_FAULT_SIGBUS;
  3411			goto out_nomap;
  3412		}
  3413	
  3414		/*
  3415		 * The page isn't present yet, go ahead with the fault.
  3416		 *
  3417		 * Be careful about the sequence of operations here.
  3418		 * To get its accounting right, reuse_swap_page() must be called
  3419		 * while the page is counted on swap but not yet in mapcount i.e.
  3420		 * before page_add_anon_rmap() and swap_free(); try_to_free_swap()
  3421		 * must be called after the swap_free(), or it will never succeed.
  3422		 */
  3423	
  3424		inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
  3425		dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
  3426		pte = mk_pte(page, vma->vm_page_prot);
  3427		if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
  3428			pte = maybe_mkwrite(pte_mkdirty(pte), vma);
  3429			vmf->flags &= ~FAULT_FLAG_WRITE;
  3430			ret |= VM_FAULT_WRITE;
  3431			exclusive = RMAP_EXCLUSIVE;
  3432		}
  3433		flush_icache_page(vma, page);
  3434		if (pte_swp_soft_dirty(vmf->orig_pte))
  3435			pte = pte_mksoft_dirty(pte);
  3436		if (pte_swp_uffd_wp(vmf->orig_pte)) {
  3437			pte = pte_mkuffd_wp(pte);
  3438			pte = pte_wrprotect(pte);
  3439		}
  3440		set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
  3441		arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
  3442		vmf->orig_pte = pte;
  3443	
  3444		/* ksm created a completely new copy */
  3445		if (unlikely(page != swapcache && swapcache)) {
  3446			page_add_new_anon_rmap(page, vma, vmf->address, false);
  3447			lru_cache_add_inactive_or_unevictable(page, vma);
  3448		} else {
  3449			do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
  3450		}
  3451	
  3452		swap_free(entry);
  3453		if (mem_cgroup_swap_full(page) ||
  3454		    (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
  3455			try_to_free_swap(page);
  3456		unlock_page(page);
  3457		if (page != swapcache && swapcache) {
  3458			/*
  3459			 * Hold the lock to avoid the swap entry to be reused
  3460			 * until we take the PT lock for the pte_same() check
  3461			 * (to avoid false positives from pte_same). For
  3462			 * further safety release the lock after the swap_free
  3463			 * so that the swap count won't change under a
  3464			 * parallel locked swapcache.
  3465			 */
  3466			unlock_page(swapcache);
  3467			put_page(swapcache);
  3468		}
  3469	
  3470		if (vmf->flags & FAULT_FLAG_WRITE) {
  3471			ret |= do_wp_page(vmf);
  3472			if (ret & VM_FAULT_ERROR)
  3473				ret &= VM_FAULT_ERROR;
  3474			goto out;
  3475		}
  3476	
  3477		/* No need to invalidate - it was non-present before */
  3478		update_mmu_cache(vma, vmf->address, vmf->pte);
  3479	unlock:
  3480		pte_unmap_unlock(vmf->pte, vmf->ptl);
  3481	out:
  3482		if (si)
> 3483			put_swap_device(si);
  3484		return ret;
  3485	out_nomap:
  3486		pte_unmap_unlock(vmf->pte, vmf->ptl);
  3487	out_page:
  3488		unlock_page(page);
  3489	out_release:
  3490		put_page(page);
  3491		if (page != swapcache && swapcache) {
  3492			unlock_page(swapcache);
  3493			put_page(swapcache);
  3494		}
  3495		if (si)
  3496			put_swap_device(si);
  3497		return ret;
  3498	}
  3499	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org