Also in madvise_free_pte_range() you could just remove the swap entry as it's no longer needed. Alistair Popple writes: > Miaohe Lin writes: > >> There is a bug in unuse_pte(): when swap page happens to be unreadable, >> page filled with random data is mapped into user address space. In case >> of error, a special swap entry indicating swap read fails is set to the >> page table. So the swapcache page can be freed and the user won't end up >> with a permanently mounted swap because a sector is bad. And if the page >> is accessed later, the user process will be killed so that corrupted data >> is never consumed. On the other hand, if the page is never accessed, the >> user won't even notice it. > > Hi Miaohe, > > It seems we're not actually using the pfn that gets stored in the special swap > entry here. Is my understanding correct? If so I think it would be better to use > the new PTE markers Peter introduced[1] rather than adding another swap entry > type. > > [1] - > >> Signed-off-by: Miaohe Lin >> --- >> v2: >> use special swap entry to avoid permanently mounted swap >> free the bad page in swapcache >> --- >> include/linux/swap.h | 7 ++++++- >> include/linux/swapops.h | 10 ++++++++++ >> mm/memory.c | 5 ++++- >> mm/swapfile.c | 11 +++++++++++ >> 4 files changed, 31 insertions(+), 2 deletions(-) >> >> diff --git a/include/linux/swap.h b/include/linux/swap.h >> index d112434f85df..03c576111737 100644 >> --- a/include/linux/swap.h >> +++ b/include/linux/swap.h >> @@ -55,6 +55,10 @@ static inline int current_is_kswapd(void) >> * actions on faults. >> */ >> >> +#define SWAP_READ_ERROR_NUM 1 >> +#define SWAP_READ_ERROR (MAX_SWAPFILES + SWP_HWPOISON_NUM + \ >> + SWP_MIGRATION_NUM + SWP_DEVICE_NUM + \ >> + SWP_PTE_MARKER_NUM) >> /* >> * PTE markers are used to persist information onto PTEs that are mapped with >> * file-backed memories. As its name "PTE" hints, it should only be applied to >> @@ -120,7 +124,8 @@ static inline int current_is_kswapd(void) >> >> #define MAX_SWAPFILES \ >> ((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \ >> - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - SWP_PTE_MARKER_NUM) >> + SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - \ >> + SWP_PTE_MARKER_NUM - SWAP_READ_ERROR_NUM) >> >> /* >> * Magic header for a swap area. The first part of the union is >> diff --git a/include/linux/swapops.h b/include/linux/swapops.h >> index fffbba0036f6..d1093384de9f 100644 >> --- a/include/linux/swapops.h >> +++ b/include/linux/swapops.h >> @@ -108,6 +108,16 @@ static inline void *swp_to_radix_entry(swp_entry_t entry) >> return xa_mk_value(entry.val); >> } >> >> +static inline swp_entry_t make_swapin_error_entry(struct page *page) >> +{ >> + return swp_entry(SWAP_READ_ERROR, page_to_pfn(page)); >> +} >> + >> +static inline int is_swapin_error_entry(swp_entry_t entry) >> +{ >> + return swp_type(entry) == SWAP_READ_ERROR; >> +} >> + >> #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) >> static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset) >> { >> diff --git a/mm/memory.c b/mm/memory.c >> index e6434b824009..34d1d66a05bd 100644 >> --- a/mm/memory.c >> +++ b/mm/memory.c >> @@ -1476,7 +1476,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, >> /* Only drop the uffd-wp marker if explicitly requested */ >> if (!zap_drop_file_uffd_wp(details)) >> continue; >> - } else if (is_hwpoison_entry(entry)) { >> + } else if (is_hwpoison_entry(entry) || >> + is_swapin_error_entry(entry)) { >> if (!should_zap_cows(details)) >> continue; >> } else { >> @@ -3724,6 +3725,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) >> ret = vmf->page->pgmap->ops->migrate_to_ram(vmf); >> } else if (is_hwpoison_entry(entry)) { >> ret = VM_FAULT_HWPOISON; >> + } else if (is_swapin_error_entry(entry)) { >> + ret = VM_FAULT_SIGBUS; >> } else if (is_pte_marker_entry(entry)) { >> ret = handle_pte_marker(vmf); >> } else { >> diff --git a/mm/swapfile.c b/mm/swapfile.c >> index 9398e915b36b..95b63f69f388 100644 >> --- a/mm/swapfile.c >> +++ b/mm/swapfile.c >> @@ -1797,6 +1797,17 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, >> goto out; >> } >> >> + if (unlikely(!PageUptodate(page))) { >> + pte_t pteval; >> + >> + dec_mm_counter(vma->vm_mm, MM_SWAPENTS); >> + pteval = swp_entry_to_pte(make_swapin_error_entry(page)); >> + set_pte_at(vma->vm_mm, addr, pte, pteval); >> + swap_free(entry); >> + ret = 0; >> + goto out; >> + } >> + >> /* See do_swap_page() */ >> BUG_ON(!PageAnon(page) && PageMappedToDisk(page)); >> BUG_ON(PageAnon(page) && PageAnonExclusive(page));