From: Gleb Natapov <gleb@redhat.com> To: kvm@vger.kernel.org Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, avi@redhat.com, mingo@elte.hu, a.p.zijlstra@chello.nl, tglx@linutronix.de, hpa@zytor.com, riel@redhat.com, cl@linux-foundation.org, mtosatti@redhat.com Subject: [PATCH v5 06/12] Add get_user_pages() variant that fails if major fault is required. Date: Mon, 19 Jul 2010 18:30:56 +0300 [thread overview] Message-ID: <1279553462-7036-7-git-send-email-gleb@redhat.com> (raw) In-Reply-To: <1279553462-7036-1-git-send-email-gleb@redhat.com> This patch add get_user_pages() variant that only succeeds if getting a reference to a page doesn't require major fault. Reviewed-by: Rik van Riel <riel@redhat.com> Signed-off-by: Gleb Natapov <gleb@redhat.com> --- fs/ncpfs/mmap.c | 2 ++ include/linux/mm.h | 5 +++++ mm/filemap.c | 3 +++ mm/memory.c | 31 ++++++++++++++++++++++++++++--- mm/shmem.c | 8 +++++++- 5 files changed, 45 insertions(+), 4 deletions(-) diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index 56f5b3a..b9c4f36 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -39,6 +39,8 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area, int bufsize; int pos; /* XXX: loff_t ? 
*/ + if (vmf->flags & FAULT_FLAG_MINOR) + return VM_FAULT_MAJOR | VM_FAULT_ERROR; /* * ncpfs has nothing against high pages as long * as recvmsg and memset works on it diff --git a/include/linux/mm.h b/include/linux/mm.h index 4238a9c..2bfc85a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -140,6 +140,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ #define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */ +#define FAULT_FLAG_MINOR 0x08 /* Do only minor fault */ /* * This interface is used by x86 PAT code to identify a pfn mapping that is @@ -843,6 +844,9 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void * int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); +int get_user_pages_noio(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, int nr_pages, int write, int force, + struct page **pages, struct vm_area_struct **vmas); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); struct page *get_dump_page(unsigned long addr); @@ -1373,6 +1377,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, #define FOLL_GET 0x04 /* do get_page on page */ #define FOLL_DUMP 0x08 /* give error on hole if it would be zero */ #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ +#define FOLL_MINOR 0x20 /* do only minor page faults */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); diff --git a/mm/filemap.c b/mm/filemap.c index 20e5642..1186338 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1548,6 +1548,9 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) goto no_cached_page; } } else { + if (vmf->flags & FAULT_FLAG_MINOR) + 
return VM_FAULT_MAJOR | VM_FAULT_ERROR; + /* No page in the page cache at all */ do_sync_mmap_readahead(vma, ra, file, offset); count_vm_event(PGMAJFAULT); diff --git a/mm/memory.c b/mm/memory.c index 119b7cc..7dfaba2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1433,10 +1433,13 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, cond_resched(); while (!(page = follow_page(vma, start, foll_flags))) { int ret; + unsigned int fault_fl = + ((foll_flags & FOLL_WRITE) ? + FAULT_FLAG_WRITE : 0) | + ((foll_flags & FOLL_MINOR) ? + FAULT_FLAG_MINOR : 0); - ret = handle_mm_fault(mm, vma, start, - (foll_flags & FOLL_WRITE) ? - FAULT_FLAG_WRITE : 0); + ret = handle_mm_fault(mm, vma, start, fault_fl); if (ret & VM_FAULT_ERROR) { if (ret & VM_FAULT_OOM) @@ -1444,6 +1447,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, if (ret & (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS)) return i ? i : -EFAULT; + else if (ret & VM_FAULT_MAJOR) + return i ? i : -EFAULT; BUG(); } if (ret & VM_FAULT_MAJOR) @@ -1554,6 +1559,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, } EXPORT_SYMBOL(get_user_pages); +int get_user_pages_noio(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, int nr_pages, int write, int force, + struct page **pages, struct vm_area_struct **vmas) +{ + int flags = FOLL_TOUCH | FOLL_MINOR; + + if (pages) + flags |= FOLL_GET; + if (write) + flags |= FOLL_WRITE; + if (force) + flags |= FOLL_FORCE; + + return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas); +} +EXPORT_SYMBOL(get_user_pages_noio); + /** * get_dump_page() - pin user page in memory while writing it to core dump * @addr: user address @@ -2640,6 +2662,9 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, delayacct_set_flag(DELAYACCT_PF_SWAPIN); page = lookup_swap_cache(entry); if (!page) { + if (flags & FAULT_FLAG_MINOR) + return VM_FAULT_MAJOR | VM_FAULT_ERROR; + grab_swap_token(mm); /* Contend for token 
_before_ read-in */ page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vma, address); diff --git a/mm/shmem.c b/mm/shmem.c index f65f840..acc8958 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1227,6 +1227,7 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, swp_entry_t swap; gfp_t gfp; int error; + int flags = type ? *type : 0; if (idx >= SHMEM_MAX_INDEX) return -EFBIG; @@ -1275,6 +1276,11 @@ repeat: swappage = lookup_swap_cache(swap); if (!swappage) { shmem_swp_unmap(entry); + if (flags & FAULT_FLAG_MINOR) { + spin_unlock(&info->lock); + *type = VM_FAULT_MAJOR | VM_FAULT_ERROR; + goto failed; + } /* here we actually do the io */ if (type && !(*type & VM_FAULT_MAJOR)) { __count_vm_event(PGMAJFAULT); @@ -1483,7 +1489,7 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct inode *inode = vma->vm_file->f_path.dentry->d_inode; int error; - int ret; + int ret = (int)vmf->flags; if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) return VM_FAULT_SIGBUS; -- 1.7.1
WARNING: multiple messages have this Message-ID (diff)
From: Gleb Natapov <gleb@redhat.com> To: kvm@vger.kernel.org Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, avi@redhat.com, mingo@elte.hu, a.p.zijlstra@chello.nl, tglx@linutronix.de, hpa@zytor.com, riel@redhat.com, cl@linux-foundation.org, mtosatti@redhat.com Subject: [PATCH v5 06/12] Add get_user_pages() variant that fails if major fault is required. Date: Mon, 19 Jul 2010 18:30:56 +0300 [thread overview] Message-ID: <1279553462-7036-7-git-send-email-gleb@redhat.com> (raw) In-Reply-To: <1279553462-7036-1-git-send-email-gleb@redhat.com> This patch add get_user_pages() variant that only succeeds if getting a reference to a page doesn't require major fault. Reviewed-by: Rik van Riel <riel@redhat.com> Signed-off-by: Gleb Natapov <gleb@redhat.com> --- fs/ncpfs/mmap.c | 2 ++ include/linux/mm.h | 5 +++++ mm/filemap.c | 3 +++ mm/memory.c | 31 ++++++++++++++++++++++++++++--- mm/shmem.c | 8 +++++++- 5 files changed, 45 insertions(+), 4 deletions(-) diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index 56f5b3a..b9c4f36 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -39,6 +39,8 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area, int bufsize; int pos; /* XXX: loff_t ? 
*/ + if (vmf->flags & FAULT_FLAG_MINOR) + return VM_FAULT_MAJOR | VM_FAULT_ERROR; /* * ncpfs has nothing against high pages as long * as recvmsg and memset works on it diff --git a/include/linux/mm.h b/include/linux/mm.h index 4238a9c..2bfc85a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -140,6 +140,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ #define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */ +#define FAULT_FLAG_MINOR 0x08 /* Do only minor fault */ /* * This interface is used by x86 PAT code to identify a pfn mapping that is @@ -843,6 +844,9 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void * int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); +int get_user_pages_noio(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, int nr_pages, int write, int force, + struct page **pages, struct vm_area_struct **vmas); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); struct page *get_dump_page(unsigned long addr); @@ -1373,6 +1377,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, #define FOLL_GET 0x04 /* do get_page on page */ #define FOLL_DUMP 0x08 /* give error on hole if it would be zero */ #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ +#define FOLL_MINOR 0x20 /* do only minor page faults */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); diff --git a/mm/filemap.c b/mm/filemap.c index 20e5642..1186338 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1548,6 +1548,9 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) goto no_cached_page; } } else { + if (vmf->flags & FAULT_FLAG_MINOR) + 
return VM_FAULT_MAJOR | VM_FAULT_ERROR; + /* No page in the page cache at all */ do_sync_mmap_readahead(vma, ra, file, offset); count_vm_event(PGMAJFAULT); diff --git a/mm/memory.c b/mm/memory.c index 119b7cc..7dfaba2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1433,10 +1433,13 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, cond_resched(); while (!(page = follow_page(vma, start, foll_flags))) { int ret; + unsigned int fault_fl = + ((foll_flags & FOLL_WRITE) ? + FAULT_FLAG_WRITE : 0) | + ((foll_flags & FOLL_MINOR) ? + FAULT_FLAG_MINOR : 0); - ret = handle_mm_fault(mm, vma, start, - (foll_flags & FOLL_WRITE) ? - FAULT_FLAG_WRITE : 0); + ret = handle_mm_fault(mm, vma, start, fault_fl); if (ret & VM_FAULT_ERROR) { if (ret & VM_FAULT_OOM) @@ -1444,6 +1447,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, if (ret & (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS)) return i ? i : -EFAULT; + else if (ret & VM_FAULT_MAJOR) + return i ? i : -EFAULT; BUG(); } if (ret & VM_FAULT_MAJOR) @@ -1554,6 +1559,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, } EXPORT_SYMBOL(get_user_pages); +int get_user_pages_noio(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, int nr_pages, int write, int force, + struct page **pages, struct vm_area_struct **vmas) +{ + int flags = FOLL_TOUCH | FOLL_MINOR; + + if (pages) + flags |= FOLL_GET; + if (write) + flags |= FOLL_WRITE; + if (force) + flags |= FOLL_FORCE; + + return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas); +} +EXPORT_SYMBOL(get_user_pages_noio); + /** * get_dump_page() - pin user page in memory while writing it to core dump * @addr: user address @@ -2640,6 +2662,9 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, delayacct_set_flag(DELAYACCT_PF_SWAPIN); page = lookup_swap_cache(entry); if (!page) { + if (flags & FAULT_FLAG_MINOR) + return VM_FAULT_MAJOR | VM_FAULT_ERROR; + grab_swap_token(mm); /* Contend for token 
_before_ read-in */ page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vma, address); diff --git a/mm/shmem.c b/mm/shmem.c index f65f840..acc8958 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1227,6 +1227,7 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, swp_entry_t swap; gfp_t gfp; int error; + int flags = type ? *type : 0; if (idx >= SHMEM_MAX_INDEX) return -EFBIG; @@ -1275,6 +1276,11 @@ repeat: swappage = lookup_swap_cache(swap); if (!swappage) { shmem_swp_unmap(entry); + if (flags & FAULT_FLAG_MINOR) { + spin_unlock(&info->lock); + *type = VM_FAULT_MAJOR | VM_FAULT_ERROR; + goto failed; + } /* here we actually do the io */ if (type && !(*type & VM_FAULT_MAJOR)) { __count_vm_event(PGMAJFAULT); @@ -1483,7 +1489,7 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct inode *inode = vma->vm_file->f_path.dentry->d_inode; int error; - int ret; + int ret = (int)vmf->flags; if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) return VM_FAULT_SIGBUS; -- 1.7.1 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2010-07-19 15:33 UTC|newest] Thread overview: 82+ messages / expand[flat|nested] mbox.gz Atom feed top 2010-07-19 15:30 [PATCH v5 00/12] KVM: Add host swap event notifications for PV guest Gleb Natapov 2010-07-19 15:30 ` Gleb Natapov 2010-07-19 15:30 ` [PATCH v5 01/12] Move kvm_smp_prepare_boot_cpu() from kvmclock.c to kvm.c Gleb Natapov 2010-07-19 15:30 ` Gleb Natapov 2010-07-19 15:30 ` [PATCH v5 02/12] Add PV MSR to enable asynchronous page faults delivery Gleb Natapov 2010-07-19 15:30 ` Gleb Natapov 2010-08-23 15:22 ` Avi Kivity 2010-08-23 15:22 ` Avi Kivity 2010-08-23 15:29 ` Gleb Natapov 2010-08-23 15:29 ` Gleb Natapov 2010-07-19 15:30 ` [PATCH v5 03/12] Add async PF initialization to PV guest Gleb Natapov 2010-07-19 15:30 ` Gleb Natapov 2010-07-19 19:52 ` Rik van Riel 2010-07-19 19:52 ` Rik van Riel 2010-08-23 15:26 ` Avi Kivity 2010-08-23 15:26 ` Avi Kivity 2010-08-23 15:35 ` Gleb Natapov 2010-08-23 15:35 ` Gleb Natapov 2010-08-23 16:08 ` Christoph Lameter 2010-08-23 16:08 ` Christoph Lameter 2010-08-23 16:10 ` Gleb Natapov 2010-08-23 16:10 ` Gleb Natapov 2010-08-23 16:19 ` Avi Kivity 2010-08-23 16:19 ` Avi Kivity 2010-07-19 15:30 ` [PATCH v5 04/12] Provide special async page fault handler when async PF capability is detected Gleb Natapov 2010-07-19 15:30 ` Gleb Natapov 2010-08-23 15:48 ` Avi Kivity 2010-08-23 15:48 ` Avi Kivity 2010-08-23 15:52 ` Rik van Riel 2010-08-23 15:52 ` Rik van Riel 2010-08-23 16:22 ` Avi Kivity 2010-08-23 16:22 ` Avi Kivity 2010-08-24 7:31 ` Gleb Natapov 2010-08-24 7:31 ` Gleb Natapov 2010-08-24 9:02 ` Avi Kivity 2010-08-24 9:02 ` Avi Kivity 2010-07-19 15:30 ` [PATCH v5 05/12] Export __get_user_pages_fast Gleb Natapov 2010-07-19 15:30 ` Gleb Natapov 2010-07-19 15:30 ` Gleb Natapov [this message] 2010-07-19 15:30 ` [PATCH v5 06/12] Add get_user_pages() variant that fails if major fault is required Gleb Natapov 2010-08-23 15:50 ` Avi Kivity 2010-08-23 15:50 ` Avi Kivity 2010-07-19 15:30 ` [PATCH 
v5 07/12] Maintain memslot version number Gleb Natapov 2010-07-19 15:30 ` Gleb Natapov 2010-08-23 15:53 ` Avi Kivity 2010-08-23 15:53 ` Avi Kivity 2010-07-19 15:30 ` [PATCH v5 08/12] Inject asynchronous page fault into a guest if page is swapped out Gleb Natapov 2010-07-19 15:30 ` Gleb Natapov 2010-08-23 16:17 ` Avi Kivity 2010-08-23 16:17 ` Avi Kivity 2010-08-24 7:52 ` Gleb Natapov 2010-08-24 7:52 ` Gleb Natapov 2010-08-24 9:04 ` Avi Kivity 2010-08-24 9:04 ` Avi Kivity 2010-08-24 12:28 ` Gleb Natapov 2010-08-24 12:28 ` Gleb Natapov 2010-08-24 12:33 ` Avi Kivity 2010-08-24 12:33 ` Avi Kivity 2010-07-19 15:30 ` [PATCH v5 09/12] Retry fault before vmentry Gleb Natapov 2010-07-19 15:30 ` Gleb Natapov 2010-08-24 9:25 ` Avi Kivity 2010-08-24 9:25 ` Avi Kivity 2010-08-24 9:33 ` Gleb Natapov 2010-08-24 9:33 ` Gleb Natapov 2010-08-24 9:38 ` Avi Kivity 2010-08-24 9:38 ` Avi Kivity 2010-07-19 15:31 ` [PATCH v5 10/12] Handle async PF in non preemptable context Gleb Natapov 2010-07-19 15:31 ` Gleb Natapov 2010-08-24 9:30 ` Avi Kivity 2010-08-24 9:30 ` Avi Kivity 2010-08-24 9:36 ` Gleb Natapov 2010-08-24 9:36 ` Gleb Natapov 2010-08-24 9:46 ` Avi Kivity 2010-08-24 9:46 ` Avi Kivity 2010-07-19 15:31 ` [PATCH v5 11/12] Let host know whether the guest can handle async PF in non-userspace context Gleb Natapov 2010-07-19 15:31 ` Gleb Natapov 2010-08-24 9:31 ` Avi Kivity 2010-08-24 9:31 ` Avi Kivity 2010-07-19 15:31 ` [PATCH v5 12/12] Send async PF when guest is not in userspace too Gleb Natapov 2010-07-19 15:31 ` Gleb Natapov 2010-08-24 9:36 ` Avi Kivity 2010-08-24 9:36 ` Avi Kivity
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=1279553462-7036-7-git-send-email-gleb@redhat.com \ --to=gleb@redhat.com \ --cc=a.p.zijlstra@chello.nl \ --cc=avi@redhat.com \ --cc=cl@linux-foundation.org \ --cc=hpa@zytor.com \ --cc=kvm@vger.kernel.org \ --cc=linux-kernel@vger.kernel.org \ --cc=linux-mm@kvack.org \ --cc=mingo@elte.hu \ --cc=mtosatti@redhat.com \ --cc=riel@redhat.com \ --cc=tglx@linutronix.de \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.