* Re: [PATCH] mm: introduce kvvirt_to_page() helper [not found] <1534596541-31393-1-git-send-email-lirongqing@baidu.com> @ 2018-08-20 14:41 ` Michal Hocko 2018-08-20 14:49 ` Matthew Wilcox 2018-08-20 17:38 ` Matthew Wilcox 0 siblings, 2 replies; 9+ messages in thread From: Michal Hocko @ 2018-08-20 14:41 UTC (permalink / raw) To: Li RongQing Cc: Andrew Morton, Dan Williams, linux-mm, linux-kernel, linux-xfs, Kirill A. Shutemov, Matthew Wilcox, Souptick Joarder, darrick.wong On Sat 18-08-18 20:49:01, Li RongQing wrote: > The new helper returns address mapping page, which has several users > in individual subsystem, like mem_to_page in xfs_buf.c and pgv_to_page > in af_packet.c, unify them kvvirt_to_page is a weird name. I guess you wanted it to fit into kv*alloc, kvfree naming, right? If yes then I guess kvmem_to_page would be slightly better. Other than that the patch makes sense to me. It would be great to add some documentation and be explicit that the call is only safe on directly mapped kernel address and vmalloc areas. 
> Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > Signed-off-by: Li RongQing <lirongqing@baidu.com> > --- > fs/xfs/xfs_buf.c | 13 +------------ > include/linux/mm.h | 8 ++++++++ > net/9p/trans_virtio.c | 5 +---- > net/ceph/crypto.c | 6 +----- > net/packet/af_packet.c | 31 ++++++++++++------------------- > 5 files changed, 23 insertions(+), 40 deletions(-) > > diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c > index e839907e8492..edd22b33f49e 100644 > --- a/fs/xfs/xfs_buf.c > +++ b/fs/xfs/xfs_buf.c > @@ -866,17 +866,6 @@ xfs_buf_set_empty( > bp->b_maps[0].bm_len = bp->b_length; > } > > -static inline struct page * > -mem_to_page( > - void *addr) > -{ > - if ((!is_vmalloc_addr(addr))) { > - return virt_to_page(addr); > - } else { > - return vmalloc_to_page(addr); > - } > -} > - > int > xfs_buf_associate_memory( > xfs_buf_t *bp, > @@ -909,7 +898,7 @@ xfs_buf_associate_memory( > bp->b_offset = offset; > > for (i = 0; i < bp->b_page_count; i++) { > - bp->b_pages[i] = mem_to_page((void *)pageaddr); > + bp->b_pages[i] = kvvirt_to_page((void *)pageaddr); > pageaddr += PAGE_SIZE; > } > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 68a5121694ef..bb34a3c71df5 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -599,6 +599,14 @@ static inline void *kvcalloc(size_t n, size_t size, gfp_t flags) > return kvmalloc_array(n, size, flags | __GFP_ZERO); > } > > +static inline struct page *kvvirt_to_page(const void *addr) > +{ > + if (!is_vmalloc_addr(addr)) > + return virt_to_page(addr); > + else > + return vmalloc_to_page(addr); > +} > + > extern void kvfree(const void *addr); > > static inline atomic_t *compound_mapcount_ptr(struct page *page) > diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c > index 05006cbb3361..8f1895f15593 100644 > --- a/net/9p/trans_virtio.c > +++ b/net/9p/trans_virtio.c > @@ -368,10 +368,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan, > *need_drop = 0; > p -= (*offs = offset_in_page(p)); > for (index = 0; 
index < nr_pages; index++) { > - if (is_vmalloc_addr(p)) > - (*pages)[index] = vmalloc_to_page(p); > - else > - (*pages)[index] = kmap_to_page(p); > + (*pages)[index] = kvvirt_to_page(p); > p += PAGE_SIZE; > } > return len; > diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c > index 02172c408ff2..cc5f0723a44d 100644 > --- a/net/ceph/crypto.c > +++ b/net/ceph/crypto.c > @@ -191,11 +191,7 @@ static int setup_sgtable(struct sg_table *sgt, struct scatterlist *prealloc_sg, > struct page *page; > unsigned int len = min(chunk_len - off, buf_len); > > - if (is_vmalloc) > - page = vmalloc_to_page(buf); > - else > - page = virt_to_page(buf); > - > + page = kvvirt_to_page(buf); > sg_set_page(sg, page, len, off); > > off = 0; > diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c > index 5610061e7f2e..1bec111122ad 100644 > --- a/net/packet/af_packet.c > +++ b/net/packet/af_packet.c > @@ -359,13 +359,6 @@ static void unregister_prot_hook(struct sock *sk, bool sync) > __unregister_prot_hook(sk, sync); > } > > -static inline struct page * __pure pgv_to_page(void *addr) > -{ > - if (is_vmalloc_addr(addr)) > - return vmalloc_to_page(addr); > - return virt_to_page(addr); > -} > - > static void __packet_set_status(struct packet_sock *po, void *frame, int status) > { > union tpacket_uhdr h; > @@ -374,15 +367,15 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status) > switch (po->tp_version) { > case TPACKET_V1: > h.h1->tp_status = status; > - flush_dcache_page(pgv_to_page(&h.h1->tp_status)); > + flush_dcache_page(kvvirt_to_page(&h.h1->tp_status)); > break; > case TPACKET_V2: > h.h2->tp_status = status; > - flush_dcache_page(pgv_to_page(&h.h2->tp_status)); > + flush_dcache_page(kvvirt_to_page(&h.h2->tp_status)); > break; > case TPACKET_V3: > h.h3->tp_status = status; > - flush_dcache_page(pgv_to_page(&h.h3->tp_status)); > + flush_dcache_page(kvvirt_to_page(&h.h3->tp_status)); > break; > default: > WARN(1, "TPACKET version not supported.\n"); > @@ 
-401,13 +394,13 @@ static int __packet_get_status(struct packet_sock *po, void *frame) > h.raw = frame; > switch (po->tp_version) { > case TPACKET_V1: > - flush_dcache_page(pgv_to_page(&h.h1->tp_status)); > + flush_dcache_page(kvvirt_to_page(&h.h1->tp_status)); > return h.h1->tp_status; > case TPACKET_V2: > - flush_dcache_page(pgv_to_page(&h.h2->tp_status)); > + flush_dcache_page(kvvirt_to_page(&h.h2->tp_status)); > return h.h2->tp_status; > case TPACKET_V3: > - flush_dcache_page(pgv_to_page(&h.h3->tp_status)); > + flush_dcache_page(kvvirt_to_page(&h.h3->tp_status)); > return h.h3->tp_status; > default: > WARN(1, "TPACKET version not supported.\n"); > @@ -462,7 +455,7 @@ static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame, > } > > /* one flush is safe, as both fields always lie on the same cacheline */ > - flush_dcache_page(pgv_to_page(&h.h1->tp_sec)); > + flush_dcache_page(kvvirt_to_page(&h.h1->tp_sec)); > smp_wmb(); > > return ts_status; > @@ -728,7 +721,7 @@ static void prb_flush_block(struct tpacket_kbdq_core *pkc1, > > end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end); > for (; start < end; start += PAGE_SIZE) > - flush_dcache_page(pgv_to_page(start)); > + flush_dcache_page(kvvirt_to_page(start)); > > smp_wmb(); > #endif > @@ -741,7 +734,7 @@ static void prb_flush_block(struct tpacket_kbdq_core *pkc1, > > #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 > start = (u8 *)pbd1; > - flush_dcache_page(pgv_to_page(start)); > + flush_dcache_page(kvvirt_to_page(start)); > > smp_wmb(); > #endif > @@ -2352,7 +2345,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, > macoff + snaplen); > > for (start = h.raw; start < end; start += PAGE_SIZE) > - flush_dcache_page(pgv_to_page(start)); > + flush_dcache_page(kvvirt_to_page(start)); > } > smp_wmb(); > #endif > @@ -2508,7 +2501,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, > return -EFAULT; > } > > - page = pgv_to_page(data); > + page = 
kvvirt_to_page(data); > data += len; > flush_dcache_page(page); > get_page(page); > @@ -4385,7 +4378,7 @@ static int packet_mmap(struct file *file, struct socket *sock, > int pg_num; > > for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) { > - page = pgv_to_page(kaddr); > + page = kvvirt_to_page(kaddr); > err = vm_insert_page(vma, start, page); > if (unlikely(err)) > goto out; > -- > 2.16.2 > -- Michal Hocko SUSE Labs ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] mm: introduce kvvirt_to_page() helper 2018-08-20 14:41 ` [PATCH] mm: introduce kvvirt_to_page() helper Michal Hocko @ 2018-08-20 14:49 ` Matthew Wilcox 2018-08-20 16:24 ` Michal Hocko 2018-08-20 17:38 ` Matthew Wilcox 1 sibling, 1 reply; 9+ messages in thread From: Matthew Wilcox @ 2018-08-20 14:49 UTC (permalink / raw) To: Michal Hocko Cc: Li RongQing, Andrew Morton, Dan Williams, linux-mm, linux-kernel, linux-xfs, Kirill A. Shutemov, Matthew Wilcox, Souptick Joarder, darrick.wong On Mon, Aug 20, 2018 at 04:41:16PM +0200, Michal Hocko wrote: > On Sat 18-08-18 20:49:01, Li RongQing wrote: > > The new helper returns address mapping page, which has several users > > in individual subsystem, like mem_to_page in xfs_buf.c and pgv_to_page > > in af_packet.c, unify them > > kvvirt_to_page is a weird name. I guess you wanted it to fit into > kv*alloc, kvfree naming, right? If yes then I guess kvmem_to_page > would be slightly better. > > Other than that the patch makes sense to me. It would be great to add > some documentation and be explicit that the call is only safe on > directly mapped kernel address and vmalloc areas. ... and not safe if the length crosses a page boundary. I don't want to see new code emerge that does kvmalloc(PAGE_SIZE * 2, ...); kvmem_to_page() and have it randomly crash when kvmalloc happens to fall back to vmalloc() under heavy memory pressure. Also, people are going to start using this for stack addresses. Perhaps we should have a debug option to guard against them doing that. 
> > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > Signed-off-by: Li RongQing <lirongqing@baidu.com> > > --- > > fs/xfs/xfs_buf.c | 13 +------------ > > include/linux/mm.h | 8 ++++++++ > > net/9p/trans_virtio.c | 5 +---- > > net/ceph/crypto.c | 6 +----- > > net/packet/af_packet.c | 31 ++++++++++++------------------- > > 5 files changed, 23 insertions(+), 40 deletions(-) > > > > diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c > > index e839907e8492..edd22b33f49e 100644 > > --- a/fs/xfs/xfs_buf.c > > +++ b/fs/xfs/xfs_buf.c > > @@ -866,17 +866,6 @@ xfs_buf_set_empty( > > bp->b_maps[0].bm_len = bp->b_length; > > } > > > > -static inline struct page * > > -mem_to_page( > > - void *addr) > > -{ > > - if ((!is_vmalloc_addr(addr))) { > > - return virt_to_page(addr); > > - } else { > > - return vmalloc_to_page(addr); > > - } > > -} > > - > > int > > xfs_buf_associate_memory( > > xfs_buf_t *bp, > > @@ -909,7 +898,7 @@ xfs_buf_associate_memory( > > bp->b_offset = offset; > > > > for (i = 0; i < bp->b_page_count; i++) { > > - bp->b_pages[i] = mem_to_page((void *)pageaddr); > > + bp->b_pages[i] = kvvirt_to_page((void *)pageaddr); > > pageaddr += PAGE_SIZE; > > } > > > > diff --git a/include/linux/mm.h b/include/linux/mm.h > > index 68a5121694ef..bb34a3c71df5 100644 > > --- a/include/linux/mm.h > > +++ b/include/linux/mm.h > > @@ -599,6 +599,14 @@ static inline void *kvcalloc(size_t n, size_t size, gfp_t flags) > > return kvmalloc_array(n, size, flags | __GFP_ZERO); > > } > > > > +static inline struct page *kvvirt_to_page(const void *addr) > > +{ > > + if (!is_vmalloc_addr(addr)) > > + return virt_to_page(addr); > > + else > > + return vmalloc_to_page(addr); > > +} > > + > > extern void kvfree(const void *addr); > > > > static inline atomic_t *compound_mapcount_ptr(struct page *page) > > diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c > > index 05006cbb3361..8f1895f15593 100644 > > --- a/net/9p/trans_virtio.c > > +++ b/net/9p/trans_virtio.c > > @@ -368,10 +368,7 
@@ static int p9_get_mapped_pages(struct virtio_chan *chan, > > *need_drop = 0; > > p -= (*offs = offset_in_page(p)); > > for (index = 0; index < nr_pages; index++) { > > - if (is_vmalloc_addr(p)) > > - (*pages)[index] = vmalloc_to_page(p); > > - else > > - (*pages)[index] = kmap_to_page(p); > > + (*pages)[index] = kvvirt_to_page(p); > > p += PAGE_SIZE; > > } > > return len; > > diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c > > index 02172c408ff2..cc5f0723a44d 100644 > > --- a/net/ceph/crypto.c > > +++ b/net/ceph/crypto.c > > @@ -191,11 +191,7 @@ static int setup_sgtable(struct sg_table *sgt, struct scatterlist *prealloc_sg, > > struct page *page; > > unsigned int len = min(chunk_len - off, buf_len); > > > > - if (is_vmalloc) > > - page = vmalloc_to_page(buf); > > - else > > - page = virt_to_page(buf); > > - > > + page = kvvirt_to_page(buf); > > sg_set_page(sg, page, len, off); > > > > off = 0; > > diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c > > index 5610061e7f2e..1bec111122ad 100644 > > --- a/net/packet/af_packet.c > > +++ b/net/packet/af_packet.c > > @@ -359,13 +359,6 @@ static void unregister_prot_hook(struct sock *sk, bool sync) > > __unregister_prot_hook(sk, sync); > > } > > > > -static inline struct page * __pure pgv_to_page(void *addr) > > -{ > > - if (is_vmalloc_addr(addr)) > > - return vmalloc_to_page(addr); > > - return virt_to_page(addr); > > -} > > - > > static void __packet_set_status(struct packet_sock *po, void *frame, int status) > > { > > union tpacket_uhdr h; > > @@ -374,15 +367,15 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status) > > switch (po->tp_version) { > > case TPACKET_V1: > > h.h1->tp_status = status; > > - flush_dcache_page(pgv_to_page(&h.h1->tp_status)); > > + flush_dcache_page(kvvirt_to_page(&h.h1->tp_status)); > > break; > > case TPACKET_V2: > > h.h2->tp_status = status; > > - flush_dcache_page(pgv_to_page(&h.h2->tp_status)); > > + 
flush_dcache_page(kvvirt_to_page(&h.h2->tp_status)); > > break; > > case TPACKET_V3: > > h.h3->tp_status = status; > > - flush_dcache_page(pgv_to_page(&h.h3->tp_status)); > > + flush_dcache_page(kvvirt_to_page(&h.h3->tp_status)); > > break; > > default: > > WARN(1, "TPACKET version not supported.\n"); > > @@ -401,13 +394,13 @@ static int __packet_get_status(struct packet_sock *po, void *frame) > > h.raw = frame; > > switch (po->tp_version) { > > case TPACKET_V1: > > - flush_dcache_page(pgv_to_page(&h.h1->tp_status)); > > + flush_dcache_page(kvvirt_to_page(&h.h1->tp_status)); > > return h.h1->tp_status; > > case TPACKET_V2: > > - flush_dcache_page(pgv_to_page(&h.h2->tp_status)); > > + flush_dcache_page(kvvirt_to_page(&h.h2->tp_status)); > > return h.h2->tp_status; > > case TPACKET_V3: > > - flush_dcache_page(pgv_to_page(&h.h3->tp_status)); > > + flush_dcache_page(kvvirt_to_page(&h.h3->tp_status)); > > return h.h3->tp_status; > > default: > > WARN(1, "TPACKET version not supported.\n"); > > @@ -462,7 +455,7 @@ static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame, > > } > > > > /* one flush is safe, as both fields always lie on the same cacheline */ > > - flush_dcache_page(pgv_to_page(&h.h1->tp_sec)); > > + flush_dcache_page(kvvirt_to_page(&h.h1->tp_sec)); > > smp_wmb(); > > > > return ts_status; > > @@ -728,7 +721,7 @@ static void prb_flush_block(struct tpacket_kbdq_core *pkc1, > > > > end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end); > > for (; start < end; start += PAGE_SIZE) > > - flush_dcache_page(pgv_to_page(start)); > > + flush_dcache_page(kvvirt_to_page(start)); > > > > smp_wmb(); > > #endif > > @@ -741,7 +734,7 @@ static void prb_flush_block(struct tpacket_kbdq_core *pkc1, > > > > #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 > > start = (u8 *)pbd1; > > - flush_dcache_page(pgv_to_page(start)); > > + flush_dcache_page(kvvirt_to_page(start)); > > > > smp_wmb(); > > #endif > > @@ -2352,7 +2345,7 @@ static int tpacket_rcv(struct sk_buff 
*skb, struct net_device *dev, > > macoff + snaplen); > > > > for (start = h.raw; start < end; start += PAGE_SIZE) > > - flush_dcache_page(pgv_to_page(start)); > > + flush_dcache_page(kvvirt_to_page(start)); > > } > > smp_wmb(); > > #endif > > @@ -2508,7 +2501,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, > > return -EFAULT; > > } > > > > - page = pgv_to_page(data); > > + page = kvvirt_to_page(data); > > data += len; > > flush_dcache_page(page); > > get_page(page); > > @@ -4385,7 +4378,7 @@ static int packet_mmap(struct file *file, struct socket *sock, > > int pg_num; > > > > for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) { > > - page = pgv_to_page(kaddr); > > + page = kvvirt_to_page(kaddr); > > err = vm_insert_page(vma, start, page); > > if (unlikely(err)) > > goto out; > > -- > > 2.16.2 > > > > -- > Michal Hocko > SUSE Labs > ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] mm: introduce kvvirt_to_page() helper 2018-08-20 14:49 ` Matthew Wilcox @ 2018-08-20 16:24 ` Michal Hocko 2018-08-20 17:07 ` Matthew Wilcox 0 siblings, 1 reply; 9+ messages in thread From: Michal Hocko @ 2018-08-20 16:24 UTC (permalink / raw) To: Matthew Wilcox Cc: Li RongQing, Andrew Morton, Dan Williams, linux-mm, linux-kernel, linux-xfs, Kirill A. Shutemov, Matthew Wilcox, Souptick Joarder, darrick.wong On Mon 20-08-18 07:49:23, Matthew Wilcox wrote: > On Mon, Aug 20, 2018 at 04:41:16PM +0200, Michal Hocko wrote: > > On Sat 18-08-18 20:49:01, Li RongQing wrote: > > > The new helper returns address mapping page, which has several users > > > in individual subsystem, like mem_to_page in xfs_buf.c and pgv_to_page > > > in af_packet.c, unify them > > > > kvvirt_to_page is a weird name. I guess you wanted it to fit into > > kv*alloc, kvfree naming, right? If yes then I guess kvmem_to_page > > would be slightly better. > > > > Other than that the patch makes sense to me. It would be great to add > > some documentation and be explicit that the call is only safe on > > directly mapped kernel address and vmalloc areas. > > ... and not safe if the length crosses a page boundary. I don't want to > see new code emerge that does kvmalloc(PAGE_SIZE * 2, ...); kvmem_to_page() > and have it randomly crash when kvmalloc happens to fall back to vmalloc() > under heavy memory pressure. > > Also, people are going to start using this for stack addresses. Perhaps > we should have a debug option to guard against them doing that. I do agree that such an interface is quite dangerous. That's why I was stressing out the proper documentation. I would be much happier if we could do without it altogether. Maybe the existing users can be reworked to not rely on the addr2page functionality. If that is not the case then we should probably offer a helper. With some WARN_ONs to catch misuse would be really nice. I am not really sure how many abuses can we catch actually though. 
-- Michal Hocko SUSE Labs ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] mm: introduce kvvirt_to_page() helper 2018-08-20 16:24 ` Michal Hocko @ 2018-08-20 17:07 ` Matthew Wilcox 2018-08-20 19:15 ` Michal Hocko 2018-08-20 21:53 ` Dave Chinner 0 siblings, 2 replies; 9+ messages in thread From: Matthew Wilcox @ 2018-08-20 17:07 UTC (permalink / raw) To: Michal Hocko Cc: Li RongQing, Andrew Morton, Dan Williams, linux-mm, linux-kernel, linux-xfs, Kirill A. Shutemov, Matthew Wilcox, Souptick Joarder, darrick.wong On Mon, Aug 20, 2018 at 06:24:06PM +0200, Michal Hocko wrote: > On Mon 20-08-18 07:49:23, Matthew Wilcox wrote: > > On Mon, Aug 20, 2018 at 04:41:16PM +0200, Michal Hocko wrote: > > > On Sat 18-08-18 20:49:01, Li RongQing wrote: > > > > The new helper returns address mapping page, which has several users > > > > in individual subsystem, like mem_to_page in xfs_buf.c and pgv_to_page > > > > in af_packet.c, unify them > > > > > > kvvirt_to_page is a weird name. I guess you wanted it to fit into > > > kv*alloc, kvfree naming, right? If yes then I guess kvmem_to_page > > > would be slightly better. > > > > > > Other than that the patch makes sense to me. It would be great to add > > > some documentation and be explicit that the call is only safe on > > > directly mapped kernel address and vmalloc areas. > > > > ... and not safe if the length crosses a page boundary. I don't want to > > see new code emerge that does kvmalloc(PAGE_SIZE * 2, ...); kvmem_to_page() > > and have it randomly crash when kvmalloc happens to fall back to vmalloc() > > under heavy memory pressure. > > > > Also, people are going to start using this for stack addresses. Perhaps > > we should have a debug option to guard against them doing that. > > I do agree that such an interface is quite dangerous. That's why I was > stressing out the proper documentation. I would be much happier if we > could do without it altogether. Maybe the existing users can be rewoked > to not rely on the addr2page functionality. 
If that is not the case then > we should probably offer a helper. With some WARN_ONs to catch misuse > would be really nice. I am not really sure how many abuses can we catch > actually though. I certainly understand the enthusiasm for sharing this code rather than having dozens of places outside the VM implement their own version of it. But I think most of these users are using code that's working at the wrong level. Most of them seem to have an address range which may-or-may-not-be virtually mapped and they want to get an array-of-pages for that. Perhaps we should offer -that- API instead. vmalloc/vmap already has an array-of-pages, and the various users could be given a pointer to that array. If the memory isn't vmapped, maybe the caller could pass an array pointer like XFS does, or we could require them to pass GFP flags to allocate a new array. ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] mm: introduce kvvirt_to_page() helper 2018-08-20 17:07 ` Matthew Wilcox @ 2018-08-20 19:15 ` Michal Hocko 2018-08-20 21:53 ` Dave Chinner 1 sibling, 0 replies; 9+ messages in thread From: Michal Hocko @ 2018-08-20 19:15 UTC (permalink / raw) To: Matthew Wilcox Cc: Li RongQing, Andrew Morton, Dan Williams, linux-mm, linux-kernel, linux-xfs, Kirill A. Shutemov, Matthew Wilcox, Souptick Joarder, darrick.wong On Mon 20-08-18 10:07:44, Matthew Wilcox wrote: > On Mon, Aug 20, 2018 at 06:24:06PM +0200, Michal Hocko wrote: > > On Mon 20-08-18 07:49:23, Matthew Wilcox wrote: > > > On Mon, Aug 20, 2018 at 04:41:16PM +0200, Michal Hocko wrote: > > > > On Sat 18-08-18 20:49:01, Li RongQing wrote: > > > > > The new helper returns address mapping page, which has several users > > > > > in individual subsystem, like mem_to_page in xfs_buf.c and pgv_to_page > > > > > in af_packet.c, unify them > > > > > > > > kvvirt_to_page is a weird name. I guess you wanted it to fit into > > > > kv*alloc, kvfree naming, right? If yes then I guess kvmem_to_page > > > > would be slightly better. > > > > > > > > Other than that the patch makes sense to me. It would be great to add > > > > some documentation and be explicit that the call is only safe on > > > > directly mapped kernel address and vmalloc areas. > > > > > > ... and not safe if the length crosses a page boundary. I don't want to > > > see new code emerge that does kvmalloc(PAGE_SIZE * 2, ...); kvmem_to_page() > > > and have it randomly crash when kvmalloc happens to fall back to vmalloc() > > > under heavy memory pressure. > > > > > > Also, people are going to start using this for stack addresses. Perhaps > > > we should have a debug option to guard against them doing that. > > > > I do agree that such an interface is quite dangerous. That's why I was > > stressing out the proper documentation. I would be much happier if we > > could do without it altogether. 
Maybe the existing users can be reworked > > to not rely on the addr2page functionality. If that is not the case then > > we should probably offer a helper. With some WARN_ONs to catch misuse > > would be really nice. I am not really sure how many abuses can we catch > > actually though. > > I certainly understand the enthusiasm for sharing this code rather than > having dozens of places outside the VM implement their own version of it. > But I think most of these users are using code that's working at the wrong > level. Most of them seem to have an address range which may-or-may-not-be > virtually mapped and they want to get an array-of-pages for that. > > Perhaps we should offer -that- API instead. vmalloc/vmap already has > an array-of-pages, and the various users could be given a pointer to > that array. If the memory isn't vmapped, maybe the caller could pass > an array pointer like XFS does, or we could require them to pass GFP flags > to allocate a new array. Sure, I wouldn't be opposed if there was a model which doesn't force them to do hacks like this. -- Michal Hocko SUSE Labs ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] mm: introduce kvvirt_to_page() helper 2018-08-20 17:07 ` Matthew Wilcox 2018-08-20 19:15 ` Michal Hocko @ 2018-08-20 21:53 ` Dave Chinner 1 sibling, 0 replies; 9+ messages in thread From: Dave Chinner @ 2018-08-20 21:53 UTC (permalink / raw) To: Matthew Wilcox Cc: Michal Hocko, Li RongQing, Andrew Morton, Dan Williams, linux-mm, linux-kernel, linux-xfs, Kirill A. Shutemov, Matthew Wilcox, Souptick Joarder, darrick.wong On Mon, Aug 20, 2018 at 10:07:44AM -0700, Matthew Wilcox wrote: > On Mon, Aug 20, 2018 at 06:24:06PM +0200, Michal Hocko wrote: > > On Mon 20-08-18 07:49:23, Matthew Wilcox wrote: > > > On Mon, Aug 20, 2018 at 04:41:16PM +0200, Michal Hocko wrote: > > > > On Sat 18-08-18 20:49:01, Li RongQing wrote: > > > > > The new helper returns address mapping page, which has several users > > > > > in individual subsystem, like mem_to_page in xfs_buf.c and pgv_to_page > > > > > in af_packet.c, unify them > > > > > > > > kvvirt_to_page is a weird name. I guess you wanted it to fit into > > > > kv*alloc, kvfree naming, right? If yes then I guess kvmem_to_page > > > > would be slightly better. > > > > > > > > Other than that the patch makes sense to me. It would be great to add > > > > some documentation and be explicit that the call is only safe on > > > > directly mapped kernel address and vmalloc areas. > > > > > > ... and not safe if the length crosses a page boundary. I don't want to > > > see new code emerge that does kvmalloc(PAGE_SIZE * 2, ...); kvmem_to_page() > > > and have it randomly crash when kvmalloc happens to fall back to vmalloc() > > > under heavy memory pressure. > > > > > > Also, people are going to start using this for stack addresses. Perhaps > > > we should have a debug option to guard against them doing that. > > > > I do agree that such an interface is quite dangerous. That's why I was > > stressing out the proper documentation. I would be much happier if we > > could do without it altogether. 
Maybe the existing users can be reworked > > to not rely on the addr2page functionality. If that is not the case then > > we should probably offer a helper. With some WARN_ONs to catch misuse > > would be really nice. I am not really sure how many abuses can we catch > > actually though. > > I certainly understand the enthusiasm for sharing this code rather than > having dozens of places outside the VM implement their own version of it. > But I think most of these users are using code that's working at the wrong > level. Most of them seem to have an address range which may-or-may-not-be > virtually mapped and they want to get an array-of-pages for that. > > Perhaps we should offer -that- API instead. vmalloc/vmap already has > an array-of-pages, and the various users could be given a pointer to > that array. If the memory isn't vmapped, maybe the caller could pass > an array pointer like XFS does, or we could require them to pass GFP flags > to allocate a new array. We have code in XFS to avoid allocating the array for the common case - it ends up embedded in the struct xfs_buf instead and so we avoid needing an extra alloc/free for every single buffer in the fast path. Hence I'd prefer the interface lets the caller supply the result array, similar to the way callers provide their own pagevecs for bulk page lookup functions... Cheers, Dave. -- Dave Chinner david@fromorbit.com ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] mm: introduce kvvirt_to_page() helper 2018-08-20 14:41 ` [PATCH] mm: introduce kvvirt_to_page() helper Michal Hocko 2018-08-20 14:49 ` Matthew Wilcox @ 2018-08-20 17:38 ` Matthew Wilcox 1 sibling, 0 replies; 9+ messages in thread From: Matthew Wilcox @ 2018-08-20 17:38 UTC (permalink / raw) To: Michal Hocko Cc: Li RongQing, Andrew Morton, Dan Williams, linux-mm, linux-kernel, linux-xfs, Kirill A. Shutemov, Matthew Wilcox, Souptick Joarder, darrick.wong On Mon, Aug 20, 2018 at 04:41:16PM +0200, Michal Hocko wrote: > > +++ b/fs/xfs/xfs_buf.c > > for (i = 0; i < bp->b_page_count; i++) { > > - bp->b_pages[i] = mem_to_page((void *)pageaddr); > > + bp->b_pages[i] = kvvirt_to_page((void *)pageaddr); > > pageaddr += PAGE_SIZE; This wants mem_range_to_page_array(). > > +++ b/net/9p/trans_virtio.c > > for (index = 0; index < nr_pages; index++) { > > - if (is_vmalloc_addr(p)) > > - (*pages)[index] = vmalloc_to_page(p); > > - else > > - (*pages)[index] = kmap_to_page(p); > > + (*pages)[index] = kvvirt_to_page(p); Also mem_range_to_page_array(). > > +++ b/net/ceph/crypto.c > > @@ -191,11 +191,7 @@ static int setup_sgtable(struct sg_table *sgt, struct scatterlist *prealloc_sg, > > struct page *page; > > unsigned int len = min(chunk_len - off, buf_len); > > > > - if (is_vmalloc) > > - page = vmalloc_to_page(buf); > > - else > > - page = virt_to_page(buf); > > - > > + page = kvvirt_to_page(buf); > > sg_set_page(sg, page, len, off); > > > > off = 0; This whole function wants to move into the core and be called something like sg_alloc_table_from_virt(). > > +++ b/net/packet/af_packet.c > > @@ -374,15 +367,15 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status) > > switch (po->tp_version) { > > case TPACKET_V1: > > h.h1->tp_status = status; > > - flush_dcache_page(pgv_to_page(&h.h1->tp_status)); > > + flush_dcache_page(kvvirt_to_page(&h.h1->tp_status)); This driver really wants flush_dcache_range(start, len). 
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH] mm: introduce kvvirt_to_page() helper @ 2018-08-16 9:17 Li RongQing 2018-08-16 9:21 ` Michal Hocko 0 siblings, 1 reply; 9+ messages in thread From: Li RongQing @ 2018-08-16 9:17 UTC (permalink / raw) To: Andrew Morton, Dan Williams, linux-mm, linux-kernel Cc: Michal Hocko, Kirill A. Shutemov, Matthew Wilcox, Souptick Joarder The new helper returns address mapping page, which has several users in individual subsystem, like mem_to_page in xfs_buf.c and pgv_to_page in af_packet.c, after this, they can be unified Signed-off-by: Zhang Yu <zhangyu31@baidu.com> Signed-off-by: Li RongQing <lirongqing@baidu.com> --- include/linux/mm.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 68a5121694ef..bb34a3c71df5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -599,6 +599,14 @@ static inline void *kvcalloc(size_t n, size_t size, gfp_t flags) return kvmalloc_array(n, size, flags | __GFP_ZERO); } +static inline struct page *kvvirt_to_page(const void *addr) +{ + if (!is_vmalloc_addr(addr)) + return virt_to_page(addr); + else + return vmalloc_to_page(addr); +} + extern void kvfree(const void *addr); static inline atomic_t *compound_mapcount_ptr(struct page *page) -- 2.16.2 ^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH] mm: introduce kvvirt_to_page() helper 2018-08-16 9:17 Li RongQing @ 2018-08-16 9:21 ` Michal Hocko 0 siblings, 0 replies; 9+ messages in thread From: Michal Hocko @ 2018-08-16 9:21 UTC (permalink / raw) To: Li RongQing Cc: Andrew Morton, Dan Williams, linux-mm, linux-kernel, Kirill A. Shutemov, Matthew Wilcox, Souptick Joarder On Thu 16-08-18 17:17:37, Li RongQing wrote: > The new helper returns address mapping page, which has several users > in individual subsystem, like mem_to_page in xfs_buf.c and pgv_to_page > in af_packet.c, after this, they can be unified Please add users along with the new helper. > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > Signed-off-by: Li RongQing <lirongqing@baidu.com> > --- > include/linux/mm.h | 8 ++++++++ > 1 file changed, 8 insertions(+) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 68a5121694ef..bb34a3c71df5 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -599,6 +599,14 @@ static inline void *kvcalloc(size_t n, size_t size, gfp_t flags) > return kvmalloc_array(n, size, flags | __GFP_ZERO); > } > > +static inline struct page *kvvirt_to_page(const void *addr) > +{ > + if (!is_vmalloc_addr(addr)) > + return virt_to_page(addr); > + else > + return vmalloc_to_page(addr); > +} > + > extern void kvfree(const void *addr); > > static inline atomic_t *compound_mapcount_ptr(struct page *page) > -- > 2.16.2 > -- Michal Hocko SUSE Labs ^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2018-08-20 21:53 UTC | newest] Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- [not found] <1534596541-31393-1-git-send-email-lirongqing@baidu.com> 2018-08-20 14:41 ` [PATCH] mm: introduce kvvirt_to_page() helper Michal Hocko 2018-08-20 14:49 ` Matthew Wilcox 2018-08-20 16:24 ` Michal Hocko 2018-08-20 17:07 ` Matthew Wilcox 2018-08-20 19:15 ` Michal Hocko 2018-08-20 21:53 ` Dave Chinner 2018-08-20 17:38 ` Matthew Wilcox 2018-08-16 9:17 Li RongQing 2018-08-16 9:21 ` Michal Hocko
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).