All of lore.kernel.org
 help / color / mirror / Atom feed
From: Balbir Singh <bsingharora@gmail.com>
To: "Jérôme Glisse" <jglisse@redhat.com>,
	akpm@linux-foundation.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org
Cc: John Hubbard <jhubbard@nvidia.com>,
	Dan Williams <dan.j.williams@intel.com>,
	Ross Zwisler <ross.zwisler@linux.intel.com>
Subject: Re: [HMM v13 06/18] mm/ZONE_DEVICE/unaddressable: add special swap for unaddressable
Date: Mon, 21 Nov 2016 13:06:45 +1100	[thread overview]
Message-ID: <3f759fff-fe8d-89c4-5c86-c9f27403bf3b@gmail.com> (raw)
In-Reply-To: <1479493107-982-7-git-send-email-jglisse@redhat.com>



On 19/11/16 05:18, Jérôme Glisse wrote:
> To allow use of device un-addressable memory inside a process add a
> special swap type. Also add a new callback to handle page fault on
> such entry.
> 
> Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
> ---
>  fs/proc/task_mmu.c       | 10 +++++++-
>  include/linux/memremap.h |  5 ++++
>  include/linux/swap.h     | 18 ++++++++++---
>  include/linux/swapops.h  | 67 ++++++++++++++++++++++++++++++++++++++++++++++++
>  kernel/memremap.c        | 14 ++++++++++
>  mm/Kconfig               | 12 +++++++++
>  mm/memory.c              | 24 +++++++++++++++++
>  mm/mprotect.c            | 12 +++++++++
>  8 files changed, 158 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index 6909582..0726d39 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -544,8 +544,11 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
>  			} else {
>  				mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
>  			}
> -		} else if (is_migration_entry(swpent))
> +		} else if (is_migration_entry(swpent)) {
>  			page = migration_entry_to_page(swpent);
> +		} else if (is_device_entry(swpent)) {
> +			page = device_entry_to_page(swpent);
> +		}


Is the reason there is a device swap entry for a page belonging to a user process
that the page is in the middle of migration? Or does a device swap entry always
represent unaddressable memory belonging to a GPU device, which is nevertheless
tracked in the page table entries of the process?

>  	} else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap
>  							&& pte_none(*pte))) {
>  		page = find_get_entry(vma->vm_file->f_mapping,
> @@ -708,6 +711,8 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
>  
>  		if (is_migration_entry(swpent))
>  			page = migration_entry_to_page(swpent);
> +		if (is_device_entry(swpent))
> +			page = device_entry_to_page(swpent);
>  	}
>  	if (page) {
>  		int mapcount = page_mapcount(page);
> @@ -1191,6 +1196,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
>  		flags |= PM_SWAP;
>  		if (is_migration_entry(entry))
>  			page = migration_entry_to_page(entry);
> +
> +		if (is_device_entry(entry))
> +			page = device_entry_to_page(entry);
>  	}
>  
>  	if (page && !PageAnon(page))
> diff --git a/include/linux/memremap.h b/include/linux/memremap.h
> index b6f03e9..d584c74 100644
> --- a/include/linux/memremap.h
> +++ b/include/linux/memremap.h
> @@ -47,6 +47,11 @@ static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
>   */
>  struct dev_pagemap {
>  	void (*free_devpage)(struct page *page, void *data);
> +	int (*fault)(struct vm_area_struct *vma,
> +		     unsigned long addr,
> +		     struct page *page,
> +		     unsigned flags,
> +		     pmd_t *pmdp);
>  	struct vmem_altmap *altmap;
>  	const struct resource *res;
>  	struct percpu_ref *ref;
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 7e553e1..599cb54 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -50,6 +50,17 @@ static inline int current_is_kswapd(void)
>   */
>  
>  /*
> + * Un-addressable device memory support
> + */
> +#ifdef CONFIG_DEVICE_UNADDRESSABLE
> +#define SWP_DEVICE_NUM 2
> +#define SWP_DEVICE_WRITE (MAX_SWAPFILES + SWP_HWPOISON_NUM + SWP_MIGRATION_NUM)
> +#define SWP_DEVICE (MAX_SWAPFILES + SWP_HWPOISON_NUM + SWP_MIGRATION_NUM + 1)
> +#else
> +#define SWP_DEVICE_NUM 0
> +#endif
> +
> +/*
>   * NUMA node memory migration support
>   */
>  #ifdef CONFIG_MIGRATION
> @@ -71,7 +82,8 @@ static inline int current_is_kswapd(void)
>  #endif
>  
>  #define MAX_SWAPFILES \
> -	((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
> +	((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \
> +	SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
>  
>  /*
>   * Magic header for a swap area. The first part of the union is
> @@ -442,8 +454,8 @@ static inline void show_swap_cache_info(void)
>  {
>  }
>  
> -#define free_swap_and_cache(swp)	is_migration_entry(swp)
> -#define swapcache_prepare(swp)		is_migration_entry(swp)
> +#define free_swap_and_cache(e) (is_migration_entry(e) || is_device_entry(e))
> +#define swapcache_prepare(e) (is_migration_entry(e) || is_device_entry(e))
>  
>  static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
>  {
> diff --git a/include/linux/swapops.h b/include/linux/swapops.h
> index 5c3a5f3..d1aa425 100644
> --- a/include/linux/swapops.h
> +++ b/include/linux/swapops.h
> @@ -100,6 +100,73 @@ static inline void *swp_to_radix_entry(swp_entry_t entry)
>  	return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY);
>  }
>  
> +#ifdef CONFIG_DEVICE_UNADDRESSABLE
> +static inline swp_entry_t make_device_entry(struct page *page, bool write)
> +{
> +	return swp_entry(write?SWP_DEVICE_WRITE:SWP_DEVICE, page_to_pfn(page));

Code style: this line needs spaces around '?' and ':' (checkpatch will flag it).

> +}
> +
> +static inline bool is_device_entry(swp_entry_t entry)
> +{
> +	int type = swp_type(entry);
> +	return type == SWP_DEVICE || type == SWP_DEVICE_WRITE;
> +}
> +
> +static inline void make_device_entry_read(swp_entry_t *entry)
> +{
> +	*entry = swp_entry(SWP_DEVICE, swp_offset(*entry));
> +}
> +
> +static inline bool is_write_device_entry(swp_entry_t entry)
> +{
> +	return unlikely(swp_type(entry) == SWP_DEVICE_WRITE);
> +}
> +
> +static inline struct page *device_entry_to_page(swp_entry_t entry)
> +{
> +	return pfn_to_page(swp_offset(entry));
> +}
> +
> +int device_entry_fault(struct vm_area_struct *vma,
> +		       unsigned long addr,
> +		       swp_entry_t entry,
> +		       unsigned flags,
> +		       pmd_t *pmdp);
> +#else /* CONFIG_DEVICE_UNADDRESSABLE */
> +static inline swp_entry_t make_device_entry(struct page *page, bool write)
> +{
> +	return swp_entry(0, 0);
> +}
> +
> +static inline void make_device_entry_read(swp_entry_t *entry)
> +{
> +}
> +
> +static inline bool is_device_entry(swp_entry_t entry)
> +{
> +	return false;
> +}
> +
> +static inline bool is_write_device_entry(swp_entry_t entry)
> +{
> +	return false;
> +}
> +
> +static inline struct page *device_entry_to_page(swp_entry_t entry)
> +{
> +	return NULL;
> +}
> +
> +static inline int device_entry_fault(struct vm_area_struct *vma,
> +				     unsigned long addr,
> +				     swp_entry_t entry,
> +				     unsigned flags,
> +				     pmd_t *pmdp)
> +{
> +	return VM_FAULT_SIGBUS;
> +}
> +#endif /* CONFIG_DEVICE_UNADDRESSABLE */
> +
>  #ifdef CONFIG_MIGRATION
>  static inline swp_entry_t make_migration_entry(struct page *page, int write)
>  {
> diff --git a/kernel/memremap.c b/kernel/memremap.c
> index cf83928..0670015 100644
> --- a/kernel/memremap.c
> +++ b/kernel/memremap.c
> @@ -18,6 +18,8 @@
>  #include <linux/io.h>
>  #include <linux/mm.h>
>  #include <linux/memory_hotplug.h>
> +#include <linux/swap.h>
> +#include <linux/swapops.h>
>  
>  #ifndef ioremap_cache
>  /* temporary while we convert existing ioremap_cache users to memremap */
> @@ -200,6 +202,18 @@ void put_zone_device_page(struct page *page)
>  }
>  EXPORT_SYMBOL(put_zone_device_page);
>  
> +int device_entry_fault(struct vm_area_struct *vma,
> +		       unsigned long addr,
> +		       swp_entry_t entry,
> +		       unsigned flags,
> +		       pmd_t *pmdp)
> +{
> +	struct page *page = device_entry_to_page(entry);
> +
> +	return page->pgmap->fault(vma, addr, page, flags, pmdp);
> +}
> +EXPORT_SYMBOL(device_entry_fault);
> +
>  static void pgmap_radix_release(struct resource *res)
>  {
>  	resource_size_t key, align_start, align_size, align_end;
> diff --git a/mm/Kconfig b/mm/Kconfig
> index be0ee11..0a21411 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -704,6 +704,18 @@ config ZONE_DEVICE
>  
>  	  If FS_DAX is enabled, then say Y.
>  
> +config DEVICE_UNADDRESSABLE
> +	bool "Un-addressable device memory (GPU memory, ...)"
> +	depends on ZONE_DEVICE
> +
> +	help
> +	  Allow to create struct page for un-addressable device memory
> +	  ie memory that is only accessible by the device (or group of
> +	  devices).
> +
> +	  This allow to migrate chunk of process memory to device memory
> +	  while that memory is use by the device.
> +
>  config FRAME_VECTOR
>  	bool
>  
> diff --git a/mm/memory.c b/mm/memory.c
> index 15f2908..a83d690 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -889,6 +889,21 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>  					pte = pte_swp_mksoft_dirty(pte);
>  				set_pte_at(src_mm, addr, src_pte, pte);
>  			}
> +		} else if (is_device_entry(entry)) {
> +			page = device_entry_to_page(entry);
> +
> +			get_page(page);
> +			rss[mm_counter(page)]++;

Why does rss count go up?

> +			page_dup_rmap(page, false);
> +
> +			if (is_write_device_entry(entry) &&
> +			    is_cow_mapping(vm_flags)) {
> +				make_device_entry_read(&entry);
> +				pte = swp_entry_to_pte(entry);
> +				if (pte_swp_soft_dirty(*src_pte))
> +					pte = pte_swp_mksoft_dirty(pte);
> +				set_pte_at(src_mm, addr, src_pte, pte);
> +			}
>  		}
>  		goto out_set_pte;
>  	}
> @@ -1191,6 +1206,12 @@ again:
>  
>  			page = migration_entry_to_page(entry);
>  			rss[mm_counter(page)]--;
> +		} else if (is_device_entry(entry)) {
> +			struct page *page = device_entry_to_page(entry);
> +			rss[mm_counter(page)]--;
> +
> +			page_remove_rmap(page, false);
> +			put_page(page);
>  		}
>  		if (unlikely(!free_swap_and_cache(entry)))
>  			print_bad_pte(vma, addr, ptent, NULL);
> @@ -2536,6 +2557,9 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte)
>  	if (unlikely(non_swap_entry(entry))) {
>  		if (is_migration_entry(entry)) {
>  			migration_entry_wait(vma->vm_mm, fe->pmd, fe->address);
> +		} else if (is_device_entry(entry)) {
> +			ret = device_entry_fault(vma, fe->address, entry,
> +						 fe->flags, fe->pmd);

What does device_entry_fault() actually do here?

>  		} else if (is_hwpoison_entry(entry)) {
>  			ret = VM_FAULT_HWPOISON;
>  		} else {
> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index 1bc1eb3..70aff3a 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -139,6 +139,18 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
>  
>  				pages++;
>  			}
> +
> +			if (is_write_device_entry(entry)) {
> +				pte_t newpte;
> +
> +				make_device_entry_read(&entry);
> +				newpte = swp_entry_to_pte(entry);
> +				if (pte_swp_soft_dirty(oldpte))
> +					newpte = pte_swp_mksoft_dirty(newpte);
> +				set_pte_at(mm, addr, pte, newpte);
> +
> +				pages++;
> +			}

Does it make sense to call mprotect() on device memory ranges?

>  		}
>  	} while (pte++, addr += PAGE_SIZE, addr != end);
>  	arch_leave_lazy_mmu_mode();
> 

WARNING: multiple messages have this Message-ID (diff)
From: Balbir Singh <bsingharora@gmail.com>
To: "Jérôme Glisse" <jglisse@redhat.com>,
	akpm@linux-foundation.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org
Cc: John Hubbard <jhubbard@nvidia.com>,
	Dan Williams <dan.j.williams@intel.com>,
	Ross Zwisler <ross.zwisler@linux.intel.com>
Subject: Re: [HMM v13 06/18] mm/ZONE_DEVICE/unaddressable: add special swap for unaddressable
Date: Mon, 21 Nov 2016 13:06:45 +1100	[thread overview]
Message-ID: <3f759fff-fe8d-89c4-5c86-c9f27403bf3b@gmail.com> (raw)
In-Reply-To: <1479493107-982-7-git-send-email-jglisse@redhat.com>



On 19/11/16 05:18, Jérôme Glisse wrote:
> To allow use of device un-addressable memory inside a process add a
> special swap type. Also add a new callback to handle page fault on
> such entry.
> 
> Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
> ---
>  fs/proc/task_mmu.c       | 10 +++++++-
>  include/linux/memremap.h |  5 ++++
>  include/linux/swap.h     | 18 ++++++++++---
>  include/linux/swapops.h  | 67 ++++++++++++++++++++++++++++++++++++++++++++++++
>  kernel/memremap.c        | 14 ++++++++++
>  mm/Kconfig               | 12 +++++++++
>  mm/memory.c              | 24 +++++++++++++++++
>  mm/mprotect.c            | 12 +++++++++
>  8 files changed, 158 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index 6909582..0726d39 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -544,8 +544,11 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
>  			} else {
>  				mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
>  			}
> -		} else if (is_migration_entry(swpent))
> +		} else if (is_migration_entry(swpent)) {
>  			page = migration_entry_to_page(swpent);
> +		} else if (is_device_entry(swpent)) {
> +			page = device_entry_to_page(swpent);
> +		}


Is the reason there is a device swap entry for a page belonging to a user process
that the page is in the middle of migration? Or does a device swap entry always
represent unaddressable memory belonging to a GPU device, which is nevertheless
tracked in the page table entries of the process?

>  	} else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap
>  							&& pte_none(*pte))) {
>  		page = find_get_entry(vma->vm_file->f_mapping,
> @@ -708,6 +711,8 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
>  
>  		if (is_migration_entry(swpent))
>  			page = migration_entry_to_page(swpent);
> +		if (is_device_entry(swpent))
> +			page = device_entry_to_page(swpent);
>  	}
>  	if (page) {
>  		int mapcount = page_mapcount(page);
> @@ -1191,6 +1196,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
>  		flags |= PM_SWAP;
>  		if (is_migration_entry(entry))
>  			page = migration_entry_to_page(entry);
> +
> +		if (is_device_entry(entry))
> +			page = device_entry_to_page(entry);
>  	}
>  
>  	if (page && !PageAnon(page))
> diff --git a/include/linux/memremap.h b/include/linux/memremap.h
> index b6f03e9..d584c74 100644
> --- a/include/linux/memremap.h
> +++ b/include/linux/memremap.h
> @@ -47,6 +47,11 @@ static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
>   */
>  struct dev_pagemap {
>  	void (*free_devpage)(struct page *page, void *data);
> +	int (*fault)(struct vm_area_struct *vma,
> +		     unsigned long addr,
> +		     struct page *page,
> +		     unsigned flags,
> +		     pmd_t *pmdp);
>  	struct vmem_altmap *altmap;
>  	const struct resource *res;
>  	struct percpu_ref *ref;
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 7e553e1..599cb54 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -50,6 +50,17 @@ static inline int current_is_kswapd(void)
>   */
>  
>  /*
> + * Un-addressable device memory support
> + */
> +#ifdef CONFIG_DEVICE_UNADDRESSABLE
> +#define SWP_DEVICE_NUM 2
> +#define SWP_DEVICE_WRITE (MAX_SWAPFILES + SWP_HWPOISON_NUM + SWP_MIGRATION_NUM)
> +#define SWP_DEVICE (MAX_SWAPFILES + SWP_HWPOISON_NUM + SWP_MIGRATION_NUM + 1)
> +#else
> +#define SWP_DEVICE_NUM 0
> +#endif
> +
> +/*
>   * NUMA node memory migration support
>   */
>  #ifdef CONFIG_MIGRATION
> @@ -71,7 +82,8 @@ static inline int current_is_kswapd(void)
>  #endif
>  
>  #define MAX_SWAPFILES \
> -	((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
> +	((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \
> +	SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
>  
>  /*
>   * Magic header for a swap area. The first part of the union is
> @@ -442,8 +454,8 @@ static inline void show_swap_cache_info(void)
>  {
>  }
>  
> -#define free_swap_and_cache(swp)	is_migration_entry(swp)
> -#define swapcache_prepare(swp)		is_migration_entry(swp)
> +#define free_swap_and_cache(e) (is_migration_entry(e) || is_device_entry(e))
> +#define swapcache_prepare(e) (is_migration_entry(e) || is_device_entry(e))
>  
>  static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
>  {
> diff --git a/include/linux/swapops.h b/include/linux/swapops.h
> index 5c3a5f3..d1aa425 100644
> --- a/include/linux/swapops.h
> +++ b/include/linux/swapops.h
> @@ -100,6 +100,73 @@ static inline void *swp_to_radix_entry(swp_entry_t entry)
>  	return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY);
>  }
>  
> +#ifdef CONFIG_DEVICE_UNADDRESSABLE
> +static inline swp_entry_t make_device_entry(struct page *page, bool write)
> +{
> +	return swp_entry(write?SWP_DEVICE_WRITE:SWP_DEVICE, page_to_pfn(page));

Code style: this line needs spaces around '?' and ':' (checkpatch will flag it).

> +}
> +
> +static inline bool is_device_entry(swp_entry_t entry)
> +{
> +	int type = swp_type(entry);
> +	return type == SWP_DEVICE || type == SWP_DEVICE_WRITE;
> +}
> +
> +static inline void make_device_entry_read(swp_entry_t *entry)
> +{
> +	*entry = swp_entry(SWP_DEVICE, swp_offset(*entry));
> +}
> +
> +static inline bool is_write_device_entry(swp_entry_t entry)
> +{
> +	return unlikely(swp_type(entry) == SWP_DEVICE_WRITE);
> +}
> +
> +static inline struct page *device_entry_to_page(swp_entry_t entry)
> +{
> +	return pfn_to_page(swp_offset(entry));
> +}
> +
> +int device_entry_fault(struct vm_area_struct *vma,
> +		       unsigned long addr,
> +		       swp_entry_t entry,
> +		       unsigned flags,
> +		       pmd_t *pmdp);
> +#else /* CONFIG_DEVICE_UNADDRESSABLE */
> +static inline swp_entry_t make_device_entry(struct page *page, bool write)
> +{
> +	return swp_entry(0, 0);
> +}
> +
> +static inline void make_device_entry_read(swp_entry_t *entry)
> +{
> +}
> +
> +static inline bool is_device_entry(swp_entry_t entry)
> +{
> +	return false;
> +}
> +
> +static inline bool is_write_device_entry(swp_entry_t entry)
> +{
> +	return false;
> +}
> +
> +static inline struct page *device_entry_to_page(swp_entry_t entry)
> +{
> +	return NULL;
> +}
> +
> +static inline int device_entry_fault(struct vm_area_struct *vma,
> +				     unsigned long addr,
> +				     swp_entry_t entry,
> +				     unsigned flags,
> +				     pmd_t *pmdp)
> +{
> +	return VM_FAULT_SIGBUS;
> +}
> +#endif /* CONFIG_DEVICE_UNADDRESSABLE */
> +
>  #ifdef CONFIG_MIGRATION
>  static inline swp_entry_t make_migration_entry(struct page *page, int write)
>  {
> diff --git a/kernel/memremap.c b/kernel/memremap.c
> index cf83928..0670015 100644
> --- a/kernel/memremap.c
> +++ b/kernel/memremap.c
> @@ -18,6 +18,8 @@
>  #include <linux/io.h>
>  #include <linux/mm.h>
>  #include <linux/memory_hotplug.h>
> +#include <linux/swap.h>
> +#include <linux/swapops.h>
>  
>  #ifndef ioremap_cache
>  /* temporary while we convert existing ioremap_cache users to memremap */
> @@ -200,6 +202,18 @@ void put_zone_device_page(struct page *page)
>  }
>  EXPORT_SYMBOL(put_zone_device_page);
>  
> +int device_entry_fault(struct vm_area_struct *vma,
> +		       unsigned long addr,
> +		       swp_entry_t entry,
> +		       unsigned flags,
> +		       pmd_t *pmdp)
> +{
> +	struct page *page = device_entry_to_page(entry);
> +
> +	return page->pgmap->fault(vma, addr, page, flags, pmdp);
> +}
> +EXPORT_SYMBOL(device_entry_fault);
> +
>  static void pgmap_radix_release(struct resource *res)
>  {
>  	resource_size_t key, align_start, align_size, align_end;
> diff --git a/mm/Kconfig b/mm/Kconfig
> index be0ee11..0a21411 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -704,6 +704,18 @@ config ZONE_DEVICE
>  
>  	  If FS_DAX is enabled, then say Y.
>  
> +config DEVICE_UNADDRESSABLE
> +	bool "Un-addressable device memory (GPU memory, ...)"
> +	depends on ZONE_DEVICE
> +
> +	help
> +	  Allow to create struct page for un-addressable device memory
> +	  ie memory that is only accessible by the device (or group of
> +	  devices).
> +
> +	  This allow to migrate chunk of process memory to device memory
> +	  while that memory is use by the device.
> +
>  config FRAME_VECTOR
>  	bool
>  
> diff --git a/mm/memory.c b/mm/memory.c
> index 15f2908..a83d690 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -889,6 +889,21 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>  					pte = pte_swp_mksoft_dirty(pte);
>  				set_pte_at(src_mm, addr, src_pte, pte);
>  			}
> +		} else if (is_device_entry(entry)) {
> +			page = device_entry_to_page(entry);
> +
> +			get_page(page);
> +			rss[mm_counter(page)]++;

Why does rss count go up?

> +			page_dup_rmap(page, false);
> +
> +			if (is_write_device_entry(entry) &&
> +			    is_cow_mapping(vm_flags)) {
> +				make_device_entry_read(&entry);
> +				pte = swp_entry_to_pte(entry);
> +				if (pte_swp_soft_dirty(*src_pte))
> +					pte = pte_swp_mksoft_dirty(pte);
> +				set_pte_at(src_mm, addr, src_pte, pte);
> +			}
>  		}
>  		goto out_set_pte;
>  	}
> @@ -1191,6 +1206,12 @@ again:
>  
>  			page = migration_entry_to_page(entry);
>  			rss[mm_counter(page)]--;
> +		} else if (is_device_entry(entry)) {
> +			struct page *page = device_entry_to_page(entry);
> +			rss[mm_counter(page)]--;
> +
> +			page_remove_rmap(page, false);
> +			put_page(page);
>  		}
>  		if (unlikely(!free_swap_and_cache(entry)))
>  			print_bad_pte(vma, addr, ptent, NULL);
> @@ -2536,6 +2557,9 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte)
>  	if (unlikely(non_swap_entry(entry))) {
>  		if (is_migration_entry(entry)) {
>  			migration_entry_wait(vma->vm_mm, fe->pmd, fe->address);
> +		} else if (is_device_entry(entry)) {
> +			ret = device_entry_fault(vma, fe->address, entry,
> +						 fe->flags, fe->pmd);

What does device_entry_fault() actually do here?

>  		} else if (is_hwpoison_entry(entry)) {
>  			ret = VM_FAULT_HWPOISON;
>  		} else {
> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index 1bc1eb3..70aff3a 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -139,6 +139,18 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
>  
>  				pages++;
>  			}
> +
> +			if (is_write_device_entry(entry)) {
> +				pte_t newpte;
> +
> +				make_device_entry_read(&entry);
> +				newpte = swp_entry_to_pte(entry);
> +				if (pte_swp_soft_dirty(oldpte))
> +					newpte = pte_swp_mksoft_dirty(newpte);
> +				set_pte_at(mm, addr, pte, newpte);
> +
> +				pages++;
> +			}

Does it make sense to call mprotect() on device memory ranges?

>  		}
>  	} while (pte++, addr += PAGE_SIZE, addr != end);
>  	arch_leave_lazy_mmu_mode();
> 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2016-11-21  2:06 UTC|newest]

Thread overview: 146+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-11-18 18:18 [HMM v13 00/18] HMM (Heterogeneous Memory Management) v13 Jérôme Glisse
2016-11-18 18:18 ` Jérôme Glisse
2016-11-18 18:18 ` [HMM v13 01/18] mm/memory/hotplug: convert device parameter bool to set of flags Jérôme Glisse
2016-11-18 18:18   ` Jérôme Glisse
2016-11-21  0:44   ` Balbir Singh
2016-11-21  0:44     ` Balbir Singh
2016-11-21  4:53     ` Jerome Glisse
2016-11-21  4:53       ` Jerome Glisse
2016-11-21  6:57       ` Anshuman Khandual
2016-11-21  6:57         ` Anshuman Khandual
2016-11-21 12:19         ` Jerome Glisse
2016-11-21 12:19           ` Jerome Glisse
2016-11-21  6:41   ` Anshuman Khandual
2016-11-21  6:41     ` Anshuman Khandual
2016-11-21 12:27     ` Jerome Glisse
2016-11-21 12:27       ` Jerome Glisse
2016-11-22  5:35       ` Anshuman Khandual
2016-11-22  5:35         ` Anshuman Khandual
2016-11-22 14:08         ` Jerome Glisse
2016-11-22 14:08           ` Jerome Glisse
2016-11-18 18:18 ` [HMM v13 02/18] mm/ZONE_DEVICE/unaddressable: add support for un-addressable device memory Jérôme Glisse
2016-11-18 18:18   ` Jérôme Glisse
2016-11-21  8:06   ` Anshuman Khandual
2016-11-21  8:06     ` Anshuman Khandual
2016-11-21 12:33     ` Jerome Glisse
2016-11-21 12:33       ` Jerome Glisse
2016-11-22  5:15       ` Anshuman Khandual
2016-11-22  5:15         ` Anshuman Khandual
2016-11-18 18:18 ` [HMM v13 03/18] mm/ZONE_DEVICE/free_hot_cold_page: catch ZONE_DEVICE pages Jérôme Glisse
2016-11-18 18:18   ` Jérôme Glisse
2016-11-21  8:18   ` Anshuman Khandual
2016-11-21  8:18     ` Anshuman Khandual
2016-11-21 12:50     ` Jerome Glisse
2016-11-21 12:50       ` Jerome Glisse
2016-11-22  4:30       ` Anshuman Khandual
2016-11-22  4:30         ` Anshuman Khandual
2016-11-18 18:18 ` [HMM v13 04/18] mm/ZONE_DEVICE/free-page: callback when page is freed Jérôme Glisse
2016-11-18 18:18   ` Jérôme Glisse
2016-11-21  1:49   ` Balbir Singh
2016-11-21  1:49     ` Balbir Singh
2016-11-21  4:57     ` Jerome Glisse
2016-11-21  4:57       ` Jerome Glisse
2016-11-21  8:26   ` Anshuman Khandual
2016-11-21  8:26     ` Anshuman Khandual
2016-11-21 12:34     ` Jerome Glisse
2016-11-21 12:34       ` Jerome Glisse
2016-11-22  5:02       ` Anshuman Khandual
2016-11-22  5:02         ` Anshuman Khandual
2016-11-18 18:18 ` [HMM v13 05/18] mm/ZONE_DEVICE/devmem_pages_remove: allow early removal of device memory Jérôme Glisse
2016-11-18 18:18   ` Jérôme Glisse
2016-11-21 10:37   ` Anshuman Khandual
2016-11-21 10:37     ` Anshuman Khandual
2016-11-21 12:39     ` Jerome Glisse
2016-11-21 12:39       ` Jerome Glisse
2016-11-22  4:54       ` Anshuman Khandual
2016-11-22  4:54         ` Anshuman Khandual
2016-11-18 18:18 ` [HMM v13 06/18] mm/ZONE_DEVICE/unaddressable: add special swap for unaddressable Jérôme Glisse
2016-11-18 18:18   ` Jérôme Glisse
2016-11-21  2:06   ` Balbir Singh [this message]
2016-11-21  2:06     ` Balbir Singh
2016-11-21  5:05     ` Jerome Glisse
2016-11-21  5:05       ` Jerome Glisse
2016-11-22  2:19       ` Balbir Singh
2016-11-22  2:19         ` Balbir Singh
2016-11-22 13:59         ` Jerome Glisse
2016-11-22 13:59           ` Jerome Glisse
2016-11-21 11:10     ` Anshuman Khandual
2016-11-21 11:10       ` Anshuman Khandual
2016-11-21 10:58   ` Anshuman Khandual
2016-11-21 10:58     ` Anshuman Khandual
2016-11-21 12:42     ` Jerome Glisse
2016-11-21 12:42       ` Jerome Glisse
2016-11-22  4:48       ` Anshuman Khandual
2016-11-22  4:48         ` Anshuman Khandual
2016-11-24 13:56         ` Jerome Glisse
2016-11-24 13:56           ` Jerome Glisse
2016-11-18 18:18 ` [HMM v13 07/18] mm/ZONE_DEVICE/x86: add support for un-addressable device memory Jérôme Glisse
2016-11-18 18:18   ` Jérôme Glisse
2016-11-21  2:08   ` Balbir Singh
2016-11-21  2:08     ` Balbir Singh
2016-11-21  5:08     ` Jerome Glisse
2016-11-21  5:08       ` Jerome Glisse
2016-11-18 18:18 ` [HMM v13 08/18] mm/hmm: heterogeneous memory management (HMM for short) Jérôme Glisse
2016-11-18 18:18   ` Jérôme Glisse
2016-11-21  2:29   ` Balbir Singh
2016-11-21  2:29     ` Balbir Singh
2016-11-21  5:14     ` Jerome Glisse
2016-11-21  5:14       ` Jerome Glisse
2016-11-23  4:03   ` Anshuman Khandual
2016-11-23  4:03     ` Anshuman Khandual
2016-11-27 13:10     ` Jerome Glisse
2016-11-27 13:10       ` Jerome Glisse
2016-11-28  2:58       ` Anshuman Khandual
2016-11-28  2:58         ` Anshuman Khandual
2016-11-28  9:41         ` Jerome Glisse
2016-11-28  9:41           ` Jerome Glisse
2016-11-18 18:18 ` [HMM v13 09/18] mm/hmm/mirror: mirror process address space on device with HMM helpers Jérôme Glisse
2016-11-18 18:18   ` Jérôme Glisse
2016-11-21  2:42   ` Balbir Singh
2016-11-21  2:42     ` Balbir Singh
2016-11-21  5:18     ` Jerome Glisse
2016-11-21  5:18       ` Jerome Glisse
2016-11-18 18:18 ` [HMM v13 10/18] mm/hmm/mirror: add range lock helper, prevent CPU page table update for the range Jérôme Glisse
2016-11-18 18:18   ` Jérôme Glisse
2016-11-18 18:18 ` [HMM v13 11/18] mm/hmm/mirror: add range monitor helper, to monitor CPU page table update Jérôme Glisse
2016-11-18 18:18   ` Jérôme Glisse
2016-11-18 18:18 ` [HMM v13 12/18] mm/hmm/mirror: helper to snapshot CPU page table Jérôme Glisse
2016-11-18 18:18   ` Jérôme Glisse
2016-11-18 18:18 ` [HMM v13 13/18] mm/hmm/mirror: device page fault handler Jérôme Glisse
2016-11-18 18:18   ` Jérôme Glisse
2016-11-18 18:18 ` [HMM v13 14/18] mm/hmm/migrate: support un-addressable ZONE_DEVICE page in migration Jérôme Glisse
2016-11-18 18:18   ` Jérôme Glisse
2016-11-18 18:18 ` [HMM v13 15/18] mm/hmm/migrate: add new boolean copy flag to migratepage() callback Jérôme Glisse
2016-11-18 18:18   ` Jérôme Glisse
2016-11-18 18:18 ` [HMM v13 16/18] mm/hmm/migrate: new memory migration helper for use with device memory Jérôme Glisse
2016-11-18 18:18   ` Jérôme Glisse
2016-11-18 19:57   ` Aneesh Kumar K.V
2016-11-18 19:57     ` Aneesh Kumar K.V
2016-11-18 20:15     ` Jerome Glisse
2016-11-18 20:15       ` Jerome Glisse
2016-11-19 14:32   ` Aneesh Kumar K.V
2016-11-19 14:32     ` Aneesh Kumar K.V
2016-11-19 17:17     ` Jerome Glisse
2016-11-19 17:17       ` Jerome Glisse
2016-11-20 18:21       ` Aneesh Kumar K.V
2016-11-20 18:21         ` Aneesh Kumar K.V
2016-11-20 20:06         ` Jerome Glisse
2016-11-20 20:06           ` Jerome Glisse
2016-11-21  3:30   ` Balbir Singh
2016-11-21  3:30     ` Balbir Singh
2016-11-21  5:31     ` Jerome Glisse
2016-11-21  5:31       ` Jerome Glisse
2016-11-18 18:18 ` [HMM v13 17/18] mm/hmm/devmem: device driver helper to hotplug ZONE_DEVICE memory Jérôme Glisse
2016-11-18 18:18   ` Jérôme Glisse
2016-11-18 18:18 ` [HMM v13 18/18] mm/hmm/devmem: dummy HMM device as an helper for " Jérôme Glisse
2016-11-18 18:18   ` Jérôme Glisse
2016-11-19  0:41 ` [HMM v13 00/18] HMM (Heterogeneous Memory Management) v13 John Hubbard
2016-11-19  0:41   ` John Hubbard
2016-11-19 14:50   ` Aneesh Kumar K.V
2016-11-19 14:50     ` Aneesh Kumar K.V
2016-11-23  9:16 ` Haggai Eran
2016-11-23  9:16   ` Haggai Eran
2016-11-25 16:16   ` Jerome Glisse
2016-11-25 16:16     ` Jerome Glisse
2016-11-27 13:27     ` Haggai Eran
2016-11-27 13:27       ` Haggai Eran

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3f759fff-fe8d-89c4-5c86-c9f27403bf3b@gmail.com \
    --to=bsingharora@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=dan.j.williams@intel.com \
    --cc=jglisse@redhat.com \
    --cc=jhubbard@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ross.zwisler@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.