All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jason Gunthorpe <jgg@mellanox.com>
To: Ralph Campbell <rcampbell@nvidia.com>
Cc: Jerome Glisse <jglisse@redhat.com>,
	John Hubbard <jhubbard@nvidia.com>,
	Christoph Hellwig <hch@lst.de>,
	"linux-rdma@vger.kernel.org" <linux-rdma@vger.kernel.org>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH v3 3/3] mm/hmm/test: add self tests for HMM
Date: Tue, 29 Oct 2019 17:58:41 +0000	[thread overview]
Message-ID: <20191029175837.GS22766@mellanox.com> (raw)
In-Reply-To: <20191023195515.13168-4-rcampbell@nvidia.com>

On Wed, Oct 23, 2019 at 12:55:15PM -0700, Ralph Campbell wrote:
> Add self tests for HMM.
>
> Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>

> ---
>  MAINTAINERS                            |    3 +
>  drivers/char/Kconfig                   |   11 +
>  drivers/char/Makefile                  |    1 +
>  drivers/char/hmm_dmirror.c             | 1566 ++++++++++++++++++++++++
>  include/Kbuild                         |    1 +
>  include/uapi/linux/hmm_dmirror.h       |   74 ++
>  tools/testing/selftests/vm/.gitignore  |    1 +
>  tools/testing/selftests/vm/Makefile    |    3 +
>  tools/testing/selftests/vm/config      |    3 +
>  tools/testing/selftests/vm/hmm-tests.c | 1311 ++++++++++++++++++++
>  tools/testing/selftests/vm/run_vmtests |   16 +
>  tools/testing/selftests/vm/test_hmm.sh |   97 ++
>  12 files changed, 3087 insertions(+)
>  create mode 100644 drivers/char/hmm_dmirror.c
>  create mode 100644 include/uapi/linux/hmm_dmirror.h
>  create mode 100644 tools/testing/selftests/vm/hmm-tests.c
>  create mode 100755 tools/testing/selftests/vm/test_hmm.sh

This is really big, it would be nice to get a comment from the various
kernel testing folks if this approach makes sense with the test
frameworks. Do we have other drivers that are only intended to be used
by selftests?

Frankly, I'm not super excited about the idea of a 'test driver', it
seems more logical for testing to have some way for a test harness to
call hmm_range_fault() under various conditions and check the results?

It seems especially over-complicated to use a full page table layout
for this, wouldn't something simple like an xarray be good enough for
test purposes?

> +/*
> + * Below are the file operation for the dmirror device file. Only ioctl matters.
> + *
> + * Note this is highly specific to the dmirror device driver and should not be
> + * construed as an example on how to design the API a real device driver would
> + * expose to userspace.
> + */
> +static ssize_t dmirror_fops_read(struct file *filp,
> +			       char __user *buf,
> +			       size_t count,
> +			       loff_t *ppos)
> +{
> +	return -EINVAL;
> +}
> +
> +static ssize_t dmirror_fops_write(struct file *filp,
> +				const char __user *buf,
> +				size_t count,
> +				loff_t *ppos)
> +{
> +	return -EINVAL;
> +}
> +
> +static int dmirror_fops_mmap(struct file *filp, struct vm_area_struct *vma)
> +{
> +	/* Forbid mmap of the dmirror device file. */
> +	return -EINVAL;
> +}

I'm pretty sure these can just be left as NULL in the fops?

> +static int dmirror_fault(struct dmirror *dmirror,
> +			 unsigned long start,
> +			 unsigned long end,
> +			 bool write)
> +{
> +	struct mm_struct *mm = dmirror->mirror.hmm->mmu_notifier.mm;
> +	unsigned long addr;
> +	unsigned long next;
> +	uint64_t pfns[64];
> +	struct hmm_range range = {
> +		.pfns = pfns,
> +		.flags = dmirror_hmm_flags,
> +		.values = dmirror_hmm_values,
> +		.pfn_shift = DPT_SHIFT,
> +		.pfn_flags_mask = ~(dmirror_hmm_flags[HMM_PFN_VALID] |
> +				    dmirror_hmm_flags[HMM_PFN_WRITE]),
> +		.default_flags = dmirror_hmm_flags[HMM_PFN_VALID] |
> +				(write ? dmirror_hmm_flags[HMM_PFN_WRITE] : 0),
> +	};
> +	int ret = 0;
> +
> +	for (addr = start; addr < end; ) {
> +		long count;
> +
> +		next = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);
> +		range.start = addr;
> +		range.end = next;
> +
> +		down_read(&mm->mmap_sem);
> +
> +		ret = hmm_range_register(&range, &dmirror->mirror);
> +		if (ret) {
> +			up_read(&mm->mmap_sem);
> +			break;
> +		}
> +
> +		if (!hmm_range_wait_until_valid(&range,
> +						DMIRROR_RANGE_FAULT_TIMEOUT)) {
> +			hmm_range_unregister(&range);
> +			up_read(&mm->mmap_sem);
> +			continue;
> +		}
> +
> +		count = hmm_range_fault(&range, 0);
> +		if (count < 0) {
> +			ret = count;
> +			hmm_range_unregister(&range);
> +			up_read(&mm->mmap_sem);
> +			break;
> +		}
> +
> +		if (!hmm_range_valid(&range)) {

There is no 'driver lock' being held here, how does this work?
Shouldn't it hold dmirror->mutex for this sequence?

> +			hmm_range_unregister(&range);
> +			up_read(&mm->mmap_sem);
> +			continue;
> +		}
> +		mutex_lock(&dmirror->mutex);
> +		ret = dmirror_pt_walk(dmirror, dmirror_do_fault,
> +				      addr, next, &range, true);
> +		mutex_unlock(&dmirror->mutex);

Ie move it down into this block

> +		hmm_range_unregister(&range);
> +		up_read(&mm->mmap_sem);
> +		if (ret)
> +			break;
> +
> +		addr = next;
> +	}
> +
> +	return ret;
> +}

> +static int dmirror_read(struct dmirror *dmirror,
> +			struct hmm_dmirror_cmd *cmd)
> +{

Why not just use pread()/pwrite() for this instead of an ioctl?

> +	struct dmirror_bounce bounce;
> +	unsigned long start, end;
> +	unsigned long size = cmd->npages << PAGE_SHIFT;
> +	int ret;
> +
> +	start = cmd->addr;
> +	end = start + size;
> +	if (end < start)
> +		return -EINVAL;
> +
> +	ret = dmirror_bounce_init(&bounce, start, size);
> +	if (ret)
> +		return ret;
> +
> +static int dmirror_snapshot(struct dmirror *dmirror,
> +			    struct hmm_dmirror_cmd *cmd)
> +{
> +	struct mm_struct *mm = dmirror->mirror.hmm->mmu_notifier.mm;
> +	unsigned long start, end;
> +	unsigned long size = cmd->npages << PAGE_SHIFT;
> +	unsigned long addr;
> +	unsigned long next;
> +	uint64_t pfns[64];
> +	unsigned char perm[64];
> +	char __user *uptr;
> +	struct hmm_range range = {
> +		.pfns = pfns,
> +		.flags = dmirror_hmm_flags,
> +		.values = dmirror_hmm_values,
> +		.pfn_shift = DPT_SHIFT,
> +		.pfn_flags_mask = ~0ULL,
> +	};
> +	int ret = 0;
> +
> +	start = cmd->addr;
> +	end = start + size;
> +	uptr = (void __user *)cmd->ptr;
> +
> +	for (addr = start; addr < end; ) {
> +		long count;
> +		unsigned long i;
> +		unsigned long n;
> +
> +		next = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);
> +		range.start = addr;
> +		range.end = next;
> +
> +		down_read(&mm->mmap_sem);
> +
> +		ret = hmm_range_register(&range, &dmirror->mirror);
> +		if (ret) {
> +			up_read(&mm->mmap_sem);
> +			break;
> +		}
> +
> +		if (!hmm_range_wait_until_valid(&range,
> +						DMIRROR_RANGE_FAULT_TIMEOUT)) {
> +			hmm_range_unregister(&range);
> +			up_read(&mm->mmap_sem);
> +			continue;
> +		}
> +
> +		count = hmm_range_fault(&range, HMM_FAULT_SNAPSHOT);
> +		if (count < 0) {
> +			ret = count;
> +			hmm_range_unregister(&range);
> +			up_read(&mm->mmap_sem);
> +			if (ret == -EBUSY)
> +				continue;
> +			break;
> +		}
> +
> +		if (!hmm_range_valid(&range)) {

Same as for dmirror_fault

> +			hmm_range_unregister(&range);
> +			up_read(&mm->mmap_sem);
> +			continue;
> +		}
> +
> +		n = (next - addr) >> PAGE_SHIFT;
> +		for (i = 0; i < n; i++)
> +			dmirror_mkentry(dmirror, &range, perm + i, pfns[i]);

Is this missing locking too?

> +static int dmirror_remove(struct platform_device *pdev)
> +{
> +	/* all probe actions are unwound by devm */
> +	return 0;
> +}
> +
> +static struct platform_driver dmirror_device_driver = {
> +	.probe		= dmirror_probe,
> +	.remove		= dmirror_remove,
> +	.driver		= {
> +		.name	= "HMM_DMIRROR",
> +	},
> +};

This presence of a platform_driver and device is very confusing. I'm
sure Greg KH would object to this as a misuse of platform drivers.

A platform device isn't needed to create a char dev, so what is this for?

> diff --git a/include/Kbuild b/include/Kbuild
> index ffba79483cc5..6ffb44a45957 100644
> --- a/include/Kbuild
> +++ b/include/Kbuild
> @@ -1063,6 +1063,7 @@ header-test-			+= uapi/linux/coda_psdev.h
>  header-test-			+= uapi/linux/errqueue.h
>  header-test-			+= uapi/linux/eventpoll.h
>  header-test-			+= uapi/linux/hdlc/ioctl.h
> +header-test-			+= uapi/linux/hmm_dmirror.h

Why? This list should only be updated if the header is broken in some
way.


> diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
> new file mode 100644
> index 000000000000..f4ae6188fd0e
> --- /dev/null
> +++ b/tools/testing/selftests/vm/hmm-tests.c
> @@ -0,0 +1,1311 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright 2013 Red Hat Inc.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.

btw, I think if a SPDX is present I don't think the license text is
required, just the copyright.

I think these tests should also study the various case of invoke
pte_hole, ie faulting/snappshotting before/after a vma, or across a
vma range with a hole, etc, etc.

Jason

  parent reply	other threads:[~2019-10-29 17:58 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-23 19:55 [PATCH v3 0/3] HMM tests and minor fixes Ralph Campbell
2019-10-23 19:55 ` [PATCH v3 1/3] mm/hmm: make full use of walk_page_range() Ralph Campbell
2019-10-29 17:40   ` Jason Gunthorpe
2019-10-23 19:55 ` [PATCH v3 2/3] mm/hmm: allow snapshot of the special zero page Ralph Campbell
2019-10-23 20:27   ` Jerome Glisse
2019-10-24  9:27   ` David Hildenbrand
2019-10-29 17:27   ` Jason Gunthorpe
2019-10-23 19:55 ` [PATCH v3 3/3] mm/hmm/test: add self tests for HMM Ralph Campbell
2019-10-23 20:28   ` Jerome Glisse
2019-10-23 21:55     ` Ralph Campbell
2019-10-29 17:58   ` Jason Gunthorpe [this message]
2019-10-29 21:16     ` Ralph Campbell
2019-10-29 23:12       ` Jason Gunthorpe
2019-10-31  0:14         ` Ralph Campbell
2019-10-31 12:42           ` Jason Gunthorpe
2019-10-31 17:28             ` Ralph Campbell
2019-10-31 17:34               ` Jason Gunthorpe
2019-10-31 17:48                 ` Ralph Campbell
2019-10-30 18:34     ` Qian Cai
2019-10-30 18:34       ` Qian Cai

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191029175837.GS22766@mellanox.com \
    --to=jgg@mellanox.com \
    --cc=hch@lst.de \
    --cc=jglisse@redhat.com \
    --cc=jhubbard@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=rcampbell@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.