All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: dri-devel@lists.freedesktop.org
Cc: intel-gfx@lists.freedesktop.org
Subject: [RFC] drm: Optimise drm_ioctl() for small user args
Date: Tue, 30 May 2017 13:55:20 +0100	[thread overview]
Message-ID: <20170530125520.14030-1-chris@chris-wilson.co.uk> (raw)

When looking at simple ioctls coupled with conveniently small user
parameters, the overhead of the syscall and drm_ioctl() present large
low hanging fruit. Profiling trivial microbenchmarks around
i915_gem_busy_ioctl, the low hanging fruit comprises of the call to
copy_user(). Those calls are only inlined by the macro where the
constant is known at compile-time, but the ioctl argument size depends
on the ioctl number. To help the compiler, explicitly add switches for
the small sizes that expand to simple moves to/from user. Doing the
multiple inlines does add significant code bloat, so it is very
debatable as to its value. Back to the trivial, but frequently used,
example of i915_gem_busy_ioctl() on a Broadwell avoiding the call gives
us a 15-25% improvement:

			 before		  after
	single		100.173ns	 84.496ns
	parallel (x4)	204.275ns	152.957ns

On a baby Broxton nuc:

			before           after
	single		245.355ns	199.477ns
	parallel (x2)	280.892ns	232.726ns

Looking at the cost distribution by moving an equivalent switch into
arch/x86/lib/usercopy, the overhead to the copy user is split almost
equally between the function call and the actual copy itself. It seems
copy_user_enhanced_fast_string simply is not that good at small (single
register) copies. Something as simple as

@@ -28,6 +28,9 @@ copy_user_generic(void *to, const void *from, unsigned len)
 {
        unsigned ret;

+       if (len <= 16)
+               return copy_user_generic_unrolled(to, from, len);

is enough to speed up i915_gem_busy_ioctl() by 10% :|

Note that this overhead may entirely be x86 specific.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/drm_ioctl.c | 111 ++++++++++++++++++++++++++++++++------------
 1 file changed, 82 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 865e3ee4d743..93ba59a30a85 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -715,11 +715,11 @@ long drm_ioctl(struct file *filp,
 	const struct drm_ioctl_desc *ioctl = NULL;
 	drm_ioctl_t *func;
 	unsigned int nr = DRM_IOCTL_NR(cmd);
-	int retcode = -EINVAL;
 	char stack_kdata[128];
-	char *kdata = NULL;
+	char *kdata = stack_kdata;
 	unsigned int in_size, out_size, drv_size, ksize;
 	bool is_driver_ioctl;
+	int retcode;
 
 	dev = file_priv->minor->dev;
 
@@ -731,12 +731,12 @@ long drm_ioctl(struct file *filp,
 	if (is_driver_ioctl) {
 		/* driver ioctl */
 		if (nr - DRM_COMMAND_BASE >= dev->driver->num_ioctls)
-			goto err_i1;
+			goto err_invalid_ioctl;
 		ioctl = &dev->driver->ioctls[nr - DRM_COMMAND_BASE];
 	} else {
 		/* core ioctl */
 		if (nr >= DRM_CORE_IOCTL_COUNT)
-			goto err_i1;
+			goto err_invalid_ioctl;
 		ioctl = &drm_ioctls[nr];
 	}
 
@@ -758,29 +758,50 @@ long drm_ioctl(struct file *filp,
 
 	if (unlikely(!func)) {
 		DRM_DEBUG("no function\n");
-		retcode = -EINVAL;
-		goto err_i1;
+		goto err_invalid;
 	}
 
 	retcode = drm_ioctl_permit(ioctl->flags, file_priv);
 	if (unlikely(retcode))
-		goto err_i1;
-
-	if (ksize <= sizeof(stack_kdata)) {
-		kdata = stack_kdata;
-	} else {
-		kdata = kmalloc(ksize, GFP_KERNEL);
-		if (!kdata) {
-			retcode = -ENOMEM;
-			goto err_i1;
+		goto out;
+
+	if (in_size) {
+		if (unlikely(!access_ok(VERIFY_READ, arg, in_size)))
+			goto err_invalid_user;
+
+		switch (in_size) {
+		case 4:
+			if (unlikely(__copy_from_user(kdata, (void __user *)arg,
+						      4)))
+				goto err_invalid_user;
+			break;
+		case 8:
+			if (unlikely(__copy_from_user(kdata, (void __user *)arg,
+						      8)))
+				goto err_invalid_user;
+			break;
+		case 16:
+			if (unlikely(__copy_from_user(kdata, (void __user *)arg,
+						      16)))
+				goto err_invalid_user;
+			break;
+
+		default:
+			if (ksize > sizeof(stack_kdata)) {
+				kdata = kmalloc(ksize, GFP_KERNEL);
+				if (unlikely(!kdata)) {
+					retcode = -ENOMEM;
+					goto out;
+				}
+			}
+
+			if (unlikely(__copy_from_user(kdata, (void __user *)arg,
+						      in_size)))
+				goto err_invalid_user;
+			break;
 		}
 	}
 
-	if (copy_from_user(kdata, (void __user *)arg, in_size) != 0) {
-		retcode = -EFAULT;
-		goto err_i1;
-	}
-
 	if (ksize > in_size)
 		memset(kdata + in_size, 0, ksize - in_size);
 
@@ -794,21 +815,53 @@ long drm_ioctl(struct file *filp,
 		mutex_unlock(&drm_global_mutex);
 	}
 
-	if (copy_to_user((void __user *)arg, kdata, out_size) != 0)
-		retcode = -EFAULT;
-
-      err_i1:
-	if (!ioctl)
-		DRM_DEBUG("invalid ioctl: pid=%d, dev=0x%lx, auth=%d, cmd=0x%02x, nr=0x%02x\n",
-			  task_pid_nr(current),
-			  (long)old_encode_dev(file_priv->minor->kdev->devt),
-			  file_priv->authenticated, cmd, nr);
+	if (out_size) {
+		if (unlikely(!access_ok(VERIFY_WRITE, arg, out_size)))
+			goto err_invalid_user;
+
+		switch (out_size) {
+		case 4:
+			if (unlikely(__copy_to_user((void __user *)arg,
+						    kdata, 4)))
+				goto err_invalid_user;
+			break;
+		case 8:
+			if (unlikely(__copy_to_user((void __user *)arg,
+						    kdata, 8)))
+				goto err_invalid_user;
+			break;
+		case 16:
+			if (unlikely(__copy_to_user((void __user *)arg,
+						    kdata, 16)))
+				goto err_invalid_user;
+			break;
+		default:
+			if (unlikely(__copy_to_user((void __user *)arg,
+						    kdata, out_size)))
+				goto err_invalid_user;
+			break;
+		}
+	}
 
+out:
 	if (kdata != stack_kdata)
 		kfree(kdata);
 	if (retcode)
 		DRM_DEBUG("ret = %d\n", retcode);
 	return retcode;
+
+err_invalid_ioctl:
+	DRM_DEBUG("invalid ioctl: pid=%d, dev=0x%lx, auth=%d, cmd=0x%02x, nr=0x%02x\n",
+		  task_pid_nr(current),
+		  (long)old_encode_dev(file_priv->minor->kdev->devt),
+		  file_priv->authenticated, cmd, nr);
+err_invalid:
+	retcode = -EINVAL;
+	goto out;
+
+err_invalid_user:
+	retcode = -EFAULT;
+	goto out;
 }
 EXPORT_SYMBOL(drm_ioctl);
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

             reply	other threads:[~2017-05-30 12:55 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-05-30 12:55 Chris Wilson [this message]
2017-05-30 13:24 ` [RFC] drm: Optimise drm_ioctl() for small user args Joonas Lahtinen
2017-05-30 14:27 ` ✓ Fi.CI.BAT: success for " Patchwork
2017-05-30 15:24 ` [RFC] " Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170530125520.14030-1-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.