All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] char_dev: replace cdev_map with an xarray
@ 2021-01-11 17:05 Christoph Hellwig
  2021-01-11 17:35 ` Matthew Wilcox
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Christoph Hellwig @ 2021-01-11 17:05 UTC (permalink / raw)
  To: gregkh; +Cc: linux-kernel, linux-fsdevel

None of the complicated overlapping regions bits of the kobj_map are
required for the character device lookup, so just a trivial xarray
instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/base/Makefile    |   5 +-
 drivers/base/map.c       | 154 ---------------------------------------
 fs/char_dev.c            |  93 ++++++++++++-----------
 fs/dcache.c              |   1 -
 fs/internal.h            |   5 --
 include/linux/kobj_map.h |  20 -----
 6 files changed, 48 insertions(+), 230 deletions(-)
 delete mode 100644 drivers/base/map.c
 delete mode 100644 include/linux/kobj_map.h

diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 5e7bf9669a81f8..ff0be4d875cfaf 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -1,9 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for the Linux device tree
 
-obj-y			:= component.o core.o bus.o dd.o syscore.o \
-			   driver.o class.o platform.o \
-			   cpu.o firmware.o init.o map.o devres.o \
+obj-y			:= component.o core.o bus.o dd.o syscore.o driver.o \
+			   class.o platform.o cpu.o firmware.o init.o devres.o \
 			   attribute_container.o transport_class.o \
 			   topology.o container.o property.o cacheinfo.o \
 			   swnode.o
diff --git a/drivers/base/map.c b/drivers/base/map.c
deleted file mode 100644
index 5650ab2b247ada..00000000000000
--- a/drivers/base/map.c
+++ /dev/null
@@ -1,154 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *  linux/drivers/base/map.c
- *
- * (C) Copyright Al Viro 2002,2003
- *
- * NOTE: data structure needs to be changed.  It works, but for large dev_t
- * it will be too slow.  It is isolated, though, so these changes will be
- * local to that file.
- */
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/mutex.h>
-#include <linux/kdev_t.h>
-#include <linux/kobject.h>
-#include <linux/kobj_map.h>
-
-struct kobj_map {
-	struct probe {
-		struct probe *next;
-		dev_t dev;
-		unsigned long range;
-		struct module *owner;
-		kobj_probe_t *get;
-		int (*lock)(dev_t, void *);
-		void *data;
-	} *probes[255];
-	struct mutex *lock;
-};
-
-int kobj_map(struct kobj_map *domain, dev_t dev, unsigned long range,
-	     struct module *module, kobj_probe_t *probe,
-	     int (*lock)(dev_t, void *), void *data)
-{
-	unsigned n = MAJOR(dev + range - 1) - MAJOR(dev) + 1;
-	unsigned index = MAJOR(dev);
-	unsigned i;
-	struct probe *p;
-
-	if (n > 255)
-		n = 255;
-
-	p = kmalloc_array(n, sizeof(struct probe), GFP_KERNEL);
-	if (p == NULL)
-		return -ENOMEM;
-
-	for (i = 0; i < n; i++, p++) {
-		p->owner = module;
-		p->get = probe;
-		p->lock = lock;
-		p->dev = dev;
-		p->range = range;
-		p->data = data;
-	}
-	mutex_lock(domain->lock);
-	for (i = 0, p -= n; i < n; i++, p++, index++) {
-		struct probe **s = &domain->probes[index % 255];
-		while (*s && (*s)->range < range)
-			s = &(*s)->next;
-		p->next = *s;
-		*s = p;
-	}
-	mutex_unlock(domain->lock);
-	return 0;
-}
-
-void kobj_unmap(struct kobj_map *domain, dev_t dev, unsigned long range)
-{
-	unsigned n = MAJOR(dev + range - 1) - MAJOR(dev) + 1;
-	unsigned index = MAJOR(dev);
-	unsigned i;
-	struct probe *found = NULL;
-
-	if (n > 255)
-		n = 255;
-
-	mutex_lock(domain->lock);
-	for (i = 0; i < n; i++, index++) {
-		struct probe **s;
-		for (s = &domain->probes[index % 255]; *s; s = &(*s)->next) {
-			struct probe *p = *s;
-			if (p->dev == dev && p->range == range) {
-				*s = p->next;
-				if (!found)
-					found = p;
-				break;
-			}
-		}
-	}
-	mutex_unlock(domain->lock);
-	kfree(found);
-}
-
-struct kobject *kobj_lookup(struct kobj_map *domain, dev_t dev, int *index)
-{
-	struct kobject *kobj;
-	struct probe *p;
-	unsigned long best = ~0UL;
-
-retry:
-	mutex_lock(domain->lock);
-	for (p = domain->probes[MAJOR(dev) % 255]; p; p = p->next) {
-		struct kobject *(*probe)(dev_t, int *, void *);
-		struct module *owner;
-		void *data;
-
-		if (p->dev > dev || p->dev + p->range - 1 < dev)
-			continue;
-		if (p->range - 1 >= best)
-			break;
-		if (!try_module_get(p->owner))
-			continue;
-		owner = p->owner;
-		data = p->data;
-		probe = p->get;
-		best = p->range - 1;
-		*index = dev - p->dev;
-		if (p->lock && p->lock(dev, data) < 0) {
-			module_put(owner);
-			continue;
-		}
-		mutex_unlock(domain->lock);
-		kobj = probe(dev, index, data);
-		/* Currently ->owner protects _only_ ->probe() itself. */
-		module_put(owner);
-		if (kobj)
-			return kobj;
-		goto retry;
-	}
-	mutex_unlock(domain->lock);
-	return NULL;
-}
-
-struct kobj_map *kobj_map_init(kobj_probe_t *base_probe, struct mutex *lock)
-{
-	struct kobj_map *p = kmalloc(sizeof(struct kobj_map), GFP_KERNEL);
-	struct probe *base = kzalloc(sizeof(*base), GFP_KERNEL);
-	int i;
-
-	if ((p == NULL) || (base == NULL)) {
-		kfree(p);
-		kfree(base);
-		return NULL;
-	}
-
-	base->dev = 1;
-	base->range = ~0;
-	base->get = base_probe;
-	for (i = 0; i < 255; i++)
-		p->probes[i] = base;
-	p->lock = lock;
-	return p;
-}
diff --git a/fs/char_dev.c b/fs/char_dev.c
index ba0ded7842a779..82c26ed407ff65 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -17,7 +17,6 @@
 #include <linux/seq_file.h>
 
 #include <linux/kobject.h>
-#include <linux/kobj_map.h>
 #include <linux/cdev.h>
 #include <linux/mutex.h>
 #include <linux/backing-dev.h>
@@ -25,8 +24,7 @@
 
 #include "internal.h"
 
-static struct kobj_map *cdev_map;
-
+static DEFINE_XARRAY(cdev_map);
 static DEFINE_MUTEX(chrdevs_lock);
 
 #define CHRDEV_MAJOR_HASH_SIZE 255
@@ -367,6 +365,28 @@ void cdev_put(struct cdev *p)
 	}
 }
 
+static struct cdev *cdev_lookup(dev_t dev)
+{
+	struct cdev *cdev;
+
+	mutex_lock(&chrdevs_lock);
+	cdev = xa_load(&cdev_map, dev);
+	if (!cdev) {
+		mutex_unlock(&chrdevs_lock);
+		if (request_module("char-major-%d-%d",
+				   MAJOR(dev), MINOR(dev)) > 0)
+			/* Make old-style 2.4 aliases work */
+			request_module("char-major-%d", MAJOR(dev));
+		mutex_lock(&chrdevs_lock);
+
+		cdev = xa_load(&cdev_map, dev);
+	}
+	if (cdev && !cdev_get(cdev))
+		cdev = NULL;
+	mutex_unlock(&chrdevs_lock);
+	return cdev;
+}
+
 /*
  * Called every time a character special file is opened
  */
@@ -380,13 +400,10 @@ static int chrdev_open(struct inode *inode, struct file *filp)
 	spin_lock(&cdev_lock);
 	p = inode->i_cdev;
 	if (!p) {
-		struct kobject *kobj;
-		int idx;
 		spin_unlock(&cdev_lock);
-		kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
-		if (!kobj)
+		new = cdev_lookup(inode->i_rdev);
+		if (!new)
 			return -ENXIO;
-		new = container_of(kobj, struct cdev, kobj);
 		spin_lock(&cdev_lock);
 		/* Check i_cdev again in case somebody beat us to it while
 		   we dropped the lock. */
@@ -454,18 +471,6 @@ const struct file_operations def_chr_fops = {
 	.llseek = noop_llseek,
 };
 
-static struct kobject *exact_match(dev_t dev, int *part, void *data)
-{
-	struct cdev *p = data;
-	return &p->kobj;
-}
-
-static int exact_lock(dev_t dev, void *data)
-{
-	struct cdev *p = data;
-	return cdev_get(p) ? 0 : -1;
-}
-
 /**
  * cdev_add() - add a char device to the system
  * @p: the cdev structure for the device
@@ -478,7 +483,7 @@ static int exact_lock(dev_t dev, void *data)
  */
 int cdev_add(struct cdev *p, dev_t dev, unsigned count)
 {
-	int error;
+	int error, i;
 
 	p->dev = dev;
 	p->count = count;
@@ -486,14 +491,22 @@ int cdev_add(struct cdev *p, dev_t dev, unsigned count)
 	if (WARN_ON(dev == WHITEOUT_DEV))
 		return -EBUSY;
 
-	error = kobj_map(cdev_map, dev, count, NULL,
-			 exact_match, exact_lock, p);
-	if (error)
-		return error;
+	mutex_lock(&chrdevs_lock);
+	for (i = 0; i < count; i++) {
+		error = xa_insert(&cdev_map, dev + i, p, GFP_KERNEL);
+		if (error)
+			goto out_unwind;
+	}
+	mutex_unlock(&chrdevs_lock);
 
 	kobject_get(p->kobj.parent);
-
 	return 0;
+
+out_unwind:
+	while (--i >= 0)
+		xa_erase(&cdev_map, dev + i);
+	mutex_unlock(&chrdevs_lock);
+	return error;
 }
 
 /**
@@ -575,11 +588,6 @@ void cdev_device_del(struct cdev *cdev, struct device *dev)
 		cdev_del(cdev);
 }
 
-static void cdev_unmap(dev_t dev, unsigned count)
-{
-	kobj_unmap(cdev_map, dev, count);
-}
-
 /**
  * cdev_del() - remove a cdev from the system
  * @p: the cdev structure to be removed
@@ -593,11 +601,16 @@ static void cdev_unmap(dev_t dev, unsigned count)
  */
 void cdev_del(struct cdev *p)
 {
-	cdev_unmap(p->dev, p->count);
+	int i;
+
+	mutex_lock(&chrdevs_lock);
+	for (i = 0; i < p->count; i++)
+		xa_erase(&cdev_map, p->dev + i);
+	mutex_unlock(&chrdevs_lock);
+
 	kobject_put(&p->kobj);
 }
 
-
 static void cdev_default_release(struct kobject *kobj)
 {
 	struct cdev *p = container_of(kobj, struct cdev, kobj);
@@ -656,20 +669,6 @@ void cdev_init(struct cdev *cdev, const struct file_operations *fops)
 	cdev->ops = fops;
 }
 
-static struct kobject *base_probe(dev_t dev, int *part, void *data)
-{
-	if (request_module("char-major-%d-%d", MAJOR(dev), MINOR(dev)) > 0)
-		/* Make old-style 2.4 aliases work */
-		request_module("char-major-%d", MAJOR(dev));
-	return NULL;
-}
-
-void __init chrdev_init(void)
-{
-	cdev_map = kobj_map_init(base_probe, &chrdevs_lock);
-}
-
-
 /* Let modules do char dev stuff */
 EXPORT_SYMBOL(register_chrdev_region);
 EXPORT_SYMBOL(unregister_chrdev_region);
diff --git a/fs/dcache.c b/fs/dcache.c
index 97e81a844a966c..3e0329b3de0a2d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3240,5 +3240,4 @@ void __init vfs_caches_init(void)
 	files_maxfiles_init();
 	mnt_init();
 	bdev_cache_init();
-	chrdev_init();
 }
diff --git a/fs/internal.h b/fs/internal.h
index 77c50befbfbe96..a2985ee25c3b15 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -50,11 +50,6 @@ static inline int emergency_thaw_bdev(struct super_block *sb)
 extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
 		get_block_t *get_block, struct iomap *iomap);
 
-/*
- * char_dev.c
- */
-extern void __init chrdev_init(void);
-
 /*
  * fs_context.c
  */
diff --git a/include/linux/kobj_map.h b/include/linux/kobj_map.h
deleted file mode 100644
index c9919f8b22932c..00000000000000
--- a/include/linux/kobj_map.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * kobj_map.h
- */
-
-#ifndef _KOBJ_MAP_H_
-#define _KOBJ_MAP_H_
-
-#include <linux/mutex.h>
-
-typedef struct kobject *kobj_probe_t(dev_t, int *, void *);
-struct kobj_map;
-
-int kobj_map(struct kobj_map *, dev_t, unsigned long, struct module *,
-	     kobj_probe_t *, int (*)(dev_t, void *), void *);
-void kobj_unmap(struct kobj_map *, dev_t, unsigned long);
-struct kobject *kobj_lookup(struct kobj_map *, dev_t, int *);
-struct kobj_map *kobj_map_init(kobj_probe_t *, struct mutex *);
-
-#endif /* _KOBJ_MAP_H_ */
-- 
2.29.2


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] char_dev: replace cdev_map with an xarray
  2021-01-11 17:05 [PATCH] char_dev: replace cdev_map with an xarray Christoph Hellwig
@ 2021-01-11 17:35 ` Matthew Wilcox
  2021-01-11 18:11   ` Greg KH
  2021-01-11 18:10 ` Greg KH
  2021-01-11 20:58 ` Matthew Wilcox
  2 siblings, 1 reply; 11+ messages in thread
From: Matthew Wilcox @ 2021-01-11 17:35 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: gregkh, linux-kernel, linux-fsdevel

On Mon, Jan 11, 2021 at 06:05:13PM +0100, Christoph Hellwig wrote:
> None of the complicated overlapping regions bits of the kobj_map are
> required for the character device lookup, so just a trivial xarray
> instead.

Thanks for doing this.  We could make it more efficient for chardevs
that occupy 64 or more consecutive/aligned devices -- is it worth doing?

> +static struct cdev *cdev_lookup(dev_t dev)
> +{
> +	struct cdev *cdev;
> +
> +	mutex_lock(&chrdevs_lock);
> +	cdev = xa_load(&cdev_map, dev);
> +	if (!cdev) {
> +		mutex_unlock(&chrdevs_lock);
> +		if (request_module("char-major-%d-%d",
> +				   MAJOR(dev), MINOR(dev)) > 0)
> +			/* Make old-style 2.4 aliases work */
> +			request_module("char-major-%d", MAJOR(dev));
> +		mutex_lock(&chrdevs_lock);
> +
> +		cdev = xa_load(&cdev_map, dev);
> +	}
> +	if (cdev && !cdev_get(cdev))
> +		cdev = NULL;
> +	mutex_unlock(&chrdevs_lock);
> +	return cdev;

What does the mutex protect here?  Is it cdev being freed?

> @@ -593,11 +601,16 @@ static void cdev_unmap(dev_t dev, unsigned count)
>   */
>  void cdev_del(struct cdev *p)
>  {
> -	cdev_unmap(p->dev, p->count);
> +	int i;
> +
> +	mutex_lock(&chrdevs_lock);
> +	for (i = 0; i < p->count; i++)
> +		xa_erase(&cdev_map, p->dev + i);
> +	mutex_unlock(&chrdevs_lock);

I don't understand what it's protecting here.  It's clearly not cdev_get
as that could happen before we acquire the mutex.  This also suggests
I should add an xa_erase_range() to the API.

But there's nothing wrong here, just some places that maybe could be
better, so:

Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] char_dev: replace cdev_map with an xarray
  2021-01-11 17:05 [PATCH] char_dev: replace cdev_map with an xarray Christoph Hellwig
  2021-01-11 17:35 ` Matthew Wilcox
@ 2021-01-11 18:10 ` Greg KH
  2021-01-11 20:58 ` Matthew Wilcox
  2 siblings, 0 replies; 11+ messages in thread
From: Greg KH @ 2021-01-11 18:10 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-kernel, linux-fsdevel

On Mon, Jan 11, 2021 at 06:05:13PM +0100, Christoph Hellwig wrote:
> None of the complicated overlapping regions bits of the kobj_map are
> required for the character device lookup, so just a trivial xarray
> instead.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Thanks for doing this!

Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] char_dev: replace cdev_map with an xarray
  2021-01-11 17:35 ` Matthew Wilcox
@ 2021-01-11 18:11   ` Greg KH
  2021-01-11 18:20     ` Matthew Wilcox
  0 siblings, 1 reply; 11+ messages in thread
From: Greg KH @ 2021-01-11 18:11 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: Christoph Hellwig, linux-kernel, linux-fsdevel

On Mon, Jan 11, 2021 at 05:35:00PM +0000, Matthew Wilcox wrote:
> On Mon, Jan 11, 2021 at 06:05:13PM +0100, Christoph Hellwig wrote:
> > None of the complicated overlapping regions bits of the kobj_map are
> > required for the character device lookup, so just a trivial xarray
> > instead.
> 
> Thanks for doing this.  We could make it more efficient for chardevs
> that occupy 64 or more consecutive/aligned devices -- is it worth doing?

efficient in what way?  Space or faster lookup?

THis shouldn't be on a "fast" lookup path, so I doubt that's worth
optimizing for.  Space, maybe, for systems with thousands of scsi
devices, but usually they just stick to the block device, not a char
device from what I remember.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] char_dev: replace cdev_map with an xarray
  2021-01-11 18:11   ` Greg KH
@ 2021-01-11 18:20     ` Matthew Wilcox
  2021-01-11 18:33       ` Greg KH
  0 siblings, 1 reply; 11+ messages in thread
From: Matthew Wilcox @ 2021-01-11 18:20 UTC (permalink / raw)
  To: Greg KH; +Cc: Christoph Hellwig, linux-kernel, linux-fsdevel

On Mon, Jan 11, 2021 at 07:11:25PM +0100, Greg KH wrote:
> On Mon, Jan 11, 2021 at 05:35:00PM +0000, Matthew Wilcox wrote:
> > On Mon, Jan 11, 2021 at 06:05:13PM +0100, Christoph Hellwig wrote:
> > > None of the complicated overlapping regions bits of the kobj_map are
> > > required for the character device lookup, so just a trivial xarray
> > > instead.
> > 
> > Thanks for doing this.  We could make it more efficient for chardevs
> > that occupy 64 or more consecutive/aligned devices -- is it worth doing?
> 
> efficient in what way?  Space or faster lookup?

Both, but primarily space.

The radix tree underlying the xarray allows N consecutive entries with
the same value to be represented as a single entry; if there are at
least 64 entries then we get to skip an entire level of the tree (saving
1/7 of a page).  Of course, we'd need to go from the 'head' pointer to
the correct pointer, something like p += rdev - p->rdev.

> THis shouldn't be on a "fast" lookup path, so I doubt that's worth
> optimizing for.  Space, maybe, for systems with thousands of scsi
> devices, but usually they just stick to the block device, not a char
> device from what I remember.

/dev/sgX is a chardev?

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] char_dev: replace cdev_map with an xarray
  2021-01-11 18:20     ` Matthew Wilcox
@ 2021-01-11 18:33       ` Greg KH
  2021-01-11 18:51         ` Matthew Wilcox
  0 siblings, 1 reply; 11+ messages in thread
From: Greg KH @ 2021-01-11 18:33 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: Christoph Hellwig, linux-kernel, linux-fsdevel

On Mon, Jan 11, 2021 at 06:20:29PM +0000, Matthew Wilcox wrote:
> On Mon, Jan 11, 2021 at 07:11:25PM +0100, Greg KH wrote:
> > On Mon, Jan 11, 2021 at 05:35:00PM +0000, Matthew Wilcox wrote:
> > > On Mon, Jan 11, 2021 at 06:05:13PM +0100, Christoph Hellwig wrote:
> > > > None of the complicated overlapping regions bits of the kobj_map are
> > > > required for the character device lookup, so just a trivial xarray
> > > > instead.
> > > 
> > > Thanks for doing this.  We could make it more efficient for chardevs
> > > that occupy 64 or more consecutive/aligned devices -- is it worth doing?
> > 
> > efficient in what way?  Space or faster lookup?
> 
> Both, but primarily space.
> 
> The radix tree underlying the xarray allows N consecutive entries with
> the same value to be represented as a single entry; if there are at
> least 64 entries then we get to skip an entire level of the tree (saving
> 1/7 of a page).  Of course, we'd need to go from the 'head' pointer to
> the correct pointer, something like p += rdev - p->rdev.

How much "space" are you talking about here?

A "normal" machine has about 100-200 char devices.  Servers, maybe more,
but probably not.

The kobject being used previously wasn't really "small" at all, so odds
are any conversion to not use it like this will be better overall.

> > THis shouldn't be on a "fast" lookup path, so I doubt that's worth
> > optimizing for.  Space, maybe, for systems with thousands of scsi
> > devices, but usually they just stick to the block device, not a char
> > device from what I remember.
> 
> /dev/sgX is a chardev?

I sure hope no one is using /dev/sgX for tens of thousands of block
device accesses, if so, they have bigger problems than this :)

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] char_dev: replace cdev_map with an xarray
  2021-01-11 18:33       ` Greg KH
@ 2021-01-11 18:51         ` Matthew Wilcox
  0 siblings, 0 replies; 11+ messages in thread
From: Matthew Wilcox @ 2021-01-11 18:51 UTC (permalink / raw)
  To: Greg KH; +Cc: Christoph Hellwig, linux-kernel, linux-fsdevel

On Mon, Jan 11, 2021 at 07:33:15PM +0100, Greg KH wrote:
> On Mon, Jan 11, 2021 at 06:20:29PM +0000, Matthew Wilcox wrote:
> > > efficient in what way?  Space or faster lookup?
> > 
> > Both, but primarily space.
> > 
> > The radix tree underlying the xarray allows N consecutive entries with
> > the same value to be represented as a single entry; if there are at
> > least 64 entries then we get to skip an entire level of the tree (saving
> > 1/7 of a page).  Of course, we'd need to go from the 'head' pointer to
> > the correct pointer, something like p += rdev - p->rdev.
> 
> How much "space" are you talking about here?

576 bytes -- 1/7 of a page.

> A "normal" machine has about 100-200 char devices.  Servers, maybe more,
> but probably not.
> 
> The kobject being used previously wasn't really "small" at all, so odds
> are any conversion to not use it like this will be better overall.

Yes.

> > > THis shouldn't be on a "fast" lookup path, so I doubt that's worth
> > > optimizing for.  Space, maybe, for systems with thousands of scsi
> > > devices, but usually they just stick to the block device, not a char
> > > device from what I remember.
> > 
> > /dev/sgX is a chardev?
> 
> I sure hope no one is using /dev/sgX for tens of thousands of block
> device accesses, if so, they have bigger problems than this :)

There is one sgX char dev for every /dev/sdN, so anyone with a thousand
SCSI devices also has a thousand char devices.  On the other hand,
they're added one at a time, so there is no chance to optimise here:

        cdev = cdev_alloc();
...
        error = cdev_add(cdev, MKDEV(SCSI_GENERIC_MAJOR, sdp->index), 1);


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] char_dev: replace cdev_map with an xarray
  2021-01-11 17:05 [PATCH] char_dev: replace cdev_map with an xarray Christoph Hellwig
  2021-01-11 17:35 ` Matthew Wilcox
  2021-01-11 18:10 ` Greg KH
@ 2021-01-11 20:58 ` Matthew Wilcox
  2021-01-12  9:35   ` Greg KH
  2 siblings, 1 reply; 11+ messages in thread
From: Matthew Wilcox @ 2021-01-11 20:58 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: gregkh, linux-kernel, linux-fsdevel

On Mon, Jan 11, 2021 at 06:05:13PM +0100, Christoph Hellwig wrote:
> @@ -486,14 +491,22 @@ int cdev_add(struct cdev *p, dev_t dev, unsigned count)
>  	if (WARN_ON(dev == WHITEOUT_DEV))
>  		return -EBUSY;
>  
> -	error = kobj_map(cdev_map, dev, count, NULL,
> -			 exact_match, exact_lock, p);
> -	if (error)
> -		return error;
> +	mutex_lock(&chrdevs_lock);
> +	for (i = 0; i < count; i++) {
> +		error = xa_insert(&cdev_map, dev + i, p, GFP_KERNEL);
> +		if (error)
> +			goto out_unwind;
> +	}
> +	mutex_unlock(&chrdevs_lock);

Looking at some of the users ...

#define BSG_MAX_DEVS            32768
...
        ret = cdev_add(&bsg_cdev, MKDEV(bsg_major, 0), BSG_MAX_DEVS);

So this is going to allocate 32768 entries; at 8 bytes each, that's 256kB.
With XArray overhead, it works out to 73 pages or 292kB.  While I don't
have bsg loaded on my laptop, I imagine a lot of machines do.

drivers/net/tap.c:#define TAP_NUM_DEVS (1U << MINORBITS)
include/linux/kdev_t.h:#define MINORBITS        20
drivers/net/tap.c:      err = cdev_add(tap_cdev, *tap_major, TAP_NUM_DEVS);

That's going to be even worse -- 8MB plus the overhead to be closer to 9MB.

I think we do need to implement the 'store a range' option ;-(

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] char_dev: replace cdev_map with an xarray
  2021-01-11 20:58 ` Matthew Wilcox
@ 2021-01-12  9:35   ` Greg KH
  2021-01-12 10:00     ` David Laight
  0 siblings, 1 reply; 11+ messages in thread
From: Greg KH @ 2021-01-12  9:35 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: Christoph Hellwig, linux-kernel, linux-fsdevel

On Mon, Jan 11, 2021 at 08:58:18PM +0000, Matthew Wilcox wrote:
> On Mon, Jan 11, 2021 at 06:05:13PM +0100, Christoph Hellwig wrote:
> > @@ -486,14 +491,22 @@ int cdev_add(struct cdev *p, dev_t dev, unsigned count)
> >  	if (WARN_ON(dev == WHITEOUT_DEV))
> >  		return -EBUSY;
> >  
> > -	error = kobj_map(cdev_map, dev, count, NULL,
> > -			 exact_match, exact_lock, p);
> > -	if (error)
> > -		return error;
> > +	mutex_lock(&chrdevs_lock);
> > +	for (i = 0; i < count; i++) {
> > +		error = xa_insert(&cdev_map, dev + i, p, GFP_KERNEL);
> > +		if (error)
> > +			goto out_unwind;
> > +	}
> > +	mutex_unlock(&chrdevs_lock);
> 
> Looking at some of the users ...
> 
> #define BSG_MAX_DEVS            32768
> ...
>         ret = cdev_add(&bsg_cdev, MKDEV(bsg_major, 0), BSG_MAX_DEVS);
> 
> So this is going to allocate 32768 entries; at 8 bytes each, that's 256kB.
> With XArray overhead, it works out to 73 pages or 292kB.  While I don't
> have bsg loaded on my laptop, I imagine a lot of machines do.
> 
> drivers/net/tap.c:#define TAP_NUM_DEVS (1U << MINORBITS)
> include/linux/kdev_t.h:#define MINORBITS        20
> drivers/net/tap.c:      err = cdev_add(tap_cdev, *tap_major, TAP_NUM_DEVS);
> 
> That's going to be even worse -- 8MB plus the overhead to be closer to 9MB.
> 
> I think we do need to implement the 'store a range' option ;-(

Looks like it will be needed here.

Wow that's a lot of tap devices allocated all at once, odd...

Note, we will get some additional savings when this goes in as I can
delete the kobject from the cdev (after cleaning up the misguided
drivers that tried to set it thinking it was used), which will help out
a lot for the individual structures being created, but not for these
ranges.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH] char_dev: replace cdev_map with an xarray
  2021-01-12  9:35   ` Greg KH
@ 2021-01-12 10:00     ` David Laight
  2021-01-12 10:25       ` 'Greg KH'
  0 siblings, 1 reply; 11+ messages in thread
From: David Laight @ 2021-01-12 10:00 UTC (permalink / raw)
  To: 'Greg KH', Matthew Wilcox
  Cc: Christoph Hellwig, linux-kernel, linux-fsdevel

From: Greg KH
> Sent: 12 January 2021 09:35
> 
> On Mon, Jan 11, 2021 at 08:58:18PM +0000, Matthew Wilcox wrote:
> > On Mon, Jan 11, 2021 at 06:05:13PM +0100, Christoph Hellwig wrote:
> > > @@ -486,14 +491,22 @@ int cdev_add(struct cdev *p, dev_t dev, unsigned count)
> > >  	if (WARN_ON(dev == WHITEOUT_DEV))
> > >  		return -EBUSY;
> > >
> > > -	error = kobj_map(cdev_map, dev, count, NULL,
> > > -			 exact_match, exact_lock, p);
> > > -	if (error)
> > > -		return error;
> > > +	mutex_lock(&chrdevs_lock);
> > > +	for (i = 0; i < count; i++) {
> > > +		error = xa_insert(&cdev_map, dev + i, p, GFP_KERNEL);
> > > +		if (error)
> > > +			goto out_unwind;
> > > +	}
> > > +	mutex_unlock(&chrdevs_lock);
> >
> > Looking at some of the users ...
> >
> > #define BSG_MAX_DEVS            32768
> > ...
> >         ret = cdev_add(&bsg_cdev, MKDEV(bsg_major, 0), BSG_MAX_DEVS);
> >
> > So this is going to allocate 32768 entries; at 8 bytes each, that's 256kB.
> > With XArray overhead, it works out to 73 pages or 292kB.  While I don't
> > have bsg loaded on my laptop, I imagine a lot of machines do.
> >
> > drivers/net/tap.c:#define TAP_NUM_DEVS (1U << MINORBITS)
> > include/linux/kdev_t.h:#define MINORBITS        20
> > drivers/net/tap.c:      err = cdev_add(tap_cdev, *tap_major, TAP_NUM_DEVS);
> >
> > That's going to be even worse -- 8MB plus the overhead to be closer to 9MB.
> >
> > I think we do need to implement the 'store a range' option ;-(
> 
> Looks like it will be needed here.
> 
> Wow that's a lot of tap devices allocated all at once, odd...

Aren't there two distinct use cases that probably need treating
separately?

1) A driver that uses it's own major (private) major number.
2) Drivers that share a major number (eg serial ports).

In the first case you just want all minors to come to your driver.
In the second case different (ranges of) minors need to go to
different drivers.

Until the major number is actually shared only the upper limit
is an any way relevant.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] char_dev: replace cdev_map with an xarray
  2021-01-12 10:00     ` David Laight
@ 2021-01-12 10:25       ` 'Greg KH'
  0 siblings, 0 replies; 11+ messages in thread
From: 'Greg KH' @ 2021-01-12 10:25 UTC (permalink / raw)
  To: David Laight
  Cc: Matthew Wilcox, Christoph Hellwig, linux-kernel, linux-fsdevel

On Tue, Jan 12, 2021 at 10:00:25AM +0000, David Laight wrote:
> From: Greg KH
> > Sent: 12 January 2021 09:35
> > 
> > On Mon, Jan 11, 2021 at 08:58:18PM +0000, Matthew Wilcox wrote:
> > > On Mon, Jan 11, 2021 at 06:05:13PM +0100, Christoph Hellwig wrote:
> > > > @@ -486,14 +491,22 @@ int cdev_add(struct cdev *p, dev_t dev, unsigned count)
> > > >  	if (WARN_ON(dev == WHITEOUT_DEV))
> > > >  		return -EBUSY;
> > > >
> > > > -	error = kobj_map(cdev_map, dev, count, NULL,
> > > > -			 exact_match, exact_lock, p);
> > > > -	if (error)
> > > > -		return error;
> > > > +	mutex_lock(&chrdevs_lock);
> > > > +	for (i = 0; i < count; i++) {
> > > > +		error = xa_insert(&cdev_map, dev + i, p, GFP_KERNEL);
> > > > +		if (error)
> > > > +			goto out_unwind;
> > > > +	}
> > > > +	mutex_unlock(&chrdevs_lock);
> > >
> > > Looking at some of the users ...
> > >
> > > #define BSG_MAX_DEVS            32768
> > > ...
> > >         ret = cdev_add(&bsg_cdev, MKDEV(bsg_major, 0), BSG_MAX_DEVS);
> > >
> > > So this is going to allocate 32768 entries; at 8 bytes each, that's 256kB.
> > > With XArray overhead, it works out to 73 pages or 292kB.  While I don't
> > > have bsg loaded on my laptop, I imagine a lot of machines do.
> > >
> > > drivers/net/tap.c:#define TAP_NUM_DEVS (1U << MINORBITS)
> > > include/linux/kdev_t.h:#define MINORBITS        20
> > > drivers/net/tap.c:      err = cdev_add(tap_cdev, *tap_major, TAP_NUM_DEVS);
> > >
> > > That's going to be even worse -- 8MB plus the overhead to be closer to 9MB.
> > >
> > > I think we do need to implement the 'store a range' option ;-(
> > 
> > Looks like it will be needed here.
> > 
> > Wow that's a lot of tap devices allocated all at once, odd...
> 
> Aren't there two distinct use cases that probably need treating
> separately?
> 
> 1) A driver that uses it's own major (private) major number.
> 2) Drivers that share a major number (eg serial ports).
> 
> In the first case you just want all minors to come to your driver.
> In the second case different (ranges of) minors need to go to
> different drivers.
> 
> Until the major number is actually shared only the upper limit
> is an any way relevant.

Patches gladly reviewed.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2021-01-12 10:25 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-11 17:05 [PATCH] char_dev: replace cdev_map with an xarray Christoph Hellwig
2021-01-11 17:35 ` Matthew Wilcox
2021-01-11 18:11   ` Greg KH
2021-01-11 18:20     ` Matthew Wilcox
2021-01-11 18:33       ` Greg KH
2021-01-11 18:51         ` Matthew Wilcox
2021-01-11 18:10 ` Greg KH
2021-01-11 20:58 ` Matthew Wilcox
2021-01-12  9:35   ` Greg KH
2021-01-12 10:00     ` David Laight
2021-01-12 10:25       ` 'Greg KH'

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.