All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Make vm_max_readahead configurable at run-time
@ 2010-02-09 11:29 ` Nikanth Karthikesan
  0 siblings, 0 replies; 40+ messages in thread
From: Nikanth Karthikesan @ 2010-02-09 11:29 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Jens Axboe, linux-kernel, linux-mm

Make vm_max_readahead configurable at run-time. Expose a sysctl knob
in procfs to change it. This would ensure that new disks added would
use this value as their default read_ahead_kb.

Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>

---

Index: linux-2.6/block/blk-core.c
===================================================================
--- linux-2.6.orig/block/blk-core.c
+++ linux-2.6/block/blk-core.c
@@ -499,7 +499,7 @@ struct request_queue *blk_alloc_queue_no
 	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
 	q->backing_dev_info.unplug_io_data = q;
 	q->backing_dev_info.ra_pages =
-			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+			(vm_max_readahead * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
 	q->backing_dev_info.name = "block";
Index: linux-2.6/fs/fuse/inode.c
===================================================================
--- linux-2.6.orig/fs/fuse/inode.c
+++ linux-2.6/fs/fuse/inode.c
@@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_con
 	int err;
 
 	fc->bdi.name = "fuse";
-	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	fc->bdi.ra_pages = (vm_max_readahead * 1024) / PAGE_CACHE_SIZE;
 	fc->bdi.unplug_io_fn = default_unplug_io_fn;
 	/* fuse does it's own writeback accounting */
 	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -1188,7 +1188,11 @@ int write_one_page(struct page *page, in
 void task_dirty_inc(struct task_struct *tsk);
 
 /* readahead.c */
-#define VM_MAX_READAHEAD	128	/* kbytes */
+#define INITIAL_VM_MAX_READAHEAD	128	/* kbytes */
+extern unsigned long vm_max_readahead;
+
+int sysctl_vm_max_readahead_handler(struct ctl_table *, int,
+					void __user *, size_t *, loff_t *);
 
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
Index: linux-2.6/mm/backing-dev.c
===================================================================
--- linux-2.6.orig/mm/backing-dev.c
+++ linux-2.6/mm/backing-dev.c
@@ -18,7 +18,7 @@ EXPORT_SYMBOL(default_unplug_io_fn);
 
 struct backing_dev_info default_backing_dev_info = {
 	.name		= "default",
-	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
+	.ra_pages	= INITIAL_VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
 	.state		= 0,
 	.capabilities	= BDI_CAP_MAP_COPY,
 	.unplug_io_fn	= default_unplug_io_fn,
Index: linux-2.6/mm/readahead.c
===================================================================
--- linux-2.6.orig/mm/readahead.c
+++ linux-2.6/mm/readahead.c
@@ -17,6 +17,19 @@
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
 
+unsigned long vm_max_readahead = INITIAL_VM_MAX_READAHEAD;
+
+int sysctl_vm_max_readahead_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *length, loff_t *ppos)
+{
+	proc_doulongvec_minmax(table, write, buffer, length, ppos);
+
+	default_backing_dev_info.ra_pages =
+			vm_max_readahead >> (PAGE_CACHE_SHIFT - 10);
+
+	return 0;
+}
+
 /*
  * Initialise a struct file's readahead state.  Assumes that the caller has
  * memset *ra to zero.
Index: linux-2.6/kernel/sysctl.c
===================================================================
--- linux-2.6.orig/kernel/sysctl.c
+++ linux-2.6/kernel/sysctl.c
@@ -1273,7 +1273,13 @@ static struct ctl_table vm_table[] = {
 		.extra2		= &one,
 	},
 #endif
-
+	{
+		.procname	= "max_readahead_kb",
+		.data		= &vm_max_readahead,
+		.maxlen		= sizeof(vm_max_readahead),
+		.mode		= 0644,
+		.proc_handler	= sysctl_vm_max_readahead_handler,
+	},
 	{ }
 };
 
Index: linux-2.6/Documentation/sysctl/vm.txt
===================================================================
--- linux-2.6.orig/Documentation/sysctl/vm.txt
+++ linux-2.6/Documentation/sysctl/vm.txt
@@ -31,6 +31,7 @@ Currently, these files are in /proc/sys/
 - laptop_mode
 - legacy_va_layout
 - lowmem_reserve_ratio
+- max_readahead_kb
 - max_map_count
 - memory_failure_early_kill
 - memory_failure_recovery
@@ -263,6 +264,12 @@ The minimum value is 1 (1/1 -> 100%).
 
 ==============================================================
 
+max_readahead_kb:
+
+This file contains the default maximum readahead that would be used.
+
+==============================================================
+
 max_map_count:
 
 This file contains the maximum number of memory map areas a process

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH] Make vm_max_readahead configurable at run-time
@ 2010-02-09 11:29 ` Nikanth Karthikesan
  0 siblings, 0 replies; 40+ messages in thread
From: Nikanth Karthikesan @ 2010-02-09 11:29 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Jens Axboe, linux-kernel, linux-mm

Make vm_max_readahead configurable at run-time. Expose a sysctl knob
in procfs to change it. This would ensure that new disks added would
use this value as their default read_ahead_kb.

Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>

---

Index: linux-2.6/block/blk-core.c
===================================================================
--- linux-2.6.orig/block/blk-core.c
+++ linux-2.6/block/blk-core.c
@@ -499,7 +499,7 @@ struct request_queue *blk_alloc_queue_no
 	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
 	q->backing_dev_info.unplug_io_data = q;
 	q->backing_dev_info.ra_pages =
-			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+			(vm_max_readahead * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
 	q->backing_dev_info.name = "block";
Index: linux-2.6/fs/fuse/inode.c
===================================================================
--- linux-2.6.orig/fs/fuse/inode.c
+++ linux-2.6/fs/fuse/inode.c
@@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_con
 	int err;
 
 	fc->bdi.name = "fuse";
-	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	fc->bdi.ra_pages = (vm_max_readahead * 1024) / PAGE_CACHE_SIZE;
 	fc->bdi.unplug_io_fn = default_unplug_io_fn;
 	/* fuse does it's own writeback accounting */
 	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -1188,7 +1188,11 @@ int write_one_page(struct page *page, in
 void task_dirty_inc(struct task_struct *tsk);
 
 /* readahead.c */
-#define VM_MAX_READAHEAD	128	/* kbytes */
+#define INITIAL_VM_MAX_READAHEAD	128	/* kbytes */
+extern unsigned long vm_max_readahead;
+
+int sysctl_vm_max_readahead_handler(struct ctl_table *, int,
+					void __user *, size_t *, loff_t *);
 
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
Index: linux-2.6/mm/backing-dev.c
===================================================================
--- linux-2.6.orig/mm/backing-dev.c
+++ linux-2.6/mm/backing-dev.c
@@ -18,7 +18,7 @@ EXPORT_SYMBOL(default_unplug_io_fn);
 
 struct backing_dev_info default_backing_dev_info = {
 	.name		= "default",
-	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
+	.ra_pages	= INITIAL_VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
 	.state		= 0,
 	.capabilities	= BDI_CAP_MAP_COPY,
 	.unplug_io_fn	= default_unplug_io_fn,
Index: linux-2.6/mm/readahead.c
===================================================================
--- linux-2.6.orig/mm/readahead.c
+++ linux-2.6/mm/readahead.c
@@ -17,6 +17,19 @@
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
 
+unsigned long vm_max_readahead = INITIAL_VM_MAX_READAHEAD;
+
+int sysctl_vm_max_readahead_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *length, loff_t *ppos)
+{
+	proc_doulongvec_minmax(table, write, buffer, length, ppos);
+
+	default_backing_dev_info.ra_pages =
+			vm_max_readahead >> (PAGE_CACHE_SHIFT - 10);
+
+	return 0;
+}
+
 /*
  * Initialise a struct file's readahead state.  Assumes that the caller has
  * memset *ra to zero.
Index: linux-2.6/kernel/sysctl.c
===================================================================
--- linux-2.6.orig/kernel/sysctl.c
+++ linux-2.6/kernel/sysctl.c
@@ -1273,7 +1273,13 @@ static struct ctl_table vm_table[] = {
 		.extra2		= &one,
 	},
 #endif
-
+	{
+		.procname	= "max_readahead_kb",
+		.data		= &vm_max_readahead,
+		.maxlen		= sizeof(vm_max_readahead),
+		.mode		= 0644,
+		.proc_handler	= sysctl_vm_max_readahead_handler,
+	},
 	{ }
 };
 
Index: linux-2.6/Documentation/sysctl/vm.txt
===================================================================
--- linux-2.6.orig/Documentation/sysctl/vm.txt
+++ linux-2.6/Documentation/sysctl/vm.txt
@@ -31,6 +31,7 @@ Currently, these files are in /proc/sys/
 - laptop_mode
 - legacy_va_layout
 - lowmem_reserve_ratio
+- max_readahead_kb
 - max_map_count
 - memory_failure_early_kill
 - memory_failure_recovery
@@ -263,6 +264,12 @@ The minimum value is 1 (1/1 -> 100%).
 
 ==============================================================
 
+max_readahead_kb:
+
+This file contains the default maximum readahead that would be used.
+
+==============================================================
+
 max_map_count:
 
 This file contains the maximum number of memory map areas a process

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH] Make vm_max_readahead configurable at run-time
  2010-02-09 11:29 ` Nikanth Karthikesan
@ 2010-02-09 23:22   ` Andrew Morton
  -1 siblings, 0 replies; 40+ messages in thread
From: Andrew Morton @ 2010-02-09 23:22 UTC (permalink / raw)
  To: Nikanth Karthikesan; +Cc: Jens Axboe, linux-kernel, linux-mm, Wu Fengguang

On Tue, 9 Feb 2010 16:59:26 +0530
Nikanth Karthikesan <knikanth@suse.de> wrote:

> Make vm_max_readahead configurable at run-time. Expose a sysctl knob
> in procfs to change it. This would ensure that new disks added would
> use this value as their default read_ahead_kb.
> 

hm, I guess that's useful.

> +int sysctl_vm_max_readahead_handler(struct ctl_table *, int,
> +					void __user *, size_t *, loff_t *);

I don't particularly like the practice of leaving out the identifiers.
They're useful for documentation purposes and it's irritating when you
look at a declaration for some real reason, only to find that the identifiers
were left out.

>  int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
>  			pgoff_t offset, unsigned long nr_to_read);
> Index: linux-2.6/mm/backing-dev.c
> ===================================================================
> --- linux-2.6.orig/mm/backing-dev.c
> +++ linux-2.6/mm/backing-dev.c
> @@ -18,7 +18,7 @@ EXPORT_SYMBOL(default_unplug_io_fn);
>  
>  struct backing_dev_info default_backing_dev_info = {
>  	.name		= "default",
> -	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
> +	.ra_pages	= INITIAL_VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
>  	.state		= 0,
>  	.capabilities	= BDI_CAP_MAP_COPY,
>  	.unplug_io_fn	= default_unplug_io_fn,
> Index: linux-2.6/mm/readahead.c
> ===================================================================
> --- linux-2.6.orig/mm/readahead.c
> +++ linux-2.6/mm/readahead.c
> @@ -17,6 +17,19 @@
>  #include <linux/pagevec.h>
>  #include <linux/pagemap.h>
>  
> +unsigned long vm_max_readahead = INITIAL_VM_MAX_READAHEAD;
> +
> +int sysctl_vm_max_readahead_handler(struct ctl_table *table, int write,
> +		void __user *buffer, size_t *length, loff_t *ppos)
> +{
> +	proc_doulongvec_minmax(table, write, buffer, length, ppos);
> +
> +	default_backing_dev_info.ra_pages =
> +			vm_max_readahead >> (PAGE_CACHE_SHIFT - 10);
> +
> +	return 0;
> +}

Hang on.  This doesn't only affect newly-added disks.  It also affects
presently-mounted filesystems which are using default_backing_dev_info.
xfs, btrfs, blockdevs, nilfs, raw, mtd.

What's the effect of this change?  (It should be in the changelog)

>  #endif
> -
> +	{
> +		.procname	= "max_readahead_kb",
> +		.data		= &vm_max_readahead,
> +		.maxlen		= sizeof(vm_max_readahead),
> +		.mode		= 0644,
> +		.proc_handler	= sysctl_vm_max_readahead_handler,
> +	},

It'd be nice if the in-kernel and /proc identifiers were more similar.
That would require that vm_max_readahead be renamed to
vm_max_readahead_kb.  We could not bother, I guess.  But
vm_max_readahead_kb is a better identifier.

>  	{ }
>  };
>  
> Index: linux-2.6/Documentation/sysctl/vm.txt
> ===================================================================
> --- linux-2.6.orig/Documentation/sysctl/vm.txt
> +++ linux-2.6/Documentation/sysctl/vm.txt
> @@ -31,6 +31,7 @@ Currently, these files are in /proc/sys/
>  - laptop_mode
>  - legacy_va_layout
>  - lowmem_reserve_ratio
> +- max_readahead_kb
>  - max_map_count
>  - memory_failure_early_kill
>  - memory_failure_recovery
> @@ -263,6 +264,12 @@ The minimum value is 1 (1/1 -> 100%).
>  
>  ==============================================================
>  
> +max_readahead_kb:
> +
> +This file contains the default maximum readahead that would be used.
> +

I think we could provide a more detailed description than this, please.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH] Make vm_max_readahead configurable at run-time
@ 2010-02-09 23:22   ` Andrew Morton
  0 siblings, 0 replies; 40+ messages in thread
From: Andrew Morton @ 2010-02-09 23:22 UTC (permalink / raw)
  To: Nikanth Karthikesan; +Cc: Jens Axboe, linux-kernel, linux-mm, Wu Fengguang

On Tue, 9 Feb 2010 16:59:26 +0530
Nikanth Karthikesan <knikanth@suse.de> wrote:

> Make vm_max_readahead configurable at run-time. Expose a sysctl knob
> in procfs to change it. This would ensure that new disks added would
> use this value as their default read_ahead_kb.
> 

hm, I guess that's useful.

> +int sysctl_vm_max_readahead_handler(struct ctl_table *, int,
> +					void __user *, size_t *, loff_t *);

I don't particularly like the practice of leaving out the identifiers.
They're useful for documentation purposes and it's irritating when you
look at a declaration for some real reason, only to find that the identifiers
were left out.

>  int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
>  			pgoff_t offset, unsigned long nr_to_read);
> Index: linux-2.6/mm/backing-dev.c
> ===================================================================
> --- linux-2.6.orig/mm/backing-dev.c
> +++ linux-2.6/mm/backing-dev.c
> @@ -18,7 +18,7 @@ EXPORT_SYMBOL(default_unplug_io_fn);
>  
>  struct backing_dev_info default_backing_dev_info = {
>  	.name		= "default",
> -	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
> +	.ra_pages	= INITIAL_VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
>  	.state		= 0,
>  	.capabilities	= BDI_CAP_MAP_COPY,
>  	.unplug_io_fn	= default_unplug_io_fn,
> Index: linux-2.6/mm/readahead.c
> ===================================================================
> --- linux-2.6.orig/mm/readahead.c
> +++ linux-2.6/mm/readahead.c
> @@ -17,6 +17,19 @@
>  #include <linux/pagevec.h>
>  #include <linux/pagemap.h>
>  
> +unsigned long vm_max_readahead = INITIAL_VM_MAX_READAHEAD;
> +
> +int sysctl_vm_max_readahead_handler(struct ctl_table *table, int write,
> +		void __user *buffer, size_t *length, loff_t *ppos)
> +{
> +	proc_doulongvec_minmax(table, write, buffer, length, ppos);
> +
> +	default_backing_dev_info.ra_pages =
> +			vm_max_readahead >> (PAGE_CACHE_SHIFT - 10);
> +
> +	return 0;
> +}

Hang on.  This doesn't only affect newly-added disks.  It also affects
presently-mounted filesystems which are using default_backing_dev_info.
xfs, btrfs, blockdevs, nilfs, raw, mtd.

What's the effect of this change?  (It should be in the changelog)

>  #endif
> -
> +	{
> +		.procname	= "max_readahead_kb",
> +		.data		= &vm_max_readahead,
> +		.maxlen		= sizeof(vm_max_readahead),
> +		.mode		= 0644,
> +		.proc_handler	= sysctl_vm_max_readahead_handler,
> +	},

It'd be nice if the in-kernel and /proc identifiers were more similar.
That would require that vm_max_readahead be renamed to
vm_max_readahead_kb.  We could not bother, I guess.  But
vm_max_readahead_kb is a better identifier.

>  	{ }
>  };
>  
> Index: linux-2.6/Documentation/sysctl/vm.txt
> ===================================================================
> --- linux-2.6.orig/Documentation/sysctl/vm.txt
> +++ linux-2.6/Documentation/sysctl/vm.txt
> @@ -31,6 +31,7 @@ Currently, these files are in /proc/sys/
>  - laptop_mode
>  - legacy_va_layout
>  - lowmem_reserve_ratio
> +- max_readahead_kb
>  - max_map_count
>  - memory_failure_early_kill
>  - memory_failure_recovery
> @@ -263,6 +264,12 @@ The minimum value is 1 (1/1 -> 100%).
>  
>  ==============================================================
>  
> +max_readahead_kb:
> +
> +This file contains the default maximum readahead that would be used.
> +

I think we could provide a more detailed description than this, please.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH] Make vm_max_readahead configurable at run-time
  2010-02-09 11:29 ` Nikanth Karthikesan
@ 2010-02-10  6:25   ` Balbir Singh
  -1 siblings, 0 replies; 40+ messages in thread
From: Balbir Singh @ 2010-02-10  6:25 UTC (permalink / raw)
  To: Nikanth Karthikesan; +Cc: Andrew Morton, Jens Axboe, linux-kernel, linux-mm

* Nikanth Karthikesan <knikanth@suse.de> [2010-02-09 16:59:26]:

> Make vm_max_readahead configurable at run-time. Expose a sysctl knob
> in procfs to change it. This would ensure that new disks added would
> use this value as their default read_ahead_kb.
> 
> Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
>

Could you help us understand how you use this? The patch is
straightforward except for some of the objections pointed out by
Andrew, but the help text below should help the user understand the
trade-offs of increasing or lowering the value.
 
> ---
> 
> Index: linux-2.6/block/blk-core.c
> ===================================================================
> --- linux-2.6.orig/block/blk-core.c
> +++ linux-2.6/block/blk-core.c
> @@ -499,7 +499,7 @@ struct request_queue *blk_alloc_queue_no
>  	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
>  	q->backing_dev_info.unplug_io_data = q;
>  	q->backing_dev_info.ra_pages =
> -			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
> +			(vm_max_readahead * 1024) / PAGE_CACHE_SIZE;

Why not use (vm_max_readahead >> (PAGE_CACHE_SHIFT - 10))? While you are
looking at it, might as well clean it up :) I am quite sure the
compiler gets it right, but might as well be sure.

>  	q->backing_dev_info.state = 0;
>  	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
>  	q->backing_dev_info.name = "block";
> Index: linux-2.6/fs/fuse/inode.c
> ===================================================================
> --- linux-2.6.orig/fs/fuse/inode.c
> +++ linux-2.6/fs/fuse/inode.c
> @@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_con
>  	int err;
> 
>  	fc->bdi.name = "fuse";
> -	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
> +	fc->bdi.ra_pages = (vm_max_readahead * 1024) / PAGE_CACHE_SIZE;

Ditto

>  	fc->bdi.unplug_io_fn = default_unplug_io_fn;
>  	/* fuse does it's own writeback accounting */
>  	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
> Index: linux-2.6/include/linux/mm.h
> ===================================================================
> --- linux-2.6.orig/include/linux/mm.h
> +++ linux-2.6/include/linux/mm.h
> @@ -1188,7 +1188,11 @@ int write_one_page(struct page *page, in
>  void task_dirty_inc(struct task_struct *tsk);
> 
>  /* readahead.c */
> -#define VM_MAX_READAHEAD	128	/* kbytes */
> +#define INITIAL_VM_MAX_READAHEAD	128	/* kbytes */
> +extern unsigned long vm_max_readahead;
> +
> +int sysctl_vm_max_readahead_handler(struct ctl_table *, int,
> +					void __user *, size_t *, loff_t *);
> 
>  int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
>  			pgoff_t offset, unsigned long nr_to_read);
> Index: linux-2.6/mm/backing-dev.c
> ===================================================================
> --- linux-2.6.orig/mm/backing-dev.c
> +++ linux-2.6/mm/backing-dev.c
> @@ -18,7 +18,7 @@ EXPORT_SYMBOL(default_unplug_io_fn);
> 
>  struct backing_dev_info default_backing_dev_info = {
>  	.name		= "default",
> -	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
> +	.ra_pages	= INITIAL_VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
>  	.state		= 0,
>  	.capabilities	= BDI_CAP_MAP_COPY,
>  	.unplug_io_fn	= default_unplug_io_fn,
> Index: linux-2.6/mm/readahead.c
> ===================================================================
> --- linux-2.6.orig/mm/readahead.c
> +++ linux-2.6/mm/readahead.c
> @@ -17,6 +17,19 @@
>  #include <linux/pagevec.h>
>  #include <linux/pagemap.h>
> 
> +unsigned long vm_max_readahead = INITIAL_VM_MAX_READAHEAD;
> +
> +int sysctl_vm_max_readahead_handler(struct ctl_table *table, int write,
> +		void __user *buffer, size_t *length, loff_t *ppos)
> +{
> +	proc_doulongvec_minmax(table, write, buffer, length, ppos);
> +
> +	default_backing_dev_info.ra_pages =
> +			vm_max_readahead >> (PAGE_CACHE_SHIFT - 10);
> +

Aaah.. here you got it right, please be consistent and use the same
thing everywhere.

> +	return 0;
> +}
> +
>  /*
>   * Initialise a struct file's readahead state.  Assumes that the caller has
>   * memset *ra to zero.
> Index: linux-2.6/kernel/sysctl.c
> ===================================================================
> --- linux-2.6.orig/kernel/sysctl.c
> +++ linux-2.6/kernel/sysctl.c
> @@ -1273,7 +1273,13 @@ static struct ctl_table vm_table[] = {
>  		.extra2		= &one,
>  	},
>  #endif
> -
> +	{
> +		.procname	= "max_readahead_kb",
> +		.data		= &vm_max_readahead,
> +		.maxlen		= sizeof(vm_max_readahead),
> +		.mode		= 0644,
> +		.proc_handler	= sysctl_vm_max_readahead_handler,
> +	},
>  	{ }
>  };
> 
> Index: linux-2.6/Documentation/sysctl/vm.txt
> ===================================================================
> --- linux-2.6.orig/Documentation/sysctl/vm.txt
> +++ linux-2.6/Documentation/sysctl/vm.txt
> @@ -31,6 +31,7 @@ Currently, these files are in /proc/sys/
>  - laptop_mode
>  - legacy_va_layout
>  - lowmem_reserve_ratio
> +- max_readahead_kb
>  - max_map_count
>  - memory_failure_early_kill
>  - memory_failure_recovery
> @@ -263,6 +264,12 @@ The minimum value is 1 (1/1 -> 100%).
> 
>  ==============================================================
> 
> +max_readahead_kb:
> +
> +This file contains the default maximum readahead that would be used.
> +
> +==============================================================
> +
>  max_map_count:
> 
>  This file contains the maximum number of memory map areas a process
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

-- 
	Three Cheers,
	Balbir

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH] Make vm_max_readahead configurable at run-time
@ 2010-02-10  6:25   ` Balbir Singh
  0 siblings, 0 replies; 40+ messages in thread
From: Balbir Singh @ 2010-02-10  6:25 UTC (permalink / raw)
  To: Nikanth Karthikesan; +Cc: Andrew Morton, Jens Axboe, linux-kernel, linux-mm

* Nikanth Karthikesan <knikanth@suse.de> [2010-02-09 16:59:26]:

> Make vm_max_readahead configurable at run-time. Expose a sysctl knob
> in procfs to change it. This would ensure that new disks added would
> use this value as their default read_ahead_kb.
> 
> Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
>

Could you help us understand how you use this? The patch is
straightforward except for some of the objections pointed out by
Andrew, but the help text below should help the user understand the
trade-offs of increasing or lowering the value.
 
> ---
> 
> Index: linux-2.6/block/blk-core.c
> ===================================================================
> --- linux-2.6.orig/block/blk-core.c
> +++ linux-2.6/block/blk-core.c
> @@ -499,7 +499,7 @@ struct request_queue *blk_alloc_queue_no
>  	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
>  	q->backing_dev_info.unplug_io_data = q;
>  	q->backing_dev_info.ra_pages =
> -			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
> +			(vm_max_readahead * 1024) / PAGE_CACHE_SIZE;

Why not use (vm_max_readahead >> (PAGE_CACHE_SHIFT - 10))? While you are
looking at it, might as well clean it up :) I am quite sure the
compiler gets it right, but might as well be sure.

>  	q->backing_dev_info.state = 0;
>  	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
>  	q->backing_dev_info.name = "block";
> Index: linux-2.6/fs/fuse/inode.c
> ===================================================================
> --- linux-2.6.orig/fs/fuse/inode.c
> +++ linux-2.6/fs/fuse/inode.c
> @@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_con
>  	int err;
> 
>  	fc->bdi.name = "fuse";
> -	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
> +	fc->bdi.ra_pages = (vm_max_readahead * 1024) / PAGE_CACHE_SIZE;

Ditto

>  	fc->bdi.unplug_io_fn = default_unplug_io_fn;
>  	/* fuse does it's own writeback accounting */
>  	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
> Index: linux-2.6/include/linux/mm.h
> ===================================================================
> --- linux-2.6.orig/include/linux/mm.h
> +++ linux-2.6/include/linux/mm.h
> @@ -1188,7 +1188,11 @@ int write_one_page(struct page *page, in
>  void task_dirty_inc(struct task_struct *tsk);
> 
>  /* readahead.c */
> -#define VM_MAX_READAHEAD	128	/* kbytes */
> +#define INITIAL_VM_MAX_READAHEAD	128	/* kbytes */
> +extern unsigned long vm_max_readahead;
> +
> +int sysctl_vm_max_readahead_handler(struct ctl_table *, int,
> +					void __user *, size_t *, loff_t *);
> 
>  int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
>  			pgoff_t offset, unsigned long nr_to_read);
> Index: linux-2.6/mm/backing-dev.c
> ===================================================================
> --- linux-2.6.orig/mm/backing-dev.c
> +++ linux-2.6/mm/backing-dev.c
> @@ -18,7 +18,7 @@ EXPORT_SYMBOL(default_unplug_io_fn);
> 
>  struct backing_dev_info default_backing_dev_info = {
>  	.name		= "default",
> -	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
> +	.ra_pages	= INITIAL_VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
>  	.state		= 0,
>  	.capabilities	= BDI_CAP_MAP_COPY,
>  	.unplug_io_fn	= default_unplug_io_fn,
> Index: linux-2.6/mm/readahead.c
> ===================================================================
> --- linux-2.6.orig/mm/readahead.c
> +++ linux-2.6/mm/readahead.c
> @@ -17,6 +17,19 @@
>  #include <linux/pagevec.h>
>  #include <linux/pagemap.h>
> 
> +unsigned long vm_max_readahead = INITIAL_VM_MAX_READAHEAD;
> +
> +int sysctl_vm_max_readahead_handler(struct ctl_table *table, int write,
> +		void __user *buffer, size_t *length, loff_t *ppos)
> +{
> +	proc_doulongvec_minmax(table, write, buffer, length, ppos);
> +
> +	default_backing_dev_info.ra_pages =
> +			vm_max_readahead >> (PAGE_CACHE_SHIFT - 10);
> +

Aaah.. here you got it right, please be consistent and use the same
thing everywhere.

> +	return 0;
> +}
> +
>  /*
>   * Initialise a struct file's readahead state.  Assumes that the caller has
>   * memset *ra to zero.
> Index: linux-2.6/kernel/sysctl.c
> ===================================================================
> --- linux-2.6.orig/kernel/sysctl.c
> +++ linux-2.6/kernel/sysctl.c
> @@ -1273,7 +1273,13 @@ static struct ctl_table vm_table[] = {
>  		.extra2		= &one,
>  	},
>  #endif
> -
> +	{
> +		.procname	= "max_readahead_kb",
> +		.data		= &vm_max_readahead,
> +		.maxlen		= sizeof(vm_max_readahead),
> +		.mode		= 0644,
> +		.proc_handler	= sysctl_vm_max_readahead_handler,
> +	},
>  	{ }
>  };
> 
> Index: linux-2.6/Documentation/sysctl/vm.txt
> ===================================================================
> --- linux-2.6.orig/Documentation/sysctl/vm.txt
> +++ linux-2.6/Documentation/sysctl/vm.txt
> @@ -31,6 +31,7 @@ Currently, these files are in /proc/sys/
>  - laptop_mode
>  - legacy_va_layout
>  - lowmem_reserve_ratio
> +- max_readahead_kb
>  - max_map_count
>  - memory_failure_early_kill
>  - memory_failure_recovery
> @@ -263,6 +264,12 @@ The minimum value is 1 (1/1 -> 100%).
> 
>  ==============================================================
> 
> +max_readahead_kb:
> +
> +This file contains the default maximum readahead that would be used.
> +
> +==============================================================
> +
>  max_map_count:
> 
>  This file contains the maximum number of memory map areas a process
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

-- 
	Three Cheers,
	Balbir

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH v2] Make vm_max_readahead configurable at run-time
  2010-02-10  6:25   ` Balbir Singh
@ 2010-02-10 10:53     ` Nikanth Karthikesan
  -1 siblings, 0 replies; 40+ messages in thread
From: Nikanth Karthikesan @ 2010-02-10 10:53 UTC (permalink / raw)
  To: Andrew Morton; +Cc: balbir, Jens Axboe, linux-kernel, linux-mm, Wu Fengguang

Incorporated coding-style changes, better changelog and documentation as
suggested by Andrew and Balbir.

Thanks
Nikanth

Make vm_max_readahead configurable at run-time. Expose a sysctl knob
in procfs to change it. This would ensure that new disks added would
use this value as their default read_ahead_kb.

Filesystems which use default_backing_dev_info would also use this
new value, even if they were already mounted.

Currently xfs, btrfs, nilfs, raw, mtd use the default_backing_dev_info.


Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>

---

Index: linux-2.6/block/blk-core.c
===================================================================
--- linux-2.6.orig/block/blk-core.c
+++ linux-2.6/block/blk-core.c
@@ -499,7 +499,7 @@ struct request_queue *blk_alloc_queue_no
 	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
 	q->backing_dev_info.unplug_io_data = q;
 	q->backing_dev_info.ra_pages =
-			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+			(vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
 	q->backing_dev_info.name = "block";
Index: linux-2.6/fs/fuse/inode.c
===================================================================
--- linux-2.6.orig/fs/fuse/inode.c
+++ linux-2.6/fs/fuse/inode.c
@@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_con
 	int err;
 
 	fc->bdi.name = "fuse";
-	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	fc->bdi.ra_pages = (vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
 	fc->bdi.unplug_io_fn = default_unplug_io_fn;
 	/* fuse does it's own writeback accounting */
 	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -1188,7 +1188,11 @@ int write_one_page(struct page *page, in
 void task_dirty_inc(struct task_struct *tsk);
 
 /* readahead.c */
-#define VM_MAX_READAHEAD	128	/* kbytes */
+#define INITIAL_VM_MAX_READAHEAD_KB	128
+extern unsigned long vm_max_readahead_kb;
+
+int sysctl_vm_max_readahead_kb_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *length, loff_t *ppos);
 
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
Index: linux-2.6/mm/backing-dev.c
===================================================================
--- linux-2.6.orig/mm/backing-dev.c
+++ linux-2.6/mm/backing-dev.c
@@ -18,7 +18,8 @@ EXPORT_SYMBOL(default_unplug_io_fn);
 
 struct backing_dev_info default_backing_dev_info = {
 	.name		= "default",
-	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
+	.ra_pages	= INITIAL_VM_MAX_READAHEAD_KB
+					>> (PAGE_CACHE_SHIFT - 10),
 	.state		= 0,
 	.capabilities	= BDI_CAP_MAP_COPY,
 	.unplug_io_fn	= default_unplug_io_fn,
Index: linux-2.6/mm/readahead.c
===================================================================
--- linux-2.6.orig/mm/readahead.c
+++ linux-2.6/mm/readahead.c
@@ -17,6 +17,19 @@
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
 
+unsigned long vm_max_readahead_kb = INITIAL_VM_MAX_READAHEAD_KB;
+
+int sysctl_vm_max_readahead_kb_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *length, loff_t *ppos)
+{
+	proc_doulongvec_minmax(table, write, buffer, length, ppos);
+
+	default_backing_dev_info.ra_pages =
+			vm_max_readahead_kb >> (PAGE_CACHE_SHIFT - 10);
+
+	return 0;
+}
+
 /*
  * Initialise a struct file's readahead state.  Assumes that the caller has
  * memset *ra to zero.
Index: linux-2.6/kernel/sysctl.c
===================================================================
--- linux-2.6.orig/kernel/sysctl.c
+++ linux-2.6/kernel/sysctl.c
@@ -1273,7 +1273,13 @@ static struct ctl_table vm_table[] = {
 		.extra2		= &one,
 	},
 #endif
-
+	{
+		.procname	= "max_readahead_kb",
+		.data		= &vm_max_readahead_kb,
+		.maxlen		= sizeof(vm_max_readahead_kb),
+		.mode		= 0644,
+		.proc_handler	= sysctl_vm_max_readahead_kb_handler,
+	},
 	{ }
 };
 
Index: linux-2.6/Documentation/sysctl/vm.txt
===================================================================
--- linux-2.6.orig/Documentation/sysctl/vm.txt
+++ linux-2.6/Documentation/sysctl/vm.txt
@@ -31,6 +31,7 @@ Currently, these files are in /proc/sys/
 - laptop_mode
 - legacy_va_layout
 - lowmem_reserve_ratio
+- max_readahead_kb
 - max_map_count
 - memory_failure_early_kill
 - memory_failure_recovery
@@ -263,6 +264,18 @@ The minimum value is 1 (1/1 -> 100%).
 
 ==============================================================
 
+max_readahead_kb:
+
+This file contains the default maximum readahead that would be
+used, when new disks would be added to the system.
+
+Also filesystems which use default_backing_dev_info would also
+use this new value, even if they were already mounted.
+
+xfs, btrfs, nilfs, raw, mtd use the default_backing_dev_info.
+
+==============================================================
+
 max_map_count:
 
 This file contains the maximum number of memory map areas a process


^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH v2] Make vm_max_readahead configurable at run-time
@ 2010-02-10 10:53     ` Nikanth Karthikesan
  0 siblings, 0 replies; 40+ messages in thread
From: Nikanth Karthikesan @ 2010-02-10 10:53 UTC (permalink / raw)
  To: Andrew Morton; +Cc: balbir, Jens Axboe, linux-kernel, linux-mm, Wu Fengguang

Incorporated coding-style changes, better changelog and documentation as
suggested by Andrew and Balbir.

Thanks
Nikanth

Make vm_max_readahead configurable at run-time. Expose a sysctl knob
in procfs to change it. This would ensure that new disks added would
use this value as their default read_ahead_kb.

Filesystems which use default_backing_dev_info would also
use this new value, even if they were already mounted.

Currently xfs, btrfs, nilfs, raw, mtd use the default_backing_dev_info.


Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>

---

Index: linux-2.6/block/blk-core.c
===================================================================
--- linux-2.6.orig/block/blk-core.c
+++ linux-2.6/block/blk-core.c
@@ -499,7 +499,7 @@ struct request_queue *blk_alloc_queue_no
 	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
 	q->backing_dev_info.unplug_io_data = q;
 	q->backing_dev_info.ra_pages =
-			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+			(vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
 	q->backing_dev_info.name = "block";
Index: linux-2.6/fs/fuse/inode.c
===================================================================
--- linux-2.6.orig/fs/fuse/inode.c
+++ linux-2.6/fs/fuse/inode.c
@@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_con
 	int err;
 
 	fc->bdi.name = "fuse";
-	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	fc->bdi.ra_pages = (vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
 	fc->bdi.unplug_io_fn = default_unplug_io_fn;
 	/* fuse does it's own writeback accounting */
 	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -1188,7 +1188,11 @@ int write_one_page(struct page *page, in
 void task_dirty_inc(struct task_struct *tsk);
 
 /* readahead.c */
-#define VM_MAX_READAHEAD	128	/* kbytes */
+#define INITIAL_VM_MAX_READAHEAD_KB	128
+extern unsigned long vm_max_readahead_kb;
+
+int sysctl_vm_max_readahead_kb_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *length, loff_t *ppos);
 
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
Index: linux-2.6/mm/backing-dev.c
===================================================================
--- linux-2.6.orig/mm/backing-dev.c
+++ linux-2.6/mm/backing-dev.c
@@ -18,7 +18,8 @@ EXPORT_SYMBOL(default_unplug_io_fn);
 
 struct backing_dev_info default_backing_dev_info = {
 	.name		= "default",
-	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
+	.ra_pages	= INITIAL_VM_MAX_READAHEAD_KB
+					>> (PAGE_CACHE_SHIFT - 10),
 	.state		= 0,
 	.capabilities	= BDI_CAP_MAP_COPY,
 	.unplug_io_fn	= default_unplug_io_fn,
Index: linux-2.6/mm/readahead.c
===================================================================
--- linux-2.6.orig/mm/readahead.c
+++ linux-2.6/mm/readahead.c
@@ -17,6 +17,19 @@
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
 
+unsigned long vm_max_readahead_kb = INITIAL_VM_MAX_READAHEAD_KB;
+
+int sysctl_vm_max_readahead_kb_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *length, loff_t *ppos)
+{
+	proc_doulongvec_minmax(table, write, buffer, length, ppos);
+
+	default_backing_dev_info.ra_pages =
+			vm_max_readahead_kb >> (PAGE_CACHE_SHIFT - 10);
+
+	return 0;
+}
+
 /*
  * Initialise a struct file's readahead state.  Assumes that the caller has
  * memset *ra to zero.
Index: linux-2.6/kernel/sysctl.c
===================================================================
--- linux-2.6.orig/kernel/sysctl.c
+++ linux-2.6/kernel/sysctl.c
@@ -1273,7 +1273,13 @@ static struct ctl_table vm_table[] = {
 		.extra2		= &one,
 	},
 #endif
-
+	{
+		.procname	= "max_readahead_kb",
+		.data		= &vm_max_readahead_kb,
+		.maxlen		= sizeof(vm_max_readahead_kb),
+		.mode		= 0644,
+		.proc_handler	= sysctl_vm_max_readahead_kb_handler,
+	},
 	{ }
 };
 
Index: linux-2.6/Documentation/sysctl/vm.txt
===================================================================
--- linux-2.6.orig/Documentation/sysctl/vm.txt
+++ linux-2.6/Documentation/sysctl/vm.txt
@@ -31,6 +31,7 @@ Currently, these files are in /proc/sys/
 - laptop_mode
 - legacy_va_layout
 - lowmem_reserve_ratio
+- max_readahead_kb
 - max_map_count
 - memory_failure_early_kill
 - memory_failure_recovery
@@ -263,6 +264,18 @@ The minimum value is 1 (1/1 -> 100%).
 
 ==============================================================
 
+max_readahead_kb:
+
+This file contains the default maximum readahead that would be
+used, when new disks would be added to the system.
+
+Also filesystems which use default_backing_dev_info would also
+use this new value, even if they were already mounted.
+
+xfs, btrfs, nilfs, raw, mtd use the default_backing_dev_info.
+
+==============================================================
+
 max_map_count:
 
 This file contains the maximum number of memory map areas a process

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make vm_max_readahead configurable at run-time
  2010-02-10 10:53     ` Nikanth Karthikesan
@ 2010-02-10 11:05       ` Wu Fengguang
  -1 siblings, 0 replies; 40+ messages in thread
From: Wu Fengguang @ 2010-02-10 11:05 UTC (permalink / raw)
  To: Nikanth Karthikesan
  Cc: Andrew Morton, balbir, Jens Axboe, linux-kernel, linux-mm

Nikanth,

> Make vm_max_readahead configurable at run-time. Expose a sysctl knob
> in procfs to change it. This would ensure that new disks added would
> use this value as their default read_ahead_kb.

Do you have a use case, or customer demand for it?

> Also filesystems which use default_backing_dev_info would also
> use this new value, even if they were already mounted.
> 
> Currently xfs, btrfs, nilfs, raw, mtd use the default_backing_dev_info.

This sounds like a bad interface, in that users will be confused by the
tricky details of "works for new devices" and "works for some fs".

One more tricky point is, btrfs/md/dm readahead size may not be
influenced if some of the component disks are hot added.

So this patch is only going to work for hot-plugged disks that
contain a _standalone_ filesystem. Is this a typical use case in servers?

Thanks,
Fengguang

> 
> Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
> 
> ---
> 
> Index: linux-2.6/block/blk-core.c
> ===================================================================
> --- linux-2.6.orig/block/blk-core.c
> +++ linux-2.6/block/blk-core.c
> @@ -499,7 +499,7 @@ struct request_queue *blk_alloc_queue_no
>  	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
>  	q->backing_dev_info.unplug_io_data = q;
>  	q->backing_dev_info.ra_pages =
> -			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
> +			(vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
>  	q->backing_dev_info.state = 0;
>  	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
>  	q->backing_dev_info.name = "block";
> Index: linux-2.6/fs/fuse/inode.c
> ===================================================================
> --- linux-2.6.orig/fs/fuse/inode.c
> +++ linux-2.6/fs/fuse/inode.c
> @@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_con
>  	int err;
>  
>  	fc->bdi.name = "fuse";
> -	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
> +	fc->bdi.ra_pages = (vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
>  	fc->bdi.unplug_io_fn = default_unplug_io_fn;
>  	/* fuse does it's own writeback accounting */
>  	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
> Index: linux-2.6/include/linux/mm.h
> ===================================================================
> --- linux-2.6.orig/include/linux/mm.h
> +++ linux-2.6/include/linux/mm.h
> @@ -1188,7 +1188,11 @@ int write_one_page(struct page *page, in
>  void task_dirty_inc(struct task_struct *tsk);
>  
>  /* readahead.c */
> -#define VM_MAX_READAHEAD	128	/* kbytes */
> +#define INITIAL_VM_MAX_READAHEAD_KB	128
> +extern unsigned long vm_max_readahead_kb;
> +
> +int sysctl_vm_max_readahead_kb_handler(struct ctl_table *table, int write,
> +		void __user *buffer, size_t *length, loff_t *ppos);
>  
>  int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
>  			pgoff_t offset, unsigned long nr_to_read);
> Index: linux-2.6/mm/backing-dev.c
> ===================================================================
> --- linux-2.6.orig/mm/backing-dev.c
> +++ linux-2.6/mm/backing-dev.c
> @@ -18,7 +18,8 @@ EXPORT_SYMBOL(default_unplug_io_fn);
>  
>  struct backing_dev_info default_backing_dev_info = {
>  	.name		= "default",
> -	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
> +	.ra_pages	= INITIAL_VM_MAX_READAHEAD_KB
> +					>> (PAGE_CACHE_SHIFT - 10),
>  	.state		= 0,
>  	.capabilities	= BDI_CAP_MAP_COPY,
>  	.unplug_io_fn	= default_unplug_io_fn,
> Index: linux-2.6/mm/readahead.c
> ===================================================================
> --- linux-2.6.orig/mm/readahead.c
> +++ linux-2.6/mm/readahead.c
> @@ -17,6 +17,19 @@
>  #include <linux/pagevec.h>
>  #include <linux/pagemap.h>
>  
> +unsigned long vm_max_readahead_kb = INITIAL_VM_MAX_READAHEAD_KB;
> +
> +int sysctl_vm_max_readahead_kb_handler(struct ctl_table *table, int write,
> +		void __user *buffer, size_t *length, loff_t *ppos)
> +{
> +	proc_doulongvec_minmax(table, write, buffer, length, ppos);
> +
> +	default_backing_dev_info.ra_pages =
> +			vm_max_readahead_kb >> (PAGE_CACHE_SHIFT - 10);
> +
> +	return 0;
> +}
> +
>  /*
>   * Initialise a struct file's readahead state.  Assumes that the caller has
>   * memset *ra to zero.
> Index: linux-2.6/kernel/sysctl.c
> ===================================================================
> --- linux-2.6.orig/kernel/sysctl.c
> +++ linux-2.6/kernel/sysctl.c
> @@ -1273,7 +1273,13 @@ static struct ctl_table vm_table[] = {
>  		.extra2		= &one,
>  	},
>  #endif
> -
> +	{
> +		.procname	= "max_readahead_kb",
> +		.data		= &vm_max_readahead_kb,
> +		.maxlen		= sizeof(vm_max_readahead_kb),
> +		.mode		= 0644,
> +		.proc_handler	= sysctl_vm_max_readahead_kb_handler,
> +	},
>  	{ }
>  };
>  
> Index: linux-2.6/Documentation/sysctl/vm.txt
> ===================================================================
> --- linux-2.6.orig/Documentation/sysctl/vm.txt
> +++ linux-2.6/Documentation/sysctl/vm.txt
> @@ -31,6 +31,7 @@ Currently, these files are in /proc/sys/
>  - laptop_mode
>  - legacy_va_layout
>  - lowmem_reserve_ratio
> +- max_readahead_kb
>  - max_map_count
>  - memory_failure_early_kill
>  - memory_failure_recovery
> @@ -263,6 +264,18 @@ The minimum value is 1 (1/1 -> 100%).
>  
>  ==============================================================
>  
> +max_readahead_kb:
> +
> +This file contains the default maximum readahead that would be
> +used, when new disks would be added to the system.
> +
> +Also filesystems which use default_backing_dev_info would also
> +use this new value, even if they were already mounted.
> +
> +xfs, btrfs, nilfs, raw, mtd use the default_backing_dev_info.
> +
> +==============================================================
> +
>  max_map_count:
>  
>  This file contains the maximum number of memory map areas a process

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make vm_max_readahead configurable at run-time
@ 2010-02-10 11:05       ` Wu Fengguang
  0 siblings, 0 replies; 40+ messages in thread
From: Wu Fengguang @ 2010-02-10 11:05 UTC (permalink / raw)
  To: Nikanth Karthikesan
  Cc: Andrew Morton, balbir, Jens Axboe, linux-kernel, linux-mm

Nikanth,

> Make vm_max_readahead configurable at run-time. Expose a sysctl knob
> in procfs to change it. This would ensure that new disks added would
> use this value as their default read_ahead_kb.

Do you have a use case, or customer demand for it?

> Also filesystems which use default_backing_dev_info would also
> use this new value, even if they were already mounted.
> 
> Currently xfs, btrfs, nilfs, raw, mtd use the default_backing_dev_info.

This sounds like a bad interface, in that users will be confused by the
tricky details of "works for new devices" and "works for some fs".

One more tricky point is, btrfs/md/dm readahead size may not be
influenced if some of the component disks are hot added.

So this patch is only going to work for hot-plugged disks that
contain a _standalone_ filesystem. Is this a typical use case in servers?

Thanks,
Fengguang

> 
> Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
> 
> ---
> 
> Index: linux-2.6/block/blk-core.c
> ===================================================================
> --- linux-2.6.orig/block/blk-core.c
> +++ linux-2.6/block/blk-core.c
> @@ -499,7 +499,7 @@ struct request_queue *blk_alloc_queue_no
>  	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
>  	q->backing_dev_info.unplug_io_data = q;
>  	q->backing_dev_info.ra_pages =
> -			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
> +			(vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
>  	q->backing_dev_info.state = 0;
>  	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
>  	q->backing_dev_info.name = "block";
> Index: linux-2.6/fs/fuse/inode.c
> ===================================================================
> --- linux-2.6.orig/fs/fuse/inode.c
> +++ linux-2.6/fs/fuse/inode.c
> @@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_con
>  	int err;
>  
>  	fc->bdi.name = "fuse";
> -	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
> +	fc->bdi.ra_pages = (vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
>  	fc->bdi.unplug_io_fn = default_unplug_io_fn;
>  	/* fuse does it's own writeback accounting */
>  	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
> Index: linux-2.6/include/linux/mm.h
> ===================================================================
> --- linux-2.6.orig/include/linux/mm.h
> +++ linux-2.6/include/linux/mm.h
> @@ -1188,7 +1188,11 @@ int write_one_page(struct page *page, in
>  void task_dirty_inc(struct task_struct *tsk);
>  
>  /* readahead.c */
> -#define VM_MAX_READAHEAD	128	/* kbytes */
> +#define INITIAL_VM_MAX_READAHEAD_KB	128
> +extern unsigned long vm_max_readahead_kb;
> +
> +int sysctl_vm_max_readahead_kb_handler(struct ctl_table *table, int write,
> +		void __user *buffer, size_t *length, loff_t *ppos);
>  
>  int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
>  			pgoff_t offset, unsigned long nr_to_read);
> Index: linux-2.6/mm/backing-dev.c
> ===================================================================
> --- linux-2.6.orig/mm/backing-dev.c
> +++ linux-2.6/mm/backing-dev.c
> @@ -18,7 +18,8 @@ EXPORT_SYMBOL(default_unplug_io_fn);
>  
>  struct backing_dev_info default_backing_dev_info = {
>  	.name		= "default",
> -	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
> +	.ra_pages	= INITIAL_VM_MAX_READAHEAD_KB
> +					>> (PAGE_CACHE_SHIFT - 10),
>  	.state		= 0,
>  	.capabilities	= BDI_CAP_MAP_COPY,
>  	.unplug_io_fn	= default_unplug_io_fn,
> Index: linux-2.6/mm/readahead.c
> ===================================================================
> --- linux-2.6.orig/mm/readahead.c
> +++ linux-2.6/mm/readahead.c
> @@ -17,6 +17,19 @@
>  #include <linux/pagevec.h>
>  #include <linux/pagemap.h>
>  
> +unsigned long vm_max_readahead_kb = INITIAL_VM_MAX_READAHEAD_KB;
> +
> +int sysctl_vm_max_readahead_kb_handler(struct ctl_table *table, int write,
> +		void __user *buffer, size_t *length, loff_t *ppos)
> +{
> +	proc_doulongvec_minmax(table, write, buffer, length, ppos);
> +
> +	default_backing_dev_info.ra_pages =
> +			vm_max_readahead_kb >> (PAGE_CACHE_SHIFT - 10);
> +
> +	return 0;
> +}
> +
>  /*
>   * Initialise a struct file's readahead state.  Assumes that the caller has
>   * memset *ra to zero.
> Index: linux-2.6/kernel/sysctl.c
> ===================================================================
> --- linux-2.6.orig/kernel/sysctl.c
> +++ linux-2.6/kernel/sysctl.c
> @@ -1273,7 +1273,13 @@ static struct ctl_table vm_table[] = {
>  		.extra2		= &one,
>  	},
>  #endif
> -
> +	{
> +		.procname	= "max_readahead_kb",
> +		.data		= &vm_max_readahead_kb,
> +		.maxlen		= sizeof(vm_max_readahead_kb),
> +		.mode		= 0644,
> +		.proc_handler	= sysctl_vm_max_readahead_kb_handler,
> +	},
>  	{ }
>  };
>  
> Index: linux-2.6/Documentation/sysctl/vm.txt
> ===================================================================
> --- linux-2.6.orig/Documentation/sysctl/vm.txt
> +++ linux-2.6/Documentation/sysctl/vm.txt
> @@ -31,6 +31,7 @@ Currently, these files are in /proc/sys/
>  - laptop_mode
>  - legacy_va_layout
>  - lowmem_reserve_ratio
> +- max_readahead_kb
>  - max_map_count
>  - memory_failure_early_kill
>  - memory_failure_recovery
> @@ -263,6 +264,18 @@ The minimum value is 1 (1/1 -> 100%).
>  
>  ==============================================================
>  
> +max_readahead_kb:
> +
> +This file contains the default maximum readahead that would be
> +used, when new disks would be added to the system.
> +
> +Also filesystems which use default_backing_dev_info would also
> +use this new value, even if they were already mounted.
> +
> +xfs, btrfs, nilfs, raw, mtd use the default_backing_dev_info.
> +
> +==============================================================
> +
>  max_map_count:
>  
>  This file contains the maximum number of memory map areas a process

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make vm_max_readahead configurable at run-time
  2010-02-10 11:05       ` Wu Fengguang
@ 2010-02-10 13:52         ` Nikanth Karthikesan
  -1 siblings, 0 replies; 40+ messages in thread
From: Nikanth Karthikesan @ 2010-02-10 13:52 UTC (permalink / raw)
  To: Wu Fengguang; +Cc: Andrew Morton, balbir, Jens Axboe, linux-kernel, linux-mm

On Wednesday 10 February 2010 16:35:51 Wu Fengguang wrote:
> Nikanth,
> 
> > Make vm_max_readahead configurable at run-time. Expose a sysctl knob
> > in procfs to change it. This would ensure that new disks added would
> > use this value as their default read_ahead_kb.
> 
> Do you have use case, or customer demand for it?
> 

Nobody requested it. But when doing some performance testing with
readahead_kb, re-compiling would be a pain, and I thought that having a
configurable default might be useful.

> > Also filesystems which use default_backing_dev_info would also
> > use this new value, even if they were already mounted.
> >
> > Currently xfs, btrfs, nilfs, raw, mtd use the default_backing_dev_info.
> 
> This sounds like bad interface, in that users will be confused by the
> tricky details of "works for new devices" and "works for some fs".
> 
> One more tricky point is, btrfs/md/dm readahead size may not be
> influenced if some of the component disks are hot added.
> 
> So this patch is only going to work for hot-plugged disks that
> contains _standalone_ filesystem. Is this typical use case in servers?
> 

Yes, it would work only if the top-level disk is hot-plugged/created.

Thanks
Nikanth


> Thanks,
> Fengguang
> 
> > Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
> >
> > ---
> >
> > Index: linux-2.6/block/blk-core.c
> > ===================================================================
> > --- linux-2.6.orig/block/blk-core.c
> > +++ linux-2.6/block/blk-core.c
> > @@ -499,7 +499,7 @@ struct request_queue *blk_alloc_queue_no
> >  	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
> >  	q->backing_dev_info.unplug_io_data = q;
> >  	q->backing_dev_info.ra_pages =
> > -			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
> > +			(vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
> >  	q->backing_dev_info.state = 0;
> >  	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
> >  	q->backing_dev_info.name = "block";
> > Index: linux-2.6/fs/fuse/inode.c
> > ===================================================================
> > --- linux-2.6.orig/fs/fuse/inode.c
> > +++ linux-2.6/fs/fuse/inode.c
> > @@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_con
> >  	int err;
> >
> >  	fc->bdi.name = "fuse";
> > -	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
> > +	fc->bdi.ra_pages = (vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
> >  	fc->bdi.unplug_io_fn = default_unplug_io_fn;
> >  	/* fuse does it's own writeback accounting */
> >  	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
> > Index: linux-2.6/include/linux/mm.h
> > ===================================================================
> > --- linux-2.6.orig/include/linux/mm.h
> > +++ linux-2.6/include/linux/mm.h
> > @@ -1188,7 +1188,11 @@ int write_one_page(struct page *page, in
> >  void task_dirty_inc(struct task_struct *tsk);
> >
> >  /* readahead.c */
> > -#define VM_MAX_READAHEAD	128	/* kbytes */
> > +#define INITIAL_VM_MAX_READAHEAD_KB	128
> > +extern unsigned long vm_max_readahead_kb;
> > +
> > +int sysctl_vm_max_readahead_kb_handler(struct ctl_table *table, int
> > write, +		void __user *buffer, size_t *length, loff_t *ppos);
> >
> >  int force_page_cache_readahead(struct address_space *mapping, struct
> > file *filp, pgoff_t offset, unsigned long nr_to_read);
> > Index: linux-2.6/mm/backing-dev.c
> > ===================================================================
> > --- linux-2.6.orig/mm/backing-dev.c
> > +++ linux-2.6/mm/backing-dev.c
> > @@ -18,7 +18,8 @@ EXPORT_SYMBOL(default_unplug_io_fn);
> >
> >  struct backing_dev_info default_backing_dev_info = {
> >  	.name		= "default",
> > -	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
> > +	.ra_pages	= INITIAL_VM_MAX_READAHEAD_KB
> > +					>> (PAGE_CACHE_SHIFT - 10),
> >  	.state		= 0,
> >  	.capabilities	= BDI_CAP_MAP_COPY,
> >  	.unplug_io_fn	= default_unplug_io_fn,
> > Index: linux-2.6/mm/readahead.c
> > ===================================================================
> > --- linux-2.6.orig/mm/readahead.c
> > +++ linux-2.6/mm/readahead.c
> > @@ -17,6 +17,19 @@
> >  #include <linux/pagevec.h>
> >  #include <linux/pagemap.h>
> >
> > +unsigned long vm_max_readahead_kb = INITIAL_VM_MAX_READAHEAD_KB;
> > +
> > +int sysctl_vm_max_readahead_kb_handler(struct ctl_table *table, int
> > write, +		void __user *buffer, size_t *length, loff_t *ppos)
> > +{
> > +	proc_doulongvec_minmax(table, write, buffer, length, ppos);
> > +
> > +	default_backing_dev_info.ra_pages =
> > +			vm_max_readahead_kb >> (PAGE_CACHE_SHIFT - 10);
> > +
> > +	return 0;
> > +}
> > +
> >  /*
> >   * Initialise a struct file's readahead state.  Assumes that the caller
> > has * memset *ra to zero.
> > Index: linux-2.6/kernel/sysctl.c
> > ===================================================================
> > --- linux-2.6.orig/kernel/sysctl.c
> > +++ linux-2.6/kernel/sysctl.c
> > @@ -1273,7 +1273,13 @@ static struct ctl_table vm_table[] = {
> >  		.extra2		= &one,
> >  	},
> >  #endif
> > -
> > +	{
> > +		.procname	= "max_readahead_kb",
> > +		.data		= &vm_max_readahead_kb,
> > +		.maxlen		= sizeof(vm_max_readahead_kb),
> > +		.mode		= 0644,
> > +		.proc_handler	= sysctl_vm_max_readahead_kb_handler,
> > +	},
> >  	{ }
> >  };
> >
> > Index: linux-2.6/Documentation/sysctl/vm.txt
> > ===================================================================
> > --- linux-2.6.orig/Documentation/sysctl/vm.txt
> > +++ linux-2.6/Documentation/sysctl/vm.txt
> > @@ -31,6 +31,7 @@ Currently, these files are in /proc/sys/
> >  - laptop_mode
> >  - legacy_va_layout
> >  - lowmem_reserve_ratio
> > +- max_readahead_kb
> >  - max_map_count
> >  - memory_failure_early_kill
> >  - memory_failure_recovery
> > @@ -263,6 +264,18 @@ The minimum value is 1 (1/1 -> 100%).
> >
> >  ==============================================================
> >
> > +max_readahead_kb:
> > +
> > +This file contains the default maximum readahead that would be
> > +used, when new disks would be added to the system.
> > +
> > +Also filesystems which use default_backing_dev_info would also
> > +use this new value, even if they were already mounted.
> > +
> > +xfs, btrfs, nilfs, raw, mtd use the default_backing_dev_info.
> > +
> > +==============================================================
> > +
> >  max_map_count:
> >
> >  This file contains the maximum number of memory map areas a process
> 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make vm_max_readahead configurable at run-time
@ 2010-02-10 13:52         ` Nikanth Karthikesan
  0 siblings, 0 replies; 40+ messages in thread
From: Nikanth Karthikesan @ 2010-02-10 13:52 UTC (permalink / raw)
  To: Wu Fengguang; +Cc: Andrew Morton, balbir, Jens Axboe, linux-kernel, linux-mm

On Wednesday 10 February 2010 16:35:51 Wu Fengguang wrote:
> Nikanth,
> 
> > Make vm_max_readahead configurable at run-time. Expose a sysctl knob
> > in procfs to change it. This would ensure that new disks added would
> > use this value as their default read_ahead_kb.
> 
> Do you have use case, or customer demand for it?
> 

Nobody requested it. But when doing some performance testing with
readahead_kb, re-compiling would be a pain, and I thought that having a
configurable default might be useful.

> > Also filesystems which use default_backing_dev_info would also
> > use this new value, even if they were already mounted.
> >
> > Currently xfs, btrfs, nilfs, raw, mtd use the default_backing_dev_info.
> 
> This sounds like bad interface, in that users will be confused by the
> tricky details of "works for new devices" and "works for some fs".
> 
> One more tricky point is, btrfs/md/dm readahead size may not be
> influenced if some of the component disks are hot added.
> 
> So this patch is only going to work for hot-plugged disks that
> contains _standalone_ filesystem. Is this typical use case in servers?
> 

Yes, it would work only if the top-level disk is hot-plugged/created.

Thanks
Nikanth


> Thanks,
> Fengguang
> 
> > Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
> >
> > ---
> >
> > Index: linux-2.6/block/blk-core.c
> > ===================================================================
> > --- linux-2.6.orig/block/blk-core.c
> > +++ linux-2.6/block/blk-core.c
> > @@ -499,7 +499,7 @@ struct request_queue *blk_alloc_queue_no
> >  	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
> >  	q->backing_dev_info.unplug_io_data = q;
> >  	q->backing_dev_info.ra_pages =
> > -			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
> > +			(vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
> >  	q->backing_dev_info.state = 0;
> >  	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
> >  	q->backing_dev_info.name = "block";
> > Index: linux-2.6/fs/fuse/inode.c
> > ===================================================================
> > --- linux-2.6.orig/fs/fuse/inode.c
> > +++ linux-2.6/fs/fuse/inode.c
> > @@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_con
> >  	int err;
> >
> >  	fc->bdi.name = "fuse";
> > -	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
> > +	fc->bdi.ra_pages = (vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
> >  	fc->bdi.unplug_io_fn = default_unplug_io_fn;
> >  	/* fuse does it's own writeback accounting */
> >  	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
> > Index: linux-2.6/include/linux/mm.h
> > ===================================================================
> > --- linux-2.6.orig/include/linux/mm.h
> > +++ linux-2.6/include/linux/mm.h
> > @@ -1188,7 +1188,11 @@ int write_one_page(struct page *page, in
> >  void task_dirty_inc(struct task_struct *tsk);
> >
> >  /* readahead.c */
> > -#define VM_MAX_READAHEAD	128	/* kbytes */
> > +#define INITIAL_VM_MAX_READAHEAD_KB	128
> > +extern unsigned long vm_max_readahead_kb;
> > +
> > +int sysctl_vm_max_readahead_kb_handler(struct ctl_table *table, int write,
> > +		void __user *buffer, size_t *length, loff_t *ppos);
> >
> >  int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
> >  			pgoff_t offset, unsigned long nr_to_read);
> > Index: linux-2.6/mm/backing-dev.c
> > ===================================================================
> > --- linux-2.6.orig/mm/backing-dev.c
> > +++ linux-2.6/mm/backing-dev.c
> > @@ -18,7 +18,8 @@ EXPORT_SYMBOL(default_unplug_io_fn);
> >
> >  struct backing_dev_info default_backing_dev_info = {
> >  	.name		= "default",
> > -	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
> > +	.ra_pages	= INITIAL_VM_MAX_READAHEAD_KB
> > +					>> (PAGE_CACHE_SHIFT - 10),
> >  	.state		= 0,
> >  	.capabilities	= BDI_CAP_MAP_COPY,
> >  	.unplug_io_fn	= default_unplug_io_fn,
> > Index: linux-2.6/mm/readahead.c
> > ===================================================================
> > --- linux-2.6.orig/mm/readahead.c
> > +++ linux-2.6/mm/readahead.c
> > @@ -17,6 +17,19 @@
> >  #include <linux/pagevec.h>
> >  #include <linux/pagemap.h>
> >
> > +unsigned long vm_max_readahead_kb = INITIAL_VM_MAX_READAHEAD_KB;
> > +
> > +int sysctl_vm_max_readahead_kb_handler(struct ctl_table *table, int write,
> > +		void __user *buffer, size_t *length, loff_t *ppos)
> > +{
> > +	proc_doulongvec_minmax(table, write, buffer, length, ppos);
> > +
> > +	default_backing_dev_info.ra_pages =
> > +			vm_max_readahead_kb >> (PAGE_CACHE_SHIFT - 10);
> > +
> > +	return 0;
> > +}
> > +
> >  /*
> >   * Initialise a struct file's readahead state.  Assumes that the caller has
> >   * memset *ra to zero.
> > Index: linux-2.6/kernel/sysctl.c
> > ===================================================================
> > --- linux-2.6.orig/kernel/sysctl.c
> > +++ linux-2.6/kernel/sysctl.c
> > @@ -1273,7 +1273,13 @@ static struct ctl_table vm_table[] = {
> >  		.extra2		= &one,
> >  	},
> >  #endif
> > -
> > +	{
> > +		.procname	= "max_readahead_kb",
> > +		.data		= &vm_max_readahead_kb,
> > +		.maxlen		= sizeof(vm_max_readahead_kb),
> > +		.mode		= 0644,
> > +		.proc_handler	= sysctl_vm_max_readahead_kb_handler,
> > +	},
> >  	{ }
> >  };
> >
> > Index: linux-2.6/Documentation/sysctl/vm.txt
> > ===================================================================
> > --- linux-2.6.orig/Documentation/sysctl/vm.txt
> > +++ linux-2.6/Documentation/sysctl/vm.txt
> > @@ -31,6 +31,7 @@ Currently, these files are in /proc/sys/
> >  - laptop_mode
> >  - legacy_va_layout
> >  - lowmem_reserve_ratio
> > +- max_readahead_kb
> >  - max_map_count
> >  - memory_failure_early_kill
> >  - memory_failure_recovery
> > @@ -263,6 +264,18 @@ The minimum value is 1 (1/1 -> 100%).
> >
> >  ==============================================================
> >
> > +max_readahead_kb:
> > +
> > +This file contains the default maximum readahead that would be
> > +used, when new disks would be added to the system.
> > +
> > +Also filesystems which use default_backing_dev_info would also
> > +use this new value, even if they were already mounted.
> > +
> > +xfs, btrfs, nilfs, raw, mtd use the default_backing_dev_info.
> > +
> > +==============================================================
> > +
> >  max_map_count:
> >
> >  This file contains the maximum number of memory map areas a process
> 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make vm_max_readahead configurable at run-time
  2010-02-10 13:52         ` Nikanth Karthikesan
@ 2010-02-11  5:13           ` Wu Fengguang
  -1 siblings, 0 replies; 40+ messages in thread
From: Wu Fengguang @ 2010-02-11  5:13 UTC (permalink / raw)
  To: Nikanth Karthikesan
  Cc: Andrew Morton, balbir, Jens Axboe, linux-kernel, linux-mm,
	Christian Ehrhardt

On Wed, Feb 10, 2010 at 09:52:40PM +0800, Nikanth Karthikesan wrote:
> On Wednesday 10 February 2010 16:35:51 Wu Fengguang wrote:
> > Nikanth,
> > 
> > > Make vm_max_readahead configurable at run-time. Expose a sysctl knob
> > > in procfs to change it. This would ensure that new disks added would
> > > use this value as their default read_ahead_kb.
> > 
> > Do you have use case, or customer demand for it?
> > 
> 
> No body requested for it. But when doing some performance testing with 
> readahead_kb re-compiling would be a pain, and thought that having a 
> configurable default might be useful.

I wonder why you need to recompile the kernel in the tests.
There are three interfaces to change the readahead size at runtime:

        blockdev --setra 1024 /dev/sda
        echo 512 > /sys/block/*/queue/read_ahead_kb
        echo 512 > /sys/devices/virtual/bdi/*/read_ahead_kb

> > > Also filesystems which use default_backing_dev_info would also
> > > use this new value, even if they were already mounted.
> > >
> > > Currently xfs, btrfs, nilfs, raw, mtd use the default_backing_dev_info.
> > 
> > This sounds like bad interface, in that users will be confused by the
> > tricky details of "works for new devices" and "works for some fs".
> > 
> > One more tricky point is, btrfs/md/dm readahead size may not be
> > influenced if some of the component disks are hot added.
> > 
> > So this patch is only going to work for hot-plugged disks that
> > contains _standalone_ filesystem. Is this typical use case in servers?
> > 
> 
> Yes, it would work only if the top-level disk is hot-plugged/created.
 
Or maybe what you really want is a kernel parameter for setting the
default readahead size at boot time?

In another thread, Christian Ehrhardt recommended adding a config
option for it. If you like, I can also add the kernel parameter
while I am at it.

Thanks,
Fengguang

> > > Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
> > >
> > > ---
> > >
> > > Index: linux-2.6/block/blk-core.c
> > > ===================================================================
> > > --- linux-2.6.orig/block/blk-core.c
> > > +++ linux-2.6/block/blk-core.c
> > > @@ -499,7 +499,7 @@ struct request_queue *blk_alloc_queue_no
> > >  	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
> > >  	q->backing_dev_info.unplug_io_data = q;
> > >  	q->backing_dev_info.ra_pages =
> > > -			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
> > > +			(vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
> > >  	q->backing_dev_info.state = 0;
> > >  	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
> > >  	q->backing_dev_info.name = "block";
> > > Index: linux-2.6/fs/fuse/inode.c
> > > ===================================================================
> > > --- linux-2.6.orig/fs/fuse/inode.c
> > > +++ linux-2.6/fs/fuse/inode.c
> > > @@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_con
> > >  	int err;
> > >
> > >  	fc->bdi.name = "fuse";
> > > -	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
> > > +	fc->bdi.ra_pages = (vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
> > >  	fc->bdi.unplug_io_fn = default_unplug_io_fn;
> > >  	/* fuse does it's own writeback accounting */
> > >  	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
> > > Index: linux-2.6/include/linux/mm.h
> > > ===================================================================
> > > --- linux-2.6.orig/include/linux/mm.h
> > > +++ linux-2.6/include/linux/mm.h
> > > @@ -1188,7 +1188,11 @@ int write_one_page(struct page *page, in
> > >  void task_dirty_inc(struct task_struct *tsk);
> > >
> > >  /* readahead.c */
> > > -#define VM_MAX_READAHEAD	128	/* kbytes */
> > > +#define INITIAL_VM_MAX_READAHEAD_KB	128
> > > +extern unsigned long vm_max_readahead_kb;
> > > +
> > > +int sysctl_vm_max_readahead_kb_handler(struct ctl_table *table, int write,
> > > +		void __user *buffer, size_t *length, loff_t *ppos);
> > >
> > >  int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
> > >  			pgoff_t offset, unsigned long nr_to_read);
> > > Index: linux-2.6/mm/backing-dev.c
> > > ===================================================================
> > > --- linux-2.6.orig/mm/backing-dev.c
> > > +++ linux-2.6/mm/backing-dev.c
> > > @@ -18,7 +18,8 @@ EXPORT_SYMBOL(default_unplug_io_fn);
> > >
> > >  struct backing_dev_info default_backing_dev_info = {
> > >  	.name		= "default",
> > > -	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
> > > +	.ra_pages	= INITIAL_VM_MAX_READAHEAD_KB
> > > +					>> (PAGE_CACHE_SHIFT - 10),
> > >  	.state		= 0,
> > >  	.capabilities	= BDI_CAP_MAP_COPY,
> > >  	.unplug_io_fn	= default_unplug_io_fn,
> > > Index: linux-2.6/mm/readahead.c
> > > ===================================================================
> > > --- linux-2.6.orig/mm/readahead.c
> > > +++ linux-2.6/mm/readahead.c
> > > @@ -17,6 +17,19 @@
> > >  #include <linux/pagevec.h>
> > >  #include <linux/pagemap.h>
> > >
> > > +unsigned long vm_max_readahead_kb = INITIAL_VM_MAX_READAHEAD_KB;
> > > +
> > > +int sysctl_vm_max_readahead_kb_handler(struct ctl_table *table, int write,
> > > +		void __user *buffer, size_t *length, loff_t *ppos)
> > > +{
> > > +	proc_doulongvec_minmax(table, write, buffer, length, ppos);
> > > +
> > > +	default_backing_dev_info.ra_pages =
> > > +			vm_max_readahead_kb >> (PAGE_CACHE_SHIFT - 10);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > >  /*
> > >   * Initialise a struct file's readahead state.  Assumes that the caller has
> > >   * memset *ra to zero.
> > > Index: linux-2.6/kernel/sysctl.c
> > > ===================================================================
> > > --- linux-2.6.orig/kernel/sysctl.c
> > > +++ linux-2.6/kernel/sysctl.c
> > > @@ -1273,7 +1273,13 @@ static struct ctl_table vm_table[] = {
> > >  		.extra2		= &one,
> > >  	},
> > >  #endif
> > > -
> > > +	{
> > > +		.procname	= "max_readahead_kb",
> > > +		.data		= &vm_max_readahead_kb,
> > > +		.maxlen		= sizeof(vm_max_readahead_kb),
> > > +		.mode		= 0644,
> > > +		.proc_handler	= sysctl_vm_max_readahead_kb_handler,
> > > +	},
> > >  	{ }
> > >  };
> > >
> > > Index: linux-2.6/Documentation/sysctl/vm.txt
> > > ===================================================================
> > > --- linux-2.6.orig/Documentation/sysctl/vm.txt
> > > +++ linux-2.6/Documentation/sysctl/vm.txt
> > > @@ -31,6 +31,7 @@ Currently, these files are in /proc/sys/
> > >  - laptop_mode
> > >  - legacy_va_layout
> > >  - lowmem_reserve_ratio
> > > +- max_readahead_kb
> > >  - max_map_count
> > >  - memory_failure_early_kill
> > >  - memory_failure_recovery
> > > @@ -263,6 +264,18 @@ The minimum value is 1 (1/1 -> 100%).
> > >
> > >  ==============================================================
> > >
> > > +max_readahead_kb:
> > > +
> > > +This file contains the default maximum readahead that would be
> > > +used, when new disks would be added to the system.
> > > +
> > > +Also filesystems which use default_backing_dev_info would also
> > > +use this new value, even if they were already mounted.
> > > +
> > > +xfs, btrfs, nilfs, raw, mtd use the default_backing_dev_info.
> > > +
> > > +==============================================================
> > > +
> > >  max_map_count:
> > >
> > >  This file contains the maximum number of memory map areas a process
> > 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make vm_max_readahead configurable at run-time
@ 2010-02-11  5:13           ` Wu Fengguang
  0 siblings, 0 replies; 40+ messages in thread
From: Wu Fengguang @ 2010-02-11  5:13 UTC (permalink / raw)
  To: Nikanth Karthikesan
  Cc: Andrew Morton, balbir, Jens Axboe, linux-kernel, linux-mm,
	Christian Ehrhardt

On Wed, Feb 10, 2010 at 09:52:40PM +0800, Nikanth Karthikesan wrote:
> On Wednesday 10 February 2010 16:35:51 Wu Fengguang wrote:
> > Nikanth,
> > 
> > > Make vm_max_readahead configurable at run-time. Expose a sysctl knob
> > > in procfs to change it. This would ensure that new disks added would
> > > use this value as their default read_ahead_kb.
> > 
> > Do you have use case, or customer demand for it?
> > 
> 
> No body requested for it. But when doing some performance testing with 
> readahead_kb re-compiling would be a pain, and thought that having a 
> configurable default might be useful.

I wonder why you need to recompile the kernel in the tests.
There are three interfaces to change the readahead size at runtime:

        blockdev --setra 1024 /dev/sda
        echo 512 > /sys/block/*/queue/read_ahead_kb
        echo 512 > /sys/devices/virtual/bdi/*/read_ahead_kb

> > > Also filesystems which use default_backing_dev_info would also
> > > use this new value, even if they were already mounted.
> > >
> > > Currently xfs, btrfs, nilfs, raw, mtd use the default_backing_dev_info.
> > 
> > This sounds like bad interface, in that users will be confused by the
> > tricky details of "works for new devices" and "works for some fs".
> > 
> > One more tricky point is, btrfs/md/dm readahead size may not be
> > influenced if some of the component disks are hot added.
> > 
> > So this patch is only going to work for hot-plugged disks that
> > contains _standalone_ filesystem. Is this typical use case in servers?
> > 
> 
> Yes, it would work only if the top-level disk is hot-plugged/created.
 
Or maybe what you really want is a kernel parameter for setting the
default readahead size at boot time?

In another thread, Christian Ehrhardt recommended adding a config
option for it. If you like, I can also add the kernel parameter
while I am at it.

Thanks,
Fengguang

> > > Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
> > >
> > > ---
> > >
> > > Index: linux-2.6/block/blk-core.c
> > > ===================================================================
> > > --- linux-2.6.orig/block/blk-core.c
> > > +++ linux-2.6/block/blk-core.c
> > > @@ -499,7 +499,7 @@ struct request_queue *blk_alloc_queue_no
> > >  	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
> > >  	q->backing_dev_info.unplug_io_data = q;
> > >  	q->backing_dev_info.ra_pages =
> > > -			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
> > > +			(vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
> > >  	q->backing_dev_info.state = 0;
> > >  	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
> > >  	q->backing_dev_info.name = "block";
> > > Index: linux-2.6/fs/fuse/inode.c
> > > ===================================================================
> > > --- linux-2.6.orig/fs/fuse/inode.c
> > > +++ linux-2.6/fs/fuse/inode.c
> > > @@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_con
> > >  	int err;
> > >
> > >  	fc->bdi.name = "fuse";
> > > -	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
> > > +	fc->bdi.ra_pages = (vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
> > >  	fc->bdi.unplug_io_fn = default_unplug_io_fn;
> > >  	/* fuse does it's own writeback accounting */
> > >  	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
> > > Index: linux-2.6/include/linux/mm.h
> > > ===================================================================
> > > --- linux-2.6.orig/include/linux/mm.h
> > > +++ linux-2.6/include/linux/mm.h
> > > @@ -1188,7 +1188,11 @@ int write_one_page(struct page *page, in
> > >  void task_dirty_inc(struct task_struct *tsk);
> > >
> > >  /* readahead.c */
> > > -#define VM_MAX_READAHEAD	128	/* kbytes */
> > > +#define INITIAL_VM_MAX_READAHEAD_KB	128
> > > +extern unsigned long vm_max_readahead_kb;
> > > +
> > > +int sysctl_vm_max_readahead_kb_handler(struct ctl_table *table, int write,
> > > +		void __user *buffer, size_t *length, loff_t *ppos);
> > >
> > >  int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
> > >  			pgoff_t offset, unsigned long nr_to_read);
> > > Index: linux-2.6/mm/backing-dev.c
> > > ===================================================================
> > > --- linux-2.6.orig/mm/backing-dev.c
> > > +++ linux-2.6/mm/backing-dev.c
> > > @@ -18,7 +18,8 @@ EXPORT_SYMBOL(default_unplug_io_fn);
> > >
> > >  struct backing_dev_info default_backing_dev_info = {
> > >  	.name		= "default",
> > > -	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
> > > +	.ra_pages	= INITIAL_VM_MAX_READAHEAD_KB
> > > +					>> (PAGE_CACHE_SHIFT - 10),
> > >  	.state		= 0,
> > >  	.capabilities	= BDI_CAP_MAP_COPY,
> > >  	.unplug_io_fn	= default_unplug_io_fn,
> > > Index: linux-2.6/mm/readahead.c
> > > ===================================================================
> > > --- linux-2.6.orig/mm/readahead.c
> > > +++ linux-2.6/mm/readahead.c
> > > @@ -17,6 +17,19 @@
> > >  #include <linux/pagevec.h>
> > >  #include <linux/pagemap.h>
> > >
> > > +unsigned long vm_max_readahead_kb = INITIAL_VM_MAX_READAHEAD_KB;
> > > +
> > > +int sysctl_vm_max_readahead_kb_handler(struct ctl_table *table, int write,
> > > +		void __user *buffer, size_t *length, loff_t *ppos)
> > > +{
> > > +	proc_doulongvec_minmax(table, write, buffer, length, ppos);
> > > +
> > > +	default_backing_dev_info.ra_pages =
> > > +			vm_max_readahead_kb >> (PAGE_CACHE_SHIFT - 10);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > >  /*
> > >   * Initialise a struct file's readahead state.  Assumes that the caller has
> > >   * memset *ra to zero.
> > > Index: linux-2.6/kernel/sysctl.c
> > > ===================================================================
> > > --- linux-2.6.orig/kernel/sysctl.c
> > > +++ linux-2.6/kernel/sysctl.c
> > > @@ -1273,7 +1273,13 @@ static struct ctl_table vm_table[] = {
> > >  		.extra2		= &one,
> > >  	},
> > >  #endif
> > > -
> > > +	{
> > > +		.procname	= "max_readahead_kb",
> > > +		.data		= &vm_max_readahead_kb,
> > > +		.maxlen		= sizeof(vm_max_readahead_kb),
> > > +		.mode		= 0644,
> > > +		.proc_handler	= sysctl_vm_max_readahead_kb_handler,
> > > +	},
> > >  	{ }
> > >  };
> > >
> > > Index: linux-2.6/Documentation/sysctl/vm.txt
> > > ===================================================================
> > > --- linux-2.6.orig/Documentation/sysctl/vm.txt
> > > +++ linux-2.6/Documentation/sysctl/vm.txt
> > > @@ -31,6 +31,7 @@ Currently, these files are in /proc/sys/
> > >  - laptop_mode
> > >  - legacy_va_layout
> > >  - lowmem_reserve_ratio
> > > +- max_readahead_kb
> > >  - max_map_count
> > >  - memory_failure_early_kill
> > >  - memory_failure_recovery
> > > @@ -263,6 +264,18 @@ The minimum value is 1 (1/1 -> 100%).
> > >
> > >  ==============================================================
> > >
> > > +max_readahead_kb:
> > > +
> > > +This file contains the default maximum readahead that would be
> > > +used, when new disks would be added to the system.
> > > +
> > > +Also filesystems which use default_backing_dev_info would also
> > > +use this new value, even if they were already mounted.
> > > +
> > > +xfs, btrfs, nilfs, raw, mtd use the default_backing_dev_info.
> > > +
> > > +==============================================================
> > > +
> > >  max_map_count:
> > >
> > >  This file contains the maximum number of memory map areas a process
> > 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make vm_max_readahead configurable at run-time
  2010-02-11  5:13           ` Wu Fengguang
@ 2010-02-11  7:34             ` Nikanth Karthikesan
  -1 siblings, 0 replies; 40+ messages in thread
From: Nikanth Karthikesan @ 2010-02-11  7:34 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Andrew Morton, balbir, Jens Axboe, linux-kernel, linux-mm,
	Christian Ehrhardt

On Thursday 11 February 2010 10:43:41 Wu Fengguang wrote:
> On Wed, Feb 10, 2010 at 09:52:40PM +0800, Nikanth Karthikesan wrote:
> > On Wednesday 10 February 2010 16:35:51 Wu Fengguang wrote:
> > > Nikanth,
> > >
> > > > Make vm_max_readahead configurable at run-time. Expose a sysctl knob
> > > > in procfs to change it. This would ensure that new disks added would
> > > > use this value as their default read_ahead_kb.
> > >
> > > Do you have use case, or customer demand for it?
> >
> > No body requested for it. But when doing some performance testing with
> > readahead_kb re-compiling would be a pain, and thought that having a
> > configurable default might be useful.
> 
> I wonder why you need to recompile kernel in the tests.
> There are three interfaces to change readahead size in runtime:
> 
>         blockdev --setra 1024 /dev/sda
>         echo 512 > /sys/block/*/queue/read_ahead_kb
>         echo 512 > /sys/devices/virtual/bdi/*/read_ahead_kb
> 

Right, I did use that. But I thought that a global tunable would be better. :-)

> > > > Also filesystems which use default_backing_dev_info would also
> > > > use this new value, even if they were already mounted.
> > > >
> > > > Currently xfs, btrfs, nilfs, raw, mtd use the
> > > > default_backing_dev_info.
> > >
> > > This sounds like bad interface, in that users will be confused by the
> > > tricky details of "works for new devices" and "works for some fs".
> > >
> > > One more tricky point is, btrfs/md/dm readahead size may not be
> > > influenced if some of the component disks are hot added.
> > >
> > > So this patch is only going to work for hot-plugged disks that
> > > contains _standalone_ filesystem. Is this typical use case in servers?
> >
> > Yes, it would work only if the top-level disk is hot-plugged/created.
> 
> Or maybe what you really want is a kernel parameter for setting the
> default readahead size at boot time?
> 
> In another thread, Christian Ehrhardt recommended to add a config
> option for it. If you like it, I can also do the kernel parameter
> by the way.
> 

A kernel parameter seems to be the right way to go. Does the attached patch
look good?

Thanks
Nikanth

From: Nikanth Karthikesan <knikanth@suse.de>

Add a new kernel parameter "readahead", which is used as the default
maximum readahead size in place of the compile-time constant
VM_MAX_READAHEAD.

Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>

---

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 736d456..354e6f1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2148,6 +2148,8 @@ and is between 256 and 4096 characters. It is defined in the file
 			Format: <reboot_mode>[,<reboot_mode2>[,...]]
 			See arch/*/kernel/reboot.c or arch/*/kernel/process.c
 
+	readahead=	Default readahead value for block devices.
+
 	relax_domain_level=
 			[KNL, SMP] Set scheduler's default relax_domain_level.
 			See Documentation/cgroups/cpusets.txt.
diff --git a/block/blk-core.c b/block/blk-core.c
index 718897e..02ed748 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -499,7 +499,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
 	q->backing_dev_info.unplug_io_data = q;
 	q->backing_dev_info.ra_pages =
-			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+			(vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
 	q->backing_dev_info.name = "block";
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1a822ce..a593578 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
 	int err;
 
 	fc->bdi.name = "fuse";
-	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	fc->bdi.ra_pages = (vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
 	fc->bdi.unplug_io_fn = default_unplug_io_fn;
 	/* fuse does it's own writeback accounting */
 	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 60c467b..17825d7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1188,9 +1188,11 @@ int write_one_page(struct page *page, int wait);
 void task_dirty_inc(struct task_struct *tsk);
 
 /* readahead.c */
-#define VM_MAX_READAHEAD	128	/* kbytes */
+#define DEFAULT_VM_MAX_READAHEAD       128     /* kbytes */
 #define VM_MIN_READAHEAD	16	/* kbytes (includes current page) */
 
+extern unsigned long vm_max_readahead_kb;
+
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
 
diff --git a/init/main.c b/init/main.c
index 4cb47a1..7d5230a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -70,6 +70,7 @@
 #include <linux/sfi.h>
 #include <linux/shmem_fs.h>
 #include <trace/boot.h>
+#include <linux/backing-dev.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -249,6 +250,16 @@ static int __init loglevel(char *str)
 
 early_param("loglevel", loglevel);
 
+static int __init readahead(char *str)
+{
+	vm_max_readahead_kb = memparse(str, &str) / 1024ULL;
+	default_backing_dev_info.ra_pages = vm_max_readahead_kb
+						* 1024 / PAGE_CACHE_SIZE;
+	return 0;
+}
+
+early_param("readahead", readahead);
+
 /*
  * Unknown boot options get handed to init, unless they look like
  * unused parameters (modprobe will find them in /proc/cmdline).
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0e8ca03..e33ff34 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -18,7 +18,7 @@ EXPORT_SYMBOL(default_unplug_io_fn);
 
 struct backing_dev_info default_backing_dev_info = {
 	.name		= "default",
-	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
+	.ra_pages	= DEFAULT_VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
 	.state		= 0,
 	.capabilities	= BDI_CAP_MAP_COPY,
 	.unplug_io_fn	= default_unplug_io_fn,
diff --git a/mm/readahead.c b/mm/readahead.c
index 033bc13..516f8da 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -17,6 +17,8 @@
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
 
+unsigned long vm_max_readahead_kb = DEFAULT_VM_MAX_READAHEAD;
+
 /*
  * Initialise a struct file's readahead state.  Assumes that the caller has
  * memset *ra to zero.

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make vm_max_readahead configurable at run-time
@ 2010-02-11  7:34             ` Nikanth Karthikesan
  0 siblings, 0 replies; 40+ messages in thread
From: Nikanth Karthikesan @ 2010-02-11  7:34 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Andrew Morton, balbir, Jens Axboe, linux-kernel, linux-mm,
	Christian Ehrhardt

On Thursday 11 February 2010 10:43:41 Wu Fengguang wrote:
> On Wed, Feb 10, 2010 at 09:52:40PM +0800, Nikanth Karthikesan wrote:
> > On Wednesday 10 February 2010 16:35:51 Wu Fengguang wrote:
> > > Nikanth,
> > >
> > > > Make vm_max_readahead configurable at run-time. Expose a sysctl knob
> > > > in procfs to change it. This would ensure that new disks added would
> > > > use this value as their default read_ahead_kb.
> > >
> > > Do you have use case, or customer demand for it?
> >
> > No body requested for it. But when doing some performance testing with
> > readahead_kb re-compiling would be a pain, and thought that having a
> > configurable default might be useful.
> 
> I wonder why you need to recompile kernel in the tests.
> There are three interfaces to change readahead size in runtime:
> 
>         blockdev --setra 1024 /dev/sda
>         echo 512 > /sys/block/*/queue/read_ahead_kb
>         echo 512 > /sys/devices/virtual/bdi/*/read_ahead_kb
> 

Right, I did use that. But I thought that a global tunable would be better. :-)

> > > > Also filesystems which use default_backing_dev_info would also
> > > > use this new value, even if they were already mounted.
> > > >
> > > > Currently xfs, btrfs, nilfs, raw, mtd use the
> > > > default_backing_dev_info.
> > >
> > > This sounds like bad interface, in that users will be confused by the
> > > tricky details of "works for new devices" and "works for some fs".
> > >
> > > One more tricky point is, btrfs/md/dm readahead size may not be
> > > influenced if some of the component disks are hot added.
> > >
> > > So this patch is only going to work for hot-plugged disks that
> > > contains _standalone_ filesystem. Is this typical use case in servers?
> >
> > Yes, it would work only if the top-level disk is hot-plugged/created.
> 
> Or maybe what you really want is a kernel parameter for setting the
> default readahead size at boot time?
> 
> In another thread, Christian Ehrhardt recommended to add a config
> option for it. If you like it, I can also do the kernel parameter
> by the way.
> 

A kernel parameter seems to be the right way to go. Does the attached patch
look good?

Thanks
Nikanth

From: Nikanth Karthikesan <knikanth@suse.de>

Add a new kernel parameter "readahead", which is used as the default
maximum readahead size in place of the compile-time constant
VM_MAX_READAHEAD.

Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>

---

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 736d456..354e6f1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2148,6 +2148,8 @@ and is between 256 and 4096 characters. It is defined in the file
 			Format: <reboot_mode>[,<reboot_mode2>[,...]]
 			See arch/*/kernel/reboot.c or arch/*/kernel/process.c
 
+	readahead=	Default readahead value for block devices.
+
 	relax_domain_level=
 			[KNL, SMP] Set scheduler's default relax_domain_level.
 			See Documentation/cgroups/cpusets.txt.
diff --git a/block/blk-core.c b/block/blk-core.c
index 718897e..02ed748 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -499,7 +499,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
 	q->backing_dev_info.unplug_io_data = q;
 	q->backing_dev_info.ra_pages =
-			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+			(vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
 	q->backing_dev_info.name = "block";
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1a822ce..a593578 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
 	int err;
 
 	fc->bdi.name = "fuse";
-	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	fc->bdi.ra_pages = (vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
 	fc->bdi.unplug_io_fn = default_unplug_io_fn;
 	/* fuse does it's own writeback accounting */
 	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 60c467b..17825d7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1188,9 +1188,11 @@ int write_one_page(struct page *page, int wait);
 void task_dirty_inc(struct task_struct *tsk);
 
 /* readahead.c */
-#define VM_MAX_READAHEAD	128	/* kbytes */
+#define DEFAULT_VM_MAX_READAHEAD       128     /* kbytes */
 #define VM_MIN_READAHEAD	16	/* kbytes (includes current page) */
 
+extern unsigned long vm_max_readahead_kb;
+
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
 
diff --git a/init/main.c b/init/main.c
index 4cb47a1..7d5230a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -70,6 +70,7 @@
 #include <linux/sfi.h>
 #include <linux/shmem_fs.h>
 #include <trace/boot.h>
+#include <linux/backing-dev.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -249,6 +250,16 @@ static int __init loglevel(char *str)
 
 early_param("loglevel", loglevel);
 
+static int __init readahead(char *str)
+{
+	vm_max_readahead_kb = memparse(str, &str) / 1024ULL;
+	default_backing_dev_info.ra_pages = vm_max_readahead_kb
+						* 1024 / PAGE_CACHE_SIZE;
+	return 0;
+}
+
+early_param("readahead", readahead);
+
 /*
  * Unknown boot options get handed to init, unless they look like
  * unused parameters (modprobe will find them in /proc/cmdline).
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0e8ca03..e33ff34 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -18,7 +18,7 @@ EXPORT_SYMBOL(default_unplug_io_fn);
 
 struct backing_dev_info default_backing_dev_info = {
 	.name		= "default",
-	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
+	.ra_pages	= DEFAULT_VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
 	.state		= 0,
 	.capabilities	= BDI_CAP_MAP_COPY,
 	.unplug_io_fn	= default_unplug_io_fn,
diff --git a/mm/readahead.c b/mm/readahead.c
index 033bc13..516f8da 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -17,6 +17,8 @@
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
 
+unsigned long vm_max_readahead_kb = DEFAULT_VM_MAX_READAHEAD;
+
 /*
  * Initialise a struct file's readahead state.  Assumes that the caller has
  * memset *ra to zero.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
  2010-02-11  7:34             ` Nikanth Karthikesan
@ 2010-02-11 10:16               ` Nikanth Karthikesan
  -1 siblings, 0 replies; 40+ messages in thread
From: Nikanth Karthikesan @ 2010-02-11 10:16 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Andrew Morton, balbir, Jens Axboe, linux-kernel, linux-mm,
	Christian Ehrhardt

[Changes since v1: Added null check for parameter without value]

From: Nikanth Karthikesan <knikanth@suse.de>

Add new kernel parameter "readahead", which would be used
as the value of VM_MAX_READAHEAD.

Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>

---

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 736d456..354e6f1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2148,6 +2148,8 @@ and is between 256 and 4096 characters. It is defined in the file
 			Format: <reboot_mode>[,<reboot_mode2>[,...]]
 			See arch/*/kernel/reboot.c or arch/*/kernel/process.c
 
+	readahead=	Default readahead value for block devices.
+
 	relax_domain_level=
 			[KNL, SMP] Set scheduler's default relax_domain_level.
 			See Documentation/cgroups/cpusets.txt.
diff --git a/block/blk-core.c b/block/blk-core.c
index 718897e..02ed748 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -499,7 +499,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
 	q->backing_dev_info.unplug_io_data = q;
 	q->backing_dev_info.ra_pages =
-			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+			(vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
 	q->backing_dev_info.name = "block";
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1a822ce..a593578 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
 	int err;
 
 	fc->bdi.name = "fuse";
-	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	fc->bdi.ra_pages = (vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
 	fc->bdi.unplug_io_fn = default_unplug_io_fn;
 	/* fuse does it's own writeback accounting */
 	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 60c467b..17825d7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1188,9 +1188,11 @@ int write_one_page(struct page *page, int wait);
 void task_dirty_inc(struct task_struct *tsk);
 
 /* readahead.c */
-#define VM_MAX_READAHEAD	128	/* kbytes */
+#define DEFAULT_VM_MAX_READAHEAD       128     /* kbytes */
 #define VM_MIN_READAHEAD	16	/* kbytes (includes current page) */
 
+extern unsigned long vm_max_readahead_kb;
+
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
 
diff --git a/init/main.c b/init/main.c
index 4cb47a1..a801358 100644
--- a/init/main.c
+++ b/init/main.c
@@ -70,6 +70,7 @@
 #include <linux/sfi.h>
 #include <linux/shmem_fs.h>
 #include <trace/boot.h>
+#include <linux/backing-dev.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -249,6 +250,18 @@ static int __init loglevel(char *str)
 
 early_param("loglevel", loglevel);
 
+static int __init readahead(char *str)
+{
+	if (!str)
+		return -EINVAL;
+	vm_max_readahead_kb = memparse(str, &str) / 1024ULL;
+	default_backing_dev_info.ra_pages = vm_max_readahead_kb
+						* 1024 / PAGE_CACHE_SIZE;
+	return 0;
+}
+
+early_param("readahead", readahead);
+
 /*
  * Unknown boot options get handed to init, unless they look like
  * unused parameters (modprobe will find them in /proc/cmdline).
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0e8ca03..e33ff34 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -18,7 +18,7 @@ EXPORT_SYMBOL(default_unplug_io_fn);
 
 struct backing_dev_info default_backing_dev_info = {
 	.name		= "default",
-	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
+	.ra_pages	= DEFAULT_VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
 	.state		= 0,
 	.capabilities	= BDI_CAP_MAP_COPY,
 	.unplug_io_fn	= default_unplug_io_fn,
diff --git a/mm/readahead.c b/mm/readahead.c
index 033bc13..516f8da 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -17,6 +17,8 @@
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
 
+unsigned long vm_max_readahead_kb = DEFAULT_VM_MAX_READAHEAD;
+
 /*
  * Initialise a struct file's readahead state.  Assumes that the caller has
  * memset *ra to zero.

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
@ 2010-02-11 10:16               ` Nikanth Karthikesan
  0 siblings, 0 replies; 40+ messages in thread
From: Nikanth Karthikesan @ 2010-02-11 10:16 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Andrew Morton, balbir, Jens Axboe, linux-kernel, linux-mm,
	Christian Ehrhardt

[Changes since v1: Added null check for parameter without value]

From: Nikanth Karthikesan <knikanth@suse.de>

Add new kernel parameter "readahead", which would be used
as the value of VM_MAX_READAHEAD.

Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>

---

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 736d456..354e6f1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2148,6 +2148,8 @@ and is between 256 and 4096 characters. It is defined in the file
 			Format: <reboot_mode>[,<reboot_mode2>[,...]]
 			See arch/*/kernel/reboot.c or arch/*/kernel/process.c
 
+	readahead=	Default readahead value for block devices.
+
 	relax_domain_level=
 			[KNL, SMP] Set scheduler's default relax_domain_level.
 			See Documentation/cgroups/cpusets.txt.
diff --git a/block/blk-core.c b/block/blk-core.c
index 718897e..02ed748 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -499,7 +499,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
 	q->backing_dev_info.unplug_io_data = q;
 	q->backing_dev_info.ra_pages =
-			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+			(vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
 	q->backing_dev_info.name = "block";
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1a822ce..a593578 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
 	int err;
 
 	fc->bdi.name = "fuse";
-	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	fc->bdi.ra_pages = (vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
 	fc->bdi.unplug_io_fn = default_unplug_io_fn;
 	/* fuse does it's own writeback accounting */
 	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 60c467b..17825d7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1188,9 +1188,11 @@ int write_one_page(struct page *page, int wait);
 void task_dirty_inc(struct task_struct *tsk);
 
 /* readahead.c */
-#define VM_MAX_READAHEAD	128	/* kbytes */
+#define DEFAULT_VM_MAX_READAHEAD       128     /* kbytes */
 #define VM_MIN_READAHEAD	16	/* kbytes (includes current page) */
 
+extern unsigned long vm_max_readahead_kb;
+
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
 
diff --git a/init/main.c b/init/main.c
index 4cb47a1..a801358 100644
--- a/init/main.c
+++ b/init/main.c
@@ -70,6 +70,7 @@
 #include <linux/sfi.h>
 #include <linux/shmem_fs.h>
 #include <trace/boot.h>
+#include <linux/backing-dev.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -249,6 +250,18 @@ static int __init loglevel(char *str)
 
 early_param("loglevel", loglevel);
 
+static int __init readahead(char *str)
+{
+	if (!str)
+		return -EINVAL;
+	vm_max_readahead_kb = memparse(str, &str) / 1024ULL;
+	default_backing_dev_info.ra_pages = vm_max_readahead_kb
+						* 1024 / PAGE_CACHE_SIZE;
+	return 0;
+}
+
+early_param("readahead", readahead);
+
 /*
  * Unknown boot options get handed to init, unless they look like
  * unused parameters (modprobe will find them in /proc/cmdline).
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0e8ca03..e33ff34 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -18,7 +18,7 @@ EXPORT_SYMBOL(default_unplug_io_fn);
 
 struct backing_dev_info default_backing_dev_info = {
 	.name		= "default",
-	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
+	.ra_pages	= DEFAULT_VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
 	.state		= 0,
 	.capabilities	= BDI_CAP_MAP_COPY,
 	.unplug_io_fn	= default_unplug_io_fn,
diff --git a/mm/readahead.c b/mm/readahead.c
index 033bc13..516f8da 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -17,6 +17,8 @@
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
 
+unsigned long vm_max_readahead_kb = DEFAULT_VM_MAX_READAHEAD;
+
 /*
  * Initialise a struct file's readahead state.  Assumes that the caller has
  * memset *ra to zero.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
  2010-02-11 10:16               ` Nikanth Karthikesan
@ 2010-02-11 11:15                 ` Ankit Jain
  -1 siblings, 0 replies; 40+ messages in thread
From: Ankit Jain @ 2010-02-11 11:15 UTC (permalink / raw)
  To: Nikanth Karthikesan
  Cc: Wu Fengguang, Andrew Morton, balbir, Jens Axboe, linux-kernel,
	linux-mm, Christian Ehrhardt

> +static int __init readahead(char *str)
> +{
> +       if (!str)
> +               return -EINVAL;
> +       vm_max_readahead_kb = memparse(str, &str) / 1024ULL;

Just wondering, shouldn't you check whether the str had a valid value
[memparse (str, &next); next > str ..] and if it didn't, then use the
DEFAULT_VM_MAX_READAHEAD ? Otherwise, in case of an invalid
value, the readahead value will become zero.

> +       default_backing_dev_info.ra_pages = vm_max_readahead_kb
> +                                               * 1024 / PAGE_CACHE_SIZE;
> +       return 0;
> +}
> +
> +early_param("readahead", readahead);
> +

-Ankit

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
@ 2010-02-11 11:15                 ` Ankit Jain
  0 siblings, 0 replies; 40+ messages in thread
From: Ankit Jain @ 2010-02-11 11:15 UTC (permalink / raw)
  To: Nikanth Karthikesan
  Cc: Wu Fengguang, Andrew Morton, balbir, Jens Axboe, linux-kernel,
	linux-mm, Christian Ehrhardt

> +static int __init readahead(char *str)
> +{
> +       if (!str)
> +               return -EINVAL;
> +       vm_max_readahead_kb = memparse(str, &str) / 1024ULL;

Just wondering, shouldn't you check whether the str had a valid value
[memparse (str, &next); next > str ..] and if it didn't, then use the
DEFAULT_VM_MAX_READAHEAD ? Otherwise, in case of an invalid
value, the readahead value will become zero.

> +       default_backing_dev_info.ra_pages = vm_max_readahead_kb
> +                                               * 1024 / PAGE_CACHE_SIZE;
> +       return 0;
> +}
> +
> +early_param("readahead", readahead);
> +

-Ankit

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
  2010-02-11 11:15                 ` Ankit Jain
@ 2010-02-11 11:45                   ` Nikanth Karthikesan
  -1 siblings, 0 replies; 40+ messages in thread
From: Nikanth Karthikesan @ 2010-02-11 11:45 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Ankit Jain, Andrew Morton, balbir, Jens Axboe, linux-kernel,
	linux-mm, Christian Ehrhardt

On Thursday 11 February 2010 16:45:24 Ankit Jain wrote:
> > +static int __init readahead(char *str)
> > +{
> > +       if (!str)
> > +               return -EINVAL;
> > +       vm_max_readahead_kb = memparse(str, &str) / 1024ULL;
> 
> Just wondering, shouldn't you check whether the str had a valid value
> [memparse (str, &next); next > str ..] and if it didn't, then use the
> DEFAULT_VM_MAX_READAHEAD ? Otherwise, in case of an invalid
> value, the readahead value will become zero.
> 

Thanks for the review. Here is the fixed patch that checks whether all of the
parameter's value is consumed.

Thanks
Nikanth

From: Nikanth Karthikesan <knikanth@suse.de>

Add new kernel parameter "readahead", which would be used instead of the
value of VM_MAX_READAHEAD. If the parameter is not specified, the default
of 128kb would be used.

Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>

---

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 736d456..354e6f1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2148,6 +2148,8 @@ and is between 256 and 4096 characters. It is defined in the file
 			Format: <reboot_mode>[,<reboot_mode2>[,...]]
 			See arch/*/kernel/reboot.c or arch/*/kernel/process.c
 
+	readahead=	Default readahead value for block devices.
+
 	relax_domain_level=
 			[KNL, SMP] Set scheduler's default relax_domain_level.
 			See Documentation/cgroups/cpusets.txt.
diff --git a/block/blk-core.c b/block/blk-core.c
index 718897e..02ed748 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -499,7 +499,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
 	q->backing_dev_info.unplug_io_data = q;
 	q->backing_dev_info.ra_pages =
-			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+			(vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
 	q->backing_dev_info.name = "block";
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1a822ce..a593578 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
 	int err;
 
 	fc->bdi.name = "fuse";
-	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	fc->bdi.ra_pages = (vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
 	fc->bdi.unplug_io_fn = default_unplug_io_fn;
 	/* fuse does it's own writeback accounting */
 	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 60c467b..17825d7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1188,9 +1188,11 @@ int write_one_page(struct page *page, int wait);
 void task_dirty_inc(struct task_struct *tsk);
 
 /* readahead.c */
-#define VM_MAX_READAHEAD	128	/* kbytes */
+#define DEFAULT_VM_MAX_READAHEAD       128     /* kbytes */
 #define VM_MIN_READAHEAD	16	/* kbytes (includes current page) */
 
+extern unsigned long vm_max_readahead_kb;
+
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
 
diff --git a/init/main.c b/init/main.c
index 4cb47a1..6c451d2 100644
--- a/init/main.c
+++ b/init/main.c
@@ -70,6 +70,7 @@
 #include <linux/sfi.h>
 #include <linux/shmem_fs.h>
 #include <trace/boot.h>
+#include <linux/backing-dev.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -249,6 +250,24 @@ static int __init loglevel(char *str)
 
 early_param("loglevel", loglevel);
 
+static int __init readahead(char *str)
+{
+	unsigned long readahead_kb;
+
+	if (!str)
+		return -EINVAL;
+	readahead_kb = memparse(str, &str) / 1024ULL;
+	if (*str != '\0')
+		return -EINVAL;
+
+	vm_max_readahead_kb = readahead_kb;
+	default_backing_dev_info.ra_pages = vm_max_readahead_kb
+						* 1024 / PAGE_CACHE_SIZE;
+	return 0;
+}
+
+early_param("readahead", readahead);
+
 /*
  * Unknown boot options get handed to init, unless they look like
  * unused parameters (modprobe will find them in /proc/cmdline).
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0e8ca03..e33ff34 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -18,7 +18,7 @@ EXPORT_SYMBOL(default_unplug_io_fn);
 
 struct backing_dev_info default_backing_dev_info = {
 	.name		= "default",
-	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
+	.ra_pages	= DEFAULT_VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
 	.state		= 0,
 	.capabilities	= BDI_CAP_MAP_COPY,
 	.unplug_io_fn	= default_unplug_io_fn,
diff --git a/mm/readahead.c b/mm/readahead.c
index 033bc13..516f8da 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -17,6 +17,8 @@
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
 
+unsigned long vm_max_readahead_kb = DEFAULT_VM_MAX_READAHEAD;
+
 /*
  * Initialise a struct file's readahead state.  Assumes that the caller has
  * memset *ra to zero.

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
@ 2010-02-11 11:45                   ` Nikanth Karthikesan
  0 siblings, 0 replies; 40+ messages in thread
From: Nikanth Karthikesan @ 2010-02-11 11:45 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Ankit Jain, Andrew Morton, balbir, Jens Axboe, linux-kernel,
	linux-mm, Christian Ehrhardt

On Thursday 11 February 2010 16:45:24 Ankit Jain wrote:
> > +static int __init readahead(char *str)
> > +{
> > +       if (!str)
> > +               return -EINVAL;
> > +       vm_max_readahead_kb = memparse(str, &str) / 1024ULL;
> 
> Just wondering, shouldn't you check whether the str had a valid value
> [memparse (str, &next); next > str ..] and if it didn't, then use the
> DEFAULT_VM_MAX_READAHEAD ? Otherwise, in case of an invalid
> value, the readahead value will become zero.
> 

Thanks for the review. Here is the fixed patch that checks whether all of the
parameter's value is consumed.

Thanks
Nikanth

From: Nikanth Karthikesan <knikanth@suse.de>

Add new kernel parameter "readahead", which would be used instead of the
value of VM_MAX_READAHEAD. If the parameter is not specified, the default
of 128kb would be used.

Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>

---

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 736d456..354e6f1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2148,6 +2148,8 @@ and is between 256 and 4096 characters. It is defined in the file
 			Format: <reboot_mode>[,<reboot_mode2>[,...]]
 			See arch/*/kernel/reboot.c or arch/*/kernel/process.c
 
+	readahead=	Default readahead value for block devices.
+
 	relax_domain_level=
 			[KNL, SMP] Set scheduler's default relax_domain_level.
 			See Documentation/cgroups/cpusets.txt.
diff --git a/block/blk-core.c b/block/blk-core.c
index 718897e..02ed748 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -499,7 +499,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
 	q->backing_dev_info.unplug_io_data = q;
 	q->backing_dev_info.ra_pages =
-			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+			(vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
 	q->backing_dev_info.name = "block";
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1a822ce..a593578 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
 	int err;
 
 	fc->bdi.name = "fuse";
-	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	fc->bdi.ra_pages = (vm_max_readahead_kb * 1024) / PAGE_CACHE_SIZE;
 	fc->bdi.unplug_io_fn = default_unplug_io_fn;
 	/* fuse does it's own writeback accounting */
 	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 60c467b..17825d7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1188,9 +1188,11 @@ int write_one_page(struct page *page, int wait);
 void task_dirty_inc(struct task_struct *tsk);
 
 /* readahead.c */
-#define VM_MAX_READAHEAD	128	/* kbytes */
+#define DEFAULT_VM_MAX_READAHEAD       128     /* kbytes */
 #define VM_MIN_READAHEAD	16	/* kbytes (includes current page) */
 
+extern unsigned long vm_max_readahead_kb;
+
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
 
diff --git a/init/main.c b/init/main.c
index 4cb47a1..6c451d2 100644
--- a/init/main.c
+++ b/init/main.c
@@ -70,6 +70,7 @@
 #include <linux/sfi.h>
 #include <linux/shmem_fs.h>
 #include <trace/boot.h>
+#include <linux/backing-dev.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -249,6 +250,24 @@ static int __init loglevel(char *str)
 
 early_param("loglevel", loglevel);
 
+static int __init readahead(char *str)
+{
+	unsigned long readahead_kb;
+
+	if (!str)
+		return -EINVAL;
+	readahead_kb = memparse(str, &str) / 1024ULL;
+	if (*str != '\0')
+		return -EINVAL;
+
+	vm_max_readahead_kb = readahead_kb;
+	default_backing_dev_info.ra_pages = vm_max_readahead_kb
+						* 1024 / PAGE_CACHE_SIZE;
+	return 0;
+}
+
+early_param("readahead", readahead);
+
 /*
  * Unknown boot options get handed to init, unless they look like
  * unused parameters (modprobe will find them in /proc/cmdline).
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0e8ca03..e33ff34 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -18,7 +18,7 @@ EXPORT_SYMBOL(default_unplug_io_fn);
 
 struct backing_dev_info default_backing_dev_info = {
 	.name		= "default",
-	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
+	.ra_pages	= DEFAULT_VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
 	.state		= 0,
 	.capabilities	= BDI_CAP_MAP_COPY,
 	.unplug_io_fn	= default_unplug_io_fn,
diff --git a/mm/readahead.c b/mm/readahead.c
index 033bc13..516f8da 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -17,6 +17,8 @@
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
 
+unsigned long vm_max_readahead_kb = DEFAULT_VM_MAX_READAHEAD;
+
 /*
  * Initialise a struct file's readahead state.  Assumes that the caller has
  * memset *ra to zero.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
  2010-02-11 11:45                   ` Nikanth Karthikesan
@ 2010-02-11 15:16                     ` Wu Fengguang
  -1 siblings, 0 replies; 40+ messages in thread
From: Wu Fengguang @ 2010-02-11 15:16 UTC (permalink / raw)
  To: Nikanth Karthikesan
  Cc: Ankit Jain, Andrew Morton, balbir, Jens Axboe, linux-kernel,
	linux-mm, Christian Ehrhardt

Nikanth,

> From: Nikanth Karthikesan <knikanth@suse.de>
> 
> Add new kernel parameter "readahead", which would be used instead of the
> value of VM_MAX_READAHEAD. If the parameter is not specified, the default
> of 128kb would be used.

The patch looks good to me, though it conflicts with my patches.
Do you mind me including a modified version in my readahead patchset?

Thanks,
Fengguang

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
@ 2010-02-11 15:16                     ` Wu Fengguang
  0 siblings, 0 replies; 40+ messages in thread
From: Wu Fengguang @ 2010-02-11 15:16 UTC (permalink / raw)
  To: Nikanth Karthikesan
  Cc: Ankit Jain, Andrew Morton, balbir, Jens Axboe, linux-kernel,
	linux-mm, Christian Ehrhardt

Nikanth,

> From: Nikanth Karthikesan <knikanth@suse.de>
> 
> Add new kernel parameter "readahead", which would be used instead of the
> value of VM_MAX_READAHEAD. If the parameter is not specified, the default
> of 128kb would be used.

The patch looks good to me, though it conflicts with my patches.
Do you mind me including a modified version in my readahead patchset?

Thanks,
Fengguang

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
  2010-02-11 11:45                   ` Nikanth Karthikesan
@ 2010-02-14 21:37                     ` Dave Chinner
  -1 siblings, 0 replies; 40+ messages in thread
From: Dave Chinner @ 2010-02-14 21:37 UTC (permalink / raw)
  To: Nikanth Karthikesan
  Cc: Wu Fengguang, Ankit Jain, Andrew Morton, balbir, Jens Axboe,
	linux-kernel, linux-mm, Christian Ehrhardt

On Thu, Feb 11, 2010 at 05:15:03PM +0530, Nikanth Karthikesan wrote:
> On Thursday 11 February 2010 16:45:24 Ankit Jain wrote:
> > > +static int __init readahead(char *str)
> > > +{
> > > +       if (!str)
> > > +               return -EINVAL;
> > > +       vm_max_readahead_kb = memparse(str, &str) / 1024ULL;
> > 
> > Just wondering, shouldn't you check whether the str had a valid value
> > [memparse (str, &next); next > str ..] and if it didn't, then use the
> > DEFAULT_VM_MAX_READAHEAD ? Otherwise, in case of an invalid
> > value, the readahead value will become zero.
> > 
> 
> Thanks for the review. Here is the fixed patch that checks whether all of the
> parameter's value is consumed.
> 
> Thanks
> Nikanth
> 
> From: Nikanth Karthikesan <knikanth@suse.de>
> 
> Add new kernel parameter "readahead", which would be used instead of the
> value of VM_MAX_READAHEAD. If the parameter is not specified, the default
> of 128kb would be used.
> 
> Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
> 
> ---
> 
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index 736d456..354e6f1 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -2148,6 +2148,8 @@ and is between 256 and 4096 characters. It is defined in the file
>  			Format: <reboot_mode>[,<reboot_mode2>[,...]]
>  			See arch/*/kernel/reboot.c or arch/*/kernel/process.c
>  
> +	readahead=	Default readahead value for block devices.
> +

I think the description should define the units (kb) and valid value
ranges e.g. page size to something not excessive - say 65536kb.  The
above description is, IMO, useless without referring to the source to
find out this information....

[snip]

> @@ -249,6 +250,24 @@ static int __init loglevel(char *str)
>  
>  early_param("loglevel", loglevel);
>  
> +static int __init readahead(char *str)
> +{
> +	unsigned long readahead_kb;
> +
> +	if (!str)
> +		return -EINVAL;
> +	readahead_kb = memparse(str, &str) / 1024ULL;
> +	if (*str != '\0')
> +		return -EINVAL;

And readahead_kb needs to be validated against the range of
valid values here.

> +
> +	vm_max_readahead_kb = readahead_kb;
> +	default_backing_dev_info.ra_pages = vm_max_readahead_kb
> +						* 1024 / PAGE_CACHE_SIZE;
> +	return 0;
> +}
> +
> +early_param("readahead", readahead);
> +

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
@ 2010-02-14 21:37                     ` Dave Chinner
  0 siblings, 0 replies; 40+ messages in thread
From: Dave Chinner @ 2010-02-14 21:37 UTC (permalink / raw)
  To: Nikanth Karthikesan
  Cc: Wu Fengguang, Ankit Jain, Andrew Morton, balbir, Jens Axboe,
	linux-kernel, linux-mm, Christian Ehrhardt

On Thu, Feb 11, 2010 at 05:15:03PM +0530, Nikanth Karthikesan wrote:
> On Thursday 11 February 2010 16:45:24 Ankit Jain wrote:
> > > +static int __init readahead(char *str)
> > > +{
> > > +       if (!str)
> > > +               return -EINVAL;
> > > +       vm_max_readahead_kb = memparse(str, &str) / 1024ULL;
> > 
> > Just wondering, shouldn't you check whether the str had a valid value
> > [memparse (str, &next); next > str ..] and if it didn't, then use the
> > DEFAULT_VM_MAX_READAHEAD ? Otherwise, incase of a invalid
> > value, the readahead value will become zero.
> > 
> 
> Thanks for the review. Here is the fixed patch that checks whether all of the
> parameters value is consumed.
> 
> Thanks
> Nikanth
> 
> From: Nikanth Karthikesan <knikanth@suse.de>
> 
> Add new kernel parameter "readahead", which would be used instead of the
> value of VM_MAX_READAHEAD. If the parameter is not specified, the default
> of 128kb would be used.
> 
> Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
> 
> ---
> 
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index 736d456..354e6f1 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -2148,6 +2148,8 @@ and is between 256 and 4096 characters. It is defined in the file
>  			Format: <reboot_mode>[,<reboot_mode2>[,...]]
>  			See arch/*/kernel/reboot.c or arch/*/kernel/process.c
>  
> +	readahead=	Default readahead value for block devices.
> +

I think the description should define the units (kb) and valid value
ranges e.g. page size to something not excessive - say 65536kb.  The
above description is, IMO, useless without referring to the source to
find out this information....

[snip]

> @@ -249,6 +250,24 @@ static int __init loglevel(char *str)
>  
>  early_param("loglevel", loglevel);
>  
> +static int __init readahead(char *str)
> +{
> +	unsigned long readahead_kb;
> +
> +	if (!str)
> +		return -EINVAL;
> +	readahead_kb = memparse(str, &str) / 1024ULL;
> +	if (*str != '\0')
> +		return -EINVAL;

And readahead_kb needs to be validated against the range of
valid values here.

> +
> +	vm_max_readahead_kb = readahead_kb;
> +	default_backing_dev_info.ra_pages = vm_max_readahead_kb
> +						* 1024 / PAGE_CACHE_SIZE;
> +	return 0;
> +}
> +
> +early_param("readahead", readahead);
> +

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
  2010-02-11 15:16                     ` Wu Fengguang
@ 2010-02-15  4:35                       ` Nikanth Karthikesan
  -1 siblings, 0 replies; 40+ messages in thread
From: Nikanth Karthikesan @ 2010-02-15  4:35 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Ankit Jain, Andrew Morton, balbir, Jens Axboe, linux-kernel,
	linux-mm, Christian Ehrhardt

On Thursday 11 February 2010 20:46:51 Wu Fengguang wrote:
> Nikanth,
> 
> > From: Nikanth Karthikesan <knikanth@suse.de>
> >
> > Add new kernel parameter "readahead", which would be used instead of the
> > value of VM_MAX_READAHEAD. If the parameter is not specified, the default
> > of 128kb would be used.
> 
> The patch looks good to me, though it conflicts with my patches.
> Do you mind me including a modified version in my readahead patchset?
> 

Sure, go ahead.

Thanks
Nikanth

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
@ 2010-02-15  4:35                       ` Nikanth Karthikesan
  0 siblings, 0 replies; 40+ messages in thread
From: Nikanth Karthikesan @ 2010-02-15  4:35 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Ankit Jain, Andrew Morton, balbir, Jens Axboe, linux-kernel,
	linux-mm, Christian Ehrhardt

On Thursday 11 February 2010 20:46:51 Wu Fengguang wrote:
> Nikanth,
> 
> > From: Nikanth Karthikesan <knikanth@suse.de>
> >
> > Add new kernel parameter "readahead", which would be used instead of the
> > value of VM_MAX_READAHEAD. If the parameter is not specified, the default
> > of 128kb would be used.
> 
> The patch looks good to me, though it conflicts with my patches.
> Do you mind me including a modified version in my readahead patchset?
> 

Sure, go ahead.

Thanks
Nikanth

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
  2010-02-14 21:37                     ` Dave Chinner
@ 2010-02-15  4:36                       ` Nikanth Karthikesan
  -1 siblings, 0 replies; 40+ messages in thread
From: Nikanth Karthikesan @ 2010-02-15  4:36 UTC (permalink / raw)
  To: Dave Chinner, Wu Fengguang
  Cc: Ankit Jain, Andrew Morton, balbir, Jens Axboe, linux-kernel,
	linux-mm, Christian Ehrhardt

On Monday 15 February 2010 03:07:24 Dave Chinner wrote:
> On Thu, Feb 11, 2010 at 05:15:03PM +0530, Nikanth Karthikesan wrote:
> > On Thursday 11 February 2010 16:45:24 Ankit Jain wrote:
> > > > +static int __init readahead(char *str)
> > > > +{
> > > > +       if (!str)
> > > > +               return -EINVAL;
> > > > +       vm_max_readahead_kb = memparse(str, &str) / 1024ULL;
> > >
> > > Just wondering, shouldn't you check whether the str had a valid value
> > > [memparse (str, &next); next > str ..] and if it didn't, then use the
> > > DEFAULT_VM_MAX_READAHEAD ? Otherwise, incase of a invalid
> > > value, the readahead value will become zero.
> >
> > Thanks for the review. Here is the fixed patch that checks whether all of
> > the parameters value is consumed.
> >
> > Thanks
> > Nikanth
> >
> > From: Nikanth Karthikesan <knikanth@suse.de>
> >
> > Add new kernel parameter "readahead", which would be used instead of the
> > value of VM_MAX_READAHEAD. If the parameter is not specified, the default
> > of 128kb would be used.
> >
> > Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
> >
> > ---
> >
> > diff --git a/Documentation/kernel-parameters.txt
> > b/Documentation/kernel-parameters.txt index 736d456..354e6f1 100644
> > --- a/Documentation/kernel-parameters.txt
> > +++ b/Documentation/kernel-parameters.txt
> > @@ -2148,6 +2148,8 @@ and is between 256 and 4096 characters. It is
> > defined in the file Format: <reboot_mode>[,<reboot_mode2>[,...]]
> >  			See arch/*/kernel/reboot.c or arch/*/kernel/process.c
> >
> > +	readahead=	Default readahead value for block devices.
> > +
> 
> I think the description should define the units (kb) and valid value
> ranges e.g. page size to something not excessive - say 65536kb.  The
> above description is, IMO, useless without refering to the source to
> find out this information....
> 

The parameter can be specified with/without any suffix (k/m/g) that the
memparse() helper function can accept. So it can take 1M, 1024k, 1050620. I
checked other parameters that use memparse() to get similar values and they
didn't document the accepted format. Maybe this should be described here.

> [snip]
> 
> > @@ -249,6 +250,24 @@ static int __init loglevel(char *str)
> >
> >  early_param("loglevel", loglevel);
> >
> > +static int __init readahead(char *str)
> > +{
> > +	unsigned long readahead_kb;
> > +
> > +	if (!str)
> > +		return -EINVAL;
> > +	readahead_kb = memparse(str, &str) / 1024ULL;
> > +	if (*str != '\0')
> > +		return -EINVAL;
>  		
> And readahead_kb needs to be validated against the range of
> valid values here.
> 

I didn't want to impose artificial restrictions. I think Wu's patch set would 
be adding some restrictions, like minimum readahead. He could fix it when he 
modifies the patch to include in his patch set.

> > +
> > +	vm_max_readahead_kb = readahead_kb;
> > +	default_backing_dev_info.ra_pages = vm_max_readahead_kb
> > +						* 1024 / PAGE_CACHE_SIZE;
> > +	return 0;
> > +}
> > +
> > +early_param("readahead", readahead);
> > +
> 

Thanks for reviewing.

Thanks
Nikanth

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
@ 2010-02-15  4:36                       ` Nikanth Karthikesan
  0 siblings, 0 replies; 40+ messages in thread
From: Nikanth Karthikesan @ 2010-02-15  4:36 UTC (permalink / raw)
  To: Dave Chinner, Wu Fengguang
  Cc: Ankit Jain, Andrew Morton, balbir, Jens Axboe, linux-kernel,
	linux-mm, Christian Ehrhardt

On Monday 15 February 2010 03:07:24 Dave Chinner wrote:
> On Thu, Feb 11, 2010 at 05:15:03PM +0530, Nikanth Karthikesan wrote:
> > On Thursday 11 February 2010 16:45:24 Ankit Jain wrote:
> > > > +static int __init readahead(char *str)
> > > > +{
> > > > +       if (!str)
> > > > +               return -EINVAL;
> > > > +       vm_max_readahead_kb = memparse(str, &str) / 1024ULL;
> > >
> > > Just wondering, shouldn't you check whether the str had a valid value
> > > [memparse (str, &next); next > str ..] and if it didn't, then use the
> > > DEFAULT_VM_MAX_READAHEAD ? Otherwise, incase of a invalid
> > > value, the readahead value will become zero.
> >
> > Thanks for the review. Here is the fixed patch that checks whether all of
> > the parameters value is consumed.
> >
> > Thanks
> > Nikanth
> >
> > From: Nikanth Karthikesan <knikanth@suse.de>
> >
> > Add new kernel parameter "readahead", which would be used instead of the
> > value of VM_MAX_READAHEAD. If the parameter is not specified, the default
> > of 128kb would be used.
> >
> > Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
> >
> > ---
> >
> > diff --git a/Documentation/kernel-parameters.txt
> > b/Documentation/kernel-parameters.txt index 736d456..354e6f1 100644
> > --- a/Documentation/kernel-parameters.txt
> > +++ b/Documentation/kernel-parameters.txt
> > @@ -2148,6 +2148,8 @@ and is between 256 and 4096 characters. It is
> > defined in the file Format: <reboot_mode>[,<reboot_mode2>[,...]]
> >  			See arch/*/kernel/reboot.c or arch/*/kernel/process.c
> >
> > +	readahead=	Default readahead value for block devices.
> > +
> 
> I think the description should define the units (kb) and valid value
> ranges e.g. page size to something not excessive - say 65536kb.  The
> above description is, IMO, useless without refering to the source to
> find out this information....
> 

The parameter can be specified with/without any suffix (k/m/g) that the
memparse() helper function can accept. So it can take 1M, 1024k, 1050620. I
checked other parameters that use memparse() to get similar values and they
didn't document the accepted format. Maybe this should be described here.

> [snip]
> 
> > @@ -249,6 +250,24 @@ static int __init loglevel(char *str)
> >
> >  early_param("loglevel", loglevel);
> >
> > +static int __init readahead(char *str)
> > +{
> > +	unsigned long readahead_kb;
> > +
> > +	if (!str)
> > +		return -EINVAL;
> > +	readahead_kb = memparse(str, &str) / 1024ULL;
> > +	if (*str != '\0')
> > +		return -EINVAL;
>  		
> And readahead_kb needs to be validated against the range of
> valid values here.
> 

I didn't want to impose artificial restrictions. I think Wu's patch set would 
be adding some restrictions, like minimum readahead. He could fix it when he 
modifies the patch to include in his patch set.

> > +
> > +	vm_max_readahead_kb = readahead_kb;
> > +	default_backing_dev_info.ra_pages = vm_max_readahead_kb
> > +						* 1024 / PAGE_CACHE_SIZE;
> > +	return 0;
> > +}
> > +
> > +early_param("readahead", readahead);
> > +
> 

Thanks for reviewing.

Thanks
Nikanth

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
  2010-02-15  4:36                       ` Nikanth Karthikesan
@ 2010-02-21 14:26                         ` Wu Fengguang
  -1 siblings, 0 replies; 40+ messages in thread
From: Wu Fengguang @ 2010-02-21 14:26 UTC (permalink / raw)
  To: Nikanth Karthikesan
  Cc: Dave Chinner, Ankit Jain, Andrew Morton, balbir, Jens Axboe,
	linux-kernel, linux-mm, Christian Ehrhardt

Nikanth,

> > > +	readahead=	Default readahead value for block devices.
> > > +
> > 
> > I think the description should define the units (kb) and valid value
> > ranges e.g. page size to something not excessive - say 65536kb.  The
> > above description is, IMO, useless without refering to the source to
> > find out this information....
> > 
> 
> The parameter can be specified with/without any suffix(k/m/g) that memparse() 
> helper function can accept. So it can take 1M, 1024k, 1050620. I checked other 
> parameters that use memparse() to get similar values and they didn't document 
> it. May be this should be described here.

Hope this helps clarify things for the user:

+       readahead=nn[KM]
+                       Default max readahead size for block devices.
+                       Range: 0; 4k - 128m

> > And readahead_kb needs to be validated against the range of
> > valid values here.
> > 
> 
> I didn't want to impose artificial restrictions. I think Wu's patch set would 
> be adding some restrictions, like minimum readahead. He could fix it when he 
> modifies the patch to include in his patch set.

OK, I imposed a larger bound -- 128MB.
And values 1-4095 (more exactly: anything below PAGE_CACHE_SIZE) are
prohibited, mainly to catch "readahead=128" where the user really means
to do 128 _KB_ readahead.

Christian, with this patch and more patches to scale down readahead
size on small memory/device size, I guess it's no longer necessary to
introduce a CONFIG_READAHEAD_SIZE?

Thanks,
Fengguang
---
make default readahead size a kernel parameter

From: Nikanth Karthikesan <knikanth@suse.de>

Add new kernel parameter "readahead", which would be used instead of the
value of VM_MAX_READAHEAD. If the parameter is not specified, the default
of 128kb would be used.

CC: Ankit Jain <radical@gmail.com>
CC: Dave Chinner <david@fromorbit.com>
CC: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 Documentation/kernel-parameters.txt |    4 ++++
 block/blk-core.c                    |    3 +--
 fs/fuse/inode.c                     |    2 +-
 include/linux/mm.h                  |    2 ++
 mm/readahead.c                      |   26 ++++++++++++++++++++++++++
 5 files changed, 34 insertions(+), 3 deletions(-)

--- linux.orig/Documentation/kernel-parameters.txt	2010-02-21 22:09:41.000000000 +0800
+++ linux/Documentation/kernel-parameters.txt	2010-02-21 22:11:08.000000000 +0800
@@ -2174,6 +2174,10 @@ and is between 256 and 4096 characters. 
 			Run specified binary instead of /init from the ramdisk,
 			used for early userspace startup. See initrd.
 
+	readahead=nn[KM]
+			Default max readahead size for block devices.
+			Range: 0; 4k - 128m
+
 	reboot=		[BUGS=X86-32,BUGS=ARM,BUGS=IA-64] Rebooting mode
 			Format: <reboot_mode>[,<reboot_mode2>[,...]]
 			See arch/*/kernel/reboot.c or arch/*/kernel/process.c
--- linux.orig/block/blk-core.c	2010-02-21 22:09:41.000000000 +0800
+++ linux/block/blk-core.c	2010-02-21 22:09:42.000000000 +0800
@@ -498,8 +498,7 @@ struct request_queue *blk_alloc_queue_no
 
 	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
 	q->backing_dev_info.unplug_io_data = q;
-	q->backing_dev_info.ra_pages =
-			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	q->backing_dev_info.ra_pages = max_readahead_pages;
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
 	q->backing_dev_info.name = "block";
--- linux.orig/fs/fuse/inode.c	2010-02-21 22:09:41.000000000 +0800
+++ linux/fs/fuse/inode.c	2010-02-21 22:09:42.000000000 +0800
@@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_con
 	int err;
 
 	fc->bdi.name = "fuse";
-	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	fc->bdi.ra_pages = max_readahead_pages;
 	fc->bdi.unplug_io_fn = default_unplug_io_fn;
 	/* fuse does it's own writeback accounting */
 	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
--- linux.orig/include/linux/mm.h	2010-02-21 22:09:41.000000000 +0800
+++ linux/include/linux/mm.h	2010-02-21 22:09:42.000000000 +0800
@@ -1187,6 +1187,8 @@ void task_dirty_inc(struct task_struct *
 #define VM_MAX_READAHEAD	128	/* kbytes */
 #define VM_MIN_READAHEAD	16	/* kbytes (includes current page) */
 
+extern unsigned long max_readahead_pages;
+
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
 
--- linux.orig/mm/readahead.c	2010-02-21 22:09:41.000000000 +0800
+++ linux/mm/readahead.c	2010-02-21 22:13:44.000000000 +0800
@@ -19,6 +19,32 @@
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
 
+unsigned long max_readahead_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE;
+
+static int __init readahead(char *str)
+{
+	unsigned long bytes;
+
+	if (!str)
+		return -EINVAL;
+	bytes = memparse(str, &str);
+	if (*str != '\0')
+		return -EINVAL;
+
+	if (bytes) {
+		if (bytes < PAGE_CACHE_SIZE)	/* missed 'k'/'m' suffixes? */
+			return -EINVAL;
+		if (bytes > 128 << 20)		/* limit to 128MB */
+			bytes = 128 << 20;
+	}
+
+	max_readahead_pages = bytes / PAGE_CACHE_SIZE;
+	default_backing_dev_info.ra_pages = max_readahead_pages;
+	return 0;
+}
+
+early_param("readahead", readahead);
+
 /*
  * Initialise a struct file's readahead state.  Assumes that the caller has
  * memset *ra to zero.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
@ 2010-02-21 14:26                         ` Wu Fengguang
  0 siblings, 0 replies; 40+ messages in thread
From: Wu Fengguang @ 2010-02-21 14:26 UTC (permalink / raw)
  To: Nikanth Karthikesan
  Cc: Dave Chinner, Ankit Jain, Andrew Morton, balbir, Jens Axboe,
	linux-kernel, linux-mm, Christian Ehrhardt

Nikanth,

> > > +	readahead=	Default readahead value for block devices.
> > > +
> > 
> > I think the description should define the units (kb) and valid value
> > ranges e.g. page size to something not excessive - say 65536kb.  The
> > above description is, IMO, useless without refering to the source to
> > find out this information....
> > 
> 
> The parameter can be specified with/without any suffix(k/m/g) that memparse() 
> helper function can accept. So it can take 1M, 1024k, 1050620. I checked other 
> parameters that use memparse() to get similar values and they didn't document 
> it. May be this should be described here.

Hope this helps clarify things for the user:

+       readahead=nn[KM]
+                       Default max readahead size for block devices.
+                       Range: 0; 4k - 128m

> > And readahead_kb needs to be validated against the range of
> > valid values here.
> > 
> 
> I didn't want to impose artificial restrictions. I think Wu's patch set would 
> be adding some restrictions, like minimum readahead. He could fix it when he 
> modifies the patch to include in his patch set.

OK, I imposed a larger bound -- 128MB.
And values 1-4095 (more exactly: anything below PAGE_CACHE_SIZE) are
prohibited, mainly to catch "readahead=128" where the user really means
to do 128 _KB_ readahead.

Christian, with this patch and more patches to scale down readahead
size on small memory/device size, I guess it's no longer necessary to
introduce a CONFIG_READAHEAD_SIZE?

Thanks,
Fengguang
---
make default readahead size a kernel parameter

From: Nikanth Karthikesan <knikanth@suse.de>

Add new kernel parameter "readahead", which would be used instead of the
value of VM_MAX_READAHEAD. If the parameter is not specified, the default
of 128kb would be used.

CC: Ankit Jain <radical@gmail.com>
CC: Dave Chinner <david@fromorbit.com>
CC: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 Documentation/kernel-parameters.txt |    4 ++++
 block/blk-core.c                    |    3 +--
 fs/fuse/inode.c                     |    2 +-
 include/linux/mm.h                  |    2 ++
 mm/readahead.c                      |   26 ++++++++++++++++++++++++++
 5 files changed, 34 insertions(+), 3 deletions(-)

--- linux.orig/Documentation/kernel-parameters.txt	2010-02-21 22:09:41.000000000 +0800
+++ linux/Documentation/kernel-parameters.txt	2010-02-21 22:11:08.000000000 +0800
@@ -2174,6 +2174,10 @@ and is between 256 and 4096 characters. 
 			Run specified binary instead of /init from the ramdisk,
 			used for early userspace startup. See initrd.
 
+	readahead=nn[KM]
+			Default max readahead size for block devices.
+			Range: 0; 4k - 128m
+
 	reboot=		[BUGS=X86-32,BUGS=ARM,BUGS=IA-64] Rebooting mode
 			Format: <reboot_mode>[,<reboot_mode2>[,...]]
 			See arch/*/kernel/reboot.c or arch/*/kernel/process.c
--- linux.orig/block/blk-core.c	2010-02-21 22:09:41.000000000 +0800
+++ linux/block/blk-core.c	2010-02-21 22:09:42.000000000 +0800
@@ -498,8 +498,7 @@ struct request_queue *blk_alloc_queue_no
 
 	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
 	q->backing_dev_info.unplug_io_data = q;
-	q->backing_dev_info.ra_pages =
-			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	q->backing_dev_info.ra_pages = max_readahead_pages;
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
 	q->backing_dev_info.name = "block";
--- linux.orig/fs/fuse/inode.c	2010-02-21 22:09:41.000000000 +0800
+++ linux/fs/fuse/inode.c	2010-02-21 22:09:42.000000000 +0800
@@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_con
 	int err;
 
 	fc->bdi.name = "fuse";
-	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	fc->bdi.ra_pages = max_readahead_pages;
 	fc->bdi.unplug_io_fn = default_unplug_io_fn;
 	/* fuse does it's own writeback accounting */
 	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
--- linux.orig/include/linux/mm.h	2010-02-21 22:09:41.000000000 +0800
+++ linux/include/linux/mm.h	2010-02-21 22:09:42.000000000 +0800
@@ -1187,6 +1187,8 @@ void task_dirty_inc(struct task_struct *
 #define VM_MAX_READAHEAD	128	/* kbytes */
 #define VM_MIN_READAHEAD	16	/* kbytes (includes current page) */
 
+extern unsigned long max_readahead_pages;
+
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
 
--- linux.orig/mm/readahead.c	2010-02-21 22:09:41.000000000 +0800
+++ linux/mm/readahead.c	2010-02-21 22:13:44.000000000 +0800
@@ -19,6 +19,32 @@
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
 
+unsigned long max_readahead_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE;
+
+static int __init readahead(char *str)
+{
+	unsigned long bytes;
+
+	if (!str)
+		return -EINVAL;
+	bytes = memparse(str, &str);
+	if (*str != '\0')
+		return -EINVAL;
+
+	if (bytes) {
+		if (bytes < PAGE_CACHE_SIZE)	/* missed 'k'/'m' suffixes? */
+			return -EINVAL;
+		if (bytes > 128 << 20)		/* limit to 128MB */
+			bytes = 128 << 20;
+	}
+
+	max_readahead_pages = bytes / PAGE_CACHE_SIZE;
+	default_backing_dev_info.ra_pages = max_readahead_pages;
+	return 0;
+}
+
+early_param("readahead", readahead);
+
 /*
  * Initialise a struct file's readahead state.  Assumes that the caller has
  * memset *ra to zero.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
  2010-02-21 14:26                         ` Wu Fengguang
@ 2010-02-21 15:49                           ` Wu Fengguang
  -1 siblings, 0 replies; 40+ messages in thread
From: Wu Fengguang @ 2010-02-21 15:49 UTC (permalink / raw)
  To: Nikanth Karthikesan
  Cc: Dave Chinner, Ankit Jain, Andrew Morton, balbir, Jens Axboe,
	linux-kernel, linux-mm, Christian Ehrhardt

> Christian, with this patch and more patches to scale down readahead
> size on small memory/device size, I guess it's no longer necessary to
> introduce a CONFIG_READAHEAD_SIZE?

This is the memory size based readahead limit :)

Thanks,
Fengguang
---
readahead: limit readahead size for small memory systems

When lifting the default readahead size from 128KB to 512KB,
make sure it won't add memory pressure to small memory systems.

For read-ahead, the memory pressure is mainly readahead buffers consumed
by too many concurrent streams. The context readahead can adapt
readahead size to thrashing threshold well.  So in principle we don't
need to adapt the default _max_ read-ahead size to memory pressure.

For read-around, the memory pressure is mainly read-around misses on
executables/libraries. Which could be reduced by scaling down
read-around size on fast "reclaim passes".

This patch presents a straightforward solution: to limit default
readahead size proportional to available system memory, i.e.
                512MB mem => 512KB readahead size
                128MB mem => 128KB readahead size
                 32MB mem =>  32KB readahead size (minimal)

Strictly speaking, only read-around size has to be limited.  However we
don't bother to separate read-around size from read-ahead size for now.

CC: Matt Mackall <mpm@selenic.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 mm/readahead.c |   25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

--- linux.orig/mm/readahead.c	2010-02-21 22:42:15.000000000 +0800
+++ linux/mm/readahead.c	2010-02-21 23:43:14.000000000 +0800
@@ -19,6 +19,9 @@
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
 
+#define MIN_READAHEAD_PAGES DIV_ROUND_UP(VM_MIN_READAHEAD*1024, PAGE_CACHE_SIZE)
+
+static int __init user_defined_readahead_size;
 static int __init config_readahead_size(char *str)
 {
 	unsigned long bytes;
@@ -36,11 +39,33 @@ static int __init config_readahead_size(
 			bytes = 128 << 20;
 	}
 
+	user_defined_readahead_size = 1;
 	default_backing_dev_info.ra_pages = bytes / PAGE_CACHE_SIZE;
 	return 0;
 }
 early_param("readahead", config_readahead_size);
 
+static int __init readahead_init(void)
+{
+	/*
+	 * Scale down default readahead size for small memory systems.
+	 * For example, a 64MB box will do 64KB read-ahead/read-around
+	 * instead of the default 512KB.
+	 *
+	 * Note that the default readahead size will also be scaled down
+	 * for small devices in add_disk().
+	 */
+	if (!user_defined_readahead_size) {
+		unsigned long max = roundup_pow_of_two(totalram_pages / 1024);
+		if (default_backing_dev_info.ra_pages > max)
+		    default_backing_dev_info.ra_pages = max;
+		if (default_backing_dev_info.ra_pages < MIN_READAHEAD_PAGES)
+		    default_backing_dev_info.ra_pages = MIN_READAHEAD_PAGES;
+	}
+	return 0;
+}
+fs_initcall(readahead_init);
+
 /*
  * Initialise a struct file's readahead state.  Assumes that the caller has
  * memset *ra to zero.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
@ 2010-02-21 15:49                           ` Wu Fengguang
  0 siblings, 0 replies; 40+ messages in thread
From: Wu Fengguang @ 2010-02-21 15:49 UTC (permalink / raw)
  To: Nikanth Karthikesan
  Cc: Dave Chinner, Ankit Jain, Andrew Morton, balbir, Jens Axboe,
	linux-kernel, linux-mm, Christian Ehrhardt

> Christian, with this patch and more patches to scale down readahead
> size on small memory/device size, I guess it's no longer necessary to
> introduce a CONFIG_READAHEAD_SIZE?

This is the memory size based readahead limit :)

Thanks,
Fengguang
---
readahead: limit readahead size for small memory systems

When lifting the default readahead size from 128KB to 512KB,
make sure it won't add memory pressure to small memory systems.

For read-ahead, the memory pressure is mainly readahead buffers consumed
by too many concurrent streams. The context readahead can adapt
readahead size to thrashing threshold well.  So in principle we don't
need to adapt the default _max_ read-ahead size to memory pressure.

For read-around, the memory pressure is mainly read-around misses on
executables/libraries. Which could be reduced by scaling down
read-around size on fast "reclaim passes".

This patch presents a straightforward solution: to limit default
readahead size proportional to available system memory, i.e.
                512MB mem => 512KB readahead size
                128MB mem => 128KB readahead size
                 32MB mem =>  32KB readahead size (minimal)

Strictly speaking, only read-around size has to be limited.  However we
don't bother to separate read-around size from read-ahead size for now.

CC: Matt Mackall <mpm@selenic.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 mm/readahead.c |   25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

--- linux.orig/mm/readahead.c	2010-02-21 22:42:15.000000000 +0800
+++ linux/mm/readahead.c	2010-02-21 23:43:14.000000000 +0800
@@ -19,6 +19,9 @@
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
 
+#define MIN_READAHEAD_PAGES DIV_ROUND_UP(VM_MIN_READAHEAD*1024, PAGE_CACHE_SIZE)
+
+static int __init user_defined_readahead_size;
 static int __init config_readahead_size(char *str)
 {
 	unsigned long bytes;
@@ -36,11 +39,33 @@ static int __init config_readahead_size(
 			bytes = 128 << 20;
 	}
 
+	user_defined_readahead_size = 1;
 	default_backing_dev_info.ra_pages = bytes / PAGE_CACHE_SIZE;
 	return 0;
 }
 early_param("readahead", config_readahead_size);
 
+static int __init readahead_init(void)
+{
+	/*
+	 * Scale down default readahead size for small memory systems.
+	 * For example, a 64MB box will do 64KB read-ahead/read-around
+	 * instead of the default 512KB.
+	 *
+	 * Note that the default readahead size will also be scaled down
+	 * for small devices in add_disk().
+	 */
+	if (!user_defined_readahead_size) {
+		unsigned long max = roundup_pow_of_two(totalram_pages / 1024);
+		if (default_backing_dev_info.ra_pages > max)
+		    default_backing_dev_info.ra_pages = max;
+		if (default_backing_dev_info.ra_pages < MIN_READAHEAD_PAGES)
+		    default_backing_dev_info.ra_pages = MIN_READAHEAD_PAGES;
+	}
+	return 0;
+}
+fs_initcall(readahead_init);
+
 /*
  * Initialise a struct file's readahead state.  Assumes that the caller has
  * memset *ra to zero.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
  2010-02-21 14:26                         ` Wu Fengguang
@ 2010-02-21 15:52                           ` Wu Fengguang
  -1 siblings, 0 replies; 40+ messages in thread
From: Wu Fengguang @ 2010-02-21 15:52 UTC (permalink / raw)
  To: Nikanth Karthikesan
  Cc: Dave Chinner, Ankit Jain, Andrew Morton, balbir, Jens Axboe,
	linux-kernel, linux-mm, Christian Ehrhardt

> +unsigned long max_readahead_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE;
> +
> +static int __init readahead(char *str)
> +{
> +	unsigned long bytes;
> +
> +	if (!str)
> +		return -EINVAL;
> +	bytes = memparse(str, &str);
> +	if (*str != '\0')
> +		return -EINVAL;
> +
> +	if (bytes) {
> +		if (bytes < PAGE_CACHE_SIZE)	/* missed 'k'/'m' suffixes? */
> +			return -EINVAL;
> +		if (bytes > 128 << 20)		/* limit to 128MB */
> +			bytes = 128 << 20;
> +	}
> +
> +	max_readahead_pages = bytes / PAGE_CACHE_SIZE;
> +	default_backing_dev_info.ra_pages = max_readahead_pages;
> +	return 0;
> +}
> +
> +early_param("readahead", readahead);

This further optimizes away max_readahead_pages :)

---
make default readahead size a kernel parameter

From: Nikanth Karthikesan <knikanth@suse.de>

Add new kernel parameter "readahead", which allows user to override
the static VM_MAX_READAHEAD=512kb.

CC: Ankit Jain <radical@gmail.com>
CC: Dave Chinner <david@fromorbit.com>
CC: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 Documentation/kernel-parameters.txt |    4 ++++
 block/blk-core.c                    |    3 +--
 fs/fuse/inode.c                     |    2 +-
 mm/readahead.c                      |   22 ++++++++++++++++++++++
 4 files changed, 28 insertions(+), 3 deletions(-)

--- linux.orig/Documentation/kernel-parameters.txt	2010-02-21 22:41:29.000000000 +0800
+++ linux/Documentation/kernel-parameters.txt	2010-02-21 22:41:30.000000000 +0800
@@ -2174,6 +2174,10 @@ and is between 256 and 4096 characters. 
 			Run specified binary instead of /init from the ramdisk,
 			used for early userspace startup. See initrd.
 
+	readahead=nn[KM]
+			Default max readahead size for block devices.
+			Range: 0; 4k - 128m
+
 	reboot=		[BUGS=X86-32,BUGS=ARM,BUGS=IA-64] Rebooting mode
 			Format: <reboot_mode>[,<reboot_mode2>[,...]]
 			See arch/*/kernel/reboot.c or arch/*/kernel/process.c
--- linux.orig/block/blk-core.c	2010-02-21 22:41:29.000000000 +0800
+++ linux/block/blk-core.c	2010-02-21 22:41:30.000000000 +0800
@@ -498,8 +498,7 @@ struct request_queue *blk_alloc_queue_no
 
 	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
 	q->backing_dev_info.unplug_io_data = q;
-	q->backing_dev_info.ra_pages =
-			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	q->backing_dev_info.ra_pages = default_backing_dev_info.ra_pages;
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
 	q->backing_dev_info.name = "block";
--- linux.orig/fs/fuse/inode.c	2010-02-21 22:41:29.000000000 +0800
+++ linux/fs/fuse/inode.c	2010-02-21 22:41:30.000000000 +0800
@@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_con
 	int err;
 
 	fc->bdi.name = "fuse";
-	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	fc->bdi.ra_pages = default_backing_dev_info.ra_pages;
 	fc->bdi.unplug_io_fn = default_unplug_io_fn;
 	/* fuse does it's own writeback accounting */
 	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
--- linux.orig/mm/readahead.c	2010-02-21 22:41:29.000000000 +0800
+++ linux/mm/readahead.c	2010-02-21 22:42:15.000000000 +0800
@@ -19,6 +19,28 @@
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
 
+static int __init config_readahead_size(char *str)
+{
+	unsigned long bytes;
+
+	if (!str)
+		return -EINVAL;
+	bytes = memparse(str, &str);
+	if (*str != '\0')
+		return -EINVAL;
+
+	if (bytes) {
+		if (bytes < PAGE_CACHE_SIZE)	/* missed 'k'/'m' suffixes? */
+			return -EINVAL;
+		if (bytes > 128 << 20)		/* limit to 128MB */
+			bytes = 128 << 20;
+	}
+
+	default_backing_dev_info.ra_pages = bytes / PAGE_CACHE_SIZE;
+	return 0;
+}
+early_param("readahead", config_readahead_size);
+
 /*
  * Initialise a struct file's readahead state.  Assumes that the caller has
  * memset *ra to zero.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
@ 2010-02-21 15:52                           ` Wu Fengguang
  0 siblings, 0 replies; 40+ messages in thread
From: Wu Fengguang @ 2010-02-21 15:52 UTC (permalink / raw)
  To: Nikanth Karthikesan
  Cc: Dave Chinner, Ankit Jain, Andrew Morton, balbir, Jens Axboe,
	linux-kernel, linux-mm, Christian Ehrhardt

> +unsigned long max_readahead_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE;
> +
> +static int __init readahead(char *str)
> +{
> +	unsigned long bytes;
> +
> +	if (!str)
> +		return -EINVAL;
> +	bytes = memparse(str, &str);
> +	if (*str != '\0')
> +		return -EINVAL;
> +
> +	if (bytes) {
> +		if (bytes < PAGE_CACHE_SIZE)	/* missed 'k'/'m' suffixes? */
> +			return -EINVAL;
> +		if (bytes > 128 << 20)		/* limit to 128MB */
> +			bytes = 128 << 20;
> +	}
> +
> +	max_readahead_pages = bytes / PAGE_CACHE_SIZE;
> +	default_backing_dev_info.ra_pages = max_readahead_pages;
> +	return 0;
> +}
> +
> +early_param("readahead", readahead);

This further optimizes away max_readahead_pages :)

---
make default readahead size a kernel parameter

From: Nikanth Karthikesan <knikanth@suse.de>

Add new kernel parameter "readahead", which allows user to override
the static VM_MAX_READAHEAD=512kb.

CC: Ankit Jain <radical@gmail.com>
CC: Dave Chinner <david@fromorbit.com>
CC: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 Documentation/kernel-parameters.txt |    4 ++++
 block/blk-core.c                    |    3 +--
 fs/fuse/inode.c                     |    2 +-
 mm/readahead.c                      |   22 ++++++++++++++++++++++
 4 files changed, 28 insertions(+), 3 deletions(-)

--- linux.orig/Documentation/kernel-parameters.txt	2010-02-21 22:41:29.000000000 +0800
+++ linux/Documentation/kernel-parameters.txt	2010-02-21 22:41:30.000000000 +0800
@@ -2174,6 +2174,10 @@ and is between 256 and 4096 characters. 
 			Run specified binary instead of /init from the ramdisk,
 			used for early userspace startup. See initrd.
 
+	readahead=nn[KM]
+			Default max readahead size for block devices.
+			Range: 0; 4k - 128m
+
 	reboot=		[BUGS=X86-32,BUGS=ARM,BUGS=IA-64] Rebooting mode
 			Format: <reboot_mode>[,<reboot_mode2>[,...]]
 			See arch/*/kernel/reboot.c or arch/*/kernel/process.c
--- linux.orig/block/blk-core.c	2010-02-21 22:41:29.000000000 +0800
+++ linux/block/blk-core.c	2010-02-21 22:41:30.000000000 +0800
@@ -498,8 +498,7 @@ struct request_queue *blk_alloc_queue_no
 
 	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
 	q->backing_dev_info.unplug_io_data = q;
-	q->backing_dev_info.ra_pages =
-			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	q->backing_dev_info.ra_pages = default_backing_dev_info.ra_pages;
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
 	q->backing_dev_info.name = "block";
--- linux.orig/fs/fuse/inode.c	2010-02-21 22:41:29.000000000 +0800
+++ linux/fs/fuse/inode.c	2010-02-21 22:41:30.000000000 +0800
@@ -870,7 +870,7 @@ static int fuse_bdi_init(struct fuse_con
 	int err;
 
 	fc->bdi.name = "fuse";
-	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	fc->bdi.ra_pages = default_backing_dev_info.ra_pages;
 	fc->bdi.unplug_io_fn = default_unplug_io_fn;
 	/* fuse does it's own writeback accounting */
 	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
--- linux.orig/mm/readahead.c	2010-02-21 22:41:29.000000000 +0800
+++ linux/mm/readahead.c	2010-02-21 22:42:15.000000000 +0800
@@ -19,6 +19,28 @@
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
 
+static int __init config_readahead_size(char *str)
+{
+	unsigned long bytes;
+
+	if (!str)
+		return -EINVAL;
+	bytes = memparse(str, &str);
+	if (*str != '\0')
+		return -EINVAL;
+
+	if (bytes) {
+		if (bytes < PAGE_CACHE_SIZE)	/* missed 'k'/'m' suffixes? */
+			return -EINVAL;
+		if (bytes > 128 << 20)		/* limit to 128MB */
+			bytes = 128 << 20;
+	}
+
+	default_backing_dev_info.ra_pages = bytes / PAGE_CACHE_SIZE;
+	return 0;
+}
+early_param("readahead", config_readahead_size);
+
 /*
  * Initialise a struct file's readahead state.  Assumes that the caller has
  * memset *ra to zero.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
  2010-02-21 14:26                         ` Wu Fengguang
@ 2010-02-22  8:16                           ` Christian Ehrhardt
  -1 siblings, 0 replies; 40+ messages in thread
From: Christian Ehrhardt @ 2010-02-22  8:16 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Nikanth Karthikesan, Dave Chinner, Ankit Jain, Andrew Morton,
	balbir, Jens Axboe, linux-kernel, linux-mm



Wu Fengguang wrote:
> Nikanth,
> 
>> I didn't want to impose artificial restrictions. I think Wu's patch set would 
>> be adding some restrictions, like minimum readahead. He could fix it when he 
>> modifies the patch to include in his patch set.
> 
> OK, I imposed a larger bound -- 128MB.
> And values 1-4095 (more exactly: PAGE_CACHE_SIZE) are prohibited mainly to 
> catch "readahead=128" where the user really means to do 128 _KB_ readahead.
> 
> Christian, with this patch and more patches to scale down readahead
> size on small memory/device size, I guess it's no longer necessary to
> introduce a CONFIG_READAHEAD_SIZE?

Yes as I mentioned before a kernel parameter supersedes a config symbol 
in my opinion too.
-> agreed

> Thanks,
> Fengguang
> ---

-- 

Grüsse / regards, Christian Ehrhardt
IBM Linux Technology Center, System z Linux Performance

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
@ 2010-02-22  8:16                           ` Christian Ehrhardt
  0 siblings, 0 replies; 40+ messages in thread
From: Christian Ehrhardt @ 2010-02-22  8:16 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Nikanth Karthikesan, Dave Chinner, Ankit Jain, Andrew Morton,
	balbir, Jens Axboe, linux-kernel, linux-mm



Wu Fengguang wrote:
> Nikanth,
> 
>> I didn't want to impose artificial restrictions. I think Wu's patch set would 
>> be adding some restrictions, like minimum readahead. He could fix it when he 
>> modifies the patch to include in his patch set.
> 
> OK, I imposed a larger bound -- 128MB.
> And values 1-4095 (more exactly: PAGE_CACHE_SIZE) are prohibited mainly to 
> catch "readahead=128" where the user really means to do 128 _KB_ readahead.
> 
> Christian, with this patch and more patches to scale down readahead
> size on small memory/device size, I guess it's no longer necessary to
> introduce a CONFIG_READAHEAD_SIZE?

Yes as I mentioned before a kernel parameter supersedes a config symbol 
in my opinion too.
-> agreed

> Thanks,
> Fengguang
> ---

-- 

Grüsse / regards, Christian Ehrhardt
IBM Linux Technology Center, System z Linux Performance

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
  2010-02-21 14:26                         ` Wu Fengguang
@ 2010-02-23  2:25                           ` Dave Chinner
  -1 siblings, 0 replies; 40+ messages in thread
From: Dave Chinner @ 2010-02-23  2:25 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Nikanth Karthikesan, Ankit Jain, Andrew Morton, balbir,
	Jens Axboe, linux-kernel, linux-mm, Christian Ehrhardt

On Sun, Feb 21, 2010 at 10:26:00PM +0800, Wu Fengguang wrote:
> Nikanth,
> 
> > > > +	readahead=	Default readahead value for block devices.
> > > > +
> > > 
> > > I think the description should define the units (kb) and valid value
> > > ranges e.g. page size to something not excessive - say 65536kb.  The
> > > above description is, IMO, useless without refering to the source to
> > > find out this information....
> > > 
> > 
> > The parameter can be specified with/without any suffix(k/m/g) that memparse() 
> > helper function can accept. So it can take 1M, 1024k, 1050620. I checked other 
> > parameters that use memparse() to get similar values and they didn't document 
> > it. May be this should be described here.
> 
> Hope this helps clarify things to user:
> 
> +       readahead=nn[KM]
> +                       Default max readahead size for block devices.
> +                       Range: 0; 4k - 128m

Yes, that is exactly what I was thinking of. Thanks.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter
@ 2010-02-23  2:25                           ` Dave Chinner
  0 siblings, 0 replies; 40+ messages in thread
From: Dave Chinner @ 2010-02-23  2:25 UTC (permalink / raw)
  To: Wu Fengguang
  Cc: Nikanth Karthikesan, Ankit Jain, Andrew Morton, balbir,
	Jens Axboe, linux-kernel, linux-mm, Christian Ehrhardt

On Sun, Feb 21, 2010 at 10:26:00PM +0800, Wu Fengguang wrote:
> Nikanth,
> 
> > > > +	readahead=	Default readahead value for block devices.
> > > > +
> > > 
> > > I think the description should define the units (kb) and valid value
> > > ranges e.g. page size to something not excessive - say 65536kb.  The
> > > above description is, IMO, useless without refering to the source to
> > > find out this information....
> > > 
> > 
> > The parameter can be specified with/without any suffix(k/m/g) that memparse() 
> > helper function can accept. So it can take 1M, 1024k, 1050620. I checked other 
> > parameters that use memparse() to get similar values and they didn't document 
> > it. May be this should be described here.
> 
> Hope this helps clarify things to user:
> 
> +       readahead=nn[KM]
> +                       Default max readahead size for block devices.
> +                       Range: 0; 4k - 128m

Yes, that is exactly what I was thinking of. Thanks.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 40+ messages in thread

end of thread, other threads:[~2010-02-23  2:25 UTC | newest]

Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-02-09 11:29 [PATCH] Make vm_max_readahead configurable at run-time Nikanth Karthikesan
2010-02-09 11:29 ` Nikanth Karthikesan
2010-02-09 23:22 ` Andrew Morton
2010-02-09 23:22   ` Andrew Morton
2010-02-10  6:25 ` Balbir Singh
2010-02-10  6:25   ` Balbir Singh
2010-02-10 10:53   ` [PATCH v2] " Nikanth Karthikesan
2010-02-10 10:53     ` Nikanth Karthikesan
2010-02-10 11:05     ` Wu Fengguang
2010-02-10 11:05       ` Wu Fengguang
2010-02-10 13:52       ` Nikanth Karthikesan
2010-02-10 13:52         ` Nikanth Karthikesan
2010-02-11  5:13         ` Wu Fengguang
2010-02-11  5:13           ` Wu Fengguang
2010-02-11  7:34           ` Nikanth Karthikesan
2010-02-11  7:34             ` Nikanth Karthikesan
2010-02-11 10:16             ` [PATCH v2] Make VM_MAX_READAHEAD a kernel parameter Nikanth Karthikesan
2010-02-11 10:16               ` Nikanth Karthikesan
2010-02-11 11:15               ` Ankit Jain
2010-02-11 11:15                 ` Ankit Jain
2010-02-11 11:45                 ` Nikanth Karthikesan
2010-02-11 11:45                   ` Nikanth Karthikesan
2010-02-11 15:16                   ` Wu Fengguang
2010-02-11 15:16                     ` Wu Fengguang
2010-02-15  4:35                     ` Nikanth Karthikesan
2010-02-15  4:35                       ` Nikanth Karthikesan
2010-02-14 21:37                   ` Dave Chinner
2010-02-14 21:37                     ` Dave Chinner
2010-02-15  4:36                     ` Nikanth Karthikesan
2010-02-15  4:36                       ` Nikanth Karthikesan
2010-02-21 14:26                       ` Wu Fengguang
2010-02-21 14:26                         ` Wu Fengguang
2010-02-21 15:49                         ` Wu Fengguang
2010-02-21 15:49                           ` Wu Fengguang
2010-02-21 15:52                         ` Wu Fengguang
2010-02-21 15:52                           ` Wu Fengguang
2010-02-22  8:16                         ` Christian Ehrhardt
2010-02-22  8:16                           ` Christian Ehrhardt
2010-02-23  2:25                         ` Dave Chinner
2010-02-23  2:25                           ` Dave Chinner

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.