All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drop page cache of a single file
@ 2006-12-28  3:17 Zhang, Yanmin
  2006-12-28  3:49 ` Andrew Morton
  2007-01-28  7:08 ` Vaidyanathan Srinivasan
  0 siblings, 2 replies; 8+ messages in thread
From: Zhang, Yanmin @ 2006-12-28  3:17 UTC (permalink / raw)
  To: LKML

Currently, via /proc/sys/vm/drop_caches, applications can drop the pagecache,
slab (dentries and inodes), or both, but they cannot choose to drop the page
cache of just one file. A user of VOD (Video-On-Demand) needs this capability
to have finer-grained control over page cache release.

The patch below, against 2.6.19, implements it.

Signed-off-by: Zhang Yanmin <yanmin.zhang@intel.com>

---

diff -Nraup linux-2.6.19/Documentation/filesystems/proc.txt linux-2.6.19_dropcache/Documentation/filesystems/proc.txt
--- linux-2.6.19/Documentation/filesystems/proc.txt	2006-12-08 15:32:44.000000000 +0800
+++ linux-2.6.19_dropcache/Documentation/filesystems/proc.txt	2006-12-28 10:20:39.000000000 +0800
@@ -1320,6 +1320,8 @@ To free dentries and inodes:
 	echo 2 > /proc/sys/vm/drop_caches
 To free pagecache, dentries and inodes:
 	echo 3 > /proc/sys/vm/drop_caches
+To free the pagecache of one file:
+	echo "4 /path/to/filename" > /proc/sys/vm/drop_caches
 
 As this is a non-destructive operation and dirty objects are not freeable, the
 user should run `sync' first.
diff -Nraup linux-2.6.19/fs/drop_caches.c linux-2.6.19_dropcache/fs/drop_caches.c
--- linux-2.6.19/fs/drop_caches.c	2006-12-08 15:31:58.000000000 +0800
+++ linux-2.6.19_dropcache/fs/drop_caches.c	2006-12-28 11:04:22.000000000 +0800
@@ -8,9 +8,9 @@
 #include <linux/writeback.h>
 #include <linux/sysctl.h>
 #include <linux/gfp.h>
+#include <linux/namei.h>
 
-/* A global variable is a bit ugly, but it keeps the code simple */
-int sysctl_drop_caches;
+char sysctl_drop_caches[PATH_MAX+2];
 
 static void drop_pagecache_sb(struct super_block *sb)
 {
@@ -54,15 +54,70 @@ void drop_slab(void)
 	} while (nr_objects > 10);
 }
 
+void drop_file_pagecache(char *path)
+{
+	struct inode *inode;
+	struct nameidata nd;
+	int error;
+
+	if (!path || !*path)
+		return;
+
+	error = path_lookup(path, LOOKUP_FOLLOW, &nd);
+	if (error)
+		return;
+
+	inode = nd.dentry->d_inode;
+	if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
+		invalidate_inode_pages(inode->i_mapping);
+	path_release(&nd);
+
+	return;
+}
+
 int drop_caches_sysctl_handler(ctl_table *table, int write,
 	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
 {
-	proc_dointvec_minmax(table, write, file, buffer, length, ppos);
-	if (write) {
-		if (sysctl_drop_caches & 1)
+	int error;
+	char *path;
+	int operation;
+
+	error = proc_dostring(table, write, file, buffer, length, ppos);
+	if (write && !error) {
+		sscanf(sysctl_drop_caches, "%d", &operation);
+
+		switch (operation) {
+		case 1:
 			drop_pagecache();
-		if (sysctl_drop_caches & 2)
+			break;
+		case 2:
 			drop_slab();
+			break;
+		case 3:
+			drop_pagecache();
+			drop_slab();
+			break;
+		case 4:
+			/*
+			 * The format in sysctl_drop_caches is:
+			 * 4 /path/to/filename
+			 */
+			path = strchr(sysctl_drop_caches, '4');
+			if (!path)
+				break;
+
+			path ++;
+			while (*path) {
+				if (*path == ' ' || *path == '\t')
+					path ++;
+				else
+					break;
+			}
+
+			drop_file_pagecache(path);
+			break;
+		}
 	}
 	return 0;
 }
+
diff -Nraup linux-2.6.19/include/linux/mm.h linux-2.6.19_dropcache/include/linux/mm.h
--- linux-2.6.19/include/linux/mm.h	2006-12-08 15:32:49.000000000 +0800
+++ linux-2.6.19_dropcache/include/linux/mm.h	2006-12-28 09:59:10.000000000 +0800
@@ -1121,6 +1121,7 @@ unsigned long shrink_slab(unsigned long 
 			unsigned long lru_pages);
 void drop_pagecache(void);
 void drop_slab(void);
+void drop_file_pagecache(char *path);
 
 #ifndef CONFIG_MMU
 #define randomize_va_space 0
diff -Nraup linux-2.6.19/kernel/sysctl.c linux-2.6.19_dropcache/kernel/sysctl.c
--- linux-2.6.19/kernel/sysctl.c	2006-12-08 15:32:49.000000000 +0800
+++ linux-2.6.19_dropcache/kernel/sysctl.c	2006-12-28 09:50:18.000000000 +0800
@@ -73,7 +73,7 @@ extern int min_free_kbytes;
 extern int printk_ratelimit_jiffies;
 extern int printk_ratelimit_burst;
 extern int pid_max_min, pid_max_max;
-extern int sysctl_drop_caches;
+extern char sysctl_drop_caches[PATH_MAX+2];
 extern int percpu_pagelist_fraction;
 extern int compat_log;
 
@@ -901,10 +901,10 @@ static ctl_table vm_table[] = {
 		.ctl_name	= VM_DROP_PAGECACHE,
 		.procname	= "drop_caches",
 		.data		= &sysctl_drop_caches,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(sysctl_drop_caches),
 		.mode		= 0644,
 		.proc_handler	= drop_caches_sysctl_handler,
-		.strategy	= &sysctl_intvec,
+		.strategy	= &sysctl_string,
 	},
 	{
 		.ctl_name	= VM_MIN_FREE_KBYTES,

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] drop page cache of a single file
  2006-12-28  3:17 [PATCH] drop page cache of a single file Zhang, Yanmin
@ 2006-12-28  3:49 ` Andrew Morton
  2006-12-28  7:19     ` Fengguang Wu
  2007-01-28  7:08 ` Vaidyanathan Srinivasan
  1 sibling, 1 reply; 8+ messages in thread
From: Andrew Morton @ 2006-12-28  3:49 UTC (permalink / raw)
  To: Zhang, Yanmin; +Cc: LKML

On Thu, 28 Dec 2006 11:17:25 +0800
"Zhang, Yanmin" <yanmin_zhang@linux.intel.com> wrote:

> Currently, by /proc/sys/vm/drop_caches, applications could drop pagecache,
> slab(dentries and inodes), or both, but applications couldn't choose to
> just drop the page cache of one file. An user of VOD (Video-On-Demand)
> needs this capability to have more detailed control on page cache release.

The posix_fadvise() system call should be used for this.  Probably in
combination with sys_sync_file_range().


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] drop page cache of a single file
@ 2006-12-28  7:19     ` Fengguang Wu
  2006-12-28 10:29       ` Andrew Morton
  0 siblings, 1 reply; 8+ messages in thread
From: Fengguang Wu @ 2006-12-28  7:19 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Zhang, Yanmin, LKML

On Wed, Dec 27, 2006 at 07:49:59PM -0800, Andrew Morton wrote:
> On Thu, 28 Dec 2006 11:17:25 +0800
> "Zhang, Yanmin" <yanmin_zhang@linux.intel.com> wrote:
> 
> > Currently, by /proc/sys/vm/drop_caches, applications could drop pagecache,
> > slab(dentries and inodes), or both, but applications couldn't choose to
> > just drop the page cache of one file. An user of VOD (Video-On-Demand)
> > needs this capability to have more detailed control on page cache release.
> 
> The posix_fadvise() system call should be used for this.  Probably in
> combination with sys_sync_file_range().

Yanmin: I've been using the fadvise tool from
http://www.zip.com.au/~akpm/linux/patches/stuff/ext3-tools.tar.gz

It's a nice tool:

% fadvise 
Usage: fadvise filename offset length advice [loops]
      advice: normal sequential willneed noreuse dontneed asyncwrite writewait
% fadvise /var/sparse 0 0x7fffffff dontneed

Regards,
Wu

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] drop page cache of a single file
  2006-12-28  7:19     ` Fengguang Wu
@ 2006-12-28 10:29       ` Andrew Morton
  2006-12-28 10:45         ` Russell King
  2006-12-28 10:57           ` Fengguang Wu
  0 siblings, 2 replies; 8+ messages in thread
From: Andrew Morton @ 2006-12-28 10:29 UTC (permalink / raw)
  To: Fengguang Wu; +Cc: Zhang, Yanmin, LKML

On Thu, 28 Dec 2006 15:19:04 +0800
Fengguang Wu <fengguang.wu@gmail.com> wrote:

> On Wed, Dec 27, 2006 at 07:49:59PM -0800, Andrew Morton wrote:
> > On Thu, 28 Dec 2006 11:17:25 +0800
> > "Zhang, Yanmin" <yanmin_zhang@linux.intel.com> wrote:
> > 
> > > Currently, by /proc/sys/vm/drop_caches, applications could drop pagecache,
> > > slab(dentries and inodes), or both, but applications couldn't choose to
> > > just drop the page cache of one file. An user of VOD (Video-On-Demand)
> > > needs this capability to have more detailed control on page cache release.
> > 
> > The posix_fadvise() system call should be used for this.  Probably in
> > combination with sys_sync_file_range().
> 
> Yanmin: I've been using the fadvise tool from
> http://www.zip.com.au/~akpm/linux/patches/stuff/ext3-tools.tar.gz
> 
> It's a nice tool:
> 
> % fadvise 
> Usage: fadvise filename offset length advice [loops]
>       advice: normal sequential willneed noreuse dontneed asyncwrite writewait
> % fadvise /var/sparse 0 0x7fffffff dontneed
> 

I was a bit reluctant to point at that because it has nasty hacks to make
it mostly-work on old glibc's which don't implement posix_fadvise().

Hopefully if you're running a recent distro, you have glibc support for
fadvise() and it's possible to write a portable version of that app which
doesn't need to know about per-arch syscall numbers.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] drop page cache of a single file
  2006-12-28 10:29       ` Andrew Morton
@ 2006-12-28 10:45         ` Russell King
  2006-12-28 11:03           ` Andrew Morton
  2006-12-28 10:57           ` Fengguang Wu
  1 sibling, 1 reply; 8+ messages in thread
From: Russell King @ 2006-12-28 10:45 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Fengguang Wu, Zhang, Yanmin, LKML

On Thu, Dec 28, 2006 at 02:29:26AM -0800, Andrew Morton wrote:
> On Thu, 28 Dec 2006 15:19:04 +0800
> Fengguang Wu <fengguang.wu@gmail.com> wrote:
> > Yanmin: I've been using the fadvise tool from
> > http://www.zip.com.au/~akpm/linux/patches/stuff/ext3-tools.tar.gz
> > 
> > It's a nice tool:
> > 
> > % fadvise 
> > Usage: fadvise filename offset length advice [loops]
> >       advice: normal sequential willneed noreuse dontneed asyncwrite writewait
> > % fadvise /var/sparse 0 0x7fffffff dontneed
> > 
> 
> I was a bit reluctant to point at that because it has nasty hacks to make
> it mostly-work on old glibc's which don't implement posix_fadvise().
> 
> Hopefully if you're running a recent distro, you have glibc support for
> fadvise() and it's possible to write a portable version of that app which
> doesn't need to know about per-arch syscall numbers.

And note that if it gets implemented on ARM on pre-fadvise() glibc,
the syscall argument order is rather non-standard: fd, action, start,
size rather than fd, start, size, action - since otherwise we run out
of registers with EABI.

The kernel community needs to get a grip with the implementation of
new syscalls - we need a process where architecture maintainers get
to review the arguments _prior_ to them being accepted into the kernel.
That way we can avoid silly architecture specific syscall changes like
this.

-- 
Russell King
 Linux kernel    2.6 ARM Linux   - http://www.arm.linux.org.uk/
 maintainer of:

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] drop page cache of a single file
@ 2006-12-28 10:57           ` Fengguang Wu
  0 siblings, 0 replies; 8+ messages in thread
From: Fengguang Wu @ 2006-12-28 10:57 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Zhang, Yanmin, LKML

On Thu, Dec 28, 2006 at 02:29:26AM -0800, Andrew Morton wrote:
> On Thu, 28 Dec 2006 15:19:04 +0800
> Fengguang Wu <fengguang.wu@gmail.com> wrote:
>
> > Yanmin: I've been using the fadvise tool from
> > http://www.zip.com.au/~akpm/linux/patches/stuff/ext3-tools.tar.gz
> > 
> > It's a nice tool:
> > 
> > % fadvise 
> > Usage: fadvise filename offset length advice [loops]
> >       advice: normal sequential willneed noreuse dontneed asyncwrite writewait
> > % fadvise /var/sparse 0 0x7fffffff dontneed
> > 
> 
> I was a bit reluctant to point at that because it has nasty hacks to make
> it mostly-work on old glibc's which don't implement posix_fadvise().
> 
> Hopefully if you're running a recent distro, you have glibc support for
> fadvise() and it's possible to write a portable version of that app which
> doesn't need to know about per-arch syscall numbers.

Bad news: it's still broken. posix_fadvise() here just failed silently.
I'm running Debian Etch with libc6=2.3.6.ds1-7.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] drop page cache of a single file
  2006-12-28 10:45         ` Russell King
@ 2006-12-28 11:03           ` Andrew Morton
  0 siblings, 0 replies; 8+ messages in thread
From: Andrew Morton @ 2006-12-28 11:03 UTC (permalink / raw)
  To: Russell King; +Cc: Fengguang Wu, Zhang, Yanmin, LKML

On Thu, 28 Dec 2006 10:45:08 +0000
Russell King <rmk+lkml@arm.linux.org.uk> wrote:

> The kernel community needs to get a grip with the implementation of
> new syscalls - we need a process where architecture maintainers get
> to review the arguments _prior_ to them being accepted into the kernel.
> That way we can avoid silly architecture specific syscall changes like
> this.

hm, well, actually, sys_fadvise64_64 was discussed on linux-arch when
it went in. 

(Gad, it was over three years ago! - glibc support should be pretty widespread
now)

That's about as much discussion as these things will get.  And this assumes
that people remember to mention it.  Mental note made.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] drop page cache of a single file
  2006-12-28  3:17 [PATCH] drop page cache of a single file Zhang, Yanmin
  2006-12-28  3:49 ` Andrew Morton
@ 2007-01-28  7:08 ` Vaidyanathan Srinivasan
  1 sibling, 0 replies; 8+ messages in thread
From: Vaidyanathan Srinivasan @ 2007-01-28  7:08 UTC (permalink / raw)
  To: Zhang, Yanmin; +Cc: LKML



Zhang, Yanmin wrote:
> Currently, by /proc/sys/vm/drop_caches, applications could drop pagecache,
> slab(dentries and inodes), or both, but applications couldn't choose to
> just drop the page cache of one file. An user of VOD (Video-On-Demand)
> needs this capability to have more detailed control on page cache release.
> 
> Below patch against 2.6.19 implements it.
> 
> Signed-off-by: Zhang Yanmin <yanmin.zhang@intel.com>
> 
> ---
> 
> diff -Nraup linux-2.6.19/Documentation/filesystems/proc.txt linux-2.6.19_dropcache/Documentation/filesystems/proc.txt
> --- linux-2.6.19/Documentation/filesystems/proc.txt	2006-12-08 15:32:44.000000000 +0800
> +++ linux-2.6.19_dropcache/Documentation/filesystems/proc.txt	2006-12-28 10:20:39.000000000 +0800
> @@ -1320,6 +1320,8 @@ To free dentries and inodes:
>  	echo 2 > /proc/sys/vm/drop_caches
>  To free pagecache, dentries and inodes:
>  	echo 3 > /proc/sys/vm/drop_caches
> +To free the pagecache of one file:
> +	echo "4 /path/to/filename" > /proc/sys/vm/drop_caches
> 
>  As this is a non-destructive operation and dirty objects are not freeable, the
>  user should run `sync' first.

"sync" is the most time consuming operation.  Clean pagecache pages
are immediately reclaimable... they are actually free pages.  Writing
out dirty pages consumes time.

Hence this approach may not provide the required performance benefits,
since only clean pagecache pages are marked free.  The fadvise approach
would provide similar behavior.

--Vaidy

[snip]


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2007-01-28  7:08 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-12-28  3:17 [PATCH] drop page cache of a single file Zhang, Yanmin
2006-12-28  3:49 ` Andrew Morton
2006-12-28  7:19   ` Fengguang Wu
2006-12-28  7:19     ` Fengguang Wu
2006-12-28 10:29       ` Andrew Morton
2006-12-28 10:45         ` Russell King
2006-12-28 11:03           ` Andrew Morton
2006-12-28 10:57         ` Fengguang Wu
2006-12-28 10:57           ` Fengguang Wu
2007-01-28  7:08 ` Vaidyanathan Srinivasan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.