linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] tmpfs 1/6 use generic_write_checks
@ 2003-04-01 22:31 Hugh Dickins
  2003-04-01 22:32 ` [PATCH] tmpfs 2/6 remove shmem_readpage Hugh Dickins
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Hugh Dickins @ 2003-04-01 22:31 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Christoph Rohland, Oleg Drokin, linux-kernel

First of a suite of six patches to 2.5.66-mm2 tmpfs (mm/shmem.c):
 Documentation/filesystems/tmpfs.txt |   25 ++---
 mm/shmem.c                          |  162 ++++++++++++------------------------
 2 files changed, 66 insertions(+), 121 deletions(-)

tmpfs 1/6 use generic_write_checks
Blessings be upon the creator of generic_write_checks in filemap.c:
shmem_file_write now calls it instead of duplicating those tedious checks.

--- 2.5.66-mm2/mm/shmem.c	Tue Apr  1 11:25:50 2003
+++ tmpfs1/mm/shmem.c	Tue Apr  1 21:34:48 2003
@@ -1126,10 +1126,8 @@
 shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
 {
 	struct inode	*inode = file->f_dentry->d_inode;
-	unsigned long	limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
 	loff_t		pos;
 	unsigned long	written;
-	long		status;
 	int		err;
 	loff_t		maxpos;
 
@@ -1142,88 +1140,25 @@
 	down(&inode->i_sem);
 
 	pos = *ppos;
-	err = -EINVAL;
-	if (pos < 0)
-		goto out_nc;
-
-	err = file->f_error;
-	if (err) {
-		file->f_error = 0;
-		goto out_nc;
-	}
-
 	written = 0;
 
-	if (file->f_flags & O_APPEND)
-		pos = inode->i_size;
+	err = generic_write_checks(inode, file, &pos, &count, 0);
+	if (err || !count)
+		goto out;
 
 	maxpos = inode->i_size;
-	if (pos + count > inode->i_size) {
+	if (maxpos < pos + count) {
 		maxpos = pos + count;
-		if (maxpos > SHMEM_MAX_BYTES)
-			maxpos = SHMEM_MAX_BYTES;
 		if (!vm_enough_memory(VM_ACCT(maxpos) - VM_ACCT(inode->i_size))) {
 			err = -ENOMEM;
-			goto out_nc;
-		}
-	}
-
-	/*
-	 * Check whether we've reached the file size limit.
-	 */
-	err = -EFBIG;
-	if (limit != RLIM_INFINITY) {
-		if (pos >= limit) {
-			send_sig(SIGXFSZ, current, 0);
-			goto out;
-		}
-		if (pos > 0xFFFFFFFFULL || count > limit - (u32)pos) {
-			/* send_sig(SIGXFSZ, current, 0); */
-			count = limit - (u32)pos;
-		}
-	}
-
-	/*
-	 *	LFS rule
-	 */
-	if (pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) {
-		if (pos >= MAX_NON_LFS) {
-			send_sig(SIGXFSZ, current, 0);
 			goto out;
 		}
-		if (count > MAX_NON_LFS - (u32)pos) {
-			/* send_sig(SIGXFSZ, current, 0); */
-			count = MAX_NON_LFS - (u32)pos;
-		}
-	}
-
-	/*
-	 *	Are we about to exceed the fs block limit ?
-	 *
-	 *	If we have written data it becomes a short write
-	 *	If we have exceeded without writing data we send
-	 *	a signal and give them an EFBIG.
-	 *
-	 *	Linus frestrict idea will clean these up nicely..
-	 */
-	if (pos >= SHMEM_MAX_BYTES) {
-		if (count || pos > SHMEM_MAX_BYTES) {
-			send_sig(SIGXFSZ, current, 0);
-			err = -EFBIG;
-			goto out;
-		}
-		/* zero-length writes at ->s_maxbytes are OK */
 	}
-	if (pos + count > SHMEM_MAX_BYTES)
-		count = SHMEM_MAX_BYTES - pos;
 
-	status	= 0;
-	if (count) {
-		remove_suid(file->f_dentry);
-		inode->i_ctime = inode->i_mtime = CURRENT_TIME;
-	}
+	remove_suid(file->f_dentry);
+	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 
-	while (count) {
+	do {
 		struct page *page = NULL;
 		unsigned long bytes, index, offset;
 		char *kaddr;
@@ -1241,8 +1176,8 @@
 		 * But it still may be a good idea to prefault below.
 		 */
 
-		status = shmem_getpage(inode, index, &page, SGP_WRITE);
-		if (status)
+		err = shmem_getpage(inode, index, &page, SGP_WRITE);
+		if (err)
 			break;
 
 		left = bytes;
@@ -1263,7 +1198,7 @@
 		flush_dcache_page(page);
 		if (left) {
 			page_cache_release(page);
-			status = -EFAULT;
+			err = -EFAULT;
 			break;
 		}
 
@@ -1271,7 +1206,8 @@
 		page_cache_release(page);
 
 		/*
-		 * Balance dirty pages??
+		 * Our dirty pages are not counted in nr_dirty,
+		 * and we do not attempt to balance dirty pages.
 		 */
 
 		written += bytes;
@@ -1280,15 +1216,16 @@
 		buf += bytes;
 		if (pos > inode->i_size)
 			inode->i_size = pos;
-	}
+	} while (count);
 
 	*ppos = pos;
-	err = written ? written : status;
-out:
+	if (written)
+		err = written;
+
 	/* Short writes give back address space */
 	if (inode->i_size != maxpos)
 		vm_unacct_memory(VM_ACCT(maxpos) - VM_ACCT(inode->i_size));
-out_nc:
+out:
 	up(&inode->i_sem);
 	return err;
 }


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] tmpfs 2/6 remove shmem_readpage
  2003-04-01 22:31 [PATCH] tmpfs 1/6 use generic_write_checks Hugh Dickins
@ 2003-04-01 22:32 ` Hugh Dickins
  2003-04-01 22:33 ` [PATCH] tmpfs 3/6 use generic_file_llseek Hugh Dickins
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Hugh Dickins @ 2003-04-01 22:32 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Christoph Rohland, linux-kernel

shmem_readpage was created to give tmpfs sendfile and loop ability; but
they're both using shmem_file_sendfile now, so remove shmem_readpage.

--- tmpfs1/mm/shmem.c	Tue Apr  1 21:34:48 2003
+++ tmpfs2/mm/shmem.c	Tue Apr  1 21:34:59 2003
@@ -750,9 +750,9 @@
 	 * Normally, filepage is NULL on entry, and either found
 	 * uptodate immediately, or allocated and zeroed, or read
 	 * in under swappage, which is then assigned to filepage.
-	 * But shmem_readpage and shmem_prepare_write pass in a locked
-	 * filepage, which may be found not uptodate by other callers
-	 * too, and may need to be copied from the swappage read in.
+	 * But shmem_prepare_write passes in a locked filepage,
+	 * which may be found not uptodate by other callers too,
+	 * and may need to be copied from the swappage read in.
 	 */
 repeat:
 	if (!filepage)
@@ -1102,20 +1102,10 @@
 static struct inode_operations shmem_symlink_inline_operations;
 
 /*
- * tmpfs itself makes no use of generic_file_read, generic_file_mmap
- * or generic_file_write; but shmem_readpage, shmem_prepare_write and
- * simple_commit_write let a tmpfs file be used below the loop driver.
+ * Normally tmpfs makes no use of shmem_prepare_write, but it
+ * lets a tmpfs file be used read-write below the loop driver.
  */
 static int
-shmem_readpage(struct file *file, struct page *page)
-{
-	struct inode *inode = page->mapping->host;
-	int error = shmem_getpage(inode, page->index, &page, SGP_CACHE);
-	unlock_page(page);
-	return error;
-}
-
-static int
 shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
@@ -1751,7 +1741,6 @@
 	.writepage	= shmem_writepage,
 	.set_page_dirty	= __set_page_dirty_nobuffers,
 #ifdef CONFIG_TMPFS
-	.readpage	= shmem_readpage,
 	.prepare_write	= shmem_prepare_write,
 	.commit_write	= simple_commit_write,
 #endif


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] tmpfs 3/6 use generic_file_llseek
  2003-04-01 22:31 [PATCH] tmpfs 1/6 use generic_write_checks Hugh Dickins
  2003-04-01 22:32 ` [PATCH] tmpfs 2/6 remove shmem_readpage Hugh Dickins
@ 2003-04-01 22:33 ` Hugh Dickins
  2003-04-01 22:34 ` [PATCH] tmpfs 4/6 use mark_page_accessed Hugh Dickins
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Hugh Dickins @ 2003-04-01 22:33 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Christoph Rohland, linux-kernel

default_llseek's use of BKL and not i_sem was recently exposed:
tmpfs should be using generic_file_llseek which guards with i_sem.

--- tmpfs2/mm/shmem.c	Tue Apr  1 21:34:59 2003
+++ tmpfs3/mm/shmem.c	Tue Apr  1 21:35:10 2003
@@ -1749,6 +1749,7 @@
 static struct file_operations shmem_file_operations = {
 	.mmap		= shmem_mmap,
 #ifdef CONFIG_TMPFS
+	.llseek		= generic_file_llseek,
 	.read		= shmem_file_read,
 	.write		= shmem_file_write,
 	.fsync		= simple_sync_file,


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] tmpfs 4/6 use mark_page_accessed
  2003-04-01 22:31 [PATCH] tmpfs 1/6 use generic_write_checks Hugh Dickins
  2003-04-01 22:32 ` [PATCH] tmpfs 2/6 remove shmem_readpage Hugh Dickins
  2003-04-01 22:33 ` [PATCH] tmpfs 3/6 use generic_file_llseek Hugh Dickins
@ 2003-04-01 22:34 ` Hugh Dickins
  2003-04-01 22:35 ` [PATCH] tmpfs 5/6 use cond_resched Hugh Dickins
  2003-04-01 22:37 ` [PATCH] tmpfs 6/6 percentile size Hugh Dickins
  4 siblings, 0 replies; 6+ messages in thread
From: Hugh Dickins @ 2003-04-01 22:34 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Christoph Rohland, linux-kernel

tmpfs pages should be surfing the LRUs in the company of their filemap
friends: I was expecting the rules to change, but they've been stable
so long, let's sprinkle mark_page_accessed in the equivalent places
here; but (don't ask me why) SetPageReferenced in shmem_file_write.
Ooh, and shmem_populate was missing a flush_page_to_ram.

--- tmpfs3/mm/shmem.c	Tue Apr  1 21:35:10 2003
+++ tmpfs4/mm/shmem.c	Tue Apr  1 21:35:21 2003
@@ -951,6 +951,7 @@
 	if (error)
 		return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
 
+	mark_page_accessed(page);
 	flush_page_to_ram(page);
 	return page;
 }
@@ -978,6 +979,8 @@
 		if (err)
 			return err;
 		if (page) {
+			mark_page_accessed(page);
+			flush_page_to_ram(page);
 			err = install_page(mm, vma, addr, page, prot);
 			if (err) {
 				page_cache_release(page);
@@ -1192,6 +1195,8 @@
 			break;
 		}
 
+		if (!PageReferenced(page))
+			SetPageReferenced(page);
 		set_page_dirty(page);
 		page_cache_release(page);
 
@@ -1264,13 +1269,20 @@
 		}
 		nr -= offset;
 
-		/* If users can be writing to this page using arbitrary
-		 * virtual addresses, take care about potential aliasing
-		 * before reading the page on the kernel side.
-		 */
-		if (!list_empty(&mapping->i_mmap_shared) &&
-		    page != ZERO_PAGE(0))
-			flush_dcache_page(page);
+		if (page != ZERO_PAGE(0)) {
+			/*
+			 * If users can be writing to this page using arbitrary
+			 * virtual addresses, take care about potential aliasing
+			 * before reading the page on the kernel side.
+			 */
+			if (!list_empty(&mapping->i_mmap_shared))
+				flush_dcache_page(page);
+			/*
+			 * Mark the page accessed if we read the beginning.
+			 */
+			if (!offset)
+				mark_page_accessed(page);
+		}
 
 		/*
 		 * Ok, we have the page, and it's up-to-date, so
@@ -1523,6 +1535,7 @@
 		return res;
 	res = vfs_readlink(dentry, buffer, buflen, kmap(page));
 	kunmap(page);
+	mark_page_accessed(page);
 	page_cache_release(page);
 	return res;
 }
@@ -1535,6 +1548,7 @@
 		return res;
 	res = vfs_follow_link(nd, kmap(page));
 	kunmap(page);
+	mark_page_accessed(page);
 	page_cache_release(page);
 	return res;
 }


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] tmpfs 5/6 use cond_resched
  2003-04-01 22:31 [PATCH] tmpfs 1/6 use generic_write_checks Hugh Dickins
                   ` (2 preceding siblings ...)
  2003-04-01 22:34 ` [PATCH] tmpfs 4/6 use mark_page_accessed Hugh Dickins
@ 2003-04-01 22:35 ` Hugh Dickins
  2003-04-01 22:37 ` [PATCH] tmpfs 6/6 percentile size Hugh Dickins
  4 siblings, 0 replies; 6+ messages in thread
From: Hugh Dickins @ 2003-04-01 22:35 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Christoph Rohland, linux-kernel

cond_resched each time around the loop in shmem_file_write
and do_shmem_file_read, matching filemap.c.

--- tmpfs4/mm/shmem.c	Tue Apr  1 21:35:21 2003
+++ tmpfs5/mm/shmem.c	Tue Apr  1 21:35:32 2003
@@ -1211,6 +1211,8 @@
 		buf += bytes;
 		if (pos > inode->i_size)
 			inode->i_size = pos;
+
+		cond_resched();
 	} while (count);
 
 	*ppos = pos;
@@ -1302,6 +1304,8 @@
 		page_cache_release(page);
 		if (ret != nr || !desc->count)
 			break;
+
+		cond_resched();
 	}
 
 	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] tmpfs 6/6 percentile size
  2003-04-01 22:31 [PATCH] tmpfs 1/6 use generic_write_checks Hugh Dickins
                   ` (3 preceding siblings ...)
  2003-04-01 22:35 ` [PATCH] tmpfs 5/6 use cond_resched Hugh Dickins
@ 2003-04-01 22:37 ` Hugh Dickins
  4 siblings, 0 replies; 6+ messages in thread
From: Hugh Dickins @ 2003-04-01 22:37 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Christoph Rohland, CaT, linux-kernel

From CaT <cat@zip.com.au>:
What this patch does is allow you to specify the max amount of memory
tmpfs can use as a percentage of available real ram. This (in my eyes)
is useful so that you do not have to remember to change the setting if
you want something other than 50% and some of your ram goes.

Hugh redid the arithmetic to not overflow at 4GB; the particular order
of lines helps RH2.96-110 not to get confused in the do_div.  2.5 can
use totalram_pages.  Update mount options in tmpfs Doc.

There's an argument that the percentage should be of ram+swap, that's
what Christoph originally intended.  But we set the default at 50% of
ram only, so I believe it's more consistent to follow that precedent.

--- tmpfs5/Documentation/filesystems/tmpfs.txt	Thu Oct 31 05:39:34 2002
+++ tmpfs6/Documentation/filesystems/tmpfs.txt	Tue Apr  1 21:35:43 2003
@@ -54,18 +54,21 @@
 4) And probably a lot more I do not know about :-)
 
 
-tmpfs has a couple of mount options:
+tmpfs has three mount options for sizing:
 
-size:	   The limit of allocated bytes for this tmpfs instance. The 
+size:      The limit of allocated bytes for this tmpfs instance. The 
            default is half of your physical RAM without swap. If you
-	   oversize your tmpfs instances the machine will deadlock
-	   since the OOM handler will not be able to free that memory.
-nr_blocks: The same as size, but in blocks of PAGECACHE_SIZE.
+           oversize your tmpfs instances the machine will deadlock
+           since the OOM handler will not be able to free that memory.
+nr_blocks: The same as size, but in blocks of PAGE_CACHE_SIZE.
 nr_inodes: The maximum number of inodes for this instance. The default
            is half of the number of your physical RAM pages.
 
 These parameters accept a suffix k, m or g for kilo, mega and giga and
-can be changed on remount.
+can be changed on remount.  The size parameter also accepts a suffix %
+to limit this tmpfs instance to that percentage of your physical RAM:
+the default, when neither size nor nr_blocks is specified, is size=50%
+
 
 To specify the initial root directory you can use the following mount
 options:
@@ -83,15 +86,7 @@
 RAM/SWAP in 10240 inodes and it is only accessible by root.
 
 
-TODOs:
-
-1) give the size option a percent semantic: If you give a mount option
-   size=50% the tmpfs instance should be able to grow to 50 percent of
-   RAM + swap. So the instance should adapt automatically if you add
-   or remove swap space.
-2) Show the number of tmpfs RAM pages. (As shared?)
-
 Author:
    Christoph Rohland <cr@sap.com>, 1.12.01
 Updated:
-   Hugh Dickins <hugh@veritas.com>, 17 Oct 2002
+   Hugh Dickins <hugh@veritas.com>, 01 April 2003
--- tmpfs5/mm/shmem.c	Tue Apr  1 21:35:32 2003
+++ tmpfs6/mm/shmem.c	Tue Apr  1 21:35:43 2003
@@ -35,6 +35,7 @@
 #include <linux/vfs.h>
 #include <linux/blkdev.h>
 #include <asm/uaccess.h>
+#include <asm/div64.h>
 
 /* This magic number is used in glibc for posix shared memory */
 #define TMPFS_MAGIC	0x01021994
@@ -1587,6 +1588,12 @@
 		if (!strcmp(this_char,"size")) {
 			unsigned long long size;
 			size = memparse(value,&rest);
+			if (*rest == '%') {
+				size <<= PAGE_SHIFT;
+				size *= totalram_pages;
+				do_div(size, 100);
+				rest++;
+			}
 			if (*rest)
 				goto bad_val;
 			*blocks = size >> PAGE_CACHE_SHIFT;
@@ -1652,7 +1659,6 @@
 	uid_t uid = current->fsuid;
 	gid_t gid = current->fsgid;
 	struct shmem_sb_info *sbinfo;
-	struct sysinfo si;
 	int err = -ENOMEM;
 
 	sbinfo = kmalloc(sizeof(struct shmem_sb_info), GFP_KERNEL);
@@ -1665,8 +1671,7 @@
 	 * Per default we only allow half of the physical ram per
 	 * tmpfs instance
 	 */
-	si_meminfo(&si);
-	blocks = inodes = si.totalram / 2;
+	blocks = inodes = totalram_pages / 2;
 
 #ifdef CONFIG_TMPFS
 	if (shmem_parse_options(data, &mode, &uid, &gid, &blocks, &inodes)) {


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2003-04-01 22:24 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-04-01 22:31 [PATCH] tmpfs 1/6 use generic_write_checks Hugh Dickins
2003-04-01 22:32 ` [PATCH] tmpfs 2/6 remove shmem_readpage Hugh Dickins
2003-04-01 22:33 ` [PATCH] tmpfs 3/6 use generic_file_llseek Hugh Dickins
2003-04-01 22:34 ` [PATCH] tmpfs 4/6 use mark_page_accessed Hugh Dickins
2003-04-01 22:35 ` [PATCH] tmpfs 5/6 use cond_resched Hugh Dickins
2003-04-01 22:37 ` [PATCH] tmpfs 6/6 percentile size Hugh Dickins

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).