linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Christoph Rohland <cr@sap.com>
To: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: linux-kernel@vger.kernel.org
Subject: [Patch] read/write support for shm fs
Date: 13 Jan 2001 11:49:00 +0100	[thread overview]
Message-ID: <m3ae8v211w.fsf@linux.local> (raw)

Hi Alan,

Here is a patch which makes the shm fs a full swappable file system
like Solaris' tmpfs.

Does anybody have a really good fs check tool? Not benchmarking, but
concurrent truncate, read, write, unlink stress test. Would be good to
test it with that. I did my usual POSIX/SYSV shm tests which it
survived quite easily.

The patch is additional to my truncate patch which is included in
2.4.0-ac8.

Greetings
                Christoph


diff -uNr 2.4.0-shm_vm_locked-truncate/include/linux/fs.h 2.4.0-shm_vm_locked-truncate-rw/include/linux/fs.h
--- 2.4.0-shm_vm_locked-truncate/include/linux/fs.h	Fri Jan 12 22:58:58 2001
+++ 2.4.0-shm_vm_locked-truncate-rw/include/linux/fs.h	Sat Jan 13 10:12:32 2001
@@ -1198,6 +1198,7 @@
 	}
 	return inode;
 }
+extern void remove_suid(struct inode *inode);
 
 extern void insert_inode_hash(struct inode *);
 extern void remove_inode_hash(struct inode *);
@@ -1245,6 +1246,7 @@
 int block_truncate_page(struct address_space *, loff_t, get_block_t *);
 
 extern int generic_file_mmap(struct file *, struct vm_area_struct *);
+extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
 extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *);
 extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *);
 extern void do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t);
diff -uNr 2.4.0-shm_vm_locked-truncate/include/linux/mm.h 2.4.0-shm_vm_locked-truncate-rw/include/linux/mm.h
--- 2.4.0-shm_vm_locked-truncate/include/linux/mm.h	Wed Jan 10 21:48:18 2001
+++ 2.4.0-shm_vm_locked-truncate-rw/include/linux/mm.h	Sat Jan 13 10:22:00 2001
@@ -200,8 +200,8 @@
 					smp_mb__before_clear_bit(); \
 					if (!test_and_clear_bit(PG_locked, &(page)->flags)) BUG(); \
 					smp_mb__after_clear_bit(); \
-					if (waitqueue_active(&page->wait)) \
-						wake_up(&page->wait); \
+					if (waitqueue_active(&(page)->wait)) \
+						wake_up(&(page)->wait); \
 				} while (0)
 #define PageError(page)		test_bit(PG_error, &(page)->flags)
 #define SetPageError(page)	set_bit(PG_error, &(page)->flags)
diff -uNr 2.4.0-shm_vm_locked-truncate/mm/filemap.c 2.4.0-shm_vm_locked-truncate-rw/mm/filemap.c
--- 2.4.0-shm_vm_locked-truncate/mm/filemap.c	Fri Jan  5 10:33:50 2001
+++ 2.4.0-shm_vm_locked-truncate-rw/mm/filemap.c	Sat Jan 13 09:52:06 2001
@@ -1212,7 +1212,7 @@
 	UPDATE_ATIME(inode);
 }
 
-static int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
+int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
 {
 	char *kaddr;
 	unsigned long left, count = desc->count;
@@ -2408,7 +2408,7 @@
 	return page;
 }
 
-static inline void remove_suid(struct inode *inode)
+inline void remove_suid(struct inode *inode)
 {
 	unsigned int mode;
 
diff -uNr 2.4.0-shm_vm_locked-truncate/mm/shmem.c 2.4.0-shm_vm_locked-truncate-rw/mm/shmem.c
--- 2.4.0-shm_vm_locked-truncate/mm/shmem.c	Thu Jan 11 08:12:06 2001
+++ 2.4.0-shm_vm_locked-truncate-rw/mm/shmem.c	Sat Jan 13 11:21:32 2001
@@ -10,11 +10,8 @@
 
 /*
  * This shared memory handling is heavily based on the ramfs. It
- * extends the ramfs by the ability to use swap which would makes it a
- * completely usable filesystem.
- *
- * But read and write are not supported (yet)
- *
+ * extends the ramfs by the ability to use swap and honor resource
+ * limits which makes it a completely usable filesystem.
  */
 
 #include <linux/module.h>
@@ -42,8 +39,7 @@
 static struct inode_operations shmem_inode_operations;
 static struct file_operations shmem_dir_operations;
 static struct inode_operations shmem_dir_inode_operations;
-static struct vm_operations_struct shmem_shared_vm_ops;
-static struct vm_operations_struct shmem_private_vm_ops;
+static struct vm_operations_struct shmem_vm_ops;
 
 LIST_HEAD (shmem_inodes);
 static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
@@ -60,17 +56,17 @@
 	index /= ENTRIES_PER_PAGE;
 
 	if (index >= ENTRIES_PER_PAGE)
-		return NULL;
+		return ERR_PTR(-EFBIG);
 
 	if (!info->i_indirect) {
 		info->i_indirect = (swp_entry_t **) get_zeroed_page(GFP_USER);
 		if (!info->i_indirect)
-			return NULL;
+			return ERR_PTR(-ENOMEM);
 	}
 	if(!(info->i_indirect[index])) {
 		info->i_indirect[index] = (swp_entry_t *) get_zeroed_page(GFP_USER);
 		if (!info->i_indirect[index])
-			return NULL;
+			return ERR_PTR(-ENOMEM);
 	}
 	
 	return info->i_indirect[index]+offset;
@@ -131,11 +127,8 @@
 
 	spin_lock (&info->lock);
 	index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (index >= info->max_index) {
-		info->max_index = index;
-		spin_unlock (&info->lock);
-		return;
-	}
+	if (index >= info->max_index)
+		goto out;
 
 	start = shmem_truncate_part (info->i_direct, SHMEM_NR_DIRECT, index, &freed);
 
@@ -217,7 +210,7 @@
 
 	spin_lock(&info->lock);
 	entry = shmem_swp_entry(info, page->index);
-	if (!entry)	/* this had been allocted on page allocation */
+	if (IS_ERR(entry))	/* this had been allocted on page allocation */
 		BUG();
 	error = -EAGAIN;
 	if (entry->val) {
@@ -242,41 +235,21 @@
 	return error;
 }
 
-/*
- * shmem_nopage - either get the page from swap or allocate a new one
- *
- * If we allocate a new one we do not mark it dirty. That's up to the
- * vm. If we swap it in we mark it dirty since we also free the swap
- * entry since a page cannot live in both the swap and page cache
- */
-struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int no_share)
+static struct page * shmem_getpage_locked(struct inode * inode, unsigned long idx)
 {
-	unsigned long size;
-	struct page * page;
-	unsigned int idx;
-	swp_entry_t *entry;
-	struct inode * inode = vma->vm_file->f_dentry->d_inode;
 	struct address_space * mapping = inode->i_mapping;
 	struct shmem_inode_info *info;
+	struct page * page;
+	swp_entry_t *entry;
 
-	idx = (address - vma->vm_start) >> PAGE_SHIFT;
-	idx += vma->vm_pgoff;
-
-	down (&inode->i_sem);
-	size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	page = NOPAGE_SIGBUS;
-	if ((idx >= size) && (vma->vm_mm == current->mm))
-		goto out;
-
-	/* retry, we may have slept */
-	page = __find_lock_page(mapping, idx, page_hash (mapping, idx));
+	page = find_lock_page(mapping, idx);;
 	if (page)
-		goto cached_page;
+		return page;
 
 	info = &inode->u.shmem_i;
 	entry = shmem_swp_entry (info, idx);
-	if (!entry)
-		goto oom;
+	if (IS_ERR(entry))
+		return (void *)entry;
 	if (entry->val) {
 		unsigned long flags;
 
@@ -288,13 +261,13 @@
 			page = read_swap_cache(*entry);
 			unlock_kernel();
 			if (!page) 
-				goto oom;
+				return ERR_PTR(-ENOMEM);
 		}
 
 		/* We have to this with page locked to prevent races */
+		lock_page(page);
 		spin_lock (&info->lock);
 		swap_free(*entry);
-		lock_page(page);
 		delete_from_swap_cache_nolock(page);
 		*entry = (swp_entry_t) {0};
 		flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error) | (1 << PG_referenced) | (1 << PG_arch_1));
@@ -311,17 +284,75 @@
 		/* Ok, get a new page */
 		page = page_cache_alloc();
 		if (!page)
-			goto oom;
-		clear_user_highpage(page, address);
+			return ERR_PTR(-ENOMEM);
+		clear_highpage(page);
 		inode->i_blocks++;
 		add_to_page_cache (page, mapping, idx);
 	}
 	/* We have the page */
 	SetPageUptodate (page);
+	return page;
+no_space:
+	spin_unlock (&inode->i_sb->u.shmem_sb.stat_lock);
+	return ERR_PTR(-ENOSPC);
+}
 
-cached_page:
-	UnlockPage (page);
-	up(&inode->i_sem);
+/*
+ * shmem_getpage - either get the page from swap or allocate a new one
+ *
+ * If we allocate a new one we do not mark it dirty. That's up to the
+ * vm. If we swap it in we mark it dirty since we also free the swap
+ * entry since a page cannot live in both the swap and page cache
+ */
+static int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr)
+{
+	struct address_space * mapping = inode->i_mapping;
+	int error;
+
+	*ptr = NOPAGE_SIGBUS;
+	if (inode->i_size < (loff_t) idx * PAGE_CACHE_SIZE)
+		return -EFAULT;
+
+	*ptr = __find_get_page(mapping, idx, page_hash(mapping, idx));
+	if (*ptr) {
+		if (Page_Uptodate(*ptr))
+			return 0;
+		page_cache_release(*ptr);
+	}
+
+	down (&inode->i_sem);
+	/* retest we may have slept */
+	if (inode->i_size < (loff_t) idx * PAGE_CACHE_SIZE)
+		goto sigbus;
+	*ptr = shmem_getpage_locked(inode, idx);
+	if (IS_ERR (*ptr))
+		goto failed;
+	UnlockPage(*ptr);
+	up (&inode->i_sem);
+	return 0;
+failed:
+	up (&inode->i_sem);
+	error = PTR_ERR(*ptr);
+	*ptr = NOPAGE_OOM;
+	if (error != -EFBIG)
+		*ptr = NOPAGE_SIGBUS;
+	return error;
+sigbus:
+	*ptr = NOPAGE_SIGBUS;
+	return -EFAULT;
+}
+
+struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int no_share)
+{
+	struct page * page;
+	unsigned int idx;
+	struct inode * inode = vma->vm_file->f_dentry->d_inode;
+
+	idx = (address - vma->vm_start) >> PAGE_SHIFT;
+	idx += vma->vm_pgoff;
+
+	if (shmem_getpage(inode, idx, &page))
+		return page;
 
 	if (no_share) {
 		struct page *new_page = page_cache_alloc();
@@ -337,13 +368,6 @@
 
 	flush_page_to_ram (page);
 	return(page);
-no_space:
-	spin_unlock (&inode->i_sb->u.shmem_sb.stat_lock);
-oom:
-	page = NOPAGE_OOM;
-out:
-	up(&inode->i_sem);
-	return page;
 }
 
 struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
@@ -392,6 +416,216 @@
 	return inode;
 }
 
+static ssize_t
+shmem_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
+{
+	struct inode	*inode = file->f_dentry->d_inode; 
+	unsigned long	limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
+	loff_t		pos;
+	struct page	*page;
+	unsigned long	written;
+	long		status;
+	int		err;
+
+
+	down(&inode->i_sem);
+
+	pos = *ppos;
+	err = -EINVAL;
+	if (pos < 0)
+		goto out;
+
+	err = file->f_error;
+	if (err) {
+		file->f_error = 0;
+		goto out;
+	}
+
+	written = 0;
+
+	if (file->f_flags & O_APPEND)
+		pos = inode->i_size;
+
+	/*
+	 * Check whether we've reached the file size limit.
+	 */
+	err = -EFBIG;
+	if (limit != RLIM_INFINITY) {
+		if (pos >= limit) {
+			send_sig(SIGXFSZ, current, 0);
+			goto out;
+		}
+		if (count > limit - pos) {
+			send_sig(SIGXFSZ, current, 0);
+			count = limit - pos;
+		}
+	}
+
+	status	= 0;
+	if (count) {
+		remove_suid(inode);
+		inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+	}
+
+	while (count) {
+		unsigned long bytes, index, offset;
+		char *kaddr;
+		int deactivate = 1;
+
+		/*
+		 * Try to find the page in the cache. If it isn't there,
+		 * allocate a free page.
+		 */
+		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
+		index = pos >> PAGE_CACHE_SHIFT;
+		bytes = PAGE_CACHE_SIZE - offset;
+		if (bytes > count) {
+			bytes = count;
+			deactivate = 0;
+		}
+
+		/*
+		 * Bring in the user page that we will copy from _first_.
+		 * Otherwise there's a nasty deadlock on copying from the
+		 * same page as we're writing to, without it being marked
+		 * up-to-date.
+		 */
+		{ volatile unsigned char dummy;
+			__get_user(dummy, buf);
+			__get_user(dummy, buf+bytes-1);
+		}
+
+		page = shmem_getpage_locked(inode, index);
+		status = PTR_ERR(page);
+		if (IS_ERR(page))
+			break;
+
+		/* We have exclusive IO access to the page.. */
+		if (!PageLocked(page)) {
+			PAGE_BUG(page);
+		}
+
+		kaddr = kmap(page);
+// can this do a truncated write? cr
+		status = copy_from_user(kaddr+offset, buf, bytes);
+		if (status)
+			goto fail_write;
+
+		flush_dcache_page(page);
+		if (bytes > 0) {
+			SetPageDirty(page);
+			written += bytes;
+			count -= bytes;
+			pos += bytes;
+			buf += bytes;
+			if (pos > inode->i_size) 
+				inode->i_size = pos;
+                        if (inode->u.shmem_i.max_index < index)
+                                inode->u.shmem_i.max_index = index;
+
+		}
+unlock:
+		/* Mark it unlocked again and drop the page.. */
+		UnlockPage(page);
+		if (deactivate)
+			deactivate_page(page);
+		page_cache_release(page);
+
+		if (status < 0)
+			break;
+	}
+	*ppos = pos;
+
+	err = written ? written : status;
+out:
+	up(&inode->i_sem);
+	return err;
+fail_write:
+	status = -EFAULT;
+	ClearPageUptodate(page);
+	kunmap(page);
+	goto unlock;
+}
+
+static void do_shmem_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct address_space *mapping = inode->i_mapping;
+	unsigned long index, offset;
+	int nr = 1;
+
+	index = *ppos >> PAGE_CACHE_SHIFT;
+	offset = *ppos & ~PAGE_CACHE_MASK;
+
+	while (nr && desc->count) {
+		struct page *page;
+		unsigned long end_index, nr;
+
+		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+		if (index > end_index)
+			break;
+		nr = PAGE_CACHE_SIZE;
+		if (index == end_index) {
+			nr = inode->i_size & ~PAGE_CACHE_MASK;
+			if (nr <= offset)
+				break;
+		}
+
+		nr = nr - offset;
+
+		if ((desc->error = shmem_getpage(inode, index, &page)))
+			break;
+
+		if (mapping->i_mmap_shared != NULL)
+			flush_dcache_page(page);
+
+		/*
+		 * Ok, we have the page, and it's up-to-date, so
+		 * now we can copy it to user space...
+		 *
+		 * The actor routine returns how many bytes were actually used..
+		 * NOTE! This may not be the same as how much of a user buffer
+		 * we filled up (we may be padding etc), so we can only update
+		 * "pos" here (the actor routine has to update the user buffer
+		 * pointers and the remaining count).
+		 */
+		nr = file_read_actor(desc, page, offset, nr);
+		offset += nr;
+		index += offset >> PAGE_CACHE_SHIFT;
+		offset &= ~PAGE_CACHE_MASK;
+	
+		page_cache_release(page);
+	}
+
+	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
+	UPDATE_ATIME(inode);
+}
+
+static ssize_t shmem_file_read(struct file * filp, char * buf, size_t count, loff_t *ppos)
+{
+	ssize_t retval;
+
+	retval = -EFAULT;
+	if (access_ok(VERIFY_WRITE, buf, count)) {
+		retval = 0;
+
+		if (count) {
+			read_descriptor_t desc;
+
+			desc.written = 0;
+			desc.count = count;
+			desc.buf = buf;
+			desc.error = 0;
+			do_shmem_file_read(filp, ppos, &desc);
+
+			retval = desc.written;
+			if (!retval)
+				retval = desc.error;
+		}
+	}
+	return retval;
+}
+
 static int shmem_statfs(struct super_block *sb, struct statfs *buf)
 {
 	buf->f_type = SHMEM_MAGIC;
@@ -554,9 +788,7 @@
 	struct vm_operations_struct * ops;
 	struct inode *inode = file->f_dentry->d_inode;
 
-	ops = &shmem_private_vm_ops;
-	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
-		ops = &shmem_shared_vm_ops;
+	ops = &shmem_vm_ops;
 	if (!inode->i_sb || !S_ISREG(inode->i_mode))
 		return -EACCES;
 	UPDATE_ATIME(inode);
@@ -668,7 +900,9 @@
 };
 
 static struct file_operations shmem_file_operations = {
-	mmap:		shmem_mmap
+	mmap:	shmem_mmap,
+	read:	shmem_file_read,
+	write:	shmem_file_write
 };
 
 static struct inode_operations shmem_inode_operations = {
@@ -699,11 +933,7 @@
 	put_inode:	force_delete,	
 };
 
-static struct vm_operations_struct shmem_private_vm_ops = {
-	nopage:	shmem_nopage,
-};
-
-static struct vm_operations_struct shmem_shared_vm_ops = {
+static struct vm_operations_struct shmem_vm_ops = {
 	nopage:	shmem_nopage,
 };
 
@@ -876,6 +1106,6 @@
 	if (vma->vm_file)
 		fput (vma->vm_file);
 	vma->vm_file = file;
-	vma->vm_ops = &shmem_shared_vm_ops;
+	vma->vm_ops = &shmem_vm_ops;
 	return 0;
 }

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
Please read the FAQ at http://www.tux.org/lkml/

             reply	other threads:[~2001-01-13 10:45 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2001-01-13 10:49 Christoph Rohland [this message]
2001-01-13 16:03 ` [Patch] symlink fix for shm/swap fs Christoph Rohland

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=m3ae8v211w.fsf@linux.local \
    --to=cr@sap.com \
    --cc=alan@lxorguk.ukuu.org.uk \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).