linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Alexander Viro <viro@math.psu.edu>
To: Chris Mason <mason@suse.com>
Cc: Andrea Arcangeli <andrea@suse.de>,
	Linus Torvalds <torvalds@transmeta.com>,
	Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: [PATCH] fs/block_dev.c cleanup
Date: Thu, 20 Sep 2001 16:54:18 -0400 (EDT)	[thread overview]
Message-ID: <Pine.GSO.4.21.0109201645540.5631-100000@weyl.math.psu.edu> (raw)
In-Reply-To: <Pine.GSO.4.21.0109201201590.3498-100000@weyl.math.psu.edu>



On Thu, 20 Sep 2001, Alexander Viro wrote:

> On Thu, 20 Sep 2001, Chris Mason wrote:
> 
> > > <nod>  And if you add Andrea's (perfectly valid) observation re having no
> > > need to sync any fs structures we might have for that device, you get
> > > __block_fsync().  After that it's easy to merge blkdev_close() code into
> > > blkdev_put().
> > > 
> > >
> > 
> > Ok, __block_fsync is much better than just fsync_dev.
> > 
> > Are there other parts of blkdev_close you want merged into 
> > blkdev_put? Without changing the reread blocks on last close 
> > semantics, I think this is all we can do.
> > 
> > As far as I can tell, bdev->bd_inode is valid to send 
> > to __block_fsync, am I missing something?
> 
> Eventually that will be the right thing, but only after we allocate
> bd_inode upon blkdev_get()/blkdev_open() instead of trying to cannibalize
> the inode passed to blkdev_open().
> 
> I'm testing that chunk right now (it also kills all the fake_inode crap in
> block_dev.c).

OK, it seems to be working here.  It doesn't fix anything in rd.c - all
it does is a crapectomy in block_dev.c.  A pseudo-fs is added, inodes are
allocated there upon blkdev_open() and blkdev_get(), and ->bd_inode is
always from that pseudo-fs and never equal to the inode passed into
blkdev_open().

The crap with fake inodes is gone - we simply use ->bd_inode.  ->a_ops for
block device inodes is not set at all - neither in devices.c nor in devfs.
We set it for inodes on the pseudo-fs, and setting ->i_mapping upon open()
does the right thing.

All the mess with keeping the first inode alive is gone - no need to do
that anymore.

diff -urN S10-pre12/drivers/block/rd.c linux/drivers/block/rd.c
--- S10-pre12/drivers/block/rd.c	Thu Sep 20 15:27:18 2001
+++ linux/drivers/block/rd.c	Thu Sep 20 15:26:09 2001
@@ -420,7 +420,6 @@
 			/* bdev->bd_sem is held by caller */
 			bdev->bd_openers++;
 			bdev->bd_cache_openers++;
-			bdev->bd_inode = inode;
 		}
 	}
 
diff -urN S10-pre12/fs/block_dev.c linux/fs/block_dev.c
--- S10-pre12/fs/block_dev.c	Thu Sep 20 15:27:24 2001
+++ linux/fs/block_dev.c	Thu Sep 20 15:40:04 2001
@@ -18,6 +18,7 @@
 #include <linux/iobuf.h>
 #include <linux/highmem.h>
 #include <linux/blkdev.h>
+#include <linux/module.h>
 
 #include <asm/uaccess.h>
 
@@ -365,6 +366,53 @@
 }
 
 /*
+ * pseudo-fs
+ */
+
+static struct super_block *bd_read_super(struct super_block *sb, void *data, int silent)
+{
+	static struct super_operations sops = {};
+	struct inode *root = new_inode(sb);
+	if (!root)
+		return NULL;
+	root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
+	root->i_uid = root->i_gid = 0;
+	root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
+	sb->s_blocksize = 1024;
+	sb->s_blocksize_bits = 10;
+	sb->s_magic = 0x62646576;
+	sb->s_op = &sops;
+	sb->s_root = d_alloc(NULL, &(const struct qstr) { "bdev:", 5, 0 });
+	if (!sb->s_root) {
+		iput(root);
+		return NULL;
+	}
+	sb->s_root->d_sb = sb;
+	sb->s_root->d_parent = sb->s_root;
+	d_instantiate(sb->s_root, root);
+	return sb;
+}
+
+static DECLARE_FSTYPE(bd_type, "bdev", bd_read_super, FS_NOMOUNT);
+
+static struct vfsmount *bd_mnt;
+
+static int get_inode(struct block_device *bdev)
+{
+	if (!bdev->bd_inode) {
+		struct inode *inode = new_inode(bd_mnt->mnt_sb);
+		if (!inode)
+			return -ENOMEM;
+		inode->i_rdev = to_kdev_t(bdev->bd_dev);
+		atomic_inc(&bdev->bd_count);	/* will go away */
+		inode->i_bdev = bdev;
+		inode->i_data.a_ops = &def_blk_aops;
+		bdev->bd_inode = inode;
+	}
+	return 0;
+}
+
+/*
  * bdev cache handling - shamelessly stolen from inode.c
  * We use smaller hashtable, though.
  */
@@ -394,7 +442,7 @@
 
 void __init bdev_cache_init(void)
 {
-	int i;
+	int i, err;
 	struct list_head *head = bdev_hashtable;
 
 	i = HASH_SIZE;
@@ -410,6 +458,13 @@
 					 NULL);
 	if (!bdev_cachep)
 		panic("Cannot create bdev_cache SLAB cache");
+	err = register_filesystem(&bd_type);
+	if (err)
+		panic("Cannot register bdev pseudo-fs");
+	bd_mnt = kern_mount(&bd_type);
+	err = PTR_ERR(bd_mnt);
+	if (IS_ERR(bd_mnt))
+		panic("Cannot create bdev pseudo-fs");
 }
 
 /*
@@ -598,18 +653,13 @@
 
 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
 {
-	struct inode inode_fake;
 	int res;
 	mm_segment_t old_fs = get_fs();
 
 	if (!bdev->bd_op->ioctl)
 		return -EINVAL;
-	memset(&inode_fake, 0, sizeof(inode_fake));
-	inode_fake.i_rdev = to_kdev_t(bdev->bd_dev);
-	inode_fake.i_bdev = bdev;
-	init_waitqueue_head(&inode_fake.i_wait);
 	set_fs(KERNEL_DS);
-	res = bdev->bd_op->ioctl(&inode_fake, NULL, cmd, arg);
+	res = bdev->bd_op->ioctl(bdev->bd_inode, NULL, cmd, arg);
 	set_fs(old_fs);
 	return res;
 }
@@ -619,6 +669,12 @@
 	int ret = -ENODEV;
 	kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
 	down(&bdev->bd_sem);
+
+	if (get_inode(bdev)) {
+		up(&bdev->bd_sem);
+		return -ENOMEM;
+	}
+
 	lock_kernel();
 	if (!bdev->bd_op)
 		bdev->bd_op = get_blkfops(MAJOR(rdev));
@@ -631,23 +687,22 @@
 		 */
 		struct file fake_file = {};
 		struct dentry fake_dentry = {};
-		struct inode *fake_inode = get_empty_inode();
 		ret = -ENOMEM;
-		if (fake_inode) {
-			fake_file.f_mode = mode;
-			fake_file.f_flags = flags;
-			fake_file.f_dentry = &fake_dentry;
-			fake_dentry.d_inode = fake_inode;
-			fake_inode->i_rdev = rdev;
-			ret = 0;
-			if (bdev->bd_op->open)
-				ret = bdev->bd_op->open(fake_inode, &fake_file);
-			if (!ret) {
-				bdev->bd_openers++;
-				atomic_inc(&bdev->bd_count);
-			} else if (!bdev->bd_openers)
-				bdev->bd_op = NULL;
-			iput(fake_inode);
+		fake_file.f_mode = mode;
+		fake_file.f_flags = flags;
+		fake_file.f_dentry = &fake_dentry;
+		fake_dentry.d_inode = bdev->bd_inode;
+		ret = 0;
+		if (bdev->bd_op->open)
+			ret = bdev->bd_op->open(bdev->bd_inode, &fake_file);
+		if (!ret) {
+			bdev->bd_openers++;
+			atomic_inc(&bdev->bd_count);
+		} else if (!bdev->bd_openers) {
+			struct inode *bd_inode = bdev->bd_inode;
+			bdev->bd_op = NULL;
+			bdev->bd_inode = NULL;
+			iput(bd_inode);
 		}
 	}
 	unlock_kernel();
@@ -669,6 +724,12 @@
 	filp->f_flags |= O_LARGEFILE;
 
 	down(&bdev->bd_sem);
+
+	if (get_inode(bdev)) {
+		up(&bdev->bd_sem);
+		return -ENOMEM;
+	}
+
 	lock_kernel();
 	if (!bdev->bd_op)
 		bdev->bd_op = get_blkfops(MAJOR(inode->i_rdev));
@@ -678,20 +739,15 @@
 			ret = bdev->bd_op->open(inode,filp);
 		if (!ret) {
 			bdev->bd_openers++;
-			if (!bdev->bd_cache_openers && bdev->bd_inode)
-				BUG();
-			if (bdev->bd_cache_openers && !bdev->bd_inode)
-				BUG();
-			if (!bdev->bd_cache_openers++)
-				bdev->bd_inode = inode;
-			else {
-				if (bdev->bd_inode != inode && !inode->i_mapping_overload++) {
-					inode->i_mapping = bdev->bd_inode->i_mapping;
-					atomic_inc(&bdev->bd_inode->i_count);
-				}
-			}
-		} else if (!bdev->bd_openers)
+			bdev->bd_cache_openers++;
+			inode->i_mapping = bdev->bd_inode->i_mapping;
+			inode->i_mapping_overload++;
+		} else if (!bdev->bd_openers) {
+			struct inode *bd_inode = bdev->bd_inode;
 			bdev->bd_op = NULL;
+			bdev->bd_inode = NULL;
+			iput(bd_inode);
+		}
 	}	
 	unlock_kernel();
 	up(&bdev->bd_sem);
@@ -702,28 +758,24 @@
 {
 	int ret = 0;
 	kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
+	struct inode *bd_inode = bdev->bd_inode;
+
 	down(&bdev->bd_sem);
 	lock_kernel();
 	if (kind == BDEV_FILE)
-		fsync_dev(rdev);
+		__block_fsync(bd_inode);
 	else if (kind == BDEV_FS)
 		fsync_no_super(rdev);
 	/* only filesystems uses buffer cache for the metadata these days */
 	if (kind == BDEV_FS)
 		invalidate_buffers(rdev);
-	if (bdev->bd_op->release) {
-		struct inode * fake_inode = get_empty_inode();
-		ret = -ENOMEM;
-		if (fake_inode) {
-			fake_inode->i_rdev = rdev;
-			ret = bdev->bd_op->release(fake_inode, NULL);
-			iput(fake_inode);
-		} else
-			printk(KERN_WARNING "blkdev_put: ->release couldn't be run due -ENOMEM\n");
-	}
-	if (!--bdev->bd_openers)
-		bdev->bd_op = NULL;	/* we can't rely on driver being */
-					/* kind to stay around. */
+	if (bdev->bd_op->release)
+		ret = bdev->bd_op->release(bd_inode, NULL);
+	if (!--bdev->bd_openers) {
+		bdev->bd_op = NULL;
+		bdev->bd_inode = NULL;
+		iput(bd_inode);
+	}
 	unlock_kernel();
 	up(&bdev->bd_sem);
 	bdput(bdev);
@@ -736,8 +788,6 @@
 	int ret = 0;
 	struct inode * bd_inode = bdev->bd_inode;
 
-	if (bd_inode->i_mapping != inode->i_mapping)
-		BUG();
 	down(&bdev->bd_sem);
 	lock_kernel();
 	/* cache coherency protocol */
@@ -745,11 +795,9 @@
 		struct super_block * sb;
 
 		/* flush the pagecache to disk */
-		__block_fsync(inode);
+		__block_fsync(bd_inode);
 		/* drop the pagecache, uptodate info is on disk by now */
 		truncate_inode_pages(inode->i_mapping, 0);
-		/* forget the bdev pagecache address space */
-		bdev->bd_inode = NULL;
 
 		/* if the fs was mounted ro just throw away most of its caches */
 		sb = get_super(inode->i_rdev);
@@ -782,16 +830,17 @@
 			drop_super(sb);
 		}
 	}
-	if (inode != bd_inode && !--inode->i_mapping_overload) {
+	if (!--inode->i_mapping_overload)
 		inode->i_mapping = &inode->i_data;
-		iput(bd_inode);
-	}
 
 	/* release the device driver */
 	if (bdev->bd_op->release)
 		ret = bdev->bd_op->release(inode, NULL);
-	if (!--bdev->bd_openers)
+	if (!--bdev->bd_openers) {
 		bdev->bd_op = NULL;
+		bdev->bd_inode = NULL;
+		iput(bd_inode);
+	}
 	unlock_kernel();
 	up(&bdev->bd_sem);
 
diff -urN S10-pre12/fs/devices.c linux/fs/devices.c
--- S10-pre12/fs/devices.c	Thu Sep 20 15:27:24 2001
+++ linux/fs/devices.c	Thu Sep 20 15:26:09 2001
@@ -206,7 +206,6 @@
 		inode->i_cdev = cdget(rdev);
 	} else if (S_ISBLK(mode)) {
 		inode->i_fop = &def_blk_fops;
-		inode->i_mapping->a_ops = &def_blk_aops;
 		inode->i_rdev = to_kdev_t(rdev);
 		inode->i_bdev = bdget(rdev);
 	} else if (S_ISFIFO(mode))


  reply	other threads:[~2001-09-20 20:54 UTC|newest]

Thread overview: 102+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2001-09-18  0:08 Linux 2.4.10-pre11 Linus Torvalds
2001-09-17 23:17 ` Marcelo Tosatti
2001-09-18  1:08   ` Marcelo Tosatti
2001-09-18  3:37     ` Andrea Arcangeli
2001-09-18  2:25       ` Marcelo Tosatti
2001-09-18  3:58         ` Andrea Arcangeli
2001-09-18  2:53           ` Marcelo Tosatti
2001-09-18  4:54             ` Andrea Arcangeli
2001-09-18  3:33               ` Marcelo Tosatti
2001-09-18  5:06                 ` Andrea Arcangeli
2001-09-18  3:55                   ` Marcelo Tosatti
2001-09-18  5:32                     ` Andrea Arcangeli
2001-09-18  4:14                       ` Marcelo Tosatti
2001-09-18  5:59                         ` Andrea Arcangeli
2001-09-18  5:00                       ` Marcelo Tosatti
     [not found] ` <20010917211834.A31693@redhat.com>
     [not found]   ` <20010918035055.J698@athlon.random>
2001-09-18  2:02     ` Andrea Arcangeli
     [not found]     ` <20010917221653.B31693@redhat.com>
2001-09-18  2:27       ` Linus Torvalds
2001-09-18  3:14         ` Alan Cox
2001-09-18  3:26         ` Andrea Arcangeli
     [not found]       ` <20010918052201.N698@athlon.random>
2001-09-18  4:01         ` Benjamin LaHaise
2001-09-18  4:39           ` Andrea Arcangeli
2001-09-18  5:04             ` Alan Cox
2001-09-18  5:09               ` Andrea Arcangeli
2001-09-18  5:22             ` Benjamin LaHaise
2001-09-18  5:48               ` Andrea Arcangeli
2001-09-18  5:48 ` Andrew Morton
2001-09-18  6:11   ` Andrea Arcangeli
2001-09-18  5:02     ` Marcelo Tosatti
2001-09-18  6:40       ` Andrea Arcangeli
2001-09-18 16:06         ` Marcelo Tosatti
2001-09-18 19:18           ` Marcelo Tosatti
2001-09-18 21:05             ` Andrea Arcangeli
2001-09-19 13:57               ` Rik van Riel
2001-09-18 10:58   ` Martin Dalecki
2001-09-18  9:31 ` Alexander Viro
2001-09-18  9:39   ` Andrea Arcangeli
2001-09-18  9:44     ` Alexander Viro
2001-09-18  9:57       ` Andrea Arcangeli
2001-09-18 10:02         ` Alexander Viro
2001-09-18 10:17           ` Andrea Arcangeli
2001-09-18 10:28             ` Alexander Viro
2001-09-18 10:35               ` Andrea Arcangeli
2001-09-18 10:52                 ` Alexander Viro
2001-09-18 11:05             ` Helge Hafting
2001-09-18 12:40               ` Andrea Arcangeli
2001-09-18 17:02             ` Linus Torvalds
2001-09-18 16:45   ` Linus Torvalds
2001-09-18 18:19     ` Alexander Viro
2001-09-18 18:27       ` Linus Torvalds
2001-09-18 19:14         ` Andreas Dilger
2001-09-18 19:41           ` Alexander Viro
2001-09-18 20:33           ` Richard Gooch
2001-09-18 20:53             ` Alexander Viro
2001-09-18 21:06             ` Richard Gooch
2001-09-18 21:27               ` Alexander Viro
2001-09-18 19:29         ` Benjamin LaHaise
2001-09-18 20:17         ` Stephan von Krawczynski
2001-09-18 20:33           ` Alan Cox
2001-09-19 13:42           ` Rik van Riel
2001-09-19 14:27             ` Alexander Viro
2001-09-19  2:59         ` Michael Peddemors
2001-09-19 16:11         ` Alexander Viro
2001-09-19 18:25           ` Andrea Arcangeli
2001-09-19 19:21             ` Alexander Viro
2001-09-19 20:55               ` Andrea Arcangeli
2001-09-19 21:17                 ` Alexander Viro
2001-09-19 23:01                   ` Andrea Arcangeli
2001-09-19 23:03                   ` Andrea Arcangeli
2001-09-19 23:30                     ` Alexander Viro
2001-09-19 23:40                       ` Andrea Arcangeli
2001-09-20 13:56                         ` Alexander Viro
2001-09-20 14:38                           ` Chris Mason
2001-09-20 14:50                             ` Alexander Viro
2001-09-20 15:44                               ` Chris Mason
2001-09-20 16:43                                 ` Alexander Viro
2001-09-20 20:54                                   ` Alexander Viro [this message]
2001-09-19 22:15                 ` Richard Gooch
2001-09-20  2:34               ` Andrea Arcangeli
2001-09-20 10:52                 ` Alexander Viro
2001-09-20 18:18                   ` Andrea Arcangeli
2001-09-20 18:33                     ` Alexander Viro
2001-09-20 18:59                       ` Andrea Arcangeli
2001-09-20 20:41                         ` Alexander Viro
2001-09-20 21:18                           ` Andrea Arcangeli
2001-09-20 21:40                             ` Alexander Viro
2001-09-20 22:13                               ` Andrea Arcangeli
2001-09-20 22:20                                 ` Alexander Viro
2001-09-20 22:31                                   ` Andrea Arcangeli
2001-09-20 22:44                                     ` Alexander Viro
2001-09-20 23:03                                       ` Andrea Arcangeli
2001-09-20 23:11                                         ` Alexander Viro
2001-09-21  1:50                                           ` Alexander Viro
2001-09-21  2:42                                             ` Andrea Arcangeli
2001-09-21  3:47                                         ` Andrea Arcangeli
2001-09-21  4:00                                           ` Alexander Viro
2001-09-21  4:06                                             ` Andrea Arcangeli
2001-09-21  4:06                                           ` Andrea Arcangeli
2001-09-21  4:46                                             ` Andrea Arcangeli
2001-09-21  7:09                                               ` Andrea Arcangeli
2001-09-19 20:41             ` Richard Gooch
2001-09-19 13:38       ` Rik van Riel
2001-09-19 16:35       ` Andrea Arcangeli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Pine.GSO.4.21.0109201645540.5631-100000@weyl.math.psu.edu \
    --to=viro@math.psu.edu \
    --cc=andrea@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mason@suse.com \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).