All of lore.kernel.org
 help / color / mirror / Atom feed
From: ebiederm@xmission.com (Eric W. Biederman)
To: Greg KH <greg@kroah.com>
Cc: Greg KH <gregkh@suse.de>, "Hans J. Koch" <hjk@linutronix.de>,
	linux-kernel@vger.kernel.org,
	Thomas Gleixner <tglx@linutronix.de>
Subject: [PATCH 4/5] libunload: A library to help remove open files
Date: Sun, 26 Sep 2010 15:48:44 -0700	[thread overview]
Message-ID: <m1ocbk86k3.fsf_-_@fess.ebiederm.org> (raw)
In-Reply-To: <20100926192142.GA7252@kroah.com> (Greg KH's message of "Sun, 26 Sep 2010 12:21:42 -0700")


The problem of how to remove open files due to module unloading or device
hotunplugging keeps coming up.  We have multiple implementations of roughly
the same logic in proc, sysctl, sysfs, tun and now I am working on yet
another one for uio.  It is time to start working on a generic implementation.

This library does not aim to allow wrapping any arbitrary set of file operations
and making it safe to unload any module.  This library aims to work in
conjunction with the code implementing an object to make it safe to
safely remove the object while file handles to it are still open.  libunload
implements the necessary locking and logic to make it straightforward to
implement file_operations for objects that are removed at runtime.

It is hard to arrange for the ->close method of vm_operations_struct to be
called when an object is being removed, and this code doesn't even attempt
to help with that.  Instead it is assumed that calling ->close is not needed.
Without close support mmap at hotunplug time is simply a matter of calling
unmap_mapping_range() to invalidate the mappings, and to arrange for vm_fault
to return VM_FAULT_SIGBUS when the unload_trylock fails.

Wait queues and fasync queues can safely be woken up after unload_barrier
making the semantics clean.   The fasync entries can be freed as a list of
all of the file descriptors is kept.  poll entries can not be freed so the
poll wait queue heads must be kept around.   If someone else's poll method is
being wrapped the wrapped poll wait queue head could be freed, but it requires
that there is a wrapping wait queue head that is kept around.  If there is no
other way wrapping a poll wait queue head seems practical but in general it
isn't a particularly useful.

libunload is best understood from the perspective of code that calls
unload_barrier().  Past the unload barrier it is guaranteed that there
is no code in the critical sections protected by the unload lock, and the
unload release lock.  Past the unload barrier it is safe to call the release
methods for remaining file descriptors, to ensure some logical state does
not persist.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
---
 fs/Makefile            |    2 +-
 fs/libunload.c         |  166 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/unload.h |   33 ++++++++++
 3 files changed, 200 insertions(+), 1 deletions(-)
 create mode 100644 fs/libunload.c
 create mode 100644 include/linux/unload.h

diff --git a/fs/Makefile b/fs/Makefile
index e6ec1d3..fa6bd11 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y :=	open.o read_write.o file_table.o super.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o \
 		pnode.o drop_caches.o splice.o sync.o utimes.o \
-		stack.o fs_struct.o statfs.o
+		stack.o fs_struct.o statfs.o libunload.o
 
 ifeq ($(CONFIG_BLOCK),y)
 obj-y +=	buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
diff --git a/fs/libunload.c b/fs/libunload.c
new file mode 100644
index 0000000..2470bf2
--- /dev/null
+++ b/fs/libunload.c
@@ -0,0 +1,166 @@
+#include <linux/fs.h>
+#include <linux/mm_types.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/unload.h>
+
+/*
+ * Bookkeeping for one unload_barrier() call, kept on the caller's stack.
+ * completion: signalled when the last unload-lock holder and the last
+ *             pending releaser have finished.
+ * releasers:  number of files whose f_count had already dropped to zero
+ *             (an __fput was in flight) when the barrier was installed,
+ *             and whose release the barrier must still wait for.
+ */
+struct unload_barrier {
+	struct completion	completion;
+	int			releasers;
+};
+
+/*
+ * Initialize an unload control structure.  active starts at 1: this
+ * initial reference is only dropped inside unload_barrier(), so active
+ * cannot reach 0 (and no completion can fire) before a barrier exists.
+ */
+void unload_init(struct unload *unload)
+{
+	INIT_HLIST_HEAD(&unload->ufiles);
+	spin_lock_init(&unload->lock);
+	unload->active = 1;
+	unload->barrier = NULL;
+}
+EXPORT_SYMBOL_GPL(unload_init);
+
+/*
+ * Bind an unload_file tracking node to a file and its unload structure.
+ * The node is not visible to unload_barrier() until it is hashed onto
+ * the ufiles list by unload_file_attach().
+ */
+void unload_file_init(struct unload_file *ufile, struct file *file, struct unload *unload)
+{
+	ufile->file = file;
+	ufile->unload = unload;
+	INIT_HLIST_NODE(&ufile->list);
+}
+EXPORT_SYMBOL_GPL(unload_file_init);
+
+/*
+ * Try to enter an unload-protected critical section.  Succeeds (and
+ * bumps the active count) only while no unload barrier is installed;
+ * returns false once unload_barrier() has begun, so callers must fail
+ * their operation instead of touching the object.  Pair a successful
+ * call with unload_unlock().
+ */
+bool unload_trylock(struct unload *unload)
+{
+	bool locked = false;
+	spin_lock(&unload->lock);
+	if (likely(!unload->barrier)) {
+		unload->active++;
+		locked = true;
+	}
+	spin_unlock(&unload->lock);
+	return locked;
+}
+EXPORT_SYMBOL_GPL(unload_trylock);
+
+/*
+ * Drop one active reference; caller holds unload->lock.
+ *
+ * The unguarded unload->barrier dereference is safe: active can only
+ * reach 0 after unload_barrier() has dropped the initial reference
+ * taken in unload_init(), and it does so only after installing
+ * unload->barrier, so the pointer is non-NULL whenever the right-hand
+ * side of the && is evaluated.
+ */
+static void __unload_unlock(struct unload *unload)
+{
+	unload->active--;
+	if ((unload->active == 0) && (unload->barrier->releasers == 0))
+		complete(&unload->barrier->completion);
+}
+
+/*
+ * Leave a critical section entered with a successful unload_trylock(),
+ * waking a pending unload_barrier() if this was the last holder.
+ */
+void unload_unlock(struct unload *unload)
+{
+	spin_lock(&unload->lock);
+	__unload_unlock(unload);
+	spin_unlock(&unload->lock);
+}
+EXPORT_SYMBOL_GPL(unload_unlock);
+
+/* Hash ufile onto unload's ufiles list; caller holds unload->lock. */
+static void __unload_file_attach(struct unload_file *ufile, struct unload *unload)
+{
+	ufile->unload = unload;
+	hlist_add_head(&ufile->list, &unload->ufiles);
+}
+
+/*
+ * Make ufile visible to unload_barrier() by adding it to the ufiles
+ * list.  Typically called from the object's open method.
+ */
+void unload_file_attach(struct unload_file *ufile, struct unload *unload)
+{
+	spin_lock(&unload->lock);
+	__unload_file_attach(ufile, unload);
+	spin_unlock(&unload->lock);
+}
+EXPORT_SYMBOL_GPL(unload_file_attach);
+
+/* Unhash ufile; caller holds unload->lock.  hlist_del_init() leaves the
+ * node self-pointing so hlist_unhashed() later reports it detached. */
+static void __unload_file_detach(struct unload_file *ufile)
+{
+	hlist_del_init(&ufile->list);
+}
+
+/*
+ * Remove ufile from its unload's ufiles list so neither
+ * unload_barrier() nor find_unload_file() will see it again.
+ */
+void unload_file_detach(struct unload_file *ufile)
+{
+	struct unload *unload = ufile->unload;
+
+	spin_lock(&unload->lock);
+	__unload_file_detach(ufile);
+	spin_unlock(&unload->lock);
+}
+EXPORT_SYMBOL_GPL(unload_file_detach);
+
+/*
+ * Look up the unload_file tracking a given struct file.  Returns the
+ * node, or NULL if the file is not (or no longer) on the ufiles list.
+ *
+ * NOTE(review): the pointer is returned after unload->lock is dropped,
+ * so the caller must otherwise guarantee the node cannot be freed
+ * concurrently -- confirm against the callers.
+ */
+struct unload_file *find_unload_file(struct unload *unload, struct file *file)
+{
+	struct unload_file *ufile;
+	struct hlist_node *pos;
+
+	spin_lock(&unload->lock);
+	hlist_for_each_entry(ufile, pos, &unload->ufiles, list) {
+		if (ufile->file == file)
+			goto done;
+	}
+	ufile = NULL;
+done:
+	spin_unlock(&unload->lock);
+	return ufile;
+}
+EXPORT_SYMBOL_GPL(find_unload_file);
+
+/*
+ * Called from a file's release method.  Returns true -- permitting the
+ * normal release path to run -- while ufile is still hashed on its
+ * unload's ufiles list; a true return must be paired with
+ * unload_release_unlock(), which performs the actual detach.  Returns
+ * false once the ufile has been detached (presumably because the
+ * object's removal path already released it past the unload barrier).
+ */
+bool unload_release_trylock(struct unload_file *ufile)
+{
+	struct unload *unload = ufile->unload;
+	bool locked = false;
+
+	spin_lock(&unload->lock);
+	if (!hlist_unhashed(&ufile->list))
+		locked = true;
+	spin_unlock(&unload->lock);
+	return locked;
+}
+EXPORT_SYMBOL_GPL(unload_release_trylock);
+
+/*
+ * Finish a release begun with a successful unload_release_trylock():
+ * detach the ufile and, if an unload barrier is installed, account for
+ * this releaser, completing the barrier when no active lock holders
+ * and no other pending releasers remain.
+ */
+void unload_release_unlock(struct unload_file *ufile)
+{
+	struct unload *unload = ufile->unload;
+	struct unload_barrier *barrier;
+
+	spin_lock(&unload->lock);
+	__unload_file_detach(ufile);
+	barrier = unload->barrier;
+	if (barrier) {
+		barrier->releasers -= 1;
+		if ((barrier->releasers == 0) && (unload->active == 0))
+			complete(&barrier->completion);
+	}
+	spin_unlock(&unload->lock);
+}
+EXPORT_SYMBOL_GPL(unload_release_unlock);
+
+
+/*
+ * Wait until no code is executing inside the critical sections guarded
+ * by unload_trylock()/unload_unlock() or by the release trylock/unlock
+ * pair, and prevent any new entries.  On return the caller may tear
+ * down the object; files remaining on the ufiles list have had their
+ * f_count raised, so __fput cannot run on them behind the caller's back.
+ */
+void unload_barrier(struct unload *unload)
+{
+	struct unload_barrier barrier;
+	struct unload_file *ufile;
+	struct hlist_node *pos;
+
+	/* Guarantee that when this function returns I am not
+	 * executing any code protected by the unload_lock or
+	 * unload_release_lock, and that I will never again execute
+	 * code protected by those locks.
+	 *
+	 * Also guarantee the file count for every file remaining on
+	 * the unload ufiles list has been incremented.  The increment
+	 * of the file count guarantees __fput will not be called.
+	 */
+	init_completion(&barrier.completion);
+	barrier.releasers = 0;
+
+	spin_lock(&unload->lock);
+	unload->barrier = &barrier;
+
+	/* Files whose f_count already hit zero have an __fput (and thus
+	 * a release) in flight; count them so we wait for those too. */
+	hlist_for_each_entry(ufile, pos, &unload->ufiles, list)
+		if (!atomic_long_inc_not_zero(&ufile->file->f_count))
+			barrier.releasers++;
+	/* Drop the initial reference taken in unload_init(). */
+	unload->active--;
+	if (unload->active || barrier.releasers) {
+		spin_unlock(&unload->lock);
+		wait_for_completion(&barrier.completion);
+		spin_lock(&unload->lock);
+	}
+	spin_unlock(&unload->lock);
+}
diff --git a/include/linux/unload.h b/include/linux/unload.h
new file mode 100644
index 0000000..fc1b4f6
--- /dev/null
+++ b/include/linux/unload.h
@@ -0,0 +1,33 @@
+#ifndef _LINUX_UNLOAD_H
+#define _LINUX_UNLOAD_H
+
+#include <linux/list.h>
+
+struct file;
+struct vm_operations_struct;
+struct unload_barrier;
+
+/*
+ * Per-object unload state.
+ * ufiles:  unload_file nodes for the object's open files
+ * barrier: non-NULL while unload_barrier() is in progress; its presence
+ *          makes unload_trylock() fail
+ * lock:    protects all fields and the ufiles list
+ * active:  count of code inside unload-protected sections, plus one
+ *          initial reference that unload_barrier() drops
+ */
+struct unload {
+	struct hlist_head	ufiles;
+	struct unload_barrier	*barrier;
+	spinlock_t		lock;
+	int			active;
+};
+
+/* Tracks one open file of an unloadable object; linked on unload->ufiles. */
+struct unload_file {
+	struct unload		*unload;
+	struct hlist_node	list;
+	struct file 		*file;
+};
+
+void unload_init(struct unload *unload);
+void unload_file_init(struct unload_file *ufile, struct file *file, struct unload *unload);
+bool unload_trylock(struct unload *unload);
+void unload_unlock(struct unload *unload);
+bool unload_release_trylock(struct unload_file *ufile);
+void unload_release_unlock(struct unload_file *ufile);
+void unload_file_attach(struct unload_file *ufile, struct unload *unload);
+void unload_file_detach(struct unload_file *ufile);
+struct unload_file *find_unload_file(struct unload *unload, struct file *file);
+void unload_barrier(struct unload *unload);
+#endif /* _LINUX_UNLOAD_H */
-- 
1.7.2.2


  parent reply	other threads:[~2010-09-26 22:48 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-09-14 18:35 [PATCH 0/5] Uio enhancements Eric W. Biederman
2010-09-14 18:36 ` [PATCH 1/5] uio: Fix lack of locking in init_uio_class Eric W. Biederman
2010-09-17 20:32   ` Thomas Gleixner
2010-09-17 20:49     ` Hans J. Koch
2010-09-14 18:36 ` [PATCH 2/5] uio: Don't clear driver data Eric W. Biederman
2010-09-17 20:33   ` Thomas Gleixner
2010-09-17 20:50     ` Hans J. Koch
2010-09-14 18:37 ` [PATCH 3/5] uio: Cleanup irq handling Eric W. Biederman
2010-09-17 20:34   ` Thomas Gleixner
2010-09-17 20:51     ` Hans J. Koch
2010-09-14 18:38 ` [PATCH 4/5] uio: Support 2^MINOR_BITS minors Eric W. Biederman
2010-09-17 20:36   ` Thomas Gleixner
2010-09-17 20:57     ` Hans J. Koch
2010-09-17 21:09       ` Greg KH
2010-09-21 21:08     ` Greg KH
2010-09-21 21:38       ` Thomas Gleixner
2010-09-21 21:56         ` Greg KH
2010-09-21 22:21         ` Eric W. Biederman
2010-09-21 22:26           ` Thomas Gleixner
2010-09-14 18:38 ` [PATCH 5/5] uio: Statically allocate uio_class and use class .dev_attrs Eric W. Biederman
2010-09-17 20:37   ` Thomas Gleixner
2010-09-17 20:57     ` Hans J. Koch
2010-09-17 20:59 ` [PATCH 0/5] Uio enhancements Hans J. Koch
2010-09-20  7:19   ` [PATCH 0/5] uio hotplug support Eric W. Biederman
2010-09-20  7:21     ` [PATCH 1/5] uio: Simplify the lifetime logic of struct uio_device Eric W. Biederman
2010-09-20  7:21     ` [PATCH 2/5] uio: Kill unused vma_count Eric W. Biederman
2010-09-20  7:23     ` [PATCH 3/5] uio: Remove unused uio_info mmap method Eric W. Biederman
2010-09-20  7:23     ` [PATCH 4/5] libunload: A library to help remove open files Eric W. Biederman
2010-09-20  7:24     ` [PATCH 5/5] uio: Implement hotunplug support, using libunload Eric W. Biederman
2010-09-24 10:55       ` Hans J. Koch
2010-09-24 17:11         ` Eric W. Biederman
2010-09-25  2:06         ` [PATCH] uio: Fix accidentally changed return value in uio_read Eric W. Biederman
2010-09-24 10:45     ` [PATCH 0/5] uio hotplug support Hans J. Koch
2010-09-24 17:14       ` Eric W. Biederman
2010-09-24 17:31         ` Hans J. Koch
2010-09-24 18:38           ` Eric W. Biederman
2010-09-25  0:05           ` Eric W. Biederman
2010-09-25  0:33             ` Greg KH
2010-09-25  1:54               ` Eric W. Biederman
2010-09-26 19:21                 ` Greg KH
2010-09-26 22:46                   ` [PATCH 1/5] uio: Simplify the lifetime logic of struct uio_device Eric W. Biederman
2010-09-30 22:00                     ` Hans J. Koch
2010-09-26 22:47                   ` [PATCH 2/5] uio: Kill unused vma_count Eric W. Biederman
2010-09-26 22:48                   ` [PATCH 3/5] uio: Remove unused uio_info mmap method Eric W. Biederman
2010-10-04  9:26                     ` Hans J. Koch
2010-09-26 22:48                   ` Eric W. Biederman [this message]
2010-10-04  9:56                     ` [PATCH 4/5] libunload: A library to help remove open files Hans J. Koch
2010-09-26 22:49                   ` [PATCH 5/5] uio: Implement hotunplug support, using libunload Eric W. Biederman
2010-10-04 10:47                     ` Hans J. Koch
2010-10-04 12:34                     ` Hans J. Koch
2010-10-04 18:19                       ` Eric W. Biederman
2010-10-04 18:52                         ` Hans J. Koch
2010-09-26 22:49                   ` [PATCH 6/5] uio: Fix accidentally changed return value in uio_read Eric W. Biederman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=m1ocbk86k3.fsf_-_@fess.ebiederm.org \
    --to=ebiederm@xmission.com \
    --cc=greg@kroah.com \
    --cc=gregkh@suse.de \
    --cc=hjk@linutronix.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.