* [RFC/PATCH] FUSYN 5/10: kernel fuqueues
       [not found] <0312030051..akdxcwbwbHdYdmdSaFbbcycyc3a~bzd25502@intel.com>
@ 2003-12-03  8:51 ` inaky.perez-gonzalez
  2003-12-03  8:51   ` [RFC/PATCH] FUSYN 6/10: user space/kernel space tracker inaky.perez-gonzalez
                     ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: inaky.perez-gonzalez @ 2003-12-03  8:51 UTC (permalink / raw)
  To: linux-kernel; +Cc: inaky.perez-gonzalez, robustmutexes

 include/linux/fuqueue.h |  451 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/plist.h   |  197 ++++++++++++++++++++
 kernel/fuqueue.c        |  220 +++++++++++++++++++++++
 3 files changed, 868 insertions(+)

--- /dev/null	Tue Dec  2 20:07:55 2003
+++ linux/include/linux/fuqueue.h	Wed Nov 19 16:42:50 2003
@@ -0,0 +1,451 @@
+
+/*
+ * Fast User real-time/pi/pp/robust/deadlock SYNchronization
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>.
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on normal futexes (futex.c), (C) Rusty Russell.
+ * Please refer to Documentation/fusyn.txt for more info.
+ *
+ * Quick usage guide:
+ *
+ * struct fuqueue f = fuqueue_INIT (&f);
+ *
+ * fuqueue_wait (&f, 4343)	Wait for max 4343 jiffies
+ * fuqueue_wake (&f, 5, -EPERM) Wake the first five guys waiting on f
+ *				with -EPERM.
+ *
+ * These are simple priority-sorted wait queues that can be used from
+ * the kernel. They provide the foundation for more complex items
+ * (fulocks, fuconds, ufulocks, ufuconds) and use from user-space
+ * (ufuqueues, just like futexes).
+ *
+ * The type wlist_t provides the sorting for the wait list. The type
+ * 'struct fuqueue_waiter' represents a waiting task on a fuqueue. The
+ * 'struct fuqueue_ops' is what allows extensibility for other
+ * synchronization primitives.
+ *
+ * I have just realized that this is too similar to the wait_queues...
+ */
+
+#ifndef __linux_fuqueue_h__
+#define __linux_fuqueue_h__
+
+#ifdef __KERNEL__
+
+#include <linux/spinlock.h>
+#include <linux/plist.h>
+#include <linux/sched.h>
+#include <asm/hardirq.h>
+
+  /*
+   * Debug stuff -- move forward for seeing the real meat
+   * 
+   * Levels
+   * 
+   *   0 Nothing activated, all compiled out
+   * > 0 Activates assertions, memory allocation tracking
+   * > 1 Activates debug messages
+   * > 2 Activates [most] function traces
+   * > 3 Activates random debug stuff
+   */
+
+#undef DEBUG
+#define DEBUG 0
+
+#if DEBUG > 0
+#if 1 || !defined(__i386__)
+#define __debug_printstr(a...) printk(a)	/* Dump to normal console */
+#else						/* Dump straight to ttyS0 */
+#include <linux/serial_reg.h>
+#include <linux/stringify.h>
+static inline void __debug_outb (unsigned val, int port) {
+	__asm__ __volatile__ ("outb %b0,%w1" : : "a" (val), "Nd" (port));
+}
+static inline unsigned __debug_inb (int port) {
+	unsigned value;
+	__asm__ __volatile__ ("inb %w1,%b0" : "=a" (value) : "Nd" (port));
+	return value;
+}
+static inline
+void __debug_printstr (const char *str) {
+	const int port = 0x03f8;  
+	while (*str) {
+		while (!(__debug_inb (port + UART_LSR) & UART_LSR_THRE));
+		__debug_outb (*str++, port+UART_TX);
+	}
+	__debug_outb ('\r', port + UART_TX);
+}
+#endif
+
+extern spinlock_t __debug_lock;
+
+static inline
+u64 __tsc_read (void)
+{
+	u64 tsc;
+#if defined(__i386__)
+	__asm__ __volatile__("rdtsc" : "=A" (tsc));
+#elif defined (__ia64__)
+	__asm__ __volatile__("mov %0=ar.itc" : "=r" (tsc) : : "memory");
+#else
+#warning "Architecture not supported in __tsc_read()!"
+	tsc = 0;
+#endif
+	return tsc;
+}
+
+#define __debug(a...)							\
+do {									\
+	/* Dirty: Try to avoid >1 CPUs printing ... will suck */	\
+	char __X_buf[256];						\
+	unsigned __X_len;						\
+	unsigned long __X_flags;					\
+	__X_len = snprintf (__X_buf, 255, "%Lu: %s:%d: %s[%d:%d] ",	\
+			__tsc_read(), __FILE__, __LINE__, __FUNCTION__,	\
+			current->pid, current->thread_info->cpu);	\
+	snprintf (__X_buf + __X_len, 255 - __X_len, a);			\
+	spin_lock_irqsave (&__debug_lock, __X_flags);			\
+	__debug_printstr (__X_buf);					\
+	spin_unlock_irqrestore (&__debug_lock, __X_flags);		\
+} while (0)
+#endif /* #if DEBUG > 0 */
+
+/* The real debug statements */
+
+#if DEBUG > 0
+#define ldebug(l,a...)	 do { if (DEBUG >= l) __debug (a); } while (0)
+#define debug(a...)	 ldebug(1,a)
+#define fdebug(f, a...)	 do { if ((DEBUG >= 2) && f) __debug (a); } while (0)
+#define __ftrace(l,a...) do { if ((l)) __debug (a); } while (0)
+#define ftrace(a...)	 __ftrace((DEBUG >= 2),a)
+#define assert(c, a...)	 do { if ((DEBUG >= 0) && !(c)) BUG(); } while (0)
+#else
+#define ldebug(l,a...)	 
+#define debug(a...)	 
+#define fdebug(f, a...)	 
+#define __ftrace(l,a...) 
+#define ftrace(a...)	 
+#define assert(c, a...)	 
+#endif
+
+
+
+
+/**
+ * Wait list type, O(N) addition, O(1) removal
+ *
+ * This is just a redirection of the methods used to manage the list
+ * of waiters that are waiting for a fuqueue. For hardcore-timing
+ * applications, you might want to use a PALIST instead of a PLIST,
+ * but FIXME, it is not done yet :)
+ */
+
+typedef struct plist wlist_t;
+
+#define FULOCK_WLIST_USE_PLIST
+#ifdef FULOCK_WLIST_USE_PLIST
+#define wlist_add plist_add
+#define wlist_rem plist_rem
+#define __wlist_rem __plist_rem
+#define wlist_first plist_first
+#define wlist_empty plist_empty
+#define wlist_INIT plist_INIT
+#define wlist_init plist_init
+#define wlist_last plist_last
+#define wlist_update_prio plist_update_prio
+#define wlist_prio plist_prio
+#define wlist_chprio plist_chprio
+#define __wlist_chprio __plist_chprio
+#if 0
+#define wlist_for_each_safe plist_for_each_safe
+#endif
+#endif
+
+struct task_struct;
+struct fuqueue;
+
+/** Descriptor of a waiting task */
+struct fuqueue_waiter {
+	wlist_t wlist_node;		/* node for the wait list */
+	struct task_struct *task;	/* task that is waiting */
+	int result;			/* what happened */
+};
+
+/**
+ * Operations on a fuqueue.
+ *
+ * FIXME: is it worth having get/put? Maybe they should be enforced
+ *        for every fuqueue; that way we don't have to query the ops
+ *        structure for the get/put method and, only if it is there,
+ *        call it. We'd have to move the get/put ops over to the
+ *        vlocator, but that's not much of a problem.
+ *
+ *        The deciding factor is that an atomic operation needs to
+ *        lock the whole bus and is not as scalable as testing a
+ *        pointer for NULLness.
+ *
+ *        For simplicity, forcing the refcount into the fuqueue
+ *        probably makes sense.
+ */
+struct fuqueue_ops {
+	void (* get) (struct fuqueue *);
+	void (* put) (struct fuqueue *);
+	unsigned (* wait_cancel) (struct fuqueue *, struct fuqueue_waiter *);
+	struct task_struct * (* chprio) (
+		struct task_struct *, struct fuqueue *,
+		struct fuqueue_waiter *);
+};
+
+/** A fuqueue, a prioritized wait queue usable from kernel space. */
+struct fuqueue {
+	spinlock_t lock;	
+	wlist_t wlist;
+	struct fuqueue_ops *ops;
+};
+
+
+/** Initialize a @fuqueue structure with given @ops */
+static inline
+void __fuqueue_init (struct fuqueue *fuqueue, struct fuqueue_ops *ops)
+{
+	spin_lock_init (&fuqueue->lock);
+	wlist_init (&fuqueue->wlist);
+	fuqueue->ops = ops;
+}
+
+/** Statically initialize a @fuqueue with given @ops. */
+#define __fuqueue_INIT(fuqueue, ops) {			\
+	.lock = SPIN_LOCK_UNLOCKED,			\
+	.wlist = wlist_INIT (&(fuqueue)->wlist),	\
+	.ops = (ops)					\
+}
+
+
+/** fuqueue operations for in-kernel usage */
+extern struct fuqueue_ops fuqueue_ops;
+
+
+/** Initialize a @fuqueue for usage within the kernel */
+static inline
+void fuqueue_init (struct fuqueue *fuqueue)
+{
+	__fuqueue_init (fuqueue, &fuqueue_ops);
+}
+
+/** Statically initialize a @fuqueue for usage within the kernel */
+#define fuqueue_INIT(fuqueue) __fuqueue_INIT(fuqueue, &fuqueue_ops)
+
+
+/** Wait on a @fuqueue until woken up or until @timeout jiffies
+ * elapse; @returns the wake-up code */
+extern int fuqueue_wait (struct fuqueue *fuqueue, signed long timeout);
+
+/* Cancel the wait on a fuqueue */
+extern void fuqueue_wait_cancel (struct task_struct *, int);
+
+/*
+ * The following are functions to be able to access fuqueue
+ * functionality when building on top of it, like the [u]fulocks,
+ * [u]fuconds and ufuqueues.
+ */
+
+#if DEBUG > 0
+/* BUG_ON() firing? Temporary fix, do you have CONFIG_PREEMPT enabled?
+ * either that or disable DEBUG (force #define it to zero). */ 
+#define CHECK_IRQs() do { BUG_ON (!in_atomic()); } while (0)
+#else
+#define CHECK_IRQs() do {} while (0)
+#endif
+
+
+/**
+ * Set up @current to wait on a fuqueue.
+ *
+ * This only sets up the wait; it does not block.
+ *
+ * @fuqueue: fuqueue to wait on.
+ * @w: waiter structure to fill in and queue.
+ * @returns: !0 if the wlist priority changed.
+ *
+ * Fills in @current's fuqueue_wait* info and queues up @w after
+ * filling it in.
+ *
+ * WARNING: Call with preempt and local IRQs disabled
+ */
+static inline
+unsigned __fuqueue_wait_queue (struct fuqueue *fuqueue,
+			       struct fuqueue_waiter *w)
+{
+	ftrace ("(%p, %p)\n", fuqueue, w);
+	CHECK_IRQs();
+	
+	_raw_spin_lock (&current->fuqueue_wait_lock);
+	current->fuqueue_wait = fuqueue;
+	current->fuqueue_waiter = w;
+	_raw_spin_unlock (&current->fuqueue_wait_lock);
+	w->task = current;
+	w->result = INT_MAX;
+	__wlist_chprio (&w->wlist_node, current->prio);
+	return wlist_add (&fuqueue->wlist, &w->wlist_node);
+}
+
+
+/**
+ * Wakes up a single waiter and cleans up its task wait information.
+ *
+ * Needs to be split out of __fuqueue_wake() as
+ * fuqueue_wait_cancel() needs to acquire the locks in a funny way to
+ * prevent issues.
+ *
+ * WARNING: call with preempt and local IRQs disabled!!
+ */
+static inline
+void __fuqueue_wait_unqueue (struct fuqueue_waiter *w, int result)
+{
+	struct task_struct *task = w->task;
+
+	ftrace ("(%p [%d], %d)\n", w, w->task->pid, result);
+	CHECK_IRQs();
+	
+	__wlist_rem (&w->wlist_node);
+	_raw_spin_lock (&task->fuqueue_wait_lock);
+	task->fuqueue_wait = NULL;
+	task->fuqueue_waiter->result = result;
+	task->fuqueue_waiter = NULL;
+	_raw_spin_unlock (&task->fuqueue_wait_lock);
+}
+
+
+/**
+ * Wait on a @fuqueue until woken up; @returns the wake-up code
+ *
+ * Needs to be called with the fuqueue lock held, local IRQs disabled
+ * and preemption disabled. Will release the lock and re-enable IRQs
+ * and preemption.
+ */
+static inline
+int __fuqueue_wait_block (struct fuqueue *fuqueue, struct fuqueue_waiter *w,
+			  signed long timeout)
+{
+	ftrace ("(%p, %p, %ld)\n", fuqueue, w, timeout);
+	CHECK_IRQs();
+	
+	set_current_state (TASK_INTERRUPTIBLE);
+	_raw_spin_unlock (&fuqueue->lock);
+	local_irq_enable();
+	preempt_enable_no_resched();
+	
+	/* Wait until we are woken up */
+	timeout = schedule_timeout (timeout);
+	set_current_state (TASK_RUNNING);
+	/*
+	 * Now, whoever woke us up must have first called
+	 * fuqueue->ops->wait_cancel() through fuqueue_wait_cancel(),
+	 * and thus unqueued us and set a return code in
+	 * w->result. However, if w->result is still pristine as left
+	 * by __fuqueue_wait_queue(), that means our waker either
+	 * didn't know we were waiting or we have signal(s) pending,
+	 * so we cancel the wait ourselves.
+	 */
+	if (unlikely (w->result == INT_MAX)) {
+		int result = -EINTR;
+		BUG_ON (wlist_empty (&w->wlist_node));
+		if (timeout == 0)
+			result = -ETIMEDOUT;
+		else
+			WARN_ON (!signal_pending (current));
+		fuqueue_wait_cancel (current, result);
+	}
+	return w->result;
+}
+
+
+
+/** @Returns true if the @fuqueue has no waiters. */
+static inline
+unsigned __fuqueue_empty (const struct fuqueue *fuqueue)
+{
+	return wlist_empty (&fuqueue->wlist);
+}
+
+
+/** Return the first waiter on a @fuqueue */
+static inline
+struct fuqueue_waiter * __fuqueue_first (struct fuqueue *fuqueue)
+{
+	return container_of (wlist_first (&fuqueue->wlist),
+			     struct fuqueue_waiter, wlist_node);
+}
+
+/** Wake @howmany @fuqueue waiters with @code. */
+static inline
+void __fuqueue_wake (struct fuqueue *fuqueue, size_t howmany, int code)
+{
+	unsigned to_go = howmany;
+	struct fuqueue_waiter *w;
+
+        ftrace ("(%p, %zu, %d)\n", fuqueue, howmany, code);
+	
+        while (to_go-- && !__fuqueue_empty (fuqueue)) {
+		w = __fuqueue_first (fuqueue);
+                __fuqueue_wait_unqueue (w, code);
+		wake_up_process (w->task);
+	}
+        if (likely (to_go != howmany))
+		wlist_update_prio (&fuqueue->wlist);
+}
+
+
+/** Wake @howmany waiters waiting on a @fuqueue, with return code (for
+ * them) @code */
+static inline
+int fuqueue_wake (struct fuqueue *fuqueue, size_t howmany, int code)
+{
+	unsigned long flags;
+
+        ftrace ("(%p, %zu, %d)\n", fuqueue, howmany, code);
+	
+	spin_lock_irqsave (&fuqueue->lock, flags);
+	__fuqueue_wake (fuqueue, howmany, code);
+	spin_unlock_irqrestore (&fuqueue->lock, flags);
+	return 0;
+}
+
+
+/* See docs in kernel/fuqueue.c */
+extern unsigned __fuqueue_wait_cancel (struct fuqueue *, struct fuqueue_waiter *);
+
+/** Propagation of priority change */
+extern void fuqueue_chprio (struct task_struct *);
+extern void __set_prio (struct task_struct *p, int prio);
+
+
+/**
+ * Set the priority of a fuqueue waiter, repositioning it in the wait
+ * list.
+ *
+ * This does not set the prio of the process itself!
+ *
+ * @task: task to reprioritize (its new priority is already in @task->prio)
+ * @fuqueue: fuqueue the task is waiting on [locked]
+ * @w: waiter entry for @task in @fuqueue.
+ * @returns: NULL (as there is no propagation).
+ *
+ * Assumptions: the waiter's recorded priority != @task->prio
+ *		fuqueue->lock held
+ */
+static inline
+struct task_struct * __fuqueue_chprio (struct task_struct *task,
+				       struct fuqueue *fuqueue,
+				       struct fuqueue_waiter *w)
+{
+	wlist_chprio (&fuqueue->wlist, &w->wlist_node, task->prio);
+	return NULL;
+}
+
+#endif /* #ifdef __KERNEL__ */
+#endif /* #ifndef __linux_fuqueue_h__ */
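
To make the "Quick usage guide" at the top of fuqueue.h concrete, a minimal
in-kernel usage sketch could look like the following (illustrative only, not
part of the patch; the producer/consumer split and the function names are
made up):

    static struct fuqueue my_queue = fuqueue_INIT (&my_queue);

    /* Waiter: block for up to one second; returns the wake-up code,
     * or -ETIMEDOUT / -EINTR if the wait was cancelled. */
    static int consumer_wait (void)
    {
            return fuqueue_wait (&my_queue, HZ);
    }

    /* Waker: wake the single highest-priority waiter with code 0. */
    static void producer_wake (void)
    {
            fuqueue_wake (&my_queue, 1, 0);
    }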
--- /dev/null	Tue Dec  2 20:07:55 2003
+++ linux/include/linux/plist.h	Sun Nov 16 07:21:32 2003
@@ -0,0 +1,197 @@
+/*
+ * Descending-priority-sorted double-linked list
+ *
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>.
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on simple lists (include/linux/list.h).
+ *
+ * The list head always caches the highest priority among the nodes
+ * in the list. Addition is O(N), removal is O(1), change of
+ * priority is O(N).
+ *
+ * 0 is highest priority, ~0 is lowest priority.
+ *
+ * No locking is done, up to the caller.
+ */
+
+#ifndef _LINUX_PLIST_H_
+#define _LINUX_PLIST_H_
+
+#include <linux/list.h>
+
+/* Priority-sorted list */
+struct plist {
+	unsigned prio;
+	struct list_head node;
+};
+
+#define plist_INIT(p)				\
+{						\
+	.node = LIST_HEAD_INIT((p)->node),	\
+	.prio = ~0				\
+}
+
+/* Initialize a pl */
+static inline
+void plist_init (struct plist *pl)
+{
+	INIT_LIST_HEAD (&pl->node);
+	pl->prio = ~0;
+}
+
+/* Return the first node (and thus, highest priority)
+ *
+ * Assumes the plist is _not_ empty.
+ */
+static inline
+struct plist * plist_first (struct plist *plist)
+{
+	return list_entry (plist->node.next, struct plist, node);
+}
+
+/* Return if the plist is empty. */
+static inline
+unsigned plist_empty (const struct plist *plist)
+{
+	return list_empty (&plist->node);
+}
+
+/* Update the maximum priority
+ *
+ * Return !0 if the plist's maximum priority changes.
+ *
+ * __plist_update_prio() assumes the plist is not empty.
+ */
+static inline
+unsigned __plist_update_prio (struct plist *plist)
+{
+	unsigned prio = plist_first (plist)->prio;
+	if (plist->prio == prio)
+		return 0;
+	plist->prio = prio;
+	return !0;
+}
+
+static inline
+unsigned plist_update_prio (struct plist *plist)
+{
+	unsigned old_prio = plist->prio;
+	/* plist empty, max prio = ~0 */
+	plist->prio = plist_empty (plist)? ~0 : plist_first (plist)->prio;
+	return old_prio != plist->prio;
+}
+
+/* Add a node to the plist [internal]
+ *
+ * pl->prio == ~0 is a special case: it means lowest priority, so go
+ * straight to the end of the plist. Note the <; we want to add a new
+ * node after any existing nodes of the same priority (FIFO behaviour
+ * within the same priority).
+ */
+static inline
+void __plist_add_sorted (struct plist *plist, struct plist *pl)
+{
+	struct list_head *itr;
+	struct plist *itr_pl;
+
+	if (pl->prio < ~0)
+		list_for_each (itr, &plist->node) {
+			itr_pl = list_entry (itr, struct plist, node);
+			if (pl->prio < itr_pl->prio)
+				break;
+		}
+	else
+		itr = &plist->node;
+	list_add_tail (&pl->node, itr);
+}
+
+/* Add a node to a plist
+ *
+ * Return !0 if the plist's maximum priority changes.
+ */
+static inline
+unsigned plist_add (struct plist *plist, struct plist *pl)
+{
+	__plist_add_sorted (plist, pl);
+	/* Are we setting a higher priority? */
+	if (pl->prio < plist->prio) {
+		plist->prio = pl->prio;
+		return !0;
+	}
+	return 0;
+}
+
+/* Return the priority of a pl node */
+static inline
+unsigned plist_prio (struct plist *pl)
+{
+	return pl->prio;
+}
+
+/* Change the priority of a pl node, without updating plist position */
+static inline
+void __plist_chprio (struct plist *pl, unsigned new_prio)
+{
+	pl->prio = new_prio;
+}
+
+/* Change the priority of a pl node updating the list's max priority.
+ *
+ * Return !0 if the plist's maximum priority changes
+ */
+static inline
+unsigned plist_chprio (struct plist *plist, struct plist *pl,
+		       unsigned new_prio)
+{
+	__plist_chprio (pl, new_prio);
+	list_del (&pl->node);
+	__plist_add_sorted (plist, pl);
+	return __plist_update_prio (plist);
+}
+
+/* Remove a pl node from a plist
+ *
+ * Return !0 if the plist's maximum priority changed.
+ */
+static inline
+void __plist_rem (struct plist *pl)
+{
+	list_del_init (&pl->node);
+}
+static inline
+unsigned plist_rem (struct plist *plist, struct plist *pl)
+{
+	__plist_rem (pl);
+	return plist_update_prio (plist);
+}
+
+/* Iterate over a plist - in priority order (from high to low) */
+#define plist_for_each(pos, head)					\
+for (pos = container_of ((head)->node.next, struct plist, node),	\
+       prefetch (pos->node.next);					\
+     pos != (head);							\
+     pos = container_of (pos->node.next, struct plist, node),		\
+       prefetch (pos->node.next))
+
+#define plist_for_each_safe(pos, n, head)					\
+	for (pos = container_of ((head)->node.next, struct plist, node),	\
+	       n = container_of (pos->node.next, struct plist, node);		\
+	     pos != (head);							\
+	     pos = n,								\
+	       n = container_of (pos->node.next, struct plist, node))
+
+
+/** Return !0 if node @pl is the last one on list @plist. */
+
+static inline
+unsigned plist_last (const struct plist *plist, const struct plist *pl)
+{
+	return pl->node.next == &plist->node;
+}
+
+
+#endif /* #ifndef _LINUX_PLIST_H_ */
+
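
As a quick illustration of the semantics described above (lower number means
higher priority, FIFO ordering within one priority level), a hypothetical
in-kernel use of this API could be:

    struct plist head = plist_INIT (&head);     /* empty list, prio ~0 */
    struct plist a = plist_INIT (&a), b = plist_INIT (&b);
    struct plist *top;

    __plist_chprio (&a, 10);            /* set node priorities ...      */
    __plist_chprio (&b, 5);             /* ... before queueing them     */
    plist_add (&head, &a);              /* O(N) sorted insertion        */
    plist_add (&head, &b);              /* returns !0: max prio is now 5 */

    if (!plist_empty (&head))
            top = plist_first (&head);  /* &b, the highest priority node */
    plist_rem (&head, &b);              /* O(1) removal + head prio update */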
--- /dev/null	Tue Dec  2 20:07:55 2003
+++ linux/kernel/fuqueue.c	Tue Dec  2 20:05:38 2003
@@ -0,0 +1,220 @@
+
+/*
+ * Fast User real-time/pi/pp/robust/deadlock SYNchronization
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>.
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on normal futexes (futex.c), (C) Rusty Russell.
+ * Please refer to Documentation/fusyn.txt for more info.
+ *
+ * see the doc in linux/fuqueue.h for some info.
+ */
+
+#define DEBUG 8
+
+#include <linux/fuqueue.h>
+#include <linux/sched.h>
+#include <linux/plist.h>
+#include <linux/errno.h>
+
+#if DEBUG > 0
+spinlock_t __debug_lock = SPIN_LOCK_UNLOCKED;
+#endif
+
+
+/**
+ * Wait on a @fuqueue until woken up or until @timeout jiffies
+ * elapse; @returns the wake-up code
+ *
+ * WARNING: can only be called from process context
+ */
+int fuqueue_wait (struct fuqueue *fuqueue, signed long timeout)
+{
+        struct fuqueue_waiter w;
+	
+        ftrace ("(%p, %ld)\n", fuqueue, timeout);
+	
+	spin_lock_irq (&fuqueue->lock);
+	__fuqueue_wait_queue (fuqueue, &w);
+	return __fuqueue_wait_block (fuqueue, &w, timeout); /* unlocks */
+}
+
+
+/**
+ * Change the priority of a waiter.
+ *
+ * @task: task whose priority has to be changed.
+ *
+ * This is the entry point that the scheduler functions call when a
+ * task that is waiting on a fuqueue changes its priority. It safely
+ * determines which fuqueue the task is waiting on, calls that
+ * fuqueue's specific chprio function and, if that function decides
+ * the priority change has to be propagated to another task, keeps
+ * propagating it along the ownership chain.
+ *
+ * The task that the chprio function returns has to be returned with
+ * a get_task_struct() reference.
+ *
+ * Note the weird locking: we are one of the few places that needs to
+ * take the locks in inverse order (most take the fuqueue lock first,
+ * the task_wait lock later--FT). We need TF, so we take T, trylock F
+ * and, if that fails, unlock T and try again.
+ */
+void fuqueue_chprio (struct task_struct *task)
+{
+	struct fuqueue_ops *ops;
+	struct fuqueue *fuqueue;
+	struct fuqueue_waiter *w;
+	int prio = task->prio;
+	unsigned long flags;
+
+	__ftrace (1, "(%p [%d])\n", task, task->pid);
+	CHECK_IRQs();
+	
+	local_irq_save (flags);
+	preempt_disable();
+	get_task_struct (task);
+next:
+	/* Who is the task waiting for? safely acquire and lock it */
+	_raw_spin_lock (&task->fuqueue_wait_lock);
+	fuqueue = task->fuqueue_wait;
+	if (fuqueue == NULL)				/* Ok, not waiting */
+		goto out_task_unlock;
+	if (!_raw_spin_trylock (&fuqueue->lock)) {      /* Spin dance... */
+		_raw_spin_unlock (&task->fuqueue_wait_lock);
+		goto next;
+	}
+	ops = fuqueue->ops;
+	if (ops->get)
+		ops->get (fuqueue);
+	
+	w = task->fuqueue_waiter;
+	_raw_spin_unlock (&task->fuqueue_wait_lock);
+	put_task_struct (task);
+	
+	/* Ok, we need to propagate the prio change to the fuqueue */
+	ops = fuqueue->ops;
+	task = ops->chprio (task, fuqueue, w);
+	if (task == NULL)
+		goto out_fuqueue_unlock;
+
+	/* Do we need to propagate to the next task */
+	get_task_struct (task);
+	_raw_spin_unlock (&fuqueue->lock);
+	if (ops->put)
+		ops->put (fuqueue);
+	ldebug (1, "__set_prio (%d, %d)\n", task->pid, prio);
+	__set_prio (task, prio);
+	goto next;
+
+out_fuqueue_unlock:
+	_raw_spin_unlock (&fuqueue->lock);
+	if (ops->put)
+		ops->put (fuqueue);
+	goto out;
+	
+out_task_unlock:
+	_raw_spin_unlock (&task->fuqueue_wait_lock);
+	put_task_struct (task);
+out:
+	local_irq_restore (flags);
+	preempt_enable();
+	return;
+}
+
+
+/** Cancel @w's wait on @fuqueue and update the wait list priority */
+unsigned __fuqueue_wait_cancel (struct fuqueue *fuqueue,
+				struct fuqueue_waiter *w)
+{
+	ftrace ("(%p, %p [%d])\n", fuqueue, w, w->task->pid);
+	
+	__wlist_rem (&w->wlist_node);
+	return wlist_update_prio (&fuqueue->wlist);
+}
+
+
+/**
+ * Cancel the wait of a task on a fuqueue and wake it up.
+ *
+ * @task: task whose wait is to be canceled
+ * 
+ * Called by:
+ * - signal_wake_up()
+ * - process_timeout()
+ * - __wake_up_common()
+ * - FIXME
+ * 
+ * when the task they are about to wake is waiting on a
+ * fuqueue. Safely determines which fuqueue the task is waiting on,
+ * references it, cleans up the task->fuqueue_wait* information, and
+ * then calls the fuqueue-specific wait_cancel() function.
+ *
+ * FIXME: the entry points we get called from don't seem to be all of
+ * them; the perfect thing here would be to hook into
+ * try_to_wake_up()--but that is kind of tricky...
+ *
+ * Note that for this function to actually do anything, the task must
+ * be a task waiting on a fuqueue (so that means TASK_INTERRUPTIBLE
+ * and off the runqueues).
+ */
+void fuqueue_wait_cancel (struct task_struct *task, int result)
+{
+	struct fuqueue_ops *ops;
+	struct fuqueue *fuqueue;
+	struct fuqueue_waiter *w;
+	unsigned long flags;
+
+	ftrace ("(%p [%d], %d)\n", task, task->pid, result);
+	
+	local_irq_save (flags);
+	preempt_disable();
+	get_task_struct (task);
+retry:
+	/* Who is the task waiting for? safely acquire and lock it */
+	_raw_spin_lock (&task->fuqueue_wait_lock);
+	fuqueue = task->fuqueue_wait;
+	if (fuqueue == NULL)				/* Ok, not waiting */
+		goto out_task_unlock;
+	if (!_raw_spin_trylock (&fuqueue->lock)) {      /* Spin dance... */
+		_raw_spin_unlock (&task->fuqueue_wait_lock);
+		goto retry;
+	}
+	ops = fuqueue->ops;
+	if (ops->get)
+		ops->get (fuqueue);
+
+	w = task->fuqueue_waiter;
+	w->result = result;
+	
+	/* Do the specific cancel op */
+	ops->wait_cancel (fuqueue, w);
+	task->fuqueue_wait = NULL;
+	task->fuqueue_waiter = NULL;
+	_raw_spin_unlock (&task->fuqueue_wait_lock);
+	put_task_struct (task);
+	_raw_spin_unlock (&fuqueue->lock);
+	local_irq_restore (flags);
+	preempt_enable();
+	if (ops->put)
+		ops->put (fuqueue);
+	return;
+		
+out_task_unlock:
+	_raw_spin_unlock (&task->fuqueue_wait_lock);
+	put_task_struct (task);
+	local_irq_restore (flags);
+	preempt_enable();
+	return;
+}
+
+
+/** Fuqueue operations for usage within the kernel */
+struct fuqueue_ops fuqueue_ops = {
+	.get = NULL,
+	.put = NULL,
+	.wait_cancel = __fuqueue_wait_cancel,
+	.chprio = __fuqueue_chprio
+};

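The "spin dance" that fuqueue_chprio() and fuqueue_wait_cancel() both rely
on reduces to the pattern below (a schematic extract of the code above, with
the error paths trimmed): take the task-side lock, trylock the fuqueue, and
if the trylock fails back off completely, so the normal fuqueue-then-task
lock order used everywhere else is never violated.

    retry:
            _raw_spin_lock (&task->fuqueue_wait_lock);      /* T */
            fuqueue = task->fuqueue_wait;
            if (fuqueue == NULL)                            /* not waiting */
                    goto out_task_unlock;
            if (!_raw_spin_trylock (&fuqueue->lock)) {      /* F contended */
                    _raw_spin_unlock (&task->fuqueue_wait_lock);
                    goto retry;                             /* drop T, redo */
            }
            /* ... here both T and F are held, safe to work ... */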

* [RFC/PATCH] FUSYN 6/10: user space/kernel space tracker
  2003-12-03  8:51 ` [RFC/PATCH] FUSYN 5/10: kernel fuqueues inaky.perez-gonzalez
@ 2003-12-03  8:51   ` inaky.perez-gonzalez
  2003-12-03  8:51     ` [RFC/PATCH] FUSYN 7/10: user space fuqueues inaky.perez-gonzalez
  2003-12-11 23:30   ` [RFC/PATCH] FUSYN 5/10: kernel fuqueues Matt Mackall
  2003-12-12 19:21   ` William Lee Irwin III
  2 siblings, 1 reply; 9+ messages in thread
From: inaky.perez-gonzalez @ 2003-12-03  8:51 UTC (permalink / raw)
  To: linux-kernel; +Cc: inaky.perez-gonzalez, robustmutexes

 include/linux/vlocator.h |  125 ++++++++++++++
 kernel/vlocator.c        |  414 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 539 insertions(+)

--- /dev/null	Tue Dec  2 20:07:55 2003
+++ linux/include/linux/vlocator.h	Tue Dec  2 20:05:38 2003
@@ -0,0 +1,125 @@
+
+/*
+ * Fast User real-time/pi/pp/robust/deadlock SYNchronization
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>.
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on normal futexes (futex.c), (C) Rusty Russell.
+ * Please refer to Documentation/fusyn.txt for more info.
+ *
+ * Stuff to map user space addresses to kernel space objects in a
+ * controlled way. 
+ */
+
+#ifndef __linux_vlocator_h__
+#define __linux_vlocator_h__
+
+#ifdef __KERNEL__
+
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <asm/atomic.h>
+#include <linux/futex.h> /* for the futex hash */
+
+
+/** Track a kernel-space object by a user-level vm address. */
+struct vlocator {
+	atomic_t refcount;
+	struct list_head hash_list;
+	union futex_key key;
+	const struct vlocator_ops *ops;
+};
+
+static __inline__
+void vlocator_init (struct vlocator *vlocator)
+{
+	atomic_set (&vlocator->refcount, 0);
+	INIT_LIST_HEAD (&vlocator->hash_list);
+}
+
+
+/** A single queue in the hash. */
+struct vlocator_queue
+{
+	spinlock_t lock;
+	struct list_head queue;
+};
+
+static __inline__
+void vlocator_queue_init (struct vlocator_queue *vq)
+{
+	vq->lock = SPIN_LOCK_UNLOCKED;
+	INIT_LIST_HEAD (&vq->queue);
+}
+
+struct page;
+union futex_key;
+
+/** vlocator operations on the items that are to be located. */
+struct vlocator_ops
+{
+	struct vlocator * (* alloc) (void);
+	void (* create) (struct vlocator *, struct page *,
+			 const union futex_key *, unsigned flags);
+	void (* release) (struct vlocator *);
+	void (* free) (struct vlocator *);
+};
+
+
+/** Reference a vlocator @vlocator. */
+#define vl_get(vlocator)			  \
+do { atomic_inc (&(vlocator)->refcount); } while (0)
+
+
+/** Unreference a @vlocator; return true if the refcount drops to zero. */
+static inline
+unsigned vl_put (struct vlocator *vlocator)
+{
+	return atomic_dec_and_test (&vlocator->refcount);
+}
+
+extern int vl_find (struct page **ppage, struct vlocator **pvl,
+		    const struct vlocator_ops *ops,
+		    volatile const unsigned __user *uaddr);
+extern int vl_locate (struct page **ppage, struct vlocator **pvl,
+		      const struct vlocator_ops *ops,
+		      volatile const unsigned __user *uaddr, unsigned flags);
+
+
+/* Debug stuff */
+
+/*
+ * Helpers for debugging memory allocation [needs to be here so it can
+ * be shared by ufuqueue.c and ufulock.c].
+ */
+
+#if DEBUG >= 1
+static atomic_t allocated_count = ATOMIC_INIT(0);
+static inline
+int allocated_get (void)
+{
+	return atomic_read (&allocated_count);
+}
+static inline
+int allocated_inc (void)
+{
+	atomic_inc (&allocated_count);
+	return allocated_get();
+}
+static inline
+int allocated_dec (void)
+{
+	atomic_dec (&allocated_count);
+	return allocated_get();
+}
+#else
+static inline int allocated_get (void) { return 0; }
+static inline int allocated_inc (void) { return 0; }
+static inline int allocated_dec (void) { return 0; }
+#endif /* DEBUG >= 1 */
+
+
+#endif /* #ifdef __KERNEL__ */
+#endif /* #ifndef __linux_vlocator_h__ */
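
To sketch how a new user-address-backed object type would hook into this
interface (the 'myobj' type and its helpers below are hypothetical;
kernel/ufuqueue.c later in the series does the same thing for real, using a
slab cache):

    struct myobj {
            struct vlocator vlocator;
            /* ... type-specific state ... */
    };

    static struct vlocator * myobj_alloc (void)
    {
            struct myobj *o = kmalloc (sizeof (*o), GFP_KERNEL);
            if (o != NULL)
                    vlocator_init (&o->vlocator);
            return o? &o->vlocator : NULL;
    }

    static void myobj_create (struct vlocator *vl, struct page *page,
                              const union futex_key *key, unsigned flags)
    {
            memcpy (&vl->key, key, sizeof (*key));  /* keep our own key ref */
            get_key_refs (&vl->key);
    }

    static void myobj_release (struct vlocator *vl)
    {
            drop_key_refs (&vl->key);
    }

    static void myobj_free (struct vlocator *vl)
    {
            kfree (container_of (vl, struct myobj, vlocator));
    }

    static struct vlocator_ops myobj_vops = {
            .alloc = myobj_alloc, .create = myobj_create,
            .release = myobj_release, .free = myobj_free
    };

Given a user space address, the lookup would then be:

    struct vlocator *vl;
    struct page *page;

    if (vl_locate (&page, &vl, &myobj_vops, uaddr, 0) == 0) {
            struct myobj *o = container_of (vl, struct myobj, vlocator);
            /* ... use o; the garbage collector reaps it once vl_put()
             * has dropped the refcount back to zero ... */
            vl_put (vl);
            put_page (page);
    }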
--- /dev/null	Tue Dec  2 20:07:55 2003
+++ linux/kernel/vlocator.c	Tue Dec  2 20:05:38 2003
@@ -0,0 +1,414 @@
+
+/*
+ * Fast User real-time/pi/pp/robust/deadlock SYNchronization
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>.
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on normal futexes (futex.c), (C) Rusty Russell.
+ * Please refer to Documentation/fusyn.txt for more info.
+ */
+
+#include <linux/time.h>
+#include <linux/sched.h>
+#include <linux/futex.h>
+#include <asm/uaccess.h>
+#include <linux/init.h>
+#include <linux/pagemap.h>
+#include <linux/fuqueue.h>  /* For the debug statements */
+#include <linux/vlocator.h>
+
+#define VL_GC_PERIOD 20 /* do garbage collection every 20 seconds */
+#define VL_HASH_BITS 8
+#define VL_HASH_QUEUES (1 << VL_HASH_BITS)
+static struct vlocator_queue __vl_hash[VL_HASH_QUEUES];
+
+/** Get the hash index for futex_key @k. */
+static __inline__
+struct vlocator_queue * vl_hash (const union futex_key *k)
+{
+	unsigned index = futex_hash_key (k) & (VL_HASH_QUEUES - 1);
+	return &__vl_hash[index];
+}
+
+
+/** Search in a queue. */
+static __inline__
+struct vlocator * __vlocator_find (struct vlocator_queue *vlq,
+				 const union futex_key *key)
+{
+	struct vlocator *vl = NULL;
+	struct list_head *itr;
+
+	list_for_each (itr, &vlq->queue) {
+		vl = container_of (itr, struct vlocator, hash_list);
+		if (match_futex_key (key, &vl->key))
+			goto out;
+	}
+	vl = NULL;
+out:
+	return vl;
+}
+
+
+/** Search in a queue [backwards - locates faster recent additions]. */
+static inline
+struct vlocator * __vlocator_find_r (struct vlocator_queue *vlq,
+				     const union futex_key *key)
+{
+	struct vlocator *vl = NULL;
+	struct list_head *itr;
+
+	list_for_each_prev (itr, &vlq->queue) {
+		vl = container_of (itr, struct vlocator, hash_list);
+		if (match_futex_key (key, &vl->key))
+			goto out;
+	}
+	vl = NULL;
+out:
+	return vl;
+}
+
+
+/** Remove @v from its hash queue if its use count is zero (and
+ * return true). */
+static __inline__
+unsigned __vlocator_rem (struct vlocator *v)
+{
+	unsigned result = 0, refcount;
+	refcount = atomic_read (&v->refcount);
+	if (refcount == 0) {
+		list_del (&v->hash_list);
+		result = 1;
+	}
+	return result;
+}
+
+
+/** Append @v to queue @vlq. */
+static __inline__
+void __vlocator_add (struct vlocator_queue *vlq, struct vlocator *v)
+{
+	list_add (&v->hash_list, &vlq->queue);
+}
+
+
+/**
+ * Set up all the memory mumbo jumbo we need to access a user space
+ * item: locate the page where the address is and generate a futex_key
+ * for it. Return both referenced [with pin_page() and get_key_refs()]
+ *
+ * Shamelessly copied from Jamie's kernel/futex.c:get_futex_key()...
+ * good invention this GPL thingie :)  
+ */
+int vl_setup (struct page **ppage, union futex_key *key,
+	      volatile const unsigned __user *_uaddr)
+{
+	int result;
+	struct page *page;
+	unsigned long uaddr = (unsigned long) _uaddr;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	
+	ftrace ("(%p, %p, %p)\n", ppage, key, _uaddr);
+
+	/* The futex address must be "naturally" aligned. */
+	key->both.offset = uaddr % PAGE_SIZE;
+	if (unlikely ((key->both.offset % sizeof (u32)) != 0))
+		return -EINVAL;
+	uaddr -= key->both.offset;
+	
+	/* Generate a key for the vfulock. */
+	down_read (&mm->mmap_sem);
+	/*
+	 * The futex is hashed differently depending on whether
+	 * it's in a shared or private mapping.	 So check vma first.
+	 */
+	vma = find_extend_vma (mm, uaddr);
+	result = -EFAULT;
+	if (unlikely (!vma))
+		goto error_up;
+	/* Permissions. */
+	result = (vma->vm_flags & VM_IO) ? -EPERM : -EACCES;
+	if (unlikely ((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ))
+		goto error_up;
+	/*
+	 * Private mappings are handled in a simple way.
+	 *
+	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
+	 * it's a read-only handle, it's expected that futexes attach to
+	 * the object not the particular process.  Therefore we use
+	 * VM_MAYSHARE here, not VM_SHARED which is restricted to shared
+	 * mappings of _writable_ handles.
+	 */
+	result = 0;
+	if (likely (!(vma->vm_flags & VM_MAYSHARE))) {
+		key->private.mm = mm;
+		key->private.uaddr = uaddr;
+		goto out_pin;
+	}
+	/* Linear file mappings are also simple. */
+	key->shared.inode = vma->vm_file->f_dentry->d_inode;
+	key->both.offset++; /* Bit 0 of offset indicates inode-based key. */
+	if (likely (!(vma->vm_flags & VM_NONLINEAR))) {
+		key->shared.pgoff = (((uaddr - vma->vm_start) >> PAGE_SHIFT)
+				     + vma->vm_pgoff);
+		goto out_pin;
+	}
+
+	/*
+	 * We really need to pin the page in memory, we are about to
+	 * modify it.
+	 */
+
+	/*
+	 * Do a quick atomic lookup first - this is the fastpath.
+	 */
+#warning FIXME: OPTIMIZE:
+	/*
+	 * We only need to take this path when we don't know the page;
+	 * we can hack vl_locate() so that if we know the key without
+	 * checking the page and we find the vlocator, we can take the
+	 * page from there.
+	 */
+out_pin:
+	spin_lock (&mm->page_table_lock);
+	page = follow_page (mm, uaddr, 0);
+	if (likely (page != NULL)) {
+		key->shared.pgoff =
+			page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+		if (!PageReserved (page))
+			get_page (page);
+		spin_unlock (&mm->page_table_lock);
+		goto out;
+	}
+	spin_unlock (&mm->page_table_lock);
+
+	/*
+	 * Do it the general way.
+	 */
+	result = get_user_pages (current, mm, uaddr, 1, 0, 0, &page, NULL);
+	if (result < 0)
+		goto error_up;
+	key->shared.pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+out:
+	get_key_refs (key);
+	*ppage = page;
+error_up:
+	up_read (&current->mm->mmap_sem);
+	return result;
+}
+
+
+/**
+ * Locate a vlocator for a certain user address.
+ *
+ * @ppage: Pointer to where to store the pointer to the referenced
+ *         page where @uaddr is. NULL if unneeded.
+ * @pvl:   Pointer to where to store the pointer to the located
+ *         vlocator based structure.
+ * @ops:   Pointer to the function ops for this guy (for checking)
+ * @uaddr: Address in user space.
+ * @returns: < 0 errno code on error (no resources held)
+ *	     0 if ok and:
+ *
+ *	     - Located vlocator in *pvl, referenced
+ *	     - Page pointer in *ppage (unless NULL), referenced
+ *	     - The key in (*pvl)->key has been referenced with get_key_refs()
+ */
+int vl_find (struct page **ppage, struct vlocator **pvl,
+	     const struct vlocator_ops *ops,
+	     volatile const unsigned __user *uaddr)
+{
+	int result;
+	struct vlocator_queue *vlq;
+	struct vlocator *vl;
+	union futex_key key;
+	struct page *page;
+	unsigned need_page = 0;
+	
+	ftrace ("(%p, %p, %p, %p)\n", ppage, pvl, ops, uaddr);
+
+	result = vl_setup (&page, &key, uaddr);
+	if (result < 0)
+		goto error;
+	/* Is it in the hash already? [do we know about it?] */
+	vlq = vl_hash (&key);
+	spin_lock (&vlq->lock);
+	vl = __vlocator_find (vlq, &key);
+	if (likely (vl == NULL))
+		goto out_unlock;
+	/* Check the type is correct */
+	result = -EFAULT;
+	if (unlikely (vl->ops != ops))
+		goto out_unlock;
+	result = 0;
+	vl_get (vl);
+	*pvl = vl;
+	if (ppage) {
+		*ppage = page;
+		need_page = 1;
+	}
+out_unlock:
+	spin_unlock (&vlq->lock);
+	drop_key_refs (&key);	     // Undo from ...
+	if (need_page == 0)
+		put_page (page);   // ... vl_setup() (maybe)
+error:
+	return result;
+}
+
+
+
+
+/**
+ * Locate or allocate a vlocator for a certain user address.
+ *
+ * @ppage: Pointer to where to store the pointer to the referenced
+ *         page where @uaddr is. NULL if unneeded.
+ * @pvl:   Pointer to where to store the pointer to the allocated
+ *         vlocator based structure.
+ * @ops:   Pointer to the function ops used for allocating/constructing, etc.
+ * @uaddr: Address in user space.
+ * @flags: flags, for initialization, passed to ops->create()
+ * @returns: < 0 errno code on error (no resources held)
+ *	     0 if ok and:
+ *
+ *	     - Located or allocated vlocator in *pvl, referenced
+ *	     - Page pointer in *ppage (unless NULL), referenced
+ *	     - The key in (*pvl)->key has been referenced with get_key_refs()
+ *
+ * FIXME: optimize allocation collision detection; add a modification
+ * count to the hash table; whenever anything is added, inc the
+ * count. If we see the count changed after we dropped the lock,
+ * allocated and re-locked, then we need to find_r(); if not, we can
+ * assume nothing changed.
+ */
+int vl_locate (struct page **ppage, struct vlocator **pvl,
+	       const struct vlocator_ops *ops,
+	       volatile const unsigned __user *uaddr, unsigned flags)
+{
+	int result;
+	struct vlocator_queue *vlq;
+	struct vlocator *vl, *vl_alt;
+	union futex_key key;
+	struct page *page;
+	unsigned need_page = 0;
+	
+	ftrace ("(%p, %p, %p, %p, %x)\n", ppage, pvl, ops, uaddr, flags);
+
+	result = vl_setup (&page, &key, uaddr);
+	if (result < 0)
+		goto error;
+	/* Is it in the hash already? [do we know about it?] */
+	vlq = vl_hash (&key);
+	spin_lock (&vlq->lock);
+	vl = __vlocator_find (vlq, &key);
+	if (likely (vl != NULL))
+		goto out_check;
+	spin_unlock (&vlq->lock);
+
+	/* Naaah, let's alloc it */
+	result = -ENOMEM;
+	vl = ops->alloc();
+	if (unlikely (vl == NULL))
+		goto error_alloc;	 
+	result = 0;
+	
+	/* Ok, allocated - did somebody add it while we were allocating? */
+	spin_lock (&vlq->lock);
+	vl_alt = __vlocator_find_r (vlq, &key);
+	if (vl_alt == NULL) {	/* No allocation collision, create it */
+		ops->create (vl, page, &key, flags);
+		vl->ops = ops;
+		__vlocator_add (vlq, vl);
+		goto out_ref;
+	}
+	/* Allocation collision, get the new one, discard ours */  
+	ops->free (vl);
+	vl = vl_alt;
+out_check:
+	result = -EFAULT;
+	if (unlikely (vl->ops != ops))  /* Check the type is correct */
+		goto out_unlock;
+	result = 0;
+out_ref:
+	vl_get (vl);
+	*pvl = vl;
+	if (ppage) {
+		*ppage = page;
+		need_page = 1;
+	}
+out_unlock:
+	spin_unlock (&vlq->lock);
+error_alloc:
+	drop_key_refs (&key);	     // Undo from ...
+	if (need_page == 0)
+		put_page (page);   // ... vl_setup() (maybe)
+error:
+	return result;
+}
+
+
+/** Work structure for the garbage collector */
+static void vl_garbage_collector (void *);
+DECLARE_WORK(vl_workqueue, vl_garbage_collector, NULL);
+
+
+/**
+ * Do garbage collection (called from the work-queue) and re-arm
+ *
+ * The most important action is to clean up the ownership of [u]fulocks
+ * that were assigned to init because the owner died and robustness
+ * was not enabled. This means that only ufulocks get to optionally
+ * support robustness; kernel-based fulocks always have to do
+ * robustness.
+ */
+static
+void vl_garbage_collector (void *dummy)
+{
+	struct list_head *itr, *nxt;
+	static unsigned i;
+	int cnt;
+	struct vlocator_queue *vlq;
+	struct vlocator *vl;
+	
+	/* Cleanup the ufulock hash: anything with a zero refcount is wiped */
+	for (cnt = 0; cnt < VL_HASH_QUEUES; cnt++, i++) {
+		if (i >= VL_HASH_QUEUES)
+			i = 0;
+		vlq = &__vl_hash[i];
+		if (list_empty (&vlq->queue)) // Some cheating always helps...
+			continue;
+		spin_lock (&vlq->lock);
+		list_for_each_safe (itr, nxt, &vlq->queue) {
+			vl = container_of (itr, struct vlocator, hash_list);
+			if (__vlocator_rem (vl)) {
+				vl->ops->release (vl);
+				vl->ops->free (vl);
+			}
+		}
+		spin_unlock (&vlq->lock);
+		if ((cnt > VL_HASH_QUEUES / 8) && need_resched())
+			break;
+	}
+	/* Re-arm */
+	schedule_delayed_work (&vl_workqueue, VL_GC_PERIOD * HZ);
+}
+
+
+/** Initialize the ufu subsystem. */
+static
+int __init subsys_ufu_init (void)
+{
+	unsigned i;
+	for (i = 0; i < sizeof (__vl_hash) / sizeof (__vl_hash[0]); i++)
+		vlocator_queue_init (&__vl_hash[i]);
+	/* Set up the garbage collector to run every VL_GC_PERIOD seconds */
+	schedule_delayed_work (&vl_workqueue, VL_GC_PERIOD * HZ);
+	return 0;
+}
+__initcall (subsys_ufu_init);
+
+


* [RFC/PATCH] FUSYN 7/10: user space fuqueues
  2003-12-03  8:51   ` [RFC/PATCH] FUSYN 6/10: user space/kernel space tracker inaky.perez-gonzalez
@ 2003-12-03  8:51     ` inaky.perez-gonzalez
  2003-12-03  8:51       ` [RFC/PATCH] FUSYN 8/10: kernel fulocks inaky.perez-gonzalez
  0 siblings, 1 reply; 9+ messages in thread
From: inaky.perez-gonzalez @ 2003-12-03  8:51 UTC (permalink / raw)
  To: linux-kernel; +Cc: inaky.perez-gonzalez, robustmutexes

 ufuqueue.c |  236 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 236 insertions(+)

--- /dev/null	Tue Dec  2 20:07:55 2003
+++ linux/kernel/ufuqueue.c	Wed Nov 19 17:32:07 2003
@@ -0,0 +1,236 @@
+
+/*
+ * Fast User real-time/pi/pp/robust/deadlock SYNchronization
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>.
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on normal futexes (futex.c), (C) Rusty Russell.
+ * Please refer to Documentation/fusyn.txt for more info.
+ */
+
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/highmem.h>   /* kmap_atomic() */
+#include <linux/vlocator.h>
+#include <linux/fuqueue.h>
+
+extern signed long ufu_timespec2jiffies (const struct timespec __user *);
+static struct fuqueue_ops ufuqueue_ops;
+
+/** Slab for allocation of 'struct ufuqueue's. */
+static kmem_cache_t *ufuqueue_slab; 
+
+/** A ufuqueue, tied to a user-space vm address. */
+struct ufuqueue {
+	struct fuqueue fuqueue;
+	struct vlocator vlocator;
+};
+
+
+/** Initialize an @ufuqueue (for slab object construction). */
+static inline
+void ufuqueue_ctor (void *obj, kmem_cache_t *slab, unsigned long flags)
+{
+	struct ufuqueue *ufuqueue = obj;
+	__fuqueue_init (&ufuqueue->fuqueue, &ufuqueue_ops);
+	vlocator_init (&ufuqueue->vlocator);
+}
+
+
+/** Allocate an ufuqueue maintaining debug-allocation counts. */
+static __inline__
+struct vlocator * ufuqueue_alloc (void)
+{
+	struct ufuqueue *ufuqueue;
+	ufuqueue = kmem_cache_alloc (ufuqueue_slab, GFP_KERNEL);
+	if (DEBUG > 0 && ufuqueue != NULL)
+		ldebug (1, "ufuqueue %p allocated, total: %d\n",
+			ufuqueue, allocated_inc());
+	return &ufuqueue->vlocator;
+}
+
+
+/**
+ * Create a ufuqueue that is going to be inserted into the hash
+ * [called from vlocator.c:vl_locate() through the vlocator ops]
+ */
+static
+void ufuqueue_create (struct vlocator *vl, struct page *page,
+		      const union futex_key *key, unsigned flags)
+{
+	memcpy (&vl->key, key, sizeof (*key));
+	get_key_refs (&vl->key);
+}
+
+
+/** Release a ufuqueue: drop the key references taken at creation. */
+static __inline__
+void ufuqueue_release (struct vlocator *vl)
+{
+	drop_key_refs (&vl->key);
+}
+
+
+/** Free an ufuqueue maintaining debug-allocation counts. */
+static __inline__
+void ufuqueue_free (struct vlocator *vl)
+{
+	struct ufuqueue *ufuqueue =
+		container_of (vl, struct ufuqueue, vlocator);
+	kmem_cache_free (ufuqueue_slab, ufuqueue);
+	if (DEBUG > 0 && ufuqueue != NULL)
+		ldebug (1, "ufuqueue %p freed, total: %d\n",
+			ufuqueue, allocated_dec());
+}
+
+
+struct vlocator_ops ufuqueue_vops = {
+	.alloc = ufuqueue_alloc,
+	.create = ufuqueue_create,
+	.release = ufuqueue_release,
+	.free = ufuqueue_free
+};
+
+
+/**
+ * Wait on a ufuqueue
+ *
+ * @_vfuqueue: address in user space of the fuqueue
+ * @val: value that *@_vfuqueue is expected to contain; if it no
+ *       longer matches, return -EWOULDBLOCK instead of blocking.
+ * @_timeout: timeout information [see sys_ufulock_lock()]
+ *
+ * This is just a thin shell that locates the kernel descriptor for
+ * the user space fuqueue, re-checks the value and then blocks on the
+ * underlying fuqueue.
+ */
+asmlinkage
+int sys_ufuqueue_wait (volatile unsigned __user *_vfuqueue,
+		       unsigned val, struct timespec __user *_timeout)
+{
+	int result = -EINVAL;
+	struct ufuqueue *ufuqueue;
+	struct page *page;
+	char *page_kmap;
+	volatile unsigned *vfuqueue;
+	unsigned new_val;
+	struct vlocator *vl;
+	signed long timeout;
+        struct fuqueue_waiter w;
+		
+	ftrace ("(%p, %x, %p)\n", _vfuqueue, val, _timeout);
+	might_sleep();
+
+	/* ufuqueue: pin pages, get keys, look up/allocate, refcount */
+	result = vl_locate (&page, &vl, &ufuqueue_vops, _vfuqueue, 0);
+	if (unlikely (result < 0))
+		goto out;
+	ufuqueue = container_of (vl, struct ufuqueue, vlocator);
+	/* We are going to lock the ufuqueue, so get the timeout first */
+	timeout = ufu_timespec2jiffies (_timeout);
+	result = (int) timeout;
+	if (timeout < 0)
+		goto out_put_vl;
+	
+	spin_lock_irq (&ufuqueue->fuqueue.lock);
+	__fuqueue_wait_queue (&ufuqueue->fuqueue, &w);
+	/* Now, are we ok with the value? */
+	page_kmap = kmap_atomic (page, KM_IRQ0);
+	vfuqueue = (volatile unsigned *)page_kmap + (vl->key.both.offset & ~1);
+	new_val = *vfuqueue;
+	kunmap_atomic (page_kmap, KM_IRQ0);
+	result = -EWOULDBLOCK;
+	if (val != new_val)
+		goto out_unqueue;
+	/* ok, go ahead and wait (it will unlock and restore irqs/preempt) */
+	return __fuqueue_wait_block (&ufuqueue->fuqueue, &w, timeout);
+
+out_unqueue:
+	__fuqueue_wait_unqueue (&w, 0);
+	spin_unlock_irq (&ufuqueue->fuqueue.lock);
+out_put_vl:
+	vl_put (vl);
+	put_page (page);
+out:
+	return result;
+}
+
+	
+/**
+ * Wake up waiters of a fuqueue
+ *
+ * @_vfuqueue: pointer to the fuqueue
+ * @howmany: number of waiters to wake up
+ * @code: code to return to the waiters [defaults to zero]
+ * @returns: 0 if ok, < 0 errno code on error.
+ */
+asmlinkage
+int sys_ufuqueue_wake (volatile unsigned __user *_vfuqueue,
+		       size_t howmany, int code)
+{
+	int result;
+	struct vlocator *vl;
+	struct ufuqueue *ufuqueue;
+	
+	ftrace ("(%p, %zu, %d)\n", _vfuqueue, howmany, code);
+	might_sleep();
+	
+	/* ufuqueue: get keys, look up (don't allocate), refcount */
+	result = vl_locate (NULL, &vl, NULL, _vfuqueue, 0);
+	if (result < 0)
+		goto out;
+	ufuqueue = container_of (vl, struct ufuqueue, vlocator);
+	/* Wake'en up */
+	spin_lock_irq (&ufuqueue->fuqueue.lock);
+	__fuqueue_wake (&ufuqueue->fuqueue, howmany, code);
+	spin_unlock_irq (&ufuqueue->fuqueue.lock);
+	vl_put (vl);
+	result = 0;
+out:
+	return result;
+}
+
+
+/** Initialize the ufuqueue subsystem. */
+static
+int __init subsys_ufuqueue_init (void)
+{
+	ufuqueue_slab = kmem_cache_create ("ufuqueue", sizeof (struct ufuqueue),
+					  0, 0, ufuqueue_ctor, NULL);
+	if (ufuqueue_slab == NULL) 
+		panic ("ufuqueue_init(): "
+		       "Unable to initialize ufuqueue slab allocator.\n");
+	return 0;
+}
+__initcall (subsys_ufuqueue_init);
+
+
+/* Adaptors for fulock operations */
+static
+void ufuqueue_put (struct fuqueue *fuqueue) 
+{
+	struct ufuqueue *ufuqueue =
+		container_of (fuqueue, struct ufuqueue, fuqueue);
+	vl_put (&ufuqueue->vlocator);
+}
+
+static
+void ufuqueue_get (struct fuqueue *fuqueue) 
+{
+	struct ufuqueue *ufuqueue =
+		container_of (fuqueue, struct ufuqueue, fuqueue);
+	vl_get (&ufuqueue->vlocator);
+}
+
+/** Ufuqueue operations */
+static
+struct fuqueue_ops ufuqueue_ops = {
+	.get = ufuqueue_get,
+	.put = ufuqueue_put,
+	.wait_cancel = __fuqueue_wait_cancel,
+	.chprio = __fuqueue_chprio
+};
+


* [RFC/PATCH] FUSYN 8/10: kernel fulocks
  2003-12-03  8:51     ` [RFC/PATCH] FUSYN 7/10: user space fuqueues inaky.perez-gonzalez
@ 2003-12-03  8:51       ` inaky.perez-gonzalez
  0 siblings, 0 replies; 9+ messages in thread
From: inaky.perez-gonzalez @ 2003-12-03  8:51 UTC (permalink / raw)
  To: linux-kernel; +Cc: inaky.perez-gonzalez, robustmutexes

 include/linux/fulock-olist.h |   55 ++++
 include/linux/fulock.h       |  278 +++++++++++++++++++++
 kernel/fulock.c              |  560 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 893 insertions(+)

--- /dev/null	Tue Dec  2 20:07:55 2003
+++ linux/include/linux/fulock-olist.h	Sun Nov 16 07:21:32 2003
@@ -0,0 +1,55 @@
+
+/*
+ * Fast User real-time/pi/pp/robust/deadlock SYNchronization
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>.
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on normal futexes (futex.c), (C) Rusty Russell.
+ * Please refer to Documentation/fusyn.txt for more info.
+ *
+ * Fulock Ownership List
+ * 
+ * This file is here to avoid include hell, as sched.h needs it to be
+ * able to embed a fulock ownership list into 'struct
+ * task_struct'. Its sole users are sched.h and fulock.h.
+ *
+ * This is just a redirection of the methods used to manage the list
+ * of fulocks that a task owns at any given moment.
+ *
+ * I don't think anybody in their right mind wants to use a
+ * priority-array list for this, but just in case, and to ease
+ * changing the implementation, I am redirecting it.
+ *
+ * I will pull the plug on this one, search and replace all the
+ * olist_*() calls with plist_*() and wipe this file out as soon as I
+ * have a minute. I consider this low prio.
+ */
+
+#ifndef __linux_fulock_olist_h__
+#define __linux_fulock_olist_h__
+
+#ifdef __KERNEL__
+
+#include <linux/plist.h>
+
+typedef struct plist olist_t;
+
+#define olist_add plist_add
+#define olist_rem plist_rem
+#define olist_init plist_init
+#define olist_INIT(plist) plist_INIT(plist)
+#define olist_for_each plist_for_each
+#define olist_for_each_safe plist_for_each_safe
+#define olist_empty plist_empty
+#define olist_first plist_first
+#define olist_prio plist_prio
+#define olist_chprio plist_chprio
+#if 0
+#define olist_update_prio plist_update_prio
+#define __olist_chprio __plist_chprio
+#endif
+
+#endif /* #ifdef __KERNEL__ */
+#endif /* #ifndef __linux_fulock_olist_h__ */
--- /dev/null	Tue Dec  2 20:07:55 2003
+++ linux/include/linux/fulock.h	Tue Dec  2 20:05:38 2003
@@ -0,0 +1,278 @@
+
+/*
+ * Fast User real-time/pi/pp/robust/deadlock SYNchronization
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>.
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on normal futexes (futex.c), (C) Rusty Russell.
+ * Please refer to Documentation/fusyn.txt for more info.
+ */
+
+#ifndef __linux_fulock_h__
+#define __linux_fulock_h__
+
+#include <asm/fulock.h>
+
+/**
+ * User space provided fulock flags
+ *
+ * fulocks/ufulocks are *always* robust; it is up to the caller to
+ * emulate a hang if robust behavior is not desired. However, the
+ * NOT_RM (not robust mutex) flag is kept and checked on exit, and if
+ * there are still owned fulocks, a warning will be printed.
+ */
+#define FULOCK_FL_USER_MK    0xfffff000 /* Flags provided by user space */
+#define FULOCK_FL_PI	     0x80000000 /* Priority inherit */
+#define FULOCK_FL_PP	     0x40000000 /* Priority protected */
+#define FULOCK_FL_RM_SUN     0x20000000 /* Robust mutex (sun style) */
+#define FULOCK_FL_RM	     0x10000000 /* Non-robust mutex [warns only] */
+#define FULOCK_FL_ERROR_CHK  0x04000000 /* Perform POSIX error checks */
+/* Priority ceiling masks */
+#define FULOCK_FL_PP_PLC_MK  0x00f00000 /* Policy */
+#define FULOCK_FL_PP_PRIO_MK 0x000ff000 /* Priority */
+
+/** Fulock consistency state */
+enum fulock_st {
+	fulock_st_healthy,   /* Normal, healthy */
+	fulock_st_dead,	     /* Some previous owner died */
+	fulock_st_nr,	     /* idem, plus cannot be recovered */
+	fulock_st_init	     /* fulock was re-initialized */
+};
+
+
+/** Fulock consistency action */
+enum fulock_con {
+	fulock_con_nop = 0,   /* No-op, just return actual consistency */
+	fulock_con_init,      /* Move from any state to initialized */
+	fulock_con_heal,      /* Move from dead to healthy */
+	fulock_con_nr,	      /* Move from dead to not-recoverable */
+};
+
+
+#ifdef __KERNEL__
+
+#include <linux/fuqueue.h>
+#include <linux/plist.h>
+#include <linux/vlocator.h>
+#include <asm/errno.h>
+
+/* Internal fulock flags */
+#define FULOCK_FL_NR	     0x00000100 /* not recoverable */
+#define FULOCK_FL_DEAD	     0x00000200 /* dead-owner */
+#define FULOCK_FL_NEW	     0x00000800 /* recently allocated ufulock */
+#define FULOCK_FL_PP_MK	     0x000000ff /* raw priority ceiling */
+
+struct fulock;
+
+/** Operations on a fulock. */
+struct fulock_ops 
+{
+	struct fuqueue_ops fuqueue;
+	void (* owner_set) (struct fulock *, struct task_struct *);
+	void (* owner_reset) (struct fulock *);
+	void (* exit) (struct fulock *);
+};
+
+/* In-kernel fulock operations */
+extern struct fulock_ops fulock_ops;
+extern struct fulock_ops ufulock_ops;
+extern struct vlocator_ops ufulock_vops;
+
+
+/** A fulock, mutex usable from the kernel. */
+struct fulock {
+	struct fuqueue fuqueue;
+	struct task_struct *owner;
+	unsigned flags;
+	olist_t olist_node;
+};
+
+
+/** Initialize a @fulock with given ops */
+static __inline__
+void __fulock_init (struct fulock *fulock, struct fulock_ops *ops)
+{
+	__fuqueue_init (&fulock->fuqueue, &ops->fuqueue);
+	fulock->owner = NULL;
+	fulock->flags = 0;
+	olist_init (&fulock->olist_node);
+}
+
+/** Statically initialize a @fulock with given ops */
+#define __fulock_INIT(fulock, ops) {				\
+	.fuqueue = __fuqueue_INIT (&(fulock)->fuqueue,		\
+				   &(ops)->fuqueue),		\
+	.owner = NULL,						\
+	.flags = 0,						\
+	.olist_node = olist_INIT (&(fulock)->olist_node)	\
+}
+
+/** Initialize a @fulock for usage within the kernel */
+static __inline__
+void fulock_init (struct fulock *fulock)
+{
+	__fulock_init (fulock, &fulock_ops);
+}
+
+/** Statically initialize a @fulock for usage within the kernel */
+#define fulock_INIT(fulock) __fulock_INIT (fulock, &fulock_ops)
+
+
+/* Primitives for locking/unlocking/waiting/signalling */
+extern int __fulock_lock (struct fulock *,  signed long);
+extern int __fulock_unlock (struct fulock *, size_t, int);
+extern int __fulock_consistency (struct fulock *fulock,
+				 enum fulock_con consistency);
+extern void __fulock_exit (struct fulock *);
+extern void exit_fulocks (struct task_struct *task);
+
+extern int __fulock_pp_allowed (struct fulock *);
+extern void __fulock_pp_boost (struct fulock *);
+extern void __fulock_pp_unboost (struct fulock *);
+extern int __fulock_check_deadlock (struct task_struct *, struct fulock *);
+
+/** More internal stuff for building on top of fulock */
+extern unsigned __fulock_wait_cancel (struct fuqueue *, struct fuqueue_waiter *);
+extern struct task_struct * __fulock_chprio (struct task_struct *,
+					     struct fuqueue *,
+					     struct fuqueue_waiter *);
+
+
+/** Check if it is ok for current to lock @fulock */
+static __inline__
+int __fulock_lock_check (struct fulock *fulock)
+{
+	int result = 0;
+	return result;
+}
+
+
+/**
+ * Lock a fulock, maybe wait for it to be available
+ *
+ * @timeout: wait to acquire the fulock for at most @timeout jiffies.
+ *	     If zero, don't block (try only). If MAX_SCHEDULE_TIMEOUT,
+ *	     block indefinitely until the lock is acquired.
+ *
+ * @returns: See __fulock_lock().
+ *
+ * Can ONLY be called from process context. Note __fulock_lock()
+ * releases the fulock's internal lock on return and re-enables IRQs
+ * and preemption.
+ */
+static __inline__
+int fulock_lock (struct fulock *fulock, signed timeout) 
+{
+	int result;
+	do {
+		spin_lock_irq (&fulock->fuqueue.lock);
+		result = __fulock_lock (fulock, timeout);
+	} while (result == -EAGAIN);
+	return result;
+}
+
+
+/**
+ * Unlock a fulock, wake up waiter(s)
+ *
+ * @f:	     fulock.
+ * @howmany: Wake up this many waiters; if 0, wake up only one,
+ *	     forcing a serialization in the acquisition of the
+ *	     fulock, so that no other task (in user or kernel space)
+ *	     can acquire it.
+ * @returns: Number of tasks woken up, < 0 errno code on error.
+ *
+ * Can be called from any context [I hope].
+ */
+static __inline__
+int fulock_unlock (struct fulock *fulock, size_t howmany)
+{
+	int result;
+	unsigned long flags;
+	
+	ftrace ("(%p, %zu)\n", fulock, howmany);
+	
+	spin_lock_irqsave (&fulock->fuqueue.lock, flags);
+	result = __fulock_unlock (fulock, howmany, 0);
+	spin_unlock_irqrestore (&fulock->fuqueue.lock, flags);
+	return result;
+}
+
+
+/**
+ * Set the consistency of a fulock
+ *
+ * @f: fulock to set
+ * @consistency: New consistency state to move it to (unless it is
+ *		 fulock_con_nop, which is a nop and can be used to get
+ *		 the actual consistency state); see enum fulock_con.
+ *
+ * @returns: 'enum fulock_st' consistency state; < 0 errno code on
+ *	     error.
+ *
+ * FIXME: this function set is kind of too convoluted, I am afraid.
+ *
+ * Can only be called from process context, as it checks whether
+ * current is the owner.
+ */
+static __inline__
+int fulock_consistency (struct fulock *fulock, enum fulock_con consistency)
+{
+	int result;
+	unsigned long flags;
+	ftrace ("(%p, %d)\n", fulock, consistency);
+	
+	spin_lock_irqsave (&fulock->fuqueue.lock, flags);
+	result = __fulock_consistency (fulock, consistency);
+	spin_unlock_irqrestore (&fulock->fuqueue.lock, flags);
+	return result;
+}
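+
+/*
+ * Illustrative recovery sketch (comment only): what a user of a robust
+ * fulock might do after acquiring it with -EOWNERDEAD;
+ * data_is_consistent() stands for whatever check the caller applies.
+ *
+ *	result = fulock_lock (&f, MAX_SCHEDULE_TIMEOUT);
+ *	if (result == -EOWNERDEAD) {
+ *		if (data_is_consistent ())
+ *			fulock_consistency (&f, fulock_con_heal);
+ *		else	// waiters will get -ENOTRECOVERABLE
+ *			fulock_consistency (&f, fulock_con_nr);
+ *	}
+ */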
+
+
+/** A ufulock, tied to a user-space vm address. */
+struct ufulock {
+	struct fulock fulock;
+	struct vlocator vlocator;
+	struct page *page;
+};
+
+
+/** @returns: true if the fulock @f has no waiters. */
+static __inline__
+unsigned __fulock_empty (const struct fulock *f)
+{
+	return __fuqueue_empty (&f->fuqueue);
+}
+
+
+/** [Internal] Make task @task the owner of fulock @f. */
+static __inline__
+void __fulock_owner_set (struct fulock *fulock, struct task_struct *task)
+{
+	ftrace ("(%p, %p [%d])\n", fulock, task, task->pid);
+	CHECK_IRQs();
+	
+	fulock->owner = task;
+	_raw_spin_lock (&task->fulock_olist_lock);
+	olist_add (&task->fulock_olist, &fulock->olist_node);
+	_raw_spin_unlock (&task->fulock_olist_lock);
+}
+
+
+/** [Internal] Reset ownership of fulock @f. */
+static __inline__
+void __fulock_owner_reset (struct fulock *fulock)
+{
+	struct task_struct *owner = fulock->owner;
+	ftrace ("(%p)\n", fulock);
+	CHECK_IRQs();
+  
+	_raw_spin_lock (&owner->fulock_olist_lock);
+	olist_rem (&owner->fulock_olist, &fulock->olist_node);
+	_raw_spin_unlock (&owner->fulock_olist_lock);
+	fulock->owner = NULL;
+}
+
+#endif /* #ifdef __KERNEL__ */
+#endif /* #ifndef __linux_fulock_h__ */
--- /dev/null	Tue Dec  2 20:07:55 2003
+++ linux/kernel/fulock.c	Tue Dec  2 20:05:38 2003
@@ -0,0 +1,560 @@
+
+/*
+ * Fast User real-time/pi/pp/robust/deadlock SYNchronization
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>.
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on normal futexes (futex.c), (C) Rusty Russell.
+ * Please refer to Documentation/fusyn.txt for more info.
+ */
+
+#include <linux/fulock.h>
+#include <linux/plist.h>
+#include <linux/time.h>	    /* struct timespec */
+#include <linux/sched.h>    /* MAX_SCHEDULE_TIMEOUT */
+#include <linux/errno.h>
+
+/** Get the real priority from the real-time priority of a task. */
+static inline
+unsigned long prio_from_rt_prio (struct task_struct *task)
+{
+	return MAX_USER_RT_PRIO-1 - task->rt_priority;
+}
+
+
+/**
+ * Perform priority inheritance over the ownership chain
+ *
+ * @fulock: a waiter in this fulock was re-prioritized and the wlist
+ *          max priority changed; propagate that change. It has to be
+ *          locked. 
+ *
+ * Updates the position of the @fulock on its owner's ownership list
+ * and if this results in a changed maximum ownership list priority,
+ * propagates it up.
+ *
+ * This is also used for un-boosting when a waiter stops waiting and
+ * the priorities have to be shuffled back.
+ *
+ * WARNING: Needs to be called with IRQs and preemption disabled.
+ */
+void __fulock_pi_boost (struct fulock *fulock)
+{
+	unsigned prio_changed;
+	struct task_struct *owner = fulock->owner;
+	unsigned long new_prio = wlist_prio (&fulock->fuqueue.wlist);
+
+	__ftrace (1, "(%p)\n", fulock);
+	CHECK_IRQs();
+	
+	_raw_spin_lock (&owner->fulock_olist_lock);
+	prio_changed = olist_chprio (&owner->fulock_olist, &fulock->olist_node,
+				     new_prio);
+	_raw_spin_unlock (&owner->fulock_olist_lock);
+	if (prio_changed) {
+		new_prio = min (prio_from_rt_prio (owner), new_prio);
+		if (new_prio != owner->prio) {
+			ldebug (1, "__set_prio (%d, %lu)\n", owner->pid, new_prio);
+			__set_prio (owner, new_prio);
+			/* Now the priority changed for the owner, so we need
+			 * to propagate it */
+			fuqueue_chprio (owner);
+		}
+	}
+}
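+
+/*
+ * Worked example (illustrative, assuming MAX_USER_RT_PRIO == 100): the
+ * owner runs at rt_priority 10, i.e. prio 89; a waiter with prio 70
+ * queues on a PI fulock the owner holds. wlist_prio() of the fulock
+ * drops to 70, olist_chprio() reports a new maximum for the ownership
+ * list, and min (prio_from_rt_prio (owner), 70) == 70 != owner->prio,
+ * so the owner is boosted to 70; if the owner is itself blocked on
+ * another fuqueue, fuqueue_chprio() pushes the boost up that chain.
+ */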
+
+
+/**
+ * Test if a to-be queued task would deadlock if it waits for a fulock.
+ *
+ * Simple as it is, it looks ugly as hell. Basically, the fulock we
+ * are about to lock will have an owner, so we check the owner; if it
+ * is us, deadlock; if not, we see if the owner is itself waiting for
+ * anything; if so, we check whether that is a fulock, and if so, who
+ * its owner is; if it is us, then deadlock, if not, start again ...
+ *
+ * Now, the trick is to keep the reference counts, and the locks and
+ * all the crap. A single lock for everything would be *so* beautiful
+ * (and not scalable :).
+ * 
+ * @task: the task to check
+ * @fulock: the fulock to check
+ * @returns: 0 if ok, < 0 errno on error.
+ *
+ * This *should* be safe being lock-less [famous last words].
+ *
+ * WARNING: Needs to be called with IRQs and preemption disabled.
+ */
+int __fulock_check_deadlock (struct task_struct *task, struct fulock *fulock)
+{
+	int result = 0;
+	struct fuqueue_ops *ops;
+	struct task_struct *owner;
+	struct fuqueue *fuqueue;
+	
+	ftrace ("(%p, %p)\n", task, fulock);
+	CHECK_IRQs();
+	
+	/* first fulock to trace is already locked and we won't unlock it */
+	owner = fulock->owner;
+	if (owner == NULL)
+		goto out;
+	result = -EDEADLK;
+	if (owner == task)
+		goto out;
+	get_task_struct (owner);
+next:
+	result = 0;
+	/* Who is the owner waiting for? safely acquire and lock it */
+	_raw_spin_lock (&owner->fuqueue_wait_lock);
+	fuqueue = owner->fuqueue_wait;
+	if (fuqueue == NULL)				/* Ok, not waiting */
+		goto out_owner_unlock;
+	if (!_raw_spin_trylock (&fuqueue->lock)) {      /* Spin dance... */
+		_raw_spin_unlock (&owner->fuqueue_wait_lock);
+		goto next;
+	}
+	ops = fuqueue->ops;
+	if (ops->get)
+		ops->get (fuqueue);
+	_raw_spin_unlock (&owner->fuqueue_wait_lock);	
+	put_task_struct (owner);
+	
+	/* Is a fulock whatever the owner is waiting for? */
+	if (ops != &fulock_ops.fuqueue && ops != &ufulock_ops.fuqueue)
+		goto out_fuqueue_unlock;
+	fulock = container_of (fuqueue, struct fulock, fuqueue);
+	owner = fulock->owner;                /* Who's the fulock's owner? */
+	if (unlikely (owner == NULL))         /* Released before we locked it? */
+		goto out_fuqueue_unlock;	
+	result = -EDEADLK;                    /* It is us? ooops */
+	if (owner == task)
+		goto out_fuqueue_unlock;
+	
+	/* What's the owner waiting for? Proceed to it */
+	get_task_struct (owner);
+	_raw_spin_unlock (&fulock->fuqueue.lock);
+	if (ops->put)
+		ops->put (fuqueue);
+	goto next;
+
+out_owner_unlock:
+	_raw_spin_unlock (&owner->fuqueue_wait_lock);
+	put_task_struct (owner);
+	return result;
+	
+out_fuqueue_unlock:
+	_raw_spin_unlock (&fulock->fuqueue.lock);
+	if (ops->put)
+		ops->put (fuqueue);
+out:
+	return result;
+}
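+
+/*
+ * Example of the chain this catches (illustrative): task A owns fulock
+ * F1 and is blocked trying to lock F2; task B owns F2 and now tries to
+ * lock F1 with FULOCK_FL_ERROR_CHK set. Walking F1->owner (A), then
+ * what A waits on (F2), then F2->owner (B == current) comes back to
+ * the caller, so -EDEADLK is returned instead of blocking forever.
+ */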
+
+
+/**
+ * [Try]lock a fulock
+ *
+ * @f: fulock to [try]lock.
+ * @timeout: Time to wait for the lock to be acquired. If 0, trylock
+ *	     only; if MAX_SCHEDULE_TIMEOUT, wait forever; else, wait
+ *	     the given number of jiffies.
+ * @returns: 0	     Acquired the fulock
+ *	     -EOWNERDEAD  Acquired the fulock, some previous owner died.
+ *	     -ENOTRECOVERABLE  Not acquired, fulock is not recoverable
+ *	     -EBUSY  Not acquired, it is locked [and trylock was
+ *		     requested].
+ *	     -EAGAIN Not acquired, try again [FIXME: change this,
+ *		     POSIX uses it for mutexes].
+ *	     *	     Not acquired, error.
+ *
+ * Needs fulock->fuqueue.lock held; on return, it will have been released.
+ *
+ * Note that some operations involving flag reading don't need locking
+ * because to change those values, the calling task needs to be the
+ * owner of the fulock, and once we come out of the wait without
+ * errors, we are the owner.
+ *
+ * WARNING: Needs to be called with IRQs and preemption disabled [the
+ * fuqueue lock acquired with spin_lock_irq()].
+ */
+int __fulock_lock (struct fulock *fulock, signed long timeout)
+{
+	int fulock_is_pp;
+	int result = 0;
+	unsigned prio_changed;
+	struct fulock_ops *ops =
+		container_of (fulock->fuqueue.ops, struct fulock_ops, fuqueue);
+	struct fuqueue_waiter w;
+	
+	ftrace ("(%p [flags 0x%x], %ld)\n", fulock, fulock->flags, timeout);
+
+	result = -ENOTRECOVERABLE;
+	if (fulock->flags & FULOCK_FL_NR)	/* can we lock? */
+		goto out_unlock;
+	if (fulock->flags & FULOCK_FL_ERROR_CHK) {
+		result = __fulock_check_deadlock (current, fulock);
+		if (result < 0)
+			goto out_unlock;
+	}
+	fulock_is_pp = fulock->flags & FULOCK_FL_PP;
+	if (fulock_is_pp) {
+		result = __fulock_pp_allowed (fulock);
+		if (result)
+			goto out_unlock;
+	}
+	if (fulock->owner == NULL)		/* Unlocked? take it */
+		goto its_unlocked;
+	/* Nchts, we have to wait. */
+	result = -EBUSY;
+	if (!timeout)
+		goto out_unlock;
+	prio_changed = __fuqueue_wait_queue (&fulock->fuqueue, &w);
+	if (prio_changed && fulock->flags & FULOCK_FL_PI)
+		__fulock_pi_boost (fulock);
+	return __fuqueue_wait_block (&fulock->fuqueue, &w, timeout);
+
+its_unlocked:
+	result = fulock->flags & FULOCK_FL_DEAD? -EOWNERDEAD : 0;
+	ops->owner_set (fulock, current);
+	if (fulock_is_pp)
+		__fulock_pp_boost (fulock);
+out_unlock:
+	_raw_spin_unlock (&fulock->fuqueue.lock);
+	local_irq_enable();
+	preempt_enable();
+	return result;
+}
+
+
+/**
+ * Unlock a fulock, wake up waiter(s) [internal version]
+ *
+ * @fulock:  Address for the fulock (kernel space).
+ * @howmany: Wake up this many waiters; if 0, wake up only one,
+ *	     forcing a serialization in the acquisition of the
+ *	     fulock, so that no other task (in user or kernel space)
+ *	     can acquire it.
+ * @code:    If waking up in parallel mode, return code to be passed to
+ *           the waiters as a result of their wait. 
+ * @returns: 0 if the fulock was unlocked/there are no waiters left or
+ *             howmany > 0
+ *	     1 fulock unlocked, there was one waiter (now none)
+ *	     2 fulock unlocked, there was more than one waiter
+ *	     
+ *	     ufulock_unlock() uses this to update the vfulock, except
+ *           if howmany > 0.
+ *           
+ * Requires fulock->fuqueue.lock held, IRQs & preempt disabled!!!
+ */
+int __fulock_unlock (struct fulock *fulock, size_t howmany, int code)
+{
+	int result = 0;
+	struct fulock_ops *ops =
+		container_of (fulock->fuqueue.ops, struct fulock_ops, fuqueue);
+	struct task_struct *owner;
+	struct fuqueue_waiter *w;
+	
+	ftrace ("(%p, %zu, %d)\n", fulock, howmany, code);
+	CHECK_IRQs();
+	
+	if (fulock->owner == NULL)		/* Unlocked? */
+		goto out;
+	owner = fulock->owner;
+	ops->owner_reset (fulock);
+	if (__fulock_empty (fulock))
+		goto out_unboost;
+	if (howmany > 0) {			/* Parallel unlock */
+		code = code == 0? -EAGAIN : code;
+		while (howmany-- && !__fuqueue_empty (&fulock->fuqueue)) {
+			w = __fuqueue_first (&fulock->fuqueue);
+			__fuqueue_wait_unqueue (w, code);
+			wake_up_process (w->task);
+		}
+	}
+	else {					/* Serialized unlock */
+		w = __fuqueue_first (&fulock->fuqueue);
+		ops->owner_set (fulock, w->task);
+		if (fulock->flags & FULOCK_FL_PP)
+			__fulock_pp_boost (fulock);
+		code = fulock->flags & FULOCK_FL_DEAD? -EOWNERDEAD : 0;
+		__fuqueue_wait_unqueue (w, code);
+		wake_up_process (w->task);
+		result = __fulock_empty (fulock)? 1 : 2;
+	}
+	/* Now, once we have done it, we can unboost PI or PP */
+out_unboost:
+	if (fulock->flags & FULOCK_FL_PP)
+		__fulock_pp_unboost (fulock);
+	else if (fulock->flags & FULOCK_FL_PI
+		 && owner->prio != prio_from_rt_prio (owner)) {
+		/* We were boosted, undo that */
+		ldebug (1, "__set_prio (%d, %lu)\n", owner->pid, prio_from_rt_prio (owner));
+		__set_prio (owner, prio_from_rt_prio (owner));
+		fuqueue_chprio (owner); /* owner might be waiting */
+	}
+out:
+	return result;
+}
+
+
+/**
+ * Set the consistency of a fulock, get previous.
+ *
+ * @fulock: fulock whose consistency is to be set [fulock->fuqueue.lock
+ *	    has to be held]. 
+ * @consistency: New consistency state to move it to (unless it is
+ *		 fulock_con_nop, which is a nop and can be used to get
+ *		 the actual consistency state).
+ *
+ * @returns: consistency state
+ */
+int __fulock_consistency (struct fulock *fulock, enum fulock_con consistency)
+{
+	int result;
+	enum fulock_st state;
+
+	ftrace ("(%p, %d)\n", fulock, consistency);
+	CHECK_IRQs();
+	
+	/* Gather actual state */
+	if (fulock->flags & FULOCK_FL_NR)
+		state = fulock_st_nr;
+	else if (fulock->flags & FULOCK_FL_DEAD)
+		state = fulock_st_dead;
+	else
+		state = fulock_st_healthy;
+	
+	/* Now, what to do? */
+	switch (consistency) {
+	    /* Nothing, so just say how are we standing */
+	    case fulock_con_nop:
+		result = state;
+		break;
+
+	    /* Reinitialize it */
+	    case fulock_con_init:
+		fulock->flags &= ~(FULOCK_FL_DEAD | FULOCK_FL_NR);
+		__fulock_unlock (fulock, (size_t) ~0, -ENOENT);
+		result = fulock_st_init;
+		break;
+		
+	    /* Mark it healthy */
+	    case fulock_con_heal:
+		result = -EINVAL;
+		if (state != fulock_st_dead)
+			break;
+		result = -EPERM;
+		if (fulock->owner != current)	/* Who are you? */
+			break;
+		fulock->flags &= ~FULOCK_FL_DEAD;
+		result = fulock_st_healthy;
+		break;
+		
+	    /* Make it not recoverable; wake up every waiter with error;
+	     * unlock. */ 
+	    case fulock_con_nr:
+		result = -EINVAL;
+		if (state != fulock_st_dead)
+			break;
+		result = -EPERM;
+		if (fulock->owner != current)	/* Who are you? */
+			break;
+		result = __fulock_unlock (fulock, (size_t)~0, -ENOTRECOVERABLE);
+		if (result >= 0) {
+			fulock->flags &= ~FULOCK_FL_DEAD;
+			fulock->flags |= FULOCK_FL_NR;
+			result = fulock_st_nr;
+		}
+		break;
+
+	    default:
+		result = -EINVAL;
+	}
+	return result;
+}
+
+
+/**
+ * Set the priority of a fulock waiter.
+ *
+ * @task: task to re-prioritize
+ * @fuqueue: fuqueue of the fulock the task is waiting for [locked]
+ * @w: waiter @task is waiting on in @fuqueue.
+ * @returns: NULL if there is nothing to propagate; otherwise, the
+ *	     task to propagate the priority change to.
+ *
+ * This does not set the prio of the process itself!
+ *
+ * This will just reposition it in the wait list and if priority
+ * inheritance is enabled, reposition in the ownership list.
+ * 
+ * Now, after repositioning in the wait list, we have to do the same
+ * in the ownership list. If we have set a new maximum, and that
+ * maximum is different from the owner's current ->prio, then we have to
+ * update it, so we return a task pointer that needs to be referenced.
+ * 
+ * WARNING: Needs to be called with IRQs and preemption disabled.
+ */
+struct task_struct * __fulock_chprio (struct task_struct *task,
+				      struct fuqueue *fuqueue,
+				      struct fuqueue_waiter *w)
+{
+	unsigned prio_changed;
+	unsigned long new_prio;
+	struct fuqueue_ops *ops;
+	struct fulock *fulock;
+	struct task_struct *owner = NULL;
+
+	ftrace ("(%p [%d], %p, %p)\n", task, task->pid, fuqueue, w);
+	CHECK_IRQs();
+	
+	/* Verify this is really a fulock */
+	ops = fuqueue->ops;
+	BUG_ON (ops != &fulock_ops.fuqueue && ops != &ufulock_ops.fuqueue);
+	fulock = container_of (fuqueue, struct fulock, fuqueue);
+	/* Update the wlist of our fulock */
+	__fuqueue_chprio (task, fuqueue, w); /* fuqueue is locked */
+#warning FIXME: trap if in the middle, the task stopped waiting?
+	/* Now, if we ain't PI, there is no point in continuing */
+	if (!(fulock->flags & FULOCK_FL_PI))
+		goto out;
+	/* And if it is unlocked, somebody unlocked just before we
+	 * came here, so we'll do nothing */
+	owner = fulock->owner;
+	if (unlikely (owner == NULL))
+		goto out;
+	/* Ok, we have to propagate, reposition in the ownership list,
+	 * and if the max prio changed and it is higher than the
+	 * owner's priority, then we have to go with him */
+	new_prio = wlist_prio (&fuqueue->wlist);
+	_raw_spin_lock (&owner->fulock_olist_lock);
+	prio_changed = olist_chprio (&owner->fulock_olist, &fulock->olist_node,
+				     new_prio);
+	_raw_spin_unlock (&owner->fulock_olist_lock);
+	if (prio_changed) {
+		new_prio = min (prio_from_rt_prio (owner), new_prio);
+		if (new_prio != owner->prio)
+			goto out;
+	}
+	owner = NULL;
+out:
+	return owner;
+}
+
+
+/**
+ * Initialize fulock specific stuff for a task
+ *
+ */
+void init_fulock (struct task_struct *task)
+{
+	__ftrace (0, "(task %p)\n", task);
+	
+	spin_lock_init (&task->fuqueue_wait_lock);
+	task->fuqueue_wait = NULL;
+	task->fuqueue_waiter = NULL;
+	spin_lock_init (&task->fulock_olist_lock);
+	olist_init (&task->fulock_olist);
+}
+
+
+/** Release a @fulock as dead because the owner is exiting. */
+void __fulock_exit (struct fulock *fulock)
+{
+	ftrace ("(%p)\n", fulock);
+
+	fulock->flags |= FULOCK_FL_DEAD;
+	if (!(fulock->flags & FULOCK_FL_RM))
+		printk (KERN_WARNING "Task %d [%s] exited holding non-robust "
+			"fulock %p; waiters might block for ever\n",
+			current->pid, current->comm, fulock);
+	__fulock_unlock (fulock, 0, -EOWNERDEAD);
+}
+
+
+/**
+ * When @task exits, release all the fulocks it holds as dead.
+ *
+ * We have to discriminate between ufulocks and kernel fulocks; when it is an
+ * ufulock, we just need to see if we have to put() the reference that
+ * the owner had or not (when the ownership is successfully passed to
+ * somebody else, we don't have to put it).
+ */
+#warning FIXME: hook up to exec()?
+void exit_fulocks (struct task_struct *task)
+{
+	olist_t *itr;
+	struct fulock *fulock;
+	struct fulock_ops *ops;
+	unsigned long flags;
+	
+	if (DEBUG > 0 && !olist_empty (&task->fulock_olist))
+		ftrace ("(%p [%d])\n", task, task->pid);
+
+	/* FIXME: there is a better way to do this, but I feel toooo
+	 * thick today -- the problem is fulock->ops->exit() is going
+	 * to take fulock_olist_lock to reset the ownership...so
+	 * whatever it is, we have to call without holding it. */
+	local_irq_save (flags);
+	preempt_disable();
+	_raw_spin_lock (&task->fulock_olist_lock);
+	while (!olist_empty (&task->fulock_olist)) {
+		itr = olist_first (&task->fulock_olist);
+		fulock = container_of (itr, struct fulock, olist_node);
+		ldebug (7, "task %p [%d] still owns fulock %p\n",
+			task, task->pid, fulock);
+		ops = container_of (fulock->fuqueue.ops,
+				    struct fulock_ops, fuqueue);
+		if (ops->fuqueue.get)
+			ops->fuqueue.get (&fulock->fuqueue);
+		_raw_spin_unlock (&task->fulock_olist_lock);
+		
+		_raw_spin_lock (&fulock->fuqueue.lock);	  
+		ops->exit (fulock); /* releases fulock lock */
+		_raw_spin_unlock (&fulock->fuqueue.lock);
+		
+		if (ops->fuqueue.put)
+			ops->fuqueue.put (&fulock->fuqueue);
+		_raw_spin_lock (&task->fulock_olist_lock);
+	}
+	_raw_spin_unlock (&task->fulock_olist_lock);
+	local_irq_restore (flags);
+	preempt_enable();
+}
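+
+/*
+ * Illustrative flow: when a task dies holding a robust fulock,
+ * exit_fulocks() walks its ownership list and __fulock_exit() marks
+ * each held fulock FULOCK_FL_DEAD, waking the first waiter with
+ * -EOWNERDEAD; that waiter becomes the new owner and can recover or
+ * give up through fulock_consistency().
+ */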
+
+
+/** Cancel the wait of @w->task on @fuqueue and update the wait list priority */
+unsigned __fulock_wait_cancel (struct fuqueue *fuqueue,
+			   struct fuqueue_waiter *w)
+{
+	unsigned prio_change;
+	struct fulock *fulock =
+		container_of (fuqueue, struct fulock, fuqueue);
+	
+	ftrace ("(%p, %p [%d], %p)\n",
+		fuqueue, w, w->task->pid, w);
+
+	prio_change = __fuqueue_wait_cancel (fuqueue, w);
+	ldebug (2, "prio_change is %u\n", prio_change);
+	if (prio_change && (fulock->flags & FULOCK_FL_PI))
+		__fulock_pi_boost (fulock);
+	return prio_change;
+}
+
+
+/** Fulock operations */
+struct fulock_ops fulock_ops = {
+	.fuqueue = {
+		.get = NULL,
+		.put = NULL,
+		.wait_cancel = __fulock_wait_cancel,
+		.chprio = __fulock_chprio
+	},
+	.owner_set = __fulock_owner_set,
+	.owner_reset = __fulock_owner_reset,
+	.exit = __fulock_exit
+};
+


* Re: [RFC/PATCH] FUSYN 5/10: kernel fuqueues
  2003-12-03  8:51 ` [RFC/PATCH] FUSYN 5/10: kernel fuqueues inaky.perez-gonzalez
  2003-12-03  8:51   ` [RFC/PATCH] FUSYN 6/10: user space/kernel space tracker inaky.perez-gonzalez
@ 2003-12-11 23:30   ` Matt Mackall
  2003-12-11 23:55     ` Gene Heskett
  2003-12-14 16:15     ` Pavel Machek
  2003-12-12 19:21   ` William Lee Irwin III
  2 siblings, 2 replies; 9+ messages in thread
From: Matt Mackall @ 2003-12-11 23:30 UTC (permalink / raw)
  To: inaky.perez-gonzalez; +Cc: linux-kernel, robustmutexes

On Wed, Dec 03, 2003 at 12:51:34AM -0800, inaky.perez-gonzalez@intel.com wrote:
>  include/linux/fuqueue.h |  451 ++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/plist.h   |  197 ++++++++++++++++++++
>  kernel/fuqueue.c        |  220 +++++++++++++++++++++++
>  3 files changed, 868 insertions(+)
> 
> +++ linux/include/linux/fuqueue.h	Wed Nov 19 16:42:50 2003

I don't suppose you've run this feature name past anyone in marketing
or PR?

-- 
Matt Mackall : http://www.selenic.com : Linux development and consulting


* Re: [RFC/PATCH] FUSYN 5/10: kernel fuqueues
  2003-12-11 23:30   ` [RFC/PATCH] FUSYN 5/10: kernel fuqueues Matt Mackall
@ 2003-12-11 23:55     ` Gene Heskett
  2003-12-14 16:15     ` Pavel Machek
  1 sibling, 0 replies; 9+ messages in thread
From: Gene Heskett @ 2003-12-11 23:55 UTC (permalink / raw)
  To: Matt Mackall, inaky.perez-gonzalez; +Cc: linux-kernel, robustmutexes

On Thursday 11 December 2003 18:30, Matt Mackall wrote:
>On Wed, Dec 03, 2003 at 12:51:34AM -0800, 
inaky.perez-gonzalez@intel.com wrote:
>>  include/linux/fuqueue.h |  451
>> ++++++++++++++++++++++++++++++++++++++++++++++++
>> include/linux/plist.h   |  197 ++++++++++++++++++++
>>  kernel/fuqueue.c        |  220 +++++++++++++++++++++++
>>  3 files changed, 868 insertions(+)
>>
>> +++ linux/include/linux/fuqueue.h	Wed Nov 19 16:42:50 2003
>
>I don't suppose you've run this feature name past anyone in
> marketing or PR?

Obviously not...

-- 
Cheers, Gene
AMD K6-III@500mhz 320M
Athlon1600XP@1400mhz  512M
99.22% setiathome rank, not too shabby for a WV hillbilly
Yahoo.com attornies please note, additions to this message
by Gene Heskett are:
Copyright 2003 by Maurice Eugene Heskett, all rights reserved.



* Re: [RFC/PATCH] FUSYN 5/10: kernel fuqueues
  2003-12-03  8:51 ` [RFC/PATCH] FUSYN 5/10: kernel fuqueues inaky.perez-gonzalez
  2003-12-03  8:51   ` [RFC/PATCH] FUSYN 6/10: user space/kernel space tracker inaky.perez-gonzalez
  2003-12-11 23:30   ` [RFC/PATCH] FUSYN 5/10: kernel fuqueues Matt Mackall
@ 2003-12-12 19:21   ` William Lee Irwin III
  2 siblings, 0 replies; 9+ messages in thread
From: William Lee Irwin III @ 2003-12-12 19:21 UTC (permalink / raw)
  To: inaky.perez-gonzalez; +Cc: linux-kernel, robustmutexes

On Wed, Dec 03, 2003 at 12:51:34AM -0800, inaky.perez-gonzalez@intel.com wrote:
> +static inline void __debug_outb (unsigned val, int port) {
> +	__asm__ __volatile__ ("outb %b0,%w1" : : "a" (val), "Nd" (port));
> +}
> +static inline unsigned __debug_inb (int port) {
> +	unsigned value;
> +	__asm__ __volatile__ ("inb %w1,%b0" : "=a" (value) : "Nd" (port));
> +	return value;
> +}
> +static inline
> +void __debug_printstr (const char *str) {
> +	const int port = 0x03f8;  
> +	while (*str) {
> +		while (!(__debug_inb (port + UART_LSR) & UART_LSR_THRE));
> +		__debug_outb (*str++, port+UART_TX);
> +	}
> +	__debug_outb ('\r', port + UART_TX);
> +}
> +#endif

In general, this kind of debug code shouldn't go in "shipped" patches.


On Wed, Dec 03, 2003 at 12:51:34AM -0800, inaky.perez-gonzalez@intel.com wrote:
> +#define assert(c, a...)	 do { if ((DEBUG >= 0) && !(c)) BUG(); } while (0)

assert() is a no-no in Linux, for various reasons.


On Wed, Dec 03, 2003 at 12:51:34AM -0800, inaky.perez-gonzalez@intel.com wrote:
> + * FIXME: is it worth to have get/put? maybe they should be enforced
> + *        for every fuqueue, this way we don't have to query the ops
> + *        structure for the get/put method and if it is there, call
> + *        it. We'd have to move the get/put ops over to the vlocator,
> + *        but that's not much of a problem.
> + *        The decission factor is that an atomic operation needs to
> + *        lock the whole bus and is not as scalable as testing a ptr
> + *        for NULLness.
> + *        For simplicity, probably the idea of forcing the refcount in
> + *        the fuqueue makes sense.

Basically, if there aren't multiple implementations of ->get()/->put()
that need to be disambiguated, this should get left out.


On Wed, Dec 03, 2003 at 12:51:34AM -0800, inaky.perez-gonzalez@intel.com wrote:
> +#if DEBUG > 0
> +/* BUG_ON() firing? Temporary fix, do you have CONFIG_PREEMPT enabled?
> + * either that or disable DEBUG (force #define it to zero). */ 
> +#define CHECK_IRQs() do { BUG_ON (!in_atomic()); } while (0)
> +#else
> +#define CHECK_IRQs() do {} while (0)
> +#endif

This kind of thing isn't good to have in shipped patches either.


On Wed, Dec 03, 2003 at 12:51:34AM -0800, inaky.perez-gonzalez@intel.com wrote:
> +/* Priority-sorted list */
> +struct plist {
> +	unsigned prio;
> +	struct list_head node;
> +};

Maybe the expectation is for short lists, but it might be better to use
an rbtree or other sublinear insertion/removal structure for this. It
would be nice if we had a generic heap structure for this sort of affair.


On Wed, Dec 03, 2003 at 12:51:34AM -0800, inaky.perez-gonzalez@intel.com wrote:
> +void fuqueue_chprio (struct task_struct *task)
> +{
> +	struct fuqueue_ops *ops;
> +	struct fuqueue *fuqueue;
> +	struct fuqueue_waiter *w;
> +	int prio = task->prio;
> +	unsigned long flags;
> +
> +	__ftrace (1, "(%p [%d])\n", task, task->pid);
> +	CHECK_IRQs();
> +	
> +	local_irq_save (flags);
> +	preempt_disable();
> +	get_task_struct (task);
> +next:
> +	/* Who is the task waiting for? safely acquire and lock it */
> +	_raw_spin_lock (&task->fuqueue_wait_lock);
> +	fuqueue = task->fuqueue_wait;
> +	if (fuqueue == NULL)				/* Ok, not waiting */
> +		goto out_task_unlock;
> +	if (!_raw_spin_trylock (&fuqueue->lock)) {      /* Spin dance... */
> +		_raw_spin_unlock (&task->fuqueue_wait_lock);
> +		goto next;
> +	}
> +	ops = fuqueue->ops;
> +	if (ops->get)
> +		ops->get (fuqueue);
> +	
> +	w = task->fuqueue_waiter;
> +	_raw_spin_unlock (&task->fuqueue_wait_lock);
> +	put_task_struct (task);
> +	
> +	/* Ok, we need to propagate the prio change to the fuqueue */
> +	ops = fuqueue->ops;
> +	task = ops->chprio (task, fuqueue, w);
> +	if (task == NULL)
> +		goto out_fuqueue_unlock;
> +
> +	/* Do we need to propagate to the next task */
> +	get_task_struct (task);
> +	_raw_spin_unlock (&fuqueue->lock);
> +	if (ops->put)
> +		ops->put (fuqueue);
> +	ldebug (1, "__set_prio (%d, %d)\n", task->pid, prio);
> +	__set_prio (task, prio);
> +	goto next;
> +
> +out_fuqueue_unlock:
> +	_raw_spin_unlock (&fuqueue->lock);
> +	if (ops->put)
> +		ops->put (fuqueue);
> +	goto out;
> +	
> +out_task_unlock:
> +	_raw_spin_unlock (&task->fuqueue_wait_lock);
> +	put_task_struct (task);
> +out:
> +	local_irq_restore (flags);
> +	preempt_enable();
> +	return;

Heavy use of _raw_spin_lock() like this is a poor practice.


On Wed, Dec 03, 2003 at 12:51:34AM -0800, inaky.perez-gonzalez@intel.com wrote:
> +/** Fuqueue operations for usage within the kernel */
> +struct fuqueue_ops fuqueue_ops = {
> +	.get = NULL,
> +	.put = NULL,
> +	.wait_cancel = __fuqueue_wait_cancel,
> +	.chprio = __fuqueue_chprio
> +};

If this is all ->get() and ->put() are going to be, why bother?


-- wli


* Re: [RFC/PATCH] FUSYN 5/10: kernel fuqueues
  2003-12-11 23:30   ` [RFC/PATCH] FUSYN 5/10: kernel fuqueues Matt Mackall
  2003-12-11 23:55     ` Gene Heskett
@ 2003-12-14 16:15     ` Pavel Machek
  1 sibling, 0 replies; 9+ messages in thread
From: Pavel Machek @ 2003-12-14 16:15 UTC (permalink / raw)
  To: Matt Mackall; +Cc: inaky.perez-gonzalez, linux-kernel, robustmutexes

Hi!

> >  include/linux/fuqueue.h |  451 ++++++++++++++++++++++++++++++++++++++++++++++++
> >  include/linux/plist.h   |  197 ++++++++++++++++++++
> >  kernel/fuqueue.c        |  220 +++++++++++++++++++++++
> >  3 files changed, 868 insertions(+)
> > 
> > +++ linux/include/linux/fuqueue.h	Wed Nov 19 16:42:50 2003
> 
> I don't suppose you've run this feature name past anyone in marketing
> or PR?

Hehe, reminds me of podfuk (now called uservfs) project :-).
								Pavel
-- 
When do you have a heart between your knees?
[Johanka's followup: and *two* hearts?]


* [RFC/PATCH] FUSYN 7/10: user space fuqueues
  2004-01-14 22:50 [RFC/PATCH] FUSYN 6/10: user space/kernel space tracker inaky.perez-gonzalez
@ 2004-01-14 22:50 ` inaky.perez-gonzalez
  0 siblings, 0 replies; 9+ messages in thread
From: inaky.perez-gonzalez @ 2004-01-14 22:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: inaky.perez-gonzalez, robustmutexes

 ufuqueue.c |  236 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 236 insertions(+)

--- /dev/null	Wed Jan 14 14:39:30 2004
+++ linux/kernel/ufuqueue.c	Wed Nov 19 17:32:07 2003
@@ -0,0 +1,236 @@
+
+/*
+ * Fast User real-time/pi/pp/robust/deadlock SYNchronization
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>.
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on normal futexes (futex.c), (C) Rusty Russell.
+ * Please refer to Documentation/fusyn.txt for more info.
+ */
+
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/highmem.h>   /* kmap_atomic() */
+#include <linux/vlocator.h>
+#include <linux/fuqueue.h>
+
+extern signed long ufu_timespec2jiffies (const struct timespec __user *);
+static struct fuqueue_ops ufuqueue_ops;
+
+/** Slab for allocation of 'struct ufuqueue's. */
+static kmem_cache_t *ufuqueue_slab; 
+
+/** A ufuqueue, tied to a user-space vm address. */
+struct ufuqueue {
+	struct fuqueue fuqueue;
+	struct vlocator vlocator;
+};
+
+
+/** Initialize an @ufuqueue (for slab object construction). */
+static inline
+void ufuqueue_ctor (void *obj, kmem_cache_t *slab, unsigned long flags)
+{
+	struct ufuqueue *ufuqueue = obj;
+	__fuqueue_init (&ufuqueue->fuqueue, &ufuqueue_ops);
+	vlocator_init (&ufuqueue->vlocator);
+}
+
+
+/** Allocate an ufuqueue maintaining debug-allocation counts. */
+static __inline__
+struct vlocator * ufuqueue_alloc (void)
+{
+	struct ufuqueue *ufuqueue;
+	ufuqueue = kmem_cache_alloc (ufuqueue_slab, GFP_KERNEL);
+	if (ufuqueue == NULL)
+		return NULL;
+	if (DEBUG > 0)
+		ldebug (1, "ufuqueue %p allocated, total: %d\n",
+			ufuqueue, allocated_inc());
+	return &ufuqueue->vlocator;
+}
+
+
+/**
+ * Create a ufuqueue that is going to be inserted into the hash
+ * [called from vlocator.c:vl_locate() through the vlocator ops]
+ */
+static
+void ufuqueue_create (struct vlocator *vl, struct page *page,
+		      const union futex_key *key, unsigned flags)
+{
+	memcpy (&vl->key, key, sizeof (*key));
+	get_key_refs (&vl->key);
+}
+
+
+/** Release an ufuqueue (drop the reference on its key). */
+static __inline__
+void ufuqueue_release (struct vlocator *vl)
+{
+	drop_key_refs (&vl->key);
+}
+
+
+/** Free an ufuqueue maintaining debug-allocation counts. */
+static __inline__
+void ufuqueue_free (struct vlocator *vl)
+{
+	struct ufuqueue *ufuqueue =
+		container_of (vl, struct ufuqueue, vlocator);
+	if (DEBUG > 0)
+		ldebug (1, "ufuqueue %p freed, total: %d\n",
+			ufuqueue, allocated_dec());
+	kmem_cache_free (ufuqueue_slab, ufuqueue);
+}
+
+
+struct vlocator_ops ufuqueue_vops = {
+	.alloc = ufuqueue_alloc,
+	.create = ufuqueue_create,
+	.release = ufuqueue_release,
+	.free = ufuqueue_free
+};
+
+
+/**
+ * Wait on a fuqueue
+ *
+ * @_vfuqueue: address in user space of the fuqueue's word.
+ * @val: value the word at @_vfuqueue is expected to contain; if it
+ *       has changed, -EWOULDBLOCK is returned and no wait is done.
+ * @_timeout: timeout information [see sys_ufulock_lock()]
+ *
+ * This is just a thin shell that locates the kernel descriptor for
+ * the fuqueue, checks the value and then queues and blocks the
+ * caller on it.
+ */
+asmlinkage
+int sys_ufuqueue_wait (volatile unsigned __user *_vfuqueue,
+		       unsigned val, struct timespec __user *_timeout)
+{
+	int result = -EINVAL;
+	struct ufuqueue *ufuqueue;
+	struct page *page;
+	char *page_kmap;
+	volatile unsigned *vfuqueue;
+	unsigned new_val;
+	struct vlocator *vl;
+	signed long timeout;
+        struct fuqueue_waiter w;
+		
+	ftrace ("(%p, %x, %p)\n", _vfuqueue, val, _timeout);
+	might_sleep();
+
+	/* ufuqueue: pin pages, get keys, look up/allocate, refcount */
+	result = vl_locate (&page, &vl, &ufuqueue_vops, _vfuqueue, 0);
+	if (unlikely (result < 0))
+		goto out;
+	ufuqueue = container_of (vl, struct ufuqueue, vlocator);
+	/* We are going to lock the ufuqueue, so get the timeout first */
+	timeout = ufu_timespec2jiffies (_timeout);
+	result = (int) timeout;
+	if (timeout < 0)
+		goto out_put_vl;
+	
+	spin_lock_irq (&ufuqueue->fuqueue.lock);
+	__fuqueue_wait_queue (&ufuqueue->fuqueue, &w);
+	/* Now, are we ok with the value? */
+	page_kmap = kmap_atomic (page, KM_IRQ0);
+	vfuqueue = (volatile unsigned *)page_kmap + (vl->key.both.offset & ~1);
+	new_val = *vfuqueue;
+	kunmap_atomic (page_kmap, KM_IRQ0);
+	result = -EWOULDBLOCK;
+	if (val != new_val)
+		goto out_unqueue;
+	/* ok, go ahead and wait (it will unlock and restore irqs/preempt) */
+	return __fuqueue_wait_block (&ufuqueue->fuqueue, &w, timeout);
+
+out_unqueue:
+	__fuqueue_wait_unqueue (&w, 0);
+	spin_unlock_irq (&ufuqueue->fuqueue.lock);
+out_put_vl:
+	vl_put (vl);
+	put_page (page);
+out:
+	return result;
+}
+
+	
+/**
+ * Wake up waiters of a fuqueue
+ *
+ * @_vfuqueue: pointer to the fuqueue
+ * @howmany: number of waiters to wake up
+ * @code: code to return to the waiters [normally zero]
+ * @returns: 0 if ok, < 0 errno code on error.
+ */
+asmlinkage
+int sys_ufuqueue_wake (volatile unsigned __user *_vfuqueue,
+		       size_t howmany, int code)
+{
+	int result;
+	struct vlocator *vl;
+	struct ufuqueue *ufuqueue;
+	
+	ftrace ("(%p, %zu, %d)\n", _vfuqueue, howmany, code);
+	might_sleep();
+	
+	/* ufuqueue: get keys, look up (don't allocate), refcount */
+	result = vl_locate (NULL, &vl, NULL, _vfuqueue, 0);
+	if (result < 0)
+		goto out;
+	ufuqueue = container_of (vl, struct ufuqueue, vlocator);
+	/* Wake'en up */
+	spin_lock_irq (&ufuqueue->fuqueue.lock);
+	__fuqueue_wake (&ufuqueue->fuqueue, howmany, code);
+	spin_unlock_irq (&ufuqueue->fuqueue.lock);
+	vl_put (vl);
+	result = 0;
+out:
+	return result;
+}
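+
+/*
+ * Illustrative user-space protocol (futex-style). ufuqueue_wait() and
+ * ufuqueue_wake() here stand for wrappers around these system calls;
+ * the wrappers, and passing NULL for "no timeout", are assumptions,
+ * not something this patch provides.
+ *
+ *	waiter:
+ *		while (flag == 0)
+ *			ufuqueue_wait (&flag, 0, NULL);
+ *	waker:
+ *		flag = 1;
+ *		ufuqueue_wake (&flag, 1, 0);
+ *
+ * The word is re-read after the waiter is queued, so a wake racing
+ * with the store to flag is not lost; if the value already changed,
+ * the wait returns -EWOULDBLOCK instead of blocking.
+ */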
+
+
+/** Initialize the ufuqueue subsystem. */
+static
+int __init subsys_ufuqueue_init (void)
+{
+	ufuqueue_slab = kmem_cache_create ("ufuqueue", sizeof (struct ufuqueue),
+					  0, 0, ufuqueue_ctor, NULL);
+	if (ufuqueue_slab == NULL) 
+		panic ("ufuqueue_init(): "
+		       "Unable to initialize ufuqueue slab allocator.\n");
+	return 0;
+}
+__initcall (subsys_ufuqueue_init);
+
+
+/* Adaptors for fulock operations */
+static
+void ufuqueue_put (struct fuqueue *fuqueue) 
+{
+	struct ufuqueue *ufuqueue =
+		container_of (fuqueue, struct ufuqueue, fuqueue);
+	vl_put (&ufuqueue->vlocator);
+}
+
+static
+void ufuqueue_get (struct fuqueue *fuqueue) 
+{
+	struct ufuqueue *ufuqueue =
+		container_of (fuqueue, struct ufuqueue, fuqueue);
+	vl_get (&ufuqueue->vlocator);
+}
+
+/** Ufuqueue operations */
+static
+struct fuqueue_ops ufuqueue_ops = {
+	.get = ufuqueue_get,
+	.put = ufuqueue_put,
+	.wait_cancel = __fuqueue_wait_cancel,
+	.chprio = __fuqueue_chprio
+};
+

