* [RFC/PATCH] FUSYN 5/10: kernel fuqueues
From: inaky.perez-gonzalez @ 2003-12-03 8:51 UTC
To: linux-kernel; +Cc: inaky.perez-gonzalez, robustmutexes
include/linux/fuqueue.h | 451 ++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/plist.h | 197 ++++++++++++++++++++
kernel/fuqueue.c | 220 +++++++++++++++++++++++
3 files changed, 868 insertions(+)
--- /dev/null Tue Dec 2 20:07:55 2003
+++ linux/include/linux/fuqueue.h Wed Nov 19 16:42:50 2003
@@ -0,0 +1,451 @@
+
+/*
+ * Fast User real-time/pi/pp/robust/deadlock SYNchronization
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>.
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on normal futexes (futex.c), (C) Rusty Russell.
+ * Please refer to Documentation/fusyn.txt for more info.
+ *
+ * Quick usage guide:
+ *
+ * struct fuqueue f = fuqueue_INIT (&f);
+ *
+ * fuqueue_wait (&f, 4343) Wait for max 4343 jiffies
+ * fuqueue_wake (&f, 5, -EPERM) Wake the first five guys waiting on f
+ * with -EPERM.
+ *
+ * These are simple priority-sorted wait queues that can be used from
+ * the kernel. They provide the foundation for more complex items
+ * (fulocks, fuconds, ufulocks, ufuconds) and use from user-space
+ * (ufuqueues, just like futexes).
+ *
+ * The type wlist_t provides the sorting for the wait list. The type
+ * 'struct fuqueue_waiter' represents a waiting task on a fuqueue. The
+ * 'struct fuqueue_ops' is what allows extensibility for other
+ * synchronization primitives.
+ *
+ * I have just realized that this is too similar to the wait_queues...
+ */
+
+#ifndef __linux_fuqueue_h__
+#define __linux_fuqueue_h__
+
+#ifdef __KERNEL__
+
+#include <linux/spinlock.h>
+#include <linux/plist.h>
+#include <linux/sched.h>
+#include <asm/hardirq.h>
+
+ /*
+ * Debug stuff -- move forward for seeing the real meat
+ *
+ * Levels
+ *
+ * 0 Nothing activated, all compiled out
+ * > 0 Activates assertions, memory allocation tracking
+ * > 1 Activates debug messages
+ * > 2 Activates [most] function traces
+ * > 3 Activates random debug stuff
+ */
+
+#undef DEBUG
+#define DEBUG 0
+
+#if DEBUG > 0
+#if 1 || !defined(__i386__)
+#define __debug_printstr(a...) printk(a) /* Dump to normal console */
+#else /* Dump straight to ttyS0 */
+#include <linux/serial_reg.h>
+#include <linux/stringify.h>
+static inline void __debug_outb (unsigned val, int port) {
+ __asm__ __volatile__ ("outb %b0,%w1" : : "a" (val), "Nd" (port));
+}
+static inline unsigned __debug_inb (int port) {
+ unsigned value;
+ __asm__ __volatile__ ("inb %w1,%b0" : "=a" (value) : "Nd" (port));
+ return value;
+}
+static inline
+void __debug_printstr (const char *str) {
+ const int port = 0x03f8;
+ while (*str) {
+ while (!(__debug_inb (port + UART_LSR) & UART_LSR_THRE));
+ __debug_outb (*str++, port+UART_TX);
+ }
+ __debug_outb ('\r', port + UART_TX);
+}
+#endif
+
+extern spinlock_t __debug_lock;
+
+static inline
+u64 __tsc_read (void)
+{
+ u64 tsc;
+#if defined(__i386__)
+ __asm__ __volatile__("rdtsc" : "=A" (tsc));
+#elif defined (__ia64__)
+ __asm__ __volatile__("mov %0=ar.itc" : "=r" (tsc) : : "memory");
+#else
+#warning "Architecture not supported in __tsc_read()!"
+ tsc = 0;
+#endif
+ return tsc;
+}
+
+#define __debug(a...) \
+do { \
+ /* Dirty: Try to avoid >1 CPUs printing ... will suck */ \
+ char __X_buf[256]; \
+ unsigned __X_len; \
+ unsigned long __X_flags; \
+ __X_len = snprintf (__X_buf, 255, "%Lu: %s:%d: %s[%d:%d] ", \
+ __tsc_read(), __FILE__, __LINE__, __FUNCTION__, \
+ current->pid, current->thread_info->cpu); \
+ snprintf (__X_buf + __X_len, 255 - __X_len, a); \
+ spin_lock_irqsave (&__debug_lock, __X_flags); \
+ __debug_printstr (__X_buf); \
+ spin_unlock_irqrestore (&__debug_lock, __X_flags); \
+} while (0)
+#endif /* #if DEBUG > 0 */
+
+/* The real debug statements */
+
+#if DEBUG > 0
+#define ldebug(l,a...) do { if (DEBUG >= l) __debug (a); } while (0)
+#define debug(a...) ldebug(1,a)
+#define fdebug(f, a...) do { if ((DEBUG >= 2) && f) __debug (a); } while (0)
+#define __ftrace(l,a...) do { if ((l)) __debug (a); } while (0)
+#define ftrace(a...) __ftrace((DEBUG >= 2),a)
+#define assert(c, a...) do { if ((DEBUG >= 0) && !(c)) BUG(); } while (0)
+#else
+#define ldebug(l,a...)
+#define debug(a...)
+#define fdebug(f, a...)
+#define __ftrace(l,a...)
+#define ftrace(a...)
+#define assert(c, a...)
+#endif
+
+
+
+
+/**
+ * Wait list type, O(N) addition, O(1) removal
+ *
+ * This is just a redirection of the methods used to manage the list
+ * of waiters that are waiting for a fuqueue. For hardcore-timing
+ * applications, you might want to use a PALIST instead of a PLIST,
+ * but FIXME, it is not done yet :)
+ */
+
+typedef struct plist wlist_t;
+
+#define FULOCK_WLIST_USE_PLIST
+#ifdef FULOCK_WLIST_USE_PLIST
+#define wlist_add plist_add
+#define wlist_rem plist_rem
+#define __wlist_rem __plist_rem
+#define wlist_first plist_first
+#define wlist_empty plist_empty
+#define wlist_INIT plist_INIT
+#define wlist_init plist_init
+#define wlist_last plist_last
+#define wlist_update_prio plist_update_prio
+#define wlist_prio plist_prio
+#define wlist_chprio plist_chprio
+#define __wlist_chprio __plist_chprio
+#if 0
+#define wlist_for_each_safe plist_for_each_safe
+#endif
+#endif
+
+struct task_struct;
+struct fuqueue;
+
+/** Descriptor of a waiting task */
+struct fuqueue_waiter {
+ wlist_t wlist_node; /* node for the wait list */
+ struct task_struct *task; /* task that is waiting */
+ int result; /* what happened */
+};
+
+/**
+ * Operations on a fuqueue.
+ *
+ * FIXME: is it worth having get/put? Maybe they should be mandatory
+ *        for every fuqueue; that way we don't have to query the ops
+ *        structure for the get/put method and call it only if it is
+ *        there. We'd have to move the get/put ops over to the
+ *        vlocator, but that's not much of a problem.
+ *
+ *        The deciding factor is that an atomic operation needs to
+ *        lock the whole bus and is not as scalable as testing a ptr
+ *        for NULLness.
+ *
+ *        For simplicity, forcing the refcount into the fuqueue
+ *        probably makes sense.
+ */
+struct fuqueue_ops {
+ void (* get) (struct fuqueue *);
+ void (* put) (struct fuqueue *);
+ unsigned (* wait_cancel) (struct fuqueue *, struct fuqueue_waiter *);
+ struct task_struct * (* chprio) (
+ struct task_struct *, struct fuqueue *,
+ struct fuqueue_waiter *);
+};
+
+/** A fuqueue, a prioritized wait queue usable from kernel space. */
+struct fuqueue {
+ spinlock_t lock;
+ wlist_t wlist;
+ struct fuqueue_ops *ops;
+};
+
+
+/** Initialize a @fuqueue structure with given @ops */
+static inline
+void __fuqueue_init (struct fuqueue *fuqueue, struct fuqueue_ops *ops)
+{
+ spin_lock_init (&fuqueue->lock);
+ wlist_init (&fuqueue->wlist);
+ fuqueue->ops = ops;
+}
+
+/** Statically initialize a @fuqueue with given @ops. */
+#define __fuqueue_INIT(fuqueue, ops) { \
+ .lock = SPIN_LOCK_UNLOCKED, \
+ .wlist = wlist_INIT (&(fuqueue)->wlist), \
+ .ops = (ops) \
+}
+
+
+/** fuqueue operations for in-kernel usage */
+extern struct fuqueue_ops fuqueue_ops;
+
+
+/** Initialize a @fuqueue for usage within the kernel */
+static inline
+void fuqueue_init (struct fuqueue *fuqueue)
+{
+ __fuqueue_init (fuqueue, &fuqueue_ops);
+}
+
+/** Statically initialize a @fuqueue for usage within the kernel */
+#define fuqueue_INIT(fuqueue) __fuqueue_INIT(fuqueue, &fuqueue_ops)
+
+
+/** Wait for a @fuqueue to be woken for as much as @timeout, @returns
+ * wake up code */
+extern int fuqueue_wait (struct fuqueue *fuqueue, signed long timeout);
+
+/* Cancel the wait on a fuqueue */
+extern void fuqueue_wait_cancel (struct task_struct *, int);
+
+/*
+ * The following are functions to be able to access fuqueue
+ * functionality when building on top of it, like the [u]fulocks,
+ * [u]fuconds and ufuqueues.
+ */
+
+#if DEBUG > 0
+/* BUG_ON() firing? Temporary fix, do you have CONFIG_PREEMPT enabled?
+ * either that or disable DEBUG (force #define it to zero). */
+#define CHECK_IRQs() do { BUG_ON (!in_atomic()); } while (0)
+#else
+#define CHECK_IRQs() do {} while (0)
+#endif
+
+
+/**
+ * Setup @current to wait for a fuqueue.
+ *
+ * This only sets up the wait; it does not block.
+ *
+ * @fuqueue: fuqueue to wait on.
+ * @w: waiter structure to fill up and queue.
+ * @returns !0 if the wlist prio changed.
+ *
+ * Fills up @current's fuqueue_wait* info and queues up @w after
+ * filling it.
+ *
+ * WARNING: Call with preemption and local IRQs disabled
+ */
+static inline
+unsigned __fuqueue_wait_queue (struct fuqueue *fuqueue,
+ struct fuqueue_waiter *w)
+{
+ ftrace ("(%p, %p)\n", fuqueue, w);
+ CHECK_IRQs();
+
+	_raw_spin_lock (&current->fuqueue_wait_lock);
+	current->fuqueue_wait = fuqueue;
+	current->fuqueue_waiter = w;
+	_raw_spin_unlock (&current->fuqueue_wait_lock);
+ w->task = current;
+ w->result = INT_MAX;
+ __wlist_chprio (&w->wlist_node, current->prio);
+ return wlist_add (&fuqueue->wlist, &w->wlist_node);
+}
+
+
+/**
+ * Unqueues a single waiter and cleans up its task wait information
+ * (the caller does the actual wake up).
+ *
+ * Needs to be split out from __fuqueue_wake() as
+ * fuqueue_wait_cancel() needs to acquire the locks in a funny way to
+ * prevent issues.
+ *
+ * WARNING: call with preemption and local IRQs disabled!!
+ */
+static inline
+void __fuqueue_wait_unqueue (struct fuqueue_waiter *w, int result)
+{
+ struct task_struct *task = w->task;
+
+ ftrace ("(%p [%d], %d)\n", w, w->task->pid, result);
+ CHECK_IRQs();
+
+ __wlist_rem (&w->wlist_node);
+ _raw_spin_lock (&task->fuqueue_wait_lock);
+ task->fuqueue_wait = NULL;
+ task->fuqueue_waiter->result = result;
+ task->fuqueue_waiter = NULL;
+ _raw_spin_unlock (&task->fuqueue_wait_lock);
+}
+
+
+/**
+ * Wait for a @fuqueue until woken up, @returns wake up code
+ *
+ * Needs to be called with the fuqueue lock held, local IRQs disabled
+ * and preemption disabled. Will release the lock, enable IRQs and
+ * preemption.
+ */
+static inline
+int __fuqueue_wait_block (struct fuqueue *fuqueue, struct fuqueue_waiter *w,
+ signed long timeout)
+{
+ ftrace ("(%p, %p, %ld)\n", fuqueue, w, timeout);
+ CHECK_IRQs();
+
+ set_current_state (TASK_INTERRUPTIBLE);
+ _raw_spin_unlock (&fuqueue->lock);
+ local_irq_enable();
+ preempt_enable_no_resched();
+
+ /* Wait until we are woken up */
+ timeout = schedule_timeout (timeout);
+ set_current_state (TASK_RUNNING);
+	/*
+	 * Now, whoever woke us up had to first call
+	 * fuqueue->ops->wait_cancel() through fuqueue_wait_cancel(),
+	 * and thus unqueued us and set a return code in
+	 * w->result. However, if w->result is still pristine as left
+	 * by __fuqueue_wait_queue(), our waker either didn't know we
+	 * were waiting or we have signal(s) pending, so we cancel the
+	 * wait ourselves.
+	 */
+ if (unlikely (w->result == INT_MAX)) {
+ int result = -EINTR;
+ BUG_ON (wlist_empty (&w->wlist_node));
+ if (timeout == 0)
+ result = -ETIMEDOUT;
+ else
+ WARN_ON (!signal_pending (current));
+ fuqueue_wait_cancel (current, result);
+ }
+ return w->result;
+}
+
+
+
+/** @Returns true if the @fuqueue has no waiters. */
+static inline
+unsigned __fuqueue_empty (const struct fuqueue *fuqueue)
+{
+ return wlist_empty (&fuqueue->wlist);
+}
+
+
+/** Return the first waiter on a @fuqueue */
+static inline
+struct fuqueue_waiter * __fuqueue_first (struct fuqueue *fuqueue)
+{
+ return container_of (wlist_first (&fuqueue->wlist),
+ struct fuqueue_waiter, wlist_node);
+}
+
+/** Wake @howmany @fuqueue waiters with @code. */
+static inline
+void __fuqueue_wake (struct fuqueue *fuqueue, size_t howmany, int code)
+{
+ unsigned to_go = howmany;
+ struct fuqueue_waiter *w;
+
+ ftrace ("(%p, %zu, %d)\n", fuqueue, howmany, code);
+
+ while (to_go-- && !__fuqueue_empty (fuqueue)) {
+ w = __fuqueue_first (fuqueue);
+ __fuqueue_wait_unqueue (w, code);
+ wake_up_process (w->task);
+ }
+ if (likely (to_go != howmany))
+ wlist_update_prio (&fuqueue->wlist);
+}
+
+
+/** Wake @howmany waiters waiting on a @fuqueue, with return code (for
+ * them) @code */
+static inline
+int fuqueue_wake (struct fuqueue *fuqueue, size_t howmany, int code)
+{
+ unsigned long flags;
+
+ ftrace ("(%p, %zu, %d)\n", fuqueue, howmany, code);
+
+ spin_lock_irqsave (&fuqueue->lock, flags);
+ __fuqueue_wake (fuqueue, howmany, code);
+ spin_unlock_irqrestore (&fuqueue->lock, flags);
+ return 0;
+}
+
+
+/* See docs in kernel/fuqueue.c */
+extern unsigned __fuqueue_wait_cancel (struct fuqueue *, struct fuqueue_waiter *);
+
+/** Propagation of priority change */
+extern void fuqueue_chprio (struct task_struct *);
+extern void __set_prio (struct task_struct *p, int prio);
+
+
+/**
+ * Set the priority of a fuqueue waiter, repositioning it in the wait
+ * list.
+ *
+ * This does not set the prio of the process itself!
+ *
+ * @task: task to reprioritize; the new priority is taken from
+ *        @task->prio.
+ * @fuqueue: fuqueue the task is waiting for [locked]
+ * @w: waiter @task is waiting on in @fuqueue.
+ * @returns: NULL (as there is no propagation).
+ *
+ * Assumptions: the new priority differs from the waiter's current one
+ *              fuqueue->lock held
+ */
+static inline
+struct task_struct * __fuqueue_chprio (struct task_struct *task,
+ struct fuqueue *fuqueue,
+ struct fuqueue_waiter *w)
+{
+ wlist_chprio (&fuqueue->wlist, &w->wlist_node, task->prio);
+ return NULL;
+}
+
+#endif /* #ifdef __KERNEL__ */
+#endif /* #ifndef __linux_fuqueue_h__ */
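
For illustration, a minimal usage sketch (not part of the patch) of the in-kernel API above, following the quick usage guide at the top of the header:

	static struct fuqueue event_queue = fuqueue_INIT (&event_queue);

	/* Block for up to HZ jiffies; returns the waker's wake code,
	 * -ETIMEDOUT on timeout or -EINTR on a signal. */
	static int event_wait (void)
	{
		return fuqueue_wait (&event_queue, HZ);
	}

	/* Wake up to three waiters, handing them wake code 0. */
	static void event_post (void)
	{
		fuqueue_wake (&event_queue, 3, 0);
	}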
--- /dev/null Tue Dec 2 20:07:55 2003
+++ linux/include/linux/plist.h Sun Nov 16 07:21:32 2003
@@ -0,0 +1,197 @@
+/*
+ * Descending-priority-sorted double-linked list
+ *
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>.
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on simple lists (include/linux/list.h).
+ *
+ * The head will always contain the head and the highest priority of
+ * the nodes in the list. Addition is O(N), removal is O(1), change of
+ * priority is O(N)
+ *
+ * 0 is highest priority, ~0 is lowest priority.
+ *
+ * No locking is done; that is up to the caller.
+ */
+
+#ifndef _LINUX_PLIST_H_
+#define _LINUX_PLIST_H_
+
+#include <linux/list.h>
+
+/* Priority-sorted list */
+struct plist {
+ unsigned prio;
+ struct list_head node;
+};
+
+#define plist_INIT(p) \
+{ \
+ .node = LIST_HEAD_INIT((p)->node), \
+ .prio = ~0 \
+}
+
+/* Initialize a pl */
+static inline
+void plist_init (struct plist *pl)
+{
+ INIT_LIST_HEAD (&pl->node);
+ pl->prio = ~0;
+}
+
+/* Return the first node (and thus, highest priority)
+ *
+ * Assumes the plist is _not_ empty.
+ */
+static inline
+struct plist * plist_first (struct plist *plist)
+{
+ return list_entry (plist->node.next, struct plist, node);
+}
+
+/* Return if the plist is empty. */
+static inline
+unsigned plist_empty (const struct plist *plist)
+{
+ return list_empty (&plist->node);
+}
+
+/* Update the maximum priority
+ *
+ * Return !0 if the plist's maximum priority changes.
+ *
+ * __plist_update_prio() assumes the plist is not empty.
+ */
+static inline
+unsigned __plist_update_prio (struct plist *plist)
+{
+ unsigned prio = plist_first (plist)->prio;
+ if (plist->prio == prio)
+ return 0;
+ plist->prio = prio;
+ return !0;
+}
+
+static inline
+unsigned plist_update_prio (struct plist *plist)
+{
+ unsigned old_prio = plist->prio;
+ /* plist empty, max prio = ~0 */
+ plist->prio = plist_empty (plist)? ~0 : plist_first (plist)->prio;
+ return old_prio != plist->prio;
+}
+
+/* Add a node to the plist [internal]
+ *
+ * pl->prio == ~0 is a special case: lowest priority, so the node goes
+ * straight to the end of the plist. Note the strict <; we want to add
+ * a new node of a given priority after the existing nodes of that
+ * same priority (FIFO behaviour within a priority).
+ */
+static inline
+void __plist_add_sorted (struct plist *plist, struct plist *pl)
+{
+ struct list_head *itr;
+ struct plist *itr_pl;
+
+ if (pl->prio < ~0)
+ list_for_each (itr, &plist->node) {
+ itr_pl = list_entry (itr, struct plist, node);
+ if (pl->prio < itr_pl->prio)
+ break;
+ }
+ else
+ itr = &plist->node;
+ list_add_tail (&pl->node, itr);
+}
+
+/* Add a node to a plist
+ *
+ * Return !0 if the plist's maximum priority changes.
+ */
+static inline
+unsigned plist_add (struct plist *plist, struct plist *pl)
+{
+ __plist_add_sorted (plist, pl);
+ /* Are we setting a higher priority? */
+ if (pl->prio < plist->prio) {
+ plist->prio = pl->prio;
+ return !0;
+ }
+ return 0;
+}
+
+/* Return the priority of a pl node */
+static inline
+unsigned plist_prio (struct plist *pl)
+{
+ return pl->prio;
+}
+
+/* Change the priority of a pl node, without updating plist position */
+static inline
+void __plist_chprio (struct plist *pl, unsigned new_prio)
+{
+ pl->prio = new_prio;
+}
+
+/* Change the priority of a pl node updating the list's max priority.
+ *
+ * Return !0 if the plist's maximum priority changes
+ */
+static inline
+unsigned plist_chprio (struct plist *plist, struct plist *pl,
+ unsigned new_prio)
+{
+ __plist_chprio (pl, new_prio);
+ list_del (&pl->node);
+ __plist_add_sorted (plist, pl);
+ return __plist_update_prio (plist);
+}
+
+/* Remove a pl node from a plist
+ *
+ * Return !0 if the plist's maximum priority changed.
+ */
+static inline
+void __plist_rem (struct plist *pl)
+{
+ list_del_init (&pl->node);
+}
+static inline
+unsigned plist_rem (struct plist *plist, struct plist *pl)
+{
+ __plist_rem (pl);
+ return plist_update_prio (plist);
+}
+
+/* Iterate over a plist - in priority order (from high to low) */
+#define plist_for_each(pos, head) \
+	for (pos = container_of ((head)->node.next, struct plist, node), \
+	     prefetch (pos->node.next); \
+	     pos != (head); \
+	     pos = container_of (pos->node.next, struct plist, node), \
+	     prefetch (pos->node.next))
+
+#define plist_for_each_safe(pos, n, head) \
+ for (pos = container_of ((head)->node.next, struct plist, node), \
+ n = container_of (pos->node.next, struct plist, node); \
+ pos != (head); \
+ pos = n, \
+ n = container_of (pos->node.next, struct plist, node))
+
+
+/** Return !0 if node @pl is the last one on list @plist. */
+
+static inline
+unsigned plist_last (const struct plist *plist, const struct plist *pl)
+{
+ return pl->node.next == &plist->node;
+}
+
+
+#endif /* #ifndef _LINUX_PLIST_H_ */
+
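
A minimal sketch (not part of the patch) of the plist API above; note how a node's priority is set with __plist_chprio() before insertion, the same pattern the fuqueue wait-list code uses:

	static void plist_demo (void)
	{
		struct plist head = plist_INIT (&head);
		struct plist a, b;

		plist_init (&a);
		__plist_chprio (&a, 10);	/* set prio before adding */
		plist_add (&head, &a);		/* O(N); max prio now 10 */

		plist_init (&b);
		__plist_chprio (&b, 5);
		plist_add (&head, &b);		/* returns !0; max prio now 5 */

		/* plist_first (&head) == &b, as 0 is the highest priority */
		plist_rem (&head, &b);		/* O(1) removal, updates max */
	}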
--- /dev/null Tue Dec 2 20:07:55 2003
+++ linux/kernel/fuqueue.c Tue Dec 2 20:05:38 2003
@@ -0,0 +1,220 @@
+
+/*
+ * Fast User real-time/pi/pp/robust/deadlock SYNchronization
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>.
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on normal futexes (futex.c), (C) Rusty Russell.
+ * Please refer to Documentation/fusyn.txt for more info.
+ *
+ * see the doc in linux/fuqueue.h for some info.
+ */
+
+#define DEBUG 8
+
+#include <linux/fuqueue.h>
+#include <linux/sched.h>
+#include <linux/plist.h>
+#include <linux/errno.h>
+
+#if DEBUG > 0
+spinlock_t __debug_lock = SPIN_LOCK_UNLOCKED;
+#endif
+
+
+/**
+ * Wait for a @fuqueue to be woken for as much as @timeout, @returns
+ * wake up code
+ *
+ * WARNING: can only be called from process context
+ */
+int fuqueue_wait (struct fuqueue *fuqueue, signed long timeout)
+{
+ struct fuqueue_waiter w;
+
+ ftrace ("(%p, %ld)\n", fuqueue, timeout);
+
+ spin_lock_irq (&fuqueue->lock);
+ __fuqueue_wait_queue (fuqueue, &w);
+ return __fuqueue_wait_block (fuqueue, &w, timeout); /* unlocks */
+}
+
+
+/**
+ * Change the priority of a waiter.
+ *
+ * @task: task whose priority has to be changed.
+ *
+ * This is the entry point that the scheduler functions call when a
+ * task that is waiting for a fuqueue changes its priority. It calls
+ * the fuqueue-specific chprio function after safely determining
+ * which fuqueue the task is waiting for; if the specific chprio
+ * function determines that the prio change has to be propagated to
+ * another task, it keeps iterating up the chain.
+ *
+ * The task that the chprio function returns has to be returned with a
+ * get_task_struct() reference.
+ *
+ * Note the weird locking: this is one of the few places that needs
+ * to take the locks in inverse order (most take the fuqueue lock
+ * first and the task_wait lock later--FT); we need TF, so we lock T,
+ * trylock F, and if that fails, unlock T and try again.
+ */
+void fuqueue_chprio (struct task_struct *task)
+{
+ struct fuqueue_ops *ops;
+ struct fuqueue *fuqueue;
+ struct fuqueue_waiter *w;
+ int prio = task->prio;
+ unsigned long flags;
+
+ __ftrace (1, "(%p [%d])\n", task, task->pid);
+ CHECK_IRQs();
+
+ local_irq_save (flags);
+ preempt_disable();
+ get_task_struct (task);
+next:
+	/* What is the task waiting for? Safely acquire and lock it */
+ _raw_spin_lock (&task->fuqueue_wait_lock);
+ fuqueue = task->fuqueue_wait;
+ if (fuqueue == NULL) /* Ok, not waiting */
+ goto out_task_unlock;
+ if (!_raw_spin_trylock (&fuqueue->lock)) { /* Spin dance... */
+ _raw_spin_unlock (&task->fuqueue_wait_lock);
+ goto next;
+ }
+ ops = fuqueue->ops;
+ if (ops->get)
+ ops->get (fuqueue);
+
+ w = task->fuqueue_waiter;
+ _raw_spin_unlock (&task->fuqueue_wait_lock);
+ put_task_struct (task);
+
+ /* Ok, we need to propagate the prio change to the fuqueue */
+ ops = fuqueue->ops;
+ task = ops->chprio (task, fuqueue, w);
+ if (task == NULL)
+ goto out_fuqueue_unlock;
+
+ /* Do we need to propagate to the next task */
+ get_task_struct (task);
+ _raw_spin_unlock (&fuqueue->lock);
+ if (ops->put)
+ ops->put (fuqueue);
+ ldebug (1, "__set_prio (%d, %d)\n", task->pid, prio);
+ __set_prio (task, prio);
+ goto next;
+
+out_fuqueue_unlock:
+ _raw_spin_unlock (&fuqueue->lock);
+ if (ops->put)
+ ops->put (fuqueue);
+ goto out;
+
+out_task_unlock:
+ _raw_spin_unlock (&task->fuqueue_wait_lock);
+ put_task_struct (task);
+out:
+ local_irq_restore (flags);
+ preempt_enable();
+ return;
+}
+
+
+/** Cancel @task's wait on @fuqueue and update the wait list priority */
+unsigned __fuqueue_wait_cancel (struct fuqueue *fuqueue,
+ struct fuqueue_waiter *w)
+{
+ ftrace ("(%p, %p [%d])\n", fuqueue, w, w->task->pid);
+
+ __wlist_rem (&w->wlist_node);
+ return wlist_update_prio (&fuqueue->wlist);
+}
+
+
+/**
+ * Cancel the wait of a task on a fuqueue and wake it up.
+ *
+ * @task: task whose wait is to be canceled
+ *
+ * Called by:
+ * - signal_wake_up()
+ * - process_timeout()
+ * - __wake_up_common()
+ * - FIXME
+ *
+ * when the task they are about to wake is waiting on a
+ * fuqueue. Safely acquires which fuqueue the task is waiting for,
+ * references it, cleans up the task->fuqueue_wait* information, and
+ * then calls the fuqueue specific wait_cancel() function.
+ *
+ * FIXME: the entry points we get called from don't seem to cover all
+ * the cases; the perfect thing here would be to hook into
+ * try_to_wake_up()--but that is kind of tricky...
+ *
+ * Note that for this function to actually do anything, the task must
+ * be a task waiting on a fuqueue (so that means TASK_INTERRUPTIBLE
+ * and off the runqueues).
+ */
+void fuqueue_wait_cancel (struct task_struct *task, int result)
+{
+ struct fuqueue_ops *ops;
+ struct fuqueue *fuqueue;
+ struct fuqueue_waiter *w;
+ unsigned long flags;
+
+ ftrace ("(%p [%d], %d)\n", task, task->pid, result);
+
+ local_irq_save (flags);
+ preempt_disable();
+ get_task_struct (task);
+retry:
+	/* What is the task waiting for? Safely acquire and lock it */
+ _raw_spin_lock (&task->fuqueue_wait_lock);
+ fuqueue = task->fuqueue_wait;
+ if (fuqueue == NULL) /* Ok, not waiting */
+ goto out_task_unlock;
+ if (!_raw_spin_trylock (&fuqueue->lock)) { /* Spin dance... */
+ _raw_spin_unlock (&task->fuqueue_wait_lock);
+ goto retry;
+ }
+ ops = fuqueue->ops;
+ if (ops->get)
+ ops->get (fuqueue);
+
+ w = task->fuqueue_waiter;
+ w->result = result;
+
+ /* Do the specific cancel op */
+ ops->wait_cancel (fuqueue, w);
+ task->fuqueue_wait = NULL;
+ task->fuqueue_waiter = NULL;
+ _raw_spin_unlock (&task->fuqueue_wait_lock);
+ put_task_struct (task);
+ _raw_spin_unlock (&fuqueue->lock);
+ local_irq_restore (flags);
+ preempt_enable();
+ if (ops->put)
+ ops->put (fuqueue);
+ return;
+
+out_task_unlock:
+ _raw_spin_unlock (&task->fuqueue_wait_lock);
+ put_task_struct (task);
+ local_irq_restore (flags);
+ preempt_enable();
+ return;
+}
+
+
+/** Fuqueue operations for usage within the kernel */
+struct fuqueue_ops fuqueue_ops = {
+ .get = NULL,
+ .put = NULL,
+ .wait_cancel = __fuqueue_wait_cancel,
+ .chprio = __fuqueue_chprio
+};
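
The "weird locking" documented in fuqueue_chprio() and fuqueue_wait_cancel() above distills to the following sketch (not part of the patch; it assumes IRQs and preemption are already disabled, as in both callers):

	/* Most paths lock fuqueue first, task second (FT); these two
	 * functions must go task-first (TF), so they trylock the
	 * fuqueue and back off on failure instead of deadlocking
	 * against an FT holder. */
	static struct fuqueue * tf_lock (struct task_struct *task)
	{
		struct fuqueue *fuqueue;
		for (;;) {
			_raw_spin_lock (&task->fuqueue_wait_lock);	/* T */
			fuqueue = task->fuqueue_wait;
			if (fuqueue == NULL
			    || _raw_spin_trylock (&fuqueue->lock))	/* F */
				return fuqueue;	/* both held (or not waiting) */
			_raw_spin_unlock (&task->fuqueue_wait_lock);	/* retry */
		}
	}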
* [RFC/PATCH] FUSYN 6/10: user space/kernel space tracker
From: inaky.perez-gonzalez @ 2003-12-03 8:51 UTC
To: linux-kernel; +Cc: inaky.perez-gonzalez, robustmutexes
include/linux/vlocator.h | 125 ++++++++++++++
kernel/vlocator.c | 414 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 539 insertions(+)
--- /dev/null Tue Dec 2 20:07:55 2003
+++ linux/include/linux/vlocator.h Tue Dec 2 20:05:38 2003
@@ -0,0 +1,125 @@
+
+/*
+ * Fast User real-time/pi/pp/robust/deadlock SYNchronization
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>.
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on normal futexes (futex.c), (C) Rusty Russell.
+ * Please refer to Documentation/fusyn.txt for more info.
+ *
+ * Stuff to map user space addresses to kernel space objects in a
+ * controlled way.
+ */
+
+#ifndef __linux_vlocator_h__
+#define __linux_vlocator_h__
+
+#ifdef __KERNEL__
+
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <asm/atomic.h>
+#include <linux/futex.h> /* for the futex hash */
+
+
+/** Track @what by a user level vm address. */
+struct vlocator {
+ atomic_t refcount;
+ struct list_head hash_list;
+ union futex_key key;
+ const struct vlocator_ops *ops;
+};
+
+static __inline__
+void vlocator_init (struct vlocator *vlocator)
+{
+ atomic_set (&vlocator->refcount, 0);
+ INIT_LIST_HEAD (&vlocator->hash_list);
+}
+
+
+/** A single queue in the hash. */
+struct vlocator_queue
+{
+ spinlock_t lock;
+ struct list_head queue;
+};
+
+static __inline__
+void vlocator_queue_init (struct vlocator_queue *vq)
+{
+ vq->lock = SPIN_LOCK_UNLOCKED;
+ INIT_LIST_HEAD (&vq->queue);
+}
+
+struct page;
+union futex_key;
+
+/** vlocator operations on the items that are to be located. */
+struct vlocator_ops
+{
+ struct vlocator * (* alloc) (void);
+ void (* create) (struct vlocator *, struct page *,
+ const union futex_key *, unsigned flags);
+ void (* release) (struct vlocator *);
+ void (* free) (struct vlocator *);
+};
+
+
+/** Reference a vlocator @vlocator. */
+#define vl_get(vlocator) \
+do { atomic_inc (&(vlocator)->refcount); } while (0)
+
+
+/** Unreference a @vlocator; return true if the refcount drops to zero. */
+static inline
+unsigned vl_put (struct vlocator *vlocator)
+{
+ return atomic_dec_and_test (&vlocator->refcount);
+}
+
+extern int vl_find (struct page **ppage, struct vlocator **pvl,
+ const struct vlocator_ops *ops,
+ volatile const unsigned __user *uaddr);
+extern int vl_locate (struct page **ppage, struct vlocator **pvl,
+ const struct vlocator_ops *ops,
+ volatile const unsigned __user *uaddr, unsigned flags);
+
+
+/* Debug stuff */
+
+/*
+ * Helpers for debugging memory allocation [needs to be here so it can
+ * be shared by ufuqueue.c and ufulock.c].
+ */
+
+#if DEBUG >= 1
+static atomic_t allocated_count = ATOMIC_INIT(0);
+static inline
+int allocated_get (void)
+{
+ return atomic_read (&allocated_count);
+}
+static inline
+int allocated_inc (void)
+{
+ atomic_inc (&allocated_count);
+ return allocated_get();
+}
+static inline
+int allocated_dec (void)
+{
+ atomic_dec (&allocated_count);
+ return allocated_get();
+}
+#else
+static inline int allocated_get (void) { return 0; }
+static inline int allocated_inc (void) { return 0; }
+static inline int allocated_dec (void) { return 0; }
+#endif /* DEBUG >= 1 */
+
+
+#endif /* #ifdef __KERNEL__ */
+#endif /* #ifndef __linux_vlocator_h__ */
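
As a sketch (not part of the patch) of the intended calling convention: 'struct my_obj' and 'my_vops' below are hypothetical, standing in for a client like the ufuqueues of a later patch in this series.

	struct my_obj {
		struct vlocator vlocator;
		/* ... client data ... */
	};

	static struct vlocator_ops my_vops = {
		/* .alloc, .create, .release, .free filled in by the client */
	};

	static int my_obj_op (volatile const unsigned __user *uaddr)
	{
		struct vlocator *vl;
		struct page *page;
		struct my_obj *obj;
		int result;

		result = vl_locate (&page, &vl, &my_vops, uaddr, 0);
		if (result < 0)
			return result;	/* error: nothing referenced */
		obj = container_of (vl, struct my_obj, vlocator);
		/* ... operate on obj, maybe touch the page ... */
		vl_put (vl);		/* drop the vlocator reference */
		put_page (page);	/* drop the page pin */
		return 0;
	}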
--- /dev/null Tue Dec 2 20:07:55 2003
+++ linux/kernel/vlocator.c Tue Dec 2 20:05:38 2003
@@ -0,0 +1,414 @@
+
+/*
+ * Fast User real-time/pi/pp/robust/deadlock SYNchronization
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>.
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on normal futexes (futex.c), (C) Rusty Russell.
+ * Please refer to Documentation/fusyn.txt for more info.
+ */
+
+#include <linux/time.h>
+#include <linux/sched.h>
+#include <linux/futex.h>
+#include <asm/uaccess.h>
+#include <linux/init.h>
+#include <linux/pagemap.h>
+#include <linux/fuqueue.h> /* For the debug statements */
+#include <linux/vlocator.h>
+
+#define VL_GC_PERIOD 20 /* do garbage collection every 20 seconds */
+#define VL_HASH_BITS 8
+#define VL_HASH_QUEUES (1 << VL_HASH_BITS)
+static struct vlocator_queue __vl_hash[VL_HASH_QUEUES];
+
+/** Get the hash index for futex_key @k. */
+static __inline__
+struct vlocator_queue * vl_hash (const union futex_key *k)
+{
+ unsigned index = futex_hash_key (k) & (VL_HASH_QUEUES - 1);
+ return &__vl_hash[index];
+}
+
+
+/** Search in a queue. */
+static __inline__
+struct vlocator * __vlocator_find (struct vlocator_queue *vlq,
+ const union futex_key *key)
+{
+ struct vlocator *vl = NULL;
+ struct list_head *itr;
+
+ list_for_each (itr, &vlq->queue) {
+ vl = container_of (itr, struct vlocator, hash_list);
+ if (match_futex_key (key, &vl->key))
+ goto out;
+ }
+ vl = NULL;
+out:
+ return vl;
+}
+
+
+/** Search in a queue [backwards - locates faster recent additions]. */
+static inline
+struct vlocator * __vlocator_find_r (struct vlocator_queue *vlq,
+ const union futex_key *key)
+{
+ struct vlocator *vl = NULL;
+ struct list_head *itr;
+
+ list_for_each_prev (itr, &vlq->queue) {
+ vl = container_of (itr, struct vlocator, hash_list);
+ if (match_futex_key (key, &vl->key))
+ goto out;
+ }
+ vl = NULL;
+out:
+ return vl;
+}
+
+
+/** Remove @v from its queue if its use count is zero (and
+ * return true). */
+static __inline__
+unsigned __vlocator_rem (struct vlocator *v)
+{
+ unsigned result = 0, refcount;
+ refcount = atomic_read (&v->refcount);
+ if (refcount == 0) {
+ list_del (&v->hash_list);
+ result = 1;
+ }
+ return result;
+}
+
+
+/** Append @vlocator to queue @vlq. */
+static __inline__
+void __vlocator_add (struct vlocator_queue *vlq, struct vlocator *v)
+{
+ list_add (&v->hash_list, &vlq->queue);
+}
+
+
+/**
+ * Set up all the memory mumbo jumbo we need to access a user space
+ * item: locate the page where the address is and generate a futex_key
+ * for it. Return both referenced [with get_page() and get_key_refs()]
+ *
+ * Shamelessly copied from Jamie's kernel/futex.c:get_futex_key()...
+ * good invention this GPL thingie :)
+ */
+int vl_setup (struct page **ppage, union futex_key *key,
+ volatile const unsigned __user *_uaddr)
+{
+ int result;
+ struct page *page;
+ unsigned long uaddr = (unsigned long) _uaddr;
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+
+ ftrace ("(%p, %p, %p)\n", ppage, key, _uaddr);
+
+ /* The futex address must be "naturally" aligned. */
+ key->both.offset = uaddr % PAGE_SIZE;
+ if (unlikely ((key->both.offset % sizeof (u32)) != 0))
+ return -EINVAL;
+ uaddr -= key->both.offset;
+
+ /* Generate a key for the vfulock. */
+ down_read (&mm->mmap_sem);
+ /*
+ * The futex is hashed differently depending on whether
+ * it's in a shared or private mapping. So check vma first.
+ */
+ vma = find_extend_vma (mm, uaddr);
+ result = -EFAULT;
+ if (unlikely (!vma))
+ goto error_up;
+ /* Permissions. */
+ result = (vma->vm_flags & VM_IO) ? -EPERM : -EACCES;
+ if (unlikely ((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ))
+ goto error_up;
+ /*
+ * Private mappings are handled in a simple way.
+ *
+ * NOTE: When userspace waits on a MAP_SHARED mapping, even if
+ * it's a read-only handle, it's expected that futexes attach to
+ * the object not the particular process. Therefore we use
+ * VM_MAYSHARE here, not VM_SHARED which is restricted to shared
+ * mappings of _writable_ handles.
+ */
+ result = 0;
+ if (likely (!(vma->vm_flags & VM_MAYSHARE))) {
+ key->private.mm = mm;
+ key->private.uaddr = uaddr;
+ goto out_pin;
+ }
+ /* Linear file mappings are also simple. */
+ key->shared.inode = vma->vm_file->f_dentry->d_inode;
+ key->both.offset++; /* Bit 0 of offset indicates inode-based key. */
+ if (likely (!(vma->vm_flags & VM_NONLINEAR))) {
+ key->shared.pgoff = (((uaddr - vma->vm_start) >> PAGE_SHIFT)
+ + vma->vm_pgoff);
+ goto out_pin;
+ }
+
+ /*
+ * We really need to pin the page in memory, we are about to
+ * modify it.
+ */
+
+ /*
+ * Do a quick atomic lookup first - this is the fastpath.
+ */
+#warning FIXME: OPTIMIZE:
+ /*
+ * We only need to take this path when we don't know the page;
+ * we can hack vl_locate() so that if we know the key without
+ * checking the page and we find the vlocator, we can take the
+ * page from there.
+ */
+out_pin:
+ spin_lock (&mm->page_table_lock);
+ page = follow_page (mm, uaddr, 0);
+ if (likely (page != NULL)) {
+ key->shared.pgoff =
+ page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+ if (!PageReserved (page))
+ get_page (page);
+ spin_unlock (&mm->page_table_lock);
+ goto out;
+ }
+ spin_unlock (&mm->page_table_lock);
+
+ /*
+ * Do it the general way.
+ */
+ result = get_user_pages (current, mm, uaddr, 1, 0, 0, &page, NULL);
+ if (result < 0)
+ goto error_up;
+ key->shared.pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+out:
+ get_key_refs (key);
+ *ppage = page;
+error_up:
+	up_read (&current->mm->mmap_sem);
+ return result;
+}
+
+
+/**
+ * Locate a vlocator for a certain user address.
+ *
+ * @ppage: Pointer to where to store the pointer to the referenced
+ * page where @uaddr is. NULL if unneeded.
+ * @pvl: Pointer to where to store the pointer to the located
+ * vlocator based structure.
+ * @ops: Pointer to the function ops for this guy (for checking)
+ * @uaddr: Address in user space.
+ * @returns: < 0 errno code on error (no resources held)
+ * 0 if ok and:
+ *
+ * - Located vlocator in *pvl, referenced
+ * - Page pointer in *ppage (unless NULL), referenced
+ * - The key in (*pvl)->key has been referenced with get_key_refs()
+ */
+int vl_find (struct page **ppage, struct vlocator **pvl,
+ const struct vlocator_ops *ops,
+ volatile const unsigned __user *uaddr)
+{
+ int result;
+ struct vlocator_queue *vlq;
+ struct vlocator *vl;
+ union futex_key key;
+ struct page *page;
+ unsigned need_page = 0;
+
+ ftrace ("(%p, %p, %p, %p)\n", ppage, pvl, ops, uaddr);
+
+ result = vl_setup (&page, &key, uaddr);
+ if (result < 0)
+ goto error;
+ /* Is it in the hash already? [do we know about it?] */
+ vlq = vl_hash (&key);
+ spin_lock (&vlq->lock);
+	vl = __vlocator_find (vlq, &key);
+	result = -ENOENT;	/* not found; nothing stays referenced */
+	if (vl == NULL)
+		goto out_unlock;
+ /* Check the type is correct */
+ result = -EFAULT;
+ if (unlikely (vl->ops != ops))
+ goto out_unlock;
+ result = 0;
+ vl_get (vl);
+ *pvl = vl;
+ if (ppage) {
+ *ppage = page;
+ need_page = 1;
+ }
+out_unlock:
+ spin_unlock (&vlq->lock);
+	drop_key_refs (&key);		// Undo from ...
+	if (need_page == 0)
+		put_page (page);	// ... vl_setup() (maybe)
+error:
+ return result;
+}
+
+
+
+
+/**
+ * Locate or allocate a vlocator for a certain user address.
+ *
+ * @ppage: Pointer to where to store the pointer to the referenced
+ * page where @uaddr is. NULL if unneeded.
+ * @pvl: Pointer to where to store the pointer to the allocated
+ * vlocator based structure.
+ * @ops: Pointer to the function ops used for allocating/constructing, etc.
+ * @uaddr: Address in user space.
+ * @flags: flags, for initialization, passed to ops->create()
+ * @returns: < 0 errno code on error (no resources held)
+ * 0 if ok and:
+ *
+ * - Located or allocated vlocator in *pvl, referenced
+ * - Page pointer in *ppage (unless NULL), referenced
+ * - The key in (*pvl)->key has been referenced with get_key_refs()
+ *
+ * FIXME: optimize allocation collision detection; add a modification
+ * count to the hash table; whenever anything is added, inc the
+ * count. If we see the count changed after we dropped the lock,
+ * allocated and re-locked, then we need to find_r(); if not, we can
+ * assume nothing changed.
+ */
+int vl_locate (struct page **ppage, struct vlocator **pvl,
+ const struct vlocator_ops *ops,
+ volatile const unsigned __user *uaddr, unsigned flags)
+{
+ int result;
+ struct vlocator_queue *vlq;
+ struct vlocator *vl, *vl_alt;
+ union futex_key key;
+ struct page *page;
+ unsigned need_page = 0;
+
+ ftrace ("(%p, %p, %p, %p, %x)\n", ppage, pvl, ops, uaddr, flags);
+
+ result = vl_setup (&page, &key, uaddr);
+ if (result < 0)
+ goto error;
+ /* Is it in the hash already? [do we know about it?] */
+ vlq = vl_hash (&key);
+ spin_lock (&vlq->lock);
+ vl = __vlocator_find (vlq, &key);
+ if (likely (vl != NULL))
+ goto out_check;
+ spin_unlock (&vlq->lock);
+
+ /* Naaah, let's alloc it */
+ result = -ENOMEM;
+ vl = ops->alloc();
+ if (unlikely (vl == NULL))
+ goto error_alloc;
+ result = 0;
+
+ /* Ok, allocated - did somebody add it while we were allocating? */
+ spin_lock (&vlq->lock);
+ vl_alt = __vlocator_find_r (vlq, &key);
+ if (vl_alt == NULL) { /* No allocation collision, create it */
+ ops->create (vl, page, &key, flags);
+ vl->ops = ops;
+ __vlocator_add (vlq, vl);
+ goto out_ref;
+ }
+ /* Allocation collision, get the new one, discard ours */
+ ops->free (vl);
+ vl = vl_alt;
+out_check:
+ result = -EFAULT;
+ if (unlikely (vl->ops != ops)) /* Check the type is correct */
+ goto out_unlock;
+ result = 0;
+out_ref:
+ vl_get (vl);
+ *pvl = vl;
+ if (ppage) {
+ *ppage = page;
+ need_page = 1;
+ }
+out_unlock:
+ spin_unlock (&vlq->lock);
+error_alloc:
+	drop_key_refs (&key);		// Undo from ...
+	if (need_page == 0)
+		put_page (page);	// ... vl_setup() (maybe)
+error:
+ return result;
+}
+
+
+/** Work structure for the garbage collector */
+static void vl_garbage_collector (void *);
+DECLARE_WORK(vl_workqueue, vl_garbage_collector, NULL);
+
+
+/**
+ * Do garbage collection (called from the work-queue) and re-arm
+ *
+ * The most important action is to clean up the ownership of [u]fulocks
+ * that were assigned to init because the owner died and robustness
+ * was not enabled. This means that only ufulocks get to make
+ * robustness optional; kernel based fulocks always have to be robust.
+ */
+static
+void vl_garbage_collector (void *dummy)
+{
+ struct list_head *itr, *nxt;
+ static unsigned i;
+ int cnt;
+ struct vlocator_queue *vlq;
+ struct vlocator *vl;
+
+	/* Clean up the vlocator hash: anything with a zero refcount is wiped */
+ for (cnt = 0; cnt < VL_HASH_QUEUES; cnt++, i++) {
+ if (i >= VL_HASH_QUEUES)
+ i = 0;
+ vlq = &__vl_hash[i];
+ if (list_empty (&vlq->queue)) // Some cheating always helps...
+ continue;
+ spin_lock (&vlq->lock);
+ list_for_each_safe (itr, nxt, &vlq->queue) {
+ vl = container_of (itr, struct vlocator, hash_list);
+ if (__vlocator_rem (vl)) {
+ vl->ops->release (vl);
+ vl->ops->free (vl);
+ }
+ }
+ spin_unlock (&vlq->lock);
+ if ((cnt > VL_HASH_QUEUES / 8) && need_resched())
+ break;
+ }
+ /* Re-arm */
+ schedule_delayed_work (&vl_workqueue, VL_GC_PERIOD * HZ);
+}
+
+
+/** Initialize the ufu subsystem. */
+static
+int __init subsys_ufu_init (void)
+{
+ unsigned i;
+ for (i = 0; i < sizeof (__vl_hash) / sizeof (__vl_hash[0]); i++)
+ vlocator_queue_init (&__vl_hash[i]);
+	/* Set up the garbage collector to run every VL_GC_PERIOD seconds */
+ schedule_delayed_work (&vl_workqueue, VL_GC_PERIOD * HZ);
+ return 0;
+}
+__initcall (subsys_ufu_init);
+
+
* [RFC/PATCH] FUSYN 7/10: user space fuqueues
From: inaky.perez-gonzalez @ 2003-12-03 8:51 UTC
To: linux-kernel; +Cc: inaky.perez-gonzalez, robustmutexes
ufuqueue.c | 236 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 236 insertions(+)
--- /dev/null Tue Dec 2 20:07:55 2003
+++ linux/kernel/ufuqueue.c Wed Nov 19 17:32:07 2003
@@ -0,0 +1,236 @@
+
+/*
+ * Fast User real-time/pi/pp/robust/deadlock SYNchronization
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>.
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on normal futexes (futex.c), (C) Rusty Russell.
+ * Please refer to Documentation/fusyn.txt for more info.
+ */
+
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/highmem.h> /* kmap_atomic() */
+#include <linux/vlocator.h>
+#include <linux/fuqueue.h>
+
+extern signed long ufu_timespec2jiffies (const struct timespec __user *);
+static struct fuqueue_ops ufuqueue_ops;
+
+/** Slab for allocation of 'struct ufuqueue's. */
+static kmem_cache_t *ufuqueue_slab;
+
+/** A ufuqueue, tied to a user-space vm address. */
+struct ufuqueue {
+ struct fuqueue fuqueue;
+ struct vlocator vlocator;
+};
+
+
+/** Initialize an @ufuqueue (for slab object construction). */
+static inline
+void ufuqueue_ctor (void *obj, kmem_cache_t *slab, unsigned long flags)
+{
+ struct ufuqueue *ufuqueue = obj;
+ __fuqueue_init (&ufuqueue->fuqueue, &ufuqueue_ops);
+ vlocator_init (&ufuqueue->vlocator);
+}
+
+
+/** Allocate a ufuqueue maintaining debug-allocation counts. */
+static __inline__
+struct vlocator * ufuqueue_alloc (void)
+{
+ struct ufuqueue *ufuqueue;
+	ufuqueue = kmem_cache_alloc (ufuqueue_slab, GFP_KERNEL);
+	if (ufuqueue == NULL)	/* don't fabricate a vlocator pointer */
+		return NULL;
+	if (DEBUG > 0)
+		ldebug (1, "ufuqueue %p allocated, total: %d\n",
+			ufuqueue, allocated_inc());
+	return &ufuqueue->vlocator;
+}
+
+
+/**
+ * Create a ufuqueue that is going to be inserted into the hash
+ * [called from vlocator.c:vl_locate() through the vlocator ops]
+ */
+static
+void ufuqueue_create (struct vlocator *vl, struct page *page,
+ const union futex_key *key, unsigned flags)
+{
+ memcpy (&vl->key, key, sizeof (*key));
+ get_key_refs (&vl->key);
+}
+
+
+/** Release a ufuqueue's resources (drop the key reference). */
+static __inline__
+void ufuqueue_release (struct vlocator *vl)
+{
+ drop_key_refs (&vl->key);
+}
+
+
+/** Free a ufuqueue maintaining debug-allocation counts. */
+static __inline__
+void ufuqueue_free (struct vlocator *vl)
+{
+ struct ufuqueue *ufuqueue =
+ container_of (vl, struct ufuqueue, vlocator);
+	if (DEBUG > 0)
+		ldebug (1, "ufuqueue %p freed, total: %d\n",
+			ufuqueue, allocated_dec());
+	kmem_cache_free (ufuqueue_slab, ufuqueue);
+}
+
+
+struct vlocator_ops ufuqueue_vops = {
+ .alloc = ufuqueue_alloc,
+ .create = ufuqueue_create,
+ .release = ufuqueue_release,
+ .free = ufuqueue_free
+};
+
+
+/**
+ * Wait on a ufuqueue
+ *
+ * @_vfuqueue: address in user space of the fuqueue word
+ * @val: value expected at @_vfuqueue; if it differs, fail with
+ *       -EWOULDBLOCK instead of blocking.
+ * @_timeout: timeout information [see sys_ufulock_lock()]
+ *
+ * This is just a thin shell that locates the kernel descriptor for
+ * the fuqueue, queues up and then blocks in
+ * __fuqueue_wait_block().
+ */
+asmlinkage
+int sys_ufuqueue_wait (volatile unsigned __user *_vfuqueue,
+ unsigned val, struct timespec __user *_timeout)
+{
+ int result = -EINVAL;
+ struct ufuqueue *ufuqueue;
+ struct page *page;
+ char *page_kmap;
+ volatile unsigned *vfuqueue;
+ unsigned new_val;
+ struct vlocator *vl;
+ signed long timeout;
+ struct fuqueue_waiter w;
+
+ ftrace ("(%p, %x, %p)\n", _vfuqueue, val, _timeout);
+ might_sleep();
+
+ /* ufuqueue: pin pages, get keys, look up/allocate, refcount */
+ result = vl_locate (&page, &vl, &ufuqueue_vops, _vfuqueue, 0);
+ if (unlikely (result < 0))
+ goto out;
+ ufuqueue = container_of (vl, struct ufuqueue, vlocator);
+ /* We are going to lock the ufuqueue, so get the timeout first */
+ timeout = ufu_timespec2jiffies (_timeout);
+ result = (int) timeout;
+ if (timeout < 0)
+ goto out_put_vl;
+
+ spin_lock_irq (&ufuqueue->fuqueue.lock);
+ __fuqueue_wait_queue (&ufuqueue->fuqueue, &w);
+ /* Now, are we ok with the value? */
+ page_kmap = kmap_atomic (page, KM_IRQ0);
+	vfuqueue = (volatile unsigned *)
+		(page_kmap + (vl->key.both.offset & ~1));
+ new_val = *vfuqueue;
+ kunmap_atomic (page_kmap, KM_IRQ0);
+ result = -EWOULDBLOCK;
+ if (val != new_val)
+ goto out_unqueue;
+	/* ok, go ahead and wait (it will unlock and restore irqs/preempt) */
+ return __fuqueue_wait_block (&ufuqueue->fuqueue, &w, timeout);
+
+out_unqueue:
+ __fuqueue_wait_unqueue (&w, 0);
+ spin_unlock_irq (&ufuqueue->fuqueue.lock);
+out_put_vl:
+ vl_put (vl);
+ put_page (page);
+out:
+ return result;
+}
+
+
+/**
+ * Wake up waiters of a fuqueue
+ *
+ * @_vfuqueue: pointer to the fuqueue
+ * @howmany: number of waiters to wake up
+ * @code: code to return to the waiters [default it to zero]
+ * @returns: 0 if ok, < 0 errno code on error.
+ */
+asmlinkage
+int sys_ufuqueue_wake (volatile unsigned __user *_vfuqueue,
+ size_t howmany, int code)
+{
+ int result;
+ struct vlocator *vl;
+ struct ufuqueue *ufuqueue;
+
+	ftrace ("(%p, %zu, %d)\n", _vfuqueue, howmany, code);
+ might_sleep();
+
+	/* ufuqueue: get keys, look up/allocate, refcount */
+	result = vl_locate (NULL, &vl, &ufuqueue_vops, _vfuqueue, 0);
+ if (result < 0)
+ goto out;
+ ufuqueue = container_of (vl, struct ufuqueue, vlocator);
+ /* Wake'en up */
+ spin_lock_irq (&ufuqueue->fuqueue.lock);
+ __fuqueue_wake (&ufuqueue->fuqueue, howmany, code);
+ spin_unlock_irq (&ufuqueue->fuqueue.lock);
+ vl_put (vl);
+ result = 0;
+out:
+ return result;
+}
+
+
+/** Initialize the ufuqueue subsystem. */
+static
+int __init subsys_ufuqueue_init (void)
+{
+ ufuqueue_slab = kmem_cache_create ("ufuqueue", sizeof (struct ufuqueue),
+ 0, 0, ufuqueue_ctor, NULL);
+ if (ufuqueue_slab == NULL)
+ panic ("ufuqueue_init(): "
+ "Unable to initialize ufuqueue slab allocator.\n");
+ return 0;
+}
+__initcall (subsys_ufuqueue_init);
+
+
+/* Adaptors for fulock operations */
+static
+void ufuqueue_put (struct fuqueue *fuqueue)
+{
+ struct ufuqueue *ufuqueue =
+ container_of (fuqueue, struct ufuqueue, fuqueue);
+ vl_put (&ufuqueue->vlocator);
+}
+
+static
+void ufuqueue_get (struct fuqueue *fuqueue)
+{
+ struct ufuqueue *ufuqueue =
+ container_of (fuqueue, struct ufuqueue, fuqueue);
+ vl_get (&ufuqueue->vlocator);
+}
+
+/** Ufuqueue operations */
+static
+struct fuqueue_ops ufuqueue_ops = {
+ .get = ufuqueue_get,
+ .put = ufuqueue_put,
+ .wait_cancel = __fuqueue_wait_cancel,
+ .chprio = __fuqueue_chprio
+};
+
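
For illustration, a user space sketch (not part of the patch; it assumes the syscall numbers __NR_ufuqueue_wait and __NR_ufuqueue_wake get wired up elsewhere in the series, and that a NULL timeout means wait indefinitely, futex-style):

	#include <unistd.h>
	#include <sys/syscall.h>
	#include <time.h>

	static volatile unsigned queue_word;

	/* Sleep while queue_word still holds 'val'; returns
	 * -EWOULDBLOCK if the value already changed. NULL 'ts':
	 * no timeout (assumed). */
	static int queue_wait (unsigned val, struct timespec *ts)
	{
		return syscall (__NR_ufuqueue_wait, &queue_word, val, ts);
	}

	/* Wake up to 'howmany' waiters, handing them wake code 0. */
	static int queue_wake (size_t howmany)
	{
		return syscall (__NR_ufuqueue_wake, &queue_word, howmany, 0);
	}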
* [RFC/PATCH] FUSYN 8/10: kernel fulocks
From: inaky.perez-gonzalez @ 2003-12-03 8:51 UTC
To: linux-kernel; +Cc: inaky.perez-gonzalez, robustmutexes
include/linux/fulock-olist.h | 55 ++++
include/linux/fulock.h | 278 +++++++++++++++++++++
kernel/fulock.c | 560 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 893 insertions(+)
--- /dev/null Tue Dec 2 20:07:55 2003
+++ linux/include/linux/fulock-olist.h Sun Nov 16 07:21:32 2003
@@ -0,0 +1,55 @@
+
+/*
+ * Fast User real-time/pi/pp/robust/deadlock SYNchronization
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>.
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on normal futexes (futex.c), (C) Rusty Russell.
+ * Please refer to Documentation/fusyn.txt for more info.
+ *
+ * Fulock Ownership List
+ *
+ * This file is here to avoid include hell, as sched.h needs it to be
+ * able to embed a fulock ownership list into 'struct
+ * task_struct'. Its sole users are sched.h and fulock.h.
+ *
+ * This is just a redirection of the methods used to manage the list
+ * of fulocks that a task owns in a given moment.
+ *
+ * I don't think anybody in their right mind wants to use a
+ * priority-array list for this, but just in case, and to ease
+ * changing the implementation, I am redirecting it.
+ *
+ * I will pull the plug on this one, search and replace all the
+ * olist_*() for plist_*() and wipe this file out as soon as I have a
+ * minute. I consider this low prio.
+ */
+
+#ifndef __linux_fulock_olist_h__
+#define __linux_fulock_olist_h__
+
+#ifdef __KERNEL__
+
+#include <linux/plist.h>
+
+typedef struct plist olist_t;
+
+#define olist_add plist_add
+#define olist_rem plist_rem
+#define olist_init plist_init
+#define olist_INIT(plist) plist_INIT(plist)
+#define olist_for_each plist_for_each
+#define olist_for_each_safe plist_for_each_safe
+#define olist_empty plist_empty
+#define olist_first plist_first
+#define olist_prio plist_prio
+#define olist_chprio plist_chprio
+#if 0
+#define olist_update_prio plist_update_prio
+#define __olist_chprio __plist_chprio
+#endif
+
+#endif /* #ifdef __KERNEL__ */
+#endif /* #ifndef __linux_fulock_olist_h__ */
--- /dev/null Tue Dec 2 20:07:55 2003
+++ linux/include/linux/fulock.h Tue Dec 2 20:05:38 2003
@@ -0,0 +1,278 @@
+
+/*
+ * Fast User real-time/pi/pp/robust/deadlock SYNchronization
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>.
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on normal futexes (futex.c), (C) Rusty Russell.
+ * Please refer to Documentation/fusyn.txt for more info.
+ */
+
+#ifndef __linux_fulock_h__
+#define __linux_fulock_h__
+
+#include <asm/fulock.h>
+
+/**
+ * User space provided fulock flags
+ *
+ * fulocks/ufulocks are *always* robust; it is up to the caller to
+ * emulate a hang if robust behavior is not desired. However, the
+ * FULOCK_FL_RM (non-robust mutex) flag is kept and checked on exit,
+ * and if there are owned fulocks, a warning will be printed.
+ */
+#define FULOCK_FL_USER_MK 0xfffff000 /* Flags provided by user space */
+#define FULOCK_FL_PI 0x80000000 /* Priority inherit */
+#define FULOCK_FL_PP 0x40000000 /* Priority protected */
+#define FULOCK_FL_RM_SUN 0x20000000 /* Robust mutex (sun style) */
+#define FULOCK_FL_RM 0x10000000 /* Non-robust mutex [warns only] */
+#define FULOCK_FL_ERROR_CHK 0x04000000 /* Perform POSIX error checks */
+/* Priority ceiling masks */
+#define FULOCK_FL_PP_PLC_MK 0x00f00000 /* Policy */
+#define FULOCK_FL_PP_PRIO_MK 0x000ff000 /* Priority */
+
+/** Fulock consistency state */
+enum fulock_st {
+ fulock_st_healthy, /* Normal, healthy */
+ fulock_st_dead, /* Some previous owner died */
+ fulock_st_nr, /* idem, plus cannot be recovered */
+ fulock_st_init /* fulock was re-initialized */
+};
+
+
+/** Fulock consistency action */
+enum fulock_con {
+ fulock_con_nop = 0, /* No-op, just return actual consistency */
+ fulock_con_init, /* Move from any state to initialized */
+ fulock_con_heal, /* Move from dead to healthy */
+ fulock_con_nr, /* Move from dead to not-recoverable */
+};
+
+
+#ifdef __KERNEL__
+
+#include <linux/fuqueue.h>
+#include <linux/plist.h>
+#include <linux/vlocator.h>
+#include <asm/errno.h>
+
+/* Internal fulock flags */
+#define FULOCK_FL_NR 0x00000100 /* not recoverable */
+#define FULOCK_FL_DEAD 0x00000200 /* dead-owner */
+#define FULOCK_FL_NEW 0x00000800 /* recently allocated ufulock */
+#define FULOCK_FL_PP_MK 0x000000ff /* raw priority ceiling */
+
+struct fulock;
+
+/** Operations on a fulock. */
+struct fulock_ops
+{
+ struct fuqueue_ops fuqueue;
+ void (* owner_set) (struct fulock *, struct task_struct *);
+ void (* owner_reset) (struct fulock *);
+ void (* exit) (struct fulock *);
+};
+
+/* In-kernel fulock operations */
+extern struct fulock_ops fulock_ops;
+extern struct fulock_ops ufulock_ops;
+extern struct vlocator_ops ufulock_vops;
+
+
+/** A fulock, mutex usable from the kernel. */
+struct fulock {
+ struct fuqueue fuqueue;
+ struct task_struct *owner;
+ unsigned flags;
+ olist_t olist_node;
+};
+
+
+/** Initialize a @fulock with given ops */
+static __inline__
+void __fulock_init (struct fulock *fulock, struct fulock_ops *ops)
+{
+ __fuqueue_init (&fulock->fuqueue, &ops->fuqueue);
+ fulock->owner = NULL;
+ fulock->flags = 0;
+ olist_init (&fulock->olist_node);
+}
+
+/** Statically initialize a @fulock with given ops */
+#define __fulock_INIT(fulock, ops) { \
+ .fuqueue = __fuqueue_INIT (&(fulock)->fuqueue, \
+ &(ops)->fuqueue), \
+ .owner = NULL, \
+ .flags = 0, \
+ .olist_node = olist_INIT (&(fulock)->olist_node) \
+}
+
+/** Initialize a @fulock for usage within the kernel */
+static __inline__
+void fulock_init (struct fulock *fulock)
+{
+ __fulock_init (fulock, &fulock_ops);
+}
+
+/** Statically initialize a @fulock for usage within the kernel */
+#define fulock_INIT(fulock) __fulock_INIT (fulock, &fulock_ops)
+
+
+/* Primitives for locking/unlocking/waiting/signalling */
+extern int __fulock_lock (struct fulock *, signed long);
+extern int __fulock_unlock (struct fulock *, size_t, int);
+extern int __fulock_consistency (struct fulock *fulock,
+ enum fulock_con consistency);
+extern void __fulock_exit (struct fulock *);
+extern void exit_fulocks (struct task_struct *task);
+
+extern int __fulock_pp_allowed (struct fulock *);
+extern void __fulock_pp_boost (struct fulock *);
+extern void __fulock_pp_unboost (struct fulock *);
+extern int __fulock_check_deadlock (struct task_struct *, struct fulock *);
+
+/** More internal stuff for building on top of fulock */
+extern unsigned __fulock_wait_cancel (struct fuqueue *, struct fuqueue_waiter *);
+extern struct task_struct * __fulock_chprio (struct task_struct *,
+ struct fuqueue *,
+ struct fuqueue_waiter *);
+
+
+/** Check if it is ok to for current to lock @fulock */
+static __inline__
+int __fulock_lock_check (struct fulock *fulock)
+{
+ int result = 0;
+ return result;
+}
+
+
+/**
+ * Lock a fulock, maybe wait for it to be available
+ *
+ * @timeout: wait up to @timeout jiffies to acquire the fulock. If
+ *           zero, don't block, try only. If MAX_SCHEDULE_TIMEOUT,
+ *           block indefinitely until the lock is acquired.
+ *
+ * @returns: See __fulock_lock().
+ *
+ * Can ONLY be called from process context. Note __fulock_lock()
+ * releases the fulock's lock on return and re-enables IRQs and
+ * preemption.
+ */
+static __inline__
+int fulock_lock (struct fulock *fulock, signed long timeout)
+{
+ int result;
+ do {
+ spin_lock_irq (&fulock->fuqueue.lock);
+ result = __fulock_lock (fulock, timeout);
+ } while (result == -EAGAIN);
+ return result;
+}
+
+
+/**
+ * Unlock a fulock, wake up waiter(s)
+ *
+ * @f: fulock.
+ * @howmany: Wake up this many waiters; if 0, wake up only one,
+ * forcing a serialization in the acquisition of the
+ * futex, so that no other task (in user or kernel space)
+ * can acquire it.
+ * @returns: Number of tasks woken up, < 0 errno code on error.
+ *
+ * Can be called from any context [I hope].
+ */
+static __inline__
+int fulock_unlock (struct fulock *fulock, size_t howmany)
+{
+ int result;
+ unsigned long flags;
+
+ ftrace ("(%p, %zu)\n", fulock, howmany);
+
+ spin_lock_irqsave (&fulock->fuqueue.lock, flags);
+ result = __fulock_unlock (fulock, howmany, 0);
+ spin_unlock_irqrestore (&fulock->fuqueue.lock, flags);
+ return result;
+}
+
+
+/**
+ * Set the consistency of a fulock
+ *
+ * @f: fulock to set
+ * @consistency: New consistency state to move it to (unless it is
+ * fulock_con_nop, which is a nop and can be used to get
+ * the actual consistency state); see enum fulock_con.
+ *
+ * @returns: 'enum fulock_st' consistency state; < 0 errno code on
+ * error.
+ *
+ * FIXME: this function set is kind of too convoluted, I am afraid.
+ *
+ * Can only be called from process context, as it checks whether
+ * current is the current owner.
+ */
+static __inline__
+int fulock_consistency (struct fulock *fulock, enum fulock_con consistency)
+{
+ int result;
+ unsigned long flags;
+ ftrace ("(%p, %d)\n", fulock, consistency);
+
+ spin_lock_irqsave (&fulock->fuqueue.lock, flags);
+ result = __fulock_consistency (fulock, consistency);
+ spin_unlock_irqrestore (&fulock->fuqueue.lock, flags);
+ return result;
+}
+
+
+/** A ufulock, tied to a user-space vm address. */
+struct ufulock {
+ struct fulock fulock;
+ struct vlocator vlocator;
+ struct page *page;
+};
+
+
+/** @Return true if the fulock @f has no waiters. */
+static __inline__
+unsigned __fulock_empty (const struct fulock *f)
+{
+ return __fuqueue_empty (&f->fuqueue);
+}
+
+
+/** [Internal] Make task @task the owner of fulock @f. */
+static __inline__
+void __fulock_owner_set (struct fulock *fulock, struct task_struct *task)
+{
+ ftrace ("(%p, %p [%d])\n", fulock, task, task->pid);
+ CHECK_IRQs();
+
+ fulock->owner = task;
+ _raw_spin_lock (&task->fulock_olist_lock);
+ olist_add (&task->fulock_olist, &fulock->olist_node);
+ _raw_spin_unlock (&task->fulock_olist_lock);
+}
+
+
+/** [Internal] Reset ownership of fulock @fulock. */
+static __inline__
+void __fulock_owner_reset (struct fulock *fulock)
+{
+ struct task_struct *owner = fulock->owner;
+ ftrace ("(%p)\n", fulock);
+ CHECK_IRQs();
+
+ _raw_spin_lock (&owner->fulock_olist_lock);
+ olist_rem (&owner->fulock_olist, &fulock->olist_node);
+ _raw_spin_unlock (&owner->fulock_olist_lock);
+ fulock->owner = NULL;
+}
+
+#endif /* #ifdef __KERNEL__ */
+#endif /* #ifndef __linux_fulock_h__ */
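As a quick orientation -- only a sketch, since this patch shows neither a fulock initializer nor its in-kernel users -- kernel code would drive the API above roughly like this; the -EOWNERDEAD/consistency dance is what distinguishes a robust fulock from a plain mutex:

    static struct fulock f;  /* assume initialized analogously to fuqueue_INIT() */

    int do_work (void)
    {
            int result = fulock_lock (&f, MAX_SCHEDULE_TIMEOUT);
            if (result == -EOWNERDEAD) {
                    /* previous owner died; repair the protected data,
                     * then mark the fulock healthy again */
                    fulock_consistency (&f, fulock_con_heal);
            }
            else if (result < 0)
                    return result;  /* -ENOTRECOVERABLE, -EDEADLK ... */
            /* ... critical section ... */
            return fulock_unlock (&f, 0);   /* pass lock to first waiter */
    }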
--- /dev/null Tue Dec 2 20:07:55 2003
+++ linux/kernel/fulock.c Tue Dec 2 20:05:38 2003
@@ -0,0 +1,560 @@
+
+/*
+ * Fast User real-time/pi/pp/robust/deadlock SYNchronization
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>.
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on normal futexes (futex.c), (C) Rusty Russell.
+ * Please refer to Documentation/fusyn.txt for more info.
+ */
+
+#include <linux/fulock.h>
+#include <linux/plist.h>
+#include <linux/time.h> /* struct timespec */
+#include <linux/sched.h> /* MAX_SCHEDULE_TIMEOUT */
+#include <linux/errno.h>
+
+/** Get the real priority from the real-time priority of a task. */
+static inline
+unsigned long prio_from_rt_prio (struct task_struct *task)
+{
+ return MAX_USER_RT_PRIO-1 - task->rt_priority;
+}
+
+
+/**
+ * Perform priority inheritance over the ownership chain
+ *
+ * @fulock: a waiter in this fulock was re-prioritized and the wlist
+ * max priority changed; propagate that change. It has to be
+ * locked.
+ *
+ * Updates the position of the @fulock on its owner's ownership list
+ * and if this results on a changed maximum ownership list priority,
+ * propagate it up.
+ *
+ * This is also used for un-boosting when a waiter stops waiting and
+ * the priorities have to be shuffled back.
+ *
+ * WARNING: Needs to be called with IRQs and preemption disabled.
+ */
+void __fulock_pi_boost (struct fulock *fulock)
+{
+ unsigned prio_changed;
+ struct task_struct *owner = fulock->owner;
+ unsigned long new_prio = wlist_prio (&fulock->fuqueue.wlist);
+
+ __ftrace (1, "(%p)\n", fulock);
+ CHECK_IRQs();
+
+ _raw_spin_lock (&owner->fulock_olist_lock);
+ prio_changed = olist_chprio (&owner->fulock_olist, &fulock->olist_node,
+ new_prio);
+ _raw_spin_unlock (&owner->fulock_olist_lock);
+ if (prio_changed) {
+ new_prio = min (prio_from_rt_prio (owner), new_prio);
+ if (new_prio != owner->prio) {
+ ldebug (1, "__set_prio (%d, %lu)\n", owner->pid, new_prio);
+ __set_prio (owner, new_prio);
+ /* Now the priority changed for the owner, so we need
+ * to propagate it */
+ fuqueue_chprio (owner);
+ }
+ }
+}
+
+
+/**
+ * Test if a to-be queued task would deadlock if it waits for a fulock.
+ *
+ * Simple as it is, it looks ugly as hell. Basically, the fulock we
+ * are about to lock will have an owner, so we check the owner; if it
+ * is us, deadlock; if not, we see if the owner is waiting for
+ * anything; if so, we check that it is a fulock, and if so, who its
+ * owner is; if it is us, then deadlock; if not, start again ...
+ *
+ * Now, the trick is to keep the reference counts, and the locks and
+ * all the crap. A single lock for everything would be *so* beautiful
+ * (and not scalable :).
+ *
+ * @task: the task to check
+ * @fulock: the fulock to check
+ * @returns: 0 if ok, < 0 errno on error.
+ *
+ * This *should* be safe being lock-less [famous last words].
+ *
+ * WARNING: Needs to be called with IRQs and preemption disabled.
+ */
+int __fulock_check_deadlock (struct task_struct *task, struct fulock *fulock)
+{
+ int result = 0;
+ struct fuqueue_ops *ops;
+ struct task_struct *owner;
+ struct fuqueue *fuqueue;
+
+ ftrace ("(%p, %p)\n", task, fulock);
+ CHECK_IRQs();
+
+ /* first fulock to trace is already locked and we won't unlock it */
+ owner = fulock->owner;
+ if (owner == NULL)
+ goto out;
+ result = -EDEADLK;
+ if (owner == task)
+ goto out;
+ get_task_struct (owner);
+next:
+ result = 0;
+ /* Who is the owner waiting for? safely acquire and lock it */
+ _raw_spin_lock (&owner->fuqueue_wait_lock);
+ fuqueue = owner->fuqueue_wait;
+ if (fuqueue == NULL) /* Ok, not waiting */
+ goto out_owner_unlock;
+ if (!_raw_spin_trylock (&fuqueue->lock)) { /* Spin dance... */
+ _raw_spin_unlock (&owner->fuqueue_wait_lock);
+ goto next;
+ }
+ ops = fuqueue->ops;
+ if (ops->get)
+ ops->get (fuqueue);
+ _raw_spin_unlock (&owner->fuqueue_wait_lock);
+ put_task_struct (owner);
+
+ /* Is a fulock whatever the owner is waiting for? */
+ if (ops != &fulock_ops.fuqueue && ops != &ufulock_ops.fuqueue)
+ goto out_fuqueue_unlock;
+ fulock = container_of (fuqueue, struct fulock, fuqueue);
+ owner = fulock->owner; /* Who's the fulock's owner? */
+ if (unlikely (owner == NULL)) /* Released before we locked it? */
+ goto out_fuqueue_unlock;
+ result = -EDEADLK; /* It is us? ooops */
+ if (owner == task)
+ goto out_fuqueue_unlock;
+
+ /* What's the owner waiting for? Proceed to it */
+ get_task_struct (owner);
+ _raw_spin_unlock (&fulock->fuqueue.lock);
+ if (ops->put)
+ ops->put (fuqueue);
+ goto next;
+
+out_owner_unlock:
+ _raw_spin_unlock (&owner->fuqueue_wait_lock);
+ put_task_struct (owner);
+ return result;
+
+out_fuqueue_unlock:
+ _raw_spin_unlock (&fulock->fuqueue.lock);
+ if (ops->put)
+ ops->put (fuqueue);
+out:
+ return result;
+}
+
+
+/**
+ * [Try]lock a fulock
+ *
+ * @f: fulock to [try]lock.
+ * @timeout: Time to wait for the lock to be acquired. If 0, trylock
+ * only; if MAX_SCHEDULE_TIMEOUT, wait forever; else, wait
+ * the given time.
+ * @returns: 0 Acquired the fulock
+ * -EOWNERDEAD Acquired the fulock, some previous owner died.
+ * -ENOTRECOVERABLE Not acquired, fulock is not recoverable
+ * -EBUSY Not acquired, it is locked [and trylock was
+ * requested].
+ * -EAGAIN Not acquired, try again [FIXME: change this,
+ * POSIX uses it for mutexes].
+ * * Not acquired, error.
+ *
+ * Needs f->lock held; on return, it will have been released.
+ *
+ * Note that some operations involving flag reading don't need locking
+ * because to change those values, the calling task needs to be the
+ * owner of the fulock, and once we come out of the wait without
+ * errors, we are the owner.
+ *
+ * WARNING: Needs to be called with IRQs and preemption disabled [the
+ * fuqueue lock acquired with spin_lock_irq()].
+ */
+int __fulock_lock (struct fulock *fulock, signed long timeout)
+{
+ int fulock_is_pp;
+ int result = 0;
+ unsigned prio_changed;
+ struct fulock_ops *ops =
+ container_of (fulock->fuqueue.ops, struct fulock_ops, fuqueue);
+ struct fuqueue_waiter w;
+
+ ftrace ("(%p [flags 0x%x], %ld)\n", fulock, fulock->flags, timeout);
+
+ result = -ENOTRECOVERABLE;
+ if (fulock->flags & FULOCK_FL_NR) /* can we lock? */
+ goto out_unlock;
+ if (fulock->flags & FULOCK_FL_ERROR_CHK) {
+ result = __fulock_check_deadlock (current, fulock);
+ if (result < 0)
+ goto out_unlock;
+ }
+ fulock_is_pp = fulock->flags & FULOCK_FL_PP;
+ if (fulock_is_pp) {
+ result = __fulock_pp_allowed (fulock);
+ if (result)
+ goto out_unlock;
+ }
+ if (fulock->owner == NULL) /* Unlocked? take it */
+ goto its_unlocked;
+ /* Nchts, we have to wait. */
+ result = -EBUSY;
+ if (!timeout)
+ goto out_unlock;
+ prio_changed = __fuqueue_wait_queue (&fulock->fuqueue, &w);
+ if (prio_changed && fulock->flags & FULOCK_FL_PI)
+ __fulock_pi_boost (fulock);
+ return __fuqueue_wait_block (&fulock->fuqueue, &w, timeout);
+
+its_unlocked:
+ result = fulock->flags & FULOCK_FL_DEAD? -EOWNERDEAD : 0;
+ ops->owner_set (fulock, current);
+ if (fulock_is_pp)
+ __fulock_pp_boost (fulock);
+out_unlock:
+ _raw_spin_unlock (&fulock->fuqueue.lock);
+ local_irq_enable();
+ preempt_enable();
+ return result;
+}
+
+
+/**
+ * Unlock a fulock, wake up waiter(s) [internal version]
+ *
+ * @fulock: Address for the fulock (kernel space).
+ * @howmany: Wake up this many waiters; if 0, wake up only one,
+ * forcing a serialization in the acquisition of the
+ * futex, so that no other task (in user or kernel space)
+ * can acquire it.
+ * @code: If waking up in parallel mode, return code to be passed to
+ * the waiters as a result of their wait.
+ * @returns: 0 if the fulock was unlocked/there are no waiters left or
+ * howmany > 0
+ * 1 fulock unlocked, there was one waiter (now none)
+ * 2 fulock unlocked, there was more than one waiter
+ *
+ * ufulock_unlock() uses this to update the vfulock, except
+ * if howmany > 0.
+ *
+ * Requires fulock->fuqueue.lock held, IRQs & preempt disabled!!!
+ */
+int __fulock_unlock (struct fulock *fulock, size_t howmany, int code)
+{
+ int result = 0;
+ struct fulock_ops *ops =
+ container_of (fulock->fuqueue.ops, struct fulock_ops, fuqueue);
+ struct task_struct *owner;
+ struct fuqueue_waiter *w;
+
+ ftrace ("(%p, %zu, %d)\n", fulock, howmany, code);
+ CHECK_IRQs();
+
+ if (fulock->owner == NULL) /* Unlocked? */
+ goto out;
+ owner = fulock->owner;
+ ops->owner_reset (fulock);
+ if (__fulock_empty (fulock))
+ goto out_unboost;
+ if (howmany > 0) { /* Parallel unlock */
+ code = code == 0? -EAGAIN : code;
+ while (howmany-- && !__fuqueue_empty (&fulock->fuqueue)) {
+ w = __fuqueue_first (&fulock->fuqueue);
+ __fuqueue_wait_unqueue (w, code);
+ wake_up_process (w->task);
+ }
+ }
+ else { /* Serialized unlock */
+ w = __fuqueue_first (&fulock->fuqueue);
+ ops->owner_set (fulock, w->task);
+ if (fulock->flags & FULOCK_FL_PP)
+ __fulock_pp_boost (fulock);
+ code = fulock->flags & FULOCK_FL_DEAD? -EOWNERDEAD : 0;
+ __fuqueue_wait_unqueue (w, code);
+ wake_up_process (w->task);
+ result = __fulock_empty (fulock)? 1 : 2;
+ }
+ /* Now, once we have done it, we can unboost PI or PP */
+out_unboost:
+ if (fulock->flags & FULOCK_FL_PP)
+ __fulock_pp_unboost (fulock);
+ else if (fulock->flags & FULOCK_FL_PI
+ && owner->prio != prio_from_rt_prio (owner)) {
+ /* We were boosted, undo that */
+ ldebug (1, "__set_prio (%d, %lu)\n", owner->pid, prio_from_rt_prio (owner));
+ __set_prio (owner, prio_from_rt_prio (owner));
+ fuqueue_chprio (owner); /* owner might be waiting */
+ }
+out:
+ return result;
+}
+
+
+/**
+ * Set the consistency of a fulock, get previous.
+ *
+ * @fulock: fulock whose consistency is to be set [fulock->fuqueue.lock
+ * has to be held].
+ * @consistency: New consistency state to move it to (unless it is
+ * fulock_con_nop, which is a nop and can be used to get
+ * the actual consistency state).
+ *
+ * @returns: consistency state
+ */
+int __fulock_consistency (struct fulock *fulock, enum fulock_con consistency)
+{
+ int result;
+ enum fulock_st state;
+
+ ftrace ("(%p, %d)\n", fulock, consistency);
+ CHECK_IRQs();
+
+ /* Gather actual state */
+ if (fulock->flags & FULOCK_FL_NR)
+ state = fulock_st_nr;
+ else if (fulock->flags & FULOCK_FL_DEAD)
+ state = fulock_st_dead;
+ else
+ state = fulock_st_healthy;
+
+ /* Now, what to do? */
+ switch (consistency) {
+ /* Nothing, so just say how are we standing */
+ case fulock_con_nop:
+ result = state;
+ break;
+
+ /* Reinitialize it */
+ case fulock_con_init:
+ fulock->flags &= ~(FULOCK_FL_DEAD | FULOCK_FL_NR);
+ __fulock_unlock (fulock, (size_t) ~0, -ENOENT);
+ result = fulock_st_init;
+ break;
+
+ /* Mark it healthy */
+ case fulock_con_heal:
+ result = -EINVAL;
+ if (state != fulock_st_dead)
+ break;
+ result = -EPERM;
+ if (fulock->owner != current) /* Who are you? */
+ break;
+ fulock->flags &= ~FULOCK_FL_DEAD;
+ result = fulock_st_healthy;
+ break;
+
+ /* Make it not recoverable; wake up every waiter with error;
+ * unlock. */
+ case fulock_con_nr:
+ result = -EINVAL;
+ if (state != fulock_st_dead)
+ break;
+ result = -EPERM;
+ if (fulock->owner != current) /* Who are you? */
+ break;
+ result = __fulock_unlock (fulock, (size_t)~0, -ENOTRECOVERABLE);
+ if (result >= 0) {
+ fulock->flags &= ~FULOCK_FL_DEAD;
+ fulock->flags |= FULOCK_FL_NR;
+ result = fulock_st_nr;
+ }
+ break;
+
+ default:
+ result = -EINVAL;
+ }
+ return result;
+}
+
+
+/**
+ * Set the priority of a fulock waiter.
+ *
+ * @task: task to re-prioritize
+ * @fuqueue: fuqueue of the fulock the task is waiting for [locked]
+ * @w: waiter @task is waiting on in @fuqueue.
+ * @prio: new priority
+ * @returns: NULL if there is nothing to propagate, otherwise the
+ * task to propagate to.
+ *
+ * This does not set the prio of the process itself!
+ *
+ * This will just reposition it in the wait list and if priority
+ * inheritance is enabled, reposition in the ownership list.
+ *
+ * Now, after repositioning in the wait list, we have to do the same
+ * in the ownership list. If we have set a new maximum, and that
+ * maximum is different to the current task->prio, then we have to
+ * update, so we return a task pointer that needs to be referenced.
+ *
+ * WARNING: Needs to be called with IRQs and preemption disabled.
+ */
+struct task_struct * __fulock_chprio (struct task_struct *task,
+ struct fuqueue *fuqueue,
+ struct fuqueue_waiter *w)
+{
+ unsigned prio_changed;
+ unsigned long new_prio;
+ struct fuqueue_ops *ops;
+ struct fulock *fulock;
+ struct task_struct *owner = NULL;
+
+ ftrace ("(%p [%d], %p, %p)\n", task, task->pid, fuqueue, w);
+ CHECK_IRQs();
+
+ /* Verify this is really a fulock */
+ ops = fuqueue->ops;
+ BUG_ON (ops != &fulock_ops.fuqueue && ops != &ufulock_ops.fuqueue);
+ fulock = container_of (fuqueue, struct fulock, fuqueue);
+ /* Update the wlist of our fulock */
+ __fuqueue_chprio (task, fuqueue, w); /* fuqueue is locked */
+#warning FIXME: trap if, in the middle, the task stopped waiting?
+ /* Now, if we ain't PI, there is no point in continuing */
+ if (!(fulock->flags & FULOCK_FL_PI))
+ goto out;
+ /* And if it is unlocked, somebody unlocked just before we
+ * came here, so we'll do nothing */
+ owner = fulock->owner;
+ if (unlikely (owner == NULL))
+ goto out;
+ /* Ok, we have to propagate, reposition in the ownership list,
+ * and if the max prio changed and it is higher than the
+ * owner's priority, then we have to go with him */
+ new_prio = wlist_prio (&fuqueue->wlist);
+ _raw_spin_lock (&owner->fulock_olist_lock);
+ prio_changed = olist_chprio (&owner->fulock_olist, &fulock->olist_node,
+ new_prio);
+ _raw_spin_unlock (&owner->fulock_olist_lock);
+ if (prio_changed) {
+ new_prio = min (prio_from_rt_prio (owner), new_prio);
+ if (new_prio != owner->prio)
+ goto out;
+ }
+ owner = NULL;
+out:
+ return owner;
+}
+
+
+/**
+ * Initialize fulock-specific stuff for a task
+ */
+void init_fulock (struct task_struct *task)
+{
+ __ftrace (0, "(task %p)\n", task);
+
+ spin_lock_init (&task->fuqueue_wait_lock);
+ task->fuqueue_wait = NULL;
+ task->fuqueue_waiter = NULL;
+ spin_lock_init (&task->fulock_olist_lock);
+ olist_init (&task->fulock_olist);
+}
+
+
+/** Release as dead a @fulock because the owner is exiting. */
+void __fulock_exit (struct fulock *fulock)
+{
+ ftrace ("(%p)\n", fulock);
+
+ fulock->flags |= FULOCK_FL_DEAD;
+ if (!(fulock->flags & FULOCK_FL_RM))
+ printk (KERN_WARNING "Task %d [%s] exited holding non-robust "
+ "fulock %p; waiters might block for ever\n",
+ current->pid, current->comm, fulock);
+ __fulock_unlock (fulock, 0, -EOWNERDEAD);
+}
+
+
+/**
+ * When @task exits, release all the fulocks it holds as dead.
+ *
+ * We have to discriminate between ufulocks and locks; when it is an
+ * ufulock, we just need to see if we have to put() the reference that
+ * the owner had or not (when the ownership is successfully passed to
+ * somebody else, we don't have to put it).
+ */
+#warning FIXME: hook up to exec()?
+void exit_fulocks (struct task_struct *task)
+{
+ olist_t *itr;
+ struct fulock *fulock;
+ struct fulock_ops *ops;
+ unsigned long flags;
+
+ if (DEBUG > 0 && !olist_empty (&task->fulock_olist))
+ ftrace ("(%p [%d])\n", task, task->pid);
+
+ /* FIXME: there is a better way to do this, but I feel toooo
+ * thick today -- the problem is fulock->ops->exit() is going
+ * to take fulock_olist_lock to reset the ownership...so
+ * whatever it is, we have to call without holding it. */
+ local_irq_save (flags);
+ preempt_disable();
+ _raw_spin_lock (&task->fulock_olist_lock);
+ while (!olist_empty (&task->fulock_olist)) {
+ itr = olist_first (&task->fulock_olist);
+ fulock = container_of (itr, struct fulock, olist_node);
+ ldebug (7, "task %p [%d] still owns fulock %p\n",
+ task, task->pid, fulock);
+ ops = container_of (fulock->fuqueue.ops,
+ struct fulock_ops, fuqueue);
+ if (ops->fuqueue.get)
+ ops->fuqueue.get (&fulock->fuqueue);
+ _raw_spin_unlock (&task->fulock_olist_lock);
+
+ _raw_spin_lock (&fulock->fuqueue.lock);
+ ops->exit (fulock); /* marks it dead, wakes waiters */
+ _raw_spin_unlock (&fulock->fuqueue.lock);
+
+ if (ops->fuqueue.put)
+ ops->fuqueue.put (&fulock->fuqueue);
+ _raw_spin_lock (&task->fulock_olist_lock);
+ }
+ _raw_spin_unlock (&task->fulock_olist_lock);
+ local_irq_restore (flags);
+ preempt_enable();
+}
+
+
+/** Cancel @task's wait on @fuqueue and update the wait list priority */
+unsigned __fulock_wait_cancel (struct fuqueue *fuqueue,
+ struct fuqueue_waiter *w)
+{
+ unsigned prio_change;
+ struct fulock *fulock =
+ container_of (fuqueue, struct fulock, fuqueue);
+
+ ftrace ("(%p, %p [%d], %p)\n",
+ fuqueue, w, w->task->pid, w);
+
+ prio_change = __fuqueue_wait_cancel (fuqueue, w);
+ ldebug (2, "prio_change is %u\n", prio_change);
+ if (prio_change && (fulock->flags & FULOCK_FL_PI))
+ __fulock_pi_boost (fulock);
+ return prio_change;
+}
+
+
+/** Fulock operations */
+struct fulock_ops fulock_ops = {
+ .fuqueue = {
+ .get = NULL,
+ .put = NULL,
+ .wait_cancel = __fulock_wait_cancel,
+ .chprio = __fulock_chprio
+ },
+ .owner_set = __fulock_owner_set,
+ .owner_reset = __fulock_owner_reset,
+ .exit = __fulock_exit
+};
+
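To make the two unlock modes of __fulock_unlock() concrete -- same hypothetical fulock f as in the header's sketch:

    fulock_unlock (&f, 0);  /* serialized: ownership is handed directly
                             * to the highest-priority waiter */

    fulock_unlock (&f, 5);  /* parallel: up to five waiters wake up with
                             * -EAGAIN and must race through fulock_lock()
                             * again -- hence its retry loop */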
* Re: [RFC/PATCH] FUSYN 5/10: kernel fuqueues
2003-12-03 8:51 ` [RFC/PATCH] FUSYN 5/10: kernel fuqueues inaky.perez-gonzalez
2003-12-03 8:51 ` [RFC/PATCH] FUSYN 6/10: user space/kernel space tracker inaky.perez-gonzalez
@ 2003-12-11 23:30 ` Matt Mackall
2003-12-11 23:55 ` Gene Heskett
2003-12-14 16:15 ` Pavel Machek
2003-12-12 19:21 ` William Lee Irwin III
2 siblings, 2 replies; 9+ messages in thread
From: Matt Mackall @ 2003-12-11 23:30 UTC (permalink / raw)
To: inaky.perez-gonzalez; +Cc: linux-kernel, robustmutexes
On Wed, Dec 03, 2003 at 12:51:34AM -0800, inaky.perez-gonzalez@intel.com wrote:
> include/linux/fuqueue.h | 451 ++++++++++++++++++++++++++++++++++++++++++++++++
> include/linux/plist.h | 197 ++++++++++++++++++++
> kernel/fuqueue.c | 220 +++++++++++++++++++++++
> 3 files changed, 868 insertions(+)
>
> +++ linux/include/linux/fuqueue.h Wed Nov 19 16:42:50 2003
I don't suppose you've run this feature name past anyone in marketing
or PR?
--
Matt Mackall : http://www.selenic.com : Linux development and consulting
* Re: [RFC/PATCH] FUSYN 5/10: kernel fuqueues
2003-12-11 23:30 ` [RFC/PATCH] FUSYN 5/10: kernel fuqueues Matt Mackall
@ 2003-12-11 23:55 ` Gene Heskett
2003-12-14 16:15 ` Pavel Machek
1 sibling, 0 replies; 9+ messages in thread
From: Gene Heskett @ 2003-12-11 23:55 UTC (permalink / raw)
To: Matt Mackall, inaky.perez-gonzalez; +Cc: linux-kernel, robustmutexes
On Thursday 11 December 2003 18:30, Matt Mackall wrote:
>On Wed, Dec 03, 2003 at 12:51:34AM -0800,
>inaky.perez-gonzalez@intel.com wrote:
>> include/linux/fuqueue.h | 451
>> ++++++++++++++++++++++++++++++++++++++++++++++++
>> include/linux/plist.h | 197 ++++++++++++++++++++
>> kernel/fuqueue.c | 220 +++++++++++++++++++++++
>> 3 files changed, 868 insertions(+)
>>
>> +++ linux/include/linux/fuqueue.h Wed Nov 19 16:42:50 2003
>
>I don't suppose you've run this feature name past anyone in
> marketing or PR?
Obviously not...
--
Cheers, Gene
AMD K6-III@500mhz 320M
Athlon1600XP@1400mhz 512M
99.22% setiathome rank, not too shabby for a WV hillbilly
Yahoo.com attornies please note, additions to this message
by Gene Heskett are:
Copyright 2003 by Maurice Eugene Heskett, all rights reserved.
* Re: [RFC/PATCH] FUSYN 5/10: kernel fuqueues
2003-12-03 8:51 ` [RFC/PATCH] FUSYN 5/10: kernel fuqueues inaky.perez-gonzalez
2003-12-03 8:51 ` [RFC/PATCH] FUSYN 6/10: user space/kernel space tracker inaky.perez-gonzalez
2003-12-11 23:30 ` [RFC/PATCH] FUSYN 5/10: kernel fuqueues Matt Mackall
@ 2003-12-12 19:21 ` William Lee Irwin III
2 siblings, 0 replies; 9+ messages in thread
From: William Lee Irwin III @ 2003-12-12 19:21 UTC (permalink / raw)
To: inaky.perez-gonzalez; +Cc: linux-kernel, robustmutexes
On Wed, Dec 03, 2003 at 12:51:34AM -0800, inaky.perez-gonzalez@intel.com wrote:
> +static inline void __debug_outb (unsigned val, int port) {
> + __asm__ __volatile__ ("outb %b0,%w1" : : "a" (val), "Nd" (port));
> +}
> +static inline unsigned __debug_inb (int port) {
> + unsigned value;
> + __asm__ __volatile__ ("inb %w1,%b0" : "=a" (value) : "Nd" (port));
> + return value;
> +}
> +static inline
> +void __debug_printstr (const char *str) {
> + const int port = 0x03f8;
> + while (*str) {
> + while (!(__debug_inb (port + UART_LSR) & UART_LSR_THRE));
> + __debug_outb (*str++, port+UART_TX);
> + }
> + __debug_outb ('\r', port + UART_TX);
> +}
> +#endif
In general, this kind of debug code shouldn't go in "shipped" patches.
On Wed, Dec 03, 2003 at 12:51:34AM -0800, inaky.perez-gonzalez@intel.com wrote:
> +#define assert(c, a...) do { if ((DEBUG >= 0) && !(c)) BUG(); } while (0)
assert() is a no-no in Linux, for various reasons.
On Wed, Dec 03, 2003 at 12:51:34AM -0800, inaky.perez-gonzalez@intel.com wrote:
> + * FIXME: is it worth to have get/put? maybe they should be enforced
> + * for every fuqueue, this way we don't have to query the ops
> + * structure for the get/put method and if it is there, call
> + * it. We'd have to move the get/put ops over to the vlocator,
> + * but that's not much of a problem.
> + * The decision factor is that an atomic operation needs to
> + * lock the whole bus and is not as scalable as testing a ptr
> + * for NULLness.
> + * For simplicity, probably the idea of forcing the refcount in
> + * the fuqueue makes sense.
Basically, if there aren't multiple implementations of ->get()/->put()
that need to be disambiguated, this should get left out.
On Wed, Dec 03, 2003 at 12:51:34AM -0800, inaky.perez-gonzalez@intel.com wrote:
> +#if DEBUG > 0
> +/* BUG_ON() firing? Temporary fix, do you have CONFIG_PREEMPT enabled?
> + * either that or disable DEBUG (force #define it to zero). */
> +#define CHECK_IRQs() do { BUG_ON (!in_atomic()); } while (0)
> +#else
> +#define CHECK_IRQs() do {} while (0)
> +#endif
This kind of thing isn't good to have in shipped patches either.
On Wed, Dec 03, 2003 at 12:51:34AM -0800, inaky.perez-gonzalez@intel.com wrote:
> +/* Priority-sorted list */
> +struct plist {
> + unsigned prio;
> + struct list_head node;
> +};
Maybe the expectation is for short lists, but it might be better to use
an rbtree or other sublinear insertion/removal structure for this. It
would be nice if we had a generic heap structure for this sort of affair.
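For reference, insertion into a priority-sorted list of this shape is linear in the number of queued nodes; a sketch (not the patch's actual plist code):

    #include <linux/list.h>

    /* O(n) walk: insert @new before the first entry with a lower
     * priority (higher numeric value); appends at the tail if none */
    static void plist_insert (struct plist *head, struct plist *new)
    {
            struct plist *itr;
            list_for_each_entry (itr, &head->node, node)
                    if (new->prio < itr->prio)
                            break;
            list_add_tail (&new->node, &itr->node);
    }

That is fine for a handful of waiters, but degrades exactly where an rbtree's O(log n) insertion would not.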
On Wed, Dec 03, 2003 at 12:51:34AM -0800, inaky.perez-gonzalez@intel.com wrote:
> +void fuqueue_chprio (struct task_struct *task)
> +{
> + struct fuqueue_ops *ops;
> + struct fuqueue *fuqueue;
> + struct fuqueue_waiter *w;
> + int prio = task->prio;
> + unsigned long flags;
> +
> + __ftrace (1, "(%p [%d])\n", task, task->pid);
> + CHECK_IRQs();
> +
> + local_irq_save (flags);
> + preempt_disable();
> + get_task_struct (task);
> +next:
> + /* Who is the task waiting for? safely acquire and lock it */
> + _raw_spin_lock (&task->fuqueue_wait_lock);
> + fuqueue = task->fuqueue_wait;
> + if (fuqueue == NULL) /* Ok, not waiting */
> + goto out_task_unlock;
> + if (!_raw_spin_trylock (&fuqueue->lock)) { /* Spin dance... */
> + _raw_spin_unlock (&task->fuqueue_wait_lock);
> + goto next;
> + }
> + ops = fuqueue->ops;
> + if (ops->get)
> + ops->get (fuqueue);
> +
> + w = task->fuqueue_waiter;
> + _raw_spin_unlock (&task->fuqueue_wait_lock);
> + put_task_struct (task);
> +
> + /* Ok, we need to propagate the prio change to the fuqueue */
> + ops = fuqueue->ops;
> + task = ops->chprio (task, fuqueue, w);
> + if (task == NULL)
> + goto out_fuqueue_unlock;
> +
> + /* Do we need to propagate to the next task */
> + get_task_struct (task);
> + _raw_spin_unlock (&fuqueue->lock);
> + if (ops->put)
> + ops->put (fuqueue);
> + ldebug (1, "__set_prio (%d, %d)\n", task->pid, prio);
> + __set_prio (task, prio);
> + goto next;
> +
> +out_fuqueue_unlock:
> + _raw_spin_unlock (&fuqueue->lock);
> + if (ops->put)
> + ops->put (fuqueue);
> + goto out;
> +
> +out_task_unlock:
> + _raw_spin_unlock (&task->fuqueue_wait_lock);
> + put_task_struct (task);
> +out:
> + local_irq_restore (flags);
> + preempt_enable();
> + return;
Heavy use of _raw_spin_lock() like this is a poor practice.
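For comparison, the stock API keeps the IRQ and preemption bookkeeping implicit; a rough equivalent of one of those pairs (sketch only):

    unsigned long flags;

    spin_lock_irqsave (&task->fuqueue_wait_lock, flags);
    /* ... read task->fuqueue_wait / task->fuqueue_waiter ... */
    spin_unlock_irqrestore (&task->fuqueue_wait_lock, flags);

The patch uses the _raw_ variants to avoid saving and restoring flags for every lock taken inside one critical region, at the price of managing local_irq_save()/preempt_disable() by hand.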
On Wed, Dec 03, 2003 at 12:51:34AM -0800, inaky.perez-gonzalez@intel.com wrote:
> +/** Fuqueue operations for usage within the kernel */
> +struct fuqueue_ops fuqueue_ops = {
> + .get = NULL,
> + .put = NULL,
> + .wait_cancel = __fuqueue_wait_cancel,
> + .chprio = __fuqueue_chprio
> +};
If this is all ->get() and ->put() are going to be, why bother?
-- wli
* Re: [RFC/PATCH] FUSYN 5/10: kernel fuqueues
2003-12-11 23:30 ` [RFC/PATCH] FUSYN 5/10: kernel fuqueues Matt Mackall
2003-12-11 23:55 ` Gene Heskett
@ 2003-12-14 16:15 ` Pavel Machek
1 sibling, 0 replies; 9+ messages in thread
From: Pavel Machek @ 2003-12-14 16:15 UTC (permalink / raw)
To: Matt Mackall; +Cc: inaky.perez-gonzalez, linux-kernel, robustmutexes
Hi!
> > include/linux/fuqueue.h | 451 ++++++++++++++++++++++++++++++++++++++++++++++++
> > include/linux/plist.h | 197 ++++++++++++++++++++
> > kernel/fuqueue.c | 220 +++++++++++++++++++++++
> > 3 files changed, 868 insertions(+)
> >
> > +++ linux/include/linux/fuqueue.h Wed Nov 19 16:42:50 2003
>
> I don't suppose you've run this feature name past anyone in marketing
> or PR?
Hehe, reminds me of podfuk (now called uservfs) project :-).
Pavel
--
When do you have a heart between your knees?
[Johanka's followup: and *two* hearts?]
* [RFC/PATCH] FUSYN 7/10: user space fuqueues
2004-01-14 22:50 [RFC/PATCH] FUSYN 6/10: user space/kernel space tracker inaky.perez-gonzalez
@ 2004-01-14 22:50 ` inaky.perez-gonzalez
0 siblings, 0 replies; 9+ messages in thread
From: inaky.perez-gonzalez @ 2004-01-14 22:50 UTC (permalink / raw)
To: linux-kernel; +Cc: inaky.perez-gonzalez, robustmutexes
ufuqueue.c | 236 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 236 insertions(+)
--- /dev/null Wed Jan 14 14:39:30 2004
+++ linux/kernel/ufuqueue.c Wed Nov 19 17:32:07 2003
@@ -0,0 +1,236 @@
+
+/*
+ * Fast User real-time/pi/pp/robust/deadlock SYNchronization
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>.
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on normal futexes (futex.c), (C) Rusty Russell.
+ * Please refer to Documentation/fusyn.txt for more info.
+ */
+
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/highmem.h> /* kmap_atomic() */
+#include <linux/vlocator.h>
+#include <linux/fuqueue.h>
+
+extern signed long ufu_timespec2jiffies (const struct timespec __user *);
+static struct fuqueue_ops ufuqueue_ops;
+
+/** Slab for allocation of 'struct ufuqueue's. */
+static kmem_cache_t *ufuqueue_slab;
+
+/** A ufuqueue, tied to a user-space vm address. */
+struct ufuqueue {
+ struct fuqueue fuqueue;
+ struct vlocator vlocator;
+};
+
+
+/** Initialize an @ufuqueue (for slab object construction). */
+static inline
+void ufuqueue_ctor (void *obj, kmem_cache_t *slab, unsigned long flags)
+{
+ struct ufuqueue *ufuqueue = obj;
+ __fuqueue_init (&ufuqueue->fuqueue, &ufuqueue_ops);
+ vlocator_init (&ufuqueue->vlocator);
+}
+
+
+/** Allocate an ufuqueue maintaining debug-allocation counts. */
+static __inline__
+struct vlocator * ufuqueue_alloc (void)
+{
+ struct ufuqueue *ufuqueue;
+ ufuqueue = kmem_cache_alloc (ufuqueue_slab, GFP_KERNEL);
+ if (ufuqueue == NULL)
+ return NULL;
+ if (DEBUG > 0)
+ ldebug (1, "ufuqueue %p allocated, total: %d\n",
+ ufuqueue, allocated_inc());
+ return &ufuqueue->vlocator;
+}
+
+
+/**
+ * Create a ufuqueue that is going to be inserted into the hash
+ * [called from vlocator.c:vl_locate() through the vlocator ops]
+ */
+static
+void ufuqueue_create (struct vlocator *vl, struct page *page,
+ const union futex_key *key, unsigned flags)
+{
+ memcpy (&vl->key, key, sizeof (*key));
+ get_key_refs (&vl->key);
+}
+
+
+/** Release an ufuqueue's key references. */
+static __inline__
+void ufuqueue_release (struct vlocator *vl)
+{
+ drop_key_refs (&vl->key);
+}
+
+
+/** Free an ufuqueue maintaining debug-allocation counts. */
+static __inline__
+void ufuqueue_free (struct vlocator *vl)
+{
+ struct ufuqueue *ufuqueue =
+ container_of (vl, struct ufuqueue, vlocator);
+ if (DEBUG > 0)
+ ldebug (1, "ufuqueue %p freed, total: %d\n",
+ ufuqueue, allocated_dec());
+ kmem_cache_free (ufuqueue_slab, ufuqueue);
+}
+
+
+struct vlocator_ops ufuqueue_vops = {
+ .alloc = ufuqueue_alloc,
+ .create = ufuqueue_create,
+ .release = ufuqueue_release,
+ .free = ufuqueue_free
+};
+
+
+/**
+ * Wait on a fuqueue
+ *
+ * @_vfuqueue: address in user space of the fuqueue word.
+ * @val: value the fuqueue word must still hold for the caller to block.
+ * @_timeout: timeout information [see sys_ufulock_lock()]
+ *
+ * This is just a thin shell that locates the kernel descriptor for
+ * the fuqueue, rechecks the value and then blocks in
+ * __fuqueue_wait_block().
+ */
+asmlinkage
+int sys_ufuqueue_wait (volatile unsigned __user *_vfuqueue,
+ unsigned val, struct timespec __user *_timeout)
+{
+ int result = -EINVAL;
+ struct ufuqueue *ufuqueue;
+ struct page *page;
+ char *page_kmap;
+ volatile unsigned *vfuqueue;
+ unsigned new_val;
+ struct vlocator *vl;
+ signed long timeout;
+ struct fuqueue_waiter w;
+
+ ftrace ("(%p, %x, %p)\n", _vfuqueue, val, _timeout);
+ might_sleep();
+
+ /* ufuqueue: pin pages, get keys, look up/allocate, refcount */
+ result = vl_locate (&page, &vl, &ufuqueue_vops, _vfuqueue, 0);
+ if (unlikely (result < 0))
+ goto out;
+ ufuqueue = container_of (vl, struct ufuqueue, vlocator);
+ /* We are going to lock the ufuqueue, so get the timeout first */
+ timeout = ufu_timespec2jiffies (_timeout);
+ result = (int) timeout;
+ if (timeout < 0)
+ goto out_put_vl;
+
+ spin_lock_irq (&ufuqueue->fuqueue.lock);
+ __fuqueue_wait_queue (&ufuqueue->fuqueue, &w);
+ /* Now, are we ok with the value? */
+ page_kmap = kmap_atomic (page, KM_IRQ0);
+ vfuqueue = (volatile unsigned *)page_kmap + (vl->key.both.offset & ~1);
+ new_val = *vfuqueue;
+ kunmap_atomic (page_kmap, KM_IRQ0);
+ result = -EWOULDBLOCK;
+ if (val != new_val)
+ goto out_unqueue;
+ /* ok, go ahead and wait (it will unlock and restore irqs/preempt) */
+ return __fuqueue_wait_block (&ufuqueue->fuqueue, &w, timeout);
+
+out_unqueue:
+ __fuqueue_wait_unqueue (&w, 0);
+ spin_unlock_irq (&ufuqueue->fuqueue.lock);
+out_put_vl:
+ vl_put (vl);
+ put_page (page);
+out:
+ return result;
+}
+
+
+/**
+ * Wake up waiters of a fuqueue
+ *
+ * @_vfuqueue: pointer to the fuqueue
+ * @howmany: number of waiters to wake up
+ * @code: code to return to the waiters [defaults to zero]
+ * @returns: 0 if ok, < 0 errno code on error.
+ */
+asmlinkage
+int sys_ufuqueue_wake (volatile unsigned __user *_vfuqueue,
+ size_t howmany, int code)
+{
+ int result;
+ struct vlocator *vl;
+ struct ufuqueue *ufuqueue;
+
+ ftrace ("(%p, %u)\n", _vfuqueue, howmany);
+ might_sleep();
+
+ /* ufuqueue: get keys, look up (don't allocate), refcount */
+ result = vl_locate (NULL, &vl, NULL, _vfuqueue, 0);
+ if (result < 0)
+ goto out;
+ ufuqueue = container_of (vl, struct ufuqueue, vlocator);
+ /* Wake'en up */
+ spin_lock_irq (&ufuqueue->fuqueue.lock);
+ __fuqueue_wake (&ufuqueue->fuqueue, howmany, code);
+ spin_unlock_irq (&ufuqueue->fuqueue.lock);
+ vl_put (vl);
+ result = 0;
+out:
+ return result;
+}
+
+
+/** Initialize the ufuqueue subsystem. */
+static
+int __init subsys_ufuqueue_init (void)
+{
+ ufuqueue_slab = kmem_cache_create ("ufuqueue", sizeof (struct ufuqueue),
+ 0, 0, ufuqueue_ctor, NULL);
+ if (ufuqueue_slab == NULL)
+ panic ("ufuqueue_init(): "
+ "Unable to initialize ufuqueue slab allocator.\n");
+ return 0;
+}
+__initcall (subsys_ufuqueue_init);
+
+
+/* Adaptors for fulock operations */
+static
+void ufuqueue_put (struct fuqueue *fuqueue)
+{
+ struct ufuqueue *ufuqueue =
+ container_of (fuqueue, struct ufuqueue, fuqueue);
+ vl_put (&ufuqueue->vlocator);
+}
+
+static
+void ufuqueue_get (struct fuqueue *fuqueue)
+{
+ struct ufuqueue *ufuqueue =
+ container_of (fuqueue, struct ufuqueue, fuqueue);
+ vl_get (&ufuqueue->vlocator);
+}
+
+/** Ufuqueue operations */
+static
+struct fuqueue_ops ufuqueue_ops = {
+ .get = ufuqueue_get,
+ .put = ufuqueue_put,
+ .wait_cancel = __fuqueue_wait_cancel,
+ .chprio = __fuqueue_chprio
+};
+
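Seen from user space, the pair behaves like a priority-queued futex: wait blocks only while the word still holds the expected value, wake releases up to @howmany waiters. A sketch of the calling convention -- the __NR_ufuqueue_* syscall numbers are an assumption here, as this patch does not show the syscall table hookup:

    #include <unistd.h>
    #include <sys/syscall.h>
    #include <time.h>

    static volatile unsigned word = 0;

    /* block while word still reads 0, for at most one second */
    int wait_on_word (void)
    {
            struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
            return syscall (__NR_ufuqueue_wait, &word, 0, &ts);
    }

    /* wake one waiter, handing it a return code of 0 */
    int wake_one (void)
    {
            return syscall (__NR_ufuqueue_wake, &word, (size_t)1, 0);
    }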
end of thread, other threads:[~2004-01-14 22:54 UTC | newest]
Thread overview: 9+ messages
[not found] <0312030051..akdxcwbwbHdYdmdSaFbbcycyc3a~bzd25502@intel.com>
2003-12-03 8:51 ` [RFC/PATCH] FUSYN 5/10: kernel fuqueues inaky.perez-gonzalez
2003-12-03 8:51 ` [RFC/PATCH] FUSYN 6/10: user space/kernel space tracker inaky.perez-gonzalez
2003-12-03 8:51 ` [RFC/PATCH] FUSYN 7/10: user space fuqueues inaky.perez-gonzalez
2003-12-03 8:51 ` [RFC/PATCH] FUSYN 8/10: kernel fulocks inaky.perez-gonzalez
2003-12-11 23:30 ` [RFC/PATCH] FUSYN 5/10: kernel fuqueues Matt Mackall
2003-12-11 23:55 ` Gene Heskett
2003-12-14 16:15 ` Pavel Machek
2003-12-12 19:21 ` William Lee Irwin III
2004-01-14 22:50 [RFC/PATCH] FUSYN 6/10: user space/kernel space tracker inaky.perez-gonzalez
2004-01-14 22:50 ` [RFC/PATCH] FUSYN 7/10: user space fuqueues inaky.perez-gonzalez