* Re: [PATCH 1/6] Add a wait queue argument to wait_bit action()
2005-06-20 16:01 ` [PATCH 0/6] Integrate AIO with wait-bit based filtered wakeups Suparna Bhattacharya
@ 2005-06-20 16:20 ` Suparna Bhattacharya
2005-06-20 16:24 ` [PATCH 2/6] Rename __lock_page to lock_page_slow Suparna Bhattacharya
` (5 subsequent siblings)
6 siblings, 0 replies; 25+ messages in thread
From: Suparna Bhattacharya @ 2005-06-20 16:20 UTC (permalink / raw)
To: linux-aio; +Cc: linux-kernel, bcrl, wli, zab, mason
On Mon, Jun 20, 2005 at 09:31:26PM +0530, Suparna Bhattacharya wrote:
> Here is a little bit of background on the motivation behind this set of
> patches to update AIO for filtered wakeups:
>
> (a) Since the introduction of filtered wakeups support and
> the wait_bit_queue infrastructure in mainline, it is no longer
> sufficient to just embed a wait queue entry in the kiocb
> for AIO operations involving filtered wakeups.
> (b) Given that filesystem reads/writes use filtered wakeups underlying
> wait_on_page_bit, fixing this becomes a pre-req for buffered
> filesystem AIO.
> (c) The wait_bit_queue infrastructure actually enables a cleaner
> implementation of filesystem AIO because it already provides
> for an action routine intended to allow both blocking and
> non-blocking or asynchronous behaviour.
>
>> Updated to apply to 2.6.12-rc6.
Add a wait queue parameter to the action routine called by
__wait_on_bit to allow it to determine whether to block or
not.
Signed-off-by: Suparna Bhattacharya <suparna@in.ibm.com>
fs/buffer.c | 2 +-
kernel/wait.c | 14 ++++++++------
mm/filemap.c | 2 +-
3 files changed, 10 insertions(+), 8 deletions(-)
diff -puN kernel/wait.c~modify-wait-bit-action-args kernel/wait.c
--- linux-2.6.9-rc1-mm4/kernel/wait.c~modify-wait-bit-action-args 2004-09-10 16:52:04.000000000 +0530
+++ linux-2.6.9-rc1-mm4-suparna/kernel/wait.c 2004-09-10 16:52:04.000000000 +0530
@@ -152,14 +152,14 @@ EXPORT_SYMBOL(wake_bit_function);
*/
int __sched fastcall
__wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
- int (*action)(void *), unsigned mode)
+ int (*action)(void *, wait_queue_t *), unsigned mode)
{
int ret = 0;
do {
prepare_to_wait(wq, &q->wait, mode);
if (test_bit(q->key.bit_nr, q->key.flags))
- ret = (*action)(q->key.flags);
+ ret = (*action)(q->key.flags, &q->wait);
} while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
finish_wait(wq, &q->wait);
return ret;
@@ -167,7 +167,8 @@ EXPORT_SYMBOL(__wait_on_bit);
EXPORT_SYMBOL(__wait_on_bit);
int __sched fastcall out_of_line_wait_on_bit(void *word, int bit,
- int (*action)(void *), unsigned mode)
+ int (*action)(void *, wait_queue_t *),
+ unsigned mode)
{
wait_queue_head_t *wq = bit_waitqueue(word, bit);
DEFINE_WAIT_BIT(wait, word, bit);
@@ -178,14 +179,14 @@ EXPORT_SYMBOL(out_of_line_wait_on_bit);
int __sched fastcall
__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
- int (*action)(void *), unsigned mode)
+ int (*action)(void *, wait_queue_t *), unsigned mode)
{
int ret = 0;
do {
prepare_to_wait_exclusive(wq, &q->wait, mode);
if (test_bit(q->key.bit_nr, q->key.flags)) {
- if ((ret = (*action)(q->key.flags)))
+ if ((ret = (*action)(q->key.flags, &q->wait)))
break;
}
} while (test_and_set_bit(q->key.bit_nr, q->key.flags));
@@ -195,7 +196,8 @@ __wait_on_bit_lock(wait_queue_head_t *wq
EXPORT_SYMBOL(__wait_on_bit_lock);
int __sched fastcall out_of_line_wait_on_bit_lock(void *word, int bit,
- int (*action)(void *), unsigned mode)
+ int (*action)(void *, wait_queue_t *wait),
+ unsigned mode)
{
wait_queue_head_t *wq = bit_waitqueue(word, bit);
DEFINE_WAIT_BIT(wait, word, bit);
diff -puN mm/filemap.c~modify-wait-bit-action-args mm/filemap.c
--- linux-2.6.9-rc1-mm4/mm/filemap.c~modify-wait-bit-action-args 2004-09-10 16:52:04.000000000 +0530
+++ linux-2.6.9-rc1-mm4-suparna/mm/filemap.c 2004-09-10 16:52:04.000000000 +0530
@@ -133,7 +133,7 @@ void remove_from_page_cache(struct page
}
EXPORT_SYMBOL(remove_from_page_cache);
-static int sync_page(void *word)
+static int sync_page(void *word, wait_queue_t *wait)
{
struct address_space *mapping;
struct page *page;
diff -puN fs/buffer.c~modify-wait-bit-action-args fs/buffer.c
--- linux-2.6.9-rc1-mm4/fs/buffer.c~modify-wait-bit-action-args 2004-09-10 16:52:04.000000000 +0530
+++ linux-2.6.9-rc1-mm4-suparna/fs/buffer.c 2004-09-10 16:52:04.000000000 +0530
@@ -52,7 +52,7 @@ void wake_up_buffer(struct buffer_head *
bh->b_private = private;
}
-static int sync_buffer(void *word)
+static int sync_buffer(void *word, wait_queue_t *wait)
{
struct block_device *bd;
struct buffer_head *bh
_
diff -urp linux-2.6.9-rc3/include/linux/wait.h linux-2.6.9-rc3-mm2/include/linux/wait.h
--- linux-2.6.9-rc3/include/linux/wait.h 2004-10-07 13:19:09.000000000 +0530
+++ linux-2.6.9-rc3-mm2/include/linux/wait.h 2004-10-07 12:04:17.000000000 +0530
@@ -140,11 +151,15 @@ void FASTCALL(__wake_up(wait_queue_head_
extern void FASTCALL(__wake_up_locked(wait_queue_head_t *q, unsigned int mode));
extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr));
void FASTCALL(__wake_up_bit(wait_queue_head_t *, void *, int));
-int FASTCALL(__wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned));
-int FASTCALL(__wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned));
+int FASTCALL(__wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *,
+ int (*)(void *, wait_queue_t *), unsigned));
+int FASTCALL(__wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *,
+ int (*)(void *, wait_queue_t *), unsigned));
void FASTCALL(wake_up_bit(void *, int));
-int FASTCALL(out_of_line_wait_on_bit(void *, int, int (*)(void *), unsigned));
-int FASTCALL(out_of_line_wait_on_bit_lock(void *, int, int (*)(void *), unsigned));
+int FASTCALL(out_of_line_wait_on_bit(void *, int, int (*)(void *,
+ wait_queue_t *), unsigned));
+int FASTCALL(out_of_line_wait_on_bit_lock(void *, int, int (*)(void *,
+ wait_queue_t *), unsigned));
wait_queue_head_t *FASTCALL(bit_waitqueue(void *, int));
#define wake_up(x) __wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, NULL)
@@ -364,7 +392,8 @@ int wake_bit_function(wait_queue_t *wait
* but has no intention of setting it.
*/
static inline int wait_on_bit(void *word, int bit,
- int (*action)(void *), unsigned mode)
+ int (*action)(void *, wait_queue_t *),
+ unsigned mode)
{
if (!test_bit(bit, word))
return 0;
@@ -388,7 +417,8 @@ static inline int wait_on_bit(void *word
* clear with the intention of setting it, and when done, clearing it.
*/
static inline int wait_on_bit_lock(void *word, int bit,
- int (*action)(void *), unsigned mode)
+ int (*action)(void *, wait_queue_t *),
+ unsigned mode)
{
if (!test_and_set_bit(bit, word))
return 0;
diff -urp linux-2.6.9-rc3/include/linux/writeback.h linux-2.6.9-rc3-mm2/include/linux/writeback.h
--- linux-2.6.9-rc3/include/linux/writeback.h 2004-10-07 13:19:09.000000000 +0530
+++ linux-2.6.9-rc3-mm2/include/linux/writeback.h 2004-10-07 12:05:30.000000000 +0530
@@ -68,7 +68,7 @@ struct writeback_control {
*/
void writeback_inodes(struct writeback_control *wbc);
void wake_up_inode(struct inode *inode);
-int inode_wait(void *);
+int inode_wait(void *, wait_queue_t *);
void sync_inodes_sb(struct super_block *, int wait);
void sync_inodes(int wait);
diff -urp linux-2.6.9-rc3/fs/inode.c linux-2.6.9-rc3-mm2/fs/inode.c
--- linux-2.6.9-rc3/fs/inode.c 2004-10-07 13:19:03.000000000 +0530
+++ linux-2.6.9-rc3-mm2/fs/inode.c 2004-10-07 12:06:07.000000000 +0530
@@ -1264,7 +1264,7 @@ void remove_dquot_ref(struct super_block
#endif
-int inode_wait(void *word)
+int inode_wait(void *word, wait_queue_t *wait)
{
schedule();
return 0;
--
Suparna Bhattacharya (suparna@in.ibm.com)
Linux Technology Center
IBM Software Lab, India
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/6] Rename __lock_page to lock_page_slow
2005-06-20 16:01 ` [PATCH 0/6] Integrate AIO with wait-bit based filtered wakeups Suparna Bhattacharya
2005-06-20 16:20 ` [PATCH 1/6] Add a wait queue argument to wait_bit action() Suparna Bhattacharya
@ 2005-06-20 16:24 ` Suparna Bhattacharya
2005-06-28 16:52 ` Zach Brown
2005-07-24 22:17 ` Christoph Hellwig
2005-06-20 16:28 ` [PATCH 3/6] Interfaces to initialize and to test a wait_bit key Suparna Bhattacharya
` (4 subsequent siblings)
6 siblings, 2 replies; 25+ messages in thread
From: Suparna Bhattacharya @ 2005-06-20 16:24 UTC (permalink / raw)
To: linux-aio; +Cc: linux-kernel, bcrl, wli, zab, mason
In order to allow for interruptible and asynchronous versions of
lock_page in conjunction with the wait_on_bit changes, we need to
define low-level lock page routines which take an additional
argument, i.e a wait queue entry and may return non-zero status,
e.g -EINTR, -EIOCBRETRY, -EWOULDBLOCK etc. This patch renames
__lock_page to lock_page_slow, so that __lock_page and
__lock_page_slow can denote the versions which take a wait queue
parameter.
Signed-off-by: Suparna Bhattacharya <suparna@in.ibm.com>
include/linux/pagemap.h | 4 ++--
mm/filemap.c | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff -puN include/linux/pagemap.h~lock_page_slow include/linux/pagemap.h
--- linux-2.6.9-rc1-mm4/include/linux/pagemap.h~lock_page_slow 2004-09-13 11:46:23.000000000 +0530
+++ linux-2.6.9-rc1-mm4-suparna/include/linux/pagemap.h 2004-09-13 12:01:03.000000000 +0530
@@ -151,14 +151,14 @@ static inline pgoff_t linear_page_index(
return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT);
}
-extern void FASTCALL(__lock_page(struct page *page));
+extern void FASTCALL(lock_page_slow(struct page *page));
extern void FASTCALL(unlock_page(struct page *page));
static inline void lock_page(struct page *page)
{
might_sleep();
if (TestSetPageLocked(page))
- __lock_page(page);
+ lock_page_slow(page);
}
/*
diff -puN mm/filemap.c~lock_page_slow mm/filemap.c
--- linux-2.6.9-rc1-mm4/mm/filemap.c~lock_page_slow 2004-09-13 11:46:23.000000000 +0530
+++ linux-2.6.9-rc1-mm4-suparna/mm/filemap.c 2004-09-13 12:07:53.000000000 +0530
@@ -438,14 +438,14 @@ EXPORT_SYMBOL(end_page_writeback);
* chances are that on the second loop, the block layer's plug list is empty,
* so sync_page() will then return in state TASK_UNINTERRUPTIBLE.
*/
-void fastcall __lock_page(struct page *page)
+void fastcall lock_page_slow(struct page *page)
{
DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
__wait_on_bit_lock(page_waitqueue(page), &wait, sync_page,
TASK_UNINTERRUPTIBLE);
}
-EXPORT_SYMBOL(__lock_page);
+EXPORT_SYMBOL(lock_page_slow);
/*
* Note completion of filesystem specific page synchronisation
_
--
Suparna Bhattacharya (suparna@in.ibm.com)
Linux Technology Center
IBM Software Lab, India
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/6] Rename __lock_page to lock_page_slow
2005-06-20 16:24 ` [PATCH 2/6] Rename __lock_page to lock_page_slow Suparna Bhattacharya
@ 2005-06-28 16:52 ` Zach Brown
2005-06-29 9:51 ` Suparna Bhattacharya
2005-07-24 22:17 ` Christoph Hellwig
1 sibling, 1 reply; 25+ messages in thread
From: Zach Brown @ 2005-06-28 16:52 UTC (permalink / raw)
To: suparna; +Cc: linux-aio, linux-kernel, bcrl, wli, mason
I have to whine at least once about obscure names :)
> -void fastcall __lock_page(struct page *page)
> +void fastcall lock_page_slow(struct page *page)
> {
> DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
>
> __wait_on_bit_lock(page_waitqueue(page), &wait, sync_page,
> TASK_UNINTERRUPTIBLE);
> }
> -EXPORT_SYMBOL(__lock_page);
> +EXPORT_SYMBOL(lock_page_slow);
Can we chose a name that describes what it does? Something like
lock_page_wait_on_locked()?
- z
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/6] Rename __lock_page to lock_page_slow
2005-06-28 16:52 ` Zach Brown
@ 2005-06-29 9:51 ` Suparna Bhattacharya
0 siblings, 0 replies; 25+ messages in thread
From: Suparna Bhattacharya @ 2005-06-29 9:51 UTC (permalink / raw)
To: Zach Brown; +Cc: linux-aio, linux-kernel, bcrl, wli, mason
On Tue, Jun 28, 2005 at 09:52:37AM -0700, Zach Brown wrote:
>
> I have to whine at least once about obscure names :)
>
> > -void fastcall __lock_page(struct page *page)
> > +void fastcall lock_page_slow(struct page *page)
> > {
> > DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
> >
> > __wait_on_bit_lock(page_waitqueue(page), &wait, sync_page,
> > TASK_UNINTERRUPTIBLE);
> > }
> > -EXPORT_SYMBOL(__lock_page);
> > +EXPORT_SYMBOL(lock_page_slow);
>
> Can we chose a name that describes what it does? Something like
> lock_page_wait_on_locked()?
The usage of the _slow suffix was inspired from fs/buffer.c,
__bread_slow, __find_get_block_slow, __getblk_slow etc.
I actually have an earlier version with the name changes you had
suggested earlier ... but then wasn't sure if these are really
much better, wanted to hear what others had to say. I do not have
any particular affinities, will go with whatever is the general
consensus.
Regards
Suparna
>
> - z
--
Suparna Bhattacharya (suparna@in.ibm.com)
Linux Technology Center
IBM Software Lab, India
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/6] Rename __lock_page to lock_page_slow
2005-06-20 16:24 ` [PATCH 2/6] Rename __lock_page to lock_page_slow Suparna Bhattacharya
2005-06-28 16:52 ` Zach Brown
@ 2005-07-24 22:17 ` Christoph Hellwig
2005-07-24 22:36 ` Christoph Hellwig
1 sibling, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2005-07-24 22:17 UTC (permalink / raw)
To: Suparna Bhattacharya; +Cc: linux-aio, linux-kernel, bcrl, wli, zab, mason
On Mon, Jun 20, 2005 at 09:54:04PM +0530, Suparna Bhattacharya wrote:
> In order to allow for interruptible and asynchronous versions of
> lock_page in conjunction with the wait_on_bit changes, we need to
> define low-level lock page routines which take an additional
> argument, i.e a wait queue entry and may return non-zero status,
> e.g -EINTR, -EIOCBRETRY, -EWOULDBLOCK etc. This patch renames
> __lock_page to lock_page_slow, so that __lock_page and
> __lock_page_slow can denote the versions which take a wait queue
> parameter.
How many users that don't use a waitqueue parameter will be left
once all AIO patches go in?
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/6] Rename __lock_page to lock_page_slow
2005-07-24 22:17 ` Christoph Hellwig
@ 2005-07-24 22:36 ` Christoph Hellwig
2005-07-26 1:10 ` Suparna Bhattacharya
0 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2005-07-24 22:36 UTC (permalink / raw)
To: Christoph Hellwig, Suparna Bhattacharya, linux-aio, linux-kernel,
bcrl, wli, zab, mason
On Sun, Jul 24, 2005 at 11:17:02PM +0100, Christoph Hellwig wrote:
> On Mon, Jun 20, 2005 at 09:54:04PM +0530, Suparna Bhattacharya wrote:
> > In order to allow for interruptible and asynchronous versions of
> > lock_page in conjunction with the wait_on_bit changes, we need to
> > define low-level lock page routines which take an additional
> > argument, i.e a wait queue entry and may return non-zero status,
> > e.g -EINTR, -EIOCBRETRY, -EWOULDBLOCK etc. This patch renames
> > __lock_page to lock_page_slow, so that __lock_page and
> > __lock_page_slow can denote the versions which take a wait queue
> > parameter.
>
> How many users that don't use a waitqueue parameter will be left
> once all AIO patches go in?
Actually looking at the later patches we always seem to pass
current->io_wait anyway, so is there a real point for having the
argument?
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/6] Rename __lock_page to lock_page_slow
2005-07-24 22:36 ` Christoph Hellwig
@ 2005-07-26 1:10 ` Suparna Bhattacharya
0 siblings, 0 replies; 25+ messages in thread
From: Suparna Bhattacharya @ 2005-07-26 1:10 UTC (permalink / raw)
To: Christoph Hellwig, linux-aio, linux-kernel, bcrl, wli, zab, mason
On Sun, Jul 24, 2005 at 11:36:34PM +0100, Christoph Hellwig wrote:
> On Sun, Jul 24, 2005 at 11:17:02PM +0100, Christoph Hellwig wrote:
> > On Mon, Jun 20, 2005 at 09:54:04PM +0530, Suparna Bhattacharya wrote:
> > > In order to allow for interruptible and asynchronous versions of
> > > lock_page in conjunction with the wait_on_bit changes, we need to
> > > define low-level lock page routines which take an additional
> > > argument, i.e a wait queue entry and may return non-zero status,
> > > e.g -EINTR, -EIOCBRETRY, -EWOULDBLOCK etc. This patch renames
> > > __lock_page to lock_page_slow, so that __lock_page and
> > > __lock_page_slow can denote the versions which take a wait queue
> > > parameter.
> >
> > How many users that don't use a waitqueue parameter will be left
> > once all AIO patches go in?
Since these patches are intended only for aio reads and (O_SYNC) writes,
that still leaves most other users of regular lock_page() as they are.
>
> Actually looking at the later patches we always seem to pass
> current->io_wait anyway, so is there a real point for having the
> argument?
>
Having the parameter enables issual of synchronous lock_page() within
an AIO path, overriding current->io_wait, (for example, consider the case
of a page fault during AIO !), and thus avoids the need to convert and audit
more paths to handle -EIOCBRETRY propagation (though it is possible to
do so as and when the need arises). This is why I decided to keep this
parameter explicit at the low level, and let the caller decide how to
invoke it and handle the return value propagation.
Does that make sense ?
BTW, there is also a potential secondary benefit of a low level
primitive for asynchronous page IO operations etc directly usable
by kernel threads, without having to use the whole gamut of AIO
interfaces.
Thanks for reviewing the code !
Regards
Suparna
--
Suparna Bhattacharya (suparna@in.ibm.com)
Linux Technology Center
IBM Software Lab, India
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 3/6] Interfaces to initialize and to test a wait_bit key
2005-06-20 16:01 ` [PATCH 0/6] Integrate AIO with wait-bit based filtered wakeups Suparna Bhattacharya
2005-06-20 16:20 ` [PATCH 1/6] Add a wait queue argument to wait_bit action() Suparna Bhattacharya
2005-06-20 16:24 ` [PATCH 2/6] Rename __lock_page to lock_page_slow Suparna Bhattacharya
@ 2005-06-20 16:28 ` Suparna Bhattacharya
2005-06-20 16:30 ` [PATCH 4/6] Add default io wait bit field in task struct Suparna Bhattacharya
` (3 subsequent siblings)
6 siblings, 0 replies; 25+ messages in thread
From: Suparna Bhattacharya @ 2005-06-20 16:28 UTC (permalink / raw)
To: linux-aio; +Cc: linux-kernel, bcrl, wli, zab, mason
On Mon, Jun 20, 2005 at 09:31:26PM +0530, Suparna Bhattacharya wrote:
> patches to update AIO for filtered wakeups:
>
>> Updated to apply to 2.6.12-rc6
init_wait_bit_key() initializes the key field in an already
allocated wait bit structure, useful for async wait bit support.
Also separate out the wait bit test to a common routine which
can be used by different kinds of wakeup callbacks.
Signed-off-by: Suparna Bhattacharya <suparna@in.ibm.com>
linux-2.6.9-rc1-mm4-suparna/include/linux/wait.h | 17 +++++++++++++++++
linux-2.6.9-rc1-mm4-suparna/kernel/wait.c | 11 ++---------
2 files changed, 19 insertions(+), 9 deletions(-)
diff -puN include/linux/wait.h~init-wait-bit-key include/linux/wait.h
--- linux-2.6.9-rc1-mm4/include/linux/wait.h~init-wait-bit-key 2004-09-14 16:00:46.000000000 +0530
+++ linux-2.6.9-rc1-mm4-suparna/include/linux/wait.h 2004-09-15 15:33:51.000000000 +0530
@@ -103,6 +103,17 @@ static inline int waitqueue_active(wait_
return !list_empty(&q->task_list);
}
+static inline int test_wait_bit_key(wait_queue_t *wait,
+ struct wait_bit_key *key)
+{
+ struct wait_bit_queue *wait_bit
+ = container_of(wait, struct wait_bit_queue, wait);
+
+ return (wait_bit->key.flags == key->flags &&
+ wait_bit->key.bit_nr == key->bit_nr &&
+ !test_bit(key->bit_nr, key->flags));
+}
+
/*
* Used to distinguish between sync and async io wait context:
* sync i/o typically specifies a NULL wait queue entry or a wait
@@ -344,6 +359,19 @@ int wake_bit_function(wait_queue_t *wait
(wait)->task = current; \
(wait)->func = autoremove_wake_function; \
INIT_LIST_HEAD(&(wait)->task_list); \
+ } while (0)
+
+#define init_wait_bit_key(waitbit, word, bit) \
+ do { \
+ (waitbit)->key.flags = word; \
+ (waitbit)->key.bit_nr = bit; \
+ } while (0)
+
+#define init_wait_bit_task(waitbit, tsk) \
+ do { \
+ (waitbit)->wait.task = tsk; \
+ (waitbit)->wait.func = wake_bit_function; \
+ INIT_LIST_HEAD(&(waitbit)->wait.task_list); \
} while (0)
/**
diff -puN kernel/wait.c~init-wait-bit-key kernel/wait.c
--- linux-2.6.9-rc1-mm4/kernel/wait.c~init-wait-bit-key 2004-09-15 12:14:03.000000000 +0530
+++ linux-2.6.9-rc1-mm4-suparna/kernel/wait.c 2004-09-15 15:33:05.000000000 +0530
@@ -132,16 +132,9 @@ EXPORT_SYMBOL(autoremove_wake_function);
int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
{
- struct wait_bit_key *key = arg;
- struct wait_bit_queue *wait_bit
- = container_of(wait, struct wait_bit_queue, wait);
-
- if (wait_bit->key.flags != key->flags ||
- wait_bit->key.bit_nr != key->bit_nr ||
- test_bit(key->bit_nr, key->flags))
+ if (!test_wait_bit_key(wait, arg))
return 0;
- else
- return autoremove_wake_function(wait, mode, sync, key);
+ return autoremove_wake_function(wait, mode, sync, arg);
}
EXPORT_SYMBOL(wake_bit_function);
_
--
Suparna Bhattacharya (suparna@in.ibm.com)
Linux Technology Center
IBM Software Lab, India
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 4/6] Add default io wait bit field in task struct
2005-06-20 16:01 ` [PATCH 0/6] Integrate AIO with wait-bit based filtered wakeups Suparna Bhattacharya
` (2 preceding siblings ...)
2005-06-20 16:28 ` [PATCH 3/6] Interfaces to initialize and to test a wait_bit key Suparna Bhattacharya
@ 2005-06-20 16:30 ` Suparna Bhattacharya
2005-06-20 16:33 ` [PATCH 5/6] AIO wait bit and AIO wake bit for filtered wakeups Suparna Bhattacharya
` (2 subsequent siblings)
6 siblings, 0 replies; 25+ messages in thread
From: Suparna Bhattacharya @ 2005-06-20 16:30 UTC (permalink / raw)
To: linux-aio; +Cc: linux-kernel, bcrl, wli, zab, mason
On Mon, Jun 20, 2005 at 09:31:26PM +0530, Suparna Bhattacharya wrote:
> > (1) Updating AIO to use wait-bit based filtered wakeups (me/wli)
> > Status: Updated to 2.6.12-rc6, needs review
>
Allocates space for the default io wait queue entry (actually a wait
bit entry) in the task struct. Doing so simplifies the patches
for AIO wait page allowing for cleaner and more efficient
implementation, at the cost of 28 additional bytes in task struct
vs allocation on demand on-stack.
Signed-off-by: Suparna Bhattacharya <suparna@in.ibm.com>
include/linux/sched.h | 11 +++++++----
kernel/fork.c | 3 ++-
2 files changed, 9 insertions(+), 5 deletions(-)
diff -puN include/linux/sched.h~tsk-default-io-wait include/linux/sched.h
--- linux-2.6.9-rc1-mm4/include/linux/sched.h~tsk-default-io-wait 2004-09-20 14:05:09.000000000 +0530
+++ linux-2.6.9-rc1-mm4-suparna/include/linux/sched.h 2004-09-20 15:14:25.000000000 +0530
@@ -584,11 +584,14 @@ struct task_struct {
unsigned long ptrace_message;
siginfo_t *last_siginfo; /* For ptrace use. */
+
+/* Space for default IO wait bit entry used for synchronous IO waits */
+ struct wait_bit_queue __wait;
/*
- * current io wait handle: wait queue entry to use for io waits
- * If this thread is processing aio, this points at the waitqueue
- * inside the currently handled kiocb. It may be NULL (i.e. default
- * to a stack based synchronous wait) if its doing sync IO.
+ * Current IO wait handle: wait queue entry to use for IO waits
+ * If this thread is processing AIO, this points at the waitqueue
+ * inside the currently handled kiocb. Otherwise, points to the
+ * default IO wait field (i.e &__wait.wait above).
*/
wait_queue_t *io_wait;
#ifdef CONFIG_NUMA
diff -puN kernel/fork.c~tsk-default-io-wait kernel/fork.c
--- linux-2.6.9-rc1-mm4/kernel/fork.c~tsk-default-io-wait 2004-09-20 14:05:09.000000000 +0530
+++ linux-2.6.9-rc1-mm4-suparna/kernel/fork.c 2004-09-20 15:15:22.000000000 +0530
@@ -859,7 +859,8 @@ static task_t *copy_process(unsigned lon
do_posix_clock_monotonic_gettime(&p->start_time);
p->security = NULL;
p->io_context = NULL;
- p->io_wait = NULL;
+ init_wait_bit_task(&p->__wait, p);
+ p->io_wait = &p->__wait.wait;
p->audit_context = NULL;
#ifdef CONFIG_NUMA
p->mempolicy = mpol_copy(p->mempolicy);
_
--
Suparna Bhattacharya (suparna@in.ibm.com)
Linux Technology Center
IBM Software Lab, India
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 5/6] AIO wait bit and AIO wake bit for filtered wakeups
2005-06-20 16:01 ` [PATCH 0/6] Integrate AIO with wait-bit based filtered wakeups Suparna Bhattacharya
` (3 preceding siblings ...)
2005-06-20 16:30 ` [PATCH 4/6] Add default io wait bit field in task struct Suparna Bhattacharya
@ 2005-06-20 16:33 ` Suparna Bhattacharya
2005-06-20 16:36 ` [PATCH 6/6] AIO wait page and AIO lock page Suparna Bhattacharya
2005-06-30 15:49 ` [PATCH 0/6] Integrate AIO with wait-bit based filtered wakeups Sébastien Dugué
6 siblings, 0 replies; 25+ messages in thread
From: Suparna Bhattacharya @ 2005-06-20 16:33 UTC (permalink / raw)
To: linux-aio; +Cc: linux-kernel, bcrl, wli, zab, mason
On Mon, Jun 20, 2005 at 09:31:26PM +0530, Suparna Bhattacharya wrote:
> > (1) Updating AIO to use wait-bit based filtered wakeups (me/wli)
> > Status: Updated to 2.6.12-rc6, needs review
>
Enable wait bit based filtered wakeups to work for AIO.
Replaces the wait queue entry in the kiocb with a wait bit
structure, to allow enough space for the wait bit key.
This adds an extra level of indirection in references to the
wait queue entry in the iocb. Also, adds an extra check
in aio_wake_function to allow for other kinds of waiters
which do not require wait bit, based on the assumption that
the key passed in would be NULL in such cases.
Signed-off-by: Suparna Bhattacharya <suparna@in.ibm.com>
fs/aio.c | 20 +++++++++++++-------
include/linux/aio.h | 4 ++--
include/linux/wait.h | 18 ++++++++++++++++++
kernel/wait.c | 17 ++++++++++++++---
4 files changed, 47 insertions(+), 12 deletions(-)
diff -puN fs/aio.c~aio-wait-bit fs/aio.c
--- linux-2.6.9-rc1-mm4/fs/aio.c~aio-wait-bit 2004-09-15 16:02:13.000000000 +0530
+++ linux-2.6.9-rc1-mm4-suparna/fs/aio.c 2004-09-15 16:41:22.000000000 +0530
@@ -725,13 +725,13 @@ static ssize_t aio_run_iocb(struct kiocb
* the aio_wake_function callback).
*/
- BUG_ON(current->io_wait != NULL);
- current->io_wait = &iocb->ki_wait;
+ BUG_ON(!is_sync_wait(current->io_wait));
+ current->io_wait = &iocb->ki_wait.wait;
ret = retry(iocb);
current->io_wait = NULL;
if (-EIOCBRETRY != ret) {
if (-EIOCBQUEUED != ret) {
- BUG_ON(!list_empty(&iocb->ki_wait.task_list));
+ BUG_ON(!list_empty(&iocb->ki_wait.wait.task_list));
aio_complete(iocb, ret, 0);
/* must not access the iocb after this */
}
@@ -740,7 +740,7 @@ static ssize_t aio_run_iocb(struct kiocb
* Issue an additional retry to avoid waiting forever if
* no waits were queued (e.g. in case of a short read).
*/
- if (list_empty(&iocb->ki_wait.task_list))
+ if (list_empty(&iocb->ki_wait.wait.task_list))
kiocbSetKicked(iocb);
}
out:
@@ -886,7 +886,7 @@ void queue_kicked_iocb(struct kiocb *ioc
unsigned long flags;
int run = 0;
- WARN_ON((!list_empty(&iocb->ki_wait.task_list)));
+ WARN_ON((!list_empty(&iocb->ki_wait.wait.task_list)));
spin_lock_irqsave(&ctx->ctx_lock, flags);
run = __queue_kicked_iocb(iocb);
@@ -1472,7 +1472,13 @@ ssize_t aio_setup_iocb(struct kiocb *kio
*/
int aio_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
- struct kiocb *iocb = container_of(wait, struct kiocb, ki_wait);
+ struct wait_bit_queue *wait_bit
+ = container_of(wait, struct wait_bit_queue, wait);
+ struct kiocb *iocb = container_of(wait_bit, struct kiocb, ki_wait);
+
+ /* Assumes that a non-NULL key implies wait bit filtering */
+ if (key && !test_wait_bit_key(wait, key))
+ return 0;
list_del_init(&wait->task_list);
kick_iocb(iocb);
@@ -1528,8 +1534,9 @@ int fastcall io_submit_one(struct kioctx
req->ki_buf = (char __user *)(unsigned long)iocb->aio_buf;
req->ki_left = req->ki_nbytes = iocb->aio_nbytes;
req->ki_opcode = iocb->aio_lio_opcode;
- init_waitqueue_func_entry(&req->ki_wait, aio_wake_function);
- INIT_LIST_HEAD(&req->ki_wait.task_list);
+ init_waitqueue_func_entry(&req->ki_wait.wait, aio_wake_function);
+ INIT_LIST_HEAD(&req->ki_wait.wait.task_list);
+ req->ki_run_list.next = req->ki_run_list.prev = NULL;
req->ki_retried = 0;
ret = aio_setup_iocb(req);
diff -puN kernel/wait.c~aio-wait-bit kernel/wait.c
--- linux-2.6.9-rc1-mm4/kernel/wait.c~aio-wait-bit 2004-09-15 16:02:13.000000000 +0530
+++ linux-2.6.9-rc1-mm4-suparna/kernel/wait.c 2004-09-20 14:59:24.000000000 +0530
@@ -132,7 +132,8 @@ EXPORT_SYMBOL(autoremove_wake_function);
int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
{
- if (!test_wait_bit_key(wait, arg))
+ /* Assumes that a non-NULL key implies wait bit filtering */
+ if (arg && !test_wait_bit_key(wait, arg))
return 0;
return autoremove_wake_function(wait, mode, sync, key);
}
@@ -152,7 +153,12 @@ __wait_on_bit(wait_queue_head_t *wq, str
if (test_bit(q->key.bit_nr, q->key.flags))
ret = (*action)(q->key.flags, &q->wait);
} while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
- finish_wait(wq, &q->wait);
+ /*
+ * AIO retries require the wait queue entry to remain queued
+ * for async notification
+ */
+ if (ret != -EIOCBRETRY)
+ finish_wait(wq, &q->wait);
return ret;
}
EXPORT_SYMBOL(__wait_on_bit);
@@ -181,7 +187,12 @@ __wait_on_bit_lock(wait_queue_head_t *wq
break;
}
} while (test_and_set_bit(q->key.bit_nr, q->key.flags));
- finish_wait(wq, &q->wait);
+ /*
+ * AIO retries require the wait queue entry to remain queued
+ * for async notification
+ */
+ if (ret != -EIOCBRETRY)
+ finish_wait(wq, &q->wait);
return ret;
}
EXPORT_SYMBOL(__wait_on_bit_lock);
diff -puN include/linux/aio.h~aio-wait-bit include/linux/aio.h
--- linux-2.6.9-rc1-mm4/include/linux/aio.h~aio-wait-bit 2004-09-15 16:05:40.000000000 +0530
+++ linux-2.6.9-rc1-mm4-suparna/include/linux/aio.h 2004-09-20 15:20:58.000000000 +0530
@@ -69,7 +69,7 @@ struct kiocb {
size_t ki_nbytes; /* copy of iocb->aio_nbytes */
char __user *ki_buf; /* remaining iocb->aio_buf */
size_t ki_left; /* remaining bytes */
- wait_queue_t ki_wait;
+ struct wait_bit_queue ki_wait;
long ki_retried; /* just for testing */
long ki_kicked; /* just for testing */
long ki_queued; /* just for testing */
@@ -90,7 +90,7 @@ struct kiocb {
(x)->ki_dtor = NULL; \
(x)->ki_obj.tsk = tsk; \
(x)->ki_user_data = 0; \
- init_wait((&(x)->ki_wait)); \
+ init_wait_bit_task((&(x)->ki_wait), current);\
} while (0)
#define AIO_RING_MAGIC 0xa10a10a1
--
Suparna Bhattacharya (suparna@in.ibm.com)
Linux Technology Center
IBM Software Lab, India
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 6/6] AIO wait page and AIO lock page
2005-06-20 16:01 ` [PATCH 0/6] Integrate AIO with wait-bit based filtered wakeups Suparna Bhattacharya
` (4 preceding siblings ...)
2005-06-20 16:33 ` [PATCH 5/6] AIO wait bit and AIO wake bit for filtered wakeups Suparna Bhattacharya
@ 2005-06-20 16:36 ` Suparna Bhattacharya
2005-06-30 15:49 ` [PATCH 0/6] Integrate AIO with wait-bit based filtered wakeups Sébastien Dugué
6 siblings, 0 replies; 25+ messages in thread
From: Suparna Bhattacharya @ 2005-06-20 16:36 UTC (permalink / raw)
To: linux-aio; +Cc: linux-kernel, bcrl, wli, zab, mason
On Mon, Jun 20, 2005 at 09:31:26PM +0530, Suparna Bhattacharya wrote:
> > (1) Updating AIO to use wait-bit based filtered wakeups (me/wli)
> > Status: Updated to 2.6.12-rc6, needs review
>
Define low-level page wait and lock page routines which take a
wait queue entry pointer as an additional parameter and
return status (which may be non-zero when the wait queue
parameter signifies an asynchronous wait, typically during
AIO).
Synchronous IO waits become a special case where the wait
queue parameter is the running task's default io wait context.
Asynchronous IO waits happen when the wait queue parameter
is the io wait context of a kiocb. Code paths which choose to
execute synchronous or asynchronous behaviour depending on the
called context specify the current io wait context (which points
to sync or async context as the case may be) as the wait
parameter.
Signed-off-by: Suparna Bhattacharya <suparna@in.ibm.com>
include/linux/pagemap.h | 38 ++++++++++++++------
mm/filemap.c | 27 ++++++++------
2 files changed, 44 insertions(+), 21 deletions(-)
diff -urp linux-2.6.9-rc3/kernel/sched.c linux-2.6.9-rc3-mm2/kernel/sched.c
--- linux-2.6.9-rc3/kernel/sched.c 2004-10-07 13:19:10.000000000 +0530
+++ linux-2.6.9-rc3-mm2/kernel/sched.c 2004-10-08 11:53:18.000000000 +0530
@@ -4428,6 +4428,20 @@ long __sched io_schedule_timeout(long ti
return ret;
}
+/*
+ * Sleep only if the wait context passed is not async,
+ * otherwise return so that a retry can be issued later.
+ */
+int __sched io_wait_schedule(wait_queue_t *wait)
+{
+ if (!is_sync_wait(wait))
+ return -EIOCBRETRY;
+ io_schedule();
+ return 0;
+}
+
+EXPORT_SYMBOL(io_wait_schedule);
+
/**
* sys_sched_get_priority_max - return maximum RT priority.
* @policy: scheduling class.
diff -puN mm/filemap.c~aio-wait-page mm/filemap.c
--- linux-2.6.9-rc1-mm4/mm/filemap.c~aio-wait-page 2004-09-17 09:25:48.000000000 +0530
+++ linux-2.6.9-rc1-mm4-suparna/mm/filemap.c 2004-09-20 22:57:37.000000000 +0530
@@ -146,8 +146,7 @@ static int sync_page(void *word, wait_qu
mapping = page_mapping(page);
if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
mapping->a_ops->sync_page(page);
- io_schedule();
- return 0;
+ return io_wait_schedule(wait);
}
/**
@@ -378,13 +377,17 @@ static inline void wake_up_page(struct p
__wake_up_bit(page_waitqueue(page), &page->flags, bit);
}
-void fastcall wait_on_page_bit(struct page *page, int bit_nr)
+int fastcall wait_on_page_bit(struct page *page, int bit_nr,
+ wait_queue_t *wait)
{
- DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
-
- if (test_bit(bit_nr, &page->flags))
- __wait_on_bit(page_waitqueue(page), &wait, sync_page,
+ if (test_bit(bit_nr, &page->flags)) {
+ struct wait_bit_queue *wait_bit
+ = container_of(wait, struct wait_bit_queue, wait);
+ init_wait_bit_key(wait_bit, &page->flags, bit_nr);
+ return __wait_on_bit(page_waitqueue(page), wait_bit, sync_page,
TASK_UNINTERRUPTIBLE);
+ }
+ return 0;
}
EXPORT_SYMBOL(wait_on_page_bit);
@@ -431,18 +434,20 @@ void end_page_writeback(struct page *pag
EXPORT_SYMBOL(end_page_writeback);
/*
- * Get a lock on the page, assuming we need to sleep to get it.
+ * Get a lock on the page, assuming we need to wait to get it.
*
* Ugly: running sync_page() in state TASK_UNINTERRUPTIBLE is scary. If some
* random driver's requestfn sets TASK_RUNNING, we could busywait. However
* chances are that on the second loop, the block layer's plug list is empty,
* so sync_page() will then return in state TASK_UNINTERRUPTIBLE.
*/
-void fastcall lock_page_slow(struct page *page)
+int fastcall lock_page_slow(struct page *page, wait_queue_t *wait)
{
- DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
+ struct wait_bit_queue *wait_bit
+ = container_of(wait, struct wait_bit_queue, wait);
- __wait_on_bit_lock(page_waitqueue(page), &wait, sync_page,
+ init_wait_bit_key(wait_bit, &page->flags, PG_locked);
+ return __wait_on_bit_lock(page_waitqueue(page), wait_bit, sync_page,
TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(lock_page_slow);
diff -puN include/linux/pagemap.h~aio-wait-page include/linux/pagemap.h
--- linux-2.6.9-rc1-mm4/include/linux/pagemap.h~aio-wait-page 2004-09-17 09:25:48.000000000 +0530
+++ linux-2.6.9-rc1-mm4-suparna/include/linux/pagemap.h 2004-09-20 22:56:21.000000000 +0530
@@ -151,21 +151,25 @@ static inline pgoff_t linear_page_index(
return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT);
}
-extern void FASTCALL(lock_page_slow(struct page *page));
+extern int FASTCALL(lock_page_slow(struct page *page, wait_queue_t *wait));
extern void FASTCALL(unlock_page(struct page *page));
-static inline void lock_page(struct page *page)
+static inline int __lock_page(struct page *page, wait_queue_t *wait)
{
might_sleep();
if (TestSetPageLocked(page))
- lock_page_slow(page);
+ return lock_page_slow(page, wait);
+ return 0;
}
+
+#define lock_page(page) __lock_page(page, ¤t->__wait.wait)
/*
* This is exported only for wait_on_page_locked/wait_on_page_writeback.
* Never use this directly!
*/
-extern void FASTCALL(wait_on_page_bit(struct page *page, int bit_nr));
+extern int FASTCALL(wait_on_page_bit(struct page *page, int bit_nr,
+ wait_queue_t *wait));
/*
* Wait for a page to be unlocked.
@@ -174,20 +178,29 @@ extern void FASTCALL(wait_on_page_bit(st
* ie with increased "page->count" so that the page won't
* go away during the wait..
*/
-static inline void wait_on_page_locked(struct page *page)
+static inline int __wait_on_page_locked(struct page *page, wait_queue_t *wait)
{
if (PageLocked(page))
- wait_on_page_bit(page, PG_locked);
+ return wait_on_page_bit(page, PG_locked, wait);
+ return 0;
}
+#define wait_on_page_locked(page) \
+ __wait_on_page_locked(page, ¤t->__wait.wait)
+
/*
* Wait for a page to complete writeback
*/
-static inline void wait_on_page_writeback(struct page *page)
+static inline int __wait_on_page_writeback(struct page *page,
+ wait_queue_t *wait)
{
if (PageWriteback(page))
- wait_on_page_bit(page, PG_writeback);
+ return wait_on_page_bit(page, PG_writeback, wait);
+ return 0;
}
+#define wait_on_page_writeback(page) \
+ __wait_on_page_writeback(page, ¤t->__wait.wait)
+
extern void end_page_writeback(struct page *page);
/*
* Fault a userspace page into pagetables. Return non-zero on a fault.
--- linux-2.6.10-rc1/include/linux/sched.h 2004-11-03 12:04:20.000000000 +0530
+++ linux-2.6.10-rc1-aio/include/linux/sched.h 2004-11-10 13:06:03.376403392 +0530
@@ -165,6 +165,7 @@ extern void show_stack(struct task_struc
void io_schedule(void);
long io_schedule_timeout(long timeout);
+int io_wait_schedule(wait_queue_t *wait);
extern void cpu_init (void);
extern void trap_init(void);
--
Suparna Bhattacharya (suparna@in.ibm.com)
Linux Technology Center
IBM Software Lab, India
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 0/6] Integrate AIO with wait-bit based filtered wakeups
2005-06-20 16:01 ` [PATCH 0/6] Integrate AIO with wait-bit based filtered wakeups Suparna Bhattacharya
` (5 preceding siblings ...)
2005-06-20 16:36 ` [PATCH 6/6] AIO wait page and AIO lock page Suparna Bhattacharya
@ 2005-06-30 15:49 ` Sébastien Dugué
2005-07-01 7:37 ` Suparna Bhattacharya
6 siblings, 1 reply; 25+ messages in thread
From: Sébastien Dugué @ 2005-06-30 15:49 UTC (permalink / raw)
To: suparna
Cc: linux-aio kvack.org, linux-kernel, Benjamin LaHaise, wli, zab, mason
[-- Attachment #1: Type: text/plain, Size: 3283 bytes --]
On Mon, 2005-06-20 at 21:31 +0530, Suparna Bhattacharya wrote:
> On Mon, Jun 20, 2005 at 05:31:54PM +0530, Suparna Bhattacharya wrote:
> > Since AIO development is gaining momentum once again, ocfs2 and
> > samba both appear to be using AIO, NFS needs async semaphores etc,
> > there appears to be an increase in interest in straightening out some
> > of the pending work in this area. So this seems like a good
> > time to re-post some of those patches for discussion and decision.
> >
> > Just to help sync up, here is an initial list based on the pieces
> > that have been in progress with patches in existence (please feel free
> > to add/update ones I missed or reflected inaccurately here):
> >
> > (1) Updating AIO to use wait-bit based filtered wakeups (me/wli)
> > Status: Updated to 2.6.12-rc6, needs review
>
> Here is a little bit of background on the motivation behind this set of
> patches to update AIO for filtered wakeups:
>
> (a) Since the introduction of filtered wakeups support and
> the wait_bit_queue infrastructure in mainline, it is no longer
> sufficient to just embed a wait queue entry in the kiocb
> for AIO operations involving filtered wakeups.
> (b) Given that filesystem reads/writes use filtered wakeups underlying
> wait_on_page_bit, fixing this becomes a pre-req for buffered
> filesystem AIO.
> (c) The wait_bit_queue infrastructure actually enables a cleaner
> implementation of filesystem AIO because it already provides
> for an action routine intended to allow both blocking and
> non-blocking or asynchronous behaviour.
>
> As I was rewriting the patches to address this, there is one other
> change I made to resolve one remaining ugliness in my earlier
> patchsets - special casing of the form
> if (wait == NULL) wait = &local_wait
> to switch to a stack based wait queue entry if not passed a wait
> queue entry associated with an iocb.
>
> To avoid this, I have tried biting the bullet by including a default
> wait bit queue entry in the task structure, to be used instead of
> on-demand allocation of a wait bit queue entry on stack.
>
> All in all, these changes have (hopefully) simplified the code,
> as well as made it more up-to-date. Comments (including
> better names etc as requested by Zach) are welcome !
>
> Regards
> Suparna
>
Just found a bug in aio_run_iocb: after having called the retry
method for the iocb, current->io_wait is RESET to NULL. While this
does not affect applications doing only AIO, applications
mixing sync and async IO (MySQL for example) end up crashing
later on in the sync path when calling lock_page_slow as the io_wait
queue is NULL.
Therefore after the retry method has been called the task io_wait
queue should be set to the default queue.
This patch applies over Suparna's wait-bit patchset and maybe should
be folded into aio-wait-bit.
Sébastien.
--
------------------------------------------------------
Sébastien Dugué BULL/FREC:B1-247
phone: (+33) 476 29 77 70 Bullcom: 229-7770
mailto:sebastien.dugue@bull.net
Linux POSIX AIO: http://www.bullopensource.org/posix
------------------------------------------------------
[-- Attachment #2: aio-retry-iowait-fix --]
[-- Type: application/octet-stream, Size: 1139 bytes --]
When an application is mixing sync and async IO it ends up crashing
later on in the sync path when calling lock_page_slow as the io_wait
queue has been set to NULL in a previous AIO request.
Therefore after the retry method has been called the task io_wait
queue should be set to the default queue.
This patch applies over Suparna's wait-bit patchset:
- modify-wait-bit-action-args
- lock_page_wait
- init-wait-bit-key
- tsk-default-io-wait
- aio-wait-bit
- aio-wait-page
and could be folded into aio-wait-bit.
Signed-off-by: Sébastien Dugué <sebastien.dugue@bull.net>
aio.c | 2 +-
1 files changed, 1 insertion(+), 1 deletion(-)
Index: linux-2.6.12/fs/aio.c
===================================================================
--- linux-2.6.12.orig/fs/aio.c 2005-06-30 17:48:47.000000000 +0200
+++ linux-2.6.12/fs/aio.c 2005-06-30 17:48:57.000000000 +0200
@@ -714,7 +714,7 @@
BUG_ON(!is_sync_wait(current->io_wait));
current->io_wait = &iocb->ki_wait.wait;
ret = retry(iocb);
- current->io_wait = NULL;
+ current->io_wait = ¤t->__wait.wait;
if (-EIOCBRETRY != ret) {
if (-EIOCBQUEUED != ret) {
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 0/6] Integrate AIO with wait-bit based filtered wakeups
2005-06-30 15:49 ` [PATCH 0/6] Integrate AIO with wait-bit based filtered wakeups Sébastien Dugué
@ 2005-07-01 7:37 ` Suparna Bhattacharya
0 siblings, 0 replies; 25+ messages in thread
From: Suparna Bhattacharya @ 2005-07-01 7:37 UTC (permalink / raw)
To: Sébastien Dugué
Cc: linux-aio kvack.org, linux-kernel, Benjamin LaHaise, wli, zab, mason
On Thu, Jun 30, 2005 at 05:49:00PM +0200, Sébastien Dugué wrote:
> On Mon, 2005-06-20 at 21:31 +0530, Suparna Bhattacharya wrote:
> > On Mon, Jun 20, 2005 at 05:31:54PM +0530, Suparna Bhattacharya wrote:
> > > Since AIO development is gaining momentum once again, ocfs2 and
> > > samba both appear to be using AIO, NFS needs async semaphores etc,
> > > there appears to be an increase in interest in straightening out some
> > > of the pending work in this area. So this seems like a good
> > > time to re-post some of those patches for discussion and decision.
> > >
> > > Just to help sync up, here is an initial list based on the pieces
> > > that have been in progress with patches in existence (please feel free
> > > to add/update ones I missed or reflected inaccurately here):
> > >
> > > (1) Updating AIO to use wait-bit based filtered wakeups (me/wli)
> > > Status: Updated to 2.6.12-rc6, needs review
> >
> > Here is a little bit of background on the motivation behind this set of
> > patches to update AIO for filtered wakeups:
> >
> > (a) Since the introduction of filtered wakeups support and
> > the wait_bit_queue infrastructure in mainline, it is no longer
> > sufficient to just embed a wait queue entry in the kiocb
> > for AIO operations involving filtered wakeups.
> > (b) Given that filesystem reads/writes use filtered wakeups underlying
> > wait_on_page_bit, fixing this becomes a pre-req for buffered
> > filesystem AIO.
> > (c) The wait_bit_queue infrastructure actually enables a cleaner
> > implementation of filesystem AIO because it already provides
> > for an action routine intended to allow both blocking and
> > non-blocking or asynchronous behaviour.
> >
> > As I was rewriting the patches to address this, there is one other
> > change I made to resolve one remaining ugliness in my earlier
> > patchsets - special casing of the form
> > if (wait == NULL) wait = &local_wait
> > to switch to a stack based wait queue entry if not passed a wait
> > queue entry associated with an iocb.
> >
> > To avoid this, I have tried biting the bullet by including a default
> > wait bit queue entry in the task structure, to be used instead of
> > on-demand allocation of a wait bit queue entry on stack.
> >
> > All in all, these changes have (hopefully) simplified the code,
> > as well as made it more up-to-date. Comments (including
> > better names etc as requested by Zach) are welcome !
> >
> > Regards
> > Suparna
> >
>
> Just found a bug in aio_run_iocb: after having called the retry
> method for the iocb, current->io_wait is RESET to NULL. While this
> does not affect applications doing only AIO, applications
> mixing sync and async IO (MySQL for example) end up crashing
> later on in the sync path when calling lock_page_slow as the io_wait
> queue is NULL.
Yes this is a problem. I had spotted it too but the implications hadn't
registered well enough for prompt fix - thanks for the patch.
Regards
Suparna
>
> Therefore after the retry method has been called the task io_wait
> queue should be set to the default queue.
>
> This patch applies over Suparna's wait-bit patchset and maybe should
> be folded into aio-wait-bit.
>
> Sébastien.
>
> --
> ------------------------------------------------------
>
> Sébastien Dugué BULL/FREC:B1-247
> phone: (+33) 476 29 77 70 Bullcom: 229-7770
>
> mailto:sebastien.dugue@bull.net
>
> Linux POSIX AIO: http://www.bullopensource.org/posix
>
> ------------------------------------------------------
--
Suparna Bhattacharya (suparna@in.ibm.com)
Linux Technology Center
IBM Software Lab, India
^ permalink raw reply [flat|nested] 25+ messages in thread