linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] remove useless highmem bounce from loop/cryptoloop
@ 2003-10-30 13:41 Ben Slusky
  2003-10-30 18:14 ` Jari Ruusu
  0 siblings, 1 reply; 22+ messages in thread
From: Ben Slusky @ 2003-10-30 13:41 UTC (permalink / raw)
  To: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 688 bytes --]

Greetings,

The attached patch changes the loop device transfer functions (including
cryptoloop transfers) to accept page/offset pairs instead of virtual
addresses, and removes the redundant kmaps in do_lo_send, do_lo_receive,
and loop_transfer_bio. Per Andrew Morton's request a while back.

Combined with my previous patch (loop-recycle.diff) I believe this makes
loop devices safe for use as swap.

Questions? Comments? Flames?

-- 
Ben Slusky                      | What a waste it is to lose
sluskyb@paranoiacs.org          | one's mind. Or not to have a
sluskyb@stwing.org              | mind is being very wasteful.
PGP keyID ADA44B3B              | How true that is.  -Dan Quayle

[-- Attachment #2: loop-highmem.diff --]
[-- Type: text/plain, Size: 11916 bytes --]

--- linux-2.6.0-test/include/linux/loop.h-orig	2003-10-23 07:45:31.000000000 -0400
+++ linux-2.6.0-test/include/linux/loop.h	2003-10-23 07:46:13.000000000 -0400
@@ -34,8 +34,9 @@ struct loop_device {
 	loff_t		lo_sizelimit;
 	int		lo_flags;
 	int		(*transfer)(struct loop_device *, int cmd,
-				    char *raw_buf, char *loop_buf, int size,
-				    sector_t real_block);
+				    struct page *raw_page, unsigned raw_off,
+				    struct page *loop_page, unsigned loop_off,
+				    int size, sector_t real_block);
 	char		lo_file_name[LO_NAME_SIZE];
 	char		lo_crypt_name[LO_NAME_SIZE];
 	char		lo_encrypt_key[LO_KEY_SIZE];
@@ -128,8 +129,10 @@ struct loop_info64 {
 /* Support for loadable transfer modules */
 struct loop_func_table {
 	int number;	/* filter type */ 
-	int (*transfer)(struct loop_device *lo, int cmd, char *raw_buf,
-			char *loop_buf, int size, sector_t real_block);
+	int (*transfer)(struct loop_device *lo, int cmd,
+			struct page *raw_page, unsigned raw_off,
+			struct page *loop_page, unsigned loop_off,
+			int size, sector_t real_block);
 	int (*init)(struct loop_device *, const struct loop_info64 *); 
 	/* release is called from loop_unregister_transfer or clr_fd */
 	int (*release)(struct loop_device *); 
--- linux-2.6.0-test/drivers/block/loop.c-orig	2003-10-30 08:12:17.000000000 -0500
+++ linux-2.6.0-test/drivers/block/loop.c	2003-10-27 09:34:19.000000000 -0500
@@ -75,9 +75,14 @@ static struct gendisk **disks;
 /*
  * Transfer functions
  */
-static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf,
-			 char *loop_buf, int size, sector_t real_block)
+static int transfer_none(struct loop_device *lo, int cmd,
+			 struct page *raw_page, unsigned raw_off,
+			 struct page *loop_page, unsigned loop_off,
+			 int size, sector_t real_block)
 {
+	char	*raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
+	char	*loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
+
 	if (raw_buf != loop_buf) {
 		if (cmd == READ)
 			memcpy(loop_buf, raw_buf, size);
@@ -85,12 +90,19 @@ static int transfer_none(struct loop_dev
 			memcpy(raw_buf, loop_buf, size);
 	}
 
+	kunmap_atomic(raw_page, KM_USER0);
+	kunmap_atomic(loop_page, KM_USER1);
+	cond_resched();
 	return 0;
 }
 
-static int transfer_xor(struct loop_device *lo, int cmd, char *raw_buf,
-			char *loop_buf, int size, sector_t real_block)
+static int transfer_xor(struct loop_device *lo, int cmd,
+			struct page *raw_page, unsigned raw_off,
+			struct page *loop_page, unsigned loop_off,
+			int size, sector_t real_block)
 {
+	char	*raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
+	char	*loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
 	char	*in, *out, *key;
 	int	i, keysize;
 
@@ -106,6 +118,10 @@ static int transfer_xor(struct loop_devi
 	keysize = lo->lo_encrypt_key_size;
 	for (i = 0; i < size; i++)
 		*out++ = *in++ ^ key[(i & 511) % keysize];
+
+	kunmap_atomic(raw_page, KM_USER0);
+	kunmap_atomic(loop_page, KM_USER1);
+	cond_resched();
 	return 0;
 }
 
@@ -162,13 +178,15 @@ figure_loop_size(struct loop_device *lo)
 }
 
 static inline int
-lo_do_transfer(struct loop_device *lo, int cmd, char *rbuf,
-	       char *lbuf, int size, sector_t rblock)
+lo_do_transfer(struct loop_device *lo, int cmd,
+	       struct page *rpage, unsigned roffs,
+	       struct page *lpage, unsigned loffs,
+	       int size, sector_t rblock)
 {
 	if (!lo->transfer)
 		return 0;
 
-	return lo->transfer(lo, cmd, rbuf, lbuf, size, rblock);
+	return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
 }
 
 static int
@@ -178,16 +196,15 @@ do_lo_send(struct loop_device *lo, struc
 	struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
 	struct address_space_operations *aops = mapping->a_ops;
 	struct page *page;
-	char *kaddr, *data;
 	pgoff_t index;
-	unsigned size, offset;
+	unsigned size, offset, bv_offs;
 	int len;
 	int ret = 0;
 
 	down(&mapping->host->i_sem);
 	index = pos >> PAGE_CACHE_SHIFT;
 	offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
-	data = kmap(bvec->bv_page) + bvec->bv_offset;
+	bv_offs = bvec->bv_offset;
 	len = bvec->bv_len;
 	while (len > 0) {
 		sector_t IV;
@@ -204,9 +221,9 @@ do_lo_send(struct loop_device *lo, struc
 			goto fail;
 		if (aops->prepare_write(file, page, offset, offset+size))
 			goto unlock;
-		kaddr = kmap(page);
-		transfer_result = lo_do_transfer(lo, WRITE, kaddr + offset,
-						 data, size, IV);
+		transfer_result = lo_do_transfer(lo, WRITE, page, offset,
+						 bvec->bv_page, bv_offs,
+						 size, IV);
 		if (transfer_result) {
 			/*
 			 * The transfer failed, but we still write the data to
@@ -214,7 +231,8 @@ do_lo_send(struct loop_device *lo, struc
 			 */
 			printk(KERN_ERR "loop: transfer error block %llu\n",
 			       (unsigned long long)index);
-			memset(kaddr + offset, 0, size);
+			memset(kmap_atomic(page, KM_USER0) + offset, 0, size);
+			kunmap_atomic(page, KM_USER0);
 		}
 		flush_dcache_page(page);
 		kunmap(page);
@@ -222,7 +240,7 @@ do_lo_send(struct loop_device *lo, struc
 			goto unlock;
 		if (transfer_result)
 			goto unlock;
-		data += size;
+		bv_offs += size;
 		len -= size;
 		offset = 0;
 		index++;
@@ -261,7 +279,8 @@ lo_send(struct loop_device *lo, struct b
 
 struct lo_read_data {
 	struct loop_device *lo;
-	char *data;
+	struct page *page;
+	unsigned offset;
 	int bsize;
 };
 
@@ -269,7 +288,6 @@ static int
 lo_read_actor(read_descriptor_t *desc, struct page *page,
 	      unsigned long offset, unsigned long size)
 {
-	char *kaddr;
 	unsigned long count = desc->count;
 	struct lo_read_data *p = (struct lo_read_data*)desc->buf;
 	struct loop_device *lo = p->lo;
@@ -280,18 +298,16 @@ lo_read_actor(read_descriptor_t *desc, s
 	if (size > count)
 		size = count;
 
-	kaddr = kmap(page);
-	if (lo_do_transfer(lo, READ, kaddr + offset, p->data, size, IV)) {
+	if (lo_do_transfer(lo, READ, page, offset, p->page, p->offset, size, IV)) {
 		size = 0;
 		printk(KERN_ERR "loop: transfer error block %ld\n",
 		       page->index);
 		desc->error = -EINVAL;
 	}
-	kunmap(page);
 	
 	desc->count = count - size;
 	desc->written += size;
-	p->data += size;
+	p->offset += size;
 	return size;
 }
 
@@ -304,12 +320,12 @@ do_lo_receive(struct loop_device *lo,
 	int retval;
 
 	cookie.lo = lo;
-	cookie.data = kmap(bvec->bv_page) + bvec->bv_offset;
+	cookie.page = bvec->bv_page;
+	cookie.offset = bvec->bv_offset;
 	cookie.bsize = bsize;
 	file = lo->lo_backing_file;
 	retval = file->f_op->sendfile(file, &pos, bvec->bv_len,
 			lo_read_actor, &cookie);
-	kunmap(bvec->bv_page);
 	return (retval < 0)? retval: 0;
 }
 
@@ -569,23 +585,17 @@ static int loop_transfer_bio(struct loop
 {
 	sector_t IV;
 	struct bio_vec *from_bvec, *to_bvec;
-	char *vto, *vfrom;
 	int ret = 0, i;
 
 	IV = from_bio->bi_sector + (lo->lo_offset >> 9);
 
 	__bio_for_each_segment(to_bvec, to_bio, i, from_bio->bi_idx) {
 		from_bvec = &from_bio->bi_io_vec[i];
-
 		if (i >= to_bio->bi_idx) {
-			kmap(from_bvec->bv_page);
-			kmap(to_bvec->bv_page);
-			vfrom = page_address(from_bvec->bv_page) + from_bvec->bv_offset;
-			vto = page_address(to_bvec->bv_page) + to_bvec->bv_offset;
-			ret |= lo_do_transfer(lo, bio_data_dir(to_bio), vto, vfrom,
-						from_bvec->bv_len, IV);
-			kunmap(from_bvec->bv_page);
-			kunmap(to_bvec->bv_page);
+			ret |= lo_do_transfer(lo, bio_data_dir(to_bio),
+				      to_bvec->bv_page, to_bvec->bv_offset,
+				      from_bvec->bv_page, from_bvec->bv_offset,
+				      from_bvec->bv_len, IV);
 		}
 		IV += from_bvec->bv_len >> 9;
 	}
--- linux-2.6.0-test/drivers/block/cryptoloop.c-orig	2003-10-23 07:45:31.000000000 -0400
+++ linux-2.6.0-test/drivers/block/cryptoloop.c	2003-10-23 07:46:13.000000000 -0400
@@ -87,43 +87,49 @@ typedef int (*encdec_ecb_t)(struct crypt
 
 
 static int
-cryptoloop_transfer_ecb(struct loop_device *lo, int cmd, char *raw_buf,
-		     char *loop_buf, int size, sector_t IV)
+cryptoloop_transfer_ecb(struct loop_device *lo, int cmd,
+			struct page *raw_page, unsigned raw_off,
+			struct page *loop_page, unsigned loop_off,
+			int size, sector_t IV)
 {
 	struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data;
 	struct scatterlist sg_out = { 0, };
 	struct scatterlist sg_in = { 0, };
 
 	encdec_ecb_t encdecfunc;
-	char const *in;
-	char *out;
+	struct page *in_page, *out_page;
+	unsigned in_offs, out_offs;
 
 	if (cmd == READ) {
-		in = raw_buf;
-		out = loop_buf;
+		in_page = raw_page;
+		in_offs = raw_off;
+		out_page = loop_page;
+		out_offs = loop_off;
 		encdecfunc = tfm->crt_u.cipher.cit_decrypt;
 	} else {
-		in = loop_buf;
-		out = raw_buf;
+		in_page = loop_page;
+		in_offs = loop_off;
+		out_page = raw_page;
+		out_offs = raw_off;
 		encdecfunc = tfm->crt_u.cipher.cit_encrypt;
 	}
 
 	while (size > 0) {
 		const int sz = min(size, LOOP_IV_SECTOR_SIZE);
 
-		sg_in.page = virt_to_page(in);
-		sg_in.offset = (unsigned long)in & ~PAGE_MASK;
+		sg_in.page = in_page;
+		sg_in.offset = in_offs;
 		sg_in.length = sz;
 
-		sg_out.page = virt_to_page(out);
-		sg_out.offset = (unsigned long)out & ~PAGE_MASK;
+		sg_out.page = out_page;
+		sg_out.offset = out_offs;
 		sg_out.length = sz;
 
 		encdecfunc(tfm, &sg_out, &sg_in, sz);
 
 		size -= sz;
-		in += sz;
-		out += sz;
+		in_offs += sz;
+		out_offs += sz;
 	}
 
 	return 0;
@@ -135,24 +141,30 @@ typedef int (*encdec_cbc_t)(struct crypt
 			unsigned int nsg, u8 *iv);
 
 static int
-cryptoloop_transfer_cbc(struct loop_device *lo, int cmd, char *raw_buf,
-		     char *loop_buf, int size, sector_t IV)
+cryptoloop_transfer_cbc(struct loop_device *lo, int cmd,
+			struct page *raw_page, unsigned raw_off,
+			struct page *loop_page, unsigned loop_off,
+			int size, sector_t IV)
 {
 	struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data;
 	struct scatterlist sg_out = { 0, };
 	struct scatterlist sg_in = { 0, };
 
 	encdec_cbc_t encdecfunc;
-	char const *in;
-	char *out;
+	struct page *in_page, *out_page;
+	unsigned in_offs, out_offs;
 
 	if (cmd == READ) {
-		in = raw_buf;
-		out = loop_buf;
+		in_page = raw_page;
+		in_offs = raw_off;
+		out_page = loop_page;
+		out_offs = loop_off;
 		encdecfunc = tfm->crt_u.cipher.cit_decrypt_iv;
 	} else {
-		in = loop_buf;
-		out = raw_buf;
+		in_page = loop_page;
+		in_offs = loop_off;
+		out_page = raw_page;
+		out_offs = raw_off;
 		encdecfunc = tfm->crt_u.cipher.cit_encrypt_iv;
 	}
 
@@ -161,39 +173,43 @@ cryptoloop_transfer_cbc(struct loop_devi
 		u32 iv[4] = { 0, };
 		iv[0] = cpu_to_le32(IV & 0xffffffff);
 
-		sg_in.page = virt_to_page(in);
-		sg_in.offset = offset_in_page(in);
+		sg_in.page = in_page;
+		sg_in.offset = in_offs;
 		sg_in.length = sz;
 
-		sg_out.page = virt_to_page(out);
-		sg_out.offset = offset_in_page(out);
+		sg_out.page = out_page;
+		sg_out.offset = out_offs;
 		sg_out.length = sz;
 
 		encdecfunc(tfm, &sg_out, &sg_in, sz, (u8 *)iv);
 
 		IV++;
 		size -= sz;
-		in += sz;
-		out += sz;
+		in_offs += sz;
+		out_offs += sz;
 	}
 
 	return 0;
 }
 
 static int
-cryptoloop_transfer(struct loop_device *lo, int cmd, char *raw_buf,
-		     char *loop_buf, int size, sector_t IV)
+cryptoloop_transfer(struct loop_device *lo, int cmd,
+		    struct page *raw_page, unsigned raw_off,
+		    struct page *loop_page, unsigned loop_off,
+		    int size, sector_t IV)
 {
 	struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data;
 	if(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_ECB)
 	{
 		lo->transfer = cryptoloop_transfer_ecb;
-		return cryptoloop_transfer_ecb(lo, cmd, raw_buf, loop_buf, size, IV);
+		return cryptoloop_transfer_ecb(lo, cmd, raw_page, raw_off,
+					       loop_page, loop_off, size, IV);
 	}	
 	if(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_CBC)
 	{	
 		lo->transfer = cryptoloop_transfer_cbc;
-		return cryptoloop_transfer_cbc(lo, cmd, raw_buf, loop_buf, size, IV);
+		return cryptoloop_transfer_cbc(lo, cmd, raw_page, raw_off,
+					       loop_page, loop_off, size, IV);
 	}
 	
 	/*  This is not supposed to happen */

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] remove useless highmem bounce from loop/cryptoloop
  2003-10-30 13:41 [PATCH] remove useless highmem bounce from loop/cryptoloop Ben Slusky
@ 2003-10-30 18:14 ` Jari Ruusu
  2003-10-30 21:30   ` Andrew Morton
  0 siblings, 1 reply; 22+ messages in thread
From: Jari Ruusu @ 2003-10-30 18:14 UTC (permalink / raw)
  To: Ben Slusky; +Cc: linux-kernel

Ben Slusky wrote:
> The attached patch changes the loop device transfer functions (including
> cryptoloop transfers) to accept page/offset pairs instead of virtual
> addresses, and removes the redundant kmaps in do_lo_send, do_lo_receive,
> and loop_transfer_bio. Per Andrew Morton's request a while back.

Cryptoloop is not the only user of loop transfer interface. Please don't
change that interface as it breaks code outside of mainline kernel.

Cryptoapi interface is quite broken. Your change extends that breakage to
loop transfer interface. Please don't do that.

Linus, please don't apply this patch.

-- 
Jari Ruusu  1024R/3A220F51 5B 4B F9 BB D3 3F 52 E9  DB 1D EB E3 24 0E A9 DD

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] remove useless highmem bounce from loop/cryptoloop
  2003-10-30 18:14 ` Jari Ruusu
@ 2003-10-30 21:30   ` Andrew Morton
  2003-10-31  0:52     ` Ben Slusky
  2003-10-31 12:15     ` [PATCH] remove useless highmem bounce from loop/cryptoloop Jari Ruusu
  0 siblings, 2 replies; 22+ messages in thread
From: Andrew Morton @ 2003-10-30 21:30 UTC (permalink / raw)
  To: Jari Ruusu; +Cc: sluskyb, linux-kernel

Jari Ruusu <jariruusu@users.sourceforge.net> wrote:
>
> Ben Slusky wrote:
> > The attached patch changes the loop device transfer functions (including
> > cryptoloop transfers) to accept page/offset pairs instead of virtual
> > addresses, and removes the redundant kmaps in do_lo_send, do_lo_receive,
> > and loop_transfer_bio. Per Andrew Morton's request a while back.
> 
> Cryptoloop is not the only user of loop transfer interface. Please don't
> change that interface as it breaks code outside of mainline kernel.

We really should not retain ugly interfaces in the mainline kernel because
some external, unmerged piece of code relies on the old interfaces.  That
way lies madness.

Especially as that external code has, I think, remained unmerged for years,
and there appears to be no momentum moving it forwards.

We *have* to get the mainline codebase up-to-date with current kernel
idioms and working as well as possible.  If you want to submit sane-sized
and documented patches to help us get there then sheesh, go wild.  But
please do not try to stop the rest of us.

> Cryptoapi interface is quite broken. Your change extends that breakage to
> loop transfer interface. Please don't do that.

Please describe this breakage.


Ben, I confess that I'd forgotten about #1198.  I'll take a look at your
memory allocation fix - it seems to be unfortunately large, but we may need
to go that way.

One question is: why do we go down a different code path for blockdevs
nowadays anyway?  The handoff to the loop thread seems to work OK for
file-backed loop, and providing a bmap() for blockdevs is easy enough?



^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] remove useless highmem bounce from loop/cryptoloop
  2003-10-30 21:30   ` Andrew Morton
@ 2003-10-31  0:52     ` Ben Slusky
  2003-10-31  9:55       ` Andrew Morton
  2003-10-31 12:15     ` [PATCH] remove useless highmem bounce from loop/cryptoloop Jari Ruusu
  1 sibling, 1 reply; 22+ messages in thread
From: Ben Slusky @ 2003-10-31  0:52 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Jari Ruusu, linux-kernel

On Thu, 30 Oct 2003 13:30:00 -0800, Andrew Morton wrote:
> Ben, I confess that I'd forgotten about #1198.  I'll take a look at your
> memory allocation fix - it seems to be unfortunately large, but we may need
> to go that way.

The current memory allocation procedure really is inadequate. It worked
ok up thru 2.4 because the loop device was used almost exclusively
as a nifty hack to make an initrd or to double-check the ISO you just
created. Throw strong crypto into the mix and it becomes reasonable to
have your laptop mount all its filesystems and swap off of loop devices.

> One question is: why do we go down a different code path for blockdevs
> nowadays anyway?  The handoff to the loop thread seems to work OK for
> file-backed loop, and providing a bmap() for blockdevs is easy enough?

The code path for file-backed loop handles one page at a time, as that's
the limit of the FS interface. Block-backed loop devices can throw
huge bios at their backing device with one make_request. If we could
get both working on the same code path, would that be worth hobbling
block-backed loop?

-- 
Ben Slusky                      | If you're not part of the
sluskyb@paranoiacs.org          | solution, there's good money
sluskyb@stwing.org              | to be made in prolonging the
PGP keyID ADA44B3B              | problem.      www.despair.com

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] remove useless highmem bounce from loop/cryptoloop
  2003-10-31  0:52     ` Ben Slusky
@ 2003-10-31  9:55       ` Andrew Morton
  2003-10-31  9:55         ` Andrew Morton
  2003-11-01  0:26         ` Ben Slusky
  0 siblings, 2 replies; 22+ messages in thread
From: Andrew Morton @ 2003-10-31  9:55 UTC (permalink / raw)
  To: Ben Slusky; +Cc: jariruusu, linux-kernel

Ben Slusky <sluskyb@paranoiacs.org> wrote:
>
> On Thu, 30 Oct 2003 13:30:00 -0800, Andrew Morton wrote:
> > Ben, I confess that I'd forgotten about #1198.  I'll take a look at your
> > memory allocation fix - it seems to be unfortunately large, but we may need
> > to go that way.
> 
> The current memory allocation procedure really is inadequate. It worked
> ok up thru 2.4 because the loop device was used almost exclusively
> as a nifty hack to make an initrd or to double-check the ISO you just
> created.

mm..  Last time I looked the 2.4 loop driver is fairly robust from the
memory management point of view.

> Throw strong crypto into the mix and it becomes reasonable to
> have your laptop mount all its filesystems and swap off of loop devices.
> 
> > One question is: why do we go down a different code path for blockdevs
> > nowadays anyway?  The handoff to the loop thread seems to work OK for
> > file-backed loop, and providing a bmap() for blockdevs is easy enough?
> 
> The code path for file-backed loop handles one page at a time, as that's
> the limit of the FS interface.

I doubt if there's much of a difference really - both code paths involve a
single copy of the data.  The additional context switches are probably more
significant.


> Block-backed loop devices can throw
> huge bios at their backing device with one make_request. If we could
> get both working on the same code path, would that be worth hobbling
> block-backed loop?

Here's the patch; feel free to benchmark it.  It kills 200 lines of code
and unifies the block-backed and file-backed codepaths.  That surely is a
good thing.

It fixes bug 1198 too, it appears.


 drivers/block/loop.c |  205 +--------------------------------------------------
 fs/block_dev.c       |    0 
 include/linux/loop.h |    3 
 3 files changed, 6 insertions(+), 202 deletions(-)

diff -puN drivers/block/loop.c~loop-remove-blkdev-special-case drivers/block/loop.c
--- 25/drivers/block/loop.c~loop-remove-blkdev-special-case	2003-10-31 00:37:08.000000000 -0800
+++ 25-akpm/drivers/block/loop.c	2003-10-31 00:37:08.000000000 -0800
@@ -345,23 +345,6 @@ static int do_bio_filebacked(struct loop
 	return ret;
 }
 
-static int loop_end_io_transfer(struct bio *, unsigned int, int);
-
-static void loop_put_buffer(struct bio *bio)
-{
-	/*
-	 * check bi_end_io, may just be a remapped bio
-	 */
-	if (bio && bio->bi_end_io == loop_end_io_transfer) {
-		int i;
-
-		for (i = 0; i < bio->bi_vcnt; i++)
-			__free_page(bio->bi_io_vec[i].bv_page);
-
-		bio_put(bio);
-	}
-}
-
 /*
  * Add bio to back of pending list
  */
@@ -399,129 +382,8 @@ static struct bio *loop_get_bio(struct l
 	return bio;
 }
 
-/*
- * if this was a WRITE lo->transfer stuff has already been done. for READs,
- * queue it for the loop thread and let it do the transfer out of
- * bi_end_io context (we don't want to do decrypt of a page with irqs
- * disabled)
- */
-static int loop_end_io_transfer(struct bio *bio, unsigned int bytes_done, int err)
-{
-	struct bio *rbh = bio->bi_private;
-	struct loop_device *lo = rbh->bi_bdev->bd_disk->private_data;
-
-	if (bio->bi_size)
-		return 1;
-
-	if (err || bio_rw(bio) == WRITE) {
-		bio_endio(rbh, rbh->bi_size, err);
-		if (atomic_dec_and_test(&lo->lo_pending))
-			up(&lo->lo_bh_mutex);
-		loop_put_buffer(bio);
-	} else
-		loop_add_bio(lo, bio);
-
-	return 0;
-}
-
-static struct bio *loop_copy_bio(struct bio *rbh)
-{
-	struct bio *bio;
-	struct bio_vec *bv;
-	int i;
-
-	bio = bio_alloc(__GFP_NOWARN, rbh->bi_vcnt);
-	if (!bio)
-		return NULL;
-
-	/*
-	 * iterate iovec list and alloc pages
-	 */
-	__bio_for_each_segment(bv, rbh, i, 0) {
-		struct bio_vec *bbv = &bio->bi_io_vec[i];
-
-		bbv->bv_page = alloc_page(__GFP_NOWARN|__GFP_HIGHMEM);
-		if (bbv->bv_page == NULL)
-			goto oom;
-
-		bbv->bv_len = bv->bv_len;
-		bbv->bv_offset = bv->bv_offset;
-	}
-
-	bio->bi_vcnt = rbh->bi_vcnt;
-	bio->bi_size = rbh->bi_size;
-
-	return bio;
-
-oom:
-	while (--i >= 0)
-		__free_page(bio->bi_io_vec[i].bv_page);
-
-	bio_put(bio);
-	return NULL;
-}
-
-static struct bio *loop_get_buffer(struct loop_device *lo, struct bio *rbh)
-{
-	struct bio *bio;
-
-	/*
-	 * When called on the page reclaim -> writepage path, this code can
-	 * trivially consume all memory.  So we drop PF_MEMALLOC to avoid
-	 * stealing all the page reserves and throttle to the writeout rate.
-	 * pdflush will have been woken by page reclaim.  Let it do its work.
-	 */
-	do {
-		int flags = current->flags;
-
-		current->flags &= ~PF_MEMALLOC;
-		bio = loop_copy_bio(rbh);
-		if (flags & PF_MEMALLOC)
-			current->flags |= PF_MEMALLOC;
-
-		if (bio == NULL)
-			blk_congestion_wait(WRITE, HZ/10);
-	} while (bio == NULL);
-
-	bio->bi_end_io = loop_end_io_transfer;
-	bio->bi_private = rbh;
-	bio->bi_sector = rbh->bi_sector + (lo->lo_offset >> 9);
-	bio->bi_rw = rbh->bi_rw;
-	bio->bi_bdev = lo->lo_device;
-
-	return bio;
-}
-
-static int loop_transfer_bio(struct loop_device *lo,
-			     struct bio *to_bio, struct bio *from_bio)
-{
-	sector_t IV;
-	struct bio_vec *from_bvec, *to_bvec;
-	char *vto, *vfrom;
-	int ret = 0, i;
-
-	IV = from_bio->bi_sector + (lo->lo_offset >> 9);
-
-	__bio_for_each_segment(from_bvec, from_bio, i, 0) {
-		to_bvec = &to_bio->bi_io_vec[i];
-
-		kmap(from_bvec->bv_page);
-		kmap(to_bvec->bv_page);
-		vfrom = page_address(from_bvec->bv_page) + from_bvec->bv_offset;
-		vto = page_address(to_bvec->bv_page) + to_bvec->bv_offset;
-		ret |= lo_do_transfer(lo, bio_data_dir(to_bio), vto, vfrom,
-					from_bvec->bv_len, IV);
-		kunmap(from_bvec->bv_page);
-		kunmap(to_bvec->bv_page);
-		IV += from_bvec->bv_len >> 9;
-	}
-
-	return ret;
-}
-		
 static int loop_make_request(request_queue_t *q, struct bio *old_bio)
 {
-	struct bio *new_bio = NULL;
 	struct loop_device *lo = q->queuedata;
 	int rw = bio_rw(old_bio);
 
@@ -543,31 +405,11 @@ static int loop_make_request(request_que
 		printk(KERN_ERR "loop: unknown command (%x)\n", rw);
 		goto err;
 	}
-
-	/*
-	 * file backed, queue for loop_thread to handle
-	 */
-	if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
-		loop_add_bio(lo, old_bio);
-		return 0;
-	}
-
-	/*
-	 * piggy old buffer on original, and submit for I/O
-	 */
-	new_bio = loop_get_buffer(lo, old_bio);
-	if (rw == WRITE) {
-		if (loop_transfer_bio(lo, new_bio, old_bio))
-			goto err;
-	}
-
-	generic_make_request(new_bio);
+	loop_add_bio(lo, old_bio);
 	return 0;
-
 err:
 	if (atomic_dec_and_test(&lo->lo_pending))
 		up(&lo->lo_bh_mutex);
-	loop_put_buffer(new_bio);
 out:
 	bio_io_error(old_bio, old_bio->bi_size);
 	return 0;
@@ -580,20 +422,8 @@ static inline void loop_handle_bio(struc
 {
 	int ret;
 
-	/*
-	 * For block backed loop, we know this is a READ
-	 */
-	if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
-		ret = do_bio_filebacked(lo, bio);
-		bio_endio(bio, bio->bi_size, ret);
-	} else {
-		struct bio *rbh = bio->bi_private;
-
-		ret = loop_transfer_bio(lo, bio, rbh);
-
-		bio_endio(rbh, rbh->bi_size, ret);
-		loop_put_buffer(bio);
-	}
+	ret = do_bio_filebacked(lo, bio);
+	bio_endio(bio, bio->bi_size, ret);
 }
 
 /*
@@ -685,29 +515,19 @@ static int loop_set_fd(struct loop_devic
 
 	error = -EINVAL;
 
-	if (S_ISBLK(inode->i_mode)) {
-		lo_device = I_BDEV(inode);
-		if (lo_device == bdev) {
-			error = -EBUSY;
-			goto out;
-		}
-		lo_blocksize = block_size(lo_device);
-		if (bdev_read_only(lo_device))
-			lo_flags |= LO_FLAGS_READ_ONLY;
-	} else if (S_ISREG(inode->i_mode)) {
+	if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
 		struct address_space_operations *aops = mapping->a_ops;
 		/*
 		 * If we can't read - sorry. If we only can't write - well,
 		 * it's going to be read-only.
 		 */
-		if (!inode->i_fop->sendfile)
+		if (!lo_file->f_op->sendfile)
 			goto out_putf;
 
 		if (!aops->prepare_write || !aops->commit_write)
 			lo_flags |= LO_FLAGS_READ_ONLY;
 
 		lo_blocksize = inode->i_blksize;
-		lo_flags |= LO_FLAGS_DO_BMAP;
 		error = 0;
 	} else
 		goto out_putf;
@@ -744,21 +564,6 @@ static int loop_set_fd(struct loop_devic
 	 */
 	blk_queue_make_request(lo->lo_queue, loop_make_request);
 	lo->lo_queue->queuedata = lo;
-
-	/*
-	 * we remap to a block device, make sure we correctly stack limits
-	 */
-	if (S_ISBLK(inode->i_mode)) {
-		request_queue_t *q = bdev_get_queue(lo_device);
-
-		blk_queue_max_sectors(lo->lo_queue, q->max_sectors);
-		blk_queue_max_phys_segments(lo->lo_queue,q->max_phys_segments);
-		blk_queue_max_hw_segments(lo->lo_queue, q->max_hw_segments);
-		blk_queue_max_segment_size(lo->lo_queue, q->max_segment_size);
-		blk_queue_segment_boundary(lo->lo_queue, q->seg_boundary_mask);
-		blk_queue_merge_bvec(lo->lo_queue, q->merge_bvec_fn);
-	}
-
 	kernel_thread(loop_thread, lo, CLONE_KERNEL);
 	down(&lo->lo_sem);
 
diff -puN fs/block_dev.c~loop-remove-blkdev-special-case fs/block_dev.c
diff -puN include/linux/loop.h~loop-remove-blkdev-special-case include/linux/loop.h
--- 25/include/linux/loop.h~loop-remove-blkdev-special-case	2003-10-31 00:37:08.000000000 -0800
+++ 25-akpm/include/linux/loop.h	2003-10-31 00:37:08.000000000 -0800
@@ -70,8 +70,7 @@ struct loop_device {
 /*
  * Loop flags
  */
-#define LO_FLAGS_DO_BMAP	1
-#define LO_FLAGS_READ_ONLY	2
+#define LO_FLAGS_READ_ONLY	1
 
 #include <asm/posix_types.h>	/* for __kernel_old_dev_t */
 #include <asm/types.h>		/* for __u64 */

_


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] remove useless highmem bounce from loop/cryptoloop
  2003-10-31  9:55       ` Andrew Morton
@ 2003-10-31  9:55         ` Andrew Morton
  2003-10-31  9:58           ` Andrew Morton
  2003-11-01  0:26         ` Ben Slusky
  1 sibling, 1 reply; 22+ messages in thread
From: Andrew Morton @ 2003-10-31  9:55 UTC (permalink / raw)
  To: sluskyb, jariruusu, linux-kernel

Andrew Morton <akpm@osdl.org> wrote:
>
> Here's the patch;

And here's your cleanup patch on top of that.

 drivers/block/cryptoloop.c |   80 +++++++++++++++++++++++++++------------------
 drivers/block/loop.c       |   62 +++++++++++++++++++++-------------
 include/linux/loop.h       |   11 +++---
 3 files changed, 94 insertions(+), 59 deletions(-)

diff -puN drivers/block/cryptoloop.c~loop-highmem drivers/block/cryptoloop.c
--- 25/drivers/block/cryptoloop.c~loop-highmem	2003-10-31 00:40:44.000000000 -0800
+++ 25-akpm/drivers/block/cryptoloop.c	2003-10-31 00:40:45.000000000 -0800
@@ -87,43 +87,49 @@ typedef int (*encdec_ecb_t)(struct crypt
 
 
 static int
-cryptoloop_transfer_ecb(struct loop_device *lo, int cmd, char *raw_buf,
-		     char *loop_buf, int size, sector_t IV)
+cryptoloop_transfer_ecb(struct loop_device *lo, int cmd,
+			struct page *raw_page, unsigned raw_off,
+			struct page *loop_page, unsigned loop_off,
+			int size, sector_t IV)
 {
 	struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data;
 	struct scatterlist sg_out = { 0, };
 	struct scatterlist sg_in = { 0, };
 
 	encdec_ecb_t encdecfunc;
-	char const *in;
-	char *out;
+	struct page *in_page, *out_page;
+	unsigned in_offs, out_offs;
 
 	if (cmd == READ) {
-		in = raw_buf;
-		out = loop_buf;
+		in_page = raw_page;
+		in_offs = raw_off;
+		out_page = loop_page;
+		out_offs = loop_off;
 		encdecfunc = tfm->crt_u.cipher.cit_decrypt;
 	} else {
-		in = loop_buf;
-		out = raw_buf;
+		in_page = loop_page;
+		in_offs = loop_off;
+		out_page = raw_page;
+		out_offs = raw_off;
 		encdecfunc = tfm->crt_u.cipher.cit_encrypt;
 	}
 
 	while (size > 0) {
 		const int sz = min(size, LOOP_IV_SECTOR_SIZE);
 
-		sg_in.page = virt_to_page(in);
-		sg_in.offset = (unsigned long)in & ~PAGE_MASK;
+		sg_in.page = in_page;
+		sg_in.offset = in_offs;
 		sg_in.length = sz;
 
-		sg_out.page = virt_to_page(out);
-		sg_out.offset = (unsigned long)out & ~PAGE_MASK;
+		sg_out.page = out_page;
+		sg_out.offset = out_offs;
 		sg_out.length = sz;
 
 		encdecfunc(tfm, &sg_out, &sg_in, sz);
 
 		size -= sz;
-		in += sz;
-		out += sz;
+		in_offs += sz;
+		out_offs += sz;
 	}
 
 	return 0;
@@ -135,24 +141,30 @@ typedef int (*encdec_cbc_t)(struct crypt
 			unsigned int nsg, u8 *iv);
 
 static int
-cryptoloop_transfer_cbc(struct loop_device *lo, int cmd, char *raw_buf,
-		     char *loop_buf, int size, sector_t IV)
+cryptoloop_transfer_cbc(struct loop_device *lo, int cmd,
+			struct page *raw_page, unsigned raw_off,
+			struct page *loop_page, unsigned loop_off,
+			int size, sector_t IV)
 {
 	struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data;
 	struct scatterlist sg_out = { 0, };
 	struct scatterlist sg_in = { 0, };
 
 	encdec_cbc_t encdecfunc;
-	char const *in;
-	char *out;
+	struct page *in_page, *out_page;
+	unsigned in_offs, out_offs;
 
 	if (cmd == READ) {
-		in = raw_buf;
-		out = loop_buf;
+		in_page = raw_page;
+		in_offs = raw_off;
+		out_page = loop_page;
+		out_offs = loop_off;
 		encdecfunc = tfm->crt_u.cipher.cit_decrypt_iv;
 	} else {
-		in = loop_buf;
-		out = raw_buf;
+		in_page = loop_page;
+		in_offs = loop_off;
+		out_page = raw_page;
+		out_offs = raw_off;
 		encdecfunc = tfm->crt_u.cipher.cit_encrypt_iv;
 	}
 
@@ -161,39 +173,43 @@ cryptoloop_transfer_cbc(struct loop_devi
 		u32 iv[4] = { 0, };
 		iv[0] = cpu_to_le32(IV & 0xffffffff);
 
-		sg_in.page = virt_to_page(in);
-		sg_in.offset = offset_in_page(in);
+		sg_in.page = in_page;
+		sg_in.offset = in_offs;
 		sg_in.length = sz;
 
-		sg_out.page = virt_to_page(out);
-		sg_out.offset = offset_in_page(out);
+		sg_out.page = out_page;
+		sg_out.offset = out_offs;
 		sg_out.length = sz;
 
 		encdecfunc(tfm, &sg_out, &sg_in, sz, (u8 *)iv);
 
 		IV++;
 		size -= sz;
-		in += sz;
-		out += sz;
+		in_offs += sz;
+		out_offs += sz;
 	}
 
 	return 0;
 }
 
 static int
-cryptoloop_transfer(struct loop_device *lo, int cmd, char *raw_buf,
-		     char *loop_buf, int size, sector_t IV)
+cryptoloop_transfer(struct loop_device *lo, int cmd,
+		    struct page *raw_page, unsigned raw_off,
+		    struct page *loop_page, unsigned loop_off,
+		    int size, sector_t IV)
 {
 	struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data;
 	if(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_ECB)
 	{
 		lo->transfer = cryptoloop_transfer_ecb;
-		return cryptoloop_transfer_ecb(lo, cmd, raw_buf, loop_buf, size, IV);
+		return cryptoloop_transfer_ecb(lo, cmd, raw_page, raw_off,
+					       loop_page, loop_off, size, IV);
 	}	
 	if(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_CBC)
 	{	
 		lo->transfer = cryptoloop_transfer_cbc;
-		return cryptoloop_transfer_cbc(lo, cmd, raw_buf, loop_buf, size, IV);
+		return cryptoloop_transfer_cbc(lo, cmd, raw_page, raw_off,
+					       loop_page, loop_off, size, IV);
 	}
 	
 	/*  This is not supposed to happen */
diff -puN drivers/block/loop.c~loop-highmem drivers/block/loop.c
--- 25/drivers/block/loop.c~loop-highmem	2003-10-31 00:40:44.000000000 -0800
+++ 25-akpm/drivers/block/loop.c	2003-10-31 00:40:45.000000000 -0800
@@ -76,9 +76,14 @@ static struct gendisk **disks;
 /*
  * Transfer functions
  */
-static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf,
-			 char *loop_buf, int size, sector_t real_block)
+static int transfer_none(struct loop_device *lo, int cmd,
+			 struct page *raw_page, unsigned raw_off,
+			 struct page *loop_page, unsigned loop_off,
+			 int size, sector_t real_block)
 {
+	char	*raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
+	char	*loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
+
 	if (raw_buf != loop_buf) {
 		if (cmd == READ)
 			memcpy(loop_buf, raw_buf, size);
@@ -86,12 +91,19 @@ static int transfer_none(struct loop_dev
 			memcpy(raw_buf, loop_buf, size);
 	}
 
+	kunmap_atomic(raw_page, KM_USER0);
+	kunmap_atomic(loop_page, KM_USER1);
+	cond_resched();
 	return 0;
 }
 
-static int transfer_xor(struct loop_device *lo, int cmd, char *raw_buf,
-			char *loop_buf, int size, sector_t real_block)
+static int transfer_xor(struct loop_device *lo, int cmd,
+			struct page *raw_page, unsigned raw_off,
+			struct page *loop_page, unsigned loop_off,
+			int size, sector_t real_block)
 {
+	char	*raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
+	char	*loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
 	char	*in, *out, *key;
 	int	i, keysize;
 
@@ -107,6 +119,10 @@ static int transfer_xor(struct loop_devi
 	keysize = lo->lo_encrypt_key_size;
 	for (i = 0; i < size; i++)
 		*out++ = *in++ ^ key[(i & 511) % keysize];
+
+	kunmap_atomic(raw_page, KM_USER0);
+	kunmap_atomic(loop_page, KM_USER1);
+	cond_resched();
 	return 0;
 }
 
@@ -162,13 +178,15 @@ figure_loop_size(struct loop_device *lo)
 }
 
 static inline int
-lo_do_transfer(struct loop_device *lo, int cmd, char *rbuf,
-	       char *lbuf, int size, sector_t rblock)
+lo_do_transfer(struct loop_device *lo, int cmd,
+	       struct page *rpage, unsigned roffs,
+	       struct page *lpage, unsigned loffs,
+	       int size, sector_t rblock)
 {
 	if (!lo->transfer)
 		return 0;
 
-	return lo->transfer(lo, cmd, rbuf, lbuf, size, rblock);
+	return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
 }
 
 static int
@@ -178,16 +196,15 @@ do_lo_send(struct loop_device *lo, struc
 	struct address_space *mapping = file->f_mapping;
 	struct address_space_operations *aops = mapping->a_ops;
 	struct page *page;
-	char *kaddr, *data;
 	pgoff_t index;
-	unsigned size, offset;
+	unsigned size, offset, bv_offs;
 	int len;
 	int ret = 0;
 
 	down(&mapping->host->i_sem);
 	index = pos >> PAGE_CACHE_SHIFT;
 	offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
-	data = kmap(bvec->bv_page) + bvec->bv_offset;
+	bv_offs = bvec->bv_offset;
 	len = bvec->bv_len;
 	while (len > 0) {
 		sector_t IV;
@@ -204,9 +221,9 @@ do_lo_send(struct loop_device *lo, struc
 			goto fail;
 		if (aops->prepare_write(file, page, offset, offset+size))
 			goto unlock;
-		kaddr = kmap(page);
-		transfer_result = lo_do_transfer(lo, WRITE, kaddr + offset,
-						 data, size, IV);
+		transfer_result = lo_do_transfer(lo, WRITE, page, offset,
+						 bvec->bv_page, bv_offs,
+						 size, IV);
 		if (transfer_result) {
 			/*
 			 * The transfer failed, but we still write the data to
@@ -214,7 +231,8 @@ do_lo_send(struct loop_device *lo, struc
 			 */
 			printk(KERN_ERR "loop: transfer error block %llu\n",
 			       (unsigned long long)index);
-			memset(kaddr + offset, 0, size);
+			memset(kmap_atomic(page, KM_USER0) + offset, 0, size);
+			kunmap_atomic(page, KM_USER0);
 		}
 		flush_dcache_page(page);
 		kunmap(page);
@@ -222,7 +240,7 @@ do_lo_send(struct loop_device *lo, struc
 			goto unlock;
 		if (transfer_result)
 			goto unlock;
-		data += size;
+		bv_offs += size;
 		len -= size;
 		offset = 0;
 		index++;
@@ -263,7 +281,8 @@ lo_send(struct loop_device *lo, struct b
 
 struct lo_read_data {
 	struct loop_device *lo;
-	char *data;
+	struct page *page;
+	unsigned offset;
 	int bsize;
 };
 
@@ -271,7 +290,6 @@ static int
 lo_read_actor(read_descriptor_t *desc, struct page *page,
 	      unsigned long offset, unsigned long size)
 {
-	char *kaddr;
 	unsigned long count = desc->count;
 	struct lo_read_data *p = (struct lo_read_data*)desc->buf;
 	struct loop_device *lo = p->lo;
@@ -282,18 +300,16 @@ lo_read_actor(read_descriptor_t *desc, s
 	if (size > count)
 		size = count;
 
-	kaddr = kmap(page);
-	if (lo_do_transfer(lo, READ, kaddr + offset, p->data, size, IV)) {
+	if (lo_do_transfer(lo, READ, page, offset, p->page, p->offset, size, IV)) {
 		size = 0;
 		printk(KERN_ERR "loop: transfer error block %ld\n",
 		       page->index);
 		desc->error = -EINVAL;
 	}
-	kunmap(page);
 	
 	desc->count = count - size;
 	desc->written += size;
-	p->data += size;
+	p->offset += size;
 	return size;
 }
 
@@ -306,12 +322,12 @@ do_lo_receive(struct loop_device *lo,
 	int retval;
 
 	cookie.lo = lo;
-	cookie.data = kmap(bvec->bv_page) + bvec->bv_offset;
+	cookie.page = bvec->bv_page;
+	cookie.offset = bvec->bv_offset;
 	cookie.bsize = bsize;
 	file = lo->lo_backing_file;
 	retval = file->f_op->sendfile(file, &pos, bvec->bv_len,
 			lo_read_actor, &cookie);
-	kunmap(bvec->bv_page);
 	return (retval < 0)? retval: 0;
 }
 
diff -puN include/linux/loop.h~loop-highmem include/linux/loop.h
--- 25/include/linux/loop.h~loop-highmem	2003-10-31 00:40:44.000000000 -0800
+++ 25-akpm/include/linux/loop.h	2003-10-31 00:40:45.000000000 -0800
@@ -34,8 +34,9 @@ struct loop_device {
 	loff_t		lo_sizelimit;
 	int		lo_flags;
 	int		(*transfer)(struct loop_device *, int cmd,
-				    char *raw_buf, char *loop_buf, int size,
-				    sector_t real_block);
+				    struct page *raw_page, unsigned raw_off,
+				    struct page *loop_page, unsigned loop_off,
+				    int size, sector_t real_block);
 	char		lo_file_name[LO_NAME_SIZE];
 	char		lo_crypt_name[LO_NAME_SIZE];
 	char		lo_encrypt_key[LO_KEY_SIZE];
@@ -127,8 +128,10 @@ struct loop_info64 {
 /* Support for loadable transfer modules */
 struct loop_func_table {
 	int number;	/* filter type */ 
-	int (*transfer)(struct loop_device *lo, int cmd, char *raw_buf,
-			char *loop_buf, int size, sector_t real_block);
+	int (*transfer)(struct loop_device *lo, int cmd,
+			struct page *raw_page, unsigned raw_off,
+			struct page *loop_page, unsigned loop_off,
+			int size, sector_t real_block);
 	int (*init)(struct loop_device *, const struct loop_info64 *); 
 	/* release is called from loop_unregister_transfer or clr_fd */
 	int (*release)(struct loop_device *); 

_


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] remove useless highmem bounce from loop/cryptoloop
  2003-10-31  9:55         ` Andrew Morton
@ 2003-10-31  9:58           ` Andrew Morton
  2003-11-13  3:06             ` Ben Slusky
  0 siblings, 1 reply; 22+ messages in thread
From: Andrew Morton @ 2003-10-31  9:58 UTC (permalink / raw)
  To: sluskyb, jariruusu, linux-kernel

Andrew Morton <akpm@osdl.org> wrote:
>
>  Andrew Morton <akpm@osdl.org> wrote:
>  >
>  > Here's the patch;
> 
>  And here's your cleanup patch on top of that.

And here are some fixes against your patch.  kunmap_atomic() takes a kernel
virtual address, not a pageframe pointer.  And there were a couple of stray
kunmap()s left over.


--- 25/drivers/block/loop.c~loop-highmem-fixes	2003-10-31 00:55:17.000000000 -0800
+++ 25-akpm/drivers/block/loop.c	2003-10-31 01:01:32.000000000 -0800
@@ -81,18 +81,16 @@ static int transfer_none(struct loop_dev
 			 struct page *loop_page, unsigned loop_off,
 			 int size, sector_t real_block)
 {
-	char	*raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
-	char	*loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
+	char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
+	char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
 
-	if (raw_buf != loop_buf) {
-		if (cmd == READ)
-			memcpy(loop_buf, raw_buf, size);
-		else
-			memcpy(raw_buf, loop_buf, size);
-	}
+	if (cmd == READ)
+		memcpy(loop_buf, raw_buf, size);
+	else
+		memcpy(raw_buf, loop_buf, size);
 
-	kunmap_atomic(raw_page, KM_USER0);
-	kunmap_atomic(loop_page, KM_USER1);
+	kunmap_atomic(raw_buf, KM_USER0);
+	kunmap_atomic(loop_buf, KM_USER1);
 	cond_resched();
 	return 0;
 }
@@ -102,10 +100,10 @@ static int transfer_xor(struct loop_devi
 			struct page *loop_page, unsigned loop_off,
 			int size, sector_t real_block)
 {
-	char	*raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
-	char	*loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
-	char	*in, *out, *key;
-	int	i, keysize;
+	char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
+	char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
+	char *in, *out, *key;
+	int i, keysize;
 
 	if (cmd == READ) {
 		in = raw_buf;
@@ -120,8 +118,8 @@ static int transfer_xor(struct loop_devi
 	for (i = 0; i < size; i++)
 		*out++ = *in++ ^ key[(i & 511) % keysize];
 
-	kunmap_atomic(raw_page, KM_USER0);
-	kunmap_atomic(loop_page, KM_USER1);
+	kunmap_atomic(raw_buf, KM_USER0);
+	kunmap_atomic(loop_buf, KM_USER1);
 	cond_resched();
 	return 0;
 }
@@ -225,17 +223,19 @@ do_lo_send(struct loop_device *lo, struc
 						 bvec->bv_page, bv_offs,
 						 size, IV);
 		if (transfer_result) {
+			char *kaddr;
+
 			/*
 			 * The transfer failed, but we still write the data to
 			 * keep prepare/commit calls balanced.
 			 */
 			printk(KERN_ERR "loop: transfer error block %llu\n",
 			       (unsigned long long)index);
-			memset(kmap_atomic(page, KM_USER0) + offset, 0, size);
-			kunmap_atomic(page, KM_USER0);
+			kaddr = kmap_atomic(page, KM_USER0);
+			memset(kaddr + offset, 0, size);
+			kunmap_atomic(kaddr, KM_USER0);
 		}
 		flush_dcache_page(page);
-		kunmap(page);
 		if (aops->commit_write(file, page, offset, offset+size))
 			goto unlock;
 		if (transfer_result)
@@ -250,7 +250,6 @@ do_lo_send(struct loop_device *lo, struc
 	}
 	up(&mapping->host->i_sem);
 out:
-	kunmap(bvec->bv_page);
 	return ret;
 
 unlock:

_


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] remove useless highmem bounce from loop/cryptoloop
  2003-10-30 21:30   ` Andrew Morton
  2003-10-31  0:52     ` Ben Slusky
@ 2003-10-31 12:15     ` Jari Ruusu
  2003-10-31 13:02       ` Jens Axboe
  1 sibling, 1 reply; 22+ messages in thread
From: Jari Ruusu @ 2003-10-31 12:15 UTC (permalink / raw)
  To: Andrew Morton; +Cc: sluskyb, linux-kernel

Andrew Morton wrote:
> Jari Ruusu <jariruusu@users.sourceforge.net> wrote:
> > Cryptoapi interface is quite broken. Your change extends that breakage to
> > loop transfer interface. Please don't do that.
> 
> Please describe this breakage.

It is broken because it includes kmaps. kmaps may be required evil thing
that is currectly required, but in long term kmaps deserve fate of DOS EMS.
Less interfaces that include that damage, the better. Please don't include
that damage in loop transfer interface.

-- 
Jari Ruusu  1024R/3A220F51 5B 4B F9 BB D3 3F 52 E9  DB 1D EB E3 24 0E A9 DD

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] remove useless highmem bounce from loop/cryptoloop
  2003-10-31 12:15     ` [PATCH] remove useless highmem bounce from loop/cryptoloop Jari Ruusu
@ 2003-10-31 13:02       ` Jens Axboe
  0 siblings, 0 replies; 22+ messages in thread
From: Jens Axboe @ 2003-10-31 13:02 UTC (permalink / raw)
  To: Jari Ruusu; +Cc: Andrew Morton, sluskyb, linux-kernel

On Fri, Oct 31 2003, Jari Ruusu wrote:
> Andrew Morton wrote:
> > Jari Ruusu <jariruusu@users.sourceforge.net> wrote:
> > > Cryptoapi interface is quite broken. Your change extends that breakage to
> > > loop transfer interface. Please don't do that.
> > 
> > Please describe this breakage.
> 
> It is broken because it includes kmaps. kmaps may be required evil thing
> that is currectly required, but in long term kmaps deserve fate of DOS EMS.
> Less interfaces that include that damage, the better. Please don't include
> that damage in loop transfer interface.

Bouncing is better?! Get real.

An interface based on page,len,offset is so much better than a virtual
address. It works equally well on sane archs as insane ones.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] remove useless highmem bounce from loop/cryptoloop
  2003-10-31  9:55       ` Andrew Morton
  2003-10-31  9:55         ` Andrew Morton
@ 2003-11-01  0:26         ` Ben Slusky
  2003-11-02 20:46           ` Ben Slusky
  1 sibling, 1 reply; 22+ messages in thread
From: Ben Slusky @ 2003-11-01  0:26 UTC (permalink / raw)
  To: Andrew Morton; +Cc: jariruusu, linux-kernel

On Fri, 31 Oct 2003 01:55:00 -0800, Andrew Morton wrote:
> Ben Slusky <sluskyb@paranoiacs.org> wrote:
> > The current memory allocation procedure really is inadequate. It worked
> > ok up thru 2.4 because the loop device was used almost exclusively
> > as a nifty hack to make an initrd or to double-check the ISO you just
> > created.
> 
> mm..  Last time I looked the 2.4 loop driver is fairly robust from the
> memory management point of view.

Memory management is fine after we've allocated the memory. The problem is
in the approach of going back to square one if only n-1 out of n pages
could be allocated. That approach is inherently prone to deadlock.

> Here's the patch; feel free to benchmark it.  It kills 200 lines of code
> and unifies the block-backed and file-backed codepaths.  That surely is a
> good thing.

I will benchmark it soon... meantime I have a real concern about what
you've done to block-backed loop reads. Now the loop thread has to read
and transform (decrypt) each bio, whereas in the old code reading was
done asynchronously in the backing block device driver, leaving the loop
thread free to do some transforms at the same time. I don't see how this
could not hurt performance.

> It fixes bug 1198 too, it appears.

The file-backed loop code path allocates memory in a sane fashion.
File-backed loops never manifested the bug.

-- 
Ben Slusky                      | A free society is a society
sluskyb@paranoiacs.org          | where it is safe to be
sluskyb@stwing.org              | unpopular.
PGP keyID ADA44B3B              |               -Adlai Stevenson


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] remove useless highmem bounce from loop/cryptoloop
  2003-11-01  0:26         ` Ben Slusky
@ 2003-11-02 20:46           ` Ben Slusky
  2003-12-21 19:55             ` [PATCH] loop.c patches, take two Ben Slusky
  0 siblings, 1 reply; 22+ messages in thread
From: Ben Slusky @ 2003-11-02 20:46 UTC (permalink / raw)
  To: Andrew Morton; +Cc: jariruusu, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 482 bytes --]

My astute and obviously flawless technical analysis seems to be at odds
with the benchmarks, which show reads completed in half the time with
Andrew's patches as with mine. Writes are a little slower tho'.

Results attached.

-- 
Ben Slusky                      | Some drink from the Fountain
sluskyb@paranoiacs.org          | of Knowledge... Others just
sluskyb@stwing.org              | gargle. ...And some pee in it.
PGP keyID ADA44B3B              |               -Dave Aronson

[-- Attachment #2: times-sluskyb --]
[-- Type: text/plain, Size: 4149 bytes --]


Write test:
time sh -c 'dd if=/dev/urandom of=testfoo bs=32k count=5000 conv=notrunc; sync'
0.03user 131.66system 2:26.62elapsed 89%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (439major+100minor)pagefaults 0swaps
0.03user 133.35system 2:34.15elapsed 86%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (439major+100minor)pagefaults 0swaps
0.04user 133.35system 2:30.97elapsed 88%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (439major+100minor)pagefaults 0swaps
0.03user 131.64system 2:29.25elapsed 88%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (439major+100minor)pagefaults 0swaps
0.04user 128.32system 2:26.25elapsed 87%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (439major+100minor)pagefaults 0swaps
0.03user 130.65system 2:28.31elapsed 88%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (439major+100minor)pagefaults 0swaps
0.03user 128.49system 2:25.50elapsed 88%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (439major+100minor)pagefaults 0swaps
0.04user 128.48system 2:26.03elapsed 88%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (439major+100minor)pagefaults 0swaps
0.03user 128.63system 2:25.96elapsed 88%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (439major+100minor)pagefaults 0swaps
0.03user 127.40system 2:24.91elapsed 87%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (439major+100minor)pagefaults 0swaps

Read test:
time dd if=testfoo of=/dev/null bs=32k
0.04user 1.45system 0:37.20elapsed 4%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (364major+75minor)pagefaults 0swaps
0.05user 1.45system 0:37.07elapsed 4%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (364major+75minor)pagefaults 0swaps
0.04user 1.42system 0:37.10elapsed 3%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (364major+75minor)pagefaults 0swaps
0.05user 1.46system 0:37.12elapsed 4%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (364major+75minor)pagefaults 0swaps
0.04user 1.44system 0:37.13elapsed 4%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (364major+75minor)pagefaults 0swaps
0.05user 1.43system 0:37.10elapsed 4%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (364major+75minor)pagefaults 0swaps
0.05user 1.44system 0:37.23elapsed 4%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (364major+75minor)pagefaults 0swaps
0.04user 1.43system 0:37.07elapsed 3%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (364major+75minor)pagefaults 0swaps
0.05user 1.44system 0:37.13elapsed 4%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (364major+75minor)pagefaults 0swaps
0.05user 1.43system 0:37.12elapsed 4%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (364major+75minor)pagefaults 0swaps

Read/Write test:
time sh -c 'tar xjf src/linux-2.6.0-test1.tar.bz2; sync'
58.32user 20.09system 1:58.83elapsed 65%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (623major+1033minor)pagefaults 0swaps
58.86user 18.22system 1:57.10elapsed 65%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (623major+1033minor)pagefaults 0swaps
59.00user 17.58system 1:56.83elapsed 65%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (623major+1033minor)pagefaults 0swaps
59.28user 17.49system 1:57.10elapsed 65%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (623major+1033minor)pagefaults 0swaps
58.84user 18.19system 1:56.98elapsed 65%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (623major+1033minor)pagefaults 0swaps
58.82user 17.74system 1:56.15elapsed 65%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (623major+1033minor)pagefaults 0swaps
58.89user 17.71system 1:57.00elapsed 65%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (623major+1033minor)pagefaults 0swaps
59.07user 17.82system 1:57.33elapsed 65%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (623major+1033minor)pagefaults 0swaps
59.36user 18.29system 1:57.05elapsed 66%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (623major+1033minor)pagefaults 0swaps
59.14user 17.59system 1:56.91elapsed 65%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (623major+1033minor)pagefaults 0swaps

[-- Attachment #3: times-akpm --]
[-- Type: text/plain, Size: 4094 bytes --]

Write test:
time sh -c 'dd if=/dev/urandom of=testfoo bs=32k count=5000 conv=notrunc; sync'
0.05user 131.32system 2:36.10elapsed 84%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (439major+100minor)pagefaults 0swaps
0.03user 137.84system 2:40.38elapsed 85%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (439major+100minor)pagefaults 0swaps
0.02user 137.72system 2:40.32elapsed 85%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (439major+100minor)pagefaults 0swaps
0.03user 137.09system 2:39.38elapsed 86%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (439major+100minor)pagefaults 0swaps
0.04user 136.31system 2:38.62elapsed 85%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (439major+100minor)pagefaults 0swaps
0.04user 135.40system 2:37.26elapsed 86%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (439major+100minor)pagefaults 0swaps
0.03user 133.00system 2:33.92elapsed 86%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (439major+100minor)pagefaults 0swaps
0.03user 134.56system 2:35.92elapsed 86%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (439major+100minor)pagefaults 0swaps
0.04user 133.76system 2:35.15elapsed 86%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (439major+100minor)pagefaults 0swaps
0.04user 133.41system 2:35.23elapsed 85%CPU (0avgtext+0avgdata 0maxresident)k

Read test:
time dd if=testfoo of=/dev/null bs=32k
0.03user 1.37system 0:18.46elapsed 7%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (364major+75minor)pagefaults 0swaps
0.04user 1.37system 0:18.20elapsed 7%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (364major+75minor)pagefaults 0swaps
0.04user 1.32system 0:17.85elapsed 7%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (364major+75minor)pagefaults 0swaps
0.04user 1.30system 0:17.66elapsed 7%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (364major+75minor)pagefaults 0swaps
0.04user 1.31system 0:17.67elapsed 7%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (364major+75minor)pagefaults 0swaps
0.04user 1.30system 0:17.67elapsed 7%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (364major+75minor)pagefaults 0swaps
0.05user 1.27system 0:17.65elapsed 7%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (364major+75minor)pagefaults 0swaps
0.04user 1.30system 0:17.64elapsed 7%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (364major+75minor)pagefaults 0swaps
0.04user 1.35system 0:18.09elapsed 7%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (364major+75minor)pagefaults 0swaps
0.04user 1.38system 0:18.42elapsed 7%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (364major+75minor)pagefaults 0swaps

Read/Write test:
time sh -c 'tar xjf src/linux-2.6.0-test1.tar.bz2; sync'
60.41user 13.18system 1:57.85elapsed 62%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (623major+1033minor)pagefaults 0swaps
60.37user 13.26system 1:58.74elapsed 62%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (623major+1033minor)pagefaults 0swaps
60.59user 13.30system 1:57.83elapsed 62%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (623major+1033minor)pagefaults 0swaps
60.53user 13.25system 1:58.48elapsed 62%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (623major+1033minor)pagefaults 0swaps
60.41user 13.29system 1:58.47elapsed 62%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (623major+1033minor)pagefaults 0swaps
60.46user 13.33system 1:57.36elapsed 62%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (623major+1033minor)pagefaults 0swaps
60.44user 13.07system 1:59.50elapsed 61%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (623major+1033minor)pagefaults 0swaps
60.14user 13.12system 1:58.03elapsed 62%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (623major+1033minor)pagefaults 0swaps
60.28user 13.18system 1:58.58elapsed 61%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (623major+1033minor)pagefaults 0swaps
60.44user 13.32system 1:58.93elapsed 62%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (623major+1033minor)pagefaults 0swaps

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] remove useless highmem bounce from loop/cryptoloop
  2003-10-31  9:58           ` Andrew Morton
@ 2003-11-13  3:06             ` Ben Slusky
  0 siblings, 0 replies; 22+ messages in thread
From: Ben Slusky @ 2003-11-13  3:06 UTC (permalink / raw)
  To: Andrew Morton; +Cc: jariruusu, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 604 bytes --]

One more patch --- this fixes a minor bio handling bug in the filebacked
code path. I'd fixed it incidentally in the loop-recycle patch.

I don't think you could actually see damage from this bug unless you
run device mapper on top of loop devices, but still this is the correct
behavior. Please apply.

This should also apply cleanly over 2.6.0-test9-mm2.

-
-Ben


-- 
Ben Slusky                      | I'm amazed that I'm still
sluskyb@paranoiacs.org          | standing and I demand that we
sluskyb@stwing.org              | all blend in
PGP keyID ADA44B3B              |               -Foo Fighters

[-- Attachment #2: loop-bio-index.diff --]
[-- Type: text/plain, Size: 1019 bytes --]

--- linux-2.6.0-test9-akpm/drivers/block/loop.c	2003-11-08 19:28:07.000000000 -0500
+++ linux-2.6.0-test9-sluskyb/drivers/block/loop.c	2003-11-08 19:14:41.000000000 -0500
@@ -264,12 +264,10 @@ fail:
 static int
 lo_send(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
 {
-	unsigned vecnr;
-	int ret = 0;
-
-	for (vecnr = 0; vecnr < bio->bi_vcnt; vecnr++) {
-		struct bio_vec *bvec = &bio->bi_io_vec[vecnr];
+	struct bio_vec *bvec;
+	int i, ret = 0;
 
+	bio_for_each_segment(bvec, bio, i) {
 		ret = do_lo_send(lo, bvec, bsize, pos);
 		if (ret < 0)
 			break;
@@ -333,12 +331,10 @@ do_lo_receive(struct loop_device *lo,
 static int
 lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
 {
-	unsigned vecnr;
-	int ret = 0;
-
-	for (vecnr = 0; vecnr < bio->bi_vcnt; vecnr++) {
-		struct bio_vec *bvec = &bio->bi_io_vec[vecnr];
+	struct bio_vec *bvec;
+	int i, ret = 0;
 
+	bio_for_each_segment(bvec, bio, i) {
 		ret = do_lo_receive(lo, bvec, bsize, pos);
 		if (ret < 0)
 			break;

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH] loop.c patches, take two
  2003-11-02 20:46           ` Ben Slusky
@ 2003-12-21 19:55             ` Ben Slusky
  2003-12-21 20:07               ` Ben Slusky
                                 ` (3 more replies)
  0 siblings, 4 replies; 22+ messages in thread
From: Ben Slusky @ 2003-12-21 19:55 UTC (permalink / raw)
  To: linux-kernel; +Cc: Andrew Morton, jariruusu

[-- Attachment #1: Type: text/plain, Size: 1185 bytes --]

Hi everybody,

Well, it appears that neither my loop.c patches nor Andrew's were merged
into 2.6.0... I'd request that my patches be merged into mainline,
since Jari Ruusu has pointed out that Andrew's patch (which removes the
separate code path for block-backed loop devices) will break journaling
filesystems on loop devices. Right now, journaling FS's on file-backed
loop devices are kinda iffy (they will work only if the underlying FS is
also journaled, with the correct journal options) but journaling FS's
on block-backed loop devices work perfectly. Andrew's patches would
break this.

This first patch improves the memory allocation technique used by
block-backed loop devices. Specifically the loop device will make
efficient use of whatever pages it can get, rather than throwing them all
back in case it didn't get as many as it wanted -- this fixes Bugzilla
bug #1198.

Taken together, this patch and the following patch make loop devices
safe for use as swap. Please apply.

-
-Ben


-- 
Ben Slusky                      | "I have a cunning plan! 
sluskyb@paranoiacs.org          |  RUN!"
sluskyb@stwing.org              |       -Spider Jerusalem
PGP keyID ADA44B3B      

[-- Attachment #2: loop-recycle-mk2.diff --]
[-- Type: text/plain, Size: 7985 bytes --]

diff -pu linux-2.6.0/drivers/block/loop.c-orig linux-2.6.0/drivers/block/loop.c
--- linux-2.6.0/drivers/block/loop.c-orig	2003-12-20 21:46:53.430260696 -0500
+++ linux-2.6.0/drivers/block/loop.c	2003-12-20 21:47:11.743476664 -0500
@@ -247,12 +247,10 @@ fail:
 static int
 lo_send(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
 {
-	unsigned vecnr;
-	int ret = 0;
-
-	for (vecnr = 0; vecnr < bio->bi_vcnt; vecnr++) {
-		struct bio_vec *bvec = &bio->bi_io_vec[vecnr];
+	struct bio_vec *bvec;
+	int i, ret = 0;
 
+	bio_for_each_segment(bvec, bio, i) {
 		ret = do_lo_send(lo, bvec, bsize, pos);
 		if (ret < 0)
 			break;
@@ -318,12 +316,10 @@ do_lo_receive(struct loop_device *lo,
 static int
 lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
 {
-	unsigned vecnr;
-	int ret = 0;
-
-	for (vecnr = 0; vecnr < bio->bi_vcnt; vecnr++) {
-		struct bio_vec *bvec = &bio->bi_io_vec[vecnr];
+	struct bio_vec *bvec;
+	int i, ret = 0;
 
+	bio_for_each_segment(bvec, bio, i) {
 		ret = do_lo_receive(lo, bvec, bsize, pos);
 		if (ret < 0)
 			break;
@@ -353,10 +349,12 @@ static void loop_put_buffer(struct bio *
 	 * check bi_end_io, may just be a remapped bio
 	 */
 	if (bio && bio->bi_end_io == loop_end_io_transfer) {
+		struct bio_vec *bv;
 		int i;
 
-		for (i = 0; i < bio->bi_vcnt; i++)
-			__free_page(bio->bi_io_vec[i].bv_page);
+		if (!bio_flagged(bio, BIO_CLONED))
+			bio_for_each_segment(bv, bio, i)
+				__free_page(bv->bv_page);
 
 		bio_put(bio);
 	}
@@ -413,7 +411,7 @@ static int loop_end_io_transfer(struct b
 	if (bio->bi_size)
 		return 1;
 
-	if (err || bio_rw(bio) == WRITE) {
+	if (err || (bio_rw(bio) == WRITE && bio->bi_vcnt == rbh->bi_vcnt)) {
 		bio_endio(rbh, rbh->bi_size, err);
 		if (atomic_dec_and_test(&lo->lo_pending))
 			up(&lo->lo_bh_mutex);
@@ -430,35 +428,41 @@ static struct bio *loop_copy_bio(struct 
 	struct bio_vec *bv;
 	int i;
 
-	bio = bio_alloc(__GFP_NOWARN, rbh->bi_vcnt);
+	if (bio_rw(rbh) != WRITE) {
+		bio = bio_clone(rbh, GFP_NOIO);
+		return bio;
+	}
+
+	bio = bio_alloc(GFP_NOIO, rbh->bi_vcnt);
 	if (!bio)
 		return NULL;
 
 	/*
 	 * iterate iovec list and alloc pages
 	 */
-	__bio_for_each_segment(bv, rbh, i, 0) {
+	bio_for_each_segment(bv, rbh, i) {
 		struct bio_vec *bbv = &bio->bi_io_vec[i];
 
-		bbv->bv_page = alloc_page(__GFP_NOWARN|__GFP_HIGHMEM);
+		/* We need one page; the rest we can live without */
+		bbv->bv_page = alloc_page((bio->bi_vcnt ? __GFP_NOWARN : GFP_NOIO) | __GFP_HIGHMEM);
 		if (bbv->bv_page == NULL)
-			goto oom;
+			break;
 
-		bbv->bv_len = bv->bv_len;
 		bbv->bv_offset = bv->bv_offset;
+		bio->bi_size += (bbv->bv_len = bv->bv_len);
+		bio->bi_vcnt++;
 	}
 
-	bio->bi_vcnt = rbh->bi_vcnt;
-	bio->bi_size = rbh->bi_size;
-
-	return bio;
+	/* Can't get anything done if we didn't get any pages */
+	if (unlikely(!bio->bi_vcnt)) {
+		bio_put(bio);
+		return NULL;
+	}
 
-oom:
-	while (--i >= 0)
-		__free_page(bio->bi_io_vec[i].bv_page);
+	bio->bi_vcnt += (bio->bi_idx = rbh->bi_idx);
+	bio->bi_rw = rbh->bi_rw;
 
-	bio_put(bio);
-	return NULL;
+	return bio;
 }
 
 static struct bio *loop_get_buffer(struct loop_device *lo, struct bio *rbh)
@@ -479,19 +483,85 @@ static struct bio *loop_get_buffer(struc
 		if (flags & PF_MEMALLOC)
 			current->flags |= PF_MEMALLOC;
 
-		if (bio == NULL)
+		if (unlikely(bio == NULL)) {
+			printk("loop: alloc failed\n");
 			blk_congestion_wait(WRITE, HZ/10);
+		}
 	} while (bio == NULL);
 
 	bio->bi_end_io = loop_end_io_transfer;
 	bio->bi_private = rbh;
 	bio->bi_sector = rbh->bi_sector + (lo->lo_offset >> 9);
-	bio->bi_rw = rbh->bi_rw;
 	bio->bi_bdev = lo->lo_device;
 
 	return bio;
 }
 
+static void loop_recycle_buffer(struct loop_device *lo, struct bio *bio)
+{
+	struct bio *rbh = bio->bi_private;
+	struct bio_vec *bv, *bbv, *rbv;
+	int i, flags, nvecs = bio->bi_vcnt - bio->bi_idx;
+
+	/*
+	 * Comments in ll_rw_blk.c reserve for generic_make_request the right to
+	 * "change bi_dev and bi_sector for remaps as it sees fit." Doh!
+	 * Workaround: reset the bi_bdev and recompute the starting sector for
+	 * the next write.
+	 */
+	bio->bi_bdev = lo->lo_device;
+	bio->bi_sector = rbh->bi_sector + (lo->lo_offset >> 9);
+	/* Clean up the flags too */
+	bio->bi_flags &= (~(BIO_POOL_MASK - 1) | (1 << BIO_UPTODATE));
+
+	/*
+	 * Move the allocated pages into position to transfer more data.
+	 */
+	__bio_for_each_segment(bv, bio, i, rbh->bi_idx) {
+		rbv = &rbh->bi_io_vec[i];
+		bbv = bv + nvecs;
+
+		/* Workaround -- see note above */
+		bio->bi_sector += rbv->bv_len >> 9;
+		if (i < bio->bi_idx)
+			continue;
+
+		if (i + nvecs < rbh->bi_vcnt) {
+			bbv->bv_page = bv->bv_page;
+			bbv->bv_offset = rbv->bv_offset;
+			bio->bi_size += (bbv->bv_len = rbv->bv_len);
+		} else
+			__free_page(bv->bv_page);
+		memset(bv, 0, sizeof(*bv));
+	}
+
+	bio->bi_idx = bio->bi_vcnt;
+	bio->bi_vcnt += nvecs;
+	bio->bi_vcnt = min(bio->bi_vcnt, rbh->bi_vcnt);
+
+	/*
+	 * If we need more pages, try to get some.
+	 * Clear PF_MEMALLOC to avoid consuming all available memory.
+	 */
+	flags = current->flags;
+	current->flags &= ~PF_MEMALLOC;
+
+	__bio_for_each_segment(rbv, rbh, i, bio->bi_vcnt) {
+		bv = &bio->bi_io_vec[i];
+
+		bv->bv_page = alloc_page(__GFP_NOWARN|__GFP_HIGHMEM);
+		if (bv->bv_page == NULL)
+			break;
+
+		bv->bv_offset = rbv->bv_offset;
+		bio->bi_size += (bv->bv_len = rbv->bv_len);
+		bio->bi_vcnt++;
+	}
+
+	if (flags & PF_MEMALLOC)
+		current->flags |= PF_MEMALLOC;
+}
+
 static int loop_transfer_bio(struct loop_device *lo,
 			     struct bio *to_bio, struct bio *from_bio)
 {
@@ -502,17 +572,19 @@ static int loop_transfer_bio(struct loop
 
 	IV = from_bio->bi_sector + (lo->lo_offset >> 9);
 
-	__bio_for_each_segment(from_bvec, from_bio, i, 0) {
-		to_bvec = &to_bio->bi_io_vec[i];
+	__bio_for_each_segment(to_bvec, to_bio, i, from_bio->bi_idx) {
+		from_bvec = &from_bio->bi_io_vec[i];
 
-		kmap(from_bvec->bv_page);
-		kmap(to_bvec->bv_page);
-		vfrom = page_address(from_bvec->bv_page) + from_bvec->bv_offset;
-		vto = page_address(to_bvec->bv_page) + to_bvec->bv_offset;
-		ret |= lo_do_transfer(lo, bio_data_dir(to_bio), vto, vfrom,
-					from_bvec->bv_len, IV);
-		kunmap(from_bvec->bv_page);
-		kunmap(to_bvec->bv_page);
+		if (i >= to_bio->bi_idx) {
+			kmap(from_bvec->bv_page);
+			kmap(to_bvec->bv_page);
+			vfrom = page_address(from_bvec->bv_page) + from_bvec->bv_offset;
+			vto = page_address(to_bvec->bv_page) + to_bvec->bv_offset;
+			ret |= lo_do_transfer(lo, bio_data_dir(to_bio), vto, vfrom,
+						from_bvec->bv_len, IV);
+			kunmap(from_bvec->bv_page);
+			kunmap(to_bvec->bv_page);
+		}
 		IV += from_bvec->bv_len >> 9;
 	}
 
@@ -579,16 +651,30 @@ inactive:
 static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
 {
 	int ret;
+	struct bio *rbh;
 
-	/*
-	 * For block backed loop, we know this is a READ
-	 */
 	if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
 		ret = do_bio_filebacked(lo, bio);
 		bio_endio(bio, bio->bi_size, ret);
-	} else {
-		struct bio *rbh = bio->bi_private;
+	} else if (bio_rw(bio) == WRITE) {
+		/*
+		 * Write complete, but more pages remain;
+		 * encrypt and write some more pages
+		 */
+		loop_recycle_buffer(lo, bio);
 
+		rbh = bio->bi_private;
+		if ((ret = loop_transfer_bio(lo, bio, rbh))) {
+			bio_endio(bio, bio->bi_size, ret);
+			return;
+		}
+
+		generic_make_request(bio);
+	} else {
+		/*
+		 * Read complete; do decryption now
+		 */
+		rbh = bio->bi_private;
 		ret = loop_transfer_bio(lo, bio, rbh);
 
 		bio_endio(rbh, rbh->bi_size, ret);
@@ -606,6 +692,7 @@ static int loop_thread(void *data)
 {
 	struct loop_device *lo = data;
 	struct bio *bio;
+	int x;
 
 	daemonize("loop%d", lo->lo_number);
 
@@ -640,7 +727,11 @@ static int loop_thread(void *data)
 			printk("loop: missing bio\n");
 			continue;
 		}
+
+		x = (lo->lo_flags & LO_FLAGS_DO_BMAP) || bio_rw(bio) != WRITE;
 		loop_handle_bio(lo, bio);
+		if (!x)
+			continue;
 
 		/*
 		 * upped both for pending work and tear-down, lo_pending

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] loop.c patches, take two
  2003-12-21 19:55             ` [PATCH] loop.c patches, take two Ben Slusky
@ 2003-12-21 20:07               ` Ben Slusky
  2003-12-21 20:49               ` Mika Penttilä
                                 ` (2 subsequent siblings)
  3 siblings, 0 replies; 22+ messages in thread
From: Ben Slusky @ 2003-12-21 20:07 UTC (permalink / raw)
  To: linux-kernel; +Cc: Andrew Morton, jariruusu

[-- Attachment #1: Type: text/plain, Size: 640 bytes --]

This patch will localize (and atomicize) the kmaps in loop.c, moving
them from do_lo_send, do_lo_receive, and loop_transfer_bio into the
transform functions. The effect is to eliminate the page,offset ->
vaddr -> page,offset -> vaddr back-and-forth in cryptoloop devices.

I've incorporated Andrew's corrections.

Taken together, the preceding patch and this patch make loop devices
safe for use as swap. Please apply.

-
-Ben


-- 
Ben Slusky                      | Integrity is the key. Once you
sluskyb@paranoiacs.org          | can fake that...
sluskyb@stwing.org              |                -Simon Travaglia
PGP keyID ADA44B3B      

[-- Attachment #2: loop-highmem-mk2.diff --]
[-- Type: text/plain, Size: 12398 bytes --]

diff -pu linux-2.6.0/include/linux/loop.h-orig linux-2.6.0/include/linux/loop.h
--- linux-2.6.0/include/linux/loop.h-orig	2003-12-20 21:36:18.579772640 -0500
+++ linux-2.6.0/include/linux/loop.h	2003-12-20 21:37:22.086118208 -0500
@@ -34,8 +34,9 @@ struct loop_device {
 	loff_t		lo_sizelimit;
 	int		lo_flags;
 	int		(*transfer)(struct loop_device *, int cmd,
-				    char *raw_buf, char *loop_buf, int size,
-				    sector_t real_block);
+				    struct page *raw_page, unsigned raw_off,
+				    struct page *loop_page, unsigned loop_off,
+				    int size, sector_t real_block);
 	char		lo_file_name[LO_NAME_SIZE];
 	char		lo_crypt_name[LO_NAME_SIZE];
 	char		lo_encrypt_key[LO_KEY_SIZE];
@@ -128,8 +129,10 @@ struct loop_info64 {
 /* Support for loadable transfer modules */
 struct loop_func_table {
 	int number;	/* filter type */ 
-	int (*transfer)(struct loop_device *lo, int cmd, char *raw_buf,
-			char *loop_buf, int size, sector_t real_block);
+	int (*transfer)(struct loop_device *lo, int cmd,
+			struct page *raw_page, unsigned raw_off,
+			struct page *loop_page, unsigned loop_off,
+			int size, sector_t real_block);
 	int (*init)(struct loop_device *, const struct loop_info64 *); 
 	/* release is called from loop_unregister_transfer or clr_fd */
 	int (*release)(struct loop_device *); 
diff -pu linux-2.6.0/drivers/block/loop.c-orig linux-2.6.0/drivers/block/loop.c
--- linux-2.6.0/drivers/block/loop.c-orig	2003-12-20 21:36:18.614767320 -0500
+++ linux-2.6.0/drivers/block/loop.c	2003-12-20 21:37:42.488016648 -0500
@@ -75,24 +75,34 @@ static struct gendisk **disks;
 /*
  * Transfer functions
  */
-static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf,
-			 char *loop_buf, int size, sector_t real_block)
+static int transfer_none(struct loop_device *lo, int cmd,
+			 struct page *raw_page, unsigned raw_off,
+			 struct page *loop_page, unsigned loop_off,
+			 int size, sector_t real_block)
 {
-	if (raw_buf != loop_buf) {
-		if (cmd == READ)
-			memcpy(loop_buf, raw_buf, size);
-		else
-			memcpy(raw_buf, loop_buf, size);
-	}
+	char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
+	char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
 
+	if (cmd == READ)
+		memcpy(loop_buf, raw_buf, size);
+	else
+		memcpy(raw_buf, loop_buf, size);
+
+	kunmap_atomic(raw_buf, KM_USER0);
+	kunmap_atomic(loop_buf, KM_USER1);
+	cond_resched();
 	return 0;
 }
 
-static int transfer_xor(struct loop_device *lo, int cmd, char *raw_buf,
-			char *loop_buf, int size, sector_t real_block)
-{
-	char	*in, *out, *key;
-	int	i, keysize;
+static int transfer_xor(struct loop_device *lo, int cmd,
+			struct page *raw_page, unsigned raw_off,
+			struct page *loop_page, unsigned loop_off,
+			int size, sector_t real_block)
+{
+	char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
+	char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
+	char *in, *out, *key;
+	int i, keysize;
 
 	if (cmd == READ) {
 		in = raw_buf;
@@ -106,6 +116,10 @@ static int transfer_xor(struct loop_devi
 	keysize = lo->lo_encrypt_key_size;
 	for (i = 0; i < size; i++)
 		*out++ = *in++ ^ key[(i & 511) % keysize];
+
+	kunmap_atomic(raw_buf, KM_USER0);
+	kunmap_atomic(loop_buf, KM_USER1);
+	cond_resched();
 	return 0;
 }
 
@@ -162,13 +176,15 @@ figure_loop_size(struct loop_device *lo)
 }
 
 static inline int
-lo_do_transfer(struct loop_device *lo, int cmd, char *rbuf,
-	       char *lbuf, int size, sector_t rblock)
+lo_do_transfer(struct loop_device *lo, int cmd,
+	       struct page *rpage, unsigned roffs,
+	       struct page *lpage, unsigned loffs,
+	       int size, sector_t rblock)
 {
 	if (!lo->transfer)
 		return 0;
 
-	return lo->transfer(lo, cmd, rbuf, lbuf, size, rblock);
+	return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
 }
 
 static int
@@ -178,16 +194,15 @@ do_lo_send(struct loop_device *lo, struc
 	struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
 	struct address_space_operations *aops = mapping->a_ops;
 	struct page *page;
-	char *kaddr, *data;
 	pgoff_t index;
-	unsigned size, offset;
+	unsigned size, offset, bv_offs;
 	int len;
 	int ret = 0;
 
 	down(&mapping->host->i_sem);
 	index = pos >> PAGE_CACHE_SHIFT;
 	offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
-	data = kmap(bvec->bv_page) + bvec->bv_offset;
+	bv_offs = bvec->bv_offset;
 	len = bvec->bv_len;
 	while (len > 0) {
 		sector_t IV;
@@ -204,25 +219,28 @@ do_lo_send(struct loop_device *lo, struc
 			goto fail;
 		if (aops->prepare_write(file, page, offset, offset+size))
 			goto unlock;
-		kaddr = kmap(page);
-		transfer_result = lo_do_transfer(lo, WRITE, kaddr + offset,
-						 data, size, IV);
+		transfer_result = lo_do_transfer(lo, WRITE, page, offset,
+						 bvec->bv_page, bv_offs,
+						 size, IV);
 		if (transfer_result) {
+			char *kaddr;
+
 			/*
 			 * The transfer failed, but we still write the data to
 			 * keep prepare/commit calls balanced.
 			 */
 			printk(KERN_ERR "loop: transfer error block %llu\n",
 			       (unsigned long long)index);
+			kaddr = kmap_atomic(page, KM_USER0);
 			memset(kaddr + offset, 0, size);
+			kunmap_atomic(kaddr, KM_USER0);
 		}
 		flush_dcache_page(page);
-		kunmap(page);
 		if (aops->commit_write(file, page, offset, offset+size))
 			goto unlock;
 		if (transfer_result)
 			goto unlock;
-		data += size;
+		bv_offs += size;
 		len -= size;
 		offset = 0;
 		index++;
@@ -232,7 +250,6 @@ do_lo_send(struct loop_device *lo, struc
 	}
 	up(&mapping->host->i_sem);
 out:
-	kunmap(bvec->bv_page);
 	return ret;
 
 unlock:
@@ -261,7 +278,8 @@ lo_send(struct loop_device *lo, struct b
 
 struct lo_read_data {
 	struct loop_device *lo;
-	char *data;
+	struct page *page;
+	unsigned offset;
 	int bsize;
 };
 
@@ -269,7 +287,6 @@ static int
 lo_read_actor(read_descriptor_t *desc, struct page *page,
 	      unsigned long offset, unsigned long size)
 {
-	char *kaddr;
 	unsigned long count = desc->count;
 	struct lo_read_data *p = (struct lo_read_data*)desc->buf;
 	struct loop_device *lo = p->lo;
@@ -280,18 +297,16 @@ lo_read_actor(read_descriptor_t *desc, s
 	if (size > count)
 		size = count;
 
-	kaddr = kmap(page);
-	if (lo_do_transfer(lo, READ, kaddr + offset, p->data, size, IV)) {
+	if (lo_do_transfer(lo, READ, page, offset, p->page, p->offset, size, IV)) {
 		size = 0;
 		printk(KERN_ERR "loop: transfer error block %ld\n",
 		       page->index);
 		desc->error = -EINVAL;
 	}
-	kunmap(page);
 	
 	desc->count = count - size;
 	desc->written += size;
-	p->data += size;
+	p->offset += size;
 	return size;
 }
 
@@ -304,12 +319,12 @@ do_lo_receive(struct loop_device *lo,
 	int retval;
 
 	cookie.lo = lo;
-	cookie.data = kmap(bvec->bv_page) + bvec->bv_offset;
+	cookie.page = bvec->bv_page;
+	cookie.offset = bvec->bv_offset;
 	cookie.bsize = bsize;
 	file = lo->lo_backing_file;
 	retval = file->f_op->sendfile(file, &pos, bvec->bv_len,
 			lo_read_actor, &cookie);
-	kunmap(bvec->bv_page);
 	return (retval < 0)? retval: 0;
 }
 
@@ -567,23 +582,17 @@ static int loop_transfer_bio(struct loop
 {
 	sector_t IV;
 	struct bio_vec *from_bvec, *to_bvec;
-	char *vto, *vfrom;
 	int ret = 0, i;
 
 	IV = from_bio->bi_sector + (lo->lo_offset >> 9);
 
 	__bio_for_each_segment(to_bvec, to_bio, i, from_bio->bi_idx) {
 		from_bvec = &from_bio->bi_io_vec[i];
-
 		if (i >= to_bio->bi_idx) {
-			kmap(from_bvec->bv_page);
-			kmap(to_bvec->bv_page);
-			vfrom = page_address(from_bvec->bv_page) + from_bvec->bv_offset;
-			vto = page_address(to_bvec->bv_page) + to_bvec->bv_offset;
-			ret |= lo_do_transfer(lo, bio_data_dir(to_bio), vto, vfrom,
-						from_bvec->bv_len, IV);
-			kunmap(from_bvec->bv_page);
-			kunmap(to_bvec->bv_page);
+			ret |= lo_do_transfer(lo, bio_data_dir(to_bio),
+				      to_bvec->bv_page, to_bvec->bv_offset,
+				      from_bvec->bv_page, from_bvec->bv_offset,
+				      from_bvec->bv_len, IV);
 		}
 		IV += from_bvec->bv_len >> 9;
 	}
diff -pu linux-2.6.0/drivers/block/cryptoloop.c-orig linux-2.6.0/drivers/block/cryptoloop.c
--- linux-2.6.0/drivers/block/cryptoloop.c-orig	2003-12-20 21:36:18.630764888 -0500
+++ linux-2.6.0/drivers/block/cryptoloop.c	2003-12-20 21:37:22.130111520 -0500
@@ -87,43 +87,49 @@ typedef int (*encdec_ecb_t)(struct crypt
 
 
 static int
-cryptoloop_transfer_ecb(struct loop_device *lo, int cmd, char *raw_buf,
-		     char *loop_buf, int size, sector_t IV)
+cryptoloop_transfer_ecb(struct loop_device *lo, int cmd,
+			struct page *raw_page, unsigned raw_off,
+			struct page *loop_page, unsigned loop_off,
+			int size, sector_t IV)
 {
 	struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data;
 	struct scatterlist sg_out = { 0, };
 	struct scatterlist sg_in = { 0, };
 
 	encdec_ecb_t encdecfunc;
-	char const *in;
-	char *out;
+	struct page *in_page, *out_page;
+	unsigned in_offs, out_offs;
 
 	if (cmd == READ) {
-		in = raw_buf;
-		out = loop_buf;
+		in_page = raw_page;
+		in_offs = raw_off;
+		out_page = loop_page;
+		out_offs = loop_off;
 		encdecfunc = tfm->crt_u.cipher.cit_decrypt;
 	} else {
-		in = loop_buf;
-		out = raw_buf;
+		in_page = loop_page;
+		in_offs = loop_off;
+		out_page = raw_page;
+		out_offs = raw_off;
 		encdecfunc = tfm->crt_u.cipher.cit_encrypt;
 	}
 
 	while (size > 0) {
 		const int sz = min(size, LOOP_IV_SECTOR_SIZE);
 
-		sg_in.page = virt_to_page(in);
-		sg_in.offset = (unsigned long)in & ~PAGE_MASK;
+		sg_in.page = in_page;
+		sg_in.offset = in_offs;
 		sg_in.length = sz;
 
-		sg_out.page = virt_to_page(out);
-		sg_out.offset = (unsigned long)out & ~PAGE_MASK;
+		sg_out.page = out_page;
+		sg_out.offset = out_offs;
 		sg_out.length = sz;
 
 		encdecfunc(tfm, &sg_out, &sg_in, sz);
 
 		size -= sz;
-		in += sz;
-		out += sz;
+		in_offs += sz;
+		out_offs += sz;
 	}
 
 	return 0;
@@ -135,24 +141,30 @@ typedef int (*encdec_cbc_t)(struct crypt
 			unsigned int nsg, u8 *iv);
 
 static int
-cryptoloop_transfer_cbc(struct loop_device *lo, int cmd, char *raw_buf,
-		     char *loop_buf, int size, sector_t IV)
+cryptoloop_transfer_cbc(struct loop_device *lo, int cmd,
+			struct page *raw_page, unsigned raw_off,
+			struct page *loop_page, unsigned loop_off,
+			int size, sector_t IV)
 {
 	struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data;
 	struct scatterlist sg_out = { 0, };
 	struct scatterlist sg_in = { 0, };
 
 	encdec_cbc_t encdecfunc;
-	char const *in;
-	char *out;
+	struct page *in_page, *out_page;
+	unsigned in_offs, out_offs;
 
 	if (cmd == READ) {
-		in = raw_buf;
-		out = loop_buf;
+		in_page = raw_page;
+		in_offs = raw_off;
+		out_page = loop_page;
+		out_offs = loop_off;
 		encdecfunc = tfm->crt_u.cipher.cit_decrypt_iv;
 	} else {
-		in = loop_buf;
-		out = raw_buf;
+		in_page = loop_page;
+		in_offs = loop_off;
+		out_page = raw_page;
+		out_offs = raw_off;
 		encdecfunc = tfm->crt_u.cipher.cit_encrypt_iv;
 	}
 
@@ -161,39 +173,43 @@ cryptoloop_transfer_cbc(struct loop_devi
 		u32 iv[4] = { 0, };
 		iv[0] = cpu_to_le32(IV & 0xffffffff);
 
-		sg_in.page = virt_to_page(in);
-		sg_in.offset = offset_in_page(in);
+		sg_in.page = in_page;
+		sg_in.offset = in_offs;
 		sg_in.length = sz;
 
-		sg_out.page = virt_to_page(out);
-		sg_out.offset = offset_in_page(out);
+		sg_out.page = out_page;
+		sg_out.offset = out_offs;
 		sg_out.length = sz;
 
 		encdecfunc(tfm, &sg_out, &sg_in, sz, (u8 *)iv);
 
 		IV++;
 		size -= sz;
-		in += sz;
-		out += sz;
+		in_offs += sz;
+		out_offs += sz;
 	}
 
 	return 0;
 }
 
 static int
-cryptoloop_transfer(struct loop_device *lo, int cmd, char *raw_buf,
-		     char *loop_buf, int size, sector_t IV)
+cryptoloop_transfer(struct loop_device *lo, int cmd,
+		    struct page *raw_page, unsigned raw_off,
+		    struct page *loop_page, unsigned loop_off,
+		    int size, sector_t IV)
 {
 	struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data;
 	if(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_ECB)
 	{
 		lo->transfer = cryptoloop_transfer_ecb;
-		return cryptoloop_transfer_ecb(lo, cmd, raw_buf, loop_buf, size, IV);
+		return cryptoloop_transfer_ecb(lo, cmd, raw_page, raw_off,
+					       loop_page, loop_off, size, IV);
 	}	
 	if(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_CBC)
 	{	
 		lo->transfer = cryptoloop_transfer_cbc;
-		return cryptoloop_transfer_cbc(lo, cmd, raw_buf, loop_buf, size, IV);
+		return cryptoloop_transfer_cbc(lo, cmd, raw_page, raw_off,
+					       loop_page, loop_off, size, IV);
 	}
 	
 	/*  This is not supposed to happen */

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] loop.c patches, take two
  2003-12-21 19:55             ` [PATCH] loop.c patches, take two Ben Slusky
  2003-12-21 20:07               ` Ben Slusky
@ 2003-12-21 20:49               ` Mika Penttilä
  2003-12-21 21:12                 ` Ben Slusky
  2003-12-21 22:01                 ` Christophe Saout
  2003-12-22  6:33               ` Andrew Morton
  2003-12-22 23:32               ` Ben Slusky
  3 siblings, 2 replies; 22+ messages in thread
From: Mika Penttilä @ 2003-12-21 20:49 UTC (permalink / raw)
  To: Ben Slusky; +Cc: linux-kernel, Andrew Morton, jariruusu

Yet another Big Loop Patch... :)

It's not obvious which parts are bug fixes, and which performance 
improvements. What exactly breaks loops on journalling filesystems, and 
how do you solve it?

What's the clone_bio() business? Why on reads only?

--Mika


Ben Slusky wrote:

>Hi everybody,
>
>Well, it appears that neither my loop.c patches nor Andrew's were merged
>into 2.6.0... I'd request that my patches be merged into mainline,
>since Jari Ruusu has pointed out that Andrew's patch (which removes the
>separate code path for block-backed loop devices) will break journaling
>filesystems on loop devices. Right now, journaling FS's on file-backed
>loop devices are kinda iffy (they will work only if the underlying FS is
>also journaled, with the correct journal options) but journaling FS's
>on block-backed loop devices work perfectly. Andrew's patches would
>break this.
>
>This first patch improves the memory allocation technique used by
>block-backed loop devices. Specifically the loop device will make
>efficient use of whatever pages it can get, rather than throwing them all
>back in case it didn't get as many as it wanted -- this fixes Bugzilla
>bug #1198.
>
>Taken together, this patch and the following patch make loop devices
>safe for use as swap. Please apply.
>
>-
>-Ben
>
>
>  
>
>------------------------------------------------------------------------
>
>diff -pu linux-2.6.0/drivers/block/loop.c-orig linux-2.6.0/drivers/block/loop.c
>--- linux-2.6.0/drivers/block/loop.c-orig	2003-12-20 21:46:53.430260696 -0500
>+++ linux-2.6.0/drivers/block/loop.c	2003-12-20 21:47:11.743476664 -0500
>@@ -247,12 +247,10 @@ fail:
> static int
> lo_send(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
> {
>-	unsigned vecnr;
>-	int ret = 0;
>-
>-	for (vecnr = 0; vecnr < bio->bi_vcnt; vecnr++) {
>-		struct bio_vec *bvec = &bio->bi_io_vec[vecnr];
>+	struct bio_vec *bvec;
>+	int i, ret = 0;
> 
>+	bio_for_each_segment(bvec, bio, i) {
> 		ret = do_lo_send(lo, bvec, bsize, pos);
> 		if (ret < 0)
> 			break;
>@@ -318,12 +316,10 @@ do_lo_receive(struct loop_device *lo,
> static int
> lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
> {
>-	unsigned vecnr;
>-	int ret = 0;
>-
>-	for (vecnr = 0; vecnr < bio->bi_vcnt; vecnr++) {
>-		struct bio_vec *bvec = &bio->bi_io_vec[vecnr];
>+	struct bio_vec *bvec;
>+	int i, ret = 0;
> 
>+	bio_for_each_segment(bvec, bio, i) {
> 		ret = do_lo_receive(lo, bvec, bsize, pos);
> 		if (ret < 0)
> 			break;
>@@ -353,10 +349,12 @@ static void loop_put_buffer(struct bio *
> 	 * check bi_end_io, may just be a remapped bio
> 	 */
> 	if (bio && bio->bi_end_io == loop_end_io_transfer) {
>+		struct bio_vec *bv;
> 		int i;
> 
>-		for (i = 0; i < bio->bi_vcnt; i++)
>-			__free_page(bio->bi_io_vec[i].bv_page);
>+		if (!bio_flagged(bio, BIO_CLONED))
>+			bio_for_each_segment(bv, bio, i)
>+				__free_page(bv->bv_page);
> 
> 		bio_put(bio);
> 	}
>@@ -413,7 +411,7 @@ static int loop_end_io_transfer(struct b
> 	if (bio->bi_size)
> 		return 1;
> 
>-	if (err || bio_rw(bio) == WRITE) {
>+	if (err || (bio_rw(bio) == WRITE && bio->bi_vcnt == rbh->bi_vcnt)) {
> 		bio_endio(rbh, rbh->bi_size, err);
> 		if (atomic_dec_and_test(&lo->lo_pending))
> 			up(&lo->lo_bh_mutex);
>@@ -430,35 +428,41 @@ static struct bio *loop_
>

>_bio(struct 
> 	struct bio_vec *bv;
> 	int i;
> 
>-	bio = bio_alloc(__GFP_NOWARN, rbh->bi_vcnt);
>+	if (bio_rw(rbh) != WRITE) {
>+		bio = bio_clone(rbh, GFP_NOIO);
>+		return bio;
>+	}
>+
>+	bio = bio_alloc(GFP_NOIO, rbh->bi_vcnt);
> 	if (!bio)
> 		return NULL;
> 
> 	/*
> 	 * iterate iovec list and alloc pages
> 	 */
>-	__bio_for_each_segment(bv, rbh, i, 0) {
>+	bio_for_each_segment(bv, rbh, i) {
> 		struct bio_vec *bbv = &bio->bi_io_vec[i];
> 
>-		bbv->bv_page = alloc_page(__GFP_NOWARN|__GFP_HIGHMEM);
>+		/* We need one page; the rest we can live without */
>+		bbv->bv_page = alloc_page((bio->bi_vcnt ? __GFP_NOWARN : GFP_NOIO) | __GFP_HIGHMEM);
> 		if (bbv->bv_page == NULL)
>-			goto oom;
>+			break;
> 
>-		bbv->bv_len = bv->bv_len;
> 		bbv->bv_offset = bv->bv_offset;
>+		bio->bi_size += (bbv->bv_len = bv->bv_len);
>+		bio->bi_vcnt++;
> 	}
> 
>-	bio->bi_vcnt = rbh->bi_vcnt;
>-	bio->bi_size = rbh->bi_size;
>-
>-	return bio;
>+	/* Can't get anything done if we didn't get any pages */
>+	if (unlikely(!bio->bi_vcnt)) {
>+		bio_put(bio);
>+		return NULL;
>+	}
> 
>-oom:
>-	while (--i >= 0)
>-		__free_page(bio->bi_io_vec[i].bv_page);
>+	bio->bi_vcnt += (bio->bi_idx = rbh->bi_idx);
>+	bio->bi_rw = rbh->bi_rw;
> 
>-	bio_put(bio);
>-	return NULL;
>+	return bio;
> }
> 
> static struct bio *loop_get_buffer(struct loop_device *lo, struct bio *rbh)
>@@ -479,19 +483,85 @@ static struct bio *loop_get_buffer(struc
> 		if (flags & PF_MEMALLOC)
> 			current->flags |= PF_MEMALLOC;
> 
>-		if (bio == NULL)
>+		if (unlikely(bio == NULL)) {
>+			printk("loop: alloc failed\n");
> 			blk_congestion_wait(WRITE, HZ/10);
>+		}
> 	} while (bio == NULL);
> 
> 	bio->bi_end_io = loop_end_io_transfer;
> 	bio->bi_private = rbh;
> 	bio->bi_sector = rbh->bi_sector + (lo->lo_offset >> 9);
>-	bio->bi_rw = rbh->bi_rw;
> 	bio->bi_bdev = lo->lo_device;
> 
> 	return bio;
> }
> 
>+static void loop_recycle_buffer(struct loop_device *lo, struct bio *bio)
>+{
>+	struct bio *rbh = bio->bi_private;
>+	struct bio_vec *bv, *bbv, *rbv;
>+	int i, flags, nvecs = bio->bi_vcnt - bio->bi_idx;
>+
>+	/*
>+	 * Comments in ll_rw_blk.c reserve for generic_make_request the right to
>+	 * "change bi_dev and bi_sector for remaps as it sees fit." Doh!
>+	 * Workaround: reset the bi_bdev and recompute the starting sector for
>+	 * the next write.
>+	 */
>+	bio->bi_bdev = lo->lo_device;
>+	bio->bi_sector = rbh->bi_sector + (lo->lo_offset >> 9);
>+	/* Clean up the flags too */
>+	bio->bi_flags &= (~(BIO_POOL_MASK - 1) | (1 << BIO_UPTODATE));
>+
>+	/*
>+	 * Move the allocated pages into position to transfer more data.
>+	 */
>+	__bio_for_each_segment(bv, bio, i, rbh->bi_idx) {
>+		rbv = &rbh->bi_io_vec[i];
>+		bbv = bv + nvecs;
>+
>+		/* Workaround -- see note above */
>+		bio->bi_sector += rbv->bv_len >> 9;
>+		if (i < bio->bi_idx)
>+			continue;
>+
>+		if (i + nvecs < rbh->bi_vcnt) {
>+			bbv->bv_page = bv->bv_page;
>+			bbv->bv_offset = rbv->bv_offset;
>+			bio->bi_size += (bbv->bv_len = rbv->bv_len);
>+		} else
>+			__free_page(bv->bv_page);
>+		memset(bv, 0, sizeof(*bv));
>+	}
>+
>+	bio->bi_idx = bio->bi_vcnt;
>+	bio->bi_vcnt += nvecs;
>+	bio->bi_vcnt = min(bio->bi_vcnt, rbh->bi_vcnt);
>+
>+	/*
>+	 * If we need more pages, try to get some.
>+	 * Clear PF_MEMALLOC to avoid consuming all available memory.
>+	 */
>+	flags = current->flags;
>+	current->flags &= ~PF_MEMALLOC;
>+
>+	__bio_for_each_segment(rbv, rbh, i, bio->bi_vcnt) {
>+		bv = &bio->bi_io_vec[i];
>+
>+		bv->bv_page = alloc_page(__GFP_NOWARN|__GFP_HIGHMEM);
>+		if (bv->bv_page == NULL)
>+			break;
>+
>+		bv->bv_offset = rbv->bv_offset;
>+		bio->bi_size += (bv->bv_len = rbv->bv_len);
>+		bio->bi_vcnt++;
>+	}
>+
>+	if (flags & PF_MEMALLOC)
>+		current->flags |= PF_MEMALLOC;
>+}
>+
> static int loop_transfer_bio(struct loop_device *lo,
> 			     struct bio *to_bio, struct bio *from_bio)
> {
>@@ -502,17 +572,19 @@ static int loop_transfer_bio(struct loop
> 
> 	IV = from_bio->bi_sector + (lo->lo_offset >> 9);
> 
>-	__bio_for_each_segment(from_bvec, from_bio, i, 0) {
>-		to_bvec = &to_bio->bi_io_vec[i];
>+	__bio_for_each_segment(to_bvec, to_bio, i, from_bio->bi_idx) {
>+		from_bvec = &from_bio->bi_io_vec[i];
> 
>-		kmap(from_bvec->bv_page);
>-		kmap(to_bvec->bv_page);
>-		vfrom = page_address(from_bvec->bv_page) + from_bvec->bv_offset;
>-		vto = page_address(to_bvec->bv_page) + to_bvec->bv_offset;
>-		ret |= lo_do_transfer(lo, bio_data_dir(to_bio), vto, vfrom,
>-					from_bvec->bv_len, IV);
>-		kunmap(from_bvec->bv_page);
>-		kunmap(to_bvec->bv_page);
>+		if (i >= to_bio->bi_idx) {
>+			kmap(from_bvec->bv_page);
>+			kmap(to_bvec->bv_page);
>+			vfrom = page_address(from_bvec->bv_page) + from_bvec->bv_offset;
>+			vto = page_address(to_bvec->bv_page) + to_bvec->bv_offset;
>+			ret |= lo_do_transfer(lo, bio_data_dir(to_bio), vto, vfrom,
>+						from_bvec->bv_len, IV);
>+			kunmap(from_bvec->bv_page);
>+			kunmap(to_bvec->bv_page);
>+		}
> 		IV += from_bvec->bv_len >> 9;
> 	}
> 
>@@ -579,16 +651,30 @@ inactive:
> static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
> {
> 	int ret;
>+	struct bio *rbh;
> 
>-	/*
>-	 * For block backed loop, we know this is a READ
>-	 */
> 	if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
> 		ret = do_bio_filebacked(lo, bio);
> 		bio_endio(bio, bio->bi_size, ret);
>-	} else {
>-		struct bio *rbh = bio->bi_private;
>+	} else if (bio_rw(bio) == WRITE) {
>+		/*
>+		 * Write complete, but more pages remain;
>+		 * encrypt and write some more pages
>+		 */
>+		loop_recycle_buffer(lo, bio);
> 
>+		rbh = bio->bi_private;
>+		if ((ret = loop_transfer_bio(lo, bio, rbh))) {
>+			bio_endio(bio, bio->bi_size, ret);
>+			return;
>+		}
>+
>+		generic_make_request(bio);
>+	} else {
>+		/*
>+		 * Read complete; do decryption now
>+		 */
>+		rbh = bio->bi_private;
> 		ret = loop_transfer_bio(lo, bio, rbh);
> 
> 		bio_endio(rbh, rbh->bi_size, ret);
>@@ -606,6 +692,7 @@ static int loop_thread(void *data)
> {
> 	struct loop_device *lo = data;
> 	struct bio *bio;
>+	int x;
> 
> 	daemonize("loop%d", lo->lo_number);
> 
>@@ -640,7 +727,11 @@ static int loop_thread(void *data)
> 			printk("loop: missing bio\n");
> 			continue;
> 		}
>+
>+		x = (lo->lo_flags & LO_FLAGS_DO_BMAP) || bio_rw(bio) != WRITE;
> 		loop_handle_bio(lo, bio);
>+		if (!x)
>+			continue;
> 
> 		/*
> 		 * upped both for pending work and tear-down, lo_pending
>  
>


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] loop.c patches, take two
  2003-12-21 20:49               ` Mika Penttilä
@ 2003-12-21 21:12                 ` Ben Slusky
  2003-12-21 23:00                   ` Mika Penttilä
  2003-12-21 22:01                 ` Christophe Saout
  1 sibling, 1 reply; 22+ messages in thread
From: Ben Slusky @ 2003-12-21 21:12 UTC (permalink / raw)
  To: Mika Penttil?; +Cc: linux-kernel, Andrew Morton, jariruusu

On Sun, 21 Dec 2003 22:49:47 +0200, Mika Penttil? wrote:
> Yet another Big Loop Patch... :)
> 
> It's not obvious which parts are bug fixes, and which performance 
> improvements. What exactly breaks loops on journalling filesystems, and 
> how do you solve it?

See <URL:http://www.ussg.iu.edu/hypermail/linux/kernel/0310.3/1151.html>...
I'd submitted these patches a while back. Andrew observed that the
problems they fixed did not manifest in file-backed loops, so his
solution (which was merged into -mm but not mainstream) was to cut out
the block-backed code path entirely. THAT is what breaks journaling
filesystems on loops (note: not vice versa).

> What's the clone_bio() business? Why on reads only?

There's no need to allocate memory for a second copy of the data on
a read. This is a performance improvenment but I'd say it's closely
related to the main point of the patch (i.e. take what pages you can get
and recycle them); I'm making the block-backed code path significantly
more complex and at the same time having reads take a shortcut. But I
could split that into a separate patch if desired.

-- 
Ben Slusky                      | Yakka foob mog. Grug pubbawup
sluskyb@paranoiacs.org          | zink wattoom gazork. Chumble
sluskyb@stwing.org              | spuzz.
PGP keyID ADA44B3B              |               -Calvin

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] loop.c patches, take two
  2003-12-21 20:49               ` Mika Penttilä
  2003-12-21 21:12                 ` Ben Slusky
@ 2003-12-21 22:01                 ` Christophe Saout
  1 sibling, 0 replies; 22+ messages in thread
From: Christophe Saout @ 2003-12-21 22:01 UTC (permalink / raw)
  To: Mika Penttilä; +Cc: Ben Slusky, linux-kernel, Andrew Morton, jariruusu

Am So, den 21.12.2003 schrieb Mika Penttilä um 21:49:

> Yet another Big Loop Patch... :)
> 
> It's not obvious which parts are bug fixes, and which performance 
> improvements. What exactly breaks loops on journalling filesystems, and 
> how do you solve it?

What about dropping block device backed support for the loop driver
altogether? We now have a nice device mapper in the 2.6 kernel. I don't
like the schizophrenic way the loop driver handles these things (and
you'll always run into similar trouble when trying to fix or resolve
this issue). I know this is kind of radical... but everyone loves
radical ideas. ;)

There also a new device mapper target dm-crypt I've written some time
ago, which does basically the same cryptoloop is doing (or tries to do,
whatever ;)) and which seems to work quite well.

I've never had any feedback from the kernel maintainers about including
it into the main kernel. Andrew?

--
Christophe Saout <christophe@saout.de>
Please avoid sending me Word or PowerPoint attachments.
See http://www.fsf.org/philosophy/no-word-attachments.html


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] loop.c patches, take two
  2003-12-21 21:12                 ` Ben Slusky
@ 2003-12-21 23:00                   ` Mika Penttilä
  2003-12-21 23:05                     ` Ben Slusky
  0 siblings, 1 reply; 22+ messages in thread
From: Mika Penttilä @ 2003-12-21 23:00 UTC (permalink / raw)
  To: Ben Slusky; +Cc: linux-kernel, Andrew Morton, jariruusu

 static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
 {
 	int ret;
+	struct bio *rbh;
 
-	/*
-	 * For block backed loop, we know this is a READ
-	 */
 	if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
 		ret = do_bio_filebacked(lo, bio);
 		bio_endio(bio, bio->bi_size, ret);
-	} else {
-		struct bio *rbh = bio->bi_private;
+	} else if (bio_rw(bio) == WRITE) {
+		/*


AFAICS, this code path is never taken. You don't queue block device writes for the loop thread.


+		 * Write complete, but more pages remain;
+		 * encrypt and write some more pages
+		 */
+		loop_recycle_buffer(lo, bio);



--Mika




^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] loop.c patches, take two
  2003-12-21 23:00                   ` Mika Penttilä
@ 2003-12-21 23:05                     ` Ben Slusky
  2003-12-21 23:51                       ` Mika Penttilä
  0 siblings, 1 reply; 22+ messages in thread
From: Ben Slusky @ 2003-12-21 23:05 UTC (permalink / raw)
  To: Mika Penttil?; +Cc: linux-kernel, Andrew Morton, jariruusu

On Mon, 22 Dec 2003 01:00:39 +0200, Mika Penttil? wrote:
> AFAICS, this code path is never taken. You don't queue block device writes 
> for the loop thread.

Yes I do, in loop_end_io_transfer. If we allocated fewer pages for the copy
bio than contained in the original bio, then those pages are recycled for
the next write.

@@ -413,7 +411,7 @@ static int loop_end_io_transfer(struct b
 	if (bio->bi_size)
 		return 1;
 
-	if (err || bio_rw(bio) == WRITE) {
+	if (err || (bio_rw(bio) == WRITE && bio->bi_vcnt == rbh->bi_vcnt)) {
 		bio_endio(rbh, rbh->bi_size, err);
 		if (atomic_dec_and_test(&lo->lo_pending))
 			up(&lo->lo_bh_mutex);

-- 
Ben Slusky                | "Looks like yet another megatrend
sluskyb@paranoiacs.org    |  has come and gone without affecting
sluskyb@stwing.org        |  me in any way whatsoever."
PGP keyID ADA44B3B        |                     www.theonion.com

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] loop.c patches, take two
  2003-12-21 23:05                     ` Ben Slusky
@ 2003-12-21 23:51                       ` Mika Penttilä
  0 siblings, 0 replies; 22+ messages in thread
From: Mika Penttilä @ 2003-12-21 23:51 UTC (permalink / raw)
  To: Ben Slusky; +Cc: linux-kernel, Andrew Morton, jariruusu



Ben Slusky wrote:

>On Mon, 22 Dec 2003 01:00:39 +0200, Mika Penttil? wrote:
>  
>
>>AFAICS, this code path is never taken. You don't queue block device writes 
>>for the loop thread.
>>    
>>
>
>Yes I do, in loop_end_io_transfer. If we allocated fewer pages for the copy
>bio than contained in the original bio, then those pages are recycled for
>the next write.
>
>@@ -413,7 +411,7 @@ static int loop_end_io_transfer(struct b
> 	if (bio->bi_size)
> 		return 1;
> 
>-	if (err || bio_rw(bio) == WRITE) {
>+	if (err || (bio_rw(bio) == WRITE && bio->bi_vcnt == rbh->bi_vcnt)) {
> 		bio_endio(rbh, rbh->bi_size, err);
> 		if (atomic_dec_and_test(&lo->lo_pending))
> 			up(&lo->lo_bh_mutex);
>  
>
I see, subtle...

--Mika



^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] loop.c patches, take two
  2003-12-21 19:55             ` [PATCH] loop.c patches, take two Ben Slusky
  2003-12-21 20:07               ` Ben Slusky
  2003-12-21 20:49               ` Mika Penttilä
@ 2003-12-22  6:33               ` Andrew Morton
  2003-12-22 23:32               ` Ben Slusky
  3 siblings, 0 replies; 22+ messages in thread
From: Andrew Morton @ 2003-12-22  6:33 UTC (permalink / raw)
  To: Ben Slusky; +Cc: linux-kernel, jariruusu

Ben Slusky <sluskyb@paranoiacs.org> wrote:
>
> Well, it appears that neither my loop.c patches nor Andrew's were merged
>  into 2.6.0... I'd request that my patches be merged into mainline,
>  since Jari Ruusu has pointed out that Andrew's patch (which removes the
>  separate code path for block-backed loop devices) will break journaling
>  filesystems on loop devices. Right now, journaling FS's on file-backed
>  loop devices are kinda iffy (they will work only if the underlying FS is
>  also journaled, with the correct journal options) but journaling FS's
>  on block-backed loop devices work perfectly. Andrew's patches would
>  break this.

I'm not sure how important this is?

Remember that one of the reasons for dropping the block-backed special case
was that it ran like crap under heavy load.  It locked up, iirc.  Has that
been fixed here?


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] loop.c patches, take two
  2003-12-21 19:55             ` [PATCH] loop.c patches, take two Ben Slusky
                                 ` (2 preceding siblings ...)
  2003-12-22  6:33               ` Andrew Morton
@ 2003-12-22 23:32               ` Ben Slusky
  3 siblings, 0 replies; 22+ messages in thread
From: Ben Slusky @ 2003-12-22 23:32 UTC (permalink / raw)
  To: linux-kernel

On Mon, Dec 22 2003 01:36:29 -0500, Andrew Morton <akpm@osdl.org> wrote:
> I'm not sure how important this is?
> 
> Remember that one of the reasons for dropping the block-backed special case
> was that it ran like crap under heavy load. It locked up, iirc. Has that
> been fixed here?

Yes, the old block-backed code was prone to deadlock when trying to
allocate memory under heavy I/O. This patch fixes it.

As to how important this is, it fixes this bug and makes loop devices
usable for swap, i.e. enables encrypted swap. Everybody likes bugfixes
and encrypted swap, right?

-
-Ben


-- 
Ben Slusky                      | The human race never solves
sluskyb@paranoiacs.org          | any of its problems. It merely
sluskyb@stwing.org              | outlives them.
PGP keyID ADA44B3B              |               -David Gerrold

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2003-12-22 23:32 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-10-30 13:41 [PATCH] remove useless highmem bounce from loop/cryptoloop Ben Slusky
2003-10-30 18:14 ` Jari Ruusu
2003-10-30 21:30   ` Andrew Morton
2003-10-31  0:52     ` Ben Slusky
2003-10-31  9:55       ` Andrew Morton
2003-10-31  9:55         ` Andrew Morton
2003-10-31  9:58           ` Andrew Morton
2003-11-13  3:06             ` Ben Slusky
2003-11-01  0:26         ` Ben Slusky
2003-11-02 20:46           ` Ben Slusky
2003-12-21 19:55             ` [PATCH] loop.c patches, take two Ben Slusky
2003-12-21 20:07               ` Ben Slusky
2003-12-21 20:49               ` Mika Penttilä
2003-12-21 21:12                 ` Ben Slusky
2003-12-21 23:00                   ` Mika Penttilä
2003-12-21 23:05                     ` Ben Slusky
2003-12-21 23:51                       ` Mika Penttilä
2003-12-21 22:01                 ` Christophe Saout
2003-12-22  6:33               ` Andrew Morton
2003-12-22 23:32               ` Ben Slusky
2003-10-31 12:15     ` [PATCH] remove useless highmem bounce from loop/cryptoloop Jari Ruusu
2003-10-31 13:02       ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).