All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Fix accesses at LBA28 boundary (old bug, but nasty)
@ 2010-04-06 22:45 Mark Lord
  2010-04-06 22:47 ` Mark Lord
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Mark Lord @ 2010-04-06 22:45 UTC (permalink / raw)
  To: IDE/ATA development list, Jeff Garzik, Tejun Heo, Alan Cox, Ric Wheeler

Most drives from Seagate, Hitachi, and possibly other brands,
do not allow LBA28 access to sector number 0x0fffffff (2^28 - 1).
So instead use LBA48 for such accesses.

This bug could bite a lot of systems, especially when the user has
taken care to align partitions to 4KB boundaries.  On misaligned systems,
it is less likely to be encountered, since a 4KB read would end at 0x10000000
rather than at 0x0fffffff.

Signed-off-by: Mark Lord <mlord@pobox.com>

--- linux-2.6.34-rc3/include/linux/ata.h	2010-03-30 12:24:39.000000000 -0400
+++ linux/include/linux/ata.h	2010-04-06 18:39:41.167702612 -0400
@@ -1025,8 +1025,8 @@
  
  static inline int lba_28_ok(u64 block, u32 n_block)
  {
-	/* check the ending block number */
-	return ((block + n_block) < ((u64)1 << 28)) && (n_block <= 256);
+	/* check the ending block number: must be LESS THAN 0x0fffffff */
+	return ((block + n_block) < (u64)((1 << 28) - 1)) && (n_block <= 256);
  }
  
  static inline int lba_48_ok(u64 block, u32 n_block)

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Fix accesses at LBA28 boundary (old bug, but nasty)
  2010-04-06 22:45 [PATCH] Fix accesses at LBA28 boundary (old bug, but nasty) Mark Lord
@ 2010-04-06 22:47 ` Mark Lord
  2010-04-07  1:29 ` Tejun Heo
  2010-04-07 17:52 ` [PATCH] Fix accesses at LBA28 boundary (old bug, but nasty) (v2) Mark Lord
  2 siblings, 0 replies; 11+ messages in thread
From: Mark Lord @ 2010-04-06 22:47 UTC (permalink / raw)
  To: IDE/ATA development list, Jeff Garzik, Tejun Heo, Alan Cox, Ric Wheeler

Here is a test program, to see if a drive
On 06/04/10 06:45 PM, Mark Lord wrote:
> Most drives from Seagate, Hitachi, and possibly other brands,
> do not allow LBA28 access to sector number 0x0fffffff (2^28 - 1).
> So instead use LBA48 for such accesses.
...

Here is a test program, to see if a particular drive has this problem or not.
It works only on drives larger than 128GB.

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>

#include <linux/fs.h>
#include <linux/hdreg.h>
#include <scsi/scsi.h>
#include <scsi/sg.h>

typedef unsigned long long u64;

enum {
	ATA_CMD_DMA_READ		= 0xc8,
	ATA_CMD_DMA_READ_EXT		= 0x25,
	ATA_SECT_SIZE			= 512,
	ATA_16				= 0x85,
	ATA_16_LEN			= 16,
	ATA_DEV_REG_LBA			= (1 << 6),
	ATA_LBA48			= 1,
	ATA_PROTO_DMA			= ( 6 << 1),
};

static int sg_read (int fd, u64 lba, unsigned int nsects, void *buf, int force_lba28)
{
	unsigned char cdb[ATA_16_LEN] = { ATA_16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
	struct sg_io_hdr hdr;
	unsigned char sense[32];

	cdb[ 1] = ATA_PROTO_DMA;
	cdb[ 6] = nsects;
	cdb[ 8] = lba;
	cdb[10] = lba >>  8;
	cdb[12] = lba >> 16;
	cdb[13] = ATA_DEV_REG_LBA;

	if (force_lba28 || (nsects <= 256 && (lba + nsects) < (1ULL << 28))) {
		cdb[13] |= (lba >> 24) & 0x0f;
		cdb[14]  = ATA_CMD_DMA_READ;
	} else {
		cdb[ 1] |= ATA_LBA48;
		cdb[ 5]  = nsects >> 8;
		cdb[ 7]  = lba >> 24;
		cdb[ 9]  = lba >> 32;
		cdb[11]  = lba >> 40;
		cdb[14]  = ATA_CMD_DMA_READ_EXT;
	}

	memset(&hdr, 0, sizeof(struct sg_io_hdr));
	hdr.interface_id	= 'S';
	hdr.cmd_len		= ATA_16_LEN;
	hdr.mx_sb_len		= sizeof(sense);
	hdr.dxfer_direction	= SG_DXFER_FROM_DEV;
	hdr.dxfer_len		= nsects * ATA_SECT_SIZE;
	hdr.dxferp		= buf;
	hdr.cmdp		= cdb;
	hdr.sbp			= sense;
	hdr.pack_id		= lba;
	hdr.timeout		= 5000; /* milliseconds */

	memset(sense, 0, sizeof(sense));
	if (ioctl(fd, SG_IO, &hdr) < 0) {
		perror("ioctl(SG_IO)");
		return (-1);
	}
	if (hdr.status == 0 && hdr.host_status == 0 && hdr.driver_status == 0)
		return 0; /* success */

	if (hdr.status > 0) {
		unsigned char *s = sense + 8;
		/* SCSI status is non-zero, let's go for the error LBA */
		lba = ((u64)s[10] << 40) | ((u64)s[8] << 32) | (s[6] << 24) |
			(s[11] << 16) | (s[9] << 8) | s[7];
		if (0) fprintf(stderr, "SG_IO error: SCSI sense=0x%x/%02x/%02x, ATA=0x%02x/%02x, LBA=%llu (0x%llx)\n",
			sense[1] & 0xf, sense[2], sense[3], s[13], s[3], lba, lba);
		return -1;
	}

	/* some other error we don't know about yet */
	fprintf(stderr, "SG_IO returned: SCSI status=0x%x, host_status=0x%x, driver_status=0x%x",
		hdr.status, hdr.host_status, hdr.driver_status);
	return -1;
}

static void print_drive_model (const char *devpath)
{
	char cmd[256];

	snprintf(cmd, sizeof(cmd), "hdparm -I %s | grep 'Model Number' | sed '-es/^[ 	]*//'", devpath);
	system(cmd);
	snprintf(cmd, sizeof(cmd), "hdparm -I %s | grep 'Firmware Revision' | sed '-es/^[ 	]*//'", devpath);
	system(cmd);
}

int main (int argc, char *argv[])
{
	unsigned char buf[8 * ATA_SECT_SIZE];
	const char *devpath;
	int rc, fd, nsects;
	u64 lba;

	if (argc != 2) {
		fprintf(stderr, "%s: bad/missing parms: expected <devpath>\n", argv[0]);
		exit(1);
	}
	devpath = argv[1];

	fd = open(devpath, O_RDONLY);
	if (fd == -1) {
		perror(devpath);
		exit(1);
	}

	print_drive_model(devpath);

	nsects = 8;
	lba = 0x0ffffff8;
	printf("Reading %u sectors starting at LBA=%llu (0x%llx): ", nsects, lba, lba);
	fflush(stdout);
	rc = sg_read(fd, lba, nsects, buf, 1);
	printf("%s\n", rc ? "FAILED" : "succeeded");

	nsects = 1;
	lba = 0x0fffffff;
	printf("Reading %u sectors starting at LBA=%llu (0x%llx): ", nsects, lba, lba);
	fflush(stdout);
	rc = sg_read(fd, lba, nsects, buf, 1);
	printf("%s\n", rc ? "FAILED" : "succeeded");

	exit(0);
}

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Fix accesses at LBA28 boundary (old bug, but nasty)
  2010-04-06 22:45 [PATCH] Fix accesses at LBA28 boundary (old bug, but nasty) Mark Lord
  2010-04-06 22:47 ` Mark Lord
@ 2010-04-07  1:29 ` Tejun Heo
  2010-04-07  3:30   ` Mark Lord
  2010-04-07 17:52 ` [PATCH] Fix accesses at LBA28 boundary (old bug, but nasty) (v2) Mark Lord
  2 siblings, 1 reply; 11+ messages in thread
From: Tejun Heo @ 2010-04-07  1:29 UTC (permalink / raw)
  To: Mark Lord; +Cc: IDE/ATA development list, Jeff Garzik, Alan Cox, Ric Wheeler

Hello, Mark.

On 04/07/2010 07:45 AM, Mark Lord wrote:
> Most drives from Seagate, Hitachi, and possibly other brands,
> do not allow LBA28 access to sector number 0x0fffffff (2^28 - 1).
> So instead use LBA48 for such accesses.
> 
> This bug could bite a lot of systems, especially when the user has
> taken care to align partitions to 4KB boundaries.  On misaligned systems,
> it is less likely to be encountered, since a 4KB read would end at
> 0x10000000 rather than at 0x0fffffff.

Oh, nice catch.  Kinda scary.

> Signed-off-by: Mark Lord <mlord@pobox.com>
>
> --- linux-2.6.34-rc3/include/linux/ata.h    2010-03-30
> 12:24:39.000000000 -0400
> +++ linux/include/linux/ata.h    2010-04-06 18:39:41.167702612 -0400
> @@ -1025,8 +1025,8 @@
>  
>  static inline int lba_28_ok(u64 block, u32 n_block)
>  {
> -    /* check the ending block number */
> -    return ((block + n_block) < ((u64)1 << 28)) && (n_block <= 256);
> +    /* check the ending block number: must be LESS THAN 0x0fffffff */
> +    return ((block + n_block) < (u64)((1 << 28) - 1)) && (n_block <= 256);

But why move the type casting?  The cast isn't required to begin with
but starting with u64 constant means the whole compile time
calculation will be in u64 (the intention of that cast I guess).

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Fix accesses at LBA28 boundary (old bug, but nasty)
  2010-04-07  1:29 ` Tejun Heo
@ 2010-04-07  3:30   ` Mark Lord
  2010-04-07  5:11     ` Tejun Heo
  0 siblings, 1 reply; 11+ messages in thread
From: Mark Lord @ 2010-04-07  3:30 UTC (permalink / raw)
  To: Tejun Heo; +Cc: IDE/ATA development list, Jeff Garzik, Alan Cox, Ric Wheeler

On 06/04/10 09:29 PM, Tejun Heo wrote:
..
>> -    /* check the ending block number */
>> -    return ((block + n_block)<  ((u64)1<<  28))&&  (n_block<= 256);
>> +    /* check the ending block number: must be LESS THAN 0x0fffffff */
>> +    return ((block + n_block)<  (u64)((1<<  28) - 1))&&  (n_block<= 256);
>
> But why move the type casting?  The cast isn't required to begin with
> but starting with u64 constant means the whole compile time
> calculation will be in u64 (the intention of that cast I guess).
..

The cast was there already, so I left it there.
Just moved it to cover the entire compile-time expression as a unit,
rather than just the (u64)1  as the existing code did it.

If you prefer a different format for that line, then chirp up!  :)

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Fix accesses at LBA28 boundary (old bug, but nasty)
  2010-04-07  3:30   ` Mark Lord
@ 2010-04-07  5:11     ` Tejun Heo
  2010-04-07 17:52       ` Mark Lord
  0 siblings, 1 reply; 11+ messages in thread
From: Tejun Heo @ 2010-04-07  5:11 UTC (permalink / raw)
  To: Mark Lord; +Cc: IDE/ATA development list, Jeff Garzik, Alan Cox, Ric Wheeler

Hello,

On 04/07/2010 12:30 PM, Mark Lord wrote:
> The cast was there already, so I left it there.
> Just moved it to cover the entire compile-time expression as a unit,
> rather than just the (u64)1  as the existing code did it.

Oh, but there's functional difference between the two.  In the
original place, the initial casting ripples through the whole
expression making each step of the calculation u64.  It's meant to
prevent int overflow during calculation (which isn't necessary in the
current expression BTW) but if you cast the end result, it doesn't
really mean anything.  If you want to kill the casting, kill it but
moving the casting outside doesn't make much sense.  Can you repost
without moving the cast?

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] Fix accesses at LBA28 boundary (old bug, but nasty) (v2)
  2010-04-06 22:45 [PATCH] Fix accesses at LBA28 boundary (old bug, but nasty) Mark Lord
  2010-04-06 22:47 ` Mark Lord
  2010-04-07  1:29 ` Tejun Heo
@ 2010-04-07 17:52 ` Mark Lord
  2010-04-07 19:18   ` Alan Cox
                     ` (2 more replies)
  2 siblings, 3 replies; 11+ messages in thread
From: Mark Lord @ 2010-04-07 17:52 UTC (permalink / raw)
  To: IDE/ATA development list, Jeff Garzik, Tejun Heo, Alan Cox, Ric Wheeler

Most drives from Seagate, Hitachi, and possibly other brands,
do not allow LBA28 access to sector number 0x0fffffff (2^28 - 1).
So instead use LBA48 for such accesses.

This bug could bite a lot of systems, especially when the user has
taken care to align partitions to 4KB boundaries. On misaligned systems,
it is less likely to be encountered, since a 4KB read would end at
0x10000000 rather than at 0x0fffffff.

Signed-off-by: Mark Lord <mlord@pobox.com>
---

Reposting with the pre-existing (u64) cast removed.

--- linux-2.6.34-rc3/include/linux/ata.h	2010-03-30 12:24:39.000000000 -0400
+++ linux/include/linux/ata.h	2010-04-06 18:39:41.167702612 -0400
@@ -1025,8 +1025,8 @@
  
  static inline int lba_28_ok(u64 block, u32 n_block)
  {
-	/* check the ending block number */
-	return ((block + n_block) < ((u64)1 << 28)) && (n_block <= 256);
+	/* check the ending block number: must be LESS THAN 0x0fffffff */
+	return ((block + n_block) < ((1 << 28) - 1)) && (n_block <= 256);
  }
  
  static inline int lba_48_ok(u64 block, u32 n_block)

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Fix accesses at LBA28 boundary (old bug, but nasty)
  2010-04-07  5:11     ` Tejun Heo
@ 2010-04-07 17:52       ` Mark Lord
  0 siblings, 0 replies; 11+ messages in thread
From: Mark Lord @ 2010-04-07 17:52 UTC (permalink / raw)
  To: Tejun Heo; +Cc: IDE/ATA development list, Jeff Garzik, Alan Cox, Ric Wheeler

On 07/04/10 01:11 AM, Tejun Heo wrote:
> Hello,
>
> On 04/07/2010 12:30 PM, Mark Lord wrote:
>> The cast was there already, so I left it there.
>> Just moved it to cover the entire compile-time expression as a unit,
>> rather than just the (u64)1  as the existing code did it.
>
> Oh, but there's functional difference between the two.  In the
> original place, the initial casting ripples through the whole
> expression making each step of the calculation u64.  It's meant to
> prevent int overflow during calculation (which isn't necessary in the
> current expression BTW) but if you cast the end result, it doesn't
> really mean anything.  If you want to kill the casting, kill it but
> moving the casting outside doesn't make much sense.  Can you repost
> without moving the cast?
..

No.  But I can/did repost with it completely gone.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Fix accesses at LBA28 boundary (old bug, but nasty) (v2)
  2010-04-07 17:52 ` [PATCH] Fix accesses at LBA28 boundary (old bug, but nasty) (v2) Mark Lord
@ 2010-04-07 19:18   ` Alan Cox
  2010-04-07 21:01   ` Tejun Heo
  2010-04-08 17:00   ` Jeff Garzik
  2 siblings, 0 replies; 11+ messages in thread
From: Alan Cox @ 2010-04-07 19:18 UTC (permalink / raw)
  To: Mark Lord; +Cc: IDE/ATA development list, Jeff Garzik, Tejun Heo, Ric Wheeler

On Wed, 07 Apr 2010 13:52:08 -0400
Mark Lord <kernel@teksavvy.com> wrote:

> Most drives from Seagate, Hitachi, and possibly other brands,
> do not allow LBA28 access to sector number 0x0fffffff (2^28 - 1).
> So instead use LBA48 for such accesses.
> 
> This bug could bite a lot of systems, especially when the user has
> taken care to align partitions to 4KB boundaries. On misaligned systems,
> it is less likely to be encountered, since a 4KB read would end at
> 0x10000000 rather than at 0x0fffffff.
> 
> Signed-off-by: Mark Lord <mlord@pobox.com>

Acked-by: Alan Cox <alan@linux.intel.com>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Fix accesses at LBA28 boundary (old bug, but nasty) (v2)
  2010-04-07 17:52 ` [PATCH] Fix accesses at LBA28 boundary (old bug, but nasty) (v2) Mark Lord
  2010-04-07 19:18   ` Alan Cox
@ 2010-04-07 21:01   ` Tejun Heo
  2010-04-08 17:00   ` Jeff Garzik
  2 siblings, 0 replies; 11+ messages in thread
From: Tejun Heo @ 2010-04-07 21:01 UTC (permalink / raw)
  To: Mark Lord; +Cc: IDE/ATA development list, Jeff Garzik, Alan Cox, Ric Wheeler

On 04/08/2010 02:52 AM, Mark Lord wrote:
> Most drives from Seagate, Hitachi, and possibly other brands,
> do not allow LBA28 access to sector number 0x0fffffff (2^28 - 1).
> So instead use LBA48 for such accesses.
> 
> This bug could bite a lot of systems, especially when the user has
> taken care to align partitions to 4KB boundaries. On misaligned systems,
> it is less likely to be encountered, since a 4KB read would end at
> 0x10000000 rather than at 0x0fffffff.
> 
> Signed-off-by: Mark Lord <mlord@pobox.com>

Heh, yeah, killing the cast works too.

Acked-by: Tejun Heo <tj@kernel.org>

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Fix accesses at LBA28 boundary (old bug, but nasty) (v2)
  2010-04-07 17:52 ` [PATCH] Fix accesses at LBA28 boundary (old bug, but nasty) (v2) Mark Lord
  2010-04-07 19:18   ` Alan Cox
  2010-04-07 21:01   ` Tejun Heo
@ 2010-04-08 17:00   ` Jeff Garzik
  2010-04-15 13:46     ` Mark Lord
  2 siblings, 1 reply; 11+ messages in thread
From: Jeff Garzik @ 2010-04-08 17:00 UTC (permalink / raw)
  To: Mark Lord
  Cc: IDE/ATA development list, Tejun Heo, Alan Cox, Ric Wheeler,
	David Milburn

On 04/07/2010 01:52 PM, Mark Lord wrote:
> Most drives from Seagate, Hitachi, and possibly other brands,
> do not allow LBA28 access to sector number 0x0fffffff (2^28 - 1).
> So instead use LBA48 for such accesses.
>
> This bug could bite a lot of systems, especially when the user has
> taken care to align partitions to 4KB boundaries. On misaligned systems,
> it is less likely to be encountered, since a 4KB read would end at
> 0x10000000 rather than at 0x0fffffff.
>
> Signed-off-by: Mark Lord <mlord@pobox.com>
> ---
>
> Reposting with the pre-existing (u64) cast removed.
>
> --- linux-2.6.34-rc3/include/linux/ata.h 2010-03-30 12:24:39.000000000
> -0400
> +++ linux/include/linux/ata.h 2010-04-06 18:39:41.167702612 -0400
> @@ -1025,8 +1025,8 @@
>
> static inline int lba_28_ok(u64 block, u32 n_block)
> {
> - /* check the ending block number */
> - return ((block + n_block) < ((u64)1 << 28)) && (n_block <= 256);
> + /* check the ending block number: must be LESS THAN 0x0fffffff */
> + return ((block + n_block) < ((1 << 28) - 1)) && (n_block <= 256);
> }

applied.

Neither 'git am' nor patch(1) would apply the patch for some reason, so 
I applied it manually.  Weird...  I could not spot obvious whitespace or 
word-wrap corruption.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Fix accesses at LBA28 boundary (old bug, but nasty) (v2)
  2010-04-08 17:00   ` Jeff Garzik
@ 2010-04-15 13:46     ` Mark Lord
  0 siblings, 0 replies; 11+ messages in thread
From: Mark Lord @ 2010-04-15 13:46 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: IDE/ATA development list

On 08/04/10 01:00 PM, Jeff Garzik wrote:
> On 04/07/2010 01:52 PM, Mark Lord wrote:
>> Most drives from Seagate, Hitachi, and possibly other brands,
>> do not allow LBA28 access to sector number 0x0fffffff (2^28 - 1).
>> So instead use LBA48 for such accesses.
>>
>> This bug could bite a lot of systems, especially when the user has
>> taken care to align partitions to 4KB boundaries. On misaligned systems,
>> it is less likely to be encountered, since a 4KB read would end at
>> 0x10000000 rather than at 0x0fffffff.
>>
>> Signed-off-by: Mark Lord <mlord@pobox.com>
>> ---
>>
>> Reposting with the pre-existing (u64) cast removed.
>>
>> --- linux-2.6.34-rc3/include/linux/ata.h 2010-03-30 12:24:39.000000000
>> -0400
>> +++ linux/include/linux/ata.h 2010-04-06 18:39:41.167702612 -0400
>> @@ -1025,8 +1025,8 @@
>>
>> static inline int lba_28_ok(u64 block, u32 n_block)
>> {
>> - /* check the ending block number */
>> - return ((block + n_block) < ((u64)1 << 28)) && (n_block <= 256);
>> + /* check the ending block number: must be LESS THAN 0x0fffffff */
>> + return ((block + n_block) < ((1 << 28) - 1)) && (n_block <= 256);
>> }
>
> applied.
>
> Neither 'git am' nor patch(1) would apply the patch for some reason, so
> I applied it manually. Weird... I could not spot obvious whitespace or
> word-wrap corruption.
..

I figured this this out (paritially) today:
Somehow, the (v2) repost ended up with MS-DOS line separators (CR-LF).

I imagine those would confuse most of the tools,
yet remain transparent (hidden) in editors and email programs.

Dunno how they got in there, though.

Cheers
  

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2010-04-15 13:46 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-04-06 22:45 [PATCH] Fix accesses at LBA28 boundary (old bug, but nasty) Mark Lord
2010-04-06 22:47 ` Mark Lord
2010-04-07  1:29 ` Tejun Heo
2010-04-07  3:30   ` Mark Lord
2010-04-07  5:11     ` Tejun Heo
2010-04-07 17:52       ` Mark Lord
2010-04-07 17:52 ` [PATCH] Fix accesses at LBA28 boundary (old bug, but nasty) (v2) Mark Lord
2010-04-07 19:18   ` Alan Cox
2010-04-07 21:01   ` Tejun Heo
2010-04-08 17:00   ` Jeff Garzik
2010-04-15 13:46     ` Mark Lord

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.