* Re: [PATCH] dc395x: Fix support for highmem
From: Jens Axboe @ 2005-03-16  7:58 UTC
  To: linux-scsi; +Cc: g.liakhovetski

On Thu, Mar 03 2005, Linux Kernel Mailing List wrote:
> ChangeSet 1.1982.29.77, 2005/03/03 10:41:40+02:00, lenehan@twibble.org
> 
> 	[PATCH] dc395x: Fix support for highmem
> 	
> 	From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
> 	
> 	Removes the page_to_virt and maps sg lists dynamically.
> 	This makes the driver work with highmem pages.
> 	
> 	Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
> 	Signed-off-by: Jamie Lenehan <lenehan@twibble.org>
> 	Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

Guys, who reviewed this? It looks completely bogus: using kmap() for the
entire sg list is just wrong and can deadlock easily. The proper way is
of course to skip the virtual address requirement and dma map the sg
array properly.
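
For reference, the mapping-only pattern being described looks roughly
like this (a sketch, not the driver's actual code; dc395x_hw_load_sg()
is a made-up stand-in for whatever programs the controller's SG table):

        struct scatterlist *sg = (struct scatterlist *)cmd->request_buffer;
        int count, i;

        /* hand the whole list to the DMA layer/iommu once ... */
        count = pci_map_sg(acb->dev, sg, cmd->use_sg,
                           cmd->sc_data_direction);

        /* ... and program the controller purely from bus addresses,
         * never touching a kernel virtual address */
        for (i = 0; i < count; i++, sg++)
                dc395x_hw_load_sg(acb, sg_dma_address(sg), sg_dma_len(sg));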

>  dc395x.c |   48 +++++++++++++++++++++++++++++++++++-------------
>  1 files changed, 35 insertions(+), 13 deletions(-)
> 
> 
> diff -Nru a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c
> --- a/drivers/scsi/dc395x.c	2005-03-15 18:09:50 -08:00
> +++ b/drivers/scsi/dc395x.c	2005-03-15 18:09:50 -08:00
> @@ -182,7 +182,7 @@
>   * cross a page boundy.
>   */
>  #define SEGMENTX_LEN	(sizeof(struct SGentry)*DC395x_MAX_SG_LISTENTRY)
> -
> +#define VIRTX_LEN	(sizeof(void *) * DC395x_MAX_SG_LISTENTRY)
>  
>  struct SGentry {
>  	u32 address;		/* bus! address */
> @@ -234,6 +234,7 @@
>  	u8 sg_count;			/* No of HW sg entries for this request */
>  	u8 sg_index;			/* Index of HW sg entry for this request */
>  	u32 total_xfer_length;		/* Total number of bytes remaining to be transfered */
> +	void **virt_map;
>  	unsigned char *virt_addr;	/* Virtual address of current transfer position */
>  
>  	/*
> @@ -1020,14 +1021,14 @@
>  			reqlen, cmd->request_buffer, cmd->use_sg,
>  			srb->sg_count);
>  
> -		srb->virt_addr = page_address(sl->page);
>  		for (i = 0; i < srb->sg_count; i++) {
> -			u32 busaddr = (u32)sg_dma_address(&sl[i]);
> -			u32 seglen = (u32)sl[i].length;
> -			sgp[i].address = busaddr;
> +			u32 seglen = (u32)sg_dma_len(sl + i);
> +			sgp[i].address = (u32)sg_dma_address(sl + i);
>  			sgp[i].length = seglen;
>  			srb->total_xfer_length += seglen;
> +			srb->virt_map[i] = kmap(sl[i].page);
>  		}
> +		srb->virt_addr = srb->virt_map[0];
>  		sgp += srb->sg_count - 1;
>  
>  		/*
> @@ -1964,6 +1965,7 @@
>  	int segment = cmd->use_sg;
>  	u32 xferred = srb->total_xfer_length - left; /* bytes transfered */
>  	struct SGentry *psge = srb->segment_x + srb->sg_index;
> +	void **virt = srb->virt_map;
>  
>  	dprintkdbg(DBG_0,
>  		"sg_update_list: Transfered %i of %i bytes, %i remain\n",
> @@ -2003,16 +2005,16 @@
>  
>  	/* We have to walk the scatterlist to find it */
>  	sg = (struct scatterlist *)cmd->request_buffer;
> +	idx = 0;
>  	while (segment--) {
>  		unsigned long mask =
>  		    ~((unsigned long)sg->length - 1) & PAGE_MASK;
>  		if ((sg_dma_address(sg) & mask) == (psge->address & mask)) {
> -			srb->virt_addr = (page_address(sg->page)
> -					   + psge->address -
> -					   (psge->address & PAGE_MASK));
> +			srb->virt_addr = virt[idx] + (psge->address & ~PAGE_MASK);
>  			return;
>  		}
>  		++sg;
> +		++idx;
>  	}
>  
>  	dprintkl(KERN_ERR, "sg_update_list: sg_to_virt failed\n");
> @@ -2138,7 +2140,7 @@
>  				DC395x_read32(acb, TRM_S1040_DMA_CXCNT));
>  		}
>  		/*
> -		 * calculate all the residue data that not yet tranfered
> +		 * calculate all the residue data that not yet transfered
>  		 * SCSI transfer counter + left in SCSI FIFO data
>  		 *
>  		 * .....TRM_S1040_SCSI_COUNTER (24bits)
> @@ -3256,6 +3258,7 @@
>  	struct scsi_cmnd *cmd = srb->cmd;
>  	enum dma_data_direction dir = cmd->sc_data_direction;
>  	if (cmd->use_sg && dir != PCI_DMA_NONE) {
> +		int i;
>  		/* unmap DC395x SG list */
>  		dprintkdbg(DBG_SG, "pci_unmap_srb: list=%08x(%05x)\n",
>  			srb->sg_bus_addr, SEGMENTX_LEN);
> @@ -3265,6 +3268,8 @@
>  		dprintkdbg(DBG_SG, "pci_unmap_srb: segs=%i buffer=%p\n",
>  			cmd->use_sg, cmd->request_buffer);
>  		/* unmap the sg segments */
> +		for (i = 0; i < srb->sg_count; i++)
> +			kunmap(virt_to_page(srb->virt_map[i]));
>  		pci_unmap_sg(acb->dev,
>  			     (struct scatterlist *)cmd->request_buffer,
>  			     cmd->use_sg, dir);
> @@ -3311,7 +3316,7 @@
>  
>  	if (cmd->use_sg) {
>  		struct scatterlist* sg = (struct scatterlist *)cmd->request_buffer;
> -		ptr = (struct ScsiInqData *)(page_address(sg->page) + sg->offset);
> +		ptr = (struct ScsiInqData *)(srb->virt_map[0] + sg->offset);
>  	} else {
>  		ptr = (struct ScsiInqData *)(cmd->request_buffer);
>  	}
> @@ -4246,8 +4251,9 @@
>  	const unsigned srbs_per_page = PAGE_SIZE/SEGMENTX_LEN;
>  
>  	for (i = 0; i < DC395x_MAX_SRB_CNT; i += srbs_per_page)
> -		if (acb->srb_array[i].segment_x)
> -			kfree(acb->srb_array[i].segment_x);
> +		kfree(acb->srb_array[i].segment_x);
> +
> +	vfree(acb->srb_array[0].virt_map);
>  }
>  
>  
> @@ -4263,9 +4269,12 @@
>  	int srb_idx = 0;
>  	unsigned i = 0;
>  	struct SGentry *ptr;
> +	void **virt_array;
>  
> -	for (i = 0; i < DC395x_MAX_SRB_CNT; i++)
> +	for (i = 0; i < DC395x_MAX_SRB_CNT; i++) {
>  		acb->srb_array[i].segment_x = NULL;
> +		acb->srb_array[i].virt_map = NULL;
> +	}
>  
>  	dprintkdbg(DBG_1, "Allocate %i pages for SG tables\n", pages);
>  	while (pages--) {
> @@ -4286,6 +4295,19 @@
>  		    ptr + (i * DC395x_MAX_SG_LISTENTRY);
>  	else
>  		dprintkl(KERN_DEBUG, "No space for tmsrb SG table reserved?!\n");
> +
> +	virt_array = vmalloc((DC395x_MAX_SRB_CNT + 1) * DC395x_MAX_SG_LISTENTRY * sizeof(void*));
> +
> +	if (!virt_array) {
> +		adapter_sg_tables_free(acb);
> +		return 1;
> +	}
> +
> +	for (i = 0; i < DC395x_MAX_SRB_CNT + 1; i++) {
> +		acb->srb_array[i].virt_map = virt_array;
> +		virt_array += DC395x_MAX_SG_LISTENTRY;
> +	}
> +
>  	return 0;
>  }
>  

-- 
Jens Axboe



* Re: [PATCH] dc395x: Fix support for highmem
From: James Bottomley @ 2005-03-16 15:13 UTC
  To: Jens Axboe; +Cc: SCSI Mailing List, g.liakhovetski

On Wed, 2005-03-16 at 08:58 +0100, Jens Axboe wrote:
> Guys, who reviewed this? It looks completely bogus: using kmap() for the
> entire sg list is just wrong and can deadlock easily. The proper way is
> of course to skip the virtual address requirement and dma map the sg
> array properly.

I suppose ultimately, the responsibility is mine.

The problem with this particular card (at least as I read the comments
in the driver) is that most of the time, it can actually do DMA on its
own.  However, when something unexpected occurs (like a SCSI device
disconnection), the DMA engine halts and the last pieces of data have
to be read in manually using PIO.

I agree the kmap is inefficient.  The efficient alternative is to do
dma_map_sg() and use kmap_atomic() in the interrupt routine where we do
the PIO cleanup---I'm afraid I just passed on explaining how to do
this ... unless you care to do the honours ?

James




* Re: [PATCH] dc395x: Fix support for highmem
From: Jens Axboe @ 2005-03-16 16:04 UTC
  To: James Bottomley; +Cc: SCSI Mailing List, g.liakhovetski

On Wed, Mar 16 2005, James Bottomley wrote:
> On Wed, 2005-03-16 at 08:58 +0100, Jens Axboe wrote:
> > Guys, who reviewed this? It looks completely bogus: using kmap() for the
> > entire sg list is just wrong and can deadlock easily. The proper way is
> > of course to skip the virtual address requirement and dma map the sg
> > array properly.
> 
> I suppose ultimately, the responsibility is mine.
> 
> The problem with this particular card (at least as I read the comments
> in the driver) is that most of the time, it can actually do DMA on its
> own.  However, when something unexpected occurs (like a SCSI device
> disconnection), the DMA engine halts and the last pieces of data have
> to be read in manually using PIO.

Ah, that is very similar to IDE in fact. Not lovely :-)

> I agree the kmap is inefficient.  The efficient alternative is to do
> dma_map_sg() and use kmap_atomic() in the interrupt routine where we do
> the PIO cleanup---I'm afraid I just passed on explaining how to do
> this ... unless you care to do the honours ?

The kmap() isn't just inefficient, it's a problem to iterate over the sg
list and kmap all the pages. That is illegal.

But it's not so tricky to get right, if the punting just happens in the
isr. Basically just iterate over every sg entry left ala:

        for (i = start; i < sg_entries; i++) {
                unsigned long flags;
                char *ptr;

                local_irq_save(flags);
                ptr = kmap_atomic(sg->page, KM_BIO_SRC_IRQ);

                /* transfer to/from ptr + sg->offset, sg->length bytes */

                kunmap_atomic(ptr, KM_BIO_SRC_IRQ);
                local_irq_restore(flags);
        }

I _think_ the sg->length field is universally called so, you should not
use sg->dma_length/sg_dma_len() or sg_dma_address(), as we are outside
the work of the iommu at this point.

-- 
Jens Axboe



* Re: [PATCH] dc395x: Fix support for highmem
From: Matthew Wilcox @ 2005-03-16 16:48 UTC
  To: Jens Axboe; +Cc: James Bottomley, SCSI Mailing List, g.liakhovetski

On Wed, Mar 16, 2005 at 05:04:47PM +0100, Jens Axboe wrote:
> The kmap() isn't just inefficient, it's a problem to iterate over the sg
> list and kmap all the pages. That is illegal.
> 
> But it's not so tricky to get right, if the punting just happens in the
> isr. Basically just iterate over every sg entry left ala:
> 
>         for (i = start; i < sg_entries; i++) {
>                 unsigned long flags;
>                 char *ptr;
> 
>                 local_irq_save(flags);
>                 ptr = kmap_atomic(sg->page, KM_BIO_SRC_IRQ);
> 
>                 /* transfer to/from ptr + sg->offset, sg->length bytes */
> 
>                 kunmap_atomic(ptr, KM_BIO_SRC_IRQ);
>                 local_irq_restore(flags);
>         }
> 
> I _think_ the sg->length field is universally called so, you should not
> use sg->dma_length/sg_dma_len() or sg_dma_address(), as we are outside
> the work of the iommu at this point.

Documentation/DMA-mapping.txt agrees with you:

                        Platform Issues

1) Struct scatterlist requirements.

   Struct scatterlist must contain, at a minimum, the following
   members:

        struct page *page;
        unsigned int offset;
        unsigned int length;

Would you mind writing up a change to DMA-mapping.txt that explains why
one would need to do the kmap solution you outline above?  If it needs
to be done for IDE and one SCSI driver, I bet it needs to be done for
more devices, and it'd be a handy place to refer to.

I'm a bit confused why the approach outlined in DMA-mapping.txt doesn't
work for this driver.  Is it because you don't find out until you're
in the interrupt handler that you need to map the sg-list and you can't
call pci_map_sg() from interrupt context?

-- 
"Next the statesmen will invent cheap lies, putting the blame upon 
the nation that is attacked, and every man will be glad of those
conscience-soothing falsities, and will diligently study them, and refuse
to examine any refutations of them; and thus he will by and by convince 
himself that the war is just, and will thank God for the better sleep 
he enjoys after this process of grotesque self-deception." -- Mark Twain


* Re: [PATCH] dc395x: Fix support for highmem
From: Jens Axboe @ 2005-03-16 16:53 UTC
  To: Matthew Wilcox; +Cc: James Bottomley, SCSI Mailing List, g.liakhovetski

On Wed, Mar 16 2005, Matthew Wilcox wrote:
> On Wed, Mar 16, 2005 at 05:04:47PM +0100, Jens Axboe wrote:
> > The kmap() isn't just inefficient, it's a problem to iterate over the sg
> > list and kmap all the pages. That is illegal.
> > 
> > But it's not so tricky to get right, if the punting just happens in the
> > isr. Basically just iterate over every sg entry left ala:
> > 
> >         for (i = start; i < sg_entries; i++) {
> >                 unsigned long flags;
> >                 char *ptr;
> > 
> >                 local_irq_save(flags);
> >                 ptr = kmap_atomic(sg->page, KM_BIO_SRC_IRQ);
> > 
> >                 /* transfer to/from ptr + sg->offset, sg->length bytes */
> > 
> >                 kunmap_atomic(ptr, KM_BIO_SRC_IRQ);
> >                 local_irq_restore(flags);
> >         }
> > 
> > I _think_ the sg->length field is universally called so, you should not
> > use sg->dma_length/sg_dma_len() or sg_dma_address(), as we are outside
> > the work of the iommu at this point.
> 
> Documentation/DMA-mapping.txt agrees with you:
> 
>                         Platform Issues
> 
> 1) Struct scatterlist requirements.
> 
>    Struct scatterlist must contain, at a minimum, the following
>    members:
> 
>         struct page *page;
>         unsigned int offset;
>         unsigned int length;

Ah excellent, thanks!

> Would you mind writing up a change to DMA-mapping.txt that explains why
> one would need to do the kmap solution you outline above?  If it needs
> to be done for IDE and one SCSI driver, I bet it needs to be done for
> more devices, and it'd be a handy place to refer to.

But it isn't really DMA mapping, quite the opposite :)

It's hard to document generically. For IDE, there is no special
'ide' command; it just uses the request structure. So it can use the bio
mapping helpers (see bio.h, bvec_kmap_irq()/bvec_kunmap_irq()). For this
SCSI driver, it needs to walk the sglist and do the same thing,
basically.

> I'm a bit confused why the approach outlined in DMA-mapping.txt doesn't
> work for this driver.  Is it because you don't find out until you're
> in the interrupt handler that you need to map the sg-list and you can't
> call pci_map_sg() from interrupt context?

The list doesn't really need dma mapping at that point, the problem here
is that the driver needs to punt to pio mode because of foo. So calling
pci/dma_map_* is pointless, since the CPU will have to do the transfer
anyways. What the driver is really looking for at this point, is a way
to map the pages in the sglist to a virtual address.

-- 
Jens Axboe



* Re: [PATCH] dc395x: Fix support for highmem
From: Christoph Hellwig @ 2005-03-16 17:02 UTC
  To: Jens Axboe
  Cc: Matthew Wilcox, James Bottomley, SCSI Mailing List, g.liakhovetski

On Wed, Mar 16, 2005 at 05:53:39PM +0100, Jens Axboe wrote:
> The list doesn't really need dma mapping at that point, the problem here
> is that the driver needs to punt to pio mode because of foo. So calling
> pci/dma_map_* is pointless, since the CPU will have to do the transfer
> anyways. What the driver is really looking for at this point, is a way
> to map the pages in the sglist to a virtual address.

Given that there's quite a few cases of this "problem" it would be nice
to have common helpers for it.  Especially as it's really difficult when
we allow merging of sg list entries.



* Re: [PATCH] dc395x: Fix support for highmem
From: Jens Axboe @ 2005-03-16 17:04 UTC
  To: Christoph Hellwig
  Cc: Matthew Wilcox, James Bottomley, SCSI Mailing List, g.liakhovetski

On Wed, Mar 16 2005, Christoph Hellwig wrote:
> On Wed, Mar 16, 2005 at 05:53:39PM +0100, Jens Axboe wrote:
> > The list doesn't really need dma mapping at that point, the problem here
> > is that the driver needs to punt to pio mode because of foo. So calling
> > pci/dma_map_* is pointless, since the CPU will have to do the transfer
> > anyways. What the driver is really looking for at this point, is a way
> > to map the pages in the sglist to a virtual address.
> 
> Given that there's quite a few cases of this "problem" it would be nice
> to have common helpers for it.  Especially as it's really difficult when
> we allow merging of sg list entries

I thought about that when writing the above, but is there really more
than one case for SCSI drivers? If there is, sure, let's add the helpers.
But I would consider it quite a rare occurrence; I've never seen it
before.

-- 
Jens Axboe



* Re: [PATCH] dc395x: Fix support for highmem
From: Mike Christie @ 2005-03-16 18:44 UTC
  To: Jens Axboe
  Cc: Christoph Hellwig, Matthew Wilcox, James Bottomley,
	SCSI Mailing List, g.liakhovetski

Jens Axboe wrote:
> On Wed, Mar 16 2005, Christoph Hellwig wrote:
> 
>>On Wed, Mar 16, 2005 at 05:53:39PM +0100, Jens Axboe wrote:
>>
>>>The list doesn't really need dma mapping at that point, the problem here
>>>is that the driver needs to punt to pio mode because of foo. So calling
>>>pci/dma_map_* is pointless, since the CPU will have to do the transfer
>>>anyways. What the driver is really looking for at this point, is a way
>>>to map the pages in the sglist to a virtual address.
>>
>>Given that there's quite a few cases of this "problem" it would be nice
>>to have common helpers for it.  Especially as it's really difficult when
>>we allow merging of sg list entries
> 
> 
> I thought about that when writing the above, but is there really more
> than one case for SCSI drivers? If there is, sure, let's add the helpers.
> But I would consider it quite a rare occurrence; I've never seen it
> before.
> 

I got lost here. If you are talking about the need to kmap a sglist then 
software iscsi has it. iscsi-sfnet used to do

while (...)
	kmap()

but I fixed that (I think I need to use kmap_atomic though - is that
correct, or is it just a performance improvement? I am calling kmap from
a thread too). I just added kmap_atomic to open-iscsi and I believe
pyx does something similar to the loop above.



* iSCSI and scatterlists
From: Matthew Wilcox @ 2005-03-16 18:53 UTC
  To: Mike Christie
  Cc: Jens Axboe, Christoph Hellwig, Matthew Wilcox, James Bottomley,
	SCSI Mailing List, netdev

On Wed, Mar 16, 2005 at 10:44:50AM -0800, Mike Christie wrote:
> I got lost here. If you are talking about the need to kmap a sglist then 
> software iscsi has it. iscsi-sfnet used to do
> 
> while (...)
> 	kmap()
> 
> but I fixed that (I think I need to use kmap_atomic though - is that
> correct, or is it just a performance improvement? I am calling kmap from
> a thread too). I just added kmap_atomic to open-iscsi and I believe
> pyx does something similar to the loop above.

Sounds like networking should grow an interface to accept a sglist as
input.  I'm really not familiar enough with Linux's networking stack to
know how to do it ... cc'ing netdev to get their thoughts.

-- 
"Next the statesmen will invent cheap lies, putting the blame upon 
the nation that is attacked, and every man will be glad of those
conscience-soothing falsities, and will diligently study them, and refuse
to examine any refutations of them; and thus he will by and by convince 
himself that the war is just, and will thank God for the better sleep 
he enjoys after this process of grotesque self-deception." -- Mark Twain


* Re: [PATCH] dc395x: Fix support for highmem
From: Jens Axboe @ 2005-03-16 18:53 UTC
  To: Mike Christie
  Cc: Christoph Hellwig, Matthew Wilcox, James Bottomley,
	SCSI Mailing List, g.liakhovetski

On Wed, Mar 16 2005, Mike Christie wrote:
> Jens Axboe wrote:
> >On Wed, Mar 16 2005, Christoph Hellwig wrote:
> >
> >>On Wed, Mar 16, 2005 at 05:53:39PM +0100, Jens Axboe wrote:
> >>
> >>>The list doesn't really need dma mapping at that point, the problem here
> >>>is that the driver needs to punt to pio mode because of foo. So calling
> >>>pci/dma_map_* is pointless, since the CPU will have to do the transfer
> >>>anyways. What the driver is really looking for at this point, is a way
> >>>to map the pages in the sglist to a virtual address.
> >>
> >>Given that there's quite a few cases of this "problem" it would be nice
> >>to have common helpers for it.  Especially as it's really difficult when
> >>we allow merging of sg list entries
> >
> >
> >I thought about that when writing the above, but is there really more
> >than one case for SCSI drivers? If there is, sure, let's add the helpers.
> >But I would consider it quite a rare occurrence; I've never seen it
> >before.
> >
> 
> I got lost here. If you are talking about the need to kmap a sglist then 
> software iscsi has it. iscsi-sfnet used to do

I guess the need to kmap the sglist is not that uncommon, for other
reasons - libata would need it for manually filling data for commands it
emulates; other drivers I know of do the same. The punt-to-pio part is
the weird part; I hope no one else does that :-)

I guess we should add something ala

        sg_map_each_entry(sglist, entries, sg, output_ptr, flags) {
                /* transfer sg_virt_len(sg) to/from output_ptr */
        }
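
A sketch of what the helper pair might look like, modelled on
bvec_kmap_irq()/bvec_kunmap_irq() from bio.h (the sg_kmap_irq/
sg_kunmap_irq names are made up; nothing like this is in the tree yet):

        static inline char *sg_kmap_irq(struct scatterlist *sg,
                                        unsigned long *flags)
        {
                local_irq_save(*flags);
                return (char *)kmap_atomic(sg->page, KM_BIO_SRC_IRQ)
                        + sg->offset;
        }

        static inline void sg_kunmap_irq(char *buffer,
                                         struct scatterlist *sg,
                                         unsigned long *flags)
        {
                kunmap_atomic(buffer - sg->offset, KM_BIO_SRC_IRQ);
                local_irq_restore(*flags);
        }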

> while (...)
> 	kmap()
> 
> but I fixed that (I think I need to use kmap_atomic though - is that
> correct, or is it just a performance improvement? I am calling kmap from
> a thread too). I just added kmap_atomic to open-iscsi and I believe
> pyx does something similar to the loop above.

The problem with this driver was that it did multiple kmaps, which is no
good. There's no problem doing a single kmap, copy, kunmap from your
thread. kmap_atomic() may be faster, but it requires that you stay
pinned on the same CPU. Usually it is still faster to use it, if you only
keep the mapping for a short time.
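
From thread context, one entry at a time, that is just (a sketch, with
'dst' standing for wherever the data is being assembled):

        char *src = kmap(sg->page);

        memcpy(dst, src + sg->offset, sg->length);
        kunmap(sg->page);

Only one page is mapped at any moment, so there is nothing to deadlock
against.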

-- 
Jens Axboe



* Re: iSCSI and scatterlists
From: Dmitry Yusupov @ 2005-03-16 19:59 UTC
  To: Matthew Wilcox
  Cc: Mike Christie, Jens Axboe, Christoph Hellwig, James Bottomley,
	SCSI Mailing List, netdev, open-iscsi

On Wed, 2005-03-16 at 18:53 +0000, Matthew Wilcox wrote:
> On Wed, Mar 16, 2005 at 10:44:50AM -0800, Mike Christie wrote:
> > I got lost here. If you are talking about the need to kmap a sglist then 
> > software iscsi has it. iscsi-sfnet used to do
> > 
> > while (...)
> > 	kmap()
> > 
> > but I fixed that (I think I need to use kmap_atomic though - is that
> > correct, or is it just a performance improvement? I am calling kmap from
> > a thread too). I just added kmap_atomic to open-iscsi and I believe
> > pyx does something similar to the loop above.
> 
> Sounds like networking should grow an interface to accept a sglist as
> input.  I'm really not familiar enough with Linux's networking stack to
> know how to do it ... cc'ing netdev to get their thoughts.

This is a nice idea, but it will not always work with the iSCSI protocol,
simply because iSCSI PDU data sizes might be negotiated to be smaller or
bigger than the original WRITE's sglist size. It might help to optimize
some data paths when the PDU's data segment size >= the sglist size,
i.e. when the entire sglist needs to be passed down to the stack.

I'm cross-posting to the open-iscsi mailing list so the open-iscsi folks
can participate in the discussion.

Dmitry


* Re: [PATCH] dc395x: Fix support for highmem
From: Guennadi Liakhovetski @ 2005-03-16 20:24 UTC
  To: Jens Axboe; +Cc: James Bottomley, SCSI Mailing List

Hi

Thanks for reviewing this patch!

On Wed, 16 Mar 2005, Jens Axboe wrote:

> On Wed, Mar 16 2005, James Bottomley wrote:
...
> > I agree the kmap is inefficient.  The efficient alternative is to do
> > dma_map_sg() and use kmap_atomic() in the interrupt routine where we do
> > the PIO cleanup---I'm afraid I just passed on explaining how to do
> > this ... unless you care to do the honours ?

In fact, the first version of my patch (attached below) did exactly this -
only when the driver recognises that it needs to do PIO in the interrupt
would I call kmap_atomic(), do PIO, then kunmap_atomic(). The main reason
why I didn't submit that patch was that I got confused about the various
KM_ macros, and I thought that, since they are a valuable limited resource,
only very "special" drivers are allowed to use them / are allocated one of
them. But I guess now you can just do

	local_irq_save(flags);
	kmap_atomic();
	...
	kunmap_atomic();
	local_irq_restore(flags);

Please have a look. Or should we indeed go the "generic helper functions"
way?

> The kmap() isn't just inefficient, it's a problem to iterate over the sg
> list and kmap all the pages. That is illegal.

Hm, what do you mean "illegal"? Could you explain why?

> But it's not so tricky to get right, if the punting just happens in the
> isr. Basically just iterate over every sg entry left ala:
> 
>         for (i = start; i < sg_entries; i++) {
>                 unsigned long flags;
>                 char *ptr;
> 
>                 local_irq_save(flags);
>                 ptr = kmap_atomic(sg->page, KM_BIO_SRC_IRQ);
> 
>                 /* transfer to/from ptr + sg->offset, sg->length bytes */
> 
>                 kunmap_atomic(ptr, KM_BIO_SRC_IRQ);
>                 local_irq_restore(flags);
>         }
> 
> I _think_ the sg->length field is universally called so, you should not
> use sg->dma_length/sg_dma_len() or sg_dma_address(), as we are outside
> the work of the iommu at this point.

One more fragment in the driver I wasn't sure about is this:

 		unsigned long mask =
 		    ~((unsigned long)sg->length - 1) & PAGE_MASK;
 		if ((sg_dma_address(sg) & mask) == (psge->address & mask)) {

Is sg->length guaranteed to be something like a power of 2 or smaller 
than a page? I thought about replacing the above with

+		if (sg_dma_address(sg) <= psge->address && sg_dma_address(sg) + psge->length > psge->address) {

but wasn't sure.

Thanks
Guennadi
---
Guennadi Liakhovetski

Index: drivers/scsi/dc395x.c
===================================================================
RCS file: /usr/src/cvs/linux-2_6/drivers/scsi/dc395x.c,v
retrieving revision 1.1.1.4
diff -u -r1.1.1.4 dc395x.c
--- drivers/scsi/dc395x.c	17 Nov 2004 21:04:51 -0000	1.1.1.4
+++ drivers/scsi/dc395x.c	16 Mar 2005 20:12:07 -0000
@@ -185,7 +185,7 @@
 
 
 struct SGentry {
-	u32 address;		/* bus! address */
+	dma_addr_t address;		/* bus! address */
 	u32 length;
 };
 
@@ -234,7 +234,8 @@
 	u8 sg_count;			/* No of HW sg entries for this request */
 	u8 sg_index;			/* Index of HW sg entry for this request */
 	u32 total_xfer_length;		/* Total number of bytes remaining to be transfered */
-	unsigned char *virt_addr;	/* Virtual address of current transfer position */
+	struct page *page;
+	unsigned int offset;
 
 	/*
 	 * The sense buffer handling function, request_sense, uses
@@ -989,7 +990,8 @@
 	srb->sg_count = 0;
 	srb->total_xfer_length = 0;
 	srb->sg_bus_addr = 0;
-	srb->virt_addr = NULL;
+	srb->page = NULL;
+	srb->offset = 0;
 	srb->sg_index = 0;
 	srb->adapter_status = 0;
 	srb->target_status = 0;
@@ -1020,11 +1022,11 @@
 			reqlen, cmd->request_buffer, cmd->use_sg,
 			srb->sg_count);
 
-		srb->virt_addr = page_address(sl->page);
+		srb->page = sl->page;
+		srb->offset = sl->offset;
 		for (i = 0; i < srb->sg_count; i++) {
-			u32 busaddr = (u32)sg_dma_address(&sl[i]);
-			u32 seglen = (u32)sl[i].length;
-			sgp[i].address = busaddr;
+			u32 seglen = (u32)sg_dma_len(sl + i);
+			sgp[i].address = cpu_to_le32(0xffffffff & sg_dma_address(sl +i));
 			sgp[i].length = seglen;
 			srb->total_xfer_length += seglen;
 		}
@@ -1065,7 +1067,8 @@
 			srb->total_xfer_length++;
 
 		srb->segment_x[0].length = srb->total_xfer_length;
-		srb->virt_addr = cmd->request_buffer;
+		srb->page = virt_to_page(cmd->request_buffer);
+		srb->offset = (unsigned long)cmd->request_buffer & ~PAGE_MASK;
 		dprintkdbg(DBG_0,
 			"build_srb: [1] len=%d buf=%p use_sg=%d map=%08x\n",
 			srb->total_xfer_length, cmd->request_buffer,
@@ -1984,8 +1987,7 @@
 			psge->length -= xferred;
 			psge->address += xferred;
 			srb->sg_index = idx;
-			pci_dma_sync_single_for_device(srb->dcb->
-					    acb->dev,
+			pci_dma_sync_single_for_device(srb->dcb->acb->dev,
 					    srb->sg_bus_addr,
 					    SEGMENTX_LEN,
 					    PCI_DMA_TODEVICE);
@@ -1995,28 +1997,21 @@
 	}
 	sg_verify_length(srb);
 
-	/* we need the corresponding virtual address */
-	if (!segment) {
-		srb->virt_addr += xferred;
-		return;
-	}
-
 	/* We have to walk the scatterlist to find it */
 	sg = (struct scatterlist *)cmd->request_buffer;
 	while (segment--) {
 		unsigned long mask =
 		    ~((unsigned long)sg->length - 1) & PAGE_MASK;
 		if ((sg_dma_address(sg) & mask) == (psge->address & mask)) {
-			srb->virt_addr = (page_address(sg->page)
-					   + psge->address -
-					   (psge->address & PAGE_MASK));
+//		if (sg_dma_address(sg) <= psge->address && sg_dma_address(sg) + psge->length > psge->address) {
+			srb->page = sg->page;
+			srb->offset = psge->address & ~PAGE_MASK;
 			return;
 		}
 		++sg;
 	}
 
 	dprintkl(KERN_ERR, "sg_update_list: sg_to_virt failed\n");
-	srb->virt_addr = NULL;
 }
 
 
@@ -2138,7 +2133,7 @@
 				DC395x_read32(acb, TRM_S1040_DMA_CXCNT));
 		}
 		/*
-		 * calculate all the residue data that not yet tranfered
+		 * calculate all the residue data that not yet transfered
 		 * SCSI transfer counter + left in SCSI FIFO data
 		 *
 		 * .....TRM_S1040_SCSI_COUNTER (24bits)
@@ -2184,15 +2179,10 @@
 			    (dcb->sync_period & WIDE_SYNC) ? 2 : 1;
 			sg_update_list(srb, d_left_counter);
 			/* KG: Most ugly hack! Apparently, this works around a chip bug */
-			if ((srb->segment_x[srb->sg_index].length ==
-			     diff && srb->cmd->use_sg)
-			    || ((oldxferred & ~PAGE_MASK) ==
-				(PAGE_SIZE - diff))
-			    ) {
-				dprintkl(KERN_INFO, "data_out_phase0: "
-					"Work around chip bug (%i)?\n", diff);
-				d_left_counter =
-				    srb->total_xfer_length - diff;
+			if ((srb->segment_x[srb->sg_index].length == diff && srb->cmd->use_sg) ||
+			    (oldxferred & ~PAGE_MASK) == PAGE_SIZE - diff) {
+				dprintkl(KERN_INFO, "data_out_phase0: Work around chip bug (%i)?\n", diff);
+				d_left_counter = srb->total_xfer_length - diff;
 				sg_update_list(srb, d_left_counter);
 				/*srb->total_xfer_length -= diff; */
 				/*srb->virt_addr += diff; */
@@ -2297,19 +2287,18 @@
 		    && srb->total_xfer_length <= DC395x_LASTPIO) {
 			/*u32 addr = (srb->segment_x[srb->sg_index].address); */
 			/*sg_update_list (srb, d_left_counter); */
-			dprintkdbg(DBG_PIO, "data_in_phase0: PIO (%i %s) to "
-				"%p for remaining %i bytes:",
-				DC395x_read8(acb, TRM_S1040_SCSI_FIFOCNT) & 0x1f,
-				(srb->dcb->sync_period & WIDE_SYNC) ?
-				    "words" : "bytes",
-				srb->virt_addr,
-				srb->total_xfer_length);
+			char *page_addr, *virt_addr;
+			unsigned long flags;
 			if (srb->dcb->sync_period & WIDE_SYNC)
 				DC395x_write8(acb, TRM_S1040_SCSI_CONFIG2,
 					      CFG2_WIDEFIFO);
+			local_irq_save(flags);
+			page_addr = kmap_atomic(srb->page, KM_USER0);
+			virt_addr = page_addr + srb->offset;
+
 			while (DC395x_read8(acb, TRM_S1040_SCSI_FIFOCNT) != 0x40) {
 				u8 byte = DC395x_read8(acb, TRM_S1040_SCSI_FIFO);
-				*(srb->virt_addr)++ = byte;
+				*(virt_addr)++ = byte;
 				if (debug_enabled(DBG_PIO))
 					printk(" %02x", byte);
 				d_left_counter--;
@@ -2320,7 +2309,7 @@
                 /* Read the last byte ... */
 				if (srb->total_xfer_length > 0) {
 					u8 byte = DC395x_read8(acb, TRM_S1040_SCSI_FIFO);
-					*(srb->virt_addr)++ = byte;
+					*(virt_addr)++ = byte;
 					srb->total_xfer_length--;
 					if (debug_enabled(DBG_PIO))
 						printk(" %02x", byte);
@@ -2328,6 +2317,8 @@
 #endif
 				DC395x_write8(acb, TRM_S1040_SCSI_CONFIG2, 0);
 			}
+			kunmap_atomic(page_addr, KM_IRQ0);
+			local_irq_restore(flags);
 			/*printk(" %08x", *(u32*)(bus_to_virt (addr))); */
 			/*srb->total_xfer_length = 0; */
 			if (debug_enabled(DBG_PIO))
@@ -2452,7 +2443,7 @@
 			DC395x_write32(acb, TRM_S1040_DMA_XLOWADDR,
 				       srb->segment_x[0].address);
 			DC395x_write32(acb, TRM_S1040_DMA_XCNT,
-				       srb->segment_x[0].length);
+				       cpu_to_le32(srb->segment_x[0].length));
 		}
 		/* load total transfer length (24bits) max value 16Mbyte */
 		DC395x_write32(acb, TRM_S1040_SCSI_COUNTER,
@@ -2485,22 +2476,25 @@
 				      SCMD_FIFO_IN);
 		} else {	/* write */
 			int ln = srb->total_xfer_length;
+			char *page_addr, *virt_addr;
+			unsigned long flags;
+
+			local_irq_save(flags);
+			page_addr = kmap_atomic(srb->page, KM_USER0);
+			virt_addr = page_addr + srb->offset;
 			if (srb->dcb->sync_period & WIDE_SYNC)
 				DC395x_write8(acb, TRM_S1040_SCSI_CONFIG2,
 				     CFG2_WIDEFIFO);
-			dprintkdbg(DBG_PIO,
-				"data_io_transfer: PIO %i bytes from %p:",
-				srb->total_xfer_length, srb->virt_addr);
-
 			while (srb->total_xfer_length) {
 				if (debug_enabled(DBG_PIO))
-					printk(" %02x", (unsigned char) *(srb->virt_addr));
+					printk(" %02x", (unsigned char) *virt_addr);
 
 				DC395x_write8(acb, TRM_S1040_SCSI_FIFO, 
-				     *(srb->virt_addr)++);
-
+				     *virt_addr++);
 				sg_subtract_one(srb);
 			}
+			kunmap_atomic(page_addr, KM_USER0);
+			local_irq_restore(flags);
 			if (srb->dcb->sync_period & WIDE_SYNC) {
 				if (ln % 2) {
 					DC395x_write8(acb, TRM_S1040_SCSI_FIFO, 0);


* Re: [PATCH] dc395x: Fix support for highmem
From: Jens Axboe @ 2005-03-17  7:40 UTC
  To: Guennadi Liakhovetski; +Cc: James Bottomley, SCSI Mailing List

On Wed, Mar 16 2005, Guennadi Liakhovetski wrote:
> Hi
> 
> Thanks for reviewing this patch!
> 
> On Wed, 16 Mar 2005, Jens Axboe wrote:
> 
> > On Wed, Mar 16 2005, James Bottomley wrote:
> ...
> > > I agree the kmap is inefficient.  The efficient alternative is to do
> > > dma_map_sg() and use kmap_atomic() in the interrupt routine where we do
> > > the PIO cleanup---I'm afraid I just passed on explaining how to do
> > > this ... unless you care to do the honours ?
> 
> In fact, the first version of my patch (attached below) did exactly this -
> only when the driver recognises that it needs to do PIO in the interrupt
> would I call kmap_atomic(), do PIO, then kunmap_atomic(). The main reason
> why I didn't submit that patch was that I got confused about the various
> KM_ macros, and I thought that, since they are a valuable limited resource,
> only very "special" drivers are allowed to use them / are allocated one of
> them. But I guess now you can just do
> 
> 	local_irq_save(flags);
> 	kmap_atomic();
> 	...
> 	kunmap_atomic();
> 	local_irq_restore(flags);
> 
> Please have a look. Or should we indeed go the "generic helper functions"
> way?

In general it looks OK, comments below.

> > The kmap() isn't just inefficient, it's a problem to iterate over the sg
> > list and kmap all the pages. That is illegal.
> 
> Hm, what do you mean "illegal"? Could you explain why?

You risk deadlocking.

> > But it's not so tricky to get right, if the punting just happens in the
> > isr. Basically just iterate over every sg entry left ala:
> > 
> >         for (i = start; i < sg_entries; i++) {
> >                 unsigned long flags;
> >                 char *ptr;
> > 
> >                 local_irq_save(flags);
> >                 ptr = kmap_atomic(sg->page, KM_BIO_SRC_IRQ);
> > 
> >                 /* transfer to/from ptr + sg->offset, sg->length bytes */
> > 
> >                 kunmap_atomic(ptr, KM_BIO_SRC_IRQ);
> >                 local_irq_restore(flags);
> >         }
> > 
> > I _think_ the sg->length field is universally called so, you should not
> > use sg->dma_length/sg_dma_len() or sg_dma_address(), as we are outside
> > the work of the iommu at this point.
> 
> One more fragment in the driver I wasn't sure about is this:
> 
>  		unsigned long mask =
>  		    ~((unsigned long)sg->length - 1) & PAGE_MASK;
>  		if ((sg_dma_address(sg) & mask) == (psge->address & mask)) {
> 
> Is sg->length guaranteed to be something like a power of 2 or smaller 
> than a page? I thought about replacing the above with

No, it's not guaranteed to be a power-of-2.

> +		if (sg_dma_address(sg) <= psge->address && sg_dma_address(sg) + psge->length > psge->address) {

What is it trying to accomplish?

> @@ -1020,11 +1022,11 @@
>  			reqlen, cmd->request_buffer, cmd->use_sg,
>  			srb->sg_count);
>  
> -		srb->virt_addr = page_address(sl->page);
> +		srb->page = sl->page;
> +		srb->offset = sl->offset;
>  		for (i = 0; i < srb->sg_count; i++) {
> -			u32 busaddr = (u32)sg_dma_address(&sl[i]);
> -			u32 seglen = (u32)sl[i].length;
> -			sgp[i].address = busaddr;
> +			u32 seglen = (u32)sg_dma_len(sl + i);
> +			sgp[i].address = cpu_to_le32(0xffffffff & sg_dma_address(sl +i));
>  			sgp[i].length = seglen;
>  			srb->total_xfer_length += seglen;
>  		}

I don't understand this change, why the cpu_to_le32?

> @@ -2297,19 +2287,18 @@
>  		    && srb->total_xfer_length <= DC395x_LASTPIO) {
>  			/*u32 addr = (srb->segment_x[srb->sg_index].address); */
>  			/*sg_update_list (srb, d_left_counter); */
> -			dprintkdbg(DBG_PIO, "data_in_phase0: PIO (%i %s) to "
> -				"%p for remaining %i bytes:",
> -				DC395x_read8(acb, TRM_S1040_SCSI_FIFOCNT) & 0x1f,
> -				(srb->dcb->sync_period & WIDE_SYNC) ?
> -				    "words" : "bytes",
> -				srb->virt_addr,
> -				srb->total_xfer_length);
> +			char *page_addr, *virt_addr;
> +			unsigned long flags;
>  			if (srb->dcb->sync_period & WIDE_SYNC)
>  				DC395x_write8(acb, TRM_S1040_SCSI_CONFIG2,
>  					      CFG2_WIDEFIFO);
> +			local_irq_save(flags);
> +			page_addr = kmap_atomic(srb->page, KM_USER0);
> +			virt_addr = page_addr + srb->offset;
> +

You can't use KM_USER0 here, use one of the bio assigned kmap types (you
can use KM_BIO_SRC_IRQ, for instance - the reason there are two is for
the bouncing that needs to kmap both source and destination at the same
time).
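
(The bounce copy that needs both types at once looks roughly like this
- a sketch, with from_page/to_page standing for the two ends:)

        char *vfrom, *vto;

        vfrom = kmap_atomic(from_page, KM_BIO_SRC_IRQ);
        vto = kmap_atomic(to_page, KM_BIO_DST_IRQ);
        memcpy(vto, vfrom, PAGE_SIZE);
        kunmap_atomic(vto, KM_BIO_DST_IRQ);
        kunmap_atomic(vfrom, KM_BIO_SRC_IRQ);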

>  			while (DC395x_read8(acb, TRM_S1040_SCSI_FIFOCNT) != 0x40) {
>  				u8 byte = DC395x_read8(acb, TRM_S1040_SCSI_FIFO);
> -				*(srb->virt_addr)++ = byte;
> +				*(virt_addr)++ = byte;
>  				if (debug_enabled(DBG_PIO))
>  					printk(" %02x", byte);
>  				d_left_counter--;
> @@ -2320,7 +2309,7 @@
>                  /* Read the last byte ... */
>  				if (srb->total_xfer_length > 0) {
>  					u8 byte = DC395x_read8(acb, TRM_S1040_SCSI_FIFO);
> -					*(srb->virt_addr)++ = byte;
> +					*(virt_addr)++ = byte;
>  					srb->total_xfer_length--;
>  					if (debug_enabled(DBG_PIO))
>  						printk(" %02x", byte);
> @@ -2328,6 +2317,8 @@
>  #endif
>  				DC395x_write8(acb, TRM_S1040_SCSI_CONFIG2, 0);
>  			}
> +			kunmap_atomic(page_addr, KM_IRQ0);
> +			local_irq_restore(flags);

Here you kunmap_atomic() with a different kmap type than you mapped
with? Must be the same.

Same applies to the matching section further down.


-- 
Jens Axboe



* Re: [PATCH] dc395x: Fix support for highmem
From: gl @ 2005-03-17  9:41 UTC
  To: Jens Axboe; +Cc: James Bottomley, SCSI Mailing List, Guennadi Liakhovetski

Hi

On Thu, 17 Mar 2005, Jens Axboe wrote:

> On Wed, Mar 16 2005, Guennadi Liakhovetski wrote:
> >
> > On Wed, 16 Mar 2005, Jens Axboe wrote:
> > > The kmap() isn't just inefficient, it's a problem to iterate over the sg
> > > list and kmap all the pages. That is illegal.
> >
> > Hm, what do you mean "illegal"? Could you explain why?
>
> You risk deadlocking.

Hmm, I am curious - it would be nice to know the details :-)

> > One more fragment in the driver I wasn't sure about is this:
> >
> >  		unsigned long mask =
> >  		    ~((unsigned long)sg->length - 1) & PAGE_MASK;
> >  		if ((sg_dma_address(sg) & mask) == (psge->address & mask)) {
> >
> > Is sg->length guaranteed to be something like a power of 2 or smaller
> > than a page? I thought about replacing the above with
>
> No, it's not guaranteed to be a power-of-2.
>
> > +		if (sg_dma_address(sg) <= psge->address && sg_dma_address(sg) + psge->length > psge->address) {
>
> What is it trying to accomplish?

So, I was not sure whether the old check was correct - was it? The test is
supposed to find the current sg element.

>
> > @@ -1020,11 +1022,11 @@
> >  			reqlen, cmd->request_buffer, cmd->use_sg,
> >  			srb->sg_count);
> >
> > -		srb->virt_addr = page_address(sl->page);
> > +		srb->page = sl->page;
> > +		srb->offset = sl->offset;
> >  		for (i = 0; i < srb->sg_count; i++) {
> > -			u32 busaddr = (u32)sg_dma_address(&sl[i]);
> > -			u32 seglen = (u32)sl[i].length;
> > -			sgp[i].address = busaddr;
> > +			u32 seglen = (u32)sg_dma_len(sl + i);
> > +			sgp[i].address = cpu_to_le32(0xffffffff & sg_dma_address(sl +i));
> >  			sgp[i].length = seglen;
> >  			srb->total_xfer_length += seglen;
> >  		}
>
> I don't understand this change, why the cpu_to_le32?

Well, I copied it from tmscsim:

pSRB->SGBusAddr = cpu_to_le32(pci_dma_lo32(sg_dma_address(psgl)));

I am somewhat confused. In both cases these are bus addresses, right? And
sg_dma_address() already gives the bus address, so are both wrong?

BTW, looking at tmscsim again, isn't this

	if( residual )
	{
		bval = DC390_read8 (ScsiFifo);      /* get one residual byte */
		ptr = (u8 *) bus_to_virt( pSRB->SGBusAddr );
		*ptr = bval;
		pSRB->SGBusAddr++; xferCnt++;
		pSRB->TotalXferredLen++;
		pSRB->SGToBeXferLen--;
	}

also a case for dma_map_atomic?

> > +			local_irq_save(flags);
> > +			page_addr = kmap_atomic(srb->page, KM_USER0);
> > +			virt_addr = page_addr + srb->offset;
> > +
>
> You can't use KM_USER0 here, use one of the bio assigned kmap types (you
> can use KM_BIO_SRC_IRQ, for instance - the reason there are two is for
> the bouncing that needs to kmap both source and destination at the same
> time).

Ok, will change that.

> > +			kunmap_atomic(page_addr, KM_IRQ0);
> > +			local_irq_restore(flags);
>
> Here you kunmap_atomic() with a different kmap type than you mapped
> with? Must be the same.

Auch, manual patch-editing error:-)

> Same applies to the matching section further down.

Yep, will fix.

Thanks
Guennadi
---------------------------------
Guennadi Liakhovetski, Ph.D.
DSA Daten- und Systemtechnik GmbH
Pascalstr. 28
D-52076 Aachen
Germany


* Re: [PATCH] dc395x: Fix support for highmem
From: Jens Axboe @ 2005-03-17 10:08 UTC
  To: gl; +Cc: James Bottomley, SCSI Mailing List, Guennadi Liakhovetski

On Thu, Mar 17 2005, gl@dsa-ac.de wrote:
> Hi
> 
> On Thu, 17 Mar 2005, Jens Axboe wrote:
> 
> > On Wed, Mar 16 2005, Guennadi Liakhovetski wrote:
> > >
> > > On Wed, 16 Mar 2005, Jens Axboe wrote:
> > > > The kmap() isn't just inefficient, it's a problem to iterate over the sg
> > > > list and kmap all the pages. That is illegal.
> > >
> > > Hm, what do you mean "illegal"? Could you explain why?
> >
> > You risk deadlocking.
> 
> Emn, I am curious, would be nice to know details:-)

See the kmap implementation, mm/highmem.c:map_new_virtual() to be
precise. If you run out of entries while processing your sglist, you
will end up waiting on a free kmap entry for an sg entry that will not
become available before your previous kmaps are released => deadlock.
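
Concretely, with the pattern the patch above uses in build_srb():

        for (i = 0; i < srb->sg_count; i++)
                srb->virt_map[i] = kmap(sl[i].page);

the i-th iteration can sleep waiting for a free slot while the first i
mappings are still held; with enough requests doing the same thing in
parallel, every slot ends up held and none can be released.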

> > > One more fragment in the driver I wasn't sure about is this:
> > >
> > >  		unsigned long mask =
> > >  		    ~((unsigned long)sg->length - 1) & PAGE_MASK;
> > >  		if ((sg_dma_address(sg) & mask) == (psge->address & mask)) {
> > >
> > > Is sg->length guaranteed to be something like a power of 2 or smaller
> > > than a page? I thought about replacing the above with
> >
> > No, it's not guaranteed to be a power-of-2.
> >
> > > +		if (sg_dma_address(sg) <= psge->address && sg_dma_address(sg) + psge->length > psge->address) {
> >
> > What is it trying to accomplish?
> 
> So, I was not sure whether the old check was correct - was it? The test is
> supposed to find the current sg element.

When this happens, do you know how many bytes of I/O have already completed?
Most sane drives do, so I would use that to find the current sg entry.
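
A sketch of that lookup, reusing the names from the patch (xferred being
the number of bytes already done):

        sg = (struct scatterlist *)cmd->request_buffer;
        for (idx = 0; idx < srb->sg_count - 1; idx++) {
                if (xferred < sg->length)
                        break;          /* offset lies in this entry */
                xferred -= sg->length;
                sg++;
        }
        /* sg/idx is now the current entry, xferred the offset into it */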

> >
> > > @@ -1020,11 +1022,11 @@
> > >  			reqlen, cmd->request_buffer, cmd->use_sg,
> > >  			srb->sg_count);
> > >
> > > -		srb->virt_addr = page_address(sl->page);
> > > +		srb->page = sl->page;
> > > +		srb->offset = sl->offset;
> > >  		for (i = 0; i < srb->sg_count; i++) {
> > > -			u32 busaddr = (u32)sg_dma_address(&sl[i]);
> > > -			u32 seglen = (u32)sl[i].length;
> > > -			sgp[i].address = busaddr;
> > > +			u32 seglen = (u32)sg_dma_len(sl + i);
> > > +			sgp[i].address = cpu_to_le32(0xffffffff & sg_dma_address(sl +i));
> > >  			sgp[i].length = seglen;
> > >  			srb->total_xfer_length += seglen;
> > >  		}
> >
> > I don't understand this change, why the cpu_to_le32?
> 
> Well, I copied it from tmscsim:
> 
> pSRB->SGBusAddr = cpu_to_le32(pci_dma_lo32(sg_dma_address(psgl)));
> 
> I am somewhat confused. In both cases these are bus addresses, right? And
> sg_dma_address() already gives the bus address, so are both wrong?

The only reason that would make sense is if ->SGBusAddr is passed to the
hardware and it requires LE encoding. If it is just driver internal use,
it doesn't make any sense.
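
I.e. the conversion belongs only where a value is written into something
the chip parses, like the in-memory SG table (a sketch):

        /* SG descriptor read by the chip: little-endian on all hosts */
        sgp[i].address = cpu_to_le32(sg_dma_address(sl + i));
        sgp[i].length = cpu_to_le32(sg_dma_len(sl + i));

Values kept only for the driver's own bookkeeping should stay in native
byte order.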

> BTW, looking at tmscsim again, isn't this
> 
> 	if( residual )
> 	{
> 		bval = DC390_read8 (ScsiFifo);      /* get one residual byte */
> 		ptr = (u8 *) bus_to_virt( pSRB->SGBusAddr );
> 		*ptr = bval;
> 		pSRB->SGBusAddr++; xferCnt++;
> 		pSRB->TotalXferredLen++;
> 		pSRB->SGToBeXferLen--;
> 	}
> 
> also a case for dma_map_atomic?

It breaks for iommu, for sure.

-- 
Jens Axboe



* Re: [PATCH] dc395x: Fix support for highmem
From: gl @ 2005-03-17 10:56 UTC
  To: Jens Axboe; +Cc: James Bottomley, SCSI Mailing List, Guennadi Liakhovetski

On Thu, 17 Mar 2005, Jens Axboe wrote:

> See the kmap implementation, mm/highmem.c:map_new_virtual() to be
> precise. If you run out of entries while processing your sglist, you
> will end up waiting on a free kmap entry for an sg entry that will not
> become available before your previous kmaps are released => deadlock.

Right, thanks for the explanation!

> > > > +		if (sg_dma_address(sg) <= psge->address && sg_dma_address(sg) + psge->length > psge->address) {
> > >
> > > What is it trying to accomplish?
> >
> > So, I was not sure if the old check was correct, was it? The test is
> > supposed to find the current sg-element.
>
> When this happens, do you know how many bytes of I/O have already completed?
> Most sane drives do, so I would use that to find the current sg entry.

Yeah, it is known at this point. I guess I'll leave this change out for
now - for this patch at least.

> > > > +			sgp[i].address = cpu_to_le32(0xffffffff & sg_dma_address(sl +i));
> > > >  			sgp[i].length = seglen;
> > > >  			srb->total_xfer_length += seglen;
> > > >  		}
> > >
> > > I don't understand this change, why the cpu_to_le32?
> >
> > Well, I copied it from tmscsim:
> >
> > pSRB->SGBusAddr = cpu_to_le32(pci_dma_lo32(sg_dma_address(psgl)));
> >
> > I am somewhat confused. In both cases these are bus addresses, right? And
> > sg_dma_address() already gives the bus address, so are both wrong?
>
> The only reason that would make sense is if ->SGBusAddr is passed to the
> hardware and it requires LE encoding. If it is just driver internal use,
> it doesn't make any sense.

Yes, it is passed to the controller. I think it is also the case with the
dc395x, although it is smarter than the am53c974 in sg-list processing. I
will probably leave it out too.

> > 		ptr = (u8 *) bus_to_virt( pSRB->SGBusAddr );
...
> > also a case for dma_map_atomic?
>
> It breaks for iommu, for sure.

Ok, expect a patch soon.

So, do we want this kmap_atomic() patch for dc395x, or do we wait for
helper functions?

BTW, can one test highmem correctness by booting with, e.g., highmem=xxx
even if no physical highmem is available?

Thanks
Guennadi
---------------------------------
Guennadi Liakhovetski, Ph.D.
DSA Daten- und Systemtechnik GmbH
Pascalstr. 28
D-52076 Aachen
Germany


* Re: [PATCH] dc395x: Fix support for highmem
From: Guennadi Liakhovetski @ 2005-03-17 20:51 UTC
  To: gl
  Cc: Jens Axboe, James Bottomley, SCSI Mailing List,
	Christoph Hellwig, Jamie Lenehan

Hi

Here's a fixed patch - thanks to everybody for reviewing and commenting!

However, if it is decided to implement helper functions, as suggested by 
Christoph and others, feel free to drop it. There was probably only one
user of this card with highmem, and he has retired his card too :-)

Thanks
Guennadi
---
Guennadi Liakhovetski

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>

diff -u a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c
--- a/drivers/scsi/dc395x.c	17 Nov 2004 21:04:51
+++ b/drivers/scsi/dc395x.c	17 Mar 2005 20:18:14
@@ -234,7 +234,8 @@
 	u8 sg_count;			/* No of HW sg entries for this request */
 	u8 sg_index;			/* Index of HW sg entry for this request */
 	u32 total_xfer_length;		/* Total number of bytes remaining to be transfered */
-	unsigned char *virt_addr;	/* Virtual address of current transfer position */
+	struct page *page;
+	unsigned int offset;
 
 	/*
 	 * The sense buffer handling function, request_sense, uses
@@ -989,7 +990,8 @@
 	srb->sg_count = 0;
 	srb->total_xfer_length = 0;
 	srb->sg_bus_addr = 0;
-	srb->virt_addr = NULL;
+	srb->page = NULL;
+	srb->offset = 0;
 	srb->sg_index = 0;
 	srb->adapter_status = 0;
 	srb->target_status = 0;
@@ -1020,7 +1022,8 @@
 			reqlen, cmd->request_buffer, cmd->use_sg,
 			srb->sg_count);
 
-		srb->virt_addr = page_address(sl->page);
+		srb->page = sl->page;
+		srb->offset = sl->offset;
 		for (i = 0; i < srb->sg_count; i++) {
 			u32 busaddr = (u32)sg_dma_address(&sl[i]);
 			u32 seglen = (u32)sl[i].length;
@@ -1065,7 +1068,8 @@
 			srb->total_xfer_length++;
 
 		srb->segment_x[0].length = srb->total_xfer_length;
-		srb->virt_addr = cmd->request_buffer;
+		srb->page = virt_to_page(cmd->request_buffer);
+		srb->offset = (unsigned long)cmd->request_buffer & ~PAGE_MASK;
 		dprintkdbg(DBG_0,
 			"build_srb: [1] len=%d buf=%p use_sg=%d map=%08x\n",
 			srb->total_xfer_length, cmd->request_buffer,
@@ -1984,8 +1988,7 @@
 			psge->length -= xferred;
 			psge->address += xferred;
 			srb->sg_index = idx;
-			pci_dma_sync_single_for_device(srb->dcb->
-					    acb->dev,
+			pci_dma_sync_single_for_device(srb->dcb->acb->dev,
 					    srb->sg_bus_addr,
 					    SEGMENTX_LEN,
 					    PCI_DMA_TODEVICE);
@@ -1995,28 +1998,20 @@
 	}
 	sg_verify_length(srb);
 
-	/* we need the corresponding virtual address */
-	if (!segment) {
-		srb->virt_addr += xferred;
-		return;
-	}
-
 	/* We have to walk the scatterlist to find it */
 	sg = (struct scatterlist *)cmd->request_buffer;
 	while (segment--) {
 		unsigned long mask =
 		    ~((unsigned long)sg->length - 1) & PAGE_MASK;
 		if ((sg_dma_address(sg) & mask) == (psge->address & mask)) {
-			srb->virt_addr = (page_address(sg->page)
-					   + psge->address -
-					   (psge->address & PAGE_MASK));
+			srb->page = sg->page;
+			srb->offset = psge->address & ~PAGE_MASK;
 			return;
 		}
 		++sg;
 	}
 
 	dprintkl(KERN_ERR, "sg_update_list: sg_to_virt failed\n");
-	srb->virt_addr = NULL;
 }
 
 
@@ -2138,7 +2133,7 @@
 				DC395x_read32(acb, TRM_S1040_DMA_CXCNT));
 		}
 		/*
-		 * calculate all the residue data that not yet tranfered
+		 * calculate all the residue data that not yet transfered
 		 * SCSI transfer counter + left in SCSI FIFO data
 		 *
 		 * .....TRM_S1040_SCSI_COUNTER (24bits)
@@ -2184,15 +2179,10 @@
 			    (dcb->sync_period & WIDE_SYNC) ? 2 : 1;
 			sg_update_list(srb, d_left_counter);
 			/* KG: Most ugly hack! Apparently, this works around a chip bug */
-			if ((srb->segment_x[srb->sg_index].length ==
-			     diff && srb->cmd->use_sg)
-			    || ((oldxferred & ~PAGE_MASK) ==
-				(PAGE_SIZE - diff))
-			    ) {
-				dprintkl(KERN_INFO, "data_out_phase0: "
-					"Work around chip bug (%i)?\n", diff);
-				d_left_counter =
-				    srb->total_xfer_length - diff;
+			if ((srb->segment_x[srb->sg_index].length == diff && srb->cmd->use_sg) ||
+			    (oldxferred & ~PAGE_MASK) == PAGE_SIZE - diff) {
+				dprintkl(KERN_INFO, "data_out_phase0: Work around chip bug (%i)?\n", diff);
+				d_left_counter = srb->total_xfer_length - diff;
 				sg_update_list(srb, d_left_counter);
 				/*srb->total_xfer_length -= diff; */
 				/*srb->virt_addr += diff; */
@@ -2297,19 +2287,18 @@
 		    && srb->total_xfer_length <= DC395x_LASTPIO) {
 			/*u32 addr = (srb->segment_x[srb->sg_index].address); */
 			/*sg_update_list (srb, d_left_counter); */
-			dprintkdbg(DBG_PIO, "data_in_phase0: PIO (%i %s) to "
-				"%p for remaining %i bytes:",
-				DC395x_read8(acb, TRM_S1040_SCSI_FIFOCNT) & 0x1f,
-				(srb->dcb->sync_period & WIDE_SYNC) ?
-				    "words" : "bytes",
-				srb->virt_addr,
-				srb->total_xfer_length);
+			char *page_addr, *virt_addr;
+			unsigned long flags;
 			if (srb->dcb->sync_period & WIDE_SYNC)
 				DC395x_write8(acb, TRM_S1040_SCSI_CONFIG2,
 					      CFG2_WIDEFIFO);
+			local_irq_save(flags);
+			page_addr = kmap_atomic(srb->page, KM_BIO_SRC_IRQ);
+			virt_addr = page_addr + srb->offset;
+
 			while (DC395x_read8(acb, TRM_S1040_SCSI_FIFOCNT) != 0x40) {
 				u8 byte = DC395x_read8(acb, TRM_S1040_SCSI_FIFO);
-				*(srb->virt_addr)++ = byte;
+				*(virt_addr)++ = byte;
 				if (debug_enabled(DBG_PIO))
 					printk(" %02x", byte);
 				d_left_counter--;
@@ -2320,7 +2309,7 @@
                 /* Read the last byte ... */
 				if (srb->total_xfer_length > 0) {
 					u8 byte = DC395x_read8(acb, TRM_S1040_SCSI_FIFO);
-					*(srb->virt_addr)++ = byte;
+					*(virt_addr)++ = byte;
 					srb->total_xfer_length--;
 					if (debug_enabled(DBG_PIO))
 						printk(" %02x", byte);
@@ -2328,6 +2317,8 @@
 #endif
 				DC395x_write8(acb, TRM_S1040_SCSI_CONFIG2, 0);
 			}
+			kunmap_atomic(page_addr, KM_BIO_SRC_IRQ);
+			local_irq_restore(flags);
 			/*printk(" %08x", *(u32*)(bus_to_virt (addr))); */
 			/*srb->total_xfer_length = 0; */
 			if (debug_enabled(DBG_PIO))
@@ -2485,22 +2476,25 @@
 				      SCMD_FIFO_IN);
 		} else {	/* write */
 			int ln = srb->total_xfer_length;
+			char *page_addr, *virt_addr;
+			unsigned long flags;
+
+			local_irq_save(flags);
+			page_addr = kmap_atomic(srb->page, KM_BIO_SRC_IRQ);
+			virt_addr = page_addr + srb->offset;
 			if (srb->dcb->sync_period & WIDE_SYNC)
 				DC395x_write8(acb, TRM_S1040_SCSI_CONFIG2,
 				     CFG2_WIDEFIFO);
-			dprintkdbg(DBG_PIO,
-				"data_io_transfer: PIO %i bytes from %p:",
-				srb->total_xfer_length, srb->virt_addr);
-
 			while (srb->total_xfer_length) {
 				if (debug_enabled(DBG_PIO))
-					printk(" %02x", (unsigned char) *(srb->virt_addr));
+					printk(" %02x", (unsigned char) *virt_addr);
 
 				DC395x_write8(acb, TRM_S1040_SCSI_FIFO, 
-				     *(srb->virt_addr)++);
-
+				     *virt_addr++);
 				sg_subtract_one(srb);
 			}
+			kunmap_atomic(page_addr, KM_BIO_SRC_IRQ);
+			local_irq_restore(flags);
 			if (srb->dcb->sync_period & WIDE_SYNC) {
 				if (ln % 2) {
 					DC395x_write8(acb, TRM_S1040_SCSI_FIFO, 0);
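
The PIO paths above all repeat the same mapping pattern; schematically
(a sketch, not the literal driver code):

        unsigned long flags;
        char *base, *virt;

        /* KM_BIO_SRC_IRQ slots are shared with interrupt context, so
         * interrupts stay off for the lifetime of the mapping */
        local_irq_save(flags);
        base = kmap_atomic(srb->page, KM_BIO_SRC_IRQ);
        virt = base + srb->offset;

        /* ... PIO to/from the SCSI FIFO through *virt ... */

        kunmap_atomic(base, KM_BIO_SRC_IRQ);
        local_irq_restore(flags);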

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] dc395x: Fix support for highmem
  2005-03-16 17:04             ` Jens Axboe
  2005-03-16 18:44               ` Mike Christie
@ 2005-03-20  9:14               ` Christoph Hellwig
  2005-03-20 20:51                 ` Guennadi Liakhovetski
  1 sibling, 1 reply; 36+ messages in thread
From: Christoph Hellwig @ 2005-03-20  9:14 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Christoph Hellwig, Matthew Wilcox, James Bottomley,
	SCSI Mailing List, g.liakhovetski

On Wed, Mar 16, 2005 at 06:04:17PM +0100, Jens Axboe wrote:
> On Wed, Mar 16 2005, Christoph Hellwig wrote:
> > On Wed, Mar 16, 2005 at 05:53:39PM +0100, Jens Axboe wrote:
> > > The list doesn't really need dma mapping at that point, the problem here
> > > is that the driver needs to punt to pio mode because of foo. So calling
> > > pci/dma_map_* is pointless, since the CPU will have to do the transfer
> > > anyways. What the driver is really looking for at this point, is a way
> > > to map the pages in the sglist to a virtual address.
> > 
> > Given that there's quite a few cases of this "problem" it would be nice
> > to have common helpers for it.  Especially as it's really difficult when
> > we allow merging of sg list entries
> 
> I thought about that when writing the above, but is there really more
> than one case for SCSI drivers? If there is, sure, let's add the helpers.
> But I would consider it a quite rare occurrence; I've never seen it
> before.

There's lots of PIO-only drivers, as well as RAID drivers that need to
look into non-I/O-path commands, and things like iSCSI.


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] dc395x: Fix support for highmem
  2005-03-20  9:14               ` Christoph Hellwig
@ 2005-03-20 20:51                 ` Guennadi Liakhovetski
  2005-03-21  7:55                   ` Jens Axboe
  0 siblings, 1 reply; 36+ messages in thread
From: Guennadi Liakhovetski @ 2005-03-20 20:51 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Matthew Wilcox, James Bottomley, SCSI Mailing List

On Sun, 20 Mar 2005, Christoph Hellwig wrote:

> On Wed, Mar 16, 2005 at 06:04:17PM +0100, Jens Axboe wrote:
> > On Wed, Mar 16 2005, Christoph Hellwig wrote:
> > > On Wed, Mar 16, 2005 at 05:53:39PM +0100, Jens Axboe wrote:
> > > > The list doesn't really need dma mapping at that point, the problem here
> > > > is that the driver needs to punt to pio mode because of foo. So calling
> > > > pci/dma_map_* is pointless, since the CPU will have to do the transfer
> > > > anyways. What the driver is really looking for at this point, is a way
> > > > to map the pages in the sglist to a virtual address.
> > > 
> > > Given that there's quite a few cases of this "problem" it would be nice
> > > to have common helpers for it.  Especially as it's really difficult when
> > > we allow merging of sg list entries
> > 
> > I thought about that when writing the above, but is there really more
> > than one case for SCSI drivers? If there is, sure, let's add the helpers.
> > But I would consider it a quite rare occurrence; I've never seen it
> > before.
> 
> There's lots of PIO-only drivers, as well as RAID drivers that need to
> look into non-I/O-path commands, and things like iSCSI.

Well, how about something like

char *kmap_atomic_sg(struct scatterlist *sg, unsigned int offset, int *mapped);
void kunmap_atomic_sg(struct scatterlist *sg, int mapped);

The latter would just call the kunmap_atomic with the respective KM_ type. 
By "merging of sg list entries" above is meant, that pci_map_sg may return 
a number smaller than the number of elements in the original sg list 
because some adjacent elements were merged during the mapping?

Thanks
Guennadi
---
Guennadi Liakhovetski


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] dc395x: Fix support for highmem
  2005-03-20 20:51                 ` Guennadi Liakhovetski
@ 2005-03-21  7:55                   ` Jens Axboe
       [not found]                     ` <7044.1111398919@www16.gmx.net>
  0 siblings, 1 reply; 36+ messages in thread
From: Jens Axboe @ 2005-03-21  7:55 UTC (permalink / raw)
  To: Guennadi Liakhovetski
  Cc: Christoph Hellwig, Matthew Wilcox, James Bottomley, SCSI Mailing List

On Sun, Mar 20 2005, Guennadi Liakhovetski wrote:
> On Sun, 20 Mar 2005, Christoph Hellwig wrote:
> 
> > On Wed, Mar 16, 2005 at 06:04:17PM +0100, Jens Axboe wrote:
> > > On Wed, Mar 16 2005, Christoph Hellwig wrote:
> > > > On Wed, Mar 16, 2005 at 05:53:39PM +0100, Jens Axboe wrote:
> > > > > The list doesn't really need dma mapping at that point, the problem here
> > > > > is that the driver needs to punt to pio mode because of foo. So calling
> > > > > pci/dma_map_* is pointless, since the CPU will have to do the transfer
> > > > > anyways. What the driver is really looking for at this point, is a way
> > > > > to map the pages in the sglist to a virtual address.
> > > > 
> > > > Given that there's quite a few cases of this "problem" it would be nice
> > > > to have common helpers for it.  Especially as it's really difficult when
> > > > we allow merging of sg list entries
> > > 
> > > I thought about that when writing the above, but is there really more
> > > than one case for SCSI drivers? If there is, sure, let's add the helpers.
> > > But I would consider it a quite rare occurrence; I've never seen it
> > > before.
> > 
> > There's lots of PIO-only drivers, as well as RAID drivers that need to
> > look into non-I/O-path commands, and things like iSCSI.
> 
> Well, how about something like
> 
> char *kmap_atomic_sg(struct scatterlist *sg, unsigned int offset, int *mapped);
> void kunmap_atomic_sg(struct scatterlist *sg, int mapped);
> 
> The latter would just call the kunmap_atomic with the respective KM_ type. 
> By "merging of sg list entries" above is meant, that pci_map_sg may return 
> a number smaller than the number of elements in the original sg list 
> because some adjacent elements were merged during the mapping?

Same problem, you want to map N entries at a time, which is simply not
easily doable. I made a suggestion earlier in the thread, you need to do
something à la

        sg_map_each_entry(sglist, entries, sg, output_ptr, flags) {
                /* transfer sg_virt_len(sg) to/from output_ptr */
        }

that maps each entry successively.
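
One way it could look (just a sketch - neither sg_map_each_entry() nor
sg_virt_len() exists yet, and interrupt disabling is left to the caller
here instead of the flags argument):

        /* caller must have interrupts disabled: KM_BIO_SRC_IRQ is
         * shared with interrupt context */
        #define sg_virt_len(sg) ((sg)->length)

        #define sg_map_each_entry(sglist, entries, sg, ptr)             \
                for ((sg) = (sglist);                                   \
                     (sg) < (sglist) + (entries) &&                     \
                     ((ptr) = kmap_atomic((sg)->page, KM_BIO_SRC_IRQ)   \
                              + (sg)->offset, 1);                       \
                     kunmap_atomic((ptr) - (sg)->offset,                \
                                   KM_BIO_SRC_IRQ), (sg)++)

Note that a bare 'break' in the body would leave the current entry
mapped, so early exits need care.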

BTW, wrt your earlier question, it is pretty easy to test highmem on a
non-highmem machine. Try and google for highmem debug; Andrea had a
little patch in his -aa kernels for 2.4 that should be easily adaptable
to 2.6.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] dc395x: Fix support for highmem
       [not found]                     ` <7044.1111398919@www16.gmx.net>
@ 2005-03-21 10:38                       ` gl
  2005-03-21 10:44                         ` Jens Axboe
  0 siblings, 1 reply; 36+ messages in thread
From: gl @ 2005-03-21 10:38 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Christoph Hellwig, Matthew Wilcox, James Bottomley,
	SCSI Mailing List, Guennadi Liakhovetski

On Mon, 21 Mar 2005, Jens Axboe wrote:

> On Sun, Mar 20 2005, Guennadi Liakhovetski wrote:
> >
> > char *kmap_atomic_sg(struct scatterlist *sg, unsigned int offset, int *mapped);
> > void kunmap_atomic_sg(struct scatterlist *sg, int mapped);
> >
> > The latter would just call the kunmap_atomic with the respective KM_ type.
> > By "merging of sg list entries" above is meant, that pci_map_sg may return
> > a number smaller than the number of elements in the original sg list
> > because some adjacent elements were merged during the mapping?
>
> Same problem, you want to map N entries at a time, which is simply not
> easily doable. I made a suggestion earlier in the thread, you need to do
> something à la
>
>         sg_map_each_entry(sglist, entries, sg, output_ptr, flags) {
>                 /* transfer sg_virt_len(sg) to/from output_ptr */
>         }
>
> that maps each entry successively.

Well, I don't know how and when other drivers use / need this mapping, in
dc395x and tmscsim you just occasionally need to transfer a couple of
bytes per PIO, so, it would be a waste to map each sg-entry? If other
drivers do always need all, shouldn't we then define 2 APIs - for a single
mapping and for all. Also, I think, at least am53c974 (tmscsim) does
sg-processing only semi-automatically, that is you get interrupts and do
some stuff for each element. So, mapping all sg-entries each time is not
needed, and you cannot kmap them atomically if you want to keep them
mapped all the time. In your proposed API, when would you unmap them?

> BTW, wrt your earlier question, it is pretty easy to test highmem on a
> non-highmem machine. Try and google for highmem debug; Andrea had a
> little patch in his -aa kernels for 2.4 that should be easily adaptable
> to 2.6.

Thanks, I'll have a google.

Thanks
Guennadi
---------------------------------
Guennadi Liakhovetski, Ph.D.
DSA Daten- und Systemtechnik GmbH
Pascalstr. 28
D-52076 Aachen
Germany

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] dc395x: Fix support for highmem
  2005-03-21 10:38                       ` gl
@ 2005-03-21 10:44                         ` Jens Axboe
  2005-03-21 11:00                           ` gl
  0 siblings, 1 reply; 36+ messages in thread
From: Jens Axboe @ 2005-03-21 10:44 UTC (permalink / raw)
  To: gl
  Cc: Christoph Hellwig, Matthew Wilcox, James Bottomley,
	SCSI Mailing List, Guennadi Liakhovetski

On Mon, Mar 21 2005, gl@dsa-ac.de wrote:
> On Mon, 21 Mar 2005, Jens Axboe wrote:
> 
> > On Sun, Mar 20 2005, Guennadi Liakhovetski wrote:
> > >
> > > char *kmap_atomic_sg(struct scatterlist *sg, unsigned int offset, int *mapped);
> > > void kunmap_atomic_sg(struct scatterlist *sg, int mapped);
> > >
> > > The latter would just call the kunmap_atomic with the respective KM_ type.
> > > By "merging of sg list entries" above is meant, that pci_map_sg may return
> > > a number smaller than the number of elements in the original sg list
> > > because some adjacent elements were merged during the mapping?
> >
> > Same problem, you want to map N entries at a time, which is simply not
> > easily doable. I made a suggestion earlier in the thread, you need to do
> > something à la
> >
> >         sg_map_each_entry(sglist, entries, sg, output_ptr, flags) {
> >                 /* transfer sg_virt_len(sg) to/from output_ptr */
> >         }
> >
> > that maps each entry successively.
> 
> Well, I don't know how and when other drivers use / need this mapping, in
> dc395x and tmscsim you just occasionally need to transfer a couple of
> bytes per PIO, so, it would be a waste to map each sg-entry? If other

So you start at the entry you need, and break after handling the ones
you need to handle. Either pass in a start offset, or just pass in
sglist+offset.

> drivers do always need all, shouldn't we then define 2 APIs - for a single
> mapping and for all? Also, I think, at least am53c974 (tmscsim) does
> sg-processing only semi-automatically, that is you get interrupts and do
> some stuff for each element. So, mapping all sg-entries each time is not
> needed, and you cannot kmap them atomically if you want to keep them
> mapped all the time. In your proposed API, when would you unmap them?

The loop handles the mapping and unmapping for you.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] dc395x: Fix support for highmem
  2005-03-21 10:44                         ` Jens Axboe
@ 2005-03-21 11:00                           ` gl
  2005-03-30 21:22                             ` Guennadi Liakhovetski
  0 siblings, 1 reply; 36+ messages in thread
From: gl @ 2005-03-21 11:00 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Christoph Hellwig, Matthew Wilcox, James Bottomley,
	SCSI Mailing List, Guennadi Liakhovetski

On Mon, 21 Mar 2005, Jens Axboe wrote:

> On Mon, Mar 21 2005, gl@dsa-ac.de wrote:
> > On Mon, 21 Mar 2005, Jens Axboe wrote:
> >
> > > easily doable. I made a suggestion earlier in the thread, you need to do
> > > something à la
> > >
> > >         sg_map_each_entry(sglist, entries, sg, output_ptr, flags) {
> > >                 /* transfer sg_virt_len(sg) to/from output_ptr */
> > >         }
> > >
> > > that maps each entry successively.
> >
> > Well, I don't know how and when other drivers use / need this mapping, in
> > dc395x and tmscsim you just occasionally need to transfer a couple of
> > bytes per PIO, so, it would be a waste to map each sg-entry? If other
>
> So you start at the entry you need, and break after handling the ones
> you need to handle. Either pass in a start offset, or just pass in
> sglist+offset.

Yeah, you could do that. I just wanted to automate finding the needed
sg-entry and the offset within it based on the number of bytes processed.
Well, not a big deal, but if many drivers will have to do this calculation,
maybe it's worth defining at least a macro / inline?
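
Something like this, say (just a sketch, the name is made up):

        /* find the sg entry containing byte '*offset' of the request;
         * on return, '*offset' is the offset into that entry */
        static inline struct scatterlist *
        sg_entry_for_offset(struct scatterlist *sg, int sg_count, size_t *offset)
        {
                int i;

                for (i = 0; i < sg_count; i++, sg++) {
                        if (*offset < sg->length)
                                return sg;
                        *offset -= sg->length;
                }
                return NULL;    /* past the end of the list */
        }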

> > drivers do always need all, shouldn't we then define 2 APIs - for a single
> > mapping and for all? Also, I think, at least am53c974 (tmscsim) does
> > sg-processing only semi-automatically, that is you get interrupts and do
> > some stuff for each element. So, mapping all sg-entries each time is not
> > needed, and you cannot kmap them atomically if you want to keep them
> > mapped all the time. In your proposed API, when would you unmap them?
>
> The loop handles the mapping and unmapping for you.

Sorry, still don't understand - how do you want to "break" then?

Thanks
Guennadi
---------------------------------
Guennadi Liakhovetski, Ph.D.
DSA Daten- und Systemtechnik GmbH
Pascalstr. 28
D-52076 Aachen
Germany

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] dc395x: Fix support for highmem
  2005-03-21 11:00                           ` gl
@ 2005-03-30 21:22                             ` Guennadi Liakhovetski
  2005-03-30 22:13                               ` James Bottomley
  0 siblings, 1 reply; 36+ messages in thread
From: Guennadi Liakhovetski @ 2005-03-30 21:22 UTC (permalink / raw)
  To: gl
  Cc: Jens Axboe, Christoph Hellwig, Matthew Wilcox, James Bottomley,
	SCSI Mailing List, Guennadi Liakhovetski

Hi

What is going to happen to this stuff? The current Linus' tree contains a 
broken (by me :-() dc395x; other drivers (including tmscsim) would benefit 
from a generic API. I understand the drivers are not of top importance, but 
still.

I guess, if nothing happens, I'll just have to submit a patch reverting my 
dc395x highmem "fix".

Thanks
Guennadi

On Mon, 21 Mar 2005 gl@dsa-ac.de wrote:

> On Mon, 21 Mar 2005, Jens Axboe wrote:
> 
> > On Mon, Mar 21 2005, gl@dsa-ac.de wrote:
> > > On Mon, 21 Mar 2005, Jens Axboe wrote:
> > >
> > > > easily doable. I made a suggestion earlier in the thread, you need to do
> > > > something à la
> > > >
> > > >         sg_map_each_entry(sglist, entries, sg, output_ptr, flags) {
> > > >                 /* transfer sg_virt_len(sg) to/from output_ptr */
> > > >         }
> > > >
> > > > that maps each entry successively.
> > >
> > > Well, I don't know how and when other drivers use / need this mapping, in
> > > dc395x and tmscsim you just occasionally need to transfer a couple of
> > > bytes per PIO, so, it would be a waste to map each sg-entry? If other
> >
> > So you start at the entry you need, and break after handling the ones
> > you need to handle. Either pass in a start offset, or just pass in
> > sglist+offset.
> 
> Yeah, you could do that. I just wanted to automate finding the needed
> sg-entry and the offset within it based on the number of bytes processed.
> Well, not a big deal, but if many drivers will have to do this calculation,
> maybe it's worth defining at least a macro / inline?
> 
> > > drivers do always need all, shouldn't we then define 2 APIs - for a single
> > > mapping and for all? Also, I think, at least am53c974 (tmscsim) does
> > > sg-processing only semi-automatically, that is you get interrupts and do
> > > some stuff for each element. So, mapping all sg-entries each time is not
> > > needed, and you cannot kmap them atomically if you want to keep them
> > > mapped all the time. In your proposed API, when would you unmap them?
> >
> > The loop handles the mapping and unmapping for you.
> 
> Sorry, still don't understand - how do you want to "break" then?
> 
> Thanks
> Guennadi
> ---------------------------------
> Guennadi Liakhovetski, Ph.D.
> DSA Daten- und Systemtechnik GmbH
> Pascalstr. 28
> D-52076 Aachen
> Germany
> 

---
Guennadi Liakhovetski


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] dc395x: Fix support for highmem
  2005-03-30 21:22                             ` Guennadi Liakhovetski
@ 2005-03-30 22:13                               ` James Bottomley
  2005-03-31  8:58                                 ` gl
  0 siblings, 1 reply; 36+ messages in thread
From: James Bottomley @ 2005-03-30 22:13 UTC (permalink / raw)
  To: Guennadi Liakhovetski
  Cc: gl, Jens Axboe, Christoph Hellwig, Matthew Wilcox, SCSI Mailing List

On Wed, 2005-03-30 at 23:22 +0200, Guennadi Liakhovetski wrote:
> What is going to happen to this stuff? The current Linus' tree contains a 
> broken (by me :-() dc395x; other drivers (including tmscsim) would benefit 
> from a generic API. I understand the drivers are not of top importance, but 
> still.

I'd propose fixing the driver first and then using that fix as the basis
for a generic API.  Is the patch you sent on the 17th of March what
you'd consider to be your final fix for this?

James



^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] dc395x: Fix support for highmem
  2005-03-30 22:13                               ` James Bottomley
@ 2005-03-31  8:58                                 ` gl
  2005-04-09 22:48                                   ` Guennadi Liakhovetski
  0 siblings, 1 reply; 36+ messages in thread
From: gl @ 2005-03-31  8:58 UTC (permalink / raw)
  To: James Bottomley
  Cc: Guennadi Liakhovetski, Jens Axboe, Christoph Hellwig,
	Matthew Wilcox, SCSI Mailing List

On Wed, 30 Mar 2005, James Bottomley wrote:

> On Wed, 2005-03-30 at 23:22 +0200, Guennadi Liakhovetski wrote:
> > What is going to happen to this stuff? The current Linus' tree contains a
> > broken (by me :-() dc395x; other drivers (including tmscsim) would benefit
> > from a generic API. I understand the drivers are not of top importance, but
> > still.
>
> I'd propose fixing the driver first and then using that fix as the basis
> for a generic API.  Is the patch you sent on the 17th of March what
> you'd consider to be your final fix for this?

Well, it could be. But I'd use Jens' suggestion and actually test it for
highmem. And, since we are at it, I'd try to organise this code as a
separate API, so that we just have to replace dc395x_* with scsi_* in
function names. Makes sense?
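
For instance (hypothetical prototypes only, sketching what the dc395x
helpers would become):

        void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count,
                                  size_t offset, size_t *len);
        void scsi_kunmap_atomic_sg(void *virt);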

Thanks
Guennadi
---------------------------------
Guennadi Liakhovetski, Ph.D.
DSA Daten- und Systemtechnik GmbH
Pascalstr. 28
D-52076 Aachen
Germany

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] dc395x: Fix support for highmem
  2005-03-31  8:58                                 ` gl
@ 2005-04-09 22:48                                   ` Guennadi Liakhovetski
  2005-04-21 21:49                                     ` Guennadi Liakhovetski
  0 siblings, 1 reply; 36+ messages in thread
From: Guennadi Liakhovetski @ 2005-04-09 22:48 UTC (permalink / raw)
  To: James Bottomley
  Cc: Jens Axboe, Christoph Hellwig, Matthew Wilcox, SCSI Mailing List

On Thu, 31 Mar 2005 gl@dsa-ac.de wrote:

> On Wed, 30 Mar 2005, James Bottomley wrote:
> 
> > On Wed, 2005-03-30 at 23:22 +0200, Guennadi Liakhovetski wrote:
> > > What is going to happen to this stuff? The current Linus' tree contains a
> > > broken (by me :-() dc395x; other drivers (including tmscsim) would benefit
> > > from a generic API. I understand the drivers are not of top importance, but
> > > still.
> >
> > I'd propose fixing the driver first and then using that fix as the basis
> > for a generic API.  Is the patch you sent on the 17th of March what
> > you'd consider to be your final fix for this?
> 
> Well, it could be. But I'd use Jens' suggestion and actually test it for
> highmem. And, since we are at it, I'd try to organise this code as a
> separate API, so that we just have to replace dc395x_* with scsi_* in
> function names. Makes sense?

Ok, here's what I've come up with so far. Too bad - I cannot actually test 
it, not only for high-, but also for lowmem: in my tests those PIO paths 
are not entered. More precisely - they are entered, e.g., when reading the 
disk's partition table, or doing sginfo -a, but only without 
scatter-gather... I also cannot test the wide part. I've only got a narrow 
(dc315, I think) card, and the only devices I can easily connect are an 
external 100MB ZIP and an internal tape drive. Does anybody have an idea 
when those paths are actually entered? Or could anybody test the patch? 
Another bad thing is the diffstat:

dc395x.c |  200 +++++++++++++++++++++++++++++++++++++++++----------------------
 1 files changed, 132 insertions(+), 68 deletions(-)

One of the reasons the number of "+"s is so large is that, on the one 
hand, not knowing the hardware and having no datasheet at hand, I tried 
not to modify the flow significantly; on the other hand, I tried to make 
it rather generic. E.g., I am pretty sure that those PIO bytes always 
belong to only one sg-segment, but I handled the general case - perhaps 
overkill. Anyway, please comment. Not asking for inclusion - yet.

Thanks
Guennadi
---
Guennadi Liakhovetski

diff -u a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c
--- a/drivers/scsi/dc395x.c	4 Mar 2005 22:42:35
+++ b/drivers/scsi/dc395x.c	9 Apr 2005 22:10:23
@@ -229,12 +229,8 @@
 	struct scsi_cmnd *cmd;
 
 	struct SGentry *segment_x;	/* Linear array of hw sg entries (up to 64 entries) */
-	u32 sg_bus_addr;	        /* Bus address of sg list (ie, of segment_x) */
-
-	u8 sg_count;			/* No of HW sg entries for this request */
-	u8 sg_index;			/* Index of HW sg entry for this request */
-	u32 total_xfer_length;		/* Total number of bytes remaining to be transfered */
-	unsigned char *virt_addr;	/* Virtual address of current transfer position */
+	size_t total_xfer_length;	/* Total number of bytes remaining to be transferred */
+	size_t request_length;		/* Total number of bytes in this request */
 
 	/*
 	 * The sense buffer handling function, request_sense, uses
@@ -245,7 +241,12 @@
 	 * total_xfer_length in xferred. These values are restored in
 	 * pci_unmap_srb_sense. This is the only place xferred is used.
 	 */
-	u32 xferred;		        /* Saved copy of total_xfer_length */
+	size_t xferred;		        /* Saved copy of total_xfer_length */
+
+	u32 sg_bus_addr;	        /* Bus address of sg list (ie, of segment_x) */
+
+	u8 sg_count;			/* No of HW sg entries for this request */
+	u8 sg_index;			/* Index of HW sg entry for this request */
 
 	u16 state;
 
@@ -989,7 +990,6 @@
 	srb->sg_count = 0;
 	srb->total_xfer_length = 0;
 	srb->sg_bus_addr = 0;
-	srb->virt_addr = NULL;
 	srb->sg_index = 0;
 	srb->adapter_status = 0;
 	srb->target_status = 0;
@@ -1020,7 +1020,6 @@
 			reqlen, cmd->request_buffer, cmd->use_sg,
 			srb->sg_count);
 
-		srb->virt_addr = page_address(sl->page);
 		for (i = 0; i < srb->sg_count; i++) {
 			u32 busaddr = (u32)sg_dma_address(&sl[i]);
 			u32 seglen = (u32)sl[i].length;
@@ -1065,12 +1064,14 @@
 			srb->total_xfer_length++;
 
 		srb->segment_x[0].length = srb->total_xfer_length;
-		srb->virt_addr = cmd->request_buffer;
+
 		dprintkdbg(DBG_0,
 			"build_srb: [1] len=%d buf=%p use_sg=%d map=%08x\n",
 			srb->total_xfer_length, cmd->request_buffer,
 			cmd->use_sg, srb->segment_x[0].address);
 	}
+
+	srb->request_length = srb->total_xfer_length;
 }
 
 
@@ -1954,14 +1955,12 @@
 
 /*
  * Compute the next Scatter Gather list index and adjust its length
- * and address if necessary; also compute virt_addr
+ * and address if necessary
  */
 static void sg_update_list(struct ScsiReqBlk *srb, u32 left)
 {
 	u8 idx;
-	struct scatterlist *sg;
 	struct scsi_cmnd *cmd = srb->cmd;
-	int segment = cmd->use_sg;
 	u32 xferred = srb->total_xfer_length - left; /* bytes transfered */
 	struct SGentry *psge = srb->segment_x + srb->sg_index;
 
@@ -1994,29 +1993,6 @@
 		psge++;
 	}
 	sg_verify_length(srb);
-
-	/* we need the corresponding virtual address */
-	if (!segment) {
-		srb->virt_addr += xferred;
-		return;
-	}
-
-	/* We have to walk the scatterlist to find it */
-	sg = (struct scatterlist *)cmd->request_buffer;
-	while (segment--) {
-		unsigned long mask =
-		    ~((unsigned long)sg->length - 1) & PAGE_MASK;
-		if ((sg_dma_address(sg) & mask) == (psge->address & mask)) {
-			srb->virt_addr = (page_address(sg->page)
-					   + psge->address -
-					   (psge->address & PAGE_MASK));
-			return;
-		}
-		++sg;
-	}
-
-	dprintkl(KERN_ERR, "sg_update_list: sg_to_virt failed\n");
-	srb->virt_addr = NULL;
 }
 
 
@@ -2035,7 +2011,6 @@
 		if (debug_enabled(DBG_PIO))
 			printk(" (next segment)");
 		srb->sg_index++;
-		sg_update_list(srb, srb->total_xfer_length);
 	}
 }
 
@@ -2195,7 +2170,6 @@
 				    srb->total_xfer_length - diff;
 				sg_update_list(srb, d_left_counter);
 				/*srb->total_xfer_length -= diff; */
-				/*srb->virt_addr += diff; */
 				/*if (srb->cmd->use_sg) */
 				/*      srb->sg_index++; */
 			}
@@ -2217,12 +2191,42 @@
 	data_io_transfer(acb, srb, XFERDATAOUT);
 }
 
+/**
+ * dc395x_kmap_atomic_sg - find and atomically map an sg-element
+ * @sg:		scatter-gather list
+ * @sg_count:	number of segments in sg
+ * @offset:	offset in bytes into sg
+ * @len:	on return number of bytes mapped
+ *
+ * Return virtual address
+ */
+static void *dc395x_kmap_atomic_sg(struct scatterlist *sg, int sg_count, size_t offset, size_t *len)
+{
+	int i;
+	size_t sg_len = 0;
+
+	for (i = 0; i < sg_count; i++) {
+		sg_len += sg[i].length;
+		if (sg_len > offset)
+			break;
+	}
+
+	BUG_ON(i == sg_count);
+
+	*len = sg_len - offset;
+
+	return kmap_atomic(sg[i].page, KM_BIO_SRC_IRQ) + sg[i].offset + sg[i].length - *len;
+}
+
+static void dc395x_kunmap_atomic_sg(void *virt)
+{
+	kunmap_atomic(virt_to_page(virt), KM_BIO_SRC_IRQ);
+}
 
 static void data_in_phase0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
 		u16 *pscsi_status)
 {
 	u16 scsi_status = *pscsi_status;
-	u32 d_left_counter = 0;
 	dprintkdbg(DBG_0, "data_in_phase0: (pid#%li) <%02i-%i>\n",
 		srb->cmd->pid, srb->cmd->device->id, srb->cmd->device->lun);
 
@@ -2240,6 +2244,7 @@
 	 * seem to be a bad idea, actually.
 	 */
 	if (!(srb->state & SRB_XFERPAD)) {
+		unsigned int d_left_counter, max_left;
 		if (scsi_status & PARITYERROR) {
 			dprintkl(KERN_INFO, "data_in_phase0: (pid#%li) "
 				"Parity Error\n", srb->cmd->pid);
@@ -2292,42 +2297,81 @@
 			DC395x_read32(acb, TRM_S1040_DMA_CXCNT),
 			srb->total_xfer_length, d_left_counter);
 #if DC395x_LASTPIO
+		max_left = max(d_left_counter, srb->total_xfer_length);
 		/* KG: Less than or equal to 4 bytes can not be transfered via DMA, it seems. */
 		if (d_left_counter
 		    && srb->total_xfer_length <= DC395x_LASTPIO) {
+			size_t left_io = srb->total_xfer_length;
+
 			/*u32 addr = (srb->segment_x[srb->sg_index].address); */
 			/*sg_update_list (srb, d_left_counter); */
-			dprintkdbg(DBG_PIO, "data_in_phase0: PIO (%i %s) to "
-				"%p for remaining %i bytes:",
+			dprintkdbg(DBG_PIO, "data_in_phase0: PIO (%i %s) "
+				"for remaining %i bytes:",
 				DC395x_read8(acb, TRM_S1040_SCSI_FIFOCNT) & 0x1f,
 				(srb->dcb->sync_period & WIDE_SYNC) ?
 				    "words" : "bytes",
-				srb->virt_addr,
 				srb->total_xfer_length);
 			if (srb->dcb->sync_period & WIDE_SYNC)
 				DC395x_write8(acb, TRM_S1040_SCSI_CONFIG2,
 					      CFG2_WIDEFIFO);
-			while (DC395x_read8(acb, TRM_S1040_SCSI_FIFOCNT) != 0x40) {
-				u8 byte = DC395x_read8(acb, TRM_S1040_SCSI_FIFO);
-				*(srb->virt_addr)++ = byte;
-				if (debug_enabled(DBG_PIO))
-					printk(" %02x", byte);
-				d_left_counter--;
-				sg_subtract_one(srb);
-			}
-			if (srb->dcb->sync_period & WIDE_SYNC) {
-#if 1
-                /* Read the last byte ... */
-				if (srb->total_xfer_length > 0) {
-					u8 byte = DC395x_read8(acb, TRM_S1040_SCSI_FIFO);
-					*(srb->virt_addr)++ = byte;
-					srb->total_xfer_length--;
+
+			while (left_io) {
+				unsigned char *virt, *tmp = NULL;
+				unsigned long flags = 0;
+				size_t len;
+				u8 fifocnt = 0;
+
+				if (srb->cmd->use_sg) {
+					local_irq_save(flags);
+					virt = tmp = dc395x_kmap_atomic_sg((struct scatterlist *)srb->cmd->request_buffer,
+									   srb->sg_count, srb->request_length - left_io, &len);
+				} else {
+					virt = srb->cmd->request_buffer + srb->cmd->request_bufflen - left_io;
+					len = left_io;
+				}
+				left_io -= len;
+
+				while (len) {
+					u8 byte;
+					fifocnt = DC395x_read8(acb, TRM_S1040_SCSI_FIFOCNT);
+
+					if (fifocnt == 0x40) {
+						left_io = 0;
+						break;
+					}
+
+					byte = DC395x_read8(acb, TRM_S1040_SCSI_FIFO);
+					*virt++ = byte;
+
 					if (debug_enabled(DBG_PIO))
 						printk(" %02x", byte);
+
+					d_left_counter--;
+					sg_subtract_one(srb);
+
+					len--;
 				}
+
+				if (fifocnt == 0x40 && (srb->dcb->sync_period & WIDE_SYNC)) {
+#if 1
+					/* Read the last byte ... */
+					if (srb->total_xfer_length > 0) {
+						u8 byte = DC395x_read8(acb, TRM_S1040_SCSI_FIFO);
+						*virt++ = byte;
+						srb->total_xfer_length--;
+						if (debug_enabled(DBG_PIO))
+							printk(" %02x", byte);
+					}
 #endif
-				DC395x_write8(acb, TRM_S1040_SCSI_CONFIG2, 0);
+					DC395x_write8(acb, TRM_S1040_SCSI_CONFIG2, 0);
+				}
+
+				if (srb->cmd->use_sg) {
+					dc395x_kunmap_atomic_sg(tmp);
+					local_irq_restore(flags);
+				}
 			}
+
 			/*printk(" %08x", *(u32*)(bus_to_virt (addr))); */
 			/*srb->total_xfer_length = 0; */
 			if (debug_enabled(DBG_PIO))
@@ -2485,22 +2529,42 @@
 				      SCMD_FIFO_IN);
 		} else {	/* write */
 			int ln = srb->total_xfer_length;
+			size_t left_io = srb->total_xfer_length;
+
 			if (srb->dcb->sync_period & WIDE_SYNC)
 				DC395x_write8(acb, TRM_S1040_SCSI_CONFIG2,
 				     CFG2_WIDEFIFO);
-			dprintkdbg(DBG_PIO,
-				"data_io_transfer: PIO %i bytes from %p:",
-				srb->total_xfer_length, srb->virt_addr);
-
-			while (srb->total_xfer_length) {
-				if (debug_enabled(DBG_PIO))
-					printk(" %02x", (unsigned char) *(srb->virt_addr));
 
-				DC395x_write8(acb, TRM_S1040_SCSI_FIFO, 
-				     *(srb->virt_addr)++);
+			while (left_io) {
+				unsigned char *virt, *tmp = NULL;
+				unsigned long flags = 0;
+				size_t len;
+
+				if (srb->cmd->use_sg) {
+					local_irq_save(flags);
+					virt = tmp = dc395x_kmap_atomic_sg((struct scatterlist *)srb->cmd->request_buffer,
+									   srb->sg_count, srb->request_length - left_io, &len);
+				} else {
+					virt = srb->cmd->request_buffer + srb->cmd->request_bufflen - left_io;
+					len = left_io;
+				}
+				left_io -= len;
+
+				while (len--) {
+					if (debug_enabled(DBG_PIO))
+						printk(" %02x", *virt);
+
+					DC395x_write8(acb, TRM_S1040_SCSI_FIFO, *virt++);
 
-				sg_subtract_one(srb);
+					sg_subtract_one(srb);
+				}
+
+				if (srb->cmd->use_sg) {
+					dc395x_kunmap_atomic_sg(tmp);
+					local_irq_restore(flags);
+				}
 			}
+
 			if (srb->dcb->sync_period & WIDE_SYNC) {
 				if (ln % 2) {
 					DC395x_write8(acb, TRM_S1040_SCSI_FIFO, 0);

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] dc395x: Fix support for highmem
  2005-04-09 22:48                                   ` Guennadi Liakhovetski
@ 2005-04-21 21:49                                     ` Guennadi Liakhovetski
  2005-04-22 11:36                                       ` Jens Axboe
  0 siblings, 1 reply; 36+ messages in thread
From: Guennadi Liakhovetski @ 2005-04-21 21:49 UTC (permalink / raw)
  To: James Bottomley
  Cc: Jens Axboe, Christoph Hellwig, Matthew Wilcox, SCSI Mailing List,
	Kurt Garloff

Hi again

A patch to tmscsim.c, removing bus_to_virt() and, while at it, cleaning up 
cpu / le32 conversions, could look something like this. Note that it uses 
the same <...>_k(un)map_atomic_sg() as proposed for dc395x, so certainly 
this is not a final patch - in the end those functions would migrate to 
something like scsi_lib.c(?). No, I still don't know how to test those PIO 
paths.

Kurt, your comments are in dc395x.c around the PIO code. Have you 
found out when and how it gets invoked? Do you still remember? Any hints 
for testing possibilities?

Thanks
Guennadi
---
Guennadi Liakhovetski

diff -u a/drivers/scsi/tmscsim.c b/drivers/scsi/tmscsim.c
--- a/drivers/scsi/tmscsim.c	13 Jan 2005 21:10:01
+++ b/drivers/scsi/tmscsim.c	21 Apr 2005 21:36:51
@@ -856,8 +856,8 @@
 		pSRB->pSegmentList++;
 		psgl = pSRB->pSegmentList;
 
-		pSRB->SGBusAddr = cpu_to_le32(pci_dma_lo32(sg_dma_address(psgl)));
-		pSRB->SGToBeXferLen = cpu_to_le32(sg_dma_len(psgl));
+		pSRB->SGBusAddr = sg_dma_address(psgl);
+		pSRB->SGToBeXferLen = sg_dma_len(psgl);
 	    }
 	    else
 		pSRB->SGToBeXferLen = 0;
@@ -882,10 +882,42 @@
     }	    
 }
 
+
+/**
+ * dc390_kmap_atomic_sg - find and atomically map an sg-element
+ * @sg:		scatter-gather list
+ * @sg_count:	number of segments in sg
+ * @offset:	offset in bytes into sg
+ * @len:	on return number of bytes mapped
+ *
+ * Return virtual address
+ */
+static void *dc390_kmap_atomic_sg(struct scatterlist *sg, int sg_count, size_t offset, size_t *len)
+{
+	int i;
+	size_t sg_len = 0;
+
+	for (i = 0; i < sg_count; i++) {
+		sg_len += sg[i].length;
+		if (sg_len > offset)
+			break;
+	}
+
+	BUG_ON(i == sg_count);
+
+	*len = sg_len - offset;
+
+	return kmap_atomic(sg[i].page, KM_BIO_SRC_IRQ) + sg[i].offset + sg[i].length - *len;
+}
+
+static void dc390_kunmap_atomic_sg(void *virt)
+{
+	kunmap_atomic(virt_to_page(virt), KM_BIO_SRC_IRQ);
+}
 static void
 dc390_DataIn_0( struct dc390_acb* pACB, struct dc390_srb* pSRB, u8 *psstatus)
 {
-    u8   sstatus, residual, bval;
+    u8   sstatus;
     struct scatterlist *psgl;
     u32    ResidCnt, i;
     unsigned long   xferCnt;
@@ -931,16 +963,17 @@
 		pSRB->pSegmentList++;
 		psgl = pSRB->pSegmentList;
 
-		pSRB->SGBusAddr = cpu_to_le32(pci_dma_lo32(sg_dma_address(psgl)));
-		pSRB->SGToBeXferLen = cpu_to_le32(sg_dma_len(psgl));
+		pSRB->SGBusAddr = sg_dma_address(psgl);
+		pSRB->SGToBeXferLen = sg_dma_len(psgl);
 	    }
 	    else
 		pSRB->SGToBeXferLen = 0;
 	}
 	else	/* phase changed */
 	{
-	    residual = 0;
-	    bval = DC390_read8 (Current_Fifo);
+	    u8 residual = 0;
+	    u8 bval = DC390_read8 (Current_Fifo);
+
 	    while( bval & 0x1f )
 	    {
 		DEBUG1(printk (KERN_DEBUG "Check for residuals,"));
@@ -988,10 +1021,20 @@
 
 	    if( residual )
 	    {
+		size_t count = 1;
+		unsigned long flags;
+
 		bval = DC390_read8 (ScsiFifo);	    /* get one residual byte */
-		ptr = (u8 *) bus_to_virt( pSRB->SGBusAddr );
+
+		local_irq_save(flags);
+		ptr = (u8*)dc390_kmap_atomic_sg(pSRB->pSegmentList, pSRB->SGcount, pSRB->TotalXferredLen, &count);
+
 		*ptr = bval;
-		pSRB->SGBusAddr++; xferCnt++;
+		dc390_kunmap_atomic_sg(ptr);
+		local_irq_restore(flags);
+
+		pSRB->SGBusAddr++;
+		xferCnt++;
 		pSRB->TotalXferredLen++;
 		pSRB->SGToBeXferLen--;
 	    }
@@ -1212,32 +1255,31 @@
 {
     struct scsi_cmnd *pcmd = pSRB->pcmd;
     struct scatterlist *psgl;
+
     pSRB->TotalXferredLen = 0;
     pSRB->SGIndex = 0;
     if (pcmd->use_sg) {
+	unsigned long last_xfer;
+
 	pSRB->pSegmentList = (struct scatterlist *)pcmd->request_buffer;
-	psgl = pSRB->pSegmentList;
-	//dc390_pci_sync(pSRB);
 
-	while (pSRB->TotalXferredLen + (unsigned long) sg_dma_len(psgl) < pSRB->Saved_Ptr)
-	{
-	    pSRB->TotalXferredLen += (unsigned long) sg_dma_len(psgl);
-	    pSRB->SGIndex++;
-	    if( pSRB->SGIndex < pSRB->SGcount )
-	    {
-		pSRB->pSegmentList++;
-		psgl = pSRB->pSegmentList;
-		pSRB->SGBusAddr = cpu_to_le32(pci_dma_lo32(sg_dma_address(psgl)));
-		pSRB->SGToBeXferLen = cpu_to_le32(sg_dma_len(psgl));
-	    }
-	    else
-		pSRB->SGToBeXferLen = 0;
+	for (psgl = pSRB->pSegmentList; pSRB->TotalXferredLen + sg_dma_len(psgl) < pSRB->Saved_Ptr; psgl++) {
+		pSRB->TotalXferredLen += sg_dma_len(psgl);
+		pSRB->SGIndex++;
 	}
-	pSRB->SGToBeXferLen -= (pSRB->Saved_Ptr - pSRB->TotalXferredLen);
-	pSRB->SGBusAddr += (pSRB->Saved_Ptr - pSRB->TotalXferredLen);
-	printk (KERN_INFO "DC390: Pointer restored. Segment %i, Total %li, Bus %08lx\n",
-		pSRB->SGIndex, pSRB->Saved_Ptr, pSRB->SGBusAddr);
 
+	BUG_ON(pSRB->SGIndex >= pSRB->SGcount);
+
+	last_xfer = pSRB->Saved_Ptr - pSRB->TotalXferredLen;
+
+	pSRB->pSegmentList += pSRB->SGIndex;
+	pSRB->SGBusAddr = sg_dma_address(psgl) + last_xfer;
+	pSRB->SGToBeXferLen = sg_dma_len(psgl) - last_xfer;
+
+	//dc390_pci_sync(pSRB);
+
+	DEBUG0(printk(KERN_INFO "DC390: Pointer restored. Segment %i, Total %li, Bus %08lx\n",
+		      pSRB->SGIndex, pSRB->Saved_Ptr, pSRB->SGBusAddr));
     } else if(pcmd->request_buffer) {
 	//dc390_pci_sync(pSRB);
 
@@ -1245,11 +1287,11 @@
 	pSRB->SGcount = 1;
 	pSRB->pSegmentList = (struct scatterlist *) &pSRB->Segmentx;
     } else {
-	 pSRB->SGcount = 0;
-	 printk (KERN_INFO "DC390: RESTORE_PTR message for Transfer without Scatter-Gather ??\n");
+	pSRB->SGcount = 0;
+	printk (KERN_INFO "DC390: RESTORE_PTR message for Transfer without Scatter-Gather ??\n");
     }
 
-  pSRB->TotalXferredLen = pSRB->Saved_Ptr;
+    pSRB->TotalXferredLen = pSRB->Saved_Ptr;
 }
 
 
@@ -1385,8 +1427,8 @@
 	if( !pSRB->SGToBeXferLen )
 	{
 	    psgl = pSRB->pSegmentList;
-	    pSRB->SGBusAddr = cpu_to_le32(pci_dma_lo32(sg_dma_address(psgl)));
-	    pSRB->SGToBeXferLen = cpu_to_le32(sg_dma_len(psgl));
+	    pSRB->SGBusAddr = sg_dma_address(psgl);
+	    pSRB->SGToBeXferLen = sg_dma_len(psgl);
 	    DEBUG1(printk (KERN_DEBUG " DC390: Next SG segment."));
 	}
 	lval = pSRB->SGToBeXferLen;
@@ -1397,8 +1439,8 @@
 	lval >>= 8;
 	DC390_write8 (CtcReg_High, (u8) lval);
 
-	DC390_write32 (DMA_XferCnt, pSRB->SGToBeXferLen);
-	DC390_write32 (DMA_XferAddr, pSRB->SGBusAddr);
+	DC390_write32 (DMA_XferCnt, cpu_to_le32(pSRB->SGToBeXferLen));
+	DC390_write32 (DMA_XferAddr, cpu_to_le32((u32)pSRB->SGBusAddr));
 
 	//DC390_write8 (DMA_Cmd, DMA_IDLE_CMD | ioDir); /* | DMA_INT; */
 	pSRB->SRBState = SRB_DATA_XFER;
@@ -2036,7 +2078,7 @@
 
     if (pSRB) 
     {
-	printk ("DC390: SRB: Xferred %08lx, Remain %08lx, State %08x, Phase %02x\n",
+	printk ("DC390: SRB: Xferred %08lx, Remain %08x, State %08x, Phase %02x\n",
 		pSRB->TotalXferredLen, pSRB->SGToBeXferLen, pSRB->SRBState,
 		pSRB->ScsiPhase);
 	printk ("DC390: AdpaterStatus: %02x, SRB Status %02x\n", pSRB->AdaptStatus, pSRB->SRBStatus);

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] dc395x: Fix support for highmem
  2005-04-21 21:49                                     ` Guennadi Liakhovetski
@ 2005-04-22 11:36                                       ` Jens Axboe
  2005-04-23  9:35                                         ` Guennadi Liakhovetski
  0 siblings, 1 reply; 36+ messages in thread
From: Jens Axboe @ 2005-04-22 11:36 UTC (permalink / raw)
  To: Guennadi Liakhovetski
  Cc: James Bottomley, Christoph Hellwig, Matthew Wilcox,
	SCSI Mailing List, Kurt Garloff

On Thu, Apr 21 2005, Guennadi Liakhovetski wrote:
> +static void *dc390_kmap_atomic_sg(struct scatterlist *sg, int sg_count, size_t offset, size_t *len)
> +{
> +	int i;
> +	size_t sg_len = 0;
> +
> +	for (i = 0; i < sg_count; i++) {
> +		sg_len += sg[i].length;
> +		if (sg_len > offset)
> +			break;
> +	}
> +
> +	BUG_ON(i == sg_count);
> +
> +	*len = sg_len - offset;
> +
> +	return kmap_atomic(sg[i].page, KM_BIO_SRC_IRQ) + sg[i].offset + sg[i].length - *len;
> +}
> +
> +static void dc390_kunmap_atomic_sg(void *virt)
> +{
> +	kunmap_atomic(virt_to_page(virt), KM_BIO_SRC_IRQ);
> +}

Please remember to test this with highmem debug. The above is buggy,
kunmap_atomic() takes the mapped pointer, not the page structure.
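
I.e., the unmap helper simply wants the mapped address back (sketch):

        static void dc390_kunmap_atomic_sg(void *virt)
        {
                kunmap_atomic(virt, KM_BIO_SRC_IRQ);
        }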

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] dc395x: Fix support for highmem
  2005-04-22 11:36                                       ` Jens Axboe
@ 2005-04-23  9:35                                         ` Guennadi Liakhovetski
  2005-04-23 10:06                                           ` Guennadi Liakhovetski
  0 siblings, 1 reply; 36+ messages in thread
From: Guennadi Liakhovetski @ 2005-04-23  9:35 UTC (permalink / raw)
  To: Jens Axboe
  Cc: James Bottomley, Christoph Hellwig, Matthew Wilcox,
	SCSI Mailing List, Kurt Garloff

On Fri, 22 Apr 2005, Jens Axboe wrote:

> On Thu, Apr 21 2005, Guennadi Liakhovetski wrote:
> > +static void dc390_kunmap_atomic_sg(void *virt)
> > +{
> > +	kunmap_atomic(virt_to_page(virt), KM_BIO_SRC_IRQ);
> > +}
> 
> Please remember to test this with highmem debug. The above is buggy,
> kunmap_atomic() takes the mapped pointer, not the page structure.

Oops... Thanks. Sure, I'd like to test it - the problem being, I don't know 
how to get the damn thing to execute that path. I'll keep thinking about 
testing possibilities. So far, on both my test machines with HDs, CD-ROM, 
tape and an external ZIP, it didn't hit that path. Below is an updated 
patch.

By highmem debug do you mean just enabling the respective option under 
kernel debugging, or does one still need that patch from Andrea you 
mentioned earlier to test it on lowmem machines?

Thanks
Guennadi
---
Guennadi Liakhovetski

Index: drivers/scsi/tmscsim.c
===================================================================
RCS file: /usr/src/cvs/linux-2_6/drivers/scsi/tmscsim.c,v
retrieving revision 1.1.1.8
diff -u -r1.1.1.8 tmscsim.c
--- drivers/scsi/tmscsim.c	13 Jan 2005 21:10:01 -0000	1.1.1.8
+++ drivers/scsi/tmscsim.c	23 Apr 2005 09:04:34 -0000
@@ -461,6 +461,8 @@
 	return error;
 }
 
+static int debug_kmap;
+
 /* Remove pci mapping */
 static void dc390_pci_unmap (struct dc390_srb* pSRB)
 {
@@ -856,8 +858,8 @@
 		pSRB->pSegmentList++;
 		psgl = pSRB->pSegmentList;
 
-		pSRB->SGBusAddr = cpu_to_le32(pci_dma_lo32(sg_dma_address(psgl)));
-		pSRB->SGToBeXferLen = cpu_to_le32(sg_dma_len(psgl));
+		pSRB->SGBusAddr = sg_dma_address(psgl);
+		pSRB->SGToBeXferLen = sg_dma_len(psgl);
 	    }
 	    else
 		pSRB->SGToBeXferLen = 0;
@@ -882,10 +884,44 @@
     }	    
 }
 
+
+/**
+ * dc390_kmap_atomic_sg - find and atomically map an sg-element
+ * @sg:		scatter-gather list
+ * @sg_count:	number of segments in sg
+ * @offset:	offset in bytes into sg, on return offset into mapped area
+ * @len:	on return number of bytes mapped
+ *
+ * Returns virtual address of the start of the mapped page
+ */
+static void *dc390_kmap_atomic_sg(struct scatterlist *sg, int sg_count, size_t *offset, size_t *len)
+{
+	int i;
+	size_t sg_len = 0;
+
+	for (i = 0; i < sg_count; i++) {
+		sg_len += sg[i].length;
+		if (sg_len > *offset)
+			break;
+	}
+
+	BUG_ON(i == sg_count);
+
+	*len = sg_len - *offset;
+	*offset = sg[i].offset + sg[i].length - *len;
+
+	return kmap_atomic(sg[i].page, KM_BIO_SRC_IRQ);
+}
+
+static void dc390_kunmap_atomic_sg(void *virt)
+{
+	kunmap_atomic(virt, KM_BIO_SRC_IRQ);
+}
+
 static void
 dc390_DataIn_0( struct dc390_acb* pACB, struct dc390_srb* pSRB, u8 *psstatus)
 {
-    u8   sstatus, residual, bval;
+    u8   sstatus;
     struct scatterlist *psgl;
     u32    ResidCnt, i;
     unsigned long   xferCnt;
@@ -931,16 +967,17 @@
 		pSRB->pSegmentList++;
 		psgl = pSRB->pSegmentList;
 
-		pSRB->SGBusAddr = cpu_to_le32(pci_dma_lo32(sg_dma_address(psgl)));
-		pSRB->SGToBeXferLen = cpu_to_le32(sg_dma_len(psgl));
+		pSRB->SGBusAddr = sg_dma_address(psgl);
+		pSRB->SGToBeXferLen = sg_dma_len(psgl);
 	    }
 	    else
 		pSRB->SGToBeXferLen = 0;
 	}
 	else	/* phase changed */
 	{
-	    residual = 0;
-	    bval = DC390_read8 (Current_Fifo);
+	    u8 residual = 0;
+	    u8 bval = DC390_read8 (Current_Fifo);
+
 	    while( bval & 0x1f )
 	    {
 		DEBUG1(printk (KERN_DEBUG "Check for residuals,"));
@@ -988,10 +1025,24 @@
 
 	    if( residual )
 	    {
+		size_t count = 1;
+		unsigned long flags;
+		size_t offset = pSRB->TotalXferredLen;
+		struct scsi_cmnd *pcmd = pSRB->pcmd;
+		struct scatterlist *sg = pcmd->use_sg ?
+			(struct scatterlist *)pcmd->request_buffer : &pSRB->pSegmentx;
+
 		bval = DC390_read8 (ScsiFifo);	    /* get one residual byte */
-		ptr = (u8 *) bus_to_virt( pSRB->SGBusAddr );
-		*ptr = bval;
-		pSRB->SGBusAddr++; xferCnt++;
+
+		local_irq_save(flags);
+		ptr = (u8*)dc390_kmap_atomic_sg(sg, pSRB->SGcount, &offset, &count);
+
+		*(ptr + offset) = bval;
+		dc390_kunmap_atomic_sg(ptr);
+		local_irq_restore(flags);
+
+		pSRB->SGBusAddr++;
+		xferCnt++;
 		pSRB->TotalXferredLen++;
 		pSRB->SGToBeXferLen--;
 	    }
@@ -1212,32 +1263,31 @@
 {
     struct scsi_cmnd *pcmd = pSRB->pcmd;
     struct scatterlist *psgl;
+
     pSRB->TotalXferredLen = 0;
     pSRB->SGIndex = 0;
     if (pcmd->use_sg) {
+	unsigned long last_xfer;
+
 	pSRB->pSegmentList = (struct scatterlist *)pcmd->request_buffer;
-	psgl = pSRB->pSegmentList;
-	//dc390_pci_sync(pSRB);
 
-	while (pSRB->TotalXferredLen + (unsigned long) sg_dma_len(psgl) < pSRB->Saved_Ptr)
-	{
-	    pSRB->TotalXferredLen += (unsigned long) sg_dma_len(psgl);
-	    pSRB->SGIndex++;
-	    if( pSRB->SGIndex < pSRB->SGcount )
-	    {
-		pSRB->pSegmentList++;
-		psgl = pSRB->pSegmentList;
-		pSRB->SGBusAddr = cpu_to_le32(pci_dma_lo32(sg_dma_address(psgl)));
-		pSRB->SGToBeXferLen = cpu_to_le32(sg_dma_len(psgl));
-	    }
-	    else
-		pSRB->SGToBeXferLen = 0;
+	for (psgl = pSRB->pSegmentList; pSRB->TotalXferredLen + sg_dma_len(psgl) < pSRB->Saved_Ptr; psgl++) {
+		pSRB->TotalXferredLen += sg_dma_len(psgl);
+		pSRB->SGIndex++;
 	}
-	pSRB->SGToBeXferLen -= (pSRB->Saved_Ptr - pSRB->TotalXferredLen);
-	pSRB->SGBusAddr += (pSRB->Saved_Ptr - pSRB->TotalXferredLen);
-	printk (KERN_INFO "DC390: Pointer restored. Segment %i, Total %li, Bus %08lx\n",
-		pSRB->SGIndex, pSRB->Saved_Ptr, pSRB->SGBusAddr);
 
+	BUG_ON(pSRB->SGIndex >= pSRB->SGcount);
+
+	last_xfer = pSRB->Saved_Ptr - pSRB->TotalXferredLen;
+
+	pSRB->pSegmentList += pSRB->SGIndex;
+	pSRB->SGBusAddr = sg_dma_address(psgl) + last_xfer;
+	pSRB->SGToBeXferLen = sg_dma_len(psgl) - last_xfer;
+
+	//dc390_pci_sync(pSRB);
+
+	DEBUG0(printk(KERN_INFO "DC390: Pointer restored. Segment %i, Total %li, Bus %08lx\n",
+		      pSRB->SGIndex, pSRB->Saved_Ptr, pSRB->SGBusAddr));
     } else if(pcmd->request_buffer) {
 	//dc390_pci_sync(pSRB);
 
@@ -1245,11 +1295,11 @@
 	pSRB->SGcount = 1;
 	pSRB->pSegmentList = (struct scatterlist *) &pSRB->Segmentx;
     } else {
-	 pSRB->SGcount = 0;
-	 printk (KERN_INFO "DC390: RESTORE_PTR message for Transfer without Scatter-Gather ??\n");
+	pSRB->SGcount = 0;
+	printk (KERN_INFO "DC390: RESTORE_PTR message for Transfer without Scatter-Gather ??\n");
     }
 
-  pSRB->TotalXferredLen = pSRB->Saved_Ptr;
+    pSRB->TotalXferredLen = pSRB->Saved_Ptr;
 }
 
 
@@ -1385,8 +1435,8 @@
 	if( !pSRB->SGToBeXferLen )
 	{
 	    psgl = pSRB->pSegmentList;
-	    pSRB->SGBusAddr = cpu_to_le32(pci_dma_lo32(sg_dma_address(psgl)));
-	    pSRB->SGToBeXferLen = cpu_to_le32(sg_dma_len(psgl));
+	    pSRB->SGBusAddr = sg_dma_address(psgl);
+	    pSRB->SGToBeXferLen = sg_dma_len(psgl);
 	    DEBUG1(printk (KERN_DEBUG " DC390: Next SG segment."));
 	}
 	lval = pSRB->SGToBeXferLen;
@@ -1397,8 +1447,8 @@
 	lval >>= 8;
 	DC390_write8 (CtcReg_High, (u8) lval);
 
-	DC390_write32 (DMA_XferCnt, pSRB->SGToBeXferLen);
-	DC390_write32 (DMA_XferAddr, pSRB->SGBusAddr);
+	DC390_write32 (DMA_XferCnt, cpu_to_le32(pSRB->SGToBeXferLen));
+	DC390_write32 (DMA_XferAddr, cpu_to_le32((u32)pSRB->SGBusAddr));
 
 	//DC390_write8 (DMA_Cmd, DMA_IDLE_CMD | ioDir); /* | DMA_INT; */
 	pSRB->SRBState = SRB_DATA_XFER;

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] dc395x: Fix support for highmem
  2005-04-23  9:35                                         ` Guennadi Liakhovetski
@ 2005-04-23 10:06                                           ` Guennadi Liakhovetski
  2005-04-23 19:01                                             ` James Bottomley
  0 siblings, 1 reply; 36+ messages in thread
From: Guennadi Liakhovetski @ 2005-04-23 10:06 UTC (permalink / raw)
  To: Jens Axboe
  Cc: James Bottomley, Christoph Hellwig, Matthew Wilcox,
	SCSI Mailing List, Kurt Garloff

Hm, it is sometimes a good idea to at least compile-test one's patches... 
So, the one below at least compiles.

One more thing I wanted to ask - what can one assume about request 
buffers? If it is an sg, I guess each segment is within one page frame, 
right? And if sg is not used, is request_buffer also guaranteed to be 
within one page or not?

On Sat, 23 Apr 2005, Guennadi Liakhovetski wrote:

> On Fri, 22 Apr 2005, Jens Axboe wrote:
> 
> > On Thu, Apr 21 2005, Guennadi Liakhovetski wrote:
> > > +static void dc390_kunmap_atomic_sg(void *virt)
> > > +{
> > > +	kunmap_atomic(virt_to_page(virt), KM_BIO_SRC_IRQ);
> > > +}
> > 
> > Please remember to test this with highmem debug. The above is buggy,
> > kunmap_atomic() takes the mapped pointer, not the page structure.
> 
> Oops... Thanks. Sure, I'd like to test it - the problem being, I don't know 
> how to get the damn thing to execute that path. I'll keep thinking about 
> testing possibilities. So far, on both my test machines with HDs, CD-ROM, 
> tape and an external ZIP, it didn't hit that path. Below is an updated 
> patch.
> 
> By highmem debug do you mean just enabling the respective option under 
> kernel debugging, or does one still need that patch from Andrea you 
> mentioned earlier to test it on lowmem machines?

Thanks
Guennadi
---
Guennadi Liakhovetski

diff -u a/drivers/scsi/tmscsim.c b/drivers/scsi/tmscsim.c
--- a/drivers/scsi/tmscsim.c	13 Jan 2005 21:10:01
+++ b/drivers/scsi/tmscsim.c	23 Apr 2005 09:56:50
@@ -856,8 +856,8 @@
 		pSRB->pSegmentList++;
 		psgl = pSRB->pSegmentList;
 
-		pSRB->SGBusAddr = cpu_to_le32(pci_dma_lo32(sg_dma_address(psgl)));
-		pSRB->SGToBeXferLen = cpu_to_le32(sg_dma_len(psgl));
+		pSRB->SGBusAddr = sg_dma_address(psgl);
+		pSRB->SGToBeXferLen = sg_dma_len(psgl);
 	    }
 	    else
 		pSRB->SGToBeXferLen = 0;
@@ -882,10 +882,44 @@
     }	    
 }
 
+
+/**
+ * dc390_kmap_atomic_sg - find and atomically map an sg-element
+ * @sg:		scatter-gather list
+ * @sg_count:	number of segments in sg
+ * @offset:	offset in bytes into sg, on return offset into mapped area
+ * @len:	on return number of bytes mapped
+ *
+ * Returns virtual address of the start of the mapped page
+ */
+static void *dc390_kmap_atomic_sg(struct scatterlist *sg, int sg_count, size_t *offset, size_t *len)
+{
+	int i;
+	size_t sg_len = 0;
+
+	for (i = 0; i < sg_count; i++) {
+		sg_len += sg[i].length;
+		if (sg_len > *offset)
+			break;
+	}
+
+	BUG_ON(i == sg_count);
+
+	*len = sg_len - *offset;
+	*offset = sg[i].offset + sg[i].length - *len;
+
+	return kmap_atomic(sg[i].page, KM_BIO_SRC_IRQ);
+}
+
+static void dc390_kunmap_atomic_sg(void *virt)
+{
+	kunmap_atomic(virt, KM_BIO_SRC_IRQ);
+}
+
 static void
 dc390_DataIn_0( struct dc390_acb* pACB, struct dc390_srb* pSRB, u8 *psstatus)
 {
-    u8   sstatus, residual, bval;
+    u8   sstatus;
     struct scatterlist *psgl;
     u32    ResidCnt, i;
     unsigned long   xferCnt;
@@ -931,16 +965,17 @@
 		pSRB->pSegmentList++;
 		psgl = pSRB->pSegmentList;
 
-		pSRB->SGBusAddr = cpu_to_le32(pci_dma_lo32(sg_dma_address(psgl)));
-		pSRB->SGToBeXferLen = cpu_to_le32(sg_dma_len(psgl));
+		pSRB->SGBusAddr = sg_dma_address(psgl);
+		pSRB->SGToBeXferLen = sg_dma_len(psgl);
 	    }
 	    else
 		pSRB->SGToBeXferLen = 0;
 	}
 	else	/* phase changed */
 	{
-	    residual = 0;
-	    bval = DC390_read8 (Current_Fifo);
+	    u8 residual = 0;
+	    u8 bval = DC390_read8 (Current_Fifo);
+
 	    while( bval & 0x1f )
 	    {
 		DEBUG1(printk (KERN_DEBUG "Check for residuals,"));
@@ -988,10 +1023,24 @@
 
 	    if( residual )
 	    {
+		size_t count = 1;
+		unsigned long flags;
+		size_t offset = pSRB->TotalXferredLen;
+		struct scsi_cmnd *pcmd = pSRB->pcmd;
+		struct scatterlist *sg = pcmd->use_sg ?
+			(struct scatterlist *)pcmd->request_buffer : &pSRB->Segmentx;
+
 		bval = DC390_read8 (ScsiFifo);	    /* get one residual byte */
-		ptr = (u8 *) bus_to_virt( pSRB->SGBusAddr );
-		*ptr = bval;
-		pSRB->SGBusAddr++; xferCnt++;
+
+		local_irq_save(flags);
+		ptr = (u8*)dc390_kmap_atomic_sg(sg, pSRB->SGcount, &offset, &count);
+
+		*(ptr + offset) = bval;
+		dc390_kunmap_atomic_sg(ptr);
+		local_irq_restore(flags);
+
+		pSRB->SGBusAddr++;
+		xferCnt++;
 		pSRB->TotalXferredLen++;
 		pSRB->SGToBeXferLen--;
 	    }
@@ -1212,32 +1261,31 @@
 {
     struct scsi_cmnd *pcmd = pSRB->pcmd;
     struct scatterlist *psgl;
+
     pSRB->TotalXferredLen = 0;
     pSRB->SGIndex = 0;
     if (pcmd->use_sg) {
+	unsigned long last_xfer;
+
 	pSRB->pSegmentList = (struct scatterlist *)pcmd->request_buffer;
-	psgl = pSRB->pSegmentList;
-	//dc390_pci_sync(pSRB);
 
-	while (pSRB->TotalXferredLen + (unsigned long) sg_dma_len(psgl) < pSRB->Saved_Ptr)
-	{
-	    pSRB->TotalXferredLen += (unsigned long) sg_dma_len(psgl);
-	    pSRB->SGIndex++;
-	    if( pSRB->SGIndex < pSRB->SGcount )
-	    {
-		pSRB->pSegmentList++;
-		psgl = pSRB->pSegmentList;
-		pSRB->SGBusAddr = cpu_to_le32(pci_dma_lo32(sg_dma_address(psgl)));
-		pSRB->SGToBeXferLen = cpu_to_le32(sg_dma_len(psgl));
-	    }
-	    else
-		pSRB->SGToBeXferLen = 0;
+	for (psgl = pSRB->pSegmentList; pSRB->TotalXferredLen + sg_dma_len(psgl) < pSRB->Saved_Ptr; psgl++) {
+		pSRB->TotalXferredLen += sg_dma_len(psgl);
+		pSRB->SGIndex++;
 	}
-	pSRB->SGToBeXferLen -= (pSRB->Saved_Ptr - pSRB->TotalXferredLen);
-	pSRB->SGBusAddr += (pSRB->Saved_Ptr - pSRB->TotalXferredLen);
-	printk (KERN_INFO "DC390: Pointer restored. Segment %i, Total %li, Bus %08lx\n",
-		pSRB->SGIndex, pSRB->Saved_Ptr, pSRB->SGBusAddr);
 
+	BUG_ON(pSRB->SGIndex >= pSRB->SGcount);
+
+	last_xfer = pSRB->Saved_Ptr - pSRB->TotalXferredLen;
+
+	pSRB->pSegmentList += pSRB->SGIndex;
+	pSRB->SGBusAddr = sg_dma_address(psgl) + last_xfer;
+	pSRB->SGToBeXferLen = sg_dma_len(psgl) - last_xfer;
+
+	//dc390_pci_sync(pSRB);
+
+	DEBUG0(printk(KERN_INFO "DC390: Pointer restored. Segment %i, Total %li, Bus %08lx\n",
+		      pSRB->SGIndex, pSRB->Saved_Ptr, pSRB->SGBusAddr));
     } else if(pcmd->request_buffer) {
 	//dc390_pci_sync(pSRB);
 
@@ -1245,11 +1293,11 @@
 	pSRB->SGcount = 1;
 	pSRB->pSegmentList = (struct scatterlist *) &pSRB->Segmentx;
     } else {
-	 pSRB->SGcount = 0;
-	 printk (KERN_INFO "DC390: RESTORE_PTR message for Transfer without Scatter-Gather ??\n");
+	pSRB->SGcount = 0;
+	printk (KERN_INFO "DC390: RESTORE_PTR message for Transfer without Scatter-Gather ??\n");
     }
 
-  pSRB->TotalXferredLen = pSRB->Saved_Ptr;
+    pSRB->TotalXferredLen = pSRB->Saved_Ptr;
 }
 
 
@@ -1385,8 +1433,8 @@
 	if( !pSRB->SGToBeXferLen )
 	{
 	    psgl = pSRB->pSegmentList;
-	    pSRB->SGBusAddr = cpu_to_le32(pci_dma_lo32(sg_dma_address(psgl)));
-	    pSRB->SGToBeXferLen = cpu_to_le32(sg_dma_len(psgl));
+	    pSRB->SGBusAddr = sg_dma_address(psgl);
+	    pSRB->SGToBeXferLen = sg_dma_len(psgl);
 	    DEBUG1(printk (KERN_DEBUG " DC390: Next SG segment."));
 	}
 	lval = pSRB->SGToBeXferLen;
@@ -1397,8 +1445,8 @@
 	lval >>= 8;
 	DC390_write8 (CtcReg_High, (u8) lval);
 
-	DC390_write32 (DMA_XferCnt, pSRB->SGToBeXferLen);
-	DC390_write32 (DMA_XferAddr, pSRB->SGBusAddr);
+	DC390_write32 (DMA_XferCnt, cpu_to_le32(pSRB->SGToBeXferLen));
+	DC390_write32 (DMA_XferAddr, cpu_to_le32((u32)pSRB->SGBusAddr));
 
 	//DC390_write8 (DMA_Cmd, DMA_IDLE_CMD | ioDir); /* | DMA_INT; */
 	pSRB->SRBState = SRB_DATA_XFER;
@@ -2036,7 +2084,7 @@
 
     if (pSRB) 
     {
-	printk ("DC390: SRB: Xferred %08lx, Remain %08lx, State %08x, Phase %02x\n",
+	printk ("DC390: SRB: Xferred %08lx, Remain %08x, State %08x, Phase %02x\n",
 		pSRB->TotalXferredLen, pSRB->SGToBeXferLen, pSRB->SRBState,
 		pSRB->ScsiPhase);
 	printk ("DC390: AdpaterStatus: %02x, SRB Status %02x\n", pSRB->AdaptStatus, pSRB->SRBStatus);


* Re: [PATCH] dc395x: Fix support for highmem
  2005-04-23 10:06                                           ` Guennadi Liakhovetski
@ 2005-04-23 19:01                                             ` James Bottomley
  2005-04-24  0:22                                               ` Guennadi Liakhovetski
  0 siblings, 1 reply; 36+ messages in thread
From: James Bottomley @ 2005-04-23 19:01 UTC (permalink / raw)
  To: Guennadi Liakhovetski
  Cc: Jens Axboe, Christoph Hellwig, Matthew Wilcox, SCSI Mailing List,
	Kurt Garloff

On Sat, 2005-04-23 at 12:06 +0200, Guennadi Liakhovetski wrote:
> Hm, it is sometimes a good idea to at least compile-test one's patches... 
> So, the one below at least compiles.
> 
> One more thing I wanted to ask - what can one assume about request 
> buffers? If it is an sg, I guess each segment is within one page frame, 
> right? And if sg is not used, is request_buffer also guaranteed to be 
> within one page or not?

No to the first if you've enabled clustering or have an IOMMU, and no
to the second.

As long as the sg list hasn't gone through dma_map_sg, you can rely on
its elements being contiguous in physical space.  Once it has gone
through dma_map_sg, its elements are contiguous in the bus space beyond
the IOMMU, so they are no longer necessarily even physically contiguous.

The request_buffer in the non-sg case is guaranteed to be both
physically and virtually contiguous.
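
To make this concrete, here is a minimal sketch of the two views of the
same list (2.6-style, purely illustrative code, not from any driver):

#include <linux/kernel.h>
#include <linux/dma-mapping.h>
#include <asm/scatterlist.h>

/* Before dma_map_sg() the .page/.offset/.length triple describes
 * CPU-visible, physically contiguous chunks; afterwards only the
 * sg_dma_address()/sg_dma_len() accessors, iterated over the count
 * dma_map_sg() returned, mean anything to the device. */
static void show_sg_views(struct device *dev, struct scatterlist *sg,
			  int nents)
{
	int i, mapped;

	for (i = 0; i < nents; i++)		/* CPU view */
		printk(KERN_DEBUG "cpu seg %d: page %p off %u len %u\n",
		       i, sg[i].page, sg[i].offset, sg[i].length);

	mapped = dma_map_sg(dev, sg, nents, DMA_FROM_DEVICE);
	/* error handling omitted; mapped may be less than nents */

	for (i = 0; i < mapped; i++)		/* device (bus) view */
		printk(KERN_DEBUG "bus seg %d: addr %08lx len %u\n",
		       i, (unsigned long)sg_dma_address(&sg[i]),
		       sg_dma_len(&sg[i]));

	dma_unmap_sg(dev, sg, nents, DMA_FROM_DEVICE);
}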

James




* Re: [PATCH] dc395x: Fix support for highmem
  2005-04-23 19:01                                             ` James Bottomley
@ 2005-04-24  0:22                                               ` Guennadi Liakhovetski
  2005-04-24 14:31                                                 ` Guennadi Liakhovetski
  2005-04-24 22:11                                                 ` James Bottomley
  0 siblings, 2 replies; 36+ messages in thread
From: Guennadi Liakhovetski @ 2005-04-24  0:22 UTC (permalink / raw)
  To: James Bottomley
  Cc: Jens Axboe, Christoph Hellwig, Matthew Wilcox, SCSI Mailing List,
	Kurt Garloff

On Sat, 23 Apr 2005, James Bottomley wrote:

> As long as the sg list hasn't gone through dma_map_sg, you can rely on
> its elements being contiguous in physical space.  Once it has gone
> through dma_map_sg, its elements are contiguous in the bus space beyond
> the IOMMU, so they are no longer necessarily even physically contiguous.

Thanks for the info. Let me see if I got you right. Say we've got an sg 
with 2 elements: the first 2 * PAGE_SIZE long, offset 0, pointing to page 
#0, the second PAGE_SIZE long, offset 0, page #3. Say dma_map_sg returned 
1, so it mapped all those 3 (physically discontiguous) pages to a 
contiguous bus address range, and now sg_dma_len() returns 3 * PAGE_SIZE. 
But, I hope, the .page, .length, and .offset elements stayed unchanged, 
so one can still walk elements 0 and 1, calculating a sum of 
sg[i].length, and thus arrive at the required page, right?
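
I.e. I would walk it roughly like this (just a sketch of what I mean,
assuming those three fields really do survive the mapping):

#include <linux/types.h>
#include <asm/scatterlist.h>

/* Find the original element holding byte *offset of the request; on
 * return *offset is the remainder within that element.  Uses only
 * .length, never the dma_* fields. */
static struct scatterlist *sg_elem_for_offset(struct scatterlist *sg,
					      int nents, size_t *offset)
{
	int i;

	for (i = 0; i < nents; i++) {
		if (*offset < sg[i].length)
			return sg + i;
		*offset -= sg[i].length;
	}
	return NULL;	/* offset past the end of the list */
}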

I'll redo the patch.

Thanks
Guennadi
---
Guennadi Liakhovetski



* Re: [PATCH] dc395x: Fix support for highmem
  2005-04-24  0:22                                               ` Guennadi Liakhovetski
@ 2005-04-24 14:31                                                 ` Guennadi Liakhovetski
  2005-04-24 22:11                                                 ` James Bottomley
  1 sibling, 0 replies; 36+ messages in thread
From: Guennadi Liakhovetski @ 2005-04-24 14:31 UTC (permalink / raw)
  To: James Bottomley
  Cc: Jens Axboe, Christoph Hellwig, Matthew Wilcox, SCSI Mailing List,
	Kurt Garloff

On Sun, 24 Apr 2005, Guennadi Liakhovetski wrote:

> On Sat, 23 Apr 2005, James Bottomley wrote:
> 
> > As long as the sg list hasn't gone through dma_map_sg, you can rely on
> > its elements being contiguous in physical space.  Once it has gone
> > through dma_map_sg, its elements are contiguous in the bus space beyond
> > the IOMMU, so they are no longer necessarily even physically contiguous.
> 
> Thanks for the info. Let me see if I got you right. Say we've got an sg 
> with 2 elements: the first 2 * PAGE_SIZE long, offset 0, pointing to page 
> #0, the second PAGE_SIZE long, offset 0, page #3. Say dma_map_sg returned 
> 1, so it mapped all those 3 (physically discontiguous) pages to a 
> contiguous bus address range, and now sg_dma_len() returns 3 * PAGE_SIZE. 
> But, I hope, the .page, .length, and .offset elements stayed unchanged, 
> so one can still walk elements 0 and 1, calculating a sum of 
> sg[i].length, and thus arrive at the required page, right?
> 
> I'll redo the patch.

If my assumption above is right, the patch below should cover
bigger-than-page sg elements too.
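
The page arithmetic for such an element then works out as in this
sketch (made-up numbers, mirroring what the kmap helper in the patch
below does):

#include <linux/highmem.h>
#include <linux/mm.h>
#include <asm/scatterlist.h>

/* E.g. an element with .offset = 0 and .length = 3 * PAGE_SIZE, and the
 * residual at byte PAGE_SIZE + 100 into the element:
 *   page    = sg->page + ((PAGE_SIZE + 100) >> PAGE_SHIFT) = sg->page + 1
 *   in_page = (PAGE_SIZE + 100) & ~PAGE_MASK               = 100
 * Only the single page frame holding the byte is mapped; the caller
 * kunmap_atomic()s the returned (page-aligned) address. */
static void *map_within_element(struct scatterlist *sg, size_t off,
				size_t *in_page)
{
	struct page *page = sg->page + ((sg->offset + off) >> PAGE_SHIFT);

	*in_page = (sg->offset + off) & ~PAGE_MASK;
	return kmap_atomic(page, KM_BIO_SRC_IRQ);
}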

Thanks
Guennadi
---
Guennadi Liakhovetski

Clean up endianness conversions, don't use bus_to_virt(), map atomically 
instead.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>

diff -u a/drivers/scsi/tmscsim.c b/drivers/scsi/tmscsim.c
--- a/drivers/scsi/tmscsim.c	13 Jan 2005 21:10:01
+++ b/drivers/scsi/tmscsim.c	24 Apr 2005 12:59:36
@@ -856,8 +856,8 @@
 		pSRB->pSegmentList++;
 		psgl = pSRB->pSegmentList;
 
-		pSRB->SGBusAddr = cpu_to_le32(pci_dma_lo32(sg_dma_address(psgl)));
-		pSRB->SGToBeXferLen = cpu_to_le32(sg_dma_len(psgl));
+		pSRB->SGBusAddr = sg_dma_address(psgl);
+		pSRB->SGToBeXferLen = sg_dma_len(psgl);
 	    }
 	    else
 		pSRB->SGToBeXferLen = 0;
@@ -882,10 +882,48 @@
     }	    
 }
 
+
+/**
+ * dc390_kmap_atomic_sg - find and atomically map an sg-element
+ * @sg:		scatter-gather list
+ * @sg_count:	number of segments in sg
+ * @offset:	offset in bytes into sg, on return offset into the mapped area
+ * @len:	on return number of bytes mapped
+ *
+ * Returns virtual address of the start of the mapped page
+ */
+static void *dc390_kmap_atomic_sg(struct scatterlist *sg, int sg_count, size_t *offset, size_t *len)
+{
+	int i;
+	size_t sg_len = 0;
+	struct page *page;
+
+	for (i = 0; i < sg_count; i++) {
+		sg_len += sg[i].length;
+		if (sg_len > *offset)
+			break;
+	}
+
+	BUG_ON(i == sg_count);
+
+	*len = sg_len - *offset;
+	*offset = sg[i].offset + sg[i].length - *len;
+
+	page = sg[i].page + (*offset >> PAGE_SHIFT);
+	*offset &= ~PAGE_MASK;
+
+	return kmap_atomic(page, KM_BIO_SRC_IRQ);
+}
+
+static void dc390_kunmap_atomic_sg(void *virt)
+{
+	kunmap_atomic(virt, KM_BIO_SRC_IRQ);
+}
+
 static void
 dc390_DataIn_0( struct dc390_acb* pACB, struct dc390_srb* pSRB, u8 *psstatus)
 {
-    u8   sstatus, residual, bval;
+    u8   sstatus;
     struct scatterlist *psgl;
     u32    ResidCnt, i;
     unsigned long   xferCnt;
@@ -931,16 +969,17 @@
 		pSRB->pSegmentList++;
 		psgl = pSRB->pSegmentList;
 
-		pSRB->SGBusAddr = cpu_to_le32(pci_dma_lo32(sg_dma_address(psgl)));
-		pSRB->SGToBeXferLen = cpu_to_le32(sg_dma_len(psgl));
+		pSRB->SGBusAddr = sg_dma_address(psgl);
+		pSRB->SGToBeXferLen = sg_dma_len(psgl);
 	    }
 	    else
 		pSRB->SGToBeXferLen = 0;
 	}
 	else	/* phase changed */
 	{
-	    residual = 0;
-	    bval = DC390_read8 (Current_Fifo);
+	    u8 residual = 0;
+	    u8 bval = DC390_read8 (Current_Fifo);
+
 	    while( bval & 0x1f )
 	    {
 		DEBUG1(printk (KERN_DEBUG "Check for residuals,"));
@@ -988,10 +1027,25 @@
 
 	    if( residual )
 	    {
+		size_t count = 1;
+		unsigned long flags;
+		size_t offset = pSRB->TotalXferredLen;
+		struct scsi_cmnd *pcmd = pSRB->pcmd;
+		int sg_count = pcmd->use_sg ? : 1;
+		struct scatterlist *sg = pcmd->use_sg ?
+			(struct scatterlist *)pcmd->request_buffer : &pSRB->Segmentx;
+
 		bval = DC390_read8 (ScsiFifo);	    /* get one residual byte */
-		ptr = (u8 *) bus_to_virt( pSRB->SGBusAddr );
-		*ptr = bval;
-		pSRB->SGBusAddr++; xferCnt++;
+
+		local_irq_save(flags);
+		ptr = (u8*)dc390_kmap_atomic_sg(sg, sg_count, &offset, &count);
+
+		*(ptr + offset) = bval;
+		dc390_kunmap_atomic_sg(ptr);
+		local_irq_restore(flags);
+
+		pSRB->SGBusAddr++;
+		xferCnt++;
 		pSRB->TotalXferredLen++;
 		pSRB->SGToBeXferLen--;
 	    }
@@ -1212,32 +1266,31 @@
 {
     struct scsi_cmnd *pcmd = pSRB->pcmd;
     struct scatterlist *psgl;
+
     pSRB->TotalXferredLen = 0;
     pSRB->SGIndex = 0;
     if (pcmd->use_sg) {
+	unsigned long last_xfer;
+
 	pSRB->pSegmentList = (struct scatterlist *)pcmd->request_buffer;
-	psgl = pSRB->pSegmentList;
-	//dc390_pci_sync(pSRB);
 
-	while (pSRB->TotalXferredLen + (unsigned long) sg_dma_len(psgl) < pSRB->Saved_Ptr)
-	{
-	    pSRB->TotalXferredLen += (unsigned long) sg_dma_len(psgl);
-	    pSRB->SGIndex++;
-	    if( pSRB->SGIndex < pSRB->SGcount )
-	    {
-		pSRB->pSegmentList++;
-		psgl = pSRB->pSegmentList;
-		pSRB->SGBusAddr = cpu_to_le32(pci_dma_lo32(sg_dma_address(psgl)));
-		pSRB->SGToBeXferLen = cpu_to_le32(sg_dma_len(psgl));
-	    }
-	    else
-		pSRB->SGToBeXferLen = 0;
+	for (psgl = pSRB->pSegmentList; pSRB->TotalXferredLen + sg_dma_len(psgl) < pSRB->Saved_Ptr; psgl++) {
+		pSRB->TotalXferredLen += sg_dma_len(psgl);
+		pSRB->SGIndex++;
 	}
-	pSRB->SGToBeXferLen -= (pSRB->Saved_Ptr - pSRB->TotalXferredLen);
-	pSRB->SGBusAddr += (pSRB->Saved_Ptr - pSRB->TotalXferredLen);
-	printk (KERN_INFO "DC390: Pointer restored. Segment %i, Total %li, Bus %08lx\n",
-		pSRB->SGIndex, pSRB->Saved_Ptr, pSRB->SGBusAddr);
 
+	BUG_ON(pSRB->SGIndex >= pSRB->SGcount);
+
+	last_xfer = pSRB->Saved_Ptr - pSRB->TotalXferredLen;
+
+	pSRB->pSegmentList += pSRB->SGIndex;
+	pSRB->SGBusAddr = sg_dma_address(psgl) + last_xfer;
+	pSRB->SGToBeXferLen = sg_dma_len(psgl) - last_xfer;
+
+	//dc390_pci_sync(pSRB);
+
+	DEBUG0(printk(KERN_INFO "DC390: Pointer restored. Segment %i, Total %li, Bus %08lx\n",
+		      pSRB->SGIndex, pSRB->Saved_Ptr, pSRB->SGBusAddr));
     } else if(pcmd->request_buffer) {
 	//dc390_pci_sync(pSRB);
 
@@ -1245,11 +1298,11 @@
 	pSRB->SGcount = 1;
 	pSRB->pSegmentList = (struct scatterlist *) &pSRB->Segmentx;
     } else {
-	 pSRB->SGcount = 0;
-	 printk (KERN_INFO "DC390: RESTORE_PTR message for Transfer without Scatter-Gather ??\n");
+	pSRB->SGcount = 0;
+	printk (KERN_INFO "DC390: RESTORE_PTR message for Transfer without Scatter-Gather ??\n");
     }
 
-  pSRB->TotalXferredLen = pSRB->Saved_Ptr;
+    pSRB->TotalXferredLen = pSRB->Saved_Ptr;
 }
 
 
@@ -1385,8 +1438,8 @@
 	if( !pSRB->SGToBeXferLen )
 	{
 	    psgl = pSRB->pSegmentList;
-	    pSRB->SGBusAddr = cpu_to_le32(pci_dma_lo32(sg_dma_address(psgl)));
-	    pSRB->SGToBeXferLen = cpu_to_le32(sg_dma_len(psgl));
+	    pSRB->SGBusAddr = sg_dma_address(psgl);
+	    pSRB->SGToBeXferLen = sg_dma_len(psgl);
 	    DEBUG1(printk (KERN_DEBUG " DC390: Next SG segment."));
 	}
 	lval = pSRB->SGToBeXferLen;
@@ -1397,8 +1450,8 @@
 	lval >>= 8;
 	DC390_write8 (CtcReg_High, (u8) lval);
 
-	DC390_write32 (DMA_XferCnt, pSRB->SGToBeXferLen);
-	DC390_write32 (DMA_XferAddr, pSRB->SGBusAddr);
+	DC390_write32 (DMA_XferCnt, cpu_to_le32(pSRB->SGToBeXferLen));
+	DC390_write32 (DMA_XferAddr, cpu_to_le32((u32)pSRB->SGBusAddr));
 
 	//DC390_write8 (DMA_Cmd, DMA_IDLE_CMD | ioDir); /* | DMA_INT; */
 	pSRB->SRBState = SRB_DATA_XFER;
@@ -2036,7 +2089,7 @@
 
     if (pSRB) 
     {
-	printk ("DC390: SRB: Xferred %08lx, Remain %08lx, State %08x, Phase %02x\n",
+	printk ("DC390: SRB: Xferred %08lx, Remain %08x, State %08x, Phase %02x\n",
 		pSRB->TotalXferredLen, pSRB->SGToBeXferLen, pSRB->SRBState,
 		pSRB->ScsiPhase);
 	printk ("DC390: AdpaterStatus: %02x, SRB Status %02x\n", pSRB->AdaptStatus, pSRB->SRBStatus);


* Re: [PATCH] dc395x: Fix support for highmem
  2005-04-24  0:22                                               ` Guennadi Liakhovetski
  2005-04-24 14:31                                                 ` Guennadi Liakhovetski
@ 2005-04-24 22:11                                                 ` James Bottomley
  2005-04-25 19:56                                                   ` Guennadi Liakhovetski
  1 sibling, 1 reply; 36+ messages in thread
From: James Bottomley @ 2005-04-24 22:11 UTC (permalink / raw)
  To: Guennadi Liakhovetski
  Cc: Jens Axboe, Christoph Hellwig, Matthew Wilcox, SCSI Mailing List,
	Kurt Garloff

On Sun, 2005-04-24 at 02:22 +0200, Guennadi Liakhovetski wrote:
> Thanks for the info. Let me see if I got you right. Say we've got an sg 
> with 2 elements: the first 2 * PAGE_SIZE long, offset 0, pointing to 
> page #0, the second PAGE_SIZE long, offset 0, page #3. Say dma_map_sg 
> returned 1, so it mapped all those 3 (physically discontiguous) pages 
> to a contiguous bus address range, and now sg_dma_len() returns 
> 3 * PAGE_SIZE.

Yes, that's true.

>  But, I hope, 
> the .page, .length, and .offset elements stayed unchanged, so one can 
> still walk elements 0 and 1, calculating a sum of sg[i].length, and 
> thus arrive at the required page, right?

This, I'm not entirely sure about.  There are some weird iommu mapping
implementations out there.  The only absolute requirement is that the
sequence map_sg, unmap_sg, map_sg actually works.  There was some debate
over whether unmap_sg was supposed to return the sg list to its original
form, but x86_64 got beaten up in the argument, so this holds too.

So ... I think the answer to your question is "yes", at least for all
the iommu implementations I know about.  However, nothing in the
published API actually requires this.

What I think all this means is that you can get away with what you're
proposing.  However, the proper route would be to unmap the sglist
before you start feeding the leftovers via pio.
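
In outline (hypothetical code; you would also have to map the list again
before handing the rest of the transfer back to the chip):

#include <linux/pci.h>
#include <linux/highmem.h>

/* Give ownership back to the CPU, then PIO a leftover byte through a
 * plain atomic kmap of the original page.  Writing at sg->offset is
 * illustrative only - where the byte really lands depends on how much
 * has been transferred already. */
static void pio_leftover(struct pci_dev *pdev, struct scatterlist *sg,
			 int nents, u8 byte)
{
	u8 *ptr;

	pci_unmap_sg(pdev, sg, nents, PCI_DMA_FROMDEVICE);

	/* .page/.offset/.length are unambiguously CPU-valid again here */
	ptr = kmap_atomic(sg->page, KM_BIO_SRC_IRQ);
	ptr[sg->offset] = byte;
	kunmap_atomic(ptr, KM_BIO_SRC_IRQ);
}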

James




* Re: [PATCH] dc395x: Fix support for highmem
  2005-04-24 22:11                                                 ` James Bottomley
@ 2005-04-25 19:56                                                   ` Guennadi Liakhovetski
  0 siblings, 0 replies; 36+ messages in thread
From: Guennadi Liakhovetski @ 2005-04-25 19:56 UTC (permalink / raw)
  To: James Bottomley
  Cc: Jens Axboe, Christoph Hellwig, Matthew Wilcox, SCSI Mailing List,
	Kurt Garloff

Hi

On Sun, 24 Apr 2005, James Bottomley wrote:

> On Sun, 2005-04-24 at 02:22 +0200, Guennadi Liakhovetski wrote:
> >  But, I hope, 
> > the .page, .length, and .offset elements stayed unchanged, so one can 
> > still walk elements 0 and 1, calculating a sum of sg[i].length, and 
> > thus arrive at the required page, right?
> 
> This, I'm not entirely sure about.  There are some weird iommu mapping
> implementations out there.  The only absolute requirement is that the
> sequence map_sg, unmap_sg, map_sg actually works.  There was some debate
> over whether unmap_sg was supposed to return the sg list to its original
> form, but x86_64 got beaten up in the argument, so this holds too.
> 
> So ... I think the answer to your question is "yes", at least for all
> the iommu implementations I know about.  However, nothing in the
> published API actually requires this.
> 
> What I think all this means is that you can get away with what you're
> proposing.  However, the proper route would be to unmap the sglist
> before you start feeding the leftovers via pio.

Hmm, does anyone already on CC know the exact answer? Shall I forward 
it to LKML? It'd be nice to know for certain. I'm not sure unmapping the 
sg list would always be feasible, or doable at all - e.g., in tmscsim 
(AFAIR, in dc395x too) PIO is done in the ISR. Would it be too limiting, 
or impossible, to actually require that .page, .length and .offset stay 
unchanged over map_sg?

Ok, I actually looked through

alpha
ia64
parisc
ppc64
sparc64
x86_64

- AFAICS, none of them ever modifies those sg members on sg_(un)map. Did I 
miss anybody with an IOMMU?

Thanks
Guennadi
---
Guennadi Liakhovetski


