All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] skge: add dma_mapping check
@ 2013-08-05  0:22 Stephen Hemminger
  2013-08-05  1:35 ` David Miller
  0 siblings, 1 reply; 25+ messages in thread
From: Stephen Hemminger @ 2013-08-05  0:22 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

This old driver never checked for DMA mapping errors,
causing splats with the new DMA mapping checks:
	WARNING: at lib/dma-debug.c:937 check_unmap+0x47b/0x930()
	skge 0000:01:09.0: DMA-API: device driver failed to check map

Add checks and unwind code.

Reported-by: poma <pomidorabelisima@gmail.com>
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>


--- a/drivers/net/ethernet/marvell/skge.c	2013-08-04 11:03:20.842978549 -0700
+++ b/drivers/net/ethernet/marvell/skge.c	2013-08-04 17:08:19.256002544 -0700
@@ -931,15 +931,18 @@ static int skge_ring_alloc(struct skge_r
 }
 
 /* Allocate and setup a new buffer for receiving */
-static void skge_rx_setup(struct skge_port *skge, struct skge_element *e,
+static int skge_rx_setup(struct skge_port *skge, struct skge_element *e,
 			  struct sk_buff *skb, unsigned int bufsize)
 {
 	struct skge_rx_desc *rd = e->desc;
-	u64 map;
+	dma_addr_t map;
 
 	map = pci_map_single(skge->hw->pdev, skb->data, bufsize,
 			     PCI_DMA_FROMDEVICE);
 
+	if (pci_dma_mapping_error(skge->hw->pdev, map))
+		return -1;
+
 	rd->dma_lo = map;
 	rd->dma_hi = map >> 32;
 	e->skb = skb;
@@ -953,6 +956,7 @@ static void skge_rx_setup(struct skge_po
 	rd->control = BMU_OWN | BMU_STF | BMU_IRQ_EOF | BMU_TCP_CHECK | bufsize;
 	dma_unmap_addr_set(e, mapaddr, map);
 	dma_unmap_len_set(e, maplen, bufsize);
+	return 0;
 }
 
 /* Resume receiving using existing skb,
@@ -1014,7 +1018,10 @@ static int skge_rx_fill(struct net_devic
 			return -ENOMEM;
 
 		skb_reserve(skb, NET_IP_ALIGN);
-		skge_rx_setup(skge, e, skb, skge->rx_buf_size);
+		if (skge_rx_setup(skge, e, skb, skge->rx_buf_size) < 0) {
+			dev_kfree_skb(skb);
+			return -EIO;
+		}
 	} while ((e = e->next) != ring->start);
 
 	ring->to_clean = ring->start;
@@ -2729,7 +2736,7 @@ static netdev_tx_t skge_xmit_frame(struc
 	struct skge_tx_desc *td;
 	int i;
 	u32 control, len;
-	u64 map;
+	dma_addr_t map;
 
 	if (skb_padto(skb, ETH_ZLEN))
 		return NETDEV_TX_OK;
@@ -2743,6 +2750,9 @@ static netdev_tx_t skge_xmit_frame(struc
 	e->skb = skb;
 	len = skb_headlen(skb);
 	map = pci_map_single(hw->pdev, skb->data, len, PCI_DMA_TODEVICE);
+	if (pci_dma_mapping_error(hw->pdev, map))
+		goto mapping_error;
+
 	dma_unmap_addr_set(e, mapaddr, map);
 	dma_unmap_len_set(e, maplen, len);
 
@@ -2778,6 +2788,8 @@ static netdev_tx_t skge_xmit_frame(struc
 
 			map = skb_frag_dma_map(&hw->pdev->dev, frag, 0,
 					       skb_frag_size(frag), DMA_TO_DEVICE);
+			if (dma_mapping_error(&hw->pdev->dev, map))
+				goto mapping_unwind;
 
 			e = e->next;
 			e->skb = skb;
@@ -2815,6 +2827,26 @@ static netdev_tx_t skge_xmit_frame(struc
 	}
 
 	return NETDEV_TX_OK;
+
+mapping_unwind:
+	e = skge->tx_ring.to_use;
+	pci_unmap_single(hw->pdev,
+			 dma_unmap_addr(e, mapaddr),
+			 dma_unmap_len(e, maplen),
+			 PCI_DMA_TODEVICE);
+	while (i-- > 0) {
+		e = e->next;
+		pci_unmap_page(hw->pdev,
+			       dma_unmap_addr(e, mapaddr),
+			       dma_unmap_len(e, maplen),
+			       PCI_DMA_TODEVICE);
+	}
+
+mapping_error:
+	if (net_ratelimit())
+		dev_warn(&hw->pdev->dev, "%s: tx mapping error\n", dev->name);
+	dev_kfree_skb(skb);
+	return NETDEV_TX_OK;
 }
 
 
@@ -3058,13 +3090,17 @@ static struct sk_buff *skge_rx_get(struc
 		if (!nskb)
 			goto resubmit;
 
+		if (skge_rx_setup(skge, e, nskb, skge->rx_buf_size) < 0) {
+			dev_kfree_skb(nskb);
+			goto resubmit;
+		}
+
 		pci_unmap_single(skge->hw->pdev,
 				 dma_unmap_addr(e, mapaddr),
 				 dma_unmap_len(e, maplen),
 				 PCI_DMA_FROMDEVICE);
 		skb = e->skb;
 		prefetch(skb->data);
-		skge_rx_setup(skge, e, nskb, skge->rx_buf_size);
 	}
 
 	skb_put(skb, len);

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: add dma_mapping check
  2013-08-05  0:22 [PATCH net] skge: add dma_mapping check Stephen Hemminger
@ 2013-08-05  1:35 ` David Miller
  2013-08-05  3:40   ` [PATCH net] skge: fix build on 32 bit Stephen Hemminger
  2013-08-10 11:51   ` [PATCH net] skge: add dma_mapping check poma
  0 siblings, 2 replies; 25+ messages in thread
From: David Miller @ 2013-08-05  1:35 UTC (permalink / raw)
  To: stephen; +Cc: netdev

From: Stephen Hemminger <stephen@networkplumber.org>
Date: Sun, 4 Aug 2013 17:22:34 -0700

> This old driver never checked for DMA mapping errors.
> Causing splats with the new DMA mapping checks:
> 	WARNING: at lib/dma-debug.c:937 check_unmap+0x47b/0x930()
> 	skge 0000:01:09.0: DMA-API: device driver failed to check map
> 
> Add checks and unwind code.
> 
> Reported-by: poma <pomidorabelisima@gmail.com>
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>

Applied, but:

> -static void skge_rx_setup(struct skge_port *skge, struct skge_element *e,
> +static int skge_rx_setup(struct skge_port *skge, struct skge_element *e,
>  			  struct sk_buff *skb, unsigned int bufsize)

I reflowed the argument indentation for this in the final commit.

Thanks.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH net] skge: fix build on 32 bit
  2013-08-05  1:35 ` David Miller
@ 2013-08-05  3:40   ` Stephen Hemminger
  2013-08-05  6:37     ` David Miller
  2013-08-10 11:51   ` [PATCH net] skge: add dma_mapping check poma
  1 sibling, 1 reply; 25+ messages in thread
From: Stephen Hemminger @ 2013-08-05  3:40 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

The following is needed as well to fix a warning/error about shifting a
32-bit value by 32 bits, which occurs when building on a 32-bit platform
and is caused by the conversion to dma_addr_t.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>

--- a/drivers/net/ethernet/marvell/skge.c	2013-08-04 20:30:57.003636568 -0700
+++ b/drivers/net/ethernet/marvell/skge.c	2013-08-04 20:34:43.104589097 -0700
@@ -943,8 +943,8 @@ static int skge_rx_setup(struct skge_por
 	if (pci_dma_mapping_error(skge->hw->pdev, map))
 		return -1;
 
-	rd->dma_lo = map;
-	rd->dma_hi = map >> 32;
+	rd->dma_lo = lower_32_bits(map);
+	rd->dma_hi = upper_32_bits(map);
 	e->skb = skb;
 	rd->csum1_start = ETH_HLEN;
 	rd->csum2_start = ETH_HLEN;
@@ -2551,7 +2551,7 @@ static int skge_up(struct net_device *de
 
 	BUG_ON(skge->dma & 7);
 
-	if ((u64)skge->dma >> 32 != ((u64) skge->dma + skge->mem_size) >> 32) {
+	if (upper_32_bits(skge->dma) != upper_32_bits(skge->dma + skge->mem_size)) {
 		dev_err(&hw->pdev->dev, "pci_alloc_consistent region crosses 4G boundary\n");
 		err = -EINVAL;
 		goto free_pci_mem;
@@ -2756,8 +2756,8 @@ static netdev_tx_t skge_xmit_frame(struc
 	dma_unmap_addr_set(e, mapaddr, map);
 	dma_unmap_len_set(e, maplen, len);
 
-	td->dma_lo = map;
-	td->dma_hi = map >> 32;
+	td->dma_lo = lower_32_bits(map);
+	td->dma_hi = upper_32_bits(map);
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		const int offset = skb_checksum_start_offset(skb);
@@ -2796,8 +2796,8 @@ static netdev_tx_t skge_xmit_frame(struc
 			tf = e->desc;
 			BUG_ON(tf->control & BMU_OWN);
 
-			tf->dma_lo = map;
-			tf->dma_hi = (u64) map >> 32;
+			tf->dma_lo = lower_32_bits(map);
+			tf->dma_hi = upper_32_bits(map);
 			dma_unmap_addr_set(e, mapaddr, map);
 			dma_unmap_len_set(e, maplen, skb_frag_size(frag));
 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: fix build on 32 bit
  2013-08-05  3:40   ` [PATCH net] skge: fix build on 32 bit Stephen Hemminger
@ 2013-08-05  6:37     ` David Miller
  0 siblings, 0 replies; 25+ messages in thread
From: David Miller @ 2013-08-05  6:37 UTC (permalink / raw)
  To: stephen; +Cc: netdev

From: Stephen Hemminger <stephen@networkplumber.org>
Date: Sun, 4 Aug 2013 20:40:34 -0700

> The following is needed as well to fix warning/error about shifting a 32 bit
> value 32 bits which occurs if building on 32 bit platform caused by conversion
> to using dma_addr_t
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>

Applied, thanks.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: add dma_mapping check
  2013-08-05  1:35 ` David Miller
  2013-08-05  3:40   ` [PATCH net] skge: fix build on 32 bit Stephen Hemminger
@ 2013-08-10 11:51   ` poma
  2013-08-10 17:41     ` Stephen Hemminger
  1 sibling, 1 reply; 25+ messages in thread
From: poma @ 2013-08-10 11:51 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, netdev

[-- Attachment #1: Type: text/plain, Size: 4529 bytes --]

On 05.08.2013 03:35, David Miller wrote:
> From: Stephen Hemminger <stephen@networkplumber.org>
> Date: Sun, 4 Aug 2013 17:22:34 -0700
> 
>> This old driver never checked for DMA mapping errors.
>> Causing splats with the new DMA mapping checks:
>> 	WARNING: at lib/dma-debug.c:937 check_unmap+0x47b/0x930()
>> 	skge 0000:01:09.0: DMA-API: device driver failed to check map
>>
>> Add checks and unwind code.
>>
>> Reported-by: poma <pomidorabelisima@gmail.com>
>> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> 
> Applied, but:
> 
>> -static void skge_rx_setup(struct skge_port *skge, struct skge_element *e,
>> +static int skge_rx_setup(struct skge_port *skge, struct skge_element *e,
>>  			  struct sk_buff *skb, unsigned int bufsize)
> 
> I reflowed the argument indentation for this in the final commit.
> 
> Thanks.
> 

skge-add-dma_mapping-check.patch
https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git/patch/drivers/net/ethernet/marvell?id=136d8f377e1575463b47840bc5f1b22d94bf8f63

/usr/lib/modules/3.11.0-0.rc4.git2.1.fc20.x86_64/updates/skge.ko

skge: module verification failed: signature and/or required key missing
- tainting kernel
skge: 1.14 addr 0xfbffc000 irq 19 chip Yukon rev 1
skge 0000:01:09.0 eth0: addr nn:nn:nn:nn:nn:nn
skge 0000:01:09.0 enp1s9: enabling interface
skge 0000:01:09.0 enp1s9: Link is up at 1000 Mbps, full duplex, flow
control both
IPv6: ADDRCONF(NETDEV_CHANGE): enp1s9: link becomes ready

Starting Nmap 6.25

------------[ cut here ]------------
WARNING: CPU: 2 PID: 2443 at lib/dma-debug.c:986 check_sync+0x4bc/0x580()
skge 0000:01:09.0: DMA-API: device driver tries to sync DMA memory it
has not allocated [device address=0x00000000cf390040] [size=60 bytes]
Modules linked in: skge(OF) raid1 nouveau video mxm_wmi i2c_algo_bit
drm_kms_helper ttm drm i2c_core wmi
CPU: 2 PID: 2443 Comm: nmap Tainted: GF          O
3.11.0-0.rc4.git2.1.fc20.x86_64 #1
Hardware name: Gigabyte Technology Co., Ltd. M720-US3/M720-US3, BIOS F7n
09/07/2010
 0000000000000009 ffff88012aa03c48 ffffffff81723ac3 ffff88012aa03c90
 ffff88012aa03c80 ffffffff8107462d ffff88012837c2b0 000000000000003c
 ffff880128363470 00000000cf390040 ffff880126f79160 ffff88012aa03ce0
Call Trace:
 <IRQ>  [<ffffffff81723ac3>] dump_stack+0x54/0x74
 [<ffffffff8107462d>] warn_slowpath_common+0x7d/0xa0
 [<ffffffff8107469c>] warn_slowpath_fmt+0x4c/0x50
 [<ffffffff81392a5c>] check_sync+0x4bc/0x580
 [<ffffffff8138519e>] ? debug_check_no_obj_freed+0x14e/0x250
 [<ffffffff81392b6b>] debug_dma_sync_single_for_cpu+0x4b/0x50
 [<ffffffff810e6b6c>] ? trace_hardirqs_on_caller+0xac/0x1c0
 [<ffffffff815d9390>] ? build_skb+0x30/0x1d0
 [<ffffffff815db349>] ? __netdev_alloc_skb+0x89/0xf0
 [<ffffffffa01c7e21>] skge_poll+0x3a1/0x9f0 [skge]
 [<ffffffff815edb41>] ? net_rx_action+0xa1/0x380
 [<ffffffff815edc12>] net_rx_action+0x172/0x380
 [<ffffffff8107b4a7>] __do_softirq+0x107/0x410
 [<ffffffff8107b985>] irq_exit+0xc5/0xd0
 [<ffffffff81738c16>] do_IRQ+0x56/0xc0
 [<ffffffff8172d432>] common_interrupt+0x72/0x72
 <EOI>  [<ffffffff81721f92>] ? __slab_alloc+0x4c2/0x526
 [<ffffffff810e6bbd>] ? trace_hardirqs_on_caller+0xfd/0x1c0
 [<ffffffff815db08e>] ? __alloc_skb+0x7e/0x2b0
 [<ffffffff811d2d71>] __kmalloc_node_track_caller+0x1a1/0x410
 [<ffffffff815db08e>] ? __alloc_skb+0x7e/0x2b0
 [<ffffffff815da8a1>] __kmalloc_reserve.isra.25+0x31/0x90
 [<ffffffff815db08e>] __alloc_skb+0x7e/0x2b0
 [<ffffffff815d754e>] sock_alloc_send_pskb+0x27e/0x400
 [<ffffffff815d76e5>] sock_alloc_send_skb+0x15/0x20
 [<ffffffff816614af>] raw_sendmsg+0x74f/0xc50
 [<ffffffff81660e7d>] ? raw_sendmsg+0x11d/0xc50
 [<ffffffff8130246d>] ? avc_has_perm_flags+0x16d/0x350
 [<ffffffff81302329>] ? avc_has_perm_flags+0x29/0x350
 [<ffffffff810b797f>] ? local_clock+0x5f/0x70
 [<ffffffff810e3fcf>] ? lock_release_holdtime.part.28+0xf/0x1a0
 [<ffffffff816723a7>] inet_sendmsg+0x117/0x230
 [<ffffffff81672295>] ? inet_sendmsg+0x5/0x230
 [<ffffffff815d0939>] sock_sendmsg+0x99/0xd0
 [<ffffffff810e346d>] ? trace_hardirqs_off+0xd/0x10
 [<ffffffff810e9738>] ? lock_release_non_nested+0x308/0x350
 [<ffffffff811312e7>] ? rcu_irq_exit+0x77/0xc0
 [<ffffffff8172d4f3>] ? retint_restore_args+0x13/0x13
 [<ffffffff815d0e94>] SYSC_sendto+0x124/0x1d0
 [<ffffffff817369c5>] ? sysret_check+0x22/0x5d
 [<ffffffff810e6bbd>] ? trace_hardirqs_on_caller+0xfd/0x1c0
 [<ffffffff8137af2e>] ? trace_hardirqs_on_thunk+0x3a/0x3f
 [<ffffffff815d1ffe>] SyS_sendto+0xe/0x10
 [<ffffffff81736999>] system_call_fastpath+0x16/0x1b
---[ end trace ef4521ca4028fd28 ]---


poma




[-- Attachment #2: skge-add-dma_mapping-check-3.11.0-0.rc4.git2.1.fc20.x86_64.txt --]
[-- Type: text/plain, Size: 3680 bytes --]


skge-add-dma_mapping-check.patch
https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git/patch/drivers/net/ethernet/marvell?id=136d8f377e1575463b47840bc5f1b22d94bf8f63

/usr/lib/modules/3.11.0-0.rc4.git2.1.fc20.x86_64/updates/skge.ko

skge: module verification failed: signature and/or required key missing - tainting kernel
skge: 1.14 addr 0xfbffc000 irq 19 chip Yukon rev 1
skge 0000:01:09.0 eth0: addr nn:nn:nn:nn:nn:nn
skge 0000:01:09.0 enp1s9: enabling interface
skge 0000:01:09.0 enp1s9: Link is up at 1000 Mbps, full duplex, flow control both
IPv6: ADDRCONF(NETDEV_CHANGE): enp1s9: link becomes ready

Starting Nmap 6.25 

------------[ cut here ]------------
WARNING: CPU: 2 PID: 2443 at lib/dma-debug.c:986 check_sync+0x4bc/0x580()
skge 0000:01:09.0: DMA-API: device driver tries to sync DMA memory it has not allocated [device address=0x00000000cf390040] [size=60 bytes]
Modules linked in: skge(OF) raid1 nouveau video mxm_wmi i2c_algo_bit drm_kms_helper ttm drm i2c_core wmi
CPU: 2 PID: 2443 Comm: nmap Tainted: GF          O 3.11.0-0.rc4.git2.1.fc20.x86_64 #1
Hardware name: Gigabyte Technology Co., Ltd. M720-US3/M720-US3, BIOS F7n 09/07/2010
 0000000000000009 ffff88012aa03c48 ffffffff81723ac3 ffff88012aa03c90
 ffff88012aa03c80 ffffffff8107462d ffff88012837c2b0 000000000000003c
 ffff880128363470 00000000cf390040 ffff880126f79160 ffff88012aa03ce0
Call Trace:
 <IRQ>  [<ffffffff81723ac3>] dump_stack+0x54/0x74
 [<ffffffff8107462d>] warn_slowpath_common+0x7d/0xa0
 [<ffffffff8107469c>] warn_slowpath_fmt+0x4c/0x50
 [<ffffffff81392a5c>] check_sync+0x4bc/0x580
 [<ffffffff8138519e>] ? debug_check_no_obj_freed+0x14e/0x250
 [<ffffffff81392b6b>] debug_dma_sync_single_for_cpu+0x4b/0x50
 [<ffffffff810e6b6c>] ? trace_hardirqs_on_caller+0xac/0x1c0
 [<ffffffff815d9390>] ? build_skb+0x30/0x1d0
 [<ffffffff815db349>] ? __netdev_alloc_skb+0x89/0xf0
 [<ffffffffa01c7e21>] skge_poll+0x3a1/0x9f0 [skge]
 [<ffffffff815edb41>] ? net_rx_action+0xa1/0x380
 [<ffffffff815edc12>] net_rx_action+0x172/0x380
 [<ffffffff8107b4a7>] __do_softirq+0x107/0x410
 [<ffffffff8107b985>] irq_exit+0xc5/0xd0
 [<ffffffff81738c16>] do_IRQ+0x56/0xc0
 [<ffffffff8172d432>] common_interrupt+0x72/0x72
 <EOI>  [<ffffffff81721f92>] ? __slab_alloc+0x4c2/0x526
 [<ffffffff810e6bbd>] ? trace_hardirqs_on_caller+0xfd/0x1c0
 [<ffffffff815db08e>] ? __alloc_skb+0x7e/0x2b0
 [<ffffffff811d2d71>] __kmalloc_node_track_caller+0x1a1/0x410
 [<ffffffff815db08e>] ? __alloc_skb+0x7e/0x2b0
 [<ffffffff815da8a1>] __kmalloc_reserve.isra.25+0x31/0x90
 [<ffffffff815db08e>] __alloc_skb+0x7e/0x2b0
 [<ffffffff815d754e>] sock_alloc_send_pskb+0x27e/0x400
 [<ffffffff815d76e5>] sock_alloc_send_skb+0x15/0x20
 [<ffffffff816614af>] raw_sendmsg+0x74f/0xc50
 [<ffffffff81660e7d>] ? raw_sendmsg+0x11d/0xc50
 [<ffffffff8130246d>] ? avc_has_perm_flags+0x16d/0x350
 [<ffffffff81302329>] ? avc_has_perm_flags+0x29/0x350
 [<ffffffff810b797f>] ? local_clock+0x5f/0x70
 [<ffffffff810e3fcf>] ? lock_release_holdtime.part.28+0xf/0x1a0
 [<ffffffff816723a7>] inet_sendmsg+0x117/0x230
 [<ffffffff81672295>] ? inet_sendmsg+0x5/0x230
 [<ffffffff815d0939>] sock_sendmsg+0x99/0xd0
 [<ffffffff810e346d>] ? trace_hardirqs_off+0xd/0x10
 [<ffffffff810e9738>] ? lock_release_non_nested+0x308/0x350
 [<ffffffff811312e7>] ? rcu_irq_exit+0x77/0xc0
 [<ffffffff8172d4f3>] ? retint_restore_args+0x13/0x13
 [<ffffffff815d0e94>] SYSC_sendto+0x124/0x1d0
 [<ffffffff817369c5>] ? sysret_check+0x22/0x5d
 [<ffffffff810e6bbd>] ? trace_hardirqs_on_caller+0xfd/0x1c0
 [<ffffffff8137af2e>] ? trace_hardirqs_on_thunk+0x3a/0x3f
 [<ffffffff815d1ffe>] SyS_sendto+0xe/0x10
 [<ffffffff81736999>] system_call_fastpath+0x16/0x1b
---[ end trace ef4521ca4028fd28 ]---



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: add dma_mapping check
  2013-08-10 11:51   ` [PATCH net] skge: add dma_mapping check poma
@ 2013-08-10 17:41     ` Stephen Hemminger
  2013-08-10 20:29       ` David Miller
  0 siblings, 1 reply; 25+ messages in thread
From: Stephen Hemminger @ 2013-08-10 17:41 UTC (permalink / raw)
  To: poma; +Cc: David Miller, netdev

On Sat, 10 Aug 2013 13:51:49 +0200
poma <pomidorabelisima@gmail.com> wrote:

> On 05.08.2013 03:35, David Miller wrote:
> > From: Stephen Hemminger <stephen@networkplumber.org>
> > Date: Sun, 4 Aug 2013 17:22:34 -0700
> > 
> >> This old driver never checked for DMA mapping errors.
> >> Causing splats with the new DMA mapping checks:
> >> 	WARNING: at lib/dma-debug.c:937 check_unmap+0x47b/0x930()
> >> 	skge 0000:01:09.0: DMA-API: device driver failed to check map
> >>
> >> Add checks and unwind code.
> >>
> >> Reported-by: poma <pomidorabelisima@gmail.com>
> >> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> > 
> > Applied, but:
> > 
> >> -static void skge_rx_setup(struct skge_port *skge, struct skge_element *e,
> >> +static int skge_rx_setup(struct skge_port *skge, struct skge_element *e,
> >>  			  struct sk_buff *skb, unsigned int bufsize)
> > 
> > I reflowed the argument indentation for this in the final commit.
> > 
> > Thanks.
> > 
> 
> skge-add-dma_mapping-check.patch
> https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git/patch/drivers/net/ethernet/marvell?id=136d8f377e1575463b47840bc5f1b22d94bf8f63
> 
> /usr/lib/modules/3.11.0-0.rc4.git2.1.fc20.x86_64/updates/skge.ko
> 
> skge: module verification failed: signature and/or required key missing
> - tainting kernel
> skge: 1.14 addr 0xfbffc000 irq 19 chip Yukon rev 1
> skge 0000:01:09.0 eth0: addr nn:nn:nn:nn:nn:nn
> skge 0000:01:09.0 enp1s9: enabling interface
> skge 0000:01:09.0 enp1s9: Link is up at 1000 Mbps, full duplex, flow
> control both
> IPv6: ADDRCONF(NETDEV_CHANGE): enp1s9: link becomes ready
> 
> Starting Nmap 6.25
> 
> ------------[ cut here ]------------
> WARNING: CPU: 2 PID: 2443 at lib/dma-debug.c:986 check_sync+0x4bc/0x580()
> skge 0000:01:09.0: DMA-API: device driver tries to sync DMA memory it
> has not allocated [device address=0x00000000cf390040] [size=60 bytes]
> Modules linked in: skge(OF) raid1 nouveau video mxm_wmi i2c_algo_bit
> drm_kms_helper ttm drm i2c_core wmi
> CPU: 2 PID: 2443 Comm: nmap Tainted: GF          O
> 3.11.0-0.rc4.git2.1.fc20.x86_64 #1
> Hardware name: Gigabyte Technology Co., Ltd. M720-US3/M720-US3, BIOS F7n
> 09/07/2010
>  0000000000000009 ffff88012aa03c48 ffffffff81723ac3 ffff88012aa03c90
>  ffff88012aa03c80 ffffffff8107462d ffff88012837c2b0 000000000000003c
>  ffff880128363470 00000000cf390040 ffff880126f79160 ffff88012aa03ce0
> Call Trace:
>  <IRQ>  [<ffffffff81723ac3>] dump_stack+0x54/0x74
>  [<ffffffff8107462d>] warn_slowpath_common+0x7d/0xa0
>  [<ffffffff8107469c>] warn_slowpath_fmt+0x4c/0x50
>  [<ffffffff81392a5c>] check_sync+0x4bc/0x580
>  [<ffffffff8138519e>] ? debug_check_no_obj_freed+0x14e/0x250
>  [<ffffffff81392b6b>] debug_dma_sync_single_for_cpu+0x4b/0x50
>  [<ffffffff810e6b6c>] ? trace_hardirqs_on_caller+0xac/0x1c0
>  [<ffffffff815d9390>] ? build_skb+0x30/0x1d0
>  [<ffffffff815db349>] ? __netdev_alloc_skb+0x89/0xf0
>  [<ffffffffa01c7e21>] skge_poll+0x3a1/0x9f0 [skge]
>  [<ffffffff815edb41>] ? net_rx_action+0xa1/0x380
>  [<ffffffff815edc12>] net_rx_action+0x172/0x380
>  [<ffffffff8107b4a7>] __do_softirq+0x107/0x410
>  [<ffffffff8107b985>] irq_exit+0xc5/0xd0
>  [<ffffffff81738c16>] do_IRQ+0x56/0xc0
>  [<ffffffff8172d432>] common_interrupt+0x72/0x72
>  <EOI>  [<ffffffff81721f92>] ? __slab_alloc+0x4c2/0x526
>  [<ffffffff810e6bbd>] ? trace_hardirqs_on_caller+0xfd/0x1c0
>  [<ffffffff815db08e>] ? __alloc_skb+0x7e/0x2b0
>  [<ffffffff811d2d71>] __kmalloc_node_track_caller+0x1a1/0x410
>  [<ffffffff815db08e>] ? __alloc_skb+0x7e/0x2b0
>  [<ffffffff815da8a1>] __kmalloc_reserve.isra.25+0x31/0x90
>  [<ffffffff815db08e>] __alloc_skb+0x7e/0x2b0
>  [<ffffffff815d754e>] sock_alloc_send_pskb+0x27e/0x400
>  [<ffffffff815d76e5>] sock_alloc_send_skb+0x15/0x20
>  [<ffffffff816614af>] raw_sendmsg+0x74f/0xc50
>  [<ffffffff81660e7d>] ? raw_sendmsg+0x11d/0xc50
>  [<ffffffff8130246d>] ? avc_has_perm_flags+0x16d/0x350
>  [<ffffffff81302329>] ? avc_has_perm_flags+0x29/0x350
>  [<ffffffff810b797f>] ? local_clock+0x5f/0x70
>  [<ffffffff810e3fcf>] ? lock_release_holdtime.part.28+0xf/0x1a0
>  [<ffffffff816723a7>] inet_sendmsg+0x117/0x230
>  [<ffffffff81672295>] ? inet_sendmsg+0x5/0x230
>  [<ffffffff815d0939>] sock_sendmsg+0x99/0xd0
>  [<ffffffff810e346d>] ? trace_hardirqs_off+0xd/0x10
>  [<ffffffff810e9738>] ? lock_release_non_nested+0x308/0x350
>  [<ffffffff811312e7>] ? rcu_irq_exit+0x77/0xc0
>  [<ffffffff8172d4f3>] ? retint_restore_args+0x13/0x13
>  [<ffffffff815d0e94>] SYSC_sendto+0x124/0x1d0
>  [<ffffffff817369c5>] ? sysret_check+0x22/0x5d
>  [<ffffffff810e6bbd>] ? trace_hardirqs_on_caller+0xfd/0x1c0
>  [<ffffffff8137af2e>] ? trace_hardirqs_on_thunk+0x3a/0x3f
>  [<ffffffff815d1ffe>] SyS_sendto+0xe/0x10
>  [<ffffffff81736999>] system_call_fastpath+0x16/0x1b
> ---[ end trace ef4521ca4028fd28 ]---

The DMA debug check insists that the call to sync_single has
to be the same length as the mapping. The driver only bothers
to sync the bytes that matter (those actually received) rather
than the whole mapping.

In reality that is not required, but I will make a patch anyway.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: add dma_mapping check
  2013-08-10 17:41     ` Stephen Hemminger
@ 2013-08-10 20:29       ` David Miller
  2013-08-10 22:02         ` [PATCH net] skge: dma_sync the whole receive buffer Stephen Hemminger
  0 siblings, 1 reply; 25+ messages in thread
From: David Miller @ 2013-08-10 20:29 UTC (permalink / raw)
  To: stephen; +Cc: pomidorabelisima, netdev

From: Stephen Hemminger <stephen@networkplumber.org>
Date: Sat, 10 Aug 2013 10:41:00 -0700

> The DMA debug check insists that the call to sync_single has
> to be the same length as the mapping. The driver is only bothering
> to sync the bytes that matter (got received) rather than the whole
> map.
> 
> In reality that is not required, but I will make a patch anyway.

PCI and other system controllers will prefetch, I think it can matter.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH net] skge: dma_sync the whole receive buffer
  2013-08-10 20:29       ` David Miller
@ 2013-08-10 22:02         ` Stephen Hemminger
  2013-08-11  4:23           ` poma
  2013-08-13 22:09           ` David Miller
  0 siblings, 2 replies; 25+ messages in thread
From: Stephen Hemminger @ 2013-08-10 22:02 UTC (permalink / raw)
  To: David Miller; +Cc: pomidorabelisima, netdev

The DMA sync should sync the whole receive buffer, not just
part of it. Fixes the log messages from the DMA debug check_sync check.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>

--- a/drivers/net/ethernet/marvell/skge.c	2013-08-10 14:54:13.184737163 -0700
+++ b/drivers/net/ethernet/marvell/skge.c	2013-08-10 14:54:54.908099676 -0700
@@ -3077,11 +3077,13 @@ static struct sk_buff *skge_rx_get(struc
 
 		pci_dma_sync_single_for_cpu(skge->hw->pdev,
 					    dma_unmap_addr(e, mapaddr),
-					    len, PCI_DMA_FROMDEVICE);
+					    dma_unmap_len(e, maplen),
+					    PCI_DMA_FROMDEVICE);
 		skb_copy_from_linear_data(e->skb, skb->data, len);
 		pci_dma_sync_single_for_device(skge->hw->pdev,
 					       dma_unmap_addr(e, mapaddr),
-					       len, PCI_DMA_FROMDEVICE);
+					       dma_unmap_len(e, maplen),
+					       PCI_DMA_FROMDEVICE);
 		skge_rx_reuse(e, skge->rx_buf_size);
 	} else {
 		struct sk_buff *nskb;

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: dma_sync the whole receive buffer
  2013-08-10 22:02         ` [PATCH net] skge: dma_sync the whole receive buffer Stephen Hemminger
@ 2013-08-11  4:23           ` poma
  2013-08-13 22:09           ` David Miller
  1 sibling, 0 replies; 25+ messages in thread
From: poma @ 2013-08-11  4:23 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, netdev

[-- Attachment #1: Type: text/plain, Size: 1484 bytes --]

On 11.08.2013 00:02, Stephen Hemminger wrote:
> The DMA sync should sync the whole receive buffer, not just
> part of it. Fixes log messages dma_sync_check.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> 
> --- a/drivers/net/ethernet/marvell/skge.c	2013-08-10 14:54:13.184737163 -0700
> +++ b/drivers/net/ethernet/marvell/skge.c	2013-08-10 14:54:54.908099676 -0700
> @@ -3077,11 +3077,13 @@ static struct sk_buff *skge_rx_get(struc
>  
>  		pci_dma_sync_single_for_cpu(skge->hw->pdev,
>  					    dma_unmap_addr(e, mapaddr),
> -					    len, PCI_DMA_FROMDEVICE);
> +					    dma_unmap_len(e, maplen),
> +					    PCI_DMA_FROMDEVICE);
>  		skb_copy_from_linear_data(e->skb, skb->data, len);
>  		pci_dma_sync_single_for_device(skge->hw->pdev,
>  					       dma_unmap_addr(e, mapaddr),
> -					       len, PCI_DMA_FROMDEVICE);
> +					       dma_unmap_len(e, maplen),
> +					       PCI_DMA_FROMDEVICE);
>  		skge_rx_reuse(e, skge->rx_buf_size);
>  	} else {
>  		struct sk_buff *nskb;
> 

Apart from all these warnings, oopses and kernel crashes, there is no
traffic whatsoever over the skge interface from the fourth layer up,
e.g. ssh.
This has been the case since the first patch - 'skge-add-dma_mapping-check.patch'.

Attached are readings after the third patch -
'skge-fix-log-messages-dma_sync_check.patch',
- 'dmesg-t-3.11.0-0.rc4.git4.1.fc20.x86_64-nmap.txt'
- 'dmesg-t-3.11.0-0.rc4.git4.1.fc20.x86_64-modprobe.txt'

Anyway thanks for the effort.


poma






[-- Attachment #2: dmesg-t-3.11.0-0.rc4.git4.1.fc20.x86_64-modprobe.txt --]
[-- Type: text/plain, Size: 16792 bytes --]


skge 0000:01:09.0 enp1s9: disabling interface
------------[ cut here ]------------
WARNING: CPU: 1 PID: 2111 at lib/dma-debug.c:877 check_unmap+0x837/0x930()
skge 0000:01:09.0: DMA-API: device driver tries to free DMA memory it has not allocated [device address=0x000000007fd60040] [size=1536 bytes]
Modules linked in: skge(OF-) raid1 nouveau video mxm_wmi i2c_algo_bit drm_kms_helper ttm drm i2c_core wmi
CPU: 1 PID: 2111 Comm: modprobe Tainted: GF          O 3.11.0-0.rc4.git4.1.fc20.x86_64 #1
Hardware name: Gigabyte Technology Co., Ltd. M720-US3/M720-US3, BIOS F7n 09/07/2010
 0000000000000009 ffff8800c0d85a68 ffffffff81723ef6 ffff8800c0d85ab0
 ffff8800c0d85aa0 ffffffff8107462d ffff88012837c2b0 0000000000000600
 000000007fd60040 000000007fd60040 0000000000000600 ffff8800c0d85b00
Call Trace:
 [<ffffffff81723ef6>] dump_stack+0x54/0x74
 [<ffffffff8107462d>] warn_slowpath_common+0x7d/0xa0
 [<ffffffff8107469c>] warn_slowpath_fmt+0x4c/0x50
 [<ffffffff81391bbc>] ? debug_dma_mapping_error+0x7c/0x90
 [<ffffffff81393a07>] check_unmap+0x837/0x930
 [<ffffffff811312e7>] ? rcu_irq_exit+0x77/0xc0
 [<ffffffff8172d933>] ? retint_restore_args+0x13/0x13
 [<ffffffff81393b5f>] debug_dma_unmap_page+0x5f/0x70
 [<ffffffffa01bdc4c>] skge_rx_clean+0x7c/0xf0 [skge]
 [<ffffffffa01bf315>] skge_down+0x505/0x7c0 [skge]
 [<ffffffff815e7275>] __dev_close_many+0x95/0xe0
 [<ffffffff815e73a8>] dev_close_many+0x88/0x100
 [<ffffffff815e84b0>] rollback_registered_many+0xb0/0x220
 [<ffffffff815e8651>] rollback_registered+0x31/0x40
 [<ffffffff815e98e8>] unregister_netdevice_queue+0x48/0x90
 [<ffffffff815e994c>] unregister_netdev+0x1c/0x30
 [<ffffffffa01ba3d9>] skge_remove+0x59/0x120 [skge]
 [<ffffffff813a34db>] pci_device_remove+0x3b/0xb0
 [<ffffffff8148741f>] __device_release_driver+0x7f/0xf0
 [<ffffffff81487dd8>] driver_detach+0xc8/0xd0
 [<ffffffff81487021>] bus_remove_driver+0x91/0x120
 [<ffffffff814884c2>] driver_unregister+0x62/0xa0
 [<ffffffff813a25ca>] pci_unregister_driver+0x2a/0x80
 [<ffffffffa01c0b70>] skge_cleanup_module+0x10/0x4a0 [skge]
 [<ffffffff810f55ed>] SyS_delete_module+0x16d/0x300
 [<ffffffff810e6bbd>] ? trace_hardirqs_on_caller+0xfd/0x1c0
 [<ffffffff8137b24e>] ? trace_hardirqs_on_thunk+0x3a/0x3f
 [<ffffffff81736dd9>] system_call_fastpath+0x16/0x1b
---[ end trace 02aebf528f817f06 ]---
=============================================================================
BUG skbuff_head_cache (Tainted: GF       W  O): Poison overwritten
=============================================================================
BUG skbuff_head_cache (Tainted: GF       W  O): Poison overwritten
-----------------------------------------------------------------------------

Disabling lock debugging due to kernel taint
INFO: 0xffff8801219952ec-0xffff8801219952ec. First byte 0x6a instead of 0x6b
INFO: Allocated in __alloc_skb+0x4e/0x2b0 age=157381 cpu=3 pid=1340
	__slab_alloc+0x45f/0x526
	kmem_cache_alloc_node+0xd8/0x3d0
	__alloc_skb+0x4e/0x2b0
	netlink_sendmsg+0x36a/0x750
	sock_sendmsg+0x99/0xd0
	SYSC_sendto+0x124/0x1d0
	SyS_sendto+0xe/0x10
	system_call_fastpath+0x16/0x1b
INFO: Freed in kfree_skbmem+0x37/0x90 age=157381 cpu=3 pid=1340
	__slab_free+0x3a/0x382
	kmem_cache_free+0x38a/0x3a0
	kfree_skbmem+0x37/0x90
	consume_skb+0x38/0x150
	netlink_unicast+0xe5/0x190
	netlink_sendmsg+0x329/0x750
	sock_sendmsg+0x99/0xd0
	SYSC_sendto+0x124/0x1d0
	SyS_sendto+0xe/0x10
	system_call_fastpath+0x16/0x1b
INFO: Slab 0xffffea0004866500 objects=28 used=28 fp=0x          (null) flags=0x2ffc0000004080
INFO: Object 0xffff880121995200 @offset=4608 fp=0xffff880121996880

Bytes b4 ffff8801219951f0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a  ZZZZZZZZZZZZZZZZ
Object ffff880121995200: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff880121995210: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff880121995220: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff880121995230: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff880121995240: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff880121995250: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff880121995260: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff880121995270: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff880121995280: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff880121995290: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8801219952a0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8801219952b0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8801219952c0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8801219952d0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8801219952e0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6a 6b 6b a5  kkkkkkkkkkkkjkk.
Redzone ffff8801219952f0: bb bb bb bb bb bb bb bb                          ........
Padding ffff880121995430: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a  ZZZZZZZZZZZZZZZZ
CPU: 1 PID: 1186 Comm: NetworkManager Tainted: GF   B   W  O 3.11.0-0.rc4.git4.1.fc20.x86_64 #1
Hardware name: Gigabyte Technology Co., Ltd. M720-US3/M720-US3, BIOS F7n 09/07/2010
 ffff880121995200 ffff880108e37960 ffffffff81723ef6 ffff880128ad5200
 ffff880108e379a0 ffffffff811cbebd 0000000000000010 ffff880100000001
 ffff8801219952ed ffff880128ad5200 000000000000006b ffff880121995200
Call Trace:
 [<ffffffff81723ef6>] dump_stack+0x54/0x74
 [<ffffffff811cbebd>] print_trailer+0x14d/0x200
 [<ffffffff811cc0af>] check_bytes_and_report+0xcf/0x110
 [<ffffffff811ccfd7>] check_object+0x1d7/0x250
 [<ffffffff815db49e>] ? __alloc_skb+0x4e/0x2b0
 [<ffffffff8172168d>] alloc_debug_processing+0x76/0x118
 [<ffffffff81722362>] __slab_alloc+0x45f/0x526
 [<ffffffff810218b9>] ? sched_clock+0x9/0x10
 [<ffffffff815db49e>] ? __alloc_skb+0x4e/0x2b0
 [<ffffffff815db49e>] ? __alloc_skb+0x4e/0x2b0
 [<ffffffff811d0168>] kmem_cache_alloc_node+0xd8/0x3d0
 [<ffffffff810e346d>] ? trace_hardirqs_off+0xd/0x10
 [<ffffffff815db49e>] __alloc_skb+0x4e/0x2b0
 [<ffffffff815d798e>] sock_alloc_send_pskb+0x27e/0x400
 [<ffffffff8172cb47>] ? _raw_spin_unlock+0x27/0x40
 [<ffffffff816b40c3>] unix_dgram_sendmsg+0x163/0x620
 [<ffffffff81021845>] ? native_sched_clock+0x15/0x80
 [<ffffffff815d0d79>] sock_sendmsg+0x99/0xd0
 [<ffffffff810218b9>] ? sched_clock+0x9/0x10
 [<ffffffff810b7845>] ? sched_clock_cpu+0xb5/0x100
 [<ffffffff810e346d>] ? trace_hardirqs_off+0xd/0x10
 [<ffffffff815d12d4>] SYSC_sendto+0x124/0x1d0
 [<ffffffff81736e05>] ? sysret_check+0x22/0x5d
 [<ffffffff810e6bbd>] ? trace_hardirqs_on_caller+0xfd/0x1c0
 [<ffffffff8137b24e>] ? trace_hardirqs_on_thunk+0x3a/0x3f
 [<ffffffff815d243e>] SyS_sendto+0xe/0x10
 [<ffffffff81736dd9>] system_call_fastpath+0x16/0x1b
FIX skbuff_head_cache: Restoring 0xffff8801219952ec-0xffff8801219952ec=0x6b

FIX skbuff_head_cache: Marking all objects used
-----------------------------------------------------------------------------

INFO: 0xffff8800cf9d9bec-0xffff8800cf9d9bec. First byte 0x69 instead of 0x6b
INFO: Allocated in __alloc_skb+0x4e/0x2b0 age=131154 cpu=3 pid=2031
	__slab_alloc+0x45f/0x526
	kmem_cache_alloc_node+0xd8/0x3d0
	__alloc_skb+0x4e/0x2b0
	netlink_sendmsg+0x36a/0x750
	sock_sendmsg+0x99/0xd0
	___sys_sendmsg+0x39e/0x3b0
	__sys_sendmsg+0x42/0x80
	SyS_sendmsg+0x12/0x20
	system_call_fastpath+0x16/0x1b
INFO: Freed in kfree_skbmem+0x37/0x90 age=131158 cpu=3 pid=2031
	__slab_free+0x3a/0x382
	kmem_cache_free+0x38a/0x3a0
	kfree_skbmem+0x37/0x90
	consume_skb+0x38/0x150
	netlink_unicast+0xe5/0x190
	netlink_sendmsg+0x329/0x750
	sock_sendmsg+0x99/0xd0
	___sys_sendmsg+0x39e/0x3b0
	__sys_sendmsg+0x42/0x80
	SyS_sendmsg+0x12/0x20
	system_call_fastpath+0x16/0x1b
INFO: Slab 0xffffea00033e7600 objects=28 used=27 fp=0xffff8800cf9d98c0 flags=0x1ffc0000004080
INFO: Object 0xffff8800cf9d9b00 @offset=6912 fp=0xffff8800cf9d9f80

Bytes b4 ffff8800cf9d9af0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a  ZZZZZZZZZZZZZZZZ
Object ffff8800cf9d9b00: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cf9d9b10: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cf9d9b20: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cf9d9b30: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cf9d9b40: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cf9d9b50: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cf9d9b60: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cf9d9b70: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cf9d9b80: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cf9d9b90: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cf9d9ba0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cf9d9bb0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cf9d9bc0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cf9d9bd0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cf9d9be0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 69 6b 6b a5  kkkkkkkkkkkkikk.
Redzone ffff8800cf9d9bf0: bb bb bb bb bb bb bb bb                          ........
Padding ffff8800cf9d9d30: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a  ZZZZZZZZZZZZZZZZ
CPU: 0 PID: 2111 Comm: modprobe Tainted: GF   B   W  O 3.11.0-0.rc4.git4.1.fc20.x86_64 #1
Hardware name: Gigabyte Technology Co., Ltd. M720-US3/M720-US3, BIOS F7n 09/07/2010
 ffff8800cf9d9b00 ffff8800c0d858b0 ffffffff81723ef6 ffff880128ad5200
 ffff8800c0d858f0 ffffffff811cbebd 0000000000000010 ffff880000000001
 ffff8800cf9d9bed ffff880128ad5200 000000000000006b ffff8800cf9d9b00
Call Trace:
 [<ffffffff81723ef6>] dump_stack+0x54/0x74
 [<ffffffff811cbebd>] print_trailer+0x14d/0x200
 [<ffffffff811cc0af>] check_bytes_and_report+0xcf/0x110
 [<ffffffff811ccfd7>] check_object+0x1d7/0x250
 [<ffffffff815db49e>] ? __alloc_skb+0x4e/0x2b0
 [<ffffffff8172168d>] alloc_debug_processing+0x76/0x118
 [<ffffffff81722362>] __slab_alloc+0x45f/0x526
 [<ffffffff815db49e>] ? __alloc_skb+0x4e/0x2b0
 [<ffffffff810e3c77>] ? add_lock_to_list.isra.27.constprop.45+0x77/0xb0
 [<ffffffff810e8280>] ? __lock_acquire+0x12b0/0x1b20
 [<ffffffff815db49e>] ? __alloc_skb+0x4e/0x2b0
 [<ffffffff811d0168>] kmem_cache_alloc_node+0xd8/0x3d0
 [<ffffffff81021845>] ? native_sched_clock+0x15/0x80
 [<ffffffff815db49e>] __alloc_skb+0x4e/0x2b0
 [<ffffffff815f829f>] __neigh_notify+0x3f/0xe0
 [<ffffffff815f969b>] neigh_cleanup_and_release+0x2b/0x50
 [<ffffffff815f9da2>] neigh_flush_dev+0x1e2/0x270
 [<ffffffff815f9ead>] neigh_ifdown+0x3d/0xf0
 [<ffffffff8166a688>] arp_ifdown+0x18/0x20
 [<ffffffff8167c0d6>] fib_disable_ip+0x36/0x40
 [<ffffffff8167e112>] fib_netdev_event+0xc2/0x130
 [<ffffffff81731d86>] notifier_call_chain+0x66/0x150
 [<ffffffff810a7656>] raw_notifier_call_chain+0x16/0x20
 [<ffffffff815e7135>] call_netdevice_notifiers_info+0x35/0x60
 [<ffffffff815e73db>] dev_close_many+0xbb/0x100
 [<ffffffff815e84b0>] rollback_registered_many+0xb0/0x220
 [<ffffffff815e8651>] rollback_registered+0x31/0x40
 [<ffffffff815e98e8>] unregister_netdevice_queue+0x48/0x90
 [<ffffffff815e994c>] unregister_netdev+0x1c/0x30
 [<ffffffffa01ba3d9>] skge_remove+0x59/0x120 [skge]
 [<ffffffff813a34db>] pci_device_remove+0x3b/0xb0
 [<ffffffff8148741f>] __device_release_driver+0x7f/0xf0
 [<ffffffff81487dd8>] driver_detach+0xc8/0xd0
 [<ffffffff81487021>] bus_remove_driver+0x91/0x120
 [<ffffffff814884c2>] driver_unregister+0x62/0xa0
 [<ffffffff813a25ca>] pci_unregister_driver+0x2a/0x80
 [<ffffffffa01c0b70>] skge_cleanup_module+0x10/0x4a0 [skge]
 [<ffffffff810f55ed>] SyS_delete_module+0x16d/0x300
 [<ffffffff810e6bbd>] ? trace_hardirqs_on_caller+0xfd/0x1c0
 [<ffffffff8137b24e>] ? trace_hardirqs_on_thunk+0x3a/0x3f
 [<ffffffff81736dd9>] system_call_fastpath+0x16/0x1b
FIX skbuff_head_cache: Restoring 0xffff8800cf9d9bec-0xffff8800cf9d9bec=0x6b

FIX skbuff_head_cache: Marking all objects used
=============================================================================
BUG skbuff_head_cache (Tainted: GF   B   W  O): Poison overwritten
-----------------------------------------------------------------------------

INFO: 0xffff8800cfa947ac-0xffff8800cfa947ac. First byte 0x6a instead of 0x6b
INFO: Allocated in skb_clone+0x49/0xb0 age=157080 cpu=0 pid=82
	__slab_alloc+0x45f/0x526
	kmem_cache_alloc+0x2ff/0x380
	skb_clone+0x49/0xb0
	netlink_trim+0x7e/0xe0
	netlink_unicast+0x46/0x190
	kauditd_send_skb+0x2b/0x80
	kauditd_thread+0xb9/0x1f0
	kthread+0xed/0x100
	ret_from_fork+0x7c/0xb0
INFO: Freed in kfree_skbmem+0x37/0x90 age=157083 cpu=1 pid=980
	__slab_free+0x3a/0x382
	kmem_cache_free+0x38a/0x3a0
	kfree_skbmem+0x37/0x90
	consume_skb+0x38/0x150
	skb_free_datagram+0x15/0x40
	netlink_recvmsg+0x147/0x390
	sock_recvmsg+0xa8/0xe0
	SYSC_recvfrom+0xe2/0x160
	SyS_recvfrom+0xe/0x10
	system_call_fastpath+0x16/0x1b
INFO: Slab 0xffffea00033ea500 objects=28 used=28 fp=0x          (null) flags=0x1ffc0000004080
INFO: Object 0xffff8800cfa946c0 @offset=1728 fp=0xffff8800cfa97840

Bytes b4 ffff8800cfa946b0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a  ZZZZZZZZZZZZZZZZ
Object ffff8800cfa946c0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cfa946d0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cfa946e0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cfa946f0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cfa94700: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cfa94710: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cfa94720: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cfa94730: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cfa94740: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cfa94750: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cfa94760: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cfa94770: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cfa94780: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cfa94790: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff8800cfa947a0: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6a 6b 6b a5  kkkkkkkkkkkkjkk.
Redzone ffff8800cfa947b0: bb bb bb bb bb bb bb bb                          ........
Padding ffff8800cfa948f0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a  ZZZZZZZZZZZZZZZZ
CPU: 2 PID: 2112 Comm: systemd-udevd Tainted: GF   B   W  O 3.11.0-0.rc4.git4.1.fc20.x86_64 #1
Hardware name: Gigabyte Technology Co., Ltd. M720-US3/M720-US3, BIOS F7n 09/07/2010
 ffff8800cfa946c0 ffff8800cf81b8d0 ffffffff81723ef6 ffff880128ad5200
 ffff8800cf81b910 ffffffff811cbebd 0000000000000010 ffff880000000001
 ffff8800cfa947ad ffff880128ad5200 000000000000006b ffff8800cfa946c0
Call Trace:
 [<ffffffff81723ef6>] dump_stack+0x54/0x74
 [<ffffffff811cbebd>] print_trailer+0x14d/0x200
 [<ffffffff811cc0af>] check_bytes_and_report+0xcf/0x110
 [<ffffffff811ccfd7>] check_object+0x1d7/0x250
 [<ffffffff815dc289>] ? skb_clone+0x49/0xb0
 [<ffffffff8172168d>] alloc_debug_processing+0x76/0x118
 [<ffffffff81722362>] __slab_alloc+0x45f/0x526
 [<ffffffff810e6c8d>] ? trace_hardirqs_on+0xd/0x10
 [<ffffffff815dc289>] ? skb_clone+0x49/0xb0
 [<ffffffff815d9747>] ? kfree_skbmem+0x37/0x90
 [<ffffffff815dc289>] ? skb_clone+0x49/0xb0
 [<ffffffff811cf49f>] kmem_cache_alloc+0x2ff/0x380
 [<ffffffff815dc289>] skb_clone+0x49/0xb0
 [<ffffffff8161eb81>] netlink_broadcast_filtered+0x2c1/0x370
 [<ffffffff816208f8>] netlink_sendmsg+0x648/0x750
 [<ffffffff815d0d79>] sock_sendmsg+0x99/0xd0
 [<ffffffff811ae3cb>] ? page_add_file_rmap+0x1b/0x1a0
 [<ffffffff815d119e>] ___sys_sendmsg+0x39e/0x3b0
 [<ffffffff811a4385>] ? handle_mm_fault+0x2a5/0x5c0
 [<ffffffff810a5b4f>] ? up_read+0x1f/0x40
 [<ffffffff811fca82>] ? final_putname+0x22/0x50
 [<ffffffff8120feac>] ? fget_light+0x28c/0x510
 [<ffffffff815d2702>] __sys_sendmsg+0x42/0x80
 [<ffffffff815d2752>] SyS_sendmsg+0x12/0x20
 [<ffffffff81736dd9>] system_call_fastpath+0x16/0x1b
FIX skbuff_head_cache: Restoring 0xffff8800cfa947ac-0xffff8800cfa947ac=0x6b

FIX skbuff_head_cache: Marking all objects used



[-- Attachment #3: dmesg-t-3.11.0-0.rc4.git4.1.fc20.x86_64-nmap.txt --]
[-- Type: text/plain, Size: 2676 bytes --]


IPv6: ADDRCONF(NETDEV_CHANGE): enp1s9: link becomes ready
------------[ cut here ]------------
WARNING: CPU: 0 PID: 2183 at lib/dma-debug.c:986 check_sync+0x4bc/0x580()
skge 0000:01:09.0: DMA-API: device driver tries to sync DMA memory it has not allocated [device address=0x00000000ce550040] [size=1536 bytes]
Modules linked in: skge(OF) raid1 nouveau video mxm_wmi i2c_algo_bit drm_kms_helper ttm drm i2c_core wmi
CPU: 0 PID: 2183 Comm: nmap Tainted: GF          O 3.11.0-0.rc4.git4.1.fc20.x86_64 #1
Hardware name: Gigabyte Technology Co., Ltd. M720-US3/M720-US3, BIOS F7n 09/07/2010
 0000000000000009 ffff88012a603c88 ffffffff81723ef6 ffff88012a603cd0
 ffff88012a603cc0 ffffffff8107462d ffff88012837c2b0 0000000000000600
 ffff880128363470 00000000ce550040 ffff8800c0fb3a80 ffff88012a603d20
Call Trace:
 <IRQ>  [<ffffffff81723ef6>] dump_stack+0x54/0x74
 [<ffffffff8107462d>] warn_slowpath_common+0x7d/0xa0
 [<ffffffff8107469c>] warn_slowpath_fmt+0x4c/0x50
 [<ffffffff81392d7c>] check_sync+0x4bc/0x580
 [<ffffffff81392e8b>] debug_dma_sync_single_for_cpu+0x4b/0x50
 [<ffffffff811cf2ad>] ? kmem_cache_alloc+0x10d/0x380
 [<ffffffff810e6b6c>] ? trace_hardirqs_on_caller+0xac/0x1c0
 [<ffffffff815d97d0>] ? build_skb+0x30/0x1d0
 [<ffffffff815db789>] ? __netdev_alloc_skb+0x89/0xf0
 [<ffffffffa01bfe1e>] skge_poll+0x39e/0x9d0 [skge]
 [<ffffffff815edf81>] ? net_rx_action+0xa1/0x380
 [<ffffffff815ee052>] net_rx_action+0x172/0x380
 [<ffffffff8107b4a7>] __do_softirq+0x107/0x410
 [<ffffffff8173873c>] call_softirq+0x1c/0x30
 <EOI>  [<ffffffff8101ba75>] do_softirq+0x85/0xc0
 [<ffffffff81635586>] ? ip_finish_output+0x2f6/0x800
 [<ffffffff8107a29b>] local_bh_enable+0xdb/0xf0
 [<ffffffff81635586>] ip_finish_output+0x2f6/0x800
 [<ffffffff816353c8>] ? ip_finish_output+0x138/0x800
 [<ffffffff8163720c>] ip_output+0x5c/0x100
 [<ffffffff81661a78>] raw_sendmsg+0x8d8/0xc50
 [<ffffffff8130278d>] ? avc_has_perm_flags+0x16d/0x350
 [<ffffffff81302649>] ? avc_has_perm_flags+0x29/0x350
 [<ffffffff810b797f>] ? local_clock+0x5f/0x70
 [<ffffffff810e3fcf>] ? lock_release_holdtime.part.28+0xf/0x1a0
 [<ffffffff816727e7>] inet_sendmsg+0x117/0x230
 [<ffffffff816726d5>] ? inet_sendmsg+0x5/0x230
 [<ffffffff815d0d79>] sock_sendmsg+0x99/0xd0
 [<ffffffff810e346d>] ? trace_hardirqs_off+0xd/0x10
 [<ffffffff810e9738>] ? lock_release_non_nested+0x308/0x350
 [<ffffffff815d12d4>] SYSC_sendto+0x124/0x1d0
 [<ffffffff81736e05>] ? sysret_check+0x22/0x5d
 [<ffffffff810e6bbd>] ? trace_hardirqs_on_caller+0xfd/0x1c0
 [<ffffffff8137b24e>] ? trace_hardirqs_on_thunk+0x3a/0x3f
 [<ffffffff815d243e>] SyS_sendto+0xe/0x10
 [<ffffffff81736dd9>] system_call_fastpath+0x16/0x1b
---[ end trace 9007da55b42056f7 ]---



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: dma_sync the whole receive buffer
  2013-08-10 22:02         ` [PATCH net] skge: dma_sync the whole receive buffer Stephen Hemminger
  2013-08-11  4:23           ` poma
@ 2013-08-13 22:09           ` David Miller
  2013-08-13 22:20             ` Stephen Hemminger
  2013-08-14  1:00             ` Stephen Hemminger
  1 sibling, 2 replies; 25+ messages in thread
From: David Miller @ 2013-08-13 22:09 UTC (permalink / raw)
  To: stephen; +Cc: pomidorabelisima, netdev

From: Stephen Hemminger <stephen@networkplumber.org>
Date: Sat, 10 Aug 2013 15:02:07 -0700

> The DMA sync should sync the whole receive buffer, not just
> part of it. Fixes log messages dma_sync_check.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>

Applied, but I really suspect that your "check DMA mapping errors"
patch has added a serious regression.  A regression much worse than
the bug you were trying to fix with that change.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: dma_sync the whole receive buffer
  2013-08-13 22:09           ` David Miller
@ 2013-08-13 22:20             ` Stephen Hemminger
  2013-08-14  1:00             ` Stephen Hemminger
  1 sibling, 0 replies; 25+ messages in thread
From: Stephen Hemminger @ 2013-08-13 22:20 UTC (permalink / raw)
  To: David Miller; +Cc: pomidorabelisima, netdev

On Tue, 13 Aug 2013 15:09:55 -0700 (PDT)
David Miller <davem@davemloft.net> wrote:

> From: Stephen Hemminger <stephen@networkplumber.org>
> Date: Sat, 10 Aug 2013 15:02:07 -0700
> 
> > The DMA sync should sync the whole receive buffer, not just
> > part of it. Fixes log messages dma_sync_check.
> > 
> > Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> 
> Applied, but I really suspect that your "check DMA mapping errors"
> patch has added a serious regression.  A regression much worse than
> the bug you were trying to fix with that change.

I am retesting with hardware, will have answer tonight.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: dma_sync the whole receive buffer
  2013-08-13 22:09           ` David Miller
  2013-08-13 22:20             ` Stephen Hemminger
@ 2013-08-14  1:00             ` Stephen Hemminger
  2013-08-14 10:20               ` poma
  1 sibling, 1 reply; 25+ messages in thread
From: Stephen Hemminger @ 2013-08-14  1:00 UTC (permalink / raw)
  To: David Miller; +Cc: pomidorabelisima, netdev

On Tue, 13 Aug 2013 15:09:55 -0700 (PDT)
David Miller <davem@davemloft.net> wrote:

> From: Stephen Hemminger <stephen@networkplumber.org>
> Date: Sat, 10 Aug 2013 15:02:07 -0700
> 
> > The DMA sync should sync the whole receive buffer, not just
> > part of it. Fixes log messages dma_sync_check.
> > 
> > Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> 
> Applied, but I really suspect that your "check DMA mapping errors"
> patch has added a serious regression.  A regression much worse than
> the bug you were trying to fix with that change.

Argh. The problem is deeper than that. The device got broken somewhere between
3.2 and 3.4. My old Dlink card works on 3.2 but gets DMA errors on 3.4.
The configs are different though, so I am checking that as well.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: dma_sync the whole receive buffer
  2013-08-14  1:00             ` Stephen Hemminger
@ 2013-08-14 10:20               ` poma
  2013-08-14 16:20                 ` Stephen Hemminger
  0 siblings, 1 reply; 25+ messages in thread
From: poma @ 2013-08-14 10:20 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, netdev

On 14.08.2013 03:00, Stephen Hemminger wrote:
> On Tue, 13 Aug 2013 15:09:55 -0700 (PDT)
> David Miller <davem@davemloft.net> wrote:
> 
>> From: Stephen Hemminger <stephen@networkplumber.org>
>> Date: Sat, 10 Aug 2013 15:02:07 -0700
>>
>>> The DMA sync should sync the whole receive buffer, not just
>>> part of it. Fixes log messages dma_sync_check.
>>>
>>> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
>>
>> Applied, but I really suspect that your "check DMA mapping errors"
>> patch has added a serious regression.  A regression much worse than
>> the bug you were trying to fix with that change.
> 
> Argh. The problem is deeper than that. Device got broken somewhere between
> 3.2 and 3.4. My old Dlink card works on 3.2 but gets DMA errors on 3.4.
> The config's are different though so checking that as well.
> 

Can I help you with debugging?
The DGE-530T is a rather solid device.


poma

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: dma_sync the whole receive buffer
  2013-08-14 10:20               ` poma
@ 2013-08-14 16:20                 ` Stephen Hemminger
  2013-08-14 18:29                   ` poma
  0 siblings, 1 reply; 25+ messages in thread
From: Stephen Hemminger @ 2013-08-14 16:20 UTC (permalink / raw)
  To: poma; +Cc: David Miller, netdev

On Wed, 14 Aug 2013 12:20:03 +0200
poma <pomidorabelisima@gmail.com> wrote:

> On 14.08.2013 03:00, Stephen Hemminger wrote:
> > On Tue, 13 Aug 2013 15:09:55 -0700 (PDT)
> > David Miller <davem@davemloft.net> wrote:
> > 
> >> From: Stephen Hemminger <stephen@networkplumber.org>
> >> Date: Sat, 10 Aug 2013 15:02:07 -0700
> >>
> >>> The DMA sync should sync the whole receive buffer, not just
> >>> part of it. Fixes log messages dma_sync_check.
> >>>
> >>> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> >>
> >> Applied, but I really suspect that your "check DMA mapping errors"
> >> patch has added a serious regression.  A regression much worse than
> >> the bug you were trying to fix with that change.
> > 
> > Argh. The problem is deeper than that. Device got broken somewhere between
> > 3.2 and 3.4. My old Dlink card works on 3.2 but gets DMA errors on 3.4.
> > The config's are different though so checking that as well.
> > 
> 
> Can I help you with debugging?
> DGE-530T is rather solid device.

I don't think it is a hardware problem.
The failure occurs when the board accesses the Receive ring PCI memory area.
This region is allocated with pci_alloc_consistent and therefore should
be available. Two possible issues are driver math issues, or hardware
problems with where the region is located. Some of these cards don't
really have full 64-bit PCI support.

My board is:
05:01.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter (rev 11)
	Subsystem: D-Link System Inc DGE-530T Gigabit Ethernet Adapter
	Flags: bus master, 66MHz, medium devsel, latency 32, IRQ 18
	Memory at f7d20000 (32-bit, non-prefetchable) [size=16K]
	I/O ports at c000 [size=256]
	Expansion ROM at f7d00000 [disabled] [size=128K]
	Capabilities: [48] Power Management version 2
	Capabilities: [50] Vital Product Data
	Kernel driver in use: skge


What is your config?

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: dma_sync the whole receive buffer
  2013-08-14 16:20                 ` Stephen Hemminger
@ 2013-08-14 18:29                   ` poma
  2013-08-15 15:41                     ` Stephen Hemminger
  0 siblings, 1 reply; 25+ messages in thread
From: poma @ 2013-08-14 18:29 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, netdev

On 14.08.2013 18:20, Stephen Hemminger wrote:
> On Wed, 14 Aug 2013 12:20:03 +0200
> poma <pomidorabelisima@gmail.com> wrote:
> 
>> On 14.08.2013 03:00, Stephen Hemminger wrote:
>>> On Tue, 13 Aug 2013 15:09:55 -0700 (PDT)
>>> David Miller <davem@davemloft.net> wrote:
>>>
>>>> From: Stephen Hemminger <stephen@networkplumber.org>
>>>> Date: Sat, 10 Aug 2013 15:02:07 -0700
>>>>
>>>>> The DMA sync should sync the whole receive buffer, not just
>>>>> part of it. Fixes log messages dma_sync_check.
>>>>>
>>>>> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
>>>>
>>>> Applied, but I really suspect that your "check DMA mapping errors"
>>>> patch has added a serious regression.  A regression much worse than
>>>> the bug you were trying to fix with that change.
>>>
>>> Argh. The problem is deeper than that. Device got broken somewhere between
>>> 3.2 and 3.4. My old Dlink card works on 3.2 but gets DMA errors on 3.4.
>>> The config's are different though so checking that as well.
>>>
>>
>> Can I help you with debugging?
>> DGE-530T is rather solid device.
> 
> Don't think it is a hardware problem.
> The failure is when the board access the Receive ring PCI memory area.
> This region is allocated with pci_alloc_consistent and therefore should
> be available. Two possible issues are driver math issues, or hardware
> problems with where the region is located. Some of these cards don't
> really have full 64 bit PCI support.
> 
> My board is:
> 05:01.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter (rev 11)
> 	Subsystem: D-Link System Inc DGE-530T Gigabit Ethernet Adapter
> 	Flags: bus master, 66MHz, medium devsel, latency 32, IRQ 18
> 	Memory at f7d20000 (32-bit, non-prefetchable) [size=16K]
> 	I/O ports at c000 [size=256]
> 	Expansion ROM at f7d00000 [disabled] [size=128K]
> 	Capabilities: [48] Power Management version 2
> 	Capabilities: [50] Vital Product Data
> 	Kernel driver in use: skge
> 
> 
> What is your config?
> 

01:09.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter
(rev 11)
	Subsystem: D-Link System Inc DGE-530T Gigabit Ethernet Adapter
	Flags: bus master, 66MHz, medium devsel, latency 64, IRQ 19
	Memory at fbffc000 (32-bit, non-prefetchable) [size=16K]
	I/O ports at b400 [size=256]
	[virtual] Expansion ROM at ec000000 [disabled] [size=128K]
	Capabilities: [48] Power Management version 2
	Capabilities: [50] Vital Product Data
	Kernel driver in use: skge


poma

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: dma_sync the whole receive buffer
  2013-08-14 18:29                   ` poma
@ 2013-08-15 15:41                     ` Stephen Hemminger
  2013-08-16 14:36                       ` poma
  2013-08-19  0:49                       ` poma
  0 siblings, 2 replies; 25+ messages in thread
From: Stephen Hemminger @ 2013-08-15 15:41 UTC (permalink / raw)
  To: poma; +Cc: David Miller, netdev

On Wed, 14 Aug 2013 20:29:06 +0200
poma <pomidorabelisima@gmail.com> wrote:

> On 14.08.2013 18:20, Stephen Hemminger wrote:
> > On Wed, 14 Aug 2013 12:20:03 +0200
> > poma <pomidorabelisima@gmail.com> wrote:
> > 
> >> On 14.08.2013 03:00, Stephen Hemminger wrote:
> >>> On Tue, 13 Aug 2013 15:09:55 -0700 (PDT)
> >>> David Miller <davem@davemloft.net> wrote:
> >>>
> >>>> From: Stephen Hemminger <stephen@networkplumber.org>
> >>>> Date: Sat, 10 Aug 2013 15:02:07 -0700
> >>>>
> >>>>> The DMA sync should sync the whole receive buffer, not just
> >>>>> part of it. Fixes log messages dma_sync_check.
> >>>>>
> >>>>> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> >>>>
> >>>> Applied, but I really suspect that your "check DMA mapping errors"
> >>>> patch has added a serious regression.  A regression much worse than
> >>>> the bug you were trying to fix with that change.
> >>>
> >>> Argh. The problem is deeper than that. Device got broken somewhere between
> >>> 3.2 and 3.4. My old Dlink card works on 3.2 but gets DMA errors on 3.4.
> >>> The config's are different though so checking that as well.
> >>>
> >>
> >> Can I help you with debugging?
> >> DGE-530T is rather solid device.
> > 
> > Don't think it is a hardware problem.
> > The failure is when the board access the Receive ring PCI memory area.
> > This region is allocated with pci_alloc_consistent and therefore should
> > be available. Two possible issues are driver math issues, or hardware
> > problems with where the region is located. Some of these cards don't
> > really have full 64 bit PCI support.
> > 
> > My board is:
> > 05:01.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter (rev 11)
> > 	Subsystem: D-Link System Inc DGE-530T Gigabit Ethernet Adapter
> > 	Flags: bus master, 66MHz, medium devsel, latency 32, IRQ 18
> > 	Memory at f7d20000 (32-bit, non-prefetchable) [size=16K]
> > 	I/O ports at c000 [size=256]
> > 	Expansion ROM at f7d00000 [disabled] [size=128K]
> > 	Capabilities: [48] Power Management version 2
> > 	Capabilities: [50] Vital Product Data
> > 	Kernel driver in use: skge
> > 
> > 
> > What is your config?
> > 
> 
> 01:09.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter
> (rev 11)
> 	Subsystem: D-Link System Inc DGE-530T Gigabit Ethernet Adapter
> 	Flags: bus master, 66MHz, medium devsel, latency 64, IRQ 19
> 	Memory at fbffc000 (32-bit, non-prefetchable) [size=16K]
> 	I/O ports at b400 [size=256]
> 	[virtual] Expansion ROM at ec000000 [disabled] [size=128K]
> 	Capabilities: [48] Power Management version 2
> 	Capabilities: [50] Vital Product Data
> 	Kernel driver in use: skge
> 
> 
> poma
> 

In the course of debugging this, I moved the card to another slot
and all the problems went away. I suspect either card insertion or more likely
the crap consumer motherboards don't have full PCI support on some slots.

There doesn't seem to be any way to address this in software.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: dma_sync the whole receive buffer
  2013-08-15 15:41                     ` Stephen Hemminger
@ 2013-08-16 14:36                       ` poma
  2013-08-19  0:49                       ` poma
  1 sibling, 0 replies; 25+ messages in thread
From: poma @ 2013-08-16 14:36 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, netdev

On 15.08.2013 17:41, Stephen Hemminger wrote:

> In the course of debugging this, I moved the card to another slot
> and all the problems went away. I suspect either card insertion or more likely
> the crap consumer motherboards don't have full PCI support on some slots.
> 
> There doesn't seem to be anyway to address this in software.
> 

Noted, thanks.
Follows overhaul.


poma

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: dma_sync the whole receive buffer
  2013-08-15 15:41                     ` Stephen Hemminger
  2013-08-16 14:36                       ` poma
@ 2013-08-19  0:49                       ` poma
  2013-08-20  3:28                         ` poma
  1 sibling, 1 reply; 25+ messages in thread
From: poma @ 2013-08-19  0:49 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, netdev

On 15.08.2013 17:41, Stephen Hemminger wrote:
> On Wed, 14 Aug 2013 20:29:06 +0200
> poma <pomidorabelisima@gmail.com> wrote:
> 
>> On 14.08.2013 18:20, Stephen Hemminger wrote:
>>> On Wed, 14 Aug 2013 12:20:03 +0200
>>> poma <pomidorabelisima@gmail.com> wrote:
>>>
>>>> On 14.08.2013 03:00, Stephen Hemminger wrote:
>>>>> On Tue, 13 Aug 2013 15:09:55 -0700 (PDT)
>>>>> David Miller <davem@davemloft.net> wrote:
>>>>>
>>>>>> From: Stephen Hemminger <stephen@networkplumber.org>
>>>>>> Date: Sat, 10 Aug 2013 15:02:07 -0700
>>>>>>
>>>>>>> The DMA sync should sync the whole receive buffer, not just
>>>>>>> part of it. Fixes log messages dma_sync_check.
>>>>>>>
>>>>>>> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
>>>>>>
>>>>>> Applied, but I really suspect that your "check DMA mapping errors"
>>>>>> patch has added a serious regression.  A regression much worse than
>>>>>> the bug you were trying to fix with that change.
>>>>>
>>>>> Argh. The problem is deeper than that. Device got broken somewhere between
>>>>> 3.2 and 3.4. My old Dlink card works on 3.2 but gets DMA errors on 3.4.
>>>>> The config's are different though so checking that as well.
>>>>>
>>>>
>>>> Can I help you with debugging?
>>>> DGE-530T is rather solid device.
>>>
>>> Don't think it is a hardware problem.
>>> The failure is when the board access the Receive ring PCI memory area.
>>> This region is allocated with pci_alloc_consistent and therefore should
>>> be available. Two possible issues are driver math issues, or hardware
>>> problems with where the region is located. Some of these cards don't
>>> really have full 64 bit PCI support.
>>>
>>> My board is:
>>> 05:01.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter (rev 11)
>>> 	Subsystem: D-Link System Inc DGE-530T Gigabit Ethernet Adapter
>>> 	Flags: bus master, 66MHz, medium devsel, latency 32, IRQ 18
>>> 	Memory at f7d20000 (32-bit, non-prefetchable) [size=16K]
>>> 	I/O ports at c000 [size=256]
>>> 	Expansion ROM at f7d00000 [disabled] [size=128K]
>>> 	Capabilities: [48] Power Management version 2
>>> 	Capabilities: [50] Vital Product Data
>>> 	Kernel driver in use: skge
>>>
>>>
>>> What is your config?
>>>
>>
>> 01:09.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter
>> (rev 11)
>> 	Subsystem: D-Link System Inc DGE-530T Gigabit Ethernet Adapter
>> 	Flags: bus master, 66MHz, medium devsel, latency 64, IRQ 19
>> 	Memory at fbffc000 (32-bit, non-prefetchable) [size=16K]
>> 	I/O ports at b400 [size=256]
>> 	[virtual] Expansion ROM at ec000000 [disabled] [size=128K]
>> 	Capabilities: [48] Power Management version 2
>> 	Capabilities: [50] Vital Product Data
>> 	Kernel driver in use: skge
>>
>>
>> poma
>>
> 
> In the course of debugging this, I moved the card to another slot
> and all the problems went away. I suspect either card insertion or more likely
> the crap consumer motherboards don't have full PCI support on some slots.
> 
> There doesn't seem to be anyway to address this in software.
> 


DGE-530T is further tested in the 3 available slots:
01:06.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter
(rev 11)
01:07.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter
(rev 11)
01:08.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter
(rev 11)
And the result is the same as in the slot:
01:09.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter
(rev 11)
warnings, oopses and kernel crashes.

However DGE-528T(RTL8110s) on the same bus runs without errors:
01:09.0 Ethernet controller: D-Link System Inc DGE-528T Gigabit Ethernet
Adapter (rev 10)
	Subsystem: D-Link System Inc DGE-528T Gigabit Ethernet Adapter
	Flags: bus master, 66MHz, medium devsel, latency 64, IRQ 19
	I/O ports at cc00 [size=256]
	Memory at fbfff000 (32-bit, non-prefetchable) [size=256]
	[virtual] Expansion ROM at fbe00000 [disabled] [size=128K]
	Capabilities: [dc] Power Management version 2
	Kernel driver in use: r8169

Besides comparing the behavior of these two cards, e.g. NFS upload, I
noticed an obvious difference in the data flow.
Via DGE-528T transmission is steady, while via DGE-530T the traffic is
at times interrupted and unstable.
So it seems that the "WARNING: at lib/dma-debug.c:937 check_unmap…"
isn't just for fun.


poma

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: dma_sync the whole receive buffer
  2013-08-19  0:49                       ` poma
@ 2013-08-20  3:28                         ` poma
  2013-08-21 16:04                           ` poma
  0 siblings, 1 reply; 25+ messages in thread
From: poma @ 2013-08-20  3:28 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, netdev

On 19.08.2013 02:49, poma wrote:
> On 15.08.2013 17:41, Stephen Hemminger wrote:
>> On Wed, 14 Aug 2013 20:29:06 +0200
>> poma <pomidorabelisima@gmail.com> wrote:
>>
>>> On 14.08.2013 18:20, Stephen Hemminger wrote:
>>>> On Wed, 14 Aug 2013 12:20:03 +0200
>>>> poma <pomidorabelisima@gmail.com> wrote:
>>>>
>>>>> On 14.08.2013 03:00, Stephen Hemminger wrote:
>>>>>> On Tue, 13 Aug 2013 15:09:55 -0700 (PDT)
>>>>>> David Miller <davem@davemloft.net> wrote:
>>>>>>
>>>>>>> From: Stephen Hemminger <stephen@networkplumber.org>
>>>>>>> Date: Sat, 10 Aug 2013 15:02:07 -0700
>>>>>>>
>>>>>>>> The DMA sync should sync the whole receive buffer, not just
>>>>>>>> part of it. Fixes log messages dma_sync_check.
>>>>>>>>
>>>>>>>> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
>>>>>>>
>>>>>>> Applied, but I really suspect that your "check DMA mapping errors"
>>>>>>> patch has added a serious regression.  A regression much worse than
>>>>>>> the bug you were trying to fix with that change.
>>>>>>
>>>>>> Argh. The problem is deeper than that. Device got broken somewhere between
>>>>>> 3.2 and 3.4. My old Dlink card works on 3.2 but gets DMA errors on 3.4.
>>>>>> The config's are different though so checking that as well.
>>>>>>
>>>>>
>>>>> Can I help you with debugging?
>>>>> DGE-530T is rather solid device.
>>>>
>>>> Don't think it is a hardware problem.
>>>> The failure is when the board access the Receive ring PCI memory area.
>>>> This region is allocated with pci_alloc_consistent and therefore should
>>>> be available. Two possible issues are driver math issues, or hardware
>>>> problems with where the region is located. Some of these cards don't
>>>> really have full 64 bit PCI support.
>>>>
>>>> My board is:
>>>> 05:01.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter (rev 11)
>>>> 	Subsystem: D-Link System Inc DGE-530T Gigabit Ethernet Adapter
>>>> 	Flags: bus master, 66MHz, medium devsel, latency 32, IRQ 18
>>>> 	Memory at f7d20000 (32-bit, non-prefetchable) [size=16K]
>>>> 	I/O ports at c000 [size=256]
>>>> 	Expansion ROM at f7d00000 [disabled] [size=128K]
>>>> 	Capabilities: [48] Power Management version 2
>>>> 	Capabilities: [50] Vital Product Data
>>>> 	Kernel driver in use: skge
>>>>
>>>>
>>>> What is your config?
>>>>
>>>
>>> 01:09.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter
>>> (rev 11)
>>> 	Subsystem: D-Link System Inc DGE-530T Gigabit Ethernet Adapter
>>> 	Flags: bus master, 66MHz, medium devsel, latency 64, IRQ 19
>>> 	Memory at fbffc000 (32-bit, non-prefetchable) [size=16K]
>>> 	I/O ports at b400 [size=256]
>>> 	[virtual] Expansion ROM at ec000000 [disabled] [size=128K]
>>> 	Capabilities: [48] Power Management version 2
>>> 	Capabilities: [50] Vital Product Data
>>> 	Kernel driver in use: skge
>>>
>>>
>>> poma
>>>
>>
>> In the course of debugging this, I moved the card to another slot
>> and all the problems went away. I suspect either card insertion or more likely
>> the crap consumer motherboards don't have full PCI support on some slots.
>>
>> There doesn't seem to be anyway to address this in software.
>>
> 
> 
> DGE-530T is further tested in the 3 available slots:
> 01:06.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter
> (rev 11)
> 01:07.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter
> (rev 11)
> 01:08.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter
> (rev 11)
> And the result is the same as in the slot:
> 01:09.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter
> (rev 11)
> warnings, oopses and kernel crashes.
> 
> However DGE-528T(RTL8110s) on the same bus runs without errors:
> 01:09.0 Ethernet controller: D-Link System Inc DGE-528T Gigabit Ethernet
> Adapter (rev 10)
> 	Subsystem: D-Link System Inc DGE-528T Gigabit Ethernet Adapter
> 	Flags: bus master, 66MHz, medium devsel, latency 64, IRQ 19
> 	I/O ports at cc00 [size=256]
> 	Memory at fbfff000 (32-bit, non-prefetchable) [size=256]
> 	[virtual] Expansion ROM at fbe00000 [disabled] [size=128K]
> 	Capabilities: [dc] Power Management version 2
> 	Kernel driver in use: r8169
> 
> Besides comparing the behavior of these two cards, e.g. NFS upload, I
> noticed an obvious difference in the data flow.
> Via DGE-528T transmission is steady, while via DGE-530T the traffic is
> at times interrupted and unstable.
> So it seems that the "WARNING: at lib/dma-debug.c:937 check_unmap…"
> isn't just a fun.
> 

In support of the validity of the device I made a test with the
2.6.32-358.14.1.el6.x86_64.debug kernel.
And everything worked as it should.

01:08.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter
(rev 11)
	Subsystem: D-Link System Inc DGE-530T Gigabit Ethernet Adapter
	Flags: bus master, 66MHz, medium devsel, latency 64, IRQ 18
	Memory at fbff8000 (32-bit, non-prefetchable) [size=16K]
	I/O ports at cc00 [size=256]
	[virtual] Expansion ROM at fbe00000 [disabled] [size=128K]
	Capabilities: [48] Power Management version 2
	Capabilities: [50] Vital Product Data
	Kernel driver in use: skge
	Kernel modules: skge

filename:
/lib/modules/2.6.32-358.14.1.el6.x86_64.debug/kernel/drivers/net/skge.ko
version:        1.13
license:        GPL
author:         Stephen Hemminger <shemminger@linux-foundation.org>
description:    SysKonnect Gigabit Ethernet driver
srcversion:     ADF6781C2E0D2D895F86279
alias:          pci:v00001737d00001032sv*sd00000015bc*sc*i*
alias:          pci:v00001737d00001064sv*sd*bc*sc*i*
alias:          pci:v00001371d0000434Esv*sd*bc*sc*i*
alias:          pci:v000011ABd00005005sv*sd*bc*sc*i*
alias:          pci:v000011ABd00004320sv*sd*bc*sc*i*
alias:          pci:v00001186d00004B01sv*sd*bc*sc*i*
alias:          pci:v00001186d00004C00sv*sd*bc*sc*i*
alias:          pci:v00001148d00004320sv*sd*bc*sc*i*
alias:          pci:v00001148d00004300sv*sd*bc*sc*i*
alias:          pci:v000010B7d000080EBsv*sd*bc*sc*i*
alias:          pci:v000010B7d00001700sv*sd*bc*sc*i*
depends:
vermagic:       2.6.32-358.14.1.el6.x86_64.debug SMP mod_unload modversions
parm:           debug:Debug level (0=none,...,16=all) (int)


Given all the tests and everything written, something isn't right at all.
Should I quote Shakespeare? :)


poma

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: dma_sync the whole receive buffer
  2013-08-20  3:28                         ` poma
@ 2013-08-21 16:04                           ` poma
  2013-08-22  0:40                             ` Greg KH
  0 siblings, 1 reply; 25+ messages in thread
From: poma @ 2013-08-21 16:04 UTC (permalink / raw)
  To: Greg KH; +Cc: Stephen Hemminger, David Miller, netdev, Linus Torvalds

On 20.08.2013 05:28, poma wrote:
> On 19.08.2013 02:49, poma wrote:
>> On 15.08.2013 17:41, Stephen Hemminger wrote:
>>> On Wed, 14 Aug 2013 20:29:06 +0200
>>> poma <pomidorabelisima@gmail.com> wrote:
>>>
>>>> On 14.08.2013 18:20, Stephen Hemminger wrote:
>>>>> On Wed, 14 Aug 2013 12:20:03 +0200
>>>>> poma <pomidorabelisima@gmail.com> wrote:
>>>>>
>>>>>> On 14.08.2013 03:00, Stephen Hemminger wrote:
>>>>>>> On Tue, 13 Aug 2013 15:09:55 -0700 (PDT)
>>>>>>> David Miller <davem@davemloft.net> wrote:
>>>>>>>
>>>>>>>> From: Stephen Hemminger <stephen@networkplumber.org>
>>>>>>>> Date: Sat, 10 Aug 2013 15:02:07 -0700
>>>>>>>>
>>>>>>>>> The DMA sync should sync the whole receive buffer, not just
>>>>>>>>> part of it. Fixes log messages dma_sync_check.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
>>>>>>>>
>>>>>>>> Applied, but I really suspect that your "check DMA mapping errors"
>>>>>>>> patch has added a serious regression.  A regression much worse than
>>>>>>>> the bug you were trying to fix with that change.
>>>>>>>
>>>>>>> Argh. The problem is deeper than that. Device got broken somewhere between
>>>>>>> 3.2 and 3.4. My old Dlink card works on 3.2 but gets DMA errors on 3.4.
>>>>>>> The config's are different though so checking that as well.
>>>>>>>
>>>>>>
>>>>>> Can I help you with debugging?
>>>>>> DGE-530T is rather solid device.
>>>>>
>>>>> Don't think it is a hardware problem.
>>>>> The failure is when the board access the Receive ring PCI memory area.
>>>>> This region is allocated with pci_alloc_consistent and therefore should
>>>>> be available. Two possible issues are driver math issues, or hardware
>>>>> problems with where the region is located. Some of these cards don't
>>>>> really have full 64 bit PCI support.
>>>>>
>>>>> My board is:
>>>>> 05:01.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter (rev 11)
>>>>> 	Subsystem: D-Link System Inc DGE-530T Gigabit Ethernet Adapter
>>>>> 	Flags: bus master, 66MHz, medium devsel, latency 32, IRQ 18
>>>>> 	Memory at f7d20000 (32-bit, non-prefetchable) [size=16K]
>>>>> 	I/O ports at c000 [size=256]
>>>>> 	Expansion ROM at f7d00000 [disabled] [size=128K]
>>>>> 	Capabilities: [48] Power Management version 2
>>>>> 	Capabilities: [50] Vital Product Data
>>>>> 	Kernel driver in use: skge
>>>>>
>>>>>
>>>>> What is your config?
>>>>>
>>>>
>>>> 01:09.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter
>>>> (rev 11)
>>>> 	Subsystem: D-Link System Inc DGE-530T Gigabit Ethernet Adapter
>>>> 	Flags: bus master, 66MHz, medium devsel, latency 64, IRQ 19
>>>> 	Memory at fbffc000 (32-bit, non-prefetchable) [size=16K]
>>>> 	I/O ports at b400 [size=256]
>>>> 	[virtual] Expansion ROM at ec000000 [disabled] [size=128K]
>>>> 	Capabilities: [48] Power Management version 2
>>>> 	Capabilities: [50] Vital Product Data
>>>> 	Kernel driver in use: skge
>>>>
>>>>
>>>> poma
>>>>
>>>
>>> In the course of debugging this, I moved the card to another slot
>>> and all the problems went away. I suspect either card insertion or more likely
>>> the crap consumer motherboards don't have full PCI support on some slots.
>>>
>>> There doesn't seem to be anyway to address this in software.
>>>
>>
>>
>> DGE-530T is further tested in the 3 available slots:
>> 01:06.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter
>> (rev 11)
>> 01:07.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter
>> (rev 11)
>> 01:08.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter
>> (rev 11)
>> And the result is the same as in the slot:
>> 01:09.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter
>> (rev 11)
>> warnings, oopses and kernel crashes.
>>
>> However DGE-528T(RTL8110s) on the same bus runs without errors:
>> 01:09.0 Ethernet controller: D-Link System Inc DGE-528T Gigabit Ethernet
>> Adapter (rev 10)
>> 	Subsystem: D-Link System Inc DGE-528T Gigabit Ethernet Adapter
>> 	Flags: bus master, 66MHz, medium devsel, latency 64, IRQ 19
>> 	I/O ports at cc00 [size=256]
>> 	Memory at fbfff000 (32-bit, non-prefetchable) [size=256]
>> 	[virtual] Expansion ROM at fbe00000 [disabled] [size=128K]
>> 	Capabilities: [dc] Power Management version 2
>> 	Kernel driver in use: r8169
>>
>> Besides comparing the behavior of these two cards, e.g. NFS upload, I
>> noticed an obvious difference in the data flow.
>> Via DGE-528T transmission is steady, while via DGE-530T the traffic is
>> at times interrupted and unstable.
>> So it seems that the "WARNING: at lib/dma-debug.c:937 check_unmap…"
>> isn't just a fun.
>>
> 
> In support of the validity of the device I made a test with the
> 2.6.32-358.14.1.el6.x86_64.debug kernel.
> And everything worked as it should.
> 
> 01:08.0 Ethernet controller: D-Link System Inc Gigabit Ethernet Adapter
> (rev 11)
> 	Subsystem: D-Link System Inc DGE-530T Gigabit Ethernet Adapter
> 	Flags: bus master, 66MHz, medium devsel, latency 64, IRQ 18
> 	Memory at fbff8000 (32-bit, non-prefetchable) [size=16K]
> 	I/O ports at cc00 [size=256]
> 	[virtual] Expansion ROM at fbe00000 [disabled] [size=128K]
> 	Capabilities: [48] Power Management version 2
> 	Capabilities: [50] Vital Product Data
> 	Kernel driver in use: skge
> 	Kernel modules: skge
> 
> filename:
> /lib/modules/2.6.32-358.14.1.el6.x86_64.debug/kernel/drivers/net/skge.ko
> version:        1.13
> license:        GPL
> author:         Stephen Hemminger <shemminger@linux-foundation.org>
> description:    SysKonnect Gigabit Ethernet driver
> srcversion:     ADF6781C2E0D2D895F86279
> alias:          pci:v00001737d00001032sv*sd00000015bc*sc*i*
> alias:          pci:v00001737d00001064sv*sd*bc*sc*i*
> alias:          pci:v00001371d0000434Esv*sd*bc*sc*i*
> alias:          pci:v000011ABd00005005sv*sd*bc*sc*i*
> alias:          pci:v000011ABd00004320sv*sd*bc*sc*i*
> alias:          pci:v00001186d00004B01sv*sd*bc*sc*i*
> alias:          pci:v00001186d00004C00sv*sd*bc*sc*i*
> alias:          pci:v00001148d00004320sv*sd*bc*sc*i*
> alias:          pci:v00001148d00004300sv*sd*bc*sc*i*
> alias:          pci:v000010B7d000080EBsv*sd*bc*sc*i*
> alias:          pci:v000010B7d00001700sv*sd*bc*sc*i*
> depends:
> vermagic:       2.6.32-358.14.1.el6.x86_64.debug SMP mod_unload modversions
> parm:           debug:Debug level (0=none,...,16=all) (int)
> 
> 
> Given all the tests and all written, something isn't right, at all.
> Should I quote Shakespeare. :)
> 

Additionally, I have researched the history of the event and made a few
more tests.
The last kernel that worked flawlessly is from the 3.7.10 series.
I tested with the 3.7.10-400.fc19.x86_64.debug kernel.
The first kernel afterwards - the 3.8 series - introduced problems with
DMA-API, "… device driver failed to check map error".
The example that follows shows how broken the skge module is in its
current state.
The only thing that is produced is a timeout.
The same result was achieved with the 3.11.0-0.rc6.git1.1.fc20.i686 kernel.

[CLIENT]

$ lspci -knn -d 1186:4c00
01:08.0 Ethernet controller [0200]: D-Link System Inc Gigabit Ethernet
Adapter [1186:4c00] (rev 11)
	Subsystem: D-Link System Inc DGE-530T Gigabit Ethernet Adapter [1186:4c00]
	Kernel driver in use: skge

$ modinfo skge
filename:
/lib/modules/3.11.0-0.rc6.git1.1.fc20.x86_64/kernel/drivers/net/ethernet/marvell/skge.ko
version:        1.14
license:        GPL
author:         Stephen Hemminger <shemminger@linux-foundation.org>
description:    SysKonnect Gigabit Ethernet driver
srcversion:     BF56B39CFC55B011E27DAB9
alias:          pci:v00001737d00001032sv*sd00000015bc*sc*i*
alias:          pci:v00001737d00001064sv*sd*bc*sc*i*
alias:          pci:v00001371d0000434Esv*sd*bc*sc*i*
alias:          pci:v000011ABd00005005sv*sd*bc*sc*i*
alias:          pci:v000011ABd00004320sv*sd*bc*sc*i*
alias:          pci:v00001186d00004302sv*sd*bc*sc*i*
alias:          pci:v00001186d00004C00sv*sd*bc*sc*i*
alias:          pci:v00001186d00004B01sv*sd*bc*sc*i*
alias:          pci:v00001148d00004320sv*sd*bc*sc*i*
alias:          pci:v00001148d00004300sv*sd*bc*sc*i*
alias:          pci:v000010B7d000080EBsv*sd*bc*sc*i*
alias:          pci:v000010B7d00001700sv*sd*bc*sc*i*
depends:
intree:         Y
vermagic:       3.11.0-0.rc6.git1.1.fc20.x86_64 SMP mod_unload
signer:         Fedora kernel signing key
sig_key:        B1:4E:0F:25:52:6B:EE:0B:8B:66:BA:D6:38:99:D2:21:5D:37:E1:C1
sig_hashalgo:   sha256
parm:           debug:Debug level (0=none,...,16=all) (int)

$ time ssh -vvv <SERVER_IP>
OpenSSH_6.2p2, OpenSSL 1.0.1e-fips 11 Feb 2013
debug1: Reading configuration data $HOME/.ssh/config
debug1: Reading configuration data /etc/ssh/ssh_config
debug1: /etc/ssh/ssh_config line 51: Applying options for *
debug2: ssh_connect: needpriv 0
debug1: Connecting to <SERVER_IP> [<SERVER_IP>] port 22.
debug1: Connection established.
debug1: identity file $HOME/.ssh/id_rsa type -1
debug1: identity file $HOME/.ssh/id_rsa-cert type -1
debug3: Incorrect RSA1 identifier
debug3: Could not load "$HOME/.ssh/id_dsa" as a RSA1 public key
debug1: identity file $HOME/.ssh/id_dsa type 2
debug1: identity file $HOME/.ssh/id_dsa-cert type -1
debug1: Enabling compatibility mode for protocol 2.0
debug1: Local version string SSH-2.0-OpenSSH_6.2
debug1: Remote protocol version 2.0, remote software version OpenSSH_6.2
debug1: match: OpenSSH_6.2 pat OpenSSH*
debug2: fd 3 setting O_NONBLOCK
debug3: load_hostkeys: loading entries for host "<SERVER_IP>" from file
"$HOME/.ssh/known_hosts"
debug3: load_hostkeys: found key type RSA in file $HOME/.ssh/known_hosts:1
debug3: load_hostkeys: loaded 1 keys
debug3: order_hostkeyalgs: prefer hostkeyalgs:
ssh-rsa-cert-v01@openssh.com,ssh-rsa-cert-v00@openssh.com,ssh-rsa
debug1: SSH2_MSG_KEXINIT sent
Connection to <SERVER_IP> timed out while waiting to read

real	1m0.133s
user	0m0.006s
sys	0m0.036s

# tcptrack -i enp1s8 port 22
Client                Server                State        Idle A Speed

 <CLIENT_IP>:53602     <SERVER_IP>:22        ESTABLISHED  1m    0 B/s

[\CLIENT]
.
.
[SERVER]

/var/log/secure
<DATE> <SERVER> sshd[25248]: Connection closed by <CLIENT_IP> [preauth]

[\SERVER]


Signor Greg, you are supposed to be a very resourceful guy, especially in
matters concerning the hardware, so please, if you can, set aside your
valuable time and help us finally resolve this issue.


poma


A complete thread:
http://www.spinics.net/lists/netdev/msg245381.html

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: dma_sync the whole receive buffer
  2013-08-21 16:04                           ` poma
@ 2013-08-22  0:40                             ` Greg KH
  2013-08-22  3:30                               ` poma
  0 siblings, 1 reply; 25+ messages in thread
From: Greg KH @ 2013-08-22  0:40 UTC (permalink / raw)
  To: poma; +Cc: Stephen Hemminger, David Miller, netdev, Linus Torvalds

On Wed, Aug 21, 2013 at 06:04:11PM +0200, poma wrote:
> Signor Greg you are supposed to be very resourceful guy, especially in
> matters concerning the hardware, so please if you can set aside your
> valuable time and help us finally resolve this issue.

Please take it up with Stephen, it's his driver, not mine.

greg k-h

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: dma_sync the whole receive buffer
  2013-08-22  0:40                             ` Greg KH
@ 2013-08-22  3:30                               ` poma
  2013-08-22  4:00                                 ` Greg KH
  2013-08-22  4:08                                 ` Stephen Hemminger
  0 siblings, 2 replies; 25+ messages in thread
From: poma @ 2013-08-22  3:30 UTC (permalink / raw)
  To: Greg KH; +Cc: Stephen Hemminger, David Miller, netdev, Linus Torvalds

On 22.08.2013 02:40, Greg KH wrote:
> On Wed, Aug 21, 2013 at 06:04:11PM +0200, poma wrote:
>> Signor Greg you are supposed to be very resourceful guy, especially in
>> matters concerning the hardware, so please if you can set aside your
>> valuable time and help us finally resolve this issue.
> 
> Please take it up with Stephen, it's his driver, not mine.
> 
> greg k-h
> 

I guess I wrongly assumed that developers help each other when they get
stuck. Isn't it obvious that Steph and Dave already decided to ship the
broken module in da birthday kernel? What can I do in the first place,
as a simple user? That's why I asked you to help.
However thank you for the answer.


poma

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: dma_sync the whole receive buffer
  2013-08-22  3:30                               ` poma
@ 2013-08-22  4:00                                 ` Greg KH
  2013-08-22 14:46                                   ` poma
  2013-08-22  4:08                                 ` Stephen Hemminger
  1 sibling, 1 reply; 25+ messages in thread
From: Greg KH @ 2013-08-22  4:00 UTC (permalink / raw)
  To: poma; +Cc: Stephen Hemminger, David Miller, netdev, Linus Torvalds

On Thu, Aug 22, 2013 at 05:30:17AM +0200, poma wrote:
> On 22.08.2013 02:40, Greg KH wrote:
> > On Wed, Aug 21, 2013 at 06:04:11PM +0200, poma wrote:
> >> Signor Greg you are supposed to be very resourceful guy, especially in
> >> matters concerning the hardware, so please if you can set aside your
> >> valuable time and help us finally resolve this issue.
> > 
> > Please take it up with Stephen, it's his driver, not mine.
> > 
> > greg k-h
> > 
> 
> I guess I wrongly assumed that developers help each other when they get
> stuck. Isn't it obvious that Steph and Dave already decided to ship the
> broken module in da birthday kernel.

No, it's not obvious at all.  If it's broken for you, please work with
them, not against them by trying to go around their back to someone
else.  I have nothing to do with networking drivers, that's Stephen and
David's area of the kernel, so there's nothing I can even do here,
sorry.

greg k-h

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: dma_sync the whole receive buffer
  2013-08-22  3:30                               ` poma
  2013-08-22  4:00                                 ` Greg KH
@ 2013-08-22  4:08                                 ` Stephen Hemminger
  1 sibling, 0 replies; 25+ messages in thread
From: Stephen Hemminger @ 2013-08-22  4:08 UTC (permalink / raw)
  To: poma; +Cc: Greg KH, David Miller, netdev, Linus Torvalds

On Thu, 22 Aug 2013 05:30:17 +0200
poma <pomidorabelisima@gmail.com> wrote:

> On 22.08.2013 02:40, Greg KH wrote:
> > On Wed, Aug 21, 2013 at 06:04:11PM +0200, poma wrote:
> >> Signor Greg you are supposed to be very resourceful guy, especially in
> >> matters concerning the hardware, so please if you can set aside your
> >> valuable time and help us finally resolve this issue.
> > 
> > Please take it up with Stephen, it's his driver, not mine.
> > 
> > greg k-h
> > 
> 
> I guess I wrongly assumed that developers help each other when they get
> stuck. Isn't it obvious that Steph and Dave already decided to ship the
> broken module in da birthday kernel. What can I do in the first place,
> as a simple user. That's why I asked you to help.
> However thank you for the answer.
> 
> 
> poma
> 
> 

If you have the patience and time, it would be useful to bisect the problem.
It may not just be in skge.c but a side effect of other config changes.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH net] skge: dma_sync the whole receive buffer
  2013-08-22  4:00                                 ` Greg KH
@ 2013-08-22 14:46                                   ` poma
  0 siblings, 0 replies; 25+ messages in thread
From: poma @ 2013-08-22 14:46 UTC (permalink / raw)
  To: Greg KH; +Cc: Stephen Hemminger, David Miller, netdev, Linus Torvalds



    Then out spake brave Horatius,
    The Captain of the Gate:
    "To every man upon this earth
    Death cometh soon or late.
    And how can man die better
    Than facing fearful odds,
    For the ashes of his fathers,
    And the temples of his Gods."


poma

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2013-08-22 14:46 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-08-05  0:22 [PATCH net] skge: add dma_mapping check Stephen Hemminger
2013-08-05  1:35 ` David Miller
2013-08-05  3:40   ` [PATCH net] skge: fix build on 32 bit Stephen Hemminger
2013-08-05  6:37     ` David Miller
2013-08-10 11:51   ` [PATCH net] skge: add dma_mapping check poma
2013-08-10 17:41     ` Stephen Hemminger
2013-08-10 20:29       ` David Miller
2013-08-10 22:02         ` [PATCH net] skge: dma_sync the whole receive buffer Stephen Hemminger
2013-08-11  4:23           ` poma
2013-08-13 22:09           ` David Miller
2013-08-13 22:20             ` Stephen Hemminger
2013-08-14  1:00             ` Stephen Hemminger
2013-08-14 10:20               ` poma
2013-08-14 16:20                 ` Stephen Hemminger
2013-08-14 18:29                   ` poma
2013-08-15 15:41                     ` Stephen Hemminger
2013-08-16 14:36                       ` poma
2013-08-19  0:49                       ` poma
2013-08-20  3:28                         ` poma
2013-08-21 16:04                           ` poma
2013-08-22  0:40                             ` Greg KH
2013-08-22  3:30                               ` poma
2013-08-22  4:00                                 ` Greg KH
2013-08-22 14:46                                   ` poma
2013-08-22  4:08                                 ` Stephen Hemminger

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.