All of lore.kernel.org
 help / color / mirror / Atom feed
* IGMP Join dropping multicast packets
@ 2009-03-14 20:16 Dave Boutcher
  2009-03-15  2:37 ` Eric Dumazet
  0 siblings, 1 reply; 12+ messages in thread
From: Dave Boutcher @ 2009-03-14 20:16 UTC (permalink / raw)
  To: netdev

I'm running into an interesting problem with joining multiple
multicast feeds.  If you join multiple multicast feeds using
setsockopt(...,IP_ADD_MEMBERSHIP...) it causes packets on UNRELATED
multicast feeds to get dropped.  We have a multicast feed on a rock
solid network, and we were very surprised to see dropped packets.  The
cause was a different process/program being run by a different user
joining a bunch of multicast feeds.

I can recreate this with a fairly simple testcase (attached below.)
The problem doesn't happen with unicast UDP data, and it doesn't
happen with loopback, so you need at least two systems to run this
(and what subscriber to netdev doesn't have at least two systems.)  To
recreate, run "receiver" on one system, "sender", on another, and then
"joiner" on the receiving system.  You should see a message pop out
saying that packets have been dropped.  I've recreated this on a few
different kernel versions (the latest being 2.6.28) and a few
different sets of hardware.  I HAVEN'T recreated it if the system
doing the IP_ADD_MEMBERSHIP specifies a specific interface rather than
INADDR_ANY.  I'm not sure if that is core to the issue or not.  You
may also need to bump the value in
/proc/sys/net/ipv4/igmp_max_memberships (though that hasn't seemed
necessary for me.)

I poked around in igmp.c, but its mojo exceeds my threshold.  If
anyone has any ideas or questions I'd be happy to hear them.

diff -uNr null/joiner.c multicast/joiner.c
--- null/joiner.c	1969-12-31 18:00:00.000000000 -0600
+++ multicast/joiner.c	2009-03-14 15:04:10.000000000 -0500
@@ -0,0 +1,44 @@
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+
+#define NUMSOCK 55
+
+int main(int argc, char **argv)
+{
+	struct ip_mreq mreq;
+	int i;
+	int sd;
+	char ipaddr[64];
+
+	for (i=0; i<NUMSOCK; i++) {
+		sd = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+		
+		if (sd < 0) {
+			perror("socket");
+			exit(0);
+		}
+		
+		sprintf(ipaddr,"239.192.2.%d",i+1);
+		
+		mreq.imr_multiaddr.s_addr = inet_addr(ipaddr);
+		mreq.imr_interface.s_addr = htonl(INADDR_ANY);
+		
+		if (setsockopt(sd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq))) {
+			perror("IP_ADD_MEMBERSHIP");
+			exit(0);
+		}
+	}
+
+	printf("Sleeping for 10 seconds\n");
+	sleep(10);
+
+	exit(0);
+}
+
diff -uNr null/Makefile multicast/Makefile
--- null/Makefile	1969-12-31 18:00:00.000000000 -0600
+++ multicast/Makefile	2009-03-14 15:13:09.000000000 -0500
@@ -0,0 +1,9 @@
+CFLAGS = -Wall -g
+
+all: sender receiver joiner
+
+receiver:
+
+sender:
+
+joiner:
diff -uNr null/mctest.h multicast/mctest.h
--- null/mctest.h	1969-12-31 18:00:00.000000000 -0600
+++ multicast/mctest.h	2009-03-14 14:47:13.000000000 -0500
@@ -0,0 +1,10 @@
+#ifndef __MCTEST_H__
+#define __MCTEST_H__
+
+struct mcdata {
+	int32_t seq1;
+	char data[60];
+	int32_t seq2;
+};
+
+#endif
Binary files null/receiver and multicast/receiver differ
diff -uNr null/receiver.c multicast/receiver.c
--- null/receiver.c	1969-12-31 18:00:00.000000000 -0600
+++ multicast/receiver.c	2009-03-14 14:48:25.000000000 -0500
@@ -0,0 +1,71 @@
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "mctest.h"
+
+int main(int argc, char **argv)
+{
+	int sd = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+	int on = 1;
+	struct sockaddr_in addr;
+	uint32_t seq = 1;
+	int bytes;
+	struct ip_mreq mreq;
+
+	struct mcdata data;
+
+	if (sd < 0) {
+		perror("socket");
+		exit(0);
+	}
+
+	mreq.imr_multiaddr.s_addr = inet_addr("239.192.1.1");
+	mreq.imr_interface.s_addr = htonl(INADDR_ANY);
+
+	if (setsockopt(sd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq))) {
+		perror("IP_ADD_MEMBERSHIP");
+		exit(0);
+	}
+
+	if (setsockopt(sd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(int))) {
+		perror("SO_REUSEADDR");
+		exit(0);
+	}
+
+	bzero(&addr, sizeof(addr));
+	addr.sin_family = AF_INET;
+	addr.sin_port = ntohs(60604);
+	addr.sin_addr.s_addr = inet_addr("239.192.1.1");
+
+	if (bind(sd, (struct sockaddr*)&addr, sizeof(addr))) {
+		perror("bind");
+		exit(0);
+	}
+
+	while(1) {
+		bytes = recv(sd, &data, sizeof(data), 0);
+		if (bytes != sizeof(data)) {
+			printf("recv got %d, expected %lu\n",
+			       bytes, sizeof(data));
+			exit(0);
+		}
+
+		if ((ntohl(data.seq1) != seq) ||
+		    (ntohl(data.seq2) != seq)) {
+			printf("Mismatched seq! Expected %u, got %u/%u\n",
+			       seq, ntohl(data.seq1), ntohl(data.seq2));
+		}
+
+		seq = ntohl(data.seq1)+1;
+		if (seq % 10000 == 0)
+			printf("got seq %u\n",seq);
+	}
+}
+
diff -uNr null/sender.c multicast/sender.c
--- null/sender.c	1969-12-31 18:00:00.000000000 -0600
+++ multicast/sender.c	2009-03-14 14:47:13.000000000 -0500
@@ -0,0 +1,55 @@
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "mctest.h"
+
+int main(int argc, char **argv)
+{
+	int sd = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+	int on = 1;
+	struct sockaddr_in addr;
+	uint32_t seq = 1;
+	int bytes;
+
+	struct mcdata data;
+
+	if (sd < 0) {
+		perror("socket");
+		exit(0);
+	}
+
+	if (setsockopt(sd, IPPROTO_IP, IP_MULTICAST_LOOP, &on, sizeof(int))) {
+		perror("IO_MULTICAST_LOOP");
+		exit(0);
+	}
+
+	bzero(&addr, sizeof(addr));
+	addr.sin_family = AF_INET;
+	addr.sin_port = ntohs(60604);
+	addr.sin_addr.s_addr = inet_addr("239.192.1.1");
+
+	memset(data.data, 0xdb, sizeof(data.data));
+
+	while (1) {
+		data.seq1 = data.seq2 = htonl(seq);
+
+		bytes = sendto(sd, &data, sizeof(data), 0,
+			       (struct sockaddr *)&addr, sizeof(addr));
+		if (bytes != sizeof(data)) {
+			perror("send");
+			exit(0);
+		}
+
+		seq++;
+		usleep(1000);
+	}
+
+	return 0;
+}

-- 
Dave B

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: IGMP Join dropping multicast packets
  2009-03-14 20:16 IGMP Join dropping multicast packets Dave Boutcher
@ 2009-03-15  2:37 ` Eric Dumazet
  2009-03-16  2:04   ` Dave Boutcher
  0 siblings, 1 reply; 12+ messages in thread
From: Eric Dumazet @ 2009-03-15  2:37 UTC (permalink / raw)
  To: Dave Boutcher; +Cc: netdev

Dave Boutcher a écrit :
> I'm running into an interesting problem with joining multiple
> multicast feeds.  If you join multiple multicast feeds using
> setsockopt(...,IP_ADD_MEMBERSHIP...) it causes packets on UNRELATED
> multicast feeds to get dropped.  We have a multicast feed on a rock
> solid network, and we were very surprised to see dropped packets.  The
> cause was a different process/program being run by a different user
> joining a bunch of mulitcast feeds.
> 
> I can recreate this with a fairly simple testcase (attached below.)
> The problem doesn't happen with unicast UDP data, and it doesn't
> happen with loopback, so you need at least two systems to run this
> (and what subscriber to netdev doesn't have at least two systems.)  To
> recreate, run "receiver" on one system, "sender", on another, and then
> "joiner" on the receiving system.  You should see a message pop out
> saying that packets have been dropped.  I've recreated this on a few
> different kernel versions (the latest being 2.6.28) and a few
> different sets off hardware.  I HAVEN"T recreated it if the system
> doing the IP_ADD_MEMBERSHIP specifies a specific interface rather than
> INADDR_ANY.  I'm not sure if that is core to the issue or not.  You
> may also need to bump the value in
> /proc/sys/net/ipv4/igmp_max_memberships (though that hasn't seemed
> necessary for me.)
> 
> I poked around in igmp.c, but its mojo exceeds my threshold.  If
> anyone has any ideas or questions I'd be happy to hear them.
> 

I could not reproduce the problem on my machines (bnx2 adapter), even if changing
NUMSOCK from 55 to 200 in joiner.c

Is your network a 100Mb one or Gigabit ?
Try to slow down your joiner ?
(Could be a flood of IGMP messages your router/switch cannot cope with)

Please describe your "rock solid" network setup (kind of network adapters you have, kind of router...)

Each time an address is added, NIC driver have to reprogram mcfilter of
the device. Maybe some NIC can drop some packets at this moment...

Does using tcpdump to force promiscuous mode on the device also trigger packet losses ?

(see also ifconfig ethX promisc|allmulti)


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: IGMP Join dropping multicast packets
  2009-03-15  2:37 ` Eric Dumazet
@ 2009-03-16  2:04   ` Dave Boutcher
  2009-03-16 19:01     ` Eric Dumazet
  0 siblings, 1 reply; 12+ messages in thread
From: Dave Boutcher @ 2009-03-16  2:04 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev

On Sat, Mar 14, 2009 at 9:37 PM, Eric Dumazet <dada1@cosmosbay.com> wrote:
> Dave Boutcher a écrit :
>> I'm running into an interesting problem with joining multiple
>> multicast feeds.  If you join multiple multicast feeds using
>> setsockopt(...,IP_ADD_MEMBERSHIP...) it causes packets on UNRELATED
>> multicast feeds to get dropped.  We have a multicast feed on a rock
>> solid network, and we were very surprised to see dropped packets.  The
>> cause was a different process/program being run by a different user
>> joining a bunch of mulitcast feeds.
>
> I could not reproduce the problem on my machines (bnx2 adapter), even if changing
> NUMSOCK from 55 to 200 in joiner.c

Thanks for trying Eric.  Based on your email I did some more testing
and thus far I've
only recreated this on x86_64 arches, not on i386.  Which arch did you
try it on?

> Is your network a 100Mb one or Gigabit ?
> Try to slow down your joiner ?
> (Could be a flood of IGMP messages your router/switch cannot cope with)
>
> Please describe your "rock solid" network setup (kind of network adapters you have, kind of router...)

The problem originally manifested itself at work on a 24-core Dell
server with 6 NICs.   The network
is gigabit with a Cisco 4900 switch.  I recreated it in my basement on
my little white-box
system and a cheap netgear switch.  The NIC at work is Intel e1000e
driver, the one
at home is also e1000.

> If using tcpdump to force promiscuous mode on the device also triggers packet losses ?
>
> (see also ifconfig ethX promisc|allmulti)

I haven't had a chance to play with promiscuous yet...

-- 
Dave B

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: IGMP Join dropping multicast packets
  2009-03-16  2:04   ` Dave Boutcher
@ 2009-03-16 19:01     ` Eric Dumazet
  2009-03-17  7:08       ` Eric Dumazet
  2009-03-18  3:50       ` Dave Boutcher
  0 siblings, 2 replies; 12+ messages in thread
From: Eric Dumazet @ 2009-03-16 19:01 UTC (permalink / raw)
  To: Dave Boutcher; +Cc: netdev

Dave Boutcher a écrit :
> On Sat, Mar 14, 2009 at 9:37 PM, Eric Dumazet <dada1@cosmosbay.com> wrote:
>> Dave Boutcher a écrit :
>>> I'm running into an interesting problem with joining multiple
>>> multicast feeds.  If you join multiple multicast feeds using
>>> setsockopt(...,IP_ADD_MEMBERSHIP...) it causes packets on UNRELATED
>>> multicast feeds to get dropped.  We have a multicast feed on a rock
>>> solid network, and we were very surprised to see dropped packets.  The
>>> cause was a different process/program being run by a different user
>>> joining a bunch of mulitcast feeds.
>> I could not reproduce the problem on my machines (bnx2 adapter), even if changing
>> NUMSOCK from 55 to 200 in joiner.c
> 
> Thanks for trying Eric.  Based on your email I did some more testing
> and thus far I've
> only recreated this on x86_64 arches, not on i386.  Which arch did you
> try it on?

I tried both, 32 and 64 bit kernels. No problems so far.

Could you post a linux kernel .config of a non 'working' machine, and dmesg output ?

> 
>> Is your network a 100Mb one or Gigabit ?
>> Try to slow down your joiner ?
>> (Could be a flood of IGMP messages your router/switch cannot cope with)
>>
>> Please describe your "rock solid" network setup (kind of network adapters you have, kind of router...)
> 
> The problem originally manifest itself at work on a 24-core Dell
> server with 6 NICs.   The network
> is gigabit with a Cisco 4900 switch.  I recreated it in my basement on
> my little white-box
> system and a cheap netgear switch.  The NIC at work is Intel e1000e
> driver, the one
> at home is also e1000.
> 
>> If using tcpdump to force promiscuous mode on the device also triggers packet losses ?
>>
>> (see also ifconfig ethX promisc|allmulti)
> 
> I haven't had a chance to play with promiscuous yet...
> 



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: IGMP Join dropping multicast packets
  2009-03-16 19:01     ` Eric Dumazet
@ 2009-03-17  7:08       ` Eric Dumazet
  2009-03-18  3:50       ` Dave Boutcher
  1 sibling, 0 replies; 12+ messages in thread
From: Eric Dumazet @ 2009-03-17  7:08 UTC (permalink / raw)
  To: Dave Boutcher; +Cc: netdev

Eric Dumazet a écrit :
> Dave Boutcher a écrit :
>> On Sat, Mar 14, 2009 at 9:37 PM, Eric Dumazet <dada1@cosmosbay.com> wrote:
>>> Dave Boutcher a écrit :
>>>> I'm running into an interesting problem with joining multiple
>>>> multicast feeds.  If you join multiple multicast feeds using
>>>> setsockopt(...,IP_ADD_MEMBERSHIP...) it causes packets on UNRELATED
>>>> multicast feeds to get dropped.  We have a multicast feed on a rock
>>>> solid network, and we were very surprised to see dropped packets.  The
>>>> cause was a different process/program being run by a different user
>>>> joining a bunch of mulitcast feeds.
>>> I could not reproduce the problem on my machines (bnx2 adapter), even if changing
>>> NUMSOCK from 55 to 200 in joiner.c
>> Thanks for trying Eric.  Based on your email I did some more testing
>> and thus far I've
>> only recreated this on x86_64 arches, not on i386.  Which arch did you
>> try it on?
> 
> I tried both, 32 and 64 bit kernels. No problems so far.
> 
> Could you post a linux kernel .config of a non 'working' machine, and dmesg output ?
> 

Also, is using a third machine to start your joiner program able to trigger
packet losses too ?



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: IGMP Join dropping multicast packets
  2009-03-16 19:01     ` Eric Dumazet
  2009-03-17  7:08       ` Eric Dumazet
@ 2009-03-18  3:50       ` Dave Boutcher
  2009-03-18  7:38         ` Eric Dumazet
  2009-03-18 17:24         ` Brandeburg, Jesse
  1 sibling, 2 replies; 12+ messages in thread
From: Dave Boutcher @ 2009-03-18  3:50 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev

On Mon, Mar 16, 2009 at 2:01 PM, Eric Dumazet <dada1@cosmosbay.com> wrote:
> Dave Boutcher a écrit :
>> On Sat, Mar 14, 2009 at 9:37 PM, Eric Dumazet <dada1@cosmosbay.com> wrote:
>>> Dave Boutcher a écrit :
>>>> I'm running into an interesting problem with joining multiple
>>>> multicast feeds.  If you join multiple multicast feeds using
>>>> setsockopt(...,IP_ADD_MEMBERSHIP...) it causes packets on UNRELATED
>>>> multicast feeds to get dropped.  We have a multicast feed on a rock
>>>> solid network, and we were very surprised to see dropped packets.  The
>>>> cause was a different process/program being run by a different user
>>>> joining a bunch of mulitcast feeds.
>>> I could not reproduce the problem on my machines (bnx2 adapter), even if changing
>>> NUMSOCK from 55 to 200 in joiner.c
>>
>> Thanks for trying Eric.  Based on your email I did some more testing
>> and thus far I've
>> only recreated this on x86_64 arches, not on i386.  Which arch did you
>> try it on?
>
> I tried both, 32 and 64 bit kernels. No problems so far.
>
> Could you post a linux kernel .config of a non 'working' machine, and dmesg output ?

Eric, based on your inability to recreate this, I tried on some other
hardware I had lying around that has an AMD chipset built-in NIC.
I could not recreate the problem on that hardware.  I'm starting to
think this is an e1000 problem.  In both the e1000 and e1000e
drivers they do the following logic:

      /* clear the old settings from the multicast hash table */

       for (i = 0; i < mta_reg_count; i++) {
               E1000_WRITE_REG_ARRAY(hw, MTA, i, 0);
               E1000_WRITE_FLUSH();
       }

       /* load any remaining addresses into the hash table */

       for (; mc_ptr; mc_ptr = mc_ptr->next) {
               hash_value = e1000_hash_mc_addr(hw, mc_ptr->da_addr);
               e1000_mta_set(hw, hash_value);
       }

There's clearly a window where the NIC doesn't have the multicast
addresses loaded.  This may just be broken-as-designed.  If anyone
else happens to have some e1000 hardware and wants to see if you
can recreate this, I'd be curious.

Some other notes just FYI...

- RcvbufErrors in /proc/net/snmp doesn't get incremented when this happens
- there are no messages in dmesg
- frames get dropped when the program calls exit() and all the sockets
get closed
  (and multicast joins dropped) as well as when the ADD_MEMBERSHIPs happen
- The problem happens even when adding a sleep(1) in between each of the
  ADD_MEMBERSHIP calls.

-- 
Dave B

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: IGMP Join dropping multicast packets
  2009-03-18  3:50       ` Dave Boutcher
@ 2009-03-18  7:38         ` Eric Dumazet
  2009-03-18 17:24         ` Brandeburg, Jesse
  1 sibling, 0 replies; 12+ messages in thread
From: Eric Dumazet @ 2009-03-18  7:38 UTC (permalink / raw)
  To: Dave Boutcher; +Cc: netdev, Brandeburg, Jesse, jeffrey.t.kirsher, david.graham

Dave Boutcher a écrit :
> On Mon, Mar 16, 2009 at 2:01 PM, Eric Dumazet <dada1@cosmosbay.com> wrote:
>> Dave Boutcher a écrit :
>>> On Sat, Mar 14, 2009 at 9:37 PM, Eric Dumazet <dada1@cosmosbay.com> wrote:
>>>> Dave Boutcher a écrit :
>>>>> I'm running into an interesting problem with joining multiple
>>>>> multicast feeds.  If you join multiple multicast feeds using
>>>>> setsockopt(...,IP_ADD_MEMBERSHIP...) it causes packets on UNRELATED
>>>>> multicast feeds to get dropped.  We have a multicast feed on a rock
>>>>> solid network, and we were very surprised to see dropped packets.  The
>>>>> cause was a different process/program being run by a different user
>>>>> joining a bunch of mulitcast feeds.
>>>> I could not reproduce the problem on my machines (bnx2 adapter), even if changing
>>>> NUMSOCK from 55 to 200 in joiner.c
>>> Thanks for trying Eric.  Based on your email I did some more testing
>>> and thus far I've
>>> only recreated this on x86_64 arches, not on i386.  Which arch did you
>>> try it on?
>> I tried both, 32 and 64 bit kernels. No problems so far.
>>
>> Could you post a linux kernel .config of a non 'working' machine, and dmesg output ?
> 
> Eric, based on your inability to recreate this, I tried on some other
> hardware I had lying around that has an AMD chipset built-in NIC.
> I could not recreate the problem on that hardware.  I'm starting to
> think this is an e1000 problem.  In both the e1000 and e1000e
> drivers they do the following logic:
> 
>       /* clear the old settings from the multicast hash table */
> 
>        for (i = 0; i < mta_reg_count; i++) {
>                E1000_WRITE_REG_ARRAY(hw, MTA, i, 0);
>                E1000_WRITE_FLUSH();
>        }
> 
>        /* load any remaining addresses into the hash table */
> 
>        for (; mc_ptr; mc_ptr = mc_ptr->next) {
>                hash_value = e1000_hash_mc_addr(hw, mc_ptr->da_addr);
>                e1000_mta_set(hw, hash_value);
>        }
> 
> There's clearly a window where the NIC doesn't have the multicast
> addresses loaded.  This may just be broken-as-designed.  If anyone
> else happens to have some e1000 hardware and wants to see if you
> can recreate this, I'd be curious.
> 

Ouch, you are probably right, this code needs a change.

tg3 for example has a loop building hash values in a local array,
then a write of this array on NIC.

                for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count;
                     i++, mclist = mclist->next) {

                        crc = calc_crc (mclist->dmi_addr, ETH_ALEN);
                        bit = ~crc & 0x7f;
                        regidx = (bit & 0x60) >> 5;
                        bit &= 0x1f;
                        mc_filter[regidx] |= (1 << bit);
                }

                tw32(MAC_HASH_REG_0, mc_filter[0]);
                tw32(MAC_HASH_REG_1, mc_filter[1]);
                tw32(MAC_HASH_REG_2, mc_filter[2]);
                tw32(MAC_HASH_REG_3, mc_filter[3]);
        }

Other example , on bnx2, same logic :

               memset(mc_filter, 0, 4 * NUM_MC_HASH_REGISTERS);

                for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count;
                     i++, mclist = mclist->next) {

                        crc = ether_crc_le(ETH_ALEN, mclist->dmi_addr);
                        bit = crc & 0xff;
                        regidx = (bit & 0xe0) >> 5;
                        bit &= 0x1f;
                        mc_filter[regidx] |= (1 << bit);
                }

                for (i = 0; i < NUM_MC_HASH_REGISTERS; i++) {
                        REG_WR(bp, BNX2_EMAC_MULTICAST_HASH0 + (i * 4),
                               mc_filter[i]);
                }



> Some other notes just FYI...
> 
> - RcvbufErrors in /proc/net/snmp doesn't get incremented when this happens
> - there are no messages in dmesg
> - frames get dropped when the program calls exit() and all the sockets
> get closed
>   (and multicast joins dropped) as well as when the ADD_MEMBERSHIPs happen
> - The problem happens even when adding a sleep(1) in between each of the
>   ADD_MEMBERSHIP calls.
> 



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: IGMP Join dropping multicast packets
  2009-03-18  3:50       ` Dave Boutcher
  2009-03-18  7:38         ` Eric Dumazet
@ 2009-03-18 17:24         ` Brandeburg, Jesse
  2009-03-19  1:48           ` Dave Boutcher
                             ` (2 more replies)
  1 sibling, 3 replies; 12+ messages in thread
From: Brandeburg, Jesse @ 2009-03-18 17:24 UTC (permalink / raw)
  To: Dave Boutcher; +Cc: Eric Dumazet, netdev, jesse.brandeburg, e1000-devel

On Tue, 17 Mar 2009, Dave Boutcher wrote:
> Eric, based on your inability to recreate this, I tried on some other
> hardware I had lying around that has an AMD chipset built-in NIC.
> I could not recreate the problem on that hardware.  I'm starting to
> think this is an e1000 problem.  In both the e1000 and e1000e
> drivers they do the following logic:
> 
>       /* clear the old settings from the multicast hash table */
> 
>        for (i = 0; i < mta_reg_count; i++) {
>                E1000_WRITE_REG_ARRAY(hw, MTA, i, 0);
>                E1000_WRITE_FLUSH();
>        }
> 
>        /* load any remaining addresses into the hash table */
> 
>        for (; mc_ptr; mc_ptr = mc_ptr->next) {
>                hash_value = e1000_hash_mc_addr(hw, mc_ptr->da_addr);
>                e1000_mta_set(hw, hash_value);
>        }
> 
> There's clearly a window where the NIC doesn't have the multicast
> addresses loaded.  This may just be broken-as-designed.  If anyone
> else happens to have some e1000 hardware and wants to see if you
> can recreate this, I'd be curious.
> 
> Some other notes just FYI...
> 
> - RcvbufErrors in /proc/net/snmp doesn't get incremented when this happens
> - there are no messages in dmesg
> - frames get dropped when the program calls exit() and all the sockets
> get closed
>   (and multicast joins dropped) as well as when the ADD_MEMBERSHIPs happen
> - The problem happens even when adding a sleep(1) in between each of the
>   ADD_MEMBERSHIP calls.

Interesting, this code has been there for eons (and probably this 
behavior) but that doesn't mean it's not a problem.

We are in the process of figuring out if there are any hardware corner 
cases to changing this code (particularly in e1000)

Initial thoughts are:
1) kcalloc an array that we then populate with the hash functions, and 
   then program every location only once (never flush)
2) only program a single hash value each time a multicast is added (bad 
   because we can't tell the difference in the list since the last time 
   the OS gave us the list)

It really seems like this should be fixable, and I agree that the driver 
behavior is far from optimal, however well entrenched.

Jesse

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: IGMP Join dropping multicast packets
  2009-03-18 17:24         ` Brandeburg, Jesse
@ 2009-03-19  1:48           ` Dave Boutcher
  2009-03-19  1:51           ` Dave Boutcher
  2009-03-19  5:46           ` David Miller
  2 siblings, 0 replies; 12+ messages in thread
From: Dave Boutcher @ 2009-03-19  1:48 UTC (permalink / raw)
  To: Brandeburg, Jesse; +Cc: e1000-devel, netdev, Eric Dumazet


[-- Attachment #1.1: Type: text/plain, Size: 2804 bytes --]

On Wed, Mar 18, 2009 at 12:24 PM, Brandeburg, Jesse <
jesse.brandeburg@intel.com> wrote:
> On Tue, 17 Mar 2009, Dave Boutcher wrote:
>> Eric, based on your inability to recreate this, I tried on some other
>> hardware I had lying around that has an AMD chipset built-in NIC.
>> I could not recreate the problem on that hardware.  I'm starting to
>> think this is an e1000 problem.  In both the e1000 and e1000e
>> drivers they do the following logic:
>>
>>       /* clear the old settings from the multicast hash table */
>>
>>        for (i = 0; i < mta_reg_count; i++) {
>>                E1000_WRITE_REG_ARRAY(hw, MTA, i, 0);
>>                E1000_WRITE_FLUSH();
>>        }
>>
>>        /* load any remaining addresses into the hash table */
>>
>>        for (; mc_ptr; mc_ptr = mc_ptr->next) {
>>                hash_value = e1000_hash_mc_addr(hw, mc_ptr->da_addr);
>>                e1000_mta_set(hw, hash_value);
>>        }
>>
>> There's clearly a window where the NIC doesn't have the multicast
>> addresses loaded.  This may just be broken-as-designed.  If anyone
>> else happens to have some e1000 hardware and wants to see if you
>> can recreate this, I'd be curious.
>>
>> Some other notes just FYI...
>>
>> - RcvbufErrors in /proc/net/snmp doesn't get incremented when this
happens
>> - there are no messages in dmesg
>> - frames get dropped when the program calls exit() and all the sockets
>> get closed
>>   (and multicast joins dropped) as well as when the ADD_MEMBERSHIPs
happen
>> - The problem happens even when adding a sleep(1) in between each of the
>>   ADD_MEMBERSHIP calls.
>
> Interesting, this code has been there for eons (and probably this
> behavior) but that doesn't mean its not a problem.
>
> We are in the process of figuring out if there are any hardware corner
> cases to changing this code (particularly in e1000)
>
> Initial thoughts are:
> 1) kcalloc an array that we then populate with the hash functions, and
>   then program every location only once (never flush)
> 2) only program a single hash value each time a multicast is added (bad
>   because we can't tell the difference in the list since the last time
>   the OS gave us the list)
Hi Jesse, thanks for the response...

If you go back in this thread I had a dead easy unprivileged user-land
testcase
that causes frame loss.  We ran into this in a production environment (and I
kind
of glossed over how long it took to figure out why the hell we were dropping
frames...you can only increase rmem_max so many times ;-)  OTOH not that
many
people use multicast, and even fewer notice a few dropped frames, so the
priority is probably lowish.

On the other other hand, I'm working in the financial trading space these
days,
where Linux is pretty much king....and they're all about multicast.

-- 
Dave B

[-- Attachment #2: Type: text/plain, Size: 433 bytes --]

------------------------------------------------------------------------------
Apps built with the Adobe(R) Flex(R) framework and Flex Builder(TM) are
powering Web 2.0 with engaging, cross-platform capabilities. Quickly and
easily build your RIAs with Flex Builder, the Eclipse(TM)based development
software that enables intelligent coding and step-through debugging.
Download the free 60 day trial. http://p.sf.net/sfu/www-adobe-com

[-- Attachment #3: Type: text/plain, Size: 164 bytes --]

_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: IGMP Join dropping multicast packets
  2009-03-18 17:24         ` Brandeburg, Jesse
  2009-03-19  1:48           ` Dave Boutcher
@ 2009-03-19  1:51           ` Dave Boutcher
  2009-03-20 20:36             ` Brandeburg, Jesse
  2009-03-19  5:46           ` David Miller
  2 siblings, 1 reply; 12+ messages in thread
From: Dave Boutcher @ 2009-03-19  1:51 UTC (permalink / raw)
  To: Brandeburg, Jesse; +Cc: e1000-devel, netdev, Eric Dumazet

On Wed, Mar 18, 2009 at 12:24 PM, Brandeburg, Jesse
<jesse.brandeburg@intel.com> wrote:
>
> On Tue, 17 Mar 2009, Dave Boutcher wrote:
> > Eric, based on your inability to recreate this, I tried on some other
> > hardware I had lying around that has an AMD chipset built-in NIC.
> > I could not recreate the problem on that hardware.  I'm starting to
> > think this is an e1000 problem.  In both the e1000 and e1000e
> > drivers they do the following logic:
> >
> >       /* clear the old settings from the multicast hash table */
> >
> >        for (i = 0; i < mta_reg_count; i++) {
> >                E1000_WRITE_REG_ARRAY(hw, MTA, i, 0);
> >                E1000_WRITE_FLUSH();
> >        }
> >
> >        /* load any remaining addresses into the hash table */
> >
> >        for (; mc_ptr; mc_ptr = mc_ptr->next) {
> >                hash_value = e1000_hash_mc_addr(hw, mc_ptr->da_addr);
> >                e1000_mta_set(hw, hash_value);
> >        }
> >
> > There's clearly a window where the NIC doesn't have the multicast
> > addresses loaded.  This may just be broken-as-designed.  If anyone
> > else happens to have some e1000 hardware and wants to see if you
> > can recreate this, I'd be curious.
> >
> > Some other notes just FYI...
> >
> > - RcvbufErrors in /proc/net/snmp doesn't get incremented when this happens
> > - there are no messages in dmesg
> > - frames get dropped when the program calls exit() and all the sockets
> > get closed
> >   (and multicast joins dropped) as well as when the ADD_MEMBERSHIPs happen
> > - The problem happens even when adding a sleep(1) in between each of the
> >   ADD_MEMBERSHIP calls.
>
> Interesting, this code has been there for eons (and probably this
> behavior) but that doesn't mean its not a problem.

Hi Jesse, thanks for the response...

If you go back in this thread I had a dead easy unprivileged user-land testcase
that causes frame loss.  We ran into this in a production environment
(and I kind
of glossed over how long it took to figure out why the hell we were dropping
frames...you can only increase rmem_max so many times ;-)  OTOH not that many
people use multicast, and even fewer notice a few dropped frames, so the
priority is probably lowish.

On the other other hand, I'm working in the financial trading space these days,
where Linux is pretty much king....and they're all about multicast.

--
Dave B

------------------------------------------------------------------------------
Apps built with the Adobe(R) Flex(R) framework and Flex Builder(TM) are
powering Web 2.0 with engaging, cross-platform capabilities. Quickly and
easily build your RIAs with Flex Builder, the Eclipse(TM)based development
software that enables intelligent coding and step-through debugging.
Download the free 60 day trial. http://p.sf.net/sfu/www-adobe-com

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: IGMP Join dropping multicast packets
  2009-03-18 17:24         ` Brandeburg, Jesse
  2009-03-19  1:48           ` Dave Boutcher
  2009-03-19  1:51           ` Dave Boutcher
@ 2009-03-19  5:46           ` David Miller
  2 siblings, 0 replies; 12+ messages in thread
From: David Miller @ 2009-03-19  5:46 UTC (permalink / raw)
  To: jesse.brandeburg; +Cc: daveboutcher, dada1, netdev, e1000-devel

From: "Brandeburg, Jesse" <jesse.brandeburg@intel.com>
Date: Wed, 18 Mar 2009 10:24:18 -0700 (Pacific Daylight Time)

> Interesting, this code has been there for eons (and probably this 
> behavior) but that doesn't mean it's not a problem.
> 
> We are in the process of figuring out if there are any hardware corner 
> cases to changing this code (particularly in e1000)
> 
> Initial thoughts are:
> 1) kcalloc an array that we then populate with the hash functions, and 
>    then program every location only once (never flush)
> 2) only program a single hash value each time a multicast is added (bad 
>    because we can't tell the difference in the list since the last time 
>    the OS gave us the list)
> 
> It really seems like this should be fixable, and I agree that the driver 
> behavior is far from optimal, however well entrenched.

Just do what tg3 does to fix this now, get fancy and "beautiful"
later.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: IGMP Join dropping multicast packets
  2009-03-19  1:51           ` Dave Boutcher
@ 2009-03-20 20:36             ` Brandeburg, Jesse
  0 siblings, 0 replies; 12+ messages in thread
From: Brandeburg, Jesse @ 2009-03-20 20:36 UTC (permalink / raw)
  To: Dave Boutcher; +Cc: e1000-devel, netdev, Eric Dumazet

On Wed, 18 Mar 2009, Dave Boutcher wrote:
> If you go back in this thread I had a dead easy unprivileged user-land testcase
> that causes frame loss.  We ran into this in a production environment (and I
> kind of glossed over how long it took to figure out why the hell we were
> dropping frames...you can only increase rmem_max so many times ;-) ).  OTOH not
> that many people use multicast, and even fewer notice a few dropped frames, so
> the priority is probably lowish.
> 
> On the other other hand, I'm working in the financial trading space these days,
> where Linux is pretty much king....and they're all about multicast.

Here is a patch proposal (RFC only); I've only briefly tested it on e1000
parts.  If you want to give it a spin, I would appreciate feedback.

[RFC] e1000: fix loss of multicast packets

From: Jesse Brandeburg <jesse.brandeburg@intel.com>

e1000 (and e1000e, igb, ixgbe, ixgb) all do a series of operations each time a
multicast address is added.  The flow goes something like:

1) stack adds one multicast address
2) stack passes the whole current list of unicast and multicast addresses to
   the driver
3) driver clears entire list in hardware
4) driver programs each multicast address using iomem in a loop

This was causing multicast packets to be lost during the reprogramming
process.

reference with test program:
http://kerneltrap.org/mailarchive/linux-netdev/2009/3/14/5160514/thread

Thanks to Dave Boutcher for his report and test program.

This driver fix prepares an array all at once in memory and programs it in 
one shot to the hardware, not requiring an "erase" cycle.  It would still 
be possible for packets to be dropped while the receiver is off during 
reprogramming.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
CC: Dave Boutcher <daveboutcher@gmail.com>
---

 drivers/net/e1000/e1000_main.c |   40 +++++++++++++++++++++++++++++++---------
 1 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 26474c9..65697ab 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2328,6 +2328,12 @@ static void e1000_set_rx_mode(struct net_device *netdev)
 	int mta_reg_count = (hw->mac_type == e1000_ich8lan) ?
 				E1000_NUM_MTA_REGISTERS_ICH8LAN :
 				E1000_NUM_MTA_REGISTERS;
+	u32 *mcarray = kzalloc(512, GFP_ATOMIC);
+
+	if (!mcarray) {
+		DPRINTK(PROBE, ERR, "memory allocation failed\n");
+		return;
+	}
 
 	if (hw->mac_type == e1000_ich8lan)
 		rar_entries = E1000_RAR_ENTRIES_ICH8LAN;
@@ -2394,22 +2400,38 @@ static void e1000_set_rx_mode(struct net_device *netdev)
 	}
 	WARN_ON(uc_ptr != NULL);
 
-	/* clear the old settings from the multicast hash table */
-
-	for (i = 0; i < mta_reg_count; i++) {
-		E1000_WRITE_REG_ARRAY(hw, MTA, i, 0);
-		E1000_WRITE_FLUSH();
-	}
-
 	/* load any remaining addresses into the hash table */
 
 	for (; mc_ptr; mc_ptr = mc_ptr->next) {
+		u32 hash_reg, hash_bit, mta;
 		hash_value = e1000_hash_mc_addr(hw, mc_ptr->da_addr);
-		e1000_mta_set(hw, hash_value);
+		hash_reg = (hash_value >> 5) & 0x7F;
+		hash_bit = hash_value & 0x1F;
+		mta = (1 << hash_bit);
+		mcarray[hash_reg] |= mta;
 	}
 
+	/* write the hash table completely, write from bottom to avoid
+	 * stupid write combining chipsets, and flushing each write */
+	for (i = mta_reg_count - 1; i >= 0 ; i--) {
+		/* If we are on an 82544 and we are trying to write an odd
+		 * offset in the MTA, save off the previous entry before
+		 * writing and restore the old value after writing.
+		 */
+		if ((hw->mac_type == e1000_82544) && ((i & 1) == 1)) {
+			u32 temp = E1000_READ_REG_ARRAY(hw, MTA, (i - 1));
+			E1000_WRITE_REG_ARRAY(hw, MTA, i, mcarray[i]);
+			E1000_WRITE_REG_ARRAY(hw, MTA, (i - 1), temp);
+		} else {
+			E1000_WRITE_REG_ARRAY(hw, MTA, i, mcarray[i]);
+		}
+	}
+	E1000_WRITE_FLUSH();
+
 	if (hw->mac_type == e1000_82542_rev2_0)
 		e1000_leave_82542_rst(adapter);
+	
+	kfree(mcarray);
 }
 
 /* Need to wait a few seconds after link up to get diagnostic information from
 

------------------------------------------------------------------------------
Apps built with the Adobe(R) Flex(R) framework and Flex Builder(TM) are
powering Web 2.0 with engaging, cross-platform capabilities. Quickly and
easily build your RIAs with Flex Builder, the Eclipse(TM)based development
software that enables intelligent coding and step-through debugging.
Download the free 60 day trial. http://p.sf.net/sfu/www-adobe-com

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2009-03-20 20:36 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-03-14 20:16 IGMP Join dropping multicast packets Dave Boutcher
2009-03-15  2:37 ` Eric Dumazet
2009-03-16  2:04   ` Dave Boutcher
2009-03-16 19:01     ` Eric Dumazet
2009-03-17  7:08       ` Eric Dumazet
2009-03-18  3:50       ` Dave Boutcher
2009-03-18  7:38         ` Eric Dumazet
2009-03-18 17:24         ` Brandeburg, Jesse
2009-03-19  1:48           ` Dave Boutcher
2009-03-19  1:51           ` Dave Boutcher
2009-03-20 20:36             ` Brandeburg, Jesse
2009-03-19  5:46           ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.