All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mihail Dakov <mihail.dakov@ng4t.com>
To: netdev@vger.kernel.org
Subject: AF_PACKET: tx_ring mirrored in rx_ring?
Date: Mon, 21 Jul 2014 15:38:49 +0200	[thread overview]
Message-ID: <53CD17E9.6000102@ng4t.com> (raw)
In-Reply-To: <53CD1326.5090006@ng4t.com>

[-- Attachment #1: Type: text/plain, Size: 1662 bytes --]


Hello guys,

I am having trouble using the RX/TX ring buffers for AF_PACKET sockets. 
I create two sockets (one for rx, one for tx) and bind both sockets to 
the same interface. According to the docs you can create a socket per 
direction or a single socket for both directions (allocating double the 
memory needed for a ring buffer, and then mapping first the rx and then 
the tx buffer). In this case I opted for creating two sockets, one per 
direction. The problem is that when I use the tx_ring to send over the 
pf_socket I see those messages "mirrored" in the rx_ring buffer, which is 
not the expected behavior for my application. In order to reproduce the 
issue I simplified my application into a smaller one. Then I send a 
manually created ping message with adjusted MAC and IP addresses so that a 
remote machine in my local network answers it. I see the ping request 
twice (once in the tx_ring and once in the rx_ring), which I think is 
not the expected behavior. This application was tested on kernel 
3.14.12-1 and was compiled with gcc (Debian 4.8.3-5) and on kernel 
3.2.0-52-lowlatency with compiler gcc (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3.


So some questions have arisen:
  1. Is this normal behavior? If it is, why? I mean, if I use a socket 
per direction I expect to see only packets for that direction on the 
corresponding socket, right?
  2. Could you provide some more insight into why this "problem" is 
happening? Am I doing it wrong? Did I get it wrong (the whole ring 
buffer in af_packets)? Am I using the wrong settings?

I have attached the simple program which should reproduce the issue.


--
Mihail Dakov
mihail.dakov@ng4t.com


[-- Attachment #2: pftest.cpp --]
[-- Type: text/x-c++src, Size: 9585 bytes --]

#include <cstdio>
#include <cstdint>
#include <cstring>
#include <cstdlib>

#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/socket.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>
#include <net/if.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/sockios.h>
#include <errno.h>
#include <signal.h>
#include <unistd.h>

// Size in bytes of one ring block: 4096 << 8 = 1 MiB. Used for both rings.
#define BLOCK_SZ (4096 << 8)
// Size in bytes of one ring frame; also the stride used to locate TX frames.
#define FRAME_SZ 2048

// Offset from the start of the IP header to the UDP header that walkrxring()
// assumes when printing ports (i.e. an IPv4 header without options).
#define IP_HLEN 20

// Book-keeping for the TPACKET_V3 receive ring.
struct ring3_t
{
	uint8_t *rx_buf;         // mmap()ed ring memory, NULL until setuprxring() maps it
	uint32_t brx;            // current block idx
	struct tpacket_req3 req; // ring geometry; left untouched by the constructor

	// Start with no mapping and the block index at the beginning of the ring.
	ring3_t() : rx_buf(NULL), brx(0)
	{
	}
};

// Book-keeping for a TPACKET_V1/V2 style ring (used here for the TX ring).
struct ring_t
{
	uint8_t *rx_buf;        // RX mapping, NULL when not mapped
	uint8_t *tx_buf;        // TX mapping, NULL until setuptxring() maps it
	uint32_t ftx;           // current frame idx for tx
	uint32_t frx;           // current frame idx for rx
	struct tpacket_req req; // ring geometry; left untouched by the constructor

	// Start with no mappings and both frame indices at the beginning.
	ring_t() : rx_buf(NULL), tx_buf(NULL), ftx(0), frx(0)
	{
	}
};

// Returns non-zero when the TP_STATUS_USER bit is set in the block status,
// i.e. the block has been handed over to user space; 0 otherwise.
static int rx_kernel_ready(struct tpacket_hdr_v1 *hdr)
{
	const uint32_t status = hdr->block_status;
	return status & TP_STATUS_USER;
}

// Hand an RX block back to the kernel by setting its status to
// TP_STATUS_KERNEL.
//
// The ring is memory shared with the kernel, so every read of packet data in
// the block must be finished before the ownership change becomes visible.
// The full barrier in front of the store keeps the compiler and CPU from
// moving those reads past it.
static void rx_user_ready(struct tpacket_hdr_v1 *hdr)
{
	__sync_synchronize();
	hdr->block_status = TP_STATUS_KERNEL;
}

// Returns 1 when the TX frame carries neither TP_STATUS_SEND_REQUEST nor
// TP_STATUS_SENDING, i.e. it is not queued or in flight and may be refilled;
// returns 0 otherwise.
static int tx_kernel_ready(struct tpacket2_hdr *hdr)
{
	const uint32_t busy_bits = TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING;
	return (hdr->tp_status & busy_bits) == 0;
}

// Mark a TX frame as ready to be sent by setting its status to
// TP_STATUS_SEND_REQUEST.
//
// The frame lives in memory shared with the kernel, so the payload and the
// length fields written by the caller must be visible before the status
// changes. The full barrier in front of the store keeps the compiler and CPU
// from sinking those writes below it.
static void tx_user_ready(struct tpacket2_hdr *hdr)
{
	__sync_synchronize();
	hdr->tp_status = TP_STATUS_SEND_REQUEST;
}
// Forward declaration: signal_handler() calls filltxring() before its definition.
void filltxring(int sock, uint32_t *frame, ring_t *ring, uint8_t *data, uint32_t len);

// seq:       not referenced anywhere else in this file.
// frametx:   index of the next TX frame to fill; passed to filltxring() by
//            signal_handler(), which advances it.
// flushneed: set to 1 by filltxring() once a frame is queued, reset to 0 by
//            flushtx().
// NOTE(review): flushneed and frametx are written from the SIGHUP handler and
// read in the main loop without volatile/sig_atomic_t - confirm this is intended.
uint32_t seq = 0, frametx = 0, flushneed = 0;
// RX and TX socket descriptors, plus the byte size (16 MiB each) that main()
// uses both for the socket buffers and for the ring mappings.
int sockrx,socktx, rbuf = 16777216, sbuf = 16777216;
// TX ring state, global so that the SIGHUP handler can reach it.
ring_t txring;

// SIGHUP handler: queues one hard-coded ICMP echo request in the TX ring.
// Any other signal number is ignored.
//
// The frame is a 98 byte Ethernet/IPv4/ICMP packet with placeholder MAC
// (AA.., BB..) and IP (CC.., DD..) addresses. It is copied into a writable
// scratch buffer because filltxring() takes a non-const pointer.
//
// NOTE(review): filltxring() calls fprintf(), which is not async-signal-safe;
// running it from a signal handler is only tolerable in a test program.
void signal_handler(int signum)
{
	if (signum != SIGHUP)
		return;

	static const uint8_t ping[] = {
		0xAA,0xAA,0xAA,0xAA,0xAA,0xAA,0xBB,0xBB,0xBB,0xBB,0xBB,0xBB,0x08,0x00,0x45,0x00,
		0x00,0x54,0xb3,0x31,0x40,0x00,0x40,0x01,0x9f,0x18,0xCC,0xCC,0xCC,0xCC,0xDD,0xDD,
		0xDD,0xDD,0x08,0x00,0x71,0xae,0x02,0x35,0x00,0x01,0xed,0xda,0xcc,0x53,0x00,0x00,
		0x00,0x00,0x00,0x1a,0x0b,0x00,0x00,0x00,0x00,0x00,0x10,0x11,0x12,0x13,0x14,0x15,
		0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0x25,
		0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0x34,0x35,
		0x36,0x37
	};
	const uint32_t ping_len = sizeof(ping); // 98 bytes

	uint8_t scratch[128];
	std::memmove(scratch, ping, ping_len);
	filltxring(socktx, &frametx, &txring, scratch, ping_len);
}

// Kick the kernel to transmit whatever has been queued in the TX ring.
//
// Does nothing unless flushneed is set. Otherwise issues a zero-length,
// non-blocking sendto() on sock, reports a failure on stderr, and clears
// flushneed whether or not the call succeeded.
void flushtx(int sock)
{
	if (!flushneed)
		return;

	const ssize_t sent = sendto(sock, NULL, 0, MSG_DONTWAIT, NULL, 0);
	if (sent < 0)
		fprintf(stderr, "flushtx: sendto() error %s\n",strerror(errno));

	flushneed = 0;
}

// Queue one packet in the TPACKET_V2 TX ring.
//
//   sock   unused; the frame is only queued here, flushtx() triggers sending
//   frame  in/out: index of the frame to fill; advanced (modulo
//          ring->req.tp_frame_nr) when the packet is queued
//   ring   TX ring whose tx_buf mapping is written
//   data   packet bytes, starting at the Ethernet header
//   len    number of bytes in data
//
// The packet is dropped silently when the frame at *frame is still queued or
// being sent. It is rejected with a message when it does not fit in the
// payload area of a frame; without that check an oversized packet would be
// copied over the following frame or past the end of the mapping.
// On success flushneed is set so that flushtx() issues the send.
void filltxring(int sock, uint32_t *frame, ring_t *ring, uint8_t *data, uint32_t len)
{
	(void)sock;

	// In a TX frame the packet bytes start right after the aligned header.
	const uint32_t data_off = TPACKET2_HDRLEN - sizeof(struct sockaddr_ll);
	const uint32_t max_len = FRAME_SZ - data_off;

	if (len > max_len)
	{
		fprintf(stderr, "filltxring: packet of %u bytes exceeds frame payload of %u bytes\n",
			len, max_len);
		return;
	}

	uint8_t *base = (uint8_t*)(ring->tx_buf+(*frame)*FRAME_SZ);
	struct tpacket2_hdr *hdr = (struct tpacket2_hdr *)base;
	if (!tx_kernel_ready(hdr))
		return;

	uint8_t *buf = base+data_off;
	std::memmove(buf,data,len);

	// Log what is about to be sent. The bytes after the Ethernet header are
	// decoded as an IPv4 header without checking the ethertype.
	struct ethhdr *ethh = (struct ethhdr*)buf;
	uint8_t *smac = (uint8_t*)ethh->h_source;
	uint8_t *dmac = (uint8_t*)ethh->h_dest;
	struct iphdr *iph = (struct iphdr*)&buf[ETH_HLEN];
	fprintf(stderr,"ftx:%u,len:%u################"
		"smac=%02x:%02x:%02x:%02x:%02x:%02x,"
		"dmac=%02x:%02x:%02x:%02x:%02x:%02x,"
		"sa:%08x,da:%08x\n",
		*frame,len,
		smac[0],smac[1],smac[2],smac[3],smac[4],smac[5],
		dmac[0],dmac[1],dmac[2],dmac[3],dmac[4],dmac[5],
		iph->saddr,iph->daddr);

	hdr->tp_len = len;
	hdr->tp_snaplen = len;
	// Publish the frame only after payload and lengths are in place.
	tx_user_ready(hdr);
	flushneed = 1;
	//next frame
	*frame = ((*frame) + 1) % ring->req.tp_frame_nr;
}

// Drain the TPACKET_V3 RX ring, printing one line per packet.
//
//   sock   unused
//   block  in/out: index of the block to inspect first; advanced (modulo
//          ring->req.tp_block_nr) for every block that is consumed
//   ring   RX ring whose rx_buf mapping is read
//
// Blocks are processed in order until one is found that has not been handed
// to user space, at which point the function returns. Each consumed block is
// given back to the kernel. Only packets with a snaplen below FRAME_SZ are
// printed, and the bytes after the Ethernet header are decoded as IPv4 + UDP
// without looking at the ethertype or IP protocol. No filtering on packet
// direction is done here, so every packet delivered to the ring is printed.
void walkrxring(int sock, int *block, ring3_t *ring)
{
	for (;;)
	{
		struct tpacket_block_desc *desc =
			(struct tpacket_block_desc*)(ring->rx_buf+ (*block)*BLOCK_SZ);

		// Stop at the first block the kernel has not released yet.
		if (!rx_kernel_ready(&desc->hdr.bh1))
			return;

		// Packets are chained inside the block via tp_next_offset.
		uint8_t *cursor = (uint8_t*)desc+desc->hdr.bh1.offset_to_first_pkt;
		for (uint32_t idx=0;idx<desc->hdr.bh1.num_pkts;idx++)
		{
			struct tpacket3_hdr *pkt = (struct tpacket3_hdr*)cursor;
			uint8_t *payload = cursor+pkt->tp_mac;

			if (pkt->tp_snaplen < FRAME_SZ)
			{
				struct ethhdr *eth = (struct ethhdr*)payload;
				uint8_t *src = (uint8_t*)eth->h_source;
				uint8_t *dst = (uint8_t*)eth->h_dest;
				struct iphdr *ip = (struct iphdr*)&payload[ETH_HLEN];
				struct udphdr *udp = (struct udphdr*)&payload[ETH_HLEN+IP_HLEN];
				fprintf(stderr,"p:%d,len:%d,nump:%d,blk:%d###"
					"smac=%02x:%02x:%02x:%02x:%02x:%02x,"
					"dmac=%02x:%02x:%02x:%02x:%02x:%02x,"
					"sa:%08x,da:%08x,sp:%u,dp:%u\n",
					idx,pkt->tp_snaplen,desc->hdr.bh1.num_pkts,*block,
					src[0],src[1],src[2],src[3],src[4],src[5],
					dst[0],dst[1],dst[2],dst[3],dst[4],dst[5],
					ip->saddr,ip->daddr,
					ntohs(udp->source),ntohs(udp->dest));
			}
			cursor += pkt->tp_next_offset;
		}

		rx_user_ready(&desc->hdr.bh1);
		//next block
		*block = ((*block) + 1) % ring->req.tp_block_nr;
	}
}

// Close a partially configured socket and pass the error code through,
// preserving the errno of the call that actually failed.
static int pfsocket_fail(int sock, int code)
{
	const int saved_errno = errno;
	close(sock);
	errno = saved_errno;
	return code;
}

// Create and configure an AF_PACKET/SOCK_RAW socket for use with a ring.
//
//   protocol  ethertype in host byte order (e.g. ETH_P_ALL)
//   version   value for the PACKET_VERSION option (TPACKET_V2, TPACKET_V3, ...)
//   trans     true for a transmit-only socket: it is created with protocol 0
//             and addr->sll_protocol is set to 0
//   req       out: scratch ifreq; holds the result of SIOCGIFHWADDR on return
//   addr      out: link-layer address (family, ifindex, protocol) for bind()
//   devname   interface name; at most IFNAMSIZ-1 characters are used
//   rsize     receive buffer size applied with SO_RCVBUFFORCE
//   ssize     send buffer size applied with SO_SNDBUFFORCE
//
// Returns the socket descriptor, or a negative code naming the failed step:
//   -1 socket(), -2 SIOCGIFINDEX, -3 SIOCGIFHWADDR, -4 SO_RCVBUFFORCE,
//   -5 SO_SNDBUFFORCE, -6 PACKET_VERSION, -7 PACKET_LOSS.
// On every failure after socket() the descriptor is closed, so nothing leaks.
// The socket is not bound; the caller does that with addr.
int pfsocket(int protocol,
			int version,
			bool trans,
			struct ifreq *req,
			struct sockaddr_ll *addr,
			char *devname,
			int rsize,
			int ssize)
{
	int sock, discardoff = 1;
	if (trans)
		sock = socket(AF_PACKET, SOCK_RAW, 0);//Only TX
	else
		sock = socket(AF_PACKET, SOCK_RAW, htons(protocol));
	if (sock < 0)
		return -1;

	// strncpy() alone leaves the name unterminated when devname is too long.
	std::strncpy(req->ifr_ifrn.ifrn_name, devname, IFNAMSIZ - 1);
	req->ifr_ifrn.ifrn_name[IFNAMSIZ - 1] = '\0';
	if (ioctl(sock, SIOCGIFINDEX, req) < 0)
		return pfsocket_fail(sock, -2);

	// Save the index now: the next ioctl reuses the same ifreq union.
	addr->sll_family = AF_PACKET;
	addr->sll_ifindex = req->ifr_ifru.ifru_ivalue;
	if (trans)
		addr->sll_protocol = 0;//tx only
	else
		addr->sll_protocol = htons(protocol);
	addr->sll_pkttype 	= 0;
	addr->sll_halen		= 0;
	addr->sll_hatype	= 0;

	if (ioctl(sock, SIOCGIFHWADDR, req) < 0)
		return pfsocket_fail(sock, -3);
	if (setsockopt(sock,SOL_SOCKET, SO_RCVBUFFORCE,&rsize,sizeof(rsize)) < 0)
		return pfsocket_fail(sock, -4);
	if (setsockopt(sock,SOL_SOCKET, SO_SNDBUFFORCE,&ssize,sizeof(ssize)) < 0)
		return pfsocket_fail(sock, -5);
	if (setsockopt(sock, SOL_PACKET, PACKET_VERSION, &version, sizeof(version)) < 0)
		return pfsocket_fail(sock, -6);
	if (setsockopt(sock, SOL_PACKET, PACKET_LOSS, &discardoff, sizeof(discardoff)) < 0)
		return pfsocket_fail(sock, -7);
	return sock;
}

// Fill in the ring request (geometry) of a ring descriptor.
//
//   ring    pointer to a ring3_t when v3 is true, to a ring_t otherwise
//   v3      selects the TPACKET_V3 request layout
//   mmsize  total ring size in bytes; the block count is mmsize / BLOCK_SZ
//
// The request is zeroed and then set to blocks of BLOCK_SZ bytes holding
// frames of FRAME_SZ bytes. For v3 the block retire timeout is set to 1 and
// no optional features are requested (TP_FT_REQ_FILL_RXHASH is left off).
// Returns the ring pointer that was passed in.
void *slayout(void *ring, bool v3, size_t mmsize)
{
	const size_t nblocks = mmsize/BLOCK_SZ;
	const size_t frames_per_block = BLOCK_SZ/FRAME_SZ;

	if (!v3)
	{
		struct ring_t *plain = (struct ring_t *)ring;
		std::memset(&plain->req,0,sizeof(plain->req));
		plain->req.tp_block_size = BLOCK_SZ;
		plain->req.tp_block_nr = nblocks;
		plain->req.tp_frame_size = FRAME_SZ;
		plain->req.tp_frame_nr = frames_per_block*plain->req.tp_block_nr;
		return (void*)plain;
	}

	struct ring3_t *v3ring = (struct ring3_t*)ring;
	std::memset(&v3ring->req,0,sizeof(v3ring->req));
	v3ring->req.tp_block_size = BLOCK_SZ;
	v3ring->req.tp_block_nr = nblocks;
	v3ring->req.tp_frame_size = FRAME_SZ;
	v3ring->req.tp_frame_nr = frames_per_block*v3ring->req.tp_block_nr;
	v3ring->req.tp_retire_blk_tov = 1;//1ms scanning interval
	v3ring->req.tp_feature_req_word = 0;
	return (void*)v3ring;
}

// Configure and map the TPACKET_V3 RX ring of sock.
//
//   sock    packet socket whose version was already set to TPACKET_V3
//   ring    descriptor to fill; req and rx_buf are written
//   mmsize  size of the ring and of the mapping, in bytes
//
// Returns ring on success and NULL when the layout, the PACKET_RX_RING
// option or the mmap() fails. After a failed mmap() ring->rx_buf holds
// MAP_FAILED.
void *setuprxring(int sock, struct ring3_t *ring, size_t mmsize)
{
	const int prot = PROT_READ|PROT_WRITE;
	const int flags = MAP_SHARED|MAP_LOCKED;

	if (!slayout((void*)ring,true,mmsize))
		return NULL;

	if (setsockopt(sock, SOL_PACKET,PACKET_RX_RING,(void*)&ring->req,sizeof(ring->req)) < 0)
		return NULL;

	void *mapping = mmap(NULL,mmsize,prot,flags,sock,0);
	ring->rx_buf = (uint8_t*)mapping;
	return (mapping == MAP_FAILED) ? NULL : (void*)ring;
}

// Configure and map the TX ring of sock.
//
//   sock    packet socket to attach the ring to
//   ring    descriptor to fill; req and tx_buf are written
//   mmsize  size of the ring and of the mapping, in bytes
//
// Returns ring on success and NULL when the layout, the PACKET_TX_RING
// option or the mmap() fails. After a failed mmap() ring->tx_buf holds
// MAP_FAILED.
void *setuptxring(int sock, struct ring_t *ring, size_t mmsize)
{
	const int prot = PROT_READ|PROT_WRITE;
	const int flags = MAP_SHARED|MAP_LOCKED;

	if (!slayout((void*)ring,false,mmsize))
		return NULL;

	if (setsockopt(sock, SOL_PACKET, PACKET_TX_RING,(void*)&ring->req,sizeof(ring->req)) < 0)
		return NULL;

	void *mapping = mmap(NULL,mmsize,prot,flags,sock,0);
	ring->tx_buf = (uint8_t*)mapping;
	return (mapping == MAP_FAILED) ? NULL : (void*)ring;
}

// Test driver: opens a TPACKET_V3 RX socket and a TPACKET_V2 TX socket on the
// interface named by argv[1], maps a ring on each, and then loops forever.
// Every millisecond (or earlier when the RX socket becomes readable) it dumps
// newly received packets and flushes any pending TX frame. Sending SIGHUP to
// the process queues one ICMP echo request (see signal_handler()).
//
// Exit status: EXIT_FAILURE for bad arguments or a ppoll() error, otherwise
// the negative step code of the setup call that failed (pfsocket() codes
// -1..-7, -8 RX ring, -9 RX bind, -10 TX ring, -11 TX bind).
int main(int argc, char **argv)
{

	if (argc != 2)
	{
		fprintf(stderr, "Usage: %s <dev_name>\n", argv[0]);
		exit(EXIT_FAILURE);
	}

	// The name is copied into fixed IFNAMSIZ buffers, so reject anything
	// that would not fit together with its terminating NUL.
	if (std::strlen(argv[1]) >= IFNAMSIZ)
	{
		fprintf(stderr, "Device name '%s' is too long (max %d characters)\n",
			argv[1], IFNAMSIZ - 1);
		exit(EXIT_FAILURE);
	}

	struct sockaddr_ll ifa;
	struct ifreq ifr;
	char device[IFNAMSIZ];
	ring3_t rxring;

	std::memset(&ifa,0,sizeof(ifa));
	std::memset(&ifr,0,sizeof(ifr));
	std::memset(&txring,0,sizeof(txring));
	std::memset(&rxring,0,sizeof(rxring));
	std::memset(device,0,sizeof(device));

	std::strcpy(device, argv[1]);//length checked above

	sockrx = pfsocket(ETH_P_ALL,TPACKET_V3,false,&ifr,&ifa,device,rbuf,sbuf);

	if (sockrx < 0)
	{
		fprintf(stderr, "Socket rx setup failed at step %d: %s\n", sockrx, strerror(errno));
		return sockrx;
	}

	fprintf(stderr, "Socket rx(%d) created\n",sockrx);

	if (setuprxring(sockrx,&rxring,rbuf) == NULL)
	{
		fprintf(stderr, "Ring rx setup failed: %s\n", strerror(errno));
		return -8;
	}

	fprintf(stderr, "Ring rx setup done.\n");

	if (bind(sockrx,(struct sockaddr*)&ifa,sizeof(ifa)) < 0)
	{
		fprintf(stderr, "bind() rx failed: %s\n", strerror(errno));
		return -9;
	}

	fprintf(stderr, "Socket rx(%d) bound to %s\n", sockrx, device);

	// pfsocket() rewrites ifa; for the TX socket sll_protocol becomes 0.
	socktx = pfsocket(ETH_P_ALL,TPACKET_V2,true,&ifr,&ifa,device,rbuf,sbuf);

	if (socktx < 0)
	{
		fprintf(stderr, "Socket tx setup failed at step %d: %s\n", socktx, strerror(errno));
		return socktx;
	}

	fprintf(stderr, "Socket tx(%d) created\n", socktx);

	if (setuptxring(socktx,&txring,sbuf) == NULL)
	{
		fprintf(stderr, "Ring tx setup failed: %s\n", strerror(errno));
		return -10;
	}

	fprintf(stderr, "Ring tx setup done.\n");

	if (bind(socktx,(struct sockaddr*)&ifa,sizeof(ifa)) < 0)
	{
		fprintf(stderr, "bind() tx failed: %s\n", strerror(errno));
		return -11;
	}

	fprintf(stderr, "Socket tx(%d) bound to %s\n", socktx, device);

	// Only the RX socket is polled; TX is flushed once per loop iteration.
	const nfds_t nfds = 1;
	struct pollfd fds[1];
	int ret = 0, block = 0;

	fds[0].fd = sockrx;
	fds[0].events = POLLIN|POLLRDNORM|POLLERR;
	fds[0].revents = 0;

	// Empty mask: no signal is blocked while waiting in ppoll(), so SIGHUP
	// can interrupt the wait.
	sigset_t zeromask;
	struct timespec tv;

	std::memset(&tv,0,sizeof(tv));
	sigemptyset(&zeromask);

	signal(SIGHUP, signal_handler);

	while (1)
	{
		tv.tv_nsec = 1000000;//1ms
		ret = ppoll(fds,nfds,&tv,&zeromask);

		if (ret < 0 && errno == EINTR)
			continue;
		if (ret < 0)
		{
			fprintf(stderr, "ppoll() error:%s\n", strerror(errno));
			exit(EXIT_FAILURE);
		}

		//read rxring every 1ms
		walkrxring(sockrx,&block,&rxring);
		//try to flush every 1ms
		flushtx(socktx);
	}
	return 0;
}



  reply	other threads:[~2014-07-21 13:39 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-07-21 13:18 AF_PACKET: tx_ring mirrored in rx_ring? Mihail Dakov
2014-07-21 13:38 ` Mihail Dakov [this message]
2014-07-21 13:51 ` Daniel Borkmann
2014-07-21 14:40   ` Mihail Dakov
2014-07-21 14:44     ` Fwd: " Mihail Dakov
2014-07-21 15:13     ` Daniel Borkmann
2014-07-21 18:32       ` mihail.dakov
2014-07-21 22:35         ` Willem de Bruijn
2014-07-21 22:36           ` Willem de Bruijn
2014-07-22 13:39           ` Mihail Dakov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=53CD17E9.6000102@ng4t.com \
    --to=mihail.dakov@ng4t.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.