All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] mlx5 high latency observed on send operations
@ 2017-08-21  7:47 Sagi Grimberg
  2017-08-21  7:47 ` [PATCH 1/2] net/mlx5: replace memory barrier type Sagi Grimberg
                   ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Sagi Grimberg @ 2017-08-21  7:47 UTC (permalink / raw)
  To: dev; +Cc: Nelio Laranjeiro, Adrien Mazarguil

While measuring latency for a latency-critical workload running
on the mlx5 PMD, we noticed that high latency can occur because
the doorbell record update is flushed late.

This can be reproduced using the simple program [1]
against testpmd macswap fwd mode. This utility sends
a raw ethernet frame to the dpdk port and measures the
time between send and the received mirrored frame.

This patchset guarantees immediate visibility of doorbell
updates by mapping the doorbell as non-cacheable memory.
In addition, we relax the memory barrier for dma-able
memory.

Without this fix the tsc delta was 3550760-5993019 cycles
(which translates to 2-6 ms on a 1.7 GHz processor).

With the fix applied the tsc delta reduced to 17740-29663
(which translates to 9-17 us).

Shahaf Shuler (2):
  net/mlx5: replace memory barrier type
  net/mlx5: don't map doorbell register to write combining

 drivers/net/mlx5/mlx5.c      | 2 ++
 drivers/net/mlx5/mlx5_rxtx.h | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

[1]:
/*
 * compiling: gcc test.c -o test
 * run using: ./test <local_iface> <dest_mac> 
 */
#include <errno.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <arpa/inet.h>
#include <linux/if_packet.h>
#include <net/if.h>
#include <netinet/ether.h>
#include <sys/ioctl.h>
#include <sys/socket.h>

#define BUF_SIZ		1024

/*
 * Read the CPU time-stamp counter using the x86 RDTSC instruction.
 *
 * Returns the 64-bit counter value assembled from the two 32-bit halves
 * the instruction produces. Only builds on x86/x86-64 targets.
 */
static inline uint64_t rte_rdtsc(void)
{
	uint32_t lo_32;
	uint32_t hi_32;

	/*
	 * Use the __asm__ spelling: plain `asm` is a GNU extension keyword
	 * that is not recognised in strict ISO mode (-std=c99/-std=c11).
	 * The low half is read from EAX ("=a"), the high half from EDX ("=d").
	 */
	__asm__ __volatile__("rdtsc" :
			     "=a" (lo_32),
			     "=d" (hi_32));

	/* Combine explicitly instead of relying on union member layout. */
	return ((uint64_t)hi_32 << 32) | lo_32;
}

int main(int argc, char *argv[])
{
	int sockfd;
	struct ifreq if_idx;
	struct ifreq if_mac;
	int tx_len = 0;
	char sendbuf[BUF_SIZ];
	struct ether_header *eh = (struct ether_header *) sendbuf;
	struct sockaddr_ll socket_address;
	char ifname[IFNAMSIZ];
	int values[6];
	struct ether_header expected;
	uint64_t payload = 0xB16B00B5;
	uint8_t buffer[1024];
	int result;
	uint64_t before_rcv;
	uint64_t after_rcv;
	uint64_t delta;
	int numbytes;

	if (argc != 3) {
		fprintf(stderr, "device name and dest mac\n");
		return -1;
	}

	strcpy(ifname, argv[1]);
	result = sscanf(argv[2], "%x:%x:%x:%x:%x:%x",
			&values[0], &values[1], &values[2], &values[3], &values[4], &values[5]);
	if (result != 6) {
		fprintf(stderr, "invalid mac\n");
		return -1;
	}

	/* Open RAW socket to send on */
	if ((sockfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL))) == -1) {
	    perror("socket");
	}

	/* Get the index of the interface to send on */
	memset(&if_idx, 0, sizeof(struct ifreq));
	strncpy(if_idx.ifr_name, ifname, IFNAMSIZ-1);
	if (ioctl(sockfd, SIOCGIFINDEX, &if_idx) < 0)
	    perror("SIOCGIFINDEX");
	/* Get the MAC address of the interface to send on */
	memset(&if_mac, 0, sizeof(struct ifreq));
	strncpy(if_mac.ifr_name, ifname, IFNAMSIZ-1);
	if (ioctl(sockfd, SIOCGIFHWADDR, &if_mac) < 0)
	    perror("SIOCGIFHWADDR");

	/* Construct the Ethernet header */
	memset(sendbuf, 0, BUF_SIZ);
	/* Ethernet header */
	eh->ether_shost[0] = ((uint8_t *)&if_mac.ifr_hwaddr.sa_data)[0];
	eh->ether_shost[1] = ((uint8_t *)&if_mac.ifr_hwaddr.sa_data)[1];
	eh->ether_shost[2] = ((uint8_t *)&if_mac.ifr_hwaddr.sa_data)[2];
	eh->ether_shost[3] = ((uint8_t *)&if_mac.ifr_hwaddr.sa_data)[3];
	eh->ether_shost[4] = ((uint8_t *)&if_mac.ifr_hwaddr.sa_data)[4];
	eh->ether_shost[5] = ((uint8_t *)&if_mac.ifr_hwaddr.sa_data)[5];
	eh->ether_dhost[0] = values[0];
	eh->ether_dhost[1] = values[1];
	eh->ether_dhost[2] = values[2];
	eh->ether_dhost[3] = values[3];
	eh->ether_dhost[4] = values[4];
	eh->ether_dhost[5] = values[5];
	/* Ethertype field */
	eh->ether_type = htons(ETH_P_IP);
	tx_len += sizeof(struct ether_header);

	memcpy(&sendbuf[tx_len], &payload, sizeof(payload));
	tx_len += sizeof(payload);

	/* Index of the network device */
	socket_address.sll_ifindex = if_idx.ifr_ifindex;
	/* Address length*/
	socket_address.sll_halen = ETH_ALEN;
	/* Destination MAC */
	socket_address.sll_addr[0] = values[0];
	socket_address.sll_addr[1] = values[1];
	socket_address.sll_addr[2] = values[2];
	socket_address.sll_addr[3] = values[3];
	socket_address.sll_addr[4] = values[4];
	socket_address.sll_addr[5] = values[5];

	memcpy(&expected.ether_dhost, &eh->ether_shost, ETH_ALEN);
	memcpy(&expected.ether_shost, &eh->ether_dhost, ETH_ALEN);
	expected.ether_type = eh->ether_type;


	/* Send packet */
	if (sendto(sockfd, sendbuf, tx_len, 0, (struct sockaddr*)&socket_address, sizeof(struct sockaddr_ll)) < 0) {
	    printf("Send failed\n");
	    return -2;
	}

	before_rcv = rte_rdtsc();
	while (1) {
		numbytes = recvfrom(sockfd, buffer, BUF_SIZ, 0, NULL, NULL);
		if (numbytes <= 0)
			continue;
		after_rcv = rte_rdtsc();

		if (memcmp(&expected, buffer, sizeof(expected)) != 0)
			continue;

		if (memcmp(&payload, &buffer[sizeof(expected)], sizeof(payload)) == 0) {
			break;
		}

	}

	delta =  after_rcv - before_rcv;
	printf("RTT is %lu tsc \n", delta);
	return 0;
}
-- 
2.7.4

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2017-08-29 16:53 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-21  7:47 [PATCH 0/2] mlx5 high latency observed on send operations Sagi Grimberg
2017-08-21  7:47 ` [PATCH 1/2] net/mlx5: replace memory barrier type Sagi Grimberg
2017-08-23 11:39   ` Nélio Laranjeiro
2017-08-23 13:11     ` Bruce Richardson
2017-08-24  6:56       ` Shahaf Shuler
2017-08-24  9:27         ` Bruce Richardson
2017-08-21  7:47 ` [PATCH 2/2] net/mlx5: don't map doorbell register to write combining Sagi Grimberg
2017-08-23 11:03   ` Ferruh Yigit
2017-08-23 11:58     ` Shahaf Shuler
2017-08-23 12:06     ` Nélio Laranjeiro
2017-08-27  6:47 ` [PATCH v2 0/2] mlx5 high latency observed on send operations Shahaf Shuler
2017-08-27  6:47   ` [PATCH v2 1/2] net/mlx5: replace memory barrier type Shahaf Shuler
2017-08-27  6:47   ` [PATCH v2 2/2] net/mlx5: don't map doorbell register to write combining Shahaf Shuler
2017-08-29 16:53   ` [PATCH v2 0/2] mlx5 high latency observed on send operations Ferruh Yigit

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.