All of lore.kernel.org
 help / color / mirror / Atom feed
From: Anant Thazhemadam <anant.thazhemadam@gmail.com>
To: mst@redhat.com, jasowang@redhat.com, davem@davemloft.net,
	kuba@kernel.org, ast@kernel.org, daniel@iogearbox.net,
	hawk@kernel.org, john.fastabend@gmail.com
Cc: Anant Thazhemadam <anant.thazhemadam@gmail.com>,
	linux-kernel-mentees@lists.linuxfoundation.org,
	virtualization@lists.linux-foundation.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	bpf@vger.kernel.org
Subject: [Linux-kernel-mentees][PATCH 1/2] net: reorder members of virtnet_info for optimization
Date: Wed, 30 Sep 2020 10:47:21 +0530	[thread overview]
Message-ID: <20200930051722.389587-2-anant.thazhemadam@gmail.com> (raw)
In-Reply-To: <20200930051722.389587-1-anant.thazhemadam@gmail.com>

Analysis of the structure virtnet_info using pahole gives the
following stats.
	/* size: 256, cachelines: 4, members: 25 */
	/* sum members: 245, holes: 3, sum holes: 11 */
	/* paddings: 1, sum paddings: 4 */

Reordering the order in which the members of virtnet_info are declared
helps in packing byte holes in the middle of virtnet_info, reduce the
size required by the structure by 8 bytes, and also allows members to be
stored without overstepping the boundaries of a cacheline (for a
cacheline of size 64bytes) unnecessarily.

Analysis using pahole post-reordering of members gives the following
stats.
	/* size: 248, cachelines: 4, members: 25 */
        /* padding: 3 */
        /* paddings: 1, sum paddings: 4 */
        /* last cacheline: 56 bytes */

Signed-off-by: Anant Thazhemadam <anant.thazhemadam@gmail.com>
---
The complete analysis done by pahole can be found below.
Before the change:
		struct virtnet_info {
		struct virtio_device *     vdev;                 /*     0     8 */
		struct virtqueue *         cvq;                  /*     8     8 */
		struct net_device *        dev;                  /*    16     8 */
		struct send_queue *        sq;                   /*    24     8 */
		struct receive_queue *     rq;                   /*    32     8 */
		unsigned int               status;               /*    40     4 */
		u16                        max_queue_pairs;      /*    44     2 */
		u16                        curr_queue_pairs;     /*    46     2 */
		u16                        xdp_queue_pairs;      /*    48     2 */
		bool                       big_packets;          /*    50     1 */
		bool                       mergeable_rx_bufs;    /*    51     1 */
		bool                       has_cvq;              /*    52     1 */
		bool                       any_header_sg;        /*    53     1 */
		u8                         hdr_len;              /*    54     1 */

		/* XXX 1 byte hole, try to pack */

		struct delayed_work refill;                      /*    56    88 */

		/* XXX last struct has 4 bytes of padding */

		/* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
		struct work_struct config_work;                  /*   144    32 */
		bool                       affinity_hint_set;    /*   176     1 */

		/* XXX 7 bytes hole, try to pack */

		struct hlist_node  node;                         /*   184    16 */
		/* --- cacheline 3 boundary (192 bytes) was 8 bytes ago --- */
		struct hlist_node  node_dead;                    /*   200    16 */
		struct control_buf *       ctrl;                 /*   216     8 */
		u8                         duplex;               /*   224     1 */

		/* XXX 3 bytes hole, try to pack */

		u32                        speed;                /*   228     4 */
		long unsigned int          guest_offloads;       /*   232     8 */
		long unsigned int          guest_offloads_capable; /*   240     8 */
		struct failover *          failover;             /*   248     8 */

		/* size: 256, cachelines: 4, members: 25 */
		/* sum members: 245, holes: 3, sum holes: 11 */
		/* paddings: 1, sum paddings: 4 */
	};

After the Change:
	struct virtnet_info {
		struct virtio_device *     vdev;                 /*     0     8 */
		struct virtqueue *         cvq;                  /*     8     8 */
		struct net_device *        dev;                  /*    16     8 */
		struct send_queue *        sq;                   /*    24     8 */
		struct receive_queue *     rq;                   /*    32     8 */
		unsigned int               status;               /*    40     4 */
		u16                        max_queue_pairs;      /*    44     2 */
		u16                        curr_queue_pairs;     /*    46     2 */
		u16                        xdp_queue_pairs;      /*    48     2 */
		bool                       big_packets;          /*    50     1 */
		bool                       mergeable_rx_bufs;    /*    51     1 */
		bool                       has_cvq;              /*    52     1 */
		bool                       any_header_sg;        /*    53     1 */
		bool                       affinity_hint_set;    /*    54     1 */
		u8                         hdr_len;              /*    55     1 */
		struct control_buf *       ctrl;                 /*    56     8 */
		/* --- cacheline 1 boundary (64 bytes) --- */
		struct work_struct config_work;                  /*    64    32 */
		struct hlist_node  node;                         /*    96    16 */
		struct hlist_node  node_dead;                    /*   112    16 */
		/* --- cacheline 2 boundary (128 bytes) --- */
		long unsigned int          guest_offloads;       /*   128     8 */
		long unsigned int          guest_offloads_capable; /*   136     8 */
		struct failover *          failover;             /*   144     8 */
		struct delayed_work refill;                      /*   152    88 */

		/* XXX last struct has 4 bytes of padding */

		/* --- cacheline 3 boundary (192 bytes) was 48 bytes ago --- */
		u32                        speed;                /*   240     4 */
		u8                         duplex;               /*   244     1 */

		/* size: 248, cachelines: 4, members: 25 */
		/* padding: 3 */
		/* paddings: 1, sum paddings: 4 */
		/* last cacheline: 56 bytes */
	};

It can be seen that the size has reduced by 8 bytes, and the holes have been eliminated
as well. Also, more members of virtnet_info are accomodated within one cacheline 
(without unnecessarily crossing over the cacheline boundary).


 drivers/net/virtio_net.c | 42 ++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 263b005981bd..32747f1980ae 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -137,29 +137,29 @@ struct receive_queue {
 
 	struct napi_struct napi;
 
+	/* Name of this receive queue: input.$index */
+	char name[40];
+
 	struct bpf_prog __rcu *xdp_prog;
 
 	struct virtnet_rq_stats stats;
 
+	/* RX: fragments + linear part + virtio header */
+	struct scatterlist sg[MAX_SKB_FRAGS + 2];
+
+	/* Page frag for packet buffer allocation. */
+	struct page_frag alloc_frag;
+
 	/* Chain pages by the private ptr. */
 	struct page *pages;
 
 	/* Average packet length for mergeable receive buffers. */
 	struct ewma_pkt_len mrg_avg_pkt_len;
 
-	/* Page frag for packet buffer allocation. */
-	struct page_frag alloc_frag;
-
-	/* RX: fragments + linear part + virtio header */
-	struct scatterlist sg[MAX_SKB_FRAGS + 2];
+	struct xdp_rxq_info xdp_rxq;
 
 	/* Min single buffer size for mergeable buffers case. */
 	unsigned int min_buf_len;
-
-	/* Name of this receive queue: input.$index */
-	char name[40];
-
-	struct xdp_rxq_info xdp_rxq;
 };
 
 /* Control VQ buffers: protected by the rtnl lock */
@@ -202,33 +202,33 @@ struct virtnet_info {
 	/* Host can handle any s/g split between our header and packet data */
 	bool any_header_sg;
 
+	/* Does the affinity hint is set for virtqueues? */
+	bool affinity_hint_set;
+
 	/* Packet virtio header size */
 	u8 hdr_len;
 
-	/* Work struct for refilling if we run low on memory. */
-	struct delayed_work refill;
+	struct control_buf *ctrl;
 
 	/* Work struct for config space updates */
 	struct work_struct config_work;
 
-	/* Does the affinity hint is set for virtqueues? */
-	bool affinity_hint_set;
-
 	/* CPU hotplug instances for online & dead */
 	struct hlist_node node;
 	struct hlist_node node_dead;
 
-	struct control_buf *ctrl;
-
-	/* Ethtool settings */
-	u8 duplex;
-	u32 speed;
-
 	unsigned long guest_offloads;
 	unsigned long guest_offloads_capable;
 
 	/* failover when STANDBY feature enabled */
 	struct failover *failover;
+
+	/* Work struct for refilling if we run low on memory. */
+	struct delayed_work refill;
+
+	/* Ethtool settings */
+	u32 speed;
+	u8 duplex;
 };
 
 struct padded_vnet_hdr {
-- 
2.25.1


WARNING: multiple messages have this Message-ID (diff)
From: Anant Thazhemadam <anant.thazhemadam@gmail.com>
To: mst@redhat.com, jasowang@redhat.com, davem@davemloft.net,
	kuba@kernel.org, ast@kernel.org, daniel@iogearbox.net,
	hawk@kernel.org, john.fastabend@gmail.com
Cc: Anant Thazhemadam <anant.thazhemadam@gmail.com>,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	virtualization@lists.linux-foundation.org, bpf@vger.kernel.org,
	linux-kernel-mentees@lists.linuxfoundation.org
Subject: [Linux-kernel-mentees] [PATCH 1/2] net: reorder members of virtnet_info for optimization
Date: Wed, 30 Sep 2020 10:47:21 +0530	[thread overview]
Message-ID: <20200930051722.389587-2-anant.thazhemadam@gmail.com> (raw)
In-Reply-To: <20200930051722.389587-1-anant.thazhemadam@gmail.com>

Analysis of the structure virtnet_info using pahole gives the
following stats.
	/* size: 256, cachelines: 4, members: 25 */
	/* sum members: 245, holes: 3, sum holes: 11 */
	/* paddings: 1, sum paddings: 4 */

Reordering the order in which the members of virtnet_info are declared
helps in packing byte holes in the middle of virtnet_info, reduce the
size required by the structure by 8 bytes, and also allows members to be
stored without overstepping the boundaries of a cacheline (for a
cacheline of size 64bytes) unnecessarily.

Analysis using pahole post-reordering of members gives the following
stats.
	/* size: 248, cachelines: 4, members: 25 */
        /* padding: 3 */
        /* paddings: 1, sum paddings: 4 */
        /* last cacheline: 56 bytes */

Signed-off-by: Anant Thazhemadam <anant.thazhemadam@gmail.com>
---
The complete analysis done by pahole can be found below.
Before the change:
		struct virtnet_info {
		struct virtio_device *     vdev;                 /*     0     8 */
		struct virtqueue *         cvq;                  /*     8     8 */
		struct net_device *        dev;                  /*    16     8 */
		struct send_queue *        sq;                   /*    24     8 */
		struct receive_queue *     rq;                   /*    32     8 */
		unsigned int               status;               /*    40     4 */
		u16                        max_queue_pairs;      /*    44     2 */
		u16                        curr_queue_pairs;     /*    46     2 */
		u16                        xdp_queue_pairs;      /*    48     2 */
		bool                       big_packets;          /*    50     1 */
		bool                       mergeable_rx_bufs;    /*    51     1 */
		bool                       has_cvq;              /*    52     1 */
		bool                       any_header_sg;        /*    53     1 */
		u8                         hdr_len;              /*    54     1 */

		/* XXX 1 byte hole, try to pack */

		struct delayed_work refill;                      /*    56    88 */

		/* XXX last struct has 4 bytes of padding */

		/* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
		struct work_struct config_work;                  /*   144    32 */
		bool                       affinity_hint_set;    /*   176     1 */

		/* XXX 7 bytes hole, try to pack */

		struct hlist_node  node;                         /*   184    16 */
		/* --- cacheline 3 boundary (192 bytes) was 8 bytes ago --- */
		struct hlist_node  node_dead;                    /*   200    16 */
		struct control_buf *       ctrl;                 /*   216     8 */
		u8                         duplex;               /*   224     1 */

		/* XXX 3 bytes hole, try to pack */

		u32                        speed;                /*   228     4 */
		long unsigned int          guest_offloads;       /*   232     8 */
		long unsigned int          guest_offloads_capable; /*   240     8 */
		struct failover *          failover;             /*   248     8 */

		/* size: 256, cachelines: 4, members: 25 */
		/* sum members: 245, holes: 3, sum holes: 11 */
		/* paddings: 1, sum paddings: 4 */
	};

After the Change:
	struct virtnet_info {
		struct virtio_device *     vdev;                 /*     0     8 */
		struct virtqueue *         cvq;                  /*     8     8 */
		struct net_device *        dev;                  /*    16     8 */
		struct send_queue *        sq;                   /*    24     8 */
		struct receive_queue *     rq;                   /*    32     8 */
		unsigned int               status;               /*    40     4 */
		u16                        max_queue_pairs;      /*    44     2 */
		u16                        curr_queue_pairs;     /*    46     2 */
		u16                        xdp_queue_pairs;      /*    48     2 */
		bool                       big_packets;          /*    50     1 */
		bool                       mergeable_rx_bufs;    /*    51     1 */
		bool                       has_cvq;              /*    52     1 */
		bool                       any_header_sg;        /*    53     1 */
		bool                       affinity_hint_set;    /*    54     1 */
		u8                         hdr_len;              /*    55     1 */
		struct control_buf *       ctrl;                 /*    56     8 */
		/* --- cacheline 1 boundary (64 bytes) --- */
		struct work_struct config_work;                  /*    64    32 */
		struct hlist_node  node;                         /*    96    16 */
		struct hlist_node  node_dead;                    /*   112    16 */
		/* --- cacheline 2 boundary (128 bytes) --- */
		long unsigned int          guest_offloads;       /*   128     8 */
		long unsigned int          guest_offloads_capable; /*   136     8 */
		struct failover *          failover;             /*   144     8 */
		struct delayed_work refill;                      /*   152    88 */

		/* XXX last struct has 4 bytes of padding */

		/* --- cacheline 3 boundary (192 bytes) was 48 bytes ago --- */
		u32                        speed;                /*   240     4 */
		u8                         duplex;               /*   244     1 */

		/* size: 248, cachelines: 4, members: 25 */
		/* padding: 3 */
		/* paddings: 1, sum paddings: 4 */
		/* last cacheline: 56 bytes */
	};

It can be seen that the size has reduced by 8 bytes, and the holes have been eliminated
as well. Also, more members of virtnet_info are accomodated within one cacheline 
(without unnecessarily crossing over the cacheline boundary).


 drivers/net/virtio_net.c | 42 ++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 263b005981bd..32747f1980ae 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -137,29 +137,29 @@ struct receive_queue {
 
 	struct napi_struct napi;
 
+	/* Name of this receive queue: input.$index */
+	char name[40];
+
 	struct bpf_prog __rcu *xdp_prog;
 
 	struct virtnet_rq_stats stats;
 
+	/* RX: fragments + linear part + virtio header */
+	struct scatterlist sg[MAX_SKB_FRAGS + 2];
+
+	/* Page frag for packet buffer allocation. */
+	struct page_frag alloc_frag;
+
 	/* Chain pages by the private ptr. */
 	struct page *pages;
 
 	/* Average packet length for mergeable receive buffers. */
 	struct ewma_pkt_len mrg_avg_pkt_len;
 
-	/* Page frag for packet buffer allocation. */
-	struct page_frag alloc_frag;
-
-	/* RX: fragments + linear part + virtio header */
-	struct scatterlist sg[MAX_SKB_FRAGS + 2];
+	struct xdp_rxq_info xdp_rxq;
 
 	/* Min single buffer size for mergeable buffers case. */
 	unsigned int min_buf_len;
-
-	/* Name of this receive queue: input.$index */
-	char name[40];
-
-	struct xdp_rxq_info xdp_rxq;
 };
 
 /* Control VQ buffers: protected by the rtnl lock */
@@ -202,33 +202,33 @@ struct virtnet_info {
 	/* Host can handle any s/g split between our header and packet data */
 	bool any_header_sg;
 
+	/* Does the affinity hint is set for virtqueues? */
+	bool affinity_hint_set;
+
 	/* Packet virtio header size */
 	u8 hdr_len;
 
-	/* Work struct for refilling if we run low on memory. */
-	struct delayed_work refill;
+	struct control_buf *ctrl;
 
 	/* Work struct for config space updates */
 	struct work_struct config_work;
 
-	/* Does the affinity hint is set for virtqueues? */
-	bool affinity_hint_set;
-
 	/* CPU hotplug instances for online & dead */
 	struct hlist_node node;
 	struct hlist_node node_dead;
 
-	struct control_buf *ctrl;
-
-	/* Ethtool settings */
-	u8 duplex;
-	u32 speed;
-
 	unsigned long guest_offloads;
 	unsigned long guest_offloads_capable;
 
 	/* failover when STANDBY feature enabled */
 	struct failover *failover;
+
+	/* Work struct for refilling if we run low on memory. */
+	struct delayed_work refill;
+
+	/* Ethtool settings */
+	u32 speed;
+	u8 duplex;
 };
 
 struct padded_vnet_hdr {
-- 
2.25.1

_______________________________________________
Linux-kernel-mentees mailing list
Linux-kernel-mentees@lists.linuxfoundation.org
https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees

  reply	other threads:[~2020-09-30  5:17 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-30  5:17 [Linux-kernel-mentees][PATCH 0/2] reorder members of structures in virtio_net for optimization Anant Thazhemadam
2020-09-30  5:17 ` [Linux-kernel-mentees] [PATCH " Anant Thazhemadam
2020-09-30  5:17 ` Anant Thazhemadam [this message]
2020-09-30  5:17   ` [Linux-kernel-mentees] [PATCH 1/2] net: reorder members of virtnet_info " Anant Thazhemadam
2020-09-30  5:17 ` [Linux-kernel-mentees][PATCH 2/2] net: reorder members of receive_queue in virtio_net " Anant Thazhemadam
2020-09-30  5:17   ` [Linux-kernel-mentees] [PATCH " Anant Thazhemadam
2020-10-03  2:06 ` [Linux-kernel-mentees][PATCH 0/2] reorder members of structures " David Miller
2020-10-03  2:06   ` David Miller
2020-10-03  2:06   ` [Linux-kernel-mentees] [PATCH " David Miller
2020-10-03 18:43   ` [Linux-kernel-mentees][PATCH " Michael S. Tsirkin
2020-10-03 18:43     ` Michael S. Tsirkin
2020-10-03 18:43     ` [Linux-kernel-mentees] [PATCH " Michael S. Tsirkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200930051722.389587-2-anant.thazhemadam@gmail.com \
    --to=anant.thazhemadam@gmail.com \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=hawk@kernel.org \
    --cc=jasowang@redhat.com \
    --cc=john.fastabend@gmail.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel-mentees@lists.linuxfoundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.