[PATCH] block: use the __packed attribute only on architectures where it is efficient

* [PATCH] block: use the __packed attribute only on architectures where it is efficient
@ 2024-02-06 11:14 Mikulas Patocka
  2024-02-06 14:18 ` Ming Lei
  0 siblings, 1 reply; 4+ messages in thread
From: Mikulas Patocka @ 2024-02-06 11:14 UTC (permalink / raw)
  To: Jens Axboe, Ming Lei, Christoph Hellwig
  Cc: Sagi Grimberg, Hannes Reinecke, Mark Wunderlich,
	James E.J. Bottomley, Helge Deller, John David Anglin,
	linux-block, linux-parisc

The __packed macro (expanding to __attribute__((__packed__))) specifies
that the structure has an alignment of 1. Therefore, it may be arbitrarily
misaligned. On architectures that don't have hardware support for
unaligned accesses, gcc generates very inefficient code that accesses the
structure fields byte-by-byte and assembles the result using shifts and
ors.

For example, on PA-RISC, this function is compiled to 23 instructions with
the __packed attribute and only 2 instructions without the __packed
attribute.

This commit changes the definition of 'struct bvec_iter', so that it only
uses the __packed attribute on architectures where unaligned access is
efficient.

sector_t get_sector(struct bvec_iter *iter)
{
        return iter->bi_sector;
}

with __packed:

0000000000000000 <get_sector>:
   0:   0f 40 10 1f     ldb 0(r26),r31
   4:   0f 42 10 1c     ldb 1(r26),ret0
   8:   f3 ff 0b 18     depd,z,* r31,7,8,r31
   c:   f3 9c 0a 10     depd,z,* ret0,15,16,ret0
  10:   0b fc 02 5c     or ret0,r31,ret0
  14:   0f 44 10 15     ldb 2(r26),r21
  18:   0f 46 10 14     ldb 3(r26),r20
  1c:   f2 b5 09 08     depd,z,* r21,23,24,r21
  20:   0f 48 10 13     ldb 4(r26),r19
  24:   0b 95 02 55     or r21,ret0,r21
  28:   0f 4a 10 1f     ldb 5(r26),r31
  2c:   f2 94 08 00     depd,z,* r20,31,32,r20
  30:   0f 4c 10 1c     ldb 6(r26),ret0
  34:   0f 4e 10 16     ldb 7(r26),r22
  38:   0a b4 02 54     or r20,r21,r20
  3c:   f2 73 13 18     depd,z,* r19,39,40,r19
  40:   f3 ff 12 10     depd,z,* r31,47,48,r31
  44:   0a 93 02 53     or r19,r20,r19
  48:   f3 9c 11 08     depd,z,* ret0,55,56,ret0
  4c:   0a 7f 02 5f     or r31,r19,r31
  50:   0b fc 02 5c     or ret0,r31,ret0
  54:   e8 40 d0 00     bve (rp)
  58:   0b 96 02 5c     or r22,ret0,ret0
  5c:   00 00 00 00     break 0,0

without __packed:

0000000000000000 <get_sector>:
   0:   e8 40 d0 00     bve (rp)
   4:   0f 40 10 dc     ldd 0(r26),ret0

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Fixes: 19416123ab3e ("block: define 'struct bvec_iter' as packed")
Cc: stable@vger.kernel.org	# v5.16+

---
 include/linux/bvec.h |    6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

Index: linux-2.6/include/linux/bvec.h
===================================================================

--- linux-2.6.orig/include/linux/bvec.h	2023-09-05 14:46:02.000000000 +0200
+++ linux-2.6/include/linux/bvec.h	2024-02-06 11:49:56.000000000 +0100
@@ -83,7 +83,11 @@ struct bvec_iter {
 
 	unsigned int            bi_bvec_done;	/* number of bytes completed in
 						   current bvec */
-} __packed;
+}
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+__packed
+#endif
+;
 
 struct bvec_iter_all {
 	struct bio_vec	bv;


^ permalink raw reply	[flat|nested] 4+ messages in thread