From: Greg KH <gregkh@linuxfoundation.org>
To: Julia Lawall <julia.lawall@inria.fr>
Cc: Alex Elder <elder@ieee.org>,
Menna Mahmoud <eng.mennamahmoud.mm@gmail.com>,
outreachy@lists.linux.dev, johan@kernel.org, elder@kernel.org,
linux-kernel@vger.kernel.org, linux-staging@lists.linux.dev
Subject: Re: [PATCH v2] staging: greybus: use inline function for macros
Date: Sat, 25 Mar 2023 09:49:49 +0100 [thread overview]
Message-ID: <ZB61rbl68LfGCosV@kroah.com> (raw)
In-Reply-To: <alpine.DEB.2.22.394.2303230807130.2866@hadrien>
On Thu, Mar 23, 2023 at 10:52:35AM +0100, Julia Lawall wrote:
>
>
> On Thu, 23 Mar 2023, Greg KH wrote:
>
> > On Wed, Mar 22, 2023 at 11:00:41AM +0100, Julia Lawall wrote:
> > > Greg raised the question of whether the inline function is really as
> > > efficient as a macro.
> > >
> > > I tried the following definitions:
> > >
> > > #define to_gbphy_dev(d) container_of(d, struct gbphy_device, dev)
> > >
> > > static inline struct gbphy_device *extra_to_gbphy_dev(const struct device *_dev)
> > > {
> > > return container_of(_dev, struct gbphy_device, dev);
> > > }
> > >
> > > And the following uses:
> > >
> > > ssize_t macro_protocol_id_show(struct device *dev,
> > > struct device_attribute *attr, char *buf)
> > > {
> > > struct gbphy_device *gbphy_dev = to_gbphy_dev(dev);
> > >
> > > return sprintf(buf, "%c macro 0x%02x\n", *buf, gbphy_dev->cport_desc->protocol_id);
> > > }
> > > ssize_t extra_protocol_id_show(struct device *dev,
> > > struct device_attribute *attr, char *buf)
> > > {
> > > struct gbphy_device *gbphy_dev = extra_to_gbphy_dev(dev);
> > >
> > > return sprintf(buf, "extra 0x%02x %c\n", gbphy_dev->cport_desc->protocol_id, *buf);
> > > }
> > >
> > > They are a little bit different to avoid too much compiler optimization.
> > >
> > > After doing make drivers/staging/greybus/gbphy.s, I get similar looking
> > > code in both cases:
> > >
> > > Macro version:
> > >
> > > .type macro_protocol_id_show, @function
> > > macro_protocol_id_show:
> > > endbr64
> > > 1: call __fentry__
> > > .section __mcount_loc, "a",@progbits
> > > .quad 1b
> > > .previous
> > > pushq %rbp #
> > > movq %rdx, %rbp # tmp96, buf
> > > pushq %rbx #
> > > # drivers/staging/greybus/gbphy.c:40: {
> > > movq %rdi, %rbx # tmp95, dev
> > > # drivers/staging/greybus/gbphy.c:43: return sprintf(buf, "%c macro 0x%02x\n", *buf, gbphy_dev->cport_desc->protocol_id);
> > > call __sanitizer_cov_trace_pc #
> > > # drivers/staging/greybus/gbphy.c:43: return sprintf(buf, "%c macro 0x%02x\n", *buf, gbphy_dev->cport_desc->protocol_id);
> > > movq -32(%rbx), %rax # MEM[(struct gbphy_device *)dev_7(D) + -40B].cport_desc, MEM[(struct gbphy_device *)dev_7(D) + -40B].cport_desc
> > > # drivers/staging/greybus/gbphy.c:43: return sprintf(buf, "%c macro 0x%02x\n", *buf, gbphy_dev->cport_desc->protocol_id);
> > > movzbl 0(%rbp), %edx # *buf_9(D), *buf_9(D)
> > > movq %rbp, %rdi # buf,
> > > movq $.LC18, %rsi #,
> > > movzbl 3(%rax), %ecx # _1->protocol_id, _1->protocol_id
> > > call sprintf #
> > > # drivers/staging/greybus/gbphy.c:44: }
> > > movl $13, %eax #,
> > > popq %rbx #
> > > popq %rbp #
> > > jmp __x86_return_thunk
> > > .size macro_protocol_id_show, .-macro_protocol_id_show
> > >
> > > Function version:
> > >
> > > .type extra_protocol_id_show, @function
> > > extra_protocol_id_show:
> > > endbr64
> > > 1: call __fentry__
> > > .section __mcount_loc, "a",@progbits
> > > .quad 1b
> > > .previous
> > > pushq %rbp #
> > > movq %rdx, %rbp # tmp96, buf
> > > pushq %rbx #
> > > # drivers/staging/greybus/gbphy.c:47: {
> > > movq %rdi, %rbx # tmp95, dev
> > > # drivers/staging/greybus/gbphy.c:50: return sprintf(buf, "extra 0x%02x %c\n", gbphy_dev->cport_desc->protocol_id, *buf);
> > > call __sanitizer_cov_trace_pc #
> > > # drivers/staging/greybus/gbphy.c:50: return sprintf(buf, "extra 0x%02x %c\n", gbphy_dev->cport_desc->protocol_id, *buf);
> > > movq -32(%rbx), %rax # MEM[(struct gbphy_device *)dev_8(D) + -40B].cport_desc, MEM[(struct gbphy_device *)dev_8(D) + -40B].cport_desc
> > > # drivers/staging/greybus/gbphy.c:50: return sprintf(buf, "extra 0x%02x %c\n", gbphy_dev->cport_desc->protocol_id, *buf);
> > > movzbl 0(%rbp), %ecx # *buf_9(D), *buf_9(D)
> > > movq %rbp, %rdi # buf,
> > > movq $.LC19, %rsi #,
> > > movzbl 3(%rax), %edx # _3->protocol_id, _3->protocol_id
> > > call sprintf #
> > > # drivers/staging/greybus/gbphy.c:51: }
> > > movl $13, %eax #,
> > > popq %rbx #
> > > popq %rbp #
> > > jmp __x86_return_thunk
> > > .size extra_protocol_id_show, .-extra_protocol_id_show
> > >
> > > Both seem to access the memory directly. Maybe the example is too simple,
> > > and the compiler is more likely to optimize aggressively?
> >
> > Nice, that shows that it is the same both ways as the compiler version
> > you are using is smart enough
> >
> > Which compiler and version is this? Does it work the same for all of
> > the supported versions we have to support (i.e. really old gcc?)
>
> gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0
>
> I got a similar result for gcc-5:
>
> macro_protocol_id_show:
> 1: call __fentry__
> .section __mcount_loc, "a",@progbits
> .quad 1b
> .previous
> movq %rdx, %rax # buf, buf
> movq -32(%rdi), %rdx # MEM[(struct gbphy_device *)dev_1(D) + -40B].cport_desc, MEM[(struct gbphy_device *)dev_1(D) + -40B].cport_desc
> movq $.LC19, %rsi #,
> movq %rax, %rdi # buf,
> movzbl 3(%rdx), %ecx # _3->protocol_id, D.44996
> movzbl (%rax), %edx # *buf_6(D), D.44996
> call sprintf #
> cltq
> jmp __x86_return_thunk
> .size macro_protocol_id_show, .-macro_protocol_id_show
> .section .text.unlikely
> .LCOLDE20:
> .text
> .LHOTE20:
> .section .rodata.str1.1
> .LC21:
> .string "extra 0x%02x %c\n"
> .section .text.unlikely
> .LCOLDB22:
> .text
> .LHOTB22:
> .p2align 6,,63
> .globl extra_protocol_id_show
> .type extra_protocol_id_show, @function
> extra_protocol_id_show:
> 1: call __fentry__
> .section __mcount_loc, "a",@progbits
> .quad 1b
> .previous
> movq %rdx, %rax # buf, buf
> movzbl (%rdx), %ecx # *buf_3(D), D.45003
> movq -32(%rdi), %rdx # MEM[(struct gbphy_device *)dev_2(D) + -40B].cport_desc, MEM[(struct gbphy_device *)dev_2(D) + -40B].cport_desc
> movq $.LC21, %rsi #,
> movq %rax, %rdi # buf,
> movzbl 3(%rdx), %edx # _6->protocol_id, D.45003
> call sprintf #
> cltq
> jmp __x86_return_thunk
> .size extra_protocol_id_show, .-extra_protocol_id_show
> .section .text.unlikely
Ah nice! Thanks for checking.
Ok, so it's not a performance issue at all either way, compilers are
smarter than we think :)
> > For the most part, sysfs files are not on any sort of "fast path" so a
> > function call is fine, but as I mentioned before, sometimes we are
> > forced to move calls to container_of() to container_of_const() and that
> > can not be an inline function, but must remain a macro :(
>
> It seems that this is because there is not a unique return type, but not a
> performance issue?
Yes, container_of_const() will return a different type (const or not)
depending on what is passed into it. This allows the "const-ness" of a
pointer to be kept across the call, while as container_of() will always
cast away the const which can be dangerous if you don't know what you
are doing.
thanks,
greg k-h
next prev parent reply other threads:[~2023-03-25 8:49 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-03-21 18:34 [PATCH v2] staging: greybus: use inline function for macros Menna Mahmoud
2023-03-21 18:50 ` Alex Elder
2023-03-21 20:43 ` Julia Lawall
2023-03-21 21:09 ` Alex Elder
2023-03-21 21:29 ` Julia Lawall
2023-03-21 22:42 ` Alex Elder
2023-03-22 10:00 ` Julia Lawall
2023-03-22 12:39 ` Alex Elder
2023-03-23 4:58 ` Greg KH
2023-03-23 5:05 ` Deepak R Varma
2023-03-23 5:22 ` Greg KH
2023-03-23 19:46 ` Deepak R Varma
2023-03-23 9:52 ` Julia Lawall
2023-03-25 8:49 ` Greg KH [this message]
2023-03-25 9:28 ` Julia Lawall
2023-03-22 9:13 ` Greg KH
-- strict thread matches above, loose matches on Subject: below --
2023-03-19 20:49 Menna Mahmoud
2023-03-19 20:56 ` Julia Lawall
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ZB61rbl68LfGCosV@kroah.com \
--to=gregkh@linuxfoundation.org \
--cc=elder@ieee.org \
--cc=elder@kernel.org \
--cc=eng.mennamahmoud.mm@gmail.com \
--cc=johan@kernel.org \
--cc=julia.lawall@inria.fr \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-staging@lists.linux.dev \
--cc=outreachy@lists.linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).