From: David Miller <davem@davemloft.net>
To: rdreier@cisco.com
Cc: swise@opengridcomputing.com, randy.dunlap@oracle.com,
linux-next@vger.kernel.org, linux-kernel@vger.kernel.org,
general@lists.openfabrics.org
Subject: Re: [ofa-general] [PATCH 2.6.30] RDMA/cxgb3: Remove modulo math.
Date: Tue, 10 Feb 2009 17:23:47 -0800 (PST) [thread overview]
Message-ID: <20090210.172347.189515015.davem@davemloft.net> (raw)
In-Reply-To: <adaeiy5ahza.fsf@cisco.com>
From: Roland Dreier <rdreier@cisco.com>
Date: Tue, 10 Feb 2009 17:18:49 -0800
> > > Is this required? Strength reduction optimization should do this
> > > automatically (and the code has been there for quite a while, so
> > > obviously it isn't causing problems)
>
> > GCC won't optimize that modulus the way you expect, try for yourself
> > and look at the assembler if you don't believe me. :-)
>
> Are you thinking of the case when there are signed integers involved and
> so "% modulus" might produce a different result than "& (modulus - 1)"
> (because the compiler can't know that things are never negative)?
> Because in this case the compiler seems to do what I thought it would;
> the relevant part of the i386 assembly for
>
> wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) %
> (1UL << (12 + page_size[i])));
>
> is
>
> movl %eax, 28(%edi,%ebx) # <variable>.length, <variable>.len
> movzbl 28(%esp,%esi), %ecx # page_size, tmp89
> movl $1, %eax #, tmp92
> addl $12, %ecx #, tmp90
> sall %cl, %eax # tmp90, tmp92
> movl (%esp), %ecx # wr,
> decl %eax # tmp93
> movl 12(%ecx), %edx # <variable>.sg_list, <variable>.sg_list
> andl (%edx,%ebx), %eax # <variable>.addr, tmp93
>
> i.e. the compiler computes the modulus, then does decl to compute
> modulus-1, and then ANDs with it.
>
> Or am I misunderstanding your point?
Must be compiler and platform specific because with gcc-4.1.3 on
sparc with -O2, for the test program:
unsigned long page_size[4];
int main(int argc)
{
unsigned long long x = argc;
return x % (1UL << (12 + page_size[argc]));
}
I get a call to __umoddi3:
main:
save %sp, -112, %sp
sethi %hi(page_size), %g1
sll %i0, 2, %g3
or %g1, %lo(page_size), %g1
mov 1, %o2
ld [%g1+%g3], %g2
add %g2, 12, %g2
sll %o2, %g2, %o2
mov %i0, %o1
mov %o2, %o3
sra %i0, 31, %o0
call __umoddi3, 0
mov 0, %o2
jmp %i7+8
restore %g0, %o1, %o0
I get the same with gcc-4.3.0 and -O2 on 32-bit x86:
main:
leal 4(%esp), %ecx
andl $-16, %esp
pushl -4(%ecx)
movl $1, %eax
pushl %ebp
movl %esp, %ebp
pushl %ecx
subl $20, %esp
movl (%ecx), %edx
movl page_size(,%edx,4), %ecx
movl $0, 12(%esp)
movl %edx, (%esp)
addl $12, %ecx
sall %cl, %eax
movl %eax, 8(%esp)
movl %edx, %eax
sarl $31, %eax
movl %eax, 4(%esp)
call __umoddi3
addl $20, %esp
popl %ecx
popl %ebp
leal -4(%ecx), %esp
ret
WARNING: multiple messages have this Message-ID (diff)
From: David Miller <davem@davemloft.net>
To: rdreier@cisco.com
Cc: randy.dunlap@oracle.com, linux-next@vger.kernel.org,
general@lists.openfabrics.org, linux-kernel@vger.kernel.org
Subject: Re: [ofa-general] [PATCH 2.6.30] RDMA/cxgb3: Remove modulo math.
Date: Tue, 10 Feb 2009 17:23:47 -0800 (PST) [thread overview]
Message-ID: <20090210.172347.189515015.davem@davemloft.net> (raw)
In-Reply-To: <adaeiy5ahza.fsf@cisco.com>
From: Roland Dreier <rdreier@cisco.com>
Date: Tue, 10 Feb 2009 17:18:49 -0800
> > > Is this required? Strength reduction optimization should do this
> > > automatically (and the code has been there for quite a while, so
> > > obviously it isn't causing problems)
>
> > GCC won't optimize that modulus the way you expect, try for yourself
> > and look at the assembler if you don't believe me. :-)
>
> Are you thinking of the case when there are signed integers involved and
> so "% modulus" might produce a different result than "& (modulus - 1)"
> (because the compiler can't know that things are never negative)?
> Because in this case the compiler seems to do what I thought it would;
> the relevant part of the i386 assembly for
>
> wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) %
> (1UL << (12 + page_size[i])));
>
> is
>
> movl %eax, 28(%edi,%ebx) # <variable>.length, <variable>.len
> movzbl 28(%esp,%esi), %ecx # page_size, tmp89
> movl $1, %eax #, tmp92
> addl $12, %ecx #, tmp90
> sall %cl, %eax # tmp90, tmp92
> movl (%esp), %ecx # wr,
> decl %eax # tmp93
> movl 12(%ecx), %edx # <variable>.sg_list, <variable>.sg_list
> andl (%edx,%ebx), %eax # <variable>.addr, tmp93
>
> i.e. the compiler computes the modulus, then does decl to compute
> modulus-1, and then ANDs with it.
>
> Or am I misunderstanding your point?
Must be compiler and platform specific because with gcc-4.1.3 on
sparc with -O2, for the test program:
unsigned long page_size[4];
int main(int argc)
{
unsigned long long x = argc;
return x % (1UL << (12 + page_size[argc]));
}
I get a call to __umoddi3:
main:
save %sp, -112, %sp
sethi %hi(page_size), %g1
sll %i0, 2, %g3
or %g1, %lo(page_size), %g1
mov 1, %o2
ld [%g1+%g3], %g2
add %g2, 12, %g2
sll %o2, %g2, %o2
mov %i0, %o1
mov %o2, %o3
sra %i0, 31, %o0
call __umoddi3, 0
mov 0, %o2
jmp %i7+8
restore %g0, %o1, %o0
I get the same with gcc-4.3.0 and -O2 on 32-bit x86:
main:
leal 4(%esp), %ecx
andl $-16, %esp
pushl -4(%ecx)
movl $1, %eax
pushl %ebp
movl %esp, %ebp
pushl %ecx
subl $20, %esp
movl (%ecx), %edx
movl page_size(,%edx,4), %ecx
movl $0, 12(%esp)
movl %edx, (%esp)
addl $12, %ecx
sall %cl, %eax
movl %eax, 8(%esp)
movl %edx, %eax
sarl $31, %eax
movl %eax, 4(%esp)
call __umoddi3
addl $20, %esp
popl %ecx
popl %ebp
leal -4(%ecx), %esp
ret
next prev parent reply other threads:[~2009-02-11 1:24 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-02-10 18:44 [PATCH 2.6.30] RDMA/cxgb3: Remove modulo math Steve Wise
2009-02-10 18:44 ` [ofa-general] " Steve Wise
2009-02-10 19:04 ` Randy Dunlap
2009-02-10 19:04 ` [ofa-general] " Randy Dunlap
2009-02-10 19:10 ` Steve Wise
2009-02-10 19:10 ` [ofa-general] " Steve Wise
2009-02-10 19:12 ` Randy Dunlap
2009-02-11 0:38 ` [ofa-general] " Roland Dreier
2009-02-11 1:03 ` Steve Wise
2009-02-11 1:07 ` David Miller
2009-02-11 1:18 ` Roland Dreier
2009-02-11 1:18 ` Roland Dreier
2009-02-11 1:23 ` David Miller [this message]
2009-02-11 1:23 ` David Miller
2009-02-11 7:20 ` Roland Dreier
2009-02-11 7:20 ` Roland Dreier
2009-02-11 8:00 ` David Miller
2009-02-11 1:03 ` Steve Wise
2009-02-11 15:44 ` Steve Wise
2009-02-11 18:12 ` Roland Dreier
2009-02-11 18:32 ` Steve Wise
2009-02-11 18:36 ` Roland Dreier
2009-02-11 18:44 ` Steve Wise
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090210.172347.189515015.davem@davemloft.net \
--to=davem@davemloft.net \
--cc=general@lists.openfabrics.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-next@vger.kernel.org \
--cc=randy.dunlap@oracle.com \
--cc=rdreier@cisco.com \
--cc=swise@opengridcomputing.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.