* [PATCH net-next] ipv6: Implement optimized IPv6 masked address comparison for ARM64 @ 2017-03-17 4:42 Subash Abhinov Kasiviswanathan 2017-03-17 12:00 ` Robin Murphy 0 siblings, 1 reply; 4+ messages in thread From: Subash Abhinov Kasiviswanathan @ 2017-03-17 4:42 UTC (permalink / raw) To: netdev, davem, luke.starrett, robin.murphy, catalin.marinas Cc: Subash Abhinov Kasiviswanathan Android devices use multiple ip[6]tables for statistics, UID matching and other functionality. Perf output indicated that ip6_do_table was taking a considerable amount of CPU and more than ip_do_table for an equivalent rate. ipv6_masked_addr_cmp was chosen for optimization as there are more instructions required than the equivalent operation in ip_packet_match. Using 128-bit operations helps to reduce the number of instructions for the match on an ARM64 system. This helps to improve UDPv6 DL performance by 40Mbps (860Mbps -> 900Mbps) on a CPU-limited system. Tested on x86_64 UML to check that the generic version is used and on ARM64 to verify that the ARM64 version is used. 
Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org> --- arch/alpha/include/asm/Kbuild | 1 + arch/arc/include/asm/Kbuild | 1 + arch/arm/include/asm/Kbuild | 1 + arch/arm64/include/asm/ipv6.h | 29 +++++++++++++++++++++++++++++ arch/avr32/include/asm/Kbuild | 1 + arch/blackfin/include/asm/Kbuild | 1 + arch/c6x/include/asm/Kbuild | 1 + arch/cris/include/asm/Kbuild | 1 + arch/frv/include/asm/Kbuild | 1 + arch/h8300/include/asm/Kbuild | 1 + arch/hexagon/include/asm/Kbuild | 1 + arch/ia64/include/asm/Kbuild | 1 + arch/m32r/include/asm/Kbuild | 1 + arch/m68k/include/asm/Kbuild | 1 + arch/metag/include/asm/Kbuild | 1 + arch/microblaze/include/asm/Kbuild | 1 + arch/mips/include/asm/Kbuild | 1 + arch/mn10300/include/asm/Kbuild | 1 + arch/nios2/include/asm/Kbuild | 1 + arch/openrisc/include/asm/Kbuild | 1 + arch/parisc/include/asm/Kbuild | 1 + arch/powerpc/include/asm/Kbuild | 1 + arch/s390/include/asm/Kbuild | 1 + arch/score/include/asm/Kbuild | 1 + arch/sh/include/asm/Kbuild | 1 + arch/sparc/include/asm/Kbuild | 1 + arch/tile/include/asm/Kbuild | 1 + arch/um/include/asm/Kbuild | 1 + arch/unicore32/include/asm/Kbuild | 1 + arch/x86/include/asm/Kbuild | 1 + arch/xtensa/include/asm/Kbuild | 1 + include/asm-generic/ipv6.h | 32 ++++++++++++++++++++++++++++++++ include/net/ipv6.h | 20 +------------------- 33 files changed, 92 insertions(+), 19 deletions(-) create mode 100644 arch/arm64/include/asm/ipv6.h create mode 100644 include/asm-generic/ipv6.h diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index d103db5..5b7e92b 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += clkdev.h generic-y += exec.h generic-y += export.h +generic-y += ipv6.h generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 63a0401..99f1456 100644 --- a/arch/arc/include/asm/Kbuild +++ 
b/arch/arc/include/asm/Kbuild @@ -14,6 +14,7 @@ generic-y += hw_irq.h generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h +generic-y += ipv6.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kmap_types.h diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index b14e8c7..a0ba9ac 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -9,6 +9,7 @@ generic-y += errno.h generic-y += exec.h generic-y += ioctl.h generic-y += ipcbuf.h +generic-y += ipv6.h generic-y += irq_regs.h generic-y += kdebug.h generic-y += local.h diff --git a/arch/arm64/include/asm/ipv6.h b/arch/arm64/include/asm/ipv6.h new file mode 100644 index 0000000..d49dec6 --- /dev/null +++ b/arch/arm64/include/asm/ipv6.h @@ -0,0 +1,29 @@ +/* Copyright (c) 2017, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef __ASM_IPV6_H +#define __ASM_IPV6_H + +#include <linux/types.h> + +static inline bool +ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m, + const struct in6_addr *a2) +{ + const __uint128_t *ul1 = (const __uint128_t *)a1; + const __uint128_t *ulm = (const __uint128_t *)m; + const __uint128_t *ul2 = (const __uint128_t *)a1; + + return !!((*ul1 ^ *ul2) & *ulm); +} +#define ipv6_masked_addr_cmp ipv6_masked_addr_cmp +#endif /* __ASM_IPV6_H */ diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index 3d7ef2c..fd6a964 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -6,6 +6,7 @@ generic-y += div64.h generic-y += emergency-restart.h generic-y += exec.h generic-y += futex.h +generic-y += ipv6.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += local.h diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index 625db8a..f713d85 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -12,6 +12,7 @@ generic-y += futex.h generic-y += hw_irq.h generic-y += ioctl.h generic-y += ipcbuf.h +generic-y += ipv6.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kdebug.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 82619c3..ff8033f 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -20,6 +20,7 @@ generic-y += io.h generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h +generic-y += ipv6.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kdebug.h diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 0f5132b..c5b5fa0 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -15,6 +15,7 @@ generic-y += futex.h generic-y += hardirq.h generic-y += ioctl.h generic-y += ipcbuf.h +generic-y += ipv6.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kdebug.h diff --git 
a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index c33b467..0717ffb 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -1,6 +1,7 @@ generic-y += clkdev.h generic-y += exec.h +generic-y += ipv6.h generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index 341740c..0058275 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -23,6 +23,7 @@ generic-y += hw_irq.h generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h +generic-y += ipv6.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kdebug.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 797b64a..4985925 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -20,6 +20,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += iomap.h generic-y += ipcbuf.h +generic-y += ipv6.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kdebug.h diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index 502a91d..cacba4c 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -1,6 +1,7 @@ generic-y += clkdev.h generic-y += exec.h +generic-y += ipv6.h generic-y += irq_work.h generic-y += kvm_para.h generic-y += mcs_spinlock.h diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index deb2987..8ed14f5 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -2,6 +2,7 @@ generic-y += clkdev.h generic-y += current.h generic-y += exec.h +generic-y += ipv6.h generic-y += irq_work.h generic-y += kvm_para.h generic-y += mcs_spinlock.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index d4f9ccb..1ddfeac 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -9,6 +9,7 @@ generic-y += futex.h generic-y += hw_irq.h generic-y += 
ioctl.h generic-y += ipcbuf.h +generic-y += ipv6.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kdebug.h diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index f9b9df5..2382a6e 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -16,6 +16,7 @@ generic-y += hw_irq.h generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h +generic-y += ipv6.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kdebug.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 1732ec1..66b7d8a 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += barrier.h generic-y += clkdev.h generic-y += device.h generic-y += exec.h +generic-y += ipv6.h generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 2535c7b..4453f33 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -5,6 +5,7 @@ generic-y += current.h generic-y += dma-contiguous.h generic-y += emergency-restart.h generic-y += export.h +generic-y += ipv6.h generic-y += irq_work.h generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index 97f64c7..df55c2b 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -2,6 +2,7 @@ generic-y += barrier.h generic-y += clkdev.h generic-y += exec.h +generic-y += ipv6.h generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild index aaa3c21..d6b13ee 100644 --- a/arch/nios2/include/asm/Kbuild +++ b/arch/nios2/include/asm/Kbuild @@ -22,6 +22,7 @@ generic-y += hw_irq.h generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h +generic-y += ipv6.h generic-y += irq_regs.h generic-y += 
irq_work.h generic-y += kdebug.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index fb01873..0d9ad5a 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -25,6 +25,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq.h +generic-y += ipv6.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kdebug.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index a9909c2..b0e156e 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -8,6 +8,7 @@ generic-y += div64.h generic-y += emergency-restart.h generic-y += exec.h generic-y += hw_irq.h +generic-y += ipv6.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kdebug.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 5c4fbc8..f675f7c 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -1,6 +1,7 @@ generic-y += clkdev.h generic-y += div64.h generic-y += export.h +generic-y += ipv6.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += local64.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 8aea32f..53a2335 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -2,6 +2,7 @@ generic-y += asm-offsets.h generic-y += clkdev.h generic-y += dma-contiguous.h generic-y += export.h +generic-y += ipv6.h generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index 926943a..9405456 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -4,6 +4,7 @@ header-y += generic-y += barrier.h generic-y += clkdev.h generic-y += current.h +generic-y += ipv6.h generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 
cf2a750..3984834 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -10,6 +10,7 @@ generic-y += exec.h generic-y += fcntl.h generic-y += ioctl.h generic-y += ipcbuf.h +generic-y += ipv6.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kvm_para.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index e9e837b..d2acdaf 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -6,6 +6,7 @@ generic-y += div64.h generic-y += emergency-restart.h generic-y += exec.h generic-y += export.h +generic-y += ipv6.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += linkage.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index aa48b6e..9487778 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -12,6 +12,7 @@ generic-y += fcntl.h generic-y += hw_irq.h generic-y += ioctl.h generic-y += ioctls.h +generic-y += ipv6.h generic-y += ipcbuf.h generic-y += irq_regs.h generic-y += local.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index e9d42aa..b226353 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -11,6 +11,7 @@ generic-y += futex.h generic-y += hardirq.h generic-y += hw_irq.h generic-y += io.h +generic-y += ipv6.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kdebug.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index 84205fe..d01d723 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -19,6 +19,7 @@ generic-y += hw_irq.h generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h +generic-y += ipv6.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kdebug.h diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 5d6a53f..8a68e12 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -9,5 +9,6 @@ generated-y += xen-hypercalls.h 
generic-y += clkdev.h generic-y += dma-contiguous.h generic-y += early_ioremap.h +generic-y += ipv6.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index f41408c..dc02075 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -9,6 +9,7 @@ generic-y += exec.h generic-y += fcntl.h generic-y += hardirq.h generic-y += ioctl.h +generic-y += ipv6.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kdebug.h diff --git a/include/asm-generic/ipv6.h b/include/asm-generic/ipv6.h new file mode 100644 index 0000000..754adac --- /dev/null +++ b/include/asm-generic/ipv6.h @@ -0,0 +1,32 @@ +/* Linux INET6 implementation + * + * Authors: + * Pedro Roque <roque@di.fc.ul.pt> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef ipv6_masked_addr_cmp +#define ipv6_masked_addr_cmp ipv6_masked_addr_cmp +static inline bool +ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m, + const struct in6_addr *a2) +{ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + const unsigned long *ul1 = (const unsigned long *)a1; + const unsigned long *ulm = (const unsigned long *)m; + const unsigned long *ul2 = (const unsigned long *)a2; + + return !!(((ul1[0] ^ ul2[0]) & ulm[0]) | + ((ul1[1] ^ ul2[1]) & ulm[1])); +#else + return !!(((a1->s6_addr32[0] ^ a2->s6_addr32[0]) & m->s6_addr32[0]) | + ((a1->s6_addr32[1] ^ a2->s6_addr32[1]) & m->s6_addr32[1]) | + ((a1->s6_addr32[2] ^ a2->s6_addr32[2]) & m->s6_addr32[2]) | + ((a1->s6_addr32[3] ^ a2->s6_addr32[3]) & m->s6_addr32[3])); +#endif +} +#endif diff --git a/include/net/ipv6.h b/include/net/ipv6.h index dbf0abb..08ad1a98 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -21,6 +21,7 @@ #include <net/flow.h> #include <net/flow_dissector.h> #include <net/snmp.h> +#include <asm/ipv6.h> #define SIN6_LEN_RFC2133 24 @@ -385,25 +386,6 @@ static inline int ipv6_addr_cmp(const struct in6_addr *a1, const struct in6_addr return memcmp(a1, a2, sizeof(struct in6_addr)); } -static inline bool -ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m, - const struct in6_addr *a2) -{ -#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 - const unsigned long *ul1 = (const unsigned long *)a1; - const unsigned long *ulm = (const unsigned long *)m; - const unsigned long *ul2 = (const unsigned long *)a2; - - return !!(((ul1[0] ^ ul2[0]) & ulm[0]) | - ((ul1[1] ^ ul2[1]) & ulm[1])); -#else - return !!(((a1->s6_addr32[0] ^ a2->s6_addr32[0]) & m->s6_addr32[0]) | - ((a1->s6_addr32[1] ^ a2->s6_addr32[1]) & m->s6_addr32[1]) | - ((a1->s6_addr32[2] ^ a2->s6_addr32[2]) & m->s6_addr32[2]) | - ((a1->s6_addr32[3] ^ a2->s6_addr32[3]) & m->s6_addr32[3])); -#endif -} - static inline void 
ipv6_addr_prefix(struct in6_addr *pfx, const struct in6_addr *addr, int plen) -- 1.9.1 ^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH net-next] ipv6: Implement optimized IPv6 masked address comparison for ARM64 2017-03-17 4:42 [PATCH net-next] ipv6: Implement optimized IPv6 masked address comparison for ARM64 Subash Abhinov Kasiviswanathan @ 2017-03-17 12:00 ` Robin Murphy 2017-03-17 12:22 ` James Greenhalgh 0 siblings, 1 reply; 4+ messages in thread From: Robin Murphy @ 2017-03-17 12:00 UTC (permalink / raw) To: Subash Abhinov Kasiviswanathan, netdev, davem, luke.starrett, catalin.marinas Cc: James Greenhalgh On 17/03/17 04:42, Subash Abhinov Kasiviswanathan wrote: > Android devices use multiple ip[6]tables for statistics, UID matching > and other functionality. Perf output indicated that ip6_do_table > was taking a considerable amount of CPU and more that ip_do_table > for an equivalent rate. ipv6_masked_addr_cmp was chosen for > optimization as there are more instructions required than the > equivalent operation in ip_packet_match. > > Using 128 bit operations helps to reduce the number of instructions > for the match on an ARM64 system. This helps to improve UDPv6 DL > performance by 40Mbps (860Mbps -> 900Mbps) on a CPU limited system. 
After trying to have a look at the codegen difference it makes, I think I may have found why it's faster ;) ---------- [root@space-channel-5 ~]# cat > ip.c #include <stdbool.h> #include <netinet/in.h> bool ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m, const struct in6_addr *a2) { const unsigned long *ul1 = (const unsigned long *)a1; const unsigned long *ulm = (const unsigned long *)m; const unsigned long *ul2 = (const unsigned long *)a2; return !!(((ul1[0] ^ ul2[0]) & ulm[0]) | ((ul1[1] ^ ul2[1]) & ulm[1])); } bool ipv6_masked_addr_cmp_new(const struct in6_addr *a1, const struct in6_addr *m, const struct in6_addr *a2) { const __uint128_t *ul1 = (const __uint128_t *)a1; const __uint128_t *ulm = (const __uint128_t *)m; const __uint128_t *ul2 = (const __uint128_t *)a1; return !!((*ul1 ^ *ul2) & *ulm); } [root@space-channel-5 ~]# gcc -c -O2 ip.c [root@space-channel-5 ~]# objdump -d ip.o ip.o: file format elf64-littleaarch64 Disassembly of section .text: 0000000000000000 <ipv6_masked_addr_cmp>: 0: a9401847 ldp x7, x6, [x2] 4: a9401003 ldp x3, x4, [x0] 8: f9400025 ldr x5, [x1] c: f9400422 ldr x2, [x1, #8] 10: ca070060 eor x0, x3, x7 14: ca060081 eor x1, x4, x6 18: 8a050000 and x0, x0, x5 1c: 8a020021 and x1, x1, x2 20: aa010000 orr x0, x0, x1 24: f100001f cmp x0, #0x0 28: 1a9f07e0 cset w0, ne // ne = any 2c: d65f03c0 ret 0000000000000030 <ipv6_masked_addr_cmp_new>: 30: 52800000 mov w0, #0x0 // #0 34: d65f03c0 ret [root@space-channel-5 ~]# gcc --version gcc (GCC) 6.3.1 20170306 Copyright (C) 2016 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ---------- That's clearly not right - I'm not sure quite what undefined behaviour assumption convinces GCC to optimise the whole thing away, but I do note that the generic 64-bit version really isn't far off optimal already. [Editor's note: no undefined behaviour is needed to explain the constant result - in the 128-bit version ul2 is initialised from a1 rather than a2, so (*ul1 ^ *ul2) is always zero and the function always returns false; the cast should read (const __uint128_t *)a2.] 
Even if it happens to work out in practice due to inlining behaviour, I don't think that's something we'd want to rely on. Robin. > Tested on x86_64 UML to check if generic version is used and ARM64 > to verify that ARM64 version is used. > > Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org> > --- > arch/alpha/include/asm/Kbuild | 1 + > arch/arc/include/asm/Kbuild | 1 + > arch/arm/include/asm/Kbuild | 1 + > arch/arm64/include/asm/ipv6.h | 29 +++++++++++++++++++++++++++++ > arch/avr32/include/asm/Kbuild | 1 + > arch/blackfin/include/asm/Kbuild | 1 + > arch/c6x/include/asm/Kbuild | 1 + > arch/cris/include/asm/Kbuild | 1 + > arch/frv/include/asm/Kbuild | 1 + > arch/h8300/include/asm/Kbuild | 1 + > arch/hexagon/include/asm/Kbuild | 1 + > arch/ia64/include/asm/Kbuild | 1 + > arch/m32r/include/asm/Kbuild | 1 + > arch/m68k/include/asm/Kbuild | 1 + > arch/metag/include/asm/Kbuild | 1 + > arch/microblaze/include/asm/Kbuild | 1 + > arch/mips/include/asm/Kbuild | 1 + > arch/mn10300/include/asm/Kbuild | 1 + > arch/nios2/include/asm/Kbuild | 1 + > arch/openrisc/include/asm/Kbuild | 1 + > arch/parisc/include/asm/Kbuild | 1 + > arch/powerpc/include/asm/Kbuild | 1 + > arch/s390/include/asm/Kbuild | 1 + > arch/score/include/asm/Kbuild | 1 + > arch/sh/include/asm/Kbuild | 1 + > arch/sparc/include/asm/Kbuild | 1 + > arch/tile/include/asm/Kbuild | 1 + > arch/um/include/asm/Kbuild | 1 + > arch/unicore32/include/asm/Kbuild | 1 + > arch/x86/include/asm/Kbuild | 1 + > arch/xtensa/include/asm/Kbuild | 1 + > include/asm-generic/ipv6.h | 32 ++++++++++++++++++++++++++++++++ > include/net/ipv6.h | 20 +------------------- > 33 files changed, 92 insertions(+), 19 deletions(-) > create mode 100644 arch/arm64/include/asm/ipv6.h > create mode 100644 include/asm-generic/ipv6.h > > diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild > index d103db5..5b7e92b 100644 > --- a/arch/alpha/include/asm/Kbuild > +++ b/arch/alpha/include/asm/Kbuild > @@ -3,6 +3,7 @@ 
> generic-y += clkdev.h > generic-y += exec.h > generic-y += export.h > +generic-y += ipv6.h > generic-y += irq_work.h > generic-y += mcs_spinlock.h > generic-y += mm-arch-hooks.h > diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild > index 63a0401..99f1456 100644 > --- a/arch/arc/include/asm/Kbuild > +++ b/arch/arc/include/asm/Kbuild > @@ -14,6 +14,7 @@ generic-y += hw_irq.h > generic-y += ioctl.h > generic-y += ioctls.h > generic-y += ipcbuf.h > +generic-y += ipv6.h > generic-y += irq_regs.h > generic-y += irq_work.h > generic-y += kmap_types.h > diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild > index b14e8c7..a0ba9ac 100644 > --- a/arch/arm/include/asm/Kbuild > +++ b/arch/arm/include/asm/Kbuild > @@ -9,6 +9,7 @@ generic-y += errno.h > generic-y += exec.h > generic-y += ioctl.h > generic-y += ipcbuf.h > +generic-y += ipv6.h > generic-y += irq_regs.h > generic-y += kdebug.h > generic-y += local.h > diff --git a/arch/arm64/include/asm/ipv6.h b/arch/arm64/include/asm/ipv6.h > new file mode 100644 > index 0000000..d49dec6 > --- /dev/null > +++ b/arch/arm64/include/asm/ipv6.h > @@ -0,0 +1,29 @@ > +/* Copyright (c) 2017, The Linux Foundation. All rights reserved. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 and > + * only version 2 as published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. 
> + */ > + > +#ifndef __ASM_IPV6_H > +#define __ASM_IPV6_H > + > +#include <linux/types.h> > + > +static inline bool > +ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m, > + const struct in6_addr *a2) > +{ > + const __uint128_t *ul1 = (const __uint128_t *)a1; > + const __uint128_t *ulm = (const __uint128_t *)m; > + const __uint128_t *ul2 = (const __uint128_t *)a1; > + > + return !!((*ul1 ^ *ul2) & *ulm); > +} > +#define ipv6_masked_addr_cmp ipv6_masked_addr_cmp > +#endif /* __ASM_IPV6_H */ > diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild > index 3d7ef2c..fd6a964 100644 > --- a/arch/avr32/include/asm/Kbuild > +++ b/arch/avr32/include/asm/Kbuild > @@ -6,6 +6,7 @@ generic-y += div64.h > generic-y += emergency-restart.h > generic-y += exec.h > generic-y += futex.h > +generic-y += ipv6.h > generic-y += irq_regs.h > generic-y += irq_work.h > generic-y += local.h > diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild > index 625db8a..f713d85 100644 > --- a/arch/blackfin/include/asm/Kbuild > +++ b/arch/blackfin/include/asm/Kbuild > @@ -12,6 +12,7 @@ generic-y += futex.h > generic-y += hw_irq.h > generic-y += ioctl.h > generic-y += ipcbuf.h > +generic-y += ipv6.h > generic-y += irq_regs.h > generic-y += irq_work.h > generic-y += kdebug.h > diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild > index 82619c3..ff8033f 100644 > --- a/arch/c6x/include/asm/Kbuild > +++ b/arch/c6x/include/asm/Kbuild > @@ -20,6 +20,7 @@ generic-y += io.h > generic-y += ioctl.h > generic-y += ioctls.h > generic-y += ipcbuf.h > +generic-y += ipv6.h > generic-y += irq_regs.h > generic-y += irq_work.h > generic-y += kdebug.h > diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild > index 0f5132b..c5b5fa0 100644 > --- a/arch/cris/include/asm/Kbuild > +++ b/arch/cris/include/asm/Kbuild > @@ -15,6 +15,7 @@ generic-y += futex.h > generic-y += hardirq.h > generic-y += ioctl.h > generic-y += 
ipcbuf.h > +generic-y += ipv6.h > generic-y += irq_regs.h > generic-y += irq_work.h > generic-y += kdebug.h > diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild > index c33b467..0717ffb 100644 > --- a/arch/frv/include/asm/Kbuild > +++ b/arch/frv/include/asm/Kbuild > @@ -1,6 +1,7 @@ > > generic-y += clkdev.h > generic-y += exec.h > +generic-y += ipv6.h > generic-y += irq_work.h > generic-y += mcs_spinlock.h > generic-y += mm-arch-hooks.h > diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild > index 341740c..0058275 100644 > --- a/arch/h8300/include/asm/Kbuild > +++ b/arch/h8300/include/asm/Kbuild > @@ -23,6 +23,7 @@ generic-y += hw_irq.h > generic-y += ioctl.h > generic-y += ioctls.h > generic-y += ipcbuf.h > +generic-y += ipv6.h > generic-y += irq_regs.h > generic-y += irq_work.h > generic-y += kdebug.h > diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild > index 797b64a..4985925 100644 > --- a/arch/hexagon/include/asm/Kbuild > +++ b/arch/hexagon/include/asm/Kbuild > @@ -20,6 +20,7 @@ generic-y += ioctl.h > generic-y += ioctls.h > generic-y += iomap.h > generic-y += ipcbuf.h > +generic-y += ipv6.h > generic-y += irq_regs.h > generic-y += irq_work.h > generic-y += kdebug.h > diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild > index 502a91d..cacba4c 100644 > --- a/arch/ia64/include/asm/Kbuild > +++ b/arch/ia64/include/asm/Kbuild > @@ -1,6 +1,7 @@ > > generic-y += clkdev.h > generic-y += exec.h > +generic-y += ipv6.h > generic-y += irq_work.h > generic-y += kvm_para.h > generic-y += mcs_spinlock.h > diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild > index deb2987..8ed14f5 100644 > --- a/arch/m32r/include/asm/Kbuild > +++ b/arch/m32r/include/asm/Kbuild > @@ -2,6 +2,7 @@ > generic-y += clkdev.h > generic-y += current.h > generic-y += exec.h > +generic-y += ipv6.h > generic-y += irq_work.h > generic-y += kvm_para.h > generic-y += mcs_spinlock.h > diff 
--git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild > index d4f9ccb..1ddfeac 100644 > --- a/arch/m68k/include/asm/Kbuild > +++ b/arch/m68k/include/asm/Kbuild > @@ -9,6 +9,7 @@ generic-y += futex.h > generic-y += hw_irq.h > generic-y += ioctl.h > generic-y += ipcbuf.h > +generic-y += ipv6.h > generic-y += irq_regs.h > generic-y += irq_work.h > generic-y += kdebug.h > diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild > index f9b9df5..2382a6e 100644 > --- a/arch/metag/include/asm/Kbuild > +++ b/arch/metag/include/asm/Kbuild > @@ -16,6 +16,7 @@ generic-y += hw_irq.h > generic-y += ioctl.h > generic-y += ioctls.h > generic-y += ipcbuf.h > +generic-y += ipv6.h > generic-y += irq_regs.h > generic-y += irq_work.h > generic-y += kdebug.h > diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild > index 1732ec1..66b7d8a 100644 > --- a/arch/microblaze/include/asm/Kbuild > +++ b/arch/microblaze/include/asm/Kbuild > @@ -3,6 +3,7 @@ generic-y += barrier.h > generic-y += clkdev.h > generic-y += device.h > generic-y += exec.h > +generic-y += ipv6.h > generic-y += irq_work.h > generic-y += mcs_spinlock.h > generic-y += mm-arch-hooks.h > diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild > index 2535c7b..4453f33 100644 > --- a/arch/mips/include/asm/Kbuild > +++ b/arch/mips/include/asm/Kbuild > @@ -5,6 +5,7 @@ generic-y += current.h > generic-y += dma-contiguous.h > generic-y += emergency-restart.h > generic-y += export.h > +generic-y += ipv6.h > generic-y += irq_work.h > generic-y += local64.h > generic-y += mcs_spinlock.h > diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild > index 97f64c7..df55c2b 100644 > --- a/arch/mn10300/include/asm/Kbuild > +++ b/arch/mn10300/include/asm/Kbuild > @@ -2,6 +2,7 @@ > generic-y += barrier.h > generic-y += clkdev.h > generic-y += exec.h > +generic-y += ipv6.h > generic-y += irq_work.h > generic-y += mcs_spinlock.h > generic-y += 
mm-arch-hooks.h > diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild > index aaa3c21..d6b13ee 100644 > --- a/arch/nios2/include/asm/Kbuild > +++ b/arch/nios2/include/asm/Kbuild > @@ -22,6 +22,7 @@ generic-y += hw_irq.h > generic-y += ioctl.h > generic-y += ioctls.h > generic-y += ipcbuf.h > +generic-y += ipv6.h > generic-y += irq_regs.h > generic-y += irq_work.h > generic-y += kdebug.h > diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild > index fb01873..0d9ad5a 100644 > --- a/arch/openrisc/include/asm/Kbuild > +++ b/arch/openrisc/include/asm/Kbuild > @@ -25,6 +25,7 @@ generic-y += ioctl.h > generic-y += ioctls.h > generic-y += ipcbuf.h > generic-y += irq.h > +generic-y += ipv6.h > generic-y += irq_regs.h > generic-y += irq_work.h > generic-y += kdebug.h > diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild > index a9909c2..b0e156e 100644 > --- a/arch/parisc/include/asm/Kbuild > +++ b/arch/parisc/include/asm/Kbuild > @@ -8,6 +8,7 @@ generic-y += div64.h > generic-y += emergency-restart.h > generic-y += exec.h > generic-y += hw_irq.h > +generic-y += ipv6.h > generic-y += irq_regs.h > generic-y += irq_work.h > generic-y += kdebug.h > diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild > index 5c4fbc8..f675f7c 100644 > --- a/arch/powerpc/include/asm/Kbuild > +++ b/arch/powerpc/include/asm/Kbuild > @@ -1,6 +1,7 @@ > generic-y += clkdev.h > generic-y += div64.h > generic-y += export.h > +generic-y += ipv6.h > generic-y += irq_regs.h > generic-y += irq_work.h > generic-y += local64.h > diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild > index 8aea32f..53a2335 100644 > --- a/arch/s390/include/asm/Kbuild > +++ b/arch/s390/include/asm/Kbuild > @@ -2,6 +2,7 @@ generic-y += asm-offsets.h > generic-y += clkdev.h > generic-y += dma-contiguous.h > generic-y += export.h > +generic-y += ipv6.h > generic-y += irq_work.h > generic-y += mcs_spinlock.h > 
generic-y += mm-arch-hooks.h > diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild > index 926943a..9405456 100644 > --- a/arch/score/include/asm/Kbuild > +++ b/arch/score/include/asm/Kbuild > @@ -4,6 +4,7 @@ header-y += > generic-y += barrier.h > generic-y += clkdev.h > generic-y += current.h > +generic-y += ipv6.h > generic-y += irq_work.h > generic-y += mcs_spinlock.h > generic-y += mm-arch-hooks.h > diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild > index cf2a750..3984834 100644 > --- a/arch/sh/include/asm/Kbuild > +++ b/arch/sh/include/asm/Kbuild > @@ -10,6 +10,7 @@ generic-y += exec.h > generic-y += fcntl.h > generic-y += ioctl.h > generic-y += ipcbuf.h > +generic-y += ipv6.h > generic-y += irq_regs.h > generic-y += irq_work.h > generic-y += kvm_para.h > diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild > index e9e837b..d2acdaf 100644 > --- a/arch/sparc/include/asm/Kbuild > +++ b/arch/sparc/include/asm/Kbuild > @@ -6,6 +6,7 @@ generic-y += div64.h > generic-y += emergency-restart.h > generic-y += exec.h > generic-y += export.h > +generic-y += ipv6.h > generic-y += irq_regs.h > generic-y += irq_work.h > generic-y += linkage.h > diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild > index aa48b6e..9487778 100644 > --- a/arch/tile/include/asm/Kbuild > +++ b/arch/tile/include/asm/Kbuild > @@ -12,6 +12,7 @@ generic-y += fcntl.h > generic-y += hw_irq.h > generic-y += ioctl.h > generic-y += ioctls.h > +generic-y += ipv6.h > generic-y += ipcbuf.h > generic-y += irq_regs.h > generic-y += local.h > diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild > index e9d42aa..b226353 100644 > --- a/arch/um/include/asm/Kbuild > +++ b/arch/um/include/asm/Kbuild > @@ -11,6 +11,7 @@ generic-y += futex.h > generic-y += hardirq.h > generic-y += hw_irq.h > generic-y += io.h > +generic-y += ipv6.h > generic-y += irq_regs.h > generic-y += irq_work.h > generic-y += kdebug.h > diff 
--git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild > index 84205fe..d01d723 100644 > --- a/arch/unicore32/include/asm/Kbuild > +++ b/arch/unicore32/include/asm/Kbuild > @@ -19,6 +19,7 @@ generic-y += hw_irq.h > generic-y += ioctl.h > generic-y += ioctls.h > generic-y += ipcbuf.h > +generic-y += ipv6.h > generic-y += irq_regs.h > generic-y += irq_work.h > generic-y += kdebug.h > diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild > index 5d6a53f..8a68e12 100644 > --- a/arch/x86/include/asm/Kbuild > +++ b/arch/x86/include/asm/Kbuild > @@ -9,5 +9,6 @@ generated-y += xen-hypercalls.h > generic-y += clkdev.h > generic-y += dma-contiguous.h > generic-y += early_ioremap.h > +generic-y += ipv6.h > generic-y += mcs_spinlock.h > generic-y += mm-arch-hooks.h > diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild > index f41408c..dc02075 100644 > --- a/arch/xtensa/include/asm/Kbuild > +++ b/arch/xtensa/include/asm/Kbuild > @@ -9,6 +9,7 @@ generic-y += exec.h > generic-y += fcntl.h > generic-y += hardirq.h > generic-y += ioctl.h > +generic-y += ipv6.h > generic-y += irq_regs.h > generic-y += irq_work.h > generic-y += kdebug.h > diff --git a/include/asm-generic/ipv6.h b/include/asm-generic/ipv6.h > new file mode 100644 > index 0000000..754adac > --- /dev/null > +++ b/include/asm-generic/ipv6.h > @@ -0,0 +1,32 @@ > +/* Linux INET6 implementation > + * > + * Authors: > + * Pedro Roque <roque@di.fc.ul.pt> > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. 
> + */ > + > +#ifndef ipv6_masked_addr_cmp > +#define ipv6_masked_addr_cmp ipv6_masked_addr_cmp > +static inline bool > +ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m, > + const struct in6_addr *a2) > +{ > +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 > + const unsigned long *ul1 = (const unsigned long *)a1; > + const unsigned long *ulm = (const unsigned long *)m; > + const unsigned long *ul2 = (const unsigned long *)a2; > + > + return !!(((ul1[0] ^ ul2[0]) & ulm[0]) | > + ((ul1[1] ^ ul2[1]) & ulm[1])); > +#else > + return !!(((a1->s6_addr32[0] ^ a2->s6_addr32[0]) & m->s6_addr32[0]) | > + ((a1->s6_addr32[1] ^ a2->s6_addr32[1]) & m->s6_addr32[1]) | > + ((a1->s6_addr32[2] ^ a2->s6_addr32[2]) & m->s6_addr32[2]) | > + ((a1->s6_addr32[3] ^ a2->s6_addr32[3]) & m->s6_addr32[3])); > +#endif > +} > +#endif > diff --git a/include/net/ipv6.h b/include/net/ipv6.h > index dbf0abb..08ad1a98 100644 > --- a/include/net/ipv6.h > +++ b/include/net/ipv6.h > @@ -21,6 +21,7 @@ > #include <net/flow.h> > #include <net/flow_dissector.h> > #include <net/snmp.h> > +#include <asm/ipv6.h> > > #define SIN6_LEN_RFC2133 24 > > @@ -385,25 +386,6 @@ static inline int ipv6_addr_cmp(const struct in6_addr *a1, const struct in6_addr > return memcmp(a1, a2, sizeof(struct in6_addr)); > } > > -static inline bool > -ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m, > - const struct in6_addr *a2) > -{ > -#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 > - const unsigned long *ul1 = (const unsigned long *)a1; > - const unsigned long *ulm = (const unsigned long *)m; > - const unsigned long *ul2 = (const unsigned long *)a2; > - > - return !!(((ul1[0] ^ ul2[0]) & ulm[0]) | > - ((ul1[1] ^ ul2[1]) & ulm[1])); > -#else > - return !!(((a1->s6_addr32[0] ^ a2->s6_addr32[0]) & m->s6_addr32[0]) | > - ((a1->s6_addr32[1] ^ a2->s6_addr32[1]) & m->s6_addr32[1]) | > - ((a1->s6_addr32[2] ^ a2->s6_addr32[2]) & 
m->s6_addr32[2]) | > - ((a1->s6_addr32[3] ^ a2->s6_addr32[3]) & m->s6_addr32[3])); > -#endif > -} > - > static inline void ipv6_addr_prefix(struct in6_addr *pfx, > const struct in6_addr *addr, > int plen) > ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH net-next] ipv6: Implement optimized IPv6 masked address comparison for ARM64 2017-03-17 12:00 ` Robin Murphy @ 2017-03-17 12:22 ` James Greenhalgh 2017-03-17 21:20 ` Subash Abhinov Kasiviswanathan 0 siblings, 1 reply; 4+ messages in thread From: James Greenhalgh @ 2017-03-17 12:22 UTC (permalink / raw) To: Robin Murphy Cc: Subash Abhinov Kasiviswanathan, netdev, davem, luke.starrett, catalin.marinas, nd On Fri, Mar 17, 2017 at 12:00:42PM +0000, Robin Murphy wrote: > On 17/03/17 04:42, Subash Abhinov Kasiviswanathan wrote: > > Android devices use multiple ip[6]tables for statistics, UID matching > > and other functionality. Perf output indicated that ip6_do_table > > was taking a considerable amount of CPU and more than ip_do_table > > for an equivalent rate. ipv6_masked_addr_cmp was chosen for > > optimization as there are more instructions required than the > > equivalent operation in ip_packet_match. > > > > Using 128 bit operations helps to reduce the number of instructions > > for the match on an ARM64 system. This helps to improve UDPv6 DL > > performance by 40Mbps (860Mbps -> 900Mbps) on a CPU limited system.
> > After trying to have a look at the codegen difference it makes, I think > I may have found why it's faster ;) > > ---------- > [root@space-channel-5 ~]# cat > ip.c > #include <stdbool.h> > #include <netinet/in.h> > > bool > ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m, > const struct in6_addr *a2) > { > const unsigned long *ul1 = (const unsigned long *)a1; > const unsigned long *ulm = (const unsigned long *)m; > const unsigned long *ul2 = (const unsigned long *)a2; > > return !!(((ul1[0] ^ ul2[0]) & ulm[0]) | > ((ul1[1] ^ ul2[1]) & ulm[1])); > } > > bool > ipv6_masked_addr_cmp_new(const struct in6_addr *a1, const struct > in6_addr *m, > const struct in6_addr *a2) > { > const __uint128_t *ul1 = (const __uint128_t *)a1; > const __uint128_t *ulm = (const __uint128_t *)m; > const __uint128_t *ul2 = (const __uint128_t *)a1; > > return !!((*ul1 ^ *ul2) & *ulm); > } <snip> > That's clearly not right - I'm not sure quite what undefined behaviour > assumption convinces GCC to optimise the whole thing away> While the pointer casting is a bit ghastly, I don't actually think that GCC is taking advantage of undefined behaviour here, rather it looks like you have a simple typo on line 3: > const __uint128_t *ul1 = (const __uint128_t *)a1; > const __uint128_t *ulm = (const __uint128_t *)m; > const __uint128_t *ul2 = (const __uint128_t *)a1; ul2 = a2, surely? As it is (stripping casts) you have a1 ^ a1, which will get you to 0 pretty quickly. 
Fixing that up for you; bool ipv6_masked_addr_cmp_new(const struct in6_addr *a1, const struct in6_addr *m, const struct in6_addr *a2) { const __uint128_t *ul1 = (const __uint128_t *)a1; const __uint128_t *ulm = (const __uint128_t *)m; const __uint128_t *ul2 = (const __uint128_t *)a2; return !!((*ul1 ^ *ul2) & *ulm); } $ gcc -O2 ipv6_masked_addr_cmp_new: ldp x4, x3, [x0] ldp x5, x2, [x2] ldp x0, x1, [x1] eor x4, x4, x5 eor x2, x3, x2 and x0, x0, x4 and x1, x1, x2 orr x0, x0, x1 cmp x0, 0 cset w0, ne ret Which at least looks like it might calculate something useful :-) Cheers, James ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH net-next] ipv6: Implement optimized IPv6 masked address comparison for ARM64 2017-03-17 12:22 ` James Greenhalgh @ 2017-03-17 21:20 ` Subash Abhinov Kasiviswanathan 0 siblings, 0 replies; 4+ messages in thread From: Subash Abhinov Kasiviswanathan @ 2017-03-17 21:20 UTC (permalink / raw) To: James Greenhalgh, Robin Murphy Cc: netdev, davem, luke.starrett, catalin.marinas, nd, netdev-owner > >> That's clearly not right - I'm not sure quite what undefined behaviour >> assumption convinces GCC to optimise the whole thing away> > > While the pointer casting is a bit ghastly, I don't actually think that > GCC is taking advantage of undefined behaviour here, rather it looks > like > you have a simple typo on line 3: > >> const __uint128_t *ul1 = (const __uint128_t *)a1; >> const __uint128_t *ulm = (const __uint128_t *)m; >> const __uint128_t *ul2 = (const __uint128_t *)a1; > > ul2 = a2, surely? > > As it is (stripping casts) you have a1 ^ a1, which will get you to 0 > pretty quickly. Fixing that up for you; > > bool > ipv6_masked_addr_cmp_new(const struct in6_addr *a1, const struct > in6_addr *m, > const struct in6_addr *a2) > { > const __uint128_t *ul1 = (const __uint128_t *)a1; > const __uint128_t *ulm = (const __uint128_t *)m; > const __uint128_t *ul2 = (const __uint128_t *)a2; > > return !!((*ul1 ^ *ul2) & *ulm); > } > > $ gcc -O2 > > ipv6_masked_addr_cmp_new: > ldp x4, x3, [x0] > ldp x5, x2, [x2] > ldp x0, x1, [x1] > eor x4, x4, x5 > eor x2, x3, x2 > and x0, x0, x4 > and x1, x1, x2 > orr x0, x0, x1 > cmp x0, 0 > cset w0, ne > ret > > Which at least looks like it might calculate something useful :-) > Hi Robin / James Thanks for checking and sorry for the confusion. I'll retest this. ^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2017-03-17 21:20 UTC | newest] Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2017-03-17 4:42 [PATCH net-next] ipv6: Implement optimized IPv6 masked address comparison for ARM64 Subash Abhinov Kasiviswanathan 2017-03-17 12:00 ` Robin Murphy 2017-03-17 12:22 ` James Greenhalgh 2017-03-17 21:20 ` Subash Abhinov Kasiviswanathan
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.