All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Daniel Borkmann <daniel@iogearbox.net>,
	Jean-Philippe Brucker <jean-philippe.brucker@arm.com>,
	Will Deacon <will.deacon@arm.com>,
	Alexei Starovoitov <ast@kernel.org>
Subject: [PATCH 5.1 51/55] bpf, arm64: use more scalable stadd over ldxr / stxr loop in xadd
Date: Tue,  2 Jul 2019 10:01:59 +0200	[thread overview]
Message-ID: <20190702080126.728030225@linuxfoundation.org> (raw)
In-Reply-To: <20190702080124.103022729@linuxfoundation.org>

From: Daniel Borkmann <daniel@iogearbox.net>

commit 34b8ab091f9ef57a2bb3c8c8359a0a03a8abf2f9 upstream.

Since ARMv8.1 supplement introduced LSE atomic instructions back in 2016,
lets add support for STADD and use that in favor of LDXR / STXR loop for
the XADD mapping if available. STADD is encoded as an alias for LDADD with
XZR as the destination register, therefore add LDADD to the instruction
encoder along with STADD as special case and use it in the JIT for CPUs
that advertise LSE atomics in CPUID register. If immediate offset in the
BPF XADD insn is 0, then use dst register directly instead of temporary
one.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/arm64/include/asm/insn.h |    8 ++++++++
 arch/arm64/kernel/insn.c      |   40 ++++++++++++++++++++++++++++++++++++++++
 arch/arm64/net/bpf_jit.h      |    4 ++++
 arch/arm64/net/bpf_jit_comp.c |   28 +++++++++++++++++++---------
 4 files changed, 71 insertions(+), 9 deletions(-)

--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -277,6 +277,7 @@ __AARCH64_INSN_FUNCS(adrp,	0x9F000000, 0
 __AARCH64_INSN_FUNCS(prfm,	0x3FC00000, 0x39800000)
 __AARCH64_INSN_FUNCS(prfm_lit,	0xFF000000, 0xD8000000)
 __AARCH64_INSN_FUNCS(str_reg,	0x3FE0EC00, 0x38206800)
+__AARCH64_INSN_FUNCS(ldadd,	0x3F20FC00, 0xB8200000)
 __AARCH64_INSN_FUNCS(ldr_reg,	0x3FE0EC00, 0x38606800)
 __AARCH64_INSN_FUNCS(ldr_lit,	0xBF000000, 0x18000000)
 __AARCH64_INSN_FUNCS(ldrsw_lit,	0xFF000000, 0x98000000)
@@ -394,6 +395,13 @@ u32 aarch64_insn_gen_load_store_ex(enum
 				   enum aarch64_insn_register state,
 				   enum aarch64_insn_size_type size,
 				   enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
+			   enum aarch64_insn_register address,
+			   enum aarch64_insn_register value,
+			   enum aarch64_insn_size_type size);
+u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
+			   enum aarch64_insn_register value,
+			   enum aarch64_insn_size_type size);
 u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
 				 enum aarch64_insn_register src,
 				 int imm, enum aarch64_insn_variant variant,
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -734,6 +734,46 @@ u32 aarch64_insn_gen_load_store_ex(enum
 					    state);
 }
 
+u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
+			   enum aarch64_insn_register address,
+			   enum aarch64_insn_register value,
+			   enum aarch64_insn_size_type size)
+{
+	u32 insn = aarch64_insn_get_ldadd_value();
+
+	switch (size) {
+	case AARCH64_INSN_SIZE_32:
+	case AARCH64_INSN_SIZE_64:
+		break;
+	default:
+		pr_err("%s: unimplemented size encoding %d\n", __func__, size);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	insn = aarch64_insn_encode_ldst_size(size, insn);
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
+					    result);
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
+					    address);
+
+	return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn,
+					    value);
+}
+
+u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
+			   enum aarch64_insn_register value,
+			   enum aarch64_insn_size_type size)
+{
+	/*
+	 * STADD is simply encoded as an alias for LDADD with XZR as
+	 * the destination register.
+	 */
+	return aarch64_insn_gen_ldadd(AARCH64_INSN_REG_ZR, address,
+				      value, size);
+}
+
 static u32 aarch64_insn_encode_prfm_imm(enum aarch64_insn_prfm_type type,
 					enum aarch64_insn_prfm_target target,
 					enum aarch64_insn_prfm_policy policy,
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -100,6 +100,10 @@
 #define A64_STXR(sf, Rt, Rn, Rs) \
 	A64_LSX(sf, Rt, Rn, Rs, STORE_EX)
 
+/* LSE atomics */
+#define A64_STADD(sf, Rn, Rs) \
+	aarch64_insn_gen_stadd(Rn, Rs, A64_SIZE(sf))
+
 /* Add/subtract (immediate) */
 #define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \
 	aarch64_insn_gen_add_sub_imm(Rd, Rn, imm12, \
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -365,7 +365,7 @@ static int build_insn(const struct bpf_i
 	const bool is64 = BPF_CLASS(code) == BPF_ALU64 ||
 			  BPF_CLASS(code) == BPF_JMP;
 	const bool isdw = BPF_SIZE(code) == BPF_DW;
-	u8 jmp_cond;
+	u8 jmp_cond, reg;
 	s32 jmp_offset;
 
 #define check_imm(bits, imm) do {				\
@@ -756,18 +756,28 @@ emit_cond_jmp:
 			break;
 		}
 		break;
+
 	/* STX XADD: lock *(u32 *)(dst + off) += src */
 	case BPF_STX | BPF_XADD | BPF_W:
 	/* STX XADD: lock *(u64 *)(dst + off) += src */
 	case BPF_STX | BPF_XADD | BPF_DW:
-		emit_a64_mov_i(1, tmp, off, ctx);
-		emit(A64_ADD(1, tmp, tmp, dst), ctx);
-		emit(A64_LDXR(isdw, tmp2, tmp), ctx);
-		emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
-		emit(A64_STXR(isdw, tmp2, tmp, tmp3), ctx);
-		jmp_offset = -3;
-		check_imm19(jmp_offset);
-		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
+		if (!off) {
+			reg = dst;
+		} else {
+			emit_a64_mov_i(1, tmp, off, ctx);
+			emit(A64_ADD(1, tmp, tmp, dst), ctx);
+			reg = tmp;
+		}
+		if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) {
+			emit(A64_STADD(isdw, reg, src), ctx);
+		} else {
+			emit(A64_LDXR(isdw, tmp2, reg), ctx);
+			emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
+			emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
+			jmp_offset = -3;
+			check_imm19(jmp_offset);
+			emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
+		}
 		break;
 
 	default:



  parent reply	other threads:[~2019-07-02  8:04 UTC|newest]

Thread overview: 69+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-02  8:01 [PATCH 5.1 00/55] 5.1.16-stable review Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 01/55] arm64: Dont unconditionally add -Wno-psabi to KBUILD_CFLAGS Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 02/55] Revert "x86/uaccess, ftrace: Fix ftrace_likely_update() vs. SMAP" Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 03/55] qmi_wwan: Fix out-of-bounds read Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 04/55] fs/proc/array.c: allow reporting eip/esp for all coredumping threads Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 05/55] mm/mempolicy.c: fix an incorrect rebind node in mpol_rebind_nodemask Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 06/55] fs/binfmt_flat.c: make load_flat_shared_library() work Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 07/55] clk: tegra210: Fix default rates for HDA clocks Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 08/55] clk: socfpga: stratix10: fix divider entry for the emac clocks Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 09/55] drm/i915: Force 2*96 MHz cdclk on glk/cnl when audio power is enabled Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 10/55] drm/i915: Save the old CDCLK atomic state Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 11/55] drm/i915: Remove redundant store of logical CDCLK state Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 12/55] drm/i915: Skip modeset for cdclk changes if possible Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 13/55] mm: soft-offline: return -EBUSY if set_hwpoison_free_buddy_page() fails Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 14/55] mm: hugetlb: soft-offline: dissolve_free_huge_page() return zero on !PageHuge Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 15/55] mm/page_idle.c: fix oops because end_pfn is larger than max_pfn Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 16/55] mm, swap: fix THP swap out Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 17/55] dm init: fix incorrect uses of kstrndup() Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 18/55] dm log writes: make sure super sector log updates are written in order Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 19/55] io_uring: ensure req->file is cleared on allocation Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 20/55] scsi: vmw_pscsi: Fix use-after-free in pvscsi_queue_lck() Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 21/55] x86/speculation: Allow guests to use SSBD even if host does not Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 22/55] x86/microcode: Fix the microcode load on CPU hotplug for real Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 23/55] x86/resctrl: Prevent possible overrun during bitmap operations Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 24/55] mm: fix page cache convergence regression Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 25/55] efi/memreserve: deal with memreserve entries in unmapped memory Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 26/55] NFS/flexfiles: Use the correct TCP timeout for flexfiles I/O Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 27/55] cpu/speculation: Warn on unsupported mitigations= parameter Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 28/55] SUNRPC: Fix up calculation of client message length Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 29/55] irqchip/mips-gic: Use the correct local interrupt map registers Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 30/55] af_packet: Block execution of tasks waiting for transmit to complete in AF_PACKET Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 31/55] bonding: Always enable vlan tx offload Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 32/55] ipv4: Use return value of inet_iif() for __raw_v4_lookup in the while loop Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 33/55] net/packet: fix memory leak in packet_set_ring() Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 34/55] net: remove duplicate fetch in sock_getsockopt Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 35/55] net: stmmac: fixed new system time seconds value calculation Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 36/55] net: stmmac: set IC bit when transmitting frames with HW timestamp Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 37/55] net/tls: fix page double free on TX cleanup Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 38/55] sctp: change to hold sk after auth shkey is created successfully Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 39/55] team: Always enable vlan tx offload Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 40/55] tipc: change to use register_pernet_device Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 41/55] tipc: check msg->req data len in tipc_nl_compat_bearer_disable Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 42/55] tun: wake up waitqueues after IFF_UP is set Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 43/55] net: aquantia: fix vlans not working over bridged network Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 44/55] bpf: simplify definition of BPF_FIB_LOOKUP related flags Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 45/55] bpf: lpm_trie: check left child of last leftmost node for NULL Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 46/55] bpf: fix nested bpf tracepoints with per-cpu data Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 47/55] bpf: fix unconnected udp hooks Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 48/55] bpf: udp: Avoid calling reuseports bpf_prog from udp_gro Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 49/55] bpf: udp: ipv6: Avoid running reuseports bpf_prog from __udp6_lib_err Greg Kroah-Hartman
2019-07-02  8:01 ` [PATCH 5.1 50/55] arm64: futex: Avoid copying out uninitialised stack in failed cmpxchg() Greg Kroah-Hartman
2019-07-02  8:01 ` Greg Kroah-Hartman [this message]
2019-07-03  2:02   ` [PATCH 5.1 51/55] bpf, arm64: use more scalable stadd over ldxr / stxr loop in xadd Sasha Levin
2019-07-03  7:24     ` Greg Kroah-Hartman
2019-07-02  8:02 ` [PATCH 5.1 52/55] futex: Update comments and docs about return values of arch futex code Greg Kroah-Hartman
2019-07-02  8:02 ` [PATCH 5.1 53/55] RDMA: Directly cast the sockaddr union to sockaddr Greg Kroah-Hartman
2019-07-02  8:02 ` [PATCH 5.1 54/55] fanotify: update connector fsid cache on add mark Greg Kroah-Hartman
2019-07-02  8:02 ` [PATCH 5.1 55/55] tipc: pass tunnel dev as NULL to udp_tunnel(6)_xmit_skb Greg Kroah-Hartman
2019-07-02 14:32 ` [PATCH 5.1 00/55] 5.1.16-stable review kernelci.org bot
2019-07-02 17:39 ` Naresh Kamboju
2019-07-03  9:11   ` Greg Kroah-Hartman
2019-07-02 18:06 ` Jiunn Chang
2019-07-02 21:09 ` Kelsey Skunberg
2019-07-02 22:56 ` shuah
2019-07-03  9:12   ` Greg Kroah-Hartman
2019-07-03  6:26 ` Shreeya Patel
2019-07-03 10:21 ` Jon Hunter
2019-07-03 10:49   ` Greg Kroah-Hartman
2019-07-04  5:27 ` Bharath Vedartham

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190702080126.728030225@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=ast@kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=jean-philippe.brucker@arm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    --cc=will.deacon@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.