linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Puranjay Mohan <puranjay@kernel.org>,
	Alexei Starovoitov <ast@kernel.org>,
	Sasha Levin <sashal@kernel.org>,
	daniel@iogearbox.net, andrii@kernel.org, davem@davemloft.net,
	dsahern@kernel.org, tglx@linutronix.de, mingo@redhat.com,
	bp@alien8.de, dave.hansen@linux.intel.com, x86@kernel.org,
	bpf@vger.kernel.org, netdev@vger.kernel.org
Subject: [PATCH AUTOSEL 6.8 23/52] bpf, x86: Fix PROBE_MEM runtime load check
Date: Tue,  7 May 2024 19:06:49 -0400	[thread overview]
Message-ID: <20240507230800.392128-23-sashal@kernel.org> (raw)
In-Reply-To: <20240507230800.392128-1-sashal@kernel.org>

From: Puranjay Mohan <puranjay@kernel.org>

[ Upstream commit b599d7d26d6ad1fc9975218574bc2ca6d0293cfd ]

When a load is marked PROBE_MEM - e.g. due to PTR_UNTRUSTED access - the
address being loaded from is not necessarily valid. The BPF jit sets up
exception handlers for each such load which catch page faults and zero out
the destination register.

If the address for the load is outside kernel address space, the load
will escape the exception handling and crash the kernel. To prevent this
from happening, the JIT emits some instructions to verify that addr is > end
of userspace addresses.

x86 has a legacy vsyscall ABI where a page at address 0xffffffffff600000
is mapped with user accessible permissions. The addresses in this page
are considered userspace addresses by the fault handler. Therefore, a
BPF program accessing this page will crash the kernel.

This patch fixes the runtime checks to also check that the PROBE_MEM
address is below VSYSCALL_ADDR.

Example BPF program:

 SEC("fentry/tcp_v4_connect")
 int BPF_PROG(fentry_tcp_v4_connect, struct sock *sk)
 {
	*(volatile unsigned long *)&sk->sk_tsq_flags;
	return 0;
 }

BPF Assembly:

 0: (79) r1 = *(u64 *)(r1 +0)
 1: (79) r1 = *(u64 *)(r1 +344)
 2: (b7) r0 = 0
 3: (95) exit

			       x86-64 JIT
			       ==========

            BEFORE                                    AFTER
	    ------                                    -----

 0:   nopl   0x0(%rax,%rax,1)             0:   nopl   0x0(%rax,%rax,1)
 5:   xchg   %ax,%ax                      5:   xchg   %ax,%ax
 7:   push   %rbp                         7:   push   %rbp
 8:   mov    %rsp,%rbp                    8:   mov    %rsp,%rbp
 b:   mov    0x0(%rdi),%rdi               b:   mov    0x0(%rdi),%rdi
-------------------------------------------------------------------------------
 f:   movabs $0x100000000000000,%r11      f:   movabs $0xffffffffff600000,%r10
19:   add    $0x2a0,%rdi                 19:   mov    %rdi,%r11
20:   cmp    %r11,%rdi                   1c:   add    $0x2a0,%r11
23:   jae    0x0000000000000029          23:   sub    %r10,%r11
25:   xor    %edi,%edi                   26:   movabs $0x100000000a00000,%r10
27:   jmp    0x000000000000002d          30:   cmp    %r10,%r11
29:   mov    0x0(%rdi),%rdi              33:   ja     0x0000000000000039
--------------------------------\        35:   xor    %edi,%edi
2d:   xor    %eax,%eax           \       37:   jmp    0x0000000000000040
2f:   leave                       \      39:   mov    0x2a0(%rdi),%rdi
30:   ret                          \--------------------------------------------
                                         40:   xor    %eax,%eax
                                         42:   leave
                                         43:   ret

Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
Link: https://lore.kernel.org/r/20240424100210.11982-3-puranjay@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 arch/x86/net/bpf_jit_comp.c | 57 ++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 32 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index df484885ccd4a..f415c2cf53582 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1585,36 +1585,41 @@ st:			if (is_imm8(insn->off))
 			if (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
 			    BPF_MODE(insn->code) == BPF_PROBE_MEMSX) {
 				/* Conservatively check that src_reg + insn->off is a kernel address:
-				 *   src_reg + insn->off >= TASK_SIZE_MAX + PAGE_SIZE
-				 * src_reg is used as scratch for src_reg += insn->off and restored
-				 * after emit_ldx if necessary
+				 *   src_reg + insn->off > TASK_SIZE_MAX + PAGE_SIZE
+				 *   and
+				 *   src_reg + insn->off < VSYSCALL_ADDR
 				 */
 
-				u64 limit = TASK_SIZE_MAX + PAGE_SIZE;
+				u64 limit = TASK_SIZE_MAX + PAGE_SIZE - VSYSCALL_ADDR;
 				u8 *end_of_jmp;
 
-				/* At end of these emitted checks, insn->off will have been added
-				 * to src_reg, so no need to do relative load with insn->off offset
-				 */
-				insn_off = 0;
+				/* movabsq r10, VSYSCALL_ADDR */
+				emit_mov_imm64(&prog, BPF_REG_AX, (long)VSYSCALL_ADDR >> 32,
+					       (u32)(long)VSYSCALL_ADDR);
 
-				/* movabsq r11, limit */
-				EMIT2(add_1mod(0x48, AUX_REG), add_1reg(0xB8, AUX_REG));
-				EMIT((u32)limit, 4);
-				EMIT(limit >> 32, 4);
+				/* mov src_reg, r11 */
+				EMIT_mov(AUX_REG, src_reg);
 
 				if (insn->off) {
-					/* add src_reg, insn->off */
-					maybe_emit_1mod(&prog, src_reg, true);
-					EMIT2_off32(0x81, add_1reg(0xC0, src_reg), insn->off);
+					/* add r11, insn->off */
+					maybe_emit_1mod(&prog, AUX_REG, true);
+					EMIT2_off32(0x81, add_1reg(0xC0, AUX_REG), insn->off);
 				}
 
-				/* cmp src_reg, r11 */
-				maybe_emit_mod(&prog, src_reg, AUX_REG, true);
-				EMIT2(0x39, add_2reg(0xC0, src_reg, AUX_REG));
+				/* sub r11, r10 */
+				maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
+				EMIT2(0x29, add_2reg(0xC0, AUX_REG, BPF_REG_AX));
+
+				/* movabsq r10, limit */
+				emit_mov_imm64(&prog, BPF_REG_AX, (long)limit >> 32,
+					       (u32)(long)limit);
+
+				/* cmp r10, r11 */
+				maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
+				EMIT2(0x39, add_2reg(0xC0, AUX_REG, BPF_REG_AX));
 
-				/* if unsigned '>=', goto load */
-				EMIT2(X86_JAE, 0);
+				/* if unsigned '>', goto load */
+				EMIT2(X86_JA, 0);
 				end_of_jmp = prog;
 
 				/* xor dst_reg, dst_reg */
@@ -1640,18 +1645,6 @@ st:			if (is_imm8(insn->off))
 				/* populate jmp_offset for JMP above */
 				start_of_ldx[-1] = prog - start_of_ldx;
 
-				if (insn->off && src_reg != dst_reg) {
-					/* sub src_reg, insn->off
-					 * Restore src_reg after "add src_reg, insn->off" in prev
-					 * if statement. But if src_reg == dst_reg, emit_ldx
-					 * above already clobbered src_reg, so no need to restore.
-					 * If add src_reg, insn->off was unnecessary, no need to
-					 * restore either.
-					 */
-					maybe_emit_1mod(&prog, src_reg, true);
-					EMIT2_off32(0x81, add_1reg(0xE8, src_reg), insn->off);
-				}
-
 				if (!bpf_prog->aux->extable)
 					break;
 
-- 
2.43.0


  parent reply	other threads:[~2024-05-07 23:08 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-05-07 23:06 [PATCH AUTOSEL 6.8 01/52] ASoC: Intel: bytcr_rt5640: Apply Asus T100TA quirk to Asus T100TAM too Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 02/52] regulator: irq_helpers: duplicate IRQ name Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 03/52] ALSA: hda: cs35l56: Exit cache-only after cs35l56_wait_for_firmware_boot() Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 04/52] ASoC: SOF: ipc4-pcm: Use consistent name for snd_sof_pcm_stream pointer Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 05/52] ASoC: SOF: ipc4-pcm: Use consistent name for sof_ipc4_timestamp_info pointer Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 06/52] ASoC: SOF: ipc4-pcm: Introduce generic sof_ipc4_pcm_stream_priv Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 07/52] ASoC: SOF: pcm: Restrict DSP D0i3 during S0ix to IPC3 Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 08/52] ASoC: acp: Support microphone from device Acer 315-24p Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 09/52] ASoC: rt5645: Fix the electric noise due to the CBJ contacts floating Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 10/52] ASoC: dt-bindings: rt5645: add cbj sleeve gpio property Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 11/52] ASoC: rt722-sdca: modify channel number to support 4 channels Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 12/52] ASoC: rt722-sdca: add headset microphone vrefo setting Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 13/52] regulator: qcom-refgen: fix module autoloading Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 14/52] regulator: vqmmc-ipq4019: " Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 15/52] ASoC: cs35l41: Update DSP1RX5/6 Sources for DSP config Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 16/52] ASoC: rt715: add vendor clear control register Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 17/52] ASoC: rt715-sdca: volume step modification Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 18/52] KVM: selftests: Add test for uaccesses to non-existent vgic-v2 CPUIF Sasha Levin
2024-05-08  6:25   ` Oliver Upton
2024-05-08 17:55     ` Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 19/52] Input: xpad - add support for ASUS ROG RAIKIRI Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 20/52] btrfs: take the cleaner_mutex earlier in qgroup disable Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 21/52] EDAC/versal: Do not register for NOC errors Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 22/52] fpga: dfl-pci: add PCI subdevice ID for Intel D5005 card Sasha Levin
2024-05-07 23:06 ` Sasha Levin [this message]
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 24/52] ALSA: emu10k1: factor out snd_emu1010_load_dock_firmware() Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 25/52] ALSA: emu10k1: make E-MU FPGA writes potentially more reliable Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 26/52] erofs: reliably distinguish block based and fscache mode Sasha Levin
2024-05-07 23:19   ` Gao Xiang
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 27/52] softirq: Fix suspicious RCU usage in __do_softirq() Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 28/52] net: qede: sanitize 'rc' in qede_add_tc_flower_fltr() Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 29/52] firewire: nosy: ensure user_length is taken into account when fetching packet contents Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 30/52] platform/x86: ISST: Add Grand Ridge to HPM CPU list Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 31/52] ASoC: da7219-aad: fix usage of device_get_named_child_node() Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 32/52] ASoC: cs35l56: fix usages " Sasha Levin
2024-05-07 23:06 ` [PATCH AUTOSEL 6.8 33/52] ALSA: hda: intel-dsp-config: harden I2C/I2S codec detection Sasha Levin
2024-05-07 23:07 ` [PATCH AUTOSEL 6.8 34/52] Input: amimouse - mark driver struct with __refdata to prevent section mismatch Sasha Levin
2024-05-07 23:07 ` [PATCH AUTOSEL 6.8 35/52] drm/amdgpu: Fix VRAM memory accounting Sasha Levin
2024-05-07 23:07 ` [PATCH AUTOSEL 6.8 36/52] drm/amd/display: Ensure that dmcub support flag is set for DCN20 Sasha Levin
2024-05-07 23:07 ` [PATCH AUTOSEL 6.8 37/52] drm/amd/display: Add dtbclk access to dcn315 Sasha Levin
2024-05-07 23:07 ` [PATCH AUTOSEL 6.8 38/52] drm/amd/display: Atom Integrated System Info v2_2 for DCN35 Sasha Levin
2024-05-07 23:07 ` [PATCH AUTOSEL 6.8 39/52] drm/amd/display: Allocate zero bw after bw alloc enable Sasha Levin
2024-05-07 23:07 ` [PATCH AUTOSEL 6.8 40/52] drm/amd/display: Add VCO speed parameter for DCN31 FPU Sasha Levin
2024-05-07 23:07 ` [PATCH AUTOSEL 6.8 41/52] drm/amd/display: Fix DC mode screen flickering on DCN321 Sasha Levin
2024-05-07 23:07 ` [PATCH AUTOSEL 6.8 42/52] drm/amd/display: Disable seamless boot on 128b/132b encoding Sasha Levin
2024-05-07 23:07 ` [PATCH AUTOSEL 6.8 43/52] drm/amdkfd: Flush the process wq before creating a kfd_process Sasha Levin
2024-05-07 23:07 ` [PATCH AUTOSEL 6.8 44/52] x86/mm: Remove broken vsyscall emulation code from the page fault code Sasha Levin
2024-05-07 23:07 ` [PATCH AUTOSEL 6.8 45/52] nvme: find numa distance only if controller has valid numa id Sasha Levin
2024-05-07 23:07 ` [PATCH AUTOSEL 6.8 46/52] nvmet-auth: return the error code to the nvmet_auth_host_hash() callers Sasha Levin
2024-05-07 23:07 ` [PATCH AUTOSEL 6.8 47/52] nvmet-auth: replace pr_debug() with pr_err() to report an error Sasha Levin
2024-05-07 23:07 ` [PATCH AUTOSEL 6.8 48/52] nvme: cancel pending I/O if nvme controller is in terminal state Sasha Levin
2024-05-07 23:07 ` [PATCH AUTOSEL 6.8 49/52] nvmet-tcp: fix possible memory leak when tearing down a controller Sasha Levin
2024-05-07 23:07 ` [PATCH AUTOSEL 6.8 50/52] nvmet: fix nvme status code when namespace is disabled Sasha Levin
2024-05-07 23:07 ` [PATCH AUTOSEL 6.8 51/52] nvme-tcp: strict pdu pacing to avoid send stalls on TLS Sasha Levin
2024-05-07 23:07 ` [PATCH AUTOSEL 6.8 52/52] epoll: be better about file lifetimes Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240507230800.392128-23-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bp@alien8.de \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=puranjay@kernel.org \
    --cc=stable@vger.kernel.org \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).