From: Andrey Konovalov <andreyknvl@google.com>
To: Alexander Potapenko <glider@google.com>
Cc: Wolfram Sang <wsa@the-dreams.de>,
	Vegard Nossum <vegard.nossum@oracle.com>,
	Dmitry Vyukov <dvyukov@google.com>,
	Linux Memory Management List <linux-mm@kvack.org>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Andreas Dilger <adilger.kernel@dilger.ca>,
	Andrew Morton <akpm@linux-foundation.org>,
	Andrey Ryabinin <aryabinin@virtuozzo.com>,
	Andy Lutomirski <luto@kernel.org>,
	Ard Biesheuvel <ard.biesheuvel@linaro.org>,
	Arnd Bergmann <arnd@arndb.de>,
	Christoph Hellwig <hch@infradead.org>,
	Christoph Hellwig <hch@lst.de>,
	darrick.wong@oracle.com,
	"David S. Miller" <davem@davemloft.net>,
	Dmitry Torokhov <dmitry.torokhov@gmail.com>,
	Eric Biggers <ebiggers@google.com>,
	Eric Dumazet <edumazet@google.com>,
	ericvh@gmail.com,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	harry.wentland@amd.com,
	Herbert Xu <herbert@gondor.apana.org.au>,
	iii@linux.ibm.com,
	mingo@elte.hu,
	Jason Wang <jasowang@redhat.com>,
	Jens Axboe <axboe@kernel.dk>,
	Marek Szyprowski <m.szyprowski@samsung.com>,
	Marco Elver <elver@google.com>,
	Mark Rutland <mark.rutland@arm.com>,
	"Martin K. Petersen" <martin.petersen@oracle.com>,
	Martin Schwidefsky <schwidefsky@de.ibm.com>,
	Matthew Wilcox <willy@infradead.org>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	Michal Simek <monstr@monstr.eu>,
	Petr Mladek <pmladek@suse.com>,
	Qian Cai <cai@lca.pw>,
	Randy Dunlap <rdunlap@infradead.org>,
	Robin Murphy <robin.murphy@arm.com>,
	sergey.senozhatsky@gmail.com,
	Steven Rostedt <rostedt@goodmis.org>,
	Takashi Iwai <tiwai@suse.com>,
	"Theodore Ts'o" <tytso@mit.edu>,
	Thomas Gleixner <tglx@linutronix.de>,
	gor@linux.ibm.com
Subject: Re: [PATCH RFC v3 10/36] kmsan: add KMSAN runtime
Date: Mon, 2 Dec 2019 16:39:57 +0100	[thread overview]
Message-ID: <CAAeHK+xoh5gjrsai5fe6_meWrskhXbiR4ubT5hy2yZFFuMr5aw@mail.gmail.com> (raw)
In-Reply-To: <20191122112621.204798-11-glider@google.com>

On Fri, Nov 22, 2019 at 12:26 PM <glider@google.com> wrote:

A few generic comments:

1. There are a lot of TODOs in the code. They either need to be resolved
or removed.

2. This patch is huge; would it be possible to split it? One way to do
this is to have two parts: one that adds the headers and empty hooks,
and another that adds the hook implementations. Or something like that,
if that's feasible at all.

[...]

> +/*
> + * Assembly bits to safely invoke KMSAN hooks from .S files.
> + *
> + * Adopted from KTSAN assembly hooks implementation by Dmitry Vyukov:
> + * https://github.com/google/ktsan/blob/ktsan/arch/x86/include/asm/ktsan.h

This link can go stale. Maybe just link to the repo?

[...]

> +/*
> + * KMSAN checks.

This comment just repeats the file name. Maybe worth mentioning what
exactly we are checking here, and how this header is different from
kmsan.h. Perhaps some of the functions declared here should be moved
there as well.

> + *
> + * Copyright (C) 2017-2019 Google LLC
> + * Author: Alexander Potapenko <glider@google.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + */

[...]

> +#ifndef LINUX_KMSAN_H
> +#define LINUX_KMSAN_H
> +
> +#include <linux/gfp.h>
> +#include <linux/stackdepot.h>
> +#include <linux/types.h>
> +#include <linux/vmalloc.h>
> +
> +struct page;
> +struct kmem_cache;
> +struct task_struct;
> +struct vm_struct;
> +
> +

Remove unneeded whitespace.

[...]

> +void kmsan_internal_memset_shadow(void *addr, int b, size_t size,
> +				   bool checked)
> +{
> +	void *shadow_start;
> +	u64 page_offset, address = (u64)addr;
> +	size_t to_fill;
> +
> +	BUG_ON(!metadata_is_contiguous(addr, size, META_SHADOW));
> +	while (size) {
> +		page_offset = address % PAGE_SIZE;
> +		to_fill = min(PAGE_SIZE - page_offset, (u64)size);
> +		shadow_start = kmsan_get_metadata((void *)address, to_fill,
> +						  META_SHADOW);
> +		if (!shadow_start) {
> +			if (checked) {
> +				kmsan_pr_locked("WARNING: not memsetting %d bytes starting at %px, because the shadow is NULL\n", to_fill, address);

Why not use WARN()?
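
E.g. something like this (untested sketch):

	if (checked)
		WARN(1, "not memsetting %zd bytes starting at %px: shadow is NULL\n",
		     to_fill, (void *)address);

WARN() would also taint the kernel and print a backtrace, which seems
useful here.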

[...]

> +depot_stack_handle_t kmsan_internal_chain_origin(depot_stack_handle_t id)
> +{
> +	depot_stack_handle_t handle;
> +	unsigned long entries[3];
> +	u64 magic = KMSAN_CHAIN_MAGIC_ORIGIN_FULL;
> +	int depth = 0;
> +	static int skipped;
> +	u32 extra_bits;
> +
> +	if (!kmsan_ready)
> +		return 0;

Do we need this check here?

> +
> +	if (!id)
> +		return id;

And this one?

> +	/*
> +	 * Make sure we have enough spare bits in |id| to hold the UAF bit and
> +	 * the chain depth.
> +	 */
> +	BUILD_BUG_ON((1 << STACK_DEPOT_EXTRA_BITS) <= (MAX_CHAIN_DEPTH << 1));
> +
> +	extra_bits = get_dsh_extra_bits(id);
> +
> +	depth = extra_bits >> 1;
> +	if (depth >= MAX_CHAIN_DEPTH) {
> +		skipped++;
> +		if (skipped % 10000 == 0) {
> +			kmsan_pr_locked("not chained %d origins\n", skipped);
> +			dump_stack();
> +			kmsan_print_origin(id);
> +		}
> +		return id;
> +	}
> +	depth++;
> +	/* Lowest bit is the UAF flag, higher bits hold the depth. */
> +	extra_bits = (depth << 1) | (extra_bits & 1);

Please add some helper functions/macros to work with extra_bits.
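
E.g. something along these lines (an untested sketch; the names are
made up):

	/* Lowest bit is the UAF flag, the remaining bits hold the depth. */
	static inline int dsh_depth(u32 extra_bits)
	{
		return extra_bits >> 1;
	}

	static inline bool dsh_uaf(u32 extra_bits)
	{
		return extra_bits & 1;
	}

	static inline u32 dsh_make_extra_bits(int depth, bool uaf)
	{
		return (depth << 1) | uaf;
	}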

[...]

> +void kmsan_set_origin_checked(void *addr, int size, u32 origin, bool checked)
> +{
> +	if (checked && !metadata_is_contiguous(addr, size, META_ORIGIN)) {
> +		kmsan_pr_locked("WARNING: not setting origin for %d bytes starting at %px, because the metadata is incontiguous\n", size, addr);

Why not use WARN()?

[...]

> +void kmsan_internal_task_create(struct task_struct *task);
> +void kmsan_internal_set_origin(void *addr, int size, u32 origin);
> +void kmsan_set_origin_checked(void *addr, int size, u32 origin, bool checked);
> +
> +struct kmsan_context_state *task_kmsan_context_state(void);

s/task_kmsan_context_state/kmsan_task_context_state/ or something like that.

[...]

> +void kmsan_interrupt_enter(void)
> +{
> +	int in_interrupt = this_cpu_read(kmsan_in_interrupt);
> +
> +	/* Turns out it's possible for in_interrupt to be >0 here. */

Why/how? Expand the comment.

[...]

> +void kmsan_nmi_enter(void)
> +{
> +	bool in_nmi = this_cpu_read(kmsan_in_nmi);
> +
> +	BUG_ON(in_nmi);
> +	BUG_ON(preempt_count() & NMI_MASK);

BUG_ON(in_nmi())?

> +/*
> + * The functions may call back to instrumented code, which, in turn, may call
> + * these hooks again. To avoid re-entrancy, we use __GFP_NO_KMSAN_SHADOW.
> + * Instrumented functions shouldn't be called under
> + * ENTER_RUNTIME()/LEAVE_RUNTIME(), because this will lead to skipping
> + * effects of functions like memset() inside instrumented code.
> + */

Add empty line.

> +/* Called from kernel/kthread.c, kernel/fork.c */
> +void kmsan_task_create(struct task_struct *task)
> +{
> +	unsigned long irq_flags;
> +
> +	if (!task)
> +		return;
> +	ENTER_RUNTIME(irq_flags);
> +	kmsan_internal_task_create(task);
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(kmsan_task_create);
> +
> +

Remove empty line.

> +/* Called from kernel/exit.c */
> +void kmsan_task_exit(struct task_struct *task)
> +{
> +	unsigned long irq_flags;
> +	struct kmsan_task_state *state = &task->kmsan;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +
> +	ENTER_RUNTIME(irq_flags);
> +	state->allow_reporting = false;
> +
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(kmsan_task_exit);
> +
> +/* Called from mm/slub.c */
> +void kmsan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags)
> +{
> +	unsigned long irq_flags;
> +
> +	if (unlikely(object == NULL))
> +		return;
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +	/*
> +	 * There's a ctor or this is an RCU cache - do nothing. The memory
> +	 * status hasn't changed since last use.
> +	 */
> +	if (s->ctor || (s->flags & SLAB_TYPESAFE_BY_RCU))
> +		return;
> +
> +	ENTER_RUNTIME(irq_flags);
> +	if (flags & __GFP_ZERO) {

No {} needed here.

> +		kmsan_internal_unpoison_shadow(object, s->object_size,
> +					       KMSAN_POISON_CHECK);
> +	} else {
> +		kmsan_internal_poison_shadow(object, s->object_size, flags,
> +					     KMSAN_POISON_CHECK);
> +	}
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(kmsan_slab_alloc);
> +
> +/* Called from mm/slub.c */
> +void kmsan_slab_free(struct kmem_cache *s, void *object)
> +{
> +	unsigned long irq_flags;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +	ENTER_RUNTIME(irq_flags);
> +
> +	/* RCU slabs could be legally used after free within the RCU period */
> +	if (unlikely(s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)))
> +		goto leave;
> +	if (s->ctor)

Why don't we poison if there's a ctor? Some comment is needed.

> +		goto leave;
> +	kmsan_internal_poison_shadow(object, s->object_size,
> +				     GFP_KERNEL,
> +				     KMSAN_POISON_CHECK | KMSAN_POISON_FREE);
> +leave:
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(kmsan_slab_free);
> +
> +/* Called from mm/slub.c */
> +void kmsan_kmalloc_large(const void *ptr, size_t size, gfp_t flags)
> +{
> +	unsigned long irq_flags;
> +
> +	if (unlikely(ptr == NULL))
> +		return;
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +	ENTER_RUNTIME(irq_flags);
> +	if (flags & __GFP_ZERO) {

No {} needed here.

[...]

> +void kmsan_check_skb(const struct sk_buff *skb)
> +{
> +	int start = skb_headlen(skb);
> +	struct sk_buff *frag_iter;
> +	int i, copy = 0;
> +	skb_frag_t *f;
> +	u32 p_off, p_len, copied;
> +	struct page *p;
> +	u8 *vaddr;
> +
> +	if (!skb || !skb->len)

We either need to check !skb before skb_headlen() or drop the check.
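
E.g. (sketch, initializing start only after the check):

	if (!skb || !skb->len)
		return;
	start = skb_headlen(skb);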

> +		return;
> +
> +	kmsan_internal_check_memory(skb->data, skb_headlen(skb), 0, REASON_ANY);

Use start instead of calling skb_headlen(skb) again. Or just remove
start and always call skb_headlen(skb).
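
I.e. (sketch):

	kmsan_internal_check_memory(skb->data, start, 0, REASON_ANY);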

> +	if (skb_is_nonlinear(skb)) {
> +		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
> +			f = &skb_shinfo(skb)->frags[i];
> +
> +			skb_frag_foreach_page(f,
> +					      skb_frag_off(f) - start,
> +					      copy, p, p_off, p_len, copied) {
> +
> +				vaddr = kmap_atomic(p);
> +				kmsan_internal_check_memory(vaddr + p_off,
> +					p_len, /*user_addr*/ 0,
> +					REASON_ANY);
> +				kunmap_atomic(vaddr);
> +			}
> +		}
> +	}
> +	skb_walk_frags(skb, frag_iter)
> +		kmsan_check_skb(frag_iter);

Hm, won't this recursively walk the same list multiple times?

[...]

> +static void __init kmsan_record_future_shadow_range(void *start, void *end)
> +{
> +	BUG_ON(future_index == NUM_FUTURE_RANGES);
> +	BUG_ON((start >= end) || !start || !end);
> +	start_end_pairs[future_index].start = start;
> +	start_end_pairs[future_index].end = end;
> +	future_index++;
> +}
> +
> +extern char _sdata[], _edata[];

#include <asm/sections.h>?

> +
> +
> +

Remove excessive whitespace.

> +/*
> + * Initialize the shadow for existing mappings during kernel initialization.
> + * These include kernel text/data sections, NODE_DATA and future ranges
> + * registered while creating other data (e.g. percpu).
> + *
> + * Allocations via memblock can be only done before slab is initialized.
> + */
> +void __init kmsan_initialize_shadow(void)
> +{
> +	int nid;
> +	u64 i;
> +	const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
> +	phys_addr_t p_start, p_end;
> +
> +	for_each_reserved_mem_region(i, &p_start, &p_end) {

No need for {} here.

> +		kmsan_record_future_shadow_range(phys_to_virt(p_start),
> +						 phys_to_virt(p_end+1));
> +	}
> +	/* Allocate shadow for .data */
> +	kmsan_record_future_shadow_range(_sdata, _edata);
> +
> +	/*
> +	 * TODO(glider): alloc_node_data() in arch/x86/mm/numa.c uses
> +	 * sizeof(pg_data_t).
> +	 */
> +	for_each_online_node(nid)
> +		kmsan_record_future_shadow_range(
> +			NODE_DATA(nid),	(char *)NODE_DATA(nid) + nd_size);

Remove tab before (char *)NODE_DATA(nid).

[...]

> +void __msan_instrument_asm_store(void *addr, u64 size)
> +{
> +	unsigned long irq_flags;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +	/*
> +	 * Most of the accesses are below 32 bytes. The two exceptions so far
> +	 * are clwb() (64 bytes) and FPU state (512 bytes).
> +	 * It's unlikely that the assembly will touch more than 512 bytes.
> +	 */
> +	if (size > 512)

Maybe do WARN_ON(size > 512) if this is something that we would want
to detect?
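
E.g. (untested):

	if (WARN_ON(size > 512))
		size = 8;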

> +		size = 8;
> +	if (is_bad_asm_addr(addr, size, /*is_store*/true))
> +		return;
> +	ENTER_RUNTIME(irq_flags);
> +	/* Unpoisoning the memory on best effort. */
> +	kmsan_internal_unpoison_shadow(addr, size, /*checked*/false);
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(__msan_instrument_asm_store);
> +
> +void *__msan_memmove(void *dst, void *src, u64 n)
> +{
> +	void *result;
> +	void *shadow_dst;
> +
> +	result = __memmove(dst, src, n);
> +	if (!n)
> +		/* Some people call memmove() with zero length. */
> +		return result;
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return result;
> +
> +	/* Ok to skip address check here, we'll do it later. */
> +	shadow_dst = kmsan_get_metadata(dst, n, META_SHADOW);

kmsan_memmove_metadata() performs this check; do we need it here? The
same goes for other callers of kmsan_memmove/memcpy_metadata().

> +
> +	if (!shadow_dst)
> +		/* Can happen e.g. if the memory is untracked. */
> +		return result;
> +
> +	kmsan_memmove_metadata(dst, src, n);
> +
> +	return result;
> +}
> +EXPORT_SYMBOL(__msan_memmove);

[...]

> +void *__msan_memset(void *dst, int c, size_t n)
> +{
> +	void *result;
> +	unsigned long irq_flags;
> +	depot_stack_handle_t new_origin;
> +	unsigned int shadow;
> +
> +	result = __memset(dst, c, n);
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return result;
> +
> +	ENTER_RUNTIME(irq_flags);
> +	shadow = 0;
> +	kmsan_internal_memset_shadow(dst, shadow, n, /*checked*/false);
> +	new_origin = 0;
> +	kmsan_internal_set_origin(dst, n, new_origin);

Do we need variables for shadow and new_origin here?
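
I.e. just (sketch):

	kmsan_internal_memset_shadow(dst, 0, n, /*checked*/false);
	kmsan_internal_set_origin(dst, n, 0);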

> +	LEAVE_RUNTIME(irq_flags);
> +
> +	return result;
> +}
> +EXPORT_SYMBOL(__msan_memset);

[...]

> +void __msan_poison_alloca(void *address, u64 size, char *descr)
> +{
> +	depot_stack_handle_t handle;
> +	unsigned long entries[4];
> +	unsigned long irq_flags;
> +	u64 size_copy = size, to_fill;
> +	u64 addr_copy = (u64)address;
> +	u64 page_offset;
> +	void *shadow_start;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +
> +	while (size_copy) {

Why not call kmsan_internal_poison_shadow()/kmsan_internal_memset_shadow()
here instead of doing this manually?
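
E.g. (untested sketch, matching the __memset(shadow_start, -1, to_fill)
below):

	kmsan_internal_memset_shadow(address, -1, size, /*checked*/false);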

> +		page_offset = addr_copy % PAGE_SIZE;
> +		to_fill = min(PAGE_SIZE - page_offset, size_copy);
> +		shadow_start = kmsan_get_metadata((void *)addr_copy, to_fill,
> +						  META_SHADOW);
> +		addr_copy += to_fill;
> +		size_copy -= to_fill;
> +		if (!shadow_start)
> +			/* Can happen e.g. if the memory is untracked. */
> +			continue;
> +		__memset(shadow_start, -1, to_fill);
> +	}
> +
> +	entries[0] = KMSAN_ALLOCA_MAGIC_ORIGIN;
> +	entries[1] = (u64)descr;
> +	entries[2] = (u64)__builtin_return_address(0);
> +	entries[3] = (u64)kmsan_internal_return_address(1);
> +
> +	/* stack_depot_save() may allocate memory. */
> +	ENTER_RUNTIME(irq_flags);
> +	handle = stack_depot_save(entries, ARRAY_SIZE(entries), GFP_ATOMIC);
> +	LEAVE_RUNTIME(irq_flags);
> +	kmsan_internal_set_origin(address, size, handle);
> +}
> +EXPORT_SYMBOL(__msan_poison_alloca);
> +
> +void __msan_unpoison_alloca(void *address, u64 size)
> +{
> +	unsigned long irq_flags;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +
> +	ENTER_RUNTIME(irq_flags);
> +	/* Assuming the shadow exists. */

Why do we assume that shadow exists here, but not in
__msan_poison_alloca()? Please expand the comment.

> +	kmsan_internal_unpoison_shadow(address, size, /*checked*/true);
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(__msan_unpoison_alloca);

[...]

> +void kmsan_print_origin(depot_stack_handle_t origin)
> +{
> +	unsigned long *entries = NULL, *chained_entries = NULL;
> +	unsigned long nr_entries, chained_nr_entries, magic;
> +	char *descr = NULL;
> +	void *pc1 = NULL, *pc2 = NULL;
> +	depot_stack_handle_t head;
> +
> +	if (!origin) {

In some cases the caller of kmsan_print_origin() performs this check
and prints a differently formatted message (metadata_is_contiguous())
or no message at all (kmsan_report()). Some unification would be good.

> + kmsan_pr_err("Origin not found, presumably a false report.\n");
> + return;
> + }
> +
> + while (true) {
> + nr_entries = stack_depot_fetch(origin, &entries);
> + magic = nr_entries ? (entries[0] & KMSAN_MAGIC_MASK) : 0;
> + if ((nr_entries == 4) && (magic == KMSAN_ALLOCA_MAGIC_ORIGIN)) {
> + descr = (char *)entries[1];
> + pc1 = (void *)entries[2];
> + pc2 = (void *)entries[3];
> + kmsan_pr_err("Local variable description: %s\n", descr);
> + kmsan_pr_err("Variable was created at:\n");

A shorter way: "Local variable %s created at: ...".
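
I.e. (sketch):

	kmsan_pr_err("Local variable %s created at:\n", descr);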

> +			kmsan_pr_err(" %pS\n", pc1);
> +			kmsan_pr_err(" %pS\n", pc2);
> +			break;
> +		}
> +		if ((nr_entries == 3) &&
> +		    (magic == KMSAN_CHAIN_MAGIC_ORIGIN_FULL)) {
> +			head = entries[1];
> +			origin = entries[2];
> +			kmsan_pr_err("Uninit was stored to memory at:\n");
> +			chained_nr_entries =
> +				stack_depot_fetch(head, &chained_entries);
> +			stack_trace_print(chained_entries, chained_nr_entries,
> +					  0);
> +			kmsan_pr_err("\n");
> +			continue;
> +		}
> +		kmsan_pr_err("Uninit was created at:\n");
> +		if (entries)

Should this rather check nr_entries?

> +			stack_trace_print(entries, nr_entries, 0);
> +		else
> +			kmsan_pr_err("No stack\n");

KASAN says "(stack is not available)" here. Makes sense to unify with this.

> +		break;
> +	}
> +}
> +
> +void kmsan_report(depot_stack_handle_t origin,
> +		  void *address, int size, int off_first, int off_last,
> +		  const void *user_addr, int reason)

It's not really clear what the off_first and off_last arguments are,
and how the range they describe differs from [address, address +
size). Some comment would be good.

> +{
> +	unsigned long flags;
> +	unsigned long *entries;
> +	unsigned int nr_entries;
> +	bool is_uaf = false;
> +	char *bug_type = NULL;
> +
> +	if (!kmsan_ready)
> +		return;
> +	if (!current->kmsan.allow_reporting)
> +		return;
> +	if (!origin)
> +		return;
> +
> +	nr_entries = stack_depot_fetch(origin, &entries);

Do we need this here?

[...]

> +#define shadow_page_for(page) \
> +	((page)->shadow)
> +
> +#define origin_page_for(page) \
> +	((page)->origin)
> +
> +#define shadow_ptr_for(page) \
> +	(page_address((page)->shadow))
> +
> +#define origin_ptr_for(page) \
> +	(page_address((page)->origin))
> +
> +#define has_shadow_page(page) \
> +	(!!((page)->shadow))
> +
> +#define has_origin_page(page) \
> +	(!!((page)->origin))

Something like this would take less space:

#define shadow_page_for(page) ((page)->shadow)
#define origin_page_for(page) ((page)->origin)
...

> +
> +#define set_no_shadow_origin_page(page)	\
> +	do {				\
> +		(page)->shadow = NULL;	\
> +		(page)->origin = NULL;	\
> +	} while (0) /**/
> +
> +#define is_ignored_page(page)	\
> +	(!!(((u64)((page)->shadow)) % 2))
> +
> +#define ignore_page(pg) \
> +	((pg)->shadow = (struct page *)((u64)((pg)->shadow) | 1)) \
> +
> +DEFINE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_shadow);
> +DEFINE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_origin);
> +
> +/*
> + * Dummy load and store pages to be used when the real metadata is unavailable.
> + * There are separate pages for loads and stores, so that every load returns a
> + * zero, and every store doesn't affect other stores.

every store doesn't affect other _reads_?

[...]

> +static unsigned long vmalloc_meta(void *addr, bool is_origin)
> +{
> +	unsigned long addr64 = (unsigned long)addr, off;
> +
> +	BUG_ON(is_origin && !IS_ALIGNED(addr64, ORIGIN_SIZE));
> +	if (kmsan_internal_is_vmalloc_addr(addr)) {

No need for {} here.

> +		return addr64 + (is_origin ? VMALLOC_ORIGIN_OFFSET
> +					   : VMALLOC_SHADOW_OFFSET);
> +	}
> +	if (kmsan_internal_is_module_addr(addr)) {
> +		off = addr64 - MODULES_VADDR;
> +		return off + (is_origin ? MODULES_ORIGIN_START
> +					: MODULES_SHADOW_START);
> +	}
> +	return 0;
> +}

[...]

> +/*
> + * Obtain the shadow or origin pointer for the given address, or NULL if there's
> + * none. The caller must check the return value for being non-NULL if needed.
> + * The return value of this function should not depend on whether we're in the
> + * runtime or not.
> + */
> +void *kmsan_get_metadata(void *address, size_t size, bool is_origin)

This looks very similar to kmsan_get_shadow_origin_ptr(); would it be
possible to unify them somehow or to split out the common parts into a
helper function?
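
E.g. kmsan_get_shadow_origin_ptr() could presumably be built on top of
this function (a very rough sketch; the field names and the fallback to
the dummy pages mentioned above are my assumptions):

	ret.s = kmsan_get_metadata(addr, size, META_SHADOW);
	ret.o = kmsan_get_metadata(addr, size, META_ORIGIN);
	if (!ret.s || !ret.o) {
		/* Fall back to dummy_load_page/dummy_store_page. */
	}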

> +{
> +	struct page *page;
> +	void *ret;
> +	u64 addr = (u64)address, pad, off;
> +
> +	if (is_origin && !IS_ALIGNED(addr, ORIGIN_SIZE)) {
> +		pad = addr % ORIGIN_SIZE;
> +		addr -= pad;
> +		size += pad;
> +	}
> +	address = (void *)addr;
> +	if (kmsan_internal_is_vmalloc_addr(address) ||
> +	    kmsan_internal_is_module_addr(address)) {

No need for {} here.

> +		return (void *)vmalloc_meta(address, is_origin);
> +	}
> +
> +	if (!kmsan_virt_addr_valid(address)) {
> +		page = vmalloc_to_page_or_null(address);
> +		if (page)
> +			goto next;
> +		ret = get_cea_meta_or_null(address, is_origin);
> +		if (ret)
> +			return ret;
> +	}
> +	page = virt_to_page_or_null(address);
> +	if (!page)
> +		return NULL;
> +next:
> +	if (is_ignored_page(page))
> +		return NULL;
> +	if (!has_shadow_page(page) || !has_origin_page(page))
> +		return NULL;
> +	off = addr % PAGE_SIZE;
> +
> +	ret = (is_origin ? origin_ptr_for(page) : shadow_ptr_for(page)) + off;
> +	return ret;
> +}

[...]

> +void kmsan_ignore_page(struct page *page, int order)
> +{
> +	int pages = 1 << order;
> +	int i;
> +	struct page *cp;
> +
> +	for (i = 0; i < pages; i++) {
> +		cp = &page[i];
> +		ignore_page(cp);

ignore_page(&page[i])

> +	}
> +}


