All of lore.kernel.org
 help / color / mirror / Atom feed
From: Xiao Guangrong <guangrong.xiao@linux.intel.com>
To: Paolo Bonzini <pbonzini@redhat.com>,
	Jan Kiszka <jan.kiszka@siemens.com>,
	Mike Krinkin <krinkin.m.u@gmail.com>,
	kvm@vger.kernel.org
Cc: Marcelo Tosatti <mtosatti@redhat.com>,
	Sasha Levin <sasha.levin@oracle.com>
Subject: Re: index-out-of-range ubsan warnings
Date: Wed, 24 Feb 2016 14:23:19 +0800	[thread overview]
Message-ID: <56CD4C57.5030402@linux.intel.com> (raw)
In-Reply-To: <56CC5CC3.5060208@redhat.com>



On 02/23/2016 09:21 PM, Paolo Bonzini wrote:
>>
>> +#define INVALID_INDEX        (-1)
>> +
>>    static int mmu_unsync_walk(struct kvm_mmu_page *sp,
>>                               struct kvm_mmu_pages *pvec)
>>    {
>>            if (!sp->unsync_children)
>>                    return 0;
>>
>> -        mmu_pages_add(pvec, sp, 0);
>> +        /*
>> +         * do not count the index in the parent of the sp we're
>> +         * walking start from.
>> +         */
>> +        mmu_pages_add(pvec, sp, INVALID_INDEX);
>>            return __mmu_unsync_walk(sp, pvec);
>>    }
>>
>> @@ -1980,8 +1986,11 @@ static int mmu_pages_next(struct kvm_mmu_pages *pvec,
>>                            return n;
>>                    }
>>
>> -                parents->parent[sp->role.level-2] = sp;
>> -                parents->idx[sp->role.level-1] = pvec->page[n].idx;
>> +                parents->parent[sp->role.level - 2] = sp;
>> +
>> +                /* skip setting the index of the sp we start from. */
>> +                if (pvec->page[n].idx != INVALID_INDEX)
>> +                        parents->idx[sp->role.level - 1] = pvec->page[n].idx;
>
> So far so good.
>
>>            }
>>
>>            return n;
>> @@ -1999,6 +2008,7 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
>>                    if (!sp)
>>                            return;
>>
>> +                WARN_ON(idx != INVALID_INDEX);
>
> Shouldn't this warn if idx is *equal* to INVALID_INDEX?

Yes, indeed. It seems I have been spending so much time on QEMU that I mixed it
up with assert() :(

>
>>                    clear_unsync_child_bit(sp, idx);
>>                    level++;
>>            } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children);
>> @@ -2008,7 +2018,7 @@ static void kvm_mmu_pages_init(struct kvm_mmu_page *parent,
>>                                   struct mmu_page_path *parents,
>>                                   struct kvm_mmu_pages *pvec)
>>    {
>> -        parents->parent[parent->role.level-1] = NULL;
>> +        parents->parent[parent->role.level - 2] = NULL;
>
> This is meant to stop mmu_pages_clear_parents _after_ it has
> processed sp, so the "-1" is correct.  The right fix would be:
>
>          if (parent->role.level < PT64_ROOT_LEVEL-1)
>                  parents->parent[parent->role.level - 1] = NULL;
>

It is okay, as mmu_pages_next() will refill the highest level.

> and no one has noticed it so far because if the bug hits you just write
> over the beginning of ->idx (which so far is uninitialized) and nothing
> bad happens.
>
> I'm thinking of the following (untested) patch instead:
>
> -------- 8< --------------
> From: Paolo Bonzini <pbonzini@redhat.com>
> Subject: [PATCH] KVM: MMU: Fix ubsan warnings
>
> kvm_mmu_pages_init is doing some really yucky stuff.  It is setting
> up a sentinel for mmu_page_clear_parents; however, because of a) the
> way levels are numbered starting from 1 and b) the way mmu_page_path
> sizes its arrays with PT64_ROOT_LEVEL-1 elements, the access can be
> out of bounds.  This is harmless because the code overwrites up to the
> first two elements of parents->idx and these are initialized, and
> because the sentinel is not needed in this case---mmu_page_clear_parents
> exits anyway when it gets to the end of the array.  However ubsan
> complains, and everyone else should too.
>
> This fix does three things.  First it makes the mmu_page_path arrays
> PT64_ROOT_LEVEL elements in size, so that we can write to them without
> checking the level in advance.  Second it disintegrates kvm_mmu_pages_init
> between mmu_unsync_walk (which resets struct kvm_mmu_pages) and
> mmu_pages_next (which unconditionally places a NULL sentinel at the
> end of the current path).  This is okay because the mmu_page_path is
> only used in mmu_pages_clear_parents; mmu_pages_clear_parents itself
> is called within a for_each_sp iterator, and hence always after a
> call to mmu_pages_next.  Third it changes mmu_pages_clear_parents to
> just use the sentinel to stop iteration, without checking the bounds
> on level.
>
> Reported-by: Sasha Levin <sasha.levin@oracle.com>
> Reported-by: Mike Krinkin <krinkin.m.u@gmail.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 33cc9f3f5b02..f4f5e7ca14dd 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -1843,6 +1843,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
>   static int mmu_unsync_walk(struct kvm_mmu_page *sp,
>   			   struct kvm_mmu_pages *pvec)
>   {
> +	pvec->nr = 0;
>   	if (!sp->unsync_children)
>   		return 0;
>
> @@ -1956,8 +1957,8 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu,  gfn_t gfn)
>   }
>
>   struct mmu_page_path {
> -	struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1];
> -	unsigned int idx[PT64_ROOT_LEVEL-1];
> +	struct kvm_mmu_page *parent[PT64_ROOT_LEVEL];
> +	unsigned int idx[PT64_ROOT_LEVEL];

So that it wastes 1 entry as we only use it to save parent sp.

>   };
>
>   #define for_each_sp(pvec, sp, parents, i)			\
> @@ -1971,19 +1972,21 @@ static int mmu_pages_next(struct kvm_mmu_pages *pvec,
>   			  int i)
>   {
>   	int n;
> +	int level = PT_PAGE_TABLE_LEVEL;
>
>   	for (n = i+1; n < pvec->nr; n++) {
>   		struct kvm_mmu_page *sp = pvec->page[n].sp;
>
> -		if (sp->role.level == PT_PAGE_TABLE_LEVEL) {
> -			parents->idx[0] = pvec->page[n].idx;
> -			return n;
> -		}
> +		level = sp->role.level;
> +		parents->idx[level-1] = pvec->page[n].idx;
>
> -		parents->parent[sp->role.level-2] = sp;
> -		parents->idx[sp->role.level-1] = pvec->page[n].idx;
> +		if (level == PT_PAGE_TABLE_LEVEL)
> +			break;
> +
> +		parents->parent[level-2] = sp;
>   	}
>
> +	parents->parent[level-1] = NULL;

Why?

Consider that we have already got an unsync page, so we break the loop
and reach here. It results in parents->parent[0] = NULL — see
below......

>   	return n;
>   }
>
> @@ -1994,22 +1996,13 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
>
>   	do {
>   		unsigned int idx = parents->idx[level];
> -
>   		sp = parents->parent[level];
>   		if (!sp)
>   			return;

So the first level is NULL, it stops clearing unsync_children.

>
>   		clear_unsync_child_bit(sp, idx);
>   		level++;
> -	} while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children);
> -}
> -
> -static void kvm_mmu_pages_init(struct kvm_mmu_page *parent,
> -			       struct mmu_page_path *parents,
> -			       struct kvm_mmu_pages *pvec)
> -{
> -	parents->parent[parent->role.level-1] = NULL;
> -	pvec->nr = 0;
> +	} while (!sp->unsync_children);
>   }
>
>   static void mmu_sync_children(struct kvm_vcpu *vcpu,
> @@ -2021,7 +2014,6 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
>   	struct kvm_mmu_pages pages;
>   	LIST_HEAD(invalid_list);
>
> -	kvm_mmu_pages_init(parent, &parents, &pages);
>   	while (mmu_unsync_walk(parent, &pages)) {
>   		bool protected = false;
>
> @@ -2037,7 +2029,6 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
>   		}
>   		kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
>   		cond_resched_lock(&vcpu->kvm->mmu_lock);
> -		kvm_mmu_pages_init(parent, &parents, &pages);
>   	}
>   }
>
> @@ -2269,7 +2260,6 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
>   	if (parent->role.level == PT_PAGE_TABLE_LEVEL)
>   		return 0;
>
> -	kvm_mmu_pages_init(parent, &parents, &pages);
>   	while (mmu_unsync_walk(parent, &pages)) {
>   		struct kvm_mmu_page *sp;
>
> @@ -2278,7 +2268,6 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
>   			mmu_pages_clear_parents(&parents);
>   			zapped++;
>   		}
> -		kvm_mmu_pages_init(parent, &parents, &pages);
>   	}
>
>   	return zapped;
>

Each time I look into these functions, it costs me several hours. The trick is
that the items, sp and idx, saved to kvm_mmu_pages are the sp and the index in
the _parent_ level.

How about clarify/simplify it as this patch:

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 57cf30b..14b2800 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1848,7 +1848,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
  		child = page_header(ent & PT64_BASE_ADDR_MASK);

  		if (child->unsync_children) {
-			if (mmu_pages_add(pvec, child, i))
+			if (mmu_pages_add(pvec, sp, i))
  				return -ENOSPC;

  			ret = __mmu_unsync_walk(child, pvec);
@@ -1861,7 +1861,14 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
  				return ret;
  		} else if (child->unsync) {
  			nr_unsync_leaf++;
-			if (mmu_pages_add(pvec, child, i))
+			if (mmu_pages_add(pvec, sp, i))
+				return -ENOSPC;
+
+			/*
+			 * the unsync is on the last level so its 'idx' is
+			 * useless, we set it to 0 to catch potential bugs.
+			 */
+			if (mmu_pages_add(pvec, child, 0))
  				return -ENOSPC;
  		} else
  			clear_unsync_child_bit(sp, i);
@@ -1876,7 +1883,6 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
  	if (!sp->unsync_children)
  		return 0;

-	mmu_pages_add(pvec, sp, 0);
  	return __mmu_unsync_walk(sp, pvec);
  }

@@ -2002,16 +2008,18 @@ static int mmu_pages_next(struct kvm_mmu_pages *pvec,
  {
  	int n;

-	for (n = i+1; n < pvec->nr; n++) {
+	for (n = i + 1; n < pvec->nr; n++) {
  		struct kvm_mmu_page *sp = pvec->page[n].sp;
+		unsigned int idx = pvec->page[n].idx;

  		if (sp->role.level == PT_PAGE_TABLE_LEVEL) {
-			parents->idx[0] = pvec->page[n].idx;
+			/* we always set the index to 0 for the unsync page. */
+			WARN_ON(idx != 0);
  			return n;
  		}

-		parents->parent[sp->role.level-2] = sp;
-		parents->idx[sp->role.level-1] = pvec->page[n].idx;
+		parents->parent[sp->role.level - 2] = sp;
+		parents->idx[sp->role.level - 2] = idx;
  	}

  	return n;
@@ -2031,14 +2039,18 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)

  		clear_unsync_child_bit(sp, idx);
  		level++;
-	} while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children);
+	} while (level < PT64_ROOT_LEVEL - 1 && !sp->unsync_children);
  }

  static void kvm_mmu_pages_init(struct kvm_mmu_page *parent,
  			       struct mmu_page_path *parents,
  			       struct kvm_mmu_pages *pvec)
  {
-	parents->parent[parent->role.level-1] = NULL;
+	/*
+	 * set the highest level to NULL to stop mmu_pages_clear_parents()
+	 * from handling it on a 32-bit guest.
+	 */
+	parents->parent[parent->role.level - 2] = NULL;
  	pvec->nr = 0;
  }




  parent reply	other threads:[~2016-02-24  6:31 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-23  9:26 index-out-of-range ubsan warnings Mike Krinkin
2016-02-23 10:07 ` Jan Kiszka
2016-02-23 10:44   ` Xiao Guangrong
2016-02-23 11:13     ` Mike Krinkin
2016-02-23 13:21     ` Paolo Bonzini
2016-02-23 13:56       ` Mike Krinkin
2016-02-24  6:23       ` Xiao Guangrong [this message]
2016-02-24  7:59         ` Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=56CD4C57.5030402@linux.intel.com \
    --to=guangrong.xiao@linux.intel.com \
    --cc=jan.kiszka@siemens.com \
    --cc=krinkin.m.u@gmail.com \
    --cc=kvm@vger.kernel.org \
    --cc=mtosatti@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=sasha.levin@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.