[RFC] Have insn decoder functions return success/failure
diff mbox series

Message ID 20201020120232.GD11583@zn.tnic
State New, archived
Headers show
Series
  • [RFC] Have insn decoder functions return success/failure
Related show

Commit Message

Borislav Petkov Oct. 20, 2020, 12:02 p.m. UTC
Hi guys,

this has come up a couple of times in the past, how about if those insn
decoder functions returned an error value so that callers can use that
instead of looking at different insn...got fields whether a certain
subset of the insn got decoded properly or not?

Here's a dirty diff to show what I mean. Thoughts?

Thx.

---

Comments

Masami Hiramatsu Oct. 20, 2020, 2:27 p.m. UTC | #1
Hi Borislav,

On Tue, 20 Oct 2020 14:02:32 +0200
Borislav Petkov <bp@alien8.de> wrote:

> Hi guys,
> 
> this has come up a couple of times in the past, how about if those insn
> decoder functions returned an error value so that callers can use that
> instead of looking at different insn...got fields whether a certain
> subset of the insn got decoded properly or not?

The problem is that x86 instruction is too complex to find what is wrong
instruction by current decoder. As you know x86 insn is based on the
opcode map which is too simple to express the wrong encoding rules for
each instruction in x86 ISA. Thus, when I made it, I decided to trust
gcc and just focused on decoding the instruction. Of course, in some corner
cases like immediate decoding, we can find the wrong encoding, but it is
hard to make it cover all the wrong instruction.

So, if this return value is optional, it is OK to me. But the success
return value does NOT mean it a correctly encoded instruction.

Thank you,

> 
> Here's a dirty diff to show what I mean. Thoughts?
> 
> Thx.
> 
> ---
> diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c
> index 954cb2702e23..53e8ced662ff 100644
> --- a/arch/x86/boot/compressed/sev-es.c
> +++ b/arch/x86/boot/compressed/sev-es.c
> @@ -79,16 +79,14 @@ static inline void sev_es_wr_ghcb_msr(u64 val)
>  static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
>  {
>  	char buffer[MAX_INSN_SIZE];
> -	enum es_result ret;
>  
>  	memcpy(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
>  
>  	insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE, 1);
> -	insn_get_length(&ctxt->insn);
> +	if (insn_get_length(&ctxt->insn))
> +		return ES_DECODE_FAILED;
>  
> -	ret = ctxt->insn.immediate.got ? ES_OK : ES_DECODE_FAILED;
> -
> -	return ret;
> +	return ES_OK;
>  }
>  
>  static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> index 404315df1e16..aa72e8d305dd 100644
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c
> @@ -1262,14 +1262,14 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
>  		is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
>  #endif
>  		insn_init(&insn, kaddr, size, is_64bit);
> -		insn_get_length(&insn);
> +
>  		/*
>  		 * Make sure there was not a problem decoding the
>  		 * instruction and getting the length.  This is
>  		 * doubly important because we have an infinite
>  		 * loop if insn.length=0.
>  		 */
> -		if (!insn.length)
> +		if (insn_get_length(&insn) || !insn.length)
>  			break;
>  
>  		to += insn.length;
> diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
> index 8961653c5dd2..4ef88831f9b7 100644
> --- a/arch/x86/events/intel/lbr.c
> +++ b/arch/x86/events/intel/lbr.c
> @@ -1262,8 +1262,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
>  		ret = X86_BR_INT;
>  		break;
>  	case 0xe8: /* call near rel */
> -		insn_get_immediate(&insn);
> -		if (insn.immediate1.value == 0) {
> +		if (insn_get_immediate(&insn) || !insn.immediate1.value) {
>  			/* zero length call */
>  			ret = X86_BR_ZERO_CALL;
>  			break;
> diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
> index 5c1ae3eff9d4..0c79dad82b89 100644
> --- a/arch/x86/include/asm/insn.h
> +++ b/arch/x86/include/asm/insn.h
> @@ -92,8 +92,8 @@ extern void insn_get_opcode(struct insn *insn);
>  extern void insn_get_modrm(struct insn *insn);
>  extern void insn_get_sib(struct insn *insn);
>  extern void insn_get_displacement(struct insn *insn);
> -extern void insn_get_immediate(struct insn *insn);
> -extern void insn_get_length(struct insn *insn);
> +extern int insn_get_immediate(struct insn *insn);
> +extern int insn_get_length(struct insn *insn);
>  
>  /* Attribute will be determined after getting ModRM (for opcode groups) */
>  static inline void insn_get_attribute(struct insn *insn)
> diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
> index 547c7abb39f5..9f1010b30cd0 100644
> --- a/arch/x86/kernel/kprobes/core.c
> +++ b/arch/x86/kernel/kprobes/core.c
> @@ -296,8 +296,10 @@ static int can_probe(unsigned long paddr)
>  		__addr = recover_probed_instruction(buf, addr);
>  		if (!__addr)
>  			return 0;
> +
>  		kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE);
> -		insn_get_length(&insn);
> +		if (insn_get_length(&insn))
> +			return 0;
>  
>  		/*
>  		 * Another debugging subsystem might insert this breakpoint.
> @@ -352,7 +354,8 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
>  		return 0;
>  
>  	kernel_insn_init(insn, dest, MAX_INSN_SIZE);
> -	insn_get_length(insn);
> +	if (insn_get_length(insn))
> +		return 0;
>  
>  	/* We can not probe force emulate prefixed instruction */
>  	if (insn_has_emulate_prefix(insn))
> diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
> index 041f0b50bc27..afe2148c964c 100644
> --- a/arch/x86/kernel/kprobes/opt.c
> +++ b/arch/x86/kernel/kprobes/opt.c
> @@ -309,7 +309,9 @@ static int can_optimize(unsigned long paddr)
>  		if (!recovered_insn)
>  			return 0;
>  		kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
> -		insn_get_length(&insn);
> +		if (insn_get_length(&insn))
> +			return 0;
> +
>  		/* Another subsystem puts a breakpoint */
>  		if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
>  			return 0;
> diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
> index 4a96726fbaf8..73c3a4998bc2 100644
> --- a/arch/x86/kernel/sev-es.c
> +++ b/arch/x86/kernel/sev-es.c
> @@ -268,12 +268,13 @@ static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
>  		}
>  
>  		insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE - res, 1);
> -		insn_get_length(&ctxt->insn);
> +		res = insn_get_length(&ctxt->insn);
>  	}
>  
> -	ret = ctxt->insn.immediate.got ? ES_OK : ES_DECODE_FAILED;
> +	if (res)
> +		return ES_DECODE_FAILED;
>  
> -	return ret;
> +	return ES_OK;
>  }
>  
>  static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
> diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
> index 58f7fb95c7f4..f754470c884d 100644
> --- a/arch/x86/lib/insn-eval.c
> +++ b/arch/x86/lib/insn-eval.c
> @@ -1491,7 +1491,9 @@ bool insn_decode(struct insn *insn, struct pt_regs *regs,
>  	insn->addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs);
>  	insn->opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs);
>  
> -	insn_get_length(insn);
> +	if (insn_get_length(insn))
> +		return false;
> +
>  	if (buf_size < insn->length)
>  		return false;
>  
> diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
> index 404279563891..875cd5296c50 100644
> --- a/arch/x86/lib/insn.c
> +++ b/arch/x86/lib/insn.c
> @@ -538,18 +538,23 @@ static int __get_immptr(struct insn *insn)
>  }
>  
>  /**
> - * insn_get_immediate() - Get the immediates of instruction
> + * insn_get_immediate() - Get the immediate in an instruction
>   * @insn:	&struct insn containing instruction
>   *
>   * If necessary, first collects the instruction up to and including the
>   * displacement bytes.
>   * Basically, most of immediates are sign-expanded. Unsigned-value can be
> - * get by bit masking with ((1 << (nbytes * 8)) - 1)
> + * computed by bit masking with ((1 << (nbytes * 8)) - 1)
> + *
> + * Returns:
> + *  - 0 on success
> + *  - !0 on error
>   */
> -void insn_get_immediate(struct insn *insn)
> +int insn_get_immediate(struct insn *insn)
>  {
>  	if (insn->immediate.got)
> -		return;
> +		return 0;
> +
>  	if (!insn->displacement.got)
>  		insn_get_displacement(insn);
>  
> @@ -604,9 +609,10 @@ void insn_get_immediate(struct insn *insn)
>  	}
>  done:
>  	insn->immediate.got = 1;
> +	return 0;
>  
>  err_out:
> -	return;
> +	return 1;
>  }
>  
>  /**
> @@ -615,13 +621,22 @@ void insn_get_immediate(struct insn *insn)
>   *
>   * If necessary, first collects the instruction up to and including the
>   * immediates bytes.
> - */
> -void insn_get_length(struct insn *insn)
> + *
> + * Returns:
> + *  - 0 on success
> + *  - !0 on error
> +*/
> +int insn_get_length(struct insn *insn)
>  {
>  	if (insn->length)
> -		return;
> +		return 0;
> +
>  	if (!insn->immediate.got)
> -		insn_get_immediate(insn);
> +		if (insn_get_immediate(insn))
> +			return 1;
> +
>  	insn->length = (unsigned char)((unsigned long)insn->next_byte
>  				     - (unsigned long)insn->kaddr);
> +
> +	return 0;
>  }
> diff --git a/arch/x86/tools/insn_decoder_test.c b/arch/x86/tools/insn_decoder_test.c
> index 34eda63c124b..5cb0d7b3ebf4 100644
> --- a/arch/x86/tools/insn_decoder_test.c
> +++ b/arch/x86/tools/insn_decoder_test.c
> @@ -150,8 +150,7 @@ int main(int argc, char **argv)
>  		}
>  		/* Decode an instruction */
>  		insn_init(&insn, insn_buff, sizeof(insn_buff), x86_64);
> -		insn_get_length(&insn);
> -		if (insn.length != nb) {
> +		if (insn_get_length(&insn) || insn.length != nb) {
>  			warnings++;
>  			pr_warn("Found an x86 instruction decoder bug, "
>  				"please report this.\n", sym);
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette
Borislav Petkov Oct. 20, 2020, 2:37 p.m. UTC | #2
On Tue, Oct 20, 2020 at 11:27:00PM +0900, Masami Hiramatsu wrote:
> So, if this return value is optional, it is OK to me. But the success
> return value does NOT mean it a correctly encoded instruction.

Ok, so what is the correct way to find out whether the decoding was
successful?

Because as it is now, it is confusing:

- Which ->got field do you check?

- Do you check all got fields like insn_complete() does?

And the return value can be made non-optional to denote that the
*function* that was called, was successful or not. The thing is, one
needs to designate one function to call and say, if this function
returns successfully, then the decode was ok.

If we want to look at only some aspects of some insn bytes, we can
definitely make the functions which do, like insn_get_length(),
insn_get_immediate() and all those return a value to denote that *they*
were successful or not.

All I'm trying to say is, *how* this insn decoder *should* be used,
is not really entirely clear, at least to me it isn't, and we need to
define that so that callers know what to expect.

Does that make more sense?

Thx.
Masami Hiramatsu Oct. 21, 2020, 12:50 a.m. UTC | #3
On Tue, 20 Oct 2020 16:37:46 +0200
Borislav Petkov <bp@alien8.de> wrote:

> On Tue, Oct 20, 2020 at 11:27:00PM +0900, Masami Hiramatsu wrote:
> > So, if this return value is optional, it is OK to me. But the success
> > return value does NOT mean it a correctly encoded instruction.
> 
> Ok, so what is the correct way to find out whether the decoding was
> successful?
> 
> Because as it is now, it is confusing:
> 
> - Which ->got field do you check?
> 
> - Do you check all got fields like insn_complete() does?
> 
> And the return value can be made non-optional to denote that the
> *function* that was called, was successful or not. The thing is, one
> needs to designate one function to call and say, if this function
> returns successfully, then the decode was ok.

Agreed. So I'm OK for returning the result of "decoding".
But we also need to note that the returning success doesn't
mean the instruction is valid. That needs another validator.

> If we want to look at only some aspects of some insn bytes, we can
> definitely make the functions which do, like insn_get_length(),
> insn_get_immediate() and all those return a value to denote that *they*
> were successful or not.

OK.

> 
> All I'm trying to say is, *how* this insn decoder *should* be used,
> is not really entirely clear, at least to me it isn't, and we need to
> define that so that callers know what to expect.
> 
> Does that make more sense?

Yes, so let's add the return value (with a note, so that someone
does not try to use it for validation).

Thank you,

> 
> Thx.
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette
Borislav Petkov Oct. 21, 2020, 9:27 a.m. UTC | #4
On Wed, Oct 21, 2020 at 09:50:13AM +0900, Masami Hiramatsu wrote:
> Agreed. So I'm OK for returning the result of "decoding".
> But we also need to note that the returning success doesn't
> mean the instruction is valid. That needs another validator.
>
...

>
> Yes, so let's add the return value (with a note, so that someone
> does not try to use it for validation).

Ok, I'm unclear on that "validation" you talk about. What exactly do
you mean? Can you give an example of how one would determine whether an
instruction is valid? And valid how?

Thx.
Masami Hiramatsu Oct. 21, 2020, 2:26 p.m. UTC | #5
On Wed, 21 Oct 2020 11:27:50 +0200
Borislav Petkov <bp@alien8.de> wrote:

> On Wed, Oct 21, 2020 at 09:50:13AM +0900, Masami Hiramatsu wrote:
> > Agreed. So I'm OK for returning the result of "decoding".
> > But we also need to note that the returning success doesn't
> > mean the instruction is valid. That needs another validator.
> >
> ...
> 
> >
> > Yes, so let's add the return value (with a note, so that someone
> > does not try to use it for validation).
> 
> Ok, I'm unclear on that "validation" you talk about. What exactly do
> you mean? Can you give an example of how one would determine whether an
> instruction is valid? And valid how?

Hmm, I meant someone might think it can be used for filtering the
instruction something like,

insn_init(insn, buf, buflen, 1);
ret = insn_get_length(insn);
if (!ret) {
	/* OK, this is safe */
	patch_text(buf, trampoline);
}

No, we need another validator for such usage.

Thank you,
 
> 
> Thx.
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette
Borislav Petkov Oct. 21, 2020, 4:45 p.m. UTC | #6
On Wed, Oct 21, 2020 at 11:26:13PM +0900, Masami Hiramatsu wrote:
> Hmm, I meant someone might think it can be used for filtering the
> instruction something like,
> 
> insn_init(insn, buf, buflen, 1);
> ret = insn_get_length(insn);
> if (!ret) {
> 	/* OK, this is safe */
> 	patch_text(buf, trampoline);
> }
> 
> No, we need another validator for such usage.

Well, I think calling insn_get_length() should give you only the
*length* of the insn and nothing else - I mean that is what the function
is called. And I believe current use is wrong.

Examples:

arch/x86/kernel/kprobes/core.c:
                insn_get_length(&insn);

                /*
                 * Another debugging subsystem might insert this breakpoint.
                 * In that case, we can't recover it.
                 */
                if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)

So this has called get_length but it is far from clear that after that
call, the opcode bytes in insn.opcode.bytes are there.

What that should do instead IMO is this:

	insn_get_opcode(&insn);

and *then* the return value can tell you whether the opcode bytes were
parsed properly or not. See what I mean?

That's even documented that way:

/**
 * insn_get_opcode - collect opcode(s)
 * @insn:       &struct insn containing instruction
 *
 * Populates @insn->opcode, updates @insn->next_byte to point past the
 * opcode byte(s), and set @insn->attr (except for groups).


Similarly here:

static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)

	...

        insn_get_length(&ctxt->insn);

        ret = ctxt->insn.immediate.got ? ES_OK : ES_DECODE_FAILED;

that thing wants to decode the insn but it is looking whether it parsed
an *immediate*?!

I'm not saying this is necessarily wrong - just the naming nomenclature
and the API should be properly defined when you call a function of the
insn decoder, what you are guaranteed to get and what a caller can
assume after that. And then the proper functions be called.

In the kprobes/core.c example above, it does a little further:

	ddr += insn.length;	

which, IMO, it should be either preceeded by a call to insn_get_length()
- yes, this time we want the insn length or, the code should call a
decoding function which gives you *both* length* and opcode bytes.
insn_decode_insn() or whatever. And *that* should be documented in that
function's kernel-doc section. And so on...

Does that make more sense?

Thx.
Masami Hiramatsu Oct. 22, 2020, 7:31 a.m. UTC | #7
On Wed, 21 Oct 2020 18:45:58 +0200
Borislav Petkov <bp@alien8.de> wrote:

> On Wed, Oct 21, 2020 at 11:26:13PM +0900, Masami Hiramatsu wrote:
> > Hmm, I meant someone might think it can be used for filtering the
> > instruction something like,
> > 
> > insn_init(insn, buf, buflen, 1);
> > ret = insn_get_length(insn);
> > if (!ret) {
> > 	/* OK, this is safe */
> > 	patch_text(buf, trampoline);
> > }
> > 
> > No, we need another validator for such usage.
> 
> Well, I think calling insn_get_length() should give you only the
> *length* of the insn and nothing else - I mean that is what the function
> is called. And I believe current use is wrong.
> 
> Examples:
> 
> arch/x86/kernel/kprobes/core.c:
>                 insn_get_length(&insn);
> 
>                 /*
>                  * Another debugging subsystem might insert this breakpoint.
>                  * In that case, we can't recover it.
>                  */
>                 if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
> 
> So this has called get_length but it is far from clear that after that
> call, the opcode bytes in insn.opcode.bytes are there.

No, insn_get_length() implies it decodes whole of the instruction.
(yeah, we need an alias of that, something like insn_get_complete())

> 
> What that should do instead IMO is this:
> 
> 	insn_get_opcode(&insn);
> 

No, you've cut the last lines of that loop.

                /*
                 * Another debugging subsystem might insert this breakpoint.
                 * In that case, we can't recover it.
                 */
                if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
                        return 0;
                addr += insn.length;
        }

I need insn.length too. Of course we can split it into 2 calls. But
as I said, since the insn_get_length() implies it decodes all other
parts, I just called it once.

> and *then* the return value can tell you whether the opcode bytes were
> parsed properly or not. See what I mean?

I agreed to check the return value of insn_get_length() at that point
only for checking whether the instruction parsing was failed or not.

> 
> That's even documented that way:
> 
> /**
>  * insn_get_opcode - collect opcode(s)
>  * @insn:       &struct insn containing instruction
>  *
>  * Populates @insn->opcode, updates @insn->next_byte to point past the
>  * opcode byte(s), and set @insn->attr (except for groups).
> 
> 
> Similarly here:
> 
> static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
> 
> 	...
> 
>         insn_get_length(&ctxt->insn);
> 
>         ret = ctxt->insn.immediate.got ? ES_OK : ES_DECODE_FAILED;
> 
> that thing wants to decode the insn but it is looking whether it parsed
> an *immediate*?!

Hm, it is better to call insn_get_immediate() if it doesn't use length later.

> 
> I'm not saying this is necessarily wrong - just the naming nomenclature
> and the API should be properly defined when you call a function of the
> insn decoder, what you are guaranteed to get and what a caller can
> assume after that. And then the proper functions be called.

Would you mean we'd better have something like insn_get_until_immediate() ? 

Since the x86 instruction is CISC, we can not decode intermediate
parts. The APIs follows that. If you are confused, I'm sorry about that.

> 
> In the kprobes/core.c example above, it does a little further:
> 
> 	ddr += insn.length;	
> 
> which, IMO, it should be either preceeded by a call to insn_get_length()
> - yes, this time we want the insn length or, the code should call a
> decoding function which gives you *both* length* and opcode bytes.
> insn_decode_insn() or whatever. And *that* should be documented in that
> function's kernel-doc section. And so on...

Actually, there is a historical reason too. INT3 check was added afterwards.
At first, I just calculated the instruction length in the loop...

Thank you,

> 
> Does that make more sense?
> 
> Thx.
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette
Peter Zijlstra Oct. 22, 2020, 8:04 a.m. UTC | #8
On Wed, Oct 21, 2020 at 06:45:58PM +0200, Borislav Petkov wrote:
> On Wed, Oct 21, 2020 at 11:26:13PM +0900, Masami Hiramatsu wrote:
> > Hmm, I meant someone might think it can be used for filtering the
> > instruction something like,
> > 
> > insn_init(insn, buf, buflen, 1);
> > ret = insn_get_length(insn);
> > if (!ret) {
> > 	/* OK, this is safe */
> > 	patch_text(buf, trampoline);
> > }
> > 
> > No, we need another validator for such usage.
> 
> Well, I think calling insn_get_length() should give you only the
> *length* of the insn and nothing else - I mean that is what the function
> is called. And I believe current use is wrong.
> 
> Examples:
> 
> arch/x86/kernel/kprobes/core.c:
>                 insn_get_length(&insn);
> 
>                 /*
>                  * Another debugging subsystem might insert this breakpoint.
>                  * In that case, we can't recover it.
>                  */
>                 if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
> 
> So this has called get_length but it is far from clear that after that
> call, the opcode bytes in insn.opcode.bytes are there.

Given the trainwreck called x86-instruction-encoding, it's impossible to
not have decoded the opcode and still know the length. Therefore, if you
know the length, you also have the opcode. Hm?
Borislav Petkov Oct. 22, 2020, 9:30 a.m. UTC | #9
On Thu, Oct 22, 2020 at 04:31:00PM +0900, Masami Hiramatsu wrote:
> No, insn_get_length() implies it decodes whole of the instruction.
> (yeah, we need an alias of that, something like insn_get_complete())

That's exactly what I'm trying to point out: the whole API is not
entirely wrong - it just needs a better naming and documentation. Now,
the implication that getting the length of the insn will give you a full
decode is a totally internal detail which users don't need and have to
know.

> I need insn.length too. Of course we can split it into 2 calls. But
> as I said, since the insn_get_length() implies it decodes all other
> parts, I just called it once.

Yes, I have noticed that and wrote about it further on. The intent was
to show that the API needs work.

> Hm, it is better to call insn_get_immediate() if it doesn't use length later.

Ok, so you see the problem. This thing wants to decode the whole insn -
that's what the function is called. But it reads like it does something
else.

> Would you mean we'd better have something like insn_get_until_immediate() ? 
> 
> Since the x86 instruction is CISC, we can not decode intermediate
> parts. The APIs follows that. If you are confused, I'm sorry about that.

No, I'm not confused - again, I'd like for the API to be properly
defined and callers should not have to care which parts of the insn they
need to decode in order to get something else they actually need.

So the main API should be: insn_decode_insn() or so and it should give
you everything you need.

If this succeeds, you can go poke at insn.<field> and you know you have
valid data there.

If there are specialized uses, you can call some of the insn_get_*
helpers if you're not interested in decoding the full insn.

But if simply calling insn_decode_insn() would give you everything and
that is not that expensive, we can do that - API simplicity.

What I don't want to have is calling insn_get_length() or so and then
inspecting the opcode bytes because that's totally non-transparent.

Thx.
Masami Hiramatsu Oct. 22, 2020, 1:21 p.m. UTC | #10
On Thu, 22 Oct 2020 11:30:44 +0200
Borislav Petkov <bp@alien8.de> wrote:

> On Thu, Oct 22, 2020 at 04:31:00PM +0900, Masami Hiramatsu wrote:
> > No, insn_get_length() implies it decodes whole of the instruction.
> > (yeah, we need an alias of that, something like insn_get_complete())
> 
> That's exactly what I'm trying to point out: the whole API is not
> entirely wrong - it just needs a better naming and documentation. Now,
> the implication that getting the length of the insn will give you a full
> decode is a totally internal detail which users don't need and have to
> know.

Ok, what names would you like to suggest? insn_get_complete()?

> > I need insn.length too. Of course we can split it into 2 calls. But
> > as I said, since the insn_get_length() implies it decodes all other
> > parts, I just called it once.
> 
> Yes, I have noticed that and wrote about it further on. The intent was
> to show that the API needs work.
> 
> > Hm, it is better to call insn_get_immediate() if it doesn't use length later.
> 
> Ok, so you see the problem. This thing wants to decode the whole insn -
> that's what the function is called. But it reads like it does something
> else.
> 
> > Would you mean we'd better have something like insn_get_until_immediate() ? 
> > 
> > Since the x86 instruction is CISC, we can not decode intermediate
> > parts. The APIs follows that. If you are confused, I'm sorry about that.
> 
> No, I'm not confused - again, I'd like for the API to be properly
> defined and callers should not have to care which parts of the insn they
> need to decode in order to get something else they actually need.

Sorry, I can not get what you point. We already have those APIs, 

extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
extern void insn_get_prefixes(struct insn *insn);
extern void insn_get_opcode(struct insn *insn);
extern void insn_get_modrm(struct insn *insn);
extern void insn_get_sib(struct insn *insn);
extern void insn_get_displacement(struct insn *insn);
extern void insn_get_immediate(struct insn *insn);
extern void insn_get_length(struct insn *insn);

As I agreed, that we may need an alias of insn_get_length(). But it seems
clear to me, if you need insn.immediate, you must call insn_get_immediate().

> So the main API should be: insn_decode_insn() or so and it should give
> you everything you need.
> 
> If this succeeds, you can go poke at insn.<field> and you know you have
> valid data there.

Ah, so you meant that we don't need such a different insn_get_* APIs,
but a single insn_decode() API, which will decode all fields.
(IOW, alias of insn_init() and insn_get_length(), right?)

> If there are specialized uses, you can call some of the insn_get_*
> helpers if you're not interested in decoding the full insn.

OK, agreed.

> 
> But if simply calling insn_decode_insn() would give you everything and
> that is not that expensive, we can do that - API simplicity.

I rather like simple "insn_decode()" function, no need to repeat
insn again.

int insn_decode(struct insn *insn, const void *kaddr, int buf_len, bool x86_64);

> 
> What I don't want to have is calling insn_get_length() or so and then
> inspecting the opcode bytes because that's totally non-transparent.

OK, I agreed.

Thank you,

> 
> Thx.
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette
Andy Lutomirski Oct. 22, 2020, 5:58 p.m. UTC | #11
On Thu, Oct 22, 2020 at 6:21 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> On Thu, 22 Oct 2020 11:30:44 +0200
> Borislav Petkov <bp@alien8.de> wrote:
>
> > On Thu, Oct 22, 2020 at 04:31:00PM +0900, Masami Hiramatsu wrote:
> > > No, insn_get_length() implies it decodes whole of the instruction.
> > > (yeah, we need an alias of that, something like insn_get_complete())
> >
> > That's exactly what I'm trying to point out: the whole API is not
> > entirely wrong - it just needs a better naming and documentation. Now,
> > the implication that getting the length of the insn will give you a full
> > decode is a totally internal detail which users don't need and have to
> > know.
>
> Ok, what names would you like to suggest? insn_get_complete()?
>
> > > I need insn.length too. Of course we can split it into 2 calls. But
> > > as I said, since the insn_get_length() implies it decodes all other
> > > parts, I just called it once.
> >
> > Yes, I have noticed that and wrote about it further on. The intent was
> > to show that the API needs work.
> >
> > > Hm, it is better to call insn_get_immediate() if it doesn't use length later.
> >
> > Ok, so you see the problem. This thing wants to decode the whole insn -
> > that's what the function is called. But it reads like it does something
> > else.
> >
> > > Would you mean we'd better have something like insn_get_until_immediate() ?
> > >
> > > Since the x86 instruction is CISC, we can not decode intermediate
> > > parts. The APIs follows that. If you are confused, I'm sorry about that.
> >
> > No, I'm not confused - again, I'd like for the API to be properly
> > defined and callers should not have to care which parts of the insn they
> > need to decode in order to get something else they actually need.
>
> Sorry, I can not get what you point. We already have those APIs,
>
> extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
> extern void insn_get_prefixes(struct insn *insn);
> extern void insn_get_opcode(struct insn *insn);
> extern void insn_get_modrm(struct insn *insn);
> extern void insn_get_sib(struct insn *insn);
> extern void insn_get_displacement(struct insn *insn);
> extern void insn_get_immediate(struct insn *insn);
> extern void insn_get_length(struct insn *insn);
>
> As I agreed, that we may need an alias of insn_get_length(). But it seems
> clear to me, if you need insn.immediate, you must call insn_get_immediate().

I'm guessing that the confusion here is that the kernel instruction
decoder was originally designed to be used to decode kernel
instructions, which are generally trusted to be valid, but that it's
starting to be used to decode user code and such as well.

Masami, could we perhaps have an extra API like:

extern int insn_decode_fully(struct insn *insn);

that decodes the *entire* instruction, returns success if the decoder
thinks the instruction is valid, and returns an error if the decoder
thinks it's invalid?  We would use this when decoding arbitrary bytes
when we're not really sure that there's a valid instruction there.
For user code emulation, we don't really care much about performance
-- the overhead of getting #GP in the first place is much, much higher
than the overhead of decoding more of the instruction than needed.

Ideally we would solve another little problem at the same time.  Right
now, we are quite sloppy about how we fetch the instruction bytes, and
it might be nice to fix this.  It would be nice if we could have a
special error code saying "more bytes are needed".  So
insn_decode_fully() would return 0 (or the length) on a successful
decode, -EINVAL if the bytes are not a valid instruction, and -EAGAIN
(or something more appropriate) if the bytes are a valid *prefix* of
an instruction but more bytes are needed.  Then the caller would do:

len = min(15, remaining bytes in page);
fetch len bytes;
insn_init();
ret = insn_decode_fully();
if (ret == -EAGAIN) {
  fetch remaining 15 - len bytes;
  insn_init();
  ret = insn_decode_fully();
}

It's a bit impolite to potentially cause page faults on the page after
a short instruction, but it's also not so good to fail to decode a
long instruction that happens to cross a page boundary.

--Andy
Borislav Petkov Oct. 23, 2020, 9:17 a.m. UTC | #12
On Thu, Oct 22, 2020 at 10:21:40PM +0900, Masami Hiramatsu wrote:
> extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
> extern void insn_get_prefixes(struct insn *insn);
> extern void insn_get_opcode(struct insn *insn);
> extern void insn_get_modrm(struct insn *insn);
> extern void insn_get_sib(struct insn *insn);
> extern void insn_get_displacement(struct insn *insn);
> extern void insn_get_immediate(struct insn *insn);
> extern void insn_get_length(struct insn *insn);

...

> Ah, so you meant that we don't need such a different insn_get_* APIs,
> but a single insn_decode() API, which will decode all fields.
> (IOW, alias of insn_init() and insn_get_length(), right?)

Yes, so there should be a balance between what one wants to decode:

length, opcodes, etc

vs

when one needs only a certain *single* aspect: sib, length,
displacement, etc.

So if you need a couple of things, you can simply call the insn_decode()
function - I'm reading forward and I like your naming :) - and when
that returns success, you can be sure that struct insn contains all the
fields needed.

Otherwise...

> > If there are specialized uses, you can call some of the insn_get_*
> > helpers if you're not interested in decoding the full insn.
> 
> OK, agreed.

... yes, exactly!

> > But if simply calling insn_decode_insn() would give you everything and
> > that is not that expensive, we can do that - API simplicity.
> 
> I rather like simple "insn_decode()" function, no need to repeat
> insn again.
> 
> int insn_decode(struct insn *insn, const void *kaddr, int buf_len, bool x86_64);

Yap, good.

Ok, seems we agree, lemme poke at this one more time, convert some users
and we can see how it looks like and talk then.

Thx.
Borislav Petkov Oct. 23, 2020, 9:20 a.m. UTC | #13
On Thu, Oct 22, 2020 at 10:58:32AM -0700, Andy Lutomirski wrote:
> I'm guessing that the confusion here is that the kernel instruction
> decoder was originally designed to be used to decode kernel
> instructions, which are generally trusted to be valid, but that it's
> starting to be used to decode user code and such as well.
>
> Masami, could we perhaps have an extra API like:
>
> extern int insn_decode_fully(struct insn *insn);

insn_decode(). Simple. :)

> that decodes the *entire* instruction, returns success if the decoder
> thinks the instruction is valid, and returns an error if the decoder
> thinks it's invalid?

Yap, exactly.

> We would use this when decoding arbitrary bytes
> when we're not really sure that there's a valid instruction there.
> For user code emulation, we don't really care much about performance
> -- the overhead of getting #GP in the first place is much, much higher
> than the overhead of decoding more of the instruction than needed.

Ack.

> Ideally we would solve another little problem at the same time.  Right
> now, we are quite sloppy about how we fetch the instruction bytes, and
> it might be nice to fix this.  It would be nice if we could have a
> special error code saying "more bytes are needed".  So
> insn_decode_fully() would return 0 (or the length) on a successful
> decode, -EINVAL if the bytes are not a valid instruction, and -EAGAIN
> (or something more appropriate) if the bytes are a valid *prefix* of
> an instruction but more bytes are needed.  Then the caller would do:

Yah, lemme see if that can be done easily.

Thx.
Masami Hiramatsu Oct. 23, 2020, 9:28 a.m. UTC | #14
On Thu, 22 Oct 2020 10:58:32 -0700
Andy Lutomirski <luto@kernel.org> wrote:

> On Thu, Oct 22, 2020 at 6:21 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> >
> > On Thu, 22 Oct 2020 11:30:44 +0200
> > Borislav Petkov <bp@alien8.de> wrote:
> >
> > > On Thu, Oct 22, 2020 at 04:31:00PM +0900, Masami Hiramatsu wrote:
> > > > No, insn_get_length() implies it decodes whole of the instruction.
> > > > (yeah, we need an alias of that, something like insn_get_complete())
> > >
> > > That's exactly what I'm trying to point out: the whole API is not
> > > entirely wrong - it just needs a better naming and documentation. Now,
> > > the implication that getting the length of the insn will give you a full
> > > decode is a totally internal detail which users don't need and have to
> > > know.
> >
> > Ok, what names would you like to suggest? insn_get_complete()?
> >
> > > > I need insn.length too. Of course we can split it into 2 calls. But
> > > > as I said, since the insn_get_length() implies it decodes all other
> > > > parts, I just called it once.
> > >
> > > Yes, I have noticed that and wrote about it further on. The intent was
> > > to show that the API needs work.
> > >
> > > > Hm, it is better to call insn_get_immediate() if it doesn't use length later.
> > >
> > > Ok, so you see the problem. This thing wants to decode the whole insn -
> > > that's what the function is called. But it reads like it does something
> > > else.
> > >
> > > > Would you mean we'd better have something like insn_get_until_immediate() ?
> > > >
> > > > Since the x86 instruction is CISC, we can not decode intermediate
> > > > parts. The APIs follows that. If you are confused, I'm sorry about that.
> > >
> > > No, I'm not confused - again, I'd like for the API to be properly
> > > defined and callers should not have to care which parts of the insn they
> > > need to decode in order to get something else they actually need.
> >
> > Sorry, I can not get what you point. We already have those APIs,
> >
> > extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
> > extern void insn_get_prefixes(struct insn *insn);
> > extern void insn_get_opcode(struct insn *insn);
> > extern void insn_get_modrm(struct insn *insn);
> > extern void insn_get_sib(struct insn *insn);
> > extern void insn_get_displacement(struct insn *insn);
> > extern void insn_get_immediate(struct insn *insn);
> > extern void insn_get_length(struct insn *insn);
> >
> > As I agreed, that we may need an alias of insn_get_length(). But it seems
> > clear to me, if you need insn.immediate, you must call insn_get_immediate().
> 
> I'm guessing that the confusion here is that the kernel instruction
> decoder was originally designed to be used to decode kernel
> instructions, which are generally trusted to be valid, but that it's
> starting to be used to decode user code and such as well.

Hmm, right...

> 
> Masami, could we perhaps have an extra API like:
> 
> extern int insn_decode_fully(struct insn *insn);
> 
> that decodes the *entire* instruction, returns success if the decoder
> thinks the instruction is valid, and returns an error if the decoder
> thinks it's invalid?  We would use this when decoding arbitrary bytes
> when we're not really sure that there's a valid instruction there.
> For user code emulation, we don't really care much about performance
> -- the overhead of getting #GP in the first place is much, much higher
> than the overhead of decoding more of the instruction than needed.

OK, would you think we also better to integrate it with insn_init()?

> Ideally we would solve another little problem at the same time.  Right
> now, we are quite sloppy about how we fetch the instruction bytes, and
> it might be nice to fix this.  It would be nice if we could have a
> special error code saying "more bytes are needed".  So
> insn_decode_fully() would return 0 (or the length) on a successful
> decode, -EINVAL if the bytes are not a valid instruction, and -EAGAIN
> (or something more appropriate)

Maybe -ERANGE?

> if the bytes are a valid *prefix* of
> an instruction but more bytes are needed.  Then the caller would do:
> 
> len = min(15, remaining bytes in page);
> fetch len bytes;
> insn_init();
> ret = insn_decode_fully();
> if (ret == -EAGAIN) {
>   fetch remaining 15 - len bytes;
>   insn_init();
>   ret = insn_decode_fully();
> }
> 
> It's a bit impolite to potentially cause page faults on the page after
> a short instruction, but it's also not so good to fail to decode a
> long instruction that happens to cross a page boundary.

OK.

Borislav, would you handle it? I think you already started.

Thank you,
Borislav Petkov Oct. 23, 2020, 9:32 a.m. UTC | #15
On Fri, Oct 23, 2020 at 06:28:50PM +0900, Masami Hiramatsu wrote:
> OK, would you think we also better to integrate it with insn_init()?

That kinda makes sense because it would obviate the need to call it
as user of the interface but simply do insn_decode() which will do
everything for you. Lemme see how it works out in practice...

> Borislav, would you handle it? I think you already started.

Sure, I'll have a poke at it.
Masami Hiramatsu Oct. 23, 2020, 10:47 a.m. UTC | #16
On Fri, 23 Oct 2020 11:32:54 +0200
Borislav Petkov <bp@alien8.de> wrote:

> On Fri, Oct 23, 2020 at 06:28:50PM +0900, Masami Hiramatsu wrote:
> > OK, would you think we also better to integrate it with insn_init()?
> 
> That kinda makes sense because it would obviate the need to call it
> as user of the interface but simply do insn_decode() which will do
> everything for you. Lemme see how it works out in practice...
> 
> > Borislav, would you handle it? I think you already started.
> 
> Sure, I'll have a poke at it.

Thanks! I look forward to it.
Borislav Petkov Oct. 23, 2020, 11:27 p.m. UTC | #17
On Fri, Oct 23, 2020 at 07:47:04PM +0900, Masami Hiramatsu wrote:
> Thanks! I look forward to it.

Ok, here's a first stab, it is a single big diff and totally untested
but it should show what I mean. I've made some notes while converting,
as I went along.

Have a look at insn_decode() and its call sites: they are almost trivial
now because caller needs simply to do:

	if (insn_decode(insn, buffer, ...))

and not care about any helper functions.

For some of the call sites it still makes sense to do a piecemeal insn
decoding and I've left them this way but they can be converted too, if
one wants.

In any case, just have a look please and lemme know if that looks OKish.
I'll do the actual splitting and testing afterwards.

And what Andy wants can't be done with the decoder because it already
gets a fixed size buffer and length - it doesn't do the fetching. The
caller does.

What you wanna do:

> len = min(15, remaining bytes in page);
> fetch len bytes;
> insn_init();
> ret = insn_decode_fully();

<--- you can't always know here whether the insn is valid if you don't
have all the bytes. But you can always fetch *all* bytes and then give
it to the decoder for checking.

Also, this doesn't make any sense: try insn decode on a subset of bytes
and then if it fails, try it on the whole set of bytes. Why even try the
subset - it will almost always fail.

> if (ret == -EAGAIN) {
>   fetch remaining 15 - len bytes;
>   insn_init();
>   ret = insn_decode_fully();

Ok, good night and good luck. :-)

Author: Borislav Petkov <bp@suse.de>
Date:   Fri Oct 23 12:15:59 2020 +0200

    insn: Reorg
    
    - Rename insn_decode() to insn_decode_regs() to denote that it receives
      regs as param and free the name for the generic version.
    
    - intel/ds.c needs only the insn length so it uses the appropriate
      helper instead of a full decode.
    
    - make insn_get_opcode() return an errval, make it more stricter as
      to whether what has seen so far is a valid insn and if not, signal an
      error.
    
    - lbr.c doesn't need to call the full insn_decode() because it doesn't
      need it in all cases thus the separate calls.
    
    - Add enum insn_mode to insn_decode() to call kernel_insn_init() too.
    

diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c
index 954cb2702e23..8d95f859e439 100644
--- a/arch/x86/boot/compressed/sev-es.c
+++ b/arch/x86/boot/compressed/sev-es.c
@@ -79,16 +79,13 @@ static inline void sev_es_wr_ghcb_msr(u64 val)
 static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
 {
 	char buffer[MAX_INSN_SIZE];
-	enum es_result ret;
 
 	memcpy(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
 
-	insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE, 1);
-	insn_get_length(&ctxt->insn);
+	if (insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64))
+		return ES_DECODE_FAILED;
 
-	ret = ctxt->insn.immediate.got ? ES_OK : ES_DECODE_FAILED;
-
-	return ret;
+	return ES_OK;
 }
 
 static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 404315df1e16..aa72e8d305dd 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1262,14 +1262,14 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 		is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
 #endif
 		insn_init(&insn, kaddr, size, is_64bit);
-		insn_get_length(&insn);
+
 		/*
 		 * Make sure there was not a problem decoding the
 		 * instruction and getting the length.  This is
 		 * doubly important because we have an infinite
 		 * loop if insn.length=0.
 		 */
-		if (!insn.length)
+		if (insn_get_length(&insn) || !insn.length)
 			break;
 
 		to += insn.length;
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 8961653c5dd2..d23f7d3108a8 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -1224,8 +1224,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
 	is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
 #endif
 	insn_init(&insn, addr, bytes_read, is64);
-	insn_get_opcode(&insn);
-	if (!insn.opcode.got)
+	if (insn_get_opcode(&insn))
 		return X86_BR_ABORT;
 
 	switch (insn.opcode.bytes[0]) {
@@ -1262,8 +1261,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
 		ret = X86_BR_INT;
 		break;
 	case 0xe8: /* call near rel */
-		insn_get_immediate(&insn);
-		if (insn.immediate1.value == 0) {
+		if (insn_get_immediate(&insn) || insn.immediate1.value == 0) {
 			/* zero length call */
 			ret = X86_BR_ZERO_CALL;
 			break;
@@ -1279,7 +1277,9 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
 		ret = X86_BR_JMP;
 		break;
 	case 0xff: /* call near absolute, call far absolute ind */
-		insn_get_modrm(&insn);
+		if (insn_get_modrm(&insn))
+			return X86_BR_ABORT;
+
 		ext = (insn.modrm.bytes[0] >> 3) & 0x7;
 		switch (ext) {
 		case 2: /* near ind call */
diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index a0f839aa144d..3797497a9270 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -23,7 +23,7 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
 int insn_get_code_seg_params(struct pt_regs *regs);
 int insn_fetch_from_user(struct pt_regs *regs,
 			 unsigned char buf[MAX_INSN_SIZE]);
-bool insn_decode(struct insn *insn, struct pt_regs *regs,
-		 unsigned char buf[MAX_INSN_SIZE], int buf_size);
+bool insn_decode_regs(struct insn *insn, struct pt_regs *regs,
+		      unsigned char buf[MAX_INSN_SIZE], int buf_size);
 
 #endif /* _ASM_X86_INSN_EVAL_H */
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 5c1ae3eff9d4..94c1ef548883 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -88,12 +88,21 @@ struct insn {
 
 extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
 extern void insn_get_prefixes(struct insn *insn);
-extern void insn_get_opcode(struct insn *insn);
-extern void insn_get_modrm(struct insn *insn);
-extern void insn_get_sib(struct insn *insn);
-extern void insn_get_displacement(struct insn *insn);
-extern void insn_get_immediate(struct insn *insn);
-extern void insn_get_length(struct insn *insn);
+extern int insn_get_opcode(struct insn *insn);
+extern int insn_get_modrm(struct insn *insn);
+extern int insn_get_sib(struct insn *insn);
+extern int insn_get_displacement(struct insn *insn);
+extern int insn_get_immediate(struct insn *insn);
+extern int insn_get_length(struct insn *insn);
+
+enum insn_mode {
+	INSN_MODE_32,
+	INSN_MODE_64,
+	INSN_MODE_KERN,	/* determined by the current kernel build */
+	INSN_NUM_MODES,
+};
+
+extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m);
 
 /* Attribute will be determined after getting ModRM (for opcode groups) */
 static inline void insn_get_attribute(struct insn *insn)
@@ -108,11 +117,7 @@ extern int insn_rip_relative(struct insn *insn);
 static inline void kernel_insn_init(struct insn *insn,
 				    const void *kaddr, int buf_len)
 {
-#ifdef CONFIG_X86_64
-	insn_init(insn, kaddr, buf_len, 1);
-#else /* CONFIG_X86_32 */
-	insn_init(insn, kaddr, buf_len, 0);
-#endif
+	insn_init(insn, kaddr, buf_len, IS_ENABLED(CONFIG_X86_64));
 }
 
 static inline int insn_is_avx(struct insn *insn)
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 83df991314c5..30ba7c9ae27c 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -224,9 +224,7 @@ static bool is_copy_from_user(struct pt_regs *regs)
 	if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
 		return false;
 
-	kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
-	insn_get_opcode(&insn);
-	if (!insn.opcode.got)
+	if (insn_decode(&insn, insn_buf, MAX_INSN_SIZE, INSN_MODE_KERN))
 		return false;
 
 	switch (insn.opcode.value) {
@@ -234,10 +232,6 @@ static bool is_copy_from_user(struct pt_regs *regs)
 	case 0x8A: case 0x8B:
 	/* MOVZ mem,reg */
 	case 0xB60F: case 0xB70F:
-		insn_get_modrm(&insn);
-		insn_get_sib(&insn);
-		if (!insn.modrm.got || !insn.sib.got)
-			return false;
 		addr = (unsigned long)insn_get_addr_ref(&insn, regs);
 		break;
 	/* REP MOVS */
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 547c7abb39f5..9f1010b30cd0 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -296,8 +296,10 @@ static int can_probe(unsigned long paddr)
 		__addr = recover_probed_instruction(buf, addr);
 		if (!__addr)
 			return 0;
+
 		kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE);
-		insn_get_length(&insn);
+		if (insn_get_length(&insn))
+			return 0;
 
 		/*
 		 * Another debugging subsystem might insert this breakpoint.
@@ -352,7 +354,8 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
 		return 0;
 
 	kernel_insn_init(insn, dest, MAX_INSN_SIZE);
-	insn_get_length(insn);
+	if (insn_get_length(insn))
+		return 0;
 
 	/* We can not probe force emulate prefixed instruction */
 	if (insn_has_emulate_prefix(insn))
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 041f0b50bc27..afe2148c964c 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -309,7 +309,9 @@ static int can_optimize(unsigned long paddr)
 		if (!recovered_insn)
 			return 0;
 		kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
-		insn_get_length(&insn);
+		if (insn_get_length(&insn))
+			return 0;
+
 		/* Another subsystem puts a breakpoint */
 		if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
 			return 0;
diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
index 4a96726fbaf8..2f87cd52f12c 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -244,8 +244,7 @@ static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
 static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
 {
 	char buffer[MAX_INSN_SIZE];
-	enum es_result ret;
-	int res;
+	int res, ret;
 
 	if (user_mode(ctxt->regs)) {
 		res = insn_fetch_from_user(ctxt->regs, buffer);
@@ -256,7 +255,7 @@ static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
 			return ES_EXCEPTION;
 		}
 
-		if (!insn_decode(&ctxt->insn, ctxt->regs, buffer, res))
+		if (!insn_decode_regs(&ctxt->insn, ctxt->regs, buffer, res))
 			return ES_DECODE_FAILED;
 	} else {
 		res = vc_fetch_insn_kernel(ctxt, buffer);
@@ -267,13 +266,13 @@ static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
 			return ES_EXCEPTION;
 		}
 
-		insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE - res, 1);
-		insn_get_length(&ctxt->insn);
+		ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE - res, INSN_MODE_64);
 	}
 
-	ret = ctxt->insn.immediate.got ? ES_OK : ES_DECODE_FAILED;
-
-	return ret;
+	if (ret)
+		return ES_DECODE_FAILED;
+	else
+		return ES_OK;
 }
 
 static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 3c70fb34028b..40211eb584a7 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -498,9 +498,8 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs,
 			MAX_INSN_SIZE))
 		return GP_NO_HINT;
 
-	kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
-	insn_get_modrm(&insn);
-	insn_get_sib(&insn);
+	if (insn_decode(&insn, insn_buf, MAX_INSN_SIZE, INSN_MODE_KERN))
+		return GP_NO_HINT;
 
 	*addr = (unsigned long)insn_get_addr_ref(&insn, regs);
 	if (*addr == -1UL)
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index f6225bf22c02..e3584894b074 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -356,7 +356,7 @@ bool fixup_umip_exception(struct pt_regs *regs)
 	if (!nr_copied)
 		return false;
 
-	if (!insn_decode(&insn, regs, buf, nr_copied))
+	if (!insn_decode_regs(&insn, regs, buf, nr_copied))
 		return false;
 
 	umip_inst = identify_insn(&insn);
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 3fdaa042823d..b1451242c097 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -275,12 +275,10 @@ static bool is_prefix_bad(struct insn *insn)
 
 static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool x86_64)
 {
+	enum insn_mode m = x86_64 ? INSN_MODE_64 : INSN_MODE_32;
 	u32 volatile *good_insns;
 
-	insn_init(insn, auprobe->insn, sizeof(auprobe->insn), x86_64);
-	/* has the side-effect of processing the entire instruction */
-	insn_get_length(insn);
-	if (!insn_complete(insn))
+	if (insn_decode(insn, auprobe->insn, sizeof(auprobe->insn), m))
 		return -ENOEXEC;
 
 	if (is_prefix_bad(insn))
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 58f7fb95c7f4..5825b4cf4386 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -1117,7 +1117,8 @@ static int get_eff_addr_sib(struct insn *insn, struct pt_regs *regs,
 	if (insn->addr_bytes != 8 && insn->addr_bytes != 4)
 		return -EINVAL;
 
-	insn_get_modrm(insn);
+	if (insn_get_modrm(insn))
+		return -EINVAL;
 
 	if (!insn->modrm.nbytes)
 		return -EINVAL;
@@ -1125,7 +1126,8 @@ static int get_eff_addr_sib(struct insn *insn, struct pt_regs *regs,
 	if (X86_MODRM_MOD(insn->modrm.value) > 2)
 		return -EINVAL;
 
-	insn_get_sib(insn);
+	if (insn_get_sib(insn))
+		return -EINVAL;
 
 	if (!insn->sib.nbytes)
 		return -EINVAL;
@@ -1194,8 +1196,8 @@ static void __user *get_addr_ref_16(struct insn *insn, struct pt_regs *regs)
 	short eff_addr;
 	long tmp;
 
-	insn_get_modrm(insn);
-	insn_get_displacement(insn);
+	if (insn_get_modrm(insn) || insn_get_displacement(insn))
+		goto out;
 
 	if (insn->addr_bytes != 2)
 		goto out;
@@ -1454,7 +1456,7 @@ int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
 }
 
 /**
- * insn_decode() - Decode an instruction
+ * insn_decode_regs() - Decode an instruction
  * @insn:	Structure to store decoded instruction
  * @regs:	Structure with register values as seen when entering kernel mode
  * @buf:	Buffer containing the instruction bytes
@@ -1467,8 +1469,8 @@ int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
  *
  * True if instruction was decoded, False otherwise.
  */
-bool insn_decode(struct insn *insn, struct pt_regs *regs,
-		 unsigned char buf[MAX_INSN_SIZE], int buf_size)
+bool insn_decode_regs(struct insn *insn, struct pt_regs *regs,
+		      unsigned char buf[MAX_INSN_SIZE], int buf_size)
 {
 	int seg_defs;
 
@@ -1491,7 +1493,9 @@ bool insn_decode(struct insn *insn, struct pt_regs *regs,
 	insn->addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs);
 	insn->opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs);
 
-	insn_get_length(insn);
+	if (insn_get_length(insn))
+		return false;
+
 	if (buf_size < insn->length)
 		return false;
 
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 404279563891..6031b774a9cb 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -37,6 +37,7 @@
  * insn_init() - initialize struct insn
  * @insn:	&struct insn to be initialized
  * @kaddr:	address (in kernel memory) of instruction (or copy thereof)
+ * @buf_len:	length of the insn buffer at @kaddr
  * @x86_64:	!0 for 64-bit kernel or 64-bit app
  */
 void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
@@ -230,14 +231,20 @@ void insn_get_prefixes(struct insn *insn)
  * If necessary, first collects any preceding (prefix) bytes.
  * Sets @insn->opcode.value = opcode1.  No effect if @insn->opcode.got
  * is already 1.
+ *
+ * Returns:
+ * 0:  on success
+ * !0: on error
  */
-void insn_get_opcode(struct insn *insn)
+int insn_get_opcode(struct insn *insn)
 {
 	struct insn_field *opcode = &insn->opcode;
 	insn_byte_t op;
 	int pfx_id;
+
 	if (opcode->got)
-		return;
+		return 0;
+
 	if (!insn->prefixes.got)
 		insn_get_prefixes(insn);
 
@@ -254,9 +261,13 @@ void insn_get_opcode(struct insn *insn)
 		insn->attr = inat_get_avx_attribute(op, m, p);
 		if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
 		    (!inat_accept_vex(insn->attr) &&
-		     !inat_is_group(insn->attr)))
-			insn->attr = 0;	/* This instruction is bad */
-		goto end;	/* VEX has only 1 byte for opcode */
+		     !inat_is_group(insn->attr))) {
+			/* This instruction is bad */
+			insn->attr = 0;
+			return 1;
+		}
+		/* VEX has only 1 byte for opcode */
+		goto end;
 	}
 
 	insn->attr = inat_get_opcode_attribute(op);
@@ -267,13 +278,18 @@ void insn_get_opcode(struct insn *insn)
 		pfx_id = insn_last_prefix_id(insn);
 		insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
 	}
-	if (inat_must_vex(insn->attr))
-		insn->attr = 0;	/* This instruction is bad */
+
+	if (inat_must_vex(insn->attr)) {
+		/* This instruction is bad */
+		insn->attr = 0;
+		return 1;
+	}
 end:
 	opcode->got = 1;
+	return 0;
 
 err_out:
-	return;
+	return 1;
 }
 
 /**
@@ -283,15 +299,22 @@ void insn_get_opcode(struct insn *insn)
  * Populates @insn->modrm and updates @insn->next_byte to point past the
  * ModRM byte, if any.  If necessary, first collects the preceding bytes
  * (prefixes and opcode(s)).  No effect if @insn->modrm.got is already 1.
+ *
+ * Returns:
+ * 0:  on success
+ * !0: on error
  */
-void insn_get_modrm(struct insn *insn)
+int insn_get_modrm(struct insn *insn)
 {
 	struct insn_field *modrm = &insn->modrm;
 	insn_byte_t pfx_id, mod;
+
 	if (modrm->got)
-		return;
+		return 0;
+
 	if (!insn->opcode.got)
-		insn_get_opcode(insn);
+		if (insn_get_opcode(insn))
+			return 1;
 
 	if (inat_has_modrm(insn->attr)) {
 		mod = get_next(insn_byte_t, insn);
@@ -301,17 +324,22 @@ void insn_get_modrm(struct insn *insn)
 			pfx_id = insn_last_prefix_id(insn);
 			insn->attr = inat_get_group_attribute(mod, pfx_id,
 							      insn->attr);
-			if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
-				insn->attr = 0;	/* This is bad */
+			if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) {
+				/* Bad insn */
+				insn->attr = 0;
+				return 1;
+			}
 		}
 	}
 
 	if (insn->x86_64 && inat_is_force64(insn->attr))
 		insn->opnd_bytes = 8;
+
 	modrm->got = 1;
+	return 0;
 
 err_out:
-	return;
+	return 1;
 }
 
 
@@ -343,15 +371,23 @@ int insn_rip_relative(struct insn *insn)
  *
  * If necessary, first collects the instruction up to and including the
  * ModRM byte.
+ *
+ * Returns:
+ * 0: if decoding succeeded
+ * !0: otherwise.
  */
-void insn_get_sib(struct insn *insn)
+int insn_get_sib(struct insn *insn)
 {
 	insn_byte_t modrm;
 
 	if (insn->sib.got)
-		return;
-	if (!insn->modrm.got)
-		insn_get_modrm(insn);
+		return 0;
+
+	if (!insn->modrm.got) {
+		if (insn_get_modrm(insn))
+			return 1;
+	}
+
 	if (insn->modrm.nbytes) {
 		modrm = (insn_byte_t)insn->modrm.value;
 		if (insn->addr_bytes != 2 &&
@@ -362,8 +398,10 @@ void insn_get_sib(struct insn *insn)
 	}
 	insn->sib.got = 1;
 
+	return 0;
+
 err_out:
-	return;
+	return 1;
 }
 
 
@@ -374,15 +412,23 @@ void insn_get_sib(struct insn *insn)
  * If necessary, first collects the instruction up to and including the
  * SIB byte.
  * Displacement value is sign-expanded.
+ *
+ * * Returns:
+ * 0: if decoding succeeded
+ * !0: otherwise.
  */
-void insn_get_displacement(struct insn *insn)
+int insn_get_displacement(struct insn *insn)
 {
 	insn_byte_t mod, rm, base;
 
 	if (insn->displacement.got)
-		return;
-	if (!insn->sib.got)
-		insn_get_sib(insn);
+		return 0;
+
+	if (!insn->sib.got) {
+		if (insn_get_sib(insn))
+			return 1;
+	}
+
 	if (insn->modrm.nbytes) {
 		/*
 		 * Interpreting the modrm byte:
@@ -425,9 +471,10 @@ void insn_get_displacement(struct insn *insn)
 	}
 out:
 	insn->displacement.got = 1;
+	return 0;
 
 err_out:
-	return;
+	return 1;
 }
 
 /* Decode moffset16/32/64. Return 0 if failed */
@@ -538,20 +585,27 @@ static int __get_immptr(struct insn *insn)
 }
 
 /**
- * insn_get_immediate() - Get the immediates of instruction
+ * insn_get_immediate() - Get the immediate in an instruction
  * @insn:	&struct insn containing instruction
  *
  * If necessary, first collects the instruction up to and including the
  * displacement bytes.
  * Basically, most of immediates are sign-expanded. Unsigned-value can be
- * get by bit masking with ((1 << (nbytes * 8)) - 1)
+ * computed by bit masking with ((1 << (nbytes * 8)) - 1)
+ *
+ * Returns:
+ * 0:  on success
+ * !0: on error
  */
-void insn_get_immediate(struct insn *insn)
+int insn_get_immediate(struct insn *insn)
 {
 	if (insn->immediate.got)
-		return;
-	if (!insn->displacement.got)
-		insn_get_displacement(insn);
+		return 0;
+
+	if (!insn->displacement.got) {
+		if (insn_get_displacement(insn))
+			return 1;
+	}
 
 	if (inat_has_moffset(insn->attr)) {
 		if (!__get_moffset(insn))
@@ -604,9 +658,10 @@ void insn_get_immediate(struct insn *insn)
 	}
 done:
 	insn->immediate.got = 1;
+	return 0;
 
 err_out:
-	return;
+	return 1;
 }
 
 /**
@@ -615,13 +670,58 @@ void insn_get_immediate(struct insn *insn)
  *
  * If necessary, first collects the instruction up to and including the
  * immediates bytes.
- */
-void insn_get_length(struct insn *insn)
+ *
+ * Returns:
+ *  - 0 on success
+ *  - !0 on error
+*/
+int insn_get_length(struct insn *insn)
 {
 	if (insn->length)
-		return;
+		return 0;
+
 	if (!insn->immediate.got)
-		insn_get_immediate(insn);
+		if (insn_get_immediate(insn))
+			return 1;
+
 	insn->length = (unsigned char)((unsigned long)insn->next_byte
 				     - (unsigned long)insn->kaddr);
+
+	return 0;
+}
+
+/**
+ * insn_decode() - Decode an x86 instruction
+ * @insn:	&struct insn to be initialized
+ * @kaddr:	address (in kernel memory) of instruction (or copy thereof)
+ * @buf_len:	length of the insn buffer at @kaddr
+ * @m:		insn mode, see enum insn_mode
+ *
+ * Returns:
+ * 0: if decoding succeeded
+ * !0: otherwise.
+ */
+int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m)
+{
+	if (m == INSN_MODE_KERN)
+		kernel_insn_init(insn, kaddr, buf_len);
+	else
+		insn_init(insn, kaddr, buf_len, m == INSN_MODE_64);
+
+	if (insn_get_opcode(insn))
+		return 1;
+
+	if (insn_get_modrm(insn))
+		return 1;
+
+	if (insn_get_sib(insn))
+		return 1;
+
+	if (insn_get_displacement(insn))
+		return 1;
+
+	if (insn_get_immediate(insn))
+		return 1;
+
+	return !insn_complete(insn);
 }
diff --git a/arch/x86/tools/insn_decoder_test.c b/arch/x86/tools/insn_decoder_test.c
index 34eda63c124b..5cb0d7b3ebf4 100644
--- a/arch/x86/tools/insn_decoder_test.c
+++ b/arch/x86/tools/insn_decoder_test.c
@@ -150,8 +150,7 @@ int main(int argc, char **argv)
 		}
 		/* Decode an instruction */
 		insn_init(&insn, insn_buff, sizeof(insn_buff), x86_64);
-		insn_get_length(&insn);
-		if (insn.length != nb) {
+		if (insn_get_length(&insn) || insn.length != nb) {
 			warnings++;
 			pr_warn("Found an x86 instruction decoder bug, "
 				"please report this.\n", sym);
Andy Lutomirski Oct. 24, 2020, 12:12 a.m. UTC | #18
On Fri, Oct 23, 2020 at 4:27 PM Borislav Petkov <bp@alien8.de> wrote:
>
> On Fri, Oct 23, 2020 at 07:47:04PM +0900, Masami Hiramatsu wrote:
> > Thanks! I look forward to it.
>
> Ok, here's a first stab, it is a single big diff and totally untested
> but it should show what I mean. I've made some notes while converting,
> as I went along.
>
> Have a look at insn_decode() and its call sites: they are almost trivial
> now because caller needs simply to do:
>
>         if (insn_decode(insn, buffer, ...))
>
> and not care about any helper functions.
>
> For some of the call sites it still makes sense to do a piecemeal insn
> decoding and I've left them this way but they can be converted too, if
> one wants.
>
> In any case, just have a look please and lemme know if that looks OKish.
> I'll do the actual splitting and testing afterwards.
>
> And what Andy wants can't be done with the decoder because it already
> gets a fixed size buffer and length - it doesn't do the fetching. The
> caller does.
>
> What you wanna do:
>
> > len = min(15, remaining bytes in page);
> > fetch len bytes;
> > insn_init();
> > ret = insn_decode_fully();
>
> <--- you can't always know here whether the insn is valid if you don't
> have all the bytes. But you can always fetch *all* bytes and then give
> it to the decoder for checking.
>
> Also, this doesn't make any sense: try insn decode on a subset of bytes
> and then if it fails, try it on the whole set of bytes. Why even try the
> subset - it will almost always fail.

I disagree.  A real CPU does exactly what I'm describing.  If I stick
0xcc at the end of a page and a make the next page not-present, I get
#BP, not #PF.  But if I stick 0x0F at the end of a page and mark the
next page not-present, I get #PF.  If we're trying to decode an
instruction in user memory, we can kludge it by trying to fetch 15
bytes and handling -EFAULT by fetching fewer bytes, but that's gross
and doesn't really have the right semantics.  What we actually want is
to fetch up to the page boundary and try to decode it.  If it's a
valid instruction or if it's definitely invalid, we're done.
Otherwise we fetch across the page boundary.

Eventually we should wrap this whole mess up in an insn_decode_user()
helper that does the right thing.  And we can then make that helper
extra fancy by getting PKRU and EPT-hacker-execute-only right, whereas
we currently get these cases wrong.

Does this make sense?
Masami Hiramatsu Oct. 24, 2020, 7:13 a.m. UTC | #19
On Sat, 24 Oct 2020 01:27:41 +0200
Borislav Petkov <bp@alien8.de> wrote:

> On Fri, Oct 23, 2020 at 07:47:04PM +0900, Masami Hiramatsu wrote:
> > Thanks! I look forward to it.
> 
> Ok, here's a first stab, it is a single big diff and totally untested
> but it should show what I mean. I've made some notes while converting,
> as I went along.

Thanks, so will you split this into several patches, since I saw some
cleanups in this patch?

> 
> Have a look at insn_decode() and its call sites: they are almost trivial
> now because caller needs simply to do:
> 
> 	if (insn_decode(insn, buffer, ...))
> 
> and not care about any helper functions.

Yeah, that's good to me because in the most cases, user needs prefix,
length or total decoded info.

BTW, it seems you returns 1 for errors, I rather like -EINVAL or -EILSEQ
for errors so that user can also write

 if (insn_decode() < 0)
   ...

I think "positive" and "zero" pair can easily mislead user to "true" and
"false" trap.


> For some of the call sites it still makes sense to do a piecemeal insn
> decoding and I've left them this way but they can be converted too, if
> one wants.

Yeah, for the kprobes, if you see the insn_init() and insn_get_length()
those can be replaced with one insn_decode(). 

> In any case, just have a look please and lemme know if that looks OKish.
> I'll do the actual splitting and testing afterwards.

Except for the return value, it looks good to me.

Thank you,
Masami Hiramatsu Oct. 24, 2020, 7:21 a.m. UTC | #20
On Fri, 23 Oct 2020 17:12:49 -0700
Andy Lutomirski <luto@kernel.org> wrote:

> On Fri, Oct 23, 2020 at 4:27 PM Borislav Petkov <bp@alien8.de> wrote:
> >
> > On Fri, Oct 23, 2020 at 07:47:04PM +0900, Masami Hiramatsu wrote:
> > > Thanks! I look forward to it.
> >
> > Ok, here's a first stab, it is a single big diff and totally untested
> > but it should show what I mean. I've made some notes while converting,
> > as I went along.
> >
> > Have a look at insn_decode() and its call sites: they are almost trivial
> > now because caller needs simply to do:
> >
> >         if (insn_decode(insn, buffer, ...))
> >
> > and not care about any helper functions.
> >
> > For some of the call sites it still makes sense to do a piecemeal insn
> > decoding and I've left them this way but they can be converted too, if
> > one wants.
> >
> > In any case, just have a look please and lemme know if that looks OKish.
> > I'll do the actual splitting and testing afterwards.
> >
> > And what Andy wants can't be done with the decoder because it already
> > gets a fixed size buffer and length - it doesn't do the fetching. The
> > caller does.
> >
> > What you wanna do:
> >
> > > len = min(15, remaining bytes in page);
> > > fetch len bytes;
> > > insn_init();
> > > ret = insn_decode_fully();
> >
> > <--- you can't always know here whether the insn is valid if you don't
> > have all the bytes. But you can always fetch *all* bytes and then give
> > it to the decoder for checking.
> >
> > Also, this doesn't make any sense: try insn decode on a subset of bytes
> > and then if it fails, try it on the whole set of bytes. Why even try the
> > subset - it will almost always fail.
> 
> I disagree.  A real CPU does exactly what I'm describing.  If I stick
> 0xcc at the end of a page and a make the next page not-present, I get
> #BP, not #PF.  But if I stick 0x0F at the end of a page and mark the
> next page not-present, I get #PF.  If we're trying to decode an
> instruction in user memory, we can kludge it by trying to fetch 15
> bytes and handling -EFAULT by fetching fewer bytes, but that's gross
> and doesn't really have the right semantics.  What we actually want is
> to fetch up to the page boundary and try to decode it.  If it's a
> valid instruction or if it's definitely invalid, we're done.
> Otherwise we fetch across the page boundary.
> 
> Eventually we should wrap this whole mess up in an insn_decode_user()
> helper that does the right thing.  And we can then make that helper
> extra fancy by getting PKRU and EPT-hacker-execute-only right, whereas
> we currently get these cases wrong.

+1. To handle the user-space (untrusted) instruction, we need to
take more care about page boundary and presense. Also less side-effect
is perferrable.

Thank you,
Borislav Petkov Oct. 24, 2020, 8:23 a.m. UTC | #21
On Fri, Oct 23, 2020 at 05:12:49PM -0700, Andy Lutomirski wrote:
> I disagree.  A real CPU does exactly what I'm describing.  If I stick

A real modern CPU fetches up to 32 bytes insn window which it tries
to decode etc. I don't know, though, what it does when that fetch
encounters a fault - I will have to ask people. I'm not sure it would
even try to feed a shorter stream of bytes to the decoder but lemme
ask...

> 0xcc at the end of a page and a make the next page not-present, I get
> #BP, not #PF.  But if I stick 0x0F at the end of a page and mark the
> next page not-present, I get #PF.  If we're trying to decode an
> instruction in user memory, we can kludge it by trying to fetch 15
> bytes and handling -EFAULT by fetching fewer bytes, but that's gross
> and doesn't really have the right semantics.  What we actually want is
> to fetch up to the page boundary and try to decode it.  If it's a
> valid instruction or if it's definitely invalid, we're done.
> Otherwise we fetch across the page boundary.

We can do that but why would you put all that logic in the insn decoder?
Is that use case sooo important?

I mean, it would work that way anyway *even* *now* - the insn decoder
will tell you that the insn it decoded wasn't valid and you, as a
caller, know that you didn't fetch the whole 15 bytes so that means
that you still need to fetch some more. You've got all the relevant
information.

> Eventually we should wrap this whole mess up in an insn_decode_user()
> helper that does the right thing.

Oh sure, you can do that easily. Just put the logic which determines
that it copied a shorter buffer and that it attempts to decode the
shorter buffer first in it. Yah, that can work.

> And we can then make that helper
> extra fancy by getting PKRU and EPT-hacker-execute-only right, whereas
> we currently get these cases wrong.
> 
> Does this make sense?

Sure, but you could point me to those cases so that I can get a better
idea what they do exactly.

Thx.
Borislav Petkov Oct. 24, 2020, 8:24 a.m. UTC | #22
On Sat, Oct 24, 2020 at 04:13:15PM +0900, Masami Hiramatsu wrote:
> Thanks, so will you split this into several patches, since I saw some
> cleanups in this patch?

Oh, most definitely. This was just a preview of where this is going...

> Yeah, that's good to me because in the most cases, user needs prefix,
> length or total decoded info.
> 
> BTW, it seems you returns 1 for errors, I rather like -EINVAL or -EILSEQ
> for errors so that user can also write
> 
>  if (insn_decode() < 0)
>    ...
> 
> I think "positive" and "zero" pair can easily mislead user to "true" and
> "false" trap.

Ok, sure, makes sense.

> Yeah, for the kprobes, if you see the insn_init() and insn_get_length()
> those can be replaced with one insn_decode().

Ok.

> Except for the return value, it looks good to me.

Thanks!
Andy Lutomirski Oct. 24, 2020, 4:10 p.m. UTC | #23
On Sat, Oct 24, 2020 at 1:23 AM Borislav Petkov <bp@alien8.de> wrote:
>
> On Fri, Oct 23, 2020 at 05:12:49PM -0700, Andy Lutomirski wrote:
> > I disagree.  A real CPU does exactly what I'm describing.  If I stick
>
> A real modern CPU fetches up to 32 bytes insn window which it tries
> to decode etc. I don't know, though, what it does when that fetch
> encounters a fault - I will have to ask people. I'm not sure it would
> even try to feed a shorter stream of bytes to the decoder but lemme
> ask...
>

I can pretty much guarantee that a real modern CPU is able to decode a
<15 byte instruction that is followed by unmapped or non-executable
pages.  I don't know specifically how the CPU implements it, but it
works.

> > 0xcc at the end of a page and a make the next page not-present, I get
> > #BP, not #PF.  But if I stick 0x0F at the end of a page and mark the
> > next page not-present, I get #PF.  If we're trying to decode an
> > instruction in user memory, we can kludge it by trying to fetch 15
> > bytes and handling -EFAULT by fetching fewer bytes, but that's gross
> > and doesn't really have the right semantics.  What we actually want is
> > to fetch up to the page boundary and try to decode it.  If it's a
> > valid instruction or if it's definitely invalid, we're done.
> > Otherwise we fetch across the page boundary.
>
> We can do that but why would you put all that logic in the insn decoder?
> Is that use case sooo important?

It's not sooo important, but I think it would be nice to at least try
to be fully correct.

>
> I mean, it would work that way anyway *even* *now* - the insn decoder
> will tell you that the insn it decoded wasn't valid and you, as a
> caller, know that you didn't fetch the whole 15 bytes so that means
> that you still need to fetch some more. You've got all the relevant
> information.

How so?

If I have a page that ends in 0x0F followed by an unmapped page, then
the correct response to an attempt to decode is SIGSEGV or -EFAULT.
If there's a page there that contains garbage, then the correct
response is SIGILL or -EINVAL or similar.  These are different
scenarios, and I don't think the current decoder API can be used to
distinguish them.

>
> > Eventually we should wrap this whole mess up in an insn_decode_user()
> > helper that does the right thing.
>
> Oh sure, you can do that easily. Just put the logic which determines
> that it copied a shorter buffer and that it attempts to decode the
> shorter buffer first in it. Yah, that can work.
>
> > And we can then make that helper
> > extra fancy by getting PKRU and EPT-hacker-execute-only right, whereas
> > we currently get these cases wrong.
> >
> > Does this make sense?
>
> Sure, but you could point me to those cases so that I can get a better
> idea what they do exactly.

Take a look at fixup_umip_exception().  It currently has two bugs:

1. If it tries to decode a short instruction followed by something
like a userfaultfd page, it will incorrectly trigger the userfaultfd.
This is because it tries to fetch MAX_INSN_SIZE even if the
instruction is shorter than that.

2. It will fail on execute-only memory, and it will succeed on NX
memory.  copy_from_user() is the wrong API to use here.  We don't have
the right API, and we should add it.  (Hi Dave - what's the best way
to do this?  New get_user_pages() mode?  Try to fault it in, hold an
appropriate lock, walk the page tables to check permissions, and then
access the user address directly?)

I don't know how much anyone really cares about this for UMIP, but
with SEV-ES and such, I can see this becoming more important.
Borislav Petkov Oct. 27, 2020, 1:42 p.m. UTC | #24
On Sat, Oct 24, 2020 at 09:10:25AM -0700, Andy Lutomirski wrote:
> I can pretty much guarantee that a real modern CPU is able to decode a
> <15 byte instruction that is followed by unmapped or non-executable
> pages.  I don't know specifically how the CPU implements it, but it
> works.

Yes, so reportedly and architecturally, a CPU tries to execute every
last byte it has fetched. If it fails decoding an instruction because it
is incomplete, then it raises a #PF. So you're correct.

> If I have a page that ends in 0x0F followed by an unmapped page, then
> the correct response to an attempt to decode is SIGSEGV or -EFAULT.
> If there's a page there that contains garbage, then the correct
> response is SIGILL or -EINVAL or similar.  These are different
> scenarios, and I don't think the current decoder API can be used to
> distinguish them.

See above - the insn decoder should be taught to look only at the bytes
it is *allowed* to look, i.e., the bytes which have been fetched and not
peek forward. And I believe it does that to some extent but I need to
look closer.

And it should detect the cases where the insn bytes come short. But that
needs also looking but first things first.

Bottomline: it should do exactly what a CPU does, IMO.

Again, find me on IRC to hash out details but I believe we're in an
agreement here.

> Take a look at fixup_umip_exception().  It currently has two bugs:
> 
> 1. If it tries to decode a short instruction followed by something
> like a userfaultfd page, it will incorrectly trigger the userfaultfd.
> This is because it tries to fetch MAX_INSN_SIZE even if the
> instruction is shorter than that.
> 
> 2. It will fail on execute-only memory, and it will succeed on NX
> memory.  copy_from_user() is the wrong API to use here.  We don't have
> the right API, and we should add it.  (Hi Dave - what's the best way
> to do this?  New get_user_pages() mode?  Try to fault it in, hold an
> appropriate lock, walk the page tables to check permissions, and then
> access the user address directly?)
> 
> I don't know how much anyone really cares about this for UMIP, but
> with SEV-ES and such, I can see this becoming more important.

I'll have a look at those when I do the patchset.

Thx.
Masami Hiramatsu Oct. 28, 2020, 11:36 a.m. UTC | #25
On Tue, 27 Oct 2020 14:42:51 +0100
Borislav Petkov <bp@alien8.de> wrote:

> On Sat, Oct 24, 2020 at 09:10:25AM -0700, Andy Lutomirski wrote:
> > I can pretty much guarantee that a real modern CPU is able to decode a
> > <15 byte instruction that is followed by unmapped or non-executable
> > pages.  I don't know specifically how the CPU implements it, but it
> > works.
> 
> Yes, so reportedly and architecturally, a CPU tries to execute every
> last byte it has fetched. If it fails decoding an instruction because it
> is incomplete, then it raises a #PF. So you're correct.
> 
> > If I have a page that ends in 0x0F followed by an unmapped page, then
> > the correct response to an attempt to decode is SIGSEGV or -EFAULT.
> > If there's a page there that contains garbage, then the correct
> > response is SIGILL or -EINVAL or similar.  These are different
> > scenarios, and I don't think the current decoder API can be used to
> > distinguish them.
> 
> See above - the insn decoder should be taught to look only at the bytes
> it is *allowed* to look, i.e., the bytes which have been fetched and not
> peek forward. And I believe it does that to some extent but I need to
> look closer.

Yeah, it always does except for the prefix decoding. Anyway, it always
check the boundary (end address) when peek the byte.

> And it should detect the cases where the insn bytes come short. But that
> needs also looking but first things first.
> 
> Bottomline: it should do exactly what a CPU does, IMO.
> 
> Again, find me on IRC to hash out details but I believe we're in an
> agreement here.
> 
> > Take a look at fixup_umip_exception().  It currently has two bugs:
> > 
> > 1. If it tries to decode a short instruction followed by something
> > like a userfaultfd page, it will incorrectly trigger the userfaultfd.
> > This is because it tries to fetch MAX_INSN_SIZE even if the
> > instruction is shorter than that.

Hmm, did it pass the correct buf_size to insn_init()?
...
        nr_copied = insn_fetch_from_user(regs, buf);
...
Ah, I got it. It copies not until the page boundary but +MAX_INSN_SIZE...

> > 
> > 2. It will fail on execute-only memory, and it will succeed on NX
> > memory.  copy_from_user() is the wrong API to use here.  We don't have
> > the right API, and we should add it.  (Hi Dave - what's the best way
> > to do this?  New get_user_pages() mode?  Try to fault it in, hold an
> > appropriate lock, walk the page tables to check permissions, and then
> > access the user address directly?)

Good point! If we can not read the page we can not decode it by software.

Thank you,

> > 
> > I don't know how much anyone really cares about this for UMIP, but
> > with SEV-ES and such, I can see this becoming more important.
> 
> I'll have a look at those when I do the patchset.
> 
> Thx.
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette
Borislav Petkov Oct. 29, 2020, 12:42 p.m. UTC | #26
Hi Masami,

On Sat, Oct 24, 2020 at 01:27:41AM +0200, Borislav Petkov wrote:
> @@ -230,14 +231,20 @@ void insn_get_prefixes(struct insn *insn)
>   * If necessary, first collects any preceding (prefix) bytes.
>   * Sets @insn->opcode.value = opcode1.  No effect if @insn->opcode.got
>   * is already 1.
> + *
> + * Returns:
> + * 0:  on success
> + * !0: on error
>   */
> -void insn_get_opcode(struct insn *insn)
> +int insn_get_opcode(struct insn *insn)
>  {
>  	struct insn_field *opcode = &insn->opcode;
>  	insn_byte_t op;
>  	int pfx_id;
> +
>  	if (opcode->got)
> -		return;
> +		return 0;
> +
>  	if (!insn->prefixes.got)
>  		insn_get_prefixes(insn);
>  
> @@ -254,9 +261,13 @@ void insn_get_opcode(struct insn *insn)
>  		insn->attr = inat_get_avx_attribute(op, m, p);
>  		if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
>  		    (!inat_accept_vex(insn->attr) &&
> -		     !inat_is_group(insn->attr)))
> -			insn->attr = 0;	/* This instruction is bad */
> -		goto end;	/* VEX has only 1 byte for opcode */
> +		     !inat_is_group(insn->attr))) {
> +			/* This instruction is bad */
> +			insn->attr = 0;
> +			return 1;
> +		}
> +		/* VEX has only 1 byte for opcode */
> +		goto end;

so I'm playing more with this and am hitting the following after I made
this change to insn_get_opcode() to actually return an error because,
well, it is an error when the opcode bytes are pointing to an invalid
insn.

However, the current situation is that even though the comment says that
the instruction is bad:

                if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
                    (!inat_accept_vex(insn->attr) &&
                     !inat_is_group(insn->attr)))
                        insn->attr = 0; /* This instruction is bad */
                goto end;       /* VEX has only 1 byte for opcode */

it would goto to end and set opcode->got = 1, i.e., denote success.

Do you have a particular reason for why it does that?

Because, for example, when it encounters an invalid VEX insn which is
bad, running insn_sanity says this:

Error: Found an access violation:
Instruction = {
        .prefixes = {
                .value = 0, bytes[] = {0, 0, 0, 0},
                .got = 1, .nbytes = 0},
        .rex_prefix = {
                .value = 0, bytes[] = {0, 0, 0, 0},
                .got = 1, .nbytes = 0},
        .vex_prefix = {
                .value = 7138501, bytes[] = {c5, ec, 6c, 0},
                .got = 1, .nbytes = 2},
        .opcode = {
                .value = 149, bytes[] = {95, 0, 0, 0},
                .got = 0, .nbytes = 1},
        .modrm = {
                .value = 0, bytes[] = {0, 0, 0, 0},
                .got = 0, .nbytes = 0},
        .sib = {
                .value = 0, bytes[] = {0, 0, 0, 0},
                .got = 0, .nbytes = 0},
        .displacement = {
                .value = 0, bytes[] = {0, 0, 0, 0},
                .got = 0, .nbytes = 0},
        .immediate1 = {
                .value = 0, bytes[] = {0, 0, 0, 0},
                .got = 0, .nbytes = 0},
        .immediate2 = {
                .value = 0, bytes[] = {0, 0, 0, 0},
                .got = 0, .nbytes = 0},
        .attr = 0, .opnd_bytes = 4, .addr_bytes = 8,
        .length = 0, .x86_64 = 1, .kaddr = 0x7ffe7cc46460}
You can reproduce this with below command(s);
 $ echo  c5 ec 95 b2 02 bd 4b c8 a8 36 b2 c5 c0 df 13 | arch/x86/tools/insn_sanity -i -
Or 
 $ arch/x86/tools/insn_sanity -s 0x87ac2160,109

I do

arch/x86/tools/insn_sanity -s 0x87ac2160 -v -y

After having added debug output, it says:

inat_get_avx_attribute: vex_m: 0x1, vex_p: 0x0
inat_get_avx_attribute: looking up opcode 0x95
insn_get_opcode: insn is bad, must_evex: 0, !accept_vex: 1, !is_group: 1
get_opcode
get_modrm
get_sib
get_displacement
get_immediate failed
insn_decode: here
main: ret: -22
Error: Found an access violation:

so long story short, 0xc5 0xec 0x95 is an invalid VEX insn because
there's no VEX insn with opcode 0x95.

So it really is a bad insn.

So after my changes, insn_decode() becomes stricter but that would need
adjusting the sanity checker. And before I do that, let me run it by you
in case I'm missing some other aspect...

Thx.
Masami Hiramatsu Oct. 30, 2020, 1:24 a.m. UTC | #27
Hi,

On Thu, 29 Oct 2020 13:42:31 +0100
Borislav Petkov <bp@alien8.de> wrote:

> Hi Masami,
> 
> On Sat, Oct 24, 2020 at 01:27:41AM +0200, Borislav Petkov wrote:
> > @@ -230,14 +231,20 @@ void insn_get_prefixes(struct insn *insn)
> >   * If necessary, first collects any preceding (prefix) bytes.
> >   * Sets @insn->opcode.value = opcode1.  No effect if @insn->opcode.got
> >   * is already 1.
> > + *
> > + * Returns:
> > + * 0:  on success
> > + * !0: on error
> >   */
> > -void insn_get_opcode(struct insn *insn)
> > +int insn_get_opcode(struct insn *insn)
> >  {
> >  	struct insn_field *opcode = &insn->opcode;
> >  	insn_byte_t op;
> >  	int pfx_id;
> > +
> >  	if (opcode->got)
> > -		return;
> > +		return 0;
> > +
> >  	if (!insn->prefixes.got)
> >  		insn_get_prefixes(insn);
> >  
> > @@ -254,9 +261,13 @@ void insn_get_opcode(struct insn *insn)
> >  		insn->attr = inat_get_avx_attribute(op, m, p);
> >  		if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
> >  		    (!inat_accept_vex(insn->attr) &&
> > -		     !inat_is_group(insn->attr)))
> > -			insn->attr = 0;	/* This instruction is bad */
> > -		goto end;	/* VEX has only 1 byte for opcode */
> > +		     !inat_is_group(insn->attr))) {
> > +			/* This instruction is bad */
> > +			insn->attr = 0;
> > +			return 1;
> > +		}
> > +		/* VEX has only 1 byte for opcode */
> > +		goto end;
> 
> so I'm playing more with this and am hitting the following after I made
> this change to insn_get_opcode() to actually return an error because,
> well, it is an error when the opcode bytes are pointing to an invalid
> insn.

OK, let me see.

> 
> However, the current situation is that even though the comment says that
> the instruction is bad:
> 
>                 if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
>                     (!inat_accept_vex(insn->attr) &&
>                      !inat_is_group(insn->attr)))
>                         insn->attr = 0; /* This instruction is bad */
>                 goto end;       /* VEX has only 1 byte for opcode */
> 
> it would goto to end and set opcode->got = 1, i.e., denote success.

Ah, it should be a bug.

> 
> Do you have a particular reason for why it does that?

No, I think I have made a bug..

> 
> Because, for example, when it encounters an invalid VEX insn which is
> bad, running insn_sanity says this:
> 
> Error: Found an access violation:
> Instruction = {
>         .prefixes = {
>                 .value = 0, bytes[] = {0, 0, 0, 0},
>                 .got = 1, .nbytes = 0},
>         .rex_prefix = {
>                 .value = 0, bytes[] = {0, 0, 0, 0},
>                 .got = 1, .nbytes = 0},
>         .vex_prefix = {
>                 .value = 7138501, bytes[] = {c5, ec, 6c, 0},
>                 .got = 1, .nbytes = 2},
>         .opcode = {
>                 .value = 149, bytes[] = {95, 0, 0, 0},
>                 .got = 0, .nbytes = 1},
>         .modrm = {
>                 .value = 0, bytes[] = {0, 0, 0, 0},
>                 .got = 0, .nbytes = 0},
>         .sib = {
>                 .value = 0, bytes[] = {0, 0, 0, 0},
>                 .got = 0, .nbytes = 0},
>         .displacement = {
>                 .value = 0, bytes[] = {0, 0, 0, 0},
>                 .got = 0, .nbytes = 0},
>         .immediate1 = {
>                 .value = 0, bytes[] = {0, 0, 0, 0},
>                 .got = 0, .nbytes = 0},
>         .immediate2 = {
>                 .value = 0, bytes[] = {0, 0, 0, 0},
>                 .got = 0, .nbytes = 0},
>         .attr = 0, .opnd_bytes = 4, .addr_bytes = 8,
>         .length = 0, .x86_64 = 1, .kaddr = 0x7ffe7cc46460}
> You can reproduce this with below command(s);
>  $ echo  c5 ec 95 b2 02 bd 4b c8 a8 36 b2 c5 c0 df 13 | arch/x86/tools/insn_sanity -i -
> Or 
>  $ arch/x86/tools/insn_sanity -s 0x87ac2160,109

What's the objdump say here?

> 
> I do
> 
> arch/x86/tools/insn_sanity -s 0x87ac2160 -v -y
> 
> After having added debug output, it says:
> 
> inat_get_avx_attribute: vex_m: 0x1, vex_p: 0x0
> inat_get_avx_attribute: looking up opcode 0x95
> insn_get_opcode: insn is bad, must_evex: 0, !accept_vex: 1, !is_group: 1
> get_opcode
> get_modrm
> get_sib
> get_displacement
> get_immediate failed
> insn_decode: here
> main: ret: -22
> Error: Found an access violation:
> 
> so long story short, 0xc5 0xec 0x95 is an invalid VEX insn because
> there's no VEX insn with opcode 0x95.

Yes.

> 
> So it really is a bad insn.
> 
> So after my changes, insn_decode() becomes stricter but that would need
> adjusting the sanity checker. And before I do that, let me run it by you
> in case I'm missing some other aspect...

Yes, in this case, we would better to handle it as an undecodable input
instead of access violation in insn_sanity.

Thank you,

> 
> Thx.
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette
Borislav Petkov Oct. 30, 2020, 1:07 p.m. UTC | #28
On Fri, Oct 30, 2020 at 10:24:53AM +0900, Masami Hiramatsu wrote:
> What's the objdump say here?

The expected "bad":

   0:   c5 ec 95                (bad)
   3:   b2 02                   mov    $0x2,%dl
   5:   bd 4b c8 a8 36          mov    $0x36a8c84b,%ebp
   a:   b2 c5                   mov    $0xc5,%dl
   c:   c0 df 13                rcr    $0x13,%bh

> Yes, in this case, we would better to handle it as an undecodable input
> instead of access violation in insn_sanity.

Ok, good. I've got the hunk below now and it does the right thing. The
whole patch has become huuge now, lemme split it finally. :)

Thx.

---
diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c
index 185ceba9d289..f20765beec9c 100644
--- a/arch/x86/tools/insn_sanity.c
+++ b/arch/x86/tools/insn_sanity.c
@@ -222,8 +224,8 @@ static void parse_args(int argc, char **argv)
 
 int main(int argc, char **argv)
 {
+       int insns = 0, ret;
        struct insn insn;
-       int insns = 0;
        int errors = 0;
        unsigned long i;
        unsigned char insn_buff[MAX_INSN_SIZE * 2];
@@ -241,15 +243,15 @@ int main(int argc, char **argv)
                        continue;
 
                /* Decode an instruction */
-               insn_init(&insn, insn_buff, sizeof(insn_buff), x86_64);
-               insn_get_length(&insn);
+               ret = insn_decode(&insn, insn_buff, sizeof(insn_buff),
+                                 x86_64 ? INSN_MODE_64 : INSN_MODE_32);
 
                if (insn.next_byte <= insn.kaddr ||
                    insn.kaddr + MAX_INSN_SIZE < insn.next_byte) {
                        /* Access out-of-range memory */
                        dump_stream(stderr, "Error: Found an access violation", i, insn_buff, &insn);
                        errors++;
-               } else if (verbose && !insn_complete(&insn))
+               } else if (verbose && ret < 0)
                        dump_stream(stdout, "Info: Found an undecodable input", i, insn_buff, &insn);
                else if (verbose >= 2)
                        dump_insn(stdout, &insn);

Patch
diff mbox series

diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c
index 954cb2702e23..53e8ced662ff 100644
--- a/arch/x86/boot/compressed/sev-es.c
+++ b/arch/x86/boot/compressed/sev-es.c
@@ -79,16 +79,14 @@  static inline void sev_es_wr_ghcb_msr(u64 val)
 static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
 {
 	char buffer[MAX_INSN_SIZE];
-	enum es_result ret;
 
 	memcpy(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
 
 	insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE, 1);
-	insn_get_length(&ctxt->insn);
+	if (insn_get_length(&ctxt->insn))
+		return ES_DECODE_FAILED;
 
-	ret = ctxt->insn.immediate.got ? ES_OK : ES_DECODE_FAILED;
-
-	return ret;
+	return ES_OK;
 }
 
 static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 404315df1e16..aa72e8d305dd 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1262,14 +1262,14 @@  static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 		is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
 #endif
 		insn_init(&insn, kaddr, size, is_64bit);
-		insn_get_length(&insn);
+
 		/*
 		 * Make sure there was not a problem decoding the
 		 * instruction and getting the length.  This is
 		 * doubly important because we have an infinite
 		 * loop if insn.length=0.
 		 */
-		if (!insn.length)
+		if (insn_get_length(&insn) || !insn.length)
 			break;
 
 		to += insn.length;
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 8961653c5dd2..4ef88831f9b7 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -1262,8 +1262,7 @@  static int branch_type(unsigned long from, unsigned long to, int abort)
 		ret = X86_BR_INT;
 		break;
 	case 0xe8: /* call near rel */
-		insn_get_immediate(&insn);
-		if (insn.immediate1.value == 0) {
+		if (insn_get_immediate(&insn) || !insn.immediate1.value) {
 			/* zero length call */
 			ret = X86_BR_ZERO_CALL;
 			break;
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 5c1ae3eff9d4..0c79dad82b89 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -92,8 +92,8 @@  extern void insn_get_opcode(struct insn *insn);
 extern void insn_get_modrm(struct insn *insn);
 extern void insn_get_sib(struct insn *insn);
 extern void insn_get_displacement(struct insn *insn);
-extern void insn_get_immediate(struct insn *insn);
-extern void insn_get_length(struct insn *insn);
+extern int insn_get_immediate(struct insn *insn);
+extern int insn_get_length(struct insn *insn);
 
 /* Attribute will be determined after getting ModRM (for opcode groups) */
 static inline void insn_get_attribute(struct insn *insn)
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 547c7abb39f5..9f1010b30cd0 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -296,8 +296,10 @@  static int can_probe(unsigned long paddr)
 		__addr = recover_probed_instruction(buf, addr);
 		if (!__addr)
 			return 0;
+
 		kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE);
-		insn_get_length(&insn);
+		if (insn_get_length(&insn))
+			return 0;
 
 		/*
 		 * Another debugging subsystem might insert this breakpoint.
@@ -352,7 +354,8 @@  int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
 		return 0;
 
 	kernel_insn_init(insn, dest, MAX_INSN_SIZE);
-	insn_get_length(insn);
+	if (insn_get_length(insn))
+		return 0;
 
 	/* We can not probe force emulate prefixed instruction */
 	if (insn_has_emulate_prefix(insn))
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 041f0b50bc27..afe2148c964c 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -309,7 +309,9 @@  static int can_optimize(unsigned long paddr)
 		if (!recovered_insn)
 			return 0;
 		kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
-		insn_get_length(&insn);
+		if (insn_get_length(&insn))
+			return 0;
+
 		/* Another subsystem puts a breakpoint */
 		if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
 			return 0;
diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
index 4a96726fbaf8..73c3a4998bc2 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -268,12 +268,13 @@  static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
 		}
 
 		insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE - res, 1);
-		insn_get_length(&ctxt->insn);
+		res = insn_get_length(&ctxt->insn);
 	}
 
-	ret = ctxt->insn.immediate.got ? ES_OK : ES_DECODE_FAILED;
+	if (res)
+		return ES_DECODE_FAILED;
 
-	return ret;
+	return ES_OK;
 }
 
 static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 58f7fb95c7f4..f754470c884d 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -1491,7 +1491,9 @@  bool insn_decode(struct insn *insn, struct pt_regs *regs,
 	insn->addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs);
 	insn->opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs);
 
-	insn_get_length(insn);
+	if (insn_get_length(insn))
+		return false;
+
 	if (buf_size < insn->length)
 		return false;
 
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 404279563891..875cd5296c50 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -538,18 +538,23 @@  static int __get_immptr(struct insn *insn)
 }
 
 /**
- * insn_get_immediate() - Get the immediates of instruction
+ * insn_get_immediate() - Get the immediate in an instruction
  * @insn:	&struct insn containing instruction
  *
  * If necessary, first collects the instruction up to and including the
  * displacement bytes.
  * Basically, most of immediates are sign-expanded. Unsigned-value can be
- * get by bit masking with ((1 << (nbytes * 8)) - 1)
+ * computed by bit masking with ((1 << (nbytes * 8)) - 1)
+ *
+ * Returns:
+ *  - 0 on success
+ *  - !0 on error
  */
-void insn_get_immediate(struct insn *insn)
+int insn_get_immediate(struct insn *insn)
 {
 	if (insn->immediate.got)
-		return;
+		return 0;
+
 	if (!insn->displacement.got)
 		insn_get_displacement(insn);
 
@@ -604,9 +609,10 @@  void insn_get_immediate(struct insn *insn)
 	}
 done:
 	insn->immediate.got = 1;
+	return 0;
 
 err_out:
-	return;
+	return 1;
 }
 
 /**
@@ -615,13 +621,22 @@  void insn_get_immediate(struct insn *insn)
  *
  * If necessary, first collects the instruction up to and including the
  * immediates bytes.
- */
-void insn_get_length(struct insn *insn)
+ *
+ * Returns:
+ *  - 0 on success
+ *  - !0 on error
+*/
+int insn_get_length(struct insn *insn)
 {
 	if (insn->length)
-		return;
+		return 0;
+
 	if (!insn->immediate.got)
-		insn_get_immediate(insn);
+		if (insn_get_immediate(insn))
+			return 1;
+
 	insn->length = (unsigned char)((unsigned long)insn->next_byte
 				     - (unsigned long)insn->kaddr);
+
+	return 0;
 }
diff --git a/arch/x86/tools/insn_decoder_test.c b/arch/x86/tools/insn_decoder_test.c
index 34eda63c124b..5cb0d7b3ebf4 100644
--- a/arch/x86/tools/insn_decoder_test.c
+++ b/arch/x86/tools/insn_decoder_test.c
@@ -150,8 +150,7 @@  int main(int argc, char **argv)
 		}
 		/* Decode an instruction */
 		insn_init(&insn, insn_buff, sizeof(insn_buff), x86_64);
-		insn_get_length(&insn);
-		if (insn.length != nb) {
+		if (insn_get_length(&insn) || insn.length != nb) {
 			warnings++;
 			pr_warn("Found an x86 instruction decoder bug, "
 				"please report this.\n", sym);