instruction-analysis API(s)

Wed Mar 4 01:15:13 UTC 2009

On Fri, 2009-02-27 at 16:20 -0500, Masami Hiramatsu wrote:
...
> 
> Here are a patch against your code and an example code for
> instruction length decoder.
> Curiously, KVM's instruction decoder does not completely
> cover all instructions(especially, Jcc/test...).
> I had to refer Intel manuals.
> 
> Moreover, even with this patch, the decoder is incomplete.
> - this doesn't cover 3bytes opcode yet.
> - this doesn't decode sib, displacement and immediate.
> - might have some bugs :-(
> 
> 
> Thank you,

Thanks for your work on this.  Comments below.

Jim

> 
> plain text document attachment (insn_x86.patch)
> Index: insn_x86.h
> ===================================================================
> --- insn_x86.h	(revision 1510)
> +++ insn_x86.h	(working copy)
> @@ -66,6 +66,10 @@
>  	struct insn_field displacement;
>  	struct insn_field immediate;
> 
> +	u8 op_bytes;

I'd probably use opnd_bytes and addr_bytes here, for clarity.  (When I
first saw "op", I thought "opcode".)  Also, we should clarify that these
are the EFFECTIVE lengths, not the lengths of the immediate and
displacement fields in the instruction.

> +	u8 ad_bytes;
> +	u8 length;
> +
>  	const u8 *kaddr;	/* kernel address of insn (copy) to analyze */
>  	const u8 *next_byte;
>  	bool x86_64;
> @@ -75,6 +79,7 @@
>  extern void insn_get_prefixes(struct insn *insn);
>  extern void insn_get_opcode(struct insn *insn);
>  extern void insn_get_modrm(struct insn *insn);
> +extern void insn_get_length(struct insn *insn);
> 
>  #ifdef CONFIG_X86_64
>  extern bool insn_rip_relative(struct insn *insn);
> Index: insn_x86.c
> ===================================================================
> --- insn_x86.c	(revision 1510)
> +++ insn_x86.c	(working copy)
> @@ -17,7 +17,7 @@
>   *
>   * Copyright (C) IBM Corporation, 2002, 2004, 2009
>   */
> -
> +#include <linux/module.h>
>  #include <linux/string.h>
>  // #include <asm/insn.h>
>  #include "insn_x86.h"
> @@ -34,6 +34,11 @@
>  	insn->kaddr = kaddr;
>  	insn->next_byte = kaddr;
>  	insn->x86_64 = x86_64;
> +	insn->op_bytes = 4;
> +	if (x86_64)
> +		insn->ad_bytes = 8;
> +	else
> +		insn->ad_bytes = 4;
>  }
>  EXPORT_SYMBOL_GPL(insn_init);
> 
> @@ -79,10 +84,51 @@
>  			break;
>  		prefixes->value |= pfx;
>  	}
> +	if (prefixes->value & X86_PFX_OPNDSZ) {
> +		/* oprand size switches 2/4 */
> +		insn->op_bytes ^= 6;
> +	}
> +	if (prefixes->value & X86_PFX_ADDRSZ) {
> +		/* address size switches 2/4 or 4/8 */
> +#ifdef CONFIG_X86_64
> +		if (insn->x86_64)
> +			insn->op_bytes ^= 12;
> +		else
> +#endif
> +			insn->op_bytes ^= 6;

This seems wrong.  You're checking the address-size prefix, but
adjusting the operand size.

> +	}
> +#ifdef CONFIG_X86_64
> +	if (prefixes->value & X86_PFX_REXW)
> +		insn->op_bytes = 8;
> +#endif
>  	prefixes->got = true;
>  }
>  EXPORT_SYMBOL_GPL(insn_get_prefixes);
> 
> +static bool __insn_is_stack(struct insn *insn)

It's not entirely clear to me what this function checks.  (A more
precise name might help.)  You have pushes, pops, and calls here, but
you also have some instructions that don't appear to affect the stack at
all.  And other push and pop instructions are missing.

> +{
> +	u8 reg;
> +	if (insn->opcode.nbytes == 2)
> +		return 0;

The following are 2-byte pushes or pops: 0f-a0, 0f-a1, 0f-a8, and 0f-a9.

Also, since the return value is bool, I'd prefer to see true/false
rather than 1/0.

> +
> +	switch(insn->opcode1) {
> +	case 0x68:
> +	case 0x6a:
> +	case 0x9c:
> +	case 0x9d:
> +	case 0xc5:

0xc5 = lds.  Why lds?

In general, it'd be nice to add a comment showing the mnemonic next to
each hex value -- e.g.,
	case 0x68: /* push */

> +	case 0xe8:
> +		return 1;
> +	}

Other related instructions: 9a, 1f, 07, 17, 8f.

> +	reg = ((*insn->next_byte) >> 3) & 7;
> +	if ((insn->opcode1 & 0xf0) == 0x50 ||
> +	    (insn->opcode1 == 0x1a && reg == 0) ||

The above line doesn't seem right.  It catches things like
sbb (%rax),%al .

> +	    (insn->opcode1 == 0xff && (reg & 1) == 0 && reg != 0)) {

Looks like the interesting reg values are 2 (call), 3 (call), and 6
(push).

> +		return 1;
> +	}
> +	return 0;
> +}
> +
>  /**
>   * insn_get_opcode - collect opcode(s)
>   * @insn:	&struct insn containing instruction
> @@ -108,6 +154,8 @@
>  		opcode->nbytes = 1;
>  	opcode->value = insn->opcode1;
>  	opcode->got = true;
> +	if (insn->x86_64 && __insn_is_stack(insn))
> +		insn->op_bytes = 8;
>  }
>  EXPORT_SYMBOL_GPL(insn_get_opcode);
> 
> @@ -208,3 +256,115 @@
>  }
>  EXPORT_SYMBOL_GPL(insn_rip_relative);
>  #endif
> +
> +/**
> + *
> + * insn_get_length() - Get the length of instruction
> + * @insn:	&struct insn containing instruction
> + *
> + * If necessary, first collects the instruction up to and including the
> + * ModRM byte.
> + */

As I mentioned in private email, you or I should probably refactor this
into:
- insn_get_sib()
- insn_get_displacement()
- insn_get_immediate()
- insn_get_length()

BTW, I'm going to have to change my definition of insn_field to
accommodate the 8-byte fields that can be found in instructions like
a0-a3 (8-byte displacement) and b8-bf (8-byte immediate).

> +void insn_get_length(struct insn *insn)
> +{
> +	u8 modrm;
> +	u8 mod = 0, reg = 0, rm = 0, sib;
> +	const u8 *next_byte;
> +	if (insn->length)
> +		return;
> +	if (!insn->modrm.got)
> +		insn_get_modrm(insn);
> +	next_byte = insn->next_byte;

This of course assumes that no fields have been fetched beyond the modrm
field.

> +
> +	if (insn->modrm.nbytes) {
> +		modrm = insn->modrm.value;
> +		mod = (modrm & 0xc0) >> 6;
> +		reg = (modrm & 0x38) >> 3;
> +		rm = (modrm & 0x07);

Some comments here would really help -- e.g...
/*
Interpreting the modrm byte:
mod = 00 - no displacement fields (exceptions below)
mod = 01 - 1-byte displacement field
mod = 10 - displacement field is 4 bytes, or 2 bytes if
	address size = 2 (0x67 prefix in 32-bit mode)
mod = 11 - no memory operand

If address size = 2...
mod = 00, r/m = 110 - displacement field is 2 bytes

If address size != 2...
mod != 11, r/m = 100 - SIB byte exists
mod = 00, SIB base field = 101 - displacement field is 4 bytes
mod = 00, r/m = 101 - rip-relative addressing, displacement
	field is 4 bytes
*/

> +		if (mod == 3)
> +			goto decode_src;
> +		if (insn->ad_bytes == 2) {
> +			if (mod == 1)
> +				next_byte++;
> +			else if (mod == 2)
> +				next_byte += 2;
> +			else if (rm == 6)
> +				next_byte += 2;
> +		} else {
> +			if (rm == 4) {
> +				sib = *(next_byte++);
> +				insn->sib.value = sib;
> +				insn->sib.nbytes = 1;
> +				insn->sib.got = 1;
> +				if ((sib & 7) == 5 && mod == 0)
> +					next_byte += 4;
> +			}
> +			if (mod == 1)
> +				next_byte++;
> +			else if (mod == 2)
> +				next_byte += 4;
> +			else if (rm == 5)
> +				next_byte += 4;
> +		}
> +	} else if (insn->opcode.nbytes == 1)
> +		if (0xa0 <= insn->opcode1 && insn->opcode1 < 0xa4)

Add comment:
			/* Displacement = entire address - up to 8 bytes */

> +			next_byte += insn->ad_bytes;
> +decode_src:

decode_src is a misnomer.  Here we're decoding the immediate operand
(which is always a source operand, but not the only kind).

> +	if (insn->opcode.nbytes == 1) {
> +		switch (insn->opcode1) {
> +		case 0x05:
> +		case 0x25:

What about (hex) 15, 35, 01, 0d, 2d?

> +		case 0x3d:
> +		case 0x68: // pushl
> +		case 0x69: // imul
> +		case 0x9a: /* long call */

0x9a (lcall) seems to have 6 bytes following the opcode, disassembled as
2 immediate operands.

> +		case 0xa9: // test
> +		case 0xc7:
> +		case 0xe8:
> +		case 0xe9:
> +		case 0xea: /* long jump */

Similarly, 0xea (ljmp) seems to have 6 bytes following the opcode,
disassembled as 2 immediate operands.

> +		case 0x82: /* Group */

s/82/81/ here.

> +			goto imm_common;
> +		case 0x04:
> +		case 0x24:

What about (hex) 14, 34, 0c, 1c, 2c?

> +		case 0x3c:
> +		case 0x6a: //pushb
> +		case 0x6b: //imul
> +		case 0xa8: //testb
> +		case 0xeb:
> +		case 0xc0:
> +		case 0xc1:
> +		case 0xc6:
> +		case 0x80: /* Group */
> +		case 0x81: /* Group */

s/81/82/ here.

> +		case 0x83: /* Group */
> +			goto immbyte_common;
> +		}
> +		if ((insn->opcode1 & 0xf8) == 0xb8 ||

I don't think this is right.  b8-bf can have 8-byte immediate fields
(with 0x48 prefix).

> +		    (insn->opcode1 == 0xf7 && reg == 0

or reg == 1

> ) ) {
> +imm_common:

Jumping into the middle of an if block is ugly, and not necessary here.

> +			next_byte += (insn->op_bytes == 8) ? 4 : insn->op_bytes;
> +		} else if ((insn->opcode1 & 0xf8) == 0xb0 || //
> +			   (insn->opcode1 & 0xf0) == 0x70 || // Jcc
> +			   (insn->opcode1 & 0xf8) == 0xe0 || // loop/in/out
> +			    (insn->opcode1 == 0xf6 && reg == 0)) {
> +immbyte_common:

Jumping into the middle of an if block is ugly, and not necessary here.

> +			next_byte++;
> +		}

0xc8 and 0xcd are weird cases that we should handle .

> +	} else {
> +		switch (insn->opcode2) {

Add 0x70.

> +		case 0xa4:
> +		case 0xac:
> +		case 0xba:
> +		case 0x0f: // 3dnow
> +		case 0x3a: // ssse3
> +			next_byte++;
> +			break;
> +		default:
> +			if ((insn->opcode2 & 0xf0) == 0x80)
> +				next_byte += (insn->op_bytes == 8) ? 4 : insn->op_bytes;
> +		}
> +	}
> +	insn->length = (u8)(next_byte - insn->kaddr);
> +}
> +EXPORT_SYMBOL_GPL(insn_get_length);
>