linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Joe Perches <joe@perches.com>
To: Borislav Petkov <bp@alien8.de>
Cc: X86 ML <x86@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Andy Whitcroft <apw@canonical.com>,
	LKML <linux-kernel@vger.kernel.org>,
	Peter Zijlstra <peterz@infradead.org>
Subject: Re: [PATCH] checkpatch: Check for .byte-spelled insn opcodes documentation on x86
Date: Sat, 10 Oct 2020 08:27:20 -0700	[thread overview]
Message-ID: <4147e49c0b1251343181b5580d946c2273247927.camel@perches.com> (raw)
In-Reply-To: <20201010105421.GA24674@zn.tnic>

On Sat, 2020-10-10 at 12:54 +0200, Borislav Petkov wrote:
> On Fri, Oct 09, 2020 at 11:01:18AM -0700, Joe Perches wrote:
> > Given the location, this only works on .c and .h files.
> > It does not work on .S files.  Should it?
> 
> Probably not because there will be too many false positives. .byte is
> used not only to spell instruction opcodes in .S files. And the main
> case we're addressing here is using those .byte spelled opcodes in asm
> volatile constructs so...

[]

> > So it looks like the regex would be more complete as:
> > 
> > 	if ($realfile =~ m@^arch/x86/@ &&
> > 	    $rawline =~ /\.byte\s+(?:$Constant|(?:\\)?$Ident|"\s*$Ident)\b/) {
> 
> This is much less readable than what I have now (yes, realfile test
> should come first):

I care less about readability than completeness, correctness,
and minimal false positives.

> 	if ($realfile =~ m@^arch/x86/@ && $rawline =~ /\.byte[\s0-9a-fx,]+/) {
> 
> Also, this
> 
> 	$rawline =~ /\.byte\s+(?:$Constant|(?:\\)?$Ident|"\s*$Ident)\b/) {
> 
> matches
> 
> 	".byte 0x66"

And? So does the regex you propose.

> This
> 
> 	$rawline =~ /\.byte[\s0-9a-fx,]+)) {
> 
> matches
> 
> 	".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
> 
> which is what it needs to match.

Is that the only case you are trying to match?
two or more "0x<hex>," ?

Then this could use:

/"\s*\.byte\s+(?:0x[0-9a-fA-F]{1,2}\s*,\s*){2,4}/

$ git grep -P '"\s*\.byte\s+(?:0x[0-9a-fA-F]{1,2}\s*,\s*){2,4}' arch/x86
arch/x86/include/asm/intel_pconfig.h:#define PCONFIG ".byte 0x0f, 0x01, 0xc5"
arch/x86/include/asm/mwait.h:   asm volatile(".byte 0x0f, 0x01, 0xc8;"
arch/x86/include/asm/mwait.h:   asm volatile(".byte 0x0f, 0x01, 0xfa;"
arch/x86/include/asm/mwait.h:   asm volatile(".byte 0x0f, 0x01, 0xc9;"
arch/x86/include/asm/mwait.h:   asm volatile(".byte 0x0f, 0x01, 0xfb;"
arch/x86/include/asm/mwait.h:   asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n"
arch/x86/include/asm/segment.h:                 ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
arch/x86/include/asm/smap.h:#define __ASM_CLAC  ".byte 0x0f,0x01,0xca"
arch/x86/include/asm/smap.h:#define __ASM_STAC  ".byte 0x0f,0x01,0xcb"
arch/x86/include/asm/special_insns.h:   asm volatile(".byte 0x0f,0x01,0xee\n\t"
arch/x86/include/asm/special_insns.h:   asm volatile(".byte 0x0f,0x01,0xef\n\t"
arch/x86/include/asm/special_insns.h:           ".byte 0x66, 0x0f, 0xae, 0x30",  /* clwb (%%rax) */
arch/x86/include/asm/special_insns.h:   asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
arch/x86/include/asm/special_insns.h:   asm volatile(".byte 0xf3, 0x0f, 0x38, 0xf8, 0x02, 0x66, 0x90"
arch/x86/include/asm/special_insns.h:   asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory");

vs:

$ git grep -P '\.byte[\s0-9a-fx,]+' -- 'arch/x86/*.[ch]' | cat
arch/x86/include/asm/alternative-asm.h:	.byte \orig_len
arch/x86/include/asm/alternative-asm.h:	.byte \alt_len
arch/x86/include/asm/alternative-asm.h:	.byte \pad_len
arch/x86/include/asm/alternative.h:	" .byte " alt_total_slen "\n"			/* source len      */ \
arch/x86/include/asm/alternative.h:	" .byte " alt_rlen(num) "\n"			/* replacement len */ \
arch/x86/include/asm/alternative.h:	" .byte " alt_pad_len "\n"			/* pad len */
arch/x86/include/asm/bug.h:#define ASM_UD0		".byte 0x0f, 0xff" /* + ModRM (for Intel) */
arch/x86/include/asm/bug.h:#define ASM_UD1		".byte 0x0f, 0xb9" /* + ModRM */
arch/x86/include/asm/bug.h:#define ASM_UD2		".byte 0x0f, 0x0b"
arch/x86/include/asm/cpufeature.h:		 " .byte 3b - 1b\n"		/* src len */
arch/x86/include/asm/cpufeature.h:		 " .byte 5f - 4f\n"		/* repl len */
arch/x86/include/asm/cpufeature.h:		 " .byte 3b - 2b\n"		/* pad len */
arch/x86/include/asm/cpufeature.h:		 " .byte 3b - 1b\n"		/* src len */
arch/x86/include/asm/cpufeature.h:		 " .byte 0\n"			/* repl len */
arch/x86/include/asm/cpufeature.h:		 " .byte 0\n"			/* pad len */
arch/x86/include/asm/fpu/internal.h:#define XSAVE		".byte " REX_PREFIX "0x0f,0xae,0x27"
arch/x86/include/asm/fpu/internal.h:#define XSAVEOPT	".byte " REX_PREFIX "0x0f,0xae,0x37"
arch/x86/include/asm/fpu/internal.h:#define XSAVES		".byte " REX_PREFIX "0x0f,0xc7,0x2f"
arch/x86/include/asm/fpu/internal.h:#define XRSTOR		".byte " REX_PREFIX "0x0f,0xae,0x2f"
arch/x86/include/asm/fpu/internal.h:#define XRSTORS		".byte " REX_PREFIX "0x0f,0xc7,0x1f"
arch/x86/include/asm/idtentry.h: * Note, that the 'pushq imm8' is emitted via '.byte 0x6a, vector' because
arch/x86/include/asm/idtentry.h: * .byte achieves the same thing and the only fixup needed in the C entry
arch/x86/include/asm/idtentry.h:	.byte	0x6a, vector
arch/x86/include/asm/idtentry.h:	.byte	0x6a, vector
arch/x86/include/asm/inst.h: * Generate .byte code for some instructions not supported by old
arch/x86/include/asm/inst.h:	.byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3)
arch/x86/include/asm/inst.h:	.byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3)
arch/x86/include/asm/inst.h:	.byte 0xf3
arch/x86/include/asm/inst.h:	.byte 0x0f, 0xc7
arch/x86/include/asm/intel_pconfig.h:#define PCONFIG ".byte 0x0f, 0x01, 0xc5"
arch/x86/include/asm/jump_label.h:		".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
arch/x86/include/asm/jump_label.h:		".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
arch/x86/include/asm/jump_label.h:	.byte		0xe9
arch/x86/include/asm/jump_label.h:	.byte		STATIC_KEY_INIT_NOP
arch/x86/include/asm/jump_label.h:	.byte		STATIC_KEY_INIT_NOP
arch/x86/include/asm/jump_label.h:	.byte		0xe9
arch/x86/include/asm/mwait.h:	asm volatile(".byte 0x0f, 0x01, 0xc8;"
arch/x86/include/asm/mwait.h:	asm volatile(".byte 0x0f, 0x01, 0xfa;"
arch/x86/include/asm/mwait.h:	asm volatile(".byte 0x0f, 0x01, 0xc9;"
arch/x86/include/asm/mwait.h:	asm volatile(".byte 0x0f, 0x01, 0xfb;"
arch/x86/include/asm/mwait.h:	asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
arch/x86/include/asm/mwait.h:	asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n"
arch/x86/include/asm/nops.h:#define _ASM_MK_NOP(x) .byte x
arch/x86/include/asm/nops.h:#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n"
arch/x86/include/asm/paravirt.h:	 .byte ptype;				\
arch/x86/include/asm/paravirt.h:	 .byte 772b-771b;			\
arch/x86/include/asm/paravirt_types.h:	"  .byte " type "\n"				\
arch/x86/include/asm/paravirt_types.h:	"  .byte 772b-771b\n"				\
arch/x86/include/asm/segment.h:			".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
arch/x86/include/asm/smap.h:#define __ASM_CLAC	".byte 0x0f,0x01,0xca"
arch/x86/include/asm/smap.h:#define __ASM_STAC	".byte 0x0f,0x01,0xcb"
arch/x86/include/asm/special_insns.h:	asm volatile(".byte 0x0f,0x01,0xee\n\t"
arch/x86/include/asm/special_insns.h:	asm volatile(".byte 0x0f,0x01,0xef\n\t"
arch/x86/include/asm/special_insns.h:	alternative_io(".byte " __stringify(NOP_DS_PREFIX) "; clflush %P0",
arch/x86/include/asm/special_insns.h:		       ".byte 0x66; clflush %P0",
arch/x86/include/asm/special_insns.h:		".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])",
arch/x86/include/asm/special_insns.h:		".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */
arch/x86/include/asm/special_insns.h:		".byte 0x66, 0x0f, 0xae, 0x30",  /* clwb (%%rax) */
arch/x86/include/asm/special_insns.h:	asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
arch/x86/include/asm/special_insns.h:	asm volatile(".byte 0xf3, 0x0f, 0x38, 0xf8, 0x02, 0x66, 0x90"
arch/x86/include/asm/special_insns.h:	asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory");
arch/x86/include/asm/static_call.h:	__ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
arch/x86/include/asm/xen/interface.h:#define XEN_EMULATE_PREFIX __ASM_FORM(.byte __XEN_EMULATE_PREFIX ;)
arch/x86/kvm/vmx/ops.h:		     ".byte 0x3e\n\t" /* branch taken hint */
arch/x86/kvm/vmx/ops.h:			  ".byte 0x2e\n\t" /* branch not taken hint */	\
arch/x86/kvm/vmx/ops.h:			  ".byte 0x2e\n\t" /* branch not taken hint */	\
arch/x86/realmode/rm/realmode.h:#define LJMPW_RM(to)	.byte 0xea ; .word (to), real_mode_seg

and if lines like:

arch/x86/include/asm/inst.h: * Generate .byte code for some instructions not supported by old
arch/x86/include/asm/paravirt_types.h:	"  .byte " type "\n"				\

are false positives then perhaps change the regex
to add required spaces after the \.byte before the
match.




  parent reply	other threads:[~2020-10-10 22:51 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-09 16:14 [PATCH] checkpatch: Check for .byte-spelled insn opcodes documentation on x86 Borislav Petkov
2020-10-09 18:01 ` Joe Perches
2020-10-10 10:54   ` Borislav Petkov
2020-10-10 10:55     ` [PATCH -v2] " Borislav Petkov
2020-10-10 15:27     ` Joe Perches [this message]
2020-10-10 16:11       ` [PATCH] " Borislav Petkov
2020-10-10 16:47         ` Joe Perches
2020-10-12 14:21           ` Borislav Petkov
2020-10-12 14:23             ` [PATCH -v3] " Borislav Petkov
2020-10-12 15:04               ` Joe Perches
2020-10-12 15:02             ` [PATCH] " Joe Perches
2020-10-12 17:09             ` [PATCH -v4] " Joe Perches
2020-10-12 17:15               ` Borislav Petkov
2020-10-12 17:17                 ` Joe Perches
2020-10-12 17:31                   ` Borislav Petkov
2020-10-12 17:40                     ` Joe Perches
2020-10-12 17:55                       ` Borislav Petkov
2020-10-12 18:03                         ` Joe Perches
2020-10-10 15:38     ` [PATCH] " Joe Perches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4147e49c0b1251343181b5580d946c2273247927.camel@perches.com \
    --to=joe@perches.com \
    --cc=akpm@linux-foundation.org \
    --cc=apw@canonical.com \
    --cc=bp@alien8.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).