From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <linux-kernel-owner@vger.kernel.org>
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1752984AbbC1Il0 (ORCPT <rfc822;w@1wt.eu>);
	Sat, 28 Mar 2015 04:41:26 -0400
Received: from mail.skyhub.de ([78.46.96.112]:56471 "EHLO mail.skyhub.de"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S1752249AbbC1IlV (ORCPT <rfc822;linux-kernel@vger.kernel.org>);
	Sat, 28 Mar 2015 04:41:21 -0400
Date: Sat, 28 Mar 2015 09:39:28 +0100
From: Borislav Petkov <bp@alien8.de>
To: Dave Hansen <dave@sr71.net>
Cc: linux-kernel@vger.kernel.org, x86@kernel.org, tglx@linutronix.de,
        dave.hansen@linux.intel.com
Subject: Re: [PATCH 15/17] x86, mpx: do 32-bit-only cmpxchg for 32-bit apps
Message-ID: <20150328083928.GA17284@pd.tnic>
References: <20150326183327.64807530@viggo.jf.intel.com>
 <20150326183353.A2A5B371@viggo.jf.intel.com>
 <20150327172914.GE5517@pd.tnic>
 <55159E89.5090007@sr71.net>
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Disposition: inline
In-Reply-To: <55159E89.5090007@sr71.net>
User-Agent: Mutt/1.5.23 (2014-03-12)
Sender: linux-kernel-owner@vger.kernel.org
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org

On Fri, Mar 27, 2015 at 11:16:41AM -0700, Dave Hansen wrote:
> That would have saved creating 'u32 __user *bd_entry_32' so that we
> could implicitly do sizeof(*bd_entry_32).  But, what else does it buy us?

Well, you could misappropriate futex_atomic_cmpxchg_inatomic() which
takes u32s already - you probably might want to rename it to something
more generic first, though.

Diff ontop:

---
Index: b/arch/x86/mm/mpx.c
===================================================================
--- a/arch/x86/mm/mpx.c	2015-03-28 09:21:40.199966745 +0100
+++ b/arch/x86/mm/mpx.c	2015-03-28 09:19:40.491968402 +0100
@@ -18,6 +18,7 @@
 #include <asm/processor.h>
 #include <asm/trace/mpx.h>
 #include <asm/fpu-internal.h>
+#include <asm/futex.h>
 
 #define CREATE_TRACE_POINTS
 #include <asm/trace/mpx.h>
@@ -425,7 +426,6 @@ static int mpx_cmpxchg_bd_entry(struct m
 		unsigned long *actual_old_val_ptr, long __user *bd_entry_addr,
 		unsigned long expected_old_val, unsigned long new_bd_entry)
 {
-	int ret;
 	/*
 	 * user_atomic_cmpxchg_inatomic() actually uses sizeof()
 	 * the pointer thatt we pass to it to figure out how much
@@ -433,21 +433,16 @@ static int mpx_cmpxchg_bd_entry(struct m
 	 * pass a pointer to a 64-bit data type when we only want
 	 * a 32-bit copy.
 	 */
-	if (is_64bit_mm(mm)) {
-		ret = user_atomic_cmpxchg_inatomic(actual_old_val_ptr,
-				bd_entry_addr, expected_old_val, new_bd_entry);
-	} else {
-		u32 uninitialized_var(actual_old_val_32);
-		u32 expected_old_val_32 = expected_old_val;
-		u32 new_bd_entry_32 = new_bd_entry;
-		u32 __user *bd_entry_32 = (u32 __user *)bd_entry_addr;
-		ret = user_atomic_cmpxchg_inatomic(&actual_old_val_32,
-				bd_entry_32, expected_old_val_32,
-				new_bd_entry_32);
-		if (!ret)
-			*actual_old_val_ptr = actual_old_val_32;
-	}
-	return ret;
+	if (is_64bit_mm(mm))
+		return user_atomic_cmpxchg_inatomic(actual_old_val_ptr,
+						   bd_entry_addr,
+						   expected_old_val,
+						   new_bd_entry);
+	else
+		return futex_atomic_cmpxchg_inatomic((u32 *)actual_old_val_ptr,
+						    (u32 __user *)bd_entry_addr,
+						    expected_old_val,
+						    new_bd_entry);
 }
 
 /*
---

The asm looks the same except the retval. Yours does

	mov %rax, (%rsi)

for actual_old_val_ptr which, AFAICT, is not needed in the 32-bit
case because there we're returning a 32-bit value anyway:

	*actual_old_val_ptr = actual_old_val_32;

but gcc writes out the whole 64-bit register %rax to the pointer in %rsi
because it is an unsigned long it gets passed in.

Not that it matters, it is being sign-extended before that with

	movl	%eax, %eax	# actual_old_val_32, tmp137


yours:
------
	.loc 1 445 0
	cmpq	%rax, %rdx	# D.38827, bd_entry_addr
	ja	.L151	#,
.LBB993:
	.loc 1 445 0 is_stmt 0 discriminator 1
	movl	%ecx, %eax	# expected_old_val, actual_old_val_32
.LVL179:
	xorl	%edi, %edi	# ret
.LVL180:
#APP
# 445 "arch/x86/mm/mpx.c" 1

1:	.pushsection .smp_locks,"a"
.balign 4
.long 671f - .
.popsection
671:
	lock; cmpxchgl %r8d, (%rdx)	# new_bd_entry, MEM[(u32 *)bd_entry_addr_12(D)]
2:
	.section .fixup, "ax"
3:	mov     $-14, %edi	#, ret
	jmp     2b
	.previous
 .pushsection "__ex_table","a"
 .balign 8
 .long (1b) - .
 .long (3b) - .
 .popsection

# 0 "" 2
#NO_APP
.LBE993:
	.loc 1 448 0 is_stmt 1 discriminator 1
	testl	%edi, %edi	# ret
	jne	.L151	#,
	.loc 1 449 0
	movl	%eax, %eax	# actual_old_val_32, tmp137
.LVL181:
	movq	%rax, (%rsi)	# tmp137, *actual_old_val_ptr_17(D)
---


futex_atomic_cmpxchg_inatomic:
------------------------------
	.file 9 "./arch/x86/include/asm/futex.h"
	.loc 9 113 0
	cmpq	%rax, %rdx	# D.38827, bd_entry_addr
	ja	.L153	#,
.LBB1003:
	movl	%ecx, %eax	# expected_old_val, __old
.LVL185:
	xorl	%edi, %edi	# ret
.LVL186:
#APP
# 113 "./arch/x86/include/asm/futex.h" 1

1:	.pushsection .smp_locks,"a"
.balign 4
.long 671f - .
.popsection
671:
	lock; cmpxchgl %r8d, (%rdx)	# new_bd_entry, MEM[(u32 *)bd_entry_addr_12(D)]
2:
	.section .fixup, "ax"
3:	mov     $-14, %edi	#, ret
	jmp     2b
	.previous
 .pushsection "__ex_table","a"
 .balign 8
 .long (1b) - .
 .long (3b) - .
 .popsection

# 0 "" 2
#NO_APP
	movl	%eax, (%rsi)	# __old, MEM[(u32 *)actual_old_val_ptr_17(D)]
.LBE1003:
.LBE995:
.LBE994:
.LBE989:
	.loc 1 458 0
	movl	%edi, %eax	# ret,
---

Here the objdump output which shows the difference better:

yours:
------
     b02:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)
     b08:       48 83 e8 04             sub    $0x4,%rax
     b0c:       bf f2 ff ff ff          mov    $0xfffffff2,%edi
     b11:       48 39 c2                cmp    %rax,%rdx
     b14:       77 e8                   ja     afe <mpx_cmpxchg_bd_entry+0x3e>
     b16:       89 c8                   mov    %ecx,%eax
     b18:       31 ff                   xor    %edi,%edi
     b1a:       f0 44 0f b1 02          lock cmpxchg %r8d,(%rdx)
     b1f:       85 ff                   test   %edi,%edi
     b21:       75 db                   jne    afe <mpx_cmpxchg_bd_entry+0x3e>
     b23:       89 c0                   mov    %eax,%eax
     b25:       48 89 06                mov    %rax,(%rsi)
     b28:       89 f8                   mov    %edi,%eax
     b2a:       5d                      pop    %rbp
     b2b:       c3                      retq
     b2c:       0f 1f 40 00             nopl   0x0(%rax)


futex_atomic_cmpxchg_inatomic:
------------------------------
     b72:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)
     b78:       48 83 ef 04             sub    $0x4,%rdi
     b7c:       b8 f2 ff ff ff          mov    $0xfffffff2,%eax
     b81:       48 39 fa                cmp    %rdi,%rdx
     b84:       77 ea                   ja     b70 <mpx_cmpxchg_bd_entry+0x40>
     b86:       89 c8                   mov    %ecx,%eax
     b88:       31 ff                   xor    %edi,%edi
     b8a:       f0 44 0f b1 02          lock cmpxchg %r8d,(%rdx)
     b8f:       89 06                   mov    %eax,(%rsi)
     b91:       89 f8                   mov    %edi,%eax
     b93:       5d                      pop    %rbp
     b94:       c3                      retq
     b95:       66 66 2e 0f 1f 84 00    data16 nopw %cs:0x0(%rax,%rax,1)
     b9c:       00 00 00 00

AFAICT, in this case, we return only a 32-bit value and don't touch
the upper 32 bits of actual_old_val which might be a problem if the
assumptions of the callers is that the whole unsigned long is being
changed.

If that's not the case, then you get much nicer code :-)

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--