linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch] tls-2.5.30-A1
@ 2002-08-07 18:10 Ingo Molnar
  2002-08-07 18:33 ` Linus Torvalds
                   ` (2 more replies)
  0 siblings, 3 replies; 47+ messages in thread
From: Ingo Molnar @ 2002-08-07 18:10 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Alexandre Julliard

[-- Attachment #1: Type: TEXT/PLAIN, Size: 5643 bytes --]


the attached patch (against BK-curr + Luca Barbieri's two TLS patches)  
does two things:

 - it implements a second TLS entry for Wine's purposes.

Alexandre suggested that Wine would need two TLS entries, one for glibc
(in %gs), and one for the Win32 API (in %fs). The constant selector is
also a speedup for switches to/from 16-bit mode.

i left the possibility open to add even more TLS entries, but i find it
very unlikely to happen. So the code does not iterate over an array of TLS
descriptors, for performance reasons. This can be changed anytime without
affecting the userspace interface.

 - the patch adds the get_thread_area() system-call.

the get_thread_area() call is needed by debuggers, to be able to read the
TLS settings of a threaded application, without having to assume anything
about what was loaded. The get_thread_area() call does not expose any
segmentation details - it returns the TLS info in the same format as
passed to the set_thread_area() call.

i've also attached tls.c which shows off both extensions. These extensions
are source and binary-compatible with any potential TLS code.

	Ingo

--- linux/arch/i386/kernel/process.c.orig	Wed Aug  7 19:16:45 2002
+++ linux/arch/i386/kernel/process.c	Wed Aug  7 19:40:27 2002
@@ -839,6 +839,7 @@
 asmlinkage int sys_set_thread_area(unsigned long base, unsigned long flags)
 {
 	struct thread_struct *t = &current->thread;
+	struct desc_struct *desc;
 	int writable = 0;
 	int cpu;
 
@@ -848,21 +849,62 @@
 
 	if (flags & TLS_FLAG_WRITABLE)
 		writable = 1;
+	desc = &t->tls_desc1;
+	if (flags & TLS_FLAG_ENTRY2)
+		desc = &t->tls_desc2;
 
 	/*
 	 * We must not get preempted while modifying the TLS.
 	 */
 	cpu = get_cpu();
 
-        t->tls_desc.a = ((base & 0x0000ffff) << 16) | 0xffff;
+        desc->a = ((base & 0x0000ffff) << 16) | 0xffff;
 
-        t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) |
+        desc->b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) |
 				0xf0000 | (writable << 9) | (1 << 15) |
 					(1 << 22) | (1 << 23) | 0x7000;
 
 	load_TLS_desc(t, cpu);
 	put_cpu();
 
-	return TLS_ENTRY*8 + 3;
+	if (flags & TLS_FLAG_ENTRY2)
+		return TLS_ENTRY2*8 + 3;
+	else
+		return TLS_ENTRY1*8 + 3;
+}
+
+/*
+ * Get the current Thread-Local Storage area:
+ */
+
+#define GET_BASE(desc) \
+(	(((desc).a >> 16) & 0x0000ffff) | \
+	(((desc).b << 16) & 0x00ff0000) | \
+	( (desc).b        & 0xff000000)	)
+
+#define GET_WRITABLE(desc) \
+	(((desc).b >> 9)  & 0x00000001)
+
+asmlinkage int sys_get_thread_area(unsigned long *ubase, unsigned long *uflags,
+					unsigned long flags)
+{
+	struct thread_struct *thread = &current->thread;
+	unsigned long base, flg;
+
+	if (flags & ~TLS_FLAGS_MASK)
+		return -EINVAL;
+
+	if (flags & TLS_FLAG_ENTRY2) {
+		base = GET_BASE(thread->tls_desc2);
+		flg = GET_WRITABLE(thread->tls_desc2) | TLS_FLAG_ENTRY2;
+	} else {
+		base = GET_BASE(thread->tls_desc1);
+		flg = GET_WRITABLE(thread->tls_desc1) | TLS_FLAG_ENTRY1;
+	}
+	if (copy_to_user(ubase, &base, sizeof(base)))
+		return -EFAULT;
+	if (copy_to_user(uflags, &flg, sizeof(flg)))
+		return -EFAULT;
+	return 0;
 }
 
--- linux/arch/i386/kernel/entry.S.orig	Wed Aug  7 19:18:33 2002
+++ linux/arch/i386/kernel/entry.S	Wed Aug  7 19:18:21 2002
@@ -753,6 +753,7 @@
 	.long sys_sched_setaffinity
 	.long sys_sched_getaffinity
 	.long sys_set_thread_area
+	.long sys_get_thread_area
 
 	.rept NR_syscalls-(.-sys_call_table)/4
 		.long sys_ni_syscall
--- linux/include/asm-i386/processor.h.orig	Wed Aug  7 19:22:57 2002
+++ linux/include/asm-i386/processor.h	Wed Aug  7 19:27:01 2002
@@ -376,8 +376,8 @@
 	unsigned long		v86flags, v86mask, v86mode, saved_esp0;
 /* IO permissions */
 	unsigned long	*ts_io_bitmap;
-/* TLS cached descriptor */
-	struct desc_struct tls_desc;
+/* TLS cached descriptors */
+	struct desc_struct tls_desc1, tls_desc2;
 };
 
 #define INIT_THREAD  {						\
--- linux/include/asm-i386/unistd.h.orig	Wed Aug  7 19:18:45 2002
+++ linux/include/asm-i386/unistd.h	Wed Aug  7 19:18:58 2002
@@ -248,6 +248,7 @@
 #define __NR_sched_setaffinity	241
 #define __NR_sched_getaffinity	242
 #define __NR_set_thread_area	243
+#define __NR_get_thread_area	244
 
 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
 
--- linux/include/asm-i386/desc.h.orig	Wed Aug  7 19:20:57 2002
+++ linux/include/asm-i386/desc.h	Wed Aug  7 19:51:13 2002
@@ -12,7 +12,7 @@
  *   3 - kernel data segment
  *   4 - user code segment		<==== new cacheline
  *   5 - user data segment
- *   6 - Thread-Local Storage (TLS) segment
+ *   6 - Thread-Local Storage (TLS) segment #1
  *   7 - LDT
  *   8 - APM BIOS support		<==== new cacheline
  *   9 - APM BIOS support
@@ -23,12 +23,13 @@
  *  14 - PNPBIOS support
  *  15 - PNPBIOS support
  *  16 - PNPBIOS support		<==== new cacheline
- *  17 - not used
+ *  17 - TLS segment #2
  *  18 - not used
  *  19 - not used
  */
 #define TSS_ENTRY 1
-#define TLS_ENTRY 6
+#define TLS_ENTRY1 6
+#define TLS_ENTRY2 17
 #define LDT_ENTRY 7
 /*
  * The interrupt descriptor table has room for 256 idt's,
@@ -86,13 +87,16 @@
 	_set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82);
 }
 
-#define TLS_FLAGS_MASK			0x00000001
+#define TLS_FLAGS_MASK			0x00000003
 
 #define TLS_FLAG_WRITABLE		0x00000001
+#define TLS_FLAG_ENTRY1			0x00000000
+#define TLS_FLAG_ENTRY2			0x00000002
 
 static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu)
 {
-	cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc;
+	cpu_gdt_table[cpu][TLS_ENTRY1] = t->tls_desc1;
+	cpu_gdt_table[cpu][TLS_ENTRY2] = t->tls_desc2;
 }
 
 static inline void clear_LDT(void)

[-- Attachment #2: Type: TEXT/PLAIN, Size: 3698 bytes --]

#include <asm/ldt.h>
#include <stdio.h>
#include <linux/unistd.h>
#include <signal.h>
#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>
#include <asm/sigcontext.h>

/*
 * TLS functionality testing utility.
 */
#define TLS_FLAGS_MASK                  0x00000003

#define TLS_FLAG_WRITABLE               0x00000001
#define TLS_FLAG_ENTRY2                 0x00000002

#define __NR_set_thread_area 243
_syscall2(int, set_thread_area, unsigned int, base, unsigned int, flags)

#define __NR_get_thread_area 244
_syscall3(int, get_thread_area, unsigned int *, ubase, unsigned int *, uflags, unsigned int, flags)

/*
 * Load the segment selector 'seg' into the %fs segment register, so that
 * the fs-prefixed accesses done by __readseg()/__writeseg() go through
 * that segment.
 */
static inline void initseg (int seg)
{
	__asm__ __volatile__ (
		"mov %w0,%%fs"
		: /* no outputs */
		: "r" (seg));
}

/*
 * Read one byte at 'offset' relative to the segment currently loaded in
 * %fs and return it.
 *
 * The asm is marked volatile and clobbers "memory": it reads memory that
 * the compiler cannot see, so without these two calls with the same
 * offset could be merged into one, or a read could be moved across a
 * preceding __writeseg(), and the caller would see a stale byte.
 */
static inline unsigned char __readseg (unsigned offset)
{
	unsigned char res;

	__asm__ __volatile__ ("fs; movb (%1),%%al"
			      : "=a" (res)
			      : "r" (offset)
			      : "memory");

	return res;
}

/*
 * Store the byte 'b' at 'offset' relative to the segment currently
 * loaded in %fs.
 *
 * 'b' uses the "q" constraint because the template references it as a
 * byte register (%b1); a plain "r" may select a register that has no
 * byte-sized form on i386.  The "memory" clobber tells the compiler that
 * memory is modified behind its back.
 */
static inline void __writeseg (unsigned offset, unsigned char b)
{
	__asm__ __volatile__ ("fs; movb %b1,(%0)"
			      : /* no outputs */
			      : "r" (offset), "q" (b)
			      : "memory");
}

/*
 * Fetch one byte through %fs: 'src' is interpreted as an offset into the
 * segment, and the byte read is stored into the ordinary memory location
 * 'dst'.
 */
static void readseg (void *dst, const void *src)
{
	unsigned int seg_offset = (unsigned int)src;
	char *out = dst;

	*out = __readseg(seg_offset);
}

/*
 * Store one byte through %fs: 'dst' is interpreted as an offset into the
 * segment, 'value' is the byte written there.
 */
static void writeseg (void *dst, unsigned char value)
{
	unsigned int seg_offset = (unsigned int)dst;

	__writeseg(seg_offset, value);
}

/*
 * Test buffers, each pre-filled with a distinct byte value.
 *
 * 'data' is the buffer main() uses as the TLS area.  main() stores to
 * data[4096] and accesses segment offsets 4096 and 4097 relative to
 * &data, so 'data' carries two extra bytes to keep those accesses inside
 * the object instead of depending on how the linker lays out the
 * neighbouring arrays.  The two extra bytes start out as 55, the fill
 * value of post_data.
 */
unsigned char pre_data		[4096] = { [ 0 ... 4095 ] = 33 };
unsigned char data		[4096 + 2] = { [ 0 ... 4095 ] = 44,
					       [ 4096 ... 4097 ] = 55 };
unsigned char post_data		[4096] = { [ 0 ... 4095 ] = 55 };

/*
 * Walk through the TLS system calls, printing one PASSED/FAILURE line
 * per check:
 *
 *  1. install a writable TLS segment with base 0 in the first entry and
 *     read data[0] through %fs using its flat address;
 *  2. install a writable TLS segment based at 'data' in the second entry,
 *     sleep once, then read and write bytes at segment offsets 0, 4096
 *     and 4097;
 *  3. read the second entry back with get_thread_area() and compare the
 *     returned base and flags with what was installed.
 *
 * The expected selectors are entry*8 + 3: 51 (0x33) for GDT entry 6 and
 * 0x8b for GDT entry 17.
 *
 * Exits with -1 as soon as set_thread_area() returns an unexpected
 * selector; every other failure is only reported.  Returns 0 otherwise.
 */
int main (void)
{
	unsigned int base, flags;
	unsigned int want_base, want_flags;
	int seg, ret;
	unsigned char result;

	/* Marker bytes that are read back through the segment below. */
	data[0] = 123;
	data[4096] = 210;

	base = 0;

	printf("\ndoing set_thread_area(0x%08x, writable):\n", base);
	seg = set_thread_area(base, TLS_FLAG_WRITABLE);
	printf("====> got GDT selector: 0x%x", seg);
	if (seg != 51) {
		printf(" ERROR: incorrect selector!\n");
		exit(-1);
	} else
		printf(" --- TEST PASSED.\n");

	initseg(seg);
	printf("\nreading first byte of [0x%08x] TLS:\n", base);

	/* With a base of 0 the segment offset is the flat address of data. */
	readseg (&result, &data);
	if (result == 123)
		printf("====> %d --- TEST PASSED.\n\n", result);
	else
		printf("====> %d --- TEST FAILURE!\n\n", result);

	base = (unsigned int)&data;

	printf("doing set_thread_area(0x%08x, writable, entry2):\n", base);
	seg = set_thread_area(base, TLS_FLAG_WRITABLE | TLS_FLAG_ENTRY2);
	printf("====> got GDT selector: 0x%x", seg);
	if (seg != 0x8b) {
		printf(" ERROR: incorrect selector!\n");
		exit(-1);
	} else
		printf(" --- TEST PASSED.\n");

	/*
	 * Only load the selector once it has been validated: loading an
	 * error return value into %fs would fault before the error
	 * message above could be printed.
	 */
	initseg(seg);

	printf("context-switching once ...\n");
	sleep(1);
	printf("\nreading first byte of 4K [0x%08x] TLS:\n", base);

	readseg (&result, 0);
	if (result == 123)
		printf("====> %d --- TEST PASSED.\n\n", result);
	else
		printf("====> %d --- TEST FAILURE!\n\n", result);

	printf("reading last byte of 4097 byte [0x%08x] TLS:\n", base);

	readseg (&result, (void *)4096);
	if (result == 210)
		printf("====> %d --- TEST PASSED.\n\n", result);
	else
		printf("====> %d --- TEST FAILURE!\n\n", result);

	printf("writing last byte of 4097 byte [0x%08x] TLS:\n", base);
	writeseg ((void *)4096, 234);
	readseg (&result, (void *)4096);
	if (result == 234)
		printf("====> %d --- TEST PASSED.\n", result);
	else
		printf("====> %d --- TEST FAILURE!.\n", result);

	printf("\nreading byte outside of the TLS (should not coredump)...\n\n");
	readseg (&result, (void *)4097);
	printf("result: %d.\n", result);

	/* Remember what was installed so the values read back can be checked. */
	want_base = base;
	want_flags = TLS_FLAG_WRITABLE | TLS_FLAG_ENTRY2;

	printf("doing get_thread_area(0x%08x, writable, entry2):\n", base);
	/* Poison both outputs so stale values cannot pass the comparison. */
	base = flags = 1234;
	ret = get_thread_area(&base, &flags, TLS_FLAG_WRITABLE | TLS_FLAG_ENTRY2);
	if (!ret && base == want_base && flags == want_flags)
		printf("====> [%08x, %u] %d --- TEST PASSED.\n", base, flags, ret);
	else
		printf("====> [%08x, %u] %d --- TEST FAILURE!.\n", base, flags, ret);

	return 0;
}

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.30-A1
  2002-08-07 18:10 [patch] tls-2.5.30-A1 Ingo Molnar
@ 2002-08-07 18:33 ` Linus Torvalds
  2002-08-07 18:43   ` Stephen Rothwell
                     ` (4 more replies)
  2002-08-07 19:02 ` [patch] tls-2.5.30-A1 Christoph Hellwig
  2002-08-08 12:25 ` Jamie Lokier
  2 siblings, 5 replies; 47+ messages in thread
From: Linus Torvalds @ 2002-08-07 18:33 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, Alexandre Julliard, Luca Barbieri


On Wed, 7 Aug 2002, Ingo Molnar wrote:
> 
> the attached patch (against BK-curr + Luca Barbieri's two TLS patches)  
> does two things:
> 
>  - it implements a second TLS entry for Wine's purposes.

Guys, I really don't like how the segment map ends up getting uglier and
uglier.

I would suggest:
 - move all kernel-related (and thus non-visible to user space) segments 
   up, and make the cacheline optimizations _there_. 
 - keep the TLS entries contiguous, and make sure that segment 0040 (ie
   GDT entry #8) is available to a TLS entry, since if I remember
   correctly, that one is also magical for old Windows binaries for all
   the wrong reasons (ie it was some system data area in DOS and in 
   Windows 3.1)
 - and for cleanliness bonus points: make the regular user data segments 
   just another TLS segment that just happens to have default values. If 
   the user wants to screw with its own segments, let it.

Then, for double extra bonus points somebody should look into whether
those damn PnP BIOS segments could be simply made to be TLS segments
during module init. I don't know if that PnP stuff is required later or
not.

		Linus


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.30-A1
  2002-08-07 18:33 ` Linus Torvalds
@ 2002-08-07 18:43   ` Stephen Rothwell
  2002-08-07 18:57     ` Linus Torvalds
  2002-08-07 19:31   ` Ingo Molnar
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 47+ messages in thread
From: Stephen Rothwell @ 2002-08-07 18:43 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: mingo, linux-kernel, julliard, ldb

On Wed, 7 Aug 2002 11:33:23 -0700 (PDT) Linus Torvalds <torvalds@transmeta.com> wrote:
>
>  - keep the TLS entries contiguous, and make sure that segment 0040 (ie
>    GDT entry #8) is available to a TLS entry, since if I remember
>    correctly, that one is also magical for old Windows binaries for all
>    the wrong reasons (ie it was some system data area in DOS and in 
>    Windows 3.1)

segment 0040 is used by the APM driver to work around bugs in some BIOS
implementations where some (brain-dead) BIOS writer has assumed that the
BIOS data area is still available in protected mode ...
-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.30-A1
  2002-08-07 18:43   ` Stephen Rothwell
@ 2002-08-07 18:57     ` Linus Torvalds
  2002-08-07 19:40       ` Alexandre Julliard
  0 siblings, 1 reply; 47+ messages in thread
From: Linus Torvalds @ 2002-08-07 18:57 UTC (permalink / raw)
  To: Stephen Rothwell; +Cc: mingo, linux-kernel, julliard, ldb


On Thu, 8 Aug 2002, Stephen Rothwell wrote:

> On Wed, 7 Aug 2002 11:33:23 -0700 (PDT) Linus Torvalds <torvalds@transmeta.com> wrote:
> >
> >  - keep the TLS entries contiguous, and make sure that segment 0040 (ie
> >    GDT entry #8) is available to a TLS entry, since if I remember
> >    correctly, that one is also magical for old Windows binaries for all
> >    the wrong reasons (ie it was some system data area in DOS and in 
> >    Windows 3.1)
> 
> segment 0040 is used by the APM driver to work around bugs in some BIOS
> implementations where some (brain-dead) BIOS writer has assume that the
> BIOS data area is still available in protected mode ...

Ok, sounds like that one ends up having to be a fixed segment (I wonder if
Wine can take advantage of it? looks like it is hardcoded to base 0x400,
which is probably fine for Wine anyway - just map something at the right
address - but it looks CPL0 only? Might be ok to just make it available to
user space).

		Linus


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.30-A1
  2002-08-07 18:10 [patch] tls-2.5.30-A1 Ingo Molnar
  2002-08-07 18:33 ` Linus Torvalds
@ 2002-08-07 19:02 ` Christoph Hellwig
  2002-08-08 12:25 ` Jamie Lokier
  2 siblings, 0 replies; 47+ messages in thread
From: Christoph Hellwig @ 2002-08-07 19:02 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Linus Torvalds, linux-kernel, Alexandre Julliard

On Wed, Aug 07, 2002 at 08:10:40PM +0200, Ingo Molnar wrote:
> 
> the attached patch (against BK-curr + Luca Barbieri's two TLS patches)  
> does two things:
> 
>  - it implements a second TLS entry for Wine's purposes.

The sys_set_thread_area interface gets worse with every patch you post..

Why do you really need a magic multiplexer syscall (you could have just
used prctl if you don't need a sane interface..)?

What about a proper interface like:

asmlinkage int
sys_set_thread_area(int entry, unsigned long base, int writeable)

instead?


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.30-A1
  2002-08-07 18:33 ` Linus Torvalds
  2002-08-07 18:43   ` Stephen Rothwell
@ 2002-08-07 19:31   ` Ingo Molnar
  2002-08-07 19:49     ` Alexandre Julliard
  2002-08-07 22:01   ` Alan Cox
                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 47+ messages in thread
From: Ingo Molnar @ 2002-08-07 19:31 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Alexandre Julliard, Luca Barbieri


On Wed, 7 Aug 2002, Linus Torvalds wrote:

> I would suggest:
>  - move all kernel-related (and thus non-visible to user space) segments 
>    up, and make the cacheline optimizations _there_. 
>  - keep the TLS entries contiguous, and make sure that segment 0040 (ie
>    GDT entry #8) is available to a TLS entry, since if I remember
>    correctly, that one is also magical for old Windows binaries for all
>    the wrong reasons (ie it was some system data area in DOS and in 
>    Windows 3.1)
>  - and for cleanliness bonus points: make the regular user data segments 
>    just another TLS segment that just happens to have default values. If 
>    the user wants to screw with its own segments, let it.

i'll do this. Julliard, any additional suggestions perhaps - is GDT entry
8 the best %fs choice for Wine?

	Ingo


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.30-A1
  2002-08-07 18:57     ` Linus Torvalds
@ 2002-08-07 19:40       ` Alexandre Julliard
  0 siblings, 0 replies; 47+ messages in thread
From: Alexandre Julliard @ 2002-08-07 19:40 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Stephen Rothwell, mingo, linux-kernel, ldb

Linus Torvalds <torvalds@transmeta.com> writes:

> Ok, sounds like that one ends up having to be a fixed segment (I wonder if
> Wine can take advantage of it? looks like it is hardcoded to base 0x400,
> which is probably fine for Wine anyway - just map something at the right
> address - but it looks CPL0 only? Might be ok to just make it available to
> user space).

Base 0x400 should work just fine for Wine, we already need to have the
BIOS data mapped there anyway, so simply making the selector available
to user space would work completely transparently for us. We are
currently trapping and emulating accesses to that selector so it
doesn't matter much whether it is protected or not, except for a small
performance gain. What would break Wine is if that selector was made
accessible to user space with a different base address, so this should
be avoided.

-- 
Alexandre Julliard
julliard@winehq.com

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.30-A1
  2002-08-07 19:31   ` Ingo Molnar
@ 2002-08-07 19:49     ` Alexandre Julliard
  0 siblings, 0 replies; 47+ messages in thread
From: Alexandre Julliard @ 2002-08-07 19:49 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Linus Torvalds, linux-kernel, Luca Barbieri

Ingo Molnar <mingo@elte.hu> writes:

> i'll do this. Julliard, any additional suggestions perhaps - is GDT entry
> 8 the best %fs choice for Wine?

No, this one is special and has to point to 0x400, so it's actually
the only one that wouldn't work to use as %fs in Wine.

-- 
Alexandre Julliard
julliard@winehq.com

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.30-A1
  2002-08-07 18:33 ` Linus Torvalds
  2002-08-07 18:43   ` Stephen Rothwell
  2002-08-07 19:31   ` Ingo Molnar
@ 2002-08-07 22:01   ` Alan Cox
  2002-08-07 22:36   ` Luca Barbieri
  2002-08-11 21:46   ` [patch] tls-2.5.31-C3 Ingo Molnar
  4 siblings, 0 replies; 47+ messages in thread
From: Alan Cox @ 2002-08-07 22:01 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Ingo Molnar, linux-kernel, Alexandre Julliard, Luca Barbieri

On Wed, 2002-08-07 at 19:33, Linus Torvalds wrote:
>  - keep the TLS entries contiguous, and make sure that segment 0040 (ie
>    GDT entry #8) is available to a TLS entry, since if I remember
>    correctly, that one is also magical for old Windows binaries for all
>    the wrong reasons (ie it was some system data area in DOS and in 
>    Windows 3.1)

Lots of BIOSes (a million monkeys bashing on typewriters will write
something that passes some BIOS vendor QA in about 2 seconds) illegally
assume that 0040: points at the BIOS data segment 0040 when making APM32
calls. Enough of them that Windows makes it so, and it's never going to get
corrected.

> Then, for double extra bonus points somebody should look into whether
> those damn PnP BIOS segments could be simply made to be TLS segments
> during module init. I don't know if that PnP stuff is required later or
> not.

PnPBIOS has to rewrite segments as it goes for data passing. It doesn't
really matter where you stuff them though.



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.30-A1
  2002-08-07 18:33 ` Linus Torvalds
                     ` (2 preceding siblings ...)
  2002-08-07 22:01   ` Alan Cox
@ 2002-08-07 22:36   ` Luca Barbieri
  2002-08-07 22:54     ` Ingo Molnar
  2002-08-11 21:46   ` [patch] tls-2.5.31-C3 Ingo Molnar
  4 siblings, 1 reply; 47+ messages in thread
From: Luca Barbieri @ 2002-08-07 22:36 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Ingo Molnar, Linux-Kernel ML

[-- Attachment #1: Type: text/plain, Size: 12054 bytes --]

On Wed, 2002-08-07 at 20:33, Linus Torvalds wrote:
> 
> On Wed, 7 Aug 2002, Ingo Molnar wrote:
> > 
> > the attached patch (against BK-curr + Luca Barbieri's two TLS patches)  
> > does two things:
> > 
> >  - it implements a second TLS entry for Wine's purposes.
> 
> Guys, I really don't like how the segment map ends up getting uglier and
> uglier.
> 
> I would suggest:
>  - move all kernel-related (and thus non-visible to user space) segments 
>    up, and make the cacheline optimizations _there_. 
Done.
>  - keep the TLS entries contiguous, and make sure that segment 0040 (ie
>    GDT entry #8) is available to a TLS entry, since if I remember
>    correctly, that one is also magical for old Windows binaries for all
>    the wrong reasons (ie it was some system data area in DOS and in 
>    Windows 3.1)
Done. Segment 0x40 set to CPL 3.
>  - and for cleanliness bonus points: make the regular user data segments 
>    just another TLS segment that just happens to have default values. If 
>    the user wants to screw with its own segments, let it.
Bad idea: makes task switch slower without any practical advantage.
The user may load a TLS or LDT selector in %ds to get the same effect.
> Then, for double extra bonus points somebody should look into whether
> those damn PnP BIOS segments could be simply made to be TLS segments
> during module init. I don't know if that PnP stuff is required later or
> not.
Not sure what you mean. The current definition of TLS segments is "a
minimal number of GDT entries that are modified on task switch and that
can be set on a per-task basis so that the selectors can be loaded into %fs
and %gs". How can kernel PNPBIOS segments fit in this definition?


The patch changes the descriptor layout so that LDT is in the kernel
segment cacheline, the 16-bit APM segments are together and user
segments are together. It also sets segment 0x40 CPL to 3.
__BOOT_CS and __BOOT_DS are introduced as the value of segment selectors
during boot (so that we don't have to enlarge the gdt in setup.s).

New layout:
 *   0 - null
 *   1 - PNPBIOS support (16->32 gate)
 *   2 - boot code segment
 *   3 - boot data segment
 *   4 - PNPBIOS support		<==== new cacheline
 *   5 - PNPBIOS support
 *   6 - PNPBIOS support
 *   7 - PNPBIOS support
 *   8 - APM BIOS support (0x400-0x1000)<==== new cacheline
 *   9 - APM BIOS support
 *  10 - APM BIOS support
 *  11 - APM BIOS support 
 *  12 - kernel code segment		<==== new cacheline
 *  13 - kernel data segment
 *  14 - TSS
 *  15 - LDT
 *  ------- start of user segments 
 *  16 - user code segment		<==== new cacheline
 *  17 - user data segment
 *  18 - Thread-Local Storage (TLS) segment #1
 *  19 - Thread-Local Storage (TLS) segment #2


diff --exclude-from=/home/ldb/src/linux-exclude -urNd a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S
--- a/arch/i386/boot/compressed/head.S	2002-07-20 21:12:21.000000000 +0200
+++ b/arch/i386/boot/compressed/head.S	2002-08-08 00:14:45.000000000 +0200
@@ -31,7 +31,7 @@
 startup_32:
 	cld
 	cli
-	movl $(__KERNEL_DS),%eax
+	movl $(__BOOT_DS),%eax
 	movl %eax,%ds
 	movl %eax,%es
 	movl %eax,%fs
@@ -74,7 +74,7 @@
 	popl %esi	# discard address
 	popl %esi	# real mode pointer
 	xorl %ebx,%ebx
-	ljmp $(__KERNEL_CS), $0x100000
+	ljmp $(__BOOT_CS), $0x100000
 
 /*
  * We come here, if we were loaded high.
@@ -101,7 +101,7 @@
 	popl %eax	# hcount
 	movl $0x100000,%edi
 	cli		# make sure we don't get interrupted
-	ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine
+	ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine
 
 /*
  * Routine (template) for moving the decompressed kernel in place,
@@ -124,5 +124,5 @@
 	movsl
 	movl %ebx,%esi	# Restore setup pointer
 	xorl %ebx,%ebx
-	ljmp $(__KERNEL_CS), $0x100000
+	ljmp $(__BOOT_CS), $0x100000
 move_routine_end:
diff --exclude-from=/home/ldb/src/linux-exclude -urNd a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c
--- a/arch/i386/boot/compressed/misc.c	2002-07-20 21:11:24.000000000 +0200
+++ b/arch/i386/boot/compressed/misc.c	2002-08-07 23:48:58.000000000 +0200
@@ -299,7 +299,7 @@
 struct {
 	long * a;
 	short b;
-	} stack_start = { & user_stack [STACK_SIZE] , __KERNEL_DS };
+	} stack_start = { & user_stack [STACK_SIZE] , __BOOT_DS };
 
 static void setup_normal_output_buffer(void)
 {
diff --exclude-from=/home/ldb/src/linux-exclude -urNd a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S
--- a/arch/i386/boot/setup.S	2002-07-20 21:11:05.000000000 +0200
+++ b/arch/i386/boot/setup.S	2002-08-08 00:14:30.000000000 +0200
@@ -801,7 +801,7 @@
 	subw	$DELTA_INITSEG, %si
 	shll	$4, %esi			# Convert to 32-bit pointer
 # NOTE: For high loaded big kernels we need a
-#	jmpi    0x100000,__KERNEL_CS
+#	jmpi    0x100000,__BOOT_CS
 #
 #	but we yet haven't reloaded the CS register, so the default size 
 #	of the target offset still is 16 bit.
@@ -812,7 +812,7 @@
 	.byte 0x66, 0xea			# prefix + jmpi-opcode
 code32:	.long	0x1000				# will be set to 0x100000
 						# for big kernels
-	.word	__KERNEL_CS
+	.word	__BOOT_CS
 
 # Here's a bunch of information about your current kernel..
 kernel_version:	.ascii	UTS_RELEASE
diff --exclude-from=/home/ldb/src/linux-exclude -urNd a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
--- a/arch/i386/kernel/head.S	2002-08-07 19:03:24.000000000 +0200
+++ b/arch/i386/kernel/head.S	2002-08-08 00:08:48.000000000 +0200
@@ -46,7 +46,7 @@
  * Set segments to known values
  */
 	cld
-	movl $(__KERNEL_DS),%eax
+	movl $(__BOOT_DS),%eax
 	movl %eax,%ds
 	movl %eax,%es
 	movl %eax,%fs
@@ -239,12 +239,7 @@
 	movl %eax,%es
 	movl %eax,%fs
 	movl %eax,%gs
-#ifdef CONFIG_SMP
-	movl $(__KERNEL_DS), %eax
-	movl %eax,%ss		# Reload the stack pointer (segment only)
-#else
-	lss stack_start,%esp	# Load processor stack
-#endif
+	movl %eax,%ss
 	xorl %eax,%eax
 	lldt %ax
 	cld			# gcc2 wants the direction flag cleared at all times
@@ -311,7 +306,7 @@
 
 ENTRY(stack_start)
 	.long init_thread_union+8192
-	.long __KERNEL_DS
+	.long __BOOT_DS
 
 /* This is the default interrupt "handler" :-) */
 int_msg:
@@ -415,31 +410,30 @@
  * The Global Descriptor Table contains 20 quadwords, per-CPU.
  */
 ENTRY(cpu_gdt_table)
-	.quad 0x0000000000000000	/* NULL descriptor */
-	.quad 0x0000000000000000	/* TSS descriptor */
-	.quad 0x00cf9a000000ffff	/* 0x10 kernel 4GB code at 0x00000000 */
-	.quad 0x00cf92000000ffff	/* 0x18 kernel 4GB data at 0x00000000 */
-	.quad 0x00cffa000000ffff	/* 0x23 user   4GB code at 0x00000000 */
-	.quad 0x00cff2000000ffff	/* 0x2b user   4GB data at 0x00000000 */
-	.quad 0x0000000000000000	/* TLS descriptor */
-	.quad 0x0000000000000000	/* LDT descriptor */
+	.quad 0x0000000000000000	/* 0x00 NULL descriptor */
+	.quad 0x00c09a0000000000	/* 0x08 PNPBIOS 32-bit code */	
+	.quad 0x00cf9a000000ffff	/* 0x10 boot 4GB code at 0x00000000 */
+	.quad 0x00cf92000000ffff	/* 0x18 boot 4GB data at 0x00000000 */
+	.quad 0x00809a0000000000	/* 0x20 PNPBIOS 16-bit code */
+	.quad 0x0080920000000000	/* 0x28 PNPBIOS 16-bit data */
+	.quad 0x0080920000000000	/* 0x30 PNPBIOS 16-bit data */
+	.quad 0x0080920000000000	/* 0x38 PNPBIOS 16-bit data */
 	/*
 	 * The APM segments have byte granularity and their bases
 	 * and limits are set at run time.
 	 */
-	.quad 0x0040920000000000	/* 0x40 APM set up for bad BIOS's */
+	.quad 0x0040f20000000000	/* 0x40 APM set up for bad BIOS's */
 	.quad 0x00409a0000000000	/* 0x48 APM CS    code */
 	.quad 0x00009a0000000000	/* 0x50 APM CS 16 code (16 bit) */
 	.quad 0x0040920000000000	/* 0x58 APM DS    data */
-	/* Segments used for calling PnP BIOS */
-	.quad 0x00c09a0000000000	/* 0x60 32-bit code */
-	.quad 0x00809a0000000000	/* 0x68 16-bit code */
-	.quad 0x0080920000000000	/* 0x70 16-bit data */
-	.quad 0x0080920000000000	/* 0x78 16-bit data */
-	.quad 0x0080920000000000	/* 0x80 16-bit data */
-	.quad 0x0000000000000000	/* 0x88 not used */
-	.quad 0x0000000000000000	/* 0x90 not used */
-	.quad 0x0000000000000000	/* 0x98 not used */
+	.quad 0x00cf9a000000ffff	/* 0x60 kernel 4GB code at 0x00000000 */
+	.quad 0x00cf92000000ffff	/* 0x68 kernel 4GB data at 0x00000000 */
+	.quad 0x0000000000000000	/* 0x70 TSS descriptor */	
+	.quad 0x0000000000000000	/* 0x78 LDT descriptor */
+	.quad 0x00cffa000000ffff	/* 0x80 user   4GB code at 0x00000000 */
+	.quad 0x00cff2000000ffff	/* 0x88 user   4GB data at 0x00000000 */
+	.quad 0x0000000000000000	/* 0x90 TLS1 descriptor */
+	.quad 0x0000000000000000	/* 0x98 TLS2 descriptor */
 
 #if CONFIG_SMP
 	.fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */
diff --exclude-from=/home/ldb/src/linux-exclude -urNd a/drivers/pnp/pnpbios_core.c b/drivers/pnp/pnpbios_core.c
--- a/drivers/pnp/pnpbios_core.c	2002-08-02 01:19:05.000000000 +0200
+++ b/drivers/pnp/pnpbios_core.c	2002-08-08 00:03:13.000000000 +0200
@@ -90,12 +90,13 @@
 static union pnp_bios_expansion_header * pnp_bios_hdr = NULL;
 
 /* The PnP BIOS entries in the GDT */
-#define PNP_GDT    (0x0060)
-#define PNP_CS32   (PNP_GDT+0x00)	/* segment for calling fn */
-#define PNP_CS16   (PNP_GDT+0x08)	/* code segment for BIOS */
-#define PNP_DS     (PNP_GDT+0x10)	/* data segment for BIOS */
-#define PNP_TS1    (PNP_GDT+0x18)	/* transfer data segment */
-#define PNP_TS2    (PNP_GDT+0x20)	/* another data segment */
+#define PNP_CS32   (0x08)	/* segment for calling fn */
+
+#define PNP_GDT    (0x20)
+#define PNP_CS16   (PNP_GDT+0x00)	/* code segment for BIOS */
+#define PNP_DS     (PNP_GDT+0x08)	/* data segment for BIOS */
+#define PNP_TS1    (PNP_GDT+0x10)	/* transfer data segment */
+#define PNP_TS2    (PNP_GDT+0x18)	/* another data segment */
 
 /* 
  * These are some opcodes for a "static asmlinkage"
diff --exclude-from=/home/ldb/src/linux-exclude -urNd a/include/asm-i386/desc.h b/include/asm-i386/desc.h
--- a/include/asm-i386/desc.h	2002-08-07 21:27:54.000000000 +0200
+++ b/include/asm-i386/desc.h	2002-08-08 00:12:01.000000000 +0200
@@ -7,30 +7,31 @@
  * The layout of the per-CPU GDT under Linux:
  *
  *   0 - null
- *   1 - TSS
- *   2 - kernel code segment
- *   3 - kernel data segment
- *   4 - user code segment		<==== new cacheline
- *   5 - user data segment
- *   6 - Thread-Local Storage (TLS) segment #1
- *   7 - LDT
- *   8 - APM BIOS support		<==== new cacheline
+ *   1 - PNPBIOS support (16->32 gate)
+ *   2 - boot code segment
+ *   3 - boot data segment
+ *   4 - PNPBIOS support		<==== new cacheline
+ *   5 - PNPBIOS support
+ *   6 - PNPBIOS support
+ *   7 - PNPBIOS support
+ *   8 - APM BIOS support (0x400-0x1000)<==== new cacheline
  *   9 - APM BIOS support
  *  10 - APM BIOS support
- *  11 - APM BIOS support
- *  12 - PNPBIOS support		<==== new cacheline
- *  13 - PNPBIOS support
- *  14 - PNPBIOS support
- *  15 - PNPBIOS support
- *  16 - PNPBIOS support		<==== new cacheline
- *  17 - TLS segment #2
- *  18 - not used
- *  19 - not used
+ *  11 - APM BIOS support 
+ *  12 - kernel code segment		<==== new cacheline
+ *  13 - kernel data segment
+ *  14 - TSS
+ *  15 - LDT
+ *  ------- start of user segments 
+ *  16 - user code segment		<==== new cacheline
+ *  17 - user data segment
+ *  18 - Thread-Local Storage (TLS) segment #1
+ *  19 - Thread-Local Storage (TLS) segment #2
  */
-#define TSS_ENTRY 1
-#define TLS_ENTRY1 6
-#define TLS_ENTRY2 17
-#define LDT_ENTRY 7
+#define TSS_ENTRY 14
+#define LDT_ENTRY 15
+#define TLS_ENTRY1 18
+#define TLS_ENTRY2 19
 /*
  * The interrupt descriptor table has room for 256 idt's,
  * the global descriptor table is dependent on the number
diff --exclude-from=/home/ldb/src/linux-exclude -urNd a/include/asm-i386/segment.h b/include/asm-i386/segment.h
--- a/include/asm-i386/segment.h	2002-07-20 21:11:11.000000000 +0200
+++ b/include/asm-i386/segment.h	2002-08-07 23:50:08.000000000 +0200
@@ -1,10 +1,13 @@
 #ifndef _ASM_SEGMENT_H
 #define _ASM_SEGMENT_H
 
-#define __KERNEL_CS	0x10
-#define __KERNEL_DS	0x18
+#define __BOOT_CS	0x10
+#define __BOOT_DS	0x18
 
-#define __USER_CS	0x23
-#define __USER_DS	0x2B
+#define __KERNEL_CS	0x60
+#define __KERNEL_DS	0x68
+
+#define __USER_CS	0x83
+#define __USER_DS	0x8B
 
 #endif

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.30-A1
  2002-08-07 22:36   ` Luca Barbieri
@ 2002-08-07 22:54     ` Ingo Molnar
  2002-08-07 23:21       ` Luca Barbieri
  0 siblings, 1 reply; 47+ messages in thread
From: Ingo Molnar @ 2002-08-07 22:54 UTC (permalink / raw)
  To: Luca Barbieri; +Cc: Linus Torvalds, Linux-Kernel ML


On 8 Aug 2002, Luca Barbieri wrote:

> > I would suggest:
> >  - move all kernel-related (and thus non-visible to user space) segments 
> >    up, and make the cacheline optimizations _there_. 
> Done.
> >  - keep the TLS entries contiguous, and make sure that segment 0040 (ie
> >    GDT entry #8) is available to a TLS entry, since if I remember
> >    correctly, that one is also magical for old Windows binaries for all
> >    the wrong reasons (ie it was some system data area in DOS and in 
> >    Windows 3.1)
> Done. Segment 0x40 set to CPL 3.
> >  - and for cleanliness bonus points: make the regular user data segments 
> >    just another TLS segment that just happens to have default values. If 
> >    the user wants to screw with its own segments, let it.
> Bad idea: makes task switch slower without any practical advantage.
> The user may load a TLS or LDT selector in %ds to get the same effect.

your patch looks good to me - as long as we want to keep those 2 TLS
entries and nothing more. (which i believe we want.) If even more TLS
entries are to be made possible then a cleaner TLS enumeration interface
has to be used like Christoph mentioned - although i dont think we really
want that, 3 or more entries would be a stretch i think.

	Ingo


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.30-A1
  2002-08-07 22:54     ` Ingo Molnar
@ 2002-08-07 23:21       ` Luca Barbieri
  2002-08-07 23:35         ` DMA Problems with Intel 845 Chipset and Northwood CPU Mark Cuss
  0 siblings, 1 reply; 47+ messages in thread
From: Luca Barbieri @ 2002-08-07 23:21 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Linus Torvalds, Linux-Kernel ML, Christoph Hellwig

[-- Attachment #1: Type: text/plain, Size: 2234 bytes --]

> your patch looks good to me - as long as we want to keep those 2 TLS
> entries and nothing more. (which i believe we want.) If even more TLS
> entries are to be made possible then a cleaner TLS enumeration interface
> has to be used like Christoph mentioned - although i dont think we really
> want that, 3 or more entries would be a stretch i think.
I think that 2 are enough.
Flat 32-bit programs set ds=es=ss=__USER_DS and cs=__USER_CS so they
only have fs and gs left.
16-bit programs and other odd ones can use the LDT support.

As for the interface I would suggest replacing the current one with a
single interface for LDT and GDT modifications that would provide the
following parameters:

unsigned table
- LDT
- GDTAVAIL: GDT starting from first TLS
- GDTABS: GDT starting from 0
- AUTO: starts with the 2 TLS entries and proceeds with LDT

unsigned operation
- set: copy to kernel space (enlarge table if necessary). If root, don't
check validity for speed, otherwise check to ensure the user is not e.g.
putting call gates to CPL 0 code.
- set1: like set, but passes a single entry directly in the num and ptr
parameters
- get: copy from kernel space
- free: free memory and lower limits. If entry = 0 and num = ~0,
completely frees table.
- map: only for LDT and for root, allows pointing directly to a user
memory range
- movekern: when support for per-task GDT is implemented, this would
allow changing the slots used for the kernel entries. This would be
implemented with per-CPU IDTs and maybe dynamically generated code.
Useful for virtualization programs.

unsigned entry
- first entry affected. ~0 for first unused entry.

unsigned num
- number of entries affected

void* ptr
- pointer to read/write entries from

(table and operations may be merged)

Return value: first entry changed

e.g. libpthread would use table = AUTO, operation = set1, entry = ~0.

For the LDT things would be implemented as usual. For the GDT the
initial implementation would just modify TLS entries.
In future, support for dynamically allocated per-task GDTs could be
added.

I would implement this by adding ops to sys_modify_ldt.

BTW, tls_desc1/tls_desc2 would IMHO be better as gdt_desc[2].

I don't plan to implement this myself.


[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 47+ messages in thread

* DMA Problems with Intel 845 Chipset and Northwood CPU
  2002-08-07 23:21       ` Luca Barbieri
@ 2002-08-07 23:35         ` Mark Cuss
  2002-08-08  0:58           ` John L. Korpi
  0 siblings, 1 reply; 47+ messages in thread
From: Mark Cuss @ 2002-08-07 23:35 UTC (permalink / raw)
  To: Linux-Kernel ML

Hello all,

Please accept my apologies if this question has already been answered....

I have a new Pentium 4 computer and I can't get the DMA working for hard
disk transfers.  Specifically, it's a Dell Dimension 4500 with a Pentium 4
2.26 GHz processor (Northwood) and an 845 (Brookdale) chipset.  RedHat
7.3....

I noticed that the hard disk transfers were very slow.  I tried to set up
DMA with hdparm - see below:

[root@yoda ide]# hdparm /dev/hda

/dev/hda:
 multcount    = 16 (on)
 I/O support  =  0 (default 16-bit)
 unmaskirq    =  0 (off)
 using_dma    =  0 (off)
 keepsettings =  0 (off)
 nowerr       =  0 (off)
 readonly     =  0 (off)
 readahead    =  8 (on)
 geometry     = 4866/255/63, sectors = 78177792, start = 0
 busstate     =  1 (on)
[root@yoda ide]# hdparm -d1 /dev/hda

/dev/hda:
 setting using_dma to 1 (on)
 HDIO_SET_DMA failed: Operation not permitted
 using_dma    =  0 (off)
[root@yoda ide]#

I thought that perhaps the chipset had changed between this Northwood
machine and the older core - I have a P4 1.8 GHz machine that DMA works fine
on.  I upgraded to kernel 2.4.19 with no change.

The IDE controller (according to Windows XP....) is an Intel 82801DB Ultra
ATA Storage Controller - 24CB.  I've included the lspci -vvx listing from
the problem machine below - my apologies for the long list.  If anyone has
any suggestions I'd really appreciate them...

Thanks....

Mark

lspci listing:

00:00.0 Host bridge: Intel Corp. 82845 845 (Brookdale) Chipset Host Bridge
(rev 11)
 Subsystem: Intel Corp. 82845 845 (Brookdale) Chipset Host Bridge
 Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
Stepping- SERR+ FastB2B-
 Status: Cap+ 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=fast >TAbort- <TAbort-
<MAbort+ >SERR- <PERR-
 Latency: 0
 Region 0: Memory at f0000000 (32-bit, prefetchable) [size=128M]
 Capabilities: [e4] #09 [a104]
 Capabilities: [a0] AGP version 2.0
  Status: RQ=31 SBA+ 64bit- FW+ Rate=x1,x2
  Command: RQ=0 SBA- AGP+ 64bit- FW- Rate=<none>
00: 86 80 30 1a 06 01 90 20 11 00 00 06 00 00 00 00
10: 08 00 00 f0 00 00 00 00 00 00 00 00 00 00 00 00
20: 00 00 00 00 00 00 00 00 00 00 00 00 86 80 30 1a
30: 00 00 00 00 e4 00 00 00 00 00 00 00 00 00 00 00

00:01.0 PCI bridge: Intel Corp. 82845 845 (Brookdale) Chipset AGP Bridge
(rev 11) (prog-if 00 [Normal decode])
 Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
Stepping- SERR+ FastB2B-
 Status: Cap- 66Mhz+ UDF- FastB2B+ ParErr- DEVSEL=fast >TAbort- <TAbort-
<MAbort- >SERR- <PERR-
 Latency: 64
 Bus: primary=00, secondary=01, subordinate=01, sec-latency=64
 Memory behind bridge: fc700000-fe7fffff
 Prefetchable memory behind bridge: dc300000-ec4fffff
 BridgeCtl: Parity- SERR+ NoISA- VGA+ MAbort- >Reset- FastB2B-
00: 86 80 31 1a 07 01 a0 00 11 00 04 06 00 40 01 00
10: 00 00 00 00 00 00 00 00 00 01 01 40 f0 00 a0 22
20: 70 fc 70 fe 30 dc 40 ec 00 00 00 00 00 00 00 00
30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0a 00

00:1d.0 USB Controller: Intel Corp.: Unknown device 24c2 (rev 01) (prog-if
00 [UHCI])
 Subsystem: Dell Computer Corporation: Unknown device 0132
 Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
Stepping- SERR- FastB2B-
 Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort-
<MAbort- >SERR- <PERR-
 Latency: 0
 Interrupt: pin A routed to IRQ 11
 Region 4: I/O ports at e800 [size=32]
00: 86 80 c2 24 05 00 80 02 01 00 03 0c 00 00 80 00
10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
20: 01 e8 00 00 00 00 00 00 00 00 00 00 28 10 32 01
30: 00 00 00 00 00 00 00 00 00 00 00 00 0b 01 00 00

00:1d.1 USB Controller: Intel Corp.: Unknown device 24c4 (rev 01) (prog-if
00 [UHCI])
 Subsystem: Dell Computer Corporation: Unknown device 0132
 Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
Stepping- SERR- FastB2B-
 Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort-
<MAbort- >SERR- <PERR-
 Latency: 0
 Interrupt: pin B routed to IRQ 5
 Region 4: I/O ports at e880 [size=32]
00: 86 80 c4 24 05 00 80 02 01 00 03 0c 00 00 00 00
10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
20: 81 e8 00 00 00 00 00 00 00 00 00 00 28 10 32 01
30: 00 00 00 00 00 00 00 00 00 00 00 00 05 02 00 00

00:1d.2 USB Controller: Intel Corp.: Unknown device 24c7 (rev 01) (prog-if
00 [UHCI])
 Subsystem: Dell Computer Corporation: Unknown device 0132
 Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
Stepping- SERR- FastB2B-
 Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort-
<MAbort- >SERR- <PERR-
 Latency: 0
 Interrupt: pin C routed to IRQ 9
 Region 4: I/O ports at ec00 [size=32]
00: 86 80 c7 24 05 00 80 02 01 00 03 0c 00 00 00 00
10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
20: 01 ec 00 00 00 00 00 00 00 00 00 00 28 10 32 01
30: 00 00 00 00 00 00 00 00 00 00 00 00 09 03 00 00

00:1d.7 USB Controller: Intel Corp.: Unknown device 24cd (rev 01) (prog-if
20 [EHCI])
 Subsystem: Dell Computer Corporation: Unknown device 0132
 Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
Stepping- SERR+ FastB2B-
 Status: Cap+ 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort-
<MAbort- >SERR- <PERR-
 Latency: 0
 Interrupt: pin D routed to IRQ 10
 Region 0: Memory at febffc00 (32-bit, non-prefetchable) [size=1K]
 Capabilities: [50] Power Management version 2
  Flags: PMEClk- DSI- D1- D2- AuxCurrent=375mA
PME(D0+,D1-,D2-,D3hot+,D3cold+)
  Status: D0 PME-Enable- DSel=0 DScale=0 PME-
 Capabilities: [58] #0a [2080]
00: 86 80 cd 24 06 01 90 02 01 20 03 0c 00 00 00 00
10: 00 fc bf fe 00 00 00 00 00 00 00 00 00 00 00 00
20: 00 00 00 00 00 00 00 00 00 00 00 00 28 10 32 01
30: 00 00 00 00 50 00 00 00 00 00 00 00 0a 04 00 00

00:1e.0 PCI bridge: Intel Corp. 82801BA/CA PCI Bridge (rev 81) (prog-if 00
[Normal decode])
 Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
Stepping- SERR+ FastB2B-
 Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=fast >TAbort- <TAbort-
<MAbort- >SERR- <PERR+
 Latency: 0
 Bus: primary=00, secondary=02, subordinate=02, sec-latency=32
 I/O behind bridge: 0000d000-0000dfff
 Memory behind bridge: fe800000-feafffff
 Prefetchable memory behind bridge: ec500000-ec5fffff
 BridgeCtl: Parity- SERR+ NoISA+ VGA- MAbort- >Reset- FastB2B-
00: 86 80 4e 24 07 01 80 80 81 00 04 06 00 00 01 00
10: 00 00 00 00 00 00 00 00 00 02 02 20 d0 d0 80 22
20: 80 fe a0 fe 50 ec 50 ec 00 00 00 00 00 00 00 00
30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 06 00

00:1f.0 ISA bridge: Intel Corp.: Unknown device 24c0 (rev 01)
 Control: I/O+ Mem+ BusMaster+ SpecCycle+ MemWINV- VGASnoop- ParErr-
Stepping- SERR+ FastB2B-
 Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort-
<MAbort- >SERR- <PERR-
 Latency: 0
00: 86 80 c0 24 0f 01 80 02 01 00 01 06 00 00 80 00
10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
20: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00

00:1f.1 IDE interface: Intel Corp.: Unknown device 24cb (rev 01) (prog-if 8a
[Master SecP PriP])
 Subsystem: Dell Computer Corporation: Unknown device 0132
 Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
Stepping- SERR- FastB2B-
 Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort-
<MAbort- >SERR- <PERR-
 Latency: 0
 Interrupt: pin A routed to IRQ 9
 Region 0: I/O ports at <unassigned> [size=8]
 Region 1: I/O ports at <unassigned> [size=4]
 Region 2: I/O ports at <unassigned> [size=8]
 Region 3: I/O ports at <unassigned> [size=4]
 Region 4: I/O ports at ffa0 [size=16]
 Region 5: Memory at 20000000 (32-bit, non-prefetchable) [disabled]
[size=1K]
00: 86 80 cb 24 05 00 80 02 01 8a 01 01 00 00 00 00
10: 01 00 00 00 01 00 00 00 01 00 00 00 01 00 00 00
20: a1 ff 00 00 00 00 00 20 00 00 00 00 28 10 32 01
30: 00 00 00 00 00 00 00 00 00 00 00 00 ff 01 00 00

00:1f.3 SMBus: Intel Corp.: Unknown device 24c3 (rev 01)
 Subsystem: Dell Computer Corporation: Unknown device 0132
 Control: I/O+ Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr-
Stepping- SERR- FastB2B-
 Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort-
<MAbort- >SERR- <PERR-
 Interrupt: pin B routed to IRQ 3
 Region 4: I/O ports at e480 [size=32]
00: 86 80 c3 24 01 00 80 02 01 00 05 0c 00 00 00 00
10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
20: 81 e4 00 00 00 00 00 00 00 00 00 00 28 10 32 01
30: 00 00 00 00 00 00 00 00 00 00 00 00 03 02 00 00

01:00.0 VGA compatible controller: nVidia Corporation NV25 [GeForce4 Ti4200]
(rev a3) (prog-if 00 [VGA])
 Subsystem: nVidia Corporation: Unknown device 0132
 Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
Stepping- SERR- FastB2B-
 Status: Cap+ 66Mhz+ UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort-
<MAbort- >SERR- <PERR-
 Latency: 248 (1250ns min, 250ns max)
 Interrupt: pin A routed to IRQ 11
 Region 0: Memory at fd000000 (32-bit, non-prefetchable) [size=16M]
 Region 1: Memory at e0000000 (32-bit, prefetchable) [size=128M]
 Region 2: Memory at ec480000 (32-bit, prefetchable) [size=512K]
 Expansion ROM at fe7e0000 [disabled] [size=128K]
 Capabilities: [60] Power Management version 2
  Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot-,D3cold-)
  Status: D0 PME-Enable- DSel=0 DScale=0 PME-
 Capabilities: [44] AGP version 2.0
  Status: RQ=31 SBA+ 64bit- FW+ Rate=x1,x2
  Command: RQ=31 SBA- AGP+ 64bit- FW- Rate=<none>
00: de 10 53 02 07 00 b0 02 a3 00 00 03 00 f8 00 00
10: 00 00 00 fd 08 00 00 e0 08 00 48 ec 00 00 00 00
20: 00 00 00 00 00 00 00 00 00 00 00 00 de 10 32 01
30: 00 00 00 00 60 00 00 00 00 00 00 00 0b 01 05 01

02:00.0 Ethernet controller: Intel Corp. 82557/8/9 [Ethernet Pro 100] (rev
10)
 Subsystem: Intel Corp.: Unknown device 0071
 Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV+ VGASnoop- ParErr-
Stepping- SERR+ FastB2B-
 Status: Cap+ 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort-
<MAbort- >SERR- <PERR-
 Latency: 64 (2000ns min, 14000ns max), cache line size 08
 Interrupt: pin A routed to IRQ 11
 Region 0: Memory at feaff000 (32-bit, non-prefetchable) [size=4K]
 Region 1: I/O ports at dc00 [size=64]
 Region 2: Memory at feac0000 (32-bit, non-prefetchable) [size=128K]
 Expansion ROM at feae0000 [disabled] [size=64K]
 Capabilities: [dc] Power Management version 2
  Flags: PMEClk- DSI+ D1+ D2+ AuxCurrent=0mA PME(D0+,D1+,D2+,D3hot+,D3cold+)
  Status: D0 PME-Enable- DSel=0 DScale=2 PME-
00: 86 80 29 12 17 01 90 02 10 00 00 02 08 40 00 00
10: 00 f0 af fe 01 dc 00 00 00 00 ac fe 00 00 00 00
20: 00 00 00 00 00 00 00 00 00 00 00 00 86 80 71 00
30: 00 00 ae fe dc 00 00 00 00 00 00 00 0b 01 08 38

02:02.0 Multimedia audio controller: Cirrus Logic CS 4614/22/24
[CrystalClear SoundFusion Audio Accelerator] (rev 01)
 Subsystem: Voyetra Technologies: Unknown device 3357
 Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
Stepping- SERR+ FastB2B-
 Status: Cap+ 66Mhz- UDF- FastB2B- ParErr- DEVSEL=slow >TAbort- <TAbort-
<MAbort- >SERR- <PERR-
 Latency: 64 (1000ns min, 6000ns max)
 Interrupt: pin A routed to IRQ 9
 Region 0: Memory at feafd000 (32-bit, non-prefetchable) [size=4K]
 Region 1: Memory at fe900000 (32-bit, non-prefetchable) [size=1M]
 Capabilities: [40] Power Management version 2
  Flags: PMEClk- DSI+ D1+ D2+ AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot-,D3cold-)
  Status: D0 PME-Enable- DSel=0 DScale=0 PME-
00: 13 10 03 60 06 01 10 04 01 00 01 04 00 40 00 00
10: 00 d0 af fe 00 00 90 fe 00 00 00 00 00 00 00 00
20: 00 00 00 00 00 00 00 00 00 00 00 00 53 50 57 33
30: 00 00 00 00 40 00 00 00 00 00 00 00 09 01 04 18



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: DMA Problems with Intel 845 Chipset and Northwood CPU
  2002-08-07 23:35         ` DMA Problems with Intel 845 Chipset and Northwood CPU Mark Cuss
@ 2002-08-08  0:58           ` John L. Korpi
  2002-08-08 16:12             ` Mark Cuss
  0 siblings, 1 reply; 47+ messages in thread
From: John L. Korpi @ 2002-08-08  0:58 UTC (permalink / raw)
  To: mcuss, Linux-Kernel ML

Had a similar problem with similar chipsets.  2.4.19-ac4 fixed the problem.

Cheers

jlk

On Wednesday 07 August 2002 07:35 pm, Mark Cuss wrote:
> Hello all,
>
> Please accept my apologies if this question has already been answered....
>
> I have a new Pentium 4 computer and I can't get the DMA working for hard
> disk transfers.  Specifically, it's a Dell Dimension 4500 with a Pentium 4
> 2.26 GHz processor (Northwood) and an 845 (Brookdale) chipset.  RedHat
> 7.3....
>
> I noticed that the hard disk transfers were very slow.  I tried to set up
> DMA with hdparm - see below:
>
> [root@yoda ide]# hdparm /dev/hda
>
> /dev/hda:
>  multcount    = 16 (on)
>  I/O support  =  0 (default 16-bit)
>  unmaskirq    =  0 (off)
>  using_dma    =  0 (off)
>  keepsettings =  0 (off)
>  nowerr       =  0 (off)
>  readonly     =  0 (off)
>  readahead    =  8 (on)
>  geometry     = 4866/255/63, sectors = 78177792, start = 0
>  busstate     =  1 (on)
> [root@yoda ide]# hdparm -d1 /dev/hda
>
> /dev/hda:
>  setting using_dma to 1 (on)
>  HDIO_SET_DMA failed: Operation not permitted
>  using_dma    =  0 (off)
> [root@yoda ide]#
>
> I thought that perhaps the chipset had changed between this Northwood
> machine and the older core - I have a P4 1.8 GHz machine that DMA works fine
> on.  I upgraded to kernel 2.4.19 with no change.
>
> The IDE controller (according to Windows XP....) is an Intel 82801DB Ultra
> ATA Storage Controller - 24CB.  I've included the lspci -vvx listing from
> the problem machine below - my apologies for the long list.  If anyone has
> any suggestions I'd really appreciate them...
>
> Thanks....
>
> Mark
>
> lspci listing:
>
> 00:00.0 Host bridge: Intel Corp. 82845 845 (Brookdale) Chipset Host Bridge
> (rev 11)
>  Subsystem: Intel Corp. 82845 845 (Brookdale) Chipset Host Bridge
>  Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
> Stepping- SERR+ FastB2B-
>  Status: Cap+ 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=fast >TAbort- <TAbort-
> <MAbort+ >SERR- <PERR-
>  Latency: 0
>  Region 0: Memory at f0000000 (32-bit, prefetchable) [size=128M]
>  Capabilities: [e4] #09 [a104]
>  Capabilities: [a0] AGP version 2.0
>   Status: RQ=31 SBA+ 64bit- FW+ Rate=x1,x2
>   Command: RQ=0 SBA- AGP+ 64bit- FW- Rate=<none>
> 00: 86 80 30 1a 06 01 90 20 11 00 00 06 00 00 00 00
> 10: 08 00 00 f0 00 00 00 00 00 00 00 00 00 00 00 00
> 20: 00 00 00 00 00 00 00 00 00 00 00 00 86 80 30 1a
> 30: 00 00 00 00 e4 00 00 00 00 00 00 00 00 00 00 00
>
> 00:01.0 PCI bridge: Intel Corp. 82845 845 (Brookdale) Chipset AGP Bridge
> (rev 11) (prog-if 00 [Normal decode])
>  Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
> Stepping- SERR+ FastB2B-
>  Status: Cap- 66Mhz+ UDF- FastB2B+ ParErr- DEVSEL=fast >TAbort- <TAbort-
> <MAbort- >SERR- <PERR-
>  Latency: 64
>  Bus: primary=00, secondary=01, subordinate=01, sec-latency=64
>  Memory behind bridge: fc700000-fe7fffff
>  Prefetchable memory behind bridge: dc300000-ec4fffff
>  BridgeCtl: Parity- SERR+ NoISA- VGA+ MAbort- >Reset- FastB2B-
> 00: 86 80 31 1a 07 01 a0 00 11 00 04 06 00 40 01 00
> 10: 00 00 00 00 00 00 00 00 00 01 01 40 f0 00 a0 22
> 20: 70 fc 70 fe 30 dc 40 ec 00 00 00 00 00 00 00 00
> 30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0a 00
>
> 00:1d.0 USB Controller: Intel Corp.: Unknown device 24c2 (rev 01) (prog-if
> 00 [UHCI])
>  Subsystem: Dell Computer Corporation: Unknown device 0132
>  Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
> Stepping- SERR- FastB2B-
>  Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort-
> <MAbort- >SERR- <PERR-
>  Latency: 0
>  Interrupt: pin A routed to IRQ 11
>  Region 4: I/O ports at e800 [size=32]
> 00: 86 80 c2 24 05 00 80 02 01 00 03 0c 00 00 80 00
> 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> 20: 01 e8 00 00 00 00 00 00 00 00 00 00 28 10 32 01
> 30: 00 00 00 00 00 00 00 00 00 00 00 00 0b 01 00 00
>
> 00:1d.1 USB Controller: Intel Corp.: Unknown device 24c4 (rev 01) (prog-if
> 00 [UHCI])
>  Subsystem: Dell Computer Corporation: Unknown device 0132
>  Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
> Stepping- SERR- FastB2B-
>  Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort-
> <MAbort- >SERR- <PERR-
>  Latency: 0
>  Interrupt: pin B routed to IRQ 5
>  Region 4: I/O ports at e880 [size=32]
> 00: 86 80 c4 24 05 00 80 02 01 00 03 0c 00 00 00 00
> 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> 20: 81 e8 00 00 00 00 00 00 00 00 00 00 28 10 32 01
> 30: 00 00 00 00 00 00 00 00 00 00 00 00 05 02 00 00
>
> 00:1d.2 USB Controller: Intel Corp.: Unknown device 24c7 (rev 01) (prog-if
> 00 [UHCI])
>  Subsystem: Dell Computer Corporation: Unknown device 0132
>  Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
> Stepping- SERR- FastB2B-
>  Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort-
> <MAbort- >SERR- <PERR-
>  Latency: 0
>  Interrupt: pin C routed to IRQ 9
>  Region 4: I/O ports at ec00 [size=32]
> 00: 86 80 c7 24 05 00 80 02 01 00 03 0c 00 00 00 00
> 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> 20: 01 ec 00 00 00 00 00 00 00 00 00 00 28 10 32 01
> 30: 00 00 00 00 00 00 00 00 00 00 00 00 09 03 00 00
>
> 00:1d.7 USB Controller: Intel Corp.: Unknown device 24cd (rev 01) (prog-if
> 20 [EHCI])
>  Subsystem: Dell Computer Corporation: Unknown device 0132
>  Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
> Stepping- SERR+ FastB2B-
>  Status: Cap+ 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort-
> <MAbort- >SERR- <PERR-
>  Latency: 0
>  Interrupt: pin D routed to IRQ 10
>  Region 0: Memory at febffc00 (32-bit, non-prefetchable) [size=1K]
>  Capabilities: [50] Power Management version 2
>   Flags: PMEClk- DSI- D1- D2- AuxCurrent=375mA
> PME(D0+,D1-,D2-,D3hot+,D3cold+)
>   Status: D0 PME-Enable- DSel=0 DScale=0 PME-
>  Capabilities: [58] #0a [2080]
> 00: 86 80 cd 24 06 01 90 02 01 20 03 0c 00 00 00 00
> 10: 00 fc bf fe 00 00 00 00 00 00 00 00 00 00 00 00
> 20: 00 00 00 00 00 00 00 00 00 00 00 00 28 10 32 01
> 30: 00 00 00 00 50 00 00 00 00 00 00 00 0a 04 00 00
>
> 00:1e.0 PCI bridge: Intel Corp. 82801BA/CA PCI Bridge (rev 81) (prog-if 00
> [Normal decode])
>  Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
> Stepping- SERR+ FastB2B-
>  Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=fast >TAbort- <TAbort-
> <MAbort- >SERR- <PERR+
>  Latency: 0
>  Bus: primary=00, secondary=02, subordinate=02, sec-latency=32
>  I/O behind bridge: 0000d000-0000dfff
>  Memory behind bridge: fe800000-feafffff
>  Prefetchable memory behind bridge: ec500000-ec5fffff
>  BridgeCtl: Parity- SERR+ NoISA+ VGA- MAbort- >Reset- FastB2B-
> 00: 86 80 4e 24 07 01 80 80 81 00 04 06 00 00 01 00
> 10: 00 00 00 00 00 00 00 00 00 02 02 20 d0 d0 80 22
> 20: 80 fe a0 fe 50 ec 50 ec 00 00 00 00 00 00 00 00
> 30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 06 00
>
> 00:1f.0 ISA bridge: Intel Corp.: Unknown device 24c0 (rev 01)
>  Control: I/O+ Mem+ BusMaster+ SpecCycle+ MemWINV- VGASnoop- ParErr-
> Stepping- SERR+ FastB2B-
>  Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort-
> <MAbort- >SERR- <PERR-
>  Latency: 0
> 00: 86 80 c0 24 0f 01 80 02 01 00 01 06 00 00 80 00
> 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> 20: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> 30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
>
> 00:1f.1 IDE interface: Intel Corp.: Unknown device 24cb (rev 01) (prog-if
> 8a [Master SecP PriP])
>  Subsystem: Dell Computer Corporation: Unknown device 0132
>  Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
> Stepping- SERR- FastB2B-
>  Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort-
> <MAbort- >SERR- <PERR-
>  Latency: 0
>  Interrupt: pin A routed to IRQ 9
>  Region 0: I/O ports at <unassigned> [size=8]
>  Region 1: I/O ports at <unassigned> [size=4]
>  Region 2: I/O ports at <unassigned> [size=8]
>  Region 3: I/O ports at <unassigned> [size=4]
>  Region 4: I/O ports at ffa0 [size=16]
>  Region 5: Memory at 20000000 (32-bit, non-prefetchable) [disabled]
> [size=1K]
> 00: 86 80 cb 24 05 00 80 02 01 8a 01 01 00 00 00 00
> 10: 01 00 00 00 01 00 00 00 01 00 00 00 01 00 00 00
> 20: a1 ff 00 00 00 00 00 20 00 00 00 00 28 10 32 01
> 30: 00 00 00 00 00 00 00 00 00 00 00 00 ff 01 00 00
>
> 00:1f.3 SMBus: Intel Corp.: Unknown device 24c3 (rev 01)
>  Subsystem: Dell Computer Corporation: Unknown device 0132
>  Control: I/O+ Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr-
> Stepping- SERR- FastB2B-
>  Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort-
> <MAbort- >SERR- <PERR-
>  Interrupt: pin B routed to IRQ 3
>  Region 4: I/O ports at e480 [size=32]
> 00: 86 80 c3 24 01 00 80 02 01 00 05 0c 00 00 00 00
> 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> 20: 81 e4 00 00 00 00 00 00 00 00 00 00 28 10 32 01
> 30: 00 00 00 00 00 00 00 00 00 00 00 00 03 02 00 00
>
> 01:00.0 VGA compatible controller: nVidia Corporation NV25 [GeForce4
> Ti4200] (rev a3) (prog-if 00 [VGA])
>  Subsystem: nVidia Corporation: Unknown device 0132
>  Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
> Stepping- SERR- FastB2B-
>  Status: Cap+ 66Mhz+ UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort-
> <MAbort- >SERR- <PERR-
>  Latency: 248 (1250ns min, 250ns max)
>  Interrupt: pin A routed to IRQ 11
>  Region 0: Memory at fd000000 (32-bit, non-prefetchable) [size=16M]
>  Region 1: Memory at e0000000 (32-bit, prefetchable) [size=128M]
>  Region 2: Memory at ec480000 (32-bit, prefetchable) [size=512K]
>  Expansion ROM at fe7e0000 [disabled] [size=128K]
>  Capabilities: [60] Power Management version 2
>   Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA
> PME(D0-,D1-,D2-,D3hot-,D3cold-) Status: D0 PME-Enable- DSel=0 DScale=0 PME-
>  Capabilities: [44] AGP version 2.0
>   Status: RQ=31 SBA+ 64bit- FW+ Rate=x1,x2
>   Command: RQ=31 SBA- AGP+ 64bit- FW- Rate=<none>
> 00: de 10 53 02 07 00 b0 02 a3 00 00 03 00 f8 00 00
> 10: 00 00 00 fd 08 00 00 e0 08 00 48 ec 00 00 00 00
> 20: 00 00 00 00 00 00 00 00 00 00 00 00 de 10 32 01
> 30: 00 00 00 00 60 00 00 00 00 00 00 00 0b 01 05 01
>
> 02:00.0 Ethernet controller: Intel Corp. 82557/8/9 [Ethernet Pro 100] (rev
> 10)
>  Subsystem: Intel Corp.: Unknown device 0071
>  Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV+ VGASnoop- ParErr-
> Stepping- SERR+ FastB2B-
>  Status: Cap+ 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort-
> <MAbort- >SERR- <PERR-
>  Latency: 64 (2000ns min, 14000ns max), cache line size 08
>  Interrupt: pin A routed to IRQ 11
>  Region 0: Memory at feaff000 (32-bit, non-prefetchable) [size=4K]
>  Region 1: I/O ports at dc00 [size=64]
>  Region 2: Memory at feac0000 (32-bit, non-prefetchable) [size=128K]
>  Expansion ROM at feae0000 [disabled] [size=64K]
>  Capabilities: [dc] Power Management version 2
>   Flags: PMEClk- DSI+ D1+ D2+ AuxCurrent=0mA
> PME(D0+,D1+,D2+,D3hot+,D3cold+) Status: D0 PME-Enable- DSel=0 DScale=2 PME-
> 00: 86 80 29 12 17 01 90 02 10 00 00 02 08 40 00 00
> 10: 00 f0 af fe 01 dc 00 00 00 00 ac fe 00 00 00 00
> 20: 00 00 00 00 00 00 00 00 00 00 00 00 86 80 71 00
> 30: 00 00 ae fe dc 00 00 00 00 00 00 00 0b 01 08 38
>
> 02:02.0 Multimedia audio controller: Cirrus Logic CS 4614/22/24
> [CrystalClear SoundFusion Audio Accelerator] (rev 01)
>  Subsystem: Voyetra Technologies: Unknown device 3357
>  Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
> Stepping- SERR+ FastB2B-
>  Status: Cap+ 66Mhz- UDF- FastB2B- ParErr- DEVSEL=slow >TAbort- <TAbort-
> <MAbort- >SERR- <PERR-
>  Latency: 64 (1000ns min, 6000ns max)
>  Interrupt: pin A routed to IRQ 9
>  Region 0: Memory at feafd000 (32-bit, non-prefetchable) [size=4K]
>  Region 1: Memory at fe900000 (32-bit, non-prefetchable) [size=1M]
>  Capabilities: [40] Power Management version 2
>   Flags: PMEClk- DSI+ D1+ D2+ AuxCurrent=0mA
> PME(D0-,D1-,D2-,D3hot-,D3cold-) Status: D0 PME-Enable- DSel=0 DScale=0 PME-
> 00: 13 10 03 60 06 01 10 04 01 00 01 04 00 40 00 00
> 10: 00 d0 af fe 00 00 90 fe 00 00 00 00 00 00 00 00
> 20: 00 00 00 00 00 00 00 00 00 00 00 00 53 50 57 33
> 30: 00 00 00 00 40 00 00 00 00 00 00 00 09 01 04 18
>
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

-- 
John L. Korpi, Ph.D. -- Senior Scientist, Networks & Infrastructure
NeuStar, Inc.
Voice:  1.216.241.2919, 1.703.435.0682
Mobile: 1.216.233.3042    Pager: 1.800.398.2959


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.30-A1
  2002-08-07 18:10 [patch] tls-2.5.30-A1 Ingo Molnar
  2002-08-07 18:33 ` Linus Torvalds
  2002-08-07 19:02 ` [patch] tls-2.5.30-A1 Christoph Hellwig
@ 2002-08-08 12:25 ` Jamie Lokier
  2 siblings, 0 replies; 47+ messages in thread
From: Jamie Lokier @ 2002-08-08 12:25 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Linus Torvalds, linux-kernel, Alexandre Julliard

Ingo Molnar wrote:
> the attached patch (against BK-curr + Luca Barbieri's two TLS patches)  
> does two things:
> 
>  - it implements a second TLS entry for Wine's purposes.

Oh good; I was going to ask for this.  Wine isn't the only program that
wants to use its own thread-local storage mechanism and link with Glibc
at the same time.

The LDT works, but with limitations and overhead.

thanks,
-- Jamie

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: DMA Problems with Intel 845 Chipset and Northwood CPU
  2002-08-08  0:58           ` John L. Korpi
@ 2002-08-08 16:12             ` Mark Cuss
  0 siblings, 0 replies; 47+ messages in thread
From: Mark Cuss @ 2002-08-08 16:12 UTC (permalink / raw)
  To: jkorpi, Linux-Kernel ML

Thanks - that did the trick - the system runs much better with DMA on the
hard disk :)

Does anyone know if this patch will be included in the 2.4.20 release
kernel?

Thanks

Mark

----- Original Message -----
From: "John L. Korpi" <jkorpi@mindspring.com>
To: <mcuss@cdlsystems.com>; "Linux-Kernel ML" <linux-kernel@vger.kernel.org>
Sent: Wednesday, August 07, 2002 6:58 PM
Subject: Re: DMA Problems with Intel 845 Chipset and Northwood CPU


> Had a similar problem with similar chipsets.  2.4.19-ac4 fixed the
problem.
>
> Cheers
>
> jlk
>
> On Wednesday 07 August 2002 07:35 pm, Mark Cuss wrote:
> > Hello all,
> >
> > Please accept my apologies if this question has already been
answered....
> >
> > I have a new Pentium 4 computer and I can't get the DMA working for hard
> > disk transfers.  Specifically, its a Dell Dimension 4500 with a Pentium
4
> > 2.26 GHz processor (Northwood) and an 845 (Brookdale) chipset.  RedHat
> > 7.3....
> >
> > I noticed that the hard disk transfers were very slow.  I tried to set
up
> > DMA with hdparm - see below:
> >
> > [root@yoda ide]# hdparm /dev/hda
> >
> > /dev/hda:
> >  multcount    = 16 (on)
> >  I/O support  =  0 (default 16-bit)
> >  unmaskirq    =  0 (off)
> >  using_dma    =  0 (off)
> >  keepsettings =  0 (off)
> >  nowerr       =  0 (off)
> >  readonly     =  0 (off)
> >  readahead    =  8 (on)
> >  geometry     = 4866/255/63, sectors = 78177792, start = 0
> >  busstate     =  1 (on)
> > [root@yoda ide]# hdparm -d1 /dev/hda
> >
> > /dev/hda:
> >  setting using_dma to 1 (on)
> >  HDIO_SET_DMA failed: Operation not permitted
> >  using_dma    =  0 (off)
> > [root@yoda ide]#
> >
> > I thought that perhaps the chipset had changed between this Northwood
> > machine an the older core - I have a P4 1.8 GHz machine that DMA works
fine
> > on.  I upgraded to kernel 2.4.19 with no change.
> >
> > The IDE controller (according to Windows XP....) is an Intel 82801DB
Ultra
> > ATA Storage Controller - 24CB.  I've included the lspci --vvx listing
from
> > the problem machine below - my aplogies for the long list.  If anyone
has
> > any suggestions I'd really appreciate them...
> >
> > Thanks....
> >
> > Mark
> >
> > lspci listing:
> >
> > 00:00.0 Host bridge: Intel Corp. 82845 845 (Brookdale) Chipset Host
Bridge
> > (rev 11)
> >  Subsystem: Intel Corp. 82845 845 (Brookdale) Chipset Host Bridge
> >  Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
> > Stepping- SERR+ FastB2B-
> >  Status: Cap+ 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=fast >TAbort- <TAbort-
> > <MAbort+ >SERR- <PERR-
> >  Latency: 0
> >  Region 0: Memory at f0000000 (32-bit, prefetchable) [size=128M]
> >  Capabilities: [e4] #09 [a104]
> >  Capabilities: [a0] AGP version 2.0
> >   Status: RQ=31 SBA+ 64bit- FW+ Rate=x1,x2
> >   Command: RQ=0 SBA- AGP+ 64bit- FW- Rate=<none>
> > 00: 86 80 30 1a 06 01 90 20 11 00 00 06 00 00 00 00
> > 10: 08 00 00 f0 00 00 00 00 00 00 00 00 00 00 00 00
> > 20: 00 00 00 00 00 00 00 00 00 00 00 00 86 80 30 1a
> > 30: 00 00 00 00 e4 00 00 00 00 00 00 00 00 00 00 00
> >
> > 00:01.0 PCI bridge: Intel Corp. 82845 845 (Brookdale) Chipset AGP Bridge
> > (rev 11) (prog-if 00 [Normal decode])
> >  Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
> > Stepping- SERR+ FastB2B-
> >  Status: Cap- 66Mhz+ UDF- FastB2B+ ParErr- DEVSEL=fast >TAbort- <TAbort-
> > <MAbort- >SERR- <PERR-
> >  Latency: 64
> >  Bus: primary=00, secondary=01, subordinate=01, sec-latency=64
> >  Memory behind bridge: fc700000-fe7fffff
> >  Prefetchable memory behind bridge: dc300000-ec4fffff
> >  BridgeCtl: Parity- SERR+ NoISA- VGA+ MAbort- >Reset- FastB2B-
> > 00: 86 80 31 1a 07 01 a0 00 11 00 04 06 00 40 01 00
> > 10: 00 00 00 00 00 00 00 00 00 01 01 40 f0 00 a0 22
> > 20: 70 fc 70 fe 30 dc 40 ec 00 00 00 00 00 00 00 00
> > 30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0a 00
> >
> > 00:1d.0 USB Controller: Intel Corp.: Unknown device 24c2 (rev 01)
(prog-if
> > 00 [UHCI])
> >  Subsystem: Dell Computer Corporation: Unknown device 0132
> >  Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
> > Stepping- SERR- FastB2B-
> >  Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort-
<TAbort-
> > <MAbort- >SERR- <PERR-
> >  Latency: 0
> >  Interrupt: pin A routed to IRQ 11
> >  Region 4: I/O ports at e800 [size=32]
> > 00: 86 80 c2 24 05 00 80 02 01 00 03 0c 00 00 80 00
> > 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> > 20: 01 e8 00 00 00 00 00 00 00 00 00 00 28 10 32 01
> > 30: 00 00 00 00 00 00 00 00 00 00 00 00 0b 01 00 00
> >
> > 00:1d.1 USB Controller: Intel Corp.: Unknown device 24c4 (rev 01)
(prog-if
> > 00 [UHCI])
> >  Subsystem: Dell Computer Corporation: Unknown device 0132
> >  Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
> > Stepping- SERR- FastB2B-
> >  Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort-
<TAbort-
> > <MAbort- >SERR- <PERR-
> >  Latency: 0
> >  Interrupt: pin B routed to IRQ 5
> >  Region 4: I/O ports at e880 [size=32]
> > 00: 86 80 c4 24 05 00 80 02 01 00 03 0c 00 00 00 00
> > 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> > 20: 81 e8 00 00 00 00 00 00 00 00 00 00 28 10 32 01
> > 30: 00 00 00 00 00 00 00 00 00 00 00 00 05 02 00 00
> >
> > 00:1d.2 USB Controller: Intel Corp.: Unknown device 24c7 (rev 01)
(prog-if
> > 00 [UHCI])
> >  Subsystem: Dell Computer Corporation: Unknown device 0132
> >  Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
> > Stepping- SERR- FastB2B-
> >  Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort-
<TAbort-
> > <MAbort- >SERR- <PERR-
> >  Latency: 0
> >  Interrupt: pin C routed to IRQ 9
> >  Region 4: I/O ports at ec00 [size=32]
> > 00: 86 80 c7 24 05 00 80 02 01 00 03 0c 00 00 00 00
> > 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> > 20: 01 ec 00 00 00 00 00 00 00 00 00 00 28 10 32 01
> > 30: 00 00 00 00 00 00 00 00 00 00 00 00 09 03 00 00
> >
> > 00:1d.7 USB Controller: Intel Corp.: Unknown device 24cd (rev 01)
(prog-if
> > 20 [EHCI])
> >  Subsystem: Dell Computer Corporation: Unknown device 0132
> >  Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
> > Stepping- SERR+ FastB2B-
> >  Status: Cap+ 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort-
<TAbort-
> > <MAbort- >SERR- <PERR-
> >  Latency: 0
> >  Interrupt: pin D routed to IRQ 10
> >  Region 0: Memory at febffc00 (32-bit, non-prefetchable) [size=1K]
> >  Capabilities: [50] Power Management version 2
> >   Flags: PMEClk- DSI- D1- D2- AuxCurrent=375mA
> > PME(D0+,D1-,D2-,D3hot+,D3cold+)
> >   Status: D0 PME-Enable- DSel=0 DScale=0 PME-
> >  Capabilities: [58] #0a [2080]
> > 00: 86 80 cd 24 06 01 90 02 01 20 03 0c 00 00 00 00
> > 10: 00 fc bf fe 00 00 00 00 00 00 00 00 00 00 00 00
> > 20: 00 00 00 00 00 00 00 00 00 00 00 00 28 10 32 01
> > 30: 00 00 00 00 50 00 00 00 00 00 00 00 0a 04 00 00
> >
> > 00:1e.0 PCI bridge: Intel Corp. 82801BA/CA PCI Bridge (rev 81) (prog-if
00
> > [Normal decode])
> >  Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
> > Stepping- SERR+ FastB2B-
> >  Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=fast >TAbort- <TAbort-
> > <MAbort- >SERR- <PERR+
> >  Latency: 0
> >  Bus: primary=00, secondary=02, subordinate=02, sec-latency=32
> >  I/O behind bridge: 0000d000-0000dfff
> >  Memory behind bridge: fe800000-feafffff
> >  Prefetchable memory behind bridge: ec500000-ec5fffff
> >  BridgeCtl: Parity- SERR+ NoISA+ VGA- MAbort- >Reset- FastB2B-
> > 00: 86 80 4e 24 07 01 80 80 81 00 04 06 00 00 01 00
> > 10: 00 00 00 00 00 00 00 00 00 02 02 20 d0 d0 80 22
> > 20: 80 fe a0 fe 50 ec 50 ec 00 00 00 00 00 00 00 00
> > 30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 06 00
> >
> > 00:1f.0 ISA bridge: Intel Corp.: Unknown device 24c0 (rev 01)
> >  Control: I/O+ Mem+ BusMaster+ SpecCycle+ MemWINV- VGASnoop- ParErr-
> > Stepping- SERR+ FastB2B-
> >  Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort-
<TAbort-
> > <MAbort- >SERR- <PERR-
> >  Latency: 0
> > 00: 86 80 c0 24 0f 01 80 02 01 00 01 06 00 00 80 00
> > 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> > 20: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> > 30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> >
> > 00:1f.1 IDE interface: Intel Corp.: Unknown device 24cb (rev 01)
(prog-if
> > 8a [Master SecP PriP])
> >  Subsystem: Dell Computer Corporation: Unknown device 0132
> >  Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
> > Stepping- SERR- FastB2B-
> >  Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort-
<TAbort-
> > <MAbort- >SERR- <PERR-
> >  Latency: 0
> >  Interrupt: pin A routed to IRQ 9
> >  Region 0: I/O ports at <unassigned> [size=8]
> >  Region 1: I/O ports at <unassigned> [size=4]
> >  Region 2: I/O ports at <unassigned> [size=8]
> >  Region 3: I/O ports at <unassigned> [size=4]
> >  Region 4: I/O ports at ffa0 [size=16]
> >  Region 5: Memory at 20000000 (32-bit, non-prefetchable) [disabled]
> > [size=1K]
> > 00: 86 80 cb 24 05 00 80 02 01 8a 01 01 00 00 00 00
> > 10: 01 00 00 00 01 00 00 00 01 00 00 00 01 00 00 00
> > 20: a1 ff 00 00 00 00 00 20 00 00 00 00 28 10 32 01
> > 30: 00 00 00 00 00 00 00 00 00 00 00 00 ff 01 00 00
> >
> > 00:1f.3 SMBus: Intel Corp.: Unknown device 24c3 (rev 01)
> >  Subsystem: Dell Computer Corporation: Unknown device 0132
> >  Control: I/O+ Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr-
> > Stepping- SERR- FastB2B-
> >  Status: Cap- 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort-
<TAbort-
> > <MAbort- >SERR- <PERR-
> >  Interrupt: pin B routed to IRQ 3
> >  Region 4: I/O ports at e480 [size=32]
> > 00: 86 80 c3 24 01 00 80 02 01 00 05 0c 00 00 00 00
> > 10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> > 20: 81 e4 00 00 00 00 00 00 00 00 00 00 28 10 32 01
> > 30: 00 00 00 00 00 00 00 00 00 00 00 00 03 02 00 00
> >
> > 01:00.0 VGA compatible controller: nVidia Corporation NV25 [GeForce4
> > Ti4200] (rev a3) (prog-if 00 [VGA])
> >  Subsystem: nVidia Corporation: Unknown device 0132
> >  Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
> > Stepping- SERR- FastB2B-
> >  Status: Cap+ 66Mhz+ UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort-
<TAbort-
> > <MAbort- >SERR- <PERR-
> >  Latency: 248 (1250ns min, 250ns max)
> >  Interrupt: pin A routed to IRQ 11
> >  Region 0: Memory at fd000000 (32-bit, non-prefetchable) [size=16M]
> >  Region 1: Memory at e0000000 (32-bit, prefetchable) [size=128M]
> >  Region 2: Memory at ec480000 (32-bit, prefetchable) [size=512K]
> >  Expansion ROM at fe7e0000 [disabled] [size=128K]
> >  Capabilities: [60] Power Management version 2
> >   Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA
> > PME(D0-,D1-,D2-,D3hot-,D3cold-) Status: D0 PME-Enable- DSel=0 DScale=0
PME-
> >  Capabilities: [44] AGP version 2.0
> >   Status: RQ=31 SBA+ 64bit- FW+ Rate=x1,x2
> >   Command: RQ=31 SBA- AGP+ 64bit- FW- Rate=<none>
> > 00: de 10 53 02 07 00 b0 02 a3 00 00 03 00 f8 00 00
> > 10: 00 00 00 fd 08 00 00 e0 08 00 48 ec 00 00 00 00
> > 20: 00 00 00 00 00 00 00 00 00 00 00 00 de 10 32 01
> > 30: 00 00 00 00 60 00 00 00 00 00 00 00 0b 01 05 01
> >
> > 02:00.0 Ethernet controller: Intel Corp. 82557/8/9 [Ethernet Pro 100]
(rev
> > 10)
> >  Subsystem: Intel Corp.: Unknown device 0071
> >  Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV+ VGASnoop- ParErr-
> > Stepping- SERR+ FastB2B-
> >  Status: Cap+ 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort-
<TAbort-
> > <MAbort- >SERR- <PERR-
> >  Latency: 64 (2000ns min, 14000ns max), cache line size 08
> >  Interrupt: pin A routed to IRQ 11
> >  Region 0: Memory at feaff000 (32-bit, non-prefetchable) [size=4K]
> >  Region 1: I/O ports at dc00 [size=64]
> >  Region 2: Memory at feac0000 (32-bit, non-prefetchable) [size=128K]
> >  Expansion ROM at feae0000 [disabled] [size=64K]
> >  Capabilities: [dc] Power Management version 2
> >   Flags: PMEClk- DSI+ D1+ D2+ AuxCurrent=0mA
> > PME(D0+,D1+,D2+,D3hot+,D3cold+) Status: D0 PME-Enable- DSel=0 DScale=2
PME-
> > 00: 86 80 29 12 17 01 90 02 10 00 00 02 08 40 00 00
> > 10: 00 f0 af fe 01 dc 00 00 00 00 ac fe 00 00 00 00
> > 20: 00 00 00 00 00 00 00 00 00 00 00 00 86 80 71 00
> > 30: 00 00 ae fe dc 00 00 00 00 00 00 00 0b 01 08 38
> >
> > 02:02.0 Multimedia audio controller: Cirrus Logic CS 4614/22/24
> > [CrystalClear SoundFusion Audio Accelerator] (rev 01)
> >  Subsystem: Voyetra Technologies: Unknown device 3357
> >  Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
> > Stepping- SERR+ FastB2B-
> >  Status: Cap+ 66Mhz- UDF- FastB2B- ParErr- DEVSEL=slow >TAbort- <TAbort-
> > <MAbort- >SERR- <PERR-
> >  Latency: 64 (1000ns min, 6000ns max)
> >  Interrupt: pin A routed to IRQ 9
> >  Region 0: Memory at feafd000 (32-bit, non-prefetchable) [size=4K]
> >  Region 1: Memory at fe900000 (32-bit, non-prefetchable) [size=1M]
> >  Capabilities: [40] Power Management version 2
> >   Flags: PMEClk- DSI+ D1+ D2+ AuxCurrent=0mA
> > PME(D0-,D1-,D2-,D3hot-,D3cold-) Status: D0 PME-Enable- DSel=0 DScale=0
PME-
> > 00: 13 10 03 60 06 01 10 04 01 00 01 04 00 40 00 00
> > 10: 00 d0 af fe 00 00 90 fe 00 00 00 00 00 00 00 00
> > 20: 00 00 00 00 00 00 00 00 00 00 00 00 53 50 57 33
> > 30: 00 00 00 00 40 00 00 00 00 00 00 00 09 01 04 18
> >
> >
> > -
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel"
in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/
>
> --
> John L. Korpi, Ph.D. -- Senior Scientist, Networks & Infrastructure
> NeuStar, Inc.
> Voice:  1.216.241.2919, 1.703.435.0682
> Mobile: 1.216.233.3042    Pager: 1.800.398.2959
>
>



^ permalink raw reply	[flat|nested] 47+ messages in thread

* [patch] tls-2.5.31-C3
  2002-08-07 18:33 ` Linus Torvalds
                     ` (3 preceding siblings ...)
  2002-08-07 22:36   ` Luca Barbieri
@ 2002-08-11 21:46   ` Ingo Molnar
  2002-08-12  7:34     ` Stephen Rothwell
                       ` (2 more replies)
  4 siblings, 3 replies; 47+ messages in thread
From: Ingo Molnar @ 2002-08-11 21:46 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Alexandre Julliard, Luca Barbieri

[-- Attachment #1: Type: TEXT/PLAIN, Size: 24215 bytes --]


the attached patch cleans up the TLS code and it introduces a number of
new capabilities as well:

- move the TLS space to the first 12 GDT descriptors - kernel descriptors
  come afterwards.

- make USER CS and DS just another TLS entry, which happen to have a
  default value that matches the current segments. It's done in a way
  that does not result in extra context-switch overhead.

- make segment 0040 available to Wine, allow the setting of 16-bit
  segments. Allow full flexibility of all the safe segment variants.

- sys_set_thread_area(&info) can be used to set a specific GDT entry, but it
  can also trigger an 'allocation' of a yet unused TLS entry, by using
  an ->entry_number of -1. It's recommended for userspace code to use the
  -1 value, to make sure different libraries can nest properly.

- sys_get_thread_area(&info) can be used to read TLS entries into the same
  userspace descriptor format as sys_set_thread_area() does. The new
  syscalls are now actually relatively clean, and the TLS area can be
  extended seamlessly.

- move KERNEL CS, DS, TSS and LDT to the same cacheline.

- clean up all the kernel descriptors to be more or less easily
  modified/reordered from segment.h only, with minimal dependencies.

- move the GDT/TLS definitions to asm-i386/segment.h, to make it easier to
  include the constants into assembly code and lowlevel include files.

an open issue: the context-switch code uses an optimized variant of TLS
loading - only the truly affected portions of the GDT get rewritten. But
i'm not 100% convinced this is the right way - i kept the TLS in the same
format as the GDT, so we could as well just write 96 bytes
unconditionally. That's smaller than a single cacheline on modern CPUs. Doing
this would greatly simplify the code. I've mainly done this current
optimization to show that it can be done in a relatively straightforward
way, but i dont think it's worth it. Especially since the TLS area is
3 32-byte cachelines, it should easily trigger all the memcpy fastpaths in
various CPUs. So i'd suggest to keep the tls_bytes variables only, and
thus non-TLS code would see only a single branch in the context-switch
path.

another issue: i've not gone the whole way of unifying LDT and TLS support
- we've already got compatibility code in the LDT interfaces and changing
LDTs via the TLS syscalls would only make the situation even more messy.  
Nevertheless there are some new synergies between the LDT and TLS code,
which resulted in some ldt.c code reduction.

i've attached a new version of tls.c that tests the new TLS syscall
variants and shows off some of the new capabilities. TLS support works
just fine on 2.5.31 + this patch, on SMP and UP as well.

Comments?

	Ingo

--- linux/drivers/pnp/pnpbios_core.c.orig	Sun Aug 11 17:01:17 2002
+++ linux/drivers/pnp/pnpbios_core.c	Sun Aug 11 23:28:44 2002
@@ -90,7 +90,8 @@
 static union pnp_bios_expansion_header * pnp_bios_hdr = NULL;
 
 /* The PnP BIOS entries in the GDT */
-#define PNP_GDT    (0x0060)
+#define PNP_GDT    (GDT_ENTRY_PNPBIOS_BASE * 8)
+
 #define PNP_CS32   (PNP_GDT+0x00)	/* segment for calling fn */
 #define PNP_CS16   (PNP_GDT+0x08)	/* code segment for BIOS */
 #define PNP_DS     (PNP_GDT+0x10)	/* data segment for BIOS */
--- linux/arch/i386/kernel/cpu/common.c.orig	Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/cpu/common.c	Sun Aug 11 23:28:44 2002
@@ -423,6 +423,7 @@
 {
 	int cpu = smp_processor_id();
 	struct tss_struct * t = init_tss + cpu;
+	struct thread_struct *thread = &current->thread;
 
 	if (test_and_set_bit(cpu, &cpu_initialized)) {
 		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -447,9 +448,14 @@
 	 */
 	if (cpu) {
 		memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
-		cpu_gdt_descr[cpu].size = GDT_SIZE;
+		cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
 		cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
 	}
+	/*
+	 * Set up the per-thread TLS descriptor cache:
+	 */
+	memcpy(thread->tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_MAX * 8);
+	clear_TLS(thread);
 
 	__asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu]));
 	__asm__ __volatile__("lidt %0": "=m" (idt_descr));
@@ -468,9 +474,9 @@
 		BUG();
 	enter_lazy_tlb(&init_mm, current, cpu);
 
-	t->esp0 = current->thread.esp0;
+	t->esp0 = thread->esp0;
 	set_tss_desc(cpu,t);
-	cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+	cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
 	load_TR_desc();
 	load_LDT(&init_mm.context);
 
--- linux/arch/i386/kernel/entry.S.orig	Sun Aug 11 17:01:07 2002
+++ linux/arch/i386/kernel/entry.S	Sun Aug 11 23:28:44 2002
@@ -753,6 +753,7 @@
 	.long sys_sched_setaffinity
 	.long sys_sched_getaffinity
 	.long sys_set_thread_area
+	.long sys_get_thread_area
 
 	.rept NR_syscalls-(.-sys_call_table)/4
 		.long sys_ni_syscall
--- linux/arch/i386/kernel/head.S.orig	Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/head.S	Sun Aug 11 23:28:44 2002
@@ -239,12 +239,7 @@
 	movl %eax,%es
 	movl %eax,%fs
 	movl %eax,%gs
-#ifdef CONFIG_SMP
-	movl $(__KERNEL_DS), %eax
-	movl %eax,%ss		# Reload the stack pointer (segment only)
-#else
-	lss stack_start,%esp	# Load processor stack
-#endif
+	movl %eax,%ss
 	xorl %eax,%eax
 	lldt %ax
 	cld			# gcc2 wants the direction flag cleared at all times
@@ -412,34 +407,44 @@
 
 ALIGN
 /*
- * The Global Descriptor Table contains 20 quadwords, per-CPU.
+ * The Global Descriptor Table contains 28 quadwords, per-CPU.
  */
 ENTRY(cpu_gdt_table)
 	.quad 0x0000000000000000	/* NULL descriptor */
-	.quad 0x0000000000000000	/* TLS descriptor */
-	.quad 0x00cf9a000000ffff	/* 0x10 kernel 4GB code at 0x00000000 */
-	.quad 0x00cf92000000ffff	/* 0x18 kernel 4GB data at 0x00000000 */
-	.quad 0x00cffa000000ffff	/* 0x23 user   4GB code at 0x00000000 */
-	.quad 0x00cff2000000ffff	/* 0x2b user   4GB data at 0x00000000 */
-	.quad 0x0000000000000000	/* TSS descriptor */
-	.quad 0x0000000000000000	/* LDT descriptor */
+	.quad 0x00cffa000000ffff	/* 0x0b user 4GB code at 0x00000000 */
+	.quad 0x00cff2000000ffff	/* 0x13 user 4GB data at 0x00000000 */
+	.quad 0x0000000000000000	/* 0x1b TLS entry 3 */
+	.quad 0x0000000000000000	/* ... */
+	.quad 0x0000000000000000
+	.quad 0x0000000000000000
+	.quad 0x0000000000000000
+	.quad 0x0000000000000000
+	.quad 0x0000000000000000
+	.quad 0x0000000000000000	/* ... */
+	.quad 0x0000000000000000	/* 0x5b TLS entry 11 */
+
+	.quad 0x00cf9a000000ffff	/* 0x60 kernel 4GB code at 0x00000000 */
+	.quad 0x00cf92000000ffff	/* 0x68 kernel 4GB data at 0x00000000 */
+	.quad 0x0000000000000000	/* 0x70 TSS descriptor */
+	.quad 0x0000000000000000	/* 0x78 LDT descriptor */
+
 	/*
 	 * The APM segments have byte granularity and their bases
 	 * and limits are set at run time.
 	 */
-	.quad 0x0040920000000000	/* 0x40 APM set up for bad BIOS's */
-	.quad 0x00409a0000000000	/* 0x48 APM CS    code */
-	.quad 0x00009a0000000000	/* 0x50 APM CS 16 code (16 bit) */
-	.quad 0x0040920000000000	/* 0x58 APM DS    data */
+	.quad 0x0040920000000000	/* 0x80 APM set up for bad BIOS's */
+	.quad 0x00409a0000000000	/* 0x88 APM CS    code */
+	.quad 0x00009a0000000000	/* 0x90 APM CS 16 code (16 bit) */
+	.quad 0x0040920000000000	/* 0x98 APM DS    data */
 	/* Segments used for calling PnP BIOS */
-	.quad 0x00c09a0000000000	/* 0x60 32-bit code */
-	.quad 0x00809a0000000000	/* 0x68 16-bit code */
-	.quad 0x0080920000000000	/* 0x70 16-bit data */
-	.quad 0x0080920000000000	/* 0x78 16-bit data */
-	.quad 0x0080920000000000	/* 0x80 16-bit data */
-	.quad 0x0000000000000000	/* 0x88 not used */
-	.quad 0x0000000000000000	/* 0x90 not used */
-	.quad 0x0000000000000000	/* 0x98 not used */
+	.quad 0x00c09a0000000000	/* 0xa0 32-bit code */
+	.quad 0x00809a0000000000	/* 0xa8 16-bit code */
+	.quad 0x0080920000000000	/* 0xb0 16-bit data */
+	.quad 0x0080920000000000	/* 0xb8 16-bit data */
+	.quad 0x0080920000000000	/* 0xc0 16-bit data */
+	.quad 0x0000000000000000	/* 0xc8 not used */
+	.quad 0x0000000000000000	/* 0xd0 not used */
+	.quad 0x0000000000000000	/* 0xd8 not used */
 
 #if CONFIG_SMP
 	.fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */
--- linux/arch/i386/kernel/process.c.orig	Sun Aug 11 17:01:08 2002
+++ linux/arch/i386/kernel/process.c	Sun Aug 11 23:28:44 2002
@@ -681,11 +681,9 @@
 
 	/*
 	 * Load the per-thread Thread-Local Storage descriptor.
-	 *
-	 * NOTE: it's faster to do the two stores unconditionally
-	 * than to branch away.
 	 */
-	load_TLS_desc(next, cpu);
+	if (prev->nr_tls_bytes || next->nr_tls_bytes)
+		load_TLS(prev, next, cpu);
 
 	/*
 	 * Save away %fs and %gs. No need to save %es and %ds, as
@@ -834,35 +832,168 @@
 #undef first_sched
 
 /*
- * Set the Thread-Local Storage area:
+ * get_free_idx: get a yet unused TLS descriptor index.
  */
-asmlinkage int sys_set_thread_area(unsigned long base, unsigned long flags)
+static int get_free_idx(void)
 {
 	struct thread_struct *t = &current->thread;
-	int writable = 0;
-	int cpu;
+	int idx;
 
-	/* do not allow unused flags */
-	if (flags & ~TLS_FLAGS_MASK)
+	for (idx = GDT_ENTRY_TLS_MIN; idx <= GDT_ENTRY_TLS_MAX; idx++)
+		if (desc_empty(t->tls_array + idx))
+			return idx;
+	return -ESRCH;
+}
+
+static inline int first_tls(struct desc_struct *array)
+{
+	struct desc_struct *default_array = init_task.thread.tls_array;
+	int idx;
+
+	for (idx = GDT_ENTRY_TLS_MIN; idx <= GDT_ENTRY_TLS_MAX; idx++)
+		if (!desc_equal(array + idx, default_array + idx))
+			return idx;
+
+	return 0;
+}
+
+static inline int last_tls(struct desc_struct *array)
+{
+	struct desc_struct *default_array = init_task.thread.tls_array;
+	int idx;
+
+	for (idx = GDT_ENTRY_TLS_MAX; idx >= GDT_ENTRY_TLS_MIN; idx--)
+		if (!desc_equal(array + idx, default_array + idx))
+			return idx;
+
+	return 0;
+}
+
+#define CHECK_TLS_IDX(idx)						\
+do {									\
+	if ((idx) < GDT_ENTRY_TLS_MIN || (idx) > GDT_ENTRY_TLS_MAX)	\
+		BUG();							\
+} while (0)
+
+/*
+ * Set a given TLS descriptor:
+ */
+asmlinkage int sys_set_thread_area(struct modify_ldt_ldt_s *u_info)
+{
+	struct thread_struct *t = &current->thread;
+	struct modify_ldt_ldt_s info;
+	struct desc_struct *desc;
+	int cpu, idx;
+
+	if (copy_from_user(&info, u_info, sizeof(info)))
+		return -EFAULT;
+	idx = info.entry_number;
+
+	/*
+	 * index -1 means the kernel should try to find and
+	 * allocate an empty descriptor:
+	 */
+	if (idx == -1) {
+		idx = get_free_idx();
+		if (idx < 0)
+			return idx;
+		if (put_user(idx, &u_info->entry_number))
+			return -EFAULT;
+	}
+
+	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
 		return -EINVAL;
 
-	if (flags & TLS_FLAG_WRITABLE)
-		writable = 1;
+	desc = t->tls_array + idx;
 
 	/*
 	 * We must not get preempted while modifying the TLS.
 	 */
 	cpu = get_cpu();
 
-        t->tls_desc.a = ((base & 0x0000ffff) << 16) | 0xffff;
+	if (LDT_empty(&info)) {
+		desc->a = 0;
+		desc->b = 0;
+	} else {
+		desc->a = LDT_entry_a(&info);
+		desc->b = LDT_entry_b(&info);
+	}
+
+	t->first_tls_byte = first_tls(t->tls_array) * 8;
+	t->last_tls_byte = (last_tls(t->tls_array) + 1) * 8;
 
-        t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) |
-				0xf0000 | (writable << 9) | (1 << 15) |
-					(1 << 22) | (1 << 23) | 0x7000;
+	if (t->first_tls_byte || t->last_tls_byte) {
+		CHECK_TLS_IDX(t->first_tls_byte/8);
+		CHECK_TLS_IDX(t->last_tls_byte/8-1);
+		t->nr_tls_bytes = t->last_tls_byte - t->first_tls_byte;
+		if (t->nr_tls_bytes < 0)
+			BUG();
+		if (t->nr_tls_bytes > GDT_ENTRY_TLS_ENTRIES * 8)
+			BUG();
+	} else {
+		/*
+		 * If a thread has no TLS then invert the first/last
+		 * range so that if we switch from (or to) a TLS-using
+		 * thread then it will be the thread's TLS area that
+		 * will be copied into the GDT.
+		 */
+		t->nr_tls_bytes = 0;
+		t->first_tls_byte = 0;
+		t->last_tls_byte = (GDT_ENTRY_TLS_MAX + 1) * 8;
+	}
+
+	load_TLS(t, t, cpu);
 
-	load_TLS_desc(t, cpu);
 	put_cpu();
 
-	return TLS_ENTRY*8 + 3;
+	return 0;
+}
+
+/*
+ * Get the current Thread-Local Storage area:
+ */
+
+#define GET_BASE(desc) ( \
+	(((desc)->a >> 16) & 0x0000ffff) | \
+	(((desc)->b << 16) & 0x00ff0000) | \
+	( (desc)->b        & 0xff000000)   )
+
+#define GET_LIMIT(desc) ( \
+	((desc)->a & 0x0ffff) | \
+	 ((desc)->b & 0xf0000) )
+	
+#define GET_32BIT(desc)		(((desc)->b >> 23) & 1)
+#define GET_CONTENTS(desc)	(((desc)->b >> 10) & 3)
+#define GET_WRITABLE(desc)	(((desc)->b >>  9) & 1)
+#define GET_LIMIT_PAGES(desc)	(((desc)->b >> 23) & 1)
+#define GET_PRESENT(desc)	(((desc)->b >> 15) & 1)
+#define GET_USEABLE(desc)	(((desc)->b >> 20) & 1)
+
+asmlinkage int sys_get_thread_area(struct modify_ldt_ldt_s *u_info)
+{
+	struct modify_ldt_ldt_s info;
+	struct desc_struct *desc;
+	int idx;
+
+	if (get_user(idx, &u_info->entry_number))
+		return -EFAULT;
+	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+		return -EINVAL;
+
+	desc = current->thread.tls_array + idx;
+
+	info.entry_number = idx;
+	info.base_addr = GET_BASE(desc);
+	info.limit = GET_LIMIT(desc);
+	info.seg_32bit = GET_32BIT(desc);
+	info.contents = GET_CONTENTS(desc);
+	info.read_exec_only = !GET_WRITABLE(desc);
+	info.limit_in_pages = GET_LIMIT_PAGES(desc);
+	info.seg_not_present = !GET_PRESENT(desc);
+	info.useable = GET_USEABLE(desc);
+
+	if (copy_to_user(u_info, &info, sizeof(info)))
+		return -EFAULT;
+	return 0;
 }
 
--- linux/arch/i386/kernel/suspend.c.orig	Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/suspend.c	Sun Aug 11 23:28:44 2002
@@ -207,7 +207,7 @@
 	struct tss_struct * t = init_tss + cpu;
 
 	set_tss_desc(cpu,t);	/* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */
-        cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+        cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
 
 	load_TR_desc();				/* This does ltr */
 	load_LDT(&current->mm->context);	/* This does lldt */
--- linux/arch/i386/kernel/ldt.c.orig	Sun Aug 11 17:01:04 2002
+++ linux/arch/i386/kernel/ldt.c	Sun Aug 11 23:28:44 2002
@@ -200,32 +200,17 @@
 
    	/* Allow LDTs to be cleared by the user. */
    	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
-		if (oldmode ||
-		    (ldt_info.contents == 0		&&
-		     ldt_info.read_exec_only == 1	&&
-		     ldt_info.seg_32bit == 0		&&
-		     ldt_info.limit_in_pages == 0	&&
-		     ldt_info.seg_not_present == 1	&&
-		     ldt_info.useable == 0 )) {
+		if (oldmode || LDT_empty(&ldt_info)) {
 			entry_1 = 0;
 			entry_2 = 0;
 			goto install;
 		}
 	}
 
-	entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
-		  (ldt_info.limit & 0x0ffff);
-	entry_2 = (ldt_info.base_addr & 0xff000000) |
-		  ((ldt_info.base_addr & 0x00ff0000) >> 16) |
-		  (ldt_info.limit & 0xf0000) |
-		  ((ldt_info.read_exec_only ^ 1) << 9) |
-		  (ldt_info.contents << 10) |
-		  ((ldt_info.seg_not_present ^ 1) << 15) |
-		  (ldt_info.seg_32bit << 22) |
-		  (ldt_info.limit_in_pages << 23) |
-		  0x7000;
-	if (!oldmode)
-		entry_2 |= (ldt_info.useable << 20);
+	entry_1 = LDT_entry_a(&ldt_info);
+	entry_2 = LDT_entry_b(&ldt_info);
+	if (oldmode)
+		entry_2 &= ~(1 << 20);
 
 	/* Install the new entry ...  */
 install:
--- linux/arch/i386/boot/setup.S.orig	Sun Jun  9 07:26:32 2002
+++ linux/arch/i386/boot/setup.S	Sun Aug 11 23:28:44 2002
@@ -1005,9 +1005,14 @@
 	ret
 
 # Descriptor tables
+#
+# NOTE: if you think the GDT is large, you can make it smaller by just
+# defining the KERNEL_CS and KERNEL_DS entries and shifting the gdt
+# address down by GDT_ENTRY_KERNEL_CS*8. This puts bogus entries into
+# the GDT, but those wont be used so it's not a problem.
+#
 gdt:
-	.word	0, 0, 0, 0			# dummy
-	.word	0, 0, 0, 0			# unused
+	.fill GDT_ENTRY_KERNEL_CS,8,0
 
 	.word	0xFFFF				# 4Gb - (0x100000*0x1000 = 4Gb)
 	.word	0				# base address = 0
--- linux/include/linux/apm_bios.h.orig	Sun Jun  9 07:30:24 2002
+++ linux/include/linux/apm_bios.h	Sun Aug 11 23:28:44 2002
@@ -21,8 +21,8 @@
 
 #ifdef __KERNEL__
 
-#define APM_40		0x40
-#define APM_CS		(APM_40 + 8)
+#define APM_40		(GDT_ENTRY_APMBIOS_BASE * 8)
+#define APM_CS		(APM_BASE + 8)
 #define APM_CS_16	(APM_CS + 8)
 #define APM_DS		(APM_CS_16 + 8)
 
--- linux/include/asm-i386/desc.h.orig	Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/desc.h	Sun Aug 11 23:28:44 2002
@@ -2,50 +2,12 @@
 #define __ARCH_DESC_H
 
 #include <asm/ldt.h>
-
-/*
- * The layout of the per-CPU GDT under Linux:
- *
- *   0 - null
- *   1 - Thread-Local Storage (TLS) segment
- *   2 - kernel code segment
- *   3 - kernel data segment
- *   4 - user code segment		<==== new cacheline
- *   5 - user data segment
- *   6 - TSS
- *   7 - LDT
- *   8 - APM BIOS support		<==== new cacheline
- *   9 - APM BIOS support
- *  10 - APM BIOS support
- *  11 - APM BIOS support
- *  12 - PNPBIOS support		<==== new cacheline
- *  13 - PNPBIOS support
- *  14 - PNPBIOS support
- *  15 - PNPBIOS support
- *  16 - PNPBIOS support		<==== new cacheline
- *  17 - not used
- *  18 - not used
- *  19 - not used
- */
-#define TLS_ENTRY 1
-#define TSS_ENTRY 6
-#define LDT_ENTRY 7
-/*
- * The interrupt descriptor table has room for 256 idt's,
- * the global descriptor table is dependent on the number
- * of tasks we can have..
- *
- * We pad the GDT to cacheline boundary.
- */
-#define IDT_ENTRIES 256
-#define GDT_ENTRIES 20
+#include <asm/segment.h>
 
 #ifndef __ASSEMBLY__
 
 #include <asm/mmu.h>
 
-#define GDT_SIZE (GDT_ENTRIES*sizeof(struct desc_struct))
-
 extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
 
 struct Xgt_desc_struct {
@@ -55,8 +17,8 @@
 
 extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
 
-#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (TSS_ENTRY<<3))
-#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (LDT_ENTRY<<3))
+#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8))
+#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8))
 
 /*
  * This is the ldt that every process will get unless we need
@@ -78,21 +40,52 @@
 
 static inline void set_tss_desc(unsigned int cpu, void *addr)
 {
-	_set_tssldt_desc(&cpu_gdt_table[cpu][TSS_ENTRY], (int)addr, 235, 0x89);
+	_set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (int)addr, 235, 0x89);
 }
 
 static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
 {
-	_set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82);
+	_set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
 }
 
-#define TLS_FLAGS_MASK			0x00000001
+#define LDT_entry_a(info) \
+	((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
 
-#define TLS_FLAG_WRITABLE		0x00000001
+#define LDT_entry_b(info) \
+	(((info)->base_addr & 0xff000000) | \
+	(((info)->base_addr & 0x00ff0000) >> 16) | \
+	((info)->limit & 0xf0000) | \
+	(((info)->read_exec_only ^ 1) << 9) | \
+	((info)->contents << 10) | \
+	(((info)->seg_not_present ^ 1) << 15) | \
+	((info)->seg_32bit << 22) | \
+	((info)->limit_in_pages << 23) | \
+	((info)->useable << 20) | \
+	0x7000)
+
+#define LDT_empty(info) (\
+	(info)->base_addr	== 0	&& \
+	(info)->limit		== 0	&& \
+	(info)->contents	== 0	&& \
+	(info)->read_exec_only	== 1	&& \
+	(info)->seg_32bit	== 0	&& \
+	(info)->limit_in_pages	== 0	&& \
+	(info)->seg_not_present	== 1	&& \
+	(info)->useable		== 0	)
 
-static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu)
+static inline void clear_TLS(struct thread_struct *t)
 {
-	cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc;
+	t->nr_tls_bytes = 0;
+	t->first_tls_byte = 0;
+	t->last_tls_byte = (GDT_ENTRY_TLS_MAX + 1) * 8;
+}
+
+static inline void load_TLS(struct thread_struct *prev, struct thread_struct *next, unsigned int cpu)
+{
+	int first_byte = min(prev->first_tls_byte, next->first_tls_byte);
+	int last_byte = max(prev->last_tls_byte, next->last_tls_byte);
+
+	memcpy((char *)(cpu_gdt_table[cpu]) + first_byte, (char *)next->tls_array + first_byte, last_byte - first_byte);
 }
 
 static inline void clear_LDT(void)
--- linux/include/asm-i386/processor.h.orig	Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/processor.h	Sun Aug 11 23:28:44 2002
@@ -22,6 +22,11 @@
 	unsigned long a,b;
 };
 
+#define desc_empty(desc) \
+		(!((desc)->a + (desc)->b))
+
+#define desc_equal(desc1, desc2) \
+		(((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
 /*
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").
@@ -376,8 +381,16 @@
 	unsigned long		v86flags, v86mask, v86mode, saved_esp0;
 /* IO permissions */
 	unsigned long	*ts_io_bitmap;
-/* TLS cached descriptor */
-	struct desc_struct tls_desc;
+
+	/*
+	 * cached TLS descriptors.
+	 *
+	 * The offset calculation is needed to not copy the whole TLS
+	 * into the local GDT all the time.
+	 * We count offsets in bytes to reduce context-switch overhead.
+	 */
+	int nr_tls_bytes, first_tls_byte, last_tls_byte;
+	struct desc_struct tls_array[GDT_ENTRY_TLS_MAX + 1];
 };
 
 #define INIT_THREAD  {						\
@@ -401,7 +414,7 @@
 	0,0,0,0, /* esp,ebp,esi,edi */				\
 	0,0,0,0,0,0, /* es,cs,ss */				\
 	0,0,0,0,0,0, /* ds,fs,gs */				\
-	LDT_ENTRY,0, /* ldt */					\
+	GDT_ENTRY_LDT,0, /* ldt */					\
 	0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */		\
 	{~0, } /* ioperm */					\
 }
--- linux/include/asm-i386/segment.h.orig	Sun Jun  9 07:28:19 2002
+++ linux/include/asm-i386/segment.h	Sun Aug 11 23:28:44 2002
@@ -1,10 +1,84 @@
 #ifndef _ASM_SEGMENT_H
 #define _ASM_SEGMENT_H
 
-#define __KERNEL_CS	0x10
-#define __KERNEL_DS	0x18
+/*
+ * The layout of the per-CPU GDT under Linux:
+ *
+ *   0 - null
+ *
+ *  ------- start of TLS (Thread-Local Storage) segments:
+ *
+ *   1 - TLS segment #1			[ default user CS ]
+ *   2 - TLS segment #2			[ default user DS ]
+ *   3 - TLS segment #3			[ glibc's TLS segment ]
+ *   4 - TLS segment #4			[ Wine's %fs Win32 segment ]
+ *   5 - TLS segment #5
+ *   6 - TLS segment #6
+ *   7 - TLS segment #7
+ *   8 - TLS segment #8			[ segment 0040 used by Wine ]
+ *   9 - TLS segment #9
+ *  10 - TLS segment #10
+ *  11 - TLS segment #11
+ *
+ *  ------- start of kernel segments, on a full cacheline:
+ *
+ *  12 - kernel code segment		<==== new cacheline
+ *  13 - kernel data segment
+ *  14 - TSS
+ *  15 - LDT
+ *
+ *  ------- these are the less performance-sensitive segments:
+ *
+ *  16 - APM BIOS support
+ *  17 - APM BIOS support
+ *  18 - APM BIOS support
+ *  19 - APM BIOS support 
+ *  20 - PNPBIOS support (16->32 gate)
+ *  21 - PNPBIOS support
+ *  22 - PNPBIOS support
+ *  23 - PNPBIOS support
+ *  24 - PNPBIOS support
+ *  25 - reserved
+ *  26 - reserved
+ *  27 - reserved
+ */
+#define GDT_ENTRY_TLS_ENTRIES	11
+#define GDT_ENTRY_TLS_MIN	1
+#define GDT_ENTRY_TLS_MAX 	(GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
 
-#define __USER_CS	0x23
-#define __USER_DS	0x2B
+#define GDT_ENTRY_DEFAULT_USER_CS	(GDT_ENTRY_TLS_MIN + 0)
+#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
+
+#define GDT_ENTRY_DEFAULT_USER_DS	(GDT_ENTRY_TLS_MIN + 1)
+#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
+
+
+#define GDT_ENTRY_KERNEL_BASE	12
+
+#define GDT_ENTRY_KERNEL_CS		(GDT_ENTRY_KERNEL_BASE + 0)
+#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
+
+#define GDT_ENTRY_KERNEL_DS		(GDT_ENTRY_KERNEL_BASE + 1)
+#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
+
+#define GDT_ENTRY_TSS			(GDT_ENTRY_KERNEL_BASE + 2)
+#define GDT_ENTRY_LDT			(GDT_ENTRY_KERNEL_BASE + 3)
+
+#define GDT_ENTRY_APMBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 4)
+#define GDT_ENTRY_PNPBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 8)
+
+/*
+ * The GDT has 25 entries but we pad it to cacheline boundary:
+ */
+#define GDT_ENTRIES 28
+
+#define GDT_SIZE (GDT_ENTRIES * 8)
+
+/*
+ * The interrupt descriptor table has room for 256 idt's,
+ * the global descriptor table is dependent on the number
+ * of tasks we can have..
+ */
+#define IDT_ENTRIES 256
 
 #endif
--- linux/include/asm-i386/unistd.h.orig	Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/unistd.h	Sun Aug 11 23:28:44 2002
@@ -248,6 +248,7 @@
 #define __NR_sched_setaffinity	241
 #define __NR_sched_getaffinity	242
 #define __NR_set_thread_area	243
+#define __NR_get_thread_area	244
 
 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
 

[-- Attachment #2: Type: TEXT/PLAIN, Size: 5479 bytes --]

#include <asm/ldt.h>
#include <stdio.h>
#include <linux/unistd.h>
#include <signal.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <asm/sigcontext.h>
#include <linux/unistd.h>

/*
 * TLS functionality testing utility.
 */

/*
 * Stubs for the two thread-area system calls. The syscall numbers (243 and
 * 244) are defined locally, and each _syscall1() invocation declares a
 * one-argument function returning int that takes a pointer to a
 * struct modify_ldt_ldt_s.
 * NOTE(review): _syscall1() is presumably supplied by <linux/unistd.h> -
 * confirm it is available on the target headers.
 */
#define __NR_set_thread_area 243
_syscall1(int, set_thread_area, struct modify_ldt_ldt_s *, info)

#define __NR_get_thread_area 244
_syscall1(int, get_thread_area, struct modify_ldt_ldt_s *, info)

/*
 * Load the selector 'seg' into the %fs segment register. Only the 16-bit
 * form of the operand register is used (the 'w' operand modifier).
 */
static inline void initseg (int seg)
{
	__asm__ __volatile__ ("mov %w[sel],%%fs"
			      : /* no outputs */
			      : [sel] "r" (seg));
}

static inline unsigned char __readseg (unsigned offset)
{
	unsigned char res;

	asm ("fs; movb (%1),%%al" : "=a" (res) : "r" (offset));

	return res;
}

static inline void __writeseg (unsigned offset, unsigned char b)
{
	asm ("fs; movb %b1,(%0)" : : "r" (offset), "r" (b));
}

/*
 * Fetch one byte from %fs-relative address 'src' (the pointer value is used
 * as the segment offset) and store it into the ordinary memory at 'dst'.
 */
static void readseg (void *dst, const void *src)
{
	unsigned int seg_offset = (unsigned int)src;
	char *out = (char *)dst;

	*out = __readseg(seg_offset);
}

/*
 * Store 'value' at %fs-relative address 'dst' (the pointer value is used as
 * the segment offset).
 */
static void writeseg (void *dst, unsigned char value)
{
	unsigned int seg_offset = (unsigned int)dst;

	__writeseg(seg_offset, value);
}

/*
 * Three 4096-byte buffers, each filled with a distinct byte value (33, 44
 * and 55) using range designators. The address of 'data' is used by main()
 * as the base of the test segments; 'pre_data' and 'post_data' are defined
 * immediately before and after it.
 * NOTE(review): they presumably act as recognisable guard areas around
 * 'data' - the actual memory placement is up to the toolchain.
 */
unsigned char pre_data		[4096] = { [ 0 ... 4095 ] = 33 };
unsigned char data		[4096] = { [ 0 ... 4095 ] = 44 };
unsigned char post_data		[4096] = { [ 0 ... 4095 ] = 55 };

/*
 * Print the address of a descriptor request followed by each of its fields,
 * one per line, to stdout.
 *
 * info: the structure to dump; it is only read.
 */
static void print_info (struct modify_ldt_ldt_s *info)
{
	printf("info %p:\n", (void *)info);

/*
 * Print one member by name. The member is reached with a plain "info->f":
 * pasting "->" and an identifier with "##" does not yield a valid
 * preprocessing token. The value is converted to int to match "%d".
 */
#define P(f) printf("..."#f": %d.\n", (int)info->f)

	P(entry_number);
	P(base_addr);
	P(limit);
	P(seg_32bit);
	P(contents);
	P(read_exec_only);
	P(limit_in_pages);
	P(seg_not_present);
	P(useable);

#undef P
}

int main (void)
{
	int i, idx, seg, ret;
	unsigned int base;
	unsigned char result;
	struct modify_ldt_ldt_s info, info2;

	memset(&info, 0, sizeof(info));
	memset(&info2, 0, sizeof(info2));

	info.entry_number = -1;
	info.base_addr = 0;
	info.limit = 0xfffff;
	info.seg_32bit = 1;
	info.contents = MODIFY_LDT_CONTENTS_DATA;
	info.read_exec_only = 0;
	info.limit_in_pages = 1;
	info.seg_not_present = 0;

	data[0] = 123;
	data[4096] = 210;

	base = 0; info.base_addr = base;
	printf("\ndoing set_thread_area(%08x):\n", base);
	ret = set_thread_area(&info);
	if (ret < 0) {
		printf("ret: %d, TEST FAILED!\n", ret);
		exit(1);
	}

	idx = info.entry_number; seg = idx * 8 + 3;
	printf("got idx: %d (sel: %02x)\n", idx, seg);

	initseg(seg);

	printf("\nreading %p byte of [0x%08x] TLS:\n", &data, base);

	readseg (&result, &data);
	if (result == 123)
		printf("====> %d --- TEST PASSED.\n\n", result);
	else
		printf("====> %d --- TEST FAILURE!\n\n", result);


	info.entry_number = -1;
	base = (unsigned int)&data; info.base_addr = base;
	printf("\ndoing set_thread_area(%08x):\n", base);
	ret = set_thread_area(&info);
	if (ret < 0) {
		printf("ret: %d, TEST FAILED!\n", ret);
		exit(1);
	}

	idx = info.entry_number; seg = idx * 8 + 3;
	printf("got idx: %d (sel: %02x)\n", idx, seg);

	initseg(seg);

	printf("\nreading %p byte of [0x%08x] TLS:\n", &data, base);

	readseg (&result, 0);
	if (result == 123)
		printf("====> %d --- TEST PASSED.\n\n", result);
	else
		printf("====> %d --- TEST FAILURE!\n\n", result);

	printf("\nreading TLS idx %d's descriptor.\n", idx);
	info2.entry_number = idx;
	ret = get_thread_area(&info2);
	if (ret < 0) {
		printf("ret: %d, TEST FAILED!\n", ret);
		exit(1);
	}
	if (memcmp(&info, &info2, sizeof(info))) {
		printf("huh, info != info2? (%d)\n",
			memcmp(&info, &info2, sizeof(info)));
		print_info(&info);
		print_info(&info2);
	} else
		printf("info == info2 - TEST PASSED.\n");

	printf("\nclearing TLS idx %d's descriptor.\n", idx);

	info.entry_number = idx;
	info.base_addr = 0;
	info.limit = 0;
	info.seg_32bit = 0;
	info.contents = 0;
	info.read_exec_only = 1;
	info.limit_in_pages = 0;
	info.seg_not_present = 1;

	ret = set_thread_area(&info);
	if (ret < 0) {
		printf("ret: %d, TEST FAILED!\n", ret);
		exit(1);
	}
	printf("TEST PASSED.\n");

	base = (unsigned int) &data;

	printf("\nre-allocating TLS idx %d's descriptor.\n", idx);

	for (i = 0; i < 2; i++) {
		info.entry_number = -1;
		info.base_addr = base;
		info.limit = 0xfffff;
		info.seg_32bit = 1;
		info.contents = MODIFY_LDT_CONTENTS_DATA;
		info.read_exec_only = 0;
		info.limit_in_pages = 1;
		info.seg_not_present = 0;

		ret = set_thread_area(&info);
		if (ret < 0) {
			printf("ret: %d, TEST FAILED!\n", ret);
			exit(1);
		}
		if (!i && (idx != info.entry_number)) {
			printf("idx %d != entry_number %d! TEST FAILED!\n",
				idx, info.entry_number);
			exit(1);
		}
		idx = info.entry_number; seg = idx * 8 + 3;
		printf("got idx: %d (sel: %02x)\n", idx, seg);
		sleep(1);

		initseg(seg);
	}

	printf("TEST PASSED.\n\n");

	printf("writing last byte of 4097 byte [0x%08x] TLS:\n", base);
	writeseg ((void *)4096, 234);
	readseg (&result, (void *)4096);
	if (result == 234)
		printf("====> %d --- TEST PASSED.\n", result);
	else
		printf("====> %d --- TEST FAILURE!.\n", result);

	printf("writing read-only segment [0x%08x] (should coredump):\n", base);
	info.entry_number = -1;
	info.read_exec_only = 1;
	base = (unsigned int)&data; info.base_addr = base;
	ret = set_thread_area(&info);
	if (ret < 0) {
		printf("ret: %d, TEST FAILED!\n", ret);
		exit(1);
	}

	idx = info.entry_number; seg = idx * 8 + 3;
	printf("got idx: %d (sel: %02x)\n", idx, seg);

	initseg(seg);
	writeseg ((void *)4096, 234);
	printf("====> %d --- TEST FAILURE!.\n", result);

	return 0;
}

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-C3
  2002-08-11 21:46   ` [patch] tls-2.5.31-C3 Ingo Molnar
@ 2002-08-12  7:34     ` Stephen Rothwell
  2002-08-12 10:07       ` Ingo Molnar
  2002-08-12 12:18     ` Luca Barbieri
  2002-08-12 15:53     ` [patch] tls-2.5.31-D3 Ingo Molnar
  2 siblings, 1 reply; 47+ messages in thread
From: Stephen Rothwell @ 2002-08-12  7:34 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: torvalds, linux-kernel, julliard, ldb

Hi Ingo,

On Sun, 11 Aug 2002 23:46:01 +0200 (CEST) Ingo Molnar <mingo@elte.hu> wrote:
>
>  	/*
>  	 * The APM segments have byte granularity and their bases
>  	 * and limits are set at run time.
>  	 */
> -	.quad 0x0040920000000000	/* 0x40 APM set up for bad BIOS's */
> -	.quad 0x00409a0000000000	/* 0x48 APM CS    code */
> -	.quad 0x00009a0000000000	/* 0x50 APM CS 16 code (16 bit) */
> -	.quad 0x0040920000000000	/* 0x58 APM DS    data */
> +	.quad 0x0040920000000000	/* 0x80 APM set up for bad BIOS's */
> +	.quad 0x00409a0000000000	/* 0x88 APM CS    code */
> +	.quad 0x00009a0000000000	/* 0x90 APM CS 16 code (16 bit) */
> +	.quad 0x0040920000000000	/* 0x98 APM DS    data */

I just lost 0x40, which needs to be exactly 0x40 if it is to do its job (i.e.
cope with brain dead BIOS writers using 0x40 as a segment offset in
protected mode) ...

The idea is that segment 0x40 maps from physical address 0x400 to the end
of the first physical page.  As a real mode program would (more or less)
expect it to.

The other three segments don't matter as long as they are in that order
and contiguous.
-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-C3
  2002-08-12 10:07       ` Ingo Molnar
@ 2002-08-12  8:23         ` Stephen Rothwell
  2002-08-12 10:08           ` Alan Cox
  2002-08-12 14:46         ` Stephen Rothwell
  1 sibling, 1 reply; 47+ messages in thread
From: Stephen Rothwell @ 2002-08-12  8:23 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: torvalds, linux-kernel, julliard, ldb

On Mon, 12 Aug 2002 12:07:19 +0200 (CEST) Ingo Molnar <mingo@elte.hu> wrote:
>
> you can save/restore 0x40 in kernel-space if you need to no problem.

I guess I could do that around every BIOS call ...

Also, Alan (Cox) will say that's OK until he does APM on SMP on broken
BIOS's :-)

We could also just say that we no longer support those broken BIOS's ...

> so you are using the kernel's GDT in real mode as well?

No. The problem is that there are some BIOS's that contain code that (even
though they are called in protected mode) load 0x40 into ds and expect to
be able to reference stuff ...  Causes really interesting OOPSs :-(

-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-C3
  2002-08-12  7:34     ` Stephen Rothwell
@ 2002-08-12 10:07       ` Ingo Molnar
  2002-08-12  8:23         ` Stephen Rothwell
  2002-08-12 14:46         ` Stephen Rothwell
  0 siblings, 2 replies; 47+ messages in thread
From: Ingo Molnar @ 2002-08-12 10:07 UTC (permalink / raw)
  To: Stephen Rothwell; +Cc: torvalds, linux-kernel, julliard, ldb


On Mon, 12 Aug 2002, Stephen Rothwell wrote:

> > -	.quad 0x0040920000000000	/* 0x40 APM set up for bad BIOS's */
> > -	.quad 0x00409a0000000000	/* 0x48 APM CS    code */
> > -	.quad 0x00009a0000000000	/* 0x50 APM CS 16 code (16 bit) */
> > -	.quad 0x0040920000000000	/* 0x58 APM DS    data */
> > +	.quad 0x0040920000000000	/* 0x80 APM set up for bad BIOS's */
> > +	.quad 0x00409a0000000000	/* 0x88 APM CS    code */
> > +	.quad 0x00009a0000000000	/* 0x90 APM CS 16 code (16 bit) */
> > +	.quad 0x0040920000000000	/* 0x98 APM DS    data */
> 
> I just lost 0x40 which needs to be exactly 0x40 if it is do its job
> (i.e. cope with brain dead BIOS writers using 0x40 as a segment offset
> in protected mode ...

you can save/restore 0x40 in kernel-space if you need to no problem.

> The idea is that segment 0x40 maps from physical address 0x400 to the
> end of the first physical page.  As a real mode program would (more or
> less) expect it to.

so you are using the kernel's GDT in real mode as well?

	Ingo


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-C3
  2002-08-12  8:23         ` Stephen Rothwell
@ 2002-08-12 10:08           ` Alan Cox
  2002-08-12 10:49             ` Ingo Molnar
                               ` (2 more replies)
  0 siblings, 3 replies; 47+ messages in thread
From: Alan Cox @ 2002-08-12 10:08 UTC (permalink / raw)
  To: Stephen Rothwell; +Cc: Ingo Molnar, Linus Torvalds, linux-kernel, julliard, ldb

On Mon, 2002-08-12 at 09:23, Stephen Rothwell wrote:
> > you can save/restore 0x40 in kernel-space if you need to no problem.
> I guess I could around every BIOS call ...
> 
> Also, Alan (Cox) will say that's OK until he does APM on SMP on broken
> BIOS's :-)

SMP actually makes no difference. I have full SMP APM working on my test
boxes now. However pre-empt and SMP are the same problem space

> We could also just say that we no longer support those broken BIOS's ...
> 
> > so you are using the kernel's GDT in real mode as well?

Yes. APM calls are made by all sorts of processes.

> No. The problem is that there are some BIOS's that contain code that (even
> though they are called in protected mode) load 0x40 into ds and expect to
> be able to reference stuff ...  Causes really interesting OOPSs :-(

Which does mean you can steal the old TLS value and put it back across
the calls just by changing the TLS data for that process. For that
matter on Windows emulation I thought Windows also needed 0x40 to be the
same offset as the BIOS does so can't we leave it hardwired ?


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-C3
  2002-08-12 10:49             ` Ingo Molnar
@ 2002-08-12 10:34               ` Alan Cox
  2002-08-12 12:17                 ` Ingo Molnar
  2002-08-12 10:35               ` Alan Cox
  1 sibling, 1 reply; 47+ messages in thread
From: Alan Cox @ 2002-08-12 10:34 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Stephen Rothwell, Linus Torvalds, linux-kernel, julliard, ldb

On Mon, 2002-08-12 at 11:49, Ingo Molnar wrote:
> but, couldnt APM use its own private GDT for real-mode calls, with 0x40
> filled in properly? That would pretty much decouple things.

That would get extremely messy when handling interrupts arriving while in
an APM bios call (which is required on many laptops). I believe the 0x40
= 0x40 assumption is identical across windows, buggy apm, buggy bios32,
buggy edd, buggy .. (you get the picture)


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-C3
  2002-08-12 10:49             ` Ingo Molnar
  2002-08-12 10:34               ` Alan Cox
@ 2002-08-12 10:35               ` Alan Cox
  1 sibling, 0 replies; 47+ messages in thread
From: Alan Cox @ 2002-08-12 10:35 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Stephen Rothwell, Linus Torvalds, linux-kernel, julliard, ldb

On Mon, 2002-08-12 at 11:49, Ingo Molnar wrote:
> but, couldnt APM use its own private GDT for real-mode calls, with 0x40
> filled in properly? That would pretty much decouple things.

Oh and secondly they are not actually real mode calls, they are
protected mode 32bit calls with certain segment registers set up to
point to specific things taken from the apm bios 32 interface


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-C3
  2002-08-12 10:08           ` Alan Cox
@ 2002-08-12 10:49             ` Ingo Molnar
  2002-08-12 10:34               ` Alan Cox
  2002-08-12 10:35               ` Alan Cox
  2002-08-12 13:10             ` Kasper Dupont
  2002-08-12 15:20             ` Ingo Molnar
  2 siblings, 2 replies; 47+ messages in thread
From: Ingo Molnar @ 2002-08-12 10:49 UTC (permalink / raw)
  To: Alan Cox; +Cc: Stephen Rothwell, Linus Torvalds, linux-kernel, julliard, ldb


On 12 Aug 2002, Alan Cox wrote:

> > No. The problem is that there are some BIOS's that contain code that (even
> > though they are called in protected mode) load 0x40 into ds and expect to
> > be able to reference stuff ...  Causes really interesting OOPSs :-(
> 
> Which does mean you can steal the old TLS value and put it back across
> the calls just by changing the TLS data for that process. For that
> matter on Windows emulation I thought Windows also needed 0x40 to be the
> same offset as the BIOS does so can't we leave it hardwired ?

i have no problem with hardwiring it (and excluding it from the TLS
allocation/setting syscalls) - in fact i almost did it that way. The
question is, is the required descriptor format 100% the same for all APM
variants, Wine and Windows and DOS emulators? It would suck if we had a
bad descriptor and also removed the ability of Wine to trap 0x40 access.

but, couldnt APM use its own private GDT for real-mode calls, with 0x40
filled in properly? That would pretty much decouple things.

	Ingo


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-C3
  2002-08-12 12:17                 ` Ingo Molnar
@ 2002-08-12 11:47                   ` Alan Cox
  2002-08-12 12:55                     ` Ingo Molnar
  0 siblings, 1 reply; 47+ messages in thread
From: Alan Cox @ 2002-08-12 11:47 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Stephen Rothwell, Linus Torvalds, linux-kernel, julliard, ldb

On Mon, 2002-08-12 at 13:17, Ingo Molnar wrote:
> ugh, we do Linux interrupts while in the APM BIOS?

We have to. Most APM bios expects interrupts to be happening. In
pre-emptive mode we may well even be switching to/from APM BIOS code in
2.5 at the moment. I've not looked into that.


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-C3
  2002-08-12 10:34               ` Alan Cox
@ 2002-08-12 12:17                 ` Ingo Molnar
  2002-08-12 11:47                   ` Alan Cox
  0 siblings, 1 reply; 47+ messages in thread
From: Ingo Molnar @ 2002-08-12 12:17 UTC (permalink / raw)
  To: Alan Cox; +Cc: Stephen Rothwell, Linus Torvalds, linux-kernel, julliard, ldb


On 12 Aug 2002, Alan Cox wrote:

> That would get extremely messy when handing interrupts arriving while in
> an APM bios call (which is required on many laptops). I believe the 0x40
> = 0x40 assumption is identical across windows, buggy apm, buggy bios32,
> buggy edd, buggy .. (you get the picture)

ugh, we do Linux interrupts while in the APM BIOS?

in any case, it should be possible to create a 'minimal GDT' for the APM
BIOS [so that Linux interrupt handling is still possible] - to isolate it
from Linux as much as possible. But i agree that this gets messy ...

	Ingo


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-C3
  2002-08-11 21:46   ` [patch] tls-2.5.31-C3 Ingo Molnar
  2002-08-12  7:34     ` Stephen Rothwell
@ 2002-08-12 12:18     ` Luca Barbieri
  2002-08-12 15:12       ` Ingo Molnar
  2002-08-12 15:53     ` [patch] tls-2.5.31-D3 Ingo Molnar
  2 siblings, 1 reply; 47+ messages in thread
From: Luca Barbieri @ 2002-08-12 12:18 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Linus Torvalds, Linux-Kernel ML, Alexandre Julliard

[-- Attachment #1: Type: text/plain, Size: 490 bytes --]

> Comments?
Numbers:
unconditional copy of 2 tls descs: 5 cycles
this patch with 1 tls desc: 26 cycles
this patch with 8 tls descs: 52 cycles
lldt: 51 cycles
lgdt: 50 cycles
context switch: 2000 cycles (measured with pipe read/write and vmstat so
it's not very accurate)

So this patch causes a 1% context switch performance drop for
multithreaded applications.

Note: the benchmark doesn't include the initial test for non-zero
nr_tls_bytes and doesn't include setting the LDT descriptor


[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-C3
  2002-08-12 12:55                     ` Ingo Molnar
@ 2002-08-12 12:29                       ` Alan Cox
  0 siblings, 0 replies; 47+ messages in thread
From: Alan Cox @ 2002-08-12 12:29 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Stephen Rothwell, Linus Torvalds, linux-kernel, julliard, ldb

On Mon, 2002-08-12 at 13:55, Ingo Molnar wrote:
> 
> On 12 Aug 2002, Alan Cox wrote:
> 
> > > ugh, we do Linux interrupts while in the APM BIOS?
> > 
> > We have to. Most APM bios expects interrupts to be happening. In
> > pre-emptive mode we may well even be switching to/from APM BIOS code in
> > 2.5 at the moment. I've not looked into that.
> 
> i think that since we hold the APM spinlock (do we always, when calling
> into the APM BIOS?), we should not preempt any APM BIOS code.

Looking at the 2.5.29 tree I have handy here there is no APM spinlock. I
don't have 2.5.30/31 unpacked to check those




^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-C3
  2002-08-12 11:47                   ` Alan Cox
@ 2002-08-12 12:55                     ` Ingo Molnar
  2002-08-12 12:29                       ` Alan Cox
  0 siblings, 1 reply; 47+ messages in thread
From: Ingo Molnar @ 2002-08-12 12:55 UTC (permalink / raw)
  To: Alan Cox; +Cc: Stephen Rothwell, Linus Torvalds, linux-kernel, julliard, ldb


On 12 Aug 2002, Alan Cox wrote:

> > ugh, we do Linux interrupts while in the APM BIOS?
> 
> We have to. Most APM bios expects interrupts to be happening. In
> pre-emptive mode we may well even be switching to/from APM BIOS code in
> 2.5 at the moment. I've not looked into that.

i think that since we hold the APM spinlock (do we always, when calling
into the APM BIOS?), we should not preempt any APM BIOS code.

	Ingo


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-C3
  2002-08-12 10:08           ` Alan Cox
  2002-08-12 10:49             ` Ingo Molnar
@ 2002-08-12 13:10             ` Kasper Dupont
  2002-08-12 15:20             ` Ingo Molnar
  2 siblings, 0 replies; 47+ messages in thread
From: Kasper Dupont @ 2002-08-12 13:10 UTC (permalink / raw)
  To: Alan Cox
  Cc: Stephen Rothwell, Ingo Molnar, Linus Torvalds, linux-kernel,
	julliard, ldb

Alan Cox wrote:
> 
> For that
> matter on Windows emulation I thought Windows also needed 0x40 to be the
> same offset as the BIOS does so can't we leave it hardwired ?

Does Wine and the BIOS actually want the same? I would believe there
would have to be a small difference. Having Wine and BIOS using the
same memory doesn't sound right to me.

Wine wanting segment 0x40 to point to virtual address 0x400 and BIOS
wanting segment 0x40 to point to physical address 0x400 sounds more
reasonable to me. But physical address 0x400 would be virtual address
0xC0000400 with the default PAGE_OFFSET.

-- 
Kasper Dupont -- der bruger for meget tid på usenet.
For sending spam use mailto:aaarep@daimi.au.dk
or mailto:mcxumhvenwblvtl@skrammel.yaboo.dk

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-C3
  2002-08-12 15:12       ` Ingo Molnar
@ 2002-08-12 13:43         ` Luca Barbieri
  2002-08-12 15:57           ` Ingo Molnar
  0 siblings, 1 reply; 47+ messages in thread
From: Luca Barbieri @ 2002-08-12 13:43 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Linus Torvalds, Linux-Kernel ML, Alexandre Julliard

[-- Attachment #1: Type: text/plain, Size: 1547 bytes --]

On Mon, 2002-08-12 at 17:12, Ingo Molnar wrote:
> 
> On 12 Aug 2002, Luca Barbieri wrote:
> 
> > Numbers:
> > unconditional copy of 2 tls descs: 5 cycles
> > this patch with 1 tls desc: 26 cycles
> > this patch with 8 tls descs: 52 cycles
> 
> [ 0 tls descs: 2 cycles. ]
Yes but common multithreaded applications will have at least 1 for
pthreads.

> but yes, this is rougly what i'd say this approach costs.
> 
> > lldt: 51 cycles
> > lgdt: 50 cycles
> > context switch: 2000 cycles (measured with pipe read/write and vmstat so
> > it's not very accurate)
> 
> > So this patch causes a 1% context switch performance drop for
> > multithreaded applications.
> 
> how did you calculate this?
((26 - 5) / 2000) * 100 ~= 1
Benchmarks done in kernel mode (2.4.18) with interrupts disabled on a
Pentium3 running the rdtsc timed benchmark in a loop 1 million times
with 8 unbenchmarked iterations to warm up caches and with the time to
execute an empty benchmark subtracted.

> glibc multithreaded applications can avoid the
> lldt via using the TLS, and thus it's a net win.
Surely, this patch is better than the old LDT method but much worse than
the 2-TLS one.

So I would use the 2-TLS approach plus my patch plus the syscall and
segment.h improvements of the tls-2.5.31-C3 patch plus support for
setting the 0x40 segment around APM calls.

BTW, are there any programs that would benefit from having more than 2
user-settable GDT entries but that don't need more than about 8?
(assuming we have a fixed flat code and data segment and 0x40 segment)


[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-C3
  2002-08-12 15:57           ` Ingo Molnar
@ 2002-08-12 14:17             ` Luca Barbieri
  0 siblings, 0 replies; 47+ messages in thread
From: Luca Barbieri @ 2002-08-12 14:17 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Linus Torvalds, Linux-Kernel ML, Alexandre Julliard

[-- Attachment #1: Type: text/plain, Size: 1414 bytes --]

On Mon, 2002-08-12 at 17:57, Ingo Molnar wrote:
> 
> On 12 Aug 2002, Luca Barbieri wrote:
> 
> > > > Numbers:
> > > > unconditional copy of 2 tls descs: 5 cycles
> > > > this patch with 1 tls desc: 26 cycles
> > > > this patch with 8 tls descs: 52 cycles
> > > 
> > > [ 0 tls descs: 2 cycles. ]
> > Yes but common multithreaded applications will have at least 1 for
> > pthreads.
> 
> i would not say 'common' and 'multithreaded' in the same sentence. It
> might be so in the future, but it isnt today.
Most modern servers (e.g. Apache2, MySQL) are multithreaded and so are
large desktop applications (e.g. Evolution, Galeon, Nautilus).
 
> > > how did you calculate this?
> > ((26 - 5) / 2000) * 100 ~= 1
> > Benchmarks done in kernel mode (2.4.18) with interrupts disabled on a
> > Pentium3 running the rdtsc timed benchmark in a loop 1 million times
> > with 8 unbenchmarked iterations to warm up caches and with the time to
> > execute an empty benchmark subtracted.
> 
> old libpthreads or new one?
What are you asking about? (benchmarks are in kernel mode and context
switch is from forked processes)

> > > glibc multithreaded applications can avoid the
> > > lldt via using the TLS, and thus it's a net win.
> > Surely, this patch is better than the old LDT method but much worse than
> > the 2-TLS one.
> 
> people asked for a 3rd TLS already.
It would be interesting to know what they would use it for.


[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-D4
  2002-08-12 16:13       ` [patch] tls-2.5.31-D4 Ingo Molnar
@ 2002-08-12 14:32         ` Luca Barbieri
  2002-08-12 17:06         ` [patch] tls-2.5.31-D5 Ingo Molnar
  1 sibling, 0 replies; 47+ messages in thread
From: Luca Barbieri @ 2002-08-12 14:32 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Linus Torvalds, Linux-Kernel ML, Alexandre Julliard

[-- Attachment #1: Type: text/plain, Size: 738 bytes --]

> the ability to change the default CS and DS segments
> as well.
This does not make any sense.
The user is free to load any selector in %cs/%ds/%es/%ss so the default
flat segments should be left alone so that a process can have the flat
segments _plus_ all the tls entries.

> although i suspect Wine needs a 16-bit entry, while
> the APM one is a 32-bit entry ...
AFAIK this only matters for code and stack segments and anyway the APM
one should be a 16-bit entry since it exists because the BIOS wrongly
assumes that it is a real-mode segment.

Anyway, isn't it better to put the user segments in a cacheline that
doesn't already lose one entry to the null selector? (and leave the
first one either empty or for BIOS/boot selectors)


[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-C3
  2002-08-12 10:07       ` Ingo Molnar
  2002-08-12  8:23         ` Stephen Rothwell
@ 2002-08-12 14:46         ` Stephen Rothwell
  1 sibling, 0 replies; 47+ messages in thread
From: Stephen Rothwell @ 2002-08-12 14:46 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, Alan Cox

On Mon, 12 Aug 2002 12:07:19 +0200 (CEST) Ingo Molnar <mingo@elte.hu> wrote:
> 
> you can save/restore 0x40 in kernel-space if you need to no problem.

How about the following (untested, not even compiled):

-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/

diff -ruN 2.5.31/arch/i386/kernel/apm.c 2.5.31-apm.1/arch/i386/kernel/apm.c
--- 2.5.31/arch/i386/kernel/apm.c	2002-08-02 11:11:34.000000000 +1000
+++ 2.5.31-apm.1/arch/i386/kernel/apm.c	2002-08-13 00:20:56.000000000 +1000
@@ -215,6 +215,7 @@
 #include <linux/pm.h>
 #include <linux/kernel.h>
 #include <linux/smp_lock.h>
+#include <linux/smp.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -419,6 +420,7 @@
 static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
 static struct apm_user *	user_list;
 static spinlock_t		user_list_lock = SPIN_LOCK_UNLOCKED;
+static struct desc_struct	bad_bios_desc = { 0, 0x00409200 };
 
 static char			driver_version[] = "1.16";	/* no spaces */
 
@@ -569,7 +571,12 @@
 {
 	APM_DECL_SEGS
 	unsigned long	flags;
+	int			cpu;
+	struct desc_struct	save_desc_40;
 
+	cpu = get_cpu();
+	save_desc_40 = cpu_gdt_table[cpu][0x40 / 8];
+	cpu_gdt_table[cpu][0x40 / 8] = bad_bios_desc;
 	local_save_flags(flags);
 	APM_DO_CLI;
 	APM_DO_SAVE_SEGS;
@@ -591,6 +598,8 @@
 		: "memory", "cc");
 	APM_DO_RESTORE_SEGS;
 	local_irq_restore(flags);
+	cpu_gdt_table[cpu][0x40 / 8] = save_desc_40;
+	put_cpu();
 	return *eax & 0xff;
 }
 
@@ -613,7 +622,12 @@
 	u8		error;
 	APM_DECL_SEGS
 	unsigned long	flags;
+	int			cpu;
+	struct desc_struct	save_desc_40;
 
+	cpu = get_cpu();
+	save_desc_40 = cpu_gdt_table[cpu][0x40 / 8];
+	cpu_gdt_table[cpu][0x40 / 8] = bad_bios_desc;
 	local_save_flags(flags);
 	APM_DO_CLI;
 	APM_DO_SAVE_SEGS;
@@ -639,6 +653,8 @@
 	}
 	APM_DO_RESTORE_SEGS;
 	local_irq_restore(flags);
+	cpu_gdt_table[smp_processor_id()][0x40 / 8] = save_desc_40;
+	put_cpu();
 	return error;
 }
 
@@ -1923,17 +1939,14 @@
 	 * that extends up to the end of page zero (that we have reserved).
 	 * This is for buggy BIOS's that refer to (real mode) segment 0x40
 	 * even though they are called in protected mode.
-	 *
-	 * NOTE: on SMP we call into the APM BIOS only on CPU#0, so it's
-	 * enough to modify CPU#0's GDT.
 	 */
-	for (i = 0; i < NR_CPUS; i++) {
-		set_base(cpu_gdt_table[i][APM_40 >> 3],
-			 __va((unsigned long)0x40 << 4));
-		_set_limit((char *)&cpu_gdt_table[i][APM_40 >> 3], 4095 - (0x40 << 4));
+	set_base(bad_bios_desc, __va((unsigned long)0x40 << 4));
+	_set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4));
+
+	apm_bios_entry.offset = apm_info.bios.offset;
+	apm_bios_entry.segment = APM_CS;
 
-		apm_bios_entry.offset = apm_info.bios.offset;
-		apm_bios_entry.segment = APM_CS;
+	for (i = 0; i < NR_CPUS; i++) {
 		set_base(cpu_gdt_table[i][APM_CS >> 3],
 			 __va((unsigned long)apm_info.bios.cseg << 4));
 		set_base(cpu_gdt_table[i][APM_CS_16 >> 3],
diff -ruN 2.5.31/arch/i386/kernel/head.S 2.5.31-apm.1/arch/i386/kernel/head.S
--- 2.5.31/arch/i386/kernel/head.S	2002-07-28 21:11:25.000000000 +1000
+++ 2.5.31-apm.1/arch/i386/kernel/head.S	2002-08-13 00:29:38.000000000 +1000
@@ -427,7 +427,10 @@
 	 * The APM segments have byte granularity and their bases
 	 * and limits are set at run time.
 	 */
-	.quad 0x0040920000000000	/* 0x40 APM set up for bad BIOS's */
+	.quad 0x0000000000000000	/* 0x40 APM will be used for bad BIOS's
+					 * Will be saved and restored
+					 * across BIOS calls. MUST NOT BE ONE
+					 * OF THE FOLLOWING THREE! */
 	.quad 0x00409a0000000000	/* 0x48 APM CS    code */
 	.quad 0x00009a0000000000	/* 0x50 APM CS 16 code (16 bit) */
 	.quad 0x0040920000000000	/* 0x58 APM DS    data */
diff -ruN 2.5.31/include/linux/apm_bios.h 2.5.31-apm.1/include/linux/apm_bios.h
--- 2.5.31/include/linux/apm_bios.h	2001-08-14 09:39:28.000000000 +1000
+++ 2.5.31-apm.1/include/linux/apm_bios.h	2002-08-13 00:38:52.000000000 +1000
@@ -21,8 +21,7 @@
 
 #ifdef __KERNEL__
 
-#define APM_40		0x40
-#define APM_CS		(APM_40 + 8)
+#define APM_CS		0x48
 #define APM_CS_16	(APM_CS + 8)
 #define APM_DS		(APM_CS_16 + 8)
 

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-C3
  2002-08-12 12:18     ` Luca Barbieri
@ 2002-08-12 15:12       ` Ingo Molnar
  2002-08-12 13:43         ` Luca Barbieri
  0 siblings, 1 reply; 47+ messages in thread
From: Ingo Molnar @ 2002-08-12 15:12 UTC (permalink / raw)
  To: Luca Barbieri; +Cc: Linus Torvalds, Linux-Kernel ML, Alexandre Julliard


On 12 Aug 2002, Luca Barbieri wrote:

> Numbers:
> unconditional copy of 2 tls descs: 5 cycles
> this patch with 1 tls desc: 26 cycles
> this patch with 8 tls descs: 52 cycles

[ 0 tls descs: 2 cycles. ]

but yes, this is roughly what i'd say this approach costs.

> lldt: 51 cycles
> lgdt: 50 cycles
> context switch: 2000 cycles (measured with pipe read/write and vmstat so
> it's not very accurate)

> So this patch causes a 1% context switch performance drop for
> multithreaded applications.

how did you calculate this? glibc multithreaded applications can avoid the
lldt via using the TLS, and thus it's a net win.

	Ingo


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-C3
  2002-08-12 10:08           ` Alan Cox
  2002-08-12 10:49             ` Ingo Molnar
  2002-08-12 13:10             ` Kasper Dupont
@ 2002-08-12 15:20             ` Ingo Molnar
  2 siblings, 0 replies; 47+ messages in thread
From: Ingo Molnar @ 2002-08-12 15:20 UTC (permalink / raw)
  To: Alan Cox; +Cc: Stephen Rothwell, Linus Torvalds, linux-kernel, julliard, ldb


On 12 Aug 2002, Alan Cox wrote:

> Which does mean you can steal the old TLS value and put it back across
> the calls just by changing the TLS data for that process. [...]

yes - the 0x40 segment can be saved & restored safely. We have per-CPU
GDTs so nobody can modify them while the APM BIOS is executing. (assuming
preemption is disabled.)

> [...] For that matter on Windows emulation I thought Windows also needed
> 0x40 to be the same offset as the BIOS does so can't we leave it
> hardwired ?

another thing: do we want this with descriptor privilege level 3? Because
the APM 0x40 GDT entry was a ring 0 descriptor, but that would not be
accessible to Wine or DOSEMU.

	Ingo


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-D5
  2002-08-12 17:06         ` [patch] tls-2.5.31-D5 Ingo Molnar
@ 2002-08-12 15:21           ` Jakub Jelinek
  2002-08-12 17:41             ` Ingo Molnar
  2002-08-12 17:24           ` [patch] tls-2.5.31-D7 Ingo Molnar
  1 sibling, 1 reply; 47+ messages in thread
From: Jakub Jelinek @ 2002-08-12 15:21 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Alexandre Julliard, Luca Barbieri

On Mon, Aug 12, 2002 at 07:06:50PM +0200, Ingo Molnar wrote:
> 
> okay, the attached patch does some more things:
> 
>  - moves the first two TLS entries and the user CS/DS entries on the same
>    cacheline.
> 
>  - excludes CS/DS from the TLS space - Luca is right in that it only slows
>    things down unnecessarily, and there is nothing that cannot be done by
>    changing the %ds %cs selectors - and every cycle counts in the 
>    context-switch path.
> 
> the only open issues are the number of TLSs supported. I'd vote for making
> them 4 and then we can inline the copy and make it unconditional, it will
> be 12 cycles to copy them all which alone is better than a branch miss. In
> this patch it's 2, thus the copying cost is 6 cycles.
> 
> with 4 entries the 0x40 entry would be taken and APM has to move further
> up, and has to save/restore the 0x40 entry across BIOS calls.

As each supported TLS entry has its context-switch time cost, I think we
should stay at 2 supported TLS entries.
My understanding was that the GDT patches were written to optimize the
common case (all threaded apps using LDT and with the advent of __thread
support causing every single application to use LDT), with 2 TLS entries
where one is for libc/libpthread and the other one is for application
usage I think it is enough for 99.9% of apps. In the rare
case someone needs more, there is still LDT which offers 8192 entries.

	Jakub

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-D7
  2002-08-12 17:24           ` [patch] tls-2.5.31-D7 Ingo Molnar
@ 2002-08-12 15:45             ` Christoph Hellwig
  0 siblings, 0 replies; 47+ messages in thread
From: Christoph Hellwig @ 2002-08-12 15:45 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Alexandre Julliard, Luca Barbieri

On Mon, Aug 12, 2002 at 07:24:25PM +0200, Ingo Molnar wrote:
> the attached patch does this:
> 
>  - there are now 4 freely usable TLS entries, amongst them 0x40 for Wine
> 
>  - the 3 APM segments fit into the hole at the end of the kernel
>    descriptor area exactly => no GDT size increase.
> 
>  - the ->private_tls code is gone - unconditional inline copies are more
>    robust and faster as well.
> 
> Plus the APM code needs Stephen's fix. I think this is the best approach
> we had so far. Any objections?

Patch looks good so far, but _please_ rename struct modify_ldt_ldt_s to
something more sensible. (yes, I know it existed before, but with this
patch the name is even more stupid than before)


^ permalink raw reply	[flat|nested] 47+ messages in thread

* [patch] tls-2.5.31-D3
  2002-08-11 21:46   ` [patch] tls-2.5.31-C3 Ingo Molnar
  2002-08-12  7:34     ` Stephen Rothwell
  2002-08-12 12:18     ` Luca Barbieri
@ 2002-08-12 15:53     ` Ingo Molnar
  2002-08-12 16:13       ` [patch] tls-2.5.31-D4 Ingo Molnar
  2 siblings, 1 reply; 47+ messages in thread
From: Ingo Molnar @ 2002-08-12 15:53 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Alexandre Julliard, Luca Barbieri


this is my latest TLS tree, changes relative to the 2.5.31-C3 patch:

- streamlined GDT layout:

 *   0 - null
 *   1 - TLS segment #1                 [ default user CS ]
 *   2 - TLS segment #2                 [ default user DS ]
 *   3 - TLS segment #3                 [ glibc's TLS segment ]
 *   4 - TLS segment #4                 [ Wine's %fs Win32 segment ]
 *   5 - TLS segment #5
 *   6 - TLS segment #6
 *   7 - TLS segment #7
 *   8 - APM BIOS support               [ segment 0x40 ]
 *   9 - APM BIOS support
 *  10 - APM BIOS support
 *  11 - APM BIOS support
 *  12 - kernel code segment            <==== new cacheline
 *  13 - kernel data segment
 *  14 - TSS
 *  15 - LDT
 *  16 - PNPBIOS support (16->32 gate)
 *  17 - PNPBIOS support
 *  18 - PNPBIOS support
 *  19 - PNPBIOS support
 *  20 - PNPBIOS support

- simplified the TLS context-switch code, no more offsets, just a
  thread->private_tls flag tells whether the task has a non-default TLS.

these two changes make the copying of the TLS faster as well - exactly 64
bytes need to be copied. Default memcpy() manages it in ~60 cycles, fully
inlined memcpy code does it in ~30 cycles. I'm copying the NULL entry as
well, to make the copy (and copy size) aligned on cacheline boundaries.  
The TLS area in the thread structure is not cacheline-aligned yet though.

and the APM code should be back to functioning again. If a common 0x40
segment can be agreed on then the APM entry should be changed and made
available to Wine - although i suspect Wine needs a 16-bit entry, while
the APM one is a 32-bit entry ...

	Ingo

--- linux/drivers/pnp/pnpbios_core.c.orig	Sun Aug 11 17:01:17 2002
+++ linux/drivers/pnp/pnpbios_core.c	Mon Aug 12 15:47:36 2002
@@ -90,7 +90,8 @@
 static union pnp_bios_expansion_header * pnp_bios_hdr = NULL;
 
 /* The PnP BIOS entries in the GDT */
-#define PNP_GDT    (0x0060)
+#define PNP_GDT    (GDT_ENTRY_PNPBIOS_BASE * 8)
+
 #define PNP_CS32   (PNP_GDT+0x00)	/* segment for calling fn */
 #define PNP_CS16   (PNP_GDT+0x08)	/* code segment for BIOS */
 #define PNP_DS     (PNP_GDT+0x10)	/* data segment for BIOS */
--- linux/arch/i386/kernel/cpu/common.c.orig	Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/cpu/common.c	Mon Aug 12 15:47:36 2002
@@ -423,6 +423,7 @@
 {
 	int cpu = smp_processor_id();
 	struct tss_struct * t = init_tss + cpu;
+	struct thread_struct *thread = &current->thread;
 
 	if (test_and_set_bit(cpu, &cpu_initialized)) {
 		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -447,9 +448,13 @@
 	 */
 	if (cpu) {
 		memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
-		cpu_gdt_descr[cpu].size = GDT_SIZE;
+		cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
 		cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
 	}
+	/*
+	 * Set up the per-thread TLS descriptor cache:
+	 */
+	memcpy(thread->tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_MAX * 8);
 
 	__asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu]));
 	__asm__ __volatile__("lidt %0": "=m" (idt_descr));
@@ -468,9 +473,9 @@
 		BUG();
 	enter_lazy_tlb(&init_mm, current, cpu);
 
-	t->esp0 = current->thread.esp0;
+	t->esp0 = thread->esp0;
 	set_tss_desc(cpu,t);
-	cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+	cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
 	load_TR_desc();
 	load_LDT(&init_mm.context);
 
--- linux/arch/i386/kernel/entry.S.orig	Sun Aug 11 17:01:07 2002
+++ linux/arch/i386/kernel/entry.S	Mon Aug 12 15:47:36 2002
@@ -753,6 +753,7 @@
 	.long sys_sched_setaffinity
 	.long sys_sched_getaffinity
 	.long sys_set_thread_area
+	.long sys_get_thread_area
 
 	.rept NR_syscalls-(.-sys_call_table)/4
 		.long sys_ni_syscall
--- linux/arch/i386/kernel/head.S.orig	Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/head.S	Mon Aug 12 15:47:36 2002
@@ -239,12 +239,7 @@
 	movl %eax,%es
 	movl %eax,%fs
 	movl %eax,%gs
-#ifdef CONFIG_SMP
-	movl $(__KERNEL_DS), %eax
-	movl %eax,%ss		# Reload the stack pointer (segment only)
-#else
-	lss stack_start,%esp	# Load processor stack
-#endif
+	movl %eax,%ss
 	xorl %eax,%eax
 	lldt %ax
 	cld			# gcc2 wants the direction flag cleared at all times
@@ -412,17 +407,17 @@
 
 ALIGN
 /*
- * The Global Descriptor Table contains 20 quadwords, per-CPU.
+ * The Global Descriptor Table contains 24 quadwords, per-CPU.
  */
 ENTRY(cpu_gdt_table)
 	.quad 0x0000000000000000	/* NULL descriptor */
-	.quad 0x0000000000000000	/* TLS descriptor */
-	.quad 0x00cf9a000000ffff	/* 0x10 kernel 4GB code at 0x00000000 */
-	.quad 0x00cf92000000ffff	/* 0x18 kernel 4GB data at 0x00000000 */
-	.quad 0x00cffa000000ffff	/* 0x23 user   4GB code at 0x00000000 */
-	.quad 0x00cff2000000ffff	/* 0x2b user   4GB data at 0x00000000 */
-	.quad 0x0000000000000000	/* TSS descriptor */
-	.quad 0x0000000000000000	/* LDT descriptor */
+	.quad 0x00cffa000000ffff	/* 0x0b user 4GB code at 0x00000000 */
+	.quad 0x00cff2000000ffff	/* 0x13 user 4GB data at 0x00000000 */
+	.quad 0x0000000000000000	/* 0x1b TLS entry 3 */
+	.quad 0x0000000000000000	/* 0x23 TLS entry 4 */
+	.quad 0x0000000000000000	/* 0x2b TLS entry 5 */
+	.quad 0x0000000000000000	/* 0x33 TLS entry 6 */
+	.quad 0x0000000000000000	/* 0x3b TLS entry 7 */
 	/*
 	 * The APM segments have byte granularity and their bases
 	 * and limits are set at run time.
@@ -431,15 +426,21 @@
 	.quad 0x00409a0000000000	/* 0x48 APM CS    code */
 	.quad 0x00009a0000000000	/* 0x50 APM CS 16 code (16 bit) */
 	.quad 0x0040920000000000	/* 0x58 APM DS    data */
+
+	.quad 0x00cf9a000000ffff	/* 0x60 kernel 4GB code at 0x00000000 */
+	.quad 0x00cf92000000ffff	/* 0x68 kernel 4GB data at 0x00000000 */
+	.quad 0x0000000000000000	/* 0x70 TSS descriptor */
+	.quad 0x0000000000000000	/* 0x78 LDT descriptor */
+
 	/* Segments used for calling PnP BIOS */
-	.quad 0x00c09a0000000000	/* 0x60 32-bit code */
-	.quad 0x00809a0000000000	/* 0x68 16-bit code */
-	.quad 0x0080920000000000	/* 0x70 16-bit data */
-	.quad 0x0080920000000000	/* 0x78 16-bit data */
-	.quad 0x0080920000000000	/* 0x80 16-bit data */
-	.quad 0x0000000000000000	/* 0x88 not used */
-	.quad 0x0000000000000000	/* 0x90 not used */
-	.quad 0x0000000000000000	/* 0x98 not used */
+	.quad 0x00c09a0000000000	/* 0x80 32-bit code */
+	.quad 0x00809a0000000000	/* 0x88 16-bit code */
+	.quad 0x0080920000000000	/* 0x90 16-bit data */
+	.quad 0x0080920000000000	/* 0x98 16-bit data */
+	.quad 0x0080920000000000	/* 0xa0 16-bit data */
+	.quad 0x0000000000000000	/* 0xa8 not used */
+	.quad 0x0000000000000000	/* 0xb0 not used */
+	.quad 0x0000000000000000	/* 0xb8 not used */
 
 #if CONFIG_SMP
 	.fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */
--- linux/arch/i386/kernel/process.c.orig	Sun Aug 11 17:01:08 2002
+++ linux/arch/i386/kernel/process.c	Mon Aug 12 15:47:36 2002
@@ -681,11 +681,9 @@
 
 	/*
 	 * Load the per-thread Thread-Local Storage descriptor.
-	 *
-	 * NOTE: it's faster to do the two stores unconditionally
-	 * than to branch away.
 	 */
-	load_TLS_desc(next, cpu);
+	if (prev->private_tls || next->private_tls)
+		load_TLS(prev, next, cpu);
 
 	/*
 	 * Save away %fs and %gs. No need to save %es and %ds, as
@@ -834,35 +832,142 @@
 #undef first_sched
 
 /*
- * Set the Thread-Local Storage area:
+ * get_free_idx: get a yet unused TLS descriptor index.
  */
-asmlinkage int sys_set_thread_area(unsigned long base, unsigned long flags)
+static int get_free_idx(void)
 {
 	struct thread_struct *t = &current->thread;
-	int writable = 0;
-	int cpu;
+	int idx;
 
-	/* do not allow unused flags */
-	if (flags & ~TLS_FLAGS_MASK)
+	for (idx = GDT_ENTRY_TLS_MIN; idx <= GDT_ENTRY_TLS_MAX; idx++)
+		if (desc_empty(t->tls_array + idx))
+			return idx;
+	return -ESRCH;
+}
+
+static inline int private_tls(struct desc_struct *array)
+{
+	struct desc_struct *default_array = init_task.thread.tls_array;
+
+	if (!memcmp(array, default_array, TLS_SIZE))
+		return 0;
+	return 1;
+}
+
+static inline int last_tls(struct desc_struct *array)
+{
+	struct desc_struct *default_array = init_task.thread.tls_array;
+	int idx;
+
+	for (idx = GDT_ENTRY_TLS_MAX; idx >= GDT_ENTRY_TLS_MIN; idx--)
+		if (!desc_equal(array + idx, default_array + idx))
+			return idx;
+
+	return 0;
+}
+
+#define CHECK_TLS_IDX(idx)						\
+do {									\
+	if ((idx) < GDT_ENTRY_TLS_MIN || (idx) > GDT_ENTRY_TLS_MAX)	\
+		BUG();							\
+} while (0)
+
+/*
+ * Set a given TLS descriptor:
+ */
+asmlinkage int sys_set_thread_area(struct modify_ldt_ldt_s *u_info)
+{
+	struct thread_struct *t = &current->thread;
+	struct modify_ldt_ldt_s info;
+	struct desc_struct *desc;
+	int cpu, idx;
+
+	if (copy_from_user(&info, u_info, sizeof(info)))
+		return -EFAULT;
+	idx = info.entry_number;
+
+	/*
+	 * index -1 means the kernel should try to find and
+	 * allocate an empty descriptor:
+	 */
+	if (idx == -1) {
+		idx = get_free_idx();
+		if (idx < 0)
+			return idx;
+		if (put_user(idx, &u_info->entry_number))
+			return -EFAULT;
+	}
+
+	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
 		return -EINVAL;
 
-	if (flags & TLS_FLAG_WRITABLE)
-		writable = 1;
+	desc = t->tls_array + idx;
 
 	/*
 	 * We must not get preempted while modifying the TLS.
 	 */
 	cpu = get_cpu();
 
-        t->tls_desc.a = ((base & 0x0000ffff) << 16) | 0xffff;
-
-        t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) |
-				0xf0000 | (writable << 9) | (1 << 15) |
-					(1 << 22) | (1 << 23) | 0x7000;
+	if (LDT_empty(&info)) {
+		desc->a = 0;
+		desc->b = 0;
+	} else {
+		desc->a = LDT_entry_a(&info);
+		desc->b = LDT_entry_b(&info);
+	}
+	t->private_tls = private_tls(t->tls_array);
+	load_TLS(t, t, cpu);
 
-	load_TLS_desc(t, cpu);
 	put_cpu();
 
-	return TLS_ENTRY*8 + 3;
+	return 0;
+}
+
+/*
+ * Get the current Thread-Local Storage area:
+ */
+
+#define GET_BASE(desc) ( \
+	(((desc)->a >> 16) & 0x0000ffff) | \
+	(((desc)->b << 16) & 0x00ff0000) | \
+	( (desc)->b        & 0xff000000)   )
+
+#define GET_LIMIT(desc) ( \
+	((desc)->a & 0x0ffff) | \
+	 ((desc)->b & 0xf0000) )
+	
+#define GET_32BIT(desc)		(((desc)->b >> 23) & 1)
+#define GET_CONTENTS(desc)	(((desc)->b >> 10) & 3)
+#define GET_WRITABLE(desc)	(((desc)->b >>  9) & 1)
+#define GET_LIMIT_PAGES(desc)	(((desc)->b >> 23) & 1)
+#define GET_PRESENT(desc)	(((desc)->b >> 15) & 1)
+#define GET_USEABLE(desc)	(((desc)->b >> 20) & 1)
+
+asmlinkage int sys_get_thread_area(struct modify_ldt_ldt_s *u_info)
+{
+	struct modify_ldt_ldt_s info;
+	struct desc_struct *desc;
+	int idx;
+
+	if (get_user(idx, &u_info->entry_number))
+		return -EFAULT;
+	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+		return -EINVAL;
+
+	desc = current->thread.tls_array + idx;
+
+	info.entry_number = idx;
+	info.base_addr = GET_BASE(desc);
+	info.limit = GET_LIMIT(desc);
+	info.seg_32bit = GET_32BIT(desc);
+	info.contents = GET_CONTENTS(desc);
+	info.read_exec_only = !GET_WRITABLE(desc);
+	info.limit_in_pages = GET_LIMIT_PAGES(desc);
+	info.seg_not_present = !GET_PRESENT(desc);
+	info.useable = GET_USEABLE(desc);
+
+	if (copy_to_user(u_info, &info, sizeof(info)))
+		return -EFAULT;
+	return 0;
 }
 
--- linux/arch/i386/kernel/suspend.c.orig	Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/suspend.c	Mon Aug 12 15:47:37 2002
@@ -207,7 +207,7 @@
 	struct tss_struct * t = init_tss + cpu;
 
 	set_tss_desc(cpu,t);	/* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */
-        cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+        cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
 
 	load_TR_desc();				/* This does ltr */
 	load_LDT(&current->mm->context);	/* This does lldt */
--- linux/arch/i386/kernel/ldt.c.orig	Sun Aug 11 17:01:04 2002
+++ linux/arch/i386/kernel/ldt.c	Mon Aug 12 15:47:37 2002
@@ -200,32 +200,17 @@
 
    	/* Allow LDTs to be cleared by the user. */
    	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
-		if (oldmode ||
-		    (ldt_info.contents == 0		&&
-		     ldt_info.read_exec_only == 1	&&
-		     ldt_info.seg_32bit == 0		&&
-		     ldt_info.limit_in_pages == 0	&&
-		     ldt_info.seg_not_present == 1	&&
-		     ldt_info.useable == 0 )) {
+		if (oldmode || LDT_empty(&ldt_info)) {
 			entry_1 = 0;
 			entry_2 = 0;
 			goto install;
 		}
 	}
 
-	entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
-		  (ldt_info.limit & 0x0ffff);
-	entry_2 = (ldt_info.base_addr & 0xff000000) |
-		  ((ldt_info.base_addr & 0x00ff0000) >> 16) |
-		  (ldt_info.limit & 0xf0000) |
-		  ((ldt_info.read_exec_only ^ 1) << 9) |
-		  (ldt_info.contents << 10) |
-		  ((ldt_info.seg_not_present ^ 1) << 15) |
-		  (ldt_info.seg_32bit << 22) |
-		  (ldt_info.limit_in_pages << 23) |
-		  0x7000;
-	if (!oldmode)
-		entry_2 |= (ldt_info.useable << 20);
+	entry_1 = LDT_entry_a(&ldt_info);
+	entry_2 = LDT_entry_b(&ldt_info);
+	if (oldmode)
+		entry_2 &= ~(1 << 20);
 
 	/* Install the new entry ...  */
 install:
--- linux/arch/i386/boot/setup.S.orig	Sun Jun  9 07:26:32 2002
+++ linux/arch/i386/boot/setup.S	Mon Aug 12 15:47:37 2002
@@ -1005,9 +1005,14 @@
 	ret
 
 # Descriptor tables
+#
+# NOTE: if you think the GDT is large, you can make it smaller by just
+# defining the KERNEL_CS and KERNEL_DS entries and shifting the gdt
+# address down by GDT_ENTRY_KERNEL_CS*8. This puts bogus entries into
+# the GDT, but those won't be used so it's not a problem.
+#
 gdt:
-	.word	0, 0, 0, 0			# dummy
-	.word	0, 0, 0, 0			# unused
+	.fill GDT_ENTRY_KERNEL_CS,8,0
 
 	.word	0xFFFF				# 4Gb - (0x100000*0x1000 = 4Gb)
 	.word	0				# base address = 0
--- linux/include/linux/apm_bios.h.orig	Sun Jun  9 07:30:24 2002
+++ linux/include/linux/apm_bios.h	Mon Aug 12 15:47:37 2002
@@ -21,8 +21,8 @@
 
 #ifdef __KERNEL__
 
-#define APM_40		0x40
-#define APM_CS		(APM_40 + 8)
+#define APM_40		(GDT_ENTRY_APMBIOS_BASE * 8)
+#define APM_CS		(APM_BASE + 8)
 #define APM_CS_16	(APM_CS + 8)
 #define APM_DS		(APM_CS_16 + 8)
 
--- linux/include/asm-i386/desc.h.orig	Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/desc.h	Mon Aug 12 15:47:37 2002
@@ -2,50 +2,12 @@
 #define __ARCH_DESC_H
 
 #include <asm/ldt.h>
-
-/*
- * The layout of the per-CPU GDT under Linux:
- *
- *   0 - null
- *   1 - Thread-Local Storage (TLS) segment
- *   2 - kernel code segment
- *   3 - kernel data segment
- *   4 - user code segment		<==== new cacheline
- *   5 - user data segment
- *   6 - TSS
- *   7 - LDT
- *   8 - APM BIOS support		<==== new cacheline
- *   9 - APM BIOS support
- *  10 - APM BIOS support
- *  11 - APM BIOS support
- *  12 - PNPBIOS support		<==== new cacheline
- *  13 - PNPBIOS support
- *  14 - PNPBIOS support
- *  15 - PNPBIOS support
- *  16 - PNPBIOS support		<==== new cacheline
- *  17 - not used
- *  18 - not used
- *  19 - not used
- */
-#define TLS_ENTRY 1
-#define TSS_ENTRY 6
-#define LDT_ENTRY 7
-/*
- * The interrupt descriptor table has room for 256 idt's,
- * the global descriptor table is dependent on the number
- * of tasks we can have..
- *
- * We pad the GDT to cacheline boundary.
- */
-#define IDT_ENTRIES 256
-#define GDT_ENTRIES 20
+#include <asm/segment.h>
 
 #ifndef __ASSEMBLY__
 
 #include <asm/mmu.h>
 
-#define GDT_SIZE (GDT_ENTRIES*sizeof(struct desc_struct))
-
 extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
 
 struct Xgt_desc_struct {
@@ -55,8 +17,8 @@
 
 extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
 
-#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (TSS_ENTRY<<3))
-#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (LDT_ENTRY<<3))
+#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8))
+#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8))
 
 /*
  * This is the ldt that every process will get unless we need
@@ -78,21 +40,42 @@
 
 static inline void set_tss_desc(unsigned int cpu, void *addr)
 {
-	_set_tssldt_desc(&cpu_gdt_table[cpu][TSS_ENTRY], (int)addr, 235, 0x89);
+	_set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (int)addr, 235, 0x89);
 }
 
 static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
 {
-	_set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82);
+	_set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
 }
 
-#define TLS_FLAGS_MASK			0x00000001
+#define LDT_entry_a(info) \
+	((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
 
-#define TLS_FLAG_WRITABLE		0x00000001
+#define LDT_entry_b(info) \
+	(((info)->base_addr & 0xff000000) | \
+	(((info)->base_addr & 0x00ff0000) >> 16) | \
+	((info)->limit & 0xf0000) | \
+	(((info)->read_exec_only ^ 1) << 9) | \
+	((info)->contents << 10) | \
+	(((info)->seg_not_present ^ 1) << 15) | \
+	((info)->seg_32bit << 22) | \
+	((info)->limit_in_pages << 23) | \
+	((info)->useable << 20) | \
+	0x7000)
+
+#define LDT_empty(info) (\
+	(info)->base_addr	== 0	&& \
+	(info)->limit		== 0	&& \
+	(info)->contents	== 0	&& \
+	(info)->read_exec_only	== 1	&& \
+	(info)->seg_32bit	== 0	&& \
+	(info)->limit_in_pages	== 0	&& \
+	(info)->seg_not_present	== 1	&& \
+	(info)->useable		== 0	)
 
-static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu)
+static inline void load_TLS(struct thread_struct *prev, struct thread_struct *next, unsigned int cpu)
 {
-	cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc;
+	memcpy(cpu_gdt_table[cpu], next->tls_array, TLS_SIZE);
 }
 
 static inline void clear_LDT(void)
--- linux/include/asm-i386/processor.h.orig	Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/processor.h	Mon Aug 12 15:47:37 2002
@@ -22,6 +22,11 @@
 	unsigned long a,b;
 };
 
+#define desc_empty(desc) \
+		(!((desc)->a + (desc)->b))
+
+#define desc_equal(desc1, desc2) \
+		(((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
 /*
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").
@@ -376,8 +381,16 @@
 	unsigned long		v86flags, v86mask, v86mode, saved_esp0;
 /* IO permissions */
 	unsigned long	*ts_io_bitmap;
-/* TLS cached descriptor */
-	struct desc_struct tls_desc;
+
+	/*
+	 * cached TLS descriptors.
+	 *
+	 * The offset calculation is needed to not copy the whole TLS
+	 * into the local GDT all the time.
+	 * We count offsets in bytes to reduce context-switch overhead.
+	 */
+	int private_tls;
+	struct desc_struct tls_array[GDT_ENTRY_TLS_MAX + 1];
 };
 
 #define INIT_THREAD  {						\
@@ -401,7 +414,7 @@
 	0,0,0,0, /* esp,ebp,esi,edi */				\
 	0,0,0,0,0,0, /* es,cs,ss */				\
 	0,0,0,0,0,0, /* ds,fs,gs */				\
-	LDT_ENTRY,0, /* ldt */					\
+	GDT_ENTRY_LDT,0, /* ldt */					\
 	0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */		\
 	{~0, } /* ioperm */					\
 }
--- linux/include/asm-i386/segment.h.orig	Sun Jun  9 07:28:19 2002
+++ linux/include/asm-i386/segment.h	Mon Aug 12 15:47:37 2002
@@ -1,10 +1,79 @@
 #ifndef _ASM_SEGMENT_H
 #define _ASM_SEGMENT_H
 
-#define __KERNEL_CS	0x10
-#define __KERNEL_DS	0x18
+/*
+ * The layout of the per-CPU GDT under Linux:
+ *
+ *   0 - null
+ *
+ *  ------- start of TLS (Thread-Local Storage) segments:
+ *
+ *   1 - TLS segment #1			[ default user CS ]
+ *   2 - TLS segment #2			[ default user DS ]
+ *   3 - TLS segment #3			[ glibc's TLS segment ]
+ *   4 - TLS segment #4			[ Wine's %fs Win32 segment ]
+ *   5 - TLS segment #5
+ *   6 - TLS segment #6
+ *   7 - TLS segment #7
+ *
+ *  ------- start of kernel segments:
+ *
+ *   8 - APM BIOS support		[ segment 0x40 ]
+ *   9 - APM BIOS support
+ *  10 - APM BIOS support
+ *  11 - APM BIOS support 
+ *  12 - kernel code segment		<==== new cacheline
+ *  13 - kernel data segment
+ *  14 - TSS
+ *  15 - LDT
+ *  16 - PNPBIOS support (16->32 gate)
+ *  17 - PNPBIOS support
+ *  18 - PNPBIOS support
+ *  19 - PNPBIOS support
+ *  20 - PNPBIOS support
+ *  21 - reserved
+ *  22 - reserved
+ *  23 - reserved
+ */
+#define GDT_ENTRY_TLS_ENTRIES	7
+#define GDT_ENTRY_TLS_MIN	1
+#define GDT_ENTRY_TLS_MAX 	(GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
 
-#define __USER_CS	0x23
-#define __USER_DS	0x2B
+#define TLS_SIZE (GDT_ENTRY_TLS_MAX * 8)
+
+#define GDT_ENTRY_DEFAULT_USER_CS	(GDT_ENTRY_TLS_MIN + 0)
+#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
+
+#define GDT_ENTRY_DEFAULT_USER_DS	(GDT_ENTRY_TLS_MIN + 1)
+#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
+
+
+#define GDT_ENTRY_KERNEL_BASE	8
+
+#define GDT_ENTRY_APMBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 0)
+#define GDT_ENTRY_KERNEL_CS		(GDT_ENTRY_KERNEL_BASE + 4)
+#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
+
+#define GDT_ENTRY_KERNEL_DS		(GDT_ENTRY_KERNEL_BASE + 5)
+#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
+
+#define GDT_ENTRY_TSS			(GDT_ENTRY_KERNEL_BASE + 6)
+#define GDT_ENTRY_LDT			(GDT_ENTRY_KERNEL_BASE + 7)
+
+#define GDT_ENTRY_PNPBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 8)
+
+/*
+ * The GDT has 21 entries but we pad it to cacheline boundary:
+ */
+#define GDT_ENTRIES 24
+
+#define GDT_SIZE (GDT_ENTRIES * 8)
+
+/*
+ * The interrupt descriptor table has room for 256 idt's,
+ * the global descriptor table is dependent on the number
+ * of tasks we can have..
+ */
+#define IDT_ENTRIES 256
 
 #endif
--- linux/include/asm-i386/unistd.h.orig	Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/unistd.h	Mon Aug 12 15:47:37 2002
@@ -248,6 +248,7 @@
 #define __NR_sched_setaffinity	241
 #define __NR_sched_getaffinity	242
 #define __NR_set_thread_area	243
+#define __NR_get_thread_area	244
 
 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
 


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-D5
  2002-08-12 17:41             ` Ingo Molnar
@ 2002-08-12 15:54               ` Luca Barbieri
  2002-08-12 18:03               ` [patch] tls-2.5.31-D9 Ingo Molnar
  2002-08-13  1:50               ` [patch] tls-2.5.31-D5 Alexandre Julliard
  2 siblings, 0 replies; 47+ messages in thread
From: Luca Barbieri @ 2002-08-12 15:54 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Jakub Jelinek, Linus Torvalds, Linux-Kernel ML, Alexandre Julliard

[-- Attachment #1: Type: text/plain, Size: 694 bytes --]

> well, i think i have to agree ... if it wasnt for Wine's 0x40 descriptor.  
> But it certainly does not come free. We could have 3 TLS entries (0x40
> will be the last entry), and the copying cost is 9 cycles. (compared to 6
> cycles in the 2 entries case.) Good enough?
Or we could leave 0x40 fixed to 0x400 and use only 2.

This loses flexibility but anyway the only 2 apps that could use it are
dosemu and wine and I think that they already need to have it mapped at
0x400 for vm86 (no one uses 16-bit DLLs anymore).

Of course this is only valid if Win32 doesn't use it because otherwise
we would lose the ability to do null-pointer checking in programs using
Win32 DLLs (e.g. mplayer).


[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-C3
  2002-08-12 13:43         ` Luca Barbieri
@ 2002-08-12 15:57           ` Ingo Molnar
  2002-08-12 14:17             ` Luca Barbieri
  0 siblings, 1 reply; 47+ messages in thread
From: Ingo Molnar @ 2002-08-12 15:57 UTC (permalink / raw)
  To: Luca Barbieri; +Cc: Linus Torvalds, Linux-Kernel ML, Alexandre Julliard


On 12 Aug 2002, Luca Barbieri wrote:

> > > Numbers:
> > > unconditional copy of 2 tls descs: 5 cycles
> > > this patch with 1 tls desc: 26 cycles
> > > this patch with 8 tls descs: 52 cycles
> > 
> > [ 0 tls descs: 2 cycles. ]
> Yes but common multithreaded applications will have at least 1 for
> pthreads.

i would not say 'common' and 'multithreaded' in the same sentence. It
might be so in the future, but it isn't today.

> > how did you calculate this?
> ((26 - 5) / 2000) * 100 ~= 1
> Benchmarks done in kernel mode (2.4.18) with interrupts disabled on a
> Pentium3 running the rdtsc timed benchmark in a loop 1 million times
> with 8 unbenchmarked iterations to warm up caches and with the time to
> execute an empty benchmark subtracted.

old libpthreads or new one?

> > glibc multithreaded applications can avoid the
> > lldt via using the TLS, and thus it's a net win.
> Surely, this patch is better than the old LDT method but much worse than
> the 2-TLS one.

people asked for a 3rd TLS already.

	Ingo


^ permalink raw reply	[flat|nested] 47+ messages in thread

* [patch] tls-2.5.31-D4
  2002-08-12 15:53     ` [patch] tls-2.5.31-D3 Ingo Molnar
@ 2002-08-12 16:13       ` Ingo Molnar
  2002-08-12 14:32         ` Luca Barbieri
  2002-08-12 17:06         ` [patch] tls-2.5.31-D5 Ingo Molnar
  0 siblings, 2 replies; 47+ messages in thread
From: Ingo Molnar @ 2002-08-12 16:13 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Alexandre Julliard, Luca Barbieri


the attached patch (against 2.5.31-vanilla) further reduces the number of
TLS entries and optimizes the load_TLS() code, which is now down to 11
cycles. There are 3 more entries left around for cacheline alignment
reasons, so we can use them just in case more TLSs are needed.

this is in essence the '2 free TLS entries' code, with the difference of
more flexibility and the ability to change the default CS and DS segments
as well.

	Ingo

--- linux/drivers/pnp/pnpbios_core.c.orig	Sun Aug 11 17:01:17 2002
+++ linux/drivers/pnp/pnpbios_core.c	Mon Aug 12 16:12:38 2002
@@ -90,7 +90,8 @@
 static union pnp_bios_expansion_header * pnp_bios_hdr = NULL;
 
 /* The PnP BIOS entries in the GDT */
-#define PNP_GDT    (0x0060)
+#define PNP_GDT    (GDT_ENTRY_PNPBIOS_BASE * 8)
+
 #define PNP_CS32   (PNP_GDT+0x00)	/* segment for calling fn */
 #define PNP_CS16   (PNP_GDT+0x08)	/* code segment for BIOS */
 #define PNP_DS     (PNP_GDT+0x10)	/* data segment for BIOS */
--- linux/arch/i386/kernel/cpu/common.c.orig	Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/cpu/common.c	Mon Aug 12 16:12:38 2002
@@ -423,6 +423,7 @@
 {
 	int cpu = smp_processor_id();
 	struct tss_struct * t = init_tss + cpu;
+	struct thread_struct *thread = &current->thread;
 
 	if (test_and_set_bit(cpu, &cpu_initialized)) {
 		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -447,9 +448,13 @@
 	 */
 	if (cpu) {
 		memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
-		cpu_gdt_descr[cpu].size = GDT_SIZE;
+		cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
 		cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
 	}
+	/*
+	 * Set up the per-thread TLS descriptor cache:
+	 */
+	memcpy(thread->tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_MAX * 8);
 
 	__asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu]));
 	__asm__ __volatile__("lidt %0": "=m" (idt_descr));
@@ -468,9 +473,9 @@
 		BUG();
 	enter_lazy_tlb(&init_mm, current, cpu);
 
-	t->esp0 = current->thread.esp0;
+	t->esp0 = thread->esp0;
 	set_tss_desc(cpu,t);
-	cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+	cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
 	load_TR_desc();
 	load_LDT(&init_mm.context);
 
--- linux/arch/i386/kernel/entry.S.orig	Sun Aug 11 17:01:07 2002
+++ linux/arch/i386/kernel/entry.S	Mon Aug 12 16:12:38 2002
@@ -753,6 +753,7 @@
 	.long sys_sched_setaffinity
 	.long sys_sched_getaffinity
 	.long sys_set_thread_area
+	.long sys_get_thread_area
 
 	.rept NR_syscalls-(.-sys_call_table)/4
 		.long sys_ni_syscall
--- linux/arch/i386/kernel/head.S.orig	Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/head.S	Mon Aug 12 16:12:38 2002
@@ -239,12 +239,7 @@
 	movl %eax,%es
 	movl %eax,%fs
 	movl %eax,%gs
-#ifdef CONFIG_SMP
-	movl $(__KERNEL_DS), %eax
-	movl %eax,%ss		# Reload the stack pointer (segment only)
-#else
-	lss stack_start,%esp	# Load processor stack
-#endif
+	movl %eax,%ss
 	xorl %eax,%eax
 	lldt %ax
 	cld			# gcc2 wants the direction flag cleared at all times
@@ -412,17 +407,17 @@
 
 ALIGN
 /*
- * The Global Descriptor Table contains 20 quadwords, per-CPU.
+ * The Global Descriptor Table contains 24 quadwords, per-CPU.
  */
 ENTRY(cpu_gdt_table)
 	.quad 0x0000000000000000	/* NULL descriptor */
-	.quad 0x0000000000000000	/* TLS descriptor */
-	.quad 0x00cf9a000000ffff	/* 0x10 kernel 4GB code at 0x00000000 */
-	.quad 0x00cf92000000ffff	/* 0x18 kernel 4GB data at 0x00000000 */
-	.quad 0x00cffa000000ffff	/* 0x23 user   4GB code at 0x00000000 */
-	.quad 0x00cff2000000ffff	/* 0x2b user   4GB data at 0x00000000 */
-	.quad 0x0000000000000000	/* TSS descriptor */
-	.quad 0x0000000000000000	/* LDT descriptor */
+	.quad 0x00cffa000000ffff	/* 0x0b user 4GB code at 0x00000000 */
+	.quad 0x00cff2000000ffff	/* 0x13 user 4GB data at 0x00000000 */
+	.quad 0x0000000000000000	/* 0x1b TLS entry 3 */
+	.quad 0x0000000000000000	/* 0x23 TLS entry 4 */
+	.quad 0x0000000000000000	/* 0x2b reserved */
+	.quad 0x0000000000000000	/* 0x33 reserved */
+	.quad 0x0000000000000000	/* 0x3b reserved */
 	/*
 	 * The APM segments have byte granularity and their bases
 	 * and limits are set at run time.
@@ -431,15 +426,21 @@
 	.quad 0x00409a0000000000	/* 0x48 APM CS    code */
 	.quad 0x00009a0000000000	/* 0x50 APM CS 16 code (16 bit) */
 	.quad 0x0040920000000000	/* 0x58 APM DS    data */
+
+	.quad 0x00cf9a000000ffff	/* 0x60 kernel 4GB code at 0x00000000 */
+	.quad 0x00cf92000000ffff	/* 0x68 kernel 4GB data at 0x00000000 */
+	.quad 0x0000000000000000	/* 0x70 TSS descriptor */
+	.quad 0x0000000000000000	/* 0x78 LDT descriptor */
+
 	/* Segments used for calling PnP BIOS */
-	.quad 0x00c09a0000000000	/* 0x60 32-bit code */
-	.quad 0x00809a0000000000	/* 0x68 16-bit code */
-	.quad 0x0080920000000000	/* 0x70 16-bit data */
-	.quad 0x0080920000000000	/* 0x78 16-bit data */
-	.quad 0x0080920000000000	/* 0x80 16-bit data */
-	.quad 0x0000000000000000	/* 0x88 not used */
-	.quad 0x0000000000000000	/* 0x90 not used */
-	.quad 0x0000000000000000	/* 0x98 not used */
+	.quad 0x00c09a0000000000	/* 0x80 32-bit code */
+	.quad 0x00809a0000000000	/* 0x88 16-bit code */
+	.quad 0x0080920000000000	/* 0x90 16-bit data */
+	.quad 0x0080920000000000	/* 0x98 16-bit data */
+	.quad 0x0080920000000000	/* 0xa0 16-bit data */
+	.quad 0x0000000000000000	/* 0xa8 not used */
+	.quad 0x0000000000000000	/* 0xb0 not used */
+	.quad 0x0000000000000000	/* 0xb8 not used */
 
 #if CONFIG_SMP
 	.fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */
--- linux/arch/i386/kernel/process.c.orig	Sun Aug 11 17:01:08 2002
+++ linux/arch/i386/kernel/process.c	Mon Aug 12 16:12:38 2002
@@ -681,11 +681,9 @@
 
 	/*
 	 * Load the per-thread Thread-Local Storage descriptor.
-	 *
-	 * NOTE: it's faster to do the two stores unconditionally
-	 * than to branch away.
 	 */
-	load_TLS_desc(next, cpu);
+	if (prev->private_tls || next->private_tls)
+		load_TLS(next, cpu);
 
 	/*
 	 * Save away %fs and %gs. No need to save %es and %ds, as
@@ -834,35 +832,142 @@
 #undef first_sched
 
 /*
- * Set the Thread-Local Storage area:
+ * get_free_idx: find a not yet used TLS descriptor index.
  */
-asmlinkage int sys_set_thread_area(unsigned long base, unsigned long flags)
+static int get_free_idx(void)
 {
 	struct thread_struct *t = &current->thread;
-	int writable = 0;
-	int cpu;
+	int idx;
 
-	/* do not allow unused flags */
-	if (flags & ~TLS_FLAGS_MASK)
+	for (idx = GDT_ENTRY_TLS_MIN; idx <= GDT_ENTRY_TLS_MAX; idx++)
+		if (desc_empty(t->tls_array + idx))
+			return idx;
+	return -ESRCH;
+}
+
+static inline int private_tls(struct desc_struct *array)
+{
+	struct desc_struct *default_array = init_task.thread.tls_array;
+
+	/* tls_array is indexed by GDT slot, so compare slots 0..TLS_MAX inclusive */
+	if (!memcmp(array, default_array, (GDT_ENTRY_TLS_MAX + 1) * sizeof(*array)))
+		return 0;
+	return 1;
+}
+
+static inline int last_tls(struct desc_struct *array)
+{
+	struct desc_struct *default_array = init_task.thread.tls_array;
+	int idx;
+
+	for (idx = GDT_ENTRY_TLS_MAX; idx >= GDT_ENTRY_TLS_MIN; idx--)
+		if (!desc_equal(array + idx, default_array + idx))
+			return idx;
+
+	return 0;
+}
+
+#define CHECK_TLS_IDX(idx)						\
+do {									\
+	if ((idx) < GDT_ENTRY_TLS_MIN || (idx) > GDT_ENTRY_TLS_MAX)	\
+		BUG();							\
+} while (0)
+
+/*
+ * Set a given TLS descriptor:
+ */
+asmlinkage int sys_set_thread_area(struct modify_ldt_ldt_s *u_info)
+{
+	struct thread_struct *t = &current->thread;
+	struct modify_ldt_ldt_s info;
+	struct desc_struct *desc;
+	int cpu, idx;
+
+	if (copy_from_user(&info, u_info, sizeof(info)))
+		return -EFAULT;
+	idx = info.entry_number;
+
+	/*
+	 * index -1 means the kernel should try to find and
+	 * allocate an empty descriptor:
+	 */
+	if (idx == -1) {
+		idx = get_free_idx();
+		if (idx < 0)
+			return idx;
+		if (put_user(idx, &u_info->entry_number))
+			return -EFAULT;
+	}
+
+	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
 		return -EINVAL;
 
-	if (flags & TLS_FLAG_WRITABLE)
-		writable = 1;
+	desc = t->tls_array + idx;
 
 	/*
 	 * We must not get preempted while modifying the TLS.
 	 */
 	cpu = get_cpu();
 
-        t->tls_desc.a = ((base & 0x0000ffff) << 16) | 0xffff;
-
-        t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) |
-				0xf0000 | (writable << 9) | (1 << 15) |
-					(1 << 22) | (1 << 23) | 0x7000;
+	if (LDT_empty(&info)) {
+		desc->a = 0;
+		desc->b = 0;
+	} else {
+		desc->a = LDT_entry_a(&info);
+		desc->b = LDT_entry_b(&info);
+	}
+	t->private_tls = private_tls(t->tls_array);
+	load_TLS(t, cpu);
 
-	load_TLS_desc(t, cpu);
 	put_cpu();
 
-	return TLS_ENTRY*8 + 3;
+	return 0;
+}
+
+/*
+ * Get the current Thread-Local Storage area:
+ */
+
+#define GET_BASE(desc) ( \
+	(((desc)->a >> 16) & 0x0000ffff) | \
+	(((desc)->b << 16) & 0x00ff0000) | \
+	( (desc)->b        & 0xff000000)   )
+
+#define GET_LIMIT(desc) ( \
+	((desc)->a & 0x0ffff) | \
+	 ((desc)->b & 0xf0000) )
+	
+#define GET_32BIT(desc)		(((desc)->b >> 22) & 1)	/* LDT_entry_b puts seg_32bit at bit 22 */
+#define GET_CONTENTS(desc)	(((desc)->b >> 10) & 3)
+#define GET_WRITABLE(desc)	(((desc)->b >>  9) & 1)
+#define GET_LIMIT_PAGES(desc)	(((desc)->b >> 23) & 1)
+#define GET_PRESENT(desc)	(((desc)->b >> 15) & 1)
+#define GET_USEABLE(desc)	(((desc)->b >> 20) & 1)
+
+asmlinkage int sys_get_thread_area(struct modify_ldt_ldt_s *u_info)
+{
+	struct modify_ldt_ldt_s info;
+	struct desc_struct *desc;
+	int idx;
+
+	if (get_user(idx, &u_info->entry_number))
+		return -EFAULT;
+	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+		return -EINVAL;
+
+	desc = current->thread.tls_array + idx;
+	memset(&info, 0, sizeof(info));	/* unused bitfield bits must not leak stack data */
+	info.entry_number = idx;
+	info.base_addr = GET_BASE(desc);
+	info.limit = GET_LIMIT(desc);
+	info.seg_32bit = GET_32BIT(desc);
+	info.contents = GET_CONTENTS(desc);
+	info.read_exec_only = !GET_WRITABLE(desc);
+	info.limit_in_pages = GET_LIMIT_PAGES(desc);
+	info.seg_not_present = !GET_PRESENT(desc);
+	info.useable = GET_USEABLE(desc);
+
+	if (copy_to_user(u_info, &info, sizeof(info)))
+		return -EFAULT;
+	return 0;
 }
 
--- linux/arch/i386/kernel/suspend.c.orig	Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/suspend.c	Mon Aug 12 16:12:38 2002
@@ -207,7 +207,7 @@
 	struct tss_struct * t = init_tss + cpu;
 
 	set_tss_desc(cpu,t);	/* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */
-        cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+        cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
 
 	load_TR_desc();				/* This does ltr */
 	load_LDT(&current->mm->context);	/* This does lldt */
--- linux/arch/i386/kernel/ldt.c.orig	Sun Aug 11 17:01:04 2002
+++ linux/arch/i386/kernel/ldt.c	Mon Aug 12 16:12:38 2002
@@ -200,32 +200,17 @@
 
    	/* Allow LDTs to be cleared by the user. */
    	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
-		if (oldmode ||
-		    (ldt_info.contents == 0		&&
-		     ldt_info.read_exec_only == 1	&&
-		     ldt_info.seg_32bit == 0		&&
-		     ldt_info.limit_in_pages == 0	&&
-		     ldt_info.seg_not_present == 1	&&
-		     ldt_info.useable == 0 )) {
+		if (oldmode || LDT_empty(&ldt_info)) {
 			entry_1 = 0;
 			entry_2 = 0;
 			goto install;
 		}
 	}
 
-	entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
-		  (ldt_info.limit & 0x0ffff);
-	entry_2 = (ldt_info.base_addr & 0xff000000) |
-		  ((ldt_info.base_addr & 0x00ff0000) >> 16) |
-		  (ldt_info.limit & 0xf0000) |
-		  ((ldt_info.read_exec_only ^ 1) << 9) |
-		  (ldt_info.contents << 10) |
-		  ((ldt_info.seg_not_present ^ 1) << 15) |
-		  (ldt_info.seg_32bit << 22) |
-		  (ldt_info.limit_in_pages << 23) |
-		  0x7000;
-	if (!oldmode)
-		entry_2 |= (ldt_info.useable << 20);
+	entry_1 = LDT_entry_a(&ldt_info);
+	entry_2 = LDT_entry_b(&ldt_info);
+	if (oldmode)
+		entry_2 &= ~(1 << 20);
 
 	/* Install the new entry ...  */
 install:
--- linux/arch/i386/boot/setup.S.orig	Sun Jun  9 07:26:32 2002
+++ linux/arch/i386/boot/setup.S	Mon Aug 12 16:12:38 2002
@@ -1005,9 +1005,14 @@
 	ret
 
 # Descriptor tables
+#
+# NOTE: if you think the GDT is large, you can make it smaller by just
+# defining the KERNEL_CS and KERNEL_DS entries and shifting the gdt
+# address down by GDT_ENTRY_KERNEL_CS*8. This puts bogus entries into
+# the GDT, but those wont be used so it's not a problem.
+#
 gdt:
-	.word	0, 0, 0, 0			# dummy
-	.word	0, 0, 0, 0			# unused
+	.fill GDT_ENTRY_KERNEL_CS,8,0
 
 	.word	0xFFFF				# 4Gb - (0x100000*0x1000 = 4Gb)
 	.word	0				# base address = 0
--- linux/include/linux/apm_bios.h.orig	Sun Jun  9 07:30:24 2002
+++ linux/include/linux/apm_bios.h	Mon Aug 12 16:12:38 2002
@@ -21,8 +21,8 @@
 
 #ifdef __KERNEL__
 
-#define APM_40		0x40
-#define APM_CS		(APM_40 + 8)
+#define APM_40		(GDT_ENTRY_APMBIOS_BASE * 8)
+#define APM_CS		(APM_40 + 8)
 #define APM_CS_16	(APM_CS + 8)
 #define APM_DS		(APM_CS_16 + 8)
 
--- linux/include/asm-i386/desc.h.orig	Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/desc.h	Mon Aug 12 16:12:38 2002
@@ -2,50 +2,12 @@
 #define __ARCH_DESC_H
 
 #include <asm/ldt.h>
-
-/*
- * The layout of the per-CPU GDT under Linux:
- *
- *   0 - null
- *   1 - Thread-Local Storage (TLS) segment
- *   2 - kernel code segment
- *   3 - kernel data segment
- *   4 - user code segment		<==== new cacheline
- *   5 - user data segment
- *   6 - TSS
- *   7 - LDT
- *   8 - APM BIOS support		<==== new cacheline
- *   9 - APM BIOS support
- *  10 - APM BIOS support
- *  11 - APM BIOS support
- *  12 - PNPBIOS support		<==== new cacheline
- *  13 - PNPBIOS support
- *  14 - PNPBIOS support
- *  15 - PNPBIOS support
- *  16 - PNPBIOS support		<==== new cacheline
- *  17 - not used
- *  18 - not used
- *  19 - not used
- */
-#define TLS_ENTRY 1
-#define TSS_ENTRY 6
-#define LDT_ENTRY 7
-/*
- * The interrupt descriptor table has room for 256 idt's,
- * the global descriptor table is dependent on the number
- * of tasks we can have..
- *
- * We pad the GDT to cacheline boundary.
- */
-#define IDT_ENTRIES 256
-#define GDT_ENTRIES 20
+#include <asm/segment.h>
 
 #ifndef __ASSEMBLY__
 
 #include <asm/mmu.h>
 
-#define GDT_SIZE (GDT_ENTRIES*sizeof(struct desc_struct))
-
 extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
 
 struct Xgt_desc_struct {
@@ -55,8 +17,8 @@
 
 extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
 
-#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (TSS_ENTRY<<3))
-#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (LDT_ENTRY<<3))
+#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8))
+#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8))
 
 /*
  * This is the ldt that every process will get unless we need
@@ -78,21 +40,48 @@
 
 static inline void set_tss_desc(unsigned int cpu, void *addr)
 {
-	_set_tssldt_desc(&cpu_gdt_table[cpu][TSS_ENTRY], (int)addr, 235, 0x89);
+	_set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (int)addr, 235, 0x89);
 }
 
 static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
 {
-	_set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82);
+	_set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
 }
 
-#define TLS_FLAGS_MASK			0x00000001
+#define LDT_entry_a(info) \
+	((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
 
-#define TLS_FLAG_WRITABLE		0x00000001
+#define LDT_entry_b(info) \
+	(((info)->base_addr & 0xff000000) | \
+	(((info)->base_addr & 0x00ff0000) >> 16) | \
+	((info)->limit & 0xf0000) | \
+	(((info)->read_exec_only ^ 1) << 9) | \
+	((info)->contents << 10) | \
+	(((info)->seg_not_present ^ 1) << 15) | \
+	((info)->seg_32bit << 22) | \
+	((info)->limit_in_pages << 23) | \
+	((info)->useable << 20) | \
+	0x7000)
+
+#define LDT_empty(info) (\
+	(info)->base_addr	== 0	&& \
+	(info)->limit		== 0	&& \
+	(info)->contents	== 0	&& \
+	(info)->read_exec_only	== 1	&& \
+	(info)->seg_32bit	== 0	&& \
+	(info)->limit_in_pages	== 0	&& \
+	(info)->seg_not_present	== 1	&& \
+	(info)->useable		== 0	)
+
+#if TLS_SIZE != 32
+# error update this code.
+#endif
 
-static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu)
+static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
 {
-	cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc;
+#define C(i) cpu_gdt_table[cpu][i] = t->tls_array[i]
+	C(1); C(2); C(3); C(4);	/* GDT_ENTRY_TLS_MIN..GDT_ENTRY_TLS_MAX only */
+#undef C
 }
 
 static inline void clear_LDT(void)
--- linux/include/asm-i386/processor.h.orig	Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/processor.h	Mon Aug 12 16:12:38 2002
@@ -22,6 +22,11 @@
 	unsigned long a,b;
 };
 
+#define desc_empty(desc) \
+		(!((desc)->a | (desc)->b))
+
+#define desc_equal(desc1, desc2) \
+		(((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
 /*
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").
@@ -376,8 +381,16 @@
 	unsigned long		v86flags, v86mask, v86mode, saved_esp0;
 /* IO permissions */
 	unsigned long	*ts_io_bitmap;
-/* TLS cached descriptor */
-	struct desc_struct tls_desc;
+
+	/*
+	 * cached TLS descriptors.
+	 *
+	 * private_tls is nonzero when tls_array differs from the default
+	 * descriptors, so the context-switch path can skip copying the
+	 * TLS entries into the local GDT when neither task needs it.
+	 */
+	int private_tls;
+	struct desc_struct tls_array[GDT_ENTRY_TLS_MAX + 1];
 };
 
 #define INIT_THREAD  {						\
@@ -401,7 +414,7 @@
 	0,0,0,0, /* esp,ebp,esi,edi */				\
 	0,0,0,0,0,0, /* es,cs,ss */				\
 	0,0,0,0,0,0, /* ds,fs,gs */				\
-	LDT_ENTRY,0, /* ldt */					\
+	GDT_ENTRY_LDT,0, /* ldt */					\
 	0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */		\
 	{~0, } /* ioperm */					\
 }
--- linux/include/asm-i386/segment.h.orig	Sun Jun  9 07:28:19 2002
+++ linux/include/asm-i386/segment.h	Mon Aug 12 16:12:38 2002
@@ -1,10 +1,79 @@
 #ifndef _ASM_SEGMENT_H
 #define _ASM_SEGMENT_H
 
-#define __KERNEL_CS	0x10
-#define __KERNEL_DS	0x18
+/*
+ * The layout of the per-CPU GDT under Linux:
+ *
+ *   0 - null
+ *
+ *  ------- start of TLS (Thread-Local Storage) segments:
+ *
+ *   1 - TLS segment #1			[ default user CS ]
+ *   2 - TLS segment #2			[ default user DS ]
+ *   3 - TLS segment #3			[ glibc's TLS segment ]
+ *   4 - TLS segment #4			[ Wine's %fs Win32 segment ]
+ *
+ *  ------- start of kernel segments:
+ *
+ *   5 - reserved
+ *   6 - reserved
+ *   7 - reserved
+ *   8 - APM BIOS support		[ segment 0x40 ]
+ *   9 - APM BIOS support
+ *  10 - APM BIOS support
+ *  11 - APM BIOS support 
+ *  12 - kernel code segment		<==== new cacheline
+ *  13 - kernel data segment
+ *  14 - TSS
+ *  15 - LDT
+ *  16 - PNPBIOS support (16->32 gate)
+ *  17 - PNPBIOS support
+ *  18 - PNPBIOS support
+ *  19 - PNPBIOS support
+ *  20 - PNPBIOS support
+ *  21 - reserved
+ *  22 - reserved
+ *  23 - reserved
+ */
+#define GDT_ENTRY_TLS_ENTRIES	4
+#define GDT_ENTRY_TLS_MIN	1
+#define GDT_ENTRY_TLS_MAX 	(GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
 
-#define __USER_CS	0x23
-#define __USER_DS	0x2B
+#define TLS_SIZE (GDT_ENTRY_TLS_MAX * 8)
+
+#define GDT_ENTRY_DEFAULT_USER_CS	(GDT_ENTRY_TLS_MIN + 0)
+#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
+
+#define GDT_ENTRY_DEFAULT_USER_DS	(GDT_ENTRY_TLS_MIN + 1)
+#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
+
+
+#define GDT_ENTRY_KERNEL_BASE	8
+
+#define GDT_ENTRY_APMBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 0)
+#define GDT_ENTRY_KERNEL_CS		(GDT_ENTRY_KERNEL_BASE + 4)
+#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
+
+#define GDT_ENTRY_KERNEL_DS		(GDT_ENTRY_KERNEL_BASE + 5)
+#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
+
+#define GDT_ENTRY_TSS			(GDT_ENTRY_KERNEL_BASE + 6)
+#define GDT_ENTRY_LDT			(GDT_ENTRY_KERNEL_BASE + 7)
+
+#define GDT_ENTRY_PNPBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 8)
+
+/*
+ * The GDT has 21 entries but we pad it to cacheline boundary:
+ */
+#define GDT_ENTRIES 24
+
+#define GDT_SIZE (GDT_ENTRIES * 8)
+
+/*
+ * The interrupt descriptor table has room for 256 idt's,
+ * the global descriptor table is dependent on the number
+ * of tasks we can have..
+ */
+#define IDT_ENTRIES 256
 
 #endif
--- linux/include/asm-i386/unistd.h.orig	Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/unistd.h	Mon Aug 12 16:12:38 2002
@@ -248,6 +248,7 @@
 #define __NR_sched_setaffinity	241
 #define __NR_sched_getaffinity	242
 #define __NR_set_thread_area	243
+#define __NR_get_thread_area	244
 
 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
 


^ permalink raw reply	[flat|nested] 47+ messages in thread

* [patch] tls-2.5.31-D5
  2002-08-12 16:13       ` [patch] tls-2.5.31-D4 Ingo Molnar
  2002-08-12 14:32         ` Luca Barbieri
@ 2002-08-12 17:06         ` Ingo Molnar
  2002-08-12 15:21           ` Jakub Jelinek
  2002-08-12 17:24           ` [patch] tls-2.5.31-D7 Ingo Molnar
  1 sibling, 2 replies; 47+ messages in thread
From: Ingo Molnar @ 2002-08-12 17:06 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Alexandre Julliard, Luca Barbieri


okay, the attached patch does some more things:

 - moves the first two TLS entries and the user CS/DS entries on the same
   cacheline.

 - excludes CS/DS from the TLS space - Luca is right in that it only slows
   things down unnecessarily, and there is nothing that cannot be done by
   changing the %ds %cs selectors - and every cycle counts in the
   context-switch path.

the only open issue is the number of TLSs supported. I'd vote for making
them 4 and then we can inline the copy and make it unconditional, it will
be 12 cycles to copy them all which alone is better than a branch miss. In
this patch it's 2, thus the copying cost is 6 cycles.

with 4 entries the 0x40 entry would be taken and APM has to move further
up, and has to save/restore the 0x40 entry across BIOS calls.

	Ingo

--- linux/drivers/pnp/pnpbios_core.c.orig	Sun Aug 11 17:01:17 2002
+++ linux/drivers/pnp/pnpbios_core.c	Mon Aug 12 17:01:11 2002
@@ -90,7 +90,8 @@
 static union pnp_bios_expansion_header * pnp_bios_hdr = NULL;
 
 /* The PnP BIOS entries in the GDT */
-#define PNP_GDT    (0x0060)
+#define PNP_GDT    (GDT_ENTRY_PNPBIOS_BASE * 8)
+
 #define PNP_CS32   (PNP_GDT+0x00)	/* segment for calling fn */
 #define PNP_CS16   (PNP_GDT+0x08)	/* code segment for BIOS */
 #define PNP_DS     (PNP_GDT+0x10)	/* data segment for BIOS */
--- linux/arch/i386/kernel/cpu/common.c.orig	Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/cpu/common.c	Mon Aug 12 17:01:11 2002
@@ -423,6 +423,7 @@
 {
 	int cpu = smp_processor_id();
 	struct tss_struct * t = init_tss + cpu;
+	struct thread_struct *thread = &current->thread;
 
 	if (test_and_set_bit(cpu, &cpu_initialized)) {
 		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -447,9 +448,13 @@
 	 */
 	if (cpu) {
 		memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
-		cpu_gdt_descr[cpu].size = GDT_SIZE;
+		cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
 		cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
 	}
+	/*
+	 * Set up the per-thread TLS descriptor cache:
+	 */
+	memcpy(thread->tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_MAX * 8);
 
 	__asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu]));
 	__asm__ __volatile__("lidt %0": "=m" (idt_descr));
@@ -468,9 +473,9 @@
 		BUG();
 	enter_lazy_tlb(&init_mm, current, cpu);
 
-	t->esp0 = current->thread.esp0;
+	t->esp0 = thread->esp0;
 	set_tss_desc(cpu,t);
-	cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+	cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
 	load_TR_desc();
 	load_LDT(&init_mm.context);
 
--- linux/arch/i386/kernel/entry.S.orig	Sun Aug 11 17:01:07 2002
+++ linux/arch/i386/kernel/entry.S	Mon Aug 12 17:01:11 2002
@@ -753,6 +753,7 @@
 	.long sys_sched_setaffinity
 	.long sys_sched_getaffinity
 	.long sys_set_thread_area
+	.long sys_get_thread_area
 
 	.rept NR_syscalls-(.-sys_call_table)/4
 		.long sys_ni_syscall
--- linux/arch/i386/kernel/head.S.orig	Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/head.S	Mon Aug 12 17:01:11 2002
@@ -239,12 +239,7 @@
 	movl %eax,%es
 	movl %eax,%fs
 	movl %eax,%gs
-#ifdef CONFIG_SMP
-	movl $(__KERNEL_DS), %eax
-	movl %eax,%ss		# Reload the stack pointer (segment only)
-#else
-	lss stack_start,%esp	# Load processor stack
-#endif
+	movl %eax,%ss
 	xorl %eax,%eax
 	lldt %ax
 	cld			# gcc2 wants the direction flag cleared at all times
@@ -412,17 +407,17 @@
 
 ALIGN
 /*
- * The Global Descriptor Table contains 20 quadwords, per-CPU.
+ * The Global Descriptor Table contains 24 quadwords, per-CPU.
  */
 ENTRY(cpu_gdt_table)
 	.quad 0x0000000000000000	/* NULL descriptor */
-	.quad 0x0000000000000000	/* TLS descriptor */
-	.quad 0x00cf9a000000ffff	/* 0x10 kernel 4GB code at 0x00000000 */
-	.quad 0x00cf92000000ffff	/* 0x18 kernel 4GB data at 0x00000000 */
-	.quad 0x00cffa000000ffff	/* 0x23 user   4GB code at 0x00000000 */
-	.quad 0x00cff2000000ffff	/* 0x2b user   4GB data at 0x00000000 */
-	.quad 0x0000000000000000	/* TSS descriptor */
-	.quad 0x0000000000000000	/* LDT descriptor */
+	.quad 0x0000000000000000	/* 0x0b reserved */
+	.quad 0x0000000000000000	/* 0x13 reserved */
+	.quad 0x0000000000000000	/* 0x1b reserved */
+	.quad 0x00cffa000000ffff	/* 0x23 user 4GB code at 0x00000000 */
+	.quad 0x00cff2000000ffff	/* 0x2b user 4GB data at 0x00000000 */
+	.quad 0x0000000000000000	/* 0x33 TLS entry 1 */
+	.quad 0x0000000000000000	/* 0x3b TLS entry 2 */
 	/*
 	 * The APM segments have byte granularity and their bases
 	 * and limits are set at run time.
@@ -431,15 +426,21 @@
 	.quad 0x00409a0000000000	/* 0x48 APM CS    code */
 	.quad 0x00009a0000000000	/* 0x50 APM CS 16 code (16 bit) */
 	.quad 0x0040920000000000	/* 0x58 APM DS    data */
+
+	.quad 0x00cf9a000000ffff	/* 0x60 kernel 4GB code at 0x00000000 */
+	.quad 0x00cf92000000ffff	/* 0x68 kernel 4GB data at 0x00000000 */
+	.quad 0x0000000000000000	/* 0x70 TSS descriptor */
+	.quad 0x0000000000000000	/* 0x78 LDT descriptor */
+
 	/* Segments used for calling PnP BIOS */
-	.quad 0x00c09a0000000000	/* 0x60 32-bit code */
-	.quad 0x00809a0000000000	/* 0x68 16-bit code */
-	.quad 0x0080920000000000	/* 0x70 16-bit data */
-	.quad 0x0080920000000000	/* 0x78 16-bit data */
-	.quad 0x0080920000000000	/* 0x80 16-bit data */
-	.quad 0x0000000000000000	/* 0x88 not used */
-	.quad 0x0000000000000000	/* 0x90 not used */
-	.quad 0x0000000000000000	/* 0x98 not used */
+	.quad 0x00c09a0000000000	/* 0x80 32-bit code */
+	.quad 0x00809a0000000000	/* 0x88 16-bit code */
+	.quad 0x0080920000000000	/* 0x90 16-bit data */
+	.quad 0x0080920000000000	/* 0x98 16-bit data */
+	.quad 0x0080920000000000	/* 0xa0 16-bit data */
+	.quad 0x0000000000000000	/* 0xa8 not used */
+	.quad 0x0000000000000000	/* 0xb0 not used */
+	.quad 0x0000000000000000	/* 0xb8 not used */
 
 #if CONFIG_SMP
 	.fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */
--- linux/arch/i386/kernel/process.c.orig	Sun Aug 11 17:01:08 2002
+++ linux/arch/i386/kernel/process.c	Mon Aug 12 17:01:11 2002
@@ -681,11 +681,9 @@
 
 	/*
 	 * Load the per-thread Thread-Local Storage descriptor.
-	 *
-	 * NOTE: it's faster to do the two stores unconditionally
-	 * than to branch away.
 	 */
-	load_TLS_desc(next, cpu);
+	if (prev->private_tls || next->private_tls)
+		load_TLS(next, cpu);
 
 	/*
 	 * Save away %fs and %gs. No need to save %es and %ds, as
@@ -834,35 +832,125 @@
 #undef first_sched
 
 /*
- * Set the Thread-Local Storage area:
+ * get_free_idx: get a yet unused TLS descriptor index.
  */
-asmlinkage int sys_set_thread_area(unsigned long base, unsigned long flags)
+static int get_free_idx(void)
 {
 	struct thread_struct *t = &current->thread;
-	int writable = 0;
-	int cpu;
+	int idx;
 
-	/* do not allow unused flags */
-	if (flags & ~TLS_FLAGS_MASK)
+	for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
+		if (desc_empty(t->tls_array + idx))
+			return idx + GDT_ENTRY_TLS_MIN;
+	return -ESRCH;
+}
+
+static inline int private_tls(struct desc_struct *array)
+{
+	int idx;
+
+	for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
+		if (!desc_empty(array + idx))
+			return 0;
+	return 1;
+}
+
+/*
+ * Set a given TLS descriptor:
+ */
+asmlinkage int sys_set_thread_area(struct modify_ldt_ldt_s *u_info)
+{
+	struct thread_struct *t = &current->thread;
+	struct modify_ldt_ldt_s info;
+	struct desc_struct *desc;
+	int cpu, idx;
+
+	if (copy_from_user(&info, u_info, sizeof(info)))
+		return -EFAULT;
+	idx = info.entry_number;
+
+	/*
+	 * index -1 means the kernel should try to find and
+	 * allocate an empty descriptor:
+	 */
+	if (idx == -1) {
+		idx = get_free_idx();
+		if (idx < 0)
+			return idx;
+		if (put_user(idx, &u_info->entry_number))
+			return -EFAULT;
+	}
+
+	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
 		return -EINVAL;
 
-	if (flags & TLS_FLAG_WRITABLE)
-		writable = 1;
+	desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
 
 	/*
 	 * We must not get preempted while modifying the TLS.
 	 */
 	cpu = get_cpu();
 
-        t->tls_desc.a = ((base & 0x0000ffff) << 16) | 0xffff;
-
-        t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) |
-				0xf0000 | (writable << 9) | (1 << 15) |
-					(1 << 22) | (1 << 23) | 0x7000;
+	if (LDT_empty(&info)) {
+		desc->a = 0;
+		desc->b = 0;
+	} else {
+		desc->a = LDT_entry_a(&info);
+		desc->b = LDT_entry_b(&info);
+	}
+	t->private_tls = private_tls(t->tls_array);
+	load_TLS(t, cpu);
 
-	load_TLS_desc(t, cpu);
 	put_cpu();
 
-	return TLS_ENTRY*8 + 3;
+	return 0;
+}
+
+/*
+ * Get the current Thread-Local Storage area:
+ */
+
+#define GET_BASE(desc) ( \
+	(((desc)->a >> 16) & 0x0000ffff) | \
+	(((desc)->b << 16) & 0x00ff0000) | \
+	( (desc)->b        & 0xff000000)   )
+
+#define GET_LIMIT(desc) ( \
+	((desc)->a & 0x0ffff) | \
+	 ((desc)->b & 0xf0000) )
+	
+#define GET_32BIT(desc)		(((desc)->b >> 23) & 1)
+#define GET_CONTENTS(desc)	(((desc)->b >> 10) & 3)
+#define GET_WRITABLE(desc)	(((desc)->b >>  9) & 1)
+#define GET_LIMIT_PAGES(desc)	(((desc)->b >> 23) & 1)
+#define GET_PRESENT(desc)	(((desc)->b >> 15) & 1)
+#define GET_USEABLE(desc)	(((desc)->b >> 20) & 1)
+
+asmlinkage int sys_get_thread_area(struct modify_ldt_ldt_s *u_info)
+{
+	struct modify_ldt_ldt_s info;
+	struct desc_struct *desc;
+	int idx;
+
+	if (get_user(idx, &u_info->entry_number))
+		return -EFAULT;
+	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+		return -EINVAL;
+
+	desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
+
+	info.entry_number = idx;
+	info.base_addr = GET_BASE(desc);
+	info.limit = GET_LIMIT(desc);
+	info.seg_32bit = GET_32BIT(desc);
+	info.contents = GET_CONTENTS(desc);
+	info.read_exec_only = !GET_WRITABLE(desc);
+	info.limit_in_pages = GET_LIMIT_PAGES(desc);
+	info.seg_not_present = !GET_PRESENT(desc);
+	info.useable = GET_USEABLE(desc);
+
+	if (copy_to_user(u_info, &info, sizeof(info)))
+		return -EFAULT;
+	return 0;
 }
 
--- linux/arch/i386/kernel/suspend.c.orig	Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/suspend.c	Mon Aug 12 17:01:11 2002
@@ -207,7 +207,7 @@
 	struct tss_struct * t = init_tss + cpu;
 
 	set_tss_desc(cpu,t);	/* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */
-        cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+        cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
 
 	load_TR_desc();				/* This does ltr */
 	load_LDT(&current->mm->context);	/* This does lldt */
--- linux/arch/i386/kernel/ldt.c.orig	Sun Aug 11 17:01:04 2002
+++ linux/arch/i386/kernel/ldt.c	Mon Aug 12 17:01:11 2002
@@ -200,32 +200,17 @@
 
    	/* Allow LDTs to be cleared by the user. */
    	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
-		if (oldmode ||
-		    (ldt_info.contents == 0		&&
-		     ldt_info.read_exec_only == 1	&&
-		     ldt_info.seg_32bit == 0		&&
-		     ldt_info.limit_in_pages == 0	&&
-		     ldt_info.seg_not_present == 1	&&
-		     ldt_info.useable == 0 )) {
+		if (oldmode || LDT_empty(&ldt_info)) {
 			entry_1 = 0;
 			entry_2 = 0;
 			goto install;
 		}
 	}
 
-	entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
-		  (ldt_info.limit & 0x0ffff);
-	entry_2 = (ldt_info.base_addr & 0xff000000) |
-		  ((ldt_info.base_addr & 0x00ff0000) >> 16) |
-		  (ldt_info.limit & 0xf0000) |
-		  ((ldt_info.read_exec_only ^ 1) << 9) |
-		  (ldt_info.contents << 10) |
-		  ((ldt_info.seg_not_present ^ 1) << 15) |
-		  (ldt_info.seg_32bit << 22) |
-		  (ldt_info.limit_in_pages << 23) |
-		  0x7000;
-	if (!oldmode)
-		entry_2 |= (ldt_info.useable << 20);
+	entry_1 = LDT_entry_a(&ldt_info);
+	entry_2 = LDT_entry_b(&ldt_info);
+	if (oldmode)
+		entry_2 &= ~(1 << 20);
 
 	/* Install the new entry ...  */
 install:
--- linux/arch/i386/boot/setup.S.orig	Sun Jun  9 07:26:32 2002
+++ linux/arch/i386/boot/setup.S	Mon Aug 12 17:01:11 2002
@@ -1005,9 +1005,14 @@
 	ret
 
 # Descriptor tables
+#
+# NOTE: if you think the GDT is large, you can make it smaller by just
+# defining the KERNEL_CS and KERNEL_DS entries and shifting the gdt
+# address down by GDT_ENTRY_KERNEL_CS*8. This puts bogus entries into
+# the GDT, but those won't be used so it's not a problem.
+#
 gdt:
-	.word	0, 0, 0, 0			# dummy
-	.word	0, 0, 0, 0			# unused
+	.fill GDT_ENTRY_KERNEL_CS,8,0
 
 	.word	0xFFFF				# 4Gb - (0x100000*0x1000 = 4Gb)
 	.word	0				# base address = 0
--- linux/include/linux/apm_bios.h.orig	Sun Jun  9 07:30:24 2002
+++ linux/include/linux/apm_bios.h	Mon Aug 12 17:01:11 2002
@@ -21,8 +21,8 @@
 
 #ifdef __KERNEL__
 
-#define APM_40		0x40
-#define APM_CS		(APM_40 + 8)
+#define APM_40		(GDT_ENTRY_APMBIOS_BASE * 8)
+#define APM_CS		(APM_BASE + 8)
 #define APM_CS_16	(APM_CS + 8)
 #define APM_DS		(APM_CS_16 + 8)
 
--- linux/include/asm-i386/desc.h.orig	Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/desc.h	Mon Aug 12 17:01:11 2002
@@ -2,50 +2,12 @@
 #define __ARCH_DESC_H
 
 #include <asm/ldt.h>
-
-/*
- * The layout of the per-CPU GDT under Linux:
- *
- *   0 - null
- *   1 - Thread-Local Storage (TLS) segment
- *   2 - kernel code segment
- *   3 - kernel data segment
- *   4 - user code segment		<==== new cacheline
- *   5 - user data segment
- *   6 - TSS
- *   7 - LDT
- *   8 - APM BIOS support		<==== new cacheline
- *   9 - APM BIOS support
- *  10 - APM BIOS support
- *  11 - APM BIOS support
- *  12 - PNPBIOS support		<==== new cacheline
- *  13 - PNPBIOS support
- *  14 - PNPBIOS support
- *  15 - PNPBIOS support
- *  16 - PNPBIOS support		<==== new cacheline
- *  17 - not used
- *  18 - not used
- *  19 - not used
- */
-#define TLS_ENTRY 1
-#define TSS_ENTRY 6
-#define LDT_ENTRY 7
-/*
- * The interrupt descriptor table has room for 256 idt's,
- * the global descriptor table is dependent on the number
- * of tasks we can have..
- *
- * We pad the GDT to cacheline boundary.
- */
-#define IDT_ENTRIES 256
-#define GDT_ENTRIES 20
+#include <asm/segment.h>
 
 #ifndef __ASSEMBLY__
 
 #include <asm/mmu.h>
 
-#define GDT_SIZE (GDT_ENTRIES*sizeof(struct desc_struct))
-
 extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
 
 struct Xgt_desc_struct {
@@ -55,8 +17,8 @@
 
 extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
 
-#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (TSS_ENTRY<<3))
-#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (LDT_ENTRY<<3))
+#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8))
+#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8))
 
 /*
  * This is the ldt that every process will get unless we need
@@ -78,21 +40,48 @@
 
 static inline void set_tss_desc(unsigned int cpu, void *addr)
 {
-	_set_tssldt_desc(&cpu_gdt_table[cpu][TSS_ENTRY], (int)addr, 235, 0x89);
+	_set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (int)addr, 235, 0x89);
 }
 
 static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
 {
-	_set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82);
+	_set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
 }
 
-#define TLS_FLAGS_MASK			0x00000001
+#define LDT_entry_a(info) \
+	((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
 
-#define TLS_FLAG_WRITABLE		0x00000001
+#define LDT_entry_b(info) \
+	(((info)->base_addr & 0xff000000) | \
+	(((info)->base_addr & 0x00ff0000) >> 16) | \
+	((info)->limit & 0xf0000) | \
+	(((info)->read_exec_only ^ 1) << 9) | \
+	((info)->contents << 10) | \
+	(((info)->seg_not_present ^ 1) << 15) | \
+	((info)->seg_32bit << 22) | \
+	((info)->limit_in_pages << 23) | \
+	((info)->useable << 20) | \
+	0x7000)
+
+#define LDT_empty(info) (\
+	(info)->base_addr	== 0	&& \
+	(info)->limit		== 0	&& \
+	(info)->contents	== 0	&& \
+	(info)->read_exec_only	== 1	&& \
+	(info)->seg_32bit	== 0	&& \
+	(info)->limit_in_pages	== 0	&& \
+	(info)->seg_not_present	== 1	&& \
+	(info)->useable		== 0	)
+
+#if TLS_SIZE != 16
+# error update this code.
+#endif
 
-static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu)
+static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
 {
-	cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc;
+#define C(i) cpu_gdt_table[cpu][GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]
+	C(0); C(1);
+#undef C
 }
 
 static inline void clear_LDT(void)
--- linux/include/asm-i386/processor.h.orig	Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/processor.h	Mon Aug 12 17:01:11 2002
@@ -22,6 +22,11 @@
 	unsigned long a,b;
 };
 
+#define desc_empty(desc) \
+		(!((desc)->a + (desc)->b))
+
+#define desc_equal(desc1, desc2) \
+		(((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
 /*
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").
@@ -376,8 +381,16 @@
 	unsigned long		v86flags, v86mask, v86mode, saved_esp0;
 /* IO permissions */
 	unsigned long	*ts_io_bitmap;
-/* TLS cached descriptor */
-	struct desc_struct tls_desc;
+
+	/*
+	 * cached TLS descriptors.
+	 *
+	 * The offset calculation is needed to not copy the whole TLS
+	 * into the local GDT all the time.
+	 * We count offsets in bytes to reduce context-switch overhead.
+	 */
+	int private_tls;
+	struct desc_struct tls_array[GDT_ENTRY_TLS_MAX + 1];
 };
 
 #define INIT_THREAD  {						\
@@ -401,7 +414,7 @@
 	0,0,0,0, /* esp,ebp,esi,edi */				\
 	0,0,0,0,0,0, /* es,cs,ss */				\
 	0,0,0,0,0,0, /* ds,fs,gs */				\
-	LDT_ENTRY,0, /* ldt */					\
+	GDT_ENTRY_LDT,0, /* ldt */					\
 	0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */		\
 	{~0, } /* ioperm */					\
 }
--- linux/include/asm-i386/segment.h.orig	Sun Jun  9 07:28:19 2002
+++ linux/include/asm-i386/segment.h	Mon Aug 12 17:01:11 2002
@@ -1,10 +1,79 @@
 #ifndef _ASM_SEGMENT_H
 #define _ASM_SEGMENT_H
 
-#define __KERNEL_CS	0x10
-#define __KERNEL_DS	0x18
+/*
+ * The layout of the per-CPU GDT under Linux:
+ *
+ *   0 - null
+ *   1 - reserved
+ *   2 - reserved
+ *   3 - reserved
+ *
+ *   4 - default user CS		<==== new cacheline
+ *   5 - default user DS
+ *
+ *  ------- start of TLS (Thread-Local Storage) segments:
+ *
+ *   6 - TLS segment #1			[ glibc's TLS segment ]
+ *   7 - TLS segment #2			[ Wine's %fs Win32 segment ]
+ *
+ *  ------- start of kernel segments:
+ *
+ *   8 - APM BIOS support		[ segment 0x40 ]
+ *   9 - APM BIOS support
+ *  10 - APM BIOS support
+ *  11 - APM BIOS support 
+ *  12 - kernel code segment		<==== new cacheline
+ *  13 - kernel data segment
+ *  14 - TSS
+ *  15 - LDT
+ *  16 - PNPBIOS support (16->32 gate)
+ *  17 - PNPBIOS support
+ *  18 - PNPBIOS support
+ *  19 - PNPBIOS support
+ *  20 - PNPBIOS support
+ *  21 - reserved
+ *  22 - reserved
+ *  23 - reserved
+ */
+#define GDT_ENTRY_TLS_ENTRIES	2
+#define GDT_ENTRY_TLS_MIN	6
+#define GDT_ENTRY_TLS_MAX 	(GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
 
-#define __USER_CS	0x23
-#define __USER_DS	0x2B
+#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
+
+#define GDT_ENTRY_DEFAULT_USER_CS	4
+#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
+
+#define GDT_ENTRY_DEFAULT_USER_DS	5
+#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
+
+#define GDT_ENTRY_KERNEL_BASE	8
+
+#define GDT_ENTRY_APMBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 0)
+#define GDT_ENTRY_KERNEL_CS		(GDT_ENTRY_KERNEL_BASE + 4)
+#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
+
+#define GDT_ENTRY_KERNEL_DS		(GDT_ENTRY_KERNEL_BASE + 5)
+#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
+
+#define GDT_ENTRY_TSS			(GDT_ENTRY_KERNEL_BASE + 6)
+#define GDT_ENTRY_LDT			(GDT_ENTRY_KERNEL_BASE + 7)
+
+#define GDT_ENTRY_PNPBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 8)
+
+/*
+ * The GDT has 21 entries but we pad it to cacheline boundary:
+ */
+#define GDT_ENTRIES 24
+
+#define GDT_SIZE (GDT_ENTRIES * 8)
+
+/*
+ * The interrupt descriptor table has room for 256 idt's,
+ * the global descriptor table is dependent on the number
+ * of tasks we can have..
+ */
+#define IDT_ENTRIES 256
 
 #endif
--- linux/include/asm-i386/unistd.h.orig	Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/unistd.h	Mon Aug 12 17:01:11 2002
@@ -248,6 +248,7 @@
 #define __NR_sched_setaffinity	241
 #define __NR_sched_getaffinity	242
 #define __NR_set_thread_area	243
+#define __NR_get_thread_area	244
 
 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
 


^ permalink raw reply	[flat|nested] 47+ messages in thread

* [patch] tls-2.5.31-D7
  2002-08-12 17:06         ` [patch] tls-2.5.31-D5 Ingo Molnar
  2002-08-12 15:21           ` Jakub Jelinek
@ 2002-08-12 17:24           ` Ingo Molnar
  2002-08-12 15:45             ` Christoph Hellwig
  1 sibling, 1 reply; 47+ messages in thread
From: Ingo Molnar @ 2002-08-12 17:24 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, Alexandre Julliard, Luca Barbieri


> the only open issue is the number of TLS entries supported. I'd vote for
> making it 4; then we can inline the copy and make it unconditional:
> it will be 12 cycles to copy them all, which alone is better than a
> branch miss. In this patch it's 2, thus the copying cost is 6 cycles.
> 
> with 4 entries the 0x40 entry would be taken and APM has to move further
> up, and has to save/restore the 0x40 entry across BIOS calls.

the attached patch does this:

 - there are now 4 freely usable TLS entries, amongst them 0x40 for Wine

 - the 3 APM segments fit into the hole at the end of the kernel
   descriptor area exactly => no GDT size increase.

 - the ->private_tls code is gone - unconditional inline copies are more
   robust and faster as well.

Plus the APM code needs Stephen's fix. I think this is the best approach
we have had so far. Any objections?

	Ingo

--- linux/drivers/pnp/pnpbios_core.c.orig	Sun Aug 11 17:01:17 2002
+++ linux/drivers/pnp/pnpbios_core.c	Mon Aug 12 17:21:29 2002
@@ -90,7 +90,8 @@
 static union pnp_bios_expansion_header * pnp_bios_hdr = NULL;
 
 /* The PnP BIOS entries in the GDT */
-#define PNP_GDT    (0x0060)
+#define PNP_GDT    (GDT_ENTRY_PNPBIOS_BASE * 8)
+
 #define PNP_CS32   (PNP_GDT+0x00)	/* segment for calling fn */
 #define PNP_CS16   (PNP_GDT+0x08)	/* code segment for BIOS */
 #define PNP_DS     (PNP_GDT+0x10)	/* data segment for BIOS */
--- linux/arch/i386/kernel/cpu/common.c.orig	Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/cpu/common.c	Mon Aug 12 17:21:29 2002
@@ -423,6 +423,7 @@
 {
 	int cpu = smp_processor_id();
 	struct tss_struct * t = init_tss + cpu;
+	struct thread_struct *thread = &current->thread;
 
 	if (test_and_set_bit(cpu, &cpu_initialized)) {
 		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -447,9 +448,13 @@
 	 */
 	if (cpu) {
 		memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
-		cpu_gdt_descr[cpu].size = GDT_SIZE;
+		cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
 		cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
 	}
+	/*
+	 * Set up the per-thread TLS descriptor cache:
+	 */
+	memcpy(thread->tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_MAX * 8);
 
 	__asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu]));
 	__asm__ __volatile__("lidt %0": "=m" (idt_descr));
@@ -468,9 +473,9 @@
 		BUG();
 	enter_lazy_tlb(&init_mm, current, cpu);
 
-	t->esp0 = current->thread.esp0;
+	t->esp0 = thread->esp0;
 	set_tss_desc(cpu,t);
-	cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+	cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
 	load_TR_desc();
 	load_LDT(&init_mm.context);
 
--- linux/arch/i386/kernel/entry.S.orig	Sun Aug 11 17:01:07 2002
+++ linux/arch/i386/kernel/entry.S	Mon Aug 12 17:21:29 2002
@@ -753,6 +753,7 @@
 	.long sys_sched_setaffinity
 	.long sys_sched_getaffinity
 	.long sys_set_thread_area
+	.long sys_get_thread_area
 
 	.rept NR_syscalls-(.-sys_call_table)/4
 		.long sys_ni_syscall
--- linux/arch/i386/kernel/head.S.orig	Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/head.S	Mon Aug 12 17:21:29 2002
@@ -239,12 +239,7 @@
 	movl %eax,%es
 	movl %eax,%fs
 	movl %eax,%gs
-#ifdef CONFIG_SMP
-	movl $(__KERNEL_DS), %eax
-	movl %eax,%ss		# Reload the stack pointer (segment only)
-#else
-	lss stack_start,%esp	# Load processor stack
-#endif
+	movl %eax,%ss
 	xorl %eax,%eax
 	lldt %ax
 	cld			# gcc2 wants the direction flag cleared at all times
@@ -412,34 +407,40 @@
 
 ALIGN
 /*
- * The Global Descriptor Table contains 20 quadwords, per-CPU.
+ * The Global Descriptor Table contains 24 quadwords, per-CPU.
  */
 ENTRY(cpu_gdt_table)
 	.quad 0x0000000000000000	/* NULL descriptor */
-	.quad 0x0000000000000000	/* TLS descriptor */
-	.quad 0x00cf9a000000ffff	/* 0x10 kernel 4GB code at 0x00000000 */
-	.quad 0x00cf92000000ffff	/* 0x18 kernel 4GB data at 0x00000000 */
-	.quad 0x00cffa000000ffff	/* 0x23 user   4GB code at 0x00000000 */
-	.quad 0x00cff2000000ffff	/* 0x2b user   4GB data at 0x00000000 */
-	.quad 0x0000000000000000	/* TSS descriptor */
-	.quad 0x0000000000000000	/* LDT descriptor */
+	.quad 0x0000000000000000	/* 0x0b reserved */
+	.quad 0x0000000000000000	/* 0x13 reserved */
+	.quad 0x0000000000000000	/* 0x1b reserved */
+	.quad 0x00cffa000000ffff	/* 0x23 user 4GB code at 0x00000000 */
+	.quad 0x00cff2000000ffff	/* 0x2b user 4GB data at 0x00000000 */
+	.quad 0x0000000000000000	/* 0x33 TLS entry 1 */
+	.quad 0x0000000000000000	/* 0x3b TLS entry 2 */
+	.quad 0x0000000000000000	/* 0x43 TLS entry 3 */
+	.quad 0x0000000000000000	/* 0x4b TLS entry 4 */
+	.quad 0x0000000000000000	/* 0x53 reserved */
+	.quad 0x0000000000000000	/* 0x5b reserved */
+
+	.quad 0x00cf9a000000ffff	/* 0x60 kernel 4GB code at 0x00000000 */
+	.quad 0x00cf92000000ffff	/* 0x68 kernel 4GB data at 0x00000000 */
+	.quad 0x0000000000000000	/* 0x70 TSS descriptor */
+	.quad 0x0000000000000000	/* 0x78 LDT descriptor */
+
+	/* Segments used for calling PnP BIOS */
+	.quad 0x00c09a0000000000	/* 0x80 32-bit code */
+	.quad 0x00809a0000000000	/* 0x88 16-bit code */
+	.quad 0x0080920000000000	/* 0x90 16-bit data */
+	.quad 0x0080920000000000	/* 0x98 16-bit data */
+	.quad 0x0080920000000000	/* 0xa0 16-bit data */
 	/*
 	 * The APM segments have byte granularity and their bases
 	 * and limits are set at run time.
 	 */
-	.quad 0x0040920000000000	/* 0x40 APM set up for bad BIOS's */
-	.quad 0x00409a0000000000	/* 0x48 APM CS    code */
-	.quad 0x00009a0000000000	/* 0x50 APM CS 16 code (16 bit) */
-	.quad 0x0040920000000000	/* 0x58 APM DS    data */
-	/* Segments used for calling PnP BIOS */
-	.quad 0x00c09a0000000000	/* 0x60 32-bit code */
-	.quad 0x00809a0000000000	/* 0x68 16-bit code */
-	.quad 0x0080920000000000	/* 0x70 16-bit data */
-	.quad 0x0080920000000000	/* 0x78 16-bit data */
-	.quad 0x0080920000000000	/* 0x80 16-bit data */
-	.quad 0x0000000000000000	/* 0x88 not used */
-	.quad 0x0000000000000000	/* 0x90 not used */
-	.quad 0x0000000000000000	/* 0x98 not used */
+	.quad 0x00409a0000000000	/* 0xa8 APM CS    code */
+	.quad 0x00009a0000000000	/* 0xb0 APM CS 16 code (16 bit) */
+	.quad 0x0040920000000000	/* 0xb8 APM DS    data */
 
 #if CONFIG_SMP
 	.fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */
--- linux/arch/i386/kernel/process.c.orig	Sun Aug 11 17:01:08 2002
+++ linux/arch/i386/kernel/process.c	Mon Aug 12 17:21:29 2002
@@ -681,11 +681,8 @@
 
 	/*
 	 * Load the per-thread Thread-Local Storage descriptor.
-	 *
-	 * NOTE: it's faster to do the two stores unconditionally
-	 * than to branch away.
 	 */
-	load_TLS_desc(next, cpu);
+	load_TLS(next, cpu);
 
 	/*
 	 * Save away %fs and %gs. No need to save %es and %ds, as
@@ -834,35 +831,114 @@
 #undef first_sched
 
 /*
- * Set the Thread-Local Storage area:
+ * get_free_idx: get a yet unused TLS descriptor index.
  */
-asmlinkage int sys_set_thread_area(unsigned long base, unsigned long flags)
+static int get_free_idx(void)
 {
 	struct thread_struct *t = &current->thread;
-	int writable = 0;
-	int cpu;
+	int idx;
 
-	/* do not allow unused flags */
-	if (flags & ~TLS_FLAGS_MASK)
+	for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
+		if (desc_empty(t->tls_array + idx))
+			return idx + GDT_ENTRY_TLS_MIN;
+	return -ESRCH;
+}
+
+/*
+ * Set a given TLS descriptor:
+ */
+asmlinkage int sys_set_thread_area(struct modify_ldt_ldt_s *u_info)
+{
+	struct thread_struct *t = &current->thread;
+	struct modify_ldt_ldt_s info;
+	struct desc_struct *desc;
+	int cpu, idx;
+
+	if (copy_from_user(&info, u_info, sizeof(info)))
+		return -EFAULT;
+	idx = info.entry_number;
+
+	/*
+	 * index -1 means the kernel should try to find and
+	 * allocate an empty descriptor:
+	 */
+	if (idx == -1) {
+		idx = get_free_idx();
+		if (idx < 0)
+			return idx;
+		if (put_user(idx, &u_info->entry_number))
+			return -EFAULT;
+	}
+
+	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
 		return -EINVAL;
 
-	if (flags & TLS_FLAG_WRITABLE)
-		writable = 1;
+	desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
 
 	/*
 	 * We must not get preempted while modifying the TLS.
 	 */
 	cpu = get_cpu();
 
-        t->tls_desc.a = ((base & 0x0000ffff) << 16) | 0xffff;
-
-        t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) |
-				0xf0000 | (writable << 9) | (1 << 15) |
-					(1 << 22) | (1 << 23) | 0x7000;
+	if (LDT_empty(&info)) {
+		desc->a = 0;
+		desc->b = 0;
+	} else {
+		desc->a = LDT_entry_a(&info);
+		desc->b = LDT_entry_b(&info);
+	}
+	load_TLS(t, cpu);
 
-	load_TLS_desc(t, cpu);
 	put_cpu();
 
-	return TLS_ENTRY*8 + 3;
+	return 0;
+}
+
+/*
+ * Get the current Thread-Local Storage area:
+ */
+
+#define GET_BASE(desc) ( \
+	(((desc)->a >> 16) & 0x0000ffff) | \
+	(((desc)->b << 16) & 0x00ff0000) | \
+	( (desc)->b        & 0xff000000)   )
+
+#define GET_LIMIT(desc) ( \
+	((desc)->a & 0x0ffff) | \
+	 ((desc)->b & 0xf0000) )
+	
+#define GET_32BIT(desc)		(((desc)->b >> 23) & 1)
+#define GET_CONTENTS(desc)	(((desc)->b >> 10) & 3)
+#define GET_WRITABLE(desc)	(((desc)->b >>  9) & 1)
+#define GET_LIMIT_PAGES(desc)	(((desc)->b >> 23) & 1)
+#define GET_PRESENT(desc)	(((desc)->b >> 15) & 1)
+#define GET_USEABLE(desc)	(((desc)->b >> 20) & 1)
+
+asmlinkage int sys_get_thread_area(struct modify_ldt_ldt_s *u_info)
+{
+	struct modify_ldt_ldt_s info;
+	struct desc_struct *desc;
+	int idx;
+
+	if (get_user(idx, &u_info->entry_number))
+		return -EFAULT;
+	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+		return -EINVAL;
+
+	desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
+
+	info.entry_number = idx;
+	info.base_addr = GET_BASE(desc);
+	info.limit = GET_LIMIT(desc);
+	info.seg_32bit = GET_32BIT(desc);
+	info.contents = GET_CONTENTS(desc);
+	info.read_exec_only = !GET_WRITABLE(desc);
+	info.limit_in_pages = GET_LIMIT_PAGES(desc);
+	info.seg_not_present = !GET_PRESENT(desc);
+	info.useable = GET_USEABLE(desc);
+
+	if (copy_to_user(u_info, &info, sizeof(info)))
+		return -EFAULT;
+	return 0;
 }
 
--- linux/arch/i386/kernel/suspend.c.orig	Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/suspend.c	Mon Aug 12 17:21:29 2002
@@ -207,7 +207,7 @@
 	struct tss_struct * t = init_tss + cpu;
 
 	set_tss_desc(cpu,t);	/* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */
-        cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+        cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
 
 	load_TR_desc();				/* This does ltr */
 	load_LDT(&current->mm->context);	/* This does lldt */
--- linux/arch/i386/kernel/ldt.c.orig	Sun Aug 11 17:01:04 2002
+++ linux/arch/i386/kernel/ldt.c	Mon Aug 12 17:21:29 2002
@@ -200,32 +200,17 @@
 
    	/* Allow LDTs to be cleared by the user. */
    	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
-		if (oldmode ||
-		    (ldt_info.contents == 0		&&
-		     ldt_info.read_exec_only == 1	&&
-		     ldt_info.seg_32bit == 0		&&
-		     ldt_info.limit_in_pages == 0	&&
-		     ldt_info.seg_not_present == 1	&&
-		     ldt_info.useable == 0 )) {
+		if (oldmode || LDT_empty(&ldt_info)) {
 			entry_1 = 0;
 			entry_2 = 0;
 			goto install;
 		}
 	}
 
-	entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
-		  (ldt_info.limit & 0x0ffff);
-	entry_2 = (ldt_info.base_addr & 0xff000000) |
-		  ((ldt_info.base_addr & 0x00ff0000) >> 16) |
-		  (ldt_info.limit & 0xf0000) |
-		  ((ldt_info.read_exec_only ^ 1) << 9) |
-		  (ldt_info.contents << 10) |
-		  ((ldt_info.seg_not_present ^ 1) << 15) |
-		  (ldt_info.seg_32bit << 22) |
-		  (ldt_info.limit_in_pages << 23) |
-		  0x7000;
-	if (!oldmode)
-		entry_2 |= (ldt_info.useable << 20);
+	entry_1 = LDT_entry_a(&ldt_info);
+	entry_2 = LDT_entry_b(&ldt_info);
+	if (oldmode)
+		entry_2 &= ~(1 << 20);
 
 	/* Install the new entry ...  */
 install:
--- linux/arch/i386/boot/setup.S.orig	Sun Jun  9 07:26:32 2002
+++ linux/arch/i386/boot/setup.S	Mon Aug 12 17:21:29 2002
@@ -1005,9 +1005,14 @@
 	ret
 
 # Descriptor tables
+#
+# NOTE: if you think the GDT is large, you can make it smaller by just
+# defining the KERNEL_CS and KERNEL_DS entries and shifting the gdt
+# address down by GDT_ENTRY_KERNEL_CS*8. This puts bogus entries into
+# the GDT, but those won't be used so it's not a problem.
+#
 gdt:
-	.word	0, 0, 0, 0			# dummy
-	.word	0, 0, 0, 0			# unused
+	.fill GDT_ENTRY_KERNEL_CS,8,0
 
 	.word	0xFFFF				# 4Gb - (0x100000*0x1000 = 4Gb)
 	.word	0				# base address = 0
--- linux/include/linux/apm_bios.h.orig	Sun Jun  9 07:30:24 2002
+++ linux/include/linux/apm_bios.h	Mon Aug 12 17:21:29 2002
@@ -21,8 +21,8 @@
 
 #ifdef __KERNEL__
 
-#define APM_40		0x40
-#define APM_CS		(APM_40 + 8)
+#define APM_40		(GDT_ENTRY_APMBIOS_BASE * 8)
+#define APM_CS		(APM_BASE + 8)
 #define APM_CS_16	(APM_CS + 8)
 #define APM_DS		(APM_CS_16 + 8)
 
--- linux/include/asm-i386/desc.h.orig	Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/desc.h	Mon Aug 12 17:21:29 2002
@@ -2,50 +2,12 @@
 #define __ARCH_DESC_H
 
 #include <asm/ldt.h>
-
-/*
- * The layout of the per-CPU GDT under Linux:
- *
- *   0 - null
- *   1 - Thread-Local Storage (TLS) segment
- *   2 - kernel code segment
- *   3 - kernel data segment
- *   4 - user code segment		<==== new cacheline
- *   5 - user data segment
- *   6 - TSS
- *   7 - LDT
- *   8 - APM BIOS support		<==== new cacheline
- *   9 - APM BIOS support
- *  10 - APM BIOS support
- *  11 - APM BIOS support
- *  12 - PNPBIOS support		<==== new cacheline
- *  13 - PNPBIOS support
- *  14 - PNPBIOS support
- *  15 - PNPBIOS support
- *  16 - PNPBIOS support		<==== new cacheline
- *  17 - not used
- *  18 - not used
- *  19 - not used
- */
-#define TLS_ENTRY 1
-#define TSS_ENTRY 6
-#define LDT_ENTRY 7
-/*
- * The interrupt descriptor table has room for 256 idt's,
- * the global descriptor table is dependent on the number
- * of tasks we can have..
- *
- * We pad the GDT to cacheline boundary.
- */
-#define IDT_ENTRIES 256
-#define GDT_ENTRIES 20
+#include <asm/segment.h>
 
 #ifndef __ASSEMBLY__
 
 #include <asm/mmu.h>
 
-#define GDT_SIZE (GDT_ENTRIES*sizeof(struct desc_struct))
-
 extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
 
 struct Xgt_desc_struct {
@@ -55,8 +17,8 @@
 
 extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
 
-#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (TSS_ENTRY<<3))
-#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (LDT_ENTRY<<3))
+#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8))
+#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8))
 
 /*
  * This is the ldt that every process will get unless we need
@@ -78,21 +40,48 @@
 
 static inline void set_tss_desc(unsigned int cpu, void *addr)
 {
-	_set_tssldt_desc(&cpu_gdt_table[cpu][TSS_ENTRY], (int)addr, 235, 0x89);
+	_set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (int)addr, 235, 0x89);
 }
 
 static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
 {
-	_set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82);
+	_set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
 }
 
-#define TLS_FLAGS_MASK			0x00000001
+#define LDT_entry_a(info) \
+	((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
 
-#define TLS_FLAG_WRITABLE		0x00000001
+#define LDT_entry_b(info) \
+	(((info)->base_addr & 0xff000000) | \
+	(((info)->base_addr & 0x00ff0000) >> 16) | \
+	((info)->limit & 0xf0000) | \
+	(((info)->read_exec_only ^ 1) << 9) | \
+	((info)->contents << 10) | \
+	(((info)->seg_not_present ^ 1) << 15) | \
+	((info)->seg_32bit << 22) | \
+	((info)->limit_in_pages << 23) | \
+	((info)->useable << 20) | \
+	0x7000)
+
+#define LDT_empty(info) (\
+	(info)->base_addr	== 0	&& \
+	(info)->limit		== 0	&& \
+	(info)->contents	== 0	&& \
+	(info)->read_exec_only	== 1	&& \
+	(info)->seg_32bit	== 0	&& \
+	(info)->limit_in_pages	== 0	&& \
+	(info)->seg_not_present	== 1	&& \
+	(info)->useable		== 0	)
+
+#if TLS_SIZE != 32
+# error update this code.
+#endif
 
-static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu)
+static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
 {
-	cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc;
+#define C(i) cpu_gdt_table[cpu][GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]
+	C(0); C(1); C(2); C(3);
+#undef C
 }
 
 static inline void clear_LDT(void)
--- linux/include/asm-i386/processor.h.orig	Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/processor.h	Mon Aug 12 17:21:29 2002
@@ -22,6 +22,11 @@
 	unsigned long a,b;
 };
 
+#define desc_empty(desc) \
+		(!((desc)->a + (desc)->b))
+
+#define desc_equal(desc1, desc2) \
+		(((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
 /*
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").
@@ -359,6 +364,8 @@
 };
 
 struct thread_struct {
+/* cached TLS descriptors. */
+	struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
 	unsigned long	esp0;
 	unsigned long	eip;
 	unsigned long	esp;
@@ -376,11 +383,10 @@
 	unsigned long		v86flags, v86mask, v86mode, saved_esp0;
 /* IO permissions */
 	unsigned long	*ts_io_bitmap;
-/* TLS cached descriptor */
-	struct desc_struct tls_desc;
 };
 
 #define INIT_THREAD  {						\
+	{ { 0, 0 } , },						\
 	0,							\
 	0, 0, 0, 0, 						\
 	{ [0 ... 7] = 0 },	/* debugging registers */	\
@@ -401,7 +407,7 @@
 	0,0,0,0, /* esp,ebp,esi,edi */				\
 	0,0,0,0,0,0, /* es,cs,ss */				\
 	0,0,0,0,0,0, /* ds,fs,gs */				\
-	LDT_ENTRY,0, /* ldt */					\
+	GDT_ENTRY_LDT,0, /* ldt */					\
 	0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */		\
 	{~0, } /* ioperm */					\
 }
--- linux/include/asm-i386/segment.h.orig	Sun Jun  9 07:28:19 2002
+++ linux/include/asm-i386/segment.h	Mon Aug 12 17:21:29 2002
@@ -1,10 +1,79 @@
 #ifndef _ASM_SEGMENT_H
 #define _ASM_SEGMENT_H
 
-#define __KERNEL_CS	0x10
-#define __KERNEL_DS	0x18
+/*
+ * The layout of the per-CPU GDT under Linux:
+ *
+ *   0 - null
+ *   1 - reserved
+ *   2 - reserved
+ *   3 - reserved
+ *
+ *   4 - default user CS		<==== new cacheline
+ *   5 - default user DS
+ *
+ *  ------- start of TLS (Thread-Local Storage) segments:
+ *
+ *   6 - TLS segment #1			[ glibc's TLS segment ]
+ *   7 - TLS segment #2			[ Wine's %fs Win32 segment ]
+ *   8 - TLS segment #3
+ *   9 - TLS segment #4
+ *  10 - reserved
+ *  11 - reserved
+ *
+ *  ------- start of kernel segments:
+ *
+ *  12 - kernel code segment		<==== new cacheline
+ *  13 - kernel data segment
+ *  14 - TSS
+ *  15 - LDT
+ *  16 - PNPBIOS support (16->32 gate)
+ *  17 - PNPBIOS support
+ *  18 - PNPBIOS support
+ *  19 - PNPBIOS support
+ *  20 - PNPBIOS support
+ *  21 - APM BIOS support
+ *  22 - APM BIOS support
+ *  23 - APM BIOS support 
+ */
+#define GDT_ENTRY_TLS_ENTRIES	4
+#define GDT_ENTRY_TLS_MIN	6
+#define GDT_ENTRY_TLS_MAX 	(GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
 
-#define __USER_CS	0x23
-#define __USER_DS	0x2B
+#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
+
+#define GDT_ENTRY_DEFAULT_USER_CS	4
+#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
+
+#define GDT_ENTRY_DEFAULT_USER_DS	5
+#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
+
+#define GDT_ENTRY_KERNEL_BASE	12
+
+#define GDT_ENTRY_KERNEL_CS		(GDT_ENTRY_KERNEL_BASE + 0)
+#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
+
+#define GDT_ENTRY_KERNEL_DS		(GDT_ENTRY_KERNEL_BASE + 1)
+#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
+
+#define GDT_ENTRY_TSS			(GDT_ENTRY_KERNEL_BASE + 2)
+#define GDT_ENTRY_LDT			(GDT_ENTRY_KERNEL_BASE + 3)
+
+#define GDT_ENTRY_PNPBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 4)
+#define GDT_ENTRY_APMBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 9)
+
+/*
+ * The GDT has 21 entries but we pad it to cacheline boundary:
+ */
+#define GDT_ENTRIES 24
+
+#define GDT_SIZE (GDT_ENTRIES * 8)
+
+/*
+ * The interrupt descriptor table has room for 256 idt's,
+ * the global descriptor table is dependent on the number
+ * of tasks we can have..
+ */
+#define IDT_ENTRIES 256
 
 #endif
--- linux/include/asm-i386/unistd.h.orig	Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/unistd.h	Mon Aug 12 17:21:29 2002
@@ -248,6 +248,7 @@
 #define __NR_sched_setaffinity	241
 #define __NR_sched_getaffinity	242
 #define __NR_set_thread_area	243
+#define __NR_get_thread_area	244
 
 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
 


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-D5
  2002-08-12 15:21           ` Jakub Jelinek
@ 2002-08-12 17:41             ` Ingo Molnar
  2002-08-12 15:54               ` Luca Barbieri
                                 ` (2 more replies)
  0 siblings, 3 replies; 47+ messages in thread
From: Ingo Molnar @ 2002-08-12 17:41 UTC (permalink / raw)
  To: Jakub Jelinek
  Cc: Linus Torvalds, linux-kernel, Alexandre Julliard, Luca Barbieri


On Mon, 12 Aug 2002, Jakub Jelinek wrote:

> As each supported TLS entry has its context-switch time cost, I think we
> should stay at 2 supported TLS entries.

4 are almost as good - and they also solve the 0x40 problem.

> My understanding was that the GDT patches were written to optimize the
> common case (all threaded apps using LDT and with the advent of __thread
> support causing every single application to use LDT), with 2 TLS entries
> where one is for libc/libpthread and the other one is for application
> usage I think it is enough for 99.9% of apps. In the rare case someone
> needs more, there is still LDT which offers 8192 entries.

well, i think i have to agree ... if it wasn't for Wine's 0x40 descriptor.  
But it certainly does not come free. We could have 3 TLS entries (0x40
will be the last entry), and the copying cost is 9 cycles. (compared to 6
cycles in the 2 entries case.) Good enough?

	Ingo


^ permalink raw reply	[flat|nested] 47+ messages in thread

* [patch] tls-2.5.31-D9
  2002-08-12 17:41             ` Ingo Molnar
  2002-08-12 15:54               ` Luca Barbieri
@ 2002-08-12 18:03               ` Ingo Molnar
  2002-08-13  1:50               ` [patch] tls-2.5.31-D5 Alexandre Julliard
  2 siblings, 0 replies; 47+ messages in thread
From: Ingo Molnar @ 2002-08-12 18:03 UTC (permalink / raw)
  To: Jakub Jelinek
  Cc: Linus Torvalds, linux-kernel, Alexandre Julliard, Luca Barbieri,
	Christoph Hellwig


okay, here is YAGL. (Yet Another GDT Layout)

3 TLS entries, 9 cycles copying and no branches in the context-switch
path. The patch also adds Christoph's suggestion and renames
modify_ldt_ldt_s (yuck!) to user_desc.

(all patches i posted were test-compiled and test-booted against
2.5.31-vanilla.)

	Ingo

--- linux/drivers/pnp/pnpbios_core.c.orig	Mon Aug 12 17:51:27 2002
+++ linux/drivers/pnp/pnpbios_core.c	Mon Aug 12 17:56:27 2002
@@ -90,7 +90,8 @@
 static union pnp_bios_expansion_header * pnp_bios_hdr = NULL;
 
 /* The PnP BIOS entries in the GDT */
-#define PNP_GDT    (0x0060)
+#define PNP_GDT    (GDT_ENTRY_PNPBIOS_BASE * 8)
+
 #define PNP_CS32   (PNP_GDT+0x00)	/* segment for calling fn */
 #define PNP_CS16   (PNP_GDT+0x08)	/* code segment for BIOS */
 #define PNP_DS     (PNP_GDT+0x10)	/* data segment for BIOS */
--- linux/arch/i386/kernel/cpu/common.c.orig	Mon Aug 12 17:56:01 2002
+++ linux/arch/i386/kernel/cpu/common.c	Mon Aug 12 17:56:27 2002
@@ -423,6 +423,7 @@
 {
 	int cpu = smp_processor_id();
 	struct tss_struct * t = init_tss + cpu;
+	struct thread_struct *thread = &current->thread;
 
 	if (test_and_set_bit(cpu, &cpu_initialized)) {
 		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -447,9 +448,13 @@
 	 */
 	if (cpu) {
 		memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
-		cpu_gdt_descr[cpu].size = GDT_SIZE;
+		cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
 		cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
 	}
+	/*
+	 * Set up the per-thread TLS descriptor cache:
+	 */
+	memcpy(thread->tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_MAX * 8);
 
 	__asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu]));
 	__asm__ __volatile__("lidt %0": "=m" (idt_descr));
@@ -468,9 +473,9 @@
 		BUG();
 	enter_lazy_tlb(&init_mm, current, cpu);
 
-	t->esp0 = current->thread.esp0;
+	t->esp0 = thread->esp0;
 	set_tss_desc(cpu,t);
-	cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+	cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
 	load_TR_desc();
 	load_LDT(&init_mm.context);
 
--- linux/arch/i386/kernel/entry.S.orig	Mon Aug 12 17:56:02 2002
+++ linux/arch/i386/kernel/entry.S	Mon Aug 12 17:56:27 2002
@@ -753,6 +753,7 @@
 	.long sys_sched_setaffinity
 	.long sys_sched_getaffinity
 	.long sys_set_thread_area
+	.long sys_get_thread_area
 
 	.rept NR_syscalls-(.-sys_call_table)/4
 		.long sys_ni_syscall
--- linux/arch/i386/kernel/head.S.orig	Mon Aug 12 17:56:02 2002
+++ linux/arch/i386/kernel/head.S	Mon Aug 12 17:56:27 2002
@@ -239,12 +239,7 @@
 	movl %eax,%es
 	movl %eax,%fs
 	movl %eax,%gs
-#ifdef CONFIG_SMP
-	movl $(__KERNEL_DS), %eax
-	movl %eax,%ss		# Reload the stack pointer (segment only)
-#else
-	lss stack_start,%esp	# Load processor stack
-#endif
+	movl %eax,%ss
 	xorl %eax,%eax
 	lldt %ax
 	cld			# gcc2 wants the direction flag cleared at all times
@@ -412,34 +407,40 @@
 
 ALIGN
 /*
- * The Global Descriptor Table contains 20 quadwords, per-CPU.
+ * The Global Descriptor Table contains 24 quadwords, per-CPU.
  */
 ENTRY(cpu_gdt_table)
 	.quad 0x0000000000000000	/* NULL descriptor */
-	.quad 0x0000000000000000	/* TLS descriptor */
-	.quad 0x00cf9a000000ffff	/* 0x10 kernel 4GB code at 0x00000000 */
-	.quad 0x00cf92000000ffff	/* 0x18 kernel 4GB data at 0x00000000 */
-	.quad 0x00cffa000000ffff	/* 0x23 user   4GB code at 0x00000000 */
-	.quad 0x00cff2000000ffff	/* 0x2b user   4GB data at 0x00000000 */
-	.quad 0x0000000000000000	/* TSS descriptor */
-	.quad 0x0000000000000000	/* LDT descriptor */
+	.quad 0x0000000000000000	/* 0x0b reserved */
+	.quad 0x0000000000000000	/* 0x13 reserved */
+	.quad 0x0000000000000000	/* 0x1b reserved */
+	.quad 0x00cffa000000ffff	/* 0x23 user 4GB code at 0x00000000 */
+	.quad 0x00cff2000000ffff	/* 0x2b user 4GB data at 0x00000000 */
+	.quad 0x0000000000000000	/* 0x33 TLS entry 1 */
+	.quad 0x0000000000000000	/* 0x3b TLS entry 2 */
+	.quad 0x0000000000000000	/* 0x43 TLS entry 3 */
+	.quad 0x0000000000000000	/* 0x4b reserved */
+	.quad 0x0000000000000000	/* 0x53 reserved */
+	.quad 0x0000000000000000	/* 0x5b reserved */
+
+	.quad 0x00cf9a000000ffff	/* 0x60 kernel 4GB code at 0x00000000 */
+	.quad 0x00cf92000000ffff	/* 0x68 kernel 4GB data at 0x00000000 */
+	.quad 0x0000000000000000	/* 0x70 TSS descriptor */
+	.quad 0x0000000000000000	/* 0x78 LDT descriptor */
+
+	/* Segments used for calling PnP BIOS */
+	.quad 0x00c09a0000000000	/* 0x80 32-bit code */
+	.quad 0x00809a0000000000	/* 0x88 16-bit code */
+	.quad 0x0080920000000000	/* 0x90 16-bit data */
+	.quad 0x0080920000000000	/* 0x98 16-bit data */
+	.quad 0x0080920000000000	/* 0xa0 16-bit data */
 	/*
 	 * The APM segments have byte granularity and their bases
 	 * and limits are set at run time.
 	 */
-	.quad 0x0040920000000000	/* 0x40 APM set up for bad BIOS's */
-	.quad 0x00409a0000000000	/* 0x48 APM CS    code */
-	.quad 0x00009a0000000000	/* 0x50 APM CS 16 code (16 bit) */
-	.quad 0x0040920000000000	/* 0x58 APM DS    data */
-	/* Segments used for calling PnP BIOS */
-	.quad 0x00c09a0000000000	/* 0x60 32-bit code */
-	.quad 0x00809a0000000000	/* 0x68 16-bit code */
-	.quad 0x0080920000000000	/* 0x70 16-bit data */
-	.quad 0x0080920000000000	/* 0x78 16-bit data */
-	.quad 0x0080920000000000	/* 0x80 16-bit data */
-	.quad 0x0000000000000000	/* 0x88 not used */
-	.quad 0x0000000000000000	/* 0x90 not used */
-	.quad 0x0000000000000000	/* 0x98 not used */
+	.quad 0x00409a0000000000	/* 0xa8 APM CS    code */
+	.quad 0x00009a0000000000	/* 0xb0 APM CS 16 code (16 bit) */
+	.quad 0x0040920000000000	/* 0xb8 APM DS    data */
 
 #if CONFIG_SMP
 	.fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */
--- linux/arch/i386/kernel/process.c.orig	Mon Aug 12 17:56:02 2002
+++ linux/arch/i386/kernel/process.c	Mon Aug 12 17:56:27 2002
@@ -681,11 +681,8 @@
 
 	/*
 	 * Load the per-thread Thread-Local Storage descriptor.
-	 *
-	 * NOTE: it's faster to do the two stores unconditionally
-	 * than to branch away.
 	 */
-	load_TLS_desc(next, cpu);
+	load_TLS(next, cpu);
 
 	/*
 	 * Save away %fs and %gs. No need to save %es and %ds, as
@@ -834,35 +831,114 @@
 #undef first_sched
 
 /*
- * Set the Thread-Local Storage area:
+ * get_free_idx: find a not yet used TLS descriptor index.
  */
-asmlinkage int sys_set_thread_area(unsigned long base, unsigned long flags)
+static int get_free_idx(void)
 {
 	struct thread_struct *t = &current->thread;
-	int writable = 0;
-	int cpu;
+	int idx;
 
-	/* do not allow unused flags */
-	if (flags & ~TLS_FLAGS_MASK)
+	for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
+		if (desc_empty(t->tls_array + idx))
+			return idx + GDT_ENTRY_TLS_MIN;
+	return -ESRCH;
+}
+
+/*
+ * Set a given TLS descriptor:
+ */
+asmlinkage int sys_set_thread_area(struct user_desc *u_info)
+{
+	struct thread_struct *t = &current->thread;
+	struct user_desc info;
+	struct desc_struct *desc;
+	int cpu, idx;
+
+	if (copy_from_user(&info, u_info, sizeof(info)))
+		return -EFAULT;
+	idx = info.entry_number;
+
+	/*
+	 * index -1 means the kernel should try to find and
+	 * allocate an empty descriptor:
+	 */
+	if (idx == -1) {
+		idx = get_free_idx();
+		if (idx < 0)
+			return idx;
+		if (put_user(idx, &u_info->entry_number))
+			return -EFAULT;
+	}
+
+	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
 		return -EINVAL;
 
-	if (flags & TLS_FLAG_WRITABLE)
-		writable = 1;
+	desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
 
 	/*
 	 * We must not get preempted while modifying the TLS.
 	 */
 	cpu = get_cpu();
 
-        t->tls_desc.a = ((base & 0x0000ffff) << 16) | 0xffff;
-
-        t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) |
-				0xf0000 | (writable << 9) | (1 << 15) |
-					(1 << 22) | (1 << 23) | 0x7000;
+	if (LDT_empty(&info)) {
+		desc->a = 0;
+		desc->b = 0;
+	} else {
+		desc->a = LDT_entry_a(&info);
+		desc->b = LDT_entry_b(&info);
+	}
+	load_TLS(t, cpu);
 
-	load_TLS_desc(t, cpu);
 	put_cpu();
 
-	return TLS_ENTRY*8 + 3;
+	return 0;
+}
+
+/*
+ * Get the current Thread-Local Storage area:
+ */
+
+#define GET_BASE(desc) ( \
+	(((desc)->a >> 16) & 0x0000ffff) | \
+	(((desc)->b << 16) & 0x00ff0000) | \
+	( (desc)->b        & 0xff000000)   )
+
+#define GET_LIMIT(desc) ( \
+	((desc)->a & 0x0ffff) | \
+	 ((desc)->b & 0xf0000) )
+	
+#define GET_32BIT(desc)		(((desc)->b >> 23) & 1)
+#define GET_CONTENTS(desc)	(((desc)->b >> 10) & 3)
+#define GET_WRITABLE(desc)	(((desc)->b >>  9) & 1)
+#define GET_LIMIT_PAGES(desc)	(((desc)->b >> 23) & 1)
+#define GET_PRESENT(desc)	(((desc)->b >> 15) & 1)
+#define GET_USEABLE(desc)	(((desc)->b >> 20) & 1)
+
+asmlinkage int sys_get_thread_area(struct user_desc *u_info)
+{
+	struct user_desc info;
+	struct desc_struct *desc;
+	int idx;
+
+	if (get_user(idx, &u_info->entry_number))
+		return -EFAULT;
+	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+		return -EINVAL;
+
+	desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
+
+	info.entry_number = idx;
+	info.base_addr = GET_BASE(desc);
+	info.limit = GET_LIMIT(desc);
+	info.seg_32bit = GET_32BIT(desc);
+	info.contents = GET_CONTENTS(desc);
+	info.read_exec_only = !GET_WRITABLE(desc);
+	info.limit_in_pages = GET_LIMIT_PAGES(desc);
+	info.seg_not_present = !GET_PRESENT(desc);
+	info.useable = GET_USEABLE(desc);
+
+	if (copy_to_user(u_info, &info, sizeof(info)))
+		return -EFAULT;
+	return 0;
 }
 
--- linux/arch/i386/kernel/suspend.c.orig	Mon Aug 12 17:56:02 2002
+++ linux/arch/i386/kernel/suspend.c	Mon Aug 12 17:56:27 2002
@@ -207,7 +207,7 @@
 	struct tss_struct * t = init_tss + cpu;
 
 	set_tss_desc(cpu,t);	/* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */
-        cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+        cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
 
 	load_TR_desc();				/* This does ltr */
 	load_LDT(&current->mm->context);	/* This does lldt */
--- linux/arch/i386/kernel/ldt.c.orig	Mon Aug 12 17:56:02 2002
+++ linux/arch/i386/kernel/ldt.c	Mon Aug 12 17:56:27 2002
@@ -170,7 +170,7 @@
 	struct mm_struct * mm = current->mm;
 	__u32 entry_1, entry_2, *lp;
 	int error;
-	struct modify_ldt_ldt_s ldt_info;
+	struct user_desc ldt_info;
 
 	error = -EINVAL;
 	if (bytecount != sizeof(ldt_info))
@@ -200,32 +200,17 @@
 
    	/* Allow LDTs to be cleared by the user. */
    	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
-		if (oldmode ||
-		    (ldt_info.contents == 0		&&
-		     ldt_info.read_exec_only == 1	&&
-		     ldt_info.seg_32bit == 0		&&
-		     ldt_info.limit_in_pages == 0	&&
-		     ldt_info.seg_not_present == 1	&&
-		     ldt_info.useable == 0 )) {
+		if (oldmode || LDT_empty(&ldt_info)) {
 			entry_1 = 0;
 			entry_2 = 0;
 			goto install;
 		}
 	}
 
-	entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
-		  (ldt_info.limit & 0x0ffff);
-	entry_2 = (ldt_info.base_addr & 0xff000000) |
-		  ((ldt_info.base_addr & 0x00ff0000) >> 16) |
-		  (ldt_info.limit & 0xf0000) |
-		  ((ldt_info.read_exec_only ^ 1) << 9) |
-		  (ldt_info.contents << 10) |
-		  ((ldt_info.seg_not_present ^ 1) << 15) |
-		  (ldt_info.seg_32bit << 22) |
-		  (ldt_info.limit_in_pages << 23) |
-		  0x7000;
-	if (!oldmode)
-		entry_2 |= (ldt_info.useable << 20);
+	entry_1 = LDT_entry_a(&ldt_info);
+	entry_2 = LDT_entry_b(&ldt_info);
+	if (oldmode)
+		entry_2 &= ~(1 << 20);
 
 	/* Install the new entry ...  */
 install:
--- linux/arch/i386/boot/setup.S.orig	Mon Aug 12 17:51:32 2002
+++ linux/arch/i386/boot/setup.S	Mon Aug 12 17:56:27 2002
@@ -1005,9 +1005,14 @@
 	ret
 
 # Descriptor tables
+#
+# NOTE: if you think the GDT is large, you can make it smaller by just
+# defining the KERNEL_CS and KERNEL_DS entries and shifting the gdt
+# address down by GDT_ENTRY_KERNEL_CS*8. This puts bogus entries into
+# the GDT, but those wont be used so it's not a problem.
+#
 gdt:
-	.word	0, 0, 0, 0			# dummy
-	.word	0, 0, 0, 0			# unused
+	.fill GDT_ENTRY_KERNEL_CS,8,0
 
 	.word	0xFFFF				# 4Gb - (0x100000*0x1000 = 4Gb)
 	.word	0				# base address = 0
--- linux/include/linux/apm_bios.h.orig	Mon Aug 12 17:51:39 2002
+++ linux/include/linux/apm_bios.h	Mon Aug 12 17:56:27 2002
@@ -21,8 +21,8 @@
 
 #ifdef __KERNEL__
 
-#define APM_40		0x40
-#define APM_CS		(APM_40 + 8)
+#define APM_40		(GDT_ENTRY_APMBIOS_BASE * 8)
+#define APM_CS		(APM_BASE + 8)
 #define APM_CS_16	(APM_CS + 8)
 #define APM_DS		(APM_CS_16 + 8)
 
--- linux/include/asm-i386/desc.h.orig	Mon Aug 12 17:56:15 2002
+++ linux/include/asm-i386/desc.h	Mon Aug 12 17:56:27 2002
@@ -2,50 +2,12 @@
 #define __ARCH_DESC_H
 
 #include <asm/ldt.h>
-
-/*
- * The layout of the per-CPU GDT under Linux:
- *
- *   0 - null
- *   1 - Thread-Local Storage (TLS) segment
- *   2 - kernel code segment
- *   3 - kernel data segment
- *   4 - user code segment		<==== new cacheline
- *   5 - user data segment
- *   6 - TSS
- *   7 - LDT
- *   8 - APM BIOS support		<==== new cacheline
- *   9 - APM BIOS support
- *  10 - APM BIOS support
- *  11 - APM BIOS support
- *  12 - PNPBIOS support		<==== new cacheline
- *  13 - PNPBIOS support
- *  14 - PNPBIOS support
- *  15 - PNPBIOS support
- *  16 - PNPBIOS support		<==== new cacheline
- *  17 - not used
- *  18 - not used
- *  19 - not used
- */
-#define TLS_ENTRY 1
-#define TSS_ENTRY 6
-#define LDT_ENTRY 7
-/*
- * The interrupt descriptor table has room for 256 idt's,
- * the global descriptor table is dependent on the number
- * of tasks we can have..
- *
- * We pad the GDT to cacheline boundary.
- */
-#define IDT_ENTRIES 256
-#define GDT_ENTRIES 20
+#include <asm/segment.h>
 
 #ifndef __ASSEMBLY__
 
 #include <asm/mmu.h>
 
-#define GDT_SIZE (GDT_ENTRIES*sizeof(struct desc_struct))
-
 extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
 
 struct Xgt_desc_struct {
@@ -55,8 +17,8 @@
 
 extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
 
-#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (TSS_ENTRY<<3))
-#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (LDT_ENTRY<<3))
+#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8))
+#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8))
 
 /*
  * This is the ldt that every process will get unless we need
@@ -78,21 +40,48 @@
 
 static inline void set_tss_desc(unsigned int cpu, void *addr)
 {
-	_set_tssldt_desc(&cpu_gdt_table[cpu][TSS_ENTRY], (int)addr, 235, 0x89);
+	_set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (int)addr, 235, 0x89);
 }
 
 static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
 {
-	_set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82);
+	_set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
 }
 
-#define TLS_FLAGS_MASK			0x00000001
+#define LDT_entry_a(info) \
+	((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
 
-#define TLS_FLAG_WRITABLE		0x00000001
+#define LDT_entry_b(info) \
+	(((info)->base_addr & 0xff000000) | \
+	(((info)->base_addr & 0x00ff0000) >> 16) | \
+	((info)->limit & 0xf0000) | \
+	(((info)->read_exec_only ^ 1) << 9) | \
+	((info)->contents << 10) | \
+	(((info)->seg_not_present ^ 1) << 15) | \
+	((info)->seg_32bit << 22) | \
+	((info)->limit_in_pages << 23) | \
+	((info)->useable << 20) | \
+	0x7000)
+
+#define LDT_empty(info) (\
+	(info)->base_addr	== 0	&& \
+	(info)->limit		== 0	&& \
+	(info)->contents	== 0	&& \
+	(info)->read_exec_only	== 1	&& \
+	(info)->seg_32bit	== 0	&& \
+	(info)->limit_in_pages	== 0	&& \
+	(info)->seg_not_present	== 1	&& \
+	(info)->useable		== 0	)
+
+#if TLS_SIZE != 24
+# error update this code.
+#endif
 
-static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu)
+static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
 {
-	cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc;
+#define C(i) cpu_gdt_table[cpu][GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]
+	C(0); C(1); C(2);
+#undef C
 }
 
 static inline void clear_LDT(void)
--- linux/include/asm-i386/processor.h.orig	Mon Aug 12 17:56:16 2002
+++ linux/include/asm-i386/processor.h	Mon Aug 12 17:56:27 2002
@@ -22,6 +22,11 @@
 	unsigned long a,b;
 };
 
+#define desc_empty(desc) \
+		(!((desc)->a + (desc)->b))
+
+#define desc_equal(desc1, desc2) \
+		(((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
 /*
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").
@@ -359,6 +364,8 @@
 };
 
 struct thread_struct {
+/* cached TLS descriptors. */
+	struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
 	unsigned long	esp0;
 	unsigned long	eip;
 	unsigned long	esp;
@@ -376,11 +383,10 @@
 	unsigned long		v86flags, v86mask, v86mode, saved_esp0;
 /* IO permissions */
 	unsigned long	*ts_io_bitmap;
-/* TLS cached descriptor */
-	struct desc_struct tls_desc;
 };
 
 #define INIT_THREAD  {						\
+	{ { 0, 0 } , },						\
 	0,							\
 	0, 0, 0, 0, 						\
 	{ [0 ... 7] = 0 },	/* debugging registers */	\
@@ -401,7 +407,7 @@
 	0,0,0,0, /* esp,ebp,esi,edi */				\
 	0,0,0,0,0,0, /* es,cs,ss */				\
 	0,0,0,0,0,0, /* ds,fs,gs */				\
-	LDT_ENTRY,0, /* ldt */					\
+	GDT_ENTRY_LDT,0, /* ldt */					\
 	0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */		\
 	{~0, } /* ioperm */					\
 }
--- linux/include/asm-i386/segment.h.orig	Mon Aug 12 17:56:16 2002
+++ linux/include/asm-i386/segment.h	Mon Aug 12 17:56:27 2002
@@ -1,10 +1,79 @@
 #ifndef _ASM_SEGMENT_H
 #define _ASM_SEGMENT_H
 
-#define __KERNEL_CS	0x10
-#define __KERNEL_DS	0x18
+/*
+ * The layout of the per-CPU GDT under Linux:
+ *
+ *   0 - null
+ *   1 - reserved
+ *   2 - reserved
+ *   3 - reserved
+ *
+ *   4 - default user CS		<==== new cacheline
+ *   5 - default user DS
+ *
+ *  ------- start of TLS (Thread-Local Storage) segments:
+ *
+ *   6 - TLS segment #1			[ glibc's TLS segment ]
+ *   7 - TLS segment #2			[ Wine's %fs Win32 segment ]
+ *   8 - TLS segment #3
+ *   9 - reserved
+ *  10 - reserved
+ *  11 - reserved
+ *
+ *  ------- start of kernel segments:
+ *
+ *  12 - kernel code segment		<==== new cacheline
+ *  13 - kernel data segment
+ *  14 - TSS
+ *  15 - LDT
+ *  16 - PNPBIOS support (16->32 gate)
+ *  17 - PNPBIOS support
+ *  18 - PNPBIOS support
+ *  19 - PNPBIOS support
+ *  20 - PNPBIOS support
+ *  21 - APM BIOS support
+ *  22 - APM BIOS support
+ *  23 - APM BIOS support 
+ */
+#define GDT_ENTRY_TLS_ENTRIES	3
+#define GDT_ENTRY_TLS_MIN	6
+#define GDT_ENTRY_TLS_MAX 	(GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
 
-#define __USER_CS	0x23
-#define __USER_DS	0x2B
+#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
+
+#define GDT_ENTRY_DEFAULT_USER_CS	4
+#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
+
+#define GDT_ENTRY_DEFAULT_USER_DS	5
+#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
+
+#define GDT_ENTRY_KERNEL_BASE	12
+
+#define GDT_ENTRY_KERNEL_CS		(GDT_ENTRY_KERNEL_BASE + 0)
+#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
+
+#define GDT_ENTRY_KERNEL_DS		(GDT_ENTRY_KERNEL_BASE + 1)
+#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
+
+#define GDT_ENTRY_TSS			(GDT_ENTRY_KERNEL_BASE + 2)
+#define GDT_ENTRY_LDT			(GDT_ENTRY_KERNEL_BASE + 3)
+
+#define GDT_ENTRY_PNPBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 4)
+#define GDT_ENTRY_APMBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 9)
+
+/*
+ * The GDT has 21 entries but we pad it to cacheline boundary:
+ */
+#define GDT_ENTRIES 24
+
+#define GDT_SIZE (GDT_ENTRIES * 8)
+
+/*
+ * The interrupt descriptor table has room for 256 idt's,
+ * the global descriptor table is dependent on the number
+ * of tasks we can have..
+ */
+#define IDT_ENTRIES 256
 
 #endif
--- linux/include/asm-i386/unistd.h.orig	Mon Aug 12 17:56:16 2002
+++ linux/include/asm-i386/unistd.h	Mon Aug 12 17:56:27 2002
@@ -248,6 +248,7 @@
 #define __NR_sched_setaffinity	241
 #define __NR_sched_getaffinity	242
 #define __NR_set_thread_area	243
+#define __NR_get_thread_area	244
 
 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
 
--- linux/include/asm-i386/ldt.h.orig	Mon Aug 12 17:56:16 2002
+++ linux/include/asm-i386/ldt.h	Mon Aug 12 17:56:27 2002
@@ -12,7 +12,7 @@
 #define LDT_ENTRY_SIZE	8
 
 #ifndef __ASSEMBLY__
-struct modify_ldt_ldt_s {
+struct user_desc {
 	unsigned int  entry_number;
 	unsigned long base_addr;
 	unsigned int  limit;


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch] tls-2.5.31-D5
  2002-08-12 17:41             ` Ingo Molnar
  2002-08-12 15:54               ` Luca Barbieri
  2002-08-12 18:03               ` [patch] tls-2.5.31-D9 Ingo Molnar
@ 2002-08-13  1:50               ` Alexandre Julliard
  2 siblings, 0 replies; 47+ messages in thread
From: Alexandre Julliard @ 2002-08-13  1:50 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Jakub Jelinek, Linus Torvalds, linux-kernel, Luca Barbieri

Ingo Molnar <mingo@elte.hu> writes:

> well, i think i have to agree ... if it wasn't for Wine's 0x40 descriptor.  
> But it certainly does not come free. We could have 3 TLS entries (0x40
> will be the last entry), and the copying cost is 9 cycles. (compared to 6
> cycles in the 2 entries case.) Good enough?

Note that Wine doesn't really require the 0x40 descriptor. As long as
we can trap accesses to it and emulate them like we do now, that's
good enough. Of course having a GDT entry would save a few cycles, but
this only matters for old Win16 apps, so I'm not sure adding even 1
cycle to the task switch time is worth it.

-- 
Alexandre Julliard
julliard@winehq.com

^ permalink raw reply	[flat|nested] 47+ messages in thread

end of thread, other threads:[~2002-08-13  1:47 UTC | newest]

Thread overview: 47+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-08-07 18:10 [patch] tls-2.5.30-A1 Ingo Molnar
2002-08-07 18:33 ` Linus Torvalds
2002-08-07 18:43   ` Stephen Rothwell
2002-08-07 18:57     ` Linus Torvalds
2002-08-07 19:40       ` Alexandre Julliard
2002-08-07 19:31   ` Ingo Molnar
2002-08-07 19:49     ` Alexandre Julliard
2002-08-07 22:01   ` Alan Cox
2002-08-07 22:36   ` Luca Barbieri
2002-08-07 22:54     ` Ingo Molnar
2002-08-07 23:21       ` Luca Barbieri
2002-08-07 23:35         ` DMA Problems with Intel 845 Chipset and Northwood CPU Mark Cuss
2002-08-08  0:58           ` John L. Korpi
2002-08-08 16:12             ` Mark Cuss
2002-08-11 21:46   ` [patch] tls-2.5.31-C3 Ingo Molnar
2002-08-12  7:34     ` Stephen Rothwell
2002-08-12 10:07       ` Ingo Molnar
2002-08-12  8:23         ` Stephen Rothwell
2002-08-12 10:08           ` Alan Cox
2002-08-12 10:49             ` Ingo Molnar
2002-08-12 10:34               ` Alan Cox
2002-08-12 12:17                 ` Ingo Molnar
2002-08-12 11:47                   ` Alan Cox
2002-08-12 12:55                     ` Ingo Molnar
2002-08-12 12:29                       ` Alan Cox
2002-08-12 10:35               ` Alan Cox
2002-08-12 13:10             ` Kasper Dupont
2002-08-12 15:20             ` Ingo Molnar
2002-08-12 14:46         ` Stephen Rothwell
2002-08-12 12:18     ` Luca Barbieri
2002-08-12 15:12       ` Ingo Molnar
2002-08-12 13:43         ` Luca Barbieri
2002-08-12 15:57           ` Ingo Molnar
2002-08-12 14:17             ` Luca Barbieri
2002-08-12 15:53     ` [patch] tls-2.5.31-D3 Ingo Molnar
2002-08-12 16:13       ` [patch] tls-2.5.31-D4 Ingo Molnar
2002-08-12 14:32         ` Luca Barbieri
2002-08-12 17:06         ` [patch] tls-2.5.31-D5 Ingo Molnar
2002-08-12 15:21           ` Jakub Jelinek
2002-08-12 17:41             ` Ingo Molnar
2002-08-12 15:54               ` Luca Barbieri
2002-08-12 18:03               ` [patch] tls-2.5.31-D9 Ingo Molnar
2002-08-13  1:50               ` [patch] tls-2.5.31-D5 Alexandre Julliard
2002-08-12 17:24           ` [patch] tls-2.5.31-D7 Ingo Molnar
2002-08-12 15:45             ` Christoph Hellwig
2002-08-07 19:02 ` [patch] tls-2.5.30-A1 Christoph Hellwig
2002-08-08 12:25 ` Jamie Lokier

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).