linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* RE: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
@ 2004-01-07 12:59 Durairaj, Sundarapandian
  2004-01-07 14:08 ` Meelis Roos
  2004-01-07 17:34 ` Vladimir Kondratiev
  0 siblings, 2 replies; 43+ messages in thread
From: Durairaj, Sundarapandian @ 2004-01-07 12:59 UTC (permalink / raw)
  To: linux-kernel; +Cc: Grege, Seshadri, Harinarayanan, Kondratiev, Vladimir

Hi All,

Thanks for your review comments. I am reposting the updated patch after
incorporating the review comments.
Please review this and send your comments.

Thanks,
Sundar

------------------------------------------------

diff -Naur linux-2.6.0/arch/i386/Kconfig
linux_pciexpress/arch/i386/Kconfig
--- linux-2.6.0/arch/i386/Kconfig	2003-12-18 08:28:16.000000000
+0530
+++ linux_pciexpress/arch/i386/Kconfig	2004-01-07 10:59:23.000000000
+0530
@@ -959,7 +959,7 @@
 endmenu
 
 
-menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
+menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA, PCI_EXPRESS)"
 
 config X86_VISWS_APIC
 	bool
@@ -976,6 +976,18 @@
 	depends on SMP && !(X86_VISWS || X86_VOYAGER)
 	default y
 
+config PCI_EXP_ENHANCED
+	bool "PCI_EXPRESS (EXPERIMENTAL)" 
+	depends on EXPERIMENTAL 
+	help
+	   PCI Express extends the configuration space from 256 bytes to
4k
+	   bytes. It also defines an enhanced configuration mechanism to
acces
+	   the extended configuration space.
+	   With this option, you can specify that Linux will first
attempt to
+	   access the pci configuration space through enhanced config
access
+	   mechanism (Will work only on PCI Express based system)
otherwise the
+	   pci direct mechanism will be used.
+
 config PCI
 	bool "PCI support" if !X86_VISWS
 	depends on !X86_VOYAGER
diff -Naur linux-2.6.0/arch/i386/kernel/acpi/boot.c
linux_pciexpress/arch/i386/kernel/acpi/boot.c
--- linux-2.6.0/arch/i386/kernel/acpi/boot.c	2003-12-18
08:29:29.000000000 +0530
+++ linux_pciexpress/arch/i386/kernel/acpi/boot.c	2004-01-07
18:20:23.000000000 +0530
@@ -93,6 +93,28 @@
 	return ((unsigned char *) base + offset);
 }
 
+#ifdef CONFIG_PCI_EXP_ENHANCED
+extern u64 mmcfg_base_address;
+static int __init acpi_parse_mcfg
+			 (unsigned long phys_addr, unsigned long size)
+{
+	struct acpi_table_mcfg	*mcfg = NULL;
+
+	if (!phys_addr || !size)
+		return -EINVAL;
+
+	mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr,
size);
+	if (!mcfg) {
+		printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+		return -ENODEV;
+	}
+	if (mcfg->base_address)
+		mmcfg_base_address =mcfg->base_address;
+	printk(KERN_INFO PREFIX "Local  mcfg address %p\n",
+			mcfg->base_address);
+	return 0;
+}
+#endif /* CONFIG_PCI_EXP_ENHANCED*/
 
 #ifdef CONFIG_X86_LOCAL_APIC
 
@@ -508,6 +530,21 @@
 
 #endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */
 
+#ifdef CONFIG_PCI_EXP_ENHANCED
+	result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+	if (!result) {
+		printk(KERN_WARNING PREFIX "MCFG not present\n");
+		return 0;
+	}
+	else if (result < 0) {
+		printk(KERN_ERR PREFIX "Error parsing MCFG\n");
+		return result;
+	}
+	else if (result > 1) {
+		printk(KERN_WARNING PREFIX "Multiple MCFG tables
exist\n");
+	}
+#endif /*CONFIG_PCI_EXP_ENHANCED*/
+
 #ifdef CONFIG_X86_LOCAL_APIC
 	if (acpi_lapic && acpi_ioapic) {
 		smp_found_config = 1;
diff -Naur linux-2.6.0/arch/i386/pci/direct.c
linux_pciexpress/arch/i386/pci/direct.c
--- linux-2.6.0/arch/i386/pci/direct.c	2003-12-18 08:28:28.000000000
+0530
+++ linux_pciexpress/arch/i386/pci/direct.c	2004-01-07
18:16:57.000000000 +0530
@@ -168,6 +168,124 @@
 
 
 /*
+ *We map full Page size on each request. Incidently that's the size we
+ *have for config space too.
+ */
+#ifdef CONFIG_PCI_EXP_ENHANCED
+/* 
+ *On PCI Express capable platform, at the time of kernel initialization
+ *the os would have scanned for mcfg table and set this variable to 
+ *appropriate value.
+ *If PCI Express not supported the variable will have 0 value
+ */
+u64 mmcfg_base_address;
+
+/*
+ *Variable used to store the base address of the last pciexpress device

+ *accessed.
+ */
+u32 pcie_last_accessed_device;
+
+unsigned long pci_exp_set_dev_base (int bus, int dev, int fn)
+{
+	u32 dev_base = 
+		mmcfg_base_address | (bus << 20) | ((PCI_DEVFN (dev,fn))
<<12);
+	if (dev_base != pcie_last_accessed_device){
+		pcie_last_accessed_device = dev_base;
+		set_fixmap (FIX_PCIE_MCFG, dev_base);
+	}
+	return 0;
+}
+
+static int pci_express_conf_read(int seg, int bus, 
+		int devfn, int reg, int len, u32 *value)
+{
+	unsigned long flags;
+	char * virt_addr;
+	int dev = PCI_SLOT (devfn);
+	int fn  = PCI_FUNC (devfn);
+ 
+	if (!value || ((u32)bus > 255) || ((u32)dev > 31) 
+			|| ((u32)fn > 7) || ((u32)reg > 4095)){
+		printk(KERN_ERR "pci_express_conf_read: Invalid
Parameter\n");
+  		return -EINVAL;
+	}
+
+	/* Shoot misalligned transaction now */
+	if (reg & (len-1)){
+		printk(KERN_ERR "pci_express_conf_read: \
+					misalligned transaction\n");
+  		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+	pci_exp_set_dev_base(bus, dev, fn);
+	virt_addr = (char *) (fix_to_virt(FIX_PCIE_MCFG));
+ 	switch (len) {
+        case 1:
+		*value = (u8)readb((unsigned long) virt_addr+reg);
+		break;
+        case 2:
+		*value = (u16)readw((unsigned long) virt_addr+reg);
+		break;
+        case 4:
+		*value = (u32)readl((unsigned long) virt_addr+reg);
+		break;
+	}
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+	return 0;
+}
+ 
+static int pci_express_conf_write(int seg, int bus, 
+			int devfn, int reg, int len, u32 value)
+{
+	unsigned long flags;
+	unsigned char * virt_addr;
+	int dev = PCI_SLOT (devfn);
+	int fn  = PCI_FUNC (devfn);
+	
+	if (!value || ((u32)bus > 255) || ((u32)dev > 31) || 
+		((u32)fn > 7) || ((u32)reg > 4095)){
+		printk(KERN_ERR "pci_express_conf_write: \
+					Invalid Parameter\n");
+		return -EINVAL;
+	}
+	
+	/* Shoot misalligned transaction now */
+	if (reg & (len-1)){
+		printk(KERN_ERR "pci_express_conf_write: \
+					misalligned transaction\n");
+  		return -EINVAL;
+	}
+  
+	spin_lock_irqsave(&pci_config_lock, flags);
+	pci_exp_set_dev_base(bus, dev, fn);
+	virt_addr = (char *) (fix_to_virt(FIX_PCIE_MCFG));
+	
+	switch (len) {
+		case 1:
+			writeb(value,(unsigned long)virt_addr+reg);
+			break;
+		case 2:
+			writew(value,(unsigned long)virt_addr+reg);
+			break;
+	        case 4:
+			writel(value,(unsigned long)virt_addr+reg);
+	                break;
+     	}
+	/* Dummy read to flush PCI write */
+	readl (virt_addr);
+	spin_unlock_irqrestore(&pci_config_lock, flags);	 
+	return 0;
+}
+
+static struct pci_raw_ops pci_express_conf = {
+	.read   =	pci_express_conf_read,
+	.write  =	pci_express_conf_write,
+};
+#endif /* CONFIG_PCI_EXP_ENHANCED */
+
+/*
  * Before we decide to use direct hardware access mechanisms, we try to
do some
  * trivial checks to ensure it at least _seems_ to be working -- we
just test
  * whether bus 00 contains a host bridge (this is similar to checking
@@ -244,6 +362,28 @@
 static int __init pci_direct_init(void)
 {
 	struct resource *region, *region2;
+	unsigned long flags;
+#ifdef CONFIG_PCI_EXP_ENHANCED
+	/*
+ 	 *Check if platform we are running is pci express capable
+  	 */
+	if (mmcfg_base_address == 0){
+		printk(KERN_INFO 
+			"MCFG table entry is not found in ACPI
tables....\n \
+			PCI Express not supported in this platform....\n
\
+			Not enabling Enhanced Configuration....\n");
+	}
+	else {
+		local_irq_save(flags);
+		if (pci_sanity_check(&pci_express_conf)) {
+			local_irq_restore(flags);
+			printk(KERN_INFO "PCI:Using config type
PCIExp\n");
+			raw_pci_ops = &pci_express_conf;
+			return 0;
+		} 
+		local_irq_restore(flags);
+	}
+#endif /* CONFIG_PCI_EXP_ENHANCED */
 
 	if ((pci_probe & PCI_PROBE_CONF1) == 0)
 		goto type2;
diff -Naur linux-2.6.0/arch/i386/pci/Makefile
linux_pciexpress/arch/i386/pci/Makefile
--- linux-2.6.0/arch/i386/pci/Makefile	2003-12-18 08:28:57.000000000
+0530
+++ linux_pciexpress/arch/i386/pci/Makefile	2004-01-07
10:59:23.000000000 +0530
@@ -2,6 +2,7 @@
 
 obj-$(CONFIG_PCI_BIOS)		+= pcbios.o
 obj-$(CONFIG_PCI_DIRECT)	+= direct.o
+obj-$(CONFIG_PCI_EXP_ENHANCED)	+= direct.o
 
 pci-y				:= fixup.o
 pci-$(CONFIG_ACPI_PCI)		+= acpi.o
diff -Naur linux-2.6.0/drivers/acpi/tables.c
linux_pciexpress/drivers/acpi/tables.c
--- linux-2.6.0/drivers/acpi/tables.c	2003-12-18 08:28:46.000000000
+0530
+++ linux_pciexpress/drivers/acpi/tables.c	2004-01-07
11:03:43.000000000 +0530
@@ -58,6 +58,7 @@
 	[ACPI_SSDT]		= "SSDT",
 	[ACPI_SPMI]		= "SPMI",
 	[ACPI_HPET]		= "HPET",
+	[ACPI_MCFG]		= "MCFG",
 };
 
 /* System Description Table (RSDT/XSDT) */
diff -Naur linux-2.6.0/drivers/pci/pci.c
linux_pciexpress/drivers/pci/pci.c
--- linux-2.6.0/drivers/pci/pci.c	2003-12-18 08:28:38.000000000
+0530
+++ linux_pciexpress/drivers/pci/pci.c	2004-01-07 10:59:23.000000000
+0530
@@ -90,6 +90,8 @@
  *  %PCI_CAP_ID_CHSWP        CompactPCI HotSwap 
  *
  *  %PCI_CAP_ID_PCIX         PCI-X
+ *  %PCI_CAP_ID_EXP          PCI-EXP
+
  */
 int
 pci_find_capability(struct pci_dev *dev, int cap)
diff -Naur linux-2.6.0/drivers/pci/proc.c
linux_pciexpress/drivers/pci/proc.c
--- linux-2.6.0/drivers/pci/proc.c	2003-12-18 08:28:57.000000000
+0530
+++ linux_pciexpress/drivers/pci/proc.c	2004-01-07 17:37:04.000000000
+0530
@@ -17,13 +17,29 @@
 #include <asm/byteorder.h>
 
 #define PCI_CFG_SPACE_SIZE 256
+#define PCI_CFG_SPACE_EXP_SIZE 4096
 
 static int proc_initialized;	/* = 0 */
 
+static int pci_cfg_space_size (struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_EXP_ENHANCED
+	/* Find whether the device is PCI Express device */
+	int is_pci_express_dev =  pci_find_capability(dev,
PCI_CAP_ID_EXP);
+	if (is_pci_express_dev)
+		return PCI_CFG_SPACE_EXP_SIZE;
+	else
+#endif
+	return PCI_CFG_SPACE_SIZE; 
+}
+
 static loff_t
 proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
 {
 	loff_t new = -1;
+	const struct inode *ino = file->f_dentry->d_inode;
+	const struct proc_dir_entry *dp = PDE(ino);
+	struct pci_dev *dev = dp->data;
 
 	lock_kernel();
 	switch (whence) {
@@ -34,11 +50,11 @@
 		new = file->f_pos + off;
 		break;
 	case 2:
-		new = PCI_CFG_SPACE_SIZE + off;
+		new = pci_cfg_space_size(dev) + off;
 		break;
 	}
 	unlock_kernel();
-	if (new < 0 || new > PCI_CFG_SPACE_SIZE)
+	if (new < 0 || new > pci_cfg_space_size(dev))
 		return -EINVAL;
 	return (file->f_pos = new);
 }
@@ -59,7 +75,7 @@
 	 */
 
 	if (capable(CAP_SYS_ADMIN))
-		size = PCI_CFG_SPACE_SIZE;
+ 		size = pci_cfg_space_size (dev);
 	else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
 		size = 128;
 	else
@@ -134,12 +150,14 @@
 	int pos = *ppos;
 	int cnt;
 
-	if (pos >= PCI_CFG_SPACE_SIZE)
+	int size;
+	size = pci_cfg_space_size(dev);
+	if (pos >= size)
 		return 0;
-	if (nbytes >= PCI_CFG_SPACE_SIZE)
-		nbytes = PCI_CFG_SPACE_SIZE;
-	if (pos + nbytes > PCI_CFG_SPACE_SIZE)
-		nbytes = PCI_CFG_SPACE_SIZE - pos;
+	if (nbytes >= size)
+		nbytes = size;
+	if (pos + nbytes > size)
+		nbytes = size - pos;
 	cnt = nbytes;
 
 	if (!access_ok(VERIFY_READ, buf, cnt))
@@ -384,6 +402,7 @@
 	struct pci_bus *bus = dev->bus;
 	struct proc_dir_entry *de, *e;
 	char name[16];
+	int size;
 
 	if (!proc_initialized)
 		return -EACCES;
@@ -401,7 +420,9 @@
 		return -ENOMEM;
 	e->proc_fops = &proc_bus_pci_operations;
 	e->data = dev;
-	e->size = PCI_CFG_SPACE_SIZE;
+
+	size = pci_cfg_space_size(dev);
+	e->size = size;
 
 	return 0;
 }
diff -Naur linux-2.6.0/include/asm-i386/fixmap.h
linux_pciexpress/include/asm-i386/fixmap.h
--- linux-2.6.0/include/asm-i386/fixmap.h	2003-12-18
08:28:06.000000000 +0530
+++ linux_pciexpress/include/asm-i386/fixmap.h	2004-01-07
10:59:23.000000000 +0530
@@ -67,6 +67,9 @@
 	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings
*/
 	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
 #endif
+#ifdef CONFIG_PCI_EXP_ENHANCED
+	FIX_PCIE_MCFG,
+#endif
 #ifdef CONFIG_ACPI_BOOT
 	FIX_ACPI_BEGIN,
 	FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
diff -Naur linux-2.6.0/include/linux/acpi.h
linux_pciexpress/include/linux/acpi.h
--- linux-2.6.0/include/linux/acpi.h	2003-12-18 08:27:58.000000000
+0530
+++ linux_pciexpress/include/linux/acpi.h	2004-01-07
12:02:35.000000000 +0530
@@ -317,6 +317,13 @@
 	char				ec_id[0];
 } __attribute__ ((packed));
 
+struct acpi_table_mcfg {
+	struct acpi_table_header 	header;
+	u8	reserved[8];
+	u64	base_address;
+} __attribute__ ((packed));
+
+
 /* Table Handlers */
 
 enum acpi_table_id {
@@ -338,6 +345,7 @@
 	ACPI_SSDT,
 	ACPI_SPMI,
 	ACPI_HPET,
+	ACPI_MCFG,
 	ACPI_TABLE_COUNT
 };
 
diff -Naur linux-2.6.0/include/linux/pci.h
linux_pciexpress/include/linux/pci.h
--- linux-2.6.0/include/linux/pci.h	2003-12-18 08:28:49.000000000
+0530
+++ linux_pciexpress/include/linux/pci.h	2004-01-07
10:59:23.000000000 +0530
@@ -198,6 +198,7 @@
 #define  PCI_CAP_ID_MSI		0x05	/* Message Signalled
Interrupts */
 #define  PCI_CAP_ID_CHSWP	0x06	/* CompactPCI HotSwap */
 #define  PCI_CAP_ID_PCIX	0x07	/* PCI-X */
+#define  PCI_CAP_ID_EXP 	0x10	/* PCI-Express*/
 #define PCI_CAP_LIST_NEXT	1	/* Next capability in the list
*/
 #define PCI_CAP_FLAGS		2	/* Capability defined flags (16
bits) */
 #define PCI_CAP_SIZEOF		4

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-07 12:59 [patch] PCI Express Enhanced Config Patch - 2.6.0-test11 Durairaj, Sundarapandian
@ 2004-01-07 14:08 ` Meelis Roos
  2004-01-07 17:34 ` Vladimir Kondratiev
  1 sibling, 0 replies; 43+ messages in thread
From: Meelis Roos @ 2004-01-07 14:08 UTC (permalink / raw)
  To: sundarapandian.durairaj, linux-kernel

DS> Please review this and send your comments.

DS> +menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA, PCI_EXPRESS)"
DS> +       bool "PCI_EXPRESS (EXPERIMENTAL)" 

Why do you use underscore in textual names? "PCI Express" seems more
natural.

-- 
Meelis Roos (mroos@linux.ee)

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-07 12:59 [patch] PCI Express Enhanced Config Patch - 2.6.0-test11 Durairaj, Sundarapandian
  2004-01-07 14:08 ` Meelis Roos
@ 2004-01-07 17:34 ` Vladimir Kondratiev
  1 sibling, 0 replies; 43+ messages in thread
From: Vladimir Kondratiev @ 2004-01-07 17:34 UTC (permalink / raw)
  To: Durairaj, Sundarapandian; +Cc: linux-kernel, Grege, Seshadri, Harinarayanan

Durairaj, Sundarapandian wrote:

>Hi All,
>
>Thanks for your review comments. I am reposting the updated patch after
>incorporating the review comments.
>Please review this and send your comments.
>
>Thanks,
>Sundar
>
>------------------------------------------------
>
>  
>
<skip>

>diff -Naur linux-2.6.0/arch/i386/pci/direct.c
>linux_pciexpress/arch/i386/pci/direct.c
>--- linux-2.6.0/arch/i386/pci/direct.c	2003-12-18 08:28:28.000000000
>+0530
>+++ linux_pciexpress/arch/i386/pci/direct.c	2004-01-07
>18:16:57.000000000 +0530
>@@ -168,6 +168,124 @@
> 
> 
> /*
>+ *We map full Page size on each request. Incidently that's the size we
>+ *have for config space too.
>+ */
>+#ifdef CONFIG_PCI_EXP_ENHANCED
>+/* 
>+ *On PCI Express capable platform, at the time of kernel initialization
>+ *the os would have scanned for mcfg table and set this variable to 
>+ *appropriate value.
>+ *If PCI Express not supported the variable will have 0 value
>+ */
>+u64 mmcfg_base_address;
>  
>
I'd make this variable 'unsigned long'. Later, you compare it with and
assign it to u32 values.
Actually, it is a bus address, which is unsigned long.

>+
>+/*
>+ *Variable used to store the base address of the last pciexpress device
>
>+ *accessed.
>+ */
>+u32 pcie_last_accessed_device;
>+
>+unsigned long pci_exp_set_dev_base (int bus, int dev, int fn)
>+{
>+	u32 dev_base = 
>+		mmcfg_base_address | (bus << 20) | ((PCI_DEVFN (dev,fn))
><<12);
>+	if (dev_base != pcie_last_accessed_device){
>+		pcie_last_accessed_device = dev_base;
>+		set_fixmap (FIX_PCIE_MCFG, dev_base);
>+	}
>+	return 0;
>+}
>  
>
I suggest using a single 'devfn' argument instead of separate 'dev' and
'fn' arguments, like this:
unsigned long pci_exp_set_dev_base (int bus, int devfn)
and, correspondingly,

u32 dev_base = mmcfg_base_address | (bus << 20) | (devfn <<12);

>+
>+static int pci_express_conf_read(int seg, int bus, 
>+		int devfn, int reg, int len, u32 *value)
>+{
>+	unsigned long flags;
>+	char * virt_addr;
>  
>
Taking the change above into account, you save some computation here:
... delete the 'dev' and 'fn' calculations

>+	int dev = PCI_SLOT (devfn);
>+	int fn  = PCI_FUNC (devfn);
>+ 
>  
>
in this if() change

((u32)dev > 31) || ((u32)fn > 7)
to
((u32)devfn > 255)

>+	if (!value || ((u32)bus > 255) || ((u32)dev > 31) 
>+			|| ((u32)fn > 7) || ((u32)reg > 4095)){
>+		printk(KERN_ERR "pci_express_conf_read: Invalid
>Parameter\n");
>+  		return -EINVAL;
>+	}
>+
>+	/* Shoot misalligned transaction now */
>+	if (reg & (len-1)){
>+		printk(KERN_ERR "pci_express_conf_read: \
>+					misalligned transaction\n");
>+  		return -EINVAL;
>+	}
>+
>+	spin_lock_irqsave(&pci_config_lock, flags);
>  
>
and call

pci_exp_set_dev_base(bus, devfn);

>+	pci_exp_set_dev_base(bus, dev, fn);
>+	virt_addr = (char *) (fix_to_virt(FIX_PCIE_MCFG));
>  
>
virt_addr is constant; convert it to a static variable and assign it in
pci_direct_init().
No need to recalculate.

>+ 	switch (len) {
>+        case 1:
>+		*value = (u8)readb((unsigned long) virt_addr+reg);
>+		break;
>+        case 2:
>+		*value = (u16)readw((unsigned long) virt_addr+reg);
>+		break;
>+        case 4:
>+		*value = (u32)readl((unsigned long) virt_addr+reg);
>+		break;
>+	}
>+	spin_unlock_irqrestore(&pci_config_lock, flags);
>+	return 0;
>+}
>+ 
>  
>
the same changes dev,fn -> devfn for _write

>+static int pci_express_conf_write(int seg, int bus, 
>+			int devfn, int reg, int len, u32 value)
>+{
>+	unsigned long flags;
>+	unsigned char * virt_addr;
>+	int dev = PCI_SLOT (devfn);
>+	int fn  = PCI_FUNC (devfn);
>+	
>+	if (!value || ((u32)bus > 255) || ((u32)dev > 31) || 
>+		((u32)fn > 7) || ((u32)reg > 4095)){
>+		printk(KERN_ERR "pci_express_conf_write: \
>+					Invalid Parameter\n");
>+		return -EINVAL;
>+	}
>+	
>+	/* Shoot misalligned transaction now */
>+	if (reg & (len-1)){
>+		printk(KERN_ERR "pci_express_conf_write: \
>+					misalligned transaction\n");
>+  		return -EINVAL;
>+	}
>+  
>+	spin_lock_irqsave(&pci_config_lock, flags);
>+	pci_exp_set_dev_base(bus, dev, fn);
>+	virt_addr = (char *) (fix_to_virt(FIX_PCIE_MCFG));
>  
>
See above - no need to recalculate virt_addr.

>+	
>+	switch (len) {
>+		case 1:
>+			writeb(value,(unsigned long)virt_addr+reg);
>+			break;
>+		case 2:
>+			writew(value,(unsigned long)virt_addr+reg);
>+			break;
>+	        case 4:
>+			writel(value,(unsigned long)virt_addr+reg);
>+	                break;
>+     	}
>+	/* Dummy read to flush PCI write */
>+	readl (virt_addr);
>+	spin_unlock_irqrestore(&pci_config_lock, flags);	 
>+	return 0;
>+}
>+
>  
>


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-02-01 20:11                   ` Matthew Wilcox
@ 2004-02-01 21:35                     ` Eric W. Biederman
  0 siblings, 0 replies; 43+ messages in thread
From: Eric W. Biederman @ 2004-02-01 21:35 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Linus Torvalds, Durairaj, Sundarapandian, Kernel Mailing List,
	linux-pci, Alan Cox, Greg KH, Andi Kleen, Andrew Morton, mj,
	Kondratiev, Vladimir, Seshadri, Harinarayanan, Nakajima, Jun

Matthew Wilcox <willy@debian.org> writes:

> On Sun, Feb 01, 2004 at 11:28:38AM -0700, Eric W. Biederman wrote:
> > I suspect the check for addresses being an address being too big
> > would be useful even on 64bit architectures.  I just looked
> > and ia64 does not have that check and bad BARs could cause
> > interesting problems.  It makes sense to catch illegal bar values
> > earlier, but this whole part of the code is a slow path so putting in
> > a sanity check should not hurt anything.
> 
> Hard to say since ia64 boxes don't usually have BIOSes written by people
> on minimum wage.  It certainly wouldn't hurt to check though.

An uninitialized BAR could have the same effect and it is easier to
trigger.  ia64 boxes get so little volume you can miss things.  I
killed an ia64 box once by changing the serial console baud rate.

I have no problem with firmware; it is just that, since it must be
written on a per-board basis and firmware is hard to update, you
simply can't assume it is correct.  Some little bug may have gotten
through the QA checks.  Hardware has similar pressures, except it
is even harder to correct.

So the concern is just healthy programmer paranoia.  If it can
go wrong it will.

> > I think we want an ioremap_pfn instead of an ioremap64.  On 64bit
> > platforms ioremap64 is just ioremap so it is redundant.  On 32bit
> > platforms ioremap64 is really ioremap_pfn with just a different
> > wrapper around it.  While ioremap_pfn makes sense on all
> > architectures.  
> 
> Yeah, but it doesn't make sense to driver writers.  They have a hard
> enough time dealing with scatterlists.  Heck, I don't even know what
> pfn stands for.  So I'm all for having ioremap_pfn() as the base function,
> as long as we still have ioremap(), ioremap64() and map_resource() as
> wrappers around it.

pfn is short for page frame number.  It is the abbreviation that is used
by the kernel vm layer for a page number.  And it is a term that at
least used to be used quite a lot in texts explaining virtual memory and
paging.

If you have a map_resource I don't know why you would need ioremap64.
But I have no problems with wrappers.  The question mostly is which
function is architecture specific and which can be generic.

If I were not in the middle of moving out of my current apartment and
apartment hunting, I think I would whip up a patch.
 
> > Could the patch be updated to do that please?
> 
> Yup, not hard.  Patch below.

Thanks for the updated patch.
 
> Actually, it's not relevant for ia64 because ia64 accesses PCI config
> space through SAL.  I did a patch for ia64 and posted it to the list
> last week (SAL was revised to allow for the larger address space).
> It's part of the below patch.

Ok it is nice to see the support getting there.

The table entry should be just as relevant on ia64.  It is odd
though that on ia64 the firmware interface is updated and on x86 a
firmware bypass interface is added.

Eric



^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-02-01 18:28                 ` Eric W. Biederman
@ 2004-02-01 20:11                   ` Matthew Wilcox
  2004-02-01 21:35                     ` Eric W. Biederman
  0 siblings, 1 reply; 43+ messages in thread
From: Matthew Wilcox @ 2004-02-01 20:11 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Matthew Wilcox, Linus Torvalds, Durairaj, Sundarapandian,
	Kernel Mailing List, linux-pci, Alan Cox, Greg KH, Andi Kleen,
	Andrew Morton, mj, Kondratiev, Vladimir, Seshadri, Harinarayanan,
	Nakajima, Jun

On Sun, Feb 01, 2004 at 11:28:38AM -0700, Eric W. Biederman wrote:
> I suspect the check for addresses being an address being too big
> would be useful even on 64bit architectures.  I just looked
> and ia64 does not have that check and bad BARs could cause
> interesting problems.  It makes sense to catch illegal bar values
> earlier, but this whole part of the code is a slow path so putting in
> a sanity check should not hurt anything.

Hard to say since ia64 boxes don't usually have BIOSes written by people
on minimum wage.  It certainly wouldn't hurt to check though.

> I think we want an ioremap_pfn instead of an ioremap64.  On 64bit
> platforms ioremap64 is just ioremap so it is redundant.  On 32bit
> platforms ioremap64 is really ioremap_pfn with just a different
> wrapper around it.  While ioremap_pfn makes sense on all
> architectures.  

Yeah, but it doesn't make sense to driver writers.  They have a hard
enough time dealing with scatterlists.  Heck, I don't even know what
pfn stands for.  So I'm all for having ioremap_pfn() as the base function,
as long as we still have ioremap(), ioremap64() and map_resource() as
wrappers around it.

> > > at least check to ensure the high half is zero.  If it the high half
> > > is not zero we can print an annoying error message.  All of the normal
> > > pci capabilities are still limited to being in the first 256 bytes of
> > > the configuration space.  So not a lot is lost if we can't enable the
> > > entire 4K. 
> > 
> > Yes, I think that's a reasonable thing to do until an ioremap64
> > exists.
> 
> Could the patch be updated to do that please?

Yup, not hard.  Patch below.

> > i386 doesn't support it but ia64, ppc and sparc do.  I presume each
> > domain will get its own MCFG space.  It won't be hard to support, but
> > there's no point in doing it until hardware exists.
> 
> Ok, I guess that did get implemented then.  Especially for the ia64
> case it is a significant question what does the acpi table need to
> look like for multiple MCFG spaces.  We should implement this properly
> if we can.  This is another issue that is relevant because the
> interfaces are still being defined.

Actually, it's not relevant for ia64 because ia64 accesses PCI config
space through SAL.  I did a patch for ia64 and posted it to the list
last week (SAL was revised to allow for the larger address space).
It's part of the below patch.

diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/Kconfig pciexp-2.6/arch/i386/Kconfig
--- linus-2.6/arch/i386/Kconfig	2004-01-27 21:05:17.000000000 -0500
+++ pciexp-2.6/arch/i386/Kconfig	2004-01-29 09:16:22.000000000 -0500
@@ -1030,12 +1030,16 @@ config PCI_GOBIOS
 	  PCI-based systems don't have any BIOS at all. Linux can also try to
 	  detect the PCI hardware directly without using the BIOS.
 
-	  With this option, you can specify how Linux should detect the PCI
-	  devices. If you choose "BIOS", the BIOS will be used, if you choose
-	  "Direct", the BIOS won't be used, and if you choose "Any", the
-	  kernel will try the direct access method and falls back to the BIOS
-	  if that doesn't work. If unsure, go with the default, which is
-	  "Any".
+	  With this option, you can specify how Linux should detect the
+	  PCI devices. If you choose "BIOS", the BIOS will be used,
+	  if you choose "Direct", the BIOS won't be used, and if you
+	  choose "MMConfig", then PCI Express MMCONFIG will be used.
+	  If you choose "Any", the kernel will try MMCONFIG, then the
+	  direct access method and falls back to the BIOS if that doesn't
+	  work. If unsure, go with the default, which is "Any".
+
+config PCI_GOMMCONFIG
+	bool "MMConfig"
 
 config PCI_GODIRECT
 	bool "Direct"
@@ -1055,6 +1059,12 @@ config PCI_DIRECT
  	depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS)
 	default y
 
+config PCI_MMCONFIG
+	bool
+	depends on PCI && (PCI_GOMMCONFIG || PCI_GOANY)
+	select ACPI_BOOT
+	default y
+
 config PCI_USE_VECTOR
 	bool "Vector-based interrupt indexing"
 	depends on X86_LOCAL_APIC && X86_IO_APIC
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/kernel/acpi/boot.c pciexp-2.6/arch/i386/kernel/acpi/boot.c
--- linus-2.6/arch/i386/kernel/acpi/boot.c	2004-01-07 18:02:42.000000000 -0500
+++ pciexp-2.6/arch/i386/kernel/acpi/boot.c	2004-02-01 14:41:56.000000000 -0500
@@ -95,6 +95,31 @@ char *__acpi_map_table(unsigned long phy
 }
 
 
+#ifdef CONFIG_PCI_MMCONFIG
+static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+{
+	struct acpi_table_mcfg *mcfg;
+
+	if (!phys_addr || !size)
+		return -EINVAL;
+
+	mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size);
+	if (!mcfg) {
+		printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+		return -ENODEV;
+	}
+
+	if (mcfg->base_reserved) {
+		printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n");
+		return -ENODEV;
+	}
+
+	pci_mmcfg_base_addr = mcfg->base_address;
+
+	return 0;
+}
+#endif /* CONFIG_PCI_MMCONFIG */
+
 #ifdef CONFIG_X86_LOCAL_APIC
 
 static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
@@ -515,6 +540,19 @@ acpi_boot_init (void)
 
 #endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */
 
+#ifdef CONFIG_PCI_MMCONFIG
+	result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+	if (!result) {
+		printk(KERN_WARNING PREFIX "MCFG not present\n");
+		return 0;
+	} else if (result < 0) {
+		printk(KERN_ERR PREFIX "Error parsing MCFG\n");
+		return result;
+	} else if (result > 1) {
+		printk(KERN_WARNING PREFIX "Multiple MCFG tables exist\n");
+	}
+#endif /* CONFIG_PCI_MMCONFIG */
+
 #ifdef CONFIG_X86_LOCAL_APIC
 	if (acpi_lapic && acpi_ioapic) {
 		smp_found_config = 1;
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/Makefile pciexp-2.6/arch/i386/pci/Makefile
--- linus-2.6/arch/i386/pci/Makefile	2003-07-29 13:00:27.000000000 -0400
+++ pciexp-2.6/arch/i386/pci/Makefile	2004-01-29 08:11:28.000000000 -0500
@@ -1,6 +1,7 @@
 obj-y				:= i386.o
 
 obj-$(CONFIG_PCI_BIOS)		+= pcbios.o
+obj-$(CONFIG_PCI_MMCONFIG)	+= mmconfig.o
 obj-$(CONFIG_PCI_DIRECT)	+= direct.o
 
 pci-y				:= fixup.o
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/common.c pciexp-2.6/arch/i386/pci/common.c
--- linus-2.6/arch/i386/pci/common.c	2003-09-08 17:41:32.000000000 -0400
+++ pciexp-2.6/arch/i386/pci/common.c	2004-01-29 08:11:19.000000000 -0500
@@ -19,7 +19,8 @@
 extern  void pcibios_sort(void);
 #endif
 
-unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2;
+unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
+				PCI_PROBE_MMCONF;
 
 int pcibios_last_bus = -1;
 struct pci_bus *pci_root_bus = NULL;
@@ -197,6 +198,12 @@ char * __devinit  pcibios_setup(char *st
 		return NULL;
 	}
 #endif
+#ifdef CONFIG_PCI_MMCONFIG
+	else if (!strcmp(str, "nommconf")) {
+		pci_probe &= ~PCI_PROBE_MMCONF;
+		return NULL;
+	}
+#endif
 #ifdef CONFIG_ACPI_PCI
 	else if (!strcmp(str, "noacpi")) {
 		pci_probe |= PCI_NO_ACPI_ROUTING;
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/mmconfig.c pciexp-2.6/arch/i386/pci/mmconfig.c
--- linus-2.6/arch/i386/pci/mmconfig.c	1969-12-31 19:00:00.000000000 -0500
+++ pciexp-2.6/arch/i386/pci/mmconfig.c	2004-01-29 11:37:09.000000000 -0500
@@ -0,0 +1,115 @@
+/*
+ * mmconfig.c - Low-level direct PCI config space access via MMCONFIG
+ */
+
+#include <linux/pci.h>
+#include <linux/init.h>
+#include "pci.h"
+
+/* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
+u32 pci_mmcfg_base_addr;
+
+#define mmcfg_virt_addr (fix_to_virt(FIX_PCIE_MCFG))
+
+/* The base address of the last MMCONFIG device accessed */
+static u32 mmcfg_last_accessed_device;
+
+/*
+ * Functions for accessing PCI configuration space with MMCONFIG accesses
+ */
+
+static inline void pci_exp_set_dev_base(int bus, int devfn)
+{
+	u32 dev_base = pci_mmcfg_base_addr | (bus << 20) | (devfn << 12);
+	if (dev_base != mmcfg_last_accessed_device) {
+		mmcfg_last_accessed_device = dev_base;
+		set_fixmap(FIX_PCIE_MCFG, dev_base);
+	}
+}
+
+static int pci_mmcfg_read(int seg, int bus, int devfn, int reg, int len, u32 *value)
+{
+	unsigned long flags;
+
+	if (!value || (bus > 255) || (devfn > 255) || (reg > 4095))
+		return -EINVAL;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+
+	pci_exp_set_dev_base(bus, devfn);
+
+	switch (len) {
+	case 1:
+		*value = readb(mmcfg_virt_addr + reg);
+		break;
+	case 2:
+		*value = readw(mmcfg_virt_addr + reg);
+		break;
+	case 4:
+		*value = readl(mmcfg_virt_addr + reg);
+		break;
+	}
+
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+
+	return 0;
+}
+
+static int pci_mmcfg_write(int seg, int bus, int devfn, int reg, int len, u32 value)
+{
+	unsigned long flags;
+
+	if ((bus > 255) || (devfn > 255) || (reg > 4095)) 
+		return -EINVAL;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+
+	pci_exp_set_dev_base(bus, devfn);
+
+	switch (len) {
+	case 1:
+		writeb(value, mmcfg_virt_addr + reg);
+		break;
+	case 2:
+		writew(value, mmcfg_virt_addr + reg);
+		break;
+	case 4:
+		writel(value, mmcfg_virt_addr + reg);
+		break;
+	}
+
+	/* Dummy read to flush PCI write */
+	readl(mmcfg_virt_addr);
+
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+
+	return 0;
+}
+
+static struct pci_raw_ops pci_mmcfg = {
+	.read =		pci_mmcfg_read,
+	.write =	pci_mmcfg_write,
+};
+
+static int __init pci_mmcfg_init(void)
+{
+	struct resource *region;
+
+	if ((pci_probe & PCI_PROBE_MMCONF) == 0)
+		goto out;
+	if (!pci_mmcfg_base_addr)
+		goto out;
+	region = request_mem_region(pci_mmcfg_base_addr, 256 * 1024 * 1024,
+			"PCI MMCONFIG");
+	if (!region)
+		goto out;
+
+	printk(KERN_INFO "PCI: Using MMCONFIG\n");
+	raw_pci_ops = &pci_mmcfg;
+	pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
+
+ out:
+	return 0;
+}
+
+arch_initcall(pci_mmcfg_init);
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/pci.h pciexp-2.6/arch/i386/pci/pci.h
--- linus-2.6/arch/i386/pci/pci.h	2003-07-29 13:00:27.000000000 -0400
+++ pciexp-2.6/arch/i386/pci/pci.h	2004-01-29 08:14:48.000000000 -0500
@@ -15,6 +15,9 @@
 #define PCI_PROBE_BIOS		0x0001
 #define PCI_PROBE_CONF1		0x0002
 #define PCI_PROBE_CONF2		0x0004
+#define PCI_PROBE_MMCONF	0x0008
+#define PCI_PROBE_MASK		0x000f
+
 #define PCI_NO_SORT		0x0100
 #define PCI_BIOS_SORT		0x0200
 #define PCI_NO_CHECKS		0x0400
diff -urpNX build-tools/dontdiff linus-2.6/arch/ia64/pci/pci.c pciexp-2.6/arch/ia64/pci/pci.c
--- linus-2.6/arch/ia64/pci/pci.c	2004-01-27 21:05:17.000000000 -0500
+++ pciexp-2.6/arch/ia64/pci/pci.c	2004-02-01 14:55:22.000000000 -0500
@@ -55,8 +55,11 @@ struct pci_fixup pcibios_fixups[1];
 
 #define PCI_SAL_ADDRESS(seg, bus, devfn, reg) \
 	((u64)(seg << 24) | (u64)(bus << 16) | \
-	 (u64)(devfn << 8) | (u64)(reg))
+	 (u64)(devfn << 8) | (u64)(reg)), 0
 
+#define PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg) \
+	((u64)(seg << 28) | (u64)(bus << 20) | \
+	 (u64)(devfn << 12) | (u64)(reg)), 1
 
 static int
 pci_sal_read (int seg, int bus, int devfn, int reg, int len, u32 *value)
@@ -64,10 +67,14 @@ pci_sal_read (int seg, int bus, int devf
 	int result = 0;
 	u64 data = 0;
 
-	if (!value || (seg > 255) || (bus > 255) || (devfn > 255) || (reg > 255))
+	if (!value || (seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095))
 		return -EINVAL;
 
-	result = ia64_sal_pci_config_read(PCI_SAL_ADDRESS(seg, bus, devfn, reg), len, &data);
+	if ((seg < 256) && (reg < 256)) {
+		result = ia64_sal_pci_config_read(PCI_SAL_ADDRESS(seg, bus, devfn, reg), len, &data);
+	} else {
+		result = ia64_sal_pci_config_read(PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg), len, &data);
+	}
 
 	*value = (u32) data;
 
@@ -77,13 +84,17 @@ pci_sal_read (int seg, int bus, int devf
 static int
 pci_sal_write (int seg, int bus, int devfn, int reg, int len, u32 value)
 {
-	if ((seg > 255) || (bus > 255) || (devfn > 255) || (reg > 255))
+	if ((seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095))
 		return -EINVAL;
 
-	return ia64_sal_pci_config_write(PCI_SAL_ADDRESS(seg, bus, devfn, reg), len, value);
+	if ((seg < 256) && (reg < 256)) {
+		return ia64_sal_pci_config_write(PCI_SAL_ADDRESS(seg, bus, devfn, reg), len, value);
+	} else {
+		return ia64_sal_pci_config_write(PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg), len, value);
+	}
 }
 
-struct pci_raw_ops pci_sal_ops = {
+static struct pci_raw_ops pci_sal_ops = {
 	.read = 	pci_sal_read,
 	.write =	pci_sal_write
 };
diff -urpNX build-tools/dontdiff linus-2.6/drivers/acpi/tables.c pciexp-2.6/drivers/acpi/tables.c
--- linus-2.6/drivers/acpi/tables.c	2003-10-08 16:52:16.000000000 -0400
+++ pciexp-2.6/drivers/acpi/tables.c	2004-01-29 08:22:52.000000000 -0500
@@ -58,6 +58,7 @@ static char *acpi_table_signatures[ACPI_
 	[ACPI_SSDT]		= "SSDT",
 	[ACPI_SPMI]		= "SPMI",
 	[ACPI_HPET]		= "HPET",
+	[ACPI_MCFG]		= "MCFG",
 };
 
 /* System Description Table (RSDT/XSDT) */
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/pci-sysfs.c pciexp-2.6/drivers/pci/pci-sysfs.c
--- linus-2.6/drivers/pci/pci-sysfs.c	2003-08-22 22:46:57.000000000 -0400
+++ pciexp-2.6/drivers/pci/pci-sysfs.c	2004-01-29 09:30:43.000000000 -0500
@@ -71,7 +71,7 @@ pci_read_config(struct kobject *kobj, ch
 
 	/* Several chips lock up trying to read undefined config space */
 	if (capable(CAP_SYS_ADMIN)) {
-		size = 256;
+		size = dev->cfg_size;
 	} else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) {
 		size = 128;
 	}
@@ -123,10 +123,10 @@ pci_write_config(struct kobject *kobj, c
 	unsigned int size = count;
 	loff_t init_off = off;
 
-	if (off > 256)
+	if (off > dev->cfg_size)
 		return 0;
-	if (off + count > 256) {
-		size = 256 - off;
+	if (off + count > dev->cfg_size) {
+		size = dev->cfg_size - off;
 		count = size;
 	}
 
@@ -166,6 +166,16 @@ static struct bin_attribute pci_config_a
 	.write = pci_write_config,
 };
 
+static struct bin_attribute pcie_config_attr = {
+	.attr =	{
+		.name = "config",
+		.mode = S_IRUGO | S_IWUSR,
+	},
+	.size = 4096,
+	.read = pci_read_config,
+	.write = pci_write_config,
+};
+
 void pci_create_sysfs_dev_files (struct pci_dev *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -178,5 +188,9 @@ void pci_create_sysfs_dev_files (struct 
 	device_create_file (dev, &dev_attr_class);
 	device_create_file (dev, &dev_attr_irq);
 	device_create_file (dev, &dev_attr_resource);
-	sysfs_create_bin_file(&dev->kobj, &pci_config_attr);
+	if (pdev->cfg_size < 4096) {
+		sysfs_create_bin_file(&dev->kobj, &pci_config_attr);
+	} else {
+		sysfs_create_bin_file(&dev->kobj, &pcie_config_attr);
+	}
 }
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/pci.c pciexp-2.6/drivers/pci/pci.c
--- linus-2.6/drivers/pci/pci.c	2003-10-08 16:52:35.000000000 -0400
+++ pciexp-2.6/drivers/pci/pci.c	2004-01-29 08:23:57.000000000 -0500
@@ -90,6 +90,8 @@ pci_max_busnr(void)
  *  %PCI_CAP_ID_CHSWP        CompactPCI HotSwap 
  *
  *  %PCI_CAP_ID_PCIX         PCI-X
+ *
+ *  %PCI_CAP_ID_EXP          PCI Express
  */
 int
 pci_find_capability(struct pci_dev *dev, int cap)
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/probe.c pciexp-2.6/drivers/pci/probe.c
--- linus-2.6/drivers/pci/probe.c	2004-01-07 18:02:53.000000000 -0500
+++ pciexp-2.6/drivers/pci/probe.c	2004-01-29 08:59:46.000000000 -0500
@@ -17,6 +17,8 @@
 
 #define CARDBUS_LATENCY_TIMER	176	/* secondary latency timer */
 #define CARDBUS_RESERVE_BUSNR	3
+#define PCI_CFG_SPACE_SIZE	256
+#define PCI_CFG_SPACE_EXP_SIZE	4096
 
 /* Ugh.  Need to stop exporting this to modules. */
 LIST_HEAD(pci_root_buses);
@@ -479,6 +481,20 @@ static void pci_release_dev(struct devic
 	kfree(pci_dev);
 }
 
+/**
+ * pci_cfg_space_size - get the configuration space size of the PCI device
+ */
+static int pci_cfg_space_size(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_MMCONFIG
+	/* Find whether the device is PCI Express */
+	int is_pci_express_dev = pci_find_capability(dev, PCI_CAP_ID_EXP);
+	if (is_pci_express_dev)
+		return PCI_CFG_SPACE_EXP_SIZE;
+#endif
+	return PCI_CFG_SPACE_SIZE;
+}
+
 /*
  * Read the config data for a PCI device, sanity-check it
  * and fill in the dev structure...
@@ -515,6 +531,7 @@ pci_scan_device(struct pci_bus *bus, int
 	dev->multifunction = !!(hdr_type & 0x80);
 	dev->vendor = l & 0xffff;
 	dev->device = (l >> 16) & 0xffff;
+	dev->cfg_size = pci_cfg_space_size(dev);
 
 	/* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
 	   set this higher, assuming the system even supports it.  */
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/proc.c pciexp-2.6/drivers/pci/proc.c
--- linus-2.6/drivers/pci/proc.c	2004-01-07 18:02:53.000000000 -0500
+++ pciexp-2.6/drivers/pci/proc.c	2004-01-29 08:38:49.000000000 -0500
@@ -16,16 +16,15 @@
 #include <asm/uaccess.h>
 #include <asm/byteorder.h>
 
-#define PCI_CFG_SPACE_SIZE 256
-
 static int proc_initialized;	/* = 0 */
 
 static loff_t
 proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
 {
 	loff_t new = -1;
+	struct inode *inode = file->f_dentry->d_inode;
 
-	down(&file->f_dentry->d_inode->i_sem);
+	down(&inode->i_sem);
 	switch (whence) {
 	case 0:
 		new = off;
@@ -34,14 +33,14 @@ proc_bus_pci_lseek(struct file *file, lo
 		new = file->f_pos + off;
 		break;
 	case 2:
-		new = PCI_CFG_SPACE_SIZE + off;
+		new = inode->i_size + off;
 		break;
 	}
-	if (new < 0 || new > PCI_CFG_SPACE_SIZE)
+	if (new < 0 || new > inode->i_size)
 		new = -EINVAL;
 	else
 		file->f_pos = new;
-	up(&file->f_dentry->d_inode->i_sem);
+	up(&inode->i_sem);
 	return new;
 }
 
@@ -61,7 +60,7 @@ proc_bus_pci_read(struct file *file, cha
 	 */
 
 	if (capable(CAP_SYS_ADMIN))
-		size = PCI_CFG_SPACE_SIZE;
+		size = dev->cfg_size;
 	else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
 		size = 128;
 	else
@@ -134,14 +133,15 @@ proc_bus_pci_write(struct file *file, co
 	const struct proc_dir_entry *dp = PDE(ino);
 	struct pci_dev *dev = dp->data;
 	int pos = *ppos;
+	int size = dev->cfg_size;
 	int cnt;
 
-	if (pos >= PCI_CFG_SPACE_SIZE)
+	if (pos >= size)
 		return 0;
-	if (nbytes >= PCI_CFG_SPACE_SIZE)
-		nbytes = PCI_CFG_SPACE_SIZE;
-	if (pos + nbytes > PCI_CFG_SPACE_SIZE)
-		nbytes = PCI_CFG_SPACE_SIZE - pos;
+	if (nbytes >= size)
+		nbytes = size;
+	if (pos + nbytes > size)
+		nbytes = size - pos;
 	cnt = nbytes;
 
 	if (!access_ok(VERIFY_READ, buf, cnt))
@@ -403,7 +403,7 @@ int pci_proc_attach_device(struct pci_de
 		return -ENOMEM;
 	e->proc_fops = &proc_bus_pci_operations;
 	e->data = dev;
-	e->size = PCI_CFG_SPACE_SIZE;
+	e->size = dev->cfg_size;
 
 	return 0;
 }
diff -urpNX build-tools/dontdiff linus-2.6/include/asm-i386/fixmap.h pciexp-2.6/include/asm-i386/fixmap.h
--- linus-2.6/include/asm-i386/fixmap.h	2003-07-29 13:01:54.000000000 -0400
+++ pciexp-2.6/include/asm-i386/fixmap.h	2004-01-29 08:40:21.000000000 -0500
@@ -71,6 +71,9 @@ enum fixed_addresses {
 	FIX_ACPI_BEGIN,
 	FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
 #endif
+#ifdef CONFIG_PCI_MMCONFIG
+	FIX_PCIE_MCFG,
+#endif
 	__end_of_permanent_fixed_addresses,
 	/* temporary boot-time mappings, used before ioremap() is functional */
 #define NR_FIX_BTMAPS	16
diff -urpNX build-tools/dontdiff linus-2.6/include/asm-ia64/sal.h pciexp-2.6/include/asm-ia64/sal.h
--- linus-2.6/include/asm-ia64/sal.h	2004-01-07 18:02:59.000000000 -0500
+++ pciexp-2.6/include/asm-ia64/sal.h	2004-02-01 14:56:26.000000000 -0500
@@ -741,10 +741,10 @@ ia64_sal_mc_set_params (u64 param_type, 
 
 /* Read from PCI configuration space */
 static inline s64
-ia64_sal_pci_config_read (u64 pci_config_addr, u64 size, u64 *value)
+ia64_sal_pci_config_read (u64 pci_config_addr, int type, u64 size, u64 *value)
 {
 	struct ia64_sal_retval isrv;
-	SAL_CALL(isrv, SAL_PCI_CONFIG_READ, pci_config_addr, size, 0, 0, 0, 0, 0);
+	SAL_CALL(isrv, SAL_PCI_CONFIG_READ, pci_config_addr, size, type, 0, 0, 0, 0);
 	if (value)
 		*value = isrv.v0;
 	return isrv.status;
@@ -752,11 +752,11 @@ ia64_sal_pci_config_read (u64 pci_config
 
 /* Write to PCI configuration space */
 static inline s64
-ia64_sal_pci_config_write (u64 pci_config_addr, u64 size, u64 value)
+ia64_sal_pci_config_write (u64 pci_config_addr, int type, u64 size, u64 value)
 {
 	struct ia64_sal_retval isrv;
 	SAL_CALL(isrv, SAL_PCI_CONFIG_WRITE, pci_config_addr, size, value,
-	         0, 0, 0, 0);
+	         type, 0, 0, 0);
 	return isrv.status;
 }
 
diff -urpNX build-tools/dontdiff linus-2.6/include/linux/acpi.h pciexp-2.6/include/linux/acpi.h
--- linus-2.6/include/linux/acpi.h	2003-10-08 16:53:03.000000000 -0400
+++ pciexp-2.6/include/linux/acpi.h	2004-01-29 08:46:48.000000000 -0500
@@ -317,6 +317,15 @@ struct acpi_table_ecdt {
 	char				ec_id[0];
 } __attribute__ ((packed));
 
+/* PCI MMCONFIG */
+
+struct acpi_table_mcfg {
+	struct acpi_table_header	header;
+	u8				reserved[8];
+	u32				base_address;
+	u32				base_reserved;
+} __attribute__ ((packed));
+
 /* Table Handlers */
 
 enum acpi_table_id {
@@ -338,6 +347,7 @@ enum acpi_table_id {
 	ACPI_SSDT,
 	ACPI_SPMI,
 	ACPI_HPET,
+	ACPI_MCFG,
 	ACPI_TABLE_COUNT
 };
 
@@ -369,6 +379,8 @@ void acpi_numa_arch_fixup(void);
 
 extern int acpi_mp_config;
 
+extern u32 pci_mmcfg_base_addr;
+
 #else	/*!CONFIG_ACPI_BOOT*/
 
 #define acpi_mp_config	0
diff -urpNX build-tools/dontdiff linus-2.6/include/linux/pci.h pciexp-2.6/include/linux/pci.h
--- linus-2.6/include/linux/pci.h	2004-01-27 21:05:48.000000000 -0500
+++ pciexp-2.6/include/linux/pci.h	2004-01-29 09:13:20.000000000 -0500
@@ -410,6 +410,8 @@ struct pci_dev {
 	unsigned short vendor_compatible[DEVICE_COUNT_COMPATIBLE];
 	unsigned short device_compatible[DEVICE_COUNT_COMPATIBLE];
 
+	int		cfg_size;	/* Size of configuration space */
+
 	/*
 	 * Instead of touching interrupt line and base address registers
 	 * directly, use the values stored here. They might be different!

-- 
"Next the statesmen will invent cheap lies, putting the blame upon 
the nation that is attacked, and every man will be glad of those
conscience-soothing falsities, and will diligently study them, and refuse
to examine any refutations of them; and thus he will by and by convince 
himself that the war is just, and will thank God for the better sleep 
he enjoys after this process of grotesque self-deception." -- Mark Twain

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-02-01 15:18               ` Matthew Wilcox
@ 2004-02-01 18:28                 ` Eric W. Biederman
  2004-02-01 20:11                   ` Matthew Wilcox
  0 siblings, 1 reply; 43+ messages in thread
From: Eric W. Biederman @ 2004-02-01 18:28 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Linus Torvalds, Durairaj, Sundarapandian, Kernel Mailing List,
	linux-pci, Alan Cox, Greg KH, Andi Kleen, Andrew Morton, mj,
	Kondratiev, Vladimir, Seshadri, Harinarayanan, Nakajima, Jun


To the Intel guys working out the ACPI 3.0 interfaces.

1) How do we find the Root Complex Register Block?
2) How does ACPI describe multiple MMCONFIG spaces?

Matthew Wilcox <willy@debian.org> writes:

> On Sun, Feb 01, 2004 at 04:00:01AM -0700, Eric W. Biederman wrote:
>
> > I'm wondering if ioremap or set_fixmap could be modified to take
> > a page frame number or possibly a token like the dma mapping functions
> > so we don't need all of the low bits and can actually solve these
> > problems on a 32bit architecture.
> 
> The interface I'd actually like to see drivers using is
> 	unsigned long addr = map_resource(struct resource *res);
> 
> which would probably be implemented something like:
> 
> static inline unsigned long map_resource(struct resource *res)
> {
> 	ioremap(res->start, res->end - res->start + 1);
> }

On 32bit arches I think it would look like:

static void *map_resource(struct resource *res)
{
        void *addr;
        unsigned long first_pfn, last_pfn, pages;
        unsigned long offset;
        if (res->end >= too_big) {
        	return NULL;
        }
        offset = res->start & ~PAGE_MASK;
        first_pfn = res->start >> PAGE_SHIFT;
        last_pfn  = res->end   >> PAGE_SHIFT;
        pages = last_pfn - first_pfn + 1;
        addr = ioremap_pfn(first_pfn, pages);
        if (addr) {
		addr = (void *)(offset + (char *)addr);
        }
        return addr;
}

I suspect the check for an address being too big
would be useful even on 64bit architectures.  I just looked
and ia64 does not have that check and bad BARs could cause
interesting problems.  It makes sense to catch illegal bar values
earlier, but this whole part of the code is a slow path so putting in
a sanity check should not hurt anything.

I think we want an ioremap_pfn instead of an ioremap64.  On 64bit
platforms ioremap64 is just ioremap so it is redundant.  On 32bit
platforms ioremap64 is really ioremap_pfn with just a different
wrapper around it.  While ioremap_pfn makes sense on all
architectures.  

ioremap_pfn has the added plus that on 32bit arches it will
usually have a 32bit pfn as well.

> on 64-bit architectures.  32-bit architectures would need more though.
> vm_struct uses 'unsigned long' to represent the phys_addr.  Can we get
> away with changing phys_addr to phys_pagenr?  That should allow 32-bit
> arches to implement ioremap64().

I don't see why not.  vm_struct is only used with vmalloc data,
and phys_addr is only used by a handful of architecture specific
files.  Everything else is in terms of page frame numbers already.

> Note I don't intend to do this work -- 64-bit BARs work fine on ia64 ;-)

Ah yes the architecture on the end of the hardware update list.

The real problem is that struct resource needs to be expanded on 32bit
architectures.  I'm not certain how much changing that will affect.  I
know it has been looked at a time or two before, and there were some
problems that were noted on 32bit architectures.  This may be a 2.7
issue.

> > The first draft of the patch had a u64 there.  So I think we should
> > at least check to ensure the high half is zero.  If it the high half
> > is not zero we can print an annoying error message.  All of the normal
> > pci capabilities are still limited to being in the first 256 bytes of
> > the configuration space.  So not a lot is lost if we can't enable the
> > entire 4K. 
> 
> Yes, I think that's a reasonable thing to do until an ioremap64
> exists.

Could the patch be updated to do that please?

> > There is also one piece I did not see the PCI Express configuration
> > space for the root complex.  This is a configuration space with no pci
> > bus/dev/fn numbers, if my memory serves me correctly.
> 
> Your memory is correct.  The spec says:
> 
>    System firmware communicates the base address of the RCRB for each Root
>    Port to the operating system. Multiple Root Ports may be associated
>    with the same RCRB. The RCRB memory-mapped registers must not reside
>    in the same address space as the memory-mapped configuration space.
> 
> (RCRB == Root Complex Register Block)
> 
> I don't know how these systems intend to communicate the the base address.
> Presumably this will also be part of ACPI 3.0.  Anyway, this is an
> orthogonal issue from this patch which only aims to allow use of the
> MMCONFIG space.

It is an orthogonal issue of everything except that Intel appears to
be testing out the new table interface on the Linux community.  So
this is a reasonable time to provide feedback.

> > On the interface side I also have the question how it will be
> > described when there are multiple memory configuration spaces
> > corresponding to disjoint pci configuration spaces.  I don't think
> > we can easily support that in Linux at the moment but there is no
> > reason why there can't be a table entry for it.
> 
> i386 doesn't support it but ia64, ppc and sparc do.  I presume each
> domain will get its own MCFG space.  It won't be hard to support, but
> there's no point in doing it until hardware exists.

Ok, I guess that did get implemented then.  Especially for the ia64
case it is a significant question what does the acpi table need to
look like for multiple MCFG spaces.  We should implement this properly
if we can.  This is another issue that is relevant because the
interfaces are still being defined.

Eric

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-02-01 11:00             ` Eric W. Biederman
@ 2004-02-01 15:18               ` Matthew Wilcox
  2004-02-01 18:28                 ` Eric W. Biederman
  0 siblings, 1 reply; 43+ messages in thread
From: Matthew Wilcox @ 2004-02-01 15:18 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Matthew Wilcox, Linus Torvalds, Durairaj, Sundarapandian,
	Kernel Mailing List, linux-pci, Alan Cox, Greg KH, Andi Kleen,
	Andrew Morton, mj, Kondratiev, Vladimir, Seshadri, Harinarayanan,
	Nakajima, Jun

On Sun, Feb 01, 2004 at 04:00:01AM -0700, Eric W. Biederman wrote:
> Matthew Wilcox <willy@debian.org> writes:
> > This is actually a Linux limitation -- we're pretty bad at dealing with
> > 64-bit BARs on 32-bit architectures.  There's two interfaces to get
> > at it -- ioremap() and set_fixmap().  Both of these interfaces take an
> > unsigned long to describe a physical address.
> 
> Which is another issue besides PCI express.  This 32bit VM on a 64bit
> box I find quite annoying.  Now that there are 64bit x86 alternatives
> I won't be shy about using 64bit addresses in BARs, if I need them.  
> On a box with 4G or more you need PAE anyway...
> 
> I'm wondering if ioremap or set_fixmap could be modified to take
> a page frame number or possibly a token like the dma mapping functions
> so we don't need all of the low bits and can actually solve these
> problems on a 32bit architecture.

The interface I'd actually like to see drivers using is
	unsigned long addr = map_resource(struct resource *res);

which would probably be implemented something like:

static inline unsigned long map_resource(struct resource *res)
{
	ioremap(res->start, res->end - res->start + 1);
}

on 64-bit architectures.  32-bit architectures would need more though.
vm_struct uses 'unsigned long' to represent the phys_addr.  Can we get
away with changing phys_addr to phys_pagenr?  That should allow 32-bit
arches to implement ioremap64().

Note I don't intend to do this work -- 64-bit BARs work fine on ia64 ;-)

> The first draft of the patch had a u64 there.  So I think we should
> at least check to ensure the high half is zero.  If it the high half
> is not zero we can print an annoying error message.  All of the normal
> pci capabilities are still limited to being in the first 256 bytes of
> the configuration space.  So not a lot is lost if we can't enable the
> entire 4K. 

Yes, I think that's a reasonable thing to do until an ioremap64 exists.

> There is also one piece I did not see the PCI Express configuration
> space for the root complex.  This is a configuration space with no pci
> bus/dev/fn numbers, if my memory serves me correctly.

Your memory is correct.  The spec says:

   System firmware communicates the base address of the RCRB for each Root
   Port to the operating system. Multiple Root Ports may be associated
   with the same RCRB. The RCRB memory-mapped registers must not reside
   in the same address space as the memory-mapped configuration space.

(RCRB == Root Complex Register Block)

I don't know how these systems intend to communicate the base address.
Presumably this will also be part of ACPI 3.0.  Anyway, this is an
orthogonal issue from this patch which only aims to allow use of the
MMCONFIG space.

> On the interface side I also have the question how it will be
> described when there are multiple memory configuration spaces
> corresponding to disjoint pci configuration spaces.  I don't think
> we can easily support that in Linux at the moment but there is no
> reason why there can't be a table entry for it.

i386 doesn't support it but ia64, ppc and sparc do.  I presume each
domain will get its own MCFG space.  It won't be hard to support, but
there's no point in doing it until hardware exists.

-- 
"Next the statesmen will invent cheap lies, putting the blame upon 
the nation that is attacked, and every man will be glad of those
conscience-soothing falsities, and will diligently study them, and refuse
to examine any refutations of them; and thus he will by and by convince 
himself that the war is just, and will thank God for the better sleep 
he enjoys after this process of grotesque self-deception." -- Mark Twain

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-02-01  5:10           ` Matthew Wilcox
  2004-02-01 11:00             ` Eric W. Biederman
@ 2004-02-01 11:10             ` Eric W. Biederman
  1 sibling, 0 replies; 43+ messages in thread
From: Eric W. Biederman @ 2004-02-01 11:10 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Linus Torvalds, Durairaj, Sundarapandian, Kernel Mailing List,
	linux-pci, Alan Cox, Greg KH, Andi Kleen, Andrew Morton, mj,
	Kondratiev, Vladimir, Seshadri, Harinarayanan, Nakajima, Jun


One last note.  I believe I have a system in a lab where
this patch can be tested out.  If so I will see if I can
get this to the right people for testing.

Eric



^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-02-01  5:10           ` Matthew Wilcox
@ 2004-02-01 11:00             ` Eric W. Biederman
  2004-02-01 15:18               ` Matthew Wilcox
  2004-02-01 11:10             ` Eric W. Biederman
  1 sibling, 1 reply; 43+ messages in thread
From: Eric W. Biederman @ 2004-02-01 11:00 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Linus Torvalds, Durairaj, Sundarapandian, Kernel Mailing List,
	linux-pci, Alan Cox, Greg KH, Andi Kleen, Andrew Morton, mj,
	Kondratiev, Vladimir, Seshadri, Harinarayanan, Nakajima, Jun

Matthew Wilcox <willy@debian.org> writes:

> On Sat, Jan 31, 2004 at 02:57:29PM -0700, Eric W. Biederman wrote:
> > Is it really safe to treat the base address as a u32?  I know
> > if I was doing the BIOS and that address was tied to a 32bit BAR I
> > would be extremely tempted to put those 256M of address space above
> > 4G.  Putting something like that below 4G leads to 1/2 Gig of memory
> > missing. 
> 
> This is actually a Linux limitation -- we're pretty bad at dealing with
> 64-bit BARs on 32-bit architectures.  There's two interfaces to get
> at it -- ioremap() and set_fixmap().  Both of these interfaces take an
> unsigned long to describe a physical address.

Which is another issue besides PCI express.  This 32bit VM on a 64bit
box I find quite annoying.  Now that there are 64bit x86 alternatives
I won't be shy about using 64bit addresses in BARs, if I need them.  
On a box with 4G or more you need PAE anyway...

I'm wondering if ioremap or set_fixmap could be modified to take
a page frame number or possibly a token like the dma mapping functions
so we don't need all of the low bits and can actually solve these
problems on a 32bit architecture.

> > Point being I don't think it is safe to assume the BIOS always puts
> > the extended PCI configuration space below 4G.
> 
> MCFG isn't described in any released version of the ACPI spec, so I
> don't know whether it's even possible for it to be a 64-bit address.
> There's a reserved field that might be used for the upper 32 bits.

The first draft of the patch had a u64 there.  So I think we should
at least check to ensure the high half is zero.  If the high half
is not zero we can print an annoying error message.  All of the normal
pci capabilities are still limited to being in the first 256 bytes of
the configuration space.  So not a lot is lost if we can't enable the
entire 4K. 

There is also one piece I did not see: the PCI Express configuration
space for the root complex.  This is a configuration space with no pci
bus/dev/fn numbers, if my memory serves me correctly.

On the interface side I also have the question how it will be
described when there are multiple memory configuration spaces
corresponding to disjoint pci configuration spaces.  I don't think
we can easily support that in Linux at the moment but there is no
reason why there can't be a table entry for it.

Eric

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-31 21:57         ` Eric W. Biederman
  2004-02-01  4:41           ` Grant Grundler
@ 2004-02-01  5:10           ` Matthew Wilcox
  2004-02-01 11:00             ` Eric W. Biederman
  2004-02-01 11:10             ` Eric W. Biederman
  1 sibling, 2 replies; 43+ messages in thread
From: Matthew Wilcox @ 2004-02-01  5:10 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Matthew Wilcox, Linus Torvalds, Durairaj, Sundarapandian,
	Kernel Mailing List, linux-pci, Alan Cox, Greg KH, Andi Kleen,
	Andrew Morton, mj, Kondratiev, Vladimir, Seshadri, Harinarayanan,
	Nakajima, Jun

On Sat, Jan 31, 2004 at 02:57:29PM -0700, Eric W. Biederman wrote:
> Is it really safe to treat the base address as a u32?  I know
> if I was doing the BIOS and that address was tied to a 32bit BAR I
> would be extremely tempted to put those 256M of address space above
> 4G.  Putting something like that below 4G leads to 1/2 Gig of memory
> missing. 

This is actually a Linux limitation -- we're pretty bad at dealing with
64-bit BARs on 32-bit architectures.  There's two interfaces to get
at it -- ioremap() and set_fixmap().  Both of these interfaces take an
unsigned long to describe a physical address.

> Point being I don't think it is safe to assume the BIOS always puts
> the extended PCI configuration space below 4G.

MCFG isn't described in any released version of the ACPI spec, so I
don't know whether it's even possible for it to be a 64-bit address.
There's a reserved field that might be used for the upper 32 bits.

-- 
"Next the statesmen will invent cheap lies, putting the blame upon 
the nation that is attacked, and every man will be glad of those
conscience-soothing falsities, and will diligently study them, and refuse
to examine any refutations of them; and thus he will by and by convince 
himself that the war is just, and will thank God for the better sleep 
he enjoys after this process of grotesque self-deception." -- Mark Twain

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-31 21:57         ` Eric W. Biederman
@ 2004-02-01  4:41           ` Grant Grundler
  2004-02-01  5:10           ` Matthew Wilcox
  1 sibling, 0 replies; 43+ messages in thread
From: Grant Grundler @ 2004-02-01  4:41 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Matthew Wilcox, Linus Torvalds, Durairaj, Sundarapandian,
	Kernel Mailing List, linux-pci, Alan Cox, Greg KH, Andi Kleen,
	Andrew Morton, mj, Kondratiev, Vladimir, Seshadri, Harinarayanan,
	Nakajima, Jun

On Sat, Jan 31, 2004 at 02:57:29PM -0700, Eric W. Biederman wrote:
> Is it really safe to treat the base address as a u32?

Sorry...I missed this in the code...but the following confuses me:

>   I know
> if I was doing the BIOS and that address was tied to a 32bit BAR I
> would be extremely tempted to put those 256M of address space above
> 4G.

uhmm, how can one put a 32-bit BAR above 4G?
You meant 64-bit BAR?

> Point being I don't think it is safe to assume the BIOS always puts
> the extended PCI configuration space below 4G.

where MMCONFIG lives is orthogonal to where BARs point to.
I'm pretty sure I missed the point...sorry.

grant

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-29 16:42       ` Matthew Wilcox
  2004-01-29 16:52         ` Linus Torvalds
@ 2004-01-31 21:57         ` Eric W. Biederman
  2004-02-01  4:41           ` Grant Grundler
  2004-02-01  5:10           ` Matthew Wilcox
  1 sibling, 2 replies; 43+ messages in thread
From: Eric W. Biederman @ 2004-01-31 21:57 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Linus Torvalds, Durairaj, Sundarapandian, Kernel Mailing List,
	linux-pci, Alan Cox, Greg KH, Andi Kleen, Andrew Morton, mj,
	Kondratiev, Vladimir, Seshadri, Harinarayanan, Nakajima, Jun

Matthew Wilcox <willy@debian.org> writes:

> On Thu, Jan 29, 2004 at 08:05:52AM -0800, Linus Torvalds wrote:
> > The compiler _should_ entirely compile away "fix_to_virt(xxx)", so by 
> > creating a variable for the value, you're actually making code generation 
> > worse. You might as well have
> > 
> > 	#define mmcfg_virt_addr (fix_to_virt(FIX_PCIE_MCFG))
> > 
> > instead.
> 
> Ahh, I missed the comment towards the top of fixmap.h that this is a
> constant address.  You're so smart sometimes ;-)
> 
> > That said, this patch looks perfectly acceptable to me. With some testing, 
> > I'd take it through Greg or -mm.
> 
> Cool.  Here's the final version for testing then.

Is it really safe to treat the base address as a u32?  I know
if I was doing the BIOS and that address was tied to a 32bit BAR I
would be extremely tempted to put those 256M of address space above
4G.  Putting something like that below 4G leads to 1/2 Gig of memory
missing. 

You can also put the memory above 4G on most intel chipsets but I'd
rather have my memory down low where my legacy OS could get to it
rather than have my PCI extended configuration space down low where
nothing really needs it. 

Point being I don't think it is safe to assume the BIOS always puts
the extended PCI configuration space below 4G.

Eric

^ permalink raw reply	[flat|nested] 43+ messages in thread

* RE: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
@ 2004-01-30 16:58 Nakajima, Jun
  0 siblings, 0 replies; 43+ messages in thread
From: Nakajima, Jun @ 2004-01-30 16:58 UTC (permalink / raw)
  To: Greg KH, Durairaj, Sundarapandian, Kondratiev, Vladimir,
	Seshadri, Harinarayanan
  Cc: Matthew Wilcox, Kernel Mailing List, linux-pci

> Also, can someone from Intel test out Matthew's patch to make sure it
> works properly for them on their hardware?  It's much cleaner than the
> last patch submitted by you all :)

Okay I'll make sure it happens.

Jun

> -----Original Message-----
> From: Greg KH [mailto:greg@kroah.com]
> Sent: Friday, January 30, 2004 8:33 AM
> To: Durairaj, Sundarapandian; Kondratiev, Vladimir; Seshadri,
> Harinarayanan; Nakajima, Jun
> Cc: Matthew Wilcox; Kernel Mailing List;
> linux-pci@atrey.karlin.mff.cuni.cz
> Subject: Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
> 
> On Thu, Jan 29, 2004 at 10:09:52AM -0800, Greg KH wrote:
> > On Thu, Jan 29, 2004 at 08:05:52AM -0800, Linus Torvalds wrote:
> > >
> > > That said, this patch looks perfectly acceptable to me. With some testing,
> > > I'd take it through Greg or -mm.
> >
> > It's looking much better.  But I _really_ want to actually test this on
> > real hardware.  As no one is shipping PCI Express hardware yet, there is
> > no rush to get this patch into the kernel tree.
> 
> Also, can someone from Intel test out Matthew's patch to make sure it
> works properly for them on their hardware?  It's much cleaner than the
> last patch submitted by you all :)
> 
> thanks,
> 
> greg k-h

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-29 18:09       ` Greg KH
@ 2004-01-30 16:33         ` Greg KH
  0 siblings, 0 replies; 43+ messages in thread
From: Greg KH @ 2004-01-30 16:33 UTC (permalink / raw)
  To: Durairaj, Sundarapandian, Kondratiev, Vladimir, Seshadri,
	Harinarayanan, Nakajima, Jun
  Cc: Matthew Wilcox, Kernel Mailing List, linux-pci

On Thu, Jan 29, 2004 at 10:09:52AM -0800, Greg KH wrote:
> On Thu, Jan 29, 2004 at 08:05:52AM -0800, Linus Torvalds wrote:
> > 
> > That said, this patch looks perfectly acceptable to me. With some testing, 
> > I'd take it through Greg or -mm.
> 
> It's looking much better.  But I _really_ want to actually test this on
> real hardware.  As no one is shipping PCI Express hardware yet, there is
> no rush to get this patch into the kernel tree.

Also, can someone from Intel test out Matthew's patch to make sure it
works properly for them on their hardware?  It's much cleaner than the
last patch submitted by you all :)

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-29 16:05     ` Linus Torvalds
  2004-01-29 16:42       ` Matthew Wilcox
@ 2004-01-29 18:09       ` Greg KH
  2004-01-30 16:33         ` Greg KH
  1 sibling, 1 reply; 43+ messages in thread
From: Greg KH @ 2004-01-29 18:09 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Matthew Wilcox, Durairaj, Sundarapandian, Kernel Mailing List,
	linux-pci, Alan Cox, Andi Kleen, Andrew Morton, mj, Kondratiev,
	Vladimir, Seshadri, Harinarayanan, Nakajima, Jun

On Thu, Jan 29, 2004 at 08:05:52AM -0800, Linus Torvalds wrote:
> 
> That said, this patch looks perfectly acceptable to me. With some testing, 
> I'd take it through Greg or -mm.

It's looking much better.  But I _really_ want to actually test this on
real hardware.  As no one is shipping PCI Express hardware yet, there is
no rush to get this patch into the kernel tree.

Bill Irwin and I are working on getting some PCI Express hardware to
test this patch out on.  We've been promised some for a while, hopefully
it turns up soon...

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-29 16:42       ` Matthew Wilcox
@ 2004-01-29 16:52         ` Linus Torvalds
  2004-01-31 21:57         ` Eric W. Biederman
  1 sibling, 0 replies; 43+ messages in thread
From: Linus Torvalds @ 2004-01-29 16:52 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Durairaj, Sundarapandian, Kernel Mailing List, linux-pci,
	Alan Cox, Greg KH, Andi Kleen, Andrew Morton, mj, Kondratiev,
	Vladimir, Seshadri, Harinarayanan, Nakajima, Jun



On Thu, 29 Jan 2004, Matthew Wilcox wrote:
> 
> Ahh, I missed the comment towards the top of fixmap.h that this is a
> constant address.  You're so smart sometimes ;-)

Hey, you'd better verify that the compiler doesn't do anything stupid (but
the good news is that if it doesn't inline the thing properly and do all
the constant folding, you should get a link-time failure about
"__this_fixmap_does_not_exist", so we should be fairly safe).

		Linus

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-29 16:05     ` Linus Torvalds
@ 2004-01-29 16:42       ` Matthew Wilcox
  2004-01-29 16:52         ` Linus Torvalds
  2004-01-31 21:57         ` Eric W. Biederman
  2004-01-29 18:09       ` Greg KH
  1 sibling, 2 replies; 43+ messages in thread
From: Matthew Wilcox @ 2004-01-29 16:42 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Matthew Wilcox, Durairaj, Sundarapandian, Kernel Mailing List,
	linux-pci, Alan Cox, Greg KH, Andi Kleen, Andrew Morton, mj,
	Kondratiev, Vladimir, Seshadri, Harinarayanan, Nakajima, Jun

On Thu, Jan 29, 2004 at 08:05:52AM -0800, Linus Torvalds wrote:
> The compiler _should_ entirely compile away "fix_to_virt(xxx)", so by 
> creating a variable for the value, you're actually making code generation 
> worse. You might as well have
> 
> 	#define mmcfg_virt_addr (fix_to_virt(FIX_PCIE_MCFG))
> 
> instead.

Ahh, I missed the comment towards the top of fixmap.h that this is a
constant address.  You're so smart sometimes ;-)

> That said, this patch looks perfectly acceptable to me. With some testing, 
> I'd take it through Greg or -mm.

Cool.  Here's the final version for testing then.

diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/Kconfig pciexp-2.6/arch/i386/Kconfig
--- linus-2.6/arch/i386/Kconfig	2004-01-27 21:05:17.000000000 -0500
+++ pciexp-2.6/arch/i386/Kconfig	2004-01-29 09:16:22.000000000 -0500
@@ -1030,12 +1030,16 @@ config PCI_GOBIOS
 	  PCI-based systems don't have any BIOS at all. Linux can also try to
 	  detect the PCI hardware directly without using the BIOS.
 
-	  With this option, you can specify how Linux should detect the PCI
-	  devices. If you choose "BIOS", the BIOS will be used, if you choose
-	  "Direct", the BIOS won't be used, and if you choose "Any", the
-	  kernel will try the direct access method and falls back to the BIOS
-	  if that doesn't work. If unsure, go with the default, which is
-	  "Any".
+	  With this option, you can specify how Linux should detect the
+	  PCI devices. If you choose "BIOS", the BIOS will be used,
+	  if you choose "Direct", the BIOS won't be used, and if you
+	  choose "MMConfig", then PCI Express MMCONFIG will be used.
+	  If you choose "Any", the kernel will try MMCONFIG, then the
+	  direct access method and falls back to the BIOS if that doesn't
+	  work. If unsure, go with the default, which is "Any".
+
+config PCI_GOMMCONFIG
+	bool "MMConfig"
 
 config PCI_GODIRECT
 	bool "Direct"
@@ -1055,6 +1059,12 @@ config PCI_DIRECT
  	depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS)
 	default y
 
+config PCI_MMCONFIG
+	bool
+	depends on PCI && (PCI_GOMMCONFIG || PCI_GOANY)
+	select ACPI_BOOT
+	default y
+
 config PCI_USE_VECTOR
 	bool "Vector-based interrupt indexing"
 	depends on X86_LOCAL_APIC && X86_IO_APIC
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/kernel/acpi/boot.c pciexp-2.6/arch/i386/kernel/acpi/boot.c
--- linus-2.6/arch/i386/kernel/acpi/boot.c	2004-01-07 18:02:42.000000000 -0500
+++ pciexp-2.6/arch/i386/kernel/acpi/boot.c	2004-01-29 09:13:51.000000000 -0500
@@ -95,6 +95,27 @@ char *__acpi_map_table(unsigned long phy
 }
 
 
+#ifdef CONFIG_PCI_MMCONFIG
+static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+{
+	struct acpi_table_mcfg *mcfg;
+
+	if (!phys_addr || !size)
+		return -EINVAL;
+
+	mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size);
+	if (!mcfg) {
+		printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+		return -ENODEV;
+	}
+
+	if (mcfg->base_address)
+		pci_mmcfg_base_addr = mcfg->base_address;
+
+	return 0;
+}
+#endif /* CONFIG_PCI_MMCONFIG */
+
 #ifdef CONFIG_X86_LOCAL_APIC
 
 static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
@@ -515,6 +536,19 @@ acpi_boot_init (void)
 
 #endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */
 
+#ifdef CONFIG_PCI_MMCONFIG
+	result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+	if (!result) {
+		printk(KERN_WARNING PREFIX "MCFG not present\n");
+		return 0;
+	} else if (result < 0) {
+		printk(KERN_ERR PREFIX "Error parsing MCFG\n");
+		return result;
+	} else if (result > 1) {
+		printk(KERN_WARNING PREFIX "Multiple MCFG tables exist\n");
+	}
+#endif /* CONFIG_PCI_MMCONFIG */
+
 #ifdef CONFIG_X86_LOCAL_APIC
 	if (acpi_lapic && acpi_ioapic) {
 		smp_found_config = 1;
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/Makefile pciexp-2.6/arch/i386/pci/Makefile
--- linus-2.6/arch/i386/pci/Makefile	2003-07-29 13:00:27.000000000 -0400
+++ pciexp-2.6/arch/i386/pci/Makefile	2004-01-29 08:11:28.000000000 -0500
@@ -1,6 +1,7 @@
 obj-y				:= i386.o
 
 obj-$(CONFIG_PCI_BIOS)		+= pcbios.o
+obj-$(CONFIG_PCI_MMCONFIG)	+= mmconfig.o
 obj-$(CONFIG_PCI_DIRECT)	+= direct.o
 
 pci-y				:= fixup.o
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/common.c pciexp-2.6/arch/i386/pci/common.c
--- linus-2.6/arch/i386/pci/common.c	2003-09-08 17:41:32.000000000 -0400
+++ pciexp-2.6/arch/i386/pci/common.c	2004-01-29 08:11:19.000000000 -0500
@@ -19,7 +19,8 @@
 extern  void pcibios_sort(void);
 #endif
 
-unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2;
+unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
+				PCI_PROBE_MMCONF;
 
 int pcibios_last_bus = -1;
 struct pci_bus *pci_root_bus = NULL;
@@ -197,6 +198,12 @@ char * __devinit  pcibios_setup(char *st
 		return NULL;
 	}
 #endif
+#ifdef CONFIG_PCI_MMCONFIG
+	else if (!strcmp(str, "nommconf")) {
+		pci_probe &= ~PCI_PROBE_MMCONF;
+		return NULL;
+	}
+#endif
 #ifdef CONFIG_ACPI_PCI
 	else if (!strcmp(str, "noacpi")) {
 		pci_probe |= PCI_NO_ACPI_ROUTING;
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/mmconfig.c pciexp-2.6/arch/i386/pci/mmconfig.c
--- linus-2.6/arch/i386/pci/mmconfig.c	1969-12-31 19:00:00.000000000 -0500
+++ pciexp-2.6/arch/i386/pci/mmconfig.c	2004-01-29 11:37:09.000000000 -0500
@@ -0,0 +1,115 @@
+/*
+ * mmconfig.c - Low-level direct PCI config space access via MMCONFIG
+ */
+
+#include <linux/pci.h>
+#include <linux/init.h>
+#include "pci.h"
+
+/* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
+u32 pci_mmcfg_base_addr;
+
+#define mmcfg_virt_addr (fix_to_virt(FIX_PCIE_MCFG))
+
+/* The base address of the last MMCONFIG device accessed */
+static u32 mmcfg_last_accessed_device;
+
+/*
+ * Functions for accessing PCI configuration space with MMCONFIG accesses
+ */
+
+static inline void pci_exp_set_dev_base(int bus, int devfn)
+{
+	u32 dev_base = pci_mmcfg_base_addr | (bus << 20) | (devfn << 12);
+	if (dev_base != mmcfg_last_accessed_device) {
+		mmcfg_last_accessed_device = dev_base;
+		set_fixmap(FIX_PCIE_MCFG, dev_base);
+	}
+}
+
+static int pci_mmcfg_read(int seg, int bus, int devfn, int reg, int len, u32 *value)
+{
+	unsigned long flags;
+
+	if (!value || (bus > 255) || (devfn > 255) || (reg > 4095))
+		return -EINVAL;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+
+	pci_exp_set_dev_base(bus, devfn);
+
+	switch (len) {
+	case 1:
+		*value = readb(mmcfg_virt_addr + reg);
+		break;
+	case 2:
+		*value = readw(mmcfg_virt_addr + reg);
+		break;
+	case 4:
+		*value = readl(mmcfg_virt_addr + reg);
+		break;
+	}
+
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+
+	return 0;
+}
+
+static int pci_mmcfg_write(int seg, int bus, int devfn, int reg, int len, u32 value)
+{
+	unsigned long flags;
+
+	if ((bus > 255) || (devfn > 255) || (reg > 4095)) 
+		return -EINVAL;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+
+	pci_exp_set_dev_base(bus, devfn);
+
+	switch (len) {
+	case 1:
+		writeb(value, mmcfg_virt_addr + reg);
+		break;
+	case 2:
+		writew(value, mmcfg_virt_addr + reg);
+		break;
+	case 4:
+		writel(value, mmcfg_virt_addr + reg);
+		break;
+	}
+
+	/* Dummy read to flush PCI write */
+	readl(mmcfg_virt_addr);
+
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+
+	return 0;
+}
+
+static struct pci_raw_ops pci_mmcfg = {
+	.read =		pci_mmcfg_read,
+	.write =	pci_mmcfg_write,
+};
+
+static int __init pci_mmcfg_init(void)
+{
+	struct resource *region;
+
+	if ((pci_probe & PCI_PROBE_MMCONF) == 0)
+		goto out;
+	if (!pci_mmcfg_base_addr)
+		goto out;
+	region = request_mem_region(pci_mmcfg_base_addr, 256 * 1024 * 1024,
+			"PCI MMCONFIG");
+	if (!region)
+		goto out;
+
+	printk(KERN_INFO "PCI: Using MMCONFIG\n");
+	raw_pci_ops = &pci_mmcfg;
+	pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
+
+ out:
+	return 0;
+}
+
+arch_initcall(pci_mmcfg_init);
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/pci.h pciexp-2.6/arch/i386/pci/pci.h
--- linus-2.6/arch/i386/pci/pci.h	2003-07-29 13:00:27.000000000 -0400
+++ pciexp-2.6/arch/i386/pci/pci.h	2004-01-29 08:14:48.000000000 -0500
@@ -15,6 +15,9 @@
 #define PCI_PROBE_BIOS		0x0001
 #define PCI_PROBE_CONF1		0x0002
 #define PCI_PROBE_CONF2		0x0004
+#define PCI_PROBE_MMCONF	0x0008
+#define PCI_PROBE_MASK		0x000f
+
 #define PCI_NO_SORT		0x0100
 #define PCI_BIOS_SORT		0x0200
 #define PCI_NO_CHECKS		0x0400
diff -urpNX build-tools/dontdiff linus-2.6/drivers/acpi/tables.c pciexp-2.6/drivers/acpi/tables.c
--- linus-2.6/drivers/acpi/tables.c	2003-10-08 16:52:16.000000000 -0400
+++ pciexp-2.6/drivers/acpi/tables.c	2004-01-29 08:22:52.000000000 -0500
@@ -58,6 +58,7 @@ static char *acpi_table_signatures[ACPI_
 	[ACPI_SSDT]		= "SSDT",
 	[ACPI_SPMI]		= "SPMI",
 	[ACPI_HPET]		= "HPET",
+	[ACPI_MCFG]		= "MCFG",
 };
 
 /* System Description Table (RSDT/XSDT) */
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/pci-sysfs.c pciexp-2.6/drivers/pci/pci-sysfs.c
--- linus-2.6/drivers/pci/pci-sysfs.c	2003-08-22 22:46:57.000000000 -0400
+++ pciexp-2.6/drivers/pci/pci-sysfs.c	2004-01-29 09:30:43.000000000 -0500
@@ -71,7 +71,7 @@ pci_read_config(struct kobject *kobj, ch
 
 	/* Several chips lock up trying to read undefined config space */
 	if (capable(CAP_SYS_ADMIN)) {
-		size = 256;
+		size = dev->cfg_size;
 	} else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) {
 		size = 128;
 	}
@@ -123,10 +123,10 @@ pci_write_config(struct kobject *kobj, c
 	unsigned int size = count;
 	loff_t init_off = off;
 
-	if (off > 256)
+	if (off > dev->cfg_size)
 		return 0;
-	if (off + count > 256) {
-		size = 256 - off;
+	if (off + count > dev->cfg_size) {
+		size = dev->cfg_size - off;
 		count = size;
 	}
 
@@ -166,6 +166,16 @@ static struct bin_attribute pci_config_a
 	.write = pci_write_config,
 };
 
+static struct bin_attribute pcie_config_attr = {
+	.attr =	{
+		.name = "config",
+		.mode = S_IRUGO | S_IWUSR,
+	},
+	.size = 4096,
+	.read = pci_read_config,
+	.write = pci_write_config,
+};
+
 void pci_create_sysfs_dev_files (struct pci_dev *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -178,5 +188,9 @@ void pci_create_sysfs_dev_files (struct 
 	device_create_file (dev, &dev_attr_class);
 	device_create_file (dev, &dev_attr_irq);
 	device_create_file (dev, &dev_attr_resource);
-	sysfs_create_bin_file(&dev->kobj, &pci_config_attr);
+	if (pdev->cfg_size < 4096) {
+		sysfs_create_bin_file(&dev->kobj, &pci_config_attr);
+	} else {
+		sysfs_create_bin_file(&dev->kobj, &pcie_config_attr);
+	}
 }
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/pci.c pciexp-2.6/drivers/pci/pci.c
--- linus-2.6/drivers/pci/pci.c	2003-10-08 16:52:35.000000000 -0400
+++ pciexp-2.6/drivers/pci/pci.c	2004-01-29 08:23:57.000000000 -0500
@@ -90,6 +90,8 @@ pci_max_busnr(void)
  *  %PCI_CAP_ID_CHSWP        CompactPCI HotSwap 
  *
  *  %PCI_CAP_ID_PCIX         PCI-X
+ *
+ *  %PCI_CAP_ID_EXP          PCI Express
  */
 int
 pci_find_capability(struct pci_dev *dev, int cap)
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/probe.c pciexp-2.6/drivers/pci/probe.c
--- linus-2.6/drivers/pci/probe.c	2004-01-07 18:02:53.000000000 -0500
+++ pciexp-2.6/drivers/pci/probe.c	2004-01-29 08:59:46.000000000 -0500
@@ -17,6 +17,8 @@
 
 #define CARDBUS_LATENCY_TIMER	176	/* secondary latency timer */
 #define CARDBUS_RESERVE_BUSNR	3
+#define PCI_CFG_SPACE_SIZE	256
+#define PCI_CFG_SPACE_EXP_SIZE	4096
 
 /* Ugh.  Need to stop exporting this to modules. */
 LIST_HEAD(pci_root_buses);
@@ -479,6 +481,20 @@ static void pci_release_dev(struct devic
 	kfree(pci_dev);
 }
 
+/**
+ * pci_cfg_space_size - get the configuration space size of the PCI device
+ */
+static int pci_cfg_space_size(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_MMCONFIG
+	/* Find whether the device is PCI Express */
+	int is_pci_express_dev = pci_find_capability(dev, PCI_CAP_ID_EXP);
+	if (is_pci_express_dev)
+		return PCI_CFG_SPACE_EXP_SIZE;
+#endif
+	return PCI_CFG_SPACE_SIZE;
+}
+
 /*
  * Read the config data for a PCI device, sanity-check it
  * and fill in the dev structure...
@@ -515,6 +531,7 @@ pci_scan_device(struct pci_bus *bus, int
 	dev->multifunction = !!(hdr_type & 0x80);
 	dev->vendor = l & 0xffff;
 	dev->device = (l >> 16) & 0xffff;
+	dev->cfg_size = pci_cfg_space_size(dev);
 
 	/* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
 	   set this higher, assuming the system even supports it.  */
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/proc.c pciexp-2.6/drivers/pci/proc.c
--- linus-2.6/drivers/pci/proc.c	2004-01-07 18:02:53.000000000 -0500
+++ pciexp-2.6/drivers/pci/proc.c	2004-01-29 08:38:49.000000000 -0500
@@ -16,16 +16,15 @@
 #include <asm/uaccess.h>
 #include <asm/byteorder.h>
 
-#define PCI_CFG_SPACE_SIZE 256
-
 static int proc_initialized;	/* = 0 */
 
 static loff_t
 proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
 {
 	loff_t new = -1;
+	struct inode *inode = file->f_dentry->d_inode;
 
-	down(&file->f_dentry->d_inode->i_sem);
+	down(&inode->i_sem);
 	switch (whence) {
 	case 0:
 		new = off;
@@ -34,14 +33,14 @@ proc_bus_pci_lseek(struct file *file, lo
 		new = file->f_pos + off;
 		break;
 	case 2:
-		new = PCI_CFG_SPACE_SIZE + off;
+		new = inode->i_size + off;
 		break;
 	}
-	if (new < 0 || new > PCI_CFG_SPACE_SIZE)
+	if (new < 0 || new > inode->i_size)
 		new = -EINVAL;
 	else
 		file->f_pos = new;
-	up(&file->f_dentry->d_inode->i_sem);
+	up(&inode->i_sem);
 	return new;
 }
 
@@ -61,7 +60,7 @@ proc_bus_pci_read(struct file *file, cha
 	 */
 
 	if (capable(CAP_SYS_ADMIN))
-		size = PCI_CFG_SPACE_SIZE;
+		size = dev->cfg_size;
 	else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
 		size = 128;
 	else
@@ -134,14 +133,15 @@ proc_bus_pci_write(struct file *file, co
 	const struct proc_dir_entry *dp = PDE(ino);
 	struct pci_dev *dev = dp->data;
 	int pos = *ppos;
+	int size = dev->cfg_size;
 	int cnt;
 
-	if (pos >= PCI_CFG_SPACE_SIZE)
+	if (pos >= size)
 		return 0;
-	if (nbytes >= PCI_CFG_SPACE_SIZE)
-		nbytes = PCI_CFG_SPACE_SIZE;
-	if (pos + nbytes > PCI_CFG_SPACE_SIZE)
-		nbytes = PCI_CFG_SPACE_SIZE - pos;
+	if (nbytes >= size)
+		nbytes = size;
+	if (pos + nbytes > size)
+		nbytes = size - pos;
 	cnt = nbytes;
 
 	if (!access_ok(VERIFY_READ, buf, cnt))
@@ -403,7 +403,7 @@ int pci_proc_attach_device(struct pci_de
 		return -ENOMEM;
 	e->proc_fops = &proc_bus_pci_operations;
 	e->data = dev;
-	e->size = PCI_CFG_SPACE_SIZE;
+	e->size = dev->cfg_size;
 
 	return 0;
 }
diff -urpNX build-tools/dontdiff linus-2.6/include/asm-i386/fixmap.h pciexp-2.6/include/asm-i386/fixmap.h
--- linus-2.6/include/asm-i386/fixmap.h	2003-07-29 13:01:54.000000000 -0400
+++ pciexp-2.6/include/asm-i386/fixmap.h	2004-01-29 08:40:21.000000000 -0500
@@ -71,6 +71,9 @@ enum fixed_addresses {
 	FIX_ACPI_BEGIN,
 	FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
 #endif
+#ifdef CONFIG_PCI_MMCONFIG
+	FIX_PCIE_MCFG,
+#endif
 	__end_of_permanent_fixed_addresses,
 	/* temporary boot-time mappings, used before ioremap() is functional */
 #define NR_FIX_BTMAPS	16
diff -urpNX build-tools/dontdiff linus-2.6/include/linux/acpi.h pciexp-2.6/include/linux/acpi.h
--- linus-2.6/include/linux/acpi.h	2003-10-08 16:53:03.000000000 -0400
+++ pciexp-2.6/include/linux/acpi.h	2004-01-29 08:46:48.000000000 -0500
@@ -317,6 +317,15 @@ struct acpi_table_ecdt {
 	char				ec_id[0];
 } __attribute__ ((packed));
 
+/* PCI MMCONFIG */
+
+struct acpi_table_mcfg {
+	struct acpi_table_header	header;
+	u8				reserved[8];
+	u32				base_address;
+	u32				base_reserved;
+} __attribute__ ((packed));
+
 /* Table Handlers */
 
 enum acpi_table_id {
@@ -338,6 +347,7 @@ enum acpi_table_id {
 	ACPI_SSDT,
 	ACPI_SPMI,
 	ACPI_HPET,
+	ACPI_MCFG,
 	ACPI_TABLE_COUNT
 };
 
@@ -369,6 +379,8 @@ void acpi_numa_arch_fixup(void);
 
 extern int acpi_mp_config;
 
+extern u32 pci_mmcfg_base_addr;
+
 #else	/*!CONFIG_ACPI_BOOT*/
 
 #define acpi_mp_config	0
diff -urpNX build-tools/dontdiff linus-2.6/include/linux/pci.h pciexp-2.6/include/linux/pci.h
--- linus-2.6/include/linux/pci.h	2004-01-27 21:05:48.000000000 -0500
+++ pciexp-2.6/include/linux/pci.h	2004-01-29 09:13:20.000000000 -0500
@@ -410,6 +410,8 @@ struct pci_dev {
 	unsigned short vendor_compatible[DEVICE_COUNT_COMPATIBLE];
 	unsigned short device_compatible[DEVICE_COUNT_COMPATIBLE];
 
+	int		cfg_size;	/* Size of configuration space */
+
 	/*
 	 * Instead of touching interrupt line and base address registers
 	 * directly, use the values stored here. They might be different!

-- 
"Next the statesmen will invent cheap lies, putting the blame upon 
the nation that is attacked, and every man will be glad of those
conscience-soothing falsities, and will diligently study them, and refuse
to examine any refutations of them; and thus he will by and by convince 
himself that the war is just, and will thank God for the better sleep 
he enjoys after this process of grotesque self-deception." -- Mark Twain

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-29 15:59   ` Matthew Wilcox
@ 2004-01-29 16:05     ` Linus Torvalds
  2004-01-29 16:42       ` Matthew Wilcox
  2004-01-29 18:09       ` Greg KH
  0 siblings, 2 replies; 43+ messages in thread
From: Linus Torvalds @ 2004-01-29 16:05 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Durairaj, Sundarapandian, Kernel Mailing List, linux-pci,
	Alan Cox, Greg KH, Andi Kleen, Andrew Morton, mj, Kondratiev,
	Vladimir, Seshadri, Harinarayanan, Nakajima, Jun



On Thu, 29 Jan 2004, Matthew Wilcox wrote:
> 
> Brian Gerst spotted a bug -- I'd forgotten to initialise mmcfg_virt_addr.

The compiler _should_ entirely compile away "fix_to_virt(xxx)", so by 
creating a variable for the value, you're actually making code generation 
worse. You might as well have

	#define mmcfg_virt_addr (fix_to_virt(FIX_PCIE_MCFG))

instead.

That said, this patch looks perfectly acceptable to me. With some testing, 
I'd take it through Greg or -mm.

		Linus

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-29 15:09 ` Matthew Wilcox
@ 2004-01-29 15:59   ` Matthew Wilcox
  2004-01-29 16:05     ` Linus Torvalds
  0 siblings, 1 reply; 43+ messages in thread
From: Matthew Wilcox @ 2004-01-29 15:59 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Durairaj, Sundarapandian, linux-kernel, linux-pci, torvalds,
	alan, greg, Andi Kleen, akpm, mj, Kondratiev, Vladimir, Seshadri,
	Harinarayanan, Nakajima, Jun

On Thu, Jan 29, 2004 at 03:09:25PM +0000, Matthew Wilcox wrote:
> On Thu, Jan 29, 2004 at 05:02:39PM +0530, Durairaj, Sundarapandian wrote:
> > Please review this updated patch and send your comments.
> 
> Here's a rewrite of Sundarapandian Durairaj's patch for accessing extended
> PCI configuration space.  Changes of note:

Brian Gerst spotted a bug -- I'd forgotten to initialise mmcfg_virt_addr.
Updated patch:

diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/Kconfig pciexp-2.6/arch/i386/Kconfig
--- linus-2.6/arch/i386/Kconfig	2004-01-27 21:05:17.000000000 -0500
+++ pciexp-2.6/arch/i386/Kconfig	2004-01-29 09:16:22.000000000 -0500
@@ -1030,12 +1030,16 @@ config PCI_GOBIOS
 	  PCI-based systems don't have any BIOS at all. Linux can also try to
 	  detect the PCI hardware directly without using the BIOS.
 
-	  With this option, you can specify how Linux should detect the PCI
-	  devices. If you choose "BIOS", the BIOS will be used, if you choose
-	  "Direct", the BIOS won't be used, and if you choose "Any", the
-	  kernel will try the direct access method and falls back to the BIOS
-	  if that doesn't work. If unsure, go with the default, which is
-	  "Any".
+	  With this option, you can specify how Linux should detect the
+	  PCI devices. If you choose "BIOS", the BIOS will be used,
+	  if you choose "Direct", the BIOS won't be used, and if you
+	  choose "MMConfig", then PCI Express MMCONFIG will be used.
+	  If you choose "Any", the kernel will try MMCONFIG, then the
+	  direct access method and falls back to the BIOS if that doesn't
+	  work. If unsure, go with the default, which is "Any".
+
+config PCI_GOMMCONFIG
+	bool "MMConfig"
 
 config PCI_GODIRECT
 	bool "Direct"
@@ -1055,6 +1059,12 @@ config PCI_DIRECT
  	depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS)
 	default y
 
+config PCI_MMCONFIG
+	bool
+	depends on PCI && (PCI_GOMMCONFIG || PCI_GOANY)
+	select ACPI_BOOT
+	default y
+
 config PCI_USE_VECTOR
 	bool "Vector-based interrupt indexing"
 	depends on X86_LOCAL_APIC && X86_IO_APIC
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/kernel/acpi/boot.c pciexp-2.6/arch/i386/kernel/acpi/boot.c
--- linus-2.6/arch/i386/kernel/acpi/boot.c	2004-01-07 18:02:42.000000000 -0500
+++ pciexp-2.6/arch/i386/kernel/acpi/boot.c	2004-01-29 09:13:51.000000000 -0500
@@ -95,6 +95,27 @@ char *__acpi_map_table(unsigned long phy
 }
 
 
+#ifdef CONFIG_PCI_MMCONFIG
+static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+{
+	struct acpi_table_mcfg *mcfg;
+
+	if (!phys_addr || !size)
+		return -EINVAL;
+
+	mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size);
+	if (!mcfg) {
+		printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+		return -ENODEV;
+	}
+
+	if (mcfg->base_address)
+		pci_mmcfg_base_addr = mcfg->base_address;
+
+	return 0;
+}
+#endif /* CONFIG_PCI_MMCONFIG */
+
 #ifdef CONFIG_X86_LOCAL_APIC
 
 static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
@@ -515,6 +536,19 @@ acpi_boot_init (void)
 
 #endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */
 
+#ifdef CONFIG_PCI_MMCONFIG
+	result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+	if (!result) {
+		printk(KERN_WARNING PREFIX "MCFG not present\n");
+		return 0;
+	} else if (result < 0) {
+		printk(KERN_ERR PREFIX "Error parsing MCFG\n");
+		return result;
+	} else if (result > 1) {
+		printk(KERN_WARNING PREFIX "Multiple MCFG tables exist\n");
+	}
+#endif /* CONFIG_PCI_MMCONFIG */
+
 #ifdef CONFIG_X86_LOCAL_APIC
 	if (acpi_lapic && acpi_ioapic) {
 		smp_found_config = 1;
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/Makefile pciexp-2.6/arch/i386/pci/Makefile
--- linus-2.6/arch/i386/pci/Makefile	2003-07-29 13:00:27.000000000 -0400
+++ pciexp-2.6/arch/i386/pci/Makefile	2004-01-29 08:11:28.000000000 -0500
@@ -1,6 +1,7 @@
 obj-y				:= i386.o
 
 obj-$(CONFIG_PCI_BIOS)		+= pcbios.o
+obj-$(CONFIG_PCI_MMCONFIG)	+= mmconfig.o
 obj-$(CONFIG_PCI_DIRECT)	+= direct.o
 
 pci-y				:= fixup.o
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/common.c pciexp-2.6/arch/i386/pci/common.c
--- linus-2.6/arch/i386/pci/common.c	2003-09-08 17:41:32.000000000 -0400
+++ pciexp-2.6/arch/i386/pci/common.c	2004-01-29 08:11:19.000000000 -0500
@@ -19,7 +19,8 @@
 extern  void pcibios_sort(void);
 #endif
 
-unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2;
+unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
+				PCI_PROBE_MMCONF;
 
 int pcibios_last_bus = -1;
 struct pci_bus *pci_root_bus = NULL;
@@ -197,6 +198,12 @@ char * __devinit  pcibios_setup(char *st
 		return NULL;
 	}
 #endif
+#ifdef CONFIG_PCI_MMCONFIG
+	else if (!strcmp(str, "nommconf")) {
+		pci_probe &= ~PCI_PROBE_MMCONF;
+		return NULL;
+	}
+#endif
 #ifdef CONFIG_ACPI_PCI
 	else if (!strcmp(str, "noacpi")) {
 		pci_probe |= PCI_NO_ACPI_ROUTING;
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/mmconfig.c pciexp-2.6/arch/i386/pci/mmconfig.c
--- linus-2.6/arch/i386/pci/mmconfig.c	1969-12-31 19:00:00.000000000 -0500
+++ pciexp-2.6/arch/i386/pci/mmconfig.c	2004-01-29 10:50:30.000000000 -0500
@@ -0,0 +1,119 @@
+/*
+ * mmconfig.c - Low-level direct PCI config space access via MMCONFIG
+ */
+
+#include <linux/pci.h>
+#include <linux/init.h>
+#include "pci.h"
+
+/* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
+u32 pci_mmcfg_base_addr;
+
+/* The virtual address of the fixed PTE */
+static unsigned long mmcfg_virt_addr;
+
+/* The base address of the last MMCONFIG device accessed */
+static u32 mmcfg_last_accessed_device;
+
+/*
+ * Functions for accessing PCI configuration space with MMCONFIG accesses
+ */
+
+static inline void pci_exp_set_dev_base(int bus, int devfn)
+{
+	u32 dev_base = pci_mmcfg_base_addr | (bus << 20) | (devfn << 12);
+	if (dev_base != mmcfg_last_accessed_device) {
+		mmcfg_last_accessed_device = dev_base;
+		set_fixmap(FIX_PCIE_MCFG, dev_base);
+	}
+}
+
+static int pci_mmcfg_read(int seg, int bus, int devfn, int reg, int len, u32 *value)
+{
+	unsigned long flags;
+
+	if (!value || (bus > 255) || (devfn > 255) || (reg > 4095))
+		return -EINVAL;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+
+	pci_exp_set_dev_base(bus, devfn);
+
+	switch (len) {
+	case 1:
+		*value = readb(mmcfg_virt_addr + reg);
+		break;
+	case 2:
+		*value = readw(mmcfg_virt_addr + reg);
+		break;
+	case 4:
+		*value = readl(mmcfg_virt_addr + reg);
+		break;
+	}
+
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+
+	return 0;
+}
+
+static int pci_mmcfg_write(int seg, int bus, int devfn, int reg, int len, u32 value)
+{
+	unsigned long flags;
+
+	if ((bus > 255) || (devfn > 255) || (reg > 4095)) 
+		return -EINVAL;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+
+	pci_exp_set_dev_base(bus, devfn);
+
+	switch (len) {
+	case 1:
+		writeb(value, mmcfg_virt_addr + reg);
+		break;
+	case 2:
+		writew(value, mmcfg_virt_addr + reg);
+		break;
+	case 4:
+		writel(value, mmcfg_virt_addr + reg);
+		break;
+	}
+
+	/* Dummy read to flush PCI write */
+	readl(mmcfg_virt_addr);
+
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+
+	return 0;
+}
+
+static struct pci_raw_ops pci_mmcfg = {
+	.read =		pci_mmcfg_read,
+	.write =	pci_mmcfg_write,
+};
+
+static int __init pci_mmcfg_init(void)
+{
+	struct resource *region;
+
+	if ((pci_probe & PCI_PROBE_MMCONF) == 0)
+		goto out;
+	if (!pci_mmcfg_base_addr)
+		goto out;
+	region = request_mem_region(pci_mmcfg_base_addr, 256 * 1024 * 1024,
+			"PCI MMCONFIG");
+	if (!region)
+		goto out;
+
+	/* Calculate the virtual address of the PTE */
+	mmcfg_virt_addr = fix_to_virt(FIX_PCIE_MCFG);
+
+	printk(KERN_INFO "PCI: Using MMCONFIG\n");
+	raw_pci_ops = &pci_mmcfg;
+	pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
+
+ out:
+	return 0;
+}
+
+arch_initcall(pci_mmcfg_init);
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/pci.h pciexp-2.6/arch/i386/pci/pci.h
--- linus-2.6/arch/i386/pci/pci.h	2003-07-29 13:00:27.000000000 -0400
+++ pciexp-2.6/arch/i386/pci/pci.h	2004-01-29 08:14:48.000000000 -0500
@@ -15,6 +15,9 @@
 #define PCI_PROBE_BIOS		0x0001
 #define PCI_PROBE_CONF1		0x0002
 #define PCI_PROBE_CONF2		0x0004
+#define PCI_PROBE_MMCONF	0x0008
+#define PCI_PROBE_MASK		0x000f
+
 #define PCI_NO_SORT		0x0100
 #define PCI_BIOS_SORT		0x0200
 #define PCI_NO_CHECKS		0x0400
diff -urpNX build-tools/dontdiff linus-2.6/drivers/acpi/tables.c pciexp-2.6/drivers/acpi/tables.c
--- linus-2.6/drivers/acpi/tables.c	2003-10-08 16:52:16.000000000 -0400
+++ pciexp-2.6/drivers/acpi/tables.c	2004-01-29 08:22:52.000000000 -0500
@@ -58,6 +58,7 @@ static char *acpi_table_signatures[ACPI_
 	[ACPI_SSDT]		= "SSDT",
 	[ACPI_SPMI]		= "SPMI",
 	[ACPI_HPET]		= "HPET",
+	[ACPI_MCFG]		= "MCFG",
 };
 
 /* System Description Table (RSDT/XSDT) */
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/pci-sysfs.c pciexp-2.6/drivers/pci/pci-sysfs.c
--- linus-2.6/drivers/pci/pci-sysfs.c	2003-08-22 22:46:57.000000000 -0400
+++ pciexp-2.6/drivers/pci/pci-sysfs.c	2004-01-29 09:30:43.000000000 -0500
@@ -71,7 +71,7 @@ pci_read_config(struct kobject *kobj, ch
 
 	/* Several chips lock up trying to read undefined config space */
 	if (capable(CAP_SYS_ADMIN)) {
-		size = 256;
+		size = dev->cfg_size;
 	} else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) {
 		size = 128;
 	}
@@ -123,10 +123,10 @@ pci_write_config(struct kobject *kobj, c
 	unsigned int size = count;
 	loff_t init_off = off;
 
-	if (off > 256)
+	if (off > dev->cfg_size)
 		return 0;
-	if (off + count > 256) {
-		size = 256 - off;
+	if (off + count > dev->cfg_size) {
+		size = dev->cfg_size - off;
 		count = size;
 	}
 
@@ -166,6 +166,16 @@ static struct bin_attribute pci_config_a
 	.write = pci_write_config,
 };
 
+static struct bin_attribute pcie_config_attr = {
+	.attr =	{
+		.name = "config",
+		.mode = S_IRUGO | S_IWUSR,
+	},
+	.size = 4096,
+	.read = pci_read_config,
+	.write = pci_write_config,
+};
+
 void pci_create_sysfs_dev_files (struct pci_dev *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -178,5 +188,9 @@ void pci_create_sysfs_dev_files (struct 
 	device_create_file (dev, &dev_attr_class);
 	device_create_file (dev, &dev_attr_irq);
 	device_create_file (dev, &dev_attr_resource);
-	sysfs_create_bin_file(&dev->kobj, &pci_config_attr);
+	if (pdev->cfg_size < 4096) {
+		sysfs_create_bin_file(&dev->kobj, &pci_config_attr);
+	} else {
+		sysfs_create_bin_file(&dev->kobj, &pcie_config_attr);
+	}
 }
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/pci.c pciexp-2.6/drivers/pci/pci.c
--- linus-2.6/drivers/pci/pci.c	2003-10-08 16:52:35.000000000 -0400
+++ pciexp-2.6/drivers/pci/pci.c	2004-01-29 08:23:57.000000000 -0500
@@ -90,6 +90,8 @@ pci_max_busnr(void)
  *  %PCI_CAP_ID_CHSWP        CompactPCI HotSwap 
  *
  *  %PCI_CAP_ID_PCIX         PCI-X
+ *
+ *  %PCI_CAP_ID_EXP          PCI Express
  */
 int
 pci_find_capability(struct pci_dev *dev, int cap)
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/probe.c pciexp-2.6/drivers/pci/probe.c
--- linus-2.6/drivers/pci/probe.c	2004-01-07 18:02:53.000000000 -0500
+++ pciexp-2.6/drivers/pci/probe.c	2004-01-29 08:59:46.000000000 -0500
@@ -17,6 +17,8 @@
 
 #define CARDBUS_LATENCY_TIMER	176	/* secondary latency timer */
 #define CARDBUS_RESERVE_BUSNR	3
+#define PCI_CFG_SPACE_SIZE	256
+#define PCI_CFG_SPACE_EXP_SIZE	4096
 
 /* Ugh.  Need to stop exporting this to modules. */
 LIST_HEAD(pci_root_buses);
@@ -479,6 +481,20 @@ static void pci_release_dev(struct devic
 	kfree(pci_dev);
 }
 
+/**
+ * pci_cfg_space_size - get the configuration space size of the PCI device
+ */
+static int pci_cfg_space_size(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_MMCONFIG
+	/* Find whether the device is PCI Express */
+	int is_pci_express_dev = pci_find_capability(dev, PCI_CAP_ID_EXP);
+	if (is_pci_express_dev)
+		return PCI_CFG_SPACE_EXP_SIZE;
+#endif
+	return PCI_CFG_SPACE_SIZE;
+}
+
 /*
  * Read the config data for a PCI device, sanity-check it
  * and fill in the dev structure...
@@ -515,6 +531,7 @@ pci_scan_device(struct pci_bus *bus, int
 	dev->multifunction = !!(hdr_type & 0x80);
 	dev->vendor = l & 0xffff;
 	dev->device = (l >> 16) & 0xffff;
+	dev->cfg_size = pci_cfg_space_size(dev);
 
 	/* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
 	   set this higher, assuming the system even supports it.  */
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/proc.c pciexp-2.6/drivers/pci/proc.c
--- linus-2.6/drivers/pci/proc.c	2004-01-07 18:02:53.000000000 -0500
+++ pciexp-2.6/drivers/pci/proc.c	2004-01-29 08:38:49.000000000 -0500
@@ -16,16 +16,15 @@
 #include <asm/uaccess.h>
 #include <asm/byteorder.h>
 
-#define PCI_CFG_SPACE_SIZE 256
-
 static int proc_initialized;	/* = 0 */
 
 static loff_t
 proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
 {
 	loff_t new = -1;
+	struct inode *inode = file->f_dentry->d_inode;
 
-	down(&file->f_dentry->d_inode->i_sem);
+	down(&inode->i_sem);
 	switch (whence) {
 	case 0:
 		new = off;
@@ -34,14 +33,14 @@ proc_bus_pci_lseek(struct file *file, lo
 		new = file->f_pos + off;
 		break;
 	case 2:
-		new = PCI_CFG_SPACE_SIZE + off;
+		new = inode->i_size + off;
 		break;
 	}
-	if (new < 0 || new > PCI_CFG_SPACE_SIZE)
+	if (new < 0 || new > inode->i_size)
 		new = -EINVAL;
 	else
 		file->f_pos = new;
-	up(&file->f_dentry->d_inode->i_sem);
+	up(&inode->i_sem);
 	return new;
 }
 
@@ -61,7 +60,7 @@ proc_bus_pci_read(struct file *file, cha
 	 */
 
 	if (capable(CAP_SYS_ADMIN))
-		size = PCI_CFG_SPACE_SIZE;
+		size = dev->cfg_size;
 	else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
 		size = 128;
 	else
@@ -134,14 +133,15 @@ proc_bus_pci_write(struct file *file, co
 	const struct proc_dir_entry *dp = PDE(ino);
 	struct pci_dev *dev = dp->data;
 	int pos = *ppos;
+	int size = dev->cfg_size;
 	int cnt;
 
-	if (pos >= PCI_CFG_SPACE_SIZE)
+	if (pos >= size)
 		return 0;
-	if (nbytes >= PCI_CFG_SPACE_SIZE)
-		nbytes = PCI_CFG_SPACE_SIZE;
-	if (pos + nbytes > PCI_CFG_SPACE_SIZE)
-		nbytes = PCI_CFG_SPACE_SIZE - pos;
+	if (nbytes >= size)
+		nbytes = size;
+	if (pos + nbytes > size)
+		nbytes = size - pos;
 	cnt = nbytes;
 
 	if (!access_ok(VERIFY_READ, buf, cnt))
@@ -403,7 +403,7 @@ int pci_proc_attach_device(struct pci_de
 		return -ENOMEM;
 	e->proc_fops = &proc_bus_pci_operations;
 	e->data = dev;
-	e->size = PCI_CFG_SPACE_SIZE;
+	e->size = dev->cfg_size;
 
 	return 0;
 }
diff -urpNX build-tools/dontdiff linus-2.6/include/asm-i386/fixmap.h pciexp-2.6/include/asm-i386/fixmap.h
--- linus-2.6/include/asm-i386/fixmap.h	2003-07-29 13:01:54.000000000 -0400
+++ pciexp-2.6/include/asm-i386/fixmap.h	2004-01-29 08:40:21.000000000 -0500
@@ -71,6 +71,9 @@ enum fixed_addresses {
 	FIX_ACPI_BEGIN,
 	FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
 #endif
+#ifdef CONFIG_PCI_MMCONFIG
+	FIX_PCIE_MCFG,
+#endif
 	__end_of_permanent_fixed_addresses,
 	/* temporary boot-time mappings, used before ioremap() is functional */
 #define NR_FIX_BTMAPS	16
diff -urpNX build-tools/dontdiff linus-2.6/include/linux/acpi.h pciexp-2.6/include/linux/acpi.h
--- linus-2.6/include/linux/acpi.h	2003-10-08 16:53:03.000000000 -0400
+++ pciexp-2.6/include/linux/acpi.h	2004-01-29 08:46:48.000000000 -0500
@@ -317,6 +317,15 @@ struct acpi_table_ecdt {
 	char				ec_id[0];
 } __attribute__ ((packed));
 
+/* PCI MMCONFIG */
+
+struct acpi_table_mcfg {
+	struct acpi_table_header	header;
+	u8				reserved[8];
+	u32				base_address;
+	u32				base_reserved;
+} __attribute__ ((packed));
+
 /* Table Handlers */
 
 enum acpi_table_id {
@@ -338,6 +347,7 @@ enum acpi_table_id {
 	ACPI_SSDT,
 	ACPI_SPMI,
 	ACPI_HPET,
+	ACPI_MCFG,
 	ACPI_TABLE_COUNT
 };
 
@@ -369,6 +379,8 @@ void acpi_numa_arch_fixup(void);
 
 extern int acpi_mp_config;
 
+extern u32 pci_mmcfg_base_addr;
+
 #else	/*!CONFIG_ACPI_BOOT*/
 
 #define acpi_mp_config	0
diff -urpNX build-tools/dontdiff linus-2.6/include/linux/pci.h pciexp-2.6/include/linux/pci.h
--- linus-2.6/include/linux/pci.h	2004-01-27 21:05:48.000000000 -0500
+++ pciexp-2.6/include/linux/pci.h	2004-01-29 09:13:20.000000000 -0500
@@ -410,6 +410,8 @@ struct pci_dev {
 	unsigned short vendor_compatible[DEVICE_COUNT_COMPATIBLE];
 	unsigned short device_compatible[DEVICE_COUNT_COMPATIBLE];
 
+	int		cfg_size;	/* Size of configuration space */
+
 	/*
 	 * Instead of touching interrupt line and base address registers
 	 * directly, use the values stored here. They might be different!

-- 
"Next the statesmen will invent cheap lies, putting the blame upon 
the nation that is attacked, and every man will be glad of those
conscience-soothing falsities, and will diligently study them, and refuse
to examine any refutations of them; and thus he will by and by convince 
himself that the war is just, and will thank God for the better sleep 
he enjoys after this process of grotesque self-deception." -- Mark Twain

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-29 11:32 Durairaj, Sundarapandian
@ 2004-01-29 15:09 ` Matthew Wilcox
  2004-01-29 15:59   ` Matthew Wilcox
  0 siblings, 1 reply; 43+ messages in thread
From: Matthew Wilcox @ 2004-01-29 15:09 UTC (permalink / raw)
  To: Durairaj, Sundarapandian
  Cc: linux-kernel, linux-pci, torvalds, alan, greg, Andi Kleen, akpm,
	mj, Kondratiev, Vladimir, Seshadri, Harinarayanan, Nakajima, Jun

On Thu, Jan 29, 2004 at 05:02:39PM +0530, Durairaj, Sundarapandian wrote:
> Please review this updated patch and send your comments.

Here's a rewrite of Sundarapandian Durairaj's patch for accessing extended
PCI configuration space.  Changes of note:

 - Forward-ported to 2.6.2-rc2
 - Renamed most of the 'Express' to 'MMCONFIG' since that is what we're
   actually doing (and it would seem to be the same for PCI-X 2.0)
 - Separate out the mmconfig accesses into their own file rather than lumping
   them in with direct.  Inline the bits from include/asm-i386/pci.h.
 - Request the memory region we're going to use for MMCONFIG accesses.
 - Remove the EXPERIMENTAL tag.
 - Add support in sysfs for the extended config space.
 - Use i_size in proc_bus_pci_lseek().
 - Move cfg_size to where it will pack better in pci_dev.

Comments?  I don't have any x86 PCI-E hardware, so it's not even
slightly tested.

Greg, I'm not entirely keen on the sysfs patch I did, but I don't see
a better way of doing it right now.


diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/Kconfig pciexp-2.6/arch/i386/Kconfig
--- linus-2.6/arch/i386/Kconfig	2004-01-27 21:05:17.000000000 -0500
+++ pciexp-2.6/arch/i386/Kconfig	2004-01-29 09:16:22.000000000 -0500
@@ -1030,12 +1030,16 @@ config PCI_GOBIOS
 	  PCI-based systems don't have any BIOS at all. Linux can also try to
 	  detect the PCI hardware directly without using the BIOS.
 
-	  With this option, you can specify how Linux should detect the PCI
-	  devices. If you choose "BIOS", the BIOS will be used, if you choose
-	  "Direct", the BIOS won't be used, and if you choose "Any", the
-	  kernel will try the direct access method and falls back to the BIOS
-	  if that doesn't work. If unsure, go with the default, which is
-	  "Any".
+	  With this option, you can specify how Linux should detect the
+	  PCI devices. If you choose "BIOS", the BIOS will be used,
+	  if you choose "Direct", the BIOS won't be used, and if you
+	  choose "MMConfig", then PCI Express MMCONFIG will be used.
+	  If you choose "Any", the kernel will try MMCONFIG, then the
+	  direct access method and falls back to the BIOS if that doesn't
+	  work. If unsure, go with the default, which is "Any".
+
+config PCI_GOMMCONFIG
+	bool "MMConfig"
 
 config PCI_GODIRECT
 	bool "Direct"
@@ -1055,6 +1059,12 @@ config PCI_DIRECT
  	depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS)
 	default y
 
+config PCI_MMCONFIG
+	bool
+	depends on PCI && (PCI_GOMMCONFIG || PCI_GOANY)
+	select ACPI_BOOT
+	default y
+
 config PCI_USE_VECTOR
 	bool "Vector-based interrupt indexing"
 	depends on X86_LOCAL_APIC && X86_IO_APIC
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/kernel/acpi/boot.c pciexp-2.6/arch/i386/kernel/acpi/boot.c
--- linus-2.6/arch/i386/kernel/acpi/boot.c	2004-01-07 18:02:42.000000000 -0500
+++ pciexp-2.6/arch/i386/kernel/acpi/boot.c	2004-01-29 09:13:51.000000000 -0500
@@ -95,6 +95,27 @@ char *__acpi_map_table(unsigned long phy
 }
 
 
+#ifdef CONFIG_PCI_MMCONFIG
+static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+{
+	struct acpi_table_mcfg *mcfg;
+
+	if (!phys_addr || !size)
+		return -EINVAL;
+
+	mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size);
+	if (!mcfg) {
+		printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+		return -ENODEV;
+	}
+
+	if (mcfg->base_address)
+		pci_mmcfg_base_addr = mcfg->base_address;
+
+	return 0;
+}
+#endif /* CONFIG_PCI_MMCONFIG */
+
 #ifdef CONFIG_X86_LOCAL_APIC
 
 static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
@@ -515,6 +536,19 @@ acpi_boot_init (void)
 
 #endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */
 
+#ifdef CONFIG_PCI_MMCONFIG
+	result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+	if (!result) {
+		printk(KERN_WARNING PREFIX "MCFG not present\n");
+		return 0;
+	} else if (result < 0) {
+		printk(KERN_ERR PREFIX "Error parsing MCFG\n");
+		return result;
+	} else if (result > 1) {
+		printk(KERN_WARNING PREFIX "Multiple MCFG tables exist\n");
+	}
+#endif /* CONFIG_PCI_MMCONFIG */
+
 #ifdef CONFIG_X86_LOCAL_APIC
 	if (acpi_lapic && acpi_ioapic) {
 		smp_found_config = 1;
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/Makefile pciexp-2.6/arch/i386/pci/Makefile
--- linus-2.6/arch/i386/pci/Makefile	2003-07-29 13:00:27.000000000 -0400
+++ pciexp-2.6/arch/i386/pci/Makefile	2004-01-29 08:11:28.000000000 -0500
@@ -1,6 +1,7 @@
 obj-y				:= i386.o
 
 obj-$(CONFIG_PCI_BIOS)		+= pcbios.o
+obj-$(CONFIG_PCI_MMCONFIG)	+= mmconfig.o
 obj-$(CONFIG_PCI_DIRECT)	+= direct.o
 
 pci-y				:= fixup.o
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/common.c pciexp-2.6/arch/i386/pci/common.c
--- linus-2.6/arch/i386/pci/common.c	2003-09-08 17:41:32.000000000 -0400
+++ pciexp-2.6/arch/i386/pci/common.c	2004-01-29 08:11:19.000000000 -0500
@@ -19,7 +19,8 @@
 extern  void pcibios_sort(void);
 #endif
 
-unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2;
+unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
+				PCI_PROBE_MMCONF;
 
 int pcibios_last_bus = -1;
 struct pci_bus *pci_root_bus = NULL;
@@ -197,6 +198,12 @@ char * __devinit  pcibios_setup(char *st
 		return NULL;
 	}
 #endif
+#ifdef CONFIG_PCI_MMCONFIG
+	else if (!strcmp(str, "nommconf")) {
+		pci_probe &= ~PCI_PROBE_MMCONF;
+		return NULL;
+	}
+#endif
 #ifdef CONFIG_ACPI_PCI
 	else if (!strcmp(str, "noacpi")) {
 		pci_probe |= PCI_NO_ACPI_ROUTING;
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/mmconfig.c pciexp-2.6/arch/i386/pci/mmconfig.c
--- linus-2.6/arch/i386/pci/mmconfig.c	1969-12-31 19:00:00.000000000 -0500
+++ pciexp-2.6/arch/i386/pci/mmconfig.c	2004-01-29 09:14:34.000000000 -0500
@@ -0,0 +1,116 @@
+/*
+ * mmconfig.c - Low-level direct PCI config space access via MMCONFIG
+ */
+
+#include <linux/pci.h>
+#include <linux/init.h>
+#include "pci.h"
+
+/* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
+u32 pci_mmcfg_base_addr;
+
+/* The virtual address of the fixed PTE */
+static char *mmcfg_virt_addr;
+
+/* The base address of the last MMCONFIG device accessed */
+static u32 mmcfg_last_accessed_device;
+
+/*
+ * Functions for accessing PCI configuration space with MMCONFIG accesses
+ */
+
+static inline void pci_exp_set_dev_base(int bus, int devfn)
+{
+	u32 dev_base = pci_mmcfg_base_addr | (bus << 20) | (devfn << 12);
+	if (dev_base != mmcfg_last_accessed_device) {
+		mmcfg_last_accessed_device = dev_base;
+		set_fixmap(FIX_PCIE_MCFG, dev_base);
+	}
+}
+
+static int pci_mmcfg_read(int seg, int bus, int devfn, int reg, int len, u32 *value)
+{
+	unsigned long flags;
+
+	if (!value || (bus > 255) || (devfn > 255) || (reg > 4095))
+		return -EINVAL;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+
+	pci_exp_set_dev_base(bus, devfn);
+
+	switch (len) {
+	case 1:
+		*value = readb(mmcfg_virt_addr + reg);
+		break;
+	case 2:
+		*value = readw(mmcfg_virt_addr + reg);
+		break;
+	case 4:
+		*value = readl(mmcfg_virt_addr + reg);
+		break;
+	}
+
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+
+	return 0;
+}
+
+static int pci_mmcfg_write(int seg, int bus, int devfn, int reg, int len, u32 value)
+{
+	unsigned long flags;
+
+	if ((bus > 255) || (devfn > 255) || (reg > 4095)) 
+		return -EINVAL;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+
+	pci_exp_set_dev_base(bus, devfn);
+
+	switch (len) {
+	case 1:
+		writeb(value, mmcfg_virt_addr + reg);
+		break;
+	case 2:
+		writew(value, mmcfg_virt_addr + reg);
+		break;
+	case 4:
+		writel(value, mmcfg_virt_addr + reg);
+		break;
+	}
+
+	/* Dummy read to flush PCI write */
+	readl(mmcfg_virt_addr);
+
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+
+	return 0;
+}
+
+static struct pci_raw_ops pci_mmcfg = {
+	.read =		pci_mmcfg_read,
+	.write =	pci_mmcfg_write,
+};
+
+static int __init pci_mmcfg_init(void)
+{
+	struct resource *region;
+
+	if ((pci_probe & PCI_PROBE_MMCONF) == 0)
+		goto out;
+	if (!pci_mmcfg_base_addr)
+		goto out;
+	region = request_mem_region(pci_mmcfg_base_addr, 256 * 1024 * 1024,
+			"PCI MMCONFIG");
+	if (!region)
+		goto out;
+
+	printk(KERN_INFO "PCI: Using MMCONFIG\n");
+	raw_pci_ops = &pci_mmcfg;
+	pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
+
+ out:
+	return 0;
+}
+
+arch_initcall(pci_mmcfg_init);
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/pci.h pciexp-2.6/arch/i386/pci/pci.h
--- linus-2.6/arch/i386/pci/pci.h	2003-07-29 13:00:27.000000000 -0400
+++ pciexp-2.6/arch/i386/pci/pci.h	2004-01-29 08:14:48.000000000 -0500
@@ -15,6 +15,9 @@
 #define PCI_PROBE_BIOS		0x0001
 #define PCI_PROBE_CONF1		0x0002
 #define PCI_PROBE_CONF2		0x0004
+#define PCI_PROBE_MMCONF	0x0008
+#define PCI_PROBE_MASK		0x000f
+
 #define PCI_NO_SORT		0x0100
 #define PCI_BIOS_SORT		0x0200
 #define PCI_NO_CHECKS		0x0400
diff -urpNX build-tools/dontdiff linus-2.6/drivers/acpi/tables.c pciexp-2.6/drivers/acpi/tables.c
--- linus-2.6/drivers/acpi/tables.c	2003-10-08 16:52:16.000000000 -0400
+++ pciexp-2.6/drivers/acpi/tables.c	2004-01-29 08:22:52.000000000 -0500
@@ -58,6 +58,7 @@ static char *acpi_table_signatures[ACPI_
 	[ACPI_SSDT]		= "SSDT",
 	[ACPI_SPMI]		= "SPMI",
 	[ACPI_HPET]		= "HPET",
+	[ACPI_MCFG]		= "MCFG",
 };
 
 /* System Description Table (RSDT/XSDT) */
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/pci-sysfs.c pciexp-2.6/drivers/pci/pci-sysfs.c
--- linus-2.6/drivers/pci/pci-sysfs.c	2003-08-22 22:46:57.000000000 -0400
+++ pciexp-2.6/drivers/pci/pci-sysfs.c	2004-01-29 09:30:43.000000000 -0500
@@ -71,7 +71,7 @@ pci_read_config(struct kobject *kobj, ch
 
 	/* Several chips lock up trying to read undefined config space */
 	if (capable(CAP_SYS_ADMIN)) {
-		size = 256;
+		size = dev->cfg_size;
 	} else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) {
 		size = 128;
 	}
@@ -123,10 +123,10 @@ pci_write_config(struct kobject *kobj, c
 	unsigned int size = count;
 	loff_t init_off = off;
 
-	if (off > 256)
+	if (off > dev->cfg_size)
 		return 0;
-	if (off + count > 256) {
-		size = 256 - off;
+	if (off + count > dev->cfg_size) {
+		size = dev->cfg_size - off;
 		count = size;
 	}
 
@@ -166,6 +166,16 @@ static struct bin_attribute pci_config_a
 	.write = pci_write_config,
 };
 
+static struct bin_attribute pcie_config_attr = {
+	.attr =	{
+		.name = "config",
+		.mode = S_IRUGO | S_IWUSR,
+	},
+	.size = 4096,
+	.read = pci_read_config,
+	.write = pci_write_config,
+};
+
 void pci_create_sysfs_dev_files (struct pci_dev *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -178,5 +188,9 @@ void pci_create_sysfs_dev_files (struct 
 	device_create_file (dev, &dev_attr_class);
 	device_create_file (dev, &dev_attr_irq);
 	device_create_file (dev, &dev_attr_resource);
-	sysfs_create_bin_file(&dev->kobj, &pci_config_attr);
+	if (pdev->cfg_size < 4096) {
+		sysfs_create_bin_file(&dev->kobj, &pci_config_attr);
+	} else {
+		sysfs_create_bin_file(&dev->kobj, &pcie_config_attr);
+	}
 }
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/pci.c pciexp-2.6/drivers/pci/pci.c
--- linus-2.6/drivers/pci/pci.c	2003-10-08 16:52:35.000000000 -0400
+++ pciexp-2.6/drivers/pci/pci.c	2004-01-29 08:23:57.000000000 -0500
@@ -90,6 +90,8 @@ pci_max_busnr(void)
  *  %PCI_CAP_ID_CHSWP        CompactPCI HotSwap 
  *
  *  %PCI_CAP_ID_PCIX         PCI-X
+ *
+ *  %PCI_CAP_ID_EXP          PCI Express
  */
 int
 pci_find_capability(struct pci_dev *dev, int cap)
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/probe.c pciexp-2.6/drivers/pci/probe.c
--- linus-2.6/drivers/pci/probe.c	2004-01-07 18:02:53.000000000 -0500
+++ pciexp-2.6/drivers/pci/probe.c	2004-01-29 08:59:46.000000000 -0500
@@ -17,6 +17,8 @@
 
 #define CARDBUS_LATENCY_TIMER	176	/* secondary latency timer */
 #define CARDBUS_RESERVE_BUSNR	3
+#define PCI_CFG_SPACE_SIZE	256
+#define PCI_CFG_SPACE_EXP_SIZE	4096
 
 /* Ugh.  Need to stop exporting this to modules. */
 LIST_HEAD(pci_root_buses);
@@ -479,6 +481,20 @@ static void pci_release_dev(struct devic
 	kfree(pci_dev);
 }
 
+/**
+ * pci_cfg_space_size - get the configuration space size of the PCI device
+ */
+static int pci_cfg_space_size(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_MMCONFIG
+	/* Find whether the device is PCI Express */
+	int is_pci_express_dev = pci_find_capability(dev, PCI_CAP_ID_EXP);
+	if (is_pci_express_dev)
+		return PCI_CFG_SPACE_EXP_SIZE;
+#endif
+	return PCI_CFG_SPACE_SIZE;
+}
+
 /*
  * Read the config data for a PCI device, sanity-check it
  * and fill in the dev structure...
@@ -515,6 +531,7 @@ pci_scan_device(struct pci_bus *bus, int
 	dev->multifunction = !!(hdr_type & 0x80);
 	dev->vendor = l & 0xffff;
 	dev->device = (l >> 16) & 0xffff;
+	dev->cfg_size = pci_cfg_space_size(dev);
 
 	/* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
 	   set this higher, assuming the system even supports it.  */
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/proc.c pciexp-2.6/drivers/pci/proc.c
--- linus-2.6/drivers/pci/proc.c	2004-01-07 18:02:53.000000000 -0500
+++ pciexp-2.6/drivers/pci/proc.c	2004-01-29 08:38:49.000000000 -0500
@@ -16,16 +16,15 @@
 #include <asm/uaccess.h>
 #include <asm/byteorder.h>
 
-#define PCI_CFG_SPACE_SIZE 256
-
 static int proc_initialized;	/* = 0 */
 
 static loff_t
 proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
 {
 	loff_t new = -1;
+	struct inode *inode = file->f_dentry->d_inode;
 
-	down(&file->f_dentry->d_inode->i_sem);
+	down(&inode->i_sem);
 	switch (whence) {
 	case 0:
 		new = off;
@@ -34,14 +33,14 @@ proc_bus_pci_lseek(struct file *file, lo
 		new = file->f_pos + off;
 		break;
 	case 2:
-		new = PCI_CFG_SPACE_SIZE + off;
+		new = inode->i_size + off;
 		break;
 	}
-	if (new < 0 || new > PCI_CFG_SPACE_SIZE)
+	if (new < 0 || new > inode->i_size)
 		new = -EINVAL;
 	else
 		file->f_pos = new;
-	up(&file->f_dentry->d_inode->i_sem);
+	up(&inode->i_sem);
 	return new;
 }
 
@@ -61,7 +60,7 @@ proc_bus_pci_read(struct file *file, cha
 	 */
 
 	if (capable(CAP_SYS_ADMIN))
-		size = PCI_CFG_SPACE_SIZE;
+		size = dev->cfg_size;
 	else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
 		size = 128;
 	else
@@ -134,14 +133,15 @@ proc_bus_pci_write(struct file *file, co
 	const struct proc_dir_entry *dp = PDE(ino);
 	struct pci_dev *dev = dp->data;
 	int pos = *ppos;
+	int size = dev->cfg_size;
 	int cnt;
 
-	if (pos >= PCI_CFG_SPACE_SIZE)
+	if (pos >= size)
 		return 0;
-	if (nbytes >= PCI_CFG_SPACE_SIZE)
-		nbytes = PCI_CFG_SPACE_SIZE;
-	if (pos + nbytes > PCI_CFG_SPACE_SIZE)
-		nbytes = PCI_CFG_SPACE_SIZE - pos;
+	if (nbytes >= size)
+		nbytes = size;
+	if (pos + nbytes > size)
+		nbytes = size - pos;
 	cnt = nbytes;
 
 	if (!access_ok(VERIFY_READ, buf, cnt))
@@ -403,7 +403,7 @@ int pci_proc_attach_device(struct pci_de
 		return -ENOMEM;
 	e->proc_fops = &proc_bus_pci_operations;
 	e->data = dev;
-	e->size = PCI_CFG_SPACE_SIZE;
+	e->size = dev->cfg_size;
 
 	return 0;
 }
diff -urpNX build-tools/dontdiff linus-2.6/include/asm-i386/fixmap.h pciexp-2.6/include/asm-i386/fixmap.h
--- linus-2.6/include/asm-i386/fixmap.h	2003-07-29 13:01:54.000000000 -0400
+++ pciexp-2.6/include/asm-i386/fixmap.h	2004-01-29 08:40:21.000000000 -0500
@@ -71,6 +71,9 @@ enum fixed_addresses {
 	FIX_ACPI_BEGIN,
 	FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
 #endif
+#ifdef CONFIG_PCI_MMCONFIG
+	FIX_PCIE_MCFG,
+#endif
 	__end_of_permanent_fixed_addresses,
 	/* temporary boot-time mappings, used before ioremap() is functional */
 #define NR_FIX_BTMAPS	16
diff -urpNX build-tools/dontdiff linus-2.6/include/linux/acpi.h pciexp-2.6/include/linux/acpi.h
--- linus-2.6/include/linux/acpi.h	2003-10-08 16:53:03.000000000 -0400
+++ pciexp-2.6/include/linux/acpi.h	2004-01-29 08:46:48.000000000 -0500
@@ -317,6 +317,15 @@ struct acpi_table_ecdt {
 	char				ec_id[0];
 } __attribute__ ((packed));
 
+/* PCI MMCONFIG */
+
+struct acpi_table_mcfg {
+	struct acpi_table_header	header;
+	u8				reserved[8];
+	u32				base_address;
+	u32				base_reserved;
+} __attribute__ ((packed));
+
 /* Table Handlers */
 
 enum acpi_table_id {
@@ -338,6 +347,7 @@ enum acpi_table_id {
 	ACPI_SSDT,
 	ACPI_SPMI,
 	ACPI_HPET,
+	ACPI_MCFG,
 	ACPI_TABLE_COUNT
 };
 
@@ -369,6 +379,8 @@ void acpi_numa_arch_fixup(void);
 
 extern int acpi_mp_config;
 
+extern u32 pci_mmcfg_base_addr;
+
 #else	/*!CONFIG_ACPI_BOOT*/
 
 #define acpi_mp_config	0
diff -urpNX build-tools/dontdiff linus-2.6/include/linux/pci.h pciexp-2.6/include/linux/pci.h
--- linus-2.6/include/linux/pci.h	2004-01-27 21:05:48.000000000 -0500
+++ pciexp-2.6/include/linux/pci.h	2004-01-29 09:13:20.000000000 -0500
@@ -410,6 +410,8 @@ struct pci_dev {
 	unsigned short vendor_compatible[DEVICE_COUNT_COMPATIBLE];
 	unsigned short device_compatible[DEVICE_COUNT_COMPATIBLE];
 
+	int		cfg_size;	/* Size of configuration space */
+
 	/*
 	 * Instead of touching interrupt line and base address registers
 	 * directly, use the values stored here. They might be different!

-- 
"Next the statesmen will invent cheap lies, putting the blame upon 
the nation that is attacked, and every man will be glad of those
conscience-soothing falsities, and will diligently study them, and refuse
to examine any refutations of them; and thus he will by and by convince 
himself that the war is just, and will thank God for the better sleep 
he enjoys after this process of grotesque self-deception." -- Mark Twain

^ permalink raw reply	[flat|nested] 43+ messages in thread

* RE: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
@ 2004-01-29 11:32 Durairaj, Sundarapandian
  2004-01-29 15:09 ` Matthew Wilcox
  0 siblings, 1 reply; 43+ messages in thread
From: Durairaj, Sundarapandian @ 2004-01-29 11:32 UTC (permalink / raw)
  To: linux-kernel, linux-pci
  Cc: torvalds, alan, greg, Andi Kleen, akpm, mj, Kondratiev, Vladimir,
	Seshadri, Harinarayanan, Nakajima, Jun, Durairaj, Sundarapandian

Hi All,

Thanks for the comments.

Please review this updated patch and send your comments.

Thanks,
Sundar

Note:
This is the PCI Express Enhanced Configuration patch for the 2.6.0-test11
kernel, following up on the previous patches from Vladimir
(Vladimir.Kondratiev@intel.com), Harinarayanan
(Harinarayanan.Seshadri@intel.com) and me.
I tested it on our i386 platform.

This patch also implements a mechanism for the kernel to find the
chipset-specific MMCFG base address. The kernel detects the base
address from the ACPI MCFG table entry, and the PCI subsystem is
initialized based on that address.

diff -Naur linux-2.6.0/arch/i386/Kconfig
linux_pciexpress/arch/i386/Kconfig
--- linux-2.6.0/arch/i386/Kconfig	2003-12-18 08:28:16.000000000
+0530
+++ linux_pciexpress/arch/i386/Kconfig	2004-01-29 16:50:56.000000000
+0530
@@ -1020,6 +1020,18 @@
 
 endchoice
 
+config PCI_EXPRESS
+	bool "PCI_EXPRESS (EXPERIMENTAL)" 
+	depends on EXPERIMENTAL && PCI
+	select ACPI_BOOT
+	help
+	  PCI Express is the next generation PCI architecture that
supports
+	  the configuration space size of 4K bytes. With this option, 
+	  Linux will first attempt to access the configuration space
through 
+	  enhanced config access mechanism (will work only on 
+	  PCI Express based system) otherwise other standard PCI access 
+	  mechanism will be used.
+
 config PCI_BIOS
 	bool
 	depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
diff -Naur linux-2.6.0/arch/i386/kernel/acpi/boot.c
linux_pciexpress/arch/i386/kernel/acpi/boot.c
--- linux-2.6.0/arch/i386/kernel/acpi/boot.c	2003-12-18
08:29:29.000000000 +0530
+++ linux_pciexpress/arch/i386/kernel/acpi/boot.c	2004-01-29
16:14:43.000000000 +0530
@@ -93,6 +93,27 @@
 	return ((unsigned char *) base + offset);
 }
 
+#ifdef CONFIG_PCI_EXPRESS
+static int __init acpi_parse_mcfg
+			(unsigned long phys_addr, unsigned long size)
+{
+	struct acpi_table_mcfg	*mcfg = NULL;
+
+	if (!phys_addr || !size)
+		return -EINVAL;
+
+	mcfg = (struct acpi_table_mcfg *) __acpi_map_table
+						(phys_addr, size);
+	if (!mcfg) {
+		printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+		return -ENODEV;
+	}
+	if (mcfg->base_address)
+		mmcfg_base_address = mcfg->base_address;
+
+	return 0;
+}
+#endif /* CONFIG_PCI_EXPRESS */
 
 #ifdef CONFIG_X86_LOCAL_APIC
 
@@ -508,6 +529,20 @@
 
 #endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */
 
+#ifdef CONFIG_PCI_EXPRESS
+	result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+	if (!result) {
+		printk(KERN_WARNING PREFIX "MCFG not present\n");
+		return 0;
+	} else if (result < 0) {
+		printk(KERN_ERR PREFIX "Error parsing MCFG\n");
+		return result;
+	} else if (result > 1) {
+		printk(KERN_WARNING PREFIX
+			"Multiple MCFG tables exist\n");
+	}
+#endif /* CONFIG_PCI_EXPRESS */
+
 #ifdef CONFIG_X86_LOCAL_APIC
 	if (acpi_lapic && acpi_ioapic) {
 		smp_found_config = 1;
diff -Naur linux-2.6.0/arch/i386/pci/common.c
linux_pciexpress/arch/i386/pci/common.c
--- linux-2.6.0/arch/i386/pci/common.c	2003-12-18 08:28:46.000000000
+0530
+++ linux_pciexpress/arch/i386/pci/common.c	2004-01-29
16:14:45.000000000 +0530
@@ -19,7 +19,8 @@
 extern  void pcibios_sort(void);
 #endif
 
-unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 |
PCI_PROBE_CONF2;
+unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 |
PCI_PROBE_CONF2
+				 | PCI_PROBE_ENHANCED;
 
 int pcibios_last_bus = -1;
 struct pci_bus *pci_root_bus = NULL;
@@ -197,6 +198,12 @@
 		return NULL;
 	}
 #endif
+#ifdef CONFIG_PCI_EXPRESS
+	else if (!strcmp(str, "nopciexpress")) {
+		pci_probe &= ~PCI_PROBE_ENHANCED;
+		return NULL;
+	}
+#endif
 #ifdef CONFIG_ACPI_PCI
 	else if (!strcmp(str, "noacpi")) {
 		pci_probe |= PCI_NO_ACPI_ROUTING;
diff -Naur linux-2.6.0/arch/i386/pci/direct.c
linux_pciexpress/arch/i386/pci/direct.c
--- linux-2.6.0/arch/i386/pci/direct.c	2003-12-18 08:28:28.000000000
+0530
+++ linux_pciexpress/arch/i386/pci/direct.c	2004-01-29
16:14:45.000000000 +0530
@@ -167,6 +167,60 @@
 };
 
 
+#ifdef CONFIG_PCI_EXPRESS
+/*
+ * We map full Page size on each PCI Express request. Incidentally
that's 
+ * the size we have for config space too in PCI Express devices.
+ * On PCI Express capable platform, at the time of kernel
initialization
+ * the OS would have scanned for MCFG table and set this variable to 
+ * appropriate value. If PCI Express not supported the variable will 
+ * have 0 value
+ */
+u32 mmcfg_base_address;
+
+/*
+ * Variable used to store the virtual  address of fixed PTE
+ */
+char *mmcfg_virt_addr;
+
+/*
+ * Variable used to store the base address of the last PCI Express
device
+ * accessed.
+ */
+u32 pcie_last_accessed_device;
+
+static int pci_express_conf_read(int seg, int bus,
+		int devfn, int reg, int len, u32 *value)
+{
+	if (!value || (bus > 255) || (devfn > 255) || (reg > 4095)) {
+		printk(KERN_ERR "%s: Invalid Parameter\n",
+				__FUNCTION__);
+  		return -EINVAL;
+	}
+	pci_express_read(bus, devfn, reg, len, value);
+
+	return 0;
+}
+ 
+static int pci_express_conf_write(int seg, int bus, 
+			int devfn, int reg, int len, u32 value)
+{
+	if ((bus > 255) || (devfn > 255) || (reg > 4095)) {
+		printk(KERN_ERR "%s: Invalid Parameter\n",
+				__FUNCTION__);
+		return -EINVAL;
+	}
+	pci_express_write(bus, devfn, reg, len, value);
+
+	return 0;
+}
+
+static struct pci_raw_ops pci_express_conf = {
+	.read   =	pci_express_conf_read,
+	.write  =	pci_express_conf_write,
+};
+#endif /* CONFIG_PCI_EXPRESS */
+
 /*
  * Before we decide to use direct hardware access mechanisms, we try to
do some
  * trivial checks to ensure it at least _seems_ to be working -- we
just test
@@ -244,7 +298,30 @@
 static int __init pci_direct_init(void)
 {
 	struct resource *region, *region2;
+	
+#ifdef CONFIG_PCI_EXPRESS
+	if ((pci_probe & PCI_PROBE_ENHANCED) == 0)
+		goto type1;
+	/*
+ 	 * Check if platform we are running is PCI Express capable
+  	 */
+	if (mmcfg_base_address == 0) {
+		printk(KERN_INFO 
+		      "MCFG table entry is not found in ACPI
tables....\n"
+		      "Not enabling Enhanced Configuration....\n");
+		goto type1;
+	}
 
+	/* Calculate the virtual address of the PTE */
+	mmcfg_virt_addr = (char *)fix_to_virt(FIX_PCIE_MCFG);
+
+	if (pci_sanity_check(&pci_express_conf)) {
+		printk(KERN_INFO "PCI: Using config type PCIExp\n");
+		raw_pci_ops = &pci_express_conf;
+		return 0;
+	}
+type1:
+#endif /* CONFIG_PCI_EXPRESS */
 	if ((pci_probe & PCI_PROBE_CONF1) == 0)
 		goto type2;
 	region = request_region(0xCF8, 8, "PCI conf1");
diff -Naur linux-2.6.0/arch/i386/pci/Makefile
linux_pciexpress/arch/i386/pci/Makefile
--- linux-2.6.0/arch/i386/pci/Makefile	2003-12-18 08:28:57.000000000
+0530
+++ linux_pciexpress/arch/i386/pci/Makefile	2004-01-29
16:14:45.000000000 +0530
@@ -2,6 +2,7 @@
 
 obj-$(CONFIG_PCI_BIOS)		+= pcbios.o
 obj-$(CONFIG_PCI_DIRECT)	+= direct.o
+obj-$(CONFIG_PCI_EXPRESS)	+= direct.o
 
 pci-y				:= fixup.o
 pci-$(CONFIG_ACPI_PCI)		+= acpi.o
diff -Naur linux-2.6.0/arch/i386/pci/pci.h
linux_pciexpress/arch/i386/pci/pci.h
--- linux-2.6.0/arch/i386/pci/pci.h	2003-12-18 08:28:57.000000000
+0530
+++ linux_pciexpress/arch/i386/pci/pci.h	2004-01-29
16:14:45.000000000 +0530
@@ -15,6 +15,11 @@
 #define PCI_PROBE_BIOS		0x0001
 #define PCI_PROBE_CONF1		0x0002
 #define PCI_PROBE_CONF2		0x0004
+#ifdef CONFIG_PCI_EXPRESS
+#define PCI_PROBE_ENHANCED	0x0008
+#else
+#define PCI_PROBE_ENHANCED 	0x0
+#endif
 #define PCI_NO_SORT		0x0100
 #define PCI_BIOS_SORT		0x0200
 #define PCI_NO_CHECKS		0x0400
diff -Naur linux-2.6.0/drivers/acpi/tables.c
linux_pciexpress/drivers/acpi/tables.c
--- linux-2.6.0/drivers/acpi/tables.c	2003-12-18 08:28:46.000000000
+0530
+++ linux_pciexpress/drivers/acpi/tables.c	2004-01-29
16:14:08.000000000 +0530
@@ -58,6 +58,7 @@
 	[ACPI_SSDT]		= "SSDT",
 	[ACPI_SPMI]		= "SPMI",
 	[ACPI_HPET]		= "HPET",
+	[ACPI_MCFG]		= "MCFG",
 };
 
 /* System Description Table (RSDT/XSDT) */
diff -Naur linux-2.6.0/drivers/pci/pci.c
linux_pciexpress/drivers/pci/pci.c
--- linux-2.6.0/drivers/pci/pci.c	2003-12-18 08:28:38.000000000
+0530
+++ linux_pciexpress/drivers/pci/pci.c	2004-01-29 16:13:58.000000000
+0530
@@ -90,6 +90,7 @@
  *  %PCI_CAP_ID_CHSWP        CompactPCI HotSwap 
  *
  *  %PCI_CAP_ID_PCIX         PCI-X
+ *  %PCI_CAP_ID_EXP          PCI-EXP
  */
 int
 pci_find_capability(struct pci_dev *dev, int cap)
diff -Naur linux-2.6.0/drivers/pci/probe.c
linux_pciexpress/drivers/pci/probe.c
--- linux-2.6.0/drivers/pci/probe.c	2003-12-18 08:29:06.000000000
+0530
+++ linux_pciexpress/drivers/pci/probe.c	2004-01-29
16:13:58.000000000 +0530
@@ -17,6 +17,8 @@
 
 #define CARDBUS_LATENCY_TIMER	176	/* secondary latency timer */
 #define CARDBUS_RESERVE_BUSNR	3
+#define PCI_CFG_SPACE_SIZE	256
+#define PCI_CFG_SPACE_EXP_SIZE	4096
 
 /* Ugh.  Need to stop exporting this to modules. */
 LIST_HEAD(pci_root_buses);
@@ -479,6 +481,21 @@
 	kfree(pci_dev);
 }
 
+/* 
+ * pci_cfg_space_size - get the configuration space size of the PCI
device
+ */
+static int pci_cfg_space_size(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_EXPRESS
+	/* Find whether the device is PCI Express device */
+	int is_pci_express_dev = 
+		pci_find_capability(dev, PCI_CAP_ID_EXP);
+	if (is_pci_express_dev)
+		return PCI_CFG_SPACE_EXP_SIZE;
+#endif
+	return PCI_CFG_SPACE_SIZE; 
+}
+
 /*
  * Read the config data for a PCI device, sanity-check it
  * and fill in the dev structure...
@@ -515,6 +532,7 @@
 	dev->multifunction = !!(hdr_type & 0x80);
 	dev->vendor = l & 0xffff;
 	dev->device = (l >> 16) & 0xffff;
+	dev->cfg_size = pci_cfg_space_size(dev);
 
 	/* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
 	   set this higher, assuming the system even supports it.  */
diff -Naur linux-2.6.0/drivers/pci/proc.c
linux_pciexpress/drivers/pci/proc.c
--- linux-2.6.0/drivers/pci/proc.c	2003-12-18 08:28:57.000000000
+0530
+++ linux_pciexpress/drivers/pci/proc.c	2004-01-29 16:13:58.000000000
+0530
@@ -16,14 +16,15 @@
 #include <asm/uaccess.h>
 #include <asm/byteorder.h>
 
-#define PCI_CFG_SPACE_SIZE 256
-
 static int proc_initialized;	/* = 0 */
 
 static loff_t
 proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
 {
 	loff_t new = -1;
+	const struct inode *ino = file->f_dentry->d_inode;
+	const struct proc_dir_entry *dp = PDE(ino);
+	struct pci_dev *dev = dp->data;
 
 	lock_kernel();
 	switch (whence) {
@@ -34,11 +35,11 @@
 		new = file->f_pos + off;
 		break;
 	case 2:
-		new = PCI_CFG_SPACE_SIZE + off;
+		new = dev->cfg_size + off;
 		break;
 	}
 	unlock_kernel();
-	if (new < 0 || new > PCI_CFG_SPACE_SIZE)
+	if (new < 0 || new > dev->cfg_size)
 		return -EINVAL;
 	return (file->f_pos = new);
 }
@@ -59,7 +60,7 @@
 	 */
 
 	if (capable(CAP_SYS_ADMIN))
-		size = PCI_CFG_SPACE_SIZE;
+ 		size = dev->cfg_size;
 	else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
 		size = 128;
 	else
@@ -133,13 +134,14 @@
 	struct pci_dev *dev = dp->data;
 	int pos = *ppos;
 	int cnt;
+	int size = dev->cfg_size;
 
-	if (pos >= PCI_CFG_SPACE_SIZE)
+	if (pos >= size)
 		return 0;
-	if (nbytes >= PCI_CFG_SPACE_SIZE)
-		nbytes = PCI_CFG_SPACE_SIZE;
-	if (pos + nbytes > PCI_CFG_SPACE_SIZE)
-		nbytes = PCI_CFG_SPACE_SIZE - pos;
+	if (nbytes >= size)
+		nbytes = size;
+	if (pos + nbytes > size)
+		nbytes = size - pos;
 	cnt = nbytes;
 
 	if (!access_ok(VERIFY_READ, buf, cnt))
@@ -401,7 +403,7 @@
 		return -ENOMEM;
 	e->proc_fops = &proc_bus_pci_operations;
 	e->data = dev;
-	e->size = PCI_CFG_SPACE_SIZE;
+	e->size = dev->cfg_size;
 
 	return 0;
 }
diff -Naur linux-2.6.0/include/asm-i386/fixmap.h
linux_pciexpress/include/asm-i386/fixmap.h
--- linux-2.6.0/include/asm-i386/fixmap.h	2003-12-18
08:28:06.000000000 +0530
+++ linux_pciexpress/include/asm-i386/fixmap.h	2004-01-29
16:15:38.000000000 +0530
@@ -67,6 +67,9 @@
 	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings
*/
 	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
 #endif
+#ifdef CONFIG_PCI_EXPRESS
+	FIX_PCIE_MCFG,
+#endif
 #ifdef CONFIG_ACPI_BOOT
 	FIX_ACPI_BEGIN,
 	FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
diff -Naur linux-2.6.0/include/asm-i386/pci.h
linux_pciexpress/include/asm-i386/pci.h
--- linux-2.6.0/include/asm-i386/pci.h	2003-12-18 08:28:47.000000000
+0530
+++ linux_pciexpress/include/asm-i386/pci.h	2004-01-29
16:15:39.000000000 +0530
@@ -96,4 +96,69 @@
 /* generic pci stuff */
 #include <asm-generic/pci.h>
 
+#ifdef CONFIG_PCI_EXPRESS
+extern spinlock_t pci_config_lock;
+
+/*
+ * Variable used to store the base address of the last PCI Express
device
+ * accessed.
+ */
+extern u32 pcie_last_accessed_device;
+
+/*
+ * Variable used to store the base address of the chipset
+ */
+extern u32 mmcfg_base_address;
+
+/*
+ * Variable used to store the virtual  address of fixed PTE
+ */
+extern char *mmcfg_virt_addr;
+
+static inline void pci_exp_set_dev_base(int bus, int devfn)
+{
+	u32 dev_base = 
+		mmcfg_base_address | (bus << 20) | (devfn << 12);
+	if (dev_base != pcie_last_accessed_device) {
+		pcie_last_accessed_device = dev_base;
+		set_fixmap(FIX_PCIE_MCFG, dev_base);
+	}
+}
+
+static inline void pci_express_read(int bus, int devfn, int reg, 
+		int len, u32 *value)
+{
+	pci_exp_set_dev_base(bus, devfn);
+ 	switch (len) {
+        case 1:
+		*value = (u8)readb(mmcfg_virt_addr + reg);
+		break;
+        case 2:
+		*value = (u16)readw(mmcfg_virt_addr + reg);
+		break;
+        case 4:
+		*value = (u32)readl(mmcfg_virt_addr + reg);
+		break;
+	}
+}
+
+static inline void pci_express_write(int bus, int devfn, int reg, 
+	int len, u32 value)
+{
+	pci_exp_set_dev_base(bus, devfn);
+	switch (len) {
+		case 1:
+			writeb(value, mmcfg_virt_addr + reg);
+			break;
+		case 2:
+			writew(value, mmcfg_virt_addr + reg);
+			break;
+	        case 4:
+			writel(value, mmcfg_virt_addr + reg);
+	                break;
+     	}
+	/* Dummy read to flush PCI write */
+	readl(mmcfg_virt_addr);
+}
+#endif /* CONFIG_PCI_EXPRESS */
 #endif /* __i386_PCI_H */
diff -Naur linux-2.6.0/include/linux/acpi.h
linux_pciexpress/include/linux/acpi.h
--- linux-2.6.0/include/linux/acpi.h	2003-12-18 08:27:58.000000000
+0530
+++ linux_pciexpress/include/linux/acpi.h	2004-01-29
16:15:20.000000000 +0530
@@ -317,6 +317,13 @@
 	char				ec_id[0];
 } __attribute__ ((packed));
 
+struct acpi_table_mcfg {
+	struct acpi_table_header 	header;
+	u8	reserved[8];
+	u32	base_address;
+	u32	base_reserved;
+} __attribute__ ((packed));
+
 /* Table Handlers */
 
 enum acpi_table_id {
@@ -338,6 +345,7 @@
 	ACPI_SSDT,
 	ACPI_SPMI,
 	ACPI_HPET,
+	ACPI_MCFG,
 	ACPI_TABLE_COUNT
 };
 
@@ -437,4 +445,7 @@
 
 #endif /*!CONFIG_ACPI_INTERPRETER*/
 
+#ifdef CONFIG_PCI_EXPRESS
+extern u32 mmcfg_base_address;
+#endif
 #endif /*_LINUX_ACPI_H*/
diff -Naur linux-2.6.0/include/linux/pci.h
linux_pciexpress/include/linux/pci.h
--- linux-2.6.0/include/linux/pci.h	2003-12-18 08:28:49.000000000
+0530
+++ linux_pciexpress/include/linux/pci.h	2004-01-29
16:43:01.000000000 +0530
@@ -198,6 +198,7 @@
 #define  PCI_CAP_ID_MSI		0x05	/* Message Signalled
Interrupts */
 #define  PCI_CAP_ID_CHSWP	0x06	/* CompactPCI HotSwap */
 #define  PCI_CAP_ID_PCIX	0x07	/* PCI-X */
+#define  PCI_CAP_ID_EXP 	0x10	/* PCI-EXPANDED */
 #define PCI_CAP_LIST_NEXT	1	/* Next capability in the list
*/
 #define PCI_CAP_FLAGS		2	/* Capability defined flags (16
bits) */
 #define PCI_CAP_SIZEOF		4
@@ -424,6 +425,7 @@
 #define PCI_NAME_HALF	__stringify(20)	/* less than half to handle slop
*/
 	char		pretty_name[PCI_NAME_SIZE];	/* pretty name
for users to see */
 #endif
+	int cfg_size;
 };
 
 #define pci_dev_g(n) list_entry(n, struct pci_dev, global_list)

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-28  9:38 Durairaj, Sundarapandian
  2004-01-28 14:42 ` Vladimir Kondratiev
@ 2004-01-28 15:18 ` Matthew Wilcox
  1 sibling, 0 replies; 43+ messages in thread
From: Matthew Wilcox @ 2004-01-28 15:18 UTC (permalink / raw)
  To: Durairaj, Sundarapandian
  Cc: linux-kernel, linux-pci, torvalds, alan, greg, Andi Kleen, akpm,
	mj, Kondratiev, Vladimir, Seshadri, Harinarayanan, Nakajima, Jun

On Wed, Jan 28, 2004 at 03:08:01PM +0530, Durairaj, Sundarapandian wrote:
> -menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
> +menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA, PCI_EXPRESS)"

I think this is unnecessary.  For users, PCI Express is just another
form of PCI.

> +config PCI_EXPRESS
> +	bool "PCI_EXPRESS (EXPERIMENTAL)" 
> +	depends on EXPERIMENTAL && ACPI_BOOT

Can't we do this with select?  ie:

config PCI_EXPRESS
	bool "PCI Express (EXPERIMENTAL)"
	depends on EXPERIMENTAL
	select ACPI_BOOT

> +	help
> +	  PCI Express extends the configuration space from 256 bytes to
> +	  4k bytes. It also defines an enhanced configuration mechanism
> +	  to access the extended configuration space. With this option, 
> +	  you can specify that Linux will first attempt to access the 
> +	  PCI configuration space through enhanced config access 
> +	  mechanism (will work only on PCI Express based system)
> +	  otherwise other standard PCI access mechanism will be used.

I don't think this help is terribly helpful to the user.  How about:

	help
	  PCI Express is a new I/O architecture that is used in many
	  systems from 2004 onwards.  Even if there are no PCI Express
	  slots on your motherboard, it may use PCI Express internally.
	  If you don't know, it is safe to say Y here.

Also, I would place this entry after PCI and make it depend on PCI (since
all the PCI infrastructure is relevant to PCI Express).

> +#ifdef CONFIG_PCI_EXPRESS
> +	result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
> +	if (!result) {
> +		printk(KERN_WARNING PREFIX "MCFG not present\n");
> +		return 0;
> +	}
> +	else if (result < 0) {

CodingStyle recommends joining these last two lines together.

> +static int pci_express_conf_read(int seg, int bus,
> +		int devfn, int reg, int len, u32 *value)
> +{
> +	if (!value || (bus > 255) || (devfn > 255) || (reg > 4095)) {
> +		printk(KERN_ERR "pci_express_conf_read: "
> +					"Invalid Parameter\n");
> +  		return -EINVAL;
> +	}
> +
> +	/* Shoot misaligned transaction now */
> +	if (reg & (len-1)) {
> +		printk(KERN_ERR "pci_express_conf_read: "
> +					"misaligned transaction\n");
> +  		return -EINVAL;
> +	}

This last bit is not needed; Linux doesn't let misaligned requests get
this far.  See drivers/pci/access.c::pci_bus_read_config_##size

> @@ -90,6 +90,8 @@
>   *  %PCI_CAP_ID_CHSWP        CompactPCI HotSwap 
>   *
>   *  %PCI_CAP_ID_PCIX         PCI-X
> + *  %PCI_CAP_ID_EXP          PCI-EXP
> +

seems like a stray blank line?

>   */
>  int
>  pci_find_capability(struct pci_dev *dev, int cap)
> diff -Naur linux-2.6.0/drivers/pci/probe.c linux_pciexpress/drivers/pci/probe.c
> --- linux-2.6.0/drivers/pci/probe.c	2003-12-18 08:29:06.000000000 +0530
> +++ linux_pciexpress/drivers/pci/probe.c	2004-01-28 12:06:39.000000000 +0530
> @@ -17,6 +17,8 @@
>  
>  #define CARDBUS_LATENCY_TIMER	176	/* secondary latency timer */
>  #define CARDBUS_RESERVE_BUSNR	3
> +#define PCI_CFG_SPACE_SIZE	256
> +#define PCI_CFG_SPACE_EXP_SIZE	4096

fwiw, PCI-X 2 also has 4096 bytes of config space.  Perhaps we can just
agree that 'EXP' stands for 'Expanded', not 'Express' in this instance?
;-)

> +static int pci_cfg_space_size(struct pci_dev *dev)
> +{
> +#ifdef CONFIG_PCI_EXPRESS
> +	/* Find whether the device is PCI Express device */
> +	int is_pci_express_dev = 
> +		pci_find_capability(dev, PCI_CAP_ID_EXP);
> +	if (is_pci_express_dev)
> +		return PCI_CFG_SPACE_EXP_SIZE;
> +	else

I would drop the `else' here.

> +#endif
> +	return PCI_CFG_SPACE_SIZE; 
> +}
> +
>  /*
>   * Read the config data for a PCI device, sanity-check it
>   * and fill in the dev structure...
> @@ -515,6 +533,7 @@
>  	dev->multifunction = !!(hdr_type & 0x80);
>  	dev->vendor = l & 0xffff;
>  	dev->device = (l >> 16) & 0xffff;
> +	dev->cfg_size = pci_cfg_space_size(dev);

Good idea to cache it in the pci_dev.

> +static inline void pci_express_read(int bus, int devfn, int reg, 
> +		int len, u32 *value)
> +{
> +	unsigned long flags;
> +	spin_lock_irqsave(&pci_config_lock, flags);

You're already under the pci_lock spinlock (again see drivers/pci/access.c),
so I think this is unnecessary.


This is coming together nicely.

-- 
"Next the statesmen will invent cheap lies, putting the blame upon 
the nation that is attacked, and every man will be glad of those
conscience-soothing falsities, and will diligently study them, and refuse
to examine any refutations of them; and thus he will by and by convince 
himself that the war is just, and will thank God for the better sleep 
he enjoys after this process of grotesque self-deception." -- Mark Twain

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-28 14:42 ` Vladimir Kondratiev
  2004-01-28 14:54   ` Christoph Hellwig
@ 2004-01-28 15:00   ` Martin Mares
  1 sibling, 0 replies; 43+ messages in thread
From: Martin Mares @ 2004-01-28 15:00 UTC (permalink / raw)
  To: Vladimir Kondratiev
  Cc: Durairaj, Sundarapandian, linux-kernel, linux-pci, torvalds,
	alan, greg, Andi Kleen, akpm, Seshadri, Harinarayanan, Nakajima,
	Jun

Hello!

> - if you present a 4k config space for all devices, it will save lots
> of work: you do not need to modify struct pci_dev, and almost all of
> the changes in drivers/pci/proc.c become unnecessary. Presenting a 4k
> config space for a PCI device should not break anything.

For example the sizes of files in /sys/bus/pci should reflect the real size of
configuration space.

				Have a nice fortnight
-- 
Martin `MJ' Mares   <mj@ucw.cz>   http://atrey.karlin.mff.cuni.cz/~mj/
Faculty of Math and Physics, Charles University, Prague, Czech Rep., Earth
"Anyone can build a fast CPU. The trick is to build a fast system." -- S. Cray

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-28 14:42 ` Vladimir Kondratiev
@ 2004-01-28 14:54   ` Christoph Hellwig
  2004-01-28 15:00   ` Martin Mares
  1 sibling, 0 replies; 43+ messages in thread
From: Christoph Hellwig @ 2004-01-28 14:54 UTC (permalink / raw)
  To: Vladimir Kondratiev
  Cc: Durairaj, Sundarapandian, linux-kernel, linux-pci, torvalds,
	alan, greg, Andi Kleen, akpm, mj, Seshadri, Harinarayanan,
	Nakajima, Jun

On Wed, Jan 28, 2004 at 04:42:52PM +0200, Vladimir Kondratiev wrote:
> My inputs:
> 
> - I do not like pci_express_read being implemented as an inline
> function. It is called in only one place. It is more appropriate, in my
> opinion, to merge everything added to include/asm-i386/pci.h into
> arch/i386/pci/direct.c.

Actually it should be in a file of its own, e.g. arch/i386/pci/express.c


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-28  9:38 Durairaj, Sundarapandian
@ 2004-01-28 14:42 ` Vladimir Kondratiev
  2004-01-28 14:54   ` Christoph Hellwig
  2004-01-28 15:00   ` Martin Mares
  2004-01-28 15:18 ` Matthew Wilcox
  1 sibling, 2 replies; 43+ messages in thread
From: Vladimir Kondratiev @ 2004-01-28 14:42 UTC (permalink / raw)
  To: Durairaj, Sundarapandian
  Cc: linux-kernel, linux-pci, torvalds, alan, greg, Andi Kleen, akpm,
	mj, Seshadri, Harinarayanan, Nakajima, Jun

My inputs:

- I do not like pci_express_read being implemented as an inline
function. It is called in only one place. It is more appropriate, in my
opinion, to merge everything added to include/asm-i386/pci.h into
arch/i386/pci/direct.c.

- if you present a 4k config space for all devices, it will save lots
of work: you do not need to modify struct pci_dev, and almost all of
the changes in drivers/pci/proc.c become unnecessary. Presenting a 4k
config space for a PCI device should not break anything.

- Here and in _write function:
+static int pci_express_conf_read(int seg, int bus,
+        int devfn, int reg, int len, u32 *value)
+{
+    if (!value || (bus > 255) || (devfn > 255) || (reg > 4095)) {
+        printk(KERN_ERR "pci_express_conf_read: "
+                    "Invalid Parameter\n");
It would be better to use
         printk(KERN_ERR "%s: Invalid Parameter\n",__FUNCTION__);

Durairaj, Sundarapandian wrote:

>Hi All, 
>
>Thanks for your comments. I am posting this patch after incorporating
>the review comments.
>
>Please find the attached patch file. Please review this and send your
>comments.
>
>Thanks,
>Sundar
>
>Note:
>This is the patch for PCI Express Enhanced Configuration on the 2.6.0
>test11 kernel, following up on the previous patches from Vladimir
>(Vladimir.Kondratiev@intel.com), Harinarayanan
>(Harinarayanan.Seshadri@intel.com) and me.
>I tested it on our i386 platform.
>
>This patch also implements a mechanism for the kernel to find the
>chipset specific mmcfg base address. The kernel will detect the base
>address of the chipset through the ACPI table entry and based on that
>the PCI subsystem will be initialized.  
>  
>

^ permalink raw reply	[flat|nested] 43+ messages in thread

* RE: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
@ 2004-01-28  9:38 Durairaj, Sundarapandian
  2004-01-28 14:42 ` Vladimir Kondratiev
  2004-01-28 15:18 ` Matthew Wilcox
  0 siblings, 2 replies; 43+ messages in thread
From: Durairaj, Sundarapandian @ 2004-01-28  9:38 UTC (permalink / raw)
  To: linux-kernel, linux-pci
  Cc: torvalds, alan, greg, Andi Kleen, akpm, mj, Kondratiev, Vladimir,
	Seshadri, Harinarayanan, Nakajima, Jun, Durairaj, Sundarapandian

[-- Attachment #1: Type: text/plain, Size: 730 bytes --]

Hi All, 

Thanks for your comments. I am posting this patch after incorporating
the review comments.

Please find the attached patch file. Please review this and send your
comments.

Thanks,
Sundar

Note:
This is the patch for PCI Express Enhanced Configuration on the 2.6.0
test11 kernel, following up on the previous patches from Vladimir
(Vladimir.Kondratiev@intel.com), Harinarayanan
(Harinarayanan.Seshadri@intel.com) and me.
I tested it on our i386 platform.

This patch also implements a mechanism for the kernel to find the
chipset specific mmcfg base address. The kernel will detect the base
address of the chipset through the ACPI table entry and based on that
the PCI subsystem will be initialized.  

[-- Attachment #2: mcfg_2.6.lkml.patch --]
[-- Type: application/octet-stream, Size: 15758 bytes --]

diff -Naur linux-2.6.0/arch/i386/Kconfig linux_pciexpress/arch/i386/Kconfig
--- linux-2.6.0/arch/i386/Kconfig	2003-12-18 08:28:16.000000000 +0530
+++ linux_pciexpress/arch/i386/Kconfig	2004-01-28 12:04:20.000000000 +0530
@@ -959,7 +959,7 @@
 endmenu
 
 
-menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
+menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA, PCI_EXPRESS)"
 
 config X86_VISWS_APIC
 	bool
@@ -976,6 +976,18 @@
 	depends on SMP && !(X86_VISWS || X86_VOYAGER)
 	default y
 
+config PCI_EXPRESS
+	bool "PCI_EXPRESS (EXPERIMENTAL)" 
+	depends on EXPERIMENTAL && ACPI_BOOT
+	help
+	  PCI Express extends the configuration space from 256 bytes to
+	  4k bytes. It also defines an enhanced configuration mechanism
+	  to access the extended configuration space. With this option, 
+	  you can specify that Linux will first attempt to access the 
+	  PCI configuration space through enhanced config access 
+	  mechanism (will work only on PCI Express based system)
+	  otherwise other standard PCI access mechanism will be used.
+
 config PCI
 	bool "PCI support" if !X86_VISWS
 	depends on !X86_VOYAGER
diff -Naur linux-2.6.0/arch/i386/kernel/acpi/boot.c linux_pciexpress/arch/i386/kernel/acpi/boot.c
--- linux-2.6.0/arch/i386/kernel/acpi/boot.c	2003-12-18 08:29:29.000000000 +0530
+++ linux_pciexpress/arch/i386/kernel/acpi/boot.c	2004-01-28 11:43:28.000000000 +0530
@@ -93,6 +93,27 @@
 	return ((unsigned char *) base + offset);
 }
 
+#ifdef CONFIG_PCI_EXPRESS
+static int __init acpi_parse_mcfg
+			(unsigned long phys_addr, unsigned long size)
+{
+	struct acpi_table_mcfg	*mcfg = NULL;
+
+	if (!phys_addr || !size)
+		return -EINVAL;
+
+	mcfg = (struct acpi_table_mcfg *) __acpi_map_table
+						(phys_addr, size);
+	if (!mcfg) {
+		printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+		return -ENODEV;
+	}
+	if (mcfg->base_address)
+		mmcfg_base_address = mcfg->base_address;
+
+	return 0;
+}
+#endif /* CONFIG_PCI_EXPRESS */
 
 #ifdef CONFIG_X86_LOCAL_APIC
 
@@ -508,6 +529,22 @@
 
 #endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */
 
+#ifdef CONFIG_PCI_EXPRESS
+	result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+	if (!result) {
+		printk(KERN_WARNING PREFIX "MCFG not present\n");
+		return 0;
+	}
+	else if (result < 0) {
+		printk(KERN_ERR PREFIX "Error parsing MCFG\n");
+		return result;
+	}
+	else if (result > 1) {
+		printk(KERN_WARNING PREFIX
+			"Multiple MCFG tables exist\n");
+	}
+#endif /* CONFIG_PCI_EXPRESS */
+
 #ifdef CONFIG_X86_LOCAL_APIC
 	if (acpi_lapic && acpi_ioapic) {
 		smp_found_config = 1;
diff -Naur linux-2.6.0/arch/i386/pci/common.c linux_pciexpress/arch/i386/pci/common.c
--- linux-2.6.0/arch/i386/pci/common.c	2003-12-18 08:28:46.000000000 +0530
+++ linux_pciexpress/arch/i386/pci/common.c	2004-01-28 11:51:38.000000000 +0530
@@ -19,7 +19,8 @@
 extern  void pcibios_sort(void);
 #endif
 
-unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2;
+unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2
+				 | PCI_PROBE_ENHANCED;
 
 int pcibios_last_bus = -1;
 struct pci_bus *pci_root_bus = NULL;
@@ -197,6 +198,12 @@
 		return NULL;
 	}
 #endif
+#ifdef CONFIG_PCI_EXPRESS
+	else if (!strcmp(str, "nopciexpress")) {
+		pci_probe &= ~PCI_PROBE_ENHANCED;
+		return NULL;
+	}
+#endif
 #ifdef CONFIG_ACPI_PCI
 	else if (!strcmp(str, "noacpi")) {
 		pci_probe |= PCI_NO_ACPI_ROUTING;
diff -Naur linux-2.6.0/arch/i386/pci/direct.c linux_pciexpress/arch/i386/pci/direct.c
--- linux-2.6.0/arch/i386/pci/direct.c	2003-12-18 08:28:28.000000000 +0530
+++ linux_pciexpress/arch/i386/pci/direct.c	2004-01-28 11:27:07.000000000 +0530
@@ -167,6 +167,73 @@
 };
 
 
+#ifdef CONFIG_PCI_EXPRESS
+/*
+ * We map full Page size on each PCI Express request. Incidentally that's 
+ * the size we have for config space too in PCI Express devices.
+ * On PCI Express capable platform, at the time of kernel initialization
+ * the OS would have scanned for MCFG table and set this variable to 
+ * appropriate value. If PCI Express not supported the variable will 
+ * have 0 value
+ */
+u32 mmcfg_base_address;
+
+/*
+ * Variable used to store the virtual  address of fixed PTE
+ */
+char *mmcfg_virt_addr;
+
+/*
+ * Variable used to store the base address of the last PCI Express device
+ * accessed.
+ */
+u32 pcie_last_accessed_device;
+
+static int pci_express_conf_read(int seg, int bus,
+		int devfn, int reg, int len, u32 *value)
+{
+	if (!value || (bus > 255) || (devfn > 255) || (reg > 4095)) {
+		printk(KERN_ERR "pci_express_conf_read: "
+					"Invalid Parameter\n");
+  		return -EINVAL;
+	}
+
+	/* Shoot misaligned transaction now */
+	if (reg & (len-1)) {
+		printk(KERN_ERR "pci_express_conf_read: "
+					"misaligned transaction\n");
+  		return -EINVAL;
+	}
+	pci_express_read(bus, devfn, reg, len, value);
+
+	return 0;
+}
+ 
+static int pci_express_conf_write(int seg, int bus, 
+			int devfn, int reg, int len, u32 value)
+{
+	if ((bus > 255) || (devfn > 255) || (reg > 4095)) {
+		printk(KERN_ERR "pci_express_conf_write: "
+					"Invalid Parameter\n");
+		return -EINVAL;
+	}
+
+	/* Shoot misaligned transaction now */
+	if (reg & (len-1)) {
+		printk(KERN_ERR "pci_express_conf_write: "
+					"misaligned transaction\n");
+  		return -EINVAL;
+	}
+	pci_express_write(bus, devfn, reg, len, value);
+	return 0;
+}
+
+static struct pci_raw_ops pci_express_conf = {
+	.read   =	pci_express_conf_read,
+	.write  =	pci_express_conf_write,
+};
+#endif /* CONFIG_PCI_EXPRESS */
+
 /*
  * Before we decide to use direct hardware access mechanisms, we try to do some
  * trivial checks to ensure it at least _seems_ to be working -- we just test
@@ -244,7 +311,30 @@
 static int __init pci_direct_init(void)
 {
 	struct resource *region, *region2;
+	
+#ifdef CONFIG_PCI_EXPRESS
+	if ((pci_probe & PCI_PROBE_ENHANCED) == 0)
+		goto type1;
+	/*
+ 	 * Check if platform we are running is PCI Express capable
+  	 */
+	if (mmcfg_base_address == 0) {
+		printk(KERN_INFO 
+		      "MCFG table entry is not found in ACPI tables....\n"
+		      "Not enabling Enhanced Configuration....\n");
+		goto type1;
+	}
 
+	/* Calculate the virtual address of the PTE */
+	mmcfg_virt_addr = (char *)fix_to_virt(FIX_PCIE_MCFG);
+
+	if (pci_sanity_check(&pci_express_conf)) {
+		printk(KERN_INFO "PCI: Using config type PCIExp\n");
+		raw_pci_ops = &pci_express_conf;
+		return 0;
+	}
+type1:
+#endif /* CONFIG_PCI_EXPRESS */
 	if ((pci_probe & PCI_PROBE_CONF1) == 0)
 		goto type2;
 	region = request_region(0xCF8, 8, "PCI conf1");
diff -Naur linux-2.6.0/arch/i386/pci/Makefile linux_pciexpress/arch/i386/pci/Makefile
--- linux-2.6.0/arch/i386/pci/Makefile	2003-12-18 08:28:57.000000000 +0530
+++ linux_pciexpress/arch/i386/pci/Makefile	2004-01-26 13:32:28.000000000 +0530
@@ -2,6 +2,7 @@
 
 obj-$(CONFIG_PCI_BIOS)		+= pcbios.o
 obj-$(CONFIG_PCI_DIRECT)	+= direct.o
+obj-$(CONFIG_PCI_EXPRESS)	+= direct.o
 
 pci-y				:= fixup.o
 pci-$(CONFIG_ACPI_PCI)		+= acpi.o
diff -Naur linux-2.6.0/arch/i386/pci/pci.h linux_pciexpress/arch/i386/pci/pci.h
--- linux-2.6.0/arch/i386/pci/pci.h	2003-12-18 08:28:57.000000000 +0530
+++ linux_pciexpress/arch/i386/pci/pci.h	2004-01-26 13:32:28.000000000 +0530
@@ -15,6 +15,11 @@
 #define PCI_PROBE_BIOS		0x0001
 #define PCI_PROBE_CONF1		0x0002
 #define PCI_PROBE_CONF2		0x0004
+#ifdef CONFIG_PCI_EXPRESS
+#define PCI_PROBE_ENHANCED	0x0008
+#else
+#define PCI_PROBE_ENHANCED 	0x0
+#endif
 #define PCI_NO_SORT		0x0100
 #define PCI_BIOS_SORT		0x0200
 #define PCI_NO_CHECKS		0x0400
diff -Naur linux-2.6.0/drivers/acpi/tables.c linux_pciexpress/drivers/acpi/tables.c
--- linux-2.6.0/drivers/acpi/tables.c	2003-12-18 08:28:46.000000000 +0530
+++ linux_pciexpress/drivers/acpi/tables.c	2004-01-26 13:31:51.000000000 +0530
@@ -58,6 +58,7 @@
 	[ACPI_SSDT]		= "SSDT",
 	[ACPI_SPMI]		= "SPMI",
 	[ACPI_HPET]		= "HPET",
+	[ACPI_MCFG]		= "MCFG",
 };
 
 /* System Description Table (RSDT/XSDT) */
diff -Naur linux-2.6.0/drivers/pci/pci.c linux_pciexpress/drivers/pci/pci.c
--- linux-2.6.0/drivers/pci/pci.c	2003-12-18 08:28:38.000000000 +0530
+++ linux_pciexpress/drivers/pci/pci.c	2004-01-26 13:31:40.000000000 +0530
@@ -90,6 +90,8 @@
  *  %PCI_CAP_ID_CHSWP        CompactPCI HotSwap 
  *
  *  %PCI_CAP_ID_PCIX         PCI-X
+ *  %PCI_CAP_ID_EXP          PCI-EXP
+
  */
 int
 pci_find_capability(struct pci_dev *dev, int cap)
diff -Naur linux-2.6.0/drivers/pci/probe.c linux_pciexpress/drivers/pci/probe.c
--- linux-2.6.0/drivers/pci/probe.c	2003-12-18 08:29:06.000000000 +0530
+++ linux_pciexpress/drivers/pci/probe.c	2004-01-28 12:06:39.000000000 +0530
@@ -17,6 +17,8 @@
 
 #define CARDBUS_LATENCY_TIMER	176	/* secondary latency timer */
 #define CARDBUS_RESERVE_BUSNR	3
+#define PCI_CFG_SPACE_SIZE	256
+#define PCI_CFG_SPACE_EXP_SIZE	4096
 
 /* Ugh.  Need to stop exporting this to modules. */
 LIST_HEAD(pci_root_buses);
@@ -479,6 +481,22 @@
 	kfree(pci_dev);
 }
 
+/* 
+ * pci_cfg_space_size - get the configuration space size of the PCI device
+ */
+static int pci_cfg_space_size(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_EXPRESS
+	/* Find whether the device is PCI Express device */
+	int is_pci_express_dev = 
+		pci_find_capability(dev, PCI_CAP_ID_EXP);
+	if (is_pci_express_dev)
+		return PCI_CFG_SPACE_EXP_SIZE;
+	else
+#endif
+	return PCI_CFG_SPACE_SIZE; 
+}
+
 /*
  * Read the config data for a PCI device, sanity-check it
  * and fill in the dev structure...
@@ -515,6 +533,7 @@
 	dev->multifunction = !!(hdr_type & 0x80);
 	dev->vendor = l & 0xffff;
 	dev->device = (l >> 16) & 0xffff;
+	dev->cfg_size = pci_cfg_space_size(dev);
 
 	/* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
 	   set this higher, assuming the system even supports it.  */
diff -Naur linux-2.6.0/drivers/pci/proc.c linux_pciexpress/drivers/pci/proc.c
--- linux-2.6.0/drivers/pci/proc.c	2003-12-18 08:28:57.000000000 +0530
+++ linux_pciexpress/drivers/pci/proc.c	2004-01-26 15:45:34.000000000 +0530
@@ -16,14 +16,15 @@
 #include <asm/uaccess.h>
 #include <asm/byteorder.h>
 
-#define PCI_CFG_SPACE_SIZE 256
-
 static int proc_initialized;	/* = 0 */
 
 static loff_t
 proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
 {
 	loff_t new = -1;
+	const struct inode *ino = file->f_dentry->d_inode;
+	const struct proc_dir_entry *dp = PDE(ino);
+	struct pci_dev *dev = dp->data;
 
 	lock_kernel();
 	switch (whence) {
@@ -34,11 +35,11 @@
 		new = file->f_pos + off;
 		break;
 	case 2:
-		new = PCI_CFG_SPACE_SIZE + off;
+		new = dev->cfg_size + off;
 		break;
 	}
 	unlock_kernel();
-	if (new < 0 || new > PCI_CFG_SPACE_SIZE)
+	if (new < 0 || new > dev->cfg_size)
 		return -EINVAL;
 	return (file->f_pos = new);
 }
@@ -59,7 +60,7 @@
 	 */
 
 	if (capable(CAP_SYS_ADMIN))
-		size = PCI_CFG_SPACE_SIZE;
+ 		size = dev->cfg_size;
 	else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
 		size = 128;
 	else
@@ -133,13 +134,14 @@
 	struct pci_dev *dev = dp->data;
 	int pos = *ppos;
 	int cnt;
+	int size = dev->cfg_size;
 
-	if (pos >= PCI_CFG_SPACE_SIZE)
+	if (pos >= size)
 		return 0;
-	if (nbytes >= PCI_CFG_SPACE_SIZE)
-		nbytes = PCI_CFG_SPACE_SIZE;
-	if (pos + nbytes > PCI_CFG_SPACE_SIZE)
-		nbytes = PCI_CFG_SPACE_SIZE - pos;
+	if (nbytes >= size)
+		nbytes = size;
+	if (pos + nbytes > size)
+		nbytes = size - pos;
 	cnt = nbytes;
 
 	if (!access_ok(VERIFY_READ, buf, cnt))
@@ -401,7 +403,7 @@
 		return -ENOMEM;
 	e->proc_fops = &proc_bus_pci_operations;
 	e->data = dev;
-	e->size = PCI_CFG_SPACE_SIZE;
+	e->size = dev->cfg_size;
 
 	return 0;
 }
diff -Naur linux-2.6.0/include/asm-i386/fixmap.h linux_pciexpress/include/asm-i386/fixmap.h
--- linux-2.6.0/include/asm-i386/fixmap.h	2003-12-18 08:28:06.000000000 +0530
+++ linux_pciexpress/include/asm-i386/fixmap.h	2004-01-26 13:33:49.000000000 +0530
@@ -67,6 +67,9 @@
 	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
 	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
 #endif
+#ifdef CONFIG_PCI_EXPRESS
+	FIX_PCIE_MCFG,
+#endif
 #ifdef CONFIG_ACPI_BOOT
 	FIX_ACPI_BEGIN,
 	FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
diff -Naur linux-2.6.0/include/asm-i386/pci.h linux_pciexpress/include/asm-i386/pci.h
--- linux-2.6.0/include/asm-i386/pci.h	2003-12-18 08:28:47.000000000 +0530
+++ linux_pciexpress/include/asm-i386/pci.h	2004-01-28 11:33:51.000000000 +0530
@@ -96,4 +96,76 @@
 /* generic pci stuff */
 #include <asm-generic/pci.h>
 
+#ifdef CONFIG_PCI_EXPRESS
+extern spinlock_t pci_config_lock;
+
+/*
+ * Variable used to store the base address of the last PCI Express device
+ * accessed.
+ */
+extern u32 pcie_last_accessed_device;
+
+/*
+ * Variable used to store the base address of the chipset
+ */
+extern u32 mmcfg_base_address;
+
+/*
+ * Variable used to store the virtual  address of fixed PTE
+ */
+extern char *mmcfg_virt_addr;
+
+static inline void pci_exp_set_dev_base(int bus, int devfn)
+{
+	u32 dev_base = 
+		mmcfg_base_address | (bus << 20) | (devfn << 12);
+	if (dev_base != pcie_last_accessed_device) {
+		pcie_last_accessed_device = dev_base;
+		set_fixmap(FIX_PCIE_MCFG, dev_base);
+	}
+}
+
+static inline void pci_express_read(int bus, int devfn, int reg, 
+		int len, u32 *value)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&pci_config_lock, flags);
+	pci_exp_set_dev_base(bus, devfn);
+ 	switch (len) {
+        case 1:
+		*value = (u8)readb(mmcfg_virt_addr + reg);
+		break;
+        case 2:
+		*value = (u16)readw(mmcfg_virt_addr + reg);
+		break;
+        case 4:
+		*value = (u32)readl(mmcfg_virt_addr + reg);
+		break;
+	}
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+}
+
+static inline void pci_express_write(int bus, int devfn, int reg, 
+	int len, u32 value)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+	pci_exp_set_dev_base(bus, devfn);
+	switch (len) {
+		case 1:
+			writeb(value, mmcfg_virt_addr + reg);
+			break;
+		case 2:
+			writew(value, mmcfg_virt_addr + reg);
+			break;
+	        case 4:
+			writel(value, mmcfg_virt_addr + reg);
+	                break;
+     	}
+	/* Dummy read to flush PCI write */
+	readl(mmcfg_virt_addr);
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+}
+#endif /* CONFIG_PCI_EXPRESS */
 #endif /* __i386_PCI_H */
diff -Naur linux-2.6.0/include/linux/acpi.h linux_pciexpress/include/linux/acpi.h
--- linux-2.6.0/include/linux/acpi.h	2003-12-18 08:27:58.000000000 +0530
+++ linux_pciexpress/include/linux/acpi.h	2004-01-26 13:33:09.000000000 +0530
@@ -317,6 +317,13 @@
 	char				ec_id[0];
 } __attribute__ ((packed));
 
+struct acpi_table_mcfg {
+	struct acpi_table_header 	header;
+	u8	reserved[8];
+	u32	base_address;
+	u32	base_reserved;
+} __attribute__ ((packed));
+
 /* Table Handlers */
 
 enum acpi_table_id {
@@ -338,6 +345,7 @@
 	ACPI_SSDT,
 	ACPI_SPMI,
 	ACPI_HPET,
+	ACPI_MCFG,
 	ACPI_TABLE_COUNT
 };
 
@@ -437,4 +445,7 @@
 
 #endif /*!CONFIG_ACPI_INTERPRETER*/
 
+#ifdef CONFIG_PCI_EXPRESS
+extern u32 mmcfg_base_address;
+#endif
 #endif /*_LINUX_ACPI_H*/
diff -Naur linux-2.6.0/include/linux/pci.h linux_pciexpress/include/linux/pci.h
--- linux-2.6.0/include/linux/pci.h	2003-12-18 08:28:49.000000000 +0530
+++ linux_pciexpress/include/linux/pci.h	2004-01-26 15:47:23.000000000 +0530
@@ -198,6 +198,7 @@
 #define  PCI_CAP_ID_MSI		0x05	/* Message Signalled Interrupts */
 #define  PCI_CAP_ID_CHSWP	0x06	/* CompactPCI HotSwap */
 #define  PCI_CAP_ID_PCIX	0x07	/* PCI-X */
+#define  PCI_CAP_ID_EXP 	0x10	/* PCI-Express */
 #define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
 #define PCI_CAP_FLAGS		2	/* Capability defined flags (16 bits) */
 #define PCI_CAP_SIZEOF		4
@@ -424,6 +425,7 @@
 #define PCI_NAME_HALF	__stringify(20)	/* less than half to handle slop */
 	char		pretty_name[PCI_NAME_SIZE];	/* pretty name for users to see */
 #endif
+	int cfg_size;
 };
 
 #define pci_dev_g(n) list_entry(n, struct pci_dev, global_list)

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-23 19:31         ` Martin Mares
@ 2004-01-23 20:08           ` Stefan Smietanowski
  0 siblings, 0 replies; 43+ messages in thread
From: Stefan Smietanowski @ 2004-01-23 20:08 UTC (permalink / raw)
  To: Martin Mares
  Cc: Pavel Machek, Randy.Dunlap, Alan Cox, ak,
	sundarapandian.durairaj, linux-kernel, linux-pci, torvalds, greg,
	vladimir.kondratiev, harinarayanan.seshadri

Martin Mares wrote:

> Hi!
> 
> 
>>I'd call it "noexpress". pciexp sounds like PCI exception, PCI
>>expected or something...
> 
> 
> Well, "noexpress" sounds like it does nothing in common with PCI,
> which is more misleading than the connotations you mention.

nopciexpress sounds better than that, but it's sorta long...

// Stefan


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-23 19:19       ` Pavel Machek
@ 2004-01-23 19:31         ` Martin Mares
  2004-01-23 20:08           ` Stefan Smietanowski
  0 siblings, 1 reply; 43+ messages in thread
From: Martin Mares @ 2004-01-23 19:31 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Randy.Dunlap, Alan Cox, ak, sundarapandian.durairaj,
	linux-kernel, linux-pci, torvalds, greg, vladimir.kondratiev,
	harinarayanan.seshadri

Hi!

> I'd call it "noexpress". pciexp sounds like PCI exception, PCI
> expected or something...

Well, "noexpress" sounds like it does nothing in common with PCI,
which is more misleading than the connotations you mention.

				Have a nice fortnight
-- 
Martin `MJ' Mares   <mj@ucw.cz>   http://atrey.karlin.mff.cuni.cz/~mj/
Faculty of Math and Physics, Charles University, Prague, Czech Rep., Earth
P.C.M.C.I.A. stands for `People Can't Memorize Computer Industry Acronyms'

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-22 19:40     ` Randy.Dunlap
@ 2004-01-23 19:19       ` Pavel Machek
  2004-01-23 19:31         ` Martin Mares
  0 siblings, 1 reply; 43+ messages in thread
From: Pavel Machek @ 2004-01-23 19:19 UTC (permalink / raw)
  To: Randy.Dunlap
  Cc: Alan Cox, ak, sundarapandian.durairaj, linux-kernel, linux-pci,
	torvalds, greg, vladimir.kondratiev, harinarayanan.seshadri

Hi!

> | On Iau, 2004-01-22 at 13:12, Andi Kleen wrote:
> | > > +#ifdef CONFIG_PCI_EXPRESS
> | > > +	else if (!strcmp(str, "no_pcie")) {
> | > 
> | > Would "no_pciexp" be better? no_pcie looks nearly like a typo.
> | 
> | Other "nofoo" generally don't use "_" (Linux kernel really needs an
> | actual policy document for such stuff tho)
> ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> 
> Right, let's keep it consistent, like "nopciexp".

I'd call it "noexpress". pciexp sounds like PCI exception, PCI
expected or something...
								Pavel
-- 
When do you have a heart between your knees?
[Johanka's followup: and *two* hearts?]

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-22 18:21   ` Alan Cox
@ 2004-01-22 19:40     ` Randy.Dunlap
  2004-01-23 19:19       ` Pavel Machek
  0 siblings, 1 reply; 43+ messages in thread
From: Randy.Dunlap @ 2004-01-22 19:40 UTC (permalink / raw)
  To: Alan Cox
  Cc: ak, sundarapandian.durairaj, linux-kernel, linux-pci, torvalds,
	greg, vladimir.kondratiev, harinarayanan.seshadri

On Thu, 22 Jan 2004 18:21:04 +0000 Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:

| On Iau, 2004-01-22 at 13:12, Andi Kleen wrote:
| > > +#ifdef CONFIG_PCI_EXPRESS
| > > +	else if (!strcmp(str, "no_pcie")) {
| > 
| > Would "no_pciexp" be better? no_pcie looks nearly like a typo.
| 
| Other "nofoo" generally don't use "_" (Linux kernel really needs an
| actual policy document for such stuff tho)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Right, let's keep it consistent, like "nopciexp".

--
~Randy

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-22 13:12 ` Andi Kleen
@ 2004-01-22 18:21   ` Alan Cox
  2004-01-22 19:40     ` Randy.Dunlap
  0 siblings, 1 reply; 43+ messages in thread
From: Alan Cox @ 2004-01-22 18:21 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Durairaj, Sundarapandian, Linux Kernel Mailing List, linux-pci,
	torvalds, greg, Kondratiev, Vladimir, Seshadri, Harinarayanan

On Iau, 2004-01-22 at 13:12, Andi Kleen wrote:
> > +#ifdef CONFIG_PCI_EXPRESS
> > +	else if (!strcmp(str, "no_pcie")) {
> 
> Would "no_pciexp" be better? no_pcie looks nearly like a typo.

Other "nofoo" generally don't use "_" (Linux kernel really needs an
actual policy document for such stuff tho)


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-22 10:21 Durairaj, Sundarapandian
                   ` (3 preceding siblings ...)
  2004-01-22 16:40 ` Grant Grundler
@ 2004-01-22 17:00 ` Greg KH
  4 siblings, 0 replies; 43+ messages in thread
From: Greg KH @ 2004-01-22 17:00 UTC (permalink / raw)
  To: Durairaj, Sundarapandian
  Cc: linux-kernel, linux-pci, torvalds, alan, Andi Kleen, Kondratiev,
	Vladimir, Seshadri, Harinarayanan

On Thu, Jan 22, 2004 at 03:51:22PM +0530, Durairaj, Sundarapandian wrote:
> Hi All, 
> 
> I am reposting the updated patch after incorporating the review
> comments.

Hm, looks like you have lots of comments already, so I'll wait for your
next revision before making any more about the code.

> This is the patch on PCI Express Enhanced configuration for 2.6.0 test11
> kernel following up to the Vladimir (Vladimir.Kondratiev@intel.com) and
> Harinarayanan (Harinarayanan.Seshadri@intel.com)  and my previous
> patches .
> I tested it on our i386 platform. 

If I could ask, exactly what chipset was this tested on?  I'm waist deep
in chipset specs right now, so it would be nice to see if I could try to
test this code out on something that both you have, and have not, tested
it on.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-22 10:21 Durairaj, Sundarapandian
                   ` (2 preceding siblings ...)
  2004-01-22 13:12 ` Andi Kleen
@ 2004-01-22 16:40 ` Grant Grundler
  2004-01-22 17:00 ` Greg KH
  4 siblings, 0 replies; 43+ messages in thread
From: Grant Grundler @ 2004-01-22 16:40 UTC (permalink / raw)
  To: Durairaj, Sundarapandian
  Cc: linux-kernel, linux-pci, torvalds, alan, greg, Andi Kleen,
	Kondratiev, Vladimir, Seshadri, Harinarayanan

On Thu, Jan 22, 2004 at 03:51:22PM +0530, Durairaj, Sundarapandian wrote:
> I tested it on our i386 platform. 

Any chance Intel can test this on an IA64 box?

...
>  /*
> + *We map full Page size on each request. Incidently that's the size we
> + *have for config space too.
> + */

"full Page size" != 4k on several architectures.
PCI Express is going to be implemented on ia64 and power as well.

...
> diff -Naur linux-2.6.0/include/asm-i386/pci.h
...
> +#ifdef CONFIG_PCI_EXPRESS
> +/*
> + *Variable used to store the base address of the last pciexpress device
> + *accessed.
> + */
> +static u32 pcie_last_accessed_device;

Andi is right - this is a definite no-no.

grant

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-22 10:21 Durairaj, Sundarapandian
  2004-01-22 10:44 ` Andrew Morton
  2004-01-22 11:09 ` Martin Mares
@ 2004-01-22 13:12 ` Andi Kleen
  2004-01-22 18:21   ` Alan Cox
  2004-01-22 16:40 ` Grant Grundler
  2004-01-22 17:00 ` Greg KH
  4 siblings, 1 reply; 43+ messages in thread
From: Andi Kleen @ 2004-01-22 13:12 UTC (permalink / raw)
  To: Durairaj, Sundarapandian
  Cc: linux-kernel, linux-pci, torvalds, alan, greg, Kondratiev,
	Vladimir, Seshadri, Harinarayanan

On Thu, Jan 22, 2004 at 03:51:22PM +0530, Durairaj, Sundarapandian wrote:
> Please review this and send in your comments.

Looks better now. Still a few nitpicks.

> +	  access mechanism (Will work only on PCI Express based system)
> +	  otherwise the pci direct mechanism will be used.

Is that true? It won't use PCI BIOS anymore?  If true this looks not
right.

>  	return ((unsigned char *) base + offset);
>  }
>  
> +#ifdef CONFIG_PCI_EXPRESS
> +extern u32 mmcfg_base_address;

Please put that into some header.

> +{
> +	struct acpi_table_mcfg	*mcfg = NULL;
> +
> +	if (!phys_addr || !size)
> +		return -EINVAL;
> +
> +	mcfg = (struct acpi_table_mcfg *) __acpi_map_table
> +						(phys_addr, size);
> +	if (!mcfg) {
> +		printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
> +		return -ENODEV;
> +	}
> +	if (mcfg->base_address)
> +		mmcfg_base_address = (u32)mcfg->base_address;
> +	printk(KERN_INFO PREFIX "Local  mcfg address %p\n",
> +			mcfg->base_address);

Better drop that printk. It's probably not needed and ACPI is already
too noisy.


> +	}
> +	else if (result < 0) {
> +		printk(KERN_ERR PREFIX "Error parsing MCFG\n");
> +		return result;
> +	}
> +	else if (result > 1) {
> +		printk(KERN_WARNING PREFIX \

The \ is not needed.

>  		return NULL;
>  	}
>  #endif
> +#ifdef CONFIG_PCI_EXPRESS
> +	else if (!strcmp(str, "no_pcie")) {

Would "no_pciexp" be better? no_pcie looks nearly like a typo.

> +	/* Shoot misalligned transaction now */
> +	if (reg & (len-1)){
> +		printk(KERN_ERR "pci_express_conf_read: \
> +					misalligned transaction\n");

misaligned is spelled with one l only (occurs a few more times)

> +#ifdef CONFIG_PCI_EXPRESS
> +	if ((pci_probe & PCI_PROBE_ENHANCED) == 0)
> +		goto type1;
> +	/*
> + 	 *Check if platform we are running is pci express capable

Please always add a space between the * and the text (occurs also a few
more times) 

> +  	 */
> +	if (mmcfg_base_address == 0){
> +		printk(KERN_INFO 
> +		      "MCFG table entry is not found in ACPI
> tables....\n \
> +		       PCI Express not supported in this platform....\n

on this platform

> +#ifdef CONFIG_PCI_EXPRESS
> +/*
> + *Variable used to store the base address of the last pciexpress device
> + *accessed.
> + */
> +static u32 pcie_last_accessed_device;

static in a header is a bad idea. Make this a global, defined in some file.

> +static __inline__ void pci_exp_set_dev_base (int bus, int devfn)
> +{
> +	u32 dev_base = 
> +		mmcfg_base_address | (bus << 20) | (devfn << 12);
> +	if (dev_base != pcie_last_accessed_device){
> +		pcie_last_accessed_device = dev_base;
> +		set_fixmap (FIX_PCIE_MCFG, dev_base);
> +	}
> +}
> +
> +static __inline__ void pci_express_read(int bus, int devfn, int reg, 
> +		int len, u32 *value)
> +{
> +	unsigned long flags;
> +	spin_lock_irqsave(&pci_config_lock, flags);
> +	pci_exp_set_dev_base(bus, devfn);

You could share/uninline the read/write functions if you made the interface
something like

	void *map_addr = pci_exp_map_dev_base(bus, devfn);

	... use map_addr... for the access

Having them inline doesn't make much sense anyways because they should
be accessed using function pointers.

> +	/* Dummy read to flush PCI write */
> +	readl (mmcfg_virt_addr);
> +	spin_unlock_irqrestore(&pci_config_lock, flags);

And move the spin lock/unlock into an inline too. Then a 64-bit
implementation can just define it as a dummy (not needed when
everything is statically mapped).

-Andi

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-22 10:21 Durairaj, Sundarapandian
  2004-01-22 10:44 ` Andrew Morton
@ 2004-01-22 11:09 ` Martin Mares
  2004-01-22 13:12 ` Andi Kleen
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 43+ messages in thread
From: Martin Mares @ 2004-01-22 11:09 UTC (permalink / raw)
  To: Durairaj, Sundarapandian
  Cc: linux-kernel, linux-pci, torvalds, alan, greg, Andi Kleen,
	Kondratiev, Vladimir, Seshadri, Harinarayanan

Hello!

> I am reposting the updated patch after incorporating the review comments.

Looks good, but there are still some places to polish (in addition to
Andrew's comments):

> +	  to access the pci configuration space through enhanced config
> +	  access mechanism (Will work only on PCI Express based system)

"pci" should be "PCI".

> +	mcfg = (struct acpi_table_mcfg *) __acpi_map_table
> +						(phys_addr, size);

Wrapping long lines is good, but you seem to overdo it.

> +	printk(KERN_INFO PREFIX "Local  mcfg address %p\n",
> +			mcfg->base_address);

Again -- you should be consistent in using caps: "MCFG", not "mcfg".

> +#ifdef CONFIG_PCI_EXPRESS
> +	else if (!strcmp(str, "no_pcie")) {

Why "no_pcie" with an underscore when existing switches ("noacpi", "nobios"
etc.) don't have one?

> +	if (mmcfg_base_address == 0){
> +		printk(KERN_INFO 
> +		      "MCFG table entry is not found in ACPI
> tables....\n \
> +		       PCI Express not supported in this platform....\n
> \
> +		       Not enabling Enhanced Configuration....\n");
> +		goto type1;
> +	}

Why print such an enormous banner to report a trivial error?
One line is enough.

> +		printk(KERN_INFO "PCI:Using config type PCIExp\n");

"PCI:Using" -> "PCI: Using".

>  obj-$(CONFIG_PCI_DIRECT)	+= direct.o
> +obj-$(CONFIG_PCI_EXPRES)	+= direct.o

PCI_EXPRES -> PCI_EXPRESS

Also, linking the same object twice doesn't look right.

> +static int pci_cfg_space_size (struct pci_dev *dev)
> +{
> +#ifdef CONFIG_PCI_EXPRESS
> +	/* Find whether the device is PCI Express device */
> +	int is_pci_express_dev = 
> +		pci_find_capability(dev, PCI_CAP_ID_EXP);
> +	if (is_pci_express_dev)
> +		return PCI_CFG_SPACE_EXP_SIZE;
> +	else
> +#endif
> +	return PCI_CFG_SPACE_SIZE; 
> +}

We really shouldn't scan the capability list during each access to /proc/bus/pci.
Better calculate the configuration space size when probing the device
and put it to struct pci_dev.

> +#ifdef CONFIG_PCI_EXPRESS
> +/*
> + *Variable used to store the base address of the last pciexpress device
> + *accessed.
> + */
> +static u32 pcie_last_accessed_device;

Header files should not contain static variables.

> +static __inline__ void pci_express_read(int bus, int devfn, int reg, 
> +		int len, u32 *value)

Why is this inline?

> +	u64	base_address;

If the base_address is 64-bit and you stuff it in a 32-bit variable, you
should check the upper 32 bits and in case they are non-zero, print an error
message.

				Have a nice fortnight
-- 
Martin `MJ' Mares   <mj@ucw.cz>   http://atrey.karlin.mff.cuni.cz/~mj/
Faculty of Math and Physics, Charles University, Prague, Czech Rep., Earth
Entropy isn't what it used to be.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2004-01-22 10:21 Durairaj, Sundarapandian
@ 2004-01-22 10:44 ` Andrew Morton
  2004-01-22 11:09 ` Martin Mares
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 43+ messages in thread
From: Andrew Morton @ 2004-01-22 10:44 UTC (permalink / raw)
  To: Durairaj, Sundarapandian
  Cc: linux-kernel, linux-pci, torvalds, alan, greg, ak,
	vladimir.kondratiev, harinarayanan.seshadri,
	sundarapandian.durairaj

"Durairaj, Sundarapandian" <sundarapandian.durairaj@intel.com> wrote:
>
> This is the patch on PCI Express Enhanced configuration for 2.6.0 test11
> ...
> Please review this and send in your comments.

A bit of triviata:

> diff -Naur linux-2.6.0/arch/i386/Kconfig
> linux_pciexpress/arch/i386/Kconfig
> --- linux-2.6.0/arch/i386/Kconfig	2003-12-18 08:28:16.000000000
> +0530

Your mailer wordwrapped it.  This seems to be a favourite pastime at Intel
;) You need to struggle with your email client for a while, or give up and
use attachments.

> +++ linux_pciexpress/arch/i386/kernel/acpi/boot.c	2004-01-12
> 14:14:22.000000000 +0530
> @@ -93,6 +93,29 @@
>  	return ((unsigned char *) base + offset);
>  }
>  
> +#ifdef CONFIG_PCI_EXPRESS
> +extern u32 mmcfg_base_address;

extern declarations should go in .h files, not in .c.

> +static int __init acpi_parse_mcfg
> +			 (unsigned long phys_addr, unsigned long size)
> +{
> +	struct acpi_table_mcfg	*mcfg = NULL;
> +
> +	if (!phys_addr || !size)
> +		return -EINVAL;
> +
> +	mcfg = (struct acpi_table_mcfg *) __acpi_map_table
> +						(phys_addr, size);
> +	if (!mcfg) {
> +		printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
> +		return -ENODEV;
> +	}
> +	if (mcfg->base_address)
> +		mmcfg_base_address = (u32)mcfg->base_address;

Is it OK to chop this u64 down to u32?  If so, why was it u64?

> +#ifdef CONFIG_PCI_EXPRESS
> +	else if (!strcmp(str, "no_pcie")) {
> +		pci_probe &= !PCI_PROBE_ENHANCED;
> +		return NULL;
> +	}

should that be a `!' operator?  Or `~'?

>  /*
> + *We map full Page size on each request. Incidently that's the size we
> + *have for config space too.
> + */

Conventionally we put a space after the "*" in comments.


> +/*
> + *Variable used to store the virtual  address of fixed PTE
> + */
> +char * mmcfg_virt_addr;

But we don't put spaces after this sort of asterisk.

> +
> +static int pci_express_conf_read(int seg, int bus,
> +		int devfn, int reg, int len, u32 *value)
> +{
> +	if (!value || ((u32)bus > 255) || ((u32)devfn > 255) 
> +		|| ((u32)reg > 4095)){

If you're casting these so as to catch negative ints the cast should be to
`unsigned', not `u32'.  Or, better, make this function simply take unsigned
args if negative values are nonsensical.


> +	/* Shoot misalligned transaction now */

Spellling error here.

> +	if (reg & (len-1)){

Space before the {

> +		printk(KERN_ERR "pci_express_conf_read: \
> +					misalligned transaction\n");

This string has a bunch of tabs in the middle.

Remove the `\' and do

		printk(KERN_ERR "pci_express_conf_read: "
				"misalligned transaction\n");


> +static int pci_express_conf_write(int seg, int bus, 
> +			int devfn, int reg, int len, u32 value)
> +{
> +	if (((u32)bus > 255) || ((u32)devfn > 255) 
> +		|| ((u32)reg > 4095)){

See above.

> +		printk(KERN_ERR "pci_express_conf_write: \
> +					Invalid Parameter\n");

tabs

> +	/* Shoot misalligned transaction now */

spelling

> +	if (reg & (len-1)){
> +		printk(KERN_ERR "pci_express_conf_write: \
> +					misalligned transaction\n");

tabs

> +	if (mmcfg_base_address == 0){

space

> +		printk(KERN_INFO 
> +		      "MCFG table entry is not found in ACPI
> tables....\n \
> +		       PCI Express not supported in this platform....\n
> \
> +		       Not enabling Enhanced Configuration....\n");

Use compile-time string concatenation here as well.

>  static int proc_initialized;	/* = 0 */

The comment here isn't really needed: initialisation of BSS is kernel
folklore.

>  
> +static int pci_cfg_space_size (struct pci_dev *dev)

No space before the opening parenthesis.

> + 		size = pci_cfg_space_size (dev);

Ditto

> +extern u32 mmcfg_base_address;
> +extern spinlock_t pci_config_lock;
> +extern char * mmcfg_virt_addr;

These should all be in header files.

> +
> +static __inline__ void pci_exp_set_dev_base (int bus, int devfn)
> +static __inline__ void pci_express_read(int bus, int devfn, int reg, 

`inline', not __inline__

> +		int len, u32 *value)
> +{
> +	unsigned long flags;
> +	spin_lock_irqsave(&pci_config_lock, flags);
> +	pci_exp_set_dev_base(bus, devfn);
> + 	switch (len) {
> +        case 1:
> +		*value = (u8)readb((unsigned long) mmcfg_virt_addr +
> reg);
> +		break;
> +        case 2:
> +		*value = (u16)readw((unsigned long) mmcfg_virt_addr +
> reg);
> +		break;
> +        case 4:
> +		*value = (u32)readl((unsigned long) mmcfg_virt_addr +
> reg);

Are these casts needed?

> +static __inline__ void pci_express_write(int bus, int devfn, int reg, 

inline

> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&pci_config_lock, flags);
> +	pci_exp_set_dev_base(bus, devfn);
> +	switch (len) {
> +		case 1:
> +			writeb(value,(unsigned long)mmcfg_virt_addr +
> reg);
> +			break;
> +		case 2:
> +			writew(value,(unsigned long)mmcfg_virt_addr +
> reg);
> +			break;
> +	        case 4:
> +			writel(value,(unsigned long)mmcfg_virt_addr +
> reg);
> +	                break;
> +     	}

Are these casts needed?


^ permalink raw reply	[flat|nested] 43+ messages in thread

* RE: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
@ 2004-01-22 10:21 Durairaj, Sundarapandian
  2004-01-22 10:44 ` Andrew Morton
                   ` (4 more replies)
  0 siblings, 5 replies; 43+ messages in thread
From: Durairaj, Sundarapandian @ 2004-01-22 10:21 UTC (permalink / raw)
  To: linux-kernel, linux-pci
  Cc: torvalds, alan, greg, Andi Kleen, Kondratiev, Vladimir, Seshadri,
	Harinarayanan, Durairaj, Sundarapandian

Hi All, 

I am reposting the updated patch after incorporating the review
comments.

This is the patch on PCI Express Enhanced configuration for 2.6.0 test11
kernel following up to the Vladimir (Vladimir.Kondratiev@intel.com) and
Harinarayanan (Harinarayanan.Seshadri@intel.com)  and my previous
patches .
I tested it on our i386 platform. 

This patch also implements a mechanism for the kernel to find the
chipset specific mmcfg base address. The kernel will detect the base
address of the chipset through the ACPI table entry and based on that
the PCI subsystem will be initialized.  

Please review this and send in your comments.

Thanks,
Sundar

diff -Naur linux-2.6.0/arch/i386/Kconfig
linux_pciexpress/arch/i386/Kconfig
--- linux-2.6.0/arch/i386/Kconfig	2003-12-18 08:28:16.000000000
+0530
+++ linux_pciexpress/arch/i386/Kconfig	2004-01-12 14:28:38.000000000
+0530
@@ -959,7 +959,7 @@
 endmenu
 
 
-menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
+menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA, PCI_EXPRESS)"
 
 config X86_VISWS_APIC
 	bool
@@ -976,6 +976,18 @@
 	depends on SMP && !(X86_VISWS || X86_VOYAGER)
 	default y
 
+config PCI_EXPRESS
+	bool "PCI_EXPRESS (EXPERIMENTAL)" 
+	depends on EXPERIMENTAL && ACPI_BOOT
+	help
+	  PCI Express extends the configuration space from 256 bytes to
+	  4k bytes. It also defines an enhanced configuration mechanism
+	  to acces the extended configuration space.
+	  With this option, you can specify that Linux will first
attempt
+	  to access the pci configuration space through enhanced config
+	  access mechanism (Will work only on PCI Express based system)
+	  otherwise the pci direct mechanism will be used.
+
 config PCI
 	bool "PCI support" if !X86_VISWS
 	depends on !X86_VOYAGER
diff -Naur linux-2.6.0/arch/i386/kernel/acpi/boot.c
linux_pciexpress/arch/i386/kernel/acpi/boot.c
--- linux-2.6.0/arch/i386/kernel/acpi/boot.c	2003-12-18
08:29:29.000000000 +0530
+++ linux_pciexpress/arch/i386/kernel/acpi/boot.c	2004-01-12
14:14:22.000000000 +0530
@@ -93,6 +93,29 @@
 	return ((unsigned char *) base + offset);
 }
 
+#ifdef CONFIG_PCI_EXPRESS
+extern u32 mmcfg_base_address;
+static int __init acpi_parse_mcfg
+			 (unsigned long phys_addr, unsigned long size)
+{
+	struct acpi_table_mcfg	*mcfg = NULL;
+
+	if (!phys_addr || !size)
+		return -EINVAL;
+
+	mcfg = (struct acpi_table_mcfg *) __acpi_map_table
+						(phys_addr, size);
+	if (!mcfg) {
+		printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+		return -ENODEV;
+	}
+	if (mcfg->base_address)
+		mmcfg_base_address = (u32)mcfg->base_address;
+	printk(KERN_INFO PREFIX "Local  mcfg address %p\n",
+			mcfg->base_address);
+	return 0;
+}
+#endif /* CONFIG_PCI_EXPRESS*/
 
 #ifdef CONFIG_X86_LOCAL_APIC
 
@@ -508,6 +531,22 @@
 
 #endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */
 
+#ifdef CONFIG_PCI_EXPRESS
+	result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+	if (!result) {
+		printk(KERN_WARNING PREFIX "MCFG not present\n");
+		return 0;
+	}
+	else if (result < 0) {
+		printk(KERN_ERR PREFIX "Error parsing MCFG\n");
+		return result;
+	}
+	else if (result > 1) {
+		printk(KERN_WARNING PREFIX \
+			"Multiple MCFG tables exist\n");
+	}
+#endif /*CONFIG_PCI_EXPRESS*/
+
 #ifdef CONFIG_X86_LOCAL_APIC
 	if (acpi_lapic && acpi_ioapic) {
 		smp_found_config = 1;
diff -Naur linux-2.6.0/arch/i386/pci/common.c
linux_pciexpress/arch/i386/pci/common.c
--- linux-2.6.0/arch/i386/pci/common.c	2003-12-18 08:28:46.000000000
+0530
+++ linux_pciexpress/arch/i386/pci/common.c	2004-01-22
11:54:42.000000000 +0530
@@ -19,7 +19,8 @@
 extern  void pcibios_sort(void);
 #endif
 
-unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 |
PCI_PROBE_CONF2;
+unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 |
PCI_PROBE_CONF2
+				 | PCI_PROBE_ENHANCED;
 
 int pcibios_last_bus = -1;
 struct pci_bus *pci_root_bus = NULL;
@@ -197,6 +198,12 @@
 		return NULL;
 	}
 #endif
+#ifdef CONFIG_PCI_EXPRESS
+	else if (!strcmp(str, "no_pcie")) {
+		pci_probe &= !PCI_PROBE_ENHANCED;
+		return NULL;
+	}
+#endif
 #ifdef CONFIG_ACPI_PCI
 	else if (!strcmp(str, "noacpi")) {
 		pci_probe |= PCI_NO_ACPI_ROUTING;
diff -Naur linux-2.6.0/arch/i386/pci/direct.c
linux_pciexpress/arch/i386/pci/direct.c
--- linux-2.6.0/arch/i386/pci/direct.c	2003-12-18 08:28:28.000000000
+0530
+++ linux_pciexpress/arch/i386/pci/direct.c	2004-01-22
12:07:37.000000000 +0530
@@ -168,6 +168,71 @@
 
 
 /*
+ *We map full Page size on each request. Incidently that's the size we
+ *have for config space too.
+ */
+#ifdef CONFIG_PCI_EXPRESS
+/* 
+ *On PCI Express capable platform, at the time of kernel initialization
+ *the os would have scanned for mcfg table and set this variable to 
+ *appropriate value. If PCI Express not supported the variable will 
+ * have 0 value
+ */
+u32 mmcfg_base_address;
+
+/*
+ *Variable used to store the virtual  address of fixed PTE
+ */
+char * mmcfg_virt_addr;
+
+static int pci_express_conf_read(int seg, int bus,
+		int devfn, int reg, int len, u32 *value)
+{
+	if (!value || ((u32)bus > 255) || ((u32)devfn > 255) 
+		|| ((u32)reg > 4095)){
+		printk(KERN_ERR "pci_express_conf_read: \
+					Invalid Parameter\n");
+  		return -EINVAL;
+	}
+
+	/* Shoot misalligned transaction now */
+	if (reg & (len-1)){
+		printk(KERN_ERR "pci_express_conf_read: \
+					misalligned transaction\n");
+  		return -EINVAL;
+	}
+	pci_express_read(bus, devfn, reg, len, value);
+
+	return 0;
+}
+ 
+static int pci_express_conf_write(int seg, int bus, 
+			int devfn, int reg, int len, u32 value)
+{
+	if (((u32)bus > 255) || ((u32)devfn > 255) 
+		|| ((u32)reg > 4095)){
+		printk(KERN_ERR "pci_express_conf_write: \
+					Invalid Parameter\n");
+		return -EINVAL;
+	}
+
+	/* Shoot misalligned transaction now */
+	if (reg & (len-1)){
+		printk(KERN_ERR "pci_express_conf_write: \
+					misalligned transaction\n");
+  		return -EINVAL;
+	}
+	pci_express_write(bus, devfn, reg, len, value);
+	return 0;
+}
+
+static struct pci_raw_ops pci_express_conf = {
+	.read   =	pci_express_conf_read,
+	.write  =	pci_express_conf_write,
+};
+#endif /* CONFIG_PCI_EXPRESS */
+
+/*
  * Before we decide to use direct hardware access mechanisms, we try to
do some
  * trivial checks to ensure it at least _seems_ to be working -- we
just test
  * whether bus 00 contains a host bridge (this is similar to checking
@@ -244,7 +309,31 @@
 static int __init pci_direct_init(void)
 {
 	struct resource *region, *region2;
+	
+#ifdef CONFIG_PCI_EXPRESS
+	if ((pci_probe & PCI_PROBE_ENHANCED) == 0)
+		goto type1;
+	/*
+ 	 *Check if platform we are running is pci express capable
+  	 */
+	if (mmcfg_base_address == 0){
+		printk(KERN_INFO 
+		      "MCFG table entry is not found in ACPI
tables....\n \
+		       PCI Express not supported in this platform....\n
\
+		       Not enabling Enhanced Configuration....\n");
+		goto type1;
+	}
 
+	/* Calculate the virtual address of the PTE */
+	mmcfg_virt_addr = (char *) (fix_to_virt(FIX_PCIE_MCFG));
+
+	if (pci_sanity_check(&pci_express_conf)) {
+		printk(KERN_INFO "PCI:Using config type PCIExp\n");
+		raw_pci_ops = &pci_express_conf;
+		return 0;
+	}
+type1:
+#endif /* CONFIG_PCI_EXPRESS */
 	if ((pci_probe & PCI_PROBE_CONF1) == 0)
 		goto type2;
 	region = request_region(0xCF8, 8, "PCI conf1");
diff -Naur linux-2.6.0/arch/i386/pci/Makefile
linux_pciexpress/arch/i386/pci/Makefile
--- linux-2.6.0/arch/i386/pci/Makefile	2003-12-18 08:28:57.000000000
+0530
+++ linux_pciexpress/arch/i386/pci/Makefile	2004-01-12
13:38:55.000000000 +0530
@@ -2,6 +2,7 @@
 
 obj-$(CONFIG_PCI_BIOS)		+= pcbios.o
 obj-$(CONFIG_PCI_DIRECT)	+= direct.o
+obj-$(CONFIG_PCI_EXPRES)	+= direct.o
 
 pci-y				:= fixup.o
 pci-$(CONFIG_ACPI_PCI)		+= acpi.o
diff -Naur linux-2.6.0/arch/i386/pci/pci.h
linux_pciexpress/arch/i386/pci/pci.h
--- linux-2.6.0/arch/i386/pci/pci.h	2003-12-18 08:28:57.000000000
+0530
+++ linux_pciexpress/arch/i386/pci/pci.h	2004-01-12
13:38:55.000000000 +0530
@@ -15,6 +15,11 @@
 #define PCI_PROBE_BIOS		0x0001
 #define PCI_PROBE_CONF1		0x0002
 #define PCI_PROBE_CONF2		0x0004
+#ifdef CONFIG_PCI_EXPRESS
+#define PCI_PROBE_ENHANCED	0x0008
+#else
+#define PCI_PROBE_ENHANCED 	0x0
+#endif
 #define PCI_NO_SORT		0x0100
 #define PCI_BIOS_SORT		0x0200
 #define PCI_NO_CHECKS		0x0400
diff -Naur linux-2.6.0/drivers/acpi/tables.c
linux_pciexpress/drivers/acpi/tables.c
--- linux-2.6.0/drivers/acpi/tables.c	2003-12-18 08:28:46.000000000
+0530
+++ linux_pciexpress/drivers/acpi/tables.c	2004-01-12
13:38:20.000000000 +0530
@@ -58,6 +58,7 @@
 	[ACPI_SSDT]		= "SSDT",
 	[ACPI_SPMI]		= "SPMI",
 	[ACPI_HPET]		= "HPET",
+	[ACPI_MCFG]		= "MCFG",
 };
 
 /* System Description Table (RSDT/XSDT) */
diff -Naur linux-2.6.0/drivers/pci/pci.c
linux_pciexpress/drivers/pci/pci.c
--- linux-2.6.0/drivers/pci/pci.c	2003-12-18 08:28:38.000000000
+0530
+++ linux_pciexpress/drivers/pci/pci.c	2004-01-12 13:38:06.000000000
+0530
@@ -90,6 +90,8 @@
  *  %PCI_CAP_ID_CHSWP        CompactPCI HotSwap 
  *
  *  %PCI_CAP_ID_PCIX         PCI-X
+ *  %PCI_CAP_ID_EXP          PCI-EXP
+
  */
 int
 pci_find_capability(struct pci_dev *dev, int cap)
diff -Naur linux-2.6.0/drivers/pci/proc.c
linux_pciexpress/drivers/pci/proc.c
--- linux-2.6.0/drivers/pci/proc.c	2003-12-18 08:28:57.000000000
+0530
+++ linux_pciexpress/drivers/pci/proc.c	2004-01-12 14:36:27.000000000
+0530
@@ -17,13 +17,30 @@
 #include <asm/byteorder.h>
 
 #define PCI_CFG_SPACE_SIZE 256
+#define PCI_CFG_SPACE_EXP_SIZE 4096
 
 static int proc_initialized;	/* = 0 */
 
+static int pci_cfg_space_size (struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_EXPRESS
+	/* Find whether the device is PCI Express device */
+	int is_pci_express_dev = 
+		pci_find_capability(dev, PCI_CAP_ID_EXP);
+	if (is_pci_express_dev)
+		return PCI_CFG_SPACE_EXP_SIZE;
+	else
+#endif
+	return PCI_CFG_SPACE_SIZE; 
+}
+
 static loff_t
 proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
 {
 	loff_t new = -1;
+	const struct inode *ino = file->f_dentry->d_inode;
+	const struct proc_dir_entry *dp = PDE(ino);
+	struct pci_dev *dev = dp->data;
 
 	lock_kernel();
 	switch (whence) {
@@ -34,11 +51,11 @@
 		new = file->f_pos + off;
 		break;
 	case 2:
-		new = PCI_CFG_SPACE_SIZE + off;
+		new = pci_cfg_space_size(dev) + off;
 		break;
 	}
 	unlock_kernel();
-	if (new < 0 || new > PCI_CFG_SPACE_SIZE)
+	if (new < 0 || new > pci_cfg_space_size(dev))
 		return -EINVAL;
 	return (file->f_pos = new);
 }
@@ -59,7 +76,7 @@
 	 */
 
 	if (capable(CAP_SYS_ADMIN))
-		size = PCI_CFG_SPACE_SIZE;
+ 		size = pci_cfg_space_size (dev);
 	else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
 		size = 128;
 	else
@@ -134,12 +151,14 @@
 	int pos = *ppos;
 	int cnt;
 
-	if (pos >= PCI_CFG_SPACE_SIZE)
+	int size;
+	size = pci_cfg_space_size(dev);
+	if (pos >= size)
 		return 0;
-	if (nbytes >= PCI_CFG_SPACE_SIZE)
-		nbytes = PCI_CFG_SPACE_SIZE;
-	if (pos + nbytes > PCI_CFG_SPACE_SIZE)
-		nbytes = PCI_CFG_SPACE_SIZE - pos;
+	if (nbytes >= size)
+		nbytes = size;
+	if (pos + nbytes > size)
+		nbytes = size - pos;
 	cnt = nbytes;
 
 	if (!access_ok(VERIFY_READ, buf, cnt))
@@ -384,6 +403,7 @@
 	struct pci_bus *bus = dev->bus;
 	struct proc_dir_entry *de, *e;
 	char name[16];
+	int size;
 
 	if (!proc_initialized)
 		return -EACCES;
@@ -401,7 +421,8 @@
 		return -ENOMEM;
 	e->proc_fops = &proc_bus_pci_operations;
 	e->data = dev;
-	e->size = PCI_CFG_SPACE_SIZE;
+	size = pci_cfg_space_size(dev);
+	e->size = size;
 
 	return 0;
 }
diff -Naur linux-2.6.0/include/asm-i386/fixmap.h
linux_pciexpress/include/asm-i386/fixmap.h
--- linux-2.6.0/include/asm-i386/fixmap.h	2003-12-18
08:28:06.000000000 +0530
+++ linux_pciexpress/include/asm-i386/fixmap.h	2004-01-12
13:40:19.000000000 +0530
@@ -67,6 +67,9 @@
 	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings
*/
 	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
 #endif
+#ifdef CONFIG_PCI_EXPRESS
+	FIX_PCIE_MCFG,
+#endif
 #ifdef CONFIG_ACPI_BOOT
 	FIX_ACPI_BEGIN,
 	FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
diff -Naur linux-2.6.0/include/asm-i386/pci.h
linux_pciexpress/include/asm-i386/pci.h
--- linux-2.6.0/include/asm-i386/pci.h	2003-12-18 08:28:47.000000000
+0530
+++ linux_pciexpress/include/asm-i386/pci.h	2004-01-12
14:39:42.000000000 +0530
@@ -96,4 +96,68 @@
 /* generic pci stuff */
 #include <asm-generic/pci.h>
 
+#ifdef CONFIG_PCI_EXPRESS
+/*
+ *Variable used to store the base address of the last pciexpress device
+ *accessed.
+ */
+static u32 pcie_last_accessed_device;
+
+extern u32 mmcfg_base_address;
+extern spinlock_t pci_config_lock;
+extern char * mmcfg_virt_addr;
+
+static __inline__ void pci_exp_set_dev_base (int bus, int devfn)
+{
+	u32 dev_base = 
+		mmcfg_base_address | (bus << 20) | (devfn << 12);
+	if (dev_base != pcie_last_accessed_device){
+		pcie_last_accessed_device = dev_base;
+		set_fixmap (FIX_PCIE_MCFG, dev_base);
+	}
+}
+
+static __inline__ void pci_express_read(int bus, int devfn, int reg, 
+		int len, u32 *value)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&pci_config_lock, flags);
+	pci_exp_set_dev_base(bus, devfn);
+ 	switch (len) {
+        case 1:
+		*value = (u8)readb((unsigned long) mmcfg_virt_addr +
reg);
+		break;
+        case 2:
+		*value = (u16)readw((unsigned long) mmcfg_virt_addr +
reg);
+		break;
+        case 4:
+		*value = (u32)readl((unsigned long) mmcfg_virt_addr +
reg);
+		break;
+	}
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+}
+
+static __inline__ void pci_express_write(int bus, int devfn, int reg, 
+	int len, u32 value)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+	pci_exp_set_dev_base(bus, devfn);
+	switch (len) {
+		case 1:
+			writeb(value,(unsigned long)mmcfg_virt_addr +
reg);
+			break;
+		case 2:
+			writew(value,(unsigned long)mmcfg_virt_addr +
reg);
+			break;
+	        case 4:
+			writel(value,(unsigned long)mmcfg_virt_addr +
reg);
+	                break;
+     	}
+	/* Dummy read to flush PCI write */
+	readl (mmcfg_virt_addr);
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+}
+#endif /* CONFIG_PCI_EXPRESS */
 #endif /* __i386_PCI_H */
diff -Naur linux-2.6.0/include/linux/acpi.h
linux_pciexpress/include/linux/acpi.h
--- linux-2.6.0/include/linux/acpi.h	2003-12-18 08:27:58.000000000
+0530
+++ linux_pciexpress/include/linux/acpi.h	2004-01-12
14:43:23.000000000 +0530
@@ -317,6 +317,12 @@
 	char				ec_id[0];
 } __attribute__ ((packed));
 
+struct acpi_table_mcfg {
+	struct acpi_table_header 	header;
+	u8	reserved[8];
+	u64	base_address;
+} __attribute__ ((packed));
+
 /* Table Handlers */
 
 enum acpi_table_id {
@@ -338,6 +344,7 @@
 	ACPI_SSDT,
 	ACPI_SPMI,
 	ACPI_HPET,
+	ACPI_MCFG,
 	ACPI_TABLE_COUNT
 };
 
diff -Naur linux-2.6.0/include/linux/pci.h
linux_pciexpress/include/linux/pci.h
--- linux-2.6.0/include/linux/pci.h	2003-12-18 08:28:49.000000000
+0530
+++ linux_pciexpress/include/linux/pci.h	2004-01-12
13:40:01.000000000 +0530
@@ -198,6 +198,7 @@
 #define  PCI_CAP_ID_MSI		0x05	/* Message Signalled
Interrupts */
 #define  PCI_CAP_ID_CHSWP	0x06	/* CompactPCI HotSwap */
 #define  PCI_CAP_ID_PCIX	0x07	/* PCI-X */
+#define  PCI_CAP_ID_EXP 	0x10	/* PCI-Express*/
 #define PCI_CAP_LIST_NEXT	1	/* Next capability in the list
*/
 #define PCI_CAP_FLAGS		2	/* Capability defined flags (16
bits) */
 #define PCI_CAP_SIZEOF		4

^ permalink raw reply	[flat|nested] 43+ messages in thread

* RE: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
@ 2004-01-07 16:44 Nakajima, Jun
  0 siblings, 0 replies; 43+ messages in thread
From: Nakajima, Jun @ 2004-01-07 16:44 UTC (permalink / raw)
  To: Durairaj, Sundarapandian, linux-kernel
  Cc: Grege, Seshadri, Harinarayanan, Kondratiev, Vladimir, Brown, Len

This was pointed out before, but you have a dependency on ACPI (i.e.
CONFIG_ACPI_BOOT). To me, it's not resolved yet in this patch.

>  static int __init pci_direct_init(void)
>  {
>  	struct resource *region, *region2;
> +	unsigned long flags;
> +#ifdef CONFIG_PCI_EXP_ENHANCED

I think you should move flags inside #ifdef. But do you really need to
hold a spin lock there? This is done by arch_initcall().

	Jun
> -----Original Message-----
> From: linux-kernel-owner@vger.kernel.org [mailto:linux-kernel-
> owner@vger.kernel.org] On Behalf Of Durairaj, Sundarapandian
> Sent: Wednesday, January 07, 2004 5:00 AM
> To: linux-kernel@vger.kernel.org
> Cc: Grege@kroah.com; Seshadri, Harinarayanan; Kondratiev, Vladimir
> Subject: RE: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
> 
> Hi All,
> 
> Thanks for your review comments. I am reposting the updated patch
after
> incorporating the review comments.
> Please review this and send your comments.
> 
> Thanks,
> Sundar
> 
> ------------------------------------------------
> 
> diff -Naur linux-2.6.0/arch/i386/Kconfig
> linux_pciexpress/arch/i386/Kconfig
> --- linux-2.6.0/arch/i386/Kconfig	2003-12-18 08:28:16.000000000
> +0530
> +++ linux_pciexpress/arch/i386/Kconfig	2004-01-07
10:59:23.000000000
> +0530
> @@ -959,7 +959,7 @@
>  endmenu
> 
> 
> -menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
> +menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA, PCI_EXPRESS)"
> 
>  config X86_VISWS_APIC
>  	bool
> @@ -976,6 +976,18 @@
>  	depends on SMP && !(X86_VISWS || X86_VOYAGER)
>  	default y
> 
> +config PCI_EXP_ENHANCED
> +	bool "PCI_EXPRESS (EXPERIMENTAL)"
> +	depends on EXPERIMENTAL
> +	help
> +	   PCI Express extends the configuration space from 256 bytes to
> 4k
> +	   bytes. It also defines an enhanced configuration mechanism to
> acces
> +	   the extended configuration space.
> +	   With this option, you can specify that Linux will first
> attempt to
> +	   access the pci configuration space through enhanced config
> access
> +	   mechanism (Will work only on PCI Express based system)
> otherwise the
> +	   pci direct mechanism will be used.
> +
>  config PCI
>  	bool "PCI support" if !X86_VISWS
>  	depends on !X86_VOYAGER
> diff -Naur linux-2.6.0/arch/i386/kernel/acpi/boot.c
> linux_pciexpress/arch/i386/kernel/acpi/boot.c
> --- linux-2.6.0/arch/i386/kernel/acpi/boot.c	2003-12-18
> 08:29:29.000000000 +0530
> +++ linux_pciexpress/arch/i386/kernel/acpi/boot.c	2004-01-07
> 18:20:23.000000000 +0530
> @@ -93,6 +93,28 @@
>  	return ((unsigned char *) base + offset);
>  }
> 
> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +extern u64 mmcfg_base_address;
> +static int __init acpi_parse_mcfg
> +			 (unsigned long phys_addr, unsigned long size)
> +{
> +	struct acpi_table_mcfg	*mcfg = NULL;
> +
> +	if (!phys_addr || !size)
> +		return -EINVAL;
> +
> +	mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr,
> size);
> +	if (!mcfg) {
> +		printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
> +		return -ENODEV;
> +	}
> +	if (mcfg->base_address)
> +		mmcfg_base_address =mcfg->base_address;
> +	printk(KERN_INFO PREFIX "Local  mcfg address %p\n",
> +			mcfg->base_address);
> +	return 0;
> +}
> +#endif /* CONFIG_PCI_EXP_ENHANCED*/
> 
>  #ifdef CONFIG_X86_LOCAL_APIC
> 
> @@ -508,6 +530,21 @@
> 
>  #endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */
> 
> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +	result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
> +	if (!result) {
> +		printk(KERN_WARNING PREFIX "MCFG not present\n");
> +		return 0;
> +	}
> +	else if (result < 0) {
> +		printk(KERN_ERR PREFIX "Error parsing MCFG\n");
> +		return result;
> +	}
> +	else if (result > 1) {
> +		printk(KERN_WARNING PREFIX "Multiple MCFG tables
> exist\n");
> +	}
> +#endif /*CONFIG_PCI_EXP_ENHANCED*/
> +
>  #ifdef CONFIG_X86_LOCAL_APIC
>  	if (acpi_lapic && acpi_ioapic) {
>  		smp_found_config = 1;
> diff -Naur linux-2.6.0/arch/i386/pci/direct.c
> linux_pciexpress/arch/i386/pci/direct.c
> --- linux-2.6.0/arch/i386/pci/direct.c	2003-12-18
08:28:28.000000000
> +0530
> +++ linux_pciexpress/arch/i386/pci/direct.c	2004-01-07
> 18:16:57.000000000 +0530
> @@ -168,6 +168,124 @@
> 
> 
>  /*
> + *We map full Page size on each request. Incidently that's the size
we
> + *have for config space too.
> + */
> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +/*
> + *On PCI Express capable platform, at the time of kernel
initialization
> + *the os would have scanned for mcfg table and set this variable to
> + *appropriate value.
> + *If PCI Express not supported the variable will have 0 value
> + */
> +u64 mmcfg_base_address;
> +
> +/*
> + *Variable used to store the base address of the last pciexpress
device
> 
> + *accessed.
> + */
> +u32 pcie_last_accessed_device;
> +
> +unsigned long pci_exp_set_dev_base (int bus, int dev, int fn)
> +{
> +	u32 dev_base =
> +		mmcfg_base_address | (bus << 20) | ((PCI_DEVFN (dev,fn))
> <<12);
> +	if (dev_base != pcie_last_accessed_device){
> +		pcie_last_accessed_device = dev_base;
> +		set_fixmap (FIX_PCIE_MCFG, dev_base);
> +	}
> +	return 0;
> +}
> +
> +static int pci_express_conf_read(int seg, int bus,
> +		int devfn, int reg, int len, u32 *value)
> +{
> +	unsigned long flags;
> +	char * virt_addr;
> +	int dev = PCI_SLOT (devfn);
> +	int fn  = PCI_FUNC (devfn);
> +
> +	if (!value || ((u32)bus > 255) || ((u32)dev > 31)
> +			|| ((u32)fn > 7) || ((u32)reg > 4095)){
> +		printk(KERN_ERR "pci_express_conf_read: Invalid
> Parameter\n");
> +  		return -EINVAL;
> +	}
> +
> +	/* Shoot misalligned transaction now */
> +	if (reg & (len-1)){
> +		printk(KERN_ERR "pci_express_conf_read: \
> +					misalligned transaction\n");
> +  		return -EINVAL;
> +	}
> +
> +	spin_lock_irqsave(&pci_config_lock, flags);
> +	pci_exp_set_dev_base(bus, dev, fn);
> +	virt_addr = (char *) (fix_to_virt(FIX_PCIE_MCFG));
> + 	switch (len) {
> +        case 1:
> +		*value = (u8)readb((unsigned long) virt_addr+reg);
> +		break;
> +        case 2:
> +		*value = (u16)readw((unsigned long) virt_addr+reg);
> +		break;
> +        case 4:
> +		*value = (u32)readl((unsigned long) virt_addr+reg);
> +		break;
> +	}
> +	spin_unlock_irqrestore(&pci_config_lock, flags);
> +	return 0;
> +}
> +
> +static int pci_express_conf_write(int seg, int bus,
> +			int devfn, int reg, int len, u32 value)
> +{
> +	unsigned long flags;
> +	unsigned char * virt_addr;
> +	int dev = PCI_SLOT (devfn);
> +	int fn  = PCI_FUNC (devfn);
> +
> +	if (!value || ((u32)bus > 255) || ((u32)dev > 31) ||
> +		((u32)fn > 7) || ((u32)reg > 4095)){
> +		printk(KERN_ERR "pci_express_conf_write: \
> +					Invalid Parameter\n");
> +		return -EINVAL;
> +	}
> +
> +	/* Shoot misalligned transaction now */
> +	if (reg & (len-1)){
> +		printk(KERN_ERR "pci_express_conf_write: \
> +					misalligned transaction\n");
> +  		return -EINVAL;
> +	}
> +
> +	spin_lock_irqsave(&pci_config_lock, flags);
> +	pci_exp_set_dev_base(bus, dev, fn);
> +	virt_addr = (char *) (fix_to_virt(FIX_PCIE_MCFG));
> +
> +	switch (len) {
> +		case 1:
> +			writeb(value,(unsigned long)virt_addr+reg);
> +			break;
> +		case 2:
> +			writew(value,(unsigned long)virt_addr+reg);
> +			break;
> +	        case 4:
> +			writel(value,(unsigned long)virt_addr+reg);
> +	                break;
> +     	}
> +	/* Dummy read to flush PCI write */
> +	readl (virt_addr);
> +	spin_unlock_irqrestore(&pci_config_lock, flags);
> +	return 0;
> +}
> +
> +static struct pci_raw_ops pci_express_conf = {
> +	.read   =	pci_express_conf_read,
> +	.write  =	pci_express_conf_write,
> +};
> +#endif /* CONFIG_PCI_EXP_ENHANCED */
> +
> +/*
>   * Before we decide to use direct hardware access mechanisms, we try
to
> do some
>   * trivial checks to ensure it at least _seems_ to be working -- we
> just test
>   * whether bus 00 contains a host bridge (this is similar to checking
> @@ -244,6 +362,28 @@
>  static int __init pci_direct_init(void)
>  {
>  	struct resource *region, *region2;
> +	unsigned long flags;
> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +	/*
> + 	 *Check if platform we are running is pci express capable
> +  	 */
> +	if (mmcfg_base_address == 0){
> +		printk(KERN_INFO
> +			"MCFG table entry is not found in ACPI
> tables....\n \
> +			PCI Express not supported in this platform....\n
> \
> +			Not enabling Enhanced Configuration....\n");
> +	}
> +	else {
> +		local_irq_save(flags);
> +		if (pci_sanity_check(&pci_express_conf)) {
> +			local_irq_restore(flags);
> +			printk(KERN_INFO "PCI:Using config type
> PCIExp\n");
> +			raw_pci_ops = &pci_express_conf;
> +			return 0;
> +		}
> +		local_irq_restore(flags);
> +	}
> +#endif /* CONFIG_PCI_EXP_ENHANCED */
> 
>  	if ((pci_probe & PCI_PROBE_CONF1) == 0)
>  		goto type2;
> diff -Naur linux-2.6.0/arch/i386/pci/Makefile
> linux_pciexpress/arch/i386/pci/Makefile
> --- linux-2.6.0/arch/i386/pci/Makefile	2003-12-18
08:28:57.000000000
> +0530
> +++ linux_pciexpress/arch/i386/pci/Makefile	2004-01-07
> 10:59:23.000000000 +0530
> @@ -2,6 +2,7 @@
> 
>  obj-$(CONFIG_PCI_BIOS)		+= pcbios.o
>  obj-$(CONFIG_PCI_DIRECT)	+= direct.o
> +obj-$(CONFIG_PCI_EXP_ENHANCED)	+= direct.o
> 
>  pci-y				:= fixup.o
>  pci-$(CONFIG_ACPI_PCI)		+= acpi.o
> diff -Naur linux-2.6.0/drivers/acpi/tables.c
> linux_pciexpress/drivers/acpi/tables.c
> --- linux-2.6.0/drivers/acpi/tables.c	2003-12-18 08:28:46.000000000
> +0530
> +++ linux_pciexpress/drivers/acpi/tables.c	2004-01-07
> 11:03:43.000000000 +0530
> @@ -58,6 +58,7 @@
>  	[ACPI_SSDT]		= "SSDT",
>  	[ACPI_SPMI]		= "SPMI",
>  	[ACPI_HPET]		= "HPET",
> +	[ACPI_MCFG]		= "MCFG",
>  };
> 
>  /* System Description Table (RSDT/XSDT) */
> diff -Naur linux-2.6.0/drivers/pci/pci.c
> linux_pciexpress/drivers/pci/pci.c
> --- linux-2.6.0/drivers/pci/pci.c	2003-12-18 08:28:38.000000000
> +0530
> +++ linux_pciexpress/drivers/pci/pci.c	2004-01-07
10:59:23.000000000
> +0530
> @@ -90,6 +90,8 @@
>   *  %PCI_CAP_ID_CHSWP        CompactPCI HotSwap
>   *
>   *  %PCI_CAP_ID_PCIX         PCI-X
> + *  %PCI_CAP_ID_EXP          PCI-EXP
> +
>   */
>  int
>  pci_find_capability(struct pci_dev *dev, int cap)
> diff -Naur linux-2.6.0/drivers/pci/proc.c
> linux_pciexpress/drivers/pci/proc.c
> --- linux-2.6.0/drivers/pci/proc.c	2003-12-18 08:28:57.000000000
> +0530
> +++ linux_pciexpress/drivers/pci/proc.c	2004-01-07
17:37:04.000000000
> +0530
> @@ -17,13 +17,29 @@
>  #include <asm/byteorder.h>
> 
>  #define PCI_CFG_SPACE_SIZE 256
> +#define PCI_CFG_SPACE_EXP_SIZE 4096
> 
>  static int proc_initialized;	/* = 0 */
> 
> +static int pci_cfg_space_size (struct pci_dev *dev)
> +{
> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +	/* Find whether the device is PCI Express device */
> +	int is_pci_express_dev =  pci_find_capability(dev,
> PCI_CAP_ID_EXP);
> +	if (is_pci_express_dev)
> +		return PCI_CFG_SPACE_EXP_SIZE;
> +	else
> +#endif
> +	return PCI_CFG_SPACE_SIZE;
> +}
> +
>  static loff_t
>  proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
>  {
>  	loff_t new = -1;
> +	const struct inode *ino = file->f_dentry->d_inode;
> +	const struct proc_dir_entry *dp = PDE(ino);
> +	struct pci_dev *dev = dp->data;
> 
>  	lock_kernel();
>  	switch (whence) {
> @@ -34,11 +50,11 @@
>  		new = file->f_pos + off;
>  		break;
>  	case 2:
> -		new = PCI_CFG_SPACE_SIZE + off;
> +		new = pci_cfg_space_size(dev) + off;
>  		break;
>  	}
>  	unlock_kernel();
> -	if (new < 0 || new > PCI_CFG_SPACE_SIZE)
> +	if (new < 0 || new > pci_cfg_space_size(dev))
>  		return -EINVAL;
>  	return (file->f_pos = new);
>  }
> @@ -59,7 +75,7 @@
>  	 */
> 
>  	if (capable(CAP_SYS_ADMIN))
> -		size = PCI_CFG_SPACE_SIZE;
> + 		size = pci_cfg_space_size (dev);
>  	else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
>  		size = 128;
>  	else
> @@ -134,12 +150,14 @@
>  	int pos = *ppos;
>  	int cnt;
> 
> -	if (pos >= PCI_CFG_SPACE_SIZE)
> +	int size;
> +	size = pci_cfg_space_size(dev);
> +	if (pos >= size)
>  		return 0;
> -	if (nbytes >= PCI_CFG_SPACE_SIZE)
> -		nbytes = PCI_CFG_SPACE_SIZE;
> -	if (pos + nbytes > PCI_CFG_SPACE_SIZE)
> -		nbytes = PCI_CFG_SPACE_SIZE - pos;
> +	if (nbytes >= size)
> +		nbytes = size;
> +	if (pos + nbytes > size)
> +		nbytes = size - pos;
>  	cnt = nbytes;
> 
>  	if (!access_ok(VERIFY_READ, buf, cnt))
> @@ -384,6 +402,7 @@
>  	struct pci_bus *bus = dev->bus;
>  	struct proc_dir_entry *de, *e;
>  	char name[16];
> +	int size;
> 
>  	if (!proc_initialized)
>  		return -EACCES;
> @@ -401,7 +420,9 @@
>  		return -ENOMEM;
>  	e->proc_fops = &proc_bus_pci_operations;
>  	e->data = dev;
> -	e->size = PCI_CFG_SPACE_SIZE;
> +
> +	size = pci_cfg_space_size(dev);
> +	e->size = size;
> 
>  	return 0;
>  }
> diff -Naur linux-2.6.0/include/asm-i386/fixmap.h
> linux_pciexpress/include/asm-i386/fixmap.h
> --- linux-2.6.0/include/asm-i386/fixmap.h	2003-12-18
> 08:28:06.000000000 +0530
> +++ linux_pciexpress/include/asm-i386/fixmap.h	2004-01-07
> 10:59:23.000000000 +0530
> @@ -67,6 +67,9 @@
>  	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings
> */
>  	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
>  #endif
> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +	FIX_PCIE_MCFG,
> +#endif
>  #ifdef CONFIG_ACPI_BOOT
>  	FIX_ACPI_BEGIN,
>  	FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
> diff -Naur linux-2.6.0/include/linux/acpi.h
> linux_pciexpress/include/linux/acpi.h
> --- linux-2.6.0/include/linux/acpi.h	2003-12-18 08:27:58.000000000
> +0530
> +++ linux_pciexpress/include/linux/acpi.h	2004-01-07
> 12:02:35.000000000 +0530
> @@ -317,6 +317,13 @@
>  	char				ec_id[0];
>  } __attribute__ ((packed));
> 
> +struct acpi_table_mcfg {
> +	struct acpi_table_header 	header;
> +	u8	reserved[8];
> +	u64	base_address;
> +} __attribute__ ((packed));
> +
> +
>  /* Table Handlers */
> 
>  enum acpi_table_id {
> @@ -338,6 +345,7 @@
>  	ACPI_SSDT,
>  	ACPI_SPMI,
>  	ACPI_HPET,
> +	ACPI_MCFG,
>  	ACPI_TABLE_COUNT
>  };
> 
> diff -Naur linux-2.6.0/include/linux/pci.h
> linux_pciexpress/include/linux/pci.h
> --- linux-2.6.0/include/linux/pci.h	2003-12-18 08:28:49.000000000
> +0530
> +++ linux_pciexpress/include/linux/pci.h	2004-01-07
> 10:59:23.000000000 +0530
> @@ -198,6 +198,7 @@
>  #define  PCI_CAP_ID_MSI		0x05	/* Message Signalled
> Interrupts */
>  #define  PCI_CAP_ID_CHSWP	0x06	/* CompactPCI HotSwap */
>  #define  PCI_CAP_ID_PCIX	0x07	/* PCI-X */
> +#define  PCI_CAP_ID_EXP 	0x10	/* PCI-Express*/
>  #define PCI_CAP_LIST_NEXT	1	/* Next capability in the list
> */
>  #define PCI_CAP_FLAGS		2	/* Capability defined
flags (16
> bits) */
>  #define PCI_CAP_SIZEOF		4
> -
> To unsubscribe from this list: send the line "unsubscribe
linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
       [not found] <183UK-2Re-11@gated-at.bofh.it>
@ 2003-12-29 19:12 ` Andi Kleen
  0 siblings, 0 replies; 43+ messages in thread
From: Andi Kleen @ 2003-12-29 19:12 UTC (permalink / raw)
  To: Durairaj, Sundarapandian
  Cc: Seshadri, Harinarayanan, Kondratiev, Vladimir, linux-kernel

"Durairaj, Sundarapandian" <sundarapandian.durairaj@intel.com> writes:

> +u32 pcie_last_accessed_device;
> +
> +unsigned long pci_exp_set_dev_base (int bus, int dev, int fn)
> +{
> +	u32 dev_base = 
> +		mmcfg_base_address | (bus << 20) | ((PCI_DEVFN (dev,fn))
> <<12);
> +	if (dev_base != pcie_last_accessed_device){
> +		pcie_last_accessed_device = dev_base;
> +		set_fixmap (FIX_PCIE_MCFG, dev_base);
> +	}

Can you please put the details on how the fixmap is managed into
an asm-i386/* file? x86-64 shares the i386 PCI subsystem and I would
like to implement a different method there (just mapping
everything statically).

> +}
> +
> +static int pci_express_conf_read(int seg, int bus, int devfn, int reg,
> int len, u32 *value)
> +{
> +	 unsigned long flags;
> +	 unsigned long base_address;
> +	 char * virt_addr;
> +	 int dev = PCI_SLOT (devfn);
> +	 int fn  = PCI_FUNC (devfn);
> + 
> +  if (!value || ((u32)bus > 255) || ((u32)dev > 31) || ((u32)fn > 7) ||
> ((u32)reg > 4095))
> +  	return -EINVAL;
> +
> +	/* Shoot misalligned transaction now */
> +	if (reg & (len-1))
> +  	return -EINVAL;

It would be better to printk here because nobody will check the return
value of this function

Same for _write.

> +			writew(value,(unsigned long)virt_addr+reg);
> +       		        break;
> +	        case 4:
> +			writel(value,(unsigned long)virt_addr+reg);
> +	                break;
> +     	}
> + 	/* Dummy read to flush PCI write */
> +	readl (virt_addr);

I thought the consensus was that reading the same address is dangerous
because some registers react when they are only read.
Also, you cannot readl on a w or b register; if you do that you would
need to use the same size.

Also are you sure that the old port based config read/write functions
actually flushed their writes? I don't think they did.  It probably
would be best to just drop that read.

Otherwise it should read some safe register in the same mapping.


> +	 if( is_pci_exp_platform() != 0){

Shouldn't there be a pci_probe option for this here?

It would be probably better to add some way to turn this off at runtime,
since it's still experimental and most drivers will probably work
with the old methods for now.

>  
>  	if (capable(CAP_SYS_ADMIN))
> -		size = PCI_CFG_SPACE_SIZE;
> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +		if (pci_find_capability(dev,PCI_CAP_ID_EXP))
> +			size = PCI_CFG_SPACE_EXP_SIZE;
> +		else
> +#endif /*CONFIG_PCI_EXP_ENHANCED */
> + 		size = PCI_CFG_SPACE_SIZE;

This would be somewhat cleaner in a standard function that returns
the cfg space size for a device, especially since this code is often duplicated
in your patch.


> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +struct acpi_table_mcfg {
> +	struct acpi_table_header 	header;
> +	u8	reserved[8];
> +	u64	base_address;
> +} __attribute__ ((packed));
> +#endif

Declarations don't need to be #ifdefed.

-Andi

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2003-12-29 11:55 ` Christoph Hellwig
@ 2003-12-29 12:51   ` Johan Sjoholm
  0 siblings, 0 replies; 43+ messages in thread
From: Johan Sjoholm @ 2003-12-29 12:51 UTC (permalink / raw)
  To: Christoph Hellwig, Durairaj, Sundarapandian
  Cc: Linux Kernel List Vger, Seshadri, Harinarayanan, Kondratiev, Vladimir

Perhaps this discussion would be better suited to be kept on the linux-pci
mailing list ? Or at the very least, cc'd to it.

-- 
Johan Sjoholm
CEO and Head of Development
Building 31 Clustering  -   http://www.phs.se
js@phs.se   ~   www.phs.se  ~   +46 520 48 87 17


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2003-12-29 11:32 Durairaj, Sundarapandian
  2003-12-29 11:53 ` Arjan van de Ven
@ 2003-12-29 11:55 ` Christoph Hellwig
  2003-12-29 12:51   ` Johan Sjoholm
  1 sibling, 1 reply; 43+ messages in thread
From: Christoph Hellwig @ 2003-12-29 11:55 UTC (permalink / raw)
  To: Durairaj, Sundarapandian
  Cc: linux-kernel, Seshadri, Harinarayanan, Kondratiev, Vladimir

On Mon, Dec 29, 2003 at 05:02:39PM +0530, Durairaj, Sundarapandian wrote:
> Hi All,
> 
> This is the patch on PCI Express Enhanced configuration for 2.6.0 test11
> kernel following up to the Vladamir (Vladimir.Kondratiev@intel.com) and
> HariNarayanan (Harinarayanan.Seshadri@intel.com) patches . I tested it
> on our i386 platform. 
> 
> This patch also implements a mechanism for the kernel to find the
> chipset specific mmcfg base address. The kernel will detect the base
> address of the chipset through the ACPI table entry and based on that
> the PCI subsystem will be initialized.  
> 
> Please review this and send in your comments.

The patch is b0rked due to linewrapping.  This means a) you need to fix the
mailer and b) you need to make sure you wrap lines after 80 chars as per the
coding style.

More comments below:

>  
> +config PCI_EXP_ENHANCED
> +	bool "PCI_EXPRESS (EXPERIMENTAL)" 
> +	depends on EXPERIMENTAL 

Should probably depend on ACPI if you're using ACPI tables to parse.

> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +extern u64 mmcfg_base_address;

Should probably go into a header.

> +static int __init
> +acpi_parse_mcfg (
> +	unsigned long		phys_addr,
> +	unsigned long		size)

Slightly strange coding style, should be

static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)

> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +	result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
> +	if (!result) {
> +		printk(KERN_WARNING PREFIX "MCFG not present\n");
> +		return 0;
> +	}
> +	else if (result < 0) {

	} else if (result < 0) {

> +		printk(KERN_ERR PREFIX "Error parsing MCFG\n");
> +		return result;
> +	}
> +	else if (result > 1) 

	} else if (result > 1)

> +#endif /*CONFIG_PCI_EXP_ENHANCED*/

#endif /* CONFIG_PCI_EXP_ENHANCED */

> +/* mmcfg base address will be initalised by the os initalisation 
> + * code on PCI Express capable platform 
> + */
> +static int is_pci_exp_platform(void )

static int is_pci_exp_platform(void)

> +{
> +/* 
> +	At the time of initialisation of  the os would have 
> +	scanned for mcfg table and set this variable to appropriate 
> +	value If PCI Express not supported the variable 
> +	will have 0 value
> +*/
> +	if (mmcfg_base_address == 0){

	if (mmcfg_base_address == 0) {

Also, the comment is formatted wrongly, the grammar doesn't make sense to me
(but I'm not a native speaker), and it looks slightly superfluous.

Also, why do you need this function at all instead of checking for
mmcfg_base_address being non-NULL directly?
	
> +		printk(KERN_INFO "MCFG table entry is not found in ACPI
> tables....\n");
> +		printk(KERN_INFO " PCI Express not supported in this
> platform....\n");
> +		printk(KERN_INFO " Not enabling Enhanced
> Configuration....\n");

This printk is a bit verbose, given that this is quite normal for 99.999% of
the PCs currently out there.

> +/*
> + Variable used to store the base address of the last pciexpress device 
> +  accessed.
> + */

/*
 * Base address of the last pciexpress device accessed.
 */

> +unsigned long pci_exp_set_dev_base (int bus, int dev, int fn)

unsigned long pci_exp_set_dev_base(int bus, int dev, int fn)

> +	if (dev_base != pcie_last_accessed_device){
> +		pcie_last_accessed_device = dev_base;
> +		set_fixmap (FIX_PCIE_MCFG, dev_base);

again, please no whitespace here

> +	 unsigned long flags;
> +	 unsigned long base_address;
> +	 char * virt_addr;
> +	 int dev = PCI_SLOT (devfn);
> +	 int fn  = PCI_FUNC (devfn);
> + 
> +  if (!value || ((u32)bus > 255) || ((u32)dev > 31) || ((u32)fn > 7) ||
> ((u32)reg > 4095))
> +  	return -EINVAL;

Indentation over this function is rather broken.  Please read through
Documentation/CodingStyle in the source tree and fix up accordingly.

>  static int __init pci_direct_init(void)
>  {
>  	struct resource *region, *region2;
> +	unsigned long flags;
> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +	local_irq_save(flags);
> +	/*
> +	 *	Check if platform we are running is pci express capable
> +	 */
> +	 if( is_pci_exp_platform() != 0){

Again, the coding style is quite strange.  With that function the comment is
rather superfluous.  With the abstraction removed as I suggested above
it'll make sense again, though.

> +++ linux-2.6_pciexpress/drivers/acpi/tables.c	2003-12-24
> 18:34:38.048354440 +0530
> @@ -58,6 +58,9 @@
>  	[ACPI_SSDT]		= "SSDT",
>  	[ACPI_SPMI]		= "SPMI",
>  	[ACPI_HPET]		= "HPET",
> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +	[ACPI_MCFG]		= "MCFG",
> +#endif

Is there a problem with just having this table always declared?

> 18:34:41.249867736 +0530
> @@ -18,11 +18,23 @@
>  
>  #define PCI_CFG_SPACE_SIZE 256
>  
> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +#define PCI_CFG_SPACE_EXP_SIZE 4096
> +#endif

No ifdef around this, please

>  static loff_t
>  proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
>  {
> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +	const struct inode *ino = file->f_dentry->d_inode;
> +	const struct proc_dir_entry *dp = PDE(ino);
> +	struct pci_dev *dev = dp->data;
> +	/* Find whether the device is PCI Express device */
> +	int is_pci_express_dev =  pci_find_capability(dev,
> PCI_CAP_ID_EXP);
> +#endif /*CONFIG_PCI_EXP_ENHANCED*/
>  	loff_t new = -1;
>  
>  	lock_kernel();
> @@ -34,12 +46,22 @@
>  		new = file->f_pos + off;
>  		break;
>  	case 2:
> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +		if (is_pci_express_dev)
> +			new = PCI_CFG_SPACE_EXP_SIZE + off;
> +		else
> +#endif /*CONFIG_PCI_EXP_ENHANCED*/
>  		new = PCI_CFG_SPACE_SIZE + off;
>  		break;
>  	}
>  	unlock_kernel();
> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +	if (is_pci_express_dev && (new < 0 || new >
> PCI_CFG_SPACE_EXP_SIZE))
> +		return -EINVAL;
> +#else
>  	if (new < 0 || new > PCI_CFG_SPACE_SIZE)
>  		return -EINVAL;
> +#endif /*CONFIG_PCI_EXP_ENHANCED */
>  	return (file->f_pos = new);
>  }

This code is too ugly to live.  You should just add a cfg_space_size
member to struct pci_dev and do the checks only once based on that one.

>  	if (capable(CAP_SYS_ADMIN))
> -		size = PCI_CFG_SPACE_SIZE;
> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +		if (pci_find_capability(dev,PCI_CAP_ID_EXP))
> +			size = PCI_CFG_SPACE_EXP_SIZE;
> +		else
> +#endif /*CONFIG_PCI_EXP_ENHANCED */

Shouldn't this be conditional on whether we actually use the
PCI Express config space accessors?  Also, for such small ifdefs
it's superfluous to have the comment after the endif.

But with the cfg_space_size member in struct pci_dev all this should
just go away.

> 18:34:21.000000000 +0530
> @@ -317,6 +317,15 @@
>  	char				ec_id[0];
>  } __attribute__ ((packed));
>  
> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +struct acpi_table_mcfg {
> +	struct acpi_table_header 	header;
> +	u8	reserved[8];
> +	u64	base_address;
> +} __attribute__ ((packed));
> +#endif

No ifdef needed again; unused struct declarations don't do any harm,
ifdefs do, OTOH.

>  enum acpi_table_id {
> @@ -338,6 +347,9 @@
>  	ACPI_SSDT,
>  	ACPI_SPMI,
>  	ACPI_HPET,
> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +	ACPI_MCFG,
> +#endif

Same comment as above.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
  2003-12-29 11:32 Durairaj, Sundarapandian
@ 2003-12-29 11:53 ` Arjan van de Ven
  2003-12-29 11:55 ` Christoph Hellwig
  1 sibling, 0 replies; 43+ messages in thread
From: Arjan van de Ven @ 2003-12-29 11:53 UTC (permalink / raw)
  To: Durairaj, Sundarapandian
  Cc: linux-kernel, Seshadri, Harinarayanan, Kondratiev, Vladimir

[-- Attachment #1: Type: text/plain, Size: 1760 bytes --]


> diff -Naur linux-2.6_src/drivers/acpi/tables.c
> linux-2.6_pciexpress/drivers/acpi/tables.c
> --- linux-2.6_src/drivers/acpi/tables.c	2003-11-27 17:48:39.000000000
> +0530
> +++ linux-2.6_pciexpress/drivers/acpi/tables.c	2003-12-24
> 18:34:38.048354440 +0530
> @@ -58,6 +58,9 @@
>  	[ACPI_SSDT]		= "SSDT",
>  	[ACPI_SPMI]		= "SPMI",
>  	[ACPI_HPET]		= "HPET",
> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +	[ACPI_MCFG]		= "MCFG",
> +#endif

why this ifdef ?
>  #define PCI_CFG_SPACE_SIZE 256
>  
> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +#define PCI_CFG_SPACE_EXP_SIZE 4096
> +#endif

or this one

> @@ -34,12 +46,22 @@
>  		new = file->f_pos + off;
>  		break;
>  	case 2:
> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +		if (is_pci_express_dev)
> +			new = PCI_CFG_SPACE_EXP_SIZE + off;
> +		else
> +#endif /*CONFIG_PCI_EXP_ENHANCED*/

this really looks like you want a dummy is_pci_express_dev (which is
forced 0) instead of all these ifdefs
> diff -Naur linux-2.6_src/include/linux/acpi.h
> linux-2.6_pciexpress/include/linux/acpi.h
> --- linux-2.6_src/include/linux/acpi.h	2003-11-27 17:47:18.000000000
> +0530
> +++ linux-2.6_pciexpress/include/linux/acpi.h	2003-12-24
> 18:34:21.000000000 +0530
> @@ -317,6 +317,15 @@
>  	char				ec_id[0];
>  } __attribute__ ((packed));
>  
> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +struct acpi_table_mcfg {
> +	struct acpi_table_header 	header;
> +	u8	reserved[8];
> +	u64	base_address;
> +} __attribute__ ((packed));
> +#endif

why an ifdef around a struct definition ???

>  enum acpi_table_id {
> @@ -338,6 +347,9 @@
>  	ACPI_SSDT,
>  	ACPI_SPMI,
>  	ACPI_HPET,
> +#ifdef CONFIG_PCI_EXP_ENHANCED
> +	ACPI_MCFG,
> +#endif
>  	ACPI_TABLE_COUNT
>  };

.... or an enum....



[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [patch] PCI Express Enhanced Config Patch - 2.6.0-test11
@ 2003-12-29 11:32 Durairaj, Sundarapandian
  2003-12-29 11:53 ` Arjan van de Ven
  2003-12-29 11:55 ` Christoph Hellwig
  0 siblings, 2 replies; 43+ messages in thread
From: Durairaj, Sundarapandian @ 2003-12-29 11:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: Seshadri, Harinarayanan, Kondratiev, Vladimir

Hi All,

This is the patch for PCI Express Enhanced Configuration for the 2.6.0-test11
kernel, following up on the patches from Vladimir (Vladimir.Kondratiev@intel.com)
and Harinarayanan (Harinarayanan.Seshadri@intel.com). I tested it
on our i386 platform.

This patch also implements a mechanism for the kernel to find the
chipset specific mmcfg base address. The kernel will detect the base
address of the chipset through the ACPI table entry and based on that
the PCI subsystem will be initialized.  

Please review this and send in your comments.

Thanks,
Sundar


diff -Naur linux-2.6_src/arch/i386/Kconfig
linux-2.6_pciexpress/arch/i386/Kconfig
--- linux-2.6_src/arch/i386/Kconfig	2003-12-24 14:41:04.000000000
+0530
+++ linux-2.6_pciexpress/arch/i386/Kconfig	2003-12-24
18:34:29.740617408 +0530
@@ -959,7 +959,7 @@
 endmenu
 
 
-menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
+menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA, PCI_EXPRESS)"
 
 config X86_VISWS_APIC
 	bool
@@ -976,6 +976,18 @@
 	depends on SMP && !(X86_VISWS || X86_VOYAGER)
 	default y
 
+config PCI_EXP_ENHANCED
+	bool "PCI_EXPRESS (EXPERIMENTAL)" 
+	depends on EXPERIMENTAL 
+	help
+	   PCI Express extends the configuration space from 256 bytes to
4k
+	   bytes. It also defines an enhanced configuration mechanism to
acces
+	   the extended configuration space.
+	   With this option, you can specify that Linux will first
attempt to
+	   access the pci configuration space through enhanced config
access
+	   mechanism (Will work only on PCI Express based system)
otherwise the
+	   pci direct mechanism will be used.
+
 config PCI
 	bool "PCI support" if !X86_VISWS
 	depends on !X86_VOYAGER
diff -Naur linux-2.6_src/arch/i386/kernel/acpi/boot.c
linux-2.6_pciexpress/arch/i386/kernel/acpi/boot.c
--- linux-2.6_src/arch/i386/kernel/acpi/boot.c	2003-11-27
17:47:49.000000000 +0530
+++ linux-2.6_pciexpress/arch/i386/kernel/acpi/boot.c	2003-12-24
18:34:26.844057752 +0530
@@ -93,12 +93,33 @@
 	return ((unsigned char *) base + offset);
 }
 
+#ifdef CONFIG_PCI_EXP_ENHANCED
+extern u64 mmcfg_base_address;
+static int __init
+acpi_parse_mcfg (
+	unsigned long		phys_addr,
+	unsigned long		size)
+{
+	struct acpi_table_mcfg	*mcfg = NULL;
 
-#ifdef CONFIG_X86_LOCAL_APIC
-
-static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
+	if (!phys_addr || !size)
+		return -EINVAL;
 
+	mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr,
size);
+	if (!mcfg) {
+		printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+		return -ENODEV;
+	}
+	if (mcfg->base_address)
+		mmcfg_base_address =mcfg->base_address;
+	printk(KERN_INFO PREFIX "Local  mcfg address 0x%x\n",
+			mcfg->base_address);
+	return 0;
+}
+#endif /* CONFIG_PCI_EXP_ENHANCED*/
 
+#ifdef CONFIG_X86_LOCAL_APIC
+static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
 static int __init
 acpi_parse_madt (
 	unsigned long		phys_addr,
@@ -508,6 +529,21 @@
 
 #endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */
 
+#ifdef CONFIG_PCI_EXP_ENHANCED
+	result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+	if (!result) {
+		printk(KERN_WARNING PREFIX "MCFG not present\n");
+		return 0;
+	}
+	else if (result < 0) {
+		printk(KERN_ERR PREFIX "Error parsing MCFG\n");
+		return result;
+	}
+	else if (result > 1) 
+		printk(KERN_WARNING PREFIX "Multiple MCFG tables
exist\n");
+#endif /*CONFIG_PCI_EXP_ENHANCED*/
+
+
 #ifdef CONFIG_X86_LOCAL_APIC
 	if (acpi_lapic && acpi_ioapic) {
 		smp_found_config = 1;
diff -Naur linux-2.6_src/arch/i386/pci/direct.c
linux-2.6_pciexpress/arch/i386/pci/direct.c
--- linux-2.6_src/arch/i386/pci/direct.c	2003-11-27
17:47:49.000000000 +0530
+++ linux-2.6_pciexpress/arch/i386/pci/direct.c	2003-12-24
18:34:29.432664224 +0530
@@ -168,6 +168,123 @@
 
 
 /*
+	We map full Page size on each request. Incidently that's the
size we
+	have for config space too.
+*/
+#ifdef CONFIG_PCI_EXP_ENHANCED
+u64 mmcfg_base_address;
+
+/* mmcfg base address will be initalised by the os initalisation 
+ * code on PCI Express capable platform 
+ */
+static int is_pci_exp_platform(void )
+{
+/* 
+	At the time of initialisation of  the os would have 
+	scanned for mcfg table and set this variable to appropriate 
+	value If PCI Express not supported the variable 
+	will have 0 value
+*/
+	if (mmcfg_base_address == 0){
+		printk(KERN_INFO "MCFG table entry is not found in ACPI
tables....\n");
+		printk(KERN_INFO " PCI Express not supported in this
platform....\n");
+		printk(KERN_INFO " Not enabling Enhanced
Configuration....\n");
+		return 0;
+	}
+	return 1;
+}
+/*
+ Variable used to store the base address of the last pciexpress device 
+  accessed.
+ */
+u32 pcie_last_accessed_device;
+
+unsigned long pci_exp_set_dev_base (int bus, int dev, int fn)
+{
+	u32 dev_base = 
+		mmcfg_base_address | (bus << 20) | ((PCI_DEVFN (dev,fn))
<<12);
+	if (dev_base != pcie_last_accessed_device){
+		pcie_last_accessed_device = dev_base;
+		set_fixmap (FIX_PCIE_MCFG, dev_base);
+	}
+}
+
+static int pci_express_conf_read(int seg, int bus, int devfn, int reg,
int len, u32 *value)
+{
+	 unsigned long flags;
+	 unsigned long base_address;
+	 char * virt_addr;
+	 int dev = PCI_SLOT (devfn);
+	 int fn  = PCI_FUNC (devfn);
+ 
+  if (!value || ((u32)bus > 255) || ((u32)dev > 31) || ((u32)fn > 7) ||
((u32)reg > 4095))
+  	return -EINVAL;
+
+	/* Shoot misalligned transaction now */
+	if (reg & (len-1))
+  	return -EINVAL;
+
+  spin_lock_irqsave(&pci_config_lock, flags);
+	pci_exp_set_dev_base(bus, dev, fn);
+	virt_addr = (char *) (fix_to_virt(FIX_PCIE_MCFG));
+  switch (len) {
+        case 1:
+       		*value = (u8)readb((unsigned long)
virt_addr+reg);
+                break;
+        case 2:
+       		*value = (u16)readw((unsigned long)
virt_addr+reg);
+                break;
+        case 4:
+       		*value = (u32)readl((unsigned long)
virt_addr+reg);
+                break;
+	}
+  spin_unlock_irqrestore(&pci_config_lock, flags);
+  return 0;
+}
+ 
+static int pci_express_conf_write(int seg, int bus, int devfn, int reg,
int len, u32 value)
+{
+	unsigned long flags;
+	unsigned long base_address;
+	unsigned char * virt_addr;
+	int dev = PCI_SLOT (devfn);
+	int fn  = PCI_FUNC (devfn);
+	
+	if (!value || ((u32)bus > 255) || ((u32)dev > 31) || ((u32)fn >
7) || ((u32)reg > 4095))
+		return -EINVAL;
+	
+	/* Shoot misalligned transaction now */
+	if (reg & (len-1))
+  	return -EINVAL;
+  
+	spin_lock_irqsave(&pci_config_lock, flags);
+	pci_exp_set_dev_base(bus, dev, fn);
+	virt_addr = (char *) (fix_to_virt(FIX_PCIE_MCFG));
+	
+	switch (len) {
+		case 1:
+			writeb(value,(unsigned long)virt_addr+reg);
+			break;
+		case 2:
+			writew(value,(unsigned long)virt_addr+reg);
+       		        break;
+	        case 4:
+			writel(value,(unsigned long)virt_addr+reg);
+	                break;
+     	}
+ 	/* Dummy read to flush PCI write */
+	readl (virt_addr);
+	spin_unlock_irqrestore(&pci_config_lock, flags);	 
+	return 0;
+}
+
+static struct pci_raw_ops pci_express_conf = {
+				.read   =        pci_express_conf_read,
+        .write  =        pci_express_conf_write,
+};
+#endif /* CONFIG_PCI_EXP_ENHANCED */
+
+/*
  * Before we decide to use direct hardware access mechanisms, we try to
do some
  * trivial checks to ensure it at least _seems_ to be working -- we
just test
  * whether bus 00 contains a host bridge (this is similar to checking
@@ -244,6 +361,22 @@
 static int __init pci_direct_init(void)
 {
 	struct resource *region, *region2;
+	unsigned long flags;
+#ifdef CONFIG_PCI_EXP_ENHANCED
+	local_irq_save(flags);
+	/*
+	 *	Check if platform we are running is pci express capable
+	 */
+	 if( is_pci_exp_platform() != 0){
+		if (pci_sanity_check(&pci_express_conf)) {
+			local_irq_restore(flags);
+			printk(KERN_INFO "PCI:Using config type
PCIExp\n");
+			raw_pci_ops = &pci_express_conf;
+			return 0;
+		} 
+	}
+	local_irq_restore(flags);
+#endif /* CONFIG_PCI_EXP_ENHANCED */
 
 	if ((pci_probe & PCI_PROBE_CONF1) == 0)
 		goto type2;
diff -Naur linux-2.6_src/arch/i386/pci/Makefile
linux-2.6_pciexpress/arch/i386/pci/Makefile
--- linux-2.6_src/arch/i386/pci/Makefile	2003-11-27
17:47:49.000000000 +0530
+++ linux-2.6_pciexpress/arch/i386/pci/Makefile	2003-12-24
18:34:29.438663312 +0530
@@ -2,6 +2,7 @@
 
 obj-$(CONFIG_PCI_BIOS)		+= pcbios.o
 obj-$(CONFIG_PCI_DIRECT)	+= direct.o
+obj-$(CONFIG_PCI_EXP_ENHANCED)	+= direct.o
 
 pci-y				:= fixup.o
 pci-$(CONFIG_ACPI_PCI)		+= acpi.o
diff -Naur linux-2.6_src/drivers/acpi/tables.c
linux-2.6_pciexpress/drivers/acpi/tables.c
--- linux-2.6_src/drivers/acpi/tables.c	2003-11-27 17:48:39.000000000
+0530
+++ linux-2.6_pciexpress/drivers/acpi/tables.c	2003-12-24
18:34:38.048354440 +0530
@@ -58,6 +58,9 @@
 	[ACPI_SSDT]		= "SSDT",
 	[ACPI_SPMI]		= "SPMI",
 	[ACPI_HPET]		= "HPET",
+#ifdef CONFIG_PCI_EXP_ENHANCED
+	[ACPI_MCFG]		= "MCFG",
+#endif
 };
 
 /* System Description Table (RSDT/XSDT) */
diff -Naur linux-2.6_src/drivers/pci/pci.c
linux-2.6_pciexpress/drivers/pci/pci.c
--- linux-2.6_src/drivers/pci/pci.c	2003-11-27 17:48:59.000000000
+0530
+++ linux-2.6_pciexpress/drivers/pci/pci.c	2003-12-24
18:34:41.216872752 +0530
@@ -90,6 +90,8 @@
  *  %PCI_CAP_ID_CHSWP        CompactPCI HotSwap 
  *
  *  %PCI_CAP_ID_PCIX         PCI-X
+ *  %PCI_CAP_ID_EXP          PCI-EXP
+
  */
 int
 pci_find_capability(struct pci_dev *dev, int cap)
diff -Naur linux-2.6_src/drivers/pci/proc.c
linux-2.6_pciexpress/drivers/pci/proc.c
--- linux-2.6_src/drivers/pci/proc.c	2003-11-27 17:48:59.000000000
+0530
+++ linux-2.6_pciexpress/drivers/pci/proc.c	2003-12-24
18:34:41.249867736 +0530
@@ -18,11 +18,23 @@
 
 #define PCI_CFG_SPACE_SIZE 256
 
+#ifdef CONFIG_PCI_EXP_ENHANCED
+#define PCI_CFG_SPACE_EXP_SIZE 4096
+#endif
+ 
+
 static int proc_initialized;	/* = 0 */
 
 static loff_t
 proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
 {
+#ifdef CONFIG_PCI_EXP_ENHANCED
+	const struct inode *ino = file->f_dentry->d_inode;
+	const struct proc_dir_entry *dp = PDE(ino);
+	struct pci_dev *dev = dp->data;
+	/* Find whether the device is PCI Express device */
+	int is_pci_express_dev =  pci_find_capability(dev,
PCI_CAP_ID_EXP);
+#endif /*CONFIG_PCI_EXP_ENHANCED*/
 	loff_t new = -1;
 
 	lock_kernel();
@@ -34,12 +46,22 @@
 		new = file->f_pos + off;
 		break;
 	case 2:
+#ifdef CONFIG_PCI_EXP_ENHANCED
+		if (is_pci_express_dev)
+			new = PCI_CFG_SPACE_EXP_SIZE + off;
+		else
+#endif /*CONFIG_PCI_EXP_ENHANCED*/
 		new = PCI_CFG_SPACE_SIZE + off;
 		break;
 	}
 	unlock_kernel();
+#ifdef CONFIG_PCI_EXP_ENHANCED
+	if (is_pci_express_dev && (new < 0 || new >
PCI_CFG_SPACE_EXP_SIZE))
+		return -EINVAL;
+#else
 	if (new < 0 || new > PCI_CFG_SPACE_SIZE)
 		return -EINVAL;
+#endif /*CONFIG_PCI_EXP_ENHANCED */
 	return (file->f_pos = new);
 }
 
@@ -59,7 +81,12 @@
 	 */
 
 	if (capable(CAP_SYS_ADMIN))
-		size = PCI_CFG_SPACE_SIZE;
+#ifdef CONFIG_PCI_EXP_ENHANCED
+		if (pci_find_capability(dev,PCI_CAP_ID_EXP))
+			size = PCI_CFG_SPACE_EXP_SIZE;
+		else
+#endif /*CONFIG_PCI_EXP_ENHANCED */
+ 		size = PCI_CFG_SPACE_SIZE;
 	else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
 		size = 128;
 	else
@@ -134,12 +161,21 @@
 	int pos = *ppos;
 	int cnt;
 
-	if (pos >= PCI_CFG_SPACE_SIZE)
+	int size;
+#ifdef CONFIG_PCI_EXP_ENHANCED
+	if (pci_find_capability(dev,PCI_CAP_ID_EXP))
+		size = PCI_CFG_SPACE_EXP_SIZE;
+	else
+#endif /* CONFIG_PCI_EXP_ENHANCED */
+	size = PCI_CFG_SPACE_SIZE;
+
+
+	if (pos >= size)
 		return 0;
-	if (nbytes >= PCI_CFG_SPACE_SIZE)
-		nbytes = PCI_CFG_SPACE_SIZE;
-	if (pos + nbytes > PCI_CFG_SPACE_SIZE)
-		nbytes = PCI_CFG_SPACE_SIZE - pos;
+	if (nbytes >= size)
+		nbytes = size;
+	if (pos + nbytes > size)
+		nbytes = size - pos;
 	cnt = nbytes;
 
 	if (!access_ok(VERIFY_READ, buf, cnt))
@@ -384,6 +420,7 @@
 	struct pci_bus *bus = dev->bus;
 	struct proc_dir_entry *de, *e;
 	char name[16];
+	int size;
 
 	if (!proc_initialized)
 		return -EACCES;
@@ -401,7 +438,14 @@
 		return -ENOMEM;
 	e->proc_fops = &proc_bus_pci_operations;
 	e->data = dev;
-	e->size = PCI_CFG_SPACE_SIZE;
+
+#ifdef CONFIG_PCI_EXP_ENHANCED
+	if (pci_find_capability(dev,PCI_CAP_ID_EXP))
+		size = PCI_CFG_SPACE_EXP_SIZE;
+	else
+#endif /*CONFIG_PCI_EXP_ENHANCED */
+	size = PCI_CFG_SPACE_SIZE;
+	e->size = size;
 
 	return 0;
 }
diff -Naur linux-2.6_src/include/asm-i386/fixmap.h
linux-2.6_pciexpress/include/asm-i386/fixmap.h
--- linux-2.6_src/include/asm-i386/fixmap.h	2003-11-27
17:47:00.000000000 +0530
+++ linux-2.6_pciexpress/include/asm-i386/fixmap.h	2003-12-24
18:34:19.213217816 +0530
@@ -67,6 +67,9 @@
 	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings
*/
 	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
 #endif
+#ifdef CONFIG_PCI_EXP_ENHANCED
+	FIX_PCIE_MCFG,
+#endif
 #ifdef CONFIG_ACPI_BOOT
 	FIX_ACPI_BEGIN,
 	FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
diff -Naur linux-2.6_src/include/linux/acpi.h
linux-2.6_pciexpress/include/linux/acpi.h
--- linux-2.6_src/include/linux/acpi.h	2003-11-27 17:47:18.000000000
+0530
+++ linux-2.6_pciexpress/include/linux/acpi.h	2003-12-24
18:34:21.000000000 +0530
@@ -317,6 +317,15 @@
 	char				ec_id[0];
 } __attribute__ ((packed));
 
+#ifdef CONFIG_PCI_EXP_ENHANCED
+struct acpi_table_mcfg {
+	struct acpi_table_header 	header;
+	u8	reserved[8];
+	u64	base_address;
+} __attribute__ ((packed));
+#endif
+
+
 /* Table Handlers */
 
 enum acpi_table_id {
@@ -338,6 +347,9 @@
 	ACPI_SSDT,
 	ACPI_SPMI,
 	ACPI_HPET,
+#ifdef CONFIG_PCI_EXP_ENHANCED
+	ACPI_MCFG,
+#endif
 	ACPI_TABLE_COUNT
 };
 
diff -Naur linux-2.6_src/include/linux/pci.h
linux-2.6_pciexpress/include/linux/pci.h
--- linux-2.6_src/include/linux/pci.h	2003-11-27 17:47:11.000000000
+0530
+++ linux-2.6_pciexpress/include/linux/pci.h	2003-12-24
18:34:20.000000000 +0530
@@ -198,6 +198,7 @@
 #define  PCI_CAP_ID_MSI		0x05	/* Message Signalled
Interrupts */
 #define  PCI_CAP_ID_CHSWP	0x06	/* CompactPCI HotSwap */
 #define  PCI_CAP_ID_PCIX	0x07	/* PCI-X */
+#define  PCI_CAP_ID_EXP 	0x10	/* PCI-Express*/
 #define PCI_CAP_LIST_NEXT	1	/* Next capability in the list
*/
 #define PCI_CAP_FLAGS		2	/* Capability defined flags (16
bits) */
 #define PCI_CAP_SIZEOF		4

^ permalink raw reply	[flat|nested] 43+ messages in thread

end of thread, other threads:[~2004-02-01 21:42 UTC | newest]

Thread overview: 43+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-01-07 12:59 [patch] PCI Express Enhanced Config Patch - 2.6.0-test11 Durairaj, Sundarapandian
2004-01-07 14:08 ` Meelis Roos
2004-01-07 17:34 ` Vladimir Kondratiev
  -- strict thread matches above, loose matches on Subject: below --
2004-01-30 16:58 Nakajima, Jun
2004-01-29 11:32 Durairaj, Sundarapandian
2004-01-29 15:09 ` Matthew Wilcox
2004-01-29 15:59   ` Matthew Wilcox
2004-01-29 16:05     ` Linus Torvalds
2004-01-29 16:42       ` Matthew Wilcox
2004-01-29 16:52         ` Linus Torvalds
2004-01-31 21:57         ` Eric W. Biederman
2004-02-01  4:41           ` Grant Grundler
2004-02-01  5:10           ` Matthew Wilcox
2004-02-01 11:00             ` Eric W. Biederman
2004-02-01 15:18               ` Matthew Wilcox
2004-02-01 18:28                 ` Eric W. Biederman
2004-02-01 20:11                   ` Matthew Wilcox
2004-02-01 21:35                     ` Eric W. Biederman
2004-02-01 11:10             ` Eric W. Biederman
2004-01-29 18:09       ` Greg KH
2004-01-30 16:33         ` Greg KH
2004-01-28  9:38 Durairaj, Sundarapandian
2004-01-28 14:42 ` Vladimir Kondratiev
2004-01-28 14:54   ` Christoph Hellwig
2004-01-28 15:00   ` Martin Mares
2004-01-28 15:18 ` Matthew Wilcox
2004-01-22 10:21 Durairaj, Sundarapandian
2004-01-22 10:44 ` Andrew Morton
2004-01-22 11:09 ` Martin Mares
2004-01-22 13:12 ` Andi Kleen
2004-01-22 18:21   ` Alan Cox
2004-01-22 19:40     ` Randy.Dunlap
2004-01-23 19:19       ` Pavel Machek
2004-01-23 19:31         ` Martin Mares
2004-01-23 20:08           ` Stefan Smietanowski
2004-01-22 16:40 ` Grant Grundler
2004-01-22 17:00 ` Greg KH
2004-01-07 16:44 Nakajima, Jun
     [not found] <183UK-2Re-11@gated-at.bofh.it>
2003-12-29 19:12 ` Andi Kleen
2003-12-29 11:32 Durairaj, Sundarapandian
2003-12-29 11:53 ` Arjan van de Ven
2003-12-29 11:55 ` Christoph Hellwig
2003-12-29 12:51   ` Johan Sjoholm

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).