linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* x86/pci Oops with CONFIG_SND_HDA_INTEL
@ 2010-05-19 15:13 Graham Ramsey
  2010-05-19 16:44 ` Bjorn Helgaas
  0 siblings, 1 reply; 29+ messages in thread
From: Graham Ramsey @ 2010-05-19 15:13 UTC (permalink / raw)
  To: linux-kernel

Hi,
I am on x86_64 with latest (v2.6.34) kernel. When i set 
CONFIG_SND_HDA_INTEL=Y It hangs at an early stage in boot with kernel oops.
When i use CONFIG_SND_HDA_INTEL=M the machine will boot, and i get the 
dmesg (below).

I have bisected down to one commit that causes the problem:

   commit 3e3da00c01d050307e753fb7b3e84aefc16da0d0
   x86/pci: AMD one chain system to use pci read out res
   ...

I have reverted this in my current kernel (v2.6.34) and it seems to work 
OK like this.
Is it possible to get this commit either removed, or amended to work 
with SND_HDA_INTEL ?


OOPS from dmesg
---------------
Pid: 1714, comm: modprobe Not tainted 2.6.34 #3 
ALiveSATA2-GLAN/ALiveSATA2-GLAN
RIP: 0010:[<ffffffffa0018d11>]  [<ffffffffa0018d11>] 
azx_probe+0x3a2/0xa6a [snd_hda_intel]
RSP: 0018:ffff88007e80bd18  EFLAGS: 00010282
RAX: ffffc90000078000 RBX: ffff88007d724c00 RCX: 000000000000000d
RDX: 0000000000000000 RSI: 0000000000000246 RDI: ffffffff813f2f24
RBP: ffff88007f9da088 R08: 0000000000000000 R09: 0000000000000040
R10: 0000000000000008 R11: 000000000000000a R12: ffff88007f9da000
R13: ffff88007d726400 R14: 0000000000000000 R15: 0000000000000000
FS:  00007f1a13aa6700(0000) GS:ffff880001700000(0000) 
knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffffc90000078000 CR3: 0000000037ad1000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process modprobe (pid: 1714, threadinfo ffff88007e80a000, task 
ffff88007d0ed080)
Stack:
  0000000000000292 0000000000000005 ffff88007f9c62c8 ffff88007d60e948
<0> ffff88007d60e948 ffffffff810c5d3f ffff88007e80bd64 ffff88007e80bdb8
<0> ffff88007f9033f0 ffff88007d60e948 ffff88007e80bdb8 ffff88007e80bdb8
Call Trace:
  [<ffffffff810c5d3f>] ? sysfs_addrm_finish+0x29/0xb4
  [<ffffffff8116e849>] ? local_pci_probe+0x12/0x19
  [<ffffffff8116ea65>] ? pci_device_probe+0x60/0x8e
  [<ffffffff811ac286>] ? driver_sysfs_add+0x42/0x69
  [<ffffffff811ac4cd>] ? driver_probe_device+0x8e/0x10e
  [<ffffffff811ac59c>] ? __driver_attach+0x4f/0x6f
  [<ffffffff811ac54d>] ? __driver_attach+0x0/0x6f
  [<ffffffff811ab8cc>] ? bus_for_each_dev+0x47/0x72
  [<ffffffff811abede>] ? bus_add_driver+0xa2/0x1f2
  [<ffffffff811ac7bc>] ? driver_register+0x8d/0xf5
  [<ffffffff8116ecb0>] ? __pci_register_driver+0x50/0xbb
  [<ffffffffa001e000>] ? alsa_card_azx_init+0x0/0x22 [snd_hda_intel]
  [<ffffffff810002d0>] ? do_one_initcall+0x4f/0x13f
  [<ffffffff8104aa56>] ? sys_init_module+0xc5/0x2cf
  [<ffffffff81001eab>] ? system_call_fastpath+0x16/0x1b
Code: 83 f0 01 00 00 31 f6 48 89 df e8 9f e4 ff ff 85 c0 0f 88 f5 03 00 
00 4c 89 e7 e8 e2 45 15 e1 8b 7b 40 e8 70 41 03 e1 48 8b 43 38 <66> 44 
8b 38 8b 43 14 83 e8 03 83 f8 01 77 2c 31 d2 be 85 43 00
RIP  [<ffffffffa0018d11>] azx_probe+0x3a2/0xa6a [snd_hda_intel]
  RSP <ffff88007e80bd18>
CR2: ffffc90000078000
---[ end trace 1814cadd98ff217d ]---



Patch for revert of commit
-------------------------------------

--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -83,12 +83,11 @@ static int __init early_fill_mp_bus_info(void)
         struct range range[RANGE_NUM];
         u64 val;
         u32 address;
-       bool found;

         if (!early_pci_allowed())
                 return -1;

-       found = false;
+       found_all_numa_early = 0;
         for (i = 0; i < ARRAY_SIZE(pci_probes); i++) {
                 u32 id;
                 u16 device;
@@ -102,12 +101,12 @@ static int __init early_fill_mp_bus_info(void)
                 device = (id>>16) & 0xffff;
                 if (pci_probes[i].vendor == vendor &&
                     pci_probes[i].device == device) {
-                       found = true;
+                       found_all_numa_early = 1;
                         break;
                 }
         }

-       if (!found)
+       if (!found_all_numa_early)
                 return 0;

         pci_root_num = 0;
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c
index 64a1228..894a17e 100644
--- a/arch/x86/pci/bus_numa.c
+++ b/arch/x86/pci/bus_numa.c
@@ -6,6 +6,7 @@

  int pci_root_num;
  struct pci_root_info pci_root_info[PCI_ROOT_NR];
+int found_all_numa_early;

  void x86_pci_root_bus_res_quirks(struct pci_bus *b)
  {
@@ -21,6 +22,10 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b)
         if (!pci_root_num)
                 return;

+       /* for amd, if only one root bus, don't need to do anything */
+       if (pci_root_num < 2 && found_all_numa_early)
+               return;
+
         for (i = 0; i < pci_root_num; i++) {
                 if (pci_root_info[i].bus_min == b->number)
                         break;
diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h
index 804a4b4..d4ad5fa 100644
--- a/arch/x86/pci/bus_numa.h
+++ b/arch/x86/pci/bus_numa.h
@@ -19,6 +19,7 @@ struct pci_root_info {
  #define PCI_ROOT_NR 4
  extern int pci_root_num;
  extern struct pci_root_info pci_root_info[PCI_ROOT_NR];
+extern int found_all_numa_early;

  extern void update_res(struct pci_root_info *info, resource_size_t start,
                       resource_size_t end, unsigned long flags, int 
merge);

My lspci
--------

00:00.0 Host bridge: VIA Technologies, Inc. K8T890CF Host Bridge
00:00.1 Host bridge: VIA Technologies, Inc. VT3351 Host Bridge
00:00.2 Host bridge: VIA Technologies, Inc. VT3351 Host Bridge
00:00.3 Host bridge: VIA Technologies, Inc. VT3351 Host Bridge
00:00.4 Host bridge: VIA Technologies, Inc. VT3351 Host Bridge
00:00.5 PIC: VIA Technologies, Inc. VT3351 I/O APIC Interrupt Controller
00:00.7 Host bridge: VIA Technologies, Inc. VT3351 Host Bridge
00:01.0 PCI bridge: VIA Technologies, Inc. [K8T890 North / VT8237 South] 
PCI Bridge
00:02.0 PCI bridge: VIA Technologies, Inc. K8T890 PCI to PCI Bridge 
Controller
00:03.0 PCI bridge: VIA Technologies, Inc. K8T890 PCI to PCI Bridge 
Controller
00:03.1 PCI bridge: VIA Technologies, Inc. K8T890 PCI to PCI Bridge 
Controller
00:03.2 PCI bridge: VIA Technologies, Inc. K8T890 PCI to PCI Bridge 
Controller
00:03.3 PCI bridge: VIA Technologies, Inc. K8T890 PCI to PCI Bridge 
Controller
00:0f.0 RAID bus controller: VIA Technologies, Inc. VT8237A SATA 2-Port 
Controller (rev 80)
00:0f.1 IDE interface: VIA Technologies, Inc. 
VT82C586A/B/VT82C686/A/B/VT823x/A/C PIPC Bus Master IDE (rev 07)
00:10.0 USB Controller: VIA Technologies, Inc. VT82xxxxx UHCI USB 1.1 
Controller (rev a0)
00:10.1 USB Controller: VIA Technologies, Inc. VT82xxxxx UHCI USB 1.1 
Controller (rev a0)
00:10.2 USB Controller: VIA Technologies, Inc. VT82xxxxx UHCI USB 1.1 
Controller (rev a0)
00:10.3 USB Controller: VIA Technologies, Inc. VT82xxxxx UHCI USB 1.1 
Controller (rev a0)
00:10.4 USB Controller: VIA Technologies, Inc. USB 2.0 (rev 86)
00:11.0 ISA bridge: VIA Technologies, Inc. VT8237A PCI to ISA Bridge
00:11.7 Host bridge: VIA Technologies, Inc. VT8251 Ultra VLINK Controller
00:13.0 Host bridge: VIA Technologies, Inc. VT8237A Host Bridge
00:18.0 Host bridge: Advanced Micro Devices [AMD] K8 [Athlon64/Opteron] 
HyperTransport Technology Configuration
00:18.1 Host bridge: Advanced Micro Devices [AMD] K8 [Athlon64/Opteron] 
Address Map
00:18.2 Host bridge: Advanced Micro Devices [AMD] K8 [Athlon64/Opteron] 
DRAM Controller
00:18.3 Host bridge: Advanced Micro Devices [AMD] K8 [Athlon64/Opteron] 
Miscellaneous Control
02:00.0 SATA controller: JMicron Technology Corp. JMB362/JMB363 Serial 
ATA Controller (rev 02)
02:00.1 IDE interface: JMicron Technology Corp. JMB362/JMB363 Serial ATA 
Controller (rev 02)
03:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. 
RTL8111/8168B PCI Express Gigabit Ethernet controller (rev 01)
06:00.0 VGA compatible controller: nVidia Corporation G71 [GeForce 7900 
GS] (rev a1)
80:01.0 Audio device: VIA Technologies, Inc. VT1708/A [Azalia HDAC] (VIA 
High Definition Audio Controller) (rev 10)


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: x86/pci Oops with CONFIG_SND_HDA_INTEL
  2010-05-19 15:13 x86/pci Oops with CONFIG_SND_HDA_INTEL Graham Ramsey
@ 2010-05-19 16:44 ` Bjorn Helgaas
  2010-05-19 17:16   ` Graham Ramsey
  0 siblings, 1 reply; 29+ messages in thread
From: Bjorn Helgaas @ 2010-05-19 16:44 UTC (permalink / raw)
  To: Graham Ramsey; +Cc: linux-kernel, Yinghai Lu, linux-pci

On Wednesday, May 19, 2010 09:13:24 am Graham Ramsey wrote:
> I am on x86_64 with latest (v2.6.34) kernel. When i set 
> CONFIG_SND_HDA_INTEL=Y It hangs at an early stage in boot with kernel oops.
> When i use CONFIG_SND_HDA_INTEL=M the machine will boot, and i get the 
> dmesg (below).
> 
> I have bisected down to one commit that causes the problem:
> 
>    commit 3e3da00c01d050307e753fb7b3e84aefc16da0d0
>    x86/pci: AMD one chain system to use pci read out res
>    ...

I CC'd Yinghai, the author of that patch.  That commit went in after
2.6.33, so this is probably a regression between .33 and .34.  Can
you open a report at https://bugzilla.kernel.org and respond to this
thread with the URL?

Please attach the complete dmesg (with SND_HDA_INTEL=m) to the
bugzilla.

Thanks a lot for your report!

Bjorn

> I have reverted this in my current kernel (v2.6.34) and it seems to work 
> OK like this.
> Is it possible to get this commit either removed, or amended to work 
> with SND_HDA_INTEL ?
 
> OOPS from dmesg
> ---------------
> Pid: 1714, comm: modprobe Not tainted 2.6.34 #3 
> ALiveSATA2-GLAN/ALiveSATA2-GLAN
> RIP: 0010:[<ffffffffa0018d11>]  [<ffffffffa0018d11>] 
> azx_probe+0x3a2/0xa6a [snd_hda_intel]
> RSP: 0018:ffff88007e80bd18  EFLAGS: 00010282
> RAX: ffffc90000078000 RBX: ffff88007d724c00 RCX: 000000000000000d
> RDX: 0000000000000000 RSI: 0000000000000246 RDI: ffffffff813f2f24
> RBP: ffff88007f9da088 R08: 0000000000000000 R09: 0000000000000040
> R10: 0000000000000008 R11: 000000000000000a R12: ffff88007f9da000
> R13: ffff88007d726400 R14: 0000000000000000 R15: 0000000000000000
> FS:  00007f1a13aa6700(0000) GS:ffff880001700000(0000) 
> knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: ffffc90000078000 CR3: 0000000037ad1000 CR4: 00000000000006e0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> Process modprobe (pid: 1714, threadinfo ffff88007e80a000, task 
> ffff88007d0ed080)
> Stack:
>   0000000000000292 0000000000000005 ffff88007f9c62c8 ffff88007d60e948
> <0> ffff88007d60e948 ffffffff810c5d3f ffff88007e80bd64 ffff88007e80bdb8
> <0> ffff88007f9033f0 ffff88007d60e948 ffff88007e80bdb8 ffff88007e80bdb8
> Call Trace:
>   [<ffffffff810c5d3f>] ? sysfs_addrm_finish+0x29/0xb4
>   [<ffffffff8116e849>] ? local_pci_probe+0x12/0x19
>   [<ffffffff8116ea65>] ? pci_device_probe+0x60/0x8e
>   [<ffffffff811ac286>] ? driver_sysfs_add+0x42/0x69
>   [<ffffffff811ac4cd>] ? driver_probe_device+0x8e/0x10e
>   [<ffffffff811ac59c>] ? __driver_attach+0x4f/0x6f
>   [<ffffffff811ac54d>] ? __driver_attach+0x0/0x6f
>   [<ffffffff811ab8cc>] ? bus_for_each_dev+0x47/0x72
>   [<ffffffff811abede>] ? bus_add_driver+0xa2/0x1f2
>   [<ffffffff811ac7bc>] ? driver_register+0x8d/0xf5
>   [<ffffffff8116ecb0>] ? __pci_register_driver+0x50/0xbb
>   [<ffffffffa001e000>] ? alsa_card_azx_init+0x0/0x22 [snd_hda_intel]
>   [<ffffffff810002d0>] ? do_one_initcall+0x4f/0x13f
>   [<ffffffff8104aa56>] ? sys_init_module+0xc5/0x2cf
>   [<ffffffff81001eab>] ? system_call_fastpath+0x16/0x1b
> Code: 83 f0 01 00 00 31 f6 48 89 df e8 9f e4 ff ff 85 c0 0f 88 f5 03 00 
> 00 4c 89 e7 e8 e2 45 15 e1 8b 7b 40 e8 70 41 03 e1 48 8b 43 38 <66> 44 
> 8b 38 8b 43 14 83 e8 03 83 f8 01 77 2c 31 d2 be 85 43 00
> RIP  [<ffffffffa0018d11>] azx_probe+0x3a2/0xa6a [snd_hda_intel]
>   RSP <ffff88007e80bd18>
> CR2: ffffc90000078000
> ---[ end trace 1814cadd98ff217d ]---
> 
> 
> 
> Patch for revert of commit
> -------------------------------------
> 
> --- a/arch/x86/pci/amd_bus.c
> +++ b/arch/x86/pci/amd_bus.c
> @@ -83,12 +83,11 @@ static int __init early_fill_mp_bus_info(void)
>          struct range range[RANGE_NUM];
>          u64 val;
>          u32 address;
> -       bool found;
> 
>          if (!early_pci_allowed())
>                  return -1;
> 
> -       found = false;
> +       found_all_numa_early = 0;
>          for (i = 0; i < ARRAY_SIZE(pci_probes); i++) {
>                  u32 id;
>                  u16 device;
> @@ -102,12 +101,12 @@ static int __init early_fill_mp_bus_info(void)
>                  device = (id>>16) & 0xffff;
>                  if (pci_probes[i].vendor == vendor &&
>                      pci_probes[i].device == device) {
> -                       found = true;
> +                       found_all_numa_early = 1;
>                          break;
>                  }
>          }
> 
> -       if (!found)
> +       if (!found_all_numa_early)
>                  return 0;
> 
>          pci_root_num = 0;
> diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c
> index 64a1228..894a17e 100644
> --- a/arch/x86/pci/bus_numa.c
> +++ b/arch/x86/pci/bus_numa.c
> @@ -6,6 +6,7 @@
> 
>   int pci_root_num;
>   struct pci_root_info pci_root_info[PCI_ROOT_NR];
> +int found_all_numa_early;
> 
>   void x86_pci_root_bus_res_quirks(struct pci_bus *b)
>   {
> @@ -21,6 +22,10 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b)
>          if (!pci_root_num)
>                  return;
> 
> +       /* for amd, if only one root bus, don't need to do anything */
> +       if (pci_root_num < 2 && found_all_numa_early)
> +               return;
> +
>          for (i = 0; i < pci_root_num; i++) {
>                  if (pci_root_info[i].bus_min == b->number)
>                          break;
> diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h
> index 804a4b4..d4ad5fa 100644
> --- a/arch/x86/pci/bus_numa.h
> +++ b/arch/x86/pci/bus_numa.h
> @@ -19,6 +19,7 @@ struct pci_root_info {
>   #define PCI_ROOT_NR 4
>   extern int pci_root_num;
>   extern struct pci_root_info pci_root_info[PCI_ROOT_NR];
> +extern int found_all_numa_early;
> 
>   extern void update_res(struct pci_root_info *info, resource_size_t start,
>                        resource_size_t end, unsigned long flags, int 
> merge);
> 
> My lspci
> --------
> 
> 00:00.0 Host bridge: VIA Technologies, Inc. K8T890CF Host Bridge
> 00:00.1 Host bridge: VIA Technologies, Inc. VT3351 Host Bridge
> 00:00.2 Host bridge: VIA Technologies, Inc. VT3351 Host Bridge
> 00:00.3 Host bridge: VIA Technologies, Inc. VT3351 Host Bridge
> 00:00.4 Host bridge: VIA Technologies, Inc. VT3351 Host Bridge
> 00:00.5 PIC: VIA Technologies, Inc. VT3351 I/O APIC Interrupt Controller
> 00:00.7 Host bridge: VIA Technologies, Inc. VT3351 Host Bridge
> 00:01.0 PCI bridge: VIA Technologies, Inc. [K8T890 North / VT8237 South] 
> PCI Bridge
> 00:02.0 PCI bridge: VIA Technologies, Inc. K8T890 PCI to PCI Bridge 
> Controller
> 00:03.0 PCI bridge: VIA Technologies, Inc. K8T890 PCI to PCI Bridge 
> Controller
> 00:03.1 PCI bridge: VIA Technologies, Inc. K8T890 PCI to PCI Bridge 
> Controller
> 00:03.2 PCI bridge: VIA Technologies, Inc. K8T890 PCI to PCI Bridge 
> Controller
> 00:03.3 PCI bridge: VIA Technologies, Inc. K8T890 PCI to PCI Bridge 
> Controller
> 00:0f.0 RAID bus controller: VIA Technologies, Inc. VT8237A SATA 2-Port 
> Controller (rev 80)
> 00:0f.1 IDE interface: VIA Technologies, Inc. 
> VT82C586A/B/VT82C686/A/B/VT823x/A/C PIPC Bus Master IDE (rev 07)
> 00:10.0 USB Controller: VIA Technologies, Inc. VT82xxxxx UHCI USB 1.1 
> Controller (rev a0)
> 00:10.1 USB Controller: VIA Technologies, Inc. VT82xxxxx UHCI USB 1.1 
> Controller (rev a0)
> 00:10.2 USB Controller: VIA Technologies, Inc. VT82xxxxx UHCI USB 1.1 
> Controller (rev a0)
> 00:10.3 USB Controller: VIA Technologies, Inc. VT82xxxxx UHCI USB 1.1 
> Controller (rev a0)
> 00:10.4 USB Controller: VIA Technologies, Inc. USB 2.0 (rev 86)
> 00:11.0 ISA bridge: VIA Technologies, Inc. VT8237A PCI to ISA Bridge
> 00:11.7 Host bridge: VIA Technologies, Inc. VT8251 Ultra VLINK Controller
> 00:13.0 Host bridge: VIA Technologies, Inc. VT8237A Host Bridge
> 00:18.0 Host bridge: Advanced Micro Devices [AMD] K8 [Athlon64/Opteron] 
> HyperTransport Technology Configuration
> 00:18.1 Host bridge: Advanced Micro Devices [AMD] K8 [Athlon64/Opteron] 
> Address Map
> 00:18.2 Host bridge: Advanced Micro Devices [AMD] K8 [Athlon64/Opteron] 
> DRAM Controller
> 00:18.3 Host bridge: Advanced Micro Devices [AMD] K8 [Athlon64/Opteron] 
> Miscellaneous Control
> 02:00.0 SATA controller: JMicron Technology Corp. JMB362/JMB363 Serial 
> ATA Controller (rev 02)
> 02:00.1 IDE interface: JMicron Technology Corp. JMB362/JMB363 Serial ATA 
> Controller (rev 02)
> 03:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. 
> RTL8111/8168B PCI Express Gigabit Ethernet controller (rev 01)
> 06:00.0 VGA compatible controller: nVidia Corporation G71 [GeForce 7900 
> GS] (rev a1)
> 80:01.0 Audio device: VIA Technologies, Inc. VT1708/A [Azalia HDAC] (VIA 
> High Definition Audio Controller) (rev 10)

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: x86/pci Oops with CONFIG_SND_HDA_INTEL
  2010-05-19 16:44 ` Bjorn Helgaas
@ 2010-05-19 17:16   ` Graham Ramsey
  2010-05-19 18:01     ` Yinghai
  0 siblings, 1 reply; 29+ messages in thread
From: Graham Ramsey @ 2010-05-19 17:16 UTC (permalink / raw)
  To: Bjorn Helgaas; +Cc: linux-kernel, Yinghai Lu, linux-pci

On 19/05/10 17:44, Bjorn Helgaas wrote:
> On Wednesday, May 19, 2010 09:13:24 am Graham Ramsey wrote:
>    
>> I am on x86_64 with latest (v2.6.34) kernel. When i set
>> CONFIG_SND_HDA_INTEL=Y It hangs at an early stage in boot with kernel oops.
>> When i use CONFIG_SND_HDA_INTEL=M the machine will boot, and i get the
>> dmesg (below).
>>
>> I have bisected down to one commit that causes the problem:
>>
>>     commit 3e3da00c01d050307e753fb7b3e84aefc16da0d0
>>     x86/pci: AMD one chain system to use pci read out res
>>     ...
>>      
> I CC'd Yinghai, the author of that patch.  That commit went in after
> 2.6.33, so this is probably a regression between .33 and .34.  Can
> you open a report at https://bugzilla.kernel.org and respond to this
> thread with the URL?
>
> Please attach the complete dmesg (with SND_HDA_INTEL=m) to the
> bugzilla.
>
> Thanks a lot for your report!
>
> Bjorn
>
>    
Done
https://bugzilla.kernel.org/show_bug.cgi?id=16007


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: x86/pci Oops with CONFIG_SND_HDA_INTEL
  2010-05-19 17:16   ` Graham Ramsey
@ 2010-05-19 18:01     ` Yinghai
  2010-05-19 22:47       ` Graham Ramsey
  0 siblings, 1 reply; 29+ messages in thread
From: Yinghai @ 2010-05-19 18:01 UTC (permalink / raw)
  To: Graham Ramsey; +Cc: Bjorn Helgaas, linux-kernel, linux-pci

On 05/19/2010 10:16 AM, Graham Ramsey wrote:
> On 19/05/10 17:44, Bjorn Helgaas wrote:
>> On Wednesday, May 19, 2010 09:13:24 am Graham Ramsey wrote:
>>   
>>> I am on x86_64 with latest (v2.6.34) kernel. When i set
>>> CONFIG_SND_HDA_INTEL=Y It hangs at an early stage in boot with kernel
>>> oops.
>>> When i use CONFIG_SND_HDA_INTEL=M the machine will boot, and i get the
>>> dmesg (below).
>>>
>>> I have bisected down to one commit that causes the problem:
>>>
>>>     commit 3e3da00c01d050307e753fb7b3e84aefc16da0d0
>>>     x86/pci: AMD one chain system to use pci read out res
>>>     ...
>>>      
>> I CC'd Yinghai, the author of that patch.  That commit went in after
>> 2.6.33, so this is probably a regression between .33 and .34.  Can
>> you open a report at https://bugzilla.kernel.org and respond to this
>> thread with the URL?
>>
>> Please attach the complete dmesg (with SND_HDA_INTEL=m) to the
>> bugzilla.
>>
>> Thanks a lot for your report!
>>

please send out bootlog with pci=earlydump.

It looks like your system has a very sick BIOS.

The system appears to have two HT chains:

PCI: Probing PCI hardware (bus 00)
...
PCI: Discovered primary peer bus 80 [IRQ]


rt to non-coherent only set one link:
node 0 link 0: io port [1000, ffffff]
TOM: 0000000080000000 aka 2048M
node 0 link 0: mmio [e0000000, efffffff]
node 0 link 0: mmio [a0000, bffff]
node 0 link 0: mmio [80000000, ffffffff]
bus: [00, ff] on node 0 link 0

YH

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: x86/pci Oops with CONFIG_SND_HDA_INTEL
  2010-05-19 18:01     ` Yinghai
@ 2010-05-19 22:47       ` Graham Ramsey
  2010-05-20  0:03         ` Yinghai
  0 siblings, 1 reply; 29+ messages in thread
From: Graham Ramsey @ 2010-05-19 22:47 UTC (permalink / raw)
  To: Yinghai; +Cc: Bjorn Helgaas, linux-kernel, linux-pci

On 19/05/10 19:01, Yinghai wrote:
> On 05/19/2010 10:16 AM, Graham Ramsey wrote:
>    
>> On 19/05/10 17:44, Bjorn Helgaas wrote:
>>      
>>> On Wednesday, May 19, 2010 09:13:24 am Graham Ramsey wrote:
>>>
>>>        
>>>> I am on x86_64 with latest (v2.6.34) kernel. When i set
>>>> CONFIG_SND_HDA_INTEL=Y It hangs at an early stage in boot with kernel
>>>> oops.
>>>> When i use CONFIG_SND_HDA_INTEL=M the machine will boot, and i get the
>>>> dmesg (below).
>>>>
>>>> I have bisected down to one commit that causes the problem:
>>>>
>>>>      commit 3e3da00c01d050307e753fb7b3e84aefc16da0d0
>>>>      x86/pci: AMD one chain system to use pci read out res
>>>>      ...
>>>>
>>>>          
>>> I CC'd Yinghai, the author of that patch.  That commit went in after
>>> 2.6.33, so this is probably a regression between .33 and .34.  Can
>>> you open a report at https://bugzilla.kernel.org and respond to this
>>> thread with the URL?
>>>
>>> Please attach the complete dmesg (with SND_HDA_INTEL=m) to the
>>> bugzilla.
>>>
>>> Thanks a lot for your report!
>>>
>>>        
> please send out bootlog with pci=earlydump.
>
> looks like your system have a very sick BIOS,
>
> system have two HT chains.
>
> PCI: Probing PCI hardware (bus 00)
> ...
> PCI: Discovered primary peer bus 80 [IRQ]
>
>
> rt to non-coherent only set one link:
> node 0 link 0: io port [1000, ffffff]
> TOM: 0000000080000000 aka 2048M
> node 0 link 0: mmio [e0000000, efffffff]
> node 0 link 0: mmio [a0000, bffff]
> node 0 link 0: mmio [80000000, ffffffff]
> bus: [00, ff] on node 0 link 0
>
> YH
>
>    
I have uploaded full boot log (of a working kernel) to bug if that is ok

https://bugzilla.kernel.org/attachment.cgi?id=26444


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: x86/pci Oops with CONFIG_SND_HDA_INTEL
  2010-05-19 22:47       ` Graham Ramsey
@ 2010-05-20  0:03         ` Yinghai
  2010-05-20  0:22           ` Jesse Barnes
  0 siblings, 1 reply; 29+ messages in thread
From: Yinghai @ 2010-05-20  0:03 UTC (permalink / raw)
  To: Graham Ramsey, Jesse Barnes; +Cc: Bjorn Helgaas, linux-kernel, linux-pci

On 05/19/2010 03:47 PM, Graham Ramsey wrote:
> On 19/05/10 19:01, Yinghai wrote:
>> On 05/19/2010 10:16 AM, Graham Ramsey wrote:
>>   
>>> On 19/05/10 17:44, Bjorn Helgaas wrote:
>>>     
>>>> On Wednesday, May 19, 2010 09:13:24 am Graham Ramsey wrote:
>>>>
>>>>       
>>>>> I am on x86_64 with latest (v2.6.34) kernel. When i set
>>>>> CONFIG_SND_HDA_INTEL=Y It hangs at an early stage in boot with kernel
>>>>> oops.
>>>>> When i use CONFIG_SND_HDA_INTEL=M the machine will boot, and i get the
>>>>> dmesg (below).
>>>>>
>>>>> I have bisected down to one commit that causes the problem:
>>>>>
>>>>>      commit 3e3da00c01d050307e753fb7b3e84aefc16da0d0
>>>>>      x86/pci: AMD one chain system to use pci read out res
>>>>>      ...
>>>>>
>>>>>          
>>>> I CC'd Yinghai, the author of that patch.  That commit went in after
>>>> 2.6.33, so this is probably a regression between .33 and .34.  Can
>>>> you open a report at https://bugzilla.kernel.org and respond to this
>>>> thread with the URL?
>>>>
>>>> Please attach the complete dmesg (with SND_HDA_INTEL=m) to the
>>>> bugzilla.
>>>>
>>>> Thanks a lot for your report!
>>>>
>>>>        
>> please send out bootlog with pci=earlydump.
>>
>> looks like your system have a very sick BIOS,
>>
>> system have two HT chains.
>>
>> PCI: Probing PCI hardware (bus 00)
>> ...
>> PCI: Discovered primary peer bus 80 [IRQ]
>>
>>
>> rt to non-coherent only set one link:
>> node 0 link 0: io port [1000, ffffff]
>> TOM: 0000000080000000 aka 2048M
>> node 0 link 0: mmio [e0000000, efffffff]
>> node 0 link 0: mmio [a0000, bffff]
>> node 0 link 0: mmio [80000000, ffffffff]
>> bus: [00, ff] on node 0 link 0
>>
>> YH
>>
>>    
> I have uploaded full boot log (of a working kernel) to bug if that is ok
> 
> https://bugzilla.kernel.org/attachment.cgi?id=26444
> 

Ah, that 80:01.0 is a standalone device; the system still only has one HT chain.

It is CRAZY that they can sell such poorly designed chips.

Actually, 3e3da00c fixes another bug on systems with one HT chain.

Jesse,
We have two options:
1. revert that 3e3da00c
2. or use quirks to blacklist systems with a VIA chipset.

please let me know which one you prefer.

Thanks

Yinghai

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: x86/pci Oops with CONFIG_SND_HDA_INTEL
  2010-05-20  0:03         ` Yinghai
@ 2010-05-20  0:22           ` Jesse Barnes
  2010-05-20  0:36             ` Yinghai
  0 siblings, 1 reply; 29+ messages in thread
From: Jesse Barnes @ 2010-05-20  0:22 UTC (permalink / raw)
  To: Yinghai; +Cc: Graham Ramsey, Bjorn Helgaas, linux-kernel, linux-pci

On Wed, 19 May 2010 17:03:04 -0700
Yinghai <yinghai.lu@oracle.com> wrote:

> On 05/19/2010 03:47 PM, Graham Ramsey wrote:
> > On 19/05/10 19:01, Yinghai wrote:
> >> On 05/19/2010 10:16 AM, Graham Ramsey wrote:
> >>   
> >>> On 19/05/10 17:44, Bjorn Helgaas wrote:
> >>>     
> >>>> On Wednesday, May 19, 2010 09:13:24 am Graham Ramsey wrote:
> >>>>
> >>>>       
> >>>>> I am on x86_64 with latest (v2.6.34) kernel. When i set
> >>>>> CONFIG_SND_HDA_INTEL=Y It hangs at an early stage in boot with kernel
> >>>>> oops.
> >>>>> When i use CONFIG_SND_HDA_INTEL=M the machine will boot, and i get the
> >>>>> dmesg (below).
> >>>>>
> >>>>> I have bisected down to one commit that causes the problem:
> >>>>>
> >>>>>      commit 3e3da00c01d050307e753fb7b3e84aefc16da0d0
> >>>>>      x86/pci: AMD one chain system to use pci read out res
> >>>>>      ...
> >>>>>
> >>>>>          
> >>>> I CC'd Yinghai, the author of that patch.  That commit went in after
> >>>> 2.6.33, so this is probably a regression between .33 and .34.  Can
> >>>> you open a report at https://bugzilla.kernel.org and respond to this
> >>>> thread with the URL?
> >>>>
> >>>> Please attach the complete dmesg (with SND_HDA_INTEL=m) to the
> >>>> bugzilla.
> >>>>
> >>>> Thanks a lot for your report!
> >>>>
> >>>>        
> >> please send out bootlog with pci=earlydump.
> >>
> >> looks like your system have a very sick BIOS,
> >>
> >> system have two HT chains.
> >>
> >> PCI: Probing PCI hardware (bus 00)
> >> ...
> >> PCI: Discovered primary peer bus 80 [IRQ]
> >>
> >>
> >> rt to non-coherent only set one link:
> >> node 0 link 0: io port [1000, ffffff]
> >> TOM: 0000000080000000 aka 2048M
> >> node 0 link 0: mmio [e0000000, efffffff]
> >> node 0 link 0: mmio [a0000, bffff]
> >> node 0 link 0: mmio [80000000, ffffffff]
> >> bus: [00, ff] on node 0 link 0
> >>
> >> YH
> >>
> >>    
> > I have uploaded full boot log (of a working kernel) to bug if that is ok
> > 
> > https://bugzilla.kernel.org/attachment.cgi?id=26444
> > 
> 
> ah, that 80:01.0 is standalone device, the system still only have one HT chain.
> 
> that is CRAZY that they can sell those poor designed chips.
> 
> actually 3e3da00c is fixing another bug with one HT chain.
> 
> Jesse,
> We have two options:
> 1. revert that 3e3da00c
> 2. or use quirks to black out system with VIA chipset.
> 
> please let me know which one you prefer.

I'm guessing these VIA chipsets are pretty common; how common is the
platform bug you fixed with 3e3da00c?

I'd rather quirk one platform than a whole bunch...

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: x86/pci Oops with CONFIG_SND_HDA_INTEL
  2010-05-20  0:22           ` Jesse Barnes
@ 2010-05-20  0:36             ` Yinghai
  2010-05-20 17:08               ` [Bug 16007] " Bjorn Helgaas
  0 siblings, 1 reply; 29+ messages in thread
From: Yinghai @ 2010-05-20  0:36 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: Graham Ramsey, Bjorn Helgaas, linux-kernel, linux-pci

On 05/19/2010 05:22 PM, Jesse Barnes wrote:
> On Wed, 19 May 2010 17:03:04 -0700
> Yinghai <yinghai.lu@oracle.com> wrote:
> 
>> On 05/19/2010 03:47 PM, Graham Ramsey wrote:
>>> On 19/05/10 19:01, Yinghai wrote:
>>>> On 05/19/2010 10:16 AM, Graham Ramsey wrote:
>>>>   
>>>>> On 19/05/10 17:44, Bjorn Helgaas wrote:
>>>>>     
>>>>>> On Wednesday, May 19, 2010 09:13:24 am Graham Ramsey wrote:
>>>>>>
>>>>>>       
>>>>>>> I am on x86_64 with latest (v2.6.34) kernel. When i set
>>>>>>> CONFIG_SND_HDA_INTEL=Y It hangs at an early stage in boot with kernel
>>>>>>> oops.
>>>>>>> When i use CONFIG_SND_HDA_INTEL=M the machine will boot, and i get the
>>>>>>> dmesg (below).
>>>>>>>
>>>>>>> I have bisected down to one commit that causes the problem:
>>>>>>>
>>>>>>>      commit 3e3da00c01d050307e753fb7b3e84aefc16da0d0
>>>>>>>      x86/pci: AMD one chain system to use pci read out res
>>>>>>>      ...
>>>>>>>
>>>>>>>          
>>>>>> I CC'd Yinghai, the author of that patch.  That commit went in after
>>>>>> 2.6.33, so this is probably a regression between .33 and .34.  Can
>>>>>> you open a report at https://bugzilla.kernel.org and respond to this
>>>>>> thread with the URL?
>>>>>>
>>>>>> Please attach the complete dmesg (with SND_HDA_INTEL=m) to the
>>>>>> bugzilla.
>>>>>>
>>>>>> Thanks a lot for your report!
>>>>>>
>>>>>>        
>>>> please send out bootlog with pci=earlydump.
>>>>
>>>> looks like your system have a very sick BIOS,
>>>>
>>>> system have two HT chains.
>>>>
>>>> PCI: Probing PCI hardware (bus 00)
>>>> ...
>>>> PCI: Discovered primary peer bus 80 [IRQ]
>>>>
>>>>
>>>> rt to non-coherent only set one link:
>>>> node 0 link 0: io port [1000, ffffff]
>>>> TOM: 0000000080000000 aka 2048M
>>>> node 0 link 0: mmio [e0000000, efffffff]
>>>> node 0 link 0: mmio [a0000, bffff]
>>>> node 0 link 0: mmio [80000000, ffffffff]
>>>> bus: [00, ff] on node 0 link 0
>>>>
>>>> YH
>>>>
>>>>    
>>> I have uploaded full boot log (of a working kernel) to bug if that is ok
>>>
>>> https://bugzilla.kernel.org/attachment.cgi?id=26444
>>>
>>
>> ah, that 80:01.0 is standalone device, the system still only have one HT chain.
>>
>> that is CRAZY that they can sell those poor designed chips.
>>
>> actually 3e3da00c is fixing another bug with one HT chain.
>>
>> Jesse,
>> We have two options:
>> 1. revert that 3e3da00c
>> 2. or use quirks to black out system with VIA chipset.
>>
>> please let me know which one you prefer.
> 
> I'm guessing these VIA chipsets are pretty common; how common is the
> platform bug you fixed with 3e3da00c?

One AMD 64-bit laptop with FireWire. I cannot find the mail any more.

> 
> I'd rather quirk one platform than a whole bunch...

Maybe you can revert that patch first.

Thanks

Yinghai

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Bug 16007] x86/pci Oops with CONFIG_SND_HDA_INTEL
  2010-05-20  0:36             ` Yinghai
@ 2010-05-20 17:08               ` Bjorn Helgaas
  2010-06-02 16:58                 ` Bjorn Helgaas
  0 siblings, 1 reply; 29+ messages in thread
From: Bjorn Helgaas @ 2010-05-20 17:08 UTC (permalink / raw)
  To: Yinghai
  Cc: Jesse Barnes, Graham Ramsey, linux-kernel, linux-pci, bugzilla-daemon

> >>>> looks like your system have a very sick BIOS,
> >>>>
> >>>> system have two HT chains.
> >>>>
> >>>> PCI: Probing PCI hardware (bus 00)
> >>>> PCI: Discovered primary peer bus 80 [IRQ]
> >>>>
> >>>> rt to non-coherent only set one link:
> >>>> node 0 link 0: io port [1000, ffffff]
> >>>> TOM: 0000000080000000 aka 2048M
> >>>> node 0 link 0: mmio [e0000000, efffffff]
> >>>> node 0 link 0: mmio [a0000, bffff]
> >>>> node 0 link 0: mmio [80000000, ffffffff]
> >>>> bus: [00, ff] on node 0 link 0

> >> ah, that 80:01.0 is standalone device, the system still only have one HT chain.
> >> that is CRAZY that they can sell those poor designed chips.
> >>
> >> actually 3e3da00c is fixing another bug with one HT chain.
> >>
> >> We have two options:
> >> 1. revert that 3e3da00c
> >> 2. or use quirks to black out system with VIA chipset.

This is voodoo kernel development, and I don't think we should do it.

Can you explain the cause of Graham's oops?  All I can see is that we
discovered a host bridge window of [mem 0x80000000-0xfcffffffff] to
bus 00, we did *not* find a bridge leading to bus 80, we found a device
on bus 80 that is inside the window forwarded to bus 00, so we moved
that device outside the window:

  bus: 00 index 1 [mem 0x80000000-0xfcffffffff]
  pci 0000:80:01.0: reg 10: [mem 0xfebfc000-0xfebfffff 64bit]
  pci 0000:80:01.0: address space collision: [mem 0xfebfc000-0xfebfffff 64bit] conflicts with PCI Bus #00 [mem 0x80000000-0xfcffffffff]
  pci 0000:80:01.0: BAR 0: set to [mem 0xfd00000000-0xfd00003fff 64bit]

I have no idea why this led to a page fault at ffffc90000078000:

  BUG: unable to handle kernel paging request at ffffc90000078000
  IP: [<ffffffffa0018d11>] azx_probe+0x3a2/0xa6a [snd_hda_intel]

It looks to me like amd_bus.c just failed to discover the host bridge
to bus 80.  If the BIOS can program the chipset to work that way, we
should be able to figure that out, too.

Graham, I think your "pci=earlydump" log is missing the KERN_DEBUG
output.  It would be interesting to see that for the patched kernel
so we can compare it with 2.6.34.

Bjorn

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Bug 16007] x86/pci Oops with CONFIG_SND_HDA_INTEL
  2010-05-20 17:08               ` [Bug 16007] " Bjorn Helgaas
@ 2010-06-02 16:58                 ` Bjorn Helgaas
  2010-06-11 21:49                   ` Bjorn Helgaas
  0 siblings, 1 reply; 29+ messages in thread
From: Bjorn Helgaas @ 2010-06-02 16:58 UTC (permalink / raw)
  To: Yinghai
  Cc: Jesse Barnes, Graham Ramsey, linux-kernel, linux-pci, bugzilla-daemon

I think the basic problem is that Yinghai's patch broke your system,
and this is a regression between 2.6.33 and 2.6.34.

We could use a quirk like yours (which looks fine, BTW) to cover up
this regression, but I don't like that approach because other machines
are probably affected by the same issue, and we'd have to find and
fix them one-by-one.

I think it'd be better to figure out the problem with 3e3da00c01d
and fix or revert it.  I said earlier that I wasn't in favor of just
reverting it, and I still don't like that option because it will
likely break something.  But Yinghai didn't supply any details about
the system that 3e3da00c01d fixed, so I don't know how to fix things
so both that system and yours work.

I assume that 2.6.34 with 3e3da00c01d reverted will work fine even
without "pci=use_crs".  Can you try that and attach the dmesg log?

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Bug 16007] x86/pci Oops with CONFIG_SND_HDA_INTEL
  2010-06-02 16:58                 ` Bjorn Helgaas
@ 2010-06-11 21:49                   ` Bjorn Helgaas
  2010-06-11 22:08                     ` Yinghai Lu
  2010-06-11 23:06                     ` Yinghai Lu
  0 siblings, 2 replies; 29+ messages in thread
From: Bjorn Helgaas @ 2010-06-11 21:49 UTC (permalink / raw)
  To: Yinghai
  Cc: Jesse Barnes, Graham Ramsey, linux-kernel, linux-pci,
	bugzilla-daemon, Myron Stowe, Robert Richter, Harald Welte,
	Joseph Chan

[If you haven't been following this bug, the report is at [3].]

Here's a theory.  I'm not an expert in HyperTransport, so maybe somebody
who knows HyperTransport and/or VIA chipsets can validate or refute it.

This is based on the _HyperTransport I/O Link Specification_, rev 3.10b [1],
and the _BIOS and Kernel Developer's Guide (BKDG) for AMD Family 10h
Processors_ [2].

In a nutshell, I think the problem is that amd_bus.c treats a
HyperTransport (HT) host bridge as though it were a PCI host bridge.  In
particular, when an HT chain contains more than one PCI host bridge, the
HT host bridge apertures encompass all the PCI host bridges, but
amd_bus.c mistakenly assigns all those resources to one PCI host bridge.

From a software point of view, HyperTransport is similar but not
identical to PCI.  It is possible to make native HyperTransport
peripheral devices, but PCI devices must be attached via a
HyperTransport-to-PCI bridge [1, sec 4.1].

A PCI host bridge has a platform-specific non-PCI connection, e.g., a
front-side bus, on the primary (upstream) side and a PCI bus on the
secondary (downstream) side.  Note that in the HyperTransport spec,
"host bridge" refers to the interface from the host, e.g., CPU cores, to
a HyperTransport chain.  This HyperTransport host bridge has a
HyperTransport link on the secondary side, *not* a PCI bus.

A HyperTransport-to-PCI bridge is one kind of PCI host bridge, because
the primary side is HyperTransport and the secondary side is PCI.

Graham's machine contains one HT host bridge leading to an HT chain, and
it has PCI devices on buses 00, 02, 03, 06, and 80.  In addition, the HT
host bridge configuration registers appear at device 18 (hex) in bus 00
configuration space, though they are not actually PCI functions.  PCI
buses 02, 03, and 06 are reachable from bus 00 via the PCI-to-PCI
bridges at 00:03.3, 00:03.2, and 00:02.0, respectively.

However, there are no PCI-to-PCI bridges that lead to bus 00 or bus 80,
so the HT chain must contain two separate PCI host bridges that lead to
them.

Now, here's the problem: amd_bus.c reads the HT host bridge configuration
and learns that it routes buses 00-ff and the related address space,
including the following range, down the HT chain at node 0, link 0:

    [mem 0x80000000-0xfcffffffff]

That makes sense, because both PCI host bridges are on that HT chain, so
the HT host bridge has to forward all that address space.  The problem
is that amd_bus.c assumes there's only one PCI host bridge on the
chain, so it assigns *all* that address space to PCI bus 00.

This doesn't work because parts of that address space belong to bus 80,
not bus 00, and we can't reach bus 80 from PCI bus 00.  In particular,
we know that at least the following address space is routed to bus 80,
because the 80:01.0 device does work at this address, which is in the
middle of the range we found above:

    [mem 0xfebfc000-0xfebfffff]

(Note that we can reach bus 80 from the HT chain, but the HT chain is
outside the PCI domain, even though some of the HT registers appear in
PCI bus 00 config space.  We need a second PCI host bridge from the HT
chain to PCI bus 80.)

The HT spec does suggest that an HT/PCI host bridge should implement a
HyperTransport Bridge Header [1, sec 7.4].  This header would make the
HT/PCI host bridge look just like a PCI-to-PCI bridge, with the usual
primary/secondary/subordinate bus numbers, memory, prefetchable memory,
and I/O port apertures, etc.

If all the HT/PCI host bridges on a chain were implemented this way, I
think it probably would work to pretend the HT host bridge is a PCI host
bridge.  But this sort of implementation is apparently not universal.
The VIA chipset in Graham's machine doesn't do it that way, and the
Serverworks HT-2100 chipset in the HP DL785 doesn't either.


[1] http://www.hypertransport.org/docs/twgdocs/HTC20051222-0046-0033_changes.pdf
[2] http://support.amd.com/us/Embedded_TechDocs/31116-Public-GH-BKDG_3-28_5-28-09.pdf
[3] https://bugzilla.kernel.org/show_bug.cgi?id=16007

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Bug 16007] x86/pci Oops with CONFIG_SND_HDA_INTEL
  2010-06-11 21:49                   ` Bjorn Helgaas
@ 2010-06-11 22:08                     ` Yinghai Lu
  2010-06-11 23:06                     ` Yinghai Lu
  1 sibling, 0 replies; 29+ messages in thread
From: Yinghai Lu @ 2010-06-11 22:08 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Jesse Barnes, Graham Ramsey, linux-kernel, linux-pci,
	bugzilla-daemon, Myron Stowe, Robert Richter, Harald Welte,
	Joseph Chan

On Fri, Jun 11, 2010 at 2:49 PM, Bjorn Helgaas <bjorn.helgaas@hp.com> wrote:
> [If you haven't been following this bug, the report is at [3].]
>
> Here's a theory.  I'm not an expert in HyperTransport, so maybe somebody
> who knows HyperTransport and/or VIA chipsets can validate or refute it.
>
> This is based on the _HyperTransport I/O Link Specification_, rev 3.10b [1],
> and the _BIOS and Kernel Developer's Guide (BKDG) for AMD Family 10h
> Processors_ [2].
>
> In a nutshell, I think the problem is that amd_bus.c treats a
> HyperTransport (HT) host bridge as though it were a PCI host bridge.  In
> particular, when an HT chain contains more than one PCI host bridge, the
> HT host bridge apertures encompass all the PCI host bridges, but
> amd_bus.c mistakenly assigns all those resources to one PCI host bridge.

I don't think so. that system only have one HT chain.

May 19 23:20:33 ocham kernel: pci 0000:00:18.1 config space:
May 19 23:20:33 ocham kernel: 00: 22 10 01 11 00 00 00 00 00 00 00 06
00 00 80 00
May 19 23:20:33 ocham kernel: 10: 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00
May 19 23:20:33 ocham kernel: 20: 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00
May 19 23:20:33 ocham kernel: 30: 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00
May 19 23:20:33 ocham kernel: 40: 03 00 00 00 00 00 7f 00 00 00 00 00
01 00 00 00
May 19 23:20:33 ocham kernel: 50: 00 00 00 00 02 00 00 00 00 00 00 00
03 00 00 00
May 19 23:20:33 ocham kernel: 60: 00 00 00 00 04 00 00 00 00 00 00 00
05 00 00 00
May 19 23:20:33 ocham kernel: 70: 00 00 00 00 06 00 00 00 00 00 00 00
07 00 00 00
May 19 23:20:33 ocham kernel: 80: 03 00 e0 00 80 ff ef 00 00 00 00 00
00 00 00 00
May 19 23:20:33 ocham kernel: 90: 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00
May 19 23:20:33 ocham kernel: a0: 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00
May 19 23:20:33 ocham kernel: b0: 03 0a 00 00 00 0b 00 00 03 00 80 00
00 ff ff 00
May 19 23:20:33 ocham kernel: c0: 13 10 00 00 00 f0 ff 00 00 00 00 00
00 00 00 00
May 19 23:20:33 ocham kernel: d0: 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00
May 19 23:20:33 ocham kernel: e0: 03 00 00 ff 00 00 00 00 00 00 00 00
00 00 00 00
May 19 23:20:33 ocham kernel: f0: 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00

the (0xe4) =  ff 00 00 03

mean it will route pci operation all to node0 link0.

that chip from VIA has some design problem that will produce one orphan device.

May 19 23:20:33 ocham kernel: pci 0000:80:01.0 config space:
May 19 23:20:33 ocham kernel: 00: 06 11 88 32 06 00 10 00 10 00 03 04
10 00 00 00
May 19 23:20:33 ocham kernel: 10: 04 c0 bf fe 00 00 00 00 00 00 00 00
00 00 00 00
May 19 23:20:33 ocham kernel: 20: 00 00 00 00 00 00 00 00 00 00 00 00
49 18 88 08
May 19 23:20:33 ocham kernel: 30: 00 00 00 00 50 00 00 00 00 00 00 00
0b 01 00 00
May 19 23:20:33 ocham kernel: 40: 00 30 00 00 00 00 00 00 00 00 00 00
00 00 00 00
May 19 23:20:33 ocham kernel: 50: 01 60 42 c8 00 00 00 00 00 00 00 00
00 00 00 00
May 19 23:20:33 ocham kernel: 60: 05 70 80 00 00 00 00 00 00 00 00 00
00 00 00 00
May 19 23:20:33 ocham kernel: 70: 10 00 91 00 00 00 00 00 00 00 30 00
00 00 00 00
May 19 23:20:33 ocham kernel: 80: 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00
May 19 23:20:33 ocham kernel: 90: 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00
May 19 23:20:33 ocham kernel: a0: 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00
May 19 23:20:33 ocham kernel: b0: 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00
May 19 23:20:33 ocham kernel: c0: 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00
May 19 23:20:33 ocham kernel: d0: 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00
May 19 23:20:33 ocham kernel: e0: 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00
May 19 23:20:33 ocham kernel: f0: 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00

YH

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Bug 16007] x86/pci Oops with CONFIG_SND_HDA_INTEL
  2010-06-11 21:49                   ` Bjorn Helgaas
  2010-06-11 22:08                     ` Yinghai Lu
@ 2010-06-11 23:06                     ` Yinghai Lu
  2010-06-14 14:18                       ` Bjorn Helgaas
                                         ` (2 more replies)
  1 sibling, 3 replies; 29+ messages in thread
From: Yinghai Lu @ 2010-06-11 23:06 UTC (permalink / raw)
  To: Bjorn Helgaas, Graham Ramsey
  Cc: Jesse Barnes, linux-kernel, linux-pci, bugzilla-daemon,
	Myron Stowe, Robert Richter, Harald Welte, Joseph Chan


please check if this one workaround the problem

Thanks

Yinghai Lu

[PATCH] x86, pci: handle fallout pci devices with peer root bus

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 arch/x86/pci/bus_numa.c |    4 +++-
 kernel/resource.c       |    2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

Index: linux-2.6/arch/x86/pci/bus_numa.c
===================================================================
--- linux-2.6.orig/arch/x86/pci/bus_numa.c
+++ linux-2.6/arch/x86/pci/bus_numa.c
@@ -22,7 +22,8 @@ void x86_pci_root_bus_res_quirks(struct
 		return;
 
 	for (i = 0; i < pci_root_num; i++) {
-		if (pci_root_info[i].bus_min == b->number)
+		if (pci_root_info[i].bus_min <= b->number &&
+		    pci_root_info[i].bus_max >= b->number)
 			break;
 	}
 
@@ -37,6 +38,7 @@ void x86_pci_root_bus_res_quirks(struct
 	for (j = 0; j < info->res_num; j++) {
 		struct resource *res;
 		struct resource *root;
+		struct resource *tmp;
 
 		res = &info->res[j];
 		pci_bus_add_resource(b, res, 0);
Index: linux-2.6/kernel/resource.c
===================================================================
--- linux-2.6.orig/kernel/resource.c
+++ linux-2.6/kernel/resource.c
@@ -451,7 +451,7 @@ static struct resource * __insert_resour
 		if (!first)
 			return first;
 
-		if (first == parent)
+		if (first == parent || first == new)
 			return first;
 
 		if ((first->start > new->start) || (first->end < new->end))

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Bug 16007] x86/pci Oops with CONFIG_SND_HDA_INTEL
  2010-06-11 23:06                     ` Yinghai Lu
@ 2010-06-14 14:18                       ` Bjorn Helgaas
  2010-06-14 17:47                       ` [PATCH -v2] x86, pci: Handle fallout pci devices with peer root bus Yinghai Lu
  2010-06-21 17:28                       ` [Bug 16007] x86/pci Oops with CONFIG_SND_HDA_INTEL Bjorn Helgaas
  2 siblings, 0 replies; 29+ messages in thread
From: Bjorn Helgaas @ 2010-06-14 14:18 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Graham Ramsey, Jesse Barnes, linux-kernel, linux-pci,
	bugzilla-daemon, Myron Stowe, Robert Richter, Harald Welte,
	Joseph Chan

On Friday, June 11, 2010 05:06:49 pm Yinghai Lu wrote:
> 
> please check if this one workaround the problem
> 
> Thanks
> 
> Yinghai Lu
> 
> [PATCH] x86, pci: handle fallout pci devices with peer root bus
> 
> Signed-off-by: Yinghai Lu <yinghai@kernel.org>

This patch apparently does cover up the problem, but it fails on
so many levels:

  - incomprehensible summary
  - no changelog
  - no bugzilla pointer
  - unrelated junk in patch ("tmp")
  - completely unexplained change to generic resource.c
  - no indication that we understand the root cause

> ---
>  arch/x86/pci/bus_numa.c |    4 +++-
>  kernel/resource.c       |    2 +-
>  2 files changed, 4 insertions(+), 2 deletions(-)
> 
> Index: linux-2.6/arch/x86/pci/bus_numa.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/pci/bus_numa.c
> +++ linux-2.6/arch/x86/pci/bus_numa.c
> @@ -22,7 +22,8 @@ void x86_pci_root_bus_res_quirks(struct
>  		return;
>  
>  	for (i = 0; i < pci_root_num; i++) {
> -		if (pci_root_info[i].bus_min == b->number)
> +		if (pci_root_info[i].bus_min <= b->number &&
> +		    pci_root_info[i].bus_max >= b->number)
>  			break;
>  	}
>  
> @@ -37,6 +38,7 @@ void x86_pci_root_bus_res_quirks(struct
>  	for (j = 0; j < info->res_num; j++) {
>  		struct resource *res;
>  		struct resource *root;
> +		struct resource *tmp;
>  
>  		res = &info->res[j];
>  		pci_bus_add_resource(b, res, 0);
> Index: linux-2.6/kernel/resource.c
> ===================================================================
> --- linux-2.6.orig/kernel/resource.c
> +++ linux-2.6/kernel/resource.c
> @@ -451,7 +451,7 @@ static struct resource * __insert_resour
>  		if (!first)
>  			return first;
>  
> -		if (first == parent)
> +		if (first == parent || first == new)
>  			return first;
>  
>  		if ((first->start > new->start) || (first->end < new->end))
> 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH -v2] x86, pci: Handle fallout pci devices with peer root bus
  2010-06-11 23:06                     ` Yinghai Lu
  2010-06-14 14:18                       ` Bjorn Helgaas
@ 2010-06-14 17:47                       ` Yinghai Lu
  2010-06-14 18:14                         ` Jesse Barnes
  2010-06-14 18:34                         ` Bjorn Helgaas
  2010-06-21 17:28                       ` [Bug 16007] x86/pci Oops with CONFIG_SND_HDA_INTEL Bjorn Helgaas
  2 siblings, 2 replies; 29+ messages in thread
From: Yinghai Lu @ 2010-06-14 17:47 UTC (permalink / raw)
  To: Jesse Barnes, Thomas Gleixner, Ingo Molnar, H. Peter Anvin
  Cc: Bjorn Helgaas, Graham Ramsey, linux-kernel, linux-pci,
	Robert Richter, Harald Welte, Joseph Chan, Jiri Slaby,
	Hidetoshi Seto, Andrew Morton, Dominik Brodowski


Graham bisected
|    commit 3e3da00c01d050307e753fb7b3e84aefc16da0d0
|    x86/pci: AMD one chain system to use pci read out res

cause the SND_HDA_INTEL doesn't work anymore.

https://bugzilla.kernel.org/show_bug.cgi?id=16007

It turns out that his system with via chipset only have one hypertransport
chain, but does have one extra orphan device 80:01.0

 PCI: Probing PCI hardware (bus 00)
 PCI: Discovered primary peer bus 80 [IRQ]

 node 0 link 0: io port [1000, ffffff]
 TOM: 0000000080000000 aka 2048M
 node 0 link 0: mmio [e0000000, efffffff]
 node 0 link 0: mmio [a0000, bffff]
 node 0 link 0: mmio [80000000, ffffffff]
 bus: [00, ff] on node 0 link 0

Try to make peer root buses to share same mmio/io resources if those peer root
buses fall into the same bus range.

Also need to update insert_resource to avoid insert same resource two times.

We need this patch for 2.6.34 stable.

Reported-by: Graham Ramsey <ramsey.graham@ntlworld.com>
Bisected-by: Graham Ramsey <ramsey.graham@ntlworld.com>
Tested-by: Graham Ramsey <ramsey.graham@ntlworld.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: stable@kernel.org

---
 arch/x86/pci/bus_numa.c |    3 ++-
 kernel/resource.c       |    2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

Index: linux-2.6/arch/x86/pci/bus_numa.c
===================================================================
--- linux-2.6.orig/arch/x86/pci/bus_numa.c
+++ linux-2.6/arch/x86/pci/bus_numa.c
@@ -22,7 +22,8 @@ void x86_pci_root_bus_res_quirks(struct
 		return;
 
 	for (i = 0; i < pci_root_num; i++) {
-		if (pci_root_info[i].bus_min == b->number)
+		if (pci_root_info[i].bus_min <= b->number &&
+		    pci_root_info[i].bus_max >= b->number)
 			break;
 	}
 
Index: linux-2.6/kernel/resource.c
===================================================================
--- linux-2.6.orig/kernel/resource.c
+++ linux-2.6/kernel/resource.c
@@ -451,7 +451,7 @@ static struct resource * __insert_resour
 		if (!first)
 			return first;
 
-		if (first == parent)
+		if (first == parent || first == new)
 			return first;
 
 		if ((first->start > new->start) || (first->end < new->end))

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH -v2] x86, pci: Handle fallout pci devices with peer root bus
  2010-06-14 17:47                       ` [PATCH -v2] x86, pci: Handle fallout pci devices with peer root bus Yinghai Lu
@ 2010-06-14 18:14                         ` Jesse Barnes
  2010-06-14 18:22                           ` Yinghai Lu
  2010-06-14 18:34                         ` Bjorn Helgaas
  1 sibling, 1 reply; 29+ messages in thread
From: Jesse Barnes @ 2010-06-14 18:14 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, Bjorn Helgaas,
	Graham Ramsey, linux-kernel, linux-pci, Robert Richter,
	Harald Welte, Joseph Chan, Jiri Slaby, Hidetoshi Seto,
	Andrew Morton, Dominik Brodowski

On Mon, 14 Jun 2010 10:47:59 -0700
Yinghai Lu <yinghai.lu@oracle.com> wrote:

> 
> Graham bisected
> |    commit 3e3da00c01d050307e753fb7b3e84aefc16da0d0
> |    x86/pci: AMD one chain system to use pci read out res
> 
> cause the SND_HDA_INTEL doesn't work anymore.
> 
> https://bugzilla.kernel.org/show_bug.cgi?id=16007
> 
> It turns out that his system with via chipset only have one hypertransport
> chain, but does have one extra orphan device 80:01.0
> 
>  PCI: Probing PCI hardware (bus 00)
>  PCI: Discovered primary peer bus 80 [IRQ]
> 
>  node 0 link 0: io port [1000, ffffff]
>  TOM: 0000000080000000 aka 2048M
>  node 0 link 0: mmio [e0000000, efffffff]
>  node 0 link 0: mmio [a0000, bffff]
>  node 0 link 0: mmio [80000000, ffffffff]
>  bus: [00, ff] on node 0 link 0
> 
> Try to make peer root buses to share same mmio/io resources if those peer root
> buses fall into the same bus range.
> 
> Also need to update insert_resource to avoid insert same resource two times.

So 3e3da00c01d050307e753fb7b3e84aefc16da0d0 was supposed to address the
case where some laptop RAM ranges ended up incorrect.  Would using _CRS
on those machines also address that problem?  If so, we should consider
dropping amd_bus.c like we did with intel_bus.c.

Yinghai, do you still have people from the RAM bug that could test
using _CRS data?

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH -v2] x86, pci: Handle fallout pci devices with peer root bus
  2010-06-14 18:14                         ` Jesse Barnes
@ 2010-06-14 18:22                           ` Yinghai Lu
  0 siblings, 0 replies; 29+ messages in thread
From: Yinghai Lu @ 2010-06-14 18:22 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, Bjorn Helgaas,
	Graham Ramsey, linux-kernel, linux-pci, Robert Richter,
	Harald Welte, Joseph Chan, Jiri Slaby, Hidetoshi Seto,
	Andrew Morton, Dominik Brodowski

On 06/14/2010 11:14 AM, Jesse Barnes wrote:
> On Mon, 14 Jun 2010 10:47:59 -0700
> Yinghai Lu <yinghai.lu@oracle.com> wrote:
> 
>>
>> Graham bisected
>> |    commit 3e3da00c01d050307e753fb7b3e84aefc16da0d0
>> |    x86/pci: AMD one chain system to use pci read out res
>>
>> cause the SND_HDA_INTEL doesn't work anymore.
>>
>> https://bugzilla.kernel.org/show_bug.cgi?id=16007
>>
>> It turns out that his system with via chipset only have one hypertransport
>> chain, but does have one extra orphan device 80:01.0
>>
>>  PCI: Probing PCI hardware (bus 00)
>>  PCI: Discovered primary peer bus 80 [IRQ]
>>
>>  node 0 link 0: io port [1000, ffffff]
>>  TOM: 0000000080000000 aka 2048M
>>  node 0 link 0: mmio [e0000000, efffffff]
>>  node 0 link 0: mmio [a0000, bffff]
>>  node 0 link 0: mmio [80000000, ffffffff]
>>  bus: [00, ff] on node 0 link 0
>>
>> Try to make peer root buses to share same mmio/io resources if those peer root
>> buses fall into the same bus range.
>>
>> Also need to update insert_resource to avoid insert same resource two times.
> 
> So 3e3da00c01d050307e753fb7b3e84aefc16da0d0 was supposed to address the
> case where some laptop RAM ranges ended up incorrect.  Would using _CRS
> on those machines also address that problem?  If so, we should consider
> dropping amd_bus.c like we did with intel_bus.c.
> 
> Yinghai, do you still have people from the RAM bug that could test
> using _CRS data?

I can not find the mail anymore.

looks like someone is using one AMD k8 Aruma laptop for firewire development.

YH

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH -v2] x86, pci: Handle fallout pci devices with peer root bus
  2010-06-14 17:47                       ` [PATCH -v2] x86, pci: Handle fallout pci devices with peer root bus Yinghai Lu
  2010-06-14 18:14                         ` Jesse Barnes
@ 2010-06-14 18:34                         ` Bjorn Helgaas
  2010-06-14 18:39                           ` H. Peter Anvin
  1 sibling, 1 reply; 29+ messages in thread
From: Bjorn Helgaas @ 2010-06-14 18:34 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Jesse Barnes, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	Graham Ramsey, linux-kernel, linux-pci, Robert Richter,
	Harald Welte, Joseph Chan, Jiri Slaby, Hidetoshi Seto,
	Andrew Morton, Dominik Brodowski

On Monday, June 14, 2010 11:47:59 am Yinghai Lu wrote:
> 
> Graham bisected
> |    commit 3e3da00c01d050307e753fb7b3e84aefc16da0d0
> |    x86/pci: AMD one chain system to use pci read out res
> 
> cause the SND_HDA_INTEL doesn't work anymore.
> 
> https://bugzilla.kernel.org/show_bug.cgi?id=16007
> 
> It turns out that his system with via chipset only have one hypertransport
> chain, but does have one extra orphan device 80:01.0
> 
>  PCI: Probing PCI hardware (bus 00)
>  PCI: Discovered primary peer bus 80 [IRQ]
> 
>  node 0 link 0: io port [1000, ffffff]
>  TOM: 0000000080000000 aka 2048M
>  node 0 link 0: mmio [e0000000, efffffff]
>  node 0 link 0: mmio [a0000, bffff]
>  node 0 link 0: mmio [80000000, ffffffff]
>  bus: [00, ff] on node 0 link 0
> 
> Try to make peer root buses to share same mmio/io resources if those peer root
> buses fall into the same bus range.

Yinghai, did you read https://bugzilla.kernel.org/show_bug.cgi?id=16007#c15 ?

I made the point there that an HT chain may contain multiple HT/PCI
host bridges, but you are stuck on the idea that "one HT chain == one
PCI root bus."

I have not found the "one PCI host bridge per HT chain" requirement
in the HT spec (if you find it, please point me to it).

If an HT chain may contain multiple HT/PCI host bridges, then it's
obvious that the HT host bridge registers read by amd_bus.c don't
contain enough information to correctly assign address space to the
PCI root buses.

> Also need to update insert_resource to avoid insert same resource two times.
> 
> We need this patch for 2.6.34 stable.

No, we don't!  Not yet, anyway.  We need to find the root cause of this
problem, not just paper over it and wait for it to pop up again somewhere
else.

> Reported-by: Graham Ramsey <ramsey.graham@ntlworld.com>
> Bisected-by: Graham Ramsey <ramsey.graham@ntlworld.com>
> Tested-by: Graham Ramsey <ramsey.graham@ntlworld.com>
> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> Cc: stable@kernel.org
> 
> ---
>  arch/x86/pci/bus_numa.c |    3 ++-
>  kernel/resource.c       |    2 +-
>  2 files changed, 3 insertions(+), 2 deletions(-)
> 
> Index: linux-2.6/arch/x86/pci/bus_numa.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/pci/bus_numa.c
> +++ linux-2.6/arch/x86/pci/bus_numa.c
> @@ -22,7 +22,8 @@ void x86_pci_root_bus_res_quirks(struct
>  		return;
>  
>  	for (i = 0; i < pci_root_num; i++) {
> -		if (pci_root_info[i].bus_min == b->number)
> +		if (pci_root_info[i].bus_min <= b->number &&
> +		    pci_root_info[i].bus_max >= b->number)
>  			break;
>  	}
>  
> Index: linux-2.6/kernel/resource.c
> ===================================================================
> --- linux-2.6.orig/kernel/resource.c
> +++ linux-2.6/kernel/resource.c
> @@ -451,7 +451,7 @@ static struct resource * __insert_resour
>  		if (!first)
>  			return first;
>  
> -		if (first == parent)
> +		if (first == parent || first == new)
>  			return first;
>  
>  		if ((first->start > new->start) || (first->end < new->end))
> 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH -v2] x86, pci: Handle fallout pci devices with peer root bus
  2010-06-14 18:34                         ` Bjorn Helgaas
@ 2010-06-14 18:39                           ` H. Peter Anvin
  2010-06-14 18:55                             ` Yinghai Lu
  2010-06-14 19:43                             ` Bjorn Helgaas
  0 siblings, 2 replies; 29+ messages in thread
From: H. Peter Anvin @ 2010-06-14 18:39 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Yinghai Lu, Jesse Barnes, Thomas Gleixner, Ingo Molnar,
	Graham Ramsey, linux-kernel, linux-pci, Robert Richter,
	Harald Welte, Joseph Chan, Jiri Slaby, Hidetoshi Seto,
	Andrew Morton, Dominik Brodowski

On 06/14/2010 11:34 AM, Bjorn Helgaas wrote:
> 
> I made the point there that an HT chain may contain multiple HT/PCI
> host bridges, but you are stuck on the idea that "one HT chain == one
> PCI root bus."
> 
> I have not found the "one PCI host bridge per HT chain" requirement
> in the HT spec (if you find it, please point me to it).
> 
> If an HT chain may contain multiple HT/PCI host bridges, then it's
> obvious that the HT host bridge registers read by amd_bus.c don't
> contain enough information to correctly assign address space to the
> PCI root buses.
> 

A HT-to-PCI bridge appears as a PCI-to-PCI bridge (i.e. a Header Type 1
device), not as a host bridge (a Header Type 0 device).

That is at least the software model as defined.

	-hpa

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH -v2] x86, pci: Handle fallout pci devices with peer root bus
  2010-06-14 18:39                           ` H. Peter Anvin
@ 2010-06-14 18:55                             ` Yinghai Lu
  2010-06-14 20:00                               ` Bjorn Helgaas
  2010-06-14 19:43                             ` Bjorn Helgaas
  1 sibling, 1 reply; 29+ messages in thread
From: Yinghai Lu @ 2010-06-14 18:55 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Bjorn Helgaas, Jesse Barnes, Thomas Gleixner, Ingo Molnar,
	Graham Ramsey, linux-kernel, linux-pci, Robert Richter,
	Harald Welte, Joseph Chan, Jiri Slaby, Hidetoshi Seto,
	Andrew Morton, Dominik Brodowski

On 06/14/2010 11:39 AM, H. Peter Anvin wrote:
> On 06/14/2010 11:34 AM, Bjorn Helgaas wrote:
>>
>> I made the point there that an HT chain may contain multiple HT/PCI
>> host bridges, but you are stuck on the idea that "one HT chain == one
>> PCI root bus."

should be.

>>
>> I have not found the "one PCI host bridge per HT chain" requirement
>> in the HT spec (if you find it, please point me to it).

according to my experience with LinuxBIOS. AMD chipset, nvidia and serverworks (broadcom)

>>
>> If an HT chain may contain multiple HT/PCI host bridges, then it's
>> obvious that the HT host bridge registers read by amd_bus.c don't
>> contain enough information to correctly assign address space to the
>> PCI root buses.

the host bridges is on AMD CPUs, 

>>
> 
> A HT-to-PCI bridge appears as a PCI-to-PCI bridge (i.e. a Header Type 1
> device), not as a host bridge (a Header Type 0 device).
> 
> That is at least the software model as defined.

one HT chain could have some HT devices, HT devices could be HT tunnel or HT bridge.

If it is HT tunnel, the next device will use same primary pci bus number with some addon device number.
If it is HT bridge, will like some kind pci-to-pci bridge.

link between KT890 and vt32551? is some kind va-link? it is not HT between them

somehow the southbridge vt32551 respond the sound_intel from 80:01.0... and it is supposed to be under some pci bridge.

YH

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH -v2] x86, pci: Handle fallout pci devices with peer root bus
  2010-06-14 18:39                           ` H. Peter Anvin
  2010-06-14 18:55                             ` Yinghai Lu
@ 2010-06-14 19:43                             ` Bjorn Helgaas
  1 sibling, 0 replies; 29+ messages in thread
From: Bjorn Helgaas @ 2010-06-14 19:43 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Yinghai Lu, Jesse Barnes, Thomas Gleixner, Ingo Molnar,
	Graham Ramsey, linux-kernel, linux-pci, Robert Richter,
	Harald Welte, Joseph Chan, Jiri Slaby, Hidetoshi Seto,
	Andrew Morton, Dominik Brodowski

On Monday, June 14, 2010 12:39:54 pm H. Peter Anvin wrote:
> On 06/14/2010 11:34 AM, Bjorn Helgaas wrote:
> > 
> > I made the point there that an HT chain may contain multiple HT/PCI
> > host bridges, but you are stuck on the idea that "one HT chain == one
> > PCI root bus."
> > 
> > I have not found the "one PCI host bridge per HT chain" requirement
> > in the HT spec (if you find it, please point me to it).
> > 
> > If an HT chain may contain multiple HT/PCI host bridges, then it's
> > obvious that the HT host bridge registers read by amd_bus.c don't
> > contain enough information to correctly assign address space to the
> > PCI root buses.
> 
> A HT-to-PCI bridge appears as a PCI-to-PCI bridge (i.e. a Header Type 1
> device), not as a host bridge (a Header Type 0 device).
> 
> That is at least the software model as defined.

Certainly that's what the HT I/O Link spec (v3.10, sec 7.4) suggests,
and I think I saw hints that AMD chipsets do that.  I can't tell from
the HT I/O spec whether it would be an actual defect to use host bridges
instead of PCI-to-PCI bridges, and I can imagine why one might want to
leave an existing PCI host bridge design alone and merely glue on an
HT interface, rather than redesign the bridge register set.

In any case, the VIA chipset in Graham's machine does not have a
PCI-to-PCI bridge leading to bus 80 (see
https://bugzilla.kernel.org/show_bug.cgi?id=16007#c14).
However, ACPI *does* report a PCI host bridge leading to bus 80,
and the apertures it reports seem to be correct (see
https://bugzilla.kernel.org/show_bug.cgi?id=16007#c6).

Bjorn

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH -v2] x86, pci: Handle fallout pci devices with peer root bus
  2010-06-14 18:55                             ` Yinghai Lu
@ 2010-06-14 20:00                               ` Bjorn Helgaas
  2010-06-14 20:08                                 ` H. Peter Anvin
  0 siblings, 1 reply; 29+ messages in thread
From: Bjorn Helgaas @ 2010-06-14 20:00 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: H. Peter Anvin, Jesse Barnes, Thomas Gleixner, Ingo Molnar,
	Graham Ramsey, linux-kernel, linux-pci, Robert Richter,
	Harald Welte, Joseph Chan, Jiri Slaby, Hidetoshi Seto,
	Andrew Morton, Dominik Brodowski

On Monday, June 14, 2010 12:55:44 pm Yinghai Lu wrote:
> On 06/14/2010 11:39 AM, H. Peter Anvin wrote:
> > On 06/14/2010 11:34 AM, Bjorn Helgaas wrote:
> >>
> >> I made the point there that an HT chain may contain multiple HT/PCI
> >> host bridges, but you are stuck on the idea that "one HT chain == one
> >> PCI root bus."
> 
> should be.
> 
> >> I have not found the "one PCI host bridge per HT chain" requirement
> >> in the HT spec (if you find it, please point me to it).
> 
> according to my experience with LinuxBIOS. AMD chipset, nvidia and serverworks (broadcom)

I'm afraid I'm still not convinced.

> >> If an HT chain may contain multiple HT/PCI host bridges, then it's
> >> obvious that the HT host bridge registers read by amd_bus.c don't
> >> contain enough information to correctly assign address space to the
> >> PCI root buses.
> 
> the host bridges is on AMD CPUs, 

Don't confuse the HT host bridge with the PCI host bridge.  The HT I/O spec
is quite clear that it uses "host bridge" to refer to the HT host bridge,
i.e., the interface between CPUs and a HyperTransport link.

I agree that the *HT host bridge* is indeed on the AMD CPU.  But that is
certainly not the same as the PCI host bridge that bridges between an HT
link and a PCI bus.

See sections 4.9.4 (HT host bridge) and 7.4 (HT/PCI host bridge), for
example.

Bjorn

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH -v2] x86, pci: Handle fallout pci devices with peer root bus
  2010-06-14 20:00                               ` Bjorn Helgaas
@ 2010-06-14 20:08                                 ` H. Peter Anvin
  2010-06-14 20:20                                   ` Bjorn Helgaas
  0 siblings, 1 reply; 29+ messages in thread
From: H. Peter Anvin @ 2010-06-14 20:08 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Yinghai Lu, Jesse Barnes, Thomas Gleixner, Ingo Molnar,
	Graham Ramsey, linux-kernel, linux-pci, Robert Richter,
	Harald Welte, Joseph Chan, Jiri Slaby, Hidetoshi Seto,
	Andrew Morton, Dominik Brodowski

On 06/14/2010 01:00 PM, Bjorn Helgaas wrote:
>>
>> the host bridges is on AMD CPUs, 
> 
> Don't confuse the HT host bridge with the PCI host bridge.  The HT I/O spec
> is quite clear that it uses "host bridge" to refer to the HT host bridge,
> i.e., the interface between CPUs and a HyperTransport link.
> 
> I agree that the *HT host bridge* is indeed on the AMD CPU.  But that is
> certainly not the same as the PCI host bridge that bridges between an HT
> link and a PCI bus.
> 
> See sections 4.9.4 (HT host bridge) and 7.4 (HT/PCI host bridge), for
> example.
> 

From a software point of view the latter is [largely] a PCI-to-PCI
bridge, though; it's not a root-level host bridge in the classical sense
(as noted in section 7.4).

Incidentally, in my copy of HT 3.10b, section 7.4 is marked
"HyperTransport Bridge Headers", and does not use the term "host bridge"
to refer to a secondary PCI bus.  Section 4.9.4 is simply marked "Host
Bridge".  As such, I think the HT spec is pretty consistent about
unambiguously referring to the HT host bridge when using the term "host
bridge".

	-hpa


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH -v2] x86, pci: Handle fallout pci devices with peer root bus
  2010-06-14 20:08                                 ` H. Peter Anvin
@ 2010-06-14 20:20                                   ` Bjorn Helgaas
  2010-06-14 21:10                                     ` H. Peter Anvin
  0 siblings, 1 reply; 29+ messages in thread
From: Bjorn Helgaas @ 2010-06-14 20:20 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Yinghai Lu, Jesse Barnes, Thomas Gleixner, Ingo Molnar,
	Graham Ramsey, linux-kernel, linux-pci, Robert Richter,
	Harald Welte, Joseph Chan, Jiri Slaby, Hidetoshi Seto,
	Andrew Morton, Dominik Brodowski

On Monday, June 14, 2010 02:08:37 pm H. Peter Anvin wrote:
> On 06/14/2010 01:00 PM, Bjorn Helgaas wrote:
> >>
> >> the host bridges is on AMD CPUs, 
> > 
> > Don't confuse the HT host bridge with the PCI host bridge.  The HT I/O spec
> > is quite clear that it uses "host bridge" to refer to the HT host bridge,
> > i.e., the interface between CPUs and a HyperTransport link.
> > 
> > I agree that the *HT host bridge* is indeed on the AMD CPU.  But that is
> > certainly not the same as the PCI host bridge that bridges between an HT
> > link and a PCI bus.
> > 
> > See sections 4.9.4 (HT host bridge) and 7.4 (HT/PCI host bridge), for
> > example.
> 
> From a software point of view the latter is [largely] a PCI-to-PCI
> bridge, though; it's not a root-level host bridge in the classical sense
> (as noted in section 7.4).

OK, but Graham's system doesn't have anything resembling a PCI-to-PCI
bridge leading to bus 80.  So while I agree that in an ideal world,
HT/PCI host bridges might always look like PCI-to-PCI bridges, it
seems this is not the case in practice.

> Incidentally, in my copy of HT 3.10b, section 7.4 is marked
> "HyperTransport Bridge Headers", and does not use the term "host bridge"
> to refer to a secondary PCI bus.  Section 4.9.4 is simply marked "Host
> Bridge".  As such, I think the HT spec is pretty consistent about
> unambiguously referring to the HT host bridge when using the term "host
> bridge".

Yes, absolutely.  My point is that what the HT spec means by "host bridge"
is not the same as what the PCI spec and Linux mean by "PCI host bridge".

Those are two completely separate functions, and I think Yinghai is
confusing them when he says "the host bridge is on the AMD CPU and
amd_bus.c uses its config to determine PCI root bus resources."

Bjorn

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH -v2] x86, pci: Handle fallout pci devices with peer root bus
  2010-06-14 20:20                                   ` Bjorn Helgaas
@ 2010-06-14 21:10                                     ` H. Peter Anvin
  2010-06-15  1:49                                       ` Bjorn Helgaas
  0 siblings, 1 reply; 29+ messages in thread
From: H. Peter Anvin @ 2010-06-14 21:10 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Yinghai Lu, Jesse Barnes, Thomas Gleixner, Ingo Molnar,
	Graham Ramsey, linux-kernel, linux-pci, Robert Richter,
	Harald Welte, Joseph Chan, Jiri Slaby, Hidetoshi Seto,
	Andrew Morton, Dominik Brodowski

On 06/14/2010 01:20 PM, Bjorn Helgaas wrote:
> 
> OK, but Graham's system doesn't have anything resembling a PCI-to-PCI
> bridge leading to bus 80.  So while I agree that in an ideal world,
> HT/PCI host bridges might always look like PCI-to-PCI bridges, it
> seems this is not the case in practice.
> 

Invisible PCI bridges have been known to occur in pure PCI space, too.

> Yes, absolutely.  My point is that what the HT spec means by "host bridge"
> is not the same as what the PCI spec and Linux mean by "PCI host bridge".

Actually, they're *exactly* the same thing.

	-hpa

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH -v2] x86, pci: Handle fallout pci devices with peer root bus
  2010-06-14 21:10                                     ` H. Peter Anvin
@ 2010-06-15  1:49                                       ` Bjorn Helgaas
  2010-06-15  1:56                                         ` H. Peter Anvin
  0 siblings, 1 reply; 29+ messages in thread
From: Bjorn Helgaas @ 2010-06-15  1:49 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Yinghai Lu, Jesse Barnes, Thomas Gleixner, Ingo Molnar,
	Graham Ramsey, linux-kernel, linux-pci, Robert Richter,
	Harald Welte, Joseph Chan, Jiri Slaby, Hidetoshi Seto,
	Andrew Morton, Dominik Brodowski, Myron Stowe

On Monday, June 14, 2010 03:10:20 pm H. Peter Anvin wrote:
> On 06/14/2010 01:20 PM, Bjorn Helgaas wrote:
> > 
> > OK, but Graham's system doesn't have anything resembling a PCI-to-PCI
> > bridge leading to bus 80.  So while I agree that in an ideal world,
> > HT/PCI host bridges might always look like PCI-to-PCI bridges, it
> > seems this is not the case in practice.
> 
> Invisible PCI bridges have been known to occur in pure PCI space, too.

Are you talking about PCI host bridges that don't appear in PCI config
space?  I suppose those could be described as "invisible," but since
host bridges aren't architected and their primary interface isn't PCI,
it seems only natural that we'd discover them by a non-PCI mechanism.
They're invisible in PCI terms, but obviously perfectly discoverable
and configurable via ACPI.

If you ask me, it's weird that most x86 chipsets put PCI host bridge
configuration in PCI config space -- it may be convenient in some ways,
but still architecturally strange.

> > Yes, absolutely.  My point is that what the HT spec means by "host bridge"
> > is not the same as what the PCI spec and Linux mean by "PCI host bridge".
> 
> Actually, they're *exactly* the same thing.

If HT is identical to PCI, I agree "HT host bridge" means the same
as "PCI host bridge" (that's almost too trivial to say :-)).

I guess I'm still dubious that HT is identical to PCI.  Since Graham's
box has a single HT chain, we know that all his devices are on HT
chain A.  If HT is identical to PCI, that chain must be bus 00.  Here
are the relevant parts of the box:

  00:00.0 Host bridge: VIA K8T890CF Host Bridge
  00:00.1 Host bridge: VIA VT3351 Host Bridge
  00:00.2 Host bridge: VIA VT3351 Host Bridge
  00:00.3 Host bridge: VIA VT3351 Host Bridge
  00:00.4 Host bridge: VIA VT3351 Host Bridge
  00:00.7 Host bridge: VIA VT3351 Host Bridge
  00:02.0 PCI bridge:  VIA K8T890 PCI to PCI Bridge [to bus 06]
  00:03.0 PCI bridge:  VIA K8T890 PCI to PCI Bridge [to bus 05]
  00:03.1 PCI bridge:  VIA K8T890 PCI to PCI Bridge [to bus 04]
  00:03.2 PCI bridge:  VIA K8T890 PCI to PCI Bridge [to bus 03]
  00:03.3 PCI bridge:  VIA K8T890 PCI to PCI Bridge [to bus 02]
  00:11.7 Host bridge: VIA VT8251 Ultra VLINK Controller
  00:13.0 Host bridge: VIA VT8237A Host Bridge
  00:18.0 Host bridge: AMD HyperTransport Technology Configuration
  80:01.0 Audio device: VIA VT1708/A High Definition Audio Controller

The question is "how do we get to bus 80?"  If everything behind the
AMD HT host bridge is PCI and can be understood solely in terms of
PCI specs, there must be a P2P bridge from bus 00 to bus 80.  We
clearly don't have that.

I suppose one could argue that there's a non-standard P2P bridge
from bus 00 to bus 80, but I can't imagine anybody doing that.
An OS would have to have vendor-specific code just to do PCI
resource management, and that really misses the point of PCI.

It seems more likely to me that one of the VIA host bridges leads
to bus 80.  PCI host bridges are not architected, so if this bridge
lives on HT chain 00, and we can think of HT as "not quite PCI,"
then it seems natural that the host bridge would be VIA-specific,
just like it was in pre-HT days.

The underlying question for all of this is "what's the future of
amd_bus.c?" or stated another way, "does AMD HT config and standard
PCI P2P bridge config tell us everything we need to know about
address space routing?"

On this machine, I claim the answer is "no," and therefore we must
use ACPI to discover and configure the host bridges, i.e., we have
to turn on "pci=use_crs".  We currently turn it on automatically for
machines from 2008 and newer.  I think we need to do it for older
machines, too, perhaps even whenever we use ACPI at all.

Bjorn

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH -v2] x86, pci: Handle fallout pci devices with peer root bus
  2010-06-15  1:49                                       ` Bjorn Helgaas
@ 2010-06-15  1:56                                         ` H. Peter Anvin
  2010-06-15 15:30                                           ` Bjorn Helgaas
  0 siblings, 1 reply; 29+ messages in thread
From: H. Peter Anvin @ 2010-06-15  1:56 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Yinghai Lu, Jesse Barnes, Thomas Gleixner, Ingo Molnar,
	Graham Ramsey, linux-kernel, linux-pci, Robert Richter,
	Harald Welte, Joseph Chan, Jiri Slaby, Hidetoshi Seto,
	Andrew Morton, Dominik Brodowski, Myron Stowe

On 06/14/2010 06:49 PM, Bjorn Helgaas wrote:

>>
>> Invisible PCI bridges have been known to occur in pure PCI space, too.
> 
> Are you talking about PCI host bridges that don't appear in PCI config
> space?  I suppose those could be described as "invisible," but since
> host bridges aren't architected and their primary interface isn't PCI,
> it seems only natural that we'd discover them by a non-PCI mechanism.
> They're invisible in PCI terms, but obviously perfectly discoverable
> and configurable via ACPI.

I mean invisible PCI-PCI bridges.  Yes, they exist.

> If you ask me, it's weird that most x86 chipsets put PCI host bridge
> configuration in PCI config space -- it may be convenient in some ways,
> but still architecturally strange.

It is only strange because they are non-bridge devices.  PCI-Express
fixes that to some degree with the whole "root complex" notion, but
really a PCI host bridge should have been a bridge device from the start.

> I suppose one could argue that there's a non-standard P2P bridge
> from bus 00 to bus 80, but I can't imagine anybody doing that.

Ah, ye of little imagination.

> An OS would have to have vendor-specific code just to do PCI
> resource management, and that really misses the point of PCI.

This really misses the point of HT...

> It seems more likely to me that one of the VIA host bridges leads
> to bus 80.  PCI host bridges are not architected, so if this bridge
> lives on HT chain 00, and we can think of HT as "not quite PCI,"
> then it seems natural that the host bridge would be VIA-specific,
> just like it was in pre-HT days.

I think the best word for it is "incompetent braindamage", but that's
just me...

	-hpa

-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH -v2] x86, pci: Handle fallout pci devices with peer root bus
  2010-06-15  1:56                                         ` H. Peter Anvin
@ 2010-06-15 15:30                                           ` Bjorn Helgaas
  0 siblings, 0 replies; 29+ messages in thread
From: Bjorn Helgaas @ 2010-06-15 15:30 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Yinghai Lu, Jesse Barnes, Thomas Gleixner, Ingo Molnar,
	Graham Ramsey, linux-kernel, linux-pci, Robert Richter,
	Harald Welte, Joseph Chan, Jiri Slaby, Hidetoshi Seto,
	Andrew Morton, Dominik Brodowski, Myron Stowe

On Monday, June 14, 2010 07:56:17 pm H. Peter Anvin wrote:
> On 06/14/2010 06:49 PM, Bjorn Helgaas wrote:
> 
> >> Invisible PCI bridges have been known to occur in pure PCI space, too.
> > 
> > Are you talking about PCI host bridges that don't appear in PCI config
> > space?  I suppose those could be described as "invisible," but since
> > host bridges aren't architected and their primary interface isn't PCI,
> > it seems only natural that we'd discover them by a non-PCI mechanism.
> > They're invisible in PCI terms, but obviously perfectly discoverable
> > and configurable via ACPI.
> 
> I mean invisible PCI-PCI bridges.  Yes, they exist.

Can you educate me more about these?  What specifically is invisible?
Do they appear in config space?  Are they in config space but merely
non-standard?

Let's say we have:

  1) Invisible P2P bridge from bus X to bus 80.

  2) PCI host bridge to bus 80.

Neither appears in PCI config space.  In both cases, we would
discover bus 80 by blindly probing buses 00-ff.  We could
distinguish them by putting a bus analyzer on bus X: if we
see bus 80 traffic on bus X, we must have case (1).  If the
invisible P2P bridge happened to be below a standard P2P bridge,
we could also distinguish them by disabling the standard bridge:
if bus 80 disappeared, we'd know this is also case (1).

But in general, they seem pretty hard to distinguish, so I wonder
if it's possible that we have a case of mistaken identity, and we
only thought we had invisible P2P bridges because we started from
the assumption that systems only had a single PCI host bridge.

> > If you ask me, it's weird that most x86 chipsets put PCI host bridge
> > configuration in PCI config space -- it may be convenient in some ways,
> > but still architecturally strange.
> 
> It is only strange because they are non-bridge devices.  PCI-Express
> fixes that to some degree with the whole "root complex" notion, but
> really a PCI host bridge should have been a bridge device from the start.

Well, even if host bridges had always looked like P2P bridges, we'd
still have the chicken-and-egg problem of knowing where to look for
them.  The OS could use the hack of "always assume bus 00 exists and
enumerate it," but then we still have to worry about multiple segments.
So we always need a non-PCI description of where the PCI buses live.

> > I suppose one could argue that there's a non-standard P2P bridge
> > from bus 00 to bus 80, but I can't imagine anybody doing that.
> 
> Ah, ye of little imagination.

Heh, nobody's ever accused me of having a vivid imagination :-)

> > An OS would have to have vendor-specific code just to do PCI
> > resource management, and that really misses the point of PCI.
> 
> This really misses the point of HT...

I don't follow you here.  I was trying to get at the fact that if
there are non-standard P2P bridges, an OS without a device-specific
driver would not even find devices behind the bridge (unless it
has a blind probe hack) and it would not know the bridge apertures,
so it could never change resource assignments of peers of the bridge
or devices behind the bridge.

Does HT change that reasoning somehow?

> > It seems more likely to me that one of the VIA host bridges leads
> > to bus 80.  PCI host bridges are not architected, so if this bridge
> > lives on HT chain 00, and we can think of HT as "not quite PCI,"
> > then it seems natural that the host bridge would be VIA-specific,
> > just like it was in pre-HT days.
> 
> I think the best word for it is "incompetent braindamage", but that's
> just me...

That's a pretty broad brush.  We've dismissed many ACPI issues as
being "incompetent braindamage" on the part of BIOS engineers, only
to find out later that we really had problems in the Linux/ACPI code.
Since I know approximately nothing about the VIA chipset, and I see
plenty of warts in Linux PCI code, I'm not ready to assign blame yet.

Bjorn

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Bug 16007] x86/pci Oops with CONFIG_SND_HDA_INTEL
  2010-06-11 23:06                     ` Yinghai Lu
  2010-06-14 14:18                       ` Bjorn Helgaas
  2010-06-14 17:47                       ` [PATCH -v2] x86, pci: Handle fallout pci devices with peer root bus Yinghai Lu
@ 2010-06-21 17:28                       ` Bjorn Helgaas
  2 siblings, 0 replies; 29+ messages in thread
From: Bjorn Helgaas @ 2010-06-21 17:28 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Graham Ramsey, Jesse Barnes, linux-kernel, linux-pci,
	bugzilla-daemon, Myron Stowe, Robert Richter, Harald Welte,
	Joseph Chan

I think the best long-term fix is to always enable "pci=use_crs",
regardless of the BIOS date (currently we only do it for 2008 and
newer).  System designers and BIOS writers expect the OS to pay
attention to that information, and indications are that Windows
does use it, so I think we will ultimately be better off if we
use the expected, best-tested path.

However, we have at least one known Linux issue (bug #16228) when
_CRS is enabled, so I'm hesitant to enable it unconditionally at
least until that is resolved.

In the short term, I think we should apply Graham's quirk from
comment #8, which enables pci=use_crs just for his system.

Here's my response to Yinghai's patches.  ACPI gives us these resources:
  pci_root PNP0A03:00: host bridge window [mem 0x80000000-0xff37ffff] (bus 00)
  pci_root PNP0A08:00: host bridge window [mem 0xfebfc000-0xfebfffff] (bus 80)

Yinghai's patch (comment #17, with a v2 posted to the list but not in
the bugzilla), gives us these resources:
  pci_bus 0000:00: resource 5 [mem 0x80000000-0xfcffffffff]
  pci_bus 0000:80: resource 5 [mem 0x80000000-0xfcffffffff]

I think it's just a bad idea to assign the same range to both buses,
especially when the BIOS is telling us what we should be using.

I also think it's a mistake to mess with the resource code to deal
with this specific case.  A change like that makes resource.c hard
to understand and maintain in the future.

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2010-06-21 17:27 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-05-19 15:13 x86/pci Oops with CONFIG_SND_HDA_INTEL Graham Ramsey
2010-05-19 16:44 ` Bjorn Helgaas
2010-05-19 17:16   ` Graham Ramsey
2010-05-19 18:01     ` Yinghai
2010-05-19 22:47       ` Graham Ramsey
2010-05-20  0:03         ` Yinghai
2010-05-20  0:22           ` Jesse Barnes
2010-05-20  0:36             ` Yinghai
2010-05-20 17:08               ` [Bug 16007] " Bjorn Helgaas
2010-06-02 16:58                 ` Bjorn Helgaas
2010-06-11 21:49                   ` Bjorn Helgaas
2010-06-11 22:08                     ` Yinghai Lu
2010-06-11 23:06                     ` Yinghai Lu
2010-06-14 14:18                       ` Bjorn Helgaas
2010-06-14 17:47                       ` [PATCH -v2] x86, pci: Handle fallout pci devices with peer root bus Yinghai Lu
2010-06-14 18:14                         ` Jesse Barnes
2010-06-14 18:22                           ` Yinghai Lu
2010-06-14 18:34                         ` Bjorn Helgaas
2010-06-14 18:39                           ` H. Peter Anvin
2010-06-14 18:55                             ` Yinghai Lu
2010-06-14 20:00                               ` Bjorn Helgaas
2010-06-14 20:08                                 ` H. Peter Anvin
2010-06-14 20:20                                   ` Bjorn Helgaas
2010-06-14 21:10                                     ` H. Peter Anvin
2010-06-15  1:49                                       ` Bjorn Helgaas
2010-06-15  1:56                                         ` H. Peter Anvin
2010-06-15 15:30                                           ` Bjorn Helgaas
2010-06-14 19:43                             ` Bjorn Helgaas
2010-06-21 17:28                       ` [Bug 16007] x86/pci Oops with CONFIG_SND_HDA_INTEL Bjorn Helgaas

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).