linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump
@ 2007-10-09 20:40 chandru
  2007-10-09 21:06 ` Muli Ben-Yehuda
  2007-10-10  5:37 ` Vivek Goyal
  0 siblings, 2 replies; 21+ messages in thread
From: chandru @ 2007-10-09 20:40 UTC (permalink / raw)
  To: linux-kernel; +Cc: mark_salyzyn, muli, ak, vgoyal

kdump kernel fails to boot with calgary iommu and aacraid driver on a
x366 box.  The ongoing dma's of aacraid from the first kernel continue
to exist until the driver is loaded in the kdump kernel. Calgary is
initialized prior to aacraid and creation of new tce tables causes wrong
dma's to occur. 

Here we try to grab the tce tables of the first kernel in kdump kernel
and use them. While in the kdump kernel we do not allocate new tce
tables but rather read the base address register contents of calgary
iommu and use the tables that the registers point to. With these changes
the kdump kernel and hence aacraid now boots normally. 

Another point that came when talking with Vivek was to reserve part of
the tce table space in first kernel for use in the kdump kernel. 

Signed-off-by: Chandru S <chandru@in.ibm.com>
---

--- linux-2.6.23-rc9/arch/x86_64/kernel/pci-calgary.c.orig
2007-10-09 23:39:22.000000000 +0530
+++ linux-2.6.23-rc9/arch/x86_64/kernel/pci-calgary.c   2007-10-10
01:25:53.000000000 +0530
@@ -35,6 +35,7 @@
 #include <linux/pci_ids.h>
 #include <linux/pci.h>
 #include <linux/delay.h>
+#include <linux/bootmem.h>
 #include <asm/iommu.h>
 #include <asm/calgary.h>
 #include <asm/tce.h>
@@ -165,6 +166,7 @@ static void calgary_dump_error_regs(stru
 static void calioc2_handle_quirks(struct iommu_table *tbl, struct
pci_dev *dev);
 static void calioc2_tce_cache_blast(struct iommu_table *tbl);
 static void calioc2_dump_error_regs(struct iommu_table *tbl);
+static int  calgary_bus_has_devices(int , unsigned short ) __init;

 static struct cal_chipset_ops calgary_chip_ops = {
        .handle_quirks = calgary_handle_quirks,
@@ -819,7 +821,23 @@ static int __init calgary_setup_tar(stru

        tbl = pci_iommu(dev->bus);
        tbl->it_base = (unsigned
long)bus_info[dev->bus->number].tce_space;
-       tce_free(tbl, 0, tbl->it_size);
+#ifdef CONFIG_CRASH_DUMP
+        if (is_kdump_kernel()){
+                u64 *tp;
+                unsigned int index;
+                tp = ((u64*)tbl->it_base);
+                for(index=0;index < tbl->it_size; index++ ){
+                        if ( *tp != 0x0 )
+                                set_bit(index,tbl->it_map);
+
+                        tp++;
+                }
+        }
+        else
+#endif
+       {
+               tce_free(tbl, 0, tbl->it_size);
+       }

        if (is_calgary(dev->device))
                tbl->chip_ops = &calgary_chip_ops;
@@ -1177,6 +1195,43 @@ static int __init calgary_locate_bbars(v
                }
        }

+#ifdef CONFIG_CRASH_DUMP
+       /*
+        * If this is a kdump kernel, then try grabbing the tce tables
+        * from first kernel by reading the contents of the base
+        * address register of calgary iommu
+        */
+       if(is_kdump_kernel()){
+               for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
+                       struct calgary_bus_info *info = &bus_info[bus];
+                       unsigned short pci_device;
+                       unsigned long tce_space;
+                       u32 val;
+
+                       val = read_pci_config(bus, 0, 0, 0);
+                       pci_device = (val & 0xFFFF0000) >> 16;
+
+                       if (!is_cal_pci_dev(pci_device))
+                               continue;
+
+                       if (info->translation_disabled)
+                               continue;
+
+                       if (calgary_bus_has_devices(bus, pci_device) ||
+                               translate_empty_slots ){
+
+                               target = calgary_reg(bus_info[bus].bbar,
tar_offset(bus));
+                               tce_space = be64_to_cpu(readq(target));
+                               tce_space = tce_space & TAR_SW_BITS;
+
+                               BUG_ON(specified_table_size >
TCE_TABLE_SIZE_8M);
+
+                               tce_space = tce_space &
( ~specified_table_size);
+                               info->tce_space = (u64
*)__va(tce_space);
+                       }
+               }
+       }
+#endif
        return 0;

 error:
@@ -1380,7 +1435,10 @@ void __init detect_calgary(void)
                return;
        }

-       specified_table_size = determine_tce_table_size(end_pfn *
PAGE_SIZE);
+       if(is_kdump_kernel())
+               specified_table_size =
determine_tce_table_size(saved_max_pfn * PAGE_SIZE);
+       else
+               specified_table_size = determine_tce_table_size(end_pfn
* PAGE_SIZE);

        for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
                struct calgary_bus_info *info = &bus_info[bus];
@@ -1398,10 +1456,17 @@ void __init detect_calgary(void)

                if (calgary_bus_has_devices(bus, pci_device) ||
                    translate_empty_slots) {
-                       tbl = alloc_tce_table();
-                       if (!tbl)
-                               goto cleanup;
-                       info->tce_space = tbl;
+               /*
+                * If this is the kdump kernel then do not allocate
+                * new tce tables, try using the tce tables from the
+                * first kernel 
+                */
+                       if(!is_kdump_kernel()){
+                               tbl = alloc_tce_table();
+                               if (!tbl)
+                                       goto cleanup;
+                               info->tce_space = tbl;
+                       }
                        calgary_found = 1;
                }
        }
--- linux-2.6.23-rc9/include/linux/bootmem.h.orig       2007-10-09
23:39:32.000000000 +0530
+++ linux-2.6.23-rc9/include/linux/bootmem.h    2007-10-09
23:26:19.000000000 +0530
@@ -21,6 +21,12 @@ extern unsigned long max_pfn;

 #ifdef CONFIG_CRASH_DUMP
 extern unsigned long saved_max_pfn;
+static inline int is_kdump_kernel(void)
+{
+       return reset_devices ? 1 : 0 ;
+}
+#else
+static inline is_kdump_kernel(void) { return 0; }
 #endif

 /*
                                


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump
  2007-10-09 20:40 [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump chandru
@ 2007-10-09 21:06 ` Muli Ben-Yehuda
  2007-10-10  5:30   ` Vivek Goyal
  2007-10-10  5:37 ` Vivek Goyal
  1 sibling, 1 reply; 21+ messages in thread
From: Muli Ben-Yehuda @ 2007-10-09 21:06 UTC (permalink / raw)
  To: chandru; +Cc: linux-kernel, mark_salyzyn, ak, vgoyal

Hi Chandru,

Thanks for the patch. Comments on the patch below, but first a general
question for my education: the main problem here that aacraid
continues DMA'ing when it shouldn't. Why can't we shut it down
cleanly? Even without the presence of an IOMMU, it seems dangerous to
let an adapter continue DMA'ing to and from memory when the kernel is
an inconsistent state.

The patch below looks reasonable *if* that is the least worst way of
doing it - let's see if we can come up with something cleaner that
doesn't rely in the new kernel on data (which may or may not be
corrupted...) from the old kernel.

Cheers,
Muli

On Wed, Oct 10, 2007 at 02:10:13AM +0530, chandru wrote:

> kdump kernel fails to boot with calgary iommu and aacraid driver on
> a x366 box.  The ongoing dma's of aacraid from the first kernel
> continue to exist until the driver is loaded in the kdump
> kernel. Calgary is initialized prior to aacraid and creation of new
> tce tables causes wrong dma's to occur.
> 
> Here we try to grab the tce tables of the first kernel in kdump
> kernel and use them. While in the kdump kernel we do not allocate
> new tce tables but rather read the base address register contents of
> calgary iommu and use the tables that the registers point to. With
> these changes the kdump kernel and hence aacraid now boots normally.

> Another point that came when talking with Vivek was to reserve part
> of the tce table space in first kernel for use in the kdump kernel.
> 
> Signed-off-by: Chandru S <chandru@in.ibm.com>
> ---
> 
> --- linux-2.6.23-rc9/arch/x86_64/kernel/pci-calgary.c.orig
> 2007-10-09 23:39:22.000000000 +0530
> +++ linux-2.6.23-rc9/arch/x86_64/kernel/pci-calgary.c   2007-10-10
> 01:25:53.000000000 +0530
> @@ -35,6 +35,7 @@
>  #include <linux/pci_ids.h>
>  #include <linux/pci.h>
>  #include <linux/delay.h>
> +#include <linux/bootmem.h>
>  #include <asm/iommu.h>
>  #include <asm/calgary.h>
>  #include <asm/tce.h>
> @@ -165,6 +166,7 @@ static void calgary_dump_error_regs(stru
>  static void calioc2_handle_quirks(struct iommu_table *tbl, struct
> pci_dev *dev);
>  static void calioc2_tce_cache_blast(struct iommu_table *tbl);
>  static void calioc2_dump_error_regs(struct iommu_table *tbl);
> +static int  calgary_bus_has_devices(int , unsigned short ) __init;

Please add parameter names (int bus, etc).

>  static struct cal_chipset_ops calgary_chip_ops = {
>         .handle_quirks = calgary_handle_quirks,
> @@ -819,7 +821,23 @@ static int __init calgary_setup_tar(stru
> 
>         tbl = pci_iommu(dev->bus);
>         tbl->it_base = (unsigned
> long)bus_info[dev->bus->number].tce_space;
> -       tce_free(tbl, 0, tbl->it_size);
> +#ifdef CONFIG_CRASH_DUMP
> +        if (is_kdump_kernel()){
> +                u64 *tp;
> +                unsigned int index;
> +                tp = ((u64*)tbl->it_base);
> +                for(index=0;index < tbl->it_size; index++ ){
> +                        if ( *tp != 0x0 )
> +                                set_bit(index,tbl->it_map);
> +
> +                        tp++;
> +                }
> +        }
> +        else
> +#endif
> +       {
> +               tce_free(tbl, 0, tbl->it_size);
> +       }

Please no #ifdefs in here. Do it like this:

if (is_kdump_kernel())
   something()
else
   something_else()

Where is_kdump_kernel() is defined to 0 if CONFIG_CRASH_DUMP is not
set.

Also, please move the part where we scan the table into a suitably
named function, e.g., calgary_init_bitmap_from_tce_table().

Also, coding style - please run the patch through checkpatch.pl.

> +#ifdef CONFIG_CRASH_DUMP
> +       /*
> +        * If this is a kdump kernel, then try grabbing the tce tables
> +        * from first kernel by reading the contents of the base
> +        * address register of calgary iommu
> +        */
> +       if(is_kdump_kernel()){

Same comments as above.

> +               for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
> +                       struct calgary_bus_info *info = &bus_info[bus];
> +                       unsigned short pci_device;
> +                       unsigned long tce_space;
> +                       u32 val;
> +
> +                       val = read_pci_config(bus, 0, 0, 0);
> +                       pci_device = (val & 0xFFFF0000) >> 16;
> +
> +                       if (!is_cal_pci_dev(pci_device))
> +                               continue;
> +
> +                       if (info->translation_disabled)
> +                               continue;
> +
> +                       if (calgary_bus_has_devices(bus, pci_device) ||
> +                               translate_empty_slots ){
> +
> +                               target = calgary_reg(bus_info[bus].bbar,
> tar_offset(bus));
> +                               tce_space = be64_to_cpu(readq(target));
> +                               tce_space = tce_space & TAR_SW_BITS;
> +
> +                               BUG_ON(specified_table_size >
> TCE_TABLE_SIZE_8M);
> +
> +                               tce_space = tce_space & ( ~specified_table_size);
> +                               info->tce_space = (u64
> *)__va(tce_space);
> +                       }
> +               }
> +       }
> +#endif

This should be encapsulated in a function. Something like:

if (is_kdump_kernel())
   grab_tce_space_from_tar()

>         return 0;
> 
>  error:
> @@ -1380,7 +1435,10 @@ void __init detect_calgary(void)
>                 return;
>         }
> 
> -       specified_table_size = determine_tce_table_size(end_pfn *
> PAGE_SIZE);
> +       if(is_kdump_kernel())
> +               specified_table_size =
> determine_tce_table_size(saved_max_pfn * PAGE_SIZE);
> +       else
> +               specified_table_size = determine_tce_table_size(end_pfn
> * PAGE_SIZE);

What is the value of saved_max_pfn if !kdump? Can we just use it
unconditionally? If not, I prefer this as

max_pfn = is_kdump_kernel() ? saved_max_pfn : end_pfn;
...

>         for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
>                 struct calgary_bus_info *info = &bus_info[bus];
> @@ -1398,10 +1456,17 @@ void __init detect_calgary(void)
> 
>                 if (calgary_bus_has_devices(bus, pci_device) ||
>                     translate_empty_slots) {
> -                       tbl = alloc_tce_table();
> -                       if (!tbl)
> -                               goto cleanup;
> -                       info->tce_space = tbl;
> +               /*
> +                * If this is the kdump kernel then do not allocate
> +                * new tce tables, try using the tce tables from the
> +                * first kernel 
> +                */
> +                       if(!is_kdump_kernel()){
> +                               tbl = alloc_tce_table();
> +                               if (!tbl)
> +                                       goto cleanup;
> +                               info->tce_space = tbl;
> +                       }

Coding style, but otherwise looks ok.

>                         calgary_found = 1;
>                 }
>         }
> --- linux-2.6.23-rc9/include/linux/bootmem.h.orig       2007-10-09
> 23:39:32.000000000 +0530
> +++ linux-2.6.23-rc9/include/linux/bootmem.h    2007-10-09
> 23:26:19.000000000 +0530
> @@ -21,6 +21,12 @@ extern unsigned long max_pfn;
> 
>  #ifdef CONFIG_CRASH_DUMP
>  extern unsigned long saved_max_pfn;
> +static inline int is_kdump_kernel(void)
> +{
> +       return reset_devices ? 1 : 0 ;
> +}
> +#else
> +static inline is_kdump_kernel(void) { return 0; }
>  #endif

Please define is_kdump_kernel() even if CONFIG_CRASH_DUMP is set, that
will let us avoid the ifdef in C files.

Cheers,
Muli
-- 
SYSTOR 2007 --- 1st Annual Haifa Systems and Storage Conference 2007
http://www.haifa.il.ibm.com/Workshops/systor2007/

Virtualization workshop: Oct 29th, 2007 | Storage workshop: Oct 30th, 2007

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump
  2007-10-09 21:06 ` Muli Ben-Yehuda
@ 2007-10-10  5:30   ` Vivek Goyal
  2007-10-14  5:41     ` Muli Ben-Yehuda
  0 siblings, 1 reply; 21+ messages in thread
From: Vivek Goyal @ 2007-10-10  5:30 UTC (permalink / raw)
  To: Muli Ben-Yehuda; +Cc: chandru, linux-kernel, mark_salyzyn, ak

On Tue, Oct 09, 2007 at 11:06:23PM +0200, Muli Ben-Yehuda wrote:
> Hi Chandru,
> 
> Thanks for the patch. Comments on the patch below, but first a general
> question for my education: the main problem here that aacraid
> continues DMA'ing when it shouldn't. Why can't we shut it down
> cleanly? Even without the presence of an IOMMU, it seems dangerous to
> let an adapter continue DMA'ing to and from memory when the kernel is
> an inconsistent state.
> 

Hi Muli,

After the kernel crash, we can't rely on the crashed kernel's data
structures or methods any more. We can't call the device shutdown methods of
all the device drivers as we might be invoking a driver which actually might
have caused the crash. Hence we don't perform any device shutdown
in the case of kdump. Instead after the crash, we try to take the shortest
route to second kernel (Execute minimum code in crashed kernel).

Whatever special handling is required to bring up the second kernel on 
a potentially unknown state hardware, is taken in second kernel.

We will not be too concerned about ongoing DMA's as long as there is no
corruption of tce tables. That would mean DMA is happening in first
kernel's memory buffer and second kernel is not impacted. But if TCE
tables themselves are corrupted, then it can potentially interfere with
second kernel's operation. Don't know how it can be addressed.
  
> The patch below looks reasonable *if* that is the least worst way of
> doing it - let's see if we can come up with something cleaner that
> doesn't rely in the new kernel on data (which may or may not be
> corrupted...) from the old kernel.
> 

I think the issue here is that some DMA was going on when first kernel
crashed. After the crash, second kernel booted and created new TCE tables
and switched to it. This resulted in ongoing DMA failure and hardware raised
an alarm. 

In this case, probably it would make sense to re-use the TCE tables of
previous kernel (until and unless we have a way to tell hardware not
to flag a DMA error if TCE mapping is changed while DMA is going on ?)

I think, we also need to reserve some TCE table entries (in first kernel),
which can be used by second kernel for saving kernel core file to disk. There
might be a case where first kernel has used up all TCE entries and second
kernel can't allocate more. I think ppc64 has taken the approach of freeing
some entries in second kernel but that will have the problem as you might
be clearing an entry which is being used by ongoing DMA.  

Thanks
Vivek

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump
  2007-10-09 20:40 [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump chandru
  2007-10-09 21:06 ` Muli Ben-Yehuda
@ 2007-10-10  5:37 ` Vivek Goyal
  1 sibling, 0 replies; 21+ messages in thread
From: Vivek Goyal @ 2007-10-10  5:37 UTC (permalink / raw)
  To: chandru; +Cc: linux-kernel, mark_salyzyn, muli, ak

On Wed, Oct 10, 2007 at 02:10:13AM +0530, chandru wrote:
> kdump kernel fails to boot with calgary iommu and aacraid driver on a
> x366 box.  The ongoing dma's of aacraid from the first kernel continue
> to exist until the driver is loaded in the kdump kernel. Calgary is
> initialized prior to aacraid and creation of new tce tables causes wrong
> dma's to occur. 
> 
> Here we try to grab the tce tables of the first kernel in kdump kernel
> and use them. While in the kdump kernel we do not allocate new tce
> tables but rather read the base address register contents of calgary
> iommu and use the tables that the registers point to. With these changes
> the kdump kernel and hence aacraid now boots normally. 
> 
> Another point that came when talking with Vivek was to reserve part of
> the tce table space in first kernel for use in the kdump kernel. 
> 

Yes, I think reserving some entries for second kernel will be important
to make sure second kernel can perform DMA while saving the kernel core.
We don't want to clear some entries in second kernel as some ongoing DMA
might be using it and we will run into issues.

[..]
> --- linux-2.6.23-rc9/include/linux/bootmem.h.orig       2007-10-09
> 23:39:32.000000000 +0530
> +++ linux-2.6.23-rc9/include/linux/bootmem.h    2007-10-09
> 23:26:19.000000000 +0530
> @@ -21,6 +21,12 @@ extern unsigned long max_pfn;
> 
>  #ifdef CONFIG_CRASH_DUMP
>  extern unsigned long saved_max_pfn;
> +static inline int is_kdump_kernel(void)
> +{
> +       return reset_devices ? 1 : 0 ;
> +}
> +#else
> +static inline is_kdump_kernel(void) { return 0; }
>  #endif

In general, run checkpatch.pl on the patch.

is_kdump_kernel() can go in kdump specific files and exported here.
This function can be used by some other drivers too.

I think use "elfcorehdr" instead of "reset_devices" to detect the kdump
kernel. reset_devices can in general also be used for booting on a kernel
on a fauly bios which does not does a good job of resetting the devices.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump
  2007-10-10  5:30   ` Vivek Goyal
@ 2007-10-14  5:41     ` Muli Ben-Yehuda
  2007-10-15  6:29       ` Vivek Goyal
  0 siblings, 1 reply; 21+ messages in thread
From: Muli Ben-Yehuda @ 2007-10-14  5:41 UTC (permalink / raw)
  To: Vivek Goyal; +Cc: chandru, linux-kernel, mark_salyzyn, ak

On Wed, Oct 10, 2007 at 11:00:58AM +0530, Vivek Goyal wrote:
> On Tue, Oct 09, 2007 at 11:06:23PM +0200, Muli Ben-Yehuda wrote:
> > Hi Chandru,
> > 
> > Thanks for the patch. Comments on the patch below, but first a general
> > question for my education: the main problem here that aacraid
> > continues DMA'ing when it shouldn't. Why can't we shut it down
> > cleanly? Even without the presence of an IOMMU, it seems dangerous to
> > let an adapter continue DMA'ing to and from memory when the kernel is
> > an inconsistent state.
> > 
> 
> Hi Muli,
> 
> After the kernel crash, we can't rely on the crashed kernel's data
> structures or methods any more. We can't call the device shutdown
> methods of all the device drivers as we might be invoking a driver
> which actually might have caused the crash. Hence we don't perform
> any device shutdown in the case of kdump. Instead after the crash,
> we try to take the shortest route to second kernel (Execute minimum
> code in crashed kernel).
> 
> Whatever special handling is required to bring up the second kernel on 
> a potentially unknown state hardware, is taken in second kernel.

That makes sense, but does it really make more sense to set-up proper
IOMMU translation tables for DMA's which have been triggered from the
first kernel than to either quiesce the device ASAP (this means before
enabling IOMMU translation...) or to trap such DMA's and continue,
rather than letting them go through?

> We will not be too concerned about ongoing DMA's as long as there is no
> corruption of tce tables. That would mean DMA is happening in first
> kernel's memory buffer and second kernel is not impacted. But if TCE
> tables themselves are corrupted, then it can potentially interfere with
> second kernel's operation. Don't know how it can be addressed.

I worry about two cases: the first is TCE table corruption, the second
is DMA's to areas which were fine in the first kernel but are wrong
for the second kernel.

> > The patch below looks reasonable *if* that is the least worst way
> > of doing it - let's see if we can come up with something cleaner
> > that doesn't rely in the new kernel on data (which may or may not
> > be corrupted...) from the old kernel.
> 
> I think the issue here is that some DMA was going on when first
> kernel crashed. After the crash, second kernel booted and created
> new TCE tables and switched to it. This resulted in ongoing DMA
> failure and hardware raised an alarm.
> 
> In this case, probably it would make sense to re-use the TCE tables
> of previous kernel (until and unless we have a way to tell hardware
> not to flag a DMA error if TCE mapping is changed while DMA is going
> on ?)

We don't have a way to do that, but we should be able to trap the DMA,
figure out that we're in a kdump kernel, and then just ignore it and
continue.

> I think, we also need to reserve some TCE table entries (in first
> kernel), which can be used by second kernel for saving kernel core
> file to disk. There might be a case where first kernel has used up
> all TCE entries and second kernel can't allocate more. I think ppc64
> has taken the approach of freeing some entries in second kernel but
> that will have the problem as you might be clearing an entry which
> is being used by ongoing DMA.

That's a good point - how do PPC (or other architectures with an
isolation-capable IOMMU) handle this whole issue?

Cheers,
Muli
-- 
SYSTOR 2007 --- 1st Annual Haifa Systems and Storage Conference 2007
http://www.haifa.il.ibm.com/Workshops/systor2007/

Virtualization workshop: Oct 29th, 2007 | Storage workshop: Oct 30th, 2007

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump
  2007-10-14  5:41     ` Muli Ben-Yehuda
@ 2007-10-15  6:29       ` Vivek Goyal
  2007-12-24  5:15         ` Chandru
  0 siblings, 1 reply; 21+ messages in thread
From: Vivek Goyal @ 2007-10-15  6:29 UTC (permalink / raw)
  To: Muli Ben-Yehuda; +Cc: chandru, linux-kernel, mark_salyzyn, ak

On Sun, Oct 14, 2007 at 07:41:10AM +0200, Muli Ben-Yehuda wrote:
> On Wed, Oct 10, 2007 at 11:00:58AM +0530, Vivek Goyal wrote:
> > On Tue, Oct 09, 2007 at 11:06:23PM +0200, Muli Ben-Yehuda wrote:
> > > Hi Chandru,
> > > 
> > > Thanks for the patch. Comments on the patch below, but first a general
> > > question for my education: the main problem here that aacraid
> > > continues DMA'ing when it shouldn't. Why can't we shut it down
> > > cleanly? Even without the presence of an IOMMU, it seems dangerous to
> > > let an adapter continue DMA'ing to and from memory when the kernel is
> > > an inconsistent state.
> > > 
> > 
> > Hi Muli,
> > 
> > After the kernel crash, we can't rely on the crashed kernel's data
> > structures or methods any more. We can't call the device shutdown
> > methods of all the device drivers as we might be invoking a driver
> > which actually might have caused the crash. Hence we don't perform
> > any device shutdown in the case of kdump. Instead after the crash,
> > we try to take the shortest route to second kernel (Execute minimum
> > code in crashed kernel).
> > 
> > Whatever special handling is required to bring up the second kernel on 
> > a potentially unknown state hardware, is taken in second kernel.
> 
> That makes sense, but does it really make more sense to set-up proper
> IOMMU translation tables for DMA's which have been triggered from the
> first kernel than to either quiesce the device ASAP (this means before
> enabling IOMMU translation...) or to trap such DMA's and continue,
> rather than letting them go through?
> 

Trapping DMA and ignoring will make sense. I don't know how it can be done.
Toggle write permissions in TCE table and handle it in exception handler?

> > We will not be too concerned about ongoing DMA's as long as there is no
> > corruption of tce tables. That would mean DMA is happening in first
> > kernel's memory buffer and second kernel is not impacted. But if TCE
> > tables themselves are corrupted, then it can potentially interfere with
> > second kernel's operation. Don't know how it can be addressed.
> 
> I worry about two cases: the first is TCE table corruption, the second
> is DMA's to areas which were fine in the first kernel but are wrong
> for the second kernel.
> 
> > > The patch below looks reasonable *if* that is the least worst way
> > > of doing it - let's see if we can come up with something cleaner
> > > that doesn't rely in the new kernel on data (which may or may not
> > > be corrupted...) from the old kernel.
> > 
> > I think the issue here is that some DMA was going on when first
> > kernel crashed. After the crash, second kernel booted and created
> > new TCE tables and switched to it. This resulted in ongoing DMA
> > failure and hardware raised an alarm.
> > 
> > In this case, probably it would make sense to re-use the TCE tables
> > of previous kernel (until and unless we have a way to tell hardware
> > not to flag a DMA error if TCE mapping is changed while DMA is going
> > on ?)
> 
> We don't have a way to do that, but we should be able to trap the DMA,
> figure out that we're in a kdump kernel, and then just ignore it and
> continue.
> 

What does ignoring DMA mean here? Acutual DMA does not take place but device
thinks DMA is complete? 

Trapping DMA and ignoring in kdump kernel (if possible), seems to be better
than letting DMA happen. This will reduce chances of corruption of second
kernel.  

Can you please give little more details in terms of how to trap DMA. I think
chandru should be able to write a patch for this.

> > I think, we also need to reserve some TCE table entries (in first
> > kernel), which can be used by second kernel for saving kernel core
> > file to disk. There might be a case where first kernel has used up
> > all TCE entries and second kernel can't allocate more. I think ppc64
> > has taken the approach of freeing some entries in second kernel but
> > that will have the problem as you might be clearing an entry which
> > is being used by ongoing DMA.
> 
> That's a good point - how do PPC (or other architectures with an
> isolation-capable IOMMU) handle this whole issue?
> 

Right now ppc64 just uses the TCE tables from first kernel and allows the 
older DMA's to finish. Then they free up some TCE table entries (2K) to 
make space for DMA entries from second kernel. 

Mohan, please corret me, if I am wrong.

Thanks
Vivek

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump
  2007-10-15  6:29       ` Vivek Goyal
@ 2007-12-24  5:15         ` Chandru
  2008-03-10 13:20           ` Chandru
  0 siblings, 1 reply; 21+ messages in thread
From: Chandru @ 2007-12-24  5:15 UTC (permalink / raw)
  To: Muli Ben-Yehuda; +Cc: linux-kernel, ak, jdmason

Hi Muli,

I have tried to work with CCR ,CSR, CSMR, CSAR, CFGRW, GBRERRM  
registers but have been unable to make calgary generate an exception 
upon error condition.  In alloc_tce_table() , I am setting WRITE_ONLY 
access permission bit to tce entries but it isn't helping.  Would you 
kindly let me know how we can make calgary to generate an exception in 
error conditions ?.

Thanks,
Chandru


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump
  2007-12-24  5:15         ` Chandru
@ 2008-03-10 13:20           ` Chandru
  2008-03-10 16:09             ` Andrew Morton
  2008-03-11 13:29             ` Vivek Goyal
  0 siblings, 2 replies; 21+ messages in thread
From: Chandru @ 2008-03-10 13:20 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, jdmason, Muli Ben-Yehuda

Chandru wrote:
> Hi Muli,
>
> I have tried to work with CCR ,CSR, CSMR, CSAR, CFGRW, GBRERRM  
> registers but have been unable to make calgary generate an exception 
> upon error condition.  In alloc_tce_table() , I am setting WRITE_ONLY 
> access permission bit to tce entries but it isn't helping.  Would you 
> kindly let me know how we can make calgary to generate an exception in 
> error conditions ?.
>
> Thanks,
> Chandru
>
>
Hello Andrew,

Could you pls consider the attached patch for inclusion in mainline.

Thanks,
Chandru

Signed-off-by: Chandru S <chandru@in.ibm.com>
---

--- arch/x86/kernel/pci-calgary_64.c.orig       2008-03-10 
15:54:08.000000000 +0530
+++ arch/x86/kernel/pci-calgary_64.c    2008-03-10 16:34:20.000000000 +0530
@@ -36,6 +36,7 @@
 #include <linux/delay.h>
 #include <linux/scatterlist.h>
 #include <linux/iommu-helper.h>
+#include <linux/crash_dump.h>
 #include <asm/gart.h>
 #include <asm/calgary.h>
 #include <asm/tce.h>
@@ -166,6 +167,8 @@ static void calgary_dump_error_regs(stru
 static void calioc2_handle_quirks(struct iommu_table *tbl, struct 
pci_dev *dev);
 static void calioc2_tce_cache_blast(struct iommu_table *tbl);
 static void calioc2_dump_error_regs(struct iommu_table *tbl);
+static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl);
+static void grab_tce_space_from_tar(void);

 static struct cal_chipset_ops calgary_chip_ops = {
        .handle_quirks = calgary_handle_quirks,
@@ -828,7 +831,11 @@ static int __init calgary_setup_tar(stru

        tbl = pci_iommu(dev->bus);
        tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
-       tce_free(tbl, 0, tbl->it_size);
+
+       if (is_kdump_kernel())
+               calgary_init_bitmap_from_tce_table(tbl);
+       else
+               tce_free(tbl, 0, tbl->it_size);

        if (is_calgary(dev->device))
                tbl->chip_ops = &calgary_chip_ops;
@@ -1186,6 +1193,9 @@ static int __init calgary_locate_bbars(v
                }
        }

+       if (is_kdump_kernel())
+               grab_tce_space_from_tar();
+
        return 0;

 error:
@@ -1276,6 +1286,24 @@ static inline int __init determine_tce_t
        return ret;
 }

+/*
+ * calgary_init_bitmap_from_tce_table():
+ * Funtion for kdump case. In the second/kdump kernel initialize
+ * the bitmap based on the tce table entries obtained from first kernel
+ */
+static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
+{
+       u64 *tp;
+       unsigned int index;
+       tp = ((u64 *)tbl->it_base);
+       for (index = 0 ; index < tbl->it_size; index++) {
+               if (*tp != 0x0)
+                       set_bit(index, tbl->it_map);
+
+               tp++;
+       }
+}
+
 static int __init build_detail_arrays(void)
 {
        unsigned long ptr;
@@ -1338,12 +1366,52 @@ static int __init calgary_bus_has_device
        return (val != 0xffffffff);
 }

+/*
+ * grab_tce_space_from_tar():
+ * Function for kdump case. Grab the tce tables from first kernel
+ * by reading the contents of the base adress register of calgary iommu
+ */
+static void grab_tce_space_from_tar()
+{
+       int bus;
+       void __iomem *target;
+       unsigned long tce_space;
+
+       for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
+               struct calgary_bus_info *info = &bus_info[bus];
+               unsigned short pci_device;
+               u32 val;
+
+               val = read_pci_config(bus, 0, 0, 0);
+               pci_device = (val & 0xFFFF0000) >> 16;
+
+               if (!is_cal_pci_dev(pci_device))
+                       continue;
+
+               if (info->translation_disabled)
+                       continue;
+
+               if (calgary_bus_has_devices(bus, pci_device) ||
+                   translate_empty_slots) {
+                       target = calgary_reg(bus_info[bus].bbar,
+                                               tar_offset(bus));
+                       tce_space = be64_to_cpu(readq(target));
+                       tce_space = tce_space & TAR_SW_BITS;
+
+                       BUG_ON(specified_table_size > TCE_TABLE_SIZE_8M);
+
+                       tce_space = tce_space & (~specified_table_size);
+                       info->tce_space = (u64 *)__va(tce_space);
+               }
+       }
+}
+
 void __init detect_calgary(void)
 {
        int bus;
        void *tbl;
        int calgary_found = 0;
-       unsigned long ptr;
+       unsigned long ptr, max_pfn;
        unsigned int offset, prev_offset;
        int ret;

@@ -1393,7 +1461,8 @@ void __init detect_calgary(void)
                return;
        }

-       specified_table_size = determine_tce_table_size(end_pfn * 
PAGE_SIZE);
+       max_pfn = is_kdump_kernel() ? saved_max_pfn : end_pfn ;
+       specified_table_size = determine_tce_table_size(max_pfn * 
PAGE_SIZE);

        for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
                struct calgary_bus_info *info = &bus_info[bus];
@@ -1411,10 +1480,16 @@ void __init detect_calgary(void)

                if (calgary_bus_has_devices(bus, pci_device) ||
                    translate_empty_slots) {
-                       tbl = alloc_tce_table();
-                       if (!tbl)
-                               goto cleanup;
-                       info->tce_space = tbl;
+                       /*
+                        * If this is a kdump kernel find and use
+                        * the tce tables from first kernel
+                        */
+                       if (!is_kdump_kernel()) {
+                               tbl = alloc_tce_table();
+                               if (!tbl)
+                                       goto cleanup;
+                               info->tce_space = tbl;
+                       }
                        calgary_found = 1;
                }
        }
--- include/linux/crash_dump.h.orig     2008-03-10 15:54:33.000000000 +0530
+++ include/linux/crash_dump.h  2008-03-10 16:30:12.000000000 +0530
@@ -21,6 +21,13 @@ extern struct proc_dir_entry *proc_vmcor
 #endif

 #define vmcore_elf_check_arch(x) (elf_check_arch(x) || 
vmcore_elf_check_arch_cross(x))
+extern unsigned long saved_max_pfn;
+static inline int is_kdump_kernel(void)
+{
+       return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0 ;
+}
+#else /* !CONFIG_CRASH_DUMP */
+static inline is_kdump_kernel(void) { return 0; }

 #endif /* CONFIG_CRASH_DUMP */
 #endif /* LINUX_CRASHDUMP_H */



^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump
  2008-03-10 13:20           ` Chandru
@ 2008-03-10 16:09             ` Andrew Morton
  2008-06-21 12:11               ` Chandru
  2008-03-11 13:29             ` Vivek Goyal
  1 sibling, 1 reply; 21+ messages in thread
From: Andrew Morton @ 2008-03-10 16:09 UTC (permalink / raw)
  To: Chandru; +Cc: linux-kernel, jdmason, Muli Ben-Yehuda

On Mon, 10 Mar 2008 18:50:29 +0530 Chandru <chandru@in.ibm.com> wrote:

> Chandru wrote:
> > Hi Muli,
> >
> > I have tried to work with CCR ,CSR, CSMR, CSAR, CFGRW, GBRERRM  
> > registers but have been unable to make calgary generate an exception 
> > upon error condition.  In alloc_tce_table() , I am setting WRITE_ONLY 
> > access permission bit to tce entries but it isn't helping.  Would you 
> > kindly let me know how we can make calgary to generate an exception in 
> > error conditions ?.
> >
> > Thanks,
> > Chandru
> >
> >
> Hello Andrew,
> 
> Could you pls consider the attached patch for inclusion in mainline.
> 

I can sort-of work out what the patch does from the above quote, but it
would be nice to have a proper changelog description from yourself rather
than having me try to write it, please.

> 
> Signed-off-by: Chandru S <chandru@in.ibm.com>
> ---
> 
> --- arch/x86/kernel/pci-calgary_64.c.orig       2008-03-10 
> 15:54:08.000000000 +0530

The patch was badly wordwrapped.  Please fix your email client for the next
version.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump
  2008-03-10 13:20           ` Chandru
  2008-03-10 16:09             ` Andrew Morton
@ 2008-03-11 13:29             ` Vivek Goyal
  2008-03-12  5:08               ` Chandru
  1 sibling, 1 reply; 21+ messages in thread
From: Vivek Goyal @ 2008-03-11 13:29 UTC (permalink / raw)
  To: chandru; +Cc: akpm, linux-kernel, jdmason, Muli Ben-Yehuda

On Mon, Mar 10, 2008 at 06:50:29PM +0530, Chandru wrote:

[..]
> -       specified_table_size = determine_tce_table_size(end_pfn * 
> PAGE_SIZE);
> +       max_pfn = is_kdump_kernel() ? saved_max_pfn : end_pfn ;
> +       specified_table_size = determine_tce_table_size(max_pfn * 
> PAGE_SIZE);
>
>        for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
>                struct calgary_bus_info *info = &bus_info[bus];
> @@ -1411,10 +1480,16 @@ void __init detect_calgary(void)
>
>                if (calgary_bus_has_devices(bus, pci_device) ||
>                    translate_empty_slots) {
> -                       tbl = alloc_tce_table();
> -                       if (!tbl)
> -                               goto cleanup;
> -                       info->tce_space = tbl;
> +                       /*
> +                        * If this is a kdump kernel find and use
> +                        * the tce tables from first kernel
> +                        */
> +                       if (!is_kdump_kernel()) {
> +                               tbl = alloc_tce_table();
> +                               if (!tbl)
> +                                       goto cleanup;
> +                               info->tce_space = tbl;
> +                       }

Hi Chandru,

- How do we make sure that previous kernel's TCE tables are not
  overwritten
  by new kernel (In case previous kernel allocated TCE tables in first
  640 KB?)

- How do we make sure that when new kernel tries to setup an entry in
  TCE table, then it does not try to clear up an existing entry which is
  still in use?

Did you try the Muli suggestion of ignoring DMA error in exception
handler? What happens if I setup new table and try to switch to new
table? Some sort of error will occur. Can't we modify the handler and
ignore it for kdump case and move on?

Thanks
Vivek

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump
  2008-03-11 13:29             ` Vivek Goyal
@ 2008-03-12  5:08               ` Chandru
  2008-03-12  9:58                 ` Muli Ben-Yehuda
  0 siblings, 1 reply; 21+ messages in thread
From: Chandru @ 2008-03-12  5:08 UTC (permalink / raw)
  To: Vivek Goyal, Muli Ben-Yehuda; +Cc: akpm, linux-kernel, jdmason


> Hi Chandru,
>
> - How do we make sure that previous kernel's TCE tables are not
>   overwritten
>   by new kernel (In case previous kernel allocated TCE tables in first
>   640 KB?)
>   
TCE tables are allocated using alloc_bootmem_low() with goal set to 0. 
Don't know if this will be sufficient. Investigating...
> - How do we make sure that when new kernel tries to setup an entry in
>   TCE table, then it does not try to clear up an existing entry which is
>   still in use?
>   
A bitmap is created once the first kernel's TCE table is found. This 
bitmap is populated by reading the entries in the tce table. Non-zero 
entries in the table are marked as used/reserved in the bitmap.
> Did you try the Muli suggestion of ignoring DMA error in exception
> handler?
With what I tried, I was not successful
>  What happens if I setup new table and try to switch to new
> table?
This is the root cause of the problem.
>  Some sort of error will occur.
The pci bus on that PHB goes into an undefined state ( returning 
0xffffffff for all reads on that bus ).
>  Can't we modify the handler and
> ignore it for kdump case and move on?
>   
The system booted! , :) with the following change. Muli acceptable??

static void calgary_watchdog(unsigned long data)
{
...
...
/* Disable bus that caused the error and ignore if it's kdump kernel */
+ if ( !is_kdump_kernel()) {
target = calgary_reg(bbar, phb_offset(tbl->it_busno) |
PHB_CONFIG_RW_OFFSET);
val32 = be32_to_cpu(readl(target));
val32 |= PHB_SLOT_DISABLE;
writel(cpu_to_be32(val32), target);
+ }
readl(target); /* flush */
..
..
}
> Thanks
> Vivek
>   
-Chandru

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump
  2008-03-12  5:08               ` Chandru
@ 2008-03-12  9:58                 ` Muli Ben-Yehuda
  2008-03-12 18:08                   ` Vivek Goyal
  0 siblings, 1 reply; 21+ messages in thread
From: Muli Ben-Yehuda @ 2008-03-12  9:58 UTC (permalink / raw)
  To: Chandru; +Cc: Vivek Goyal, akpm, linux-kernel, jdmason

On Wed, Mar 12, 2008 at 10:38:49AM +0530, Chandru wrote:

> The system booted! , :) with the following change. Muli acceptable??
>
> static void calgary_watchdog(unsigned long data)
> {
> ...
> ...
> /* Disable bus that caused the error and ignore if it's kdump kernel */
> + if ( !is_kdump_kernel()) {
> target = calgary_reg(bbar, phb_offset(tbl->it_busno) |
> PHB_CONFIG_RW_OFFSET);
> val32 = be32_to_cpu(readl(target));
> val32 |= PHB_SLOT_DISABLE;
> writel(cpu_to_be32(val32), target);
> + }
> readl(target); /* flush */

Yikes, not really :-)

You're basically saying "if we're in a kdump kernel, let's ignore all
DMA errors and hope for the best". This is not really acceptable. What
we could do is limit the scope of ignorance - only ignore errors from
the point the kdump kernel starts booting until we have reinitialized
the device and then go back to handling DMA errors correctly.

Cheers,
Muli

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump
  2008-03-12  9:58                 ` Muli Ben-Yehuda
@ 2008-03-12 18:08                   ` Vivek Goyal
  2008-03-13 15:49                     ` Muli Ben-Yehuda
  0 siblings, 1 reply; 21+ messages in thread
From: Vivek Goyal @ 2008-03-12 18:08 UTC (permalink / raw)
  To: Muli Ben-Yehuda; +Cc: Chandru, akpm, linux-kernel, jdmason

On Wed, Mar 12, 2008 at 11:58:20AM +0200, Muli Ben-Yehuda wrote:
> On Wed, Mar 12, 2008 at 10:38:49AM +0530, Chandru wrote:
> 
> > The system booted! , :) with the following change. Muli acceptable??
> >
> > static void calgary_watchdog(unsigned long data)
> > {
> > ...
> > ...
> > /* Disable bus that caused the error and ignore if it's kdump kernel */
> > + if ( !is_kdump_kernel()) {
> > target = calgary_reg(bbar, phb_offset(tbl->it_busno) |
> > PHB_CONFIG_RW_OFFSET);
> > val32 = be32_to_cpu(readl(target));
> > val32 |= PHB_SLOT_DISABLE;
> > writel(cpu_to_be32(val32), target);
> > + }
> > readl(target); /* flush */
> 
> Yikes, not really :-)
> 
> You're basically saying "if we're in a kdump kernel, let's ignore all
> DMA errors and hope for the best". This is not really acceptable. What
> we could do is limit the scope of ignorance - only ignore errors from
> the point the kdump kernel starts booting until we have reinitialized
> the device and then go back to handling DMA errors correctly.
> 

Agree. Ignoring all the DMA errors in kdump kernel does not sound very
good.

On a side note, typically how much time does it take to for DMA operations
to finish. Can we wait for a random amount of time in second kernel,
hoping all the DMA operations are complete, and then setup a new table?

This can be atleast a stop gap solution till we come up with a good
solution?

Thanks
Vivek

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump
  2008-03-12 18:08                   ` Vivek Goyal
@ 2008-03-13 15:49                     ` Muli Ben-Yehuda
  0 siblings, 0 replies; 21+ messages in thread
From: Muli Ben-Yehuda @ 2008-03-13 15:49 UTC (permalink / raw)
  To: Vivek Goyal; +Cc: Chandru, akpm, linux-kernel, jdmason

On Wed, Mar 12, 2008 at 02:08:06PM -0400, Vivek Goyal wrote:

> Agree. Ignoring all the DMA errors in kdump kernel does not sound
> very good.
> 
> On a side note, typically how much time does it take to for DMA
> operations to finish. Can we wait for a random amount of time in
> second kernel, hoping all the DMA operations are complete, and then
> setup a new table?
> 
> This can be atleast a stop gap solution till we come up with a good
> solution?

The problem as I understand it is that when we boot into the kdump
kernel we have left-over devices from the old kernel which are active
and initiating DMA's. This is fine as long as Calgayr is also active
(with the old kernel's mappings). Once we initialize Calgary (in the
kdump) kernel with the new mappings, however, it starts trapping all
those DMA's. We can't wait for them to stop, because that will only
happen when the device is re-initialized (or otherwise quiesced).

Cheers,
Muli

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump
  2008-03-10 16:09             ` Andrew Morton
@ 2008-06-21 12:11               ` Chandru
  2008-06-21 12:25                 ` Mark Salyzyn
  2008-06-23 19:29                 ` Muli Ben-Yehuda
  0 siblings, 2 replies; 21+ messages in thread
From: Chandru @ 2008-06-21 12:11 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, Muli Ben-Yehuda

kdump kernel fails to boot with calgary iommu and aacraid driver on a x366 
box. The ongoing dma's of aacraid from the first kernel continue to exist 
until the driver is loaded in the kdump kernel. Calgary is initialized prior 
to aacraid and creation of new tce tables causes wrong dma's to occur. Here 
we try to get the tce tables of the first kernel in kdump kernel and use 
them. While in the kdump kernel we do not allocate new tce tables but instead 
read the base address register contents of calgary iommu and use the tables 
that the registers point to. With these changes the kdump kernel and hence 
aacraid now boots normally. 

Signed-off-by: Chandru Siddalingappa <chandru@in.ibm.com>
---

 arch/x86/kernel/pci-calgary_64.c |   91 ++++++++++++++++++++++++++---
 include/linux/crash_dump.h       |    8 ++
 2 files changed, 92 insertions(+), 7 deletions(-)

> > Hello Andrew,
> >
> > Could you pls consider the attached patch for inclusion in mainline.
>
> I can sort-of work out what the patch does from the above quote, but it
> would be nice to have a proper changelog description from yourself rather
> than having me try to write it, please.
> The patch was badly wordwrapped.  Please fix your email client for the next
> version.
Sorry for the much delayed response on this mail. The patch was tested with 
linux-2.6.26-rc6.  Pls consider it for inclusion. 
Thanks,



diff -Naurp linux-2.6.26-rc6-orig/arch/x86/kernel/pci-calgary_64.c 
linux-2.6.26-rc6/arch/x86/kernel/pci-calgary_64.c
--- linux-2.6.26-rc6-orig/arch/x86/kernel/pci-calgary_64.c	2008-06-21 
13:59:08.000000000 +0530
+++ linux-2.6.26-rc6/arch/x86/kernel/pci-calgary_64.c	2008-06-21 
13:59:45.000000000 +0530
@@ -36,6 +36,7 @@
 #include <linux/delay.h>
 #include <linux/scatterlist.h>
 #include <linux/iommu-helper.h>
+#include <linux/crash_dump.h>
 #include <asm/gart.h>
 #include <asm/calgary.h>
 #include <asm/tce.h>
@@ -167,6 +168,8 @@ static void calgary_dump_error_regs(stru
 static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev 
*dev);
 static void calioc2_tce_cache_blast(struct iommu_table *tbl);
 static void calioc2_dump_error_regs(struct iommu_table *tbl);
+static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl);
+static void get_tce_space_from_tar(void);
 
 static struct cal_chipset_ops calgary_chip_ops = {
 	.handle_quirks = calgary_handle_quirks,
@@ -830,7 +833,11 @@ static int __init calgary_setup_tar(stru
 
 	tbl = pci_iommu(dev->bus);
 	tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
-	tce_free(tbl, 0, tbl->it_size);
+
+	if (is_kdump_kernel())
+		calgary_init_bitmap_from_tce_table(tbl);
+	else
+		tce_free(tbl, 0, tbl->it_size);
 
 	if (is_calgary(dev->device))
 		tbl->chip_ops = &calgary_chip_ops;
@@ -1206,9 +1213,14 @@ static int __init calgary_init(void)
 	struct calgary_bus_info *info;
 
 	ret = calgary_locate_bbars();
+
 	if (ret)
 		return ret;
 
+	/* Purely for kdump kernel case  */
+	if (is_kdump_kernel())
+		get_tce_space_from_tar();
+
 	do {
 		dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev);
 		if (!dev)
@@ -1256,6 +1268,24 @@ error:
 	return ret;
 }
 
+
+/*
+ * calgary_init_bitmap_from_tce_table():
+ * Funtion for kdump case. In the second/kdump kernel initialize
+ * the bitmap based on the tce table entries obtained from first kernel
+ */
+static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
+{
+	u64 *tp;
+	unsigned int index;
+	tp = ((u64 *)tbl->it_base);
+	for (index = 0 ; index < tbl->it_size; index++) {
+	if (*tp != 0x0)
+		set_bit(index, tbl->it_map);
+		tp++;
+	}
+}
+
 static inline int __init determine_tce_table_size(u64 ram)
 {
 	int ret;
@@ -1318,6 +1348,8 @@ static int __init build_detail_arrays(vo
 	return 0;
 }
 
+
+
 static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev)
 {
 	int dev;
@@ -1339,12 +1371,50 @@ static int __init calgary_bus_has_device
 	return (val != 0xffffffff);
 }
 
+/*
+ * get_tce_space_from_tar():
+ * Function for kdump case. Get the tce tables from first kernel
+ * by reading the contents of the base adress register of calgary iommu
+ */
+static void get_tce_space_from_tar()
+{
+	int bus;
+	void __iomem *target;
+	unsigned long tce_space;
+
+	for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
+		struct calgary_bus_info *info = &bus_info[bus];
+		unsigned short pci_device;
+		u32 val;
+
+		val = read_pci_config(bus, 0, 0, 0);
+		pci_device = (val & 0xFFFF0000) >> 16;
+
+		if (!is_cal_pci_dev(pci_device))
+			continue;
+		if (info->translation_disabled)
+			continue;
+
+		if (calgary_bus_has_devices(bus, pci_device) ||
+			translate_empty_slots) {
+			target = calgary_reg(bus_info[bus].bbar,
+				tar_offset(bus));
+			tce_space = be64_to_cpu(readq(target));
+			tce_space = tce_space & TAR_SW_BITS;
+
+			tce_space = tce_space & (~specified_table_size);
+			info->tce_space = (u64 *)__va(tce_space);
+		}
+	}
+	return;
+}
+
 void __init detect_calgary(void)
 {
 	int bus;
 	void *tbl;
 	int calgary_found = 0;
-	unsigned long ptr;
+	unsigned long ptr, max_pfn;
 	unsigned int offset, prev_offset;
 	int ret;
 
@@ -1394,7 +1464,8 @@ void __init detect_calgary(void)
 		return;
 	}
 
-	specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE);
+	max_pfn = is_kdump_kernel() ? saved_max_pfn : end_pfn ;
+	specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE);
 
 	for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
 		struct calgary_bus_info *info = &bus_info[bus];
@@ -1412,10 +1483,16 @@ void __init detect_calgary(void)
 
 		if (calgary_bus_has_devices(bus, pci_device) ||
 		    translate_empty_slots) {
-			tbl = alloc_tce_table();
-			if (!tbl)
-				goto cleanup;
-			info->tce_space = tbl;
+			/*
+			 * If it is kdump kernel, find and use tce tables
+			 * from first kernel, else allocate tce tables here
+			 */
+			if (!is_kdump_kernel()) {
+				tbl = alloc_tce_table();
+				if (!tbl)
+					goto cleanup;
+				info->tce_space = tbl;
+			}
 			calgary_found = 1;
 		}
 	}
diff -Naurp linux-2.6.26-rc6-orig/include/linux/crash_dump.h 
linux-2.6.26-rc6/include/linux/crash_dump.h
--- linux-2.6.26-rc6-orig/include/linux/crash_dump.h	2008-06-21 
13:59:18.000000000 +0530
+++ linux-2.6.26-rc6/include/linux/crash_dump.h	2008-06-21 14:00:42.000000000 
+0530
@@ -22,5 +22,13 @@ extern struct proc_dir_entry *proc_vmcor
 
 #define vmcore_elf_check_arch(x) (elf_check_arch(x) || 
vmcore_elf_check_arch_cross(x))
 
+static inline int is_kdump_kernel(void)
+{
+	return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0 ;
+}
+#else /* !CONFIG_CRASH_DUMP */
+static inline int is_kdump_kernel(void) { return 0; }
 #endif /* CONFIG_CRASH_DUMP */
+
+extern unsigned long saved_max_pfn;
 #endif /* LINUX_CRASHDUMP_H */

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump
  2008-06-21 12:11               ` Chandru
@ 2008-06-21 12:25                 ` Mark Salyzyn
  2008-06-23 19:29                 ` Muli Ben-Yehuda
  1 sibling, 0 replies; 21+ messages in thread
From: Mark Salyzyn @ 2008-06-21 12:25 UTC (permalink / raw)
  To: Chandru; +Cc: Andrew Morton, linux-kernel, Muli Ben-Yehuda

ACK (as a JAFO, though). I feel this patch was a long time coming!  
Thanks!

Sincerely -- Mark Salyzyn

On Jun 21, 2008, at 8:11 AM, Chandru wrote:

> kdump kernel fails to boot with calgary iommu and aacraid driver on  
> a x366
> box. The ongoing dma's of aacraid from the first kernel continue to  
> exist
> until the driver is loaded in the kdump kernel. Calgary is  
> initialized prior
> to aacraid and creation of new tce tables causes wrong dma's to  
> occur. Here
> we try to get the tce tables of the first kernel in kdump kernel and  
> use
> them. While in the kdump kernel we do not allocate new tce tables  
> but instead
> read the base address register contents of calgary iommu and use the  
> tables
> that the registers point to. With these changes the kdump kernel and  
> hence
> aacraid now boots normally.
>
> Signed-off-by: Chandru Siddalingappa <chandru@in.ibm.com>
> ---
>
> arch/x86/kernel/pci-calgary_64.c |   91 ++++++++++++++++++++++++++---
> include/linux/crash_dump.h       |    8 ++
> 2 files changed, 92 insertions(+), 7 deletions(-)
>
>>> Hello Andrew,
>>>
>>> Could you pls consider the attached patch for inclusion in mainline.
>>
>> I can sort-of work out what the patch does from the above quote,  
>> but it
>> would be nice to have a proper changelog description from yourself  
>> rather
>> than having me try to write it, please.
>> The patch was badly wordwrapped.  Please fix your email client for  
>> the next
>> version.
> Sorry for the much delayed response on this mail. The patch was  
> tested with
> linux-2.6.26-rc6.  Pls consider it for inclusion.
> Thanks,
>
>
>
> diff -Naurp linux-2.6.26-rc6-orig/arch/x86/kernel/pci-calgary_64.c
> linux-2.6.26-rc6/arch/x86/kernel/pci-calgary_64.c
> --- linux-2.6.26-rc6-orig/arch/x86/kernel/pci-calgary_64.c       
> 2008-06-21
> 13:59:08.000000000 +0530
> +++ linux-2.6.26-rc6/arch/x86/kernel/pci-calgary_64.c   2008-06-21
> 13:59:45.000000000 +0530
> @@ -36,6 +36,7 @@
> #include <linux/delay.h>
> #include <linux/scatterlist.h>
> #include <linux/iommu-helper.h>
> +#include <linux/crash_dump.h>
> #include <asm/gart.h>
> #include <asm/calgary.h>
> #include <asm/tce.h>
> @@ -167,6 +168,8 @@ static void calgary_dump_error_regs(stru
> static void calioc2_handle_quirks(struct iommu_table *tbl, struct  
> pci_dev
> *dev);
> static void calioc2_tce_cache_blast(struct iommu_table *tbl);
> static void calioc2_dump_error_regs(struct iommu_table *tbl);
> +static void calgary_init_bitmap_from_tce_table(struct iommu_table  
> *tbl);
> +static void get_tce_space_from_tar(void);
>
> static struct cal_chipset_ops calgary_chip_ops = {
>        .handle_quirks = calgary_handle_quirks,
> @@ -830,7 +833,11 @@ static int __init calgary_setup_tar(stru
>
>        tbl = pci_iommu(dev->bus);
>        tbl->it_base = (unsigned long)bus_info[dev->bus- 
> >number].tce_space;
> -       tce_free(tbl, 0, tbl->it_size);
> +
> +       if (is_kdump_kernel())
> +               calgary_init_bitmap_from_tce_table(tbl);
> +       else
> +               tce_free(tbl, 0, tbl->it_size);
>
>        if (is_calgary(dev->device))
>                tbl->chip_ops = &calgary_chip_ops;
> @@ -1206,9 +1213,14 @@ static int __init calgary_init(void)
>        struct calgary_bus_info *info;
>
>        ret = calgary_locate_bbars();
> +
>        if (ret)
>                return ret;
>
> +       /* Purely for kdump kernel case  */
> +       if (is_kdump_kernel())
> +               get_tce_space_from_tar();
> +
>        do {
>                dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID,  
> dev);
>                if (!dev)
> @@ -1256,6 +1268,24 @@ error:
>        return ret;
> }
>
> +
> +/*
> + * calgary_init_bitmap_from_tce_table():
> + * Funtion for kdump case. In the second/kdump kernel initialize
> + * the bitmap based on the tce table entries obtained from first  
> kernel
> + */
> +static void calgary_init_bitmap_from_tce_table(struct iommu_table  
> *tbl)
> +{
> +       u64 *tp;
> +       unsigned int index;
> +       tp = ((u64 *)tbl->it_base);
> +       for (index = 0 ; index < tbl->it_size; index++) {
> +       if (*tp != 0x0)
> +               set_bit(index, tbl->it_map);
> +               tp++;
> +       }
> +}
> +
> static inline int __init determine_tce_table_size(u64 ram)
> {
>        int ret;
> @@ -1318,6 +1348,8 @@ static int __init build_detail_arrays(vo
>        return 0;
> }
>
> +
> +
> static int __init calgary_bus_has_devices(int bus, unsigned short  
> pci_dev)
> {
>        int dev;
> @@ -1339,12 +1371,50 @@ static int __init calgary_bus_has_device
>        return (val != 0xffffffff);
> }
>
> +/*
> + * get_tce_space_from_tar():
> + * Function for kdump case. Get the tce tables from first kernel
> + * by reading the contents of the base adress register of calgary  
> iommu
> + */
> +static void get_tce_space_from_tar()
> +{
> +       int bus;
> +       void __iomem *target;
> +       unsigned long tce_space;
> +
> +       for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
> +               struct calgary_bus_info *info = &bus_info[bus];
> +               unsigned short pci_device;
> +               u32 val;
> +
> +               val = read_pci_config(bus, 0, 0, 0);
> +               pci_device = (val & 0xFFFF0000) >> 16;
> +
> +               if (!is_cal_pci_dev(pci_device))
> +                       continue;
> +               if (info->translation_disabled)
> +                       continue;
> +
> +               if (calgary_bus_has_devices(bus, pci_device) ||
> +                       translate_empty_slots) {
> +                       target = calgary_reg(bus_info[bus].bbar,
> +                               tar_offset(bus));
> +                       tce_space = be64_to_cpu(readq(target));
> +                       tce_space = tce_space & TAR_SW_BITS;
> +
> +                       tce_space = tce_space &  
> (~specified_table_size);
> +                       info->tce_space = (u64 *)__va(tce_space);
> +               }
> +       }
> +       return;
> +}
> +
> void __init detect_calgary(void)
> {
>        int bus;
>        void *tbl;
>        int calgary_found = 0;
> -       unsigned long ptr;
> +       unsigned long ptr, max_pfn;
>        unsigned int offset, prev_offset;
>        int ret;
>
> @@ -1394,7 +1464,8 @@ void __init detect_calgary(void)
>                return;
>        }
>
> -       specified_table_size = determine_tce_table_size(end_pfn *  
> PAGE_SIZE);
> +       max_pfn = is_kdump_kernel() ? saved_max_pfn : end_pfn ;
> +       specified_table_size = determine_tce_table_size(max_pfn *  
> PAGE_SIZE);
>
>        for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
>                struct calgary_bus_info *info = &bus_info[bus];
> @@ -1412,10 +1483,16 @@ void __init detect_calgary(void)
>
>                if (calgary_bus_has_devices(bus, pci_device) ||
>                    translate_empty_slots) {
> -                       tbl = alloc_tce_table();
> -                       if (!tbl)
> -                               goto cleanup;
> -                       info->tce_space = tbl;
> +                       /*
> +                        * If it is kdump kernel, find and use tce  
> tables
> +                        * from first kernel, else allocate tce  
> tables here
> +                        */
> +                       if (!is_kdump_kernel()) {
> +                               tbl = alloc_tce_table();
> +                               if (!tbl)
> +                                       goto cleanup;
> +                               info->tce_space = tbl;
> +                       }
>                        calgary_found = 1;
>                }
>        }
> diff -Naurp linux-2.6.26-rc6-orig/include/linux/crash_dump.h
> linux-2.6.26-rc6/include/linux/crash_dump.h
> --- linux-2.6.26-rc6-orig/include/linux/crash_dump.h    2008-06-21
> 13:59:18.000000000 +0530
> +++ linux-2.6.26-rc6/include/linux/crash_dump.h 2008-06-21  
> 14:00:42.000000000
> +0530
> @@ -22,5 +22,13 @@ extern struct proc_dir_entry *proc_vmcor
>
> #define vmcore_elf_check_arch(x) (elf_check_arch(x) ||
> vmcore_elf_check_arch_cross(x))
>
> +static inline int is_kdump_kernel(void)
> +{
> +       return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0 ;
> +}
> +#else /* !CONFIG_CRASH_DUMP */
> +static inline int is_kdump_kernel(void) { return 0; }
> #endif /* CONFIG_CRASH_DUMP */
> +
> +extern unsigned long saved_max_pfn;
> #endif /* LINUX_CRASHDUMP_H */
> --
> To unsubscribe from this list: send the line "unsubscribe linux- 
> kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump
  2008-06-21 12:11               ` Chandru
  2008-06-21 12:25                 ` Mark Salyzyn
@ 2008-06-23 19:29                 ` Muli Ben-Yehuda
  2008-07-15  8:45                   ` Chandru
  1 sibling, 1 reply; 21+ messages in thread
From: Muli Ben-Yehuda @ 2008-06-23 19:29 UTC (permalink / raw)
  To: Chandru; +Cc: Andrew Morton, linux-kernel, Alexis Bruemmer

On Sat, Jun 21, 2008 at 05:41:43PM +0530, Chandru wrote:

> kdump kernel fails to boot with calgary iommu and aacraid driver on
> a x366 box. The ongoing dma's of aacraid from the first kernel
> continue to exist until the driver is loaded in the kdump
> kernel. Calgary is initialized prior to aacraid and creation of new
> tce tables causes wrong dma's to occur. Here we try to get the tce
> tables of the first kernel in kdump kernel and use them. While in
> the kdump kernel we do not allocate new tce tables but instead read
> the base address register contents of calgary iommu and use the
> tables that the registers point to. With these changes the kdump
> kernel and hence aacraid now boots normally.
> 
> Signed-off-by: Chandru Siddalingappa <chandru@in.ibm.com>

Hi Chandru,

I am not a big fan of this patch, but I guess it would do as a
stop-gap measure. However it is not appropriate for 2.6.26 and should
go in at the beginning of the 2.6.27 merge window. Also, minor
comments below.

> @@ -1206,9 +1213,14 @@ static int __init calgary_init(void)
>  	struct calgary_bus_info *info;
>  
>  	ret = calgary_locate_bbars();
> +

unneeded whitespace

> @@ -1256,6 +1268,24 @@ error:
>  	return ret;
>  }
>  
> +
> +/*
> + * calgary_init_bitmap_from_tce_table():
> + * Funtion for kdump case. In the second/kdump kernel initialize
> + * the bitmap based on the tce table entries obtained from first kernel
> + */
> +static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
> +{
> +	u64 *tp;
> +	unsigned int index;
> +	tp = ((u64 *)tbl->it_base);
> +	for (index = 0 ; index < tbl->it_size; index++) {
> +	if (*tp != 0x0)
> +		set_bit(index, tbl->it_map);
> +		tp++;
> +	}

I think the code is correct, but the indentation is misleading. Please
fix.

> +}
> +
>  static inline int __init determine_tce_table_size(u64 ram)
>  {
>  	int ret;
> @@ -1318,6 +1348,8 @@ static int __init build_detail_arrays(vo
>  	return 0;
>  }
>  
> +
> +

unneeded whitespace.

Cheers,
Muli

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump
  2008-06-23 19:29                 ` Muli Ben-Yehuda
@ 2008-07-15  8:45                   ` Chandru
  2008-07-15 10:52                     ` Muli Ben-Yehuda
  2008-07-17 23:14                     ` Andrew Morton
  0 siblings, 2 replies; 21+ messages in thread
From: Chandru @ 2008-07-15  8:45 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Muli Ben-Yehuda, linux-kernel, Alexis Bruemmer

kdump kernel fails to boot with calgary iommu and aacraid driver on a x366 
box. The ongoing dma's of aacraid from the first kernel continue to exist 
until the driver is loaded in the kdump kernel. Calgary is initialized prior 
to aacraid and creation of new tce tables causes wrong dma's to occur. Here 
we try to get the tce tables of the first kernel in kdump kernel and use 
them. While in the kdump kernel we do not allocate new tce tables but instead 
read the base addres register contents of calgary iommu and use the tables 
that the registers point to. With these changes the kdump kernel and hence 
aacraid now boots normally.

Signed-off-by: Chandru Siddalingappa <chandru@in.ibm.com>
---

patch taken on top of linux-2.6.26 stable. Comments from Muli Ben-Yehuda taken 
into consideration. Pls apply it as a stop-gap patch until we can come up 
with a more stable patch for this issue. Thanks,

 arch/x86/kernel/pci-calgary_64.c |   87 ++++++++++++++++++++++++++---
 include/linux/crash_dump.h       |    8 ++
 2 files changed, 88 insertions(+), 7 deletions(-)

diff -Narup linux-2.6.26-orig/arch/x86/kernel/pci-calgary_64.c 
linux-2.6.26/arch/x86/kernel/pci-calgary_64.c
--- linux-2.6.26-orig/arch/x86/kernel/pci-calgary_64.c	2008-07-15 
13:36:00.000000000 +0530
+++ linux-2.6.26/arch/x86/kernel/pci-calgary_64.c	2008-07-15 
13:45:36.000000000 +0530
@@ -36,6 +36,7 @@
 #include <linux/delay.h>
 #include <linux/scatterlist.h>
 #include <linux/iommu-helper.h>
+#include <linux/crash_dump.h>
 #include <asm/gart.h>
 #include <asm/calgary.h>
 #include <asm/tce.h>
@@ -167,6 +168,8 @@ static void calgary_dump_error_regs(stru
 static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev 
*dev);
 static void calioc2_tce_cache_blast(struct iommu_table *tbl);
 static void calioc2_dump_error_regs(struct iommu_table *tbl);
+static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl);
+static void get_tce_space_from_tar(void);
 
 static struct cal_chipset_ops calgary_chip_ops = {
 	.handle_quirks = calgary_handle_quirks,
@@ -830,7 +833,11 @@ static int __init calgary_setup_tar(stru
 
 	tbl = pci_iommu(dev->bus);
 	tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
-	tce_free(tbl, 0, tbl->it_size);
+
+	if (is_kdump_kernel())
+		calgary_init_bitmap_from_tce_table(tbl);
+	else
+		tce_free(tbl, 0, tbl->it_size);
 
 	if (is_calgary(dev->device))
 		tbl->chip_ops = &calgary_chip_ops;
@@ -1209,6 +1216,10 @@ static int __init calgary_init(void)
 	if (ret)
 		return ret;
 
+	/* Purely for kdump kernel case  */
+	if (is_kdump_kernel())
+		get_tce_space_from_tar();
+
 	do {
 		dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev);
 		if (!dev)
@@ -1256,6 +1267,23 @@ error:
 	return ret;
 }
 
+/*
+ * calgary_init_bitmap_from_tce_table():
+ * Funtion for kdump case. In the second/kdump kernel initialize
+ * the bitmap based on the tce table entries obtained from first kernel
+ */
+static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
+{
+	u64 *tp;
+	unsigned int index;
+	tp = ((u64 *)tbl->it_base);
+	for (index = 0 ; index < tbl->it_size; index++) {
+		if (*tp != 0x0)
+			set_bit(index, tbl->it_map);
+		tp++;
+	}
+}
+
 static inline int __init determine_tce_table_size(u64 ram)
 {
 	int ret;
@@ -1339,12 +1367,50 @@ static int __init calgary_bus_has_device
 	return (val != 0xffffffff);
 }
 
+/*
+ * get_tce_space_from_tar():
+ * Function for kdump case. Get the tce tables from first kernel
+ * by reading the contents of the base adress register of calgary iommu
+ */
+static void get_tce_space_from_tar()
+{
+	int bus;
+	void __iomem *target;
+	unsigned long tce_space;
+
+	for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
+		struct calgary_bus_info *info = &bus_info[bus];
+		unsigned short pci_device;
+		u32 val;
+
+		val = read_pci_config(bus, 0, 0, 0);
+		pci_device = (val & 0xFFFF0000) >> 16;
+
+		if (!is_cal_pci_dev(pci_device))
+			continue;
+		if (info->translation_disabled)
+			continue;
+
+		if (calgary_bus_has_devices(bus, pci_device) ||
+			translate_empty_slots) {
+			target = calgary_reg(bus_info[bus].bbar,
+				tar_offset(bus));
+			tce_space = be64_to_cpu(readq(target));
+			tce_space = tce_space & TAR_SW_BITS;
+
+			tce_space = tce_space & (~specified_table_size);
+			info->tce_space = (u64 *)__va(tce_space);
+		}
+	}
+	return;
+}
+
 void __init detect_calgary(void)
 {
 	int bus;
 	void *tbl;
 	int calgary_found = 0;
-	unsigned long ptr;
+	unsigned long ptr, max_pfn;
 	unsigned int offset, prev_offset;
 	int ret;
 
@@ -1394,7 +1460,8 @@ void __init detect_calgary(void)
 		return;
 	}
 
-	specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE);
+	max_pfn = is_kdump_kernel() ? saved_max_pfn : end_pfn ;
+	specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE);
 
 	for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
 		struct calgary_bus_info *info = &bus_info[bus];
@@ -1412,10 +1479,16 @@ void __init detect_calgary(void)
 
 		if (calgary_bus_has_devices(bus, pci_device) ||
 		    translate_empty_slots) {
-			tbl = alloc_tce_table();
-			if (!tbl)
-				goto cleanup;
-			info->tce_space = tbl;
+			/*
+			 * If it is kdump kernel, find and use tce tables
+			 * from first kernel, else allocate tce tables here
+			 */
+			if (!is_kdump_kernel()) {
+				tbl = alloc_tce_table();
+				if (!tbl)
+					goto cleanup;
+				info->tce_space = tbl;
+			}
 			calgary_found = 1;
 		}
 	}
diff -Narup linux-2.6.26-orig/include/linux/crash_dump.h 
linux-2.6.26/include/linux/crash_dump.h
--- linux-2.6.26-orig/include/linux/crash_dump.h	2008-07-15 13:36:09.000000000 
+0530
+++ linux-2.6.26/include/linux/crash_dump.h	2008-07-15 13:36:37.000000000 
+0530
@@ -22,5 +22,13 @@ extern struct proc_dir_entry *proc_vmcor
 
 #define vmcore_elf_check_arch(x) (elf_check_arch(x) || 
vmcore_elf_check_arch_cross(x))
 
+static inline int is_kdump_kernel(void)
+{
+	return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0 ;
+}
+#else /* !CONFIG_CRASH_DUMP */
+static inline int is_kdump_kernel(void) { return 0; }
 #endif /* CONFIG_CRASH_DUMP */
+
+extern unsigned long saved_max_pfn;
 #endif /* LINUX_CRASHDUMP_H */

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump
  2008-07-15  8:45                   ` Chandru
@ 2008-07-15 10:52                     ` Muli Ben-Yehuda
  2008-07-17 23:14                     ` Andrew Morton
  1 sibling, 0 replies; 21+ messages in thread
From: Muli Ben-Yehuda @ 2008-07-15 10:52 UTC (permalink / raw)
  To: Chandru; +Cc: Andrew Morton, linux-kernel, Alexis Bruemmer

On Tue, Jul 15, 2008 at 02:15:27PM +0530, Chandru wrote:
> kdump kernel fails to boot with calgary iommu and aacraid driver on a x366 
> box. The ongoing dma's of aacraid from the first kernel continue to exist 
> until the driver is loaded in the kdump kernel. Calgary is initialized prior 
> to aacraid and creation of new tce tables causes wrong dma's to occur. Here 
> we try to get the tce tables of the first kernel in kdump kernel and use 
> them. While in the kdump kernel we do not allocate new tce tables but instead 
> read the base addres register contents of calgary iommu and use the tables 
> that the registers point to. With these changes the kdump kernel and hence 
> aacraid now boots normally.
> 
> Signed-off-by: Chandru Siddalingappa <chandru@in.ibm.com>

Acked-by: Muli Ben-Yehuda <muli@il.ibm.com>

Cheers,
Muli

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump
  2008-07-15  8:45                   ` Chandru
  2008-07-15 10:52                     ` Muli Ben-Yehuda
@ 2008-07-17 23:14                     ` Andrew Morton
  2008-07-20  9:42                       ` Muli Ben-Yehuda
  1 sibling, 1 reply; 21+ messages in thread
From: Andrew Morton @ 2008-07-17 23:14 UTC (permalink / raw)
  To: Chandru; +Cc: muli, linux-kernel, alexisb, Andy Whitcroft

On Tue, 15 Jul 2008 14:15:27 +0530
Chandru <chandru@in.ibm.com> wrote:

> kdump kernel fails to boot with calgary iommu and aacraid driver on a x366 
> box. The ongoing dma's of aacraid from the first kernel continue to exist 
> until the driver is loaded in the kdump kernel. Calgary is initialized prior 
> to aacraid and creation of new tce tables causes wrong dma's to occur. Here 
> we try to get the tce tables of the first kernel in kdump kernel and use 
> them. While in the kdump kernel we do not allocate new tce tables but instead 
> read the base addres register contents of calgary iommu and use the tables 
> that the registers point to. With these changes the kdump kernel and hence 
> aacraid now boots normally.
> 
> Signed-off-by: Chandru Siddalingappa <chandru@in.ibm.com>
> ---
> 
> patch taken on top of linux-2.6.26 stable. Comments from Muli Ben-Yehuda taken 
> into consideration. Pls apply it as a stop-gap patch until we can come up 
> with a more stable patch for this issue. Thanks,

Is this needed in 2.6.26?

If so, the patch whcih I applied will need a little work because I had
to touch up some rejects against already-queued changes.  I can fix
that issue by applying this _ahead_ of those changes, but I'm just not
able to judge whether this is needed from the information which was
provided.

> diff -Narup linux-2.6.26-orig/arch/x86/kernel/pci-calgary_64.c 
> linux-2.6.26/arch/x86/kernel/pci-calgary_64.c
> --- linux-2.6.26-orig/arch/x86/kernel/pci-calgary_64.c	2008-07-15 
> 13:36:00.000000000 +0530
> +++ linux-2.6.26/arch/x86/kernel/pci-calgary_64.c	2008-07-15 
> 13:45:36.000000000 +0530
> @@ -36,6 +36,7 @@
>  #include <linux/delay.h>
>  #include <linux/scatterlist.h>
>  #include <linux/iommu-helper.h>
> +#include <linux/crash_dump.h>

You don't _have_ to put new includes right at the end of the list! 
Everyone does this, and it just maximises the probability of merge
conflcits :(


>  #include <asm/gart.h>
>  #include <asm/calgary.h>
>  #include <asm/tce.h>
> @@ -167,6 +168,8 @@ static void calgary_dump_error_regs(stru
>  static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev 
> *dev);

The patch was wordwrapped.

> +	max_pfn = is_kdump_kernel() ? saved_max_pfn : end_pfn ;
> ...
> +	return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0 ;
> ...

A couple of little coding-style errors there.  checkpatch missed them.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump
  2008-07-17 23:14                     ` Andrew Morton
@ 2008-07-20  9:42                       ` Muli Ben-Yehuda
  0 siblings, 0 replies; 21+ messages in thread
From: Muli Ben-Yehuda @ 2008-07-20  9:42 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Chandru, linux-kernel, alexisb, Andy Whitcroft

On Thu, Jul 17, 2008 at 04:14:14PM -0700, Andrew Morton wrote:
> On Tue, 15 Jul 2008 14:15:27 +0530
> Chandru <chandru@in.ibm.com> wrote:
> 
> > kdump kernel fails to boot with calgary iommu and aacraid driver on a x366 
> > box. The ongoing dma's of aacraid from the first kernel continue to exist 
> > until the driver is loaded in the kdump kernel. Calgary is initialized prior 
> > to aacraid and creation of new tce tables causes wrong dma's to occur. Here 
> > we try to get the tce tables of the first kernel in kdump kernel and use 
> > them. While in the kdump kernel we do not allocate new tce tables but instead 
> > read the base addres register contents of calgary iommu and use the tables 
> > that the registers point to. With these changes the kdump kernel and hence 
> > aacraid now boots normally.
> > 
> > Signed-off-by: Chandru Siddalingappa <chandru@in.ibm.com>
> > ---
> > 
> > patch taken on top of linux-2.6.26 stable. Comments from Muli Ben-Yehuda taken 
> > into consideration. Pls apply it as a stop-gap patch until we can come up 
> > with a more stable patch for this issue. Thanks,
> 
> Is this needed in 2.6.26?

No, it's 2.6.27 material.

Cheers,
Muli

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2008-07-20  9:43 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-10-09 20:40 [RFC] [Patch] calgary iommu: Use the first kernel's tce tables in kdump chandru
2007-10-09 21:06 ` Muli Ben-Yehuda
2007-10-10  5:30   ` Vivek Goyal
2007-10-14  5:41     ` Muli Ben-Yehuda
2007-10-15  6:29       ` Vivek Goyal
2007-12-24  5:15         ` Chandru
2008-03-10 13:20           ` Chandru
2008-03-10 16:09             ` Andrew Morton
2008-06-21 12:11               ` Chandru
2008-06-21 12:25                 ` Mark Salyzyn
2008-06-23 19:29                 ` Muli Ben-Yehuda
2008-07-15  8:45                   ` Chandru
2008-07-15 10:52                     ` Muli Ben-Yehuda
2008-07-17 23:14                     ` Andrew Morton
2008-07-20  9:42                       ` Muli Ben-Yehuda
2008-03-11 13:29             ` Vivek Goyal
2008-03-12  5:08               ` Chandru
2008-03-12  9:58                 ` Muli Ben-Yehuda
2008-03-12 18:08                   ` Vivek Goyal
2008-03-13 15:49                     ` Muli Ben-Yehuda
2007-10-10  5:37 ` Vivek Goyal

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).