All of lore.kernel.org
 help / color / mirror / Atom feed
* v5.15-rc7 AMD no kernel dump, spews "scsi_dma_map failed: request for 36 bytes!"
@ 2021-10-27 16:08 Marshall Midden
  2021-10-27 16:41 ` Logan Gunthorpe
  0 siblings, 1 reply; 6+ messages in thread
From: Marshall Midden @ 2021-10-27 16:08 UTC (permalink / raw)
  To: logang, joro, will, linux-kernel

v5.15-rc7 AMD no kernel dump, spews "scsi_dma_map failed: request for 36 bytes!"

With v5.15-rc (1 through 7), AMD processor kernel crash dumps do not occur,
instead spews:
        "scsi_dma_map failed: request for 36 bytes!"
Works on Intel machines, and in v5.14.0 (and v5.14.14) - as well as previous
kernels (down to v5.3.x.)
git bisect torvalds repository shows: dabb16f67215918c48cf3ff1fc4bc36ca421da2b
Reverse patch to tag v5.15-rc7 allows kernel crash dumps to occur/happen.

Extra details in github repository:
    https://github.com/marshallmidden/m4.git
    Directory AMD-cdrash-2021-10-27
        Z.config.2021-10-27_10-27-15
        Z.dabb16f67215918c48cf3ff1fc4bc36ca421da2b
        Z.df-fstab
        Z.dmesg.2021-10-27_10-26-34
        Z.lspci-vmmD.2021-10-27_10-29-58
        rpviewer(5).png

Reproduce via:
    echo c > /proc/sysrq-trigger

git bisect of linux from torvalds repository shows:
    commit dabb16f67215918c48cf3ff1fc4bc36ca421da2b
    Author: Logan Gunthorpe <logang@deltatee.com>
    Date:   Thu Jul 29 14:15:22 2021 -0600

    iommu/dma: return error code from iommu_dma_map_sg()

    Return appropriate error codes EINVAL or ENOMEM from
    iommu_dma_map_sg(). If lower level code returns ENOMEM, then we
    return it, other errors are coalesced into EINVAL.

    iommu_dma_map_sg_swiotlb() returns -EIO as it's an unknown error
    from a call that returns DMA_MAPPING_ERROR.

    Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
    Cc: Joerg Roedel <joro@8bytes.org>
    Cc: Will Deacon <will@kernel.org>
    Signed-off-by: Christoph Hellwig <hch@lst.de>
------------------------------------------------------------------------------
[root@parsec torvalds-linux]# awk -f scripts/ver_linux
    Linux parsec 5.15.0-rc4+ #22 SMP Tue Oct 26 18:50:50 CDT 2021
x86_64 x86_64 x86_64 GNU/Linux

    GNU C                       11.1.0
    GNU Make                    3.82
    Binutils                    2.36.1
    Util-linux                  2.23.2
    Mount                       2.23.2
    Module-init-tools           20
    E2fsprogs                   1.42.9
    Xfsprogs                    4.5.0
    Quota-tools                 4.01
    Nfs-utils                   1.3.0
    Bison                       3.0.4
    Flex                        2.5.37
    Linux C++ Library           6.0.19
    Linux C Library             2.17
    Dynamic linker (ldd)        2.17
    Procps                      3.3.10
    Net-tools                   2.10
    Kbd                         1.15.5
    Console-tools               1.15.5
    Sh-utils                    8.22
    Udev                        219
    Modules Loaded              iscsi_target_mod qla2xxx
target_core_file target_core_iblock target_core_mod target_core_pscsi
tcm_fc tcm_loop tcm_qla2xxx

[root@parsec torvalds-linux]# perl scripts/get_maintainer.pl -f
drivers/iommu/dma-iommu.c
    Joerg Roedel <joro@8bytes.org> (maintainer:IOMMU DRIVERS)
    Will Deacon <will@kernel.org> (maintainer:IOMMU DRIVERS)
    iommu@lists.linux-foundation.org (open list:IOMMU DRIVERS)
    linux-kernel@vger.kernel.org (open list)
------------------------------------------------------------------------------
commit dabb16f67215918c48cf3ff1fc4bc36ca421da2b
Author: Logan Gunthorpe <logang@deltatee.com>
Date:   Thu Jul 29 14:15:22 2021 -0600

    iommu/dma: return error code from iommu_dma_map_sg()

    Return appropriate error codes EINVAL or ENOMEM from
    iommu_dma_map_sg(). If lower level code returns ENOMEM, then we
    return it, other errors are coalesced into EINVAL.

    iommu_dma_map_sg_swiotlb() returns -EIO as it's an unknown error
    from a call that returns DMA_MAPPING_ERROR.

    Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
    Cc: Joerg Roedel <joro@8bytes.org>
    Cc: Will Deacon <will@kernel.org>
    Signed-off-by: Christoph Hellwig <hch@lst.de>

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 98ba927..168434c 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -972,7 +972,7 @@ static int iommu_dma_map_sg_swiotlb(struct device
*dev, struct scatterlist *sg,

 out_unmap:
        iommu_dma_unmap_sg_swiotlb(dev, sg, i, dir, attrs |
DMA_ATTR_SKIP_CPU_SYNC);
-       return 0;
+       return -EIO;
 }

 /*
@@ -993,11 +993,13 @@ static int iommu_dma_map_sg(struct device *dev,
struct scatterlist *sg,
        dma_addr_t iova;
        size_t iova_len = 0;
        unsigned long mask = dma_get_seg_boundary(dev);
+       ssize_t ret;
        int i;

-       if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
-           iommu_deferred_attach(dev, domain))
-               return 0;
+       if (static_branch_unlikely(&iommu_deferred_attach_enabled)) {
+               ret = iommu_deferred_attach(dev, domain);
+               goto out;
+       }

        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
                iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
@@ -1045,14 +1047,17 @@ static int iommu_dma_map_sg(struct device
*dev, struct scatterlist *sg,
        }

        iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
-       if (!iova)
+       if (!iova) {
+               ret = -ENOMEM;
                goto out_restore_sg;
+       }

        /*
         * We'll leave any physical concatenation to the IOMMU driver's
         * implementation - it knows better than we do.
         */
-       if (iommu_map_sg_atomic(domain, iova, sg, nents, prot) < iova_len)
+       ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot);
+       if (ret < iova_len)
                goto out_free_iova;

        return __finalise_sg(dev, sg, nents, iova);
@@ -1061,7 +1066,10 @@ static int iommu_dma_map_sg(struct device *dev,
struct scatterlist *sg,
        iommu_dma_free_iova(cookie, iova, iova_len, NULL);
 out_restore_sg:
        __invalidate_sg(sg, nents);
-       return 0;
+out:
+       if (ret != -ENOMEM)
+               return -EINVAL;
+       return ret;
 }

  static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: v5.15-rc7 AMD no kernel dump, spews "scsi_dma_map failed: request for 36 bytes!"
  2021-10-27 16:08 v5.15-rc7 AMD no kernel dump, spews "scsi_dma_map failed: request for 36 bytes!" Marshall Midden
@ 2021-10-27 16:41 ` Logan Gunthorpe
  2021-10-27 17:21   ` Marshall Midden
  0 siblings, 1 reply; 6+ messages in thread
From: Logan Gunthorpe @ 2021-10-27 16:41 UTC (permalink / raw)
  To: Marshall Midden, joro, will, linux-kernel

Hi,

On 2021-10-27 10:08 a.m., Marshall Midden wrote:
> v5.15-rc7 AMD no kernel dump, spews "scsi_dma_map failed: request for 36 bytes!"
> 
> With v5.15-rc (1 through 7), AMD processor kernel crash dumps do not occur,
> instead spews:
>         "scsi_dma_map failed: request for 36 bytes!"
> Works on Intel machines, and in v5.14.0 (and v5.14.14) - as well as previous
> kernels (down to v5.3.x.)
> git bisect torvalds repository shows: dabb16f67215918c48cf3ff1fc4bc36ca421da2b
> Reverse patch to tag v5.15-rc7 allows kernel crash dumps to occur/happen.
> 

On careful review I think I see what might be causing this. Can you
please try the following patch?

Thanks,

Logan

--

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index cd0f1a593997..3bfb5da87802 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1028,7 +1028,8 @@ static int iommu_dma_map_sg(struct device *dev,
struct sc>

        if (static_branch_unlikely(&iommu_deferred_attach_enabled)) {
                ret = iommu_deferred_attach(dev, domain);
-               goto out;
+               if (ret)
+                       goto out;
        }

        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: v5.15-rc7 AMD no kernel dump, spews "scsi_dma_map failed: request for 36 bytes!"
  2021-10-27 16:41 ` Logan Gunthorpe
@ 2021-10-27 17:21   ` Marshall Midden
  2021-10-27 17:24     ` Logan Gunthorpe
  0 siblings, 1 reply; 6+ messages in thread
From: Marshall Midden @ 2021-10-27 17:21 UTC (permalink / raw)
  To: Logan Gunthorpe; +Cc: joro, will, linux-kernel

Yes, the spewing of messages stopped. Kernel crash dump taking is now
occurring.  Patch applied to v5.15-rc7.
Reworded: THANKS!  Patch works!

Dunno why the dmesg isn't right, but at least there is a core image!
[I updated makedumpfs for v5.15-rc4 -- applying a bunch of cifs/smb,
qlogic, etc. patches and was getting ready for new kernel release in a
few weeks -- couldn't debug a cifs crash. :) Down the rabbit hole...
;) ]
Intel machine booted, etc. and it has a correct dmesg. *sigh*  I don't
think "I" need to worry about that. Someone else can report it.

/media/parsecdata/crash/127.0.0.1-2021-10-27-12:06:53:
total 5283408
-rw------- 1 root root 5410206160 Oct 27 12:06 vmcore
-rw-r--r-- 1 root root          0 Oct 27 12:06 vmcore-dmesg-incomplete.txt

On Wed, Oct 27, 2021 at 11:41 AM Logan Gunthorpe <logang@deltatee.com> wrote:
>
> Hi,
>
> On 2021-10-27 10:08 a.m., Marshall Midden wrote:
> > v5.15-rc7 AMD no kernel dump, spews "scsi_dma_map failed: request for 36 bytes!"
> >
> > With v5.15-rc (1 through 7), AMD processor kernel crash dumps do not occur,
> > instead spews:
> >         "scsi_dma_map failed: request for 36 bytes!"
> > Works on Intel machines, and in v5.14.0 (and v5.14.14) - as well as previous
> > kernels (down to v5.3.x.)
> > git bisect torvalds repository shows: dabb16f67215918c48cf3ff1fc4bc36ca421da2b
> > Reverse patch to tag v5.15-rc7 allows kernel crash dumps to occur/happen.
> >
>
> On careful review I think I see what might be causing this. Can you
> please try the following patch?
>
> Thanks,
>
> Logan
>
> --
>
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index cd0f1a593997..3bfb5da87802 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -1028,7 +1028,8 @@ static int iommu_dma_map_sg(struct device *dev,
> struct sc>
>
>         if (static_branch_unlikely(&iommu_deferred_attach_enabled)) {
>                 ret = iommu_deferred_attach(dev, domain);
> -               goto out;
> +               if (ret)
> +                       goto out;
>         }
>
>         if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: v5.15-rc7 AMD no kernel dump, spews "scsi_dma_map failed: request for 36 bytes!"
  2021-10-27 17:21   ` Marshall Midden
@ 2021-10-27 17:24     ` Logan Gunthorpe
  2021-10-27 17:44       ` Marshall Midden
  0 siblings, 1 reply; 6+ messages in thread
From: Logan Gunthorpe @ 2021-10-27 17:24 UTC (permalink / raw)
  To: Marshall Midden; +Cc: joro, will, linux-kernel




On 2021-10-27 11:21 a.m., Marshall Midden wrote:
> Yes, the spewing of messages stopped. Kernel crash dump taking is now
> occurring.  Patch applied to v5.15-rc7.
> Reworded: THANKS!  Patch works!
> 
> Dunno why the dmesg isn't right, but at least there is a core image!
> [I updated makedumpfs for v5.15-rc4 -- applying a bunch of cifs/smb,
> qlogic, etc. patches and was getting ready for new kernel release in a
> few weeks -- couldn't debug a cifs crash. :) Down the rabbit hole...
> ;) ]
> Intel machine booted, etc. and it has a dmesg correct. *sigh*  I don't
> think "I" need to worry about that. Someone else can report it.

What do you mean the dmesg isn't "right"? What are you seeing and what
are you expecting? What is it that someone else will be reporting with
Intel machine having a "correct" dmesg?

Logan

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: v5.15-rc7 AMD no kernel dump, spews "scsi_dma_map failed: request for 36 bytes!"
  2021-10-27 17:24     ` Logan Gunthorpe
@ 2021-10-27 17:44       ` Marshall Midden
  2021-10-27 17:45         ` Logan Gunthorpe
  0 siblings, 1 reply; 6+ messages in thread
From: Marshall Midden @ 2021-10-27 17:44 UTC (permalink / raw)
  To: Logan Gunthorpe; +Cc: joro, will, linux-kernel

Try number 2 with plain text definitely selected.

Forgot to cut and paste -- interruption occurred. The problem is the
"vmcore-dmesg-incomplete.txt" file (this file is created before the vmcore
is taken, and then later in the process, the vmcore-dmesg.txt file is
created).

AMD machine after taking dump and rebooting:
    /media/parsecdata/crash/127.0.0.1-2021-10-27-12:06:53:
    -rw------- 1 root root 5410206160 Oct 27 12:06 vmcore
    -rw-r--r-- 1 root root          0 Oct 27 12:06 vmcore-dmesg-incomplete.txt

Intel machine:
    /media/parsecdata/crash/127.0.0.1-2021-10-27-12:12:33:
    -rw------- 1 root root 5482529464 Oct 27 12:12 vmcore
    -rw-r--r-- 1 root root     236525 Oct 27 12:12 vmcore-dmesg.txt


On Wed, Oct 27, 2021 at 12:24 PM Logan Gunthorpe <logang@deltatee.com> wrote:
>
>
>
>
> On 2021-10-27 11:21 a.m., Marshall Midden wrote:
> > Yes, the spewing of messages stopped. Kernel crash dump taking is now
> > occurring.  Patch applied to v5.15-rc7.
> > Reworded: THANKS!  Patch works!
> >
> > Dunno why the dmesg isn't right, but at least there is a core image!
> > [I updated makedumpfs for v5.15-rc4 -- applying a bunch of cifs/smb,
> > qlogic, etc. patches and was getting ready for new kernel release in a
> > few weeks -- couldn't debug a cifs crash. :) Down the rabbit hole...
> > ;) ]
> > Intel machine booted, etc. and it has a dmesg correct. *sigh*  I don't
> > think "I" need to worry about that. Someone else can report it.
>
> What do you mean the dmesg isn't "right"? What are you seeing and what
> are you expecting? What is it that someone else will be reporting with
> Intel machine having a "correct" dmesg?
>
> Logan

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: v5.15-rc7 AMD no kernel dump, spews "scsi_dma_map failed: request for 36 bytes!"
  2021-10-27 17:44       ` Marshall Midden
@ 2021-10-27 17:45         ` Logan Gunthorpe
  0 siblings, 0 replies; 6+ messages in thread
From: Logan Gunthorpe @ 2021-10-27 17:45 UTC (permalink / raw)
  To: Marshall Midden; +Cc: joro, will, linux-kernel




On 2021-10-27 11:44 a.m., Marshall Midden wrote:
> Try number 2 with plain text definitely selected.
> 
> Forgot to cut and paste -- interruption occurred. The
> "vmcore-dmesg-incomplete.txt" (this file is created before the vmcore
> is taken, and then later in the process, the vmcore-dmesg.txt file is
> created).
> 
> AMD machine after taking dump and rebooting:
>     /media/parsecdata/crash/127.0.0.1-2021-10-27-12:06:53:
>     -rw------- 1 root root 5410206160 Oct 27 12:06 vmcore
>     -rw-r--r-- 1 root root          0 Oct 27 12:06 vmcore-dmesg-incomplete.txt
> 
> Intel machine:
>     /media/parsecdata/crash/127.0.0.1-2021-10-27-12:12:33:
>     -rw------- 1 root root 5482529464 Oct 27 12:12 vmcore
>     -rw-r--r-- 1 root root     236525 Oct 27 12:12 vmcore-dmesg.txt
> 

Ah, so some kind of issue with the testing infrastructure you are using?
Not something we need to be concerned about.

Logan

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-10-27 17:45 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-27 16:08 v5.15-rc7 AMD no kernel dump, spews "scsi_dma_map failed: request for 36 bytes!" Marshall Midden
2021-10-27 16:41 ` Logan Gunthorpe
2021-10-27 17:21   ` Marshall Midden
2021-10-27 17:24     ` Logan Gunthorpe
2021-10-27 17:44       ` Marshall Midden
2021-10-27 17:45         ` Logan Gunthorpe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.