From: Eric Auger <eric.auger@redhat.com>
To: chenxiang <chenxiang66@hisilicon.com>
Cc: qemu-arm@nongnu.org, qemu-devel@nongnu.org, linuxarm@huawei.com
Subject: Re: [RESEND RFC] hw/arm/smmuv3: add device properties to disable cached iotlb
Date: Wed, 4 Aug 2021 18:26:15 +0200
Message-ID: <5fc0cd6c-1e1d-3cb3-51e5-f20c10736643@redhat.com>
In-Reply-To: <1628066969-29945-1-git-send-email-chenxiang66@hisilicon.com>

Hi Chenxiang,

On 8/4/21 10:49 AM, chenxiang wrote:
> From: Xiang Chen <chenxiang66@hisilicon.com>
>
> Commit 6d9cd115b ("hw/arm/smmuv3: Enforce invalidation on a power of two
> range") splits invalidations into power-of-2 range invalidations. So for
> scenarios where the invalidation size is not a power-of-2 range,
> invalidation takes more time.
This power-of-2 split is not only needed for internal TLB management but
also for the IOMMU MR notifier calls (which use a mask), i.e. the IOTLB
unmap notifications used by both the vhost and VFIO integrations.
So you can disable the internal IOTLB, but we can't simply remove the
power-of-2 split. See below.

The internal TLB could be disabled through a property, but I would rather
make it an "x-" experimental property for debug purposes. Until recently
this was indeed helpful for debugging issues related to internal IOTLB
management (RIL support) ;-) I hope that period is over though ;-)
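
Something along those lines maybe (property name purely illustrative, and
presumably defaulting to false so the cache stays enabled unless you are
debugging):

    static Property smmuv3_properties[] = {
        DEFINE_PROP_BOOL("x-disable-iotlb", SMMUv3State,
                         disable_cached_iotlb, false),
        DEFINE_PROP_END_OF_LIST(),
    };

    /* and in smmuv3_class_init(): */
    device_class_set_props(dc, smmuv3_properties);
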
> Currently smmuv3_translate() is rarely used (I only see it used when
> binding MSIs), so I think we can disable the cached IOTLB to improve
> invalidation efficiency. So add a device property, disable_cached_iotlb,
> to disable the cached IOTLB; then we can send non-power-of-2 range
> invalidations directly.
> Testing unmap latency with the dma_map_benchmark tool shows a large
> improvement when the invalidation size is not a power-of-2 range
> (such as g = 7/15/31/511):
>
> t = 1 (thread = 1)
>                       before opt (us)   after opt (us)
> g=1   (4K size)       0.2/7.6           0.2/7.5
> g=4   (8K size)       0.4/7.9           0.4/7.9
> g=7   (28K size)      0.6/10.2          0.6/8.2
> g=8   (32K size)      0.6/8.3           0.6/8.3
> g=15  (60K size)      1.1/12.1          1.1/9.1
> g=16  (64K size)      1.1/9.2           1.1/9.1
> g=31  (124K size)     2.0/14.8          2.0/10.7
> g=32  (128K size)     2.1/14.8          2.1/10.7
> g=511 (2044K size)    30.9/65.1         31.1/55.9
> g=512 (2048K size)    0.3/32.1          0.3/32.1
>
> t = 10 (thread = 10)
>                       before opt (us)   after opt (us)
> g=1   (4K size)       0.2/39.9          0.2/39.1
> g=4   (8K size)       0.5/42.6          0.5/42.4
> g=7   (28K size)      0.6/66.4          0.6/45.3
> g=8   (32K size)      0.7/45.8          0.7/46.1
> g=15  (60K size)      1.1/80.5          1.1/49.6
> g=16  (64K size)      1.1/49.8          1.1/50.2
> g=31  (124K size)     2.0/98.3          2.1/58.0
> g=32  (128K size)     2.1/57.7          2.1/58.2
> g=511 (2044K size)    35.2/322.2        35.3/236.7
> g=512 (2048K size)    0.8/238.2         0.9/240.3
>
> Note: I tested this with vSMMU enabled, on top of the patchset
> ("vSMMUv3/pSMMUv3 2 stage VFIO integration").
>
> Signed-off-by: Xiang Chen <chenxiang66@hisilicon.com>
> ---
>  hw/arm/smmuv3.c         | 77 ++++++++++++++++++++++++++++++++-----------------
>  include/hw/arm/smmuv3.h |  1 +
>  2 files changed, 52 insertions(+), 26 deletions(-)
>
> diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> index 01b60be..7ae668f 100644
> --- a/hw/arm/smmuv3.c
> +++ b/hw/arm/smmuv3.c
> @@ -19,6 +19,7 @@
>  #include "qemu/osdep.h"
>  #include "qemu/bitops.h"
>  #include "hw/irq.h"
> +#include "hw/qdev-properties.h"
>  #include "hw/sysbus.h"
>  #include "migration/vmstate.h"
>  #include "hw/qdev-core.h"
> @@ -682,19 +683,21 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
>      page_mask = (1ULL << (tt->granule_sz)) - 1;
>      aligned_addr = addr & ~page_mask;
>  
> -    cached_entry = smmu_iotlb_lookup(bs, cfg, tt, aligned_addr);
> -    if (cached_entry) {
> -        if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) {
> -            status = SMMU_TRANS_ERROR;
> -            if (event.record_trans_faults) {
> -                event.type = SMMU_EVT_F_PERMISSION;
> -                event.u.f_permission.addr = addr;
> -                event.u.f_permission.rnw = flag & 0x1;
> +    if (s->disable_cached_iotlb) {
> +        cached_entry = smmu_iotlb_lookup(bs, cfg, tt, aligned_addr);
> +        if (cached_entry) {
> +            if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) {
> +                status = SMMU_TRANS_ERROR;
> +                if (event.record_trans_faults) {
> +                    event.type = SMMU_EVT_F_PERMISSION;
> +                    event.u.f_permission.addr = addr;
> +                    event.u.f_permission.rnw = flag & 0x1;
> +                }
> +            } else {
> +                status = SMMU_TRANS_SUCCESS;
>              }
> -        } else {
> -            status = SMMU_TRANS_SUCCESS;
> +            goto epilogue;
>          }
> -        goto epilogue;
>      }
>  
>      cached_entry = g_new0(SMMUTLBEntry, 1);
> @@ -742,7 +745,9 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
>          }
>          status = SMMU_TRANS_ERROR;
>      } else {
> -        smmu_iotlb_insert(bs, cfg, cached_entry);
> +        if (s->disable_cached_iotlb) {
> +            smmu_iotlb_insert(bs, cfg, cached_entry);
> +        }
>          status = SMMU_TRANS_SUCCESS;
>      }
>  
> @@ -855,8 +860,9 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova,
>      }
>  }
>  
> -static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd)
> +static void smmuv3_s1_range_inval(SMMUv3State *s, Cmd *cmd)
>  {
> +    SMMUState *bs = ARM_SMMU(s);
>      dma_addr_t end, addr = CMD_ADDR(cmd);
>      uint8_t type = CMD_TYPE(cmd);
>      uint16_t vmid = CMD_VMID(cmd);
> @@ -876,7 +882,9 @@ static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd)
>      if (!tg) {
>          trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, 1, ttl, leaf);
>          smmuv3_inv_notifiers_iova(s, asid, addr, tg, 1);
> -        smmu_iotlb_inv_iova(s, asid, addr, tg, 1, ttl);
> +        if (s->disable_cached_iotlb) {
> +            smmu_iotlb_inv_iova(s, asid, addr, tg, 1, ttl);
> +        }
>          return;
>      }
>  
> @@ -885,17 +893,23 @@ static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd)
>      num_pages = (num + 1) * BIT_ULL(scale);
>      granule = tg * 2 + 10;
>  
> -    /* Split invalidations into ^2 range invalidations */
> -    end = addr + (num_pages << granule) - 1;
> -
> -    while (addr != end + 1) {
> -        uint64_t mask = dma_aligned_pow2_mask(addr, end, 64);
> -
> -        num_pages = (mask + 1) >> granule;
> +    if (s->disable_cached_iotlb) {
>          trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, num_pages, ttl, leaf);
>          smmuv3_inv_notifiers_iova(s, asid, addr, tg, num_pages);
> -        smmu_iotlb_inv_iova(s, asid, addr, tg, num_pages, ttl);

smmuv3_inv_notifiers_iova() also needs to be called with power-of-2 ranges,
as it eventually calls memory_region_notify_iommu_one(), which sets
event.entry.addr_mask = num_pages * (1 << granule) - 1;
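
If the goal is only to skip the internal IOTLB maintenance, here is an
untested sketch of what I would expect instead: keep the power-of-2 split
for the notifiers and only make the smmu_iotlb_inv_iova() call conditional
(the negated test assumes "disable" means the internal cache is bypassed):

    while (addr != end + 1) {
        uint64_t mask = dma_aligned_pow2_mask(addr, end, 64);

        num_pages = (mask + 1) >> granule;
        /* e.g. a 28K range at a 64K-aligned iova is emitted as 16K + 8K + 4K */
        smmuv3_inv_notifiers_iova(bs, asid, addr, tg, num_pages);
        if (!s->disable_cached_iotlb) {
            smmu_iotlb_inv_iova(bs, asid, addr, tg, num_pages, ttl);
        }
        addr += mask + 1;
    }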

> -        addr += mask + 1;
> +    } else {
> +        /* Split invalidations into ^2 range invalidations */
> +        end = addr + (num_pages << granule) - 1;
> +
> +        while (addr != end + 1) {
> +            uint64_t mask = dma_aligned_pow2_mask(addr, end, 64);
> +
> +            num_pages = (mask + 1) >> granule;
> +            trace_smmuv3_s1_range_inval(vmid, asid, addr,
> +                                        tg, num_pages, ttl, leaf);
> +            smmuv3_inv_notifiers_iova(s, asid, addr, tg, num_pages);
> +            smmu_iotlb_inv_iova(s, asid, addr, tg, num_pages, ttl);
> +            addr += mask + 1;
> +        }
>      }
>  }
>  
> @@ -1028,18 +1042,22 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
>  
>              trace_smmuv3_cmdq_tlbi_nh_asid(asid);
>              smmu_inv_notifiers_all(&s->smmu_state);
> -            smmu_iotlb_inv_asid(bs, asid);
> +            if (s->disable_cached_iotlb) {
> +                smmu_iotlb_inv_asid(bs, asid);
> +            }
>              break;
>          }
>          case SMMU_CMD_TLBI_NH_ALL:
>          case SMMU_CMD_TLBI_NSNH_ALL:
>              trace_smmuv3_cmdq_tlbi_nh();
>              smmu_inv_notifiers_all(&s->smmu_state);
> -            smmu_iotlb_inv_all(bs);
> +            if (s->disable_cached_iotlb) {
> +                smmu_iotlb_inv_all(bs);
> +            }
>              break;
>          case SMMU_CMD_TLBI_NH_VAA:
>          case SMMU_CMD_TLBI_NH_VA:
> -            smmuv3_s1_range_inval(bs, &cmd);
> +            smmuv3_s1_range_inval(s, &cmd);
>              break;
>          case SMMU_CMD_TLBI_EL3_ALL:
>          case SMMU_CMD_TLBI_EL3_VA:
> @@ -1506,6 +1524,12 @@ static void smmuv3_instance_init(Object *obj)
>      /* Nothing much to do here as of now */
>  }
>  
> +static Property smmuv3_properties[] = {
> +    DEFINE_PROP_BOOL("disable_cached_iotlb", SMMUv3State,
> +                     disable_cached_iotlb, true),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
>  static void smmuv3_class_init(ObjectClass *klass, void *data)
>  {
>      DeviceClass *dc = DEVICE_CLASS(klass);
> @@ -1515,6 +1539,7 @@ static void smmuv3_class_init(ObjectClass *klass, void *data)
>      device_class_set_parent_reset(dc, smmu_reset, &c->parent_reset);
>      c->parent_realize = dc->realize;
>      dc->realize = smmu_realize;
> +    device_class_set_props(dc, smmuv3_properties);
>  }
>  
>  static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu,
> diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
> index c641e60..c94ab7e 100644
> --- a/include/hw/arm/smmuv3.h
> +++ b/include/hw/arm/smmuv3.h
> @@ -62,6 +62,7 @@ struct SMMUv3State {
>  
>      qemu_irq     irq[4];
>      QemuMutex mutex;
> +    bool disable_cached_iotlb; /* Whether disable/enable cached iotlb */
>  };
>  
>  typedef enum {
Thanks

Eric


