qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Paolo Bonzini <pbonzini@redhat.com>
To: Ming Lin <mlin@kernel.org>
Cc: fes@google.com, keith.busch@intel.com, tytso@mit.edu,
	qemu-devel@nongnu.org, linux-nvme@lists.infradead.org,
	virtualization@lists.linux-foundation.org, axboe@fb.com,
	Rob Nelson <rlnelson@google.com>, Christoph Hellwig <hch@lst.de>,
	Mihai Rusu <dizzy@google.com>
Subject: Re: [Qemu-devel] [PATCH -qemu] nvme: support Google vendor extension
Date: Fri, 20 Nov 2015 09:58:08 +0100	[thread overview]
Message-ID: <564EE0A0.1020800@redhat.com> (raw)
In-Reply-To: <1448007096.3473.10.camel@hasee>



On 20/11/2015 09:11, Ming Lin wrote:
> On Thu, 2015-11-19 at 11:37 +0100, Paolo Bonzini wrote:
>>
>> On 18/11/2015 06:47, Ming Lin wrote:
>>> @@ -726,7 +798,11 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
>>>          }
>>>  
>>>          start_sqs = nvme_cq_full(cq) ? 1 : 0;
>>> -        cq->head = new_head;
>>> +        /* When the mapped pointer memory area is setup, we don't rely on
>>> +         * the MMIO written values to update the head pointer. */
>>> +        if (!cq->db_addr) {
>>> +            cq->head = new_head;
>>> +        }
>>
>> You are still checking
>>
>>         if (new_head >= cq->size) {
>>             return;
>>         }
>>
>> above.  I think this is incorrect when the extension is present, and
>> furthermore it's the only case where val is being used.
>>
>> If you're not using val, you could use ioeventfd for the MMIO.  An
>> ioeventfd cuts the MMIO cost by at least 55% and up to 70%. Here are
>> quick and dirty measurements from kvm-unit-tests's vmexit.flat
>> benchmark, on two very different machines:
>>
>> 			Haswell-EP		Ivy Bridge i7
>>   MMIO memory write	5100 -> 2250 (55%)	7000 -> 3000 (58%)
>>   I/O port write	3800 -> 1150 (70%)	4100 -> 1800 (57%)
>>
>> You would need to allocate two eventfds for each qid, one for the sq and
>> one for the cq.  Also, processing the queues is now bounced to the QEMU
>> iothread, so you can probably get rid of sq->timer and cq->timer.
> 
> Here is a quick try.
> It's too late now; I'll test it in the morning.
> 
> Do you see any obvious problems?
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 3e1c38d..d28690d 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -543,6 +543,44 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>      return NVME_SUCCESS;
>  }
>  
> +static void nvme_cq_notifier(EventNotifier *e)
> +{
> +    NvmeCQueue *cq =
> +        container_of(e, NvmeCQueue, notifier);
> +
> +    nvme_post_cqes(cq);
> +}
> +
> +static void nvme_init_cq_eventfd(NvmeCQueue *cq)
> +{
> +    NvmeCtrl *n = cq->ctrl;
> +    uint16_t offset = (cq->cqid*2+1) * NVME_CAP_DSTRD(n->bar.cap);
> +
> +    event_notifier_init(&cq->notifier, 0);
> +    event_notifier_set_handler(&cq->notifier, nvme_cq_notifier);
> +    memory_region_add_eventfd(&n->iomem,
> +        0x1000 + offset, 4, true, cq->cqid*2+1, &cq->notifier);

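This should be 0x1000 + offset, 4, false, 0, &cq->notifier.  With
match_data set to true, the eventfd only fires when the written value
equals the data argument, but the doorbell must fire on any value.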

> +}
> +
> +static void nvme_sq_notifier(EventNotifier *e)
> +{
> +    NvmeSQueue *sq =
> +        container_of(e, NvmeSQueue, notifier);
> +
> +    nvme_process_sq(sq);
> +}
> +
> +static void nvme_init_sq_eventfd(NvmeSQueue *sq)
> +{
> +    NvmeCtrl *n = sq->ctrl;
> +    uint16_t offset = sq->sqid * 2 * NVME_CAP_DSTRD(n->bar.cap);
> +
> +    event_notifier_init(&sq->notifier, 0);
> +    event_notifier_set_handler(&sq->notifier, nvme_sq_notifier);
> +    memory_region_add_eventfd(&n->iomem,
> +        0x1000 + offset, 4, true, sq->sqid * 2, &sq->notifier);

Likewise, this should be 0x1000 + offset, 4, false, 0, &sq->notifier
(match_data false, data 0).

Otherwise looks good!

Paolo

> +}


> +
>  static uint16_t nvme_set_db_memory(NvmeCtrl *n, const NvmeCmd *cmd)
>  {
>      uint64_t db_addr = le64_to_cpu(cmd->prp1);
> @@ -565,6 +603,7 @@ static uint16_t nvme_set_db_memory(NvmeCtrl *n, const NvmeCmd *cmd)
>              /* Submission queue tail pointer location, 2 * QID * stride. */
>              sq->db_addr = db_addr + 2 * i * 4;
>              sq->eventidx_addr = eventidx_addr + 2 * i * 4;
> +            nvme_init_sq_eventfd(sq);
>          }
>  
>          if (cq != NULL) {
> @@ -572,6 +611,7 @@ static uint16_t nvme_set_db_memory(NvmeCtrl *n, const NvmeCmd *cmd)
>               */
>              cq->db_addr = db_addr + (2 * i + 1) * 4;
>              cq->eventidx_addr = eventidx_addr + (2 * i + 1) * 4;
> +            nvme_init_cq_eventfd(cq);
>          }
>      }
>      return NVME_SUCCESS;
> @@ -793,7 +833,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
>          }
>  
>          cq = n->cq[qid];
> -        if (new_head >= cq->size) {
> +        if (!cq->db_addr && new_head >= cq->size) {
>              return;
>          }
>  
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 82aeab4..608f202 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -667,6 +667,7 @@ typedef struct NvmeSQueue {
>       * do not go over this value will not result in MMIO writes (but will
>       * still write the tail pointer to the "db_addr" location above). */
>      uint64_t    eventidx_addr;
> +    EventNotifier notifier;
>  } NvmeSQueue;
>  
>  typedef struct NvmeCQueue {
> @@ -689,6 +690,7 @@ typedef struct NvmeCQueue {
>       * do not go over this value will not result in MMIO writes (but will
>       * still write the head pointer to the "db_addr" location above). */
>      uint64_t    eventidx_addr;
> +    EventNotifier notifier;
>  } NvmeCQueue;
>  
>  typedef struct NvmeNamespace {
> 
>>
>> Paolo

  reply	other threads:[~2015-11-20  8:58 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-18  5:47 [Qemu-devel] [RFC PATCH 0/2] Google extension to improve qemu-nvme performance Ming Lin
2015-11-18  5:47 ` [Qemu-devel] [PATCH -kernel] nvme: improve performance for virtual NVMe devices Ming Lin
2015-11-18  5:47 ` [Qemu-devel] [PATCH -qemu] nvme: support Google vendor extension Ming Lin
2015-11-19 10:37   ` Paolo Bonzini
2015-11-20  8:11     ` Ming Lin
2015-11-20  8:58       ` Paolo Bonzini [this message]
2015-11-20 23:05         ` Ming Lin
2015-11-21 12:56           ` Paolo Bonzini
2015-11-22  7:45             ` Ming Lin
2015-11-24  6:29               ` Ming Lin
2015-11-24 11:01                 ` Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=564EE0A0.1020800@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=axboe@fb.com \
    --cc=dizzy@google.com \
    --cc=fes@google.com \
    --cc=hch@lst.de \
    --cc=keith.busch@intel.com \
    --cc=linux-nvme@lists.infradead.org \
    --cc=mlin@kernel.org \
    --cc=qemu-devel@nongnu.org \
    --cc=rlnelson@google.com \
    --cc=tytso@mit.edu \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).