From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by smtp.lore.kernel.org (Postfix) with ESMTP id 7A815C433EF for ; Thu, 21 Apr 2022 09:20:07 +0000 (UTC) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id D4C76427F6; Thu, 21 Apr 2022 11:19:50 +0200 (CEST) Received: from mga06.intel.com (mga06b.intel.com [134.134.136.31]) by mails.dpdk.org (Postfix) with ESMTP id 9431342808 for ; Thu, 21 Apr 2022 11:19:47 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1650532788; x=1682068788; h=from:to:cc:subject:date:message-id:in-reply-to: references; bh=LVT+qXlVR4XjvNNS/0W0jV1fzKJ1YW3hp0aXF04YCAE=; b=GH2iJTlrbO32OmPrpncClyVGVgGfcgEQH+j0vt46jzX3o4EXWxhZhisr BZQ9AWTs2cJGK8JHFYXJIxahOaapTCOsbrr+p7ssq1RdCh4v8ofBIQfDC EKvv/5O6fHaazSAPKRC2GWAimV8hbXDA0IKEAo6FNCApTERdFR65hUCkK 15tNyAorc3uR43Ux1TiThlngJeGPXakHCgEQff7SdgR5AgmHKmevXOZ8M ArWkHAmBniOkBBsbMmC9ZZIzDs5LckN83KDrA8KM7n0eRxQaS7oKRZAyV gzG8lHGa0l3qJpPioantmTfDB81ut4EvJvoNO1aac5ahJsgg7svc0WM0V Q==; X-IronPort-AV: E=McAfee;i="6400,9594,10323"; a="324734821" X-IronPort-AV: E=Sophos;i="5.90,278,1643702400"; d="scan'208";a="324734821" Received: from orsmga001.jf.intel.com ([10.7.209.18]) by orsmga104.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 21 Apr 2022 02:19:47 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.90,278,1643702400"; d="scan'208";a="593568212" Received: from dpdk-dipei.sh.intel.com ([10.67.110.238]) by orsmga001.jf.intel.com with ESMTP; 21 Apr 2022 02:19:44 -0700 From: Andy Pei To: dev@dpdk.org Cc: chenbo.xia@intel.com, maxime.coquelin@redhat.com, gang.cao@intel.com, changpeng.liu@intel.com Subject: [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration Date: Thu, 21 Apr 2022 16:33:44 +0800 Message-Id: <1650530034-59744-7-git-send-email-andy.pei@intel.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1650530034-59744-1-git-send-email-andy.pei@intel.com> References: <1643093258-47258-2-git-send-email-andy.pei@intel.com> <1650530034-59744-1-git-send-email-andy.pei@intel.com> X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Add SW live-migration support to block device. Add dirty page logging to block device. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/base/ifcvf.c | 4 +- drivers/vdpa/ifc/base/ifcvf.h | 6 ++ drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++------- 3 files changed, 115 insertions(+), 23 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index d10c1fd..e417c50 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -191,7 +191,7 @@ IFCVF_WRITE_REG32(val >> 32, hi); } -STATIC int +int ifcvf_hw_enable(struct ifcvf_hw *hw) { struct ifcvf_pci_common_cfg *cfg; @@ -240,7 +240,7 @@ return 0; } -STATIC void +void ifcvf_hw_disable(struct ifcvf_hw *hw) { u32 i; diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 769c603..6dd7925 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -179,4 +179,10 @@ struct ifcvf_hw { u64 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid); +int +ifcvf_hw_enable(struct ifcvf_hw *hw); + +void +ifcvf_hw_disable(struct ifcvf_hw *hw); + #endif /* _IFCVF_H_ */ diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 8d104b7..a23dc2d 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -345,6 +345,56 @@ struct rte_vdpa_dev_info { } } +static void +vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal) +{ + struct ifcvf_hw *hw = &internal->hw; + struct rte_vhost_vring vq; + int i, vid; + uint64_t features = 0; + uint64_t log_base = 0, log_size = 0; + uint64_t len; + + vid = internal->vid; + + if (internal->device_type == IFCVF_BLK) { + for (i = 0; i < hw->nr_vring; i++) { + rte_vhost_get_vhost_vring(internal->vid, i, &vq); + while (vq.avail->idx != vq.used->idx) { + ifcvf_notify_queue(hw, i); + usleep(10); + } + hw->vring[i].last_avail_idx = vq.avail->idx; + hw->vring[i].last_used_idx = vq.used->idx; + } + } + + ifcvf_hw_disable(hw); + + for (i = 0; i < hw->nr_vring; i++) + rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx, + hw->vring[i].last_used_idx); + + if (internal->sw_lm) + return; + + rte_vhost_get_negotiated_features(vid, &features); + if (RTE_VHOST_NEED_LOG(features)) { + ifcvf_disable_logging(hw); + rte_vhost_get_log_base(internal->vid, &log_base, &log_size); + rte_vfio_container_dma_unmap(internal->vfio_container_fd, + log_base, IFCVF_LOG_BASE, log_size); + /* + * IFCVF marks dirty memory pages for only packet buffer, + * SW helps to mark the used ring as dirty after device stops. + */ + for (i = 0; i < hw->nr_vring; i++) { + len = IFCVF_USED_RING_LEN(hw->vring[i].size); + rte_vhost_log_used_vring(vid, i, 0, len); + } + } +} + #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \ sizeof(int) * (IFCVF_MAX_QUEUES * 2 + 1)) static int @@ -659,15 +709,22 @@ struct rte_vdpa_dev_info { } hw->vring[i].avail = gpa; - /* Direct I/O for Tx queue, relay for Rx queue */ - if (i & 1) { - gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used); - if (gpa == 0) { - DRV_LOG(ERR, "Fail to get GPA for used ring."); - return -1; + if (internal->device_type == IFCVF_NET) { + /* Direct I/O for Tx queue, relay for Rx queue */ + if (i & 1) { + gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used); + if (gpa == 0) { + DRV_LOG(ERR, "Fail to get GPA for used ring."); + return -1; + } + hw->vring[i].used = gpa; + } else { + hw->vring[i].used = m_vring_iova + + (char *)internal->m_vring[i].used - + (char *)internal->m_vring[i].desc; } - hw->vring[i].used = gpa; - } else { + } else if (internal->device_type == IFCVF_BLK) { + /* BLK: relay every queue */ hw->vring[i].used = m_vring_iova + (char *)internal->m_vring[i].used - (char *)internal->m_vring[i].desc; @@ -686,7 +743,10 @@ struct rte_vdpa_dev_info { } hw->nr_vring = nr_vring; - return ifcvf_start_hw(&internal->hw); + if (internal->device_type == IFCVF_NET) + return ifcvf_start_hw(&internal->hw); + else if (internal->device_type == IFCVF_BLK) + return ifcvf_hw_enable(&internal->hw); error: for (i = 0; i < nr_vring; i++) @@ -710,8 +770,12 @@ struct rte_vdpa_dev_info { for (i = 0; i < hw->nr_vring; i++) { /* synchronize remaining new used entries if any */ - if ((i & 1) == 0) + if (internal->device_type == IFCVF_NET) { + if ((i & 1) == 0) + update_used_ring(internal, i); + } else if (internal->device_type == IFCVF_BLK) { update_used_ring(internal, i); + } rte_vhost_get_vhost_vring(vid, i, &vq); len = IFCVF_USED_RING_LEN(vq.size); @@ -773,17 +837,36 @@ struct rte_vdpa_dev_info { } } - for (qid = 0; qid < q_num; qid += 2) { - ev.events = EPOLLIN | EPOLLPRI; - /* leave a flag to mark it's for interrupt */ - ev.data.u64 = 1 | qid << 1 | - (uint64_t)internal->intr_fd[qid] << 32; - if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev) - < 0) { - DRV_LOG(ERR, "epoll add error: %s", strerror(errno)); - return NULL; + if (internal->device_type == IFCVF_NET) { + for (qid = 0; qid < q_num; qid += 2) { + ev.events = EPOLLIN | EPOLLPRI; + /* leave a flag to mark it's for interrupt */ + ev.data.u64 = 1 | qid << 1 | + (uint64_t)internal->intr_fd[qid] << 32; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, + internal->intr_fd[qid], &ev) + < 0) { + DRV_LOG(ERR, "epoll add error: %s", + strerror(errno)); + return NULL; + } + update_used_ring(internal, qid); + } + } else if (internal->device_type == IFCVF_BLK) { + for (qid = 0; qid < q_num; qid += 1) { + ev.events = EPOLLIN | EPOLLPRI; + /* leave a flag to mark it's for interrupt */ + ev.data.u64 = 1 | qid << 1 | + (uint64_t)internal->intr_fd[qid] << 32; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, + internal->intr_fd[qid], &ev) + < 0) { + DRV_LOG(ERR, "epoll add error: %s", + strerror(errno)); + return NULL; + } + update_used_ring(internal, qid); } - update_used_ring(internal, qid); } /* start relay with a first kick */ @@ -871,7 +954,10 @@ struct rte_vdpa_dev_info { /* stop the direct IO data path */ unset_notify_relay(internal); - vdpa_ifcvf_stop(internal); + if (internal->device_type == IFCVF_NET) + vdpa_ifcvf_stop(internal); + else if (internal->device_type == IFCVF_BLK) + vdpa_ifcvf_blk_pause(internal); vdpa_disable_vfio_intr(internal); ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false); -- 1.8.3.1