* [PATCH 0/3] virtio-vfc implementation
@ 2017-12-14 10:11 Hannes Reinecke
  2017-12-14 10:11 ` [PATCH 1/3] virtio-scsi: implement target rescan Hannes Reinecke
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Hannes Reinecke @ 2017-12-14 10:11 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: Christoph Hellwig, James Bottomley, Paolo Bonzini, linux-scsi,
	Hannes Reinecke

Hi all,

here's my attempt to implement a 'Virtual FC' emulation for virtio-scsi,
based on the presentation at the KVM Forum in Prague.

This doesn't so much implement the FC protocol per se; it just uses the
port information to hook up the FC transport class, making the device
look like an FC device.

There is a complementary patchset for the qemu virtio-scsi driver,
which will be posted to the qemu-devel list.

As usual, comments and reviews are welcome.

Hannes Reinecke (3):
  virtio-scsi: implement target rescan
  virtio-scsi: Add FC transport class
  virtio_scsi: Implement 'native LUN' feature

 drivers/scsi/virtio_scsi.c       | 582 ++++++++++++++++++++++++++++++++++++---
 include/uapi/linux/virtio_scsi.h |  16 ++
 2 files changed, 554 insertions(+), 44 deletions(-)

-- 
1.8.5.6


* [PATCH 1/3] virtio-scsi: implement target rescan
  2017-12-14 10:11 [PATCH 0/3] virtio-vfc implementation Hannes Reinecke
@ 2017-12-14 10:11 ` Hannes Reinecke
  2017-12-15 17:54   ` Steffen Maier
  2017-12-14 10:11 ` [PATCH 2/3] virtio-scsi: Add FC transport class Hannes Reinecke
  2017-12-14 10:11 ` [PATCH 3/3] virtio_scsi: Implement 'native LUN' feature Hannes Reinecke
  2 siblings, 1 reply; 16+ messages in thread
From: Hannes Reinecke @ 2017-12-14 10:11 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: Christoph Hellwig, James Bottomley, Paolo Bonzini, linux-scsi,
	Hannes Reinecke, Hannes Reinecke

Implement the 'rescan' virtio-scsi feature. Rescanning works by
sending a 'rescan' virtio-scsi command with the next requested
target id to the backend. The backend will respond with the next
used target id or '-1' if no more targets are found.
This avoids scanning all possible targets.
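
For illustration, the guest-side handshake boils down to this loop
(a simplified sketch of virtscsi_rescan_work() below; locking, error
handling and the virtqueue kick/completion plumbing are omitted):

static void rescan_loop_sketch(struct virtio_scsi *vscsi)
{
	int next_id = 0, found;

	do {
		struct virtio_scsi_cmd *cmd =
			mempool_alloc(virtscsi_cmd_pool, GFP_NOIO);

		if (!cmd)
			return;
		memset(cmd, 0, sizeof(*cmd));
		cmd->req.rescan = (struct virtio_scsi_rescan_req){
			.type = VIRTIO_SCSI_T_RESCAN,
			.next_id = cpu_to_virtio32(vscsi->vdev, next_id),
		};
		/* kick the ctrl virtqueue and wait for the response */
		found = virtio32_to_cpu(vscsi->vdev, cmd->resp.rescan.id);
		if (found != -1)
			/* scan the reported target, then ask for the next */
			next_id = found + 1;
		mempool_free(cmd, virtscsi_cmd_pool);
	} while (found != -1);
}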

Signed-off-by: Hannes Reinecke <hare@suse.com>
---
 drivers/scsi/virtio_scsi.c       | 239 ++++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/virtio_scsi.h |  15 +++
 2 files changed, 250 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 7c28e8d..a561e90 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -46,12 +46,14 @@ struct virtio_scsi_cmd {
 		struct virtio_scsi_cmd_req_pi    cmd_pi;
 		struct virtio_scsi_ctrl_tmf_req  tmf;
 		struct virtio_scsi_ctrl_an_req   an;
+		struct virtio_scsi_rescan_req    rescan;
 	} req;
 	union {
 		struct virtio_scsi_cmd_resp      cmd;
 		struct virtio_scsi_ctrl_tmf_resp tmf;
 		struct virtio_scsi_ctrl_an_resp  an;
 		struct virtio_scsi_event         evt;
+		struct virtio_scsi_rescan_resp   rescan;
 	} resp;
 } ____cacheline_aligned_in_smp;
 
@@ -115,6 +117,10 @@ struct virtio_scsi {
 	/* Protected by event_vq lock */
 	bool stop_events;
 
+	int next_target_id;
+	struct work_struct rescan_work;
+	spinlock_t rescan_lock;
+
 	struct virtio_scsi_vq ctrl_vq;
 	struct virtio_scsi_vq event_vq;
 	struct virtio_scsi_vq req_vqs[];
@@ -318,6 +324,11 @@ static void virtscsi_cancel_event_work(struct virtio_scsi *vscsi)
 
 	for (i = 0; i < VIRTIO_SCSI_EVENT_LEN; i++)
 		cancel_work_sync(&vscsi->event_list[i].work);
+
+	spin_lock_irq(&vscsi->rescan_lock);
+	vscsi->next_target_id = -1;
+	spin_unlock_irq(&vscsi->rescan_lock);
+	cancel_work_sync(&vscsi->rescan_work);
 }
 
 static void virtscsi_handle_transport_reset(struct virtio_scsi *vscsi,
@@ -805,6 +816,168 @@ static enum blk_eh_timer_return virtscsi_eh_timed_out(struct scsi_cmnd *scmnd)
 	return BLK_EH_RESET_TIMER;
 }
 
+static void virtscsi_rescan_work(struct work_struct *work)
+{
+	struct virtio_scsi *vscsi =
+		container_of(work, struct virtio_scsi, rescan_work);
+	struct Scsi_Host *sh = virtio_scsi_host(vscsi->vdev);
+	int target_id, ret;
+	struct virtio_scsi_cmd *cmd;
+	DECLARE_COMPLETION_ONSTACK(comp);
+
+	spin_lock_irq(&vscsi->rescan_lock);
+	target_id = vscsi->next_target_id;
+	if (target_id == -1) {
+		shost_printk(KERN_INFO, sh, "rescan: terminated\n");
+		spin_unlock_irq(&vscsi->rescan_lock);
+		return;
+	}
+	spin_unlock_irq(&vscsi->rescan_lock);
+
+	cmd = mempool_alloc(virtscsi_cmd_pool, GFP_NOIO);
+	if (!cmd) {
+		shost_printk(KERN_INFO, sh, "rescan: no memory\n");
+		goto scan_host;
+	}
+	shost_printk(KERN_INFO, sh, "rescan: next target %d\n", target_id);
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->comp = &comp;
+	cmd->sc = NULL;
+	cmd->req.rescan = (struct virtio_scsi_rescan_req){
+		.type = VIRTIO_SCSI_T_RESCAN,
+		.next_id = cpu_to_virtio32(vscsi->vdev, target_id),
+	};
+
+	ret = virtscsi_kick_cmd(&vscsi->ctrl_vq, cmd, sizeof(cmd->req.rescan),
+				sizeof(cmd->resp.rescan));
+	if (ret < 0) {
+		mempool_free(cmd, virtscsi_cmd_pool);
+		goto scan_host;
+	}
+
+	wait_for_completion(&comp);
+	target_id = virtio32_to_cpu(vscsi->vdev, cmd->resp.rescan.id);
+	if (target_id != -1) {
+		int transport = virtio32_to_cpu(vscsi->vdev,
+						cmd->resp.rescan.transport);
+		spin_lock_irq(&vscsi->rescan_lock);
+		vscsi->next_target_id = target_id + 1;
+		spin_unlock_irq(&vscsi->rescan_lock);
+		shost_printk(KERN_INFO, sh,
+			     "found %s target %d (WWN %*phN)\n",
+			     transport == SCSI_PROTOCOL_FCP ? "FC" : "SAS",
+			     target_id, 8,
+			     cmd->resp.rescan.port_wwn);
+		scsi_scan_target(&sh->shost_gendev, 0, target_id,
+				 SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
+		queue_work(system_freezable_wq, &vscsi->rescan_work);
+	} else {
+		shost_printk(KERN_INFO, sh,
+			     "rescan: no more targets\n");
+		spin_lock_irq(&vscsi->rescan_lock);
+		vscsi->next_target_id = -1;
+		spin_unlock_irq(&vscsi->rescan_lock);
+	}
+	mempool_free(cmd, virtscsi_cmd_pool);
+	return;
+scan_host:
+	spin_lock_irq(&vscsi->rescan_lock);
+	vscsi->next_target_id = -1;
+	spin_unlock_irq(&vscsi->rescan_lock);
+	shost_printk(KERN_INFO, sh, "rescan: scan host\n");
+	scsi_scan_host(sh);
+}
+
+static void virtscsi_scan_host(struct virtio_scsi *vscsi)
+{
+	struct Scsi_Host *sh = virtio_scsi_host(vscsi->vdev);
+	int ret;
+	struct virtio_scsi_cmd *cmd;
+	DECLARE_COMPLETION_ONSTACK(comp);
+
+	cmd = mempool_alloc(virtscsi_cmd_pool, GFP_NOIO);
+	if (!cmd) {
+		shost_printk(KERN_INFO, sh, "rescan: no memory\n");
+		return;
+	}
+	shost_printk(KERN_INFO, sh, "rescan: scan host\n");
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->comp = &comp;
+	cmd->sc = NULL;
+	cmd->req.rescan = (struct virtio_scsi_rescan_req){
+		.type = VIRTIO_SCSI_T_RESCAN,
+		.next_id = cpu_to_virtio32(vscsi->vdev, -1),
+	};
+
+	ret = virtscsi_kick_cmd(&vscsi->ctrl_vq, cmd, sizeof(cmd->req.rescan),
+				sizeof(cmd->resp.rescan));
+	if (ret < 0) {
+		mempool_free(cmd, virtscsi_cmd_pool);
+		return;
+	}
+
+	wait_for_completion(&comp);
+	if (cmd->resp.rescan.id == -1) {
+		int transport = virtio32_to_cpu(vscsi->vdev,
+						cmd->resp.rescan.transport);
+		shost_printk(KERN_INFO, sh,
+			     "%s host wwnn %*phN wwpn %*phN\n",
+			     transport == SCSI_PROTOCOL_FCP ? "FC" : "SAS",
+			     8, cmd->resp.rescan.node_wwn,
+			     8, cmd->resp.rescan.port_wwn);
+	}
+	mempool_free(cmd, virtscsi_cmd_pool);
+}
+
+static void virtscsi_scan_start(struct Scsi_Host *sh)
+{
+	struct virtio_scsi *vscsi = shost_priv(sh);
+
+	virtscsi_scan_host(vscsi);
+	spin_lock_irq(&vscsi->rescan_lock);
+	if (vscsi->next_target_id != -1) {
+		shost_printk(KERN_INFO, sh, "rescan: already running\n");
+		spin_unlock_irq(&vscsi->rescan_lock);
+		return;
+	}
+	vscsi->next_target_id = 0;
+	shost_printk(KERN_INFO, sh, "rescan: start\n");
+	spin_unlock_irq(&vscsi->rescan_lock);
+	queue_work(system_freezable_wq, &vscsi->rescan_work);
+}
+
+int virtscsi_scan_finished(struct Scsi_Host *sh, unsigned long time)
+{
+	struct virtio_scsi *vscsi = shost_priv(sh);
+	int ret = 1;
+
+	spin_lock_irq(&vscsi->rescan_lock);
+	if (vscsi->next_target_id != -1)
+		ret = 0;
+	spin_unlock_irq(&vscsi->rescan_lock);
+	if (!ret)
+		flush_work(&vscsi->rescan_work);
+
+	shost_printk(KERN_INFO, sh, "rescan: %s finished\n",
+		     ret ? "" : "not");
+	return ret;
+}
+
+static ssize_t virtscsi_host_store_rescan(struct device *dev,
+					   struct device_attribute *attr,
+					   const char *buf, size_t count)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	virtscsi_scan_start(shost);
+	return count;
+}
+static DEVICE_ATTR(rescan, S_IWUSR, NULL, virtscsi_host_store_rescan);
+
+static struct device_attribute *virtscsi_shost_attrs[] = {
+	&dev_attr_rescan,
+	NULL,
+};
+
 static struct scsi_host_template virtscsi_host_template_single = {
 	.module = THIS_MODULE,
 	.name = "Virtio SCSI HBA",
@@ -846,6 +1019,53 @@ static enum blk_eh_timer_return virtscsi_eh_timed_out(struct scsi_cmnd *scmnd)
 	.track_queue_depth = 1,
 };
 
+static struct scsi_host_template virtscsi_host_template_single_rescan = {
+	.module = THIS_MODULE,
+	.name = "Virtio SCSI HBA",
+	.proc_name = "virtio_scsi",
+	.this_id = -1,
+	.cmd_size = sizeof(struct virtio_scsi_cmd),
+	.queuecommand = virtscsi_queuecommand_single,
+	.change_queue_depth = virtscsi_change_queue_depth,
+	.eh_abort_handler = virtscsi_abort,
+	.eh_device_reset_handler = virtscsi_device_reset,
+	.eh_timed_out = virtscsi_eh_timed_out,
+	.slave_alloc = virtscsi_device_alloc,
+	.scan_start = virtscsi_scan_start,
+	.scan_finished = virtscsi_scan_finished,
+	.shost_attrs = virtscsi_shost_attrs,
+
+	.dma_boundary = UINT_MAX,
+	.use_clustering = ENABLE_CLUSTERING,
+	.target_alloc = virtscsi_target_alloc,
+	.target_destroy = virtscsi_target_destroy,
+	.track_queue_depth = 1,
+};
+
+static struct scsi_host_template virtscsi_host_template_multi_rescan = {
+	.module = THIS_MODULE,
+	.name = "Virtio SCSI HBA",
+	.proc_name = "virtio_scsi",
+	.this_id = -1,
+	.cmd_size = sizeof(struct virtio_scsi_cmd),
+	.queuecommand = virtscsi_queuecommand_multi,
+	.change_queue_depth = virtscsi_change_queue_depth,
+	.eh_abort_handler = virtscsi_abort,
+	.eh_device_reset_handler = virtscsi_device_reset,
+	.eh_timed_out = virtscsi_eh_timed_out,
+	.slave_alloc = virtscsi_device_alloc,
+	.scan_start = virtscsi_scan_start,
+	.scan_finished = virtscsi_scan_finished,
+	.shost_attrs = virtscsi_shost_attrs,
+
+	.dma_boundary = UINT_MAX,
+	.use_clustering = ENABLE_CLUSTERING,
+	.target_alloc = virtscsi_target_alloc,
+	.target_destroy = virtscsi_target_destroy,
+	.map_queues = virtscsi_map_queues,
+	.track_queue_depth = 1,
+};
+
 #define virtscsi_config_get(vdev, fld) \
 	({ \
 		typeof(((struct virtio_scsi_config *)0)->fld) __val; \
@@ -949,10 +1169,17 @@ static int virtscsi_probe(struct virtio_device *vdev)
 
 	num_targets = virtscsi_config_get(vdev, max_target) + 1;
 
-	if (num_queues == 1)
-		hostt = &virtscsi_host_template_single;
-	else
-		hostt = &virtscsi_host_template_multi;
+	if (num_queues == 1) {
+		if (virtio_has_feature(vdev, VIRTIO_SCSI_F_RESCAN))
+			hostt = &virtscsi_host_template_single_rescan;
+		else
+			hostt = &virtscsi_host_template_single;
+	} else {
+		if (virtio_has_feature(vdev, VIRTIO_SCSI_F_RESCAN))
+			hostt = &virtscsi_host_template_multi_rescan;
+		else
+			hostt = &virtscsi_host_template_multi;
+	}
 
 	shost = scsi_host_alloc(hostt,
 		sizeof(*vscsi) + sizeof(vscsi->req_vqs[0]) * num_queues);
@@ -965,6 +1192,9 @@ static int virtscsi_probe(struct virtio_device *vdev)
 	vscsi->vdev = vdev;
 	vscsi->num_queues = num_queues;
 	vdev->priv = shost;
+	vscsi->next_target_id = -1;
+	spin_lock_init(&vscsi->rescan_lock);
+	INIT_WORK(&vscsi->rescan_work, virtscsi_rescan_work);
 
 	err = virtscsi_init(vdev, vscsi);
 	if (err)
@@ -1067,6 +1297,7 @@ static int virtscsi_restore(struct virtio_device *vdev)
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 	VIRTIO_SCSI_F_T10_PI,
 #endif
+	VIRTIO_SCSI_F_RESCAN,
 };
 
 static struct virtio_driver virtio_scsi_driver = {
diff --git a/include/uapi/linux/virtio_scsi.h b/include/uapi/linux/virtio_scsi.h
index cc18ef8..762622e 100644
--- a/include/uapi/linux/virtio_scsi.h
+++ b/include/uapi/linux/virtio_scsi.h
@@ -96,6 +96,19 @@ struct virtio_scsi_ctrl_an_resp {
 	__u8 response;
 } __attribute__((packed));
 
+/* Target rescan */
+struct virtio_scsi_rescan_req {
+	__virtio32 type;
+	__virtio32 next_id;
+} __attribute__((packed));
+
+struct virtio_scsi_rescan_resp {
+	__virtio32 id;
+	__virtio32 transport;
+	uint8_t node_wwn[8];
+	uint8_t port_wwn[8];
+} __attribute__((packed));
+
 struct virtio_scsi_event {
 	__virtio32 event;
 	__u8 lun[8];
@@ -120,6 +133,7 @@ struct virtio_scsi_config {
 #define VIRTIO_SCSI_F_HOTPLUG                  1
 #define VIRTIO_SCSI_F_CHANGE                   2
 #define VIRTIO_SCSI_F_T10_PI                   3
+#define VIRTIO_SCSI_F_RESCAN                   4
 
 /* Response codes */
 #define VIRTIO_SCSI_S_OK                       0
@@ -140,6 +154,7 @@ struct virtio_scsi_config {
 #define VIRTIO_SCSI_T_TMF                      0
 #define VIRTIO_SCSI_T_AN_QUERY                 1
 #define VIRTIO_SCSI_T_AN_SUBSCRIBE             2
+#define VIRTIO_SCSI_T_RESCAN                   3
 
 /* Valid TMF subtypes.  */
 #define VIRTIO_SCSI_T_TMF_ABORT_TASK           0
-- 
1.8.5.6


* [PATCH 2/3] virtio-scsi: Add FC transport class
  2017-12-14 10:11 [PATCH 0/3] virtio-vfc implementation Hannes Reinecke
  2017-12-14 10:11 ` [PATCH 1/3] virtio-scsi: implement target rescan Hannes Reinecke
@ 2017-12-14 10:11 ` Hannes Reinecke
  2017-12-15 18:08   ` Steffen Maier
  2017-12-14 10:11 ` [PATCH 3/3] virtio_scsi: Implement 'native LUN' feature Hannes Reinecke
  2 siblings, 1 reply; 16+ messages in thread
From: Hannes Reinecke @ 2017-12-14 10:11 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: Christoph Hellwig, James Bottomley, Paolo Bonzini, linux-scsi,
	Hannes Reinecke, Hannes Reinecke

When a device announces an 'FC' protocol we should be pulling
in the FC transport class to have the rports etc. set up correctly.
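
The probe-time decision (from the patch below): query the host once via
the rescan command, and attach the FC transport template if the host
reports FCP:

	virtscsi_scan_host(vscsi);
	if (vscsi->protocol == SCSI_PROTOCOL_FCP)
		shost->transportt = virtio_transport_template;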

Signed-off-by: Hannes Reinecke <hare@suse.com>
---
 drivers/scsi/virtio_scsi.c | 323 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 277 insertions(+), 46 deletions(-)

diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index a561e90..f925fbd 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -25,11 +25,13 @@
 #include <linux/virtio_scsi.h>
 #include <linux/cpu.h>
 #include <linux/blkdev.h>
+#include <linux/delay.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_tcq.h>
 #include <scsi/scsi_devinfo.h>
+#include <scsi/scsi_transport_fc.h>
 #include <linux/seqlock.h>
 #include <linux/blk-mq-virtio.h>
 
@@ -91,6 +93,12 @@ struct virtio_scsi_vq {
  * an atomic_t.
  */
 struct virtio_scsi_target_state {
+	struct list_head list;
+	struct fc_rport *rport;
+	struct virtio_scsi *vscsi;
+	int target_id;
+	bool removed;
+
 	seqcount_t tgt_seq;
 
 	/* Count of outstanding requests. */
@@ -117,8 +125,12 @@ struct virtio_scsi {
 	/* Protected by event_vq lock */
 	bool stop_events;
 
+	int protocol;
 	int next_target_id;
+	u64 wwnn;
+	u64 wwpn;
 	struct work_struct rescan_work;
+	struct list_head target_list;
 	spinlock_t rescan_lock;
 
 	struct virtio_scsi_vq ctrl_vq;
@@ -128,6 +140,7 @@ struct virtio_scsi {
 
 static struct kmem_cache *virtscsi_cmd_cache;
 static mempool_t *virtscsi_cmd_pool;
+static struct scsi_transport_template *virtio_transport_template;
 
 static inline struct Scsi_Host *virtio_scsi_host(struct virtio_device *vdev)
 {
@@ -156,15 +169,21 @@ static void virtscsi_compute_resid(struct scsi_cmnd *sc, u32 resid)
 static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf)
 {
 	struct virtio_scsi_cmd *cmd = buf;
+	struct fc_rport *rport;
 	struct scsi_cmnd *sc = cmd->sc;
 	struct virtio_scsi_cmd_resp *resp = &cmd->resp.cmd;
-	struct virtio_scsi_target_state *tgt =
-				scsi_target(sc->device)->hostdata;
+	struct virtio_scsi_target_state *tgt;
 
 	dev_dbg(&sc->device->sdev_gendev,
 		"cmd %p response %u status %#02x sense_len %u\n",
 		sc, resp->response, resp->status, resp->sense_len);
 
+	rport = starget_to_rport(scsi_target(sc->device));
+	if (!rport)
+		tgt = scsi_target(sc->device)->hostdata;
+	else
+		tgt = rport->dd_data;
+
 	sc->result = resp->status;
 	virtscsi_compute_resid(sc, virtio32_to_cpu(vscsi->vdev, resp->resid));
 	switch (resp->response) {
@@ -502,10 +521,11 @@ static int virtscsi_kick_cmd(struct virtio_scsi_vq *vq,
 
 static void virtio_scsi_init_hdr(struct virtio_device *vdev,
 				 struct virtio_scsi_cmd_req *cmd,
+				 int target_id,
 				 struct scsi_cmnd *sc)
 {
 	cmd->lun[0] = 1;
-	cmd->lun[1] = sc->device->id;
+	cmd->lun[1] = target_id;
 	cmd->lun[2] = (sc->device->lun >> 8) | 0x40;
 	cmd->lun[3] = sc->device->lun & 0xff;
 	cmd->tag = cpu_to_virtio64(vdev, (unsigned long)sc);
@@ -517,12 +537,14 @@ static void virtio_scsi_init_hdr(struct virtio_device *vdev,
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 static void virtio_scsi_init_hdr_pi(struct virtio_device *vdev,
 				    struct virtio_scsi_cmd_req_pi *cmd_pi,
+				    int target_id,
 				    struct scsi_cmnd *sc)
 {
 	struct request *rq = sc->request;
 	struct blk_integrity *bi;
 
-	virtio_scsi_init_hdr(vdev, (struct virtio_scsi_cmd_req *)cmd_pi, sc);
+	virtio_scsi_init_hdr(vdev, (struct virtio_scsi_cmd_req *)cmd_pi,
+			     target_id, sc);
 
 	if (!rq || !scsi_prot_sg_count(sc))
 		return;
@@ -542,6 +564,7 @@ static void virtio_scsi_init_hdr_pi(struct virtio_device *vdev,
 
 static int virtscsi_queuecommand(struct virtio_scsi *vscsi,
 				 struct virtio_scsi_vq *req_vq,
+				 int target_id,
 				 struct scsi_cmnd *sc)
 {
 	struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev);
@@ -564,13 +587,15 @@ static int virtscsi_queuecommand(struct virtio_scsi *vscsi,
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 	if (virtio_has_feature(vscsi->vdev, VIRTIO_SCSI_F_T10_PI)) {
-		virtio_scsi_init_hdr_pi(vscsi->vdev, &cmd->req.cmd_pi, sc);
+		virtio_scsi_init_hdr_pi(vscsi->vdev, &cmd->req.cmd_pi,
+					target_id, sc);
 		memcpy(cmd->req.cmd_pi.cdb, sc->cmnd, sc->cmd_len);
 		req_size = sizeof(cmd->req.cmd_pi);
 	} else
 #endif
 	{
-		virtio_scsi_init_hdr(vscsi->vdev, &cmd->req.cmd, sc);
+		virtio_scsi_init_hdr(vscsi->vdev, &cmd->req.cmd,
+				     target_id, sc);
 		memcpy(cmd->req.cmd.cdb, sc->cmnd, sc->cmd_len);
 		req_size = sizeof(cmd->req.cmd);
 	}
@@ -591,11 +616,25 @@ static int virtscsi_queuecommand_single(struct Scsi_Host *sh,
 					struct scsi_cmnd *sc)
 {
 	struct virtio_scsi *vscsi = shost_priv(sh);
-	struct virtio_scsi_target_state *tgt =
-				scsi_target(sc->device)->hostdata;
-
+	struct virtio_scsi_target_state *tgt = NULL;
+	int target_id = sc->device->id;
+
+	if (vscsi->protocol == SCSI_PROTOCOL_FCP) {
+		struct fc_rport *rport =
+			starget_to_rport(scsi_target(sc->device));
+		if (rport && rport->dd_data) {
+			tgt = rport->dd_data;
+			target_id = tgt->target_id;
+		}
+	} else
+		tgt = scsi_target(sc->device)->hostdata;
+	if (!tgt || tgt->removed) {
+		sc->result = DID_NO_CONNECT << 16;
+		sc->scsi_done(sc);
+		return 0;
+	}
 	atomic_inc(&tgt->reqs);
-	return virtscsi_queuecommand(vscsi, &vscsi->req_vqs[0], sc);
+	return virtscsi_queuecommand(vscsi, &vscsi->req_vqs[0], target_id, sc);
 }
 
 static struct virtio_scsi_vq *virtscsi_pick_vq_mq(struct virtio_scsi *vscsi,
@@ -648,16 +687,30 @@ static int virtscsi_queuecommand_multi(struct Scsi_Host *sh,
 				       struct scsi_cmnd *sc)
 {
 	struct virtio_scsi *vscsi = shost_priv(sh);
-	struct virtio_scsi_target_state *tgt =
-				scsi_target(sc->device)->hostdata;
+	struct virtio_scsi_target_state *tgt = NULL;
 	struct virtio_scsi_vq *req_vq;
-
+	int target_id = sc->device->id;
+
+	if (vscsi->protocol == SCSI_PROTOCOL_FCP) {
+		struct fc_rport *rport =
+			starget_to_rport(scsi_target(sc->device));
+		if (rport && rport->dd_data) {
+			tgt = rport->dd_data;
+			target_id = tgt->target_id;
+		}
+	} else
+		tgt = scsi_target(sc->device)->hostdata;
+	if (!tgt || tgt->removed) {
+		sc->result = DID_NO_CONNECT << 16;
+		sc->scsi_done(sc);
+		return 0;
+	}
 	if (shost_use_blk_mq(sh))
 		req_vq = virtscsi_pick_vq_mq(vscsi, sc);
 	else
 		req_vq = virtscsi_pick_vq(vscsi, tgt);
 
-	return virtscsi_queuecommand(vscsi, req_vq, sc);
+	return virtscsi_queuecommand(vscsi, req_vq, target_id, sc);
 }
 
 static int virtscsi_tmf(struct virtio_scsi *vscsi, struct virtio_scsi_cmd *cmd)
@@ -696,12 +749,27 @@ static int virtscsi_device_reset(struct scsi_cmnd *sc)
 {
 	struct virtio_scsi *vscsi = shost_priv(sc->device->host);
 	struct virtio_scsi_cmd *cmd;
+	struct virtio_scsi_target_state *tgt = NULL;
+	int target_id = sc->device->id;
 
 	sdev_printk(KERN_INFO, sc->device, "device reset\n");
 	cmd = mempool_alloc(virtscsi_cmd_pool, GFP_NOIO);
 	if (!cmd)
 		return FAILED;
 
+	if (vscsi->protocol == SCSI_PROTOCOL_FCP) {
+		struct fc_rport *rport =
+			starget_to_rport(scsi_target(sc->device));
+		if (rport && rport->dd_data) {
+			tgt = rport->dd_data;
+			target_id = tgt->target_id;
+		} else
+			return FAST_IO_FAIL;
+	} else {
+		tgt = scsi_target(sc->device)->hostdata;
+		if (!tgt || tgt->removed)
+			return FAST_IO_FAIL;
+	}
 	memset(cmd, 0, sizeof(*cmd));
 	cmd->sc = sc;
 	cmd->req.tmf = (struct virtio_scsi_ctrl_tmf_req){
@@ -709,7 +777,7 @@ static int virtscsi_device_reset(struct scsi_cmnd *sc)
 		.subtype = cpu_to_virtio32(vscsi->vdev,
 					     VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET),
 		.lun[0] = 1,
-		.lun[1] = sc->device->id,
+		.lun[1] = target_id,
 		.lun[2] = (sc->device->lun >> 8) | 0x40,
 		.lun[3] = sc->device->lun & 0xff,
 	};
@@ -755,19 +823,34 @@ static int virtscsi_abort(struct scsi_cmnd *sc)
 {
 	struct virtio_scsi *vscsi = shost_priv(sc->device->host);
 	struct virtio_scsi_cmd *cmd;
+	struct virtio_scsi_target_state *tgt = NULL;
+	int target_id = sc->device->id;
 
 	scmd_printk(KERN_INFO, sc, "abort\n");
 	cmd = mempool_alloc(virtscsi_cmd_pool, GFP_NOIO);
 	if (!cmd)
 		return FAILED;
 
+	if (vscsi->protocol == SCSI_PROTOCOL_FCP) {
+		struct fc_rport *rport =
+			starget_to_rport(scsi_target(sc->device));
+		if (rport && rport->dd_data) {
+			tgt = rport->dd_data;
+			target_id = tgt->target_id;
+		} else
+			return FAST_IO_FAIL;
+	} else {
+		tgt = scsi_target(sc->device)->hostdata;
+		if (!tgt || tgt->removed)
+			return FAST_IO_FAIL;
+	}
 	memset(cmd, 0, sizeof(*cmd));
 	cmd->sc = sc;
 	cmd->req.tmf = (struct virtio_scsi_ctrl_tmf_req){
 		.type = VIRTIO_SCSI_T_TMF,
 		.subtype = VIRTIO_SCSI_T_TMF_ABORT_TASK,
 		.lun[0] = 1,
-		.lun[1] = sc->device->id,
+		.lun[1] = target_id,
 		.lun[2] = (sc->device->lun >> 8) | 0x40,
 		.lun[3] = sc->device->lun & 0xff,
 		.tag = cpu_to_virtio64(vscsi->vdev, (unsigned long)sc),
@@ -777,25 +860,54 @@ static int virtscsi_abort(struct scsi_cmnd *sc)
 
 static int virtscsi_target_alloc(struct scsi_target *starget)
 {
-	struct Scsi_Host *sh = dev_to_shost(starget->dev.parent);
-	struct virtio_scsi *vscsi = shost_priv(sh);
-
-	struct virtio_scsi_target_state *tgt =
-				kmalloc(sizeof(*tgt), GFP_KERNEL);
-	if (!tgt)
-		return -ENOMEM;
-
+	struct Scsi_Host *sh;
+	struct virtio_scsi *vscsi;
+	struct fc_rport *rport;
+	struct virtio_scsi_target_state *tgt;
+
+	rport = starget_to_rport(starget);
+	if (rport) {
+		tgt = rport->dd_data;
+		sh = rport_to_shost(rport);
+		vscsi = shost_priv(sh);
+	} else {
+		sh = dev_to_shost(starget->dev.parent);
+		vscsi = shost_priv(sh);
+		spin_lock_irq(&vscsi->rescan_lock);
+		list_for_each_entry(tgt, &vscsi->target_list, list) {
+			if (tgt->target_id == starget->id) {
+				starget->hostdata = tgt;
+				break;
+			}
+		}
+		spin_unlock_irq(&vscsi->rescan_lock);
+		if (!starget->hostdata)
+			return -ENOMEM;
+		tgt = starget->hostdata;
+	}
 	seqcount_init(&tgt->tgt_seq);
 	atomic_set(&tgt->reqs, 0);
 	tgt->req_vq = &vscsi->req_vqs[0];
-
-	starget->hostdata = tgt;
+	tgt->vscsi = vscsi;
 	return 0;
 }
 
 static void virtscsi_target_destroy(struct scsi_target *starget)
 {
-	struct virtio_scsi_target_state *tgt = starget->hostdata;
+	struct fc_rport *rport;
+	struct virtio_scsi_target_state *tgt;
+
+	rport = starget_to_rport(starget);
+	if (rport) {
+		tgt = rport->dd_data;
+		rport->dd_data = NULL;
+	} else {
+		tgt = starget->hostdata;
+		starget->hostdata = NULL;
+	}
+	spin_lock_irq(&tgt->vscsi->rescan_lock);
+	list_del_init(&tgt->list);
+	spin_unlock_irq(&tgt->vscsi->rescan_lock);
 	kfree(tgt);
 }
 
@@ -821,8 +933,9 @@ static void virtscsi_rescan_work(struct work_struct *work)
 	struct virtio_scsi *vscsi =
 		container_of(work, struct virtio_scsi, rescan_work);
 	struct Scsi_Host *sh = virtio_scsi_host(vscsi->vdev);
-	int target_id, ret;
+	int target_id, ret, transport;
 	struct virtio_scsi_cmd *cmd;
+	struct virtio_scsi_target_state *tgt, *tmp, *old = NULL;
 	DECLARE_COMPLETION_ONSTACK(comp);
 
 	spin_lock_irq(&vscsi->rescan_lock);
@@ -857,27 +970,67 @@ static void virtscsi_rescan_work(struct work_struct *work)
 
 	wait_for_completion(&comp);
 	target_id = virtio32_to_cpu(vscsi->vdev, cmd->resp.rescan.id);
-	if (target_id != -1) {
-		int transport = virtio32_to_cpu(vscsi->vdev,
-						cmd->resp.rescan.transport);
-		spin_lock_irq(&vscsi->rescan_lock);
-		vscsi->next_target_id = target_id + 1;
-		spin_unlock_irq(&vscsi->rescan_lock);
-		shost_printk(KERN_INFO, sh,
-			     "found %s target %d (WWN %*phN)\n",
-			     transport == SCSI_PROTOCOL_FCP ? "FC" : "SAS",
-			     target_id, 8,
-			     cmd->resp.rescan.port_wwn);
-		scsi_scan_target(&sh->shost_gendev, 0, target_id,
-				 SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
-		queue_work(system_freezable_wq, &vscsi->rescan_work);
-	} else {
+	if (target_id == -1) {
 		shost_printk(KERN_INFO, sh,
 			     "rescan: no more targets\n");
 		spin_lock_irq(&vscsi->rescan_lock);
 		vscsi->next_target_id = -1;
 		spin_unlock_irq(&vscsi->rescan_lock);
+		goto out;
+	}
+	transport = virtio32_to_cpu(vscsi->vdev, cmd->resp.rescan.transport);
+	shost_printk(KERN_INFO, sh,
+		     "found %s target %d (WWN %*phN)\n",
+		     transport == SCSI_PROTOCOL_FCP ? "FC" : "SAS",
+		     target_id, 8, cmd->resp.rescan.port_wwn);
+
+	tgt = kzalloc(sizeof(*tgt), GFP_KERNEL);
+	if (!tgt) {
+		shost_printk(KERN_WARNING, sh,
+			     "rescan: out of memory for rport\n");
+		goto out;
+	}
+	tgt->target_id = (target_id & 0xff);
+	tgt->removed = false;
+	spin_lock_irq(&vscsi->rescan_lock);
+	vscsi->next_target_id = tgt->target_id + 1;
+	list_for_each_entry(tmp, &vscsi->target_list, list) {
+		if (tgt->target_id == tmp->target_id) {
+			old = tmp;
+			break;
+		}
 	}
+	if (old) {
+		kfree(tgt);
+		tgt = old;
+	} else
+		list_add_tail(&tgt->list, &vscsi->target_list);
+	spin_unlock_irq(&vscsi->rescan_lock);
+	if (transport == SCSI_PROTOCOL_FCP) {
+		struct fc_rport_identifiers rport_ids;
+		struct fc_rport *rport;
+
+		rport_ids.node_name = wwn_to_u64(cmd->resp.rescan.node_wwn);
+		rport_ids.port_name = wwn_to_u64(cmd->resp.rescan.port_wwn);
+		rport_ids.port_id = (target_id >> 8);
+		rport = fc_remote_port_add(sh, 0, &rport_ids);
+		if (rport) {
+			tgt->rport = rport;
+			rport->dd_data = tgt;
+			fc_remote_port_rolechg(rport, FC_RPORT_ROLE_FCP_TARGET);
+		} else {
+			spin_lock_irq(&vscsi->rescan_lock);
+			list_del(&tgt->list);
+			spin_unlock_irq(&vscsi->rescan_lock);
+			kfree(tgt);
+			tgt = NULL;
+		}
+	} else {
+		scsi_scan_target(&sh->shost_gendev, 0, tgt->target_id,
+				 SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
+	}
+	queue_work(system_freezable_wq, &vscsi->rescan_work);
+out:
 	mempool_free(cmd, virtscsi_cmd_pool);
 	return;
 scan_host:
@@ -920,11 +1073,15 @@ static void virtscsi_scan_host(struct virtio_scsi *vscsi)
 	if (cmd->resp.rescan.id == -1) {
 		int transport = virtio32_to_cpu(vscsi->vdev,
 						cmd->resp.rescan.transport);
+
 		shost_printk(KERN_INFO, sh,
 			     "%s host wwnn %*phN wwpn %*phN\n",
 			     transport == SCSI_PROTOCOL_FCP ? "FC" : "SAS",
 			     8, cmd->resp.rescan.node_wwn,
 			     8, cmd->resp.rescan.port_wwn);
+		vscsi->protocol = transport;
+		vscsi->wwnn = wwn_to_u64(cmd->resp.rescan.node_wwn);
+		vscsi->wwpn = wwn_to_u64(cmd->resp.rescan.port_wwn);
 	}
 	mempool_free(cmd, virtscsi_cmd_pool);
 }
@@ -932,14 +1089,31 @@ static void virtscsi_scan_host(struct virtio_scsi *vscsi)
 static void virtscsi_scan_start(struct Scsi_Host *sh)
 {
 	struct virtio_scsi *vscsi = shost_priv(sh);
+	struct virtio_scsi_target_state *tgt;
 
-	virtscsi_scan_host(vscsi);
 	spin_lock_irq(&vscsi->rescan_lock);
 	if (vscsi->next_target_id != -1) {
 		shost_printk(KERN_INFO, sh, "rescan: already running\n");
 		spin_unlock_irq(&vscsi->rescan_lock);
 		return;
 	}
+	if (vscsi->protocol == SCSI_PROTOCOL_FCP) {
+		fc_host_node_name(sh) = vscsi->wwnn;
+		fc_host_port_name(sh) = vscsi->wwpn;
+		fc_host_port_id(sh) = 0x00ff00;
+		fc_host_port_type(sh) = FC_PORTTYPE_NPIV;
+		fc_host_port_state(sh) = FC_PORTSTATE_BLOCKED;
+		list_for_each_entry(tgt, &vscsi->target_list, list) {
+			if (tgt->rport) {
+				fc_remote_port_delete(tgt->rport);
+				tgt->rport = NULL;
+			}
+			tgt->removed = true;
+		}
+	} else {
+		list_for_each_entry(tgt, &vscsi->target_list, list)
+			tgt->removed = true;
+	}
 	vscsi->next_target_id = 0;
 	shost_printk(KERN_INFO, sh, "rescan: start\n");
 	spin_unlock_irq(&vscsi->rescan_lock);
@@ -954,12 +1128,14 @@ int virtscsi_scan_finished(struct Scsi_Host *sh, unsigned long time)
 	spin_lock_irq(&vscsi->rescan_lock);
 	if (vscsi->next_target_id != -1)
 		ret = 0;
+	else if (vscsi->protocol == SCSI_PROTOCOL_FCP)
+		fc_host_port_state(sh) = FC_PORTSTATE_ONLINE;
 	spin_unlock_irq(&vscsi->rescan_lock);
 	if (!ret)
 		flush_work(&vscsi->rescan_work);
 
-	shost_printk(KERN_INFO, sh, "rescan: %s finished\n",
-		     ret ? "" : "not");
+	shost_printk(KERN_INFO, sh, "rescan: %sfinished\n",
+		     ret ? "" : "not ");
 	return ret;
 }
 
@@ -978,6 +1154,34 @@ static ssize_t virtscsi_host_store_rescan(struct device *dev,
 	NULL,
 };
 
+static int virtscsi_issue_lip(struct Scsi_Host *shost)
+{
+	struct virtio_scsi *vscsi = shost_priv(shost);
+	unsigned long start = jiffies;
+	struct virtio_scsi_target_state *tgt;
+
+	spin_lock_irq(&vscsi->rescan_lock);
+	if (vscsi->next_target_id != -1) {
+		spin_unlock_irq(&vscsi->rescan_lock);
+		return 0;
+	}
+	fc_host_port_state(shost) = FC_PORTSTATE_BLOCKED;
+	list_for_each_entry(tgt, &vscsi->target_list, list) {
+		if (tgt->rport) {
+			fc_remote_port_delete(tgt->rport);
+			tgt->rport = NULL;
+		}
+	}
+	vscsi->next_target_id = 0;
+	spin_unlock_irq(&vscsi->rescan_lock);
+	queue_work(system_freezable_wq, &vscsi->rescan_work);
+
+	while (!virtscsi_scan_finished(shost, jiffies - start))
+		msleep(10);
+
+	return 0;
+}
+
 static struct scsi_host_template virtscsi_host_template_single = {
 	.module = THIS_MODULE,
 	.name = "Virtio SCSI HBA",
@@ -1066,6 +1270,20 @@ static ssize_t virtscsi_host_store_rescan(struct device *dev,
 	.track_queue_depth = 1,
 };
 
+static struct fc_function_template virtscsi_transport_functions = {
+	.dd_fcrport_size = sizeof(struct virtio_scsi_target_state *),
+	.show_host_node_name = 1,
+	.show_host_port_name = 1,
+	.show_host_port_id = 1,
+	.show_host_port_state = 1,
+	.show_host_port_type = 1,
+	.show_starget_node_name = 1,
+	.show_starget_port_name = 1,
+	.show_starget_port_id = 1,
+	.show_rport_dev_loss_tmo = 1,
+	.issue_fc_host_lip = virtscsi_issue_lip,
+};
+
 #define virtscsi_config_get(vdev, fld) \
 	({ \
 		typeof(((struct virtio_scsi_config *)0)->fld) __val; \
@@ -1193,7 +1411,9 @@ static int virtscsi_probe(struct virtio_device *vdev)
 	vscsi->num_queues = num_queues;
 	vdev->priv = shost;
 	vscsi->next_target_id = -1;
+	vscsi->protocol = SCSI_PROTOCOL_SAS;
 	spin_lock_init(&vscsi->rescan_lock);
+	INIT_LIST_HEAD(&vscsi->target_list);
 	INIT_WORK(&vscsi->rescan_work, virtscsi_rescan_work);
 
 	err = virtscsi_init(vdev, vscsi);
@@ -1228,6 +1448,10 @@ static int virtscsi_probe(struct virtio_device *vdev)
 	}
 #endif
 
+	virtscsi_scan_host(vscsi);
+	if (vscsi->protocol == SCSI_PROTOCOL_FCP)
+		shost->transportt = virtio_transport_template;
+
 	err = scsi_add_host(shost, &vdev->dev);
 	if (err)
 		goto scsi_add_host_failed;
@@ -1332,6 +1556,10 @@ static int __init init(void)
 		pr_err("mempool_create() for virtscsi_cmd_pool failed\n");
 		goto error;
 	}
+	virtio_transport_template =
+		fc_attach_transport(&virtscsi_transport_functions);
+	if (!virtio_transport_template)
+		goto error;
 	ret = register_virtio_driver(&virtio_scsi_driver);
 	if (ret < 0)
 		goto error;
@@ -1339,6 +1567,8 @@ static int __init init(void)
 	return 0;
 
 error:
+	if (virtio_transport_template)
+		fc_release_transport(virtio_transport_template);
 	if (virtscsi_cmd_pool) {
 		mempool_destroy(virtscsi_cmd_pool);
 		virtscsi_cmd_pool = NULL;
@@ -1352,6 +1582,7 @@ static int __init init(void)
 
 static void __exit fini(void)
 {
+	fc_release_transport(virtio_transport_template);
 	unregister_virtio_driver(&virtio_scsi_driver);
 	mempool_destroy(virtscsi_cmd_pool);
 	kmem_cache_destroy(virtscsi_cmd_cache);
-- 
1.8.5.6


* [PATCH 3/3] virtio_scsi: Implement 'native LUN' feature
  2017-12-14 10:11 [PATCH 0/3] virtio-vfc implementation Hannes Reinecke
  2017-12-14 10:11 ` [PATCH 1/3] virtio-scsi: implement target rescan Hannes Reinecke
  2017-12-14 10:11 ` [PATCH 2/3] virtio-scsi: Add FC transport class Hannes Reinecke
@ 2017-12-14 10:11 ` Hannes Reinecke
  2017-12-15 18:17   ` Steffen Maier
  2 siblings, 1 reply; 16+ messages in thread
From: Hannes Reinecke @ 2017-12-14 10:11 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: Christoph Hellwig, James Bottomley, Paolo Bonzini, linux-scsi,
	Hannes Reinecke, Hannes Reinecke

The 'native LUN' feature allows virtio-scsi to pass in the LUN
numbers from the underlying storage directly, without having
to modify the LUN number itself.
It works by shifting the existing LUN number down by 8 bytes,
and adding the virtio-specific 8-byte LUN steering header.
With that virtio doesn't have to mangle the LUN number, allowing
us to pass the 'real' LUN number to the guest.
Of course, we do cut off the last 8 bytes of the 'real' LUN number,
but I'm not aware of any array utilizing that, so the impact should
be negligible.
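
For illustration, here is the encoding as implemented below, in terms
of the int_to_scsilun() integer representation (each of the four LUN
addressing levels occupies 16 bits, level 1 in the low word); a sketch
only, the helper name is made up:

static void native_lun_sketch(u8 *vlun, int target_id, u64 host_lun)
{
	/* push the host LUN up by one addressing level; its topmost
	 * level drops off */
	u64 lun = host_lun << 16;

	/* prepend the virtio steering header: lun[0] = 1, lun[1] = target */
	lun |= (1ULL << 8) | (u64)target_id;
	int_to_scsilun(lun, (struct scsi_lun *)vlun);
}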

Signed-off-by: Hannes Reinecke <hare@suse.com>
---
 drivers/scsi/virtio_scsi.c       | 62 ++++++++++++++++++++++++++++++----------
 include/uapi/linux/virtio_scsi.h |  1 +
 2 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index f925fbd..63c2c85 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -356,8 +356,12 @@ static void virtscsi_handle_transport_reset(struct virtio_scsi *vscsi,
 	struct scsi_device *sdev;
 	struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev);
 	unsigned int target = event->lun[1];
-	unsigned int lun = (event->lun[2] << 8) | event->lun[3];
+	u64 lun;
 
+	if (virtio_has_feature(vscsi->vdev, VIRTIO_SCSI_F_NATIVE_LUN))
+		lun = scsilun_to_int((struct scsi_lun *)event->lun) >> 16;
+	else
+		lun = (event->lun[2] << 8) | event->lun[3];
 	switch (virtio32_to_cpu(vscsi->vdev, event->reason)) {
 	case VIRTIO_SCSI_EVT_RESET_RESCAN:
 		scsi_add_device(shost, 0, target, lun);
@@ -368,7 +372,7 @@ static void virtscsi_handle_transport_reset(struct virtio_scsi *vscsi,
 			scsi_remove_device(sdev);
 			scsi_device_put(sdev);
 		} else {
-			pr_err("SCSI device %d 0 %d %d not found\n",
+			pr_err("SCSI device %d 0 %d %llu not found\n",
 				shost->host_no, target, lun);
 		}
 		break;
@@ -383,13 +387,17 @@ static void virtscsi_handle_param_change(struct virtio_scsi *vscsi,
 	struct scsi_device *sdev;
 	struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev);
 	unsigned int target = event->lun[1];
-	unsigned int lun = (event->lun[2] << 8) | event->lun[3];
+	u64 lun;
 	u8 asc = virtio32_to_cpu(vscsi->vdev, event->reason) & 255;
 	u8 ascq = virtio32_to_cpu(vscsi->vdev, event->reason) >> 8;
 
+	if (virtio_has_feature(vscsi->vdev, VIRTIO_SCSI_F_NATIVE_LUN))
+		lun = scsilun_to_int((struct scsi_lun *)event->lun) >> 16;
+	else
+		lun = (event->lun[2] << 8) | event->lun[3];
 	sdev = scsi_device_lookup(shost, 0, target, lun);
 	if (!sdev) {
-		pr_err("SCSI device %d 0 %d %d not found\n",
+		pr_err("SCSI device %d 0 %d %llu not found\n",
 			shost->host_no, target, lun);
 		return;
 	}
@@ -524,10 +532,16 @@ static void virtio_scsi_init_hdr(struct virtio_device *vdev,
 				 int target_id,
 				 struct scsi_cmnd *sc)
 {
-	cmd->lun[0] = 1;
-	cmd->lun[1] = target_id;
-	cmd->lun[2] = (sc->device->lun >> 8) | 0x40;
-	cmd->lun[3] = sc->device->lun & 0xff;
+	if (virtio_has_feature(vdev, VIRTIO_SCSI_F_NATIVE_LUN)) {
+		u64 lun = sc->device->lun << 16;
+		lun |= ((u64)1 << 8) | (u64)target_id;
+		int_to_scsilun(lun, (struct scsi_lun *)&cmd->lun);
+	} else {
+		cmd->lun[0] = 1;
+		cmd->lun[1] = target_id;
+		cmd->lun[2] = (sc->device->lun >> 8) | 0x40;
+		cmd->lun[3] = sc->device->lun & 0xff;
+	}
 	cmd->tag = cpu_to_virtio64(vdev, (unsigned long)sc);
 	cmd->task_attr = VIRTIO_SCSI_S_SIMPLE;
 	cmd->prio = 0;
@@ -776,11 +790,17 @@ static int virtscsi_device_reset(struct scsi_cmnd *sc)
 		.type = VIRTIO_SCSI_T_TMF,
 		.subtype = cpu_to_virtio32(vscsi->vdev,
 					     VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET),
-		.lun[0] = 1,
-		.lun[1] = target_id,
-		.lun[2] = (sc->device->lun >> 8) | 0x40,
-		.lun[3] = sc->device->lun & 0xff,
 	};
+	if (virtio_has_feature(vscsi->vdev, VIRTIO_SCSI_F_NATIVE_LUN)) {
+		u64 lun = sc->device->lun << 16;
+		lun |= ((u64)1 << 8) | (u64)target_id;
+		int_to_scsilun(lun, (struct scsi_lun *)&cmd->req.tmf.lun);
+	} else {
+		cmd->req.tmf.lun[0] = 1;
+		cmd->req.tmf.lun[1] = target_id;
+		cmd->req.tmf.lun[2] = (sc->device->lun >> 8) | 0x40;
+		cmd->req.tmf.lun[3] = sc->device->lun & 0xff;
+	}
 	return virtscsi_tmf(vscsi, cmd);
 }
 
@@ -851,10 +871,18 @@ static int virtscsi_abort(struct scsi_cmnd *sc)
 		.subtype = VIRTIO_SCSI_T_TMF_ABORT_TASK,
 		.lun[0] = 1,
 		.lun[1] = target_id,
-		.lun[2] = (sc->device->lun >> 8) | 0x40,
-		.lun[3] = sc->device->lun & 0xff,
 		.tag = cpu_to_virtio64(vscsi->vdev, (unsigned long)sc),
 	};
+	if (virtio_has_feature(vscsi->vdev, VIRTIO_SCSI_F_NATIVE_LUN)) {
+		u64 lun = sc->device->lun << 16;
+		lun |= ((u64)1 << 8) | (u64)target_id;
+		int_to_scsilun(lun, (struct scsi_lun *)&cmd->req.tmf.lun);
+	} else {
+		cmd->req.tmf.lun[0] = 1;
+		cmd->req.tmf.lun[1] = target_id;
+		cmd->req.tmf.lun[2] = (sc->device->lun >> 8) | 0x40;
+		cmd->req.tmf.lun[3] = sc->device->lun & 0xff;
+	}
 	return virtscsi_tmf(vscsi, cmd);
 }
 
@@ -1429,7 +1457,10 @@ static int virtscsi_probe(struct virtio_device *vdev)
 	/* LUNs > 256 are reported with format 1, so they go in the range
 	 * 16640-32767.
 	 */
-	shost->max_lun = virtscsi_config_get(vdev, max_lun) + 1 + 0x4000;
+	if (!virtio_has_feature(vdev, VIRTIO_SCSI_F_NATIVE_LUN))
+		shost->max_lun = virtscsi_config_get(vdev, max_lun) + 1 + 0x4000;
+	else
+		shost->max_lun = (u64)-1;
 	shost->max_id = num_targets;
 	shost->max_channel = 0;
 	shost->max_cmd_len = VIRTIO_SCSI_CDB_SIZE;
@@ -1522,6 +1553,7 @@ static int virtscsi_restore(struct virtio_device *vdev)
 	VIRTIO_SCSI_F_T10_PI,
 #endif
 	VIRTIO_SCSI_F_RESCAN,
+	VIRTIO_SCSI_F_NATIVE_LUN,
 };
 
 static struct virtio_driver virtio_scsi_driver = {
diff --git a/include/uapi/linux/virtio_scsi.h b/include/uapi/linux/virtio_scsi.h
index 762622e..30d275f 100644
--- a/include/uapi/linux/virtio_scsi.h
+++ b/include/uapi/linux/virtio_scsi.h
@@ -134,6 +134,7 @@ struct virtio_scsi_config {
 #define VIRTIO_SCSI_F_CHANGE                   2
 #define VIRTIO_SCSI_F_T10_PI                   3
 #define VIRTIO_SCSI_F_RESCAN                   4
+#define VIRTIO_SCSI_F_NATIVE_LUN               5
 
 /* Response codes */
 #define VIRTIO_SCSI_S_OK                       0
-- 
1.8.5.6


* Re: [PATCH 1/3] virtio-scsi: implement target rescan
  2017-12-14 10:11 ` [PATCH 1/3] virtio-scsi: implement target rescan Hannes Reinecke
@ 2017-12-15 17:54   ` Steffen Maier
  0 siblings, 0 replies; 16+ messages in thread
From: Steffen Maier @ 2017-12-15 17:54 UTC (permalink / raw)
  To: Hannes Reinecke, Martin K. Petersen
  Cc: Christoph Hellwig, James Bottomley, Paolo Bonzini, linux-scsi,
	Hannes Reinecke


On 12/14/2017 11:11 AM, Hannes Reinecke wrote:
> Implement the 'rescan' virtio-scsi feature. Rescanning works by
> sending a 'rescan' virtio-scsi command with the next requested
> target id to the backend. The backend will respond with the next
> used target id or '-1' if no more targets are found.
> This avoids scanning all possible targets.
> 
> Signed-off-by: Hannes Reinecke <hare@suse.com>
> ---
>   drivers/scsi/virtio_scsi.c       | 239 ++++++++++++++++++++++++++++++++++++++-
>   include/uapi/linux/virtio_scsi.h |  15 +++
>   2 files changed, 250 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
> index 7c28e8d..a561e90 100644
> --- a/drivers/scsi/virtio_scsi.c
> +++ b/drivers/scsi/virtio_scsi.c

> +static void virtscsi_rescan_work(struct work_struct *work)
> +{

> +	if (target_id == -1) {
> +		shost_printk(KERN_INFO, sh, "rescan: terminated\n");
> +		spin_unlock_irq(&vscsi->rescan_lock);
> +		return;
> +	}
> +	spin_unlock_irq(&vscsi->rescan_lock);
> +
> +	cmd = mempool_alloc(virtscsi_cmd_pool, GFP_NOIO);
> +	if (!cmd) {
> +		shost_printk(KERN_INFO, sh, "rescan: no memory\n");
> +		goto scan_host;
> +	}
> +	shost_printk(KERN_INFO, sh, "rescan: next target %d\n", target_id);

> +		shost_printk(KERN_INFO, sh,
> +			     "rescan: no more targets\n");

> +	shost_printk(KERN_INFO, sh, "rescan: scan host\n");
> +	scsi_scan_host(sh);
> +}
> +
> +static void virtscsi_scan_host(struct virtio_scsi *vscsi)
> +{
> +	struct Scsi_Host *sh = virtio_scsi_host(vscsi->vdev);
> +	int ret;
> +	struct virtio_scsi_cmd *cmd;
> +	DECLARE_COMPLETION_ONSTACK(comp);
> +
> +	cmd = mempool_alloc(virtscsi_cmd_pool, GFP_NOIO);
> +	if (!cmd) {
> +		shost_printk(KERN_INFO, sh, "rescan: no memory\n");

If shost_printk() does not add any info about the calling function,
this cannot be distinguished from the message with the same format
string above in virtscsi_rescan_work()?
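
Maybe include the function name, e.g. (sketch):

	shost_printk(KERN_INFO, sh, "%s: no memory\n", __func__);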

> +		return;
> +	}
> +	shost_printk(KERN_INFO, sh, "rescan: scan host\n");

ditto


> +static void virtscsi_scan_start(struct Scsi_Host *sh)
> +{
> +	struct virtio_scsi *vscsi = shost_priv(sh);
> +
> +	virtscsi_scan_host(vscsi);
> +	spin_lock_irq(&vscsi->rescan_lock);
> +	if (vscsi->next_target_id != -1) {
> +		shost_printk(KERN_INFO, sh, "rescan: already running\n");
> +		spin_unlock_irq(&vscsi->rescan_lock);
> +		return;
> +	}
> +	vscsi->next_target_id = 0;
> +	shost_printk(KERN_INFO, sh, "rescan: start\n");
> +	spin_unlock_irq(&vscsi->rescan_lock);
> +	queue_work(system_freezable_wq, &vscsi->rescan_work);
> +}
> +
> +int virtscsi_scan_finished(struct Scsi_Host *sh, unsigned long time)

> +	shost_printk(KERN_INFO, sh, "rescan: %s finished\n",
> +		     ret ? "" : "not");
> +	return ret;
> +}


-- 
Mit freundlichen Grüßen / Kind regards
Steffen Maier

Linux on z Systems Development

IBM Deutschland Research & Development GmbH
Vorsitzende des Aufsichtsrats: Martina Koederitz
Geschaeftsfuehrung: Dirk Wittkopp
Sitz der Gesellschaft: Boeblingen
Registergericht: Amtsgericht Stuttgart, HRB 243294

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] virtio-scsi: Add FC transport class
  2017-12-14 10:11 ` [PATCH 2/3] virtio-scsi: Add FC transport class Hannes Reinecke
@ 2017-12-15 18:08   ` Steffen Maier
  2017-12-18  8:31     ` Hannes Reinecke
  0 siblings, 1 reply; 16+ messages in thread
From: Steffen Maier @ 2017-12-15 18:08 UTC (permalink / raw)
  To: Hannes Reinecke, Martin K. Petersen
  Cc: Christoph Hellwig, James Bottomley, Paolo Bonzini, linux-scsi,
	Hannes Reinecke


On 12/14/2017 11:11 AM, Hannes Reinecke wrote:
> When a device announces an 'FC' protocol we should be pulling
> in the FC transport class to have the rports etc. set up correctly.

It took some time for me to understand what this does.
It seems to mirror the topology of rports and sdevs that exist under the 
fc_host on the kvm host side to the virtio-scsi-fc side in the guest.

I like the idea. This is also what I've been suggesting users do if 
they back virtio-scsi with zfcp on the kvm host side. Primarily to not 
stall all virtio-scsi I/O on all paths if the guest ever gets into 
scsi_eh. But also to make it look like an HBA pass-through so one can 
more easily migrate to this once we have FCP pass-through.

> 
> Signed-off-by: Hannes Reinecke <hare@suse.com>
> ---
>   drivers/scsi/virtio_scsi.c | 323 ++++++++++++++++++++++++++++++++++++++-------
>   1 file changed, 277 insertions(+), 46 deletions(-)
> 
> diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
> index a561e90..f925fbd 100644
> --- a/drivers/scsi/virtio_scsi.c
> +++ b/drivers/scsi/virtio_scsi.c

> @@ -591,11 +616,25 @@ static int virtscsi_queuecommand_single(struct Scsi_Host *sh,
>   					struct scsi_cmnd *sc)

> +	if (vscsi->protocol == SCSI_PROTOCOL_FCP) {
> +		struct fc_rport *rport =
> +			starget_to_rport(scsi_target(sc->device));
> +		if (rport && rport->dd_data) {
> +			tgt = rport->dd_data;
> +			target_id = tgt->target_id;
> +		}
> +	} else
> +		tgt = scsi_target(sc->device)->hostdata;
> +	if (!tgt || tgt->removed) {

> @@ -648,16 +687,30 @@ static int virtscsi_queuecommand_multi(struct Scsi_Host *sh,
>   				       struct scsi_cmnd *sc)

> +	if (vscsi->protocol == SCSI_PROTOCOL_FCP) {
> +		struct fc_rport *rport =
> +			starget_to_rport(scsi_target(sc->device));
> +		if (rport && rport->dd_data) {
> +			tgt = rport->dd_data;
> +			target_id = tgt->target_id;
> +		}
> +	} else
> +		tgt = scsi_target(sc->device)->hostdata;
> +	if (!tgt || tgt->removed) {

repeating pattern?
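
Maybe worth a helper? Something like this (untested sketch, name made up):

static struct virtio_scsi_target_state *
virtscsi_cmd_to_tgt(struct virtio_scsi *vscsi, struct scsi_cmnd *sc,
		    int *target_id)
{
	struct virtio_scsi_target_state *tgt = NULL;

	*target_id = sc->device->id;
	if (vscsi->protocol == SCSI_PROTOCOL_FCP) {
		struct fc_rport *rport =
			starget_to_rport(scsi_target(sc->device));

		if (rport && rport->dd_data) {
			tgt = rport->dd_data;
			*target_id = tgt->target_id;
		}
	} else
		tgt = scsi_target(sc->device)->hostdata;

	return tgt;
}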

> @@ -696,12 +749,27 @@ static int virtscsi_device_reset(struct scsi_cmnd *sc)
>   {
> +	if (vscsi->protocol == SCSI_PROTOCOL_FCP) {
> +		struct fc_rport *rport =
> +			starget_to_rport(scsi_target(sc->device));
> +		if (rport && rport->dd_data ) {
> +			tgt = rport->dd_data;
> +			target_id = tgt->target_id;
> +		} else
> +			return FAST_IO_FAIL;
> +	} else {
> +		tgt = scsi_target(sc->device)->hostdata;
> +		if (!tgt || tgt->removed)

The other patterns have the !tgt check outside of the if/else
condition. For consistency that might work here, too?

> +			return FAST_IO_FAIL;
> +	}


> @@ -755,19 +823,34 @@ static int virtscsi_abort(struct scsi_cmnd *sc)

> +	if (vscsi->protocol == SCSI_PROTOCOL_FCP) {
> +		struct fc_rport *rport =
> +			starget_to_rport(scsi_target(sc->device));
> +		if (rport && rport->dd_data ) {
> +			tgt = rport->dd_data;
> +			target_id = tgt->target_id;
> +		} else
> +			return FAST_IO_FAIL;
> +	} else {
> +		tgt = scsi_target(sc->device)->hostdata;
> +		if (!tgt || tgt->removed)
> +			return FAST_IO_FAIL;
> +	}

ditto

> @@ -857,27 +970,67 @@ static void virtscsi_rescan_work(struct work_struct *work)
> 
>   	wait_for_completion(&comp);

Waiting in the work item vs. having the response (IRQ) path trigger 
subsequent processing asynchronously?
Or do we need the call chain(s) getting here to be in our own process 
context via the workqueue anyway?
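
One asynchronous alternative (very rough sketch; rescan_resp would be a
new, hypothetical field in struct virtio_scsi): have the ctrl-vq
completion path stash the response and re-queue the work item, e.g.

static void virtscsi_rescan_done(struct virtio_scsi *vscsi,
				 struct virtio_scsi_cmd *cmd)
{
	/* called from the ctrl vq completion path instead of complete() */
	vscsi->rescan_resp = cmd->resp.rescan;	/* hypothetical field */
	mempool_free(cmd, virtscsi_cmd_pool);
	queue_work(system_freezable_wq, &vscsi->rescan_work);
}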

>   	target_id = virtio32_to_cpu(vscsi->vdev, cmd->resp.rescan.id);
> -	if (target_id != -1) {
> -		int transport = virtio32_to_cpu(vscsi->vdev,
> -						cmd->resp.rescan.transport);
> -		spin_lock_irq(&vscsi->rescan_lock);
> -		vscsi->next_target_id = target_id + 1;
> -		spin_unlock_irq(&vscsi->rescan_lock);
> -		shost_printk(KERN_INFO, sh,
> -			     "found %s target %d (WWN %*phN)\n",
> -			     transport == SCSI_PROTOCOL_FCP ? "FC" : "SAS",
> -			     target_id, 8,
> -			     cmd->resp.rescan.port_wwn);
> -		scsi_scan_target(&sh->shost_gendev, 0, target_id,
> -				 SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
> -		queue_work(system_freezable_wq, &vscsi->rescan_work);
> -	} else {
> +	if (target_id == -1) {

This boolean if expression was introduced in the previous patch.
Why negate it here?
It causes a number of potentially unnecessary changed lines, making 
review harder.


> +	if (transport == SCSI_PROTOCOL_FCP) {
> +		struct fc_rport_identifiers rport_ids;
> +		struct fc_rport *rport;
> +
> +		rport_ids.node_name = wwn_to_u64(cmd->resp.rescan.node_wwn);
> +		rport_ids.port_name = wwn_to_u64(cmd->resp.rescan.port_wwn);
> +		rport_ids.port_id = (target_id >> 8);

Why do you shift target_id by one byte to the right?
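
Reading it together with the tgt->target_id = (target_id & 0xff)
assignment in this patch, I guess the backend packs both the FC port_id
and the target id into resp.id, i.e. (my guess, not verified):

	/* apparent encoding of cmd->resp.rescan.id:
	 *   bits 0-7  : SCSI target id
	 *   bits 8-31 : FC port_id of the remote port
	 */
	tgt->target_id = target_id & 0xff;
	rport_ids.port_id = target_id >> 8;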

> +		rport = fc_remote_port_add(sh, 0, &rport_ids);
> +		if (rport) {
> +			tgt->rport = rport;
> +			rport->dd_data = tgt;
> +			fc_remote_port_rolechg(rport, FC_RPORT_ROLE_FCP_TARGET);

Is the rolechg to get some event? Otherwise we could have 
rport_ids.roles = FC_RPORT_ROLE_FCP_TARGET before fc_remote_port_add().
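
I.e. (sketch):

	rport_ids.roles = FC_RPORT_ROLE_FCP_TARGET;
	rport = fc_remote_port_add(sh, 0, &rport_ids);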

> +		} else {
> +			spin_lock_irq(&vscsi->rescan_lock);
> +			list_del(&tgt->list);
> +			spin_unlock_irq(&vscsi->rescan_lock);
> +			kfree(tgt);
> +			tgt = NULL;
> +		}
> +	} else {
> +		scsi_scan_target(&sh->shost_gendev, 0, tgt->target_id,
> +				 SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
> +	}
> +	queue_work(system_freezable_wq, &vscsi->rescan_work);
> +out:
>   	mempool_free(cmd, virtscsi_cmd_pool);
>   	return;
>   scan_host:

> @@ -932,14 +1089,31 @@ static void virtscsi_scan_host(struct virtio_scsi *vscsi)
>   static void virtscsi_scan_start(struct Scsi_Host *sh)
>   {
>   	struct virtio_scsi *vscsi = shost_priv(sh);
> +	struct virtio_scsi_target_state *tgt;
> 
> -	virtscsi_scan_host(vscsi);
>   	spin_lock_irq(&vscsi->rescan_lock);
>   	if (vscsi->next_target_id != -1) {
>   		shost_printk(KERN_INFO, sh, "rescan: already running\n");
>   		spin_unlock_irq(&vscsi->rescan_lock);
>   		return;
>   	}
> +	if (vscsi->protocol == SCSI_PROTOCOL_FCP) {
> +		fc_host_node_name(sh) = vscsi->wwnn;
> +		fc_host_port_name(sh) = vscsi->wwpn;
> +		fc_host_port_id(sh) = 0x00ff00;
> +		fc_host_port_type(sh) = FC_PORTTYPE_NPIV;

Why is this hardcoded?

At least with zfcp, we can have kvm host *v*HBAs without NPIV.

> +		fc_host_port_state(sh) = FC_PORTSTATE_BLOCKED;
> +		list_for_each_entry(tgt, &vscsi->target_list, list) {
> +			if (tgt->rport) {
> +				fc_remote_port_delete(tgt->rport);

I'm not familiar with the scan callbacks; maybe that's why I wonder why 
you block all rports when scanning the host. Is it so that the 
subsequent port scans can call the properly paired fc_remote_port_add() 
to potentially update a previously existing rport with new properties we 
got from the qemu/host side?

> +				tgt->rport = NULL;
> +			}
> +			tgt->removed = true;
> +		}
> +	} else {
> +		list_for_each_entry(tgt, &vscsi->target_list, list)
> +			tgt->removed = true;
> +	}
>   	vscsi->next_target_id = 0;
>   	shost_printk(KERN_INFO, sh, "rescan: start\n");
>   	spin_unlock_irq(&vscsi->rescan_lock);
> @@ -954,12 +1128,14 @@ int virtscsi_scan_finished(struct Scsi_Host *sh, unsigned long time)
>   	spin_lock_irq(&vscsi->rescan_lock);
>   	if (vscsi->next_target_id != -1)
>   		ret = 0;
> +	else if (vscsi->protocol == SCSI_PROTOCOL_FCP)
> +		fc_host_port_state(sh) = FC_PORTSTATE_ONLINE;
>   	spin_unlock_irq(&vscsi->rescan_lock);
>   	if (!ret)
>   		flush_work(&vscsi->rescan_work);
> 
> -	shost_printk(KERN_INFO, sh, "rescan: %s finished\n",
> -		     ret ? "" : "not");
> +	shost_printk(KERN_INFO, sh, "rescan: %sfinished\n",
> +		     ret ? "" : "not ");

You introduced it in the previous patch, so do the fixup there in the 
first place?

>   	return ret;
>   }
> 
> @@ -978,6 +1154,34 @@ static ssize_t virtscsi_host_store_rescan(struct device *dev,
>   	NULL,
>   };
> 
> +static int virtscsi_issue_lip(struct Scsi_Host *shost)
> +{
> +	struct virtio_scsi *vscsi = shost_priv(shost);
> +	unsigned long start = jiffies;
> +	struct virtio_scsi_target_state *tgt;
> +
> +	spin_lock_irq(&vscsi->rescan_lock);
> +	if (vscsi->next_target_id != -1) {
> +		spin_unlock_irq(&vscsi->rescan_lock);
> +		return 0;
> +	}

> +	fc_host_port_state(shost) = FC_PORTSTATE_BLOCKED;
> +	list_for_each_entry(tgt, &vscsi->target_list, list) {
> +		if (tgt->rport) {
> +			fc_remote_port_delete(tgt->rport);
> +			tgt->rport = NULL;
> +		}
> +	}
> +	vscsi->next_target_id = 0;

I see some code duplication with what's in virtscsi_scan_start().
Not sure if reduction is worth it.
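
If it is, the blocking part could be shared, something like (sketch;
caller holds rescan_lock):

static void virtscsi_block_rports(struct virtio_scsi *vscsi,
				  struct Scsi_Host *sh)
{
	struct virtio_scsi_target_state *tgt;

	fc_host_port_state(sh) = FC_PORTSTATE_BLOCKED;
	list_for_each_entry(tgt, &vscsi->target_list, list) {
		if (tgt->rport) {
			fc_remote_port_delete(tgt->rport);
			tgt->rport = NULL;
		}
	}
}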

> +	spin_unlock_irq(&vscsi->rescan_lock);
> +	queue_work(system_freezable_wq, &vscsi->rescan_work);
> +
> +	while (!virtscsi_scan_finished(shost, jiffies - start))
> +		msleep(10);
> +
> +	return 0;
> +}
> +
>   static struct scsi_host_template virtscsi_host_template_single = {
>   	.module = THIS_MODULE,
>   	.name = "Virtio SCSI HBA",
> @@ -1066,6 +1270,20 @@ static ssize_t virtscsi_host_store_rescan(struct device *dev,
>   	.track_queue_depth = 1,
>   };
> 
> +static struct fc_function_template virtscsi_transport_functions = {
> +	.dd_fcrport_size = sizeof(struct virtio_scsi_target_state *),
> +	.show_host_node_name = 1,
> +	.show_host_port_name = 1,
> +	.show_host_port_id = 1,
> +	.show_host_port_state = 1,
> +	.show_host_port_type = 1,
> +	.show_starget_node_name = 1,
> +	.show_starget_port_name = 1,
> +	.show_starget_port_id = 1,
> +	.show_rport_dev_loss_tmo = 1,
> +	.issue_fc_host_lip = virtscsi_issue_lip,
> +};
> +
>   #define virtscsi_config_get(vdev, fld) \
>   	({ \
>   		typeof(((struct virtio_scsi_config *)0)->fld) __val; \
> @@ -1193,7 +1411,9 @@ static int virtscsi_probe(struct virtio_device *vdev)
>   	vscsi->num_queues = num_queues;
>   	vdev->priv = shost;
>   	vscsi->next_target_id = -1;
> +	vscsi->protocol = SCSI_PROTOCOL_SAS;

Why is the old/legacy/non-fcp hardcoded SAS?
Doesn't the non-fcp virtio-scsi have any real transport at all, i.e. "none"?
Maybe I just don't understand semantics of vscsi->protocol well enough.

>   	spin_lock_init(&vscsi->rescan_lock);
> +	INIT_LIST_HEAD(&vscsi->target_list);
>   	INIT_WORK(&vscsi->rescan_work, virtscsi_rescan_work);
> 
>   	err = virtscsi_init(vdev, vscsi);


-- 
Mit freundlichen Grüßen / Kind regards
Steffen Maier

Linux on z Systems Development

IBM Deutschland Research & Development GmbH
Vorsitzende des Aufsichtsrats: Martina Koederitz
Geschaeftsfuehrung: Dirk Wittkopp
Sitz der Gesellschaft: Boeblingen
Registergericht: Amtsgericht Stuttgart, HRB 243294


* Re: [PATCH 3/3] virtio_scsi: Implement 'native LUN' feature
  2017-12-14 10:11 ` [PATCH 3/3] virtio_scsi: Implement 'native LUN' feature Hannes Reinecke
@ 2017-12-15 18:17   ` Steffen Maier
  2017-12-18  7:48     ` Hannes Reinecke
  0 siblings, 1 reply; 16+ messages in thread
From: Steffen Maier @ 2017-12-15 18:17 UTC (permalink / raw)
  To: Hannes Reinecke, Martin K. Petersen
  Cc: Christoph Hellwig, James Bottomley, Paolo Bonzini, linux-scsi,
	Hannes Reinecke

Just a few very early view-only review comments.
Haven't run the code.

On 12/14/2017 11:11 AM, Hannes Reinecke wrote:
> The 'native LUN' feature allows virtio-scsi to pass in the LUN
> numbers from the underlying storage directly, without having
> to modify the LUN number itself.
> It works by shifting the existing LUN number down by 8 bytes,
>> and adding the virtio-specific 8-byte LUN steering header.
> With that virtio doesn't have to mangle the LUN number, allowing
> us to pass the 'real' LUN number to the guest.

I only see shifts by 16 bits in the code below, which would be 2 bytes.
I had a quick look at the corresponding qemu code, which looked the same 
to me.
What's the relation to the 8-byte shift mentioned in the description, 
which would be a 64-bit shift and thus odd for a 64-bit LUN?

If the code keeps LUN levels 1 and 2 (dropping levels 3 and 4) and I 
just don't understand it, it would be fine, I guess.

> Of course, we do cut off the last 8 bytes of the 'real' LUN number,
> but I'm not aware of any array utilizing that, so the impact should
> be negligible.

Why did we do v3.17 commit 9cb78c16f5da ("scsi: use 64-bit LUNs")? ;-)

> Signed-off-by: Hannes Reinecke <hare@suse.com>
> ---
>   drivers/scsi/virtio_scsi.c       | 62 ++++++++++++++++++++++++++++++----------
>   include/uapi/linux/virtio_scsi.h |  1 +
>   2 files changed, 48 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
> index f925fbd..63c2c85 100644
> --- a/drivers/scsi/virtio_scsi.c
> +++ b/drivers/scsi/virtio_scsi.c
> @@ -356,8 +356,12 @@ static void virtscsi_handle_transport_reset(struct virtio_scsi *vscsi,
>   	struct scsi_device *sdev;
>   	struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev);
>   	unsigned int target = event->lun[1];
> -	unsigned int lun = (event->lun[2] << 8) | event->lun[3];
> +	u64 lun;
> 
> +	if (virtio_has_feature(vscsi->vdev, VIRTIO_SCSI_F_NATIVE_LUN))
> +		lun = scsilun_to_int((struct scsi_lun *)event->lun) >> 16;
> +	else
> +		lun = (event->lun[2] << 8) | event->lun[3];


> @@ -524,10 +532,16 @@ static void virtio_scsi_init_hdr(struct virtio_device *vdev,
>   				 int target_id,
>   				 struct scsi_cmnd *sc)
>   {
> -	cmd->lun[0] = 1;
> -	cmd->lun[1] = target_id;
> -	cmd->lun[2] = (sc->device->lun >> 8) | 0x40;
> -	cmd->lun[3] = sc->device->lun & 0xff;
> +	if (virtio_has_feature(vdev, VIRTIO_SCSI_F_NATIVE_LUN)) {
> +		u64 lun = sc->device->lun << 16;
> +		lun |= ((u64)1 << 8) | (u64)target_id;
> +		int_to_scsilun(lun, (struct scsi_lun *)&cmd->lun);
> +	} else {
> +		cmd->lun[0] = 1;
> +		cmd->lun[1] = target_id;
> +		cmd->lun[2] = (sc->device->lun >> 8) | 0x40;
> +		cmd->lun[3] = sc->device->lun & 0xff;
> +	}

Above 2 patterns seem to repeat. Have helper functions (similar to 
int_to_scsilun()) now that it's more than just 4 lines of filling in the 
virtio lun?
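
Something like this, perhaps (just a sketch with an invented name,
mirroring the two patterns above, untested):

static void virtscsi_map_lun(struct virtio_device *vdev, u8 *lun,
			     int target_id, u64 sdev_lun)
{
	if (virtio_has_feature(vdev, VIRTIO_SCSI_F_NATIVE_LUN)) {
		/* native LUN: shift up one level, or in the 0x01TT header */
		u64 vlun = (sdev_lun << 16) | ((u64)1 << 8) | (u64)target_id;

		int_to_scsilun(vlun, (struct scsi_lun *)lun);
	} else {
		/* legacy single-level encoding */
		lun[0] = 1;
		lun[1] = target_id;
		lun[2] = (sdev_lun >> 8) | 0x40;
		lun[3] = sdev_lun & 0xff;
	}
}

Then virtio_scsi_init_hdr() and the TMF setup could both call it on
their respective lun arrays.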

> @@ -851,10 +871,18 @@ static int virtscsi_abort(struct scsi_cmnd *sc)
>   		.subtype = VIRTIO_SCSI_T_TMF_ABORT_TASK,

>   		.lun[0] = 1,
>   		.lun[1] = target_id,

drop those 2 superfluous lines, too?

> -		.lun[2] = (sc->device->lun >> 8) | 0x40,
> -		.lun[3] = sc->device->lun & 0xff,
>   		.tag = cpu_to_virtio64(vscsi->vdev, (unsigned long)sc),
>   	};
> +	if (virtio_has_feature(vscsi->vdev, VIRTIO_SCSI_F_NATIVE_LUN)) {
> +		u64 lun = sc->device->lun << 16;
> +		lun |= ((u64)1 << 8) | (u64)target_id;
> +		int_to_scsilun(lun, (struct scsi_lun *)&cmd->req.tmf.lun);
> +	} else {
> +		cmd->req.tmf.lun[0] = 1;
> +		cmd->req.tmf.lun[1] = target_id;
> +		cmd->req.tmf.lun[2] = (sc->device->lun >> 8) | 0x40;
> +		cmd->req.tmf.lun[3] = sc->device->lun & 0xff;
> +	}

>   	return virtscsi_tmf(vscsi, cmd);
>   }
> 
> @@ -1429,7 +1457,10 @@ static int virtscsi_probe(struct virtio_device *vdev)
>   	/* LUNs > 256 are reported with format 1, so they go in the range
>   	 * 16640-32767.
>   	 */

Above old comment now only seems to apply to the then case of the 
following if statement, not to the else case.

> -	shost->max_lun = virtscsi_config_get(vdev, max_lun) + 1 + 0x4000;
> +	if (!virtio_has_feature(vdev, VIRTIO_SCSI_F_NATIVE_LUN))
> +		shost->max_lun = virtscsi_config_get(vdev, max_lun) + 1 + 0x4000;
> +	else
> +		shost->max_lun = (u64)-1;
>   	shost->max_id = num_targets;
>   	shost->max_channel = 0;
>   	shost->max_cmd_len = VIRTIO_SCSI_CDB_SIZE;


-- 
Mit freundlichen Grüßen / Kind regards
Steffen Maier

Linux on z Systems Development

IBM Deutschland Research & Development GmbH
Vorsitzende des Aufsichtsrats: Martina Koederitz
Geschaeftsfuehrung: Dirk Wittkopp
Sitz der Gesellschaft: Boeblingen
Registergericht: Amtsgericht Stuttgart, HRB 243294

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] virtio_scsi: Implement 'native LUN' feature
  2017-12-15 18:17   ` Steffen Maier
@ 2017-12-18  7:48     ` Hannes Reinecke
  2018-01-26 14:15       ` Steffen Maier
  0 siblings, 1 reply; 16+ messages in thread
From: Hannes Reinecke @ 2017-12-18  7:48 UTC (permalink / raw)
  To: Steffen Maier, Martin K. Petersen
  Cc: Christoph Hellwig, James Bottomley, Paolo Bonzini, linux-scsi,
	Hannes Reinecke

On 12/15/2017 07:17 PM, Steffen Maier wrote:
> Just a few very early view-only review comments.
> Haven't run the code.
> 
> On 12/14/2017 11:11 AM, Hannes Reinecke wrote:
>> The 'native LUN' feature allows virtio-scsi to pass in the LUN
>> numbers from the underlying storage directly, without having
>> to modify the LUN number itself.
>> It works by shifting the existing LUN number down by 8 bytes,
>> and add the virtio-specific 8-byte LUN steering header.
>> With that virtio doesn't have to mangle the LUN number, allowing
>> us to pass the 'real' LUN number to the guest.
> 
> I only see shifts by 16 bits in the code below which would be 2 bytes.
> I had a quick look at the corresponding qemu code which looked the same
> to me.
> What's the relation to 8 byte shifting, which would be 64 bit shift and
> thus odd for a 64 bit LUN, mentioned in the description here?
> 
> If the code keeps the LUN level 1 and 2 (dropping level 3 and 4) and I
> just don't understand it, it would be fine, I guess.
> 
Yeah, messed that one up. It should be 8 _bits_, obviously.

>> Of course, we do cut off the last 8 bytes of the 'real' LUN number,
>> but I'm not aware of any array utilizing that, so the impact should
>> be negligible.
> 
> Why did we do v3.17 commit 9cb78c16f5da ("scsi: use 64-bit LUNs")? ;-)
> 
Because that patch just lifts the internal code to use 64-bit LUNs
without any changes to the behaviour.
This one uses the internal 64-bit LUNs and actually changes the behaviour.

>> Signed-off-by: Hannes Reinecke <hare@suse.com>
>> ---
>>   drivers/scsi/virtio_scsi.c       | 62
>> ++++++++++++++++++++++++++++++----------
>>   include/uapi/linux/virtio_scsi.h |  1 +
>>   2 files changed, 48 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
>> index f925fbd..63c2c85 100644
>> --- a/drivers/scsi/virtio_scsi.c
>> +++ b/drivers/scsi/virtio_scsi.c
>> @@ -356,8 +356,12 @@ static void
>> virtscsi_handle_transport_reset(struct virtio_scsi *vscsi,
>>       struct scsi_device *sdev;
>>       struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev);
>>       unsigned int target = event->lun[1];
>> -    unsigned int lun = (event->lun[2] << 8) | event->lun[3];
>> +    u64 lun;
>>
>> +    if (virtio_has_feature(vscsi->vdev, VIRTIO_SCSI_F_NATIVE_LUN))
>> +        lun = scsilun_to_int((struct scsi_lun *)event->lun) >> 16;
>> +    else
>> +        lun = (event->lun[2] << 8) | event->lun[3];
> 
> 
>> @@ -524,10 +532,16 @@ static void virtio_scsi_init_hdr(struct
>> virtio_device *vdev,
>>                    int target_id,
>>                    struct scsi_cmnd *sc)
>>   {
>> -    cmd->lun[0] = 1;
>> -    cmd->lun[1] = target_id;
>> -    cmd->lun[2] = (sc->device->lun >> 8) | 0x40;
>> -    cmd->lun[3] = sc->device->lun & 0xff;
>> +    if (virtio_has_feature(vdev, VIRTIO_SCSI_F_NATIVE_LUN)) {
>> +        u64 lun = sc->device->lun << 16;
>> +        lun |= ((u64)1 << 8) | (u64)target_id;
>> +        int_to_scsilun(lun, (struct scsi_lun *)&cmd->lun);
>> +    } else {
>> +        cmd->lun[0] = 1;
>> +        cmd->lun[1] = target_id;
>> +        cmd->lun[2] = (sc->device->lun >> 8) | 0x40;
>> +        cmd->lun[3] = sc->device->lun & 0xff;
>> +    }
> 
> Above 2 patterns seem to repeat. Have helper functions (similar to
> int_to_scsilun()) now that it's more than just 4 lines of filling in the
> virtio lun?
> 
Yes, can do.

>> @@ -851,10 +871,18 @@ static int virtscsi_abort(struct scsi_cmnd *sc)
>>           .subtype = VIRTIO_SCSI_T_TMF_ABORT_TASK,
> 
>>           .lun[0] = 1,
>>           .lun[1] = target_id,
> 
> drop those 2 superfluous lines, too?
> 
Hmm. Will be checking.

>> -        .lun[2] = (sc->device->lun >> 8) | 0x40,
>> -        .lun[3] = sc->device->lun & 0xff,
>>           .tag = cpu_to_virtio64(vscsi->vdev, (unsigned long)sc),
>>       };
>> +    if (virtio_has_feature(vscsi->vdev, VIRTIO_SCSI_F_NATIVE_LUN)) {
>> +        u64 lun = sc->device->lun << 16;
>> +        lun |= ((u64)1 << 8) | (u64)target_id;
>> +        int_to_scsilun(lun, (struct scsi_lun *)&cmd->req.tmf.lun);
>> +    } else {
>> +        cmd->req.tmf.lun[0] = 1;
>> +        cmd->req.tmf.lun[1] = target_id;
>> +        cmd->req.tmf.lun[2] = (sc->device->lun >> 8) | 0x40;
>> +        cmd->req.tmf.lun[3] = sc->device->lun & 0xff;
>> +    }
> 
>>       return virtscsi_tmf(vscsi, cmd);
>>   }
>>
>> @@ -1429,7 +1457,10 @@ static int virtscsi_probe(struct virtio_device
>> *vdev)
>>       /* LUNs > 256 are reported with format 1, so they go in the range
>>        * 16640-32767.
>>        */
> 
> Above old comment now only seems to apply to the then case of the
> following if statement, not to the else case.
> 
Yes.
Will be doing a respin.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] virtio-scsi: Add FC transport class
  2017-12-15 18:08   ` Steffen Maier
@ 2017-12-18  8:31     ` Hannes Reinecke
  2018-01-26 16:54       ` Steffen Maier
  0 siblings, 1 reply; 16+ messages in thread
From: Hannes Reinecke @ 2017-12-18  8:31 UTC (permalink / raw)
  To: Steffen Maier, Martin K. Petersen
  Cc: Christoph Hellwig, James Bottomley, Paolo Bonzini, linux-scsi,
	Hannes Reinecke

On 12/15/2017 07:08 PM, Steffen Maier wrote:
> 
> On 12/14/2017 11:11 AM, Hannes Reinecke wrote:
>> When a device announces an 'FC' protocol we should be pulling
>> in the FC transport class to have the rports etc setup correctly.
> 
> It took some time for me to understand what this does.
> It seems to mirror the topology of rports and sdevs that exist under the
> fc_host on the kvm host side to the virtio-scsi-fc side in the guest.
> 
> I like the idea. This is also what I've been suggesting users to do if
> they back virtio-scsi with zfcp on the kvm host side. Primarily to not
> stall all virtio-scsi I/O on all paths if the guest ever gets into
> scsi_eh. But also to make it look like an HBA pass through so one can
> more easily migrate to this once we have FCP pass through.
> 
Thanks for the review.

>>
>> Signed-off-by: Hannes Reinecke <hare@suse.com>
>> ---
>>   drivers/scsi/virtio_scsi.c | 323
>> ++++++++++++++++++++++++++++++++++++++-------
>>   1 file changed, 277 insertions(+), 46 deletions(-)
>>
>> diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
>> index a561e90..f925fbd 100644
>> --- a/drivers/scsi/virtio_scsi.c
>> +++ b/drivers/scsi/virtio_scsi.c
> 
>> @@ -591,11 +616,25 @@ static int virtscsi_queuecommand_single(struct
>> Scsi_Host *sh,
>>                       struct scsi_cmnd *sc)
> 
>> +    if (vscsi->protocol == SCSI_PROTOCOL_FCP) {
>> +        struct fc_rport *rport =
>> +            starget_to_rport(scsi_target(sc->device));
>> +        if (rport && rport->dd_data) {
>> +            tgt = rport->dd_data;
>> +            target_id = tgt->target_id;
>> +        }
>> +    } else
>> +        tgt = scsi_target(sc->device)->hostdata;
>> +    if (!tgt || tgt->removed) {
> 
>> @@ -648,16 +687,30 @@ static int virtscsi_queuecommand_multi(struct
>> Scsi_Host *sh,
>>                          struct scsi_cmnd *sc)
> 
>> +    if (vscsi->protocol == SCSI_PROTOCOL_FCP) {
>> +        struct fc_rport *rport =
>> +            starget_to_rport(scsi_target(sc->device));
>> +        if (rport && rport->dd_data) {
>> +            tgt = rport->dd_data;
>> +            target_id = tgt->target_id;
>> +        }
>> +    } else
>> +        tgt = scsi_target(sc->device)->hostdata;
>> +    if (!tgt || tgt->removed) {
> 
> repeating pattern?
> 
Yeah, I could probably move that to a separate function.

>> @@ -696,12 +749,27 @@ static int virtscsi_device_reset(struct
>> scsi_cmnd *sc)
>>   {
>> +    if (vscsi->protocol == SCSI_PROTOCOL_FCP) {
>> +        struct fc_rport *rport =
>> +            starget_to_rport(scsi_target(sc->device));
>> +        if (rport && rport->dd_data ) {
>> +            tgt = rport->dd_data;
>> +            target_id = tgt->target_id;
>> +        } else
>> +            return FAST_IO_FAIL;
>> +    } else {
>> +        tgt = scsi_target(sc->device)->hostdata;
>> +        if (!tgt || tgt->removed)
> 
> The other patterns have the !tgt check outside of the if else condition.
> For consistency this might work here, too?
> 
Possibly. I'll check.

>> +            return FAST_IO_FAIL;
>> +    }
> 
> 
>> @@ -755,19 +823,34 @@ static int virtscsi_abort(struct scsi_cmnd *sc)
> 
>> +    if (vscsi->protocol == SCSI_PROTOCOL_FCP) {
>> +        struct fc_rport *rport =
>> +            starget_to_rport(scsi_target(sc->device));
>> +        if (rport && rport->dd_data ) {
>> +            tgt = rport->dd_data;
>> +            target_id = tgt->target_id;
>> +        } else
>> +            return FAST_IO_FAIL;
>> +    } else {
>> +        tgt = scsi_target(sc->device)->hostdata;
>> +        if (!tgt || tgt->removed)
>> +            return FAST_IO_FAIL;
>> +    }
> 
> dito
> 
>> @@ -857,27 +970,67 @@ static void virtscsi_rescan_work(struct
>> work_struct *work)
>>
>>       wait_for_completion(&comp);
> 
> Waiting in work item .vs. having the response (IRQ) path trigger
> subsequent processing async ?
> Or do we need the call chain(s) getting here to be in our own process
> context via the workqueue anyway?
> 
Can't say I can parse this sentence, but I'll be looking at the code
trying to come up with a clever explanation :-)

>>       target_id = virtio32_to_cpu(vscsi->vdev, cmd->resp.rescan.id);
>> -    if (target_id != -1) {
>> -        int transport = virtio32_to_cpu(vscsi->vdev,
>> -                        cmd->resp.rescan.transport);
>> -        spin_lock_irq(&vscsi->rescan_lock);
>> -        vscsi->next_target_id = target_id + 1;
>> -        spin_unlock_irq(&vscsi->rescan_lock);
>> -        shost_printk(KERN_INFO, sh,
>> -                 "found %s target %d (WWN %*phN)\n",
>> -                 transport == SCSI_PROTOCOL_FCP ? "FC" : "SAS",
>> -                 target_id, 8,
>> -                 cmd->resp.rescan.port_wwn);
>> -        scsi_scan_target(&sh->shost_gendev, 0, target_id,
>> -                 SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
>> -        queue_work(system_freezable_wq, &vscsi->rescan_work);
>> -    } else {
>> +    if (target_id == -1) {
> 
> This boolean if expression was introduced in the previous patch.
> Why negate it here?
> It causes a number of potentially unnecessary changed lines making
> review harder.
> 
True. Will be fixing it up.

> 
>> +    if (transport == SCSI_PROTOCOL_FCP) {
>> +        struct fc_rport_identifiers rport_ids;
>> +        struct fc_rport *rport;
>> +
>> +        rport_ids.node_name = wwn_to_u64(cmd->resp.rescan.node_wwn);
>> +        rport_ids.port_name = wwn_to_u64(cmd->resp.rescan.port_wwn);
>> +        rport_ids.port_id = (target_id >> 8);
> 
> Why do you shift target_id by one byte to the right?
> 
Because with the original setup virtio_scsi guest would pass in the
target_id, and the host would be selecting the device based on that
information.
With virtio-vfc we pass in the wwpn, but still require the target ID to
be compliant with things like event notification etc.
So I've shifted the target id onto the port ID (which is 24 bit anyway).
I could've used a bitfield here, but then I wasn't quite sure about its
endianness.
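
For illustration only (hypothetical, not in the patch): a bit-field like

struct virtio_vfc_lun {
	__u64 target_id : 8;
	__u64 magic     : 8;
	__u64 lun       : 48;
};

has an implementation-defined member order within the storage unit, so
the resulting wire format would not be portable; hence the open-coded
shifts.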

>> +        rport = fc_remote_port_add(sh, 0, &rport_ids);
>> +        if (rport) {
>> +            tgt->rport = rport;
>> +            rport->dd_data = tgt;
>> +            fc_remote_port_rolechg(rport, FC_RPORT_ROLE_FCP_TARGET);
> 
> Is the rolechg to get some event? Otherwise we could have
> rport_ids.roles = FC_RPORT_ROLE_FCP_TARGET before fc_remote_port_add().
> 
That's how the 'normal' transport classes do it; but I'll check if this
can be rolled into the call to fc_remote_port_add().

>> +        } else {
>> +            spin_lock_irq(&vscsi->rescan_lock);
>> +            list_del(&tgt->list);
>> +            spin_unlock_irq(&vscsi->rescan_lock);
>> +            kfree(tgt);
>> +            tgt = NULL;
>> +        }
>> +    } else {
>> +        scsi_scan_target(&sh->shost_gendev, 0, tgt->target_id,
>> +                 SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
>> +    }
>> +    queue_work(system_freezable_wq, &vscsi->rescan_work);
>> +out:
>>       mempool_free(cmd, virtscsi_cmd_pool);
>>       return;
>>   scan_host:
> 
>> @@ -932,14 +1089,31 @@ static void virtscsi_scan_host(struct
>> virtio_scsi *vscsi)
>>   static void virtscsi_scan_start(struct Scsi_Host *sh)
>>   {
>>       struct virtio_scsi *vscsi = shost_priv(sh);
>> +    struct virtio_scsi_target_state *tgt;
>>
>> -    virtscsi_scan_host(vscsi);
>>       spin_lock_irq(&vscsi->rescan_lock);
>>       if (vscsi->next_target_id != -1) {
>>           shost_printk(KERN_INFO, sh, "rescan: already running\n");
>>           spin_unlock_irq(&vscsi->rescan_lock);
>>           return;
>>       }
>> +    if (vscsi->protocol == SCSI_PROTOCOL_FCP) {
>> +        fc_host_node_name(sh) = vscsi->wwnn;
>> +        fc_host_port_name(sh) = vscsi->wwpn;
>> +        fc_host_port_id(sh) = 0x00ff00;
>> +        fc_host_port_type(sh) = FC_PORTTYPE_NPIV;
> 
> Why is this hardcoded?
> 
> At least with zfcp, we can have kvm host *v*HBAs without NPIV.
> 
For the simple fact that I don't have fields to transfer this
information; plus it's not _actually_ used anywhere.
Unless you have a strict need for this information?

>> +        fc_host_port_state(sh) = FC_PORTSTATE_BLOCKED;
>> +        list_for_each_entry(tgt, &vscsi->target_list, list) {
>> +            if (tgt->rport) {
>> +                fc_remote_port_delete(tgt->rport);
> 
> I'm not familiar with the scan callbacks, maybe that's why I wonder why
> you block all rports when scanning the host. Is it so that the
> subsequent port scans can call the properly paired fc_remote_port_add to
> potentially update a previously existing rport with new properties we
> got from the qemu/host side.
> 
The 'scan' code here is the equivalent of an RSCN for FC-AL,
i.e. we will be updating the rport state (and existence) during scanning.
As we cannot guess the rport state during this operation (in fact, we
can't even tell if the rport is still alive) we set all ports to BLOCKED
here, and let the FC transport class handle the state transition of the
rports via fc_remote_port_add() or dev_loss_tmo firing.

>> +                tgt->rport = NULL;
>> +            }
>> +            tgt->removed = true;
>> +        }
>> +    } else {
>> +        list_for_each_entry(tgt, &vscsi->target_list, list)
>> +            tgt->removed = true;
>> +    }
>>       vscsi->next_target_id = 0;
>>       shost_printk(KERN_INFO, sh, "rescan: start\n");
>>       spin_unlock_irq(&vscsi->rescan_lock);
>> @@ -954,12 +1128,14 @@ int virtscsi_scan_finished(struct Scsi_Host
>> *sh, unsigned long time)
>>       spin_lock_irq(&vscsi->rescan_lock);
>>       if (vscsi->next_target_id != -1)
>>           ret = 0;
>> +    else if (vscsi->protocol == SCSI_PROTOCOL_FCP)
>> +        fc_host_port_state(sh) = FC_PORTSTATE_ONLINE;
>>       spin_unlock_irq(&vscsi->rescan_lock);
>>       if (!ret)
>>           flush_work(&vscsi->rescan_work);
>>
>> -    shost_printk(KERN_INFO, sh, "rescan: %s finished\n",
>> -             ret ? "" : "not");
>> +    shost_printk(KERN_INFO, sh, "rescan: %sfinished\n",
>> +             ret ? "" : "not ");
> 
> You introduced it in the previous patch, so do the fixup there in the
> first place?
> 
Yeah, will be cleaning it up.

>>       return ret;
>>   }
>>
>> @@ -978,6 +1154,34 @@ static ssize_t virtscsi_host_store_rescan(struct
>> device *dev,
>>       NULL,
>>   };
>>
>> +static int virtscsi_issue_lip(struct Scsi_Host *shost)
>> +{
>> +    struct virtio_scsi *vscsi = shost_priv(shost);
>> +    unsigned long start = jiffies;
>> +    struct virtio_scsi_target_state *tgt;
>> +
>> +    spin_lock_irq(&vscsi->rescan_lock);
>> +    if (vscsi->next_target_id != -1) {
>> +        spin_unlock_irq(&vscsi->rescan_lock);
>> +        return 0;
>> +    }
> 
>> +    fc_host_port_state(shost) = FC_PORTSTATE_BLOCKED;
>> +    list_for_each_entry(tgt, &vscsi->target_list, list) {
>> +        if (tgt->rport) {
>> +            fc_remote_port_delete(tgt->rport);
>> +            tgt->rport = NULL;
>> +        }
>> +    }
>> +    vscsi->next_target_id = 0;
> 
> I see some code duplication with what's in virtscsi_scan_host().
> Not sure if reduction is worth it.
> 
Possibly not :-)

>> +    spin_unlock_irq(&vscsi->rescan_lock);
>> +    queue_work(system_freezable_wq, &vscsi->rescan_work);
>> +
>> +    while (!virtscsi_scan_finished(shost, jiffies - start))
>> +        msleep(10);
>> +
>> +    return 0;
>> +}
>> +
>>   static struct scsi_host_template virtscsi_host_template_single = {
>>       .module = THIS_MODULE,
>>       .name = "Virtio SCSI HBA",
>> @@ -1066,6 +1270,20 @@ static ssize_t
>> virtscsi_host_store_rescan(struct device *dev,
>>       .track_queue_depth = 1,
>>   };
>>
>> +static struct fc_function_template virtscsi_transport_functions = {
>> +    .dd_fcrport_size = sizeof(struct virtio_scsi_target_state *),
>> +    .show_host_node_name = 1,
>> +    .show_host_port_name = 1,
>> +    .show_host_port_id = 1,
>> +    .show_host_port_state = 1,
>> +    .show_host_port_type = 1,
>> +    .show_starget_node_name = 1,
>> +    .show_starget_port_name = 1,
>> +    .show_starget_port_id = 1,
>> +    .show_rport_dev_loss_tmo = 1,
>> +    .issue_fc_host_lip = virtscsi_issue_lip,
>> +};
>> +
>>   #define virtscsi_config_get(vdev, fld) \
>>       ({ \
>>           typeof(((struct virtio_scsi_config *)0)->fld) __val; \
>> @@ -1193,7 +1411,9 @@ static int virtscsi_probe(struct virtio_device
>> *vdev)
>>       vscsi->num_queues = num_queues;
>>       vdev->priv = shost;
>>       vscsi->next_target_id = -1;
>> +    vscsi->protocol = SCSI_PROTOCOL_SAS;
> 
> Why is the old/legacy/non-fcp hardcoded SAS?
> Doesn't the non-fcp virtio-scsi have any real transport at all, i.e.
> "none"?
> Maybe I just don't understand semantics of vscsi->protocol well enough.
> 

The original virtio-scsi code _claimed_ to be a SAS HBA (look for the
VPD page 83 emulation).
So to get compatible output there we need to set the protocol to 'SAS'.
Using 'none' would just add another layer of complexity (as I would need
to differentiate between 'none' and 'real' SAS), which gives no benefit
ATM as there _is_ no real SAS implementation.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] virtio_scsi: Implement 'native LUN' feature
  2017-12-18  7:48     ` Hannes Reinecke
@ 2018-01-26 14:15       ` Steffen Maier
  2018-01-26 15:22         ` Hannes Reinecke
  2018-01-26 16:57         ` Steffen Maier
  0 siblings, 2 replies; 16+ messages in thread
From: Steffen Maier @ 2018-01-26 14:15 UTC (permalink / raw)
  To: Hannes Reinecke, Martin K. Petersen
  Cc: Christoph Hellwig, James Bottomley, Paolo Bonzini, linux-scsi,
	Hannes Reinecke

On 12/18/2017 08:48 AM, Hannes Reinecke wrote:
> On 12/15/2017 07:17 PM, Steffen Maier wrote:
>> On 12/14/2017 11:11 AM, Hannes Reinecke wrote:
>>> The 'native LUN' feature allows virtio-scsi to pass in the LUN
>>> numbers from the underlying storage directly, without having
>>> to modify the LUN number itself.
>>> It works by shifting the existing LUN number down by 8 bytes,
>>> and add the virtio-specific 8-byte LUN steering header.
>>> With that virtio doesn't have to mangle the LUN number, allowing
>>> us to pass the 'real' LUN number to the guest.
>>
>> I only see shifts by 16 bits in the code below which would be 2 bytes.
>> I had a quick look at the corresponding qemu code which looked the same
>> to me.
>> What's the relation to 8 byte shifting, which would be 64 bit shift and
>> thus odd for a 64 bit LUN, mentioned in the description here?
>>
>> If the code keeps the LUN level 1 and 2 (dropping level 3 and 4) and I
>> just don't understand it, it would be fine, I guess.
>>
> Yeah, messed that one up. It should be 8 _bits_, obviously.

Isn't it 16 bits or 2 bytes corresponding to one LUN level?
See also below.

>>> Of course, we do cut off the last 8 bytes of the 'real' LUN number,
>>> but I'm not aware of any array utilizing that, so the impact should
>>> be negligible.
>>
>> Why did we do v3.17 commit 9cb78c16f5da ("scsi: use 64-bit LUNs")? ;-)
>>
> Because that patch just lifts the internal code to use 64-bit LUNs
> without any changes to the behaviour.
> This one uses the internal 64-bit LUNs and actually changes the behaviour.

Sure, I was just being ironic, because your description sounded a bit as 
if the whole LUN range extension were not even required because no 
storage array uses it.

>>> Signed-off-by: Hannes Reinecke <hare@suse.com>
>>> ---
>>>    drivers/scsi/virtio_scsi.c       | 62
>>> ++++++++++++++++++++++++++++++----------
>>>    include/uapi/linux/virtio_scsi.h |  1 +
>>>    2 files changed, 48 insertions(+), 15 deletions(-)

>>> @@ -524,10 +532,16 @@ static void virtio_scsi_init_hdr(struct
>>> virtio_device *vdev,
>>>                     int target_id,
>>>                     struct scsi_cmnd *sc)
>>>    {
>>> -    cmd->lun[0] = 1;
>>> -    cmd->lun[1] = target_id;
>>> -    cmd->lun[2] = (sc->device->lun >> 8) | 0x40;
>>> -    cmd->lun[3] = sc->device->lun & 0xff;
>>> +    if (virtio_has_feature(vdev, VIRTIO_SCSI_F_NATIVE_LUN)) {
>>> +        u64 lun = sc->device->lun << 16;
>>> +        lun |= ((u64)1 << 8) | (u64)target_id;
>>> +        int_to_scsilun(lun, (struct scsi_lun *)&cmd->lun);
>>> +    } else {
>>> +        cmd->lun[0] = 1;
>>> +        cmd->lun[1] = target_id;
>>> +        cmd->lun[2] = (sc->device->lun >> 8) | 0x40;
>>> +        cmd->lun[3] = sc->device->lun & 0xff;
>>> +    }
>>
>> Above 2 patterns seem to repeat. Have helper functions (similar to
>> int_to_scsilun()) now that it's more than just 4 lines of filling in the
>> virtio lun?
>>
> Yes, can do.

Meanwhile I think I realized why I had trouble understanding what the 
code does. I guess, I expected a conversion with int_to_scsilun() first, 
and then we would fill in the virtio-specific parts of magic-one and 
target-ID.
You do it just the other way round, which is OK.

Say we have a 4 level 64-bit LUN and represent it in hex using 
placeholder hexdigits for the 4 levels like this:
0xL1L1L2L2L3L3L4L4
Its decimal SCSI LUN representation (in hex) is:
0xL4L4L3L3L2L2L1L1
Then you shift left by 16 bits (2 bytes, 1 LUN level), basically 
dropping the 4th level:
0xL3L3L2L2L1L10000
The steering header is 0x01TT where TT is the target ID.
You bitwise or the virtio-specific parts into the SCSI LUN representation:
0xL3L3L2L2L1L101TT
Finally you convert it into the 64-bit LUN representation:
0x01TTL1L1L2L2L3L3
   0123456789abcdef [char array indexes]
So we nicely have the virtio-specific parts at those array indexes where 
the virtio-scsi protocol expects them.
The usage of the other bytes is now of course different from the 
original LUN encoding: It allows more than just peripheral and flat 
space addressing for the 1st level; and it now also uses levels 2 and 3 
which were previously always zero. The 3rd level really requires 64-bit 
support in the kvm host kernel.
This also means that a 4-level LUN is not supported unless we would 
create a new virtio-scsi protocol version that would transfer the target 
ID in a separate field not as part of the LUN field.

Did I get that right?

A similar explanation in a kernel doc comment for the helper conversion 
function(s) might be helpful.
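
E.g. something along these lines (sketch only, condensing the
walkthrough above; helper name invented):

/**
 * virtscsi_map_lun() - pack the 8-byte virtio-scsi LUN field
 *
 * With VIRTIO_SCSI_F_NATIVE_LUN the 64-bit LUN (integer representation
 * 0xL4L4L3L3L2L2L1L1) is shifted left by one level (16 bits), dropping
 * level 4; the virtio steering header 0x01TT (TT = target ID) is or'ed
 * into the low two bytes; int_to_scsilun() then yields the wire format
 * 0x01TTL1L1L2L2L3L3. Levels 1-3 of the original LUN survive; 4-level
 * LUNs cannot be represented.
 */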

-- 
Mit freundlichen Grüßen / Kind regards
Steffen Maier

Linux on z Systems Development

IBM Deutschland Research & Development GmbH
Vorsitzende des Aufsichtsrats: Martina Koederitz
Geschaeftsfuehrung: Dirk Wittkopp
Sitz der Gesellschaft: Boeblingen
Registergericht: Amtsgericht Stuttgart, HRB 243294

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] virtio_scsi: Implement 'native LUN' feature
  2018-01-26 14:15       ` Steffen Maier
@ 2018-01-26 15:22         ` Hannes Reinecke
  2018-01-26 16:57         ` Steffen Maier
  1 sibling, 0 replies; 16+ messages in thread
From: Hannes Reinecke @ 2018-01-26 15:22 UTC (permalink / raw)
  To: Steffen Maier, Martin K. Petersen
  Cc: Christoph Hellwig, James Bottomley, Paolo Bonzini, linux-scsi,
	Hannes Reinecke

On 01/26/2018 03:15 PM, Steffen Maier wrote:
> On 12/18/2017 08:48 AM, Hannes Reinecke wrote:
>> On 12/15/2017 07:17 PM, Steffen Maier wrote:
>>> On 12/14/2017 11:11 AM, Hannes Reinecke wrote:
>>>> The 'native LUN' feature allows virtio-scsi to pass in the LUN
>>>> numbers from the underlying storage directly, without having
>>>> to modify the LUN number itself.
>>>> It works by shifting the existing LUN number down by 8 bytes,
>>>> and add the virtio-specific 8-byte LUN steering header.
>>>> With that virtio doesn't have to mangle the LUN number, allowing
>>>> us to pass the 'real' LUN number to the guest.
>>>
>>> I only see shifts by 16 bits in the code below which would be 2 bytes.
>>> I had a quick look at the corresponding qemu code which looked the same
>>> to me.
>>> What's the relation to 8 byte shifting, which would be 64 bit shift and
>>> thus odd for a 64 bit LUN, mentioned in the description here?
>>>
>>> If the code keeps the LUN level 1 and 2 (dropping level 3 and 4) and I
>>> just don't understand it, it would be fine, I guess.
>>>
>> Yeah, messed that one up. It should be 8 _bits_, obviously.
> 
> Isn't it 16 bits or 2 bytes corresponding to one LUN level?
> See also below.
> 
Indeed, correct.

>>>> Of course, we do cut off the last 8 bytes of the 'real' LUN number,
>>>> but I'm not aware of any array utilizing that, so the impact should
>>>> be negligible.
>>>
>>> Why did we do v3.17 commit 9cb78c16f5da ("scsi: use 64-bit LUNs")? ;-)
>>>
>> Because that patch just lifts the internal code to use 64-bit LUNs
>> without any changes to the behaviour.
>> This one uses the internal 64-bit LUNs and actually changes the
>> behaviour.
> 
> Sure, I was just being ironic, because your description sounded a bit as
> if all the LUN range extension is not even required because no storage
> array uses it.
> 
>>>> Signed-off-by: Hannes Reinecke <hare@suse.com>
>>>> ---
>>>>    drivers/scsi/virtio_scsi.c       | 62
>>>> ++++++++++++++++++++++++++++++----------
>>>>    include/uapi/linux/virtio_scsi.h |  1 +
>>>>    2 files changed, 48 insertions(+), 15 deletions(-)
> 
>>>> @@ -524,10 +532,16 @@ static void virtio_scsi_init_hdr(struct
>>>> virtio_device *vdev,
>>>>                     int target_id,
>>>>                     struct scsi_cmnd *sc)
>>>>    {
>>>> -    cmd->lun[0] = 1;
>>>> -    cmd->lun[1] = target_id;
>>>> -    cmd->lun[2] = (sc->device->lun >> 8) | 0x40;
>>>> -    cmd->lun[3] = sc->device->lun & 0xff;
>>>> +    if (virtio_has_feature(vdev, VIRTIO_SCSI_F_NATIVE_LUN)) {
>>>> +        u64 lun = sc->device->lun << 16;
>>>> +        lun |= ((u64)1 << 8) | (u64)target_id;
>>>> +        int_to_scsilun(lun, (struct scsi_lun *)&cmd->lun);
>>>> +    } else {
>>>> +        cmd->lun[0] = 1;
>>>> +        cmd->lun[1] = target_id;
>>>> +        cmd->lun[2] = (sc->device->lun >> 8) | 0x40;
>>>> +        cmd->lun[3] = sc->device->lun & 0xff;
>>>> +    }
>>>
>>> Above 2 patterns seem to repeat. Have helper functions (similar to
>>> int_to_scsilun()) now that it's more than just 4 lines of filling in the
>>> virtio lun?
>>>
>> Yes, can do.
> 
> Meanwhile I think I realized why I had trouble understanding what the
> code does. I guess, I expected a conversion with int_to_scsilun() first,
> and then we would fill in the virtio-specific parts of magic-one and
> target-ID.
> You do it just the other way round, which is OK.
> 
> Say we have a 4 level 64-bit LUN and represent it in hex using
> placeholder hexdigits for the 4 levels like this:
> 0xL1L1L2L2L3L3L4L4
> Its decimal SCSI LUN representation (in hex) is:
> 0xL4L4L3L3L2L2L1L1
> Then you shift left by 16 bits (2 bytes, 1 LUN level), basically
> dropping the 4th level:
> 0xL3L3L2L2L1L10000
> The steering header is 0x01TT where TT is the target ID.
> You bitwise or the virtio-specific parts into the SCSI LUN representation:
> 0xL3L3L2L2L1L101TT
> Finally you convert it into the 64-bit LUN representation:
> 0x01TTL1L1L2L2L3L3
>   0123456789abcdef [char array indexes]
> So we nicely have the virtio-specific parts at those array indexes where
> the virtio-scsi protocol expects them.
> The usage of the other bytes is now of course different from the
> original LUN encoding: It allows more than just peripheral and flat
> space addressing for the 1st level; and it now also uses levels 2 and 3
> which were previously always zero. The 3rd level really requires 64-bit
> support in the kvm host kernel.
> This also means that a 4-level LUN is not supported unless we would
> create a new virtio-scsi protocol version that would transfer the target
> ID in a separate field not as part of the LUN field.
> 
> Did I get that right?
> 
I couldn't have phrased it better.
Essentially we're moving the original LUN down by two bytes, and
stuffing the original virtio target and LUN addressing onto it.
With that we can keep (most of) the original LUN, _and_ keep
compatibility with virtio (of sorts).

And with that we have to rely on no-one using 4-level LUNs; but so far
I've yet to see an array even using 3-level LUNs.

> A similar explanation in a kernel doc comment for the helper conversion
> function(s) might be helpful.
> 
Okay, I can update it.

However, I'm waiting for some sign of life from Paolo; a review would be
very much appreciated...

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] virtio-scsi: Add FC transport class
  2017-12-18  8:31     ` Hannes Reinecke
@ 2018-01-26 16:54       ` Steffen Maier
  2018-02-02 16:00         ` Hannes Reinecke
  0 siblings, 1 reply; 16+ messages in thread
From: Steffen Maier @ 2018-01-26 16:54 UTC (permalink / raw)
  To: Hannes Reinecke, Martin K. Petersen
  Cc: Christoph Hellwig, James Bottomley, Paolo Bonzini, linux-scsi,
	Hannes Reinecke

On 12/18/2017 09:31 AM, Hannes Reinecke wrote:
> On 12/15/2017 07:08 PM, Steffen Maier wrote:
>> On 12/14/2017 11:11 AM, Hannes Reinecke wrote:
>>> When a device announces an 'FC' protocol we should be pulling
>>> in the FC transport class to have the rports etc setup correctly.
>>
>> It took some time for me to understand what this does.
>> It seems to mirror the topology of rports and sdevs that exist under the
>> fc_host on the kvm host side to the virtio-scsi-fc side in the guest.
>>
>> I like the idea. This is also what I've been suggesting users to do if
>> they back virtio-scsi with zfcp on the kvm host side. Primarily to not
>> stall all virtio-scsi I/O on all paths if the guest ever gets into
>> scsi_eh. But also to make it look like an HBA pass through so one can
>> more easily migrate to this once we have FCP pass through.

On second thought, I like the idea for virtio-scsi.

For the future virtio-(v)fc case, see below.

>>> @@ -755,19 +823,34 @@ static int virtscsi_abort(struct scsi_cmnd *sc)
>>
>>> +    if (vscsi->protocol == SCSI_PROTOCOL_FCP) {
>>> +        struct fc_rport *rport =
>>> +            starget_to_rport(scsi_target(sc->device));
>>> +        if (rport && rport->dd_data ) {
>>> +            tgt = rport->dd_data;
>>> +            target_id = tgt->target_id;
>>> +        } else
>>> +            return FAST_IO_FAIL;
>>> +    } else {
>>> +        tgt = scsi_target(sc->device)->hostdata;
>>> +        if (!tgt || tgt->removed)
>>> +            return FAST_IO_FAIL;
>>> +    }
>>
>> dito
>>
>>> @@ -857,27 +970,67 @@ static void virtscsi_rescan_work(struct
>>> work_struct *work)
>>>
>>>        wait_for_completion(&comp);
>>
>> Waiting in work item .vs. having the response (IRQ) path trigger
>> subsequent processing async ?
>> Or do we need the call chain(s) getting here to be in our own process
>> context via the workqueue anyway?
>>
> Can't see I can parse this sentence, but I'll be looking at the code
> trying to come up with a clever explanation :-)

Sorry, meanwhile I have a hard time understanding my own words, too.

I think I wondered if the effort of a work item is really necessary, 
especially considering that it does block on the completion and thus 
could delay other queued work items (even though Concurrency Managed 
Workqueues can often hide this delay).

Couldn't we just return asynchronously after having sent the request?
And then later on, simply have the response (IRQ) path trigger whatever 
processing is necessary (i.e. what the work item variant does after 
waking up from wait_for_completion()) in some asynchronous fashion? Of 
course, this could also be a work item which just does the necessary 
remaining processing after we got a response.
Just a wild guess, without knowing the environmental requirements.
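
E.g. (wild sketch, invented names):

/* virtqueue completion callback, runs in IRQ context */
static void virtscsi_rescan_resp_done(struct virtio_scsi *vscsi, void *buf)
{
	vscsi->rescan_resp = buf;	/* hypothetical field */
	queue_work(system_freezable_wq, &vscsi->rescan_done_work);
}

/* work item: only the post-processing, no wait_for_completion() */
static void virtscsi_rescan_done_work_fn(struct work_struct *work)
{
	struct virtio_scsi *vscsi =
		container_of(work, struct virtio_scsi, rescan_done_work);

	/* evaluate vscsi->rescan_resp, add rport/scan target, re-queue */
}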

>>> +    if (transport == SCSI_PROTOCOL_FCP) {
>>> +        struct fc_rport_identifiers rport_ids;
>>> +        struct fc_rport *rport;
>>> +
>>> +        rport_ids.node_name = wwn_to_u64(cmd->resp.rescan.node_wwn);
>>> +        rport_ids.port_name = wwn_to_u64(cmd->resp.rescan.port_wwn);
>>> +        rport_ids.port_id = (target_id >> 8);
>>
>> Why do you shift target_id by one byte to the right?
>>
> Because with the original setup virtio_scsi guest would pass in the
> target_id, and the host would be selecting the device based on that
> information.
> With virtio-vfc we pass in the wwpn, but still require the target ID to
> be compliant with things like event notification etc.

Don't we need the true N_Port-ID, then? That's what an fc_rport.port_id 
usually contains. It's also a simple way to lookup resources on a SAN 
switch for problem determination. Or did I misunderstand the 
content/semantics of the variable target_id, assuming it's a SCSI target 
ID, i.e. the 3rd part of a HCTL 4-tuple?

> So I've shifted the target id onto the port ID (which is 24 bit anyway).
>> I could've used a bitfield here, but then I wasn't quite sure about its
>> endianness.

>>> +        rport = fc_remote_port_add(sh, 0, &rport_ids);
>>> +        if (rport) {
>>> +            tgt->rport = rport;
>>> +            rport->dd_data = tgt;
>>> +            fc_remote_port_rolechg(rport, FC_RPORT_ROLE_FCP_TARGET);
>>
>> Is the rolechg to get some event? Otherwise we could have
>> rport_ids.roles = FC_RPORT_ROLE_FCP_TARGET before fc_remote_port_add().
>>
> That's how the 'normal' transport classes do it; but I'll check if this
> can be rolled into the call to fc_remote_port_add().

My idea was just based on how zfcp does it. Do you think I need to check 
if zfcp should do it via rolechg (even though zfcp never changes an 
rport role since it can only open targets)?

>>> @@ -932,14 +1089,31 @@ static void virtscsi_scan_host(struct
>>> virtio_scsi *vscsi)
>>>    static void virtscsi_scan_start(struct Scsi_Host *sh)
>>>    {

>>> +    if (vscsi->protocol == SCSI_PROTOCOL_FCP) {
>>> +        fc_host_node_name(sh) = vscsi->wwnn;
>>> +        fc_host_port_name(sh) = vscsi->wwpn;
>>> +        fc_host_port_id(sh) = 0x00ff00;
>>> +        fc_host_port_type(sh) = FC_PORTTYPE_NPIV;
>>
>> Why is this hardcoded?
>>
>> At least with zfcp, we can have kvm host *v*HBAs without NPIV.
>>
> For the simple fact that I don't have fields to transfer this
> information; plus it's not _actually_ used anywhere.

It might not be used in this code, but it is exported to user space via 
sysfs. If I understood things correctly, you newly introduce virtio-scsi 
commands in patch 1 and are free to add more fields to struct 
virtio_scsi_rescan_resp (maybe within some max size limit)?
To me, this raises the question which properties of the host's FC 
(driver core) objects should be mirrored to the guest. Ideally all (and 
that's a lot).
This in turn makes me wonder if mirroring is really desirable (e.g. 
considering the effort) or if only the guest should have its own FC 
object hierarchy which does _not_ exist on the KVM host in case an 
fc_host is passed through with virtio-(v)fc.

> Unless you have a strict need for this information?

In general, I would wish that virtio-(v)fc has no hard requirement for 
creating real vports on the KVM host. From a zfcp perspective, it would 
be nice if already existing fc_hosts of any type (vport or not) could be 
configured for virtio-vfc pass through. A selection key could e.g. be 
fc_host.port_name which is the (NPIV) WWPN of the initiator.

Rationale:

A 1st level hypervisor manages physical devices.
A 1st level hypervisor can pass through devices to a 1st level guest.

On x86, the 1st level hypervisor is KVM.
It can pass through PCI devices (physical or virtual functions).
It can create vports (defining the NPIV WWPN) to pass through with 
virtio-vfc.
 From your presentation [1] I derived, and was quite surprised, that there 
does not seem to be a thing that combines a VF and a vport (actually no 
VFs for HBAs at all), because then x86 would already have everything 
needed: just PCI pass through a vport-VF and be done.
If I understood your slides correctly "mediated device passthrough" 
would be a software "workaround" because the hardware does not provide 
vport-VFs.

(The following assumes an FCP channel / zfcp perspective.)
On s390, the 1st level hypervisor is PR/SM providing LPARs.
The PR/SM ecosystem defines virtual function equivalents [2, slide 15].
They can either use NPIV (kind of a vport) or not (FCP channel hardware 
virtualization was there before NPIV). Admittedly, the latter case is 
probably of no(t so much) importance for a virtio-vfc use case {it would 
be more like virtio-fc, i.e. without the 'v' as in NPIV}.
For the NPIV case, we have something like a vport and VF combined into a 
device, that we can sense on the system bus CCW (would be PCI on x86). 
The PR/SM ecosystem also defines the virtual NPIV WWPN for such initiator.
An LPAR only sees an equivalent of virtual functions; it does not get 
access to a PF equivalent.
Hence, zfcp can hardly reasonably implement the vport creation interface 
of scsi_transport_fc; it only fakes the fc_host.port_type but otherwise 
all zfcp fc_hosts are "real", i.e. non-vport.
KVM as a 2nd level hypervisor sees the VF equivalents of the LPAR it 
runs in. Hence, KVM can only take whatever devices are there, HBA LLDDs 
in that KVM cannot create vports themselves.
(BTW, in case of z/VM as 2nd level hypervisor, it supports live guest 
migration with zfcp devices passed through the 1st and the 2nd level 
hypervisor.)

IOW, it would be nice if virtio-vfc could support nested KVM as 2nd 
level hypervisor just seeing already existing VFs or vports, which are 
in turn managed, defined and passed through by some 1st level hypervisor.


A few more thoughts on your presentation [1]:

"Devices on the vport will not be visible on the host"
I could not agree more with the design point that devices (or at least 
their descendant object subtree) passed through to a guest should not 
appear on the host!
With virtio-blk or virtio-scsi, we have SCSI devices and thus disks 
visible in the host, which needlessly scans partitions, or even worse 
automatically scans for LVM and maybe even activates PVs/VGs/LVs. It's 
hard for a KVM host admin to suppress this (and not break the devices 
the host needs itself).
If we mirror the host's scsi_transport_fc tree including fc_rports and 
thus SCSI devices etc., we would have the same problems?
Even more so, dev_loss_tmo and fast_io_fail_tmo would run independently 
on the host and in the guest on the same mirrored scsi_transport_fc 
object tree. I can envision user confusion having configured timeouts on 
the "wrong" side (host vs. guest). Also we would still need a mechanism 
to mirror fc_rport (un)block from host to guest for proper transport 
recovery. In zfcp we try to recover on transport rather than scsi_eh 
whenever possible because it is so much smoother.

"Towards virtio-fc?"
Using the FCP_CMND_IU (instead of just a plain SCB as with virtio-scsi) 
sounds promising to me as starting point.
A listener from the audience asked if you would also do ELS/CT in the 
guest and you replied that this would not be good. Why is that?
Based on above starting point, doing ELS/CT (and basic aborts and maybe 
a few other functions such as open/close ports or metadata transfer 
commands) in the guest is exactly what I would have expected. An HBA 
LLDD on the KVM host would implement such API and for all fc_hosts, 
passed through this way, it would *not* establish any scsi_transport_fc 
tree on the host. Instead the one virtio-vfc implementation in the guest 
would do this independently of which HBA LLDD provides the passed-through 
fc_host in the KVM host.
ELS/CT pass through is maybe even for free via FC_BSG for those LLDDs 
that already implement it.
Rport open/close is just the analogue of slave_alloc()/slave_destroy().
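
To make that a bit more concrete, such a host-side API could perhaps
look like this (purely hypothetical, all names invented):

struct virtio_vfc_host_ops {
	/* queue an FCP_CMND IU built by the guest */
	int (*send_fcp_cmnd)(void *vfc_host, const void *iu, size_t len);
	/* pass through ELS/CT payloads, e.g. backed by FC_BSG */
	int (*send_els_ct)(void *vfc_host, const void *req, size_t len);
	/* per remote port, analogous to slave_alloc()/slave_destroy() */
	int (*open_rport)(void *vfc_host, u64 wwpn);
	void (*close_rport)(void *vfc_host, u64 wwpn);
};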

A new tricky part would be how to "unbind" an fc_host on the KVM host to 
be able to pass it through. (At least on x86) we have a vport and thus 
would not have a system bus (PCI) device we could unbind and then bind 
to the virtio-vfc equivalent of VFIO for pass through. A virtio-vfc API 
with sysfs interface for HBA LLDDs on the host could provide something 
similar on an fc_host level.
Even better would be if an HBA LLDD would already know at probe time 
which of its fc_hosts are planned for pass through so it would not even 
start establishing the whole FC transport object tree with port 
discovery and the implied SCSI target scan. A late unbind would, at 
least with zfcp, cause the object tree to linger with fc_rports 
transitioning to "not present" state eventually (because zfcp uses the 
default FC_TGTID_BIND_BY_WWPN and the fc_host lives long until module 
unload or system shutdown, so fc_remote_port_delete() does not really 
delete rports from sysfs) and SCSI devices transitioning to 
"transport-offline" eventually.

What do you think?


REFERENCES

[1] https://www.youtube.com/watch?v=ME1IdbtaU5E , 
https://events.static.linuxfound.org/sites/events/files/slides/kvmforum17-npiv.pdf

[2] 
http://www-05.ibm.com/de/events/linux-on-z/pdf/day2/4_Steffen_Maier_zfcp-best-practices-2015.pdf

-- 
Mit freundlichen Grüßen / Kind regards
Steffen Maier

Linux on z Systems Development

IBM Deutschland Research & Development GmbH
Vorsitzende des Aufsichtsrats: Martina Koederitz
Geschaeftsfuehrung: Dirk Wittkopp
Sitz der Gesellschaft: Boeblingen
Registergericht: Amtsgericht Stuttgart, HRB 243294

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] virtio_scsi: Implement 'native LUN' feature
  2018-01-26 14:15       ` Steffen Maier
  2018-01-26 15:22         ` Hannes Reinecke
@ 2018-01-26 16:57         ` Steffen Maier
  1 sibling, 0 replies; 16+ messages in thread
From: Steffen Maier @ 2018-01-26 16:57 UTC (permalink / raw)
  To: Hannes Reinecke, Martin K. Petersen
  Cc: Christoph Hellwig, James Bottomley, Paolo Bonzini, linux-scsi,
	Hannes Reinecke

On 01/26/2018 03:15 PM, Steffen Maier wrote:
> On 12/18/2017 08:48 AM, Hannes Reinecke wrote:
>> On 12/15/2017 07:17 PM, Steffen Maier wrote:
>>> On 12/14/2017 11:11 AM, Hannes Reinecke wrote:

>>>> @@ -524,10 +532,16 @@ static void virtio_scsi_init_hdr(struct
>>>> virtio_device *vdev,
>>>>                     int target_id,
>>>>                     struct scsi_cmnd *sc)
>>>>    {
>>>> -    cmd->lun[0] = 1;
>>>> -    cmd->lun[1] = target_id;
>>>> -    cmd->lun[2] = (sc->device->lun >> 8) | 0x40;
>>>> -    cmd->lun[3] = sc->device->lun & 0xff;
>>>> +    if (virtio_has_feature(vdev, VIRTIO_SCSI_F_NATIVE_LUN)) {
>>>> +        u64 lun = sc->device->lun << 16;
>>>> +        lun |= ((u64)1 << 8) | (u64)target_id;
>>>> +        int_to_scsilun(lun, (struct scsi_lun *)&cmd->lun);
>>>> +    } else {
>>>> +        cmd->lun[0] = 1;
>>>> +        cmd->lun[1] = target_id;
>>>> +        cmd->lun[2] = (sc->device->lun >> 8) | 0x40;
>>>> +        cmd->lun[3] = sc->device->lun & 0xff;
>>>> +    }
>>>
>>> Above 2 patterns seem to repeat. Have helper functions (similar to
>>> int_to_scsilun()) now that it's more than just 4 lines of filling in the
>>> virtio lun?
>>>
>> Yes, can do.
> 
> Meanwhile I think I realized why I had trouble understanding what the 
> code does. I guess, I expected a conversion with int_to_scsilun() first, 
> and then we would fill in the virtio-specific parts of magic-one and 
> target-ID.
> You do it just the other way round, which is OK.
> 
> Say we have a 4 level 64-bit LUN and represent it in hex using 
> placeholder hexdigits for the 4 levels like this:
> 0xL1L1L2L2L3L3L4L4
> Its decimal SCSI LUN representation (in hex) is:
> 0xL4L4L3L3L2L2L1L1
> Then you shift left by 16 bits (2 bytes, 1 LUN level), basically 
> dropping the 4th level:
> 0xL3L3L2L2L1L10000
> The steering header is 0x01TT where TT is the target ID.
> You bitwise or the virtio-specific parts into the SCSI LUN representation:
> 0xL3L3L2L2L1L101TT
> Finally you convert it into the 64-bit LUN representation:
> 0x01TTL1L1L2L2L3L3
>    0123456789abcdef [char array indexes]
      0 1 2 3 4 5 6 7
of course
> So we nicely have the virtio-specific parts at those array indexes where 
> the virtio-scsi protocol expects them.
> The usage of the other bytes is now of course different from the 
> original LUN encoding: It allows more than just peripheral and flat 
> space addressing for the 1st level; and it now also uses levels 2 and 3 
> which were previously always zero. The 3rd level really requires 64-bit 
> support in the kvm host kernel.
> This also means that a 4-level LUN is not supported unless we would 
> create a new virtio-scsi protocol version that would transfer the target 
> ID in a separate field not as part of the LUN field.
> 
> Did I get that right?
> 
> A similar explanation in a kernel doc comment for the helper conversion 
> function(s) might be helpful.

-- 
Mit freundlichen Grüßen / Kind regards
Steffen Maier

Linux on z Systems Development

IBM Deutschland Research & Development GmbH
Vorsitzende des Aufsichtsrats: Martina Koederitz
Geschaeftsfuehrung: Dirk Wittkopp
Sitz der Gesellschaft: Boeblingen
Registergericht: Amtsgericht Stuttgart, HRB 243294

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] virtio-scsi: Add FC transport class
  2018-01-26 16:54       ` Steffen Maier
@ 2018-02-02 16:00         ` Hannes Reinecke
  2018-02-02 17:39           ` Steffen Maier
  0 siblings, 1 reply; 16+ messages in thread
From: Hannes Reinecke @ 2018-02-02 16:00 UTC (permalink / raw)
  To: Steffen Maier, Martin K. Petersen
  Cc: Christoph Hellwig, James Bottomley, Paolo Bonzini, linux-scsi,
	Hannes Reinecke

On 01/26/2018 05:54 PM, Steffen Maier wrote:
> On 12/18/2017 09:31 AM, Hannes Reinecke wrote:
>> On 12/15/2017 07:08 PM, Steffen Maier wrote:
>>> On 12/14/2017 11:11 AM, Hannes Reinecke wrote:
>>>> When a device announces an 'FC' protocol we should be pulling
>>>> in the FC transport class to have the rports etc setup correctly.
>>>
>>> It took some time for me to understand what this does.
>>> It seems to mirror the topology of rports and sdevs that exist under the
>>> fc_host on the kvm host side to the virtio-scsi-fc side in the guest.
>>>
>>> I like the idea. This is also what I've been suggesting users to do if
>>> they back virtio-scsi with zfcp on the kvm host side. Primarily to not
>>> stall all virtio-scsi I/O on all paths if the guest ever gets into
>>> scsi_eh. But also to make it look like an HBA pass through so one can
>>> more easily migrate to this once we have FCP pass through.
> 
> On second thought, I like the idea for virtio-scsi.
> 
> For the future virtio-(v)fc case, see below.
> 
>>>> @@ -755,19 +823,34 @@ static int virtscsi_abort(struct scsi_cmnd *sc)
>>>
>>>> +    if (vscsi->protocol == SCSI_PROTOCOL_FCP) {
>>>> +        struct fc_rport *rport =
>>>> +            starget_to_rport(scsi_target(sc->device));
>>>> +        if (rport && rport->dd_data ) {
>>>> +            tgt = rport->dd_data;
>>>> +            target_id = tgt->target_id;
>>>> +        } else
>>>> +            return FAST_IO_FAIL;
>>>> +    } else {
>>>> +        tgt = scsi_target(sc->device)->hostdata;
>>>> +        if (!tgt || tgt->removed)
>>>> +            return FAST_IO_FAIL;
>>>> +    }
>>>
>>> dito
>>>
>>>> @@ -857,27 +970,67 @@ static void virtscsi_rescan_work(struct
>>>> work_struct *work)
>>>>
>>>>        wait_for_completion(&comp);
>>>
>>> Waiting in work item .vs. having the response (IRQ) path trigger
>>> subsequent processing async ?
>>> Or do we need the call chain(s) getting here to be in our own process
>>> context via the workqueue anyway?
>>>
>> Can't say I can parse this sentence, but I'll be looking at the code
>> trying to come up with a clever explanation :-)
> 
> Sorry, meanwhile I have a hard time understanding my own words, too.
> 
> I think I wondered if the effort of a work item is really necessary,
> especially considering that it does block on the completion and thus
> could delay other queued work items (even though Concurrency Managed
> Workqueues can often hide this delay).
> 
> Couldn't we just return asynchronously after having sent the request?
> And then later on, simply have the response (IRQ) path trigger whatever
> processing is necessary (i.e. what the work item variant does after
> waking up from wait_for_completion()) in some asynchronous fashion? Of
> course, this could also be a work item which just does the necessary
> remaining processing after we got a response.
> Just a wild guess, without knowing the environmental requirements.
> 
The main point here was that we get off the irq-completion handler; if
we were to trigger this directly we would be running in an interrupt
context, which would then make things like spin_lock, mutex et al. tricky.
Plus it's not really time critical; during rescanning all I/O should be
blocked, so we shouldn't have anything else going on.

>>>> +    if (transport == SCSI_PROTOCOL_FCP) {
>>>> +        struct fc_rport_identifiers rport_ids;
>>>> +        struct fc_rport *rport;
>>>> +
>>>> +        rport_ids.node_name = wwn_to_u64(cmd->resp.rescan.node_wwn);
>>>> +        rport_ids.port_name = wwn_to_u64(cmd->resp.rescan.port_wwn);
>>>> +        rport_ids.port_id = (target_id >> 8);
>>>
>>> Why do you shift target_id by one byte to the right?
>>>
>> Because with the original setup virtio_scsi guest would pass in the
>> target_id, and the host would be selecting the device based on that
>> information.
>> With virtio-vfc we pass in the wwpn, but still require the target ID to
>> be compliant with things like event notification etc.
> 
> Don't we need the true N_Port-ID, then? That's what an fc_rport.port_id
> usually contains. It's also a simple way to lookup resources on a SAN
> switch for problem determination. Or did I misunderstand the
> content/semantics of the variable target_id, assuming it's a SCSI target
> ID, i.e. the 3rd part of a HCTL 4-tuple?
> 
Yes, that was the idea.
I've now modified the code so that we can pick up the port id for both
the target and the host port. That should satisfy the needs here.

>> So I've shifted the target id onto the port ID (which is 24 bit anyway).
>> I could've used a bitfield here, but then I wasn't quite sure about its
>> endianness.
> 
>>>> +        rport = fc_remote_port_add(sh, 0, &rport_ids);
>>>> +        if (rport) {
>>>> +            tgt->rport = rport;
>>>> +            rport->dd_data = tgt;
>>>> +            fc_remote_port_rolechg(rport, FC_RPORT_ROLE_FCP_TARGET);
>>>
>>> Is the rolechg to get some event? Otherwise we could have
>>> rport_ids.roles = FC_RPORT_ROLE_FCP_TARGET before fc_remote_port_add().
>>>
>> That's how the 'normal' transport classes do it; but I'll check if this
>> can be rolled into the call to fc_remote_port_add().
> 
> My idea was just based on how zfcp does it. Do you think I need to check
> if zfcp should do it via rolechg (even though zfcp never changes an
> rport role since it can only open targets)?
> 
Yeah, modified that with the next version.

>>>> @@ -932,14 +1089,31 @@ static void virtscsi_scan_host(struct
>>>> virtio_scsi *vscsi)
>>>>    static void virtscsi_scan_start(struct Scsi_Host *sh)
>>>>    {
> 
>>>> +    if (vscsi->protocol == SCSI_PROTOCOL_FCP) {
>>>> +        fc_host_node_name(sh) = vscsi->wwnn;
>>>> +        fc_host_port_name(sh) = vscsi->wwpn;
>>>> +        fc_host_port_id(sh) = 0x00ff00;
>>>> +        fc_host_port_type(sh) = FC_PORTTYPE_NPIV;
>>>
>>> Why is this hardcoded?
>>>
>>> At least with zfcp, we can have kvm host *v*HBAs without NPIV.
>>>
>> For the simple fact that I don't have fields to transfer this
>> information; plus it's not _actually_ used anywhere.
> 
> It might not be used in this code, but it is exported to user space via
> sysfs. If I understood things correctly, you newly introduce virtio-scsi
> commands in patch 1 and are free to add more fields to struct
> virtio_scsi_rescan_resp (maybe within some max size limit)?
> To me, this raises the question which properties of the host's FC
> (driver core) objects should be mirrored to the guest. Ideally all (and
> that's a lot).
> This in turn makes me wonder if mirroring is really desirable (e.g.
> considering the effort) or if only the guest should have its own FC
> object hierarchy which does _not_ exist on the KVM host in case an
> fc_host is passed through with virtio-(v)fc.
> 
The original idea here was to pass in NPIV hosts only; otherwise it
might be simpler to use device passthrough and have the full device
assigned to the guest.
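
That said, the rescan response could of course grow more fields; a
purely hypothetical sketch (only the wwns and the target id exist in
the posted patches, port_type and port_id are assumptions):

    struct virtio_scsi_rescan_resp {
            __virtio32 id;          /* next used target id, or -1 */
            __u8 node_wwn[8];
            __u8 port_wwn[8];
            __virtio32 port_type;   /* mirror the host's port type
                                     * instead of hardcoding NPIV */
            __virtio32 port_id;     /* 24-bit N_Port_ID */
    } __attribute__((packed));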

>> Unless you have a strict need why this information is required?
> 
> In general, I would wish that virtio-(v)fc has no hard requirement for
> creating real vports on the KVM host. From a zfcp perspective, it would
> be nice if already existing fc_hosts of any type (vport or not) could be
> configured for virtio-vfc pass through. A selection key could e.g. be
> fc_host.port_name which is the (NPIV) WWPN of the initiator.
> 
> Rationale:
> 
> A 1st level hypervisor manages physical devices.
> A 1st level hypervisor can pass through devices to a 1st level guest.
> 
> On x86, the 1st level hypervisor is KVM.
> It can pass through PCI devices (physical or virtual functions).
> It can create vports (defining the NPIV WWPN) to pass through with
> virtio-vfc.
> From your presentation [1] I derived, and was quite surprised, that there
> does not seem to be a thing that combines a VF and vport (actually no
> VFs for HBAs at all), because then x86 would already have everything
> needed: Just PCI pass through a vport-VF and be done.
> If I understood your slides correctly "mediated device passthrough"
> would be a software "workaround" because the hardware does not provide
> vport-VFs.
> 
Yeah, sort of.

> (The following assumes an FCP channel / zfcp perspective.)
> On s390, the 1st level hypervisor is PR/SM providing LPARs.
> The PR/SM ecosystem defines virtual function equivalents [2, slide 15].
> They can either use NPIV (kind of a vport) or not (FCP channel hardware
> virtualization was there before NPIV). Admittedly, the latter case is
> probably of no(t so much) importance for a virtio-vfc use case {it would
> be more like virtio-fc, i.e. without the 'v' as in NPIV}.
> For the NPIV case, we have something like a vport and VF combined into a
> device, that we can sense on the system bus CCW (would be PCI on x86).
> The PR/SM ecosystem also defines the virtual NPIV WWPN for such an initiator.
> An LPAR only sees an equivalent of virtual functions; it does not get
> access to a PF equivalent.
> Hence, zfcp can hardly reasonably implement the vport creation interface
> of scsi_transport_fc; it only fakes the fc_host.port_type but otherwise
> all zfcp fc_hosts are "real", i.e. non-vport.
> KVM as a 2nd level hypervisor sees the VF equivalents of the LPAR it
> runs in. Hence, KVM can only take whatever devices are there; HBA LLDDs
> in that KVM cannot create vports themselves.
> (BTW, in case of z/VM as 2nd level hypervisor, it supports live guest
> migration with zfcp devices passed through the 1st and the 2nd level
> hypervisor.)
> 
> IOW, it would be nice if virtio-vfc could support nested KVM as 2nd
> level hypervisor just seeing already existing VFs or vports, which are
> in turn managed, defined and passed through by some 1st level hypervisor.
> 
As mentioned, there is nothing in the code which requires NPIV. It works
happily with any FC HBA (or emulated devices, or whatever).

From the guest side I have just exposed the port_id, wwpn, and wwnn, as
these were the only fields I found a use-case for (like keeping lsscsi -t
happy). If you need additional information, we sure can add it.
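
For reference, on the transport class side the currently exposed set
boils down to something like this (sketch; the template name is made up):

    static struct fc_function_template virtscsi_transport_functions = {
            .show_host_node_name    = 1,
            .show_host_port_name    = 1,
            .show_host_port_id      = 1,
            /* further host/rport attributes would be enabled here */
    };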

> 
> A few more thoughts on your presentation [1]:
> 
> "Devices on the vport will not be visible on the host"
> I could not agree more to the design point that devices (or at least
> their descendant object subtree) passed through to a guest should not
> appear on the host!
> With virtio-blk or virtio-scsi, we have SCSI devices and thus disks
> visible in the host, which needlessly scans partitions, or even worse
> automatically scans for LVM and maybe even activates PVs/VGs/LVs. It's
> hard for a KVM host admin to suppress this (and not break the devices
> the host needs itself).
> If we mirror the host's scsi_transport_fc tree including fc_rports and
> thus SCSI devices etc., we would have the same problems?
> Even more so, dev_loss_tmo and fast_io_fail_tmo would run independently
> on the host and in the guest on the same mirrored scsi_transport_fc
> object tree. I can envision user confusion having configured timeouts on
> the "wrong" side (host vs. guest). Also we would still need a mechanism
> to mirror fc_rport (un)block from host to guest for proper transport
> recovery. In zfcp we try to recover on transport rather than scsi_eh
> whenever possible because it is so much smoother.
> 
A similar thing can be achieved even today, by setting the
'no_uld_attach' parameter when scanning the scsi device
(that's what some RAID HBAs do).
However, there currently is no way of modifying it from user-space, and
certainly no way to change the behaviour for existing devices.
It should be relatively simple to set this flag whenever the host is
exposed to a VM; we would still see the scsi devices, but the 'sd'
driver won't be attached, so nothing will scan the device on the host.
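
Roughly like this (sketch only; the 'exposed_to_vm' flag is made up):

    static int virtscsi_slave_alloc(struct scsi_device *sdev)
    {
            struct virtio_scsi *vscsi = shost_priv(sdev->host);

            /* Keep host-side ULDs like 'sd' from attaching when
             * the host is passed through to a VM:
             */
            if (vscsi->exposed_to_vm)
                    sdev->no_uld_attach = 1;
            return 0;
    }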

> "Towards virtio-fc?"
> Using the FCP_CMND_IU (instead of just a plain SCB as with virtio-scsi)
> sounds promising to me as starting point.
> A listener from the audience asked if you would also do ELS/CT in the
> guest and you replied that this would not be good. Why is that?
> Based on above starting point, doing ELS/CT (and basic aborts and maybe
> a few other functions such as open/close ports or metadata transfer
> commands) in the guest is exactly what I would have expected. An HBA
> LLDD on the KVM host would implement such API and for all fc_hosts,
> passed through this way, it would *not* establish any scsi_transport_fc
> tree on the host. Instead the one virtio-vfc implementation in the guest
> would do this independently of which HBA LLDD provides the passed-through
> fc_host in the KVM host.
> ELS/CT pass through is maybe even for free via FC_BSG for those LLDDs
> that already implement it.
> Rport open/close is just the analogon of slave_alloc()/slave_destroy().
> 
I'm not convinced that moving to full virtio-fc is something we want or
even can do.
Neither qla2xxx nor lpfc allow for direct FC frame access; so one would
need to reformat the FC frames into something the driver understands,
just so that the hardware can transform it back into FC frames.
Another thing is xid management; some drivers have to do their own xid
management, based on hardware capabilities etc.
So the xids in the FC frames would need to be re-written, making it hard
if not impossible to match things up when the response comes in.

And, more importantly, what's the gain here?
Which feature do we miss?

> A new tricky part would be how to "unbind" an fc_host on the KVM host to
> be able to pass it through. (At least on x86) we have a vport and thus
> would not have a system bus (PCI) device we could unbind and then bind
> to the virtio-vfc equivalent of VFIO for pass through. A virtio-vfc API
> with sysfs interface for HBA LLDDs on the host could provide something
> similar on an fc_host level.
> Even better would be if an HBA LLDD would already know at probe time
> which of its fc_hosts are planned for pass through so it would not even
> start establishing the whole FC transport object tree with port
> discovery and the implied SCSI target scan. A late unbind would, at
> least with zfcp, cause the object tree to linger with fc_rports
> transitioning to "not present" state eventually (because zfcp uses the
> default FC_TGTID_BIND_BY_WWPN and the fc_host lives long until module
> unload or system shutdown, so fc_remote_port_delete() does not really
> delete rports from sysfs) and SCSI devices transitioning to
> "transport-offline" eventually.
> 
Hehe. That's one unresolved problem; the FC vendors already have a hard
time figuring out how to bind to the various modes (FC initiator, FC
target, NVMe initiator, NVMe target, and all the combinations thereof).

Not sure if we can solve it generically.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] virtio-scsi: Add FC transport class
  2018-02-02 16:00         ` Hannes Reinecke
@ 2018-02-02 17:39           ` Steffen Maier
  2018-02-05  8:07             ` Hannes Reinecke
  0 siblings, 1 reply; 16+ messages in thread
From: Steffen Maier @ 2018-02-02 17:39 UTC (permalink / raw)
  To: Hannes Reinecke, Martin K. Petersen
  Cc: Christoph Hellwig, James Bottomley, Paolo Bonzini, linux-scsi,
	Hannes Reinecke


On 02/02/2018 05:00 PM, Hannes Reinecke wrote:
> On 01/26/2018 05:54 PM, Steffen Maier wrote:
>> On 12/18/2017 09:31 AM, Hannes Reinecke wrote:
>>> On 12/15/2017 07:08 PM, Steffen Maier wrote:
>>>> On 12/14/2017 11:11 AM, Hannes Reinecke wrote:

>> To me, this raises the question which properties of the host's FC
>> (driver core) objects should be mirrored to the guest. Ideally all (and
>> that's a lot).
>> This in turn makes me wonder if mirroring is really desirable (e.g.
>> considering the effort) or if only the guest should have its own FC
>> object hierarchy which does _not_ exist on the KVM host in case an
>> fc_host is passed through with virtio-(v)fc.

>> A few more thoughts on your presentation [1]:
>>
>> "Devices on the vport will not be visible on the host"
>> I could not agree more to the design point that devices (or at least
>> their descendant object subtree) passed through to a guest should not
>> appear on the host!
>> With virtio-blk or virtio-scsi, we have SCSI devices and thus disks
>> visible in the host, which needlessly scans partitions, or even worse
>> automatically scans for LVM and maybe even activates PVs/VGs/LVs. It's
>> hard for a KVM host admin to suppress this (and not break the devices
>> the host needs itself).
>> If we mirror the host's scsi_transport_fc tree including fc_rports and
>> thus SCSI devices etc., we would have the same problems?
>> Even more so, dev_loss_tmo and fast_io_fail_tmo would run independently
>> on the host and in the guest on the same mirrored scsi_transport_fc
>> object tree. I can envision user confusion having configured timeouts on
>> the "wrong" side (host vs. guest). Also we would still need a mechanism
>> to mirror fc_rport (un)block from host to guest for proper transport
>> recovery. In zfcp we try to recover on transport rather than scsi_eh
>> whenever possible because it is so much smoother.
>>
> A similar thing can be achieved even today, by setting the
> 'no_uld_attach' parameter when scanning the scsi device
> (that's what some RAID HBAs do).
> However, there currently is no way of modifying it from user-space, and
> certainly no way to change the behaviour for existing devices.
> It should be relatively simple to set this flag whenever the host is
> exposed to a VM; we would still see the scsi devices, but the 'sd'
> driver won't be attached, so nothing will scan the device on the host.

Ah, nice, didn't know that. It would solve the undesired I/O problem in 
the host.
But it would not solve the so far somewhat unsynchronized state 
transitions of fc_rports on the host and their mirrors in the guest?

I would be very interested in how you intend to do transport recovery.

>> "Towards virtio-fc?"
>> Using the FCP_CMND_IU (instead of just a plain SCB as with virtio-scsi)
>> sounds promising to me as starting point.
>> A listener from the audience asked if you would also do ELS/CT in the
>> guest and you replied that this would not be good. Why is that?
>> Based on above starting point, doing ELS/CT (and basic aborts and maybe
>> a few other functions such as open/close ports or metadata transfer
>> commands) in the guest is exactly what I would have expected. An HBA
>> LLDD on the KVM host would implement such API and for all fc_hosts,
>> passed through this way, it would *not* establish any scsi_transport_fc
>> tree on the host. Instead the one virtio-vfc implementation in the guest
>> would do this independently of which HBA LLDD provides the passed-through
>> fc_host in the KVM host.
>> ELS/CT pass through is maybe even for free via FC_BSG for those LLDDs
>> that already implement it.
>> Rport open/close is just the analogon of slave_alloc()/slave_destroy().
>>
> I'm not convinced that moving to full virtio-fc is something we want or
> even can do.
> Neither qla2xxx nor lpfc allow for direct FC frame access; so one would
> need to reformat the FC frames into something the driver understands,
> just so that the hardware can transform it back into FC frames.

I thought of a more high-level para-virtualized FCP HBA interface rather
than FC frames (which did exist in kernel v2.4 under drivers/fc4/ but no
longer, as it seems). Just like in large parts of today's FCP LLDDs, the
driver handles scatter-gather lists and the framing is done by the hardware.

> Another thing is xid management; some drivers have to do their own xid
> management, based on hardware capabilities etc.
> So the xids in the FC frames would need to be re-written, making it hard
> if not impossible to match things up when the response comes in.

For such things, where the hardware exposes more details (than, say, 
zfcp sees) I thought the LLDD on the KVM host would handle such details 
internally and only expose the higher level interface to virtio-fc.

Maybe something roughly like the basic transport protocol part of 
ibmvfc/ibmvscsi (not the other end in the firmware and not the cross 
partition DMA part), if I understood its overall design correctly by 
quickly looking at the code.
I somewhat had the impression that zfcp isn't too far from this overall
operations style. Neither, it seems to me, are qla2xxx or lpfc; they just
see and need to handle some more low-level FC details.
Conceptually replace CRQ/RDMA or QDIO with virtio.
(And for ibmvscsi also: SRP => FCP because it uses a different SCSI 
transport.)

> And, more importantly, what's the gain here?
> Which feature do we miss?

Pros: Full transport_fc tree in guest (only) and reliable fc_rport state 
transitions done in the guest, where I would expect them as a user of a
passed-through fc_host. No effort needed for transport tree mirroring
and trying to get (mirrored) rport state transitions right.

Cons: Of course such a virtio-fc protocol/API would be a bit more than
just FCP_CMND_IU and FCP_RSP_IU. It's also: abort, open/close port, 
open/close LUN, ELS, CT, get HBA metadata, and a mechanism for 
unsolicited notifications from the HBA LLDD on the KVM host side mainly 
for link up/down. And any host LLDD interested in supporting it would 
need some modifications to implement such API.
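
Purely as an illustration, such an API might enumerate request types
along these lines (none of this exists today, it is just to give an
impression of the scope):

    enum virtio_fc_req_type {
            VIRTIO_FC_REQ_FCP_CMND,         /* FCP_CMND_IU/FCP_RSP_IU */
            VIRTIO_FC_REQ_ABORT,
            VIRTIO_FC_REQ_PORT_OPEN,
            VIRTIO_FC_REQ_PORT_CLOSE,
            VIRTIO_FC_REQ_LUN_OPEN,
            VIRTIO_FC_REQ_LUN_CLOSE,
            VIRTIO_FC_REQ_ELS,
            VIRTIO_FC_REQ_CT,
            VIRTIO_FC_REQ_HBA_INFO,
            VIRTIO_FC_EVT_LINK,             /* unsolicited link up/down */
    };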

Admittedly, with your currently limited transport tree property 
mirroring it is likely less effort to get a first working virtio-vfc. 
However, I would not call it fc_host pass through. To me it currently 
more looks like a bit of a "faked" (no offense) transport tree in the 
guest and the rest is close to today's virtio-scsi?
I'm just trying to understand where this would be going, in order to get 
a conceptual impression or classification in my brain.

-- 
Mit freundlichen Grüßen / Kind regards
Steffen Maier

Linux on z Systems Development

IBM Deutschland Research & Development GmbH
Vorsitzende des Aufsichtsrats: Martina Koederitz
Geschaeftsfuehrung: Dirk Wittkopp
Sitz der Gesellschaft: Boeblingen
Registergericht: Amtsgericht Stuttgart, HRB 243294

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] virtio-scsi: Add FC transport class
  2018-02-02 17:39           ` Steffen Maier
@ 2018-02-05  8:07             ` Hannes Reinecke
  0 siblings, 0 replies; 16+ messages in thread
From: Hannes Reinecke @ 2018-02-05  8:07 UTC (permalink / raw)
  To: Steffen Maier, Martin K. Petersen
  Cc: Christoph Hellwig, James Bottomley, Paolo Bonzini, linux-scsi,
	Hannes Reinecke

On 02/02/2018 06:39 PM, Steffen Maier wrote:
> 
> On 02/02/2018 05:00 PM, Hannes Reinecke wrote:
>> On 01/26/2018 05:54 PM, Steffen Maier wrote:
>>> On 12/18/2017 09:31 AM, Hannes Reinecke wrote:
>>>> On 12/15/2017 07:08 PM, Steffen Maier wrote:
>>>>> On 12/14/2017 11:11 AM, Hannes Reinecke wrote:
> 
>>> To me, this raises the question which properties of the host's FC
>>> (driver core) objects should be mirrored to the guest. Ideally all (and
>>> that's a lot).
>>> This in turn makes me wonder if mirroring is really desirable (e.g.
>>> considering the effort) or if only the guest should have its own FC
>>> object hierarchy which does _not_ exist on the KVM host in case an
>>> fc_host is passed through with virtio-(v)fc.
> 
>>> A few more thoughts on your presentation [1]:
>>>
>>> "Devices on the vport will not be visible on the host"
>>> I could not agree more to the design point that devices (or at least
>>> their descendant object subtree) passed through to a guest should not
>>> appear on the host!
>>> With virtio-blk or virtio-scsi, we have SCSI devices and thus disks
>>> visible in the host, which needlessly scans partitions, or even worse
>>> automatically scans for LVM and maybe even activates PVs/VGs/LVs. It's
>>> hard for a KVM host admin to suppress this (and not break the devices
>>> the host needs itself).
>>> If we mirror the host's scsi_transport_fc tree including fc_rports and
>>> thus SCSI devices etc., we would have the same problems?
>>> Even more so, dev_loss_tmo and fast_io_fail_tmo would run independently
>>> on the host and in the guest on the same mirrored scsi_transport_fc
>>> object tree. I can envision user confusion having configured timeouts on
>>> the "wrong" side (host vs. guest). Also we would still need a mechanism
>>> to mirror fc_rport (un)block from host to guest for proper transport
>>> recovery. In zfcp we try to recover on transport rather than scsi_eh
>>> whenever possible because it is so much smoother.
>>>
>> A similar thing can be achieved even today, by setting the
>> 'no_uld_attach' parameter when scanning the scsi device
>> (that's what some RAID HBAs do).
>> However, there currently is no way of modifying it from user-space, and
>> certainly no way to change the behaviour for existing devices.
>> It should be relatively simple to set this flag whenever the host is
>> exposed to a VM; we would still see the scsi devices, but the 'sd'
>> driver won't be attached, so nothing will scan the device on the host.
> 
> Ah, nice, didn't know that. It would solve the undesired I/O problem in
> the host.
> But it would not solve the so far somewhat unsynchronized state
> transitions of fc_rports on the host and their mirrors in the guest?
> 
> I would be very interested in how you intend to do transport recovery.
> 
So am I :-)

The current plan is to check fc-transport events, probably implementing a
listener which will then manage the interface to the guest.
I'm also looking at implementing a new block device which just gets a
WWPN as input and manages all associated scsi (sg) devices.
But that still needs some more work.
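
As a starting point, the fc transport class already broadcasts fc_host
events (posted via fc_host_post_event()) on the SCSI transport netlink
family, so a minimal host-side listener could look roughly like this
(a sketch, error handling omitted):

    #include <stdio.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>
    #include <scsi/scsi_netlink_fc.h>

    int main(void)
    {
            struct sockaddr_nl sa = {
                    .nl_family = AF_NETLINK,
                    .nl_groups = SCSI_NL_GRP_FC_EVENTS,
            };
            char buf[4096];
            int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SCSITRANSPORT);

            bind(fd, (struct sockaddr *)&sa, sizeof(sa));
            for (;;) {
                    struct nlmsghdr *nlh = (struct nlmsghdr *)buf;
                    struct fc_nl_event *ev;

                    if (recv(fd, buf, sizeof(buf), 0) <= 0)
                            break;
                    ev = (struct fc_nl_event *)NLMSG_DATA(nlh);
                    printf("host%u: fc event code %u, data 0x%x\n",
                           ev->host_no, ev->event_code, ev->event_data);
            }
            return 0;
    }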

>>> "Towards virtio-fc?"
[ .. ]
>> I'm not convinced that moving to full virtio-fc is something we want or
>> even can do.
>> Neither qla2xxx nor lpfc allow for direct FC frame access; so one would
>> need to reformat the FC frames into something the driver understands,
>> just so that the hardware can transform it back into FC frames.
> 
> I thought of a more high-level para-virtualized FCP HBA interface rather
> than FC frames (which did exist in kernel v2.4 under drivers/fc4/ but no
> longer, as it seems). Just like in large parts of today's FCP LLDDs, the
> driver handles scatter-gather lists and the framing is done by the hardware.
> 
But that would amount to yet another abstraction layer; not sure if it's
worth the trouble...

>> Another thing is xid management; some drivers have to do their own xid
>> management, based on hardware capabilities etc.
>> So the xids in the FC frames would need to be re-written, making it hard
>> if not impossible to match things up when the response comes in.
> 
> For such things, where the hardware exposes more details (than, say,
> zfcp sees) I thought the LLDD on the KVM host would handle such details
> internally and only expose the higher level interface to virtio-fc.
> 
Yes; but that's what I'm aiming for with my virtio-vfc thingie already :-)

> Maybe something roughly like the basic transport protocol part of
> ibmvfc/ibmvscsi (not the other end in the firmware and not the cross
> partition DMA part), if I understood its overall design correctly by
> quickly looking at the code.
> I somewhat had the impression that zfcp isn't too far from this overall
> operations style. Neither, it seems to me, are qla2xxx or lpfc; they just
> see and need to handle some more low-level FC details.
> Conceptually replace CRQ/RDMA or QDIO with virtio.
> (And for ibmvscsi also: SRP => FCP because it uses a different SCSI
> transport.)
> 
This is actually a different project; we're looking at extending libfc
to cover ibmvfc, too. But that's even further down the line.

>> And, more importantly, what's the gain here?
>> Which feature do we miss?
> 
> Pros: Full transport_fc tree in guest (only) and reliable fc_rport state
> transitions done in the guest, where I would expect them as a user of a
> passed-through fc_host. No effort needed for transport tree mirroring
> and trying to get (mirrored) rport state transitions right.
> 
> Cons: Of course such a virtio-fc protocol/API would be a bit more than
> just FCP_CMND_IU and FCP_RSP_IU. It's also: abort, open/close port,
> open/close LUN, ELS, CT, get HBA metadata, and a mechanism for
> unsolicited notifications from the HBA LLDD on the KVM host side mainly
> for link up/down. And any host LLDD interested in supporting it would
> need some modifications to implement such API.
> 
> Admittedly, with your currently limited transport tree property
> mirroring it is likely less effort to get a first working virtio-vfc.
> However, I would not call it fc_host pass through. To me it currently
> looks more like a bit of a "faked" (no offense) transport tree in the
> guest and the rest is close to today's virtio-scsi?
> I'm just trying to understand where this would be going, in order to get
> a conceptual impression or classification in my brain.
> 
Oh, it certainly is not fc_host pass through; indeed it _is_ a faked
transport tree. vfc is just a better name; less degrading :-)

And the overall idea here was to provide a _minimal_ emulation for
virtio to simulate an FC host in the KVM guest.

Basically, let's see if we can make it work without having to
reinvent yet another protocol, and layer it on top of the existing
virtio scsi transport.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2018-02-05  8:07 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-12-14 10:11 [PATCH 0/3] virtio-vfc implementation Hannes Reinecke
2017-12-14 10:11 ` [PATCH 1/3] virtio-scsi: implement target rescan Hannes Reinecke
2017-12-15 17:54   ` Steffen Maier
2017-12-14 10:11 ` [PATCH 2/3] virtio-scsi: Add FC transport class Hannes Reinecke
2017-12-15 18:08   ` Steffen Maier
2017-12-18  8:31     ` Hannes Reinecke
2018-01-26 16:54       ` Steffen Maier
2018-02-02 16:00         ` Hannes Reinecke
2018-02-02 17:39           ` Steffen Maier
2018-02-05  8:07             ` Hannes Reinecke
2017-12-14 10:11 ` [PATCH 3/3] virtio_scsi: Implement 'native LUN' feature Hannes Reinecke
2017-12-15 18:17   ` Steffen Maier
2017-12-18  7:48     ` Hannes Reinecke
2018-01-26 14:15       ` Steffen Maier
2018-01-26 15:22         ` Hannes Reinecke
2018-01-26 16:57         ` Steffen Maier
