All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] ibmvscsis driver rewrite
@ 2011-02-10 12:21 FUJITA Tomonori
  2011-02-10 12:21 ` [PATCH 1/3] libsrp: add srp_data_length helper function FUJITA Tomonori
                   ` (4 more replies)
  0 siblings, 5 replies; 81+ messages in thread
From: FUJITA Tomonori @ 2011-02-10 12:21 UTC (permalink / raw)
  To: linux-scsi; +Cc: nab, fujita.tomonori

Finally, I have the working driver. We are ready to remove the old
ibmvstgt driver.

This is for scsi-misc tree.

---
 drivers/scsi/ibmvscsi/Makefile    |    4 +-
 drivers/scsi/ibmvscsi/ibmvscsis.c | 1759 +++++++++++++++++++++++++++++++++++++
 drivers/scsi/libsrp.c             |   18 +-
 include/scsi/libsrp.h             |   10 +-
 4 files changed, 1778 insertions(+), 13 deletions(-)





^ permalink raw reply	[flat|nested] 81+ messages in thread

* [PATCH 1/3] libsrp: add srp_data_length helper function
  2011-02-10 12:21 [PATCH 0/3] ibmvscsis driver rewrite FUJITA Tomonori
@ 2011-02-10 12:21 ` FUJITA Tomonori
  2011-02-10 12:21 ` [PATCH 2/3] libsrp: fix dma_unmap_sg FUJITA Tomonori
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 81+ messages in thread
From: FUJITA Tomonori @ 2011-02-10 12:21 UTC (permalink / raw)
  To: linux-scsi; +Cc: nab, fujita.tomonori

The srp_data_length helper function returns the data transfer length
of an SRP_CMD request.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
---
 drivers/scsi/libsrp.c |   12 +++---------
 include/scsi/libsrp.h |   10 +++++++++-
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c
index ff6a28c..01c4010 100644
--- a/drivers/scsi/libsrp.c
+++ b/drivers/scsi/libsrp.c
@@ -30,13 +30,6 @@
 #include <scsi/srp.h>
 #include <scsi/libsrp.h>
 
-enum srp_task_attributes {
-	SRP_SIMPLE_TASK = 0,
-	SRP_HEAD_TASK = 1,
-	SRP_ORDERED_TASK = 2,
-	SRP_ACA_TASK = 4
-};
-
 /* tmp - will replace with SCSI logging stuff */
 #define eprintk(fmt, args...)					\
 do {								\
@@ -363,7 +356,7 @@ int srp_transfer_data(struct scsi_cmnd *sc, struct srp_cmd *cmd,
 }
 EXPORT_SYMBOL_GPL(srp_transfer_data);
 
-static int vscsis_data_length(struct srp_cmd *cmd, enum dma_data_direction dir)
+int srp_data_length(struct srp_cmd *cmd, enum dma_data_direction dir)
 {
 	struct srp_direct_buf *md;
 	struct srp_indirect_buf *id;
@@ -394,6 +387,7 @@ static int vscsis_data_length(struct srp_cmd *cmd, enum dma_data_direction dir)
 	}
 	return len;
 }
+EXPORT_SYMBOL_GPL(srp_data_length);
 
 int srp_cmd_queue(struct Scsi_Host *shost, struct srp_cmd *cmd, void *info,
 		  u64 itn_id, u64 addr)
@@ -418,7 +412,7 @@ int srp_cmd_queue(struct Scsi_Host *shost, struct srp_cmd *cmd, void *info,
 	}
 
 	dir = srp_cmd_direction(cmd);
-	len = vscsis_data_length(cmd, dir);
+	len = srp_data_length(cmd, dir);
 
 	dprintk("%p %x %lx %d %d %d %llx\n", info, cmd->cdb[0],
 		cmd->lun, dir, len, tag, (unsigned long long) cmd->tag);
diff --git a/include/scsi/libsrp.h b/include/scsi/libsrp.h
index f4105c9..f5ebdbf 100644
--- a/include/scsi/libsrp.h
+++ b/include/scsi/libsrp.h
@@ -7,6 +7,13 @@
 #include <scsi/scsi_host.h>
 #include <scsi/srp.h>
 
+enum srp_task_attributes {
+	SRP_SIMPLE_TASK = 0,
+	SRP_HEAD_TASK = 1,
+	SRP_ORDERED_TASK = 2,
+	SRP_ACA_TASK = 4
+};
+
 enum iue_flags {
 	V_DIOVER,
 	V_WRITE,
@@ -64,7 +71,6 @@ extern int srp_cmd_queue(struct Scsi_Host *, struct srp_cmd *, void *, u64, u64)
 extern int srp_transfer_data(struct scsi_cmnd *, struct srp_cmd *,
 			     srp_rdma_t, int, int);
 
-
 static inline struct srp_target *host_to_srp_target(struct Scsi_Host *host)
 {
 	return (struct srp_target *) host->hostdata;
@@ -75,4 +81,6 @@ static inline int srp_cmd_direction(struct srp_cmd *cmd)
 	return (cmd->buf_fmt >> 4) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
 }
 
+extern int srp_data_length(struct srp_cmd *cmd, enum dma_data_direction dir);
+
 #endif
-- 
1.7.2.3


^ permalink raw reply related	[flat|nested] 81+ messages in thread

* [PATCH 2/3] libsrp: fix dma_unmap_sg
  2011-02-10 12:21 [PATCH 0/3] ibmvscsis driver rewrite FUJITA Tomonori
  2011-02-10 12:21 ` [PATCH 1/3] libsrp: add srp_data_length helper function FUJITA Tomonori
@ 2011-02-10 12:21 ` FUJITA Tomonori
  2011-02-10 12:21 ` [PATCH 3/3] tcm ibmvscsis driver FUJITA Tomonori
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 81+ messages in thread
From: FUJITA Tomonori @ 2011-02-10 12:21 UTC (permalink / raw)
  To: linux-scsi; +Cc: nab, fujita.tomonori

We need to pass dma_unmap_sg the same number of scatterlist entries
that were originally passed to dma_map_sg.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
---
 drivers/scsi/libsrp.c |    6 ++++--
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c
index 01c4010..938e5e0 100644
--- a/drivers/scsi/libsrp.c
+++ b/drivers/scsi/libsrp.c
@@ -206,7 +206,8 @@ static int srp_direct_data(struct scsi_cmnd *sc, struct srp_direct_buf *md,
 	err = rdma_io(sc, sg, nsg, md, 1, dir, len);
 
 	if (dma_map)
-		dma_unmap_sg(iue->target->dev, sg, nsg, DMA_BIDIRECTIONAL);
+		dma_unmap_sg(iue->target->dev, sg, scsi_sg_count(sc),
+			     DMA_BIDIRECTIONAL);
 
 	return err;
 }
@@ -278,7 +279,8 @@ rdma:
 	err = rdma_io(sc, sg, nsg, md, nmd, dir, len);
 
 	if (dma_map)
-		dma_unmap_sg(iue->target->dev, sg, nsg, DMA_BIDIRECTIONAL);
+		dma_unmap_sg(iue->target->dev, sg, scsi_sg_count(sc),
+			     DMA_BIDIRECTIONAL);
 
 free_mem:
 	if (token && dma_map)
-- 
1.7.2.3


^ permalink raw reply related	[flat|nested] 81+ messages in thread

* [PATCH 3/3] tcm ibmvscsis driver
  2011-02-10 12:21 [PATCH 0/3] ibmvscsis driver rewrite FUJITA Tomonori
  2011-02-10 12:21 ` [PATCH 1/3] libsrp: add srp_data_length helper function FUJITA Tomonori
  2011-02-10 12:21 ` [PATCH 2/3] libsrp: fix dma_unmap_sg FUJITA Tomonori
@ 2011-02-10 12:21 ` FUJITA Tomonori
  2011-02-10 19:03   ` Nicholas A. Bellinger
                     ` (2 more replies)
  2011-02-10 18:34 ` [PATCH 0/3] ibmvscsis driver rewrite Nicholas A. Bellinger
       [not found] ` <4D53DE96.2020502@suse.de>
  4 siblings, 3 replies; 81+ messages in thread
From: FUJITA Tomonori @ 2011-02-10 12:21 UTC (permalink / raw)
  To: linux-scsi; +Cc: nab, fujita.tomonori

This replaces ibmvstgt driver.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
---
 drivers/scsi/ibmvscsi/Makefile    |    4 +-
 drivers/scsi/ibmvscsi/ibmvscsis.c | 1759 +++++++++++++++++++++++++++++++++++++
 2 files changed, 1762 insertions(+), 1 deletions(-)
 create mode 100644 drivers/scsi/ibmvscsi/ibmvscsis.c

diff --git a/drivers/scsi/ibmvscsi/Makefile b/drivers/scsi/ibmvscsi/Makefile
index a423d96..a615ea5 100644
--- a/drivers/scsi/ibmvscsi/Makefile
+++ b/drivers/scsi/ibmvscsi/Makefile
@@ -1,8 +1,10 @@
+EXTRA_CFLAGS += -I$(srctree)/drivers/target/
+
 obj-$(CONFIG_SCSI_IBMVSCSI)	+= ibmvscsic.o
 
 ibmvscsic-y			+= ibmvscsi.o
 ibmvscsic-$(CONFIG_PPC_ISERIES)	+= iseries_vscsi.o 
 ibmvscsic-$(CONFIG_PPC_PSERIES)	+= rpa_vscsi.o 
 
-obj-$(CONFIG_SCSI_IBMVSCSIS)	+= ibmvstgt.o
+obj-$(CONFIG_SCSI_IBMVSCSIS)	+= ibmvscsis.o
 obj-$(CONFIG_SCSI_IBMVFC)	+= ibmvfc.o
diff --git a/drivers/scsi/ibmvscsi/ibmvscsis.c b/drivers/scsi/ibmvscsi/ibmvscsis.c
new file mode 100644
index 0000000..1ab4d73
--- /dev/null
+++ b/drivers/scsi/ibmvscsi/ibmvscsis.c
@@ -0,0 +1,1759 @@
+/*
+ * IBM eServer i/pSeries Virtual SCSI Target Driver
+ * Copyright (C) 2003-2005 Dave Boutcher (boutcher@us.ibm.com) IBM Corp.
+ *			   Santiago Leon (santil@us.ibm.com) IBM Corp.
+ *			   Linda Xie (lxie@us.ibm.com) IBM Corp.
+ *
+ * Copyright (C) 2005-2011 FUJITA Tomonori <tomof@acm.org>
+ * Copyright (C) 2010 Nicholas A. Bellinger <nab@kernel.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ */
+#include <linux/slab.h>
+#include <linux/kthread.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/utsname.h>
+#include <asm/unaligned.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/libsrp.h>
+#include <generated/utsrelease.h>
+
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+#include <target/target_core_fabric_ops.h>
+#include <target/target_core_fabric_lib.h>
+#include <target/target_core_fabric_configfs.h>
+#include <target/target_core_device.h>
+#include <target/target_core_tpg.h>
+#include <target/target_core_configfs.h>
+
+#include <asm/hvcall.h>
+#include <asm/iommu.h>
+#include <asm/prom.h>
+#include <asm/vio.h>
+
+#include "ibmvscsi.h"
+#include "viosrp.h"
+
+#define IBMVSCSIS_VERSION  "v0.1"
+#define IBMVSCSIS_NAMELEN 32
+
+#define	INITIAL_SRP_LIMIT	16
+#define	DEFAULT_MAX_SECTORS	256
+
+/*
+ * Hypervisor calls.
+ */
+#define h_copy_rdma(l, sa, sb, da, db) \
+			plpar_hcall_norets(H_COPY_RDMA, l, sa, sb, da, db)
+#define h_send_crq(ua, l, h) \
+			plpar_hcall_norets(H_SEND_CRQ, ua, l, h)
+#define h_reg_crq(ua, tok, sz)\
+			plpar_hcall_norets(H_REG_CRQ, ua, tok, sz);
+#define h_free_crq(ua) \
+			plpar_hcall_norets(H_FREE_CRQ, ua);
+
+#define GETTARGET(x) ((int)((((u64)(x)) >> 56) & 0x003f))
+#define GETBUS(x) ((int)((((u64)(x)) >> 53) & 0x0007))
+#define GETLUN(x) ((int)((((u64)(x)) >> 48) & 0x001f))
+
+/*
+ * These are fixed for the system and come from the Open Firmware device tree.
+ * We just store them here to save getting them every time.
+ */
+static char system_id[64] = "";
+static char partition_name[97] = "UNKNOWN";
+static unsigned int partition_number = -1;
+
+static LIST_HEAD(tpg_list);
+static DEFINE_SPINLOCK(tpg_lock);
+
+struct ibmvscsis_adapter {
+	struct vio_dev *dma_dev;
+	struct list_head siblings;
+
+	struct crq_queue crq_queue;
+
+	struct work_struct crq_work;
+
+	unsigned long liobn;
+	unsigned long riobn;
+
+	/* todo: remove */
+	struct srp_target srpt;
+
+	/* SRP port target portal group tag for TCM */
+	unsigned long tport_tpgt;
+
+	/* Returned by ibmvscsis_make_tpg() */
+	struct se_portal_group se_tpg;
+
+	struct se_session *se_sess;
+
+
+	/* SCSI protocol the tport is providing */
+	u8 tport_proto_id;
+	/* Binary World Wide unique Port Name for SRP Target port */
+	u64 tport_wwpn;
+	/* ASCII formatted WWPN for SRP Target port */
+	char tport_name[IBMVSCSIS_NAMELEN];
+	/* Returned by ibmvscsis_make_tport() */
+	struct se_wwn tport_wwn;
+};
+
+struct ibmvscsis_cmnd {
+	/* Used for libsrp processing callbacks */
+	struct scsi_cmnd sc;
+	/* Used for TCM Core operations */
+	struct se_cmd se_cmd;
+	/* Sense buffer that will be mapped into outgoing status */
+	unsigned char sense_buf[TRANSPORT_SENSE_BUFFER];
+};
+
+static int ibmvscsis_check_true(struct se_portal_group *se_tpg)
+{
+	return 1;
+}
+
+static int ibmvscsis_check_false(struct se_portal_group *se_tpg)
+{
+	return 0;
+}
+
+static char *ibmvscsis_get_fabric_name(void)
+{
+	return "ibmvscsis";
+}
+
+static u8 ibmvscsis_get_fabric_proto_ident(struct se_portal_group *se_tpg)
+{
+	return 4;
+}
+
+static char *ibmvscsis_get_fabric_wwn(struct se_portal_group *se_tpg)
+{
+	struct ibmvscsis_adapter *adapter =
+		container_of(se_tpg, struct ibmvscsis_adapter, se_tpg);
+
+	return adapter->tport_name;
+}
+
+static u16 ibmvscsis_get_tag(struct se_portal_group *se_tpg)
+{
+	struct ibmvscsis_adapter *adapter =
+		container_of(se_tpg, struct ibmvscsis_adapter, se_tpg);
+	return adapter->tport_tpgt;
+}
+
+static u32 ibmvscsis_get_default_depth(struct se_portal_group *se_tpg)
+{
+	return 1;
+}
+
+/* we don't care about the transport id since we never use pr. */
+static u32 ibmvscsis_get_pr_transport_id(struct se_portal_group *se_tpg,
+					 struct se_node_acl *se_nacl,
+					 struct t10_pr_registration *pr_reg,
+					 int *format_code,
+					 unsigned char *buf)
+{
+	return 24;
+}
+
+static u32 ibmvscsis_get_pr_transport_id_len(struct se_portal_group *se_tpg,
+					     struct se_node_acl *se_nacl,
+					     struct t10_pr_registration *pr_reg,
+					     int *format_code)
+{
+	return 24;
+}
+
+static char *ibmvscsis_parse_pr_out_transport_id(struct se_portal_group *se_tpg,
+						 const char *buf,
+						 u32 *out_tid_len,
+						 char **port_nexus_ptr)
+{
+	return NULL;
+}
+
+struct ibmvscsis_nacl {
+	/* Binary World Wide unique Port Name for SRP Initiator port */
+	u64 iport_wwpn;
+	/* ASCII formatted WWPN for Sas Initiator port */
+	char iport_name[IBMVSCSIS_NAMELEN];
+	/* Returned by ibmvscsis_make_nodeacl() */
+	struct se_node_acl se_node_acl;
+};
+
+static struct se_node_acl *ibmvscsis_alloc_fabric_acl(struct se_portal_group *se_tpg)
+{
+	struct ibmvscsis_nacl *nacl;
+
+	nacl = kzalloc(sizeof(struct ibmvscsis_nacl), GFP_KERNEL);
+	if (!(nacl)) {
+		printk(KERN_ERR "Unable to alocate struct ibmvscsis_nacl\n");
+		return NULL;
+	}
+
+	return &nacl->se_node_acl;
+}
+
+static void ibmvscsis_release_fabric_acl(struct se_portal_group *se_tpg,
+					 struct se_node_acl *se_nacl)
+{
+	struct ibmvscsis_nacl *nacl = container_of(se_nacl,
+			struct ibmvscsis_nacl, se_node_acl);
+	kfree(nacl);
+}
+
+static u32 ibmvscsis_tpg_get_inst_index(struct se_portal_group *se_tpg)
+{
+	return 1;
+}
+
+static void ibmvscsis_release_cmd(struct se_cmd *se_cmd)
+{
+	struct ibmvscsis_cmnd *cmd =
+		container_of(se_cmd, struct ibmvscsis_cmnd, se_cmd);
+	kfree(cmd);
+	return;
+}
+
+static int ibmvscsis_shutdown_session(struct se_session *se_sess)
+{
+	return 0;
+}
+
+static void ibmvscsis_close_session(struct se_session *se_sess)
+{
+	return;
+}
+
+static void ibmvscsis_stop_session(struct se_session *se_sess,
+				   int sess_sleep , int conn_sleep)
+{
+	return;
+}
+
+static void ibmvscsis_reset_nexus(struct se_session *se_sess)
+{
+	return;
+}
+
+static int ibmvscsis_sess_logged_in(struct se_session *se_sess)
+{
+	return 0;
+}
+
+static u32 ibmvscsis_sess_get_index(struct se_session *se_sess)
+{
+	return 0;
+}
+
+static int ibmvscsis_write_pending_status(struct se_cmd *se_cmd)
+{
+	return 0;
+}
+
+static void ibmvscsis_set_default_node_attrs(struct se_node_acl *nacl)
+{
+	return;
+}
+
+static u32 ibmvscsis_get_task_tag(struct se_cmd *se_cmd)
+{
+	return 0;
+}
+
+static int ibmvscsis_get_cmd_state(struct se_cmd *se_cmd)
+{
+	return 0;
+}
+
+static void ibmvscsis_new_cmd_failure(struct se_cmd *se_cmd)
+{
+	return;
+}
+
+static int ibmvscsis_queue_tm_rsp(struct se_cmd *se_cmd)
+{
+	return 0;
+}
+
+static u16 ibmvscsis_set_fabric_sense_len(struct se_cmd *se_cmd,
+					  u32 sense_length)
+{
+	return 0;
+}
+
+static u16 ibmvscsis_get_fabric_sense_len(void)
+{
+	return 0;
+}
+
+static int ibmvscsis_is_state_remove(struct se_cmd *se_cmd)
+{
+	return 0;
+}
+
+static u64 make_lun(unsigned int bus, unsigned int target, unsigned int lun);
+
+static u64 ibmvscsis_pack_lun(unsigned int lun)
+{
+	return make_lun(0, lun & 0x003f, 0);
+}
+
+/* Local pointer to allocated TCM configfs fabric module */
+static struct target_fabric_configfs *ibmvscsis_fabric_configfs;
+
+static struct se_portal_group *ibmvscsis_make_tpg(struct se_wwn *wwn,
+						  struct config_group *group,
+						  const char *name)
+{
+	struct ibmvscsis_adapter *adapter =
+		container_of(wwn, struct ibmvscsis_adapter, tport_wwn);
+	struct se_node_acl *acl;
+	int ret;
+	char *dname = (char *)dev_name(&adapter->dma_dev->dev);
+
+	if (strncmp(name, "tpgt_1", 6))
+		return ERR_PTR(-EINVAL);
+
+	ret = core_tpg_register(&ibmvscsis_fabric_configfs->tf_ops, wwn,
+				&adapter->se_tpg, (void *)adapter,
+				TRANSPORT_TPG_TYPE_NORMAL);
+	if (ret)
+		return ERR_PTR(-ENOMEM);
+
+	adapter->se_sess = transport_init_session();
+	if (!adapter->se_sess) {
+		core_tpg_deregister(&adapter->se_tpg);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	acl = core_tpg_check_initiator_node_acl(&adapter->se_tpg, dname);
+	if (!acl) {
+		transport_free_session(adapter->se_sess);
+		adapter->se_sess = NULL;
+		return ERR_PTR(-ENOMEM);
+	}
+	adapter->se_sess->se_node_acl = acl;
+
+	transport_register_session(&adapter->se_tpg,
+				   adapter->se_sess->se_node_acl,
+				   adapter->se_sess, adapter);
+
+	return &adapter->se_tpg;
+}
+
+static void ibmvscsis_drop_tpg(struct se_portal_group *se_tpg)
+{
+	struct ibmvscsis_adapter *adapter =
+		container_of(se_tpg, struct ibmvscsis_adapter, se_tpg);
+	unsigned long flags;
+
+
+	transport_deregister_session_configfs(adapter->se_sess);
+	transport_free_session(adapter->se_sess);
+	core_tpg_deregister(se_tpg);
+
+	spin_lock_irqsave(&tpg_lock, flags);
+	adapter->se_sess = NULL;
+	spin_unlock_irqrestore(&tpg_lock, flags);
+}
+
+static struct se_wwn *ibmvscsis_make_tport(struct target_fabric_configfs *tf,
+					   struct config_group *group,
+					   const char *name)
+{
+	struct ibmvscsis_adapter *adapter;
+	unsigned long tpgt, flags;
+
+	if (strict_strtoul(name, 10, &tpgt))
+		return NULL;
+
+	spin_lock_irqsave(&tpg_lock, flags);
+	list_for_each_entry(adapter, &tpg_list, siblings) {
+		if (tpgt == adapter->tport_tpgt)
+			goto found;
+	}
+
+	spin_unlock_irqrestore(&tpg_lock, flags);
+	return NULL;
+found:
+	spin_unlock_irqrestore(&tpg_lock, flags);
+
+	return &adapter->tport_wwn;
+}
+
+static void ibmvscsis_drop_tport(struct se_wwn *wwn)
+{
+}
+
+static ssize_t ibmvscsis_wwn_show_attr_version(struct target_fabric_configfs *tf,
+					       char *page)
+{
+	return sprintf(page, "IBMVSCSIS fabric module %s on %s/%s"
+		"on "UTS_RELEASE"\n", IBMVSCSIS_VERSION, utsname()->sysname,
+		utsname()->machine);
+}
+
+TF_WWN_ATTR_RO(ibmvscsis, version);
+
+static struct configfs_attribute *ibmvscsis_wwn_attrs[] = {
+	&ibmvscsis_wwn_version.attr,
+	NULL,
+};
+
+static int ibmvscsis_write_pending(struct se_cmd *se_cmd);
+static int ibmvscsis_queue_data_in(struct se_cmd *se_cmd);
+static int ibmvscsis_queue_status(struct se_cmd *se_cmd);
+static int ibmvscsis_new_cmd_map(struct se_cmd *se_cmd);
+static void ibmvscsis_check_stop_free(struct se_cmd *se_cmd);
+
+static struct target_core_fabric_ops ibmvscsis_ops = {
+	.task_sg_chaining		= 1,
+	.get_fabric_name		= ibmvscsis_get_fabric_name,
+	.get_fabric_proto_ident		= ibmvscsis_get_fabric_proto_ident,
+	.tpg_get_wwn			= ibmvscsis_get_fabric_wwn,
+	.tpg_get_tag			= ibmvscsis_get_tag,
+	.tpg_get_default_depth		= ibmvscsis_get_default_depth,
+	.tpg_get_pr_transport_id	= ibmvscsis_get_pr_transport_id,
+	.tpg_get_pr_transport_id_len	= ibmvscsis_get_pr_transport_id_len,
+	.tpg_parse_pr_out_transport_id	= ibmvscsis_parse_pr_out_transport_id,
+	.tpg_check_demo_mode		= ibmvscsis_check_true,
+	.tpg_check_demo_mode_cache	= ibmvscsis_check_true,
+	.tpg_check_demo_mode_write_protect = ibmvscsis_check_false,
+	.tpg_check_prod_mode_write_protect = ibmvscsis_check_false,
+	.tpg_alloc_fabric_acl		= ibmvscsis_alloc_fabric_acl,
+	.tpg_release_fabric_acl		= ibmvscsis_release_fabric_acl,
+	.tpg_get_inst_index		= ibmvscsis_tpg_get_inst_index,
+	.new_cmd_map			= ibmvscsis_new_cmd_map,
+	.check_stop_free		= ibmvscsis_check_stop_free,
+	.release_cmd_to_pool		= ibmvscsis_release_cmd,
+	.release_cmd_direct		= ibmvscsis_release_cmd,
+	.shutdown_session		= ibmvscsis_shutdown_session,
+	.close_session			= ibmvscsis_close_session,
+	.stop_session			= ibmvscsis_stop_session,
+	.fall_back_to_erl0		= ibmvscsis_reset_nexus,
+	.sess_logged_in			= ibmvscsis_sess_logged_in,
+	.sess_get_index			= ibmvscsis_sess_get_index,
+	.sess_get_initiator_sid		= NULL,
+	.write_pending			= ibmvscsis_write_pending,
+	.write_pending_status		= ibmvscsis_write_pending_status,
+	.set_default_node_attributes	= ibmvscsis_set_default_node_attrs,
+	.get_task_tag			= ibmvscsis_get_task_tag,
+	.get_cmd_state			= ibmvscsis_get_cmd_state,
+	.new_cmd_failure		= ibmvscsis_new_cmd_failure,
+	.queue_data_in			= ibmvscsis_queue_data_in,
+	.queue_status			= ibmvscsis_queue_status,
+	.queue_tm_rsp			= ibmvscsis_queue_tm_rsp,
+	.get_fabric_sense_len		= ibmvscsis_get_fabric_sense_len,
+	.set_fabric_sense_len		= ibmvscsis_set_fabric_sense_len,
+	.is_state_remove		= ibmvscsis_is_state_remove,
+	.pack_lun			= ibmvscsis_pack_lun,
+	.fabric_make_wwn		= ibmvscsis_make_tport,
+	.fabric_drop_wwn		= ibmvscsis_drop_tport,
+	.fabric_make_tpg		= ibmvscsis_make_tpg,
+	.fabric_drop_tpg		= ibmvscsis_drop_tpg,
+	.fabric_post_link		= NULL,
+	.fabric_pre_unlink		= NULL,
+	.fabric_make_np			= NULL,
+	.fabric_drop_np			= NULL,
+	.fabric_make_nodeacl		= NULL,
+	.fabric_drop_nodeacl		= NULL,
+};
+
+static inline union viosrp_iu *vio_iu(struct iu_entry *iue)
+{
+	return (union viosrp_iu *)(iue->sbuf->buf);
+}
+
+static int send_iu(struct iu_entry *iue, u64 length, u8 format)
+{
+	struct srp_target *target = iue->target;
+	struct ibmvscsis_adapter *adapter = target->ldata;
+	long rc, rc1;
+	union {
+		struct viosrp_crq cooked;
+		u64 raw[2];
+	} crq;
+
+	/* First copy the SRP */
+	rc = h_copy_rdma(length, adapter->liobn, iue->sbuf->dma,
+			 adapter->riobn, iue->remote_token);
+
+	if (rc)
+		printk(KERN_ERR "Error %ld transferring data\n", rc);
+
+	crq.cooked.valid = 0x80;
+	crq.cooked.format = format;
+	crq.cooked.reserved = 0x00;
+	crq.cooked.timeout = 0x00;
+	crq.cooked.IU_length = length;
+	crq.cooked.IU_data_ptr = vio_iu(iue)->srp.rsp.tag;
+
+	if (rc == 0)
+		crq.cooked.status = 0x99;	/* Just needs to be non-zero */
+	else
+		crq.cooked.status = 0x00;
+
+	rc1 = h_send_crq(adapter->dma_dev->unit_address, crq.raw[0],
+			 crq.raw[1]);
+	if (rc1) {
+		printk(KERN_ERR "%ld sending response\n", rc1);
+		return rc1;
+	}
+
+	return rc;
+}
+
+#define SRP_RSP_SENSE_DATA_LEN	18
+
+static int send_rsp(struct iu_entry *iue, struct scsi_cmnd *sc,
+		    unsigned char status, unsigned char asc)
+{
+	union viosrp_iu *iu = vio_iu(iue);
+	uint64_t tag = iu->srp.rsp.tag;
+
+	/* If the linked bit is on and status is good */
+	if (test_bit(V_LINKED, &iue->flags) && (status == NO_SENSE))
+		status = 0x10;
+
+	memset(iu, 0, sizeof(struct srp_rsp));
+	iu->srp.rsp.opcode = SRP_RSP;
+	iu->srp.rsp.req_lim_delta = 1;
+	iu->srp.rsp.tag = tag;
+
+	if (test_bit(V_DIOVER, &iue->flags))
+		iu->srp.rsp.flags |= SRP_RSP_FLAG_DIOVER;
+
+	iu->srp.rsp.data_in_res_cnt = 0;
+	iu->srp.rsp.data_out_res_cnt = 0;
+
+	iu->srp.rsp.flags &= ~SRP_RSP_FLAG_RSPVALID;
+
+	iu->srp.rsp.resp_data_len = 0;
+	iu->srp.rsp.status = status;
+	if (status) {
+		uint8_t *sense = iu->srp.rsp.data;
+
+		if (sc) {
+			iu->srp.rsp.flags |= SRP_RSP_FLAG_SNSVALID;
+			iu->srp.rsp.sense_data_len = SCSI_SENSE_BUFFERSIZE;
+			memcpy(sense, sc->sense_buffer, SCSI_SENSE_BUFFERSIZE);
+		} else {
+			iu->srp.rsp.status = SAM_STAT_CHECK_CONDITION;
+			iu->srp.rsp.flags |= SRP_RSP_FLAG_SNSVALID;
+			iu->srp.rsp.sense_data_len = SRP_RSP_SENSE_DATA_LEN;
+
+			/* Valid bit and 'current errors' */
+			sense[0] = (0x1 << 7 | 0x70);
+			/* Sense key */
+			sense[2] = status;
+			/* Additional sense length */
+			sense[7] = 0xa;	/* 10 bytes */
+			/* Additional sense code */
+			sense[12] = asc;
+		}
+	}
+
+	send_iu(iue, sizeof(iu->srp.rsp) + SRP_RSP_SENSE_DATA_LEN,
+		VIOSRP_SRP_FORMAT);
+
+	return 0;
+}
+
+static int send_adapter_info(struct iu_entry *iue,
+			     dma_addr_t remote_buffer, u16 length)
+{
+	struct srp_target *target = iue->target;
+	struct ibmvscsis_adapter *adapter = target->ldata;
+	dma_addr_t data_token;
+	struct mad_adapter_info_data *info;
+	int err;
+
+	info = dma_alloc_coherent(&adapter->dma_dev->dev, sizeof(*info),
+				  &data_token, GFP_KERNEL);
+	if (!info) {
+		printk(KERN_ERR "bad dma_alloc_coherent %p\n", target);
+		return 1;
+	}
+
+	/* Get remote info */
+	err = h_copy_rdma(sizeof(*info), adapter->riobn, remote_buffer,
+			  adapter->liobn, data_token);
+	if (err == H_SUCCESS) {
+		printk(KERN_INFO "Client connect: %s (%d)\n",
+		       info->partition_name, info->partition_number);
+	}
+
+	memset(info, 0, sizeof(*info));
+
+	strcpy(info->srp_version, "16.a");
+	strncpy(info->partition_name, partition_name,
+		sizeof(info->partition_name));
+	info->partition_number = partition_number;
+	info->mad_version = 1;
+	info->os_type = 2;
+	info->port_max_txu[0] = DEFAULT_MAX_SECTORS << 9;
+
+	/* Send our info to remote */
+	err = h_copy_rdma(sizeof(*info), adapter->liobn, data_token,
+			  adapter->riobn, remote_buffer);
+
+	dma_free_coherent(&adapter->dma_dev->dev, sizeof(*info), info,
+			  data_token);
+	if (err != H_SUCCESS) {
+		printk(KERN_INFO "Error sending adapter info %d\n", err);
+		return 1;
+	}
+
+	return 0;
+}
+
+static int process_mad_iu(struct iu_entry *iue)
+{
+	union viosrp_iu *iu = vio_iu(iue);
+	struct viosrp_adapter_info *info;
+	struct viosrp_host_config *conf;
+
+	switch (iu->mad.empty_iu.common.type) {
+	case VIOSRP_EMPTY_IU_TYPE:
+		printk(KERN_ERR "%s\n", "Unsupported EMPTY MAD IU");
+		break;
+	case VIOSRP_ERROR_LOG_TYPE:
+		printk(KERN_ERR "%s\n", "Unsupported ERROR LOG MAD IU");
+		iu->mad.error_log.common.status = 1;
+		send_iu(iue, sizeof(iu->mad.error_log),	VIOSRP_MAD_FORMAT);
+		break;
+	case VIOSRP_ADAPTER_INFO_TYPE:
+		info = &iu->mad.adapter_info;
+		info->common.status = send_adapter_info(iue, info->buffer,
+							info->common.length);
+		send_iu(iue, sizeof(*info), VIOSRP_MAD_FORMAT);
+		break;
+	case VIOSRP_HOST_CONFIG_TYPE:
+		conf = &iu->mad.host_config;
+		conf->common.status = 1;
+		send_iu(iue, sizeof(*conf), VIOSRP_MAD_FORMAT);
+		break;
+	default:
+		printk(KERN_ERR "Unknown type %u\n", iu->srp.rsp.opcode);
+	}
+
+	return 1;
+}
+
+static void process_login(struct iu_entry *iue)
+{
+	union viosrp_iu *iu = vio_iu(iue);
+	struct srp_login_rsp *rsp = &iu->srp.login_rsp;
+	u64 tag = iu->srp.rsp.tag;
+
+	/* TODO handle case that requested size is wrong and
+	 * buffer format is wrong
+	 */
+	memset(iu, 0, sizeof(struct srp_login_rsp));
+	rsp->opcode = SRP_LOGIN_RSP;
+	rsp->req_lim_delta = INITIAL_SRP_LIMIT;
+	rsp->tag = tag;
+	rsp->max_it_iu_len = sizeof(union srp_iu);
+	rsp->max_ti_iu_len = sizeof(union srp_iu);
+	/* direct and indirect */
+	rsp->buf_fmt = SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT;
+
+	send_iu(iue, sizeof(*rsp), VIOSRP_SRP_FORMAT);
+}
+
+static int process_srp_iu(struct iu_entry *iue)
+{
+	union viosrp_iu *iu = vio_iu(iue);
+	struct srp_target *target = iue->target;
+	int done = 1;
+	u8 opcode = iu->srp.rsp.opcode;
+	unsigned long flags;
+
+	switch (opcode) {
+	case SRP_LOGIN_REQ:
+		process_login(iue);
+		break;
+	case SRP_TSK_MGMT:
+		/* done = process_tsk_mgmt(iue); */
+		break;
+	case SRP_CMD:
+		spin_lock_irqsave(&target->lock, flags);
+		list_add_tail(&iue->ilist, &target->cmd_queue);
+		spin_unlock_irqrestore(&target->lock, flags);
+		done = 0;
+		break;
+	case SRP_LOGIN_RSP:
+	case SRP_I_LOGOUT:
+	case SRP_T_LOGOUT:
+	case SRP_RSP:
+	case SRP_CRED_REQ:
+	case SRP_CRED_RSP:
+	case SRP_AER_REQ:
+	case SRP_AER_RSP:
+		printk(KERN_ERR "Unsupported type %u\n", opcode);
+		break;
+	default:
+		printk(KERN_ERR "Unknown type %u\n", opcode);
+	}
+
+	return done;
+}
+
+static void process_iu(struct viosrp_crq *crq,
+		       struct ibmvscsis_adapter *adapter)
+{
+	struct iu_entry *iue;
+	long err;
+	int done = 1;
+
+	iue = srp_iu_get(&adapter->srpt);
+	if (!iue) {
+		printk(KERN_ERR "Error getting IU from pool\n");
+		return;
+	}
+
+	iue->remote_token = crq->IU_data_ptr;
+
+	err = h_copy_rdma(crq->IU_length, adapter->riobn,
+			  iue->remote_token, adapter->liobn, iue->sbuf->dma);
+
+	if (err != H_SUCCESS) {
+		printk(KERN_ERR "%ld transferring data error %p\n", err, iue);
+		goto out;
+	}
+
+	if (crq->format == VIOSRP_MAD_FORMAT)
+		done = process_mad_iu(iue);
+	else
+		done = process_srp_iu(iue);
+out:
+	if (done)
+		srp_iu_put(iue);
+}
+
+static void process_crq(struct viosrp_crq *crq,
+			struct ibmvscsis_adapter *adapter)
+{
+	switch (crq->valid) {
+	case 0xC0:
+		/* initialization */
+		switch (crq->format) {
+		case 0x01:
+			h_send_crq(adapter->dma_dev->unit_address,
+				   0xC002000000000000, 0);
+			break;
+		case 0x02:
+			break;
+		default:
+			printk(KERN_ERR "Unknown format %u\n", crq->format);
+		}
+		break;
+	case 0xFF:
+		/* transport event */
+		break;
+	case 0x80:
+		/* real payload */
+		switch (crq->format) {
+		case VIOSRP_SRP_FORMAT:
+		case VIOSRP_MAD_FORMAT:
+			process_iu(crq, adapter);
+			break;
+		case VIOSRP_OS400_FORMAT:
+		case VIOSRP_AIX_FORMAT:
+		case VIOSRP_LINUX_FORMAT:
+		case VIOSRP_INLINE_FORMAT:
+			printk(KERN_ERR "Unsupported format %u\n", crq->format);
+			break;
+		default:
+			printk(KERN_ERR "Unknown format %u\n", crq->format);
+		}
+		break;
+	default:
+		printk(KERN_ERR "unknown message type 0x%02x!?\n", crq->valid);
+	}
+}
+
+static inline struct viosrp_crq *next_crq(struct crq_queue *queue)
+{
+	struct viosrp_crq *crq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&queue->lock, flags);
+	crq = &queue->msgs[queue->cur];
+	if (crq->valid & 0x80) {
+		if (++queue->cur == queue->size)
+			queue->cur = 0;
+	} else
+		crq = NULL;
+	spin_unlock_irqrestore(&queue->lock, flags);
+
+	return crq;
+}
+
+static int tcm_queuecommand(struct ibmvscsis_adapter *adapter,
+			    struct ibmvscsis_cmnd *vsc,
+			    struct srp_cmd *cmd)
+{
+	struct se_cmd *se_cmd;
+	int attr;
+	int data_len;
+	int ret;
+
+	switch (cmd->task_attr) {
+	case SRP_SIMPLE_TASK:
+		attr = MSG_SIMPLE_TAG;
+		break;
+	case SRP_ORDERED_TASK:
+		attr = MSG_ORDERED_TAG;
+		break;
+	case SRP_HEAD_TASK:
+		attr = MSG_HEAD_TAG;
+		break;
+	default:
+		printk(KERN_WARNING "Task attribute %d not supported\n",
+		       cmd->task_attr);
+		attr = MSG_SIMPLE_TAG;
+	}
+
+	data_len = srp_data_length(cmd, srp_cmd_direction(cmd));
+
+	se_cmd = &vsc->se_cmd;
+
+	transport_init_se_cmd(se_cmd,
+			      adapter->se_tpg.se_tpg_tfo,
+			      adapter->se_sess, data_len,
+			      srp_cmd_direction(cmd),
+			      attr, vsc->sense_buf);
+
+	ret = transport_get_lun_for_cmd(se_cmd, NULL, cmd->lun);
+	if (ret) {
+		printk(KERN_ERR "invalid lun %u\n", GETLUN(cmd->lun));
+		transport_send_check_condition_and_sense(se_cmd,
+							 se_cmd->scsi_sense_reason,
+							 0);
+		return ret;
+	}
+
+	transport_device_setup_cmd(se_cmd);
+	transport_generic_handle_cdb_map(se_cmd);
+
+	return 0;
+}
+
+static int ibmvscsis_new_cmd_map(struct se_cmd *se_cmd)
+{
+	struct ibmvscsis_cmnd *cmd =
+		container_of(se_cmd, struct ibmvscsis_cmnd, se_cmd);
+	struct scsi_cmnd *sc = &cmd->sc;
+	struct iu_entry *iue = (struct iu_entry *)sc->SCp.ptr;
+	struct srp_cmd *scmd = iue->sbuf->buf;
+	int ret;
+
+	/*
+	 * Allocate the necessary tasks to complete the received CDB+data
+	 */
+	ret = transport_generic_allocate_tasks(se_cmd, scmd->cdb);
+	if (ret == -1) {
+		/* Out of Resources */
+		return PYX_TRANSPORT_LU_COMM_FAILURE;
+	} else if (ret == -2) {
+		/*
+		 * Handle case for SAM_STAT_RESERVATION_CONFLICT
+		 */
+		if (se_cmd->se_cmd_flags & SCF_SCSI_RESERVATION_CONFLICT)
+			return PYX_TRANSPORT_RESERVATION_CONFLICT;
+		/*
+		 * Otherwise, return SAM_STAT_CHECK_CONDITION and return
+		 * sense data
+		 */
+		return PYX_TRANSPORT_USE_SENSE_REASON;
+	}
+
+	return 0;
+}
+
+static void ibmvscsis_check_stop_free(struct se_cmd *se_cmd)
+{
+	if (se_cmd->se_tmr_req)
+		return;
+	transport_generic_free_cmd(se_cmd, 0, 1, 0);
+}
+
+static u64 scsi_lun_to_int(u64 lun)
+{
+	if (GETBUS(lun) || GETLUN(lun))
+		return ~0UL;
+	else
+		return GETTARGET(lun);
+}
+
+/*
+ * Standard INQUIRY response payload built by ibmvscsis_inquery().
+ * Field names encode the SPC bits that share each byte (e.g.
+ * qual_type = peripheral qualifier + peripheral device type).
+ */
+struct inquiry_data {
+	u8 qual_type;
+	u8 rmb_reserve;
+	u8 version;
+	u8 aerc_naca_hisup_format;
+	u8 addl_len;		/* additional length: bytes following byte 4 */
+	u8 sccs_reserved;
+	u8 bque_encserv_vs_multip_mchngr_reserved;
+	u8 reladr_reserved_linked_cmdqueue_vs;
+	char vendor[8];
+	char product[16];
+	char revision[4];
+	char vendor_specific[20];
+	char reserved1[2];
+	char version_descriptor[16];
+	char reserved2[22];
+	char unique[158];	/* vendor-specific unique id string, see ibmvscsis_inquery() */
+};
+
+/* Pack bus/target/lun into the 16-bit flat LUN format carried in the
+ * top two bytes of a 64-bit SCSI LUN.
+ */
+static u64 make_lun(unsigned int bus, unsigned int target, unsigned int lun)
+{
+	u16 flat = 0x8000;
+
+	flat |= (target & 0x003f) << 8;
+	flat |= (bus & 0x0007) << 5;
+	flat |= lun & 0x001f;
+
+	return ((u64) flat) << 48;
+}
+
+/*
+ * Emulate a standard INQUIRY locally (EVPD/CmdDt pages are not handled
+ * here).  Returns the number of valid bytes written into "data", or 0
+ * for requests we cannot serve.  Note: "inquery" spelling kept since
+ * callers reference it by this name.
+ */
+static int ibmvscsis_inquery(struct ibmvscsis_adapter *adapter,
+			      struct srp_cmd *cmd, char *data)
+{
+	struct se_portal_group *se_tpg = &adapter->se_tpg;
+	struct inquiry_data *id = (struct inquiry_data *)data;
+	u64 unpacked_lun, lun = cmd->lun;
+	u8 *cdb = cmd->cdb;
+	int len;
+
+	/*
+	 * Bail out instead of dereferencing a NULL buffer below (the old
+	 * code only logged here and then crashed on id->...).
+	 */
+	if (!data) {
+		printk(KERN_INFO "%s %d: oomu\n", __func__, __LINE__);
+		return 0;
+	}
+
+	/* EVPD and CmdDt together, or a page code without either, is invalid. */
+	if (((cdb[1] & 0x3) == 0x3) || (!(cdb[1] & 0x3) && cdb[2])) {
+		printk(KERN_INFO "%s %d: invalid req\n", __func__, __LINE__);
+		return 0;
+	}
+
+	if (cdb[1] & 0x3)
+		printk(KERN_INFO "%s %d: needs the normal path\n",
+		       __func__, __LINE__);
+	else {
+		id->qual_type = TYPE_DISK;
+		id->rmb_reserve = 0x00;
+		id->version = 0x84; /* ISO/IE */
+		id->aerc_naca_hisup_format = 0x22; /* naca & fmt 0x02 */
+		id->addl_len = sizeof(*id) - 4;
+		id->bque_encserv_vs_multip_mchngr_reserved = 0x00;
+		id->reladr_reserved_linked_cmdqueue_vs = 0x02; /* CMDQ */
+		memcpy(id->vendor, "IBM	    ", 8);
+		/*
+		 * Don't even ask about the next bit.  AIX uses
+		 * hardcoded device naming to recognize device types
+		 * and their client won't  work unless we use VOPTA and
+		 * VDASD.
+		 *
+		 * NOTE(review): qual_type was set to TYPE_DISK just
+		 * above, so the TYPE_ROM branch is currently dead.
+		 */
+		if (id->qual_type == TYPE_ROM)
+			memcpy(id->product, "VOPTA blkdev    ", 16);
+		else
+			memcpy(id->product, "VDASD blkdev    ", 16);
+
+		memcpy(id->revision, "0001", 4);
+
+		snprintf(id->unique, sizeof(id->unique),
+			 "IBM-VSCSI-%s-P%d-%x-%d-%d-%d\n",
+			 system_id,
+			 partition_number,
+			 adapter->dma_dev->unit_address,
+			 GETBUS(lun),
+			 GETTARGET(lun),
+			 GETLUN(lun));
+	}
+
+	len = min_t(int, sizeof(*id), cdb[4]);
+
+	unpacked_lun = scsi_lun_to_int(cmd->lun);
+
+	/*
+	 * Report "no LUN" peripheral qualifier when the addressed LUN is
+	 * not an active TCM LUN on this TPG (replaces the old empty-then
+	 * else construct with the condition stated directly).
+	 */
+	spin_lock(&se_tpg->tpg_lun_lock);
+
+	if (unpacked_lun >= TRANSPORT_MAX_LUNS_PER_TPG ||
+	    se_tpg->tpg_lun_list[unpacked_lun].lun_status !=
+	    TRANSPORT_LUN_STATUS_ACTIVE)
+		data[0] = TYPE_NO_LUN;
+
+	spin_unlock(&se_tpg->tpg_lun_lock);
+
+	return len;
+}
+
+/*
+ * Emulate MODE SENSE(6) for page 0x3f (all) and 0x08 (caching) against
+ * the backing TCM device.  Returns the number of valid bytes in "mode",
+ * or 0 for LUNs/pages we cannot serve.
+ */
+static int ibmvscsis_mode_sense(struct ibmvscsis_adapter *adapter,
+				struct srp_cmd *cmd, char *mode)
+{
+	int bytes = 0;	/* was used uninitialized for unknown page codes */
+	struct se_portal_group *se_tpg = &adapter->se_tpg;
+	u64 unpacked_lun;
+	struct se_lun *lun;
+	u32 blocks;
+
+	unpacked_lun = scsi_lun_to_int(cmd->lun);
+
+	/*
+	 * Bounds/status check before indexing tpg_lun_list: an invalid
+	 * SRP LUN maps to all-ones and would index far past the array,
+	 * and an inactive LUN has no backing device to query.
+	 */
+	spin_lock(&se_tpg->tpg_lun_lock);
+
+	if (unpacked_lun >= TRANSPORT_MAX_LUNS_PER_TPG ||
+	    se_tpg->tpg_lun_list[unpacked_lun].lun_status !=
+	    TRANSPORT_LUN_STATUS_ACTIVE) {
+		spin_unlock(&se_tpg->tpg_lun_lock);
+		return 0;
+	}
+
+	lun = &se_tpg->tpg_lun_list[unpacked_lun];
+	blocks = TRANSPORT(lun->lun_se_dev)->get_blocks(lun->lun_se_dev);
+
+	spin_unlock(&se_tpg->tpg_lun_lock);
+
+	switch (cmd->cdb[2]) {
+	case 0:
+	case 0x3f:
+		mode[1] = 0x00;	/* Default medium */
+		/* if (iue->req.vd->b.ro) */
+		if (0)
+			mode[2] = 0x80;	/* device specific  */
+		else
+			mode[2] = 0x00;	/* device specific  */
+
+		/* note the DPOFUA bit is set to zero! */
+		mode[3] = 0x08;	/* block descriptor length */
+		*((u32 *) &mode[4]) = blocks - 1;
+		*((u32 *) &mode[8]) = 512;
+		bytes = mode[0] = 12;	/* length */
+		break;
+
+	case 0x08: /* Cache page */
+		mode[1] = 0x00;	/* Default medium */
+		if (0)
+			mode[2] = 0x80;	/* device specific */
+		else
+			mode[2] = 0x00;	/* device specific */
+
+		/* note the DPOFUA bit is set to zero! */
+		mode[3] = 0x08;	/* block descriptor length */
+		*((u32 *) &mode[4]) = blocks - 1;
+		*((u32 *) &mode[8]) = 512;
+
+		/* Cache page */
+		mode[12] = 0x08;    /* page */
+		mode[13] = 0x12;    /* page length */
+		mode[14] = 0x01;    /* no cache (0x04 for read/write cache) */
+
+		bytes = mode[0] = 12 + mode[13];	/* length */
+		break;
+
+	default:
+		/* Unsupported page code: report no data. */
+		bytes = 0;
+		break;
+	}
+
+	return bytes;
+}
+
+/*
+ * Emulate REPORT LUNS: write a LUN list header plus one 8-byte entry per
+ * active TCM LUN into "data" (a zeroed 4K page) and return the response
+ * length, capped by the initiator's allocation length.
+ *
+ * NOTE(review): entries are written starting at data[2] (byte 16) while
+ * the LUN list header is only 8 bytes -- looks like an extra 8-byte gap;
+ * confirm against the SPC REPORT LUNS layout.
+ */
+static int ibmvscsis_report_luns(struct ibmvscsis_adapter *adapter,
+				 struct srp_cmd *cmd, u64 *data)
+{
+	u64 lun;
+	struct se_portal_group *se_tpg = &adapter->se_tpg;
+	int i, idx;
+	int alen, oalen, nr_luns, rbuflen = 4096;
+
+	/* Allocation length from the CDB, rounded down to whole entries. */
+	alen = get_unaligned_be32(&cmd->cdb[6]);
+
+	alen &= ~(8 - 1);
+	oalen = alen;
+
+	/* A non-zero addressed LUN means "report only that LUN". */
+	if (cmd->lun) {
+		nr_luns = 1;
+		goto done;
+	}
+
+	alen -= 8;
+	rbuflen -= 8; /* FIXME */
+	idx = 2;
+	nr_luns = 1;
+
+	spin_lock(&se_tpg->tpg_lun_lock);
+	for (i = 0; i < 255; i++) {
+		if (se_tpg->tpg_lun_list[i].lun_status !=
+		    TRANSPORT_LUN_STATUS_ACTIVE)
+			continue;
+
+		lun = make_lun(0, i & 0x003f, 0);
+		data[idx++] = cpu_to_be64(lun);
+		/*
+		 * alen can go negative when the allocation length was
+		 * small (e.g. 8): the old "if (!alen)" test missed that
+		 * and kept looping until rbuflen ran out.
+		 */
+		alen -= 8;
+		if (alen <= 0)
+			break;
+		rbuflen -= 8;
+		if (rbuflen <= 0)
+			break;
+
+		nr_luns++;
+	}
+	spin_unlock(&se_tpg->tpg_lun_lock);
+done:
+	/* LUN list header: byte count of the LUN entries that follow. */
+	put_unaligned_be32(nr_luns * 8, data);
+	return min(oalen, nr_luns * 8 + 8);
+}
+
+/*
+ * Copy up to "rest" bytes between the local sg list and the initiator's
+ * SRP direct buffers via H_COPY_RDMA, in whichever direction "dir"
+ * indicates.  Returns 0 on success or -EIO on hypervisor/bounds errors.
+ */
+static int ibmvscsis_rdma(struct scsi_cmnd *sc, struct scatterlist *sg, int nsg,
+			  struct srp_direct_buf *md, int nmd,
+			  enum dma_data_direction dir, unsigned int rest)
+{
+	struct iu_entry *iue = (struct iu_entry *) sc->SCp.ptr;
+	struct srp_target *target = iue->target;
+	struct ibmvscsis_adapter *adapter = target->ldata;
+	dma_addr_t token;
+	long err;
+	unsigned int done = 0;
+	int i, sidx, soff;
+
+	sidx = soff = 0;
+	token = sg_dma_address(sg + sidx);
+
+	for (i = 0; i < nmd && rest; i++) {
+		unsigned int mdone, mlen;
+
+		mlen = min(rest, md[i].len);
+		/*
+		 * Account for this descriptor up front: the inner loop
+		 * decrements mlen to zero, so the old "rest -= mlen" at
+		 * the bottom of this loop always subtracted zero and the
+		 * loop never terminated on rest.
+		 */
+		rest -= mlen;
+
+		for (mdone = 0; mlen;) {
+			int slen = min(sg_dma_len(sg + sidx) - soff, mlen);
+
+			if (dir == DMA_TO_DEVICE)
+				err = h_copy_rdma(slen,
+						  adapter->riobn,
+						  md[i].va + mdone,
+						  adapter->liobn,
+						  token + soff);
+			else
+				err = h_copy_rdma(slen,
+						  adapter->liobn,
+						  token + soff,
+						  adapter->riobn,
+						  md[i].va + mdone);
+
+			if (err != H_SUCCESS) {
+				printk(KERN_ERR "rdma error %d %d %ld\n",
+				       dir, slen, err);
+				return -EIO;
+			}
+
+			mlen -= slen;
+			mdone += slen;
+			soff += slen;
+			done += slen;
+
+			if (soff == sg_dma_len(sg + sidx)) {
+				sidx++;
+				soff = 0;
+				/*
+				 * Valid sg indices are 0..nsg-1; the old
+				 * "sidx > nsg" test was off by one and
+				 * only ran after sg_dma_address() had
+				 * already read past the end of the list.
+				 */
+				if (sidx >= nsg) {
+					if (mlen || rest) {
+						printk(KERN_ERR "out of sg %p %d %d\n",
+						       iue, sidx, nsg);
+						return -EIO;
+					}
+					break;
+				}
+				token = sg_dma_address(sg + sidx);
+			}
+		}
+	}
+	return 0;
+}
+
+/*
+ * Final completion for an emulated or TCM-processed command: transfer
+ * any data described by sc's sg table to/from the initiator, unlink the
+ * iu from the target's command list, send the SRP response and release
+ * the iu.  Always returns 0.
+ */
+static int ibmvscsis_cmd_done(struct scsi_cmnd *sc)
+{
+	unsigned long flags;
+	struct iu_entry *iue = (struct iu_entry *) sc->SCp.ptr;
+	struct srp_target *target = iue->target;
+	int err = 0;
+
+	/* Move the payload via ibmvscsis_rdma() when there is one. */
+	if (scsi_sg_count(sc))
+		err = srp_transfer_data(sc, &vio_iu(iue)->srp.cmd,
+					ibmvscsis_rdma, 1, 1);
+
+	spin_lock_irqsave(&target->lock, flags);
+	list_del(&iue->ilist);
+	spin_unlock_irqrestore(&target->lock, flags);
+
+	/* Any transfer error or non-GOOD status is reported as a
+	 * hardware error to the initiator.
+	 */
+	if (err || sc->result != SAM_STAT_GOOD) {
+		printk(KERN_ERR "operation failed %p %d %x\n",
+		       iue, sc->result, vio_iu(iue)->srp.cmd.cdb[0]);
+		send_rsp(iue, sc, HARDWARE_ERROR, 0x00);
+	} else
+		send_rsp(iue, sc, NO_SENSE, 0x00);
+
+	/* done(sc); */
+	srp_iu_put(iue);
+	return 0;
+}
+
+/*
+ * NOTE(review): this struct appears to duplicate struct ibmvscsis_cmnd
+ * (same three members) and is not referenced by the surrounding code,
+ * which uses ibmvscsis_cmnd throughout -- candidate for removal; confirm.
+ */
+struct ibmvscsis_cmd {
+	/* Used for libsrp processing callbacks */
+	struct scsi_cmnd sc;
+	/* Used for TCM Core operations */
+	struct se_cmd se_cmd;
+	/* Sense buffer that will be mapped into outgoing status */
+	unsigned char sense_buf[TRANSPORT_SENSE_BUFFER];
+};
+
+/*
+ * TCM write_pending callback: point sc->sdb at the se_cmd's task sg
+ * list (chained or bounce buffer), pull the WRITE payload from the
+ * initiator via libsrp/RDMA, then hand the command to TCM's execution
+ * queue.  Returns 0 or a PYX transport error code.
+ */
+static int ibmvscsis_write_pending(struct se_cmd *se_cmd)
+{
+	struct ibmvscsis_cmnd *cmd = container_of(se_cmd,
+			struct ibmvscsis_cmnd, se_cmd);
+	struct scsi_cmnd *sc = &cmd->sc;
+	struct iu_entry *iue = (struct iu_entry *) sc->SCp.ptr;
+	int ret;
+
+	sc->sdb.length = se_cmd->data_length;
+
+	if ((se_cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB) ||
+	    (se_cmd->se_cmd_flags & SCF_SCSI_CONTROL_SG_IO_CDB)) {
+		transport_do_task_sg_chain(se_cmd);
+
+		sc->sdb.table.nents = T_TASK(se_cmd)->t_tasks_sg_chained_no;
+		sc->sdb.table.sgl = T_TASK(se_cmd)->t_tasks_sg_chained;
+	} else if (se_cmd->se_cmd_flags & SCF_SCSI_CONTROL_NONSG_IO_CDB) {
+		/*
+		 * Use T_TASK(se_cmd)->t_tasks_sg_bounce for control CDBs
+		 * using a contiguous buffer
+		 */
+		sg_init_table(&T_TASK(se_cmd)->t_tasks_sg_bounce, 1);
+		sg_set_buf(&T_TASK(se_cmd)->t_tasks_sg_bounce,
+			T_TASK(se_cmd)->t_task_buf, se_cmd->data_length);
+
+		sc->sdb.table.nents = 1;
+		sc->sdb.table.sgl = &T_TASK(se_cmd)->t_tasks_sg_bounce;
+	}
+
+	/* DMA_TO_DEVICE: data flows from the initiator into sc->sdb. */
+	ret = srp_transfer_data(sc, &vio_iu(iue)->srp.cmd,
+				ibmvscsis_rdma, 1, 1);
+	if (ret) {
+		printk(KERN_ERR "srp_transfer_data() failed: %d\n", ret);
+		return PYX_TRANSPORT_LU_COMM_FAILURE;
+	}
+	/*
+	 * We now tell TCM to add this WRITE CDB directly into the TCM storage
+	 * object execution queue.
+	 */
+	transport_generic_process_write(se_cmd);
+	return 0;
+}
+
+/*
+ * TCM queue_data_in callback: point sc->sdb at the se_cmd's task sg
+ * list (chained or bounce buffer) and complete the command; the READ
+ * payload is pushed to the initiator from ibmvscsis_cmd_done() via
+ * srp_transfer_data()/ibmvscsis_rdma().
+ */
+static int ibmvscsis_queue_data_in(struct se_cmd *se_cmd)
+{
+	struct ibmvscsis_cmnd *cmd = container_of(se_cmd,
+			struct ibmvscsis_cmnd, se_cmd);
+	struct scsi_cmnd *sc = &cmd->sc;
+	/*
+	 * Check for overflow residual count
+	 */
+	if (se_cmd->se_cmd_flags & SCF_OVERFLOW_BIT)
+		scsi_set_resid(sc, se_cmd->residual_count);
+
+	sc->sdb.length = se_cmd->data_length;
+
+	/*
+	 * Setup the struct se_task->task_sg[] chained SG list
+	 */
+	if ((se_cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB) ||
+	    (se_cmd->se_cmd_flags & SCF_SCSI_CONTROL_SG_IO_CDB)) {
+		transport_do_task_sg_chain(se_cmd);
+
+		sc->sdb.table.nents = T_TASK(se_cmd)->t_tasks_sg_chained_no;
+		sc->sdb.table.sgl = T_TASK(se_cmd)->t_tasks_sg_chained;
+	} else if (se_cmd->se_cmd_flags & SCF_SCSI_CONTROL_NONSG_IO_CDB) {
+		/*
+		 * Use T_TASK(se_cmd)->t_tasks_sg_bounce for control CDBs
+		 * using a contiguous buffer
+		 */
+		sg_init_table(&T_TASK(se_cmd)->t_tasks_sg_bounce, 1);
+		sg_set_buf(&T_TASK(se_cmd)->t_tasks_sg_bounce,
+			T_TASK(se_cmd)->t_task_buf, se_cmd->data_length);
+
+		sc->sdb.table.nents = 1;
+		sc->sdb.table.sgl = &T_TASK(se_cmd)->t_tasks_sg_bounce;
+	}
+	/*
+	 * Perform the SCSI READ data transfer from sc->sdb.table into
+	 * VIO LPAR memory.  This will occur via libsrp in the
+	 * ibmvscsis_rdma() callback
+	 */
+#if 0
+	ret = srp_transfer_data(sc, &vio_iu(iue)->srp.cmd,
+				ibmvscsis_rdma, 1, 1);
+	if (ret) {
+		printk(KERN_ERR "srp_transfer_data() failed: %d, returning"
+				" DID_ERROR\n", ret);
+		sc->result = host_byte(DID_ERROR) | se_cmd->scsi_status;
+	} else
+		sc->result = host_byte(DID_OK) | se_cmd->scsi_status;
+#endif
+	/*
+	 * This will call srp_transfer_data() and post the response
+	 * to VIO via libsrp.
+	 */
+	ibmvscsis_cmd_done(sc);
+	return 0;
+}
+
+/*
+ * TCM queue_status callback: translate the se_cmd status (and any sense
+ * data) into sc->result / sc->sense_buffer and post the SRP response.
+ */
+static int ibmvscsis_queue_status(struct se_cmd *se_cmd)
+{
+	struct ibmvscsis_cmnd *cmd = container_of(se_cmd,
+						  struct ibmvscsis_cmnd, se_cmd);
+	struct scsi_cmnd *sc = &cmd->sc;
+	/*
+	 * Copy any generated SENSE data into sc->sense_buffer and
+	 * set the appropriate sc->result to be translated by
+	 * ibmvscsis_cmd_done()
+	 */
+	if (se_cmd->sense_buffer &&
+	   ((se_cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) ||
+	    (se_cmd->se_cmd_flags & SCF_EMULATED_TASK_SENSE))) {
+		memcpy((void *)sc->sense_buffer, (void *)se_cmd->sense_buffer,
+				SCSI_SENSE_BUFFERSIZE);
+		sc->result = host_byte(DID_OK) | driver_byte(DRIVER_SENSE) |
+				SAM_STAT_CHECK_CONDITION;
+	} else
+		sc->result = host_byte(DID_OK) | se_cmd->scsi_status;
+	/*
+	 * Finally post the response to VIO via libsrp.
+	 */
+	ibmvscsis_cmd_done(sc);
+	return 0;
+}
+
+/*
+ * Run one of the locally-emulated CDBs (INQUIRY / REPORT LUNS / MODE
+ * SENSE) into a freshly allocated zeroed page, post the response via
+ * ibmvscsis_cmd_done(), then release the temporary sg table and page.
+ * Returns 0 or -ENOMEM.
+ */
+static int ibmvscsis_queue_emulated(struct ibmvscsis_adapter *adapter,
+				    struct ibmvscsis_cmnd *vsc,
+				    struct srp_cmd *cmd)
+{
+	struct scsi_cmnd *sc = &vsc->sc;
+	struct page *pg;
+	int len;
+
+	if (sg_alloc_table(&sc->sdb.table, 1, GFP_KERNEL))
+		return -ENOMEM;
+
+	pg = alloc_page(GFP_KERNEL|__GFP_ZERO);
+	if (!pg) {
+		sg_free_table(&sc->sdb.table);
+		return -ENOMEM;
+	}
+
+	switch (cmd->cdb[0]) {
+	case INQUIRY:
+		len = ibmvscsis_inquery(adapter, cmd, page_address(pg));
+		break;
+	case REPORT_LUNS:
+		len = ibmvscsis_report_luns(adapter, cmd, page_address(pg));
+		break;
+	case MODE_SENSE:
+		/* fixme: needs to use tcm */
+		len = ibmvscsis_mode_sense(adapter, cmd, page_address(pg));
+		break;
+	default:
+		len = 0;	/* never reached via ibmvscsis_queuecommand() */
+		break;
+	}
+
+	sc->sdb.length = len;
+	sg_set_page(sc->sdb.table.sgl, pg, len, 0);
+	ibmvscsis_cmd_done(sc);
+
+	sg_free_table(&sc->sdb.table);
+	__free_page(pg);
+	return 0;
+}
+
+/*
+ * Dispatch a received SRP_CMD: a few CDBs are emulated locally, the
+ * rest go to TCM.  Returns 0 or -ENOMEM (handle_cmd_queue() drops the
+ * iu on error).
+ */
+static int ibmvscsis_queuecommand(struct ibmvscsis_adapter *adapter,
+				  struct iu_entry *iue)
+{
+	struct srp_cmd *cmd = iue->sbuf->buf;
+	struct ibmvscsis_cmnd *vsc;
+	struct scsi_cmnd *sc;
+	int ret = 0;
+
+	/*
+	 * The old code never checked this allocation and would oops on
+	 * the sc dereferences below under memory pressure.
+	 */
+	vsc = kzalloc(sizeof(*vsc), GFP_KERNEL);
+	if (!vsc)
+		return -ENOMEM;
+
+	sc = &vsc->sc;
+	sc->sense_buffer = vsc->sense_buf;
+	sc->cmnd = cmd->cdb;
+	sc->SCp.ptr = (char *)iue;
+
+	switch (cmd->cdb[0]) {
+	case INQUIRY:
+	case REPORT_LUNS:
+	case MODE_SENSE:
+		ret = ibmvscsis_queue_emulated(adapter, vsc, cmd);
+		kfree(vsc);
+		break;
+	default:
+		/*
+		 * Hand everything else to TCM; vsc ownership passes with
+		 * the command (presumably freed via the TCM release path
+		 * after ibmvscsis_check_stop_free() -- TODO confirm).
+		 */
+		tcm_queuecommand(adapter, vsc, cmd);
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Walk the target's queued ius and dispatch each one not already in
+ * flight.  The lock must be dropped around ibmvscsis_queuecommand(),
+ * so the list walk restarts from the top after every dispatch.
+ */
+static void handle_cmd_queue(struct ibmvscsis_adapter *adapter)
+{
+	struct srp_target *target = &adapter->srpt;
+	struct iu_entry *iue;
+	unsigned long flags;
+	int err;
+
+retry:
+	spin_lock_irqsave(&target->lock, flags);
+
+	list_for_each_entry(iue, &target->cmd_queue, ilist) {
+		if (!test_and_set_bit(V_FLYING, &iue->flags)) {
+			spin_unlock_irqrestore(&target->lock, flags);
+			err = ibmvscsis_queuecommand(adapter, iue);
+			if (err) {
+				/*
+				 * The old message passed "cmd", a local
+				 * srp_cmd pointer that was never
+				 * assigned; report the iu entry instead.
+				 */
+				printk(KERN_ERR "cannot queue iue %p %d\n",
+				       iue, err);
+				srp_iu_put(iue);
+			}
+			goto retry;
+		}
+	}
+
+	spin_unlock_irqrestore(&target->lock, flags);
+}
+
+/*
+ * CRQ work handler (scheduled from the interrupt): drain the CRQ, then
+ * re-enable interrupts and poll once more to close the race with a
+ * message arriving between the drain and the enable.  Finally dispatch
+ * any commands that process_crq() queued.
+ */
+static void handle_crq(struct work_struct *work)
+{
+	struct ibmvscsis_adapter *adapter =
+		container_of(work, struct ibmvscsis_adapter, crq_work);
+	struct viosrp_crq *crq;
+	int done = 0;
+
+	while (!done) {
+		/* Drain everything currently in the queue. */
+		while ((crq = next_crq(&adapter->crq_queue)) != NULL) {
+			process_crq(crq, adapter);
+			crq->valid = 0x00;
+		}
+
+		vio_enable_interrupts(adapter->dma_dev);
+
+		/* Re-check: a message may have landed before the enable. */
+		crq = next_crq(&adapter->crq_queue);
+		if (crq) {
+			vio_disable_interrupts(adapter->dma_dev);
+			process_crq(crq, adapter);
+			crq->valid = 0x00;
+		} else
+			done = 1;
+	}
+
+	handle_cmd_queue(adapter);
+}
+
+/* Hard IRQ handler: mask further CRQ interrupts and defer all real
+ * processing to the adapter's workqueue item (handle_crq()).
+ */
+static irqreturn_t ibmvscsis_interrupt(int dummy, void *data)
+{
+	struct ibmvscsis_adapter *adapter = data;
+
+	vio_disable_interrupts(adapter->dma_dev);
+	schedule_work(&adapter->crq_work);
+	return IRQ_HANDLED;
+}
+
+/*
+ * Bring up the Command/Response Queue for one adapter: allocate and
+ * DMA-map a page of CRQ slots, register it with the hypervisor
+ * (retrying once after a free if a stale registration survived e.g.
+ * kexec), hook up the interrupt and send the partner an init message.
+ *
+ * NOTE(review): every failure path reports -ENOMEM, including hcall
+ * and request_irq failures -- consider propagating distinct errors.
+ */
+static int crq_queue_create(struct crq_queue *queue,
+			    struct ibmvscsis_adapter *adapter)
+{
+	int err;
+	struct vio_dev *vdev = adapter->dma_dev;
+
+	queue->msgs = (struct viosrp_crq *)get_zeroed_page(GFP_KERNEL);
+	if (!queue->msgs)
+		goto malloc_failed;
+	queue->size = PAGE_SIZE / sizeof(*queue->msgs);
+
+	queue->msg_token = dma_map_single(&vdev->dev, queue->msgs,
+					  queue->size * sizeof(*queue->msgs),
+					  DMA_BIDIRECTIONAL);
+
+	if (dma_mapping_error(&vdev->dev, queue->msg_token))
+		goto map_failed;
+
+	err = h_reg_crq(vdev->unit_address, queue->msg_token,
+			PAGE_SIZE);
+
+	/* If the adapter was left active for some reason (like kexec)
+	 * try freeing and re-registering
+	 */
+	if (err == H_RESOURCE) {
+		do {
+			err = h_free_crq(vdev->unit_address);
+		} while (err == H_BUSY || H_IS_LONG_BUSY(err));
+
+		err = h_reg_crq(vdev->unit_address, queue->msg_token,
+				PAGE_SIZE);
+	}
+
+	/* NOTE(review): the magic "2" looks like H_CLOSED (partner not
+	 * yet connected) -- confirm and use the named constant.
+	 */
+	if (err != H_SUCCESS && err != 2) {
+		printk(KERN_ERR "Error 0x%x opening virtual adapter\n", err);
+		goto reg_crq_failed;
+	}
+
+	err = request_irq(vdev->irq, &ibmvscsis_interrupt,
+			  IRQF_DISABLED, "ibmvscsis", adapter);
+	if (err)
+		goto req_irq_failed;
+
+	vio_enable_interrupts(vdev);
+
+	/* Tell the partner partition the queue is ready (init message). */
+	h_send_crq(vdev->unit_address, 0xC001000000000000, 0);
+
+	queue->cur = 0;
+	spin_lock_init(&queue->lock);
+
+	return 0;
+
+req_irq_failed:
+	do {
+		err = h_free_crq(vdev->unit_address);
+	} while (err == H_BUSY || H_IS_LONG_BUSY(err));
+
+reg_crq_failed:
+	dma_unmap_single(&vdev->dev, queue->msg_token,
+			 queue->size * sizeof(*queue->msgs), DMA_BIDIRECTIONAL);
+map_failed:
+	free_page((unsigned long) queue->msgs);
+
+malloc_failed:
+	return -ENOMEM;
+}
+
+/*
+ * Tear down the CRQ in reverse order of crq_queue_create(): release
+ * the interrupt, wait for any pending CRQ work to finish, free the
+ * hypervisor registration, then unmap and free the queue page.
+ */
+static void crq_queue_destroy(struct ibmvscsis_adapter *adapter)
+{
+	struct crq_queue *queue = &adapter->crq_queue;
+	int err;
+
+	free_irq(adapter->dma_dev->irq, adapter);
+	flush_work_sync(&adapter->crq_work);
+	do {
+		err = h_free_crq(adapter->dma_dev->unit_address);
+	} while (err == H_BUSY || H_IS_LONG_BUSY(err));
+
+	dma_unmap_single(&adapter->dma_dev->dev, queue->msg_token,
+			 queue->size * sizeof(*queue->msgs), DMA_BIDIRECTIONAL);
+
+	free_page((unsigned long)queue->msgs);
+}
+
+/*
+ * VIO probe: allocate the per-adapter state, read the DMA window
+ * property, register on the global tpg list, allocate the SRP target
+ * and bring up the CRQ.  The old code ignored the return values of
+ * strict_strtoul(), srp_target_alloc() and crq_queue_create() and
+ * always returned 0, leaking the adapter and its list entry on
+ * failure; unwind properly instead.
+ */
+static int ibmvscsis_probe(struct vio_dev *dev, const struct vio_device_id *id)
+{
+	unsigned int *dma, dma_size;
+	unsigned long flags;
+	int ret;
+	struct ibmvscsis_adapter *adapter;
+
+	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+	if (!adapter)
+		return -ENOMEM;
+
+	adapter->dma_dev = dev;
+
+	dma = (unsigned int *)vio_get_attribute(dev, "ibm,my-dma-window",
+						&dma_size);
+	if (!dma || dma_size != 40) {
+		printk(KERN_ERR "Couldn't get window property %d\n", dma_size);
+		ret = -EIO;
+		goto free_adapter;
+	}
+
+	adapter->liobn = dma[0];
+	adapter->riobn = dma[5];
+
+	/* The device name doubles as the TCM target portal group tag. */
+	ret = strict_strtoul(dev_name(&dev->dev), 10, &adapter->tport_tpgt);
+	if (ret)
+		goto free_adapter;
+
+	spin_lock_irqsave(&tpg_lock, flags);
+	list_add(&adapter->siblings, &tpg_list);
+	spin_unlock_irqrestore(&tpg_lock, flags);
+
+	INIT_WORK(&adapter->crq_work, handle_crq);
+
+	dev_set_drvdata(&dev->dev, adapter);
+
+	ret = srp_target_alloc(&adapter->srpt, &dev->dev, INITIAL_SRP_LIMIT,
+			       SRP_MAX_IU_LEN);
+	if (ret)
+		goto del_siblings;
+
+	adapter->srpt.ldata = adapter;
+
+	ret = crq_queue_create(&adapter->crq_queue, adapter);
+	if (ret)
+		goto free_target;
+
+	return 0;
+
+free_target:
+	srp_target_free(&adapter->srpt);
+del_siblings:
+	spin_lock_irqsave(&tpg_lock, flags);
+	list_del(&adapter->siblings);
+	spin_unlock_irqrestore(&tpg_lock, flags);
+free_adapter:
+	kfree(adapter);
+	return ret;
+}
+
+/* VIO remove: unlink the adapter from the global list, then tear down
+ * its resources in reverse order of probe.
+ */
+static int ibmvscsis_remove(struct vio_dev *dev)
+{
+	unsigned long flags;
+	struct ibmvscsis_adapter *adapter = dev_get_drvdata(&dev->dev);
+
+	spin_lock_irqsave(&tpg_lock, flags);
+	list_del(&adapter->siblings);
+	spin_unlock_irqrestore(&tpg_lock, flags);
+
+	crq_queue_destroy(adapter);
+	srp_target_free(&adapter->srpt);
+	kfree(adapter);
+
+	return 0;
+}
+
+/* VIO device ids this driver binds to. */
+static struct vio_device_id ibmvscsis_device_table[] __devinitdata = {
+	{"v-scsi-host", "IBM,v-scsi-host"},
+	{"", ""}
+};
+
+MODULE_DEVICE_TABLE(vio, ibmvscsis_device_table);
+
+/* VIO bus driver glue: one probe/remove per virtual adapter instance. */
+static struct vio_driver ibmvscsis_driver = {
+	.id_table = ibmvscsis_device_table,
+	.probe = ibmvscsis_probe,
+	.remove = ibmvscsis_remove,
+	.driver = {
+		.name = "ibmvscsis",
+		.owner = THIS_MODULE,
+	}
+};
+
+/*
+ * Cache the system identity properties (model/system-id, partition
+ * name and number) from the Open Firmware root node so INQUIRY
+ * emulation doesn't walk the device tree every time.
+ */
+static int get_system_info(void)
+{
+	struct device_node *rootdn;
+	const char *id, *model, *name;
+	const unsigned int *num;
+
+	rootdn = of_find_node_by_path("/");
+	if (!rootdn)
+		return -ENOENT;
+
+	model = of_get_property(rootdn, "model", NULL);
+	id = of_get_property(rootdn, "system-id", NULL);
+	if (model && id)
+		snprintf(system_id, sizeof(system_id), "%s-%s", model, id);
+
+	name = of_get_property(rootdn, "ibm,partition-name", NULL);
+	if (name)
+		/*
+		 * snprintf, unlike the old strncpy, guarantees
+		 * NUL-termination when the property is longer than
+		 * the destination buffer.
+		 */
+		snprintf(partition_name, sizeof(partition_name), "%s", name);
+
+	num = of_get_property(rootdn, "ibm,partition-no", NULL);
+	if (num)
+		partition_number = *num;
+
+	of_node_put(rootdn);
+	return 0;
+}
+
+/*
+ * Register this fabric module with TCM's configfs infrastructure:
+ * allocate the fabric, install our ops and attribute tables, then
+ * register it so /sys/kernel/config/target/ibmvscsis appears.
+ */
+static int ibmvscsis_register_configfs(void)
+{
+	struct target_fabric_configfs *fabric;
+	int ret;
+
+	printk(KERN_INFO "IBMVSCSIS fabric module %s on %s/%s"
+		" on "UTS_RELEASE"\n", IBMVSCSIS_VERSION, utsname()->sysname,
+		utsname()->machine);
+	/*
+	 * Register the top level struct config_item_type with TCM core
+	 */
+	fabric = target_fabric_configfs_init(THIS_MODULE, "ibmvscsis");
+	if (!(fabric)) {
+		printk(KERN_ERR "target_fabric_configfs_init() failed\n");
+		return -ENOMEM;
+	}
+	/*
+	 * Setup fabric->tf_ops from our local ibmvscsis_ops
+	 */
+	fabric->tf_ops = ibmvscsis_ops;
+	/*
+	 * Setup default attribute lists for various fabric->tf_cit_tmpl;
+	 * only the wwn group carries fabric-specific attributes.
+	 */
+	TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = ibmvscsis_wwn_attrs;
+	TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = NULL;
+	TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = NULL;
+	TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = NULL;
+	TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = NULL;
+	TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = NULL;
+	TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
+	TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
+	TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = NULL;
+	/*
+	 * Register the fabric for use within TCM
+	 */
+	ret = target_fabric_configfs_register(fabric);
+	if (ret < 0) {
+		printk(KERN_ERR "target_fabric_configfs_register() failed"
+				" for IBMVSCSIS\n");
+		target_fabric_configfs_deregister(fabric);
+		return ret;
+	}
+	/*
+	 * Setup our local pointer to *fabric
+	 */
+	ibmvscsis_fabric_configfs = fabric;
+	printk(KERN_INFO "IBMVSCSIS[0] - Set fabric -> ibmvscsis_fabric_configfs\n");
+	return 0;
+};
+
+/* Undo ibmvscsis_register_configfs(); safe to call when registration
+ * never happened.
+ */
+static void ibmvscsis_deregister_configfs(void)
+{
+	if (!ibmvscsis_fabric_configfs)
+		return;
+
+	target_fabric_configfs_deregister(ibmvscsis_fabric_configfs);
+	ibmvscsis_fabric_configfs = NULL;
+	printk(KERN_INFO "IBMVSCSIS[0] - Cleared ibmvscsis_fabric_configfs\n");
+}
+
+/*
+ * Module init: cache system identity, register the VIO bus driver,
+ * then register the TCM fabric module.
+ */
+static int __init ibmvscsis_init(void)
+{
+	int ret;
+
+	ret = get_system_info();
+	if (ret)
+		return ret;
+
+	ret = vio_register_driver(&ibmvscsis_driver);
+	if (ret)
+		return ret;
+
+	ret = ibmvscsis_register_configfs();
+	if (ret < 0) {
+		/*
+		 * Don't leave the vio driver registered when the fabric
+		 * failed to come up (the old code leaked it here).
+		 */
+		vio_unregister_driver(&ibmvscsis_driver);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Module exit: tear down in reverse order of ibmvscsis_init() -- drop
+ * the configfs fabric (registered last) before unregistering the VIO
+ * driver, so no configfs operation can reach a vanishing adapter.
+ */
+static void ibmvscsis_exit(void)
+{
+	ibmvscsis_deregister_configfs();
+	vio_unregister_driver(&ibmvscsis_driver);
+}
+
+MODULE_DESCRIPTION("IBMVSCSIS series fabric driver");
+MODULE_AUTHOR("FUJITA Tomonori");
+MODULE_LICENSE("GPL");
+module_init(ibmvscsis_init);
+module_exit(ibmvscsis_exit);
-- 
1.7.2.3


^ permalink raw reply related	[flat|nested] 81+ messages in thread

* Re: [PATCH 0/3] ibmvscsis driver rewrite
  2011-02-10 12:21 [PATCH 0/3] ibmvscsis driver rewrite FUJITA Tomonori
                   ` (2 preceding siblings ...)
  2011-02-10 12:21 ` [PATCH 3/3] tcm ibmvscsis driver FUJITA Tomonori
@ 2011-02-10 18:34 ` Nicholas A. Bellinger
  2011-02-14  1:36   ` FUJITA Tomonori
       [not found] ` <4D53DE96.2020502@suse.de>
  4 siblings, 1 reply; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-02-10 18:34 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: linux-scsi

On Thu, 2011-02-10 at 21:21 +0900, FUJITA Tomonori wrote:
> Finally, I have the working driver. We are ready to remove the old
> ibmvstgt driver.
> 
> This is for scsi-misc tree.
> 
> =
>  drivers/scsi/ibmvscsi/Makefile    |    4 +-
>  drivers/scsi/ibmvscsi/ibmvscsis.c | 1759 +++++++++++++++++++++++++++++++++++++
>  drivers/scsi/libsrp.c             |   18 +-
>  include/scsi/libsrp.h             |   10 +-
>  4 files changed, 1778 insertions(+), 13 deletions(-)
> 

Many thanks for this updated patch series Tomo-san.

I will add this series to a working lio-core-2.6.git/tcm_ibmvscsis
branch from the current LIO upstream HEAD (linus-38-rc4).  I will review
shortly and add my comments..

Also, would you mind sending along 'tree', 'dmesg' and 'lsmod' output of
a functional ibmvscsis system so we can add a proper LIO wiki
entry..?  :-)

--nab


^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-10 12:21 ` [PATCH 3/3] tcm ibmvscsis driver FUJITA Tomonori
@ 2011-02-10 19:03   ` Nicholas A. Bellinger
  2011-02-14  1:36     ` FUJITA Tomonori
  2011-02-14  3:26     ` FUJITA Tomonori
  2011-02-10 19:15   ` Brian King
  2011-02-14  7:16   ` Bart Van Assche
  2 siblings, 2 replies; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-02-10 19:03 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: linux-scsi

On Thu, 2011-02-10 at 21:21 +0900, FUJITA Tomonori wrote:
> This replaces ibmvstgt driver.
> 
> Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
> ---
>  drivers/scsi/ibmvscsi/Makefile    |    4 +-
>  drivers/scsi/ibmvscsi/ibmvscsis.c | 1759 +++++++++++++++++++++++++++++++++++++
>  2 files changed, 1762 insertions(+), 1 deletions(-)
>  create mode 100644 drivers/scsi/ibmvscsi/ibmvscsis.c
> 
> diff --git a/drivers/scsi/ibmvscsi/Makefile b/drivers/scsi/ibmvscsi/Makefile
> index a423d96..a615ea5 100644
> --- a/drivers/scsi/ibmvscsi/Makefile
> +++ b/drivers/scsi/ibmvscsi/Makefile
> @@ -1,8 +1,10 @@
> +EXTRA_CFLAGS += -I$(srctree)/drivers/target/
> +
>  obj-$(CONFIG_SCSI_IBMVSCSI)	+= ibmvscsic.o
>  
>  ibmvscsic-y			+= ibmvscsi.o
>  ibmvscsic-$(CONFIG_PPC_ISERIES)	+= iseries_vscsi.o 
>  ibmvscsic-$(CONFIG_PPC_PSERIES)	+= rpa_vscsi.o 
>  
> -obj-$(CONFIG_SCSI_IBMVSCSIS)	+= ibmvstgt.o
> +obj-$(CONFIG_SCSI_IBMVSCSIS)	+= ibmvscsis.o
>  obj-$(CONFIG_SCSI_IBMVFC)	+= ibmvfc.o
> diff --git a/drivers/scsi/ibmvscsi/ibmvscsis.c b/drivers/scsi/ibmvscsi/ibmvscsis.c
> new file mode 100644
> index 0000000..1ab4d73
> --- /dev/null
> +++ b/drivers/scsi/ibmvscsi/ibmvscsis.c
> @@ -0,0 +1,1759 @@
> +/*
> + * IBM eServer i/pSeries Virtual SCSI Target Driver
> + * Copyright (C) 2003-2005 Dave Boutcher (boutcher@us.ibm.com) IBM Corp.
> + *			   Santiago Leon (santil@us.ibm.com) IBM Corp.
> + *			   Linda Xie (lxie@us.ibm.com) IBM Corp.
> + *
> + * Copyright (C) 2005-2011 FUJITA Tomonori <tomof@acm.org>
> + * Copyright (C) 2010 Nicholas A. Bellinger <nab@kernel.org>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
> + * USA
> + */
> +#include <linux/slab.h>
> +#include <linux/kthread.h>
> +#include <linux/types.h>
> +#include <linux/list.h>
> +#include <linux/types.h>
> +#include <linux/string.h>
> +#include <linux/ctype.h>
> +#include <linux/utsname.h>
> +#include <asm/unaligned.h>
> +#include <scsi/scsi.h>
> +#include <scsi/scsi_host.h>
> +#include <scsi/scsi_device.h>
> +#include <scsi/scsi_cmnd.h>
> +#include <scsi/scsi_tcq.h>
> +#include <scsi/libsrp.h>
> +#include <generated/utsrelease.h>
> +
> +#include <target/target_core_base.h>
> +#include <target/target_core_transport.h>
> +#include <target/target_core_fabric_ops.h>
> +#include <target/target_core_fabric_lib.h>
> +#include <target/target_core_fabric_configfs.h>
> +#include <target/target_core_device.h>
> +#include <target/target_core_tpg.h>
> +#include <target/target_core_configfs.h>
> +
> +#include <asm/hvcall.h>
> +#include <asm/iommu.h>
> +#include <asm/prom.h>
> +#include <asm/vio.h>
> +
> +#include "ibmvscsi.h"
> +#include "viosrp.h"
> +
> +#define IBMVSCSIS_VERSION  "v0.1"
> +#define IBMVSCSIS_NAMELEN 32
> +
> +#define	INITIAL_SRP_LIMIT	16
> +#define	DEFAULT_MAX_SECTORS	256
> +

Ok, these two hardcoded definitions can be converted into context-specific,
fabric-dependent configfs attributes (TF_CIT_TMPL(fabric)->tfc_* below)
using the target_core_fabric_configfs.h wrappers.

> +/*
> + * Hypervisor calls.
> + */
> +#define h_copy_rdma(l, sa, sb, da, db) \
> +			plpar_hcall_norets(H_COPY_RDMA, l, sa, sb, da, db)
> +#define h_send_crq(ua, l, h) \
> +			plpar_hcall_norets(H_SEND_CRQ, ua, l, h)
> +#define h_reg_crq(ua, tok, sz)\
> +			plpar_hcall_norets(H_REG_CRQ, ua, tok, sz);
> +#define h_free_crq(ua) \
> +			plpar_hcall_norets(H_FREE_CRQ, ua);
> +
> +#define GETTARGET(x) ((int)((((u64)(x)) >> 56) & 0x003f))
> +#define GETBUS(x) ((int)((((u64)(x)) >> 53) & 0x0007))
> +#define GETLUN(x) ((int)((((u64)(x)) >> 48) & 0x001f))
> +
> +/*
> + * These are fixed for the system and come from the Open Firmware device tree.
> + * We just store them here to save getting them every time.
> + */
> +static char system_id[64] = "";
> +static char partition_name[97] = "UNKNOWN";
> +static unsigned int partition_number = -1;
> +
> +static LIST_HEAD(tpg_list);
> +static DEFINE_SPINLOCK(tpg_lock);
> +
> +struct ibmvscsis_adapter {
> +	struct vio_dev *dma_dev;
> +	struct list_head siblings;
> +
> +	struct crq_queue crq_queue;
> +
> +	struct work_struct crq_work;
> +
> +	unsigned long liobn;
> +	unsigned long riobn;
> +
> +	/* todo: remove */
> +	struct srp_target srpt;
> +
> +	/* SRP port target portal group tag for TCM */
> +	unsigned long tport_tpgt;
> +
> +	/* Returned by ibmvscsis_make_tpg() */
> +	struct se_portal_group se_tpg;
> +
> +	struct se_session *se_sess;
> +
> +
> +	/* SCSI protocol the tport is providing */
> +	u8 tport_proto_id;
> +	/* Binary World Wide unique Port Name for SRP Target port */
> +	u64 tport_wwpn;
> +	/* ASCII formatted WWPN for SRP Target port */
> +	char tport_name[IBMVSCSIS_NAMELEN];
> +	/* Returned by ibmvscsis_make_tport() */
> +	struct se_wwn tport_wwn;
> +};
> +
> +struct ibmvscsis_cmnd {
> +	/* Used for libsrp processing callbacks */
> +	struct scsi_cmnd sc;
> +	/* Used for TCM Core operations */
> +	struct se_cmd se_cmd;
> +	/* Sense buffer that will be mapped into outgoing status */
> +	unsigned char sense_buf[TRANSPORT_SENSE_BUFFER];
> +};
> +
> +static int ibmvscsis_check_true(struct se_portal_group *se_tpg)
> +{
> +	return 1;
> +}
> +
> +static int ibmvscsis_check_false(struct se_portal_group *se_tpg)
> +{
> +	return 0;
> +}
> +
> +static char *ibmvscsis_get_fabric_name(void)
> +{
> +	return "ibmvscsis";
> +}
> +
> +static u8 ibmvscsis_get_fabric_proto_ident(struct se_portal_group *se_tpg)
> +{
> +	return 4;
> +}
> +
> +static char *ibmvscsis_get_fabric_wwn(struct se_portal_group *se_tpg)
> +{
> +	struct ibmvscsis_adapter *adapter =
> +		container_of(se_tpg, struct ibmvscsis_adapter, se_tpg);
> +
> +	return adapter->tport_name;
> +}
> +
> +static u16 ibmvscsis_get_tag(struct se_portal_group *se_tpg)
> +{
> +	struct ibmvscsis_adapter *adapter =
> +		container_of(se_tpg, struct ibmvscsis_adapter, se_tpg);
> +	return adapter->tport_tpgt;
> +}
> +
> +static u32 ibmvscsis_get_default_depth(struct se_portal_group *se_tpg)
> +{
> +	return 1;
> +}
> +
> +/* we don't care about the transport id since we never use pr. */
> +static u32 ibmvscsis_get_pr_transport_id(struct se_portal_group *se_tpg,
> +					 struct se_node_acl *se_nacl,
> +					 struct t10_pr_registration *pr_reg,
> +					 int *format_code,
> +					 unsigned char *buf)
> +{
> +	return 24;
> +}
> +
> +static u32 ibmvscsis_get_pr_transport_id_len(struct se_portal_group *se_tpg,
> +					     struct se_node_acl *se_nacl,
> +					     struct t10_pr_registration *pr_reg,
> +					     int *format_code)
> +{
> +	return 24;
> +}
> +
> +static char *ibmvscsis_parse_pr_out_transport_id(struct se_portal_group *se_tpg,
> +						 const char *buf,
> +						 u32 *out_tid_len,
> +						 char **port_nexus_ptr)
> +{
> +	return NULL;
> +}
> +

OK, I still need to add generic SCSI_PROTOCOL_SRP handlers for PR
TransportID processing, and bring up TCM_Loop SRP target ports for
testing.  I will look at getting this resolved in LIO upstream in the
near future, and make sure this code gets converted.

> +struct ibmvscsis_nacl {
> +	/* Binary World Wide unique Port Name for SRP Initiator port */
> +	u64 iport_wwpn;
> +	/* ASCII formatted WWPN for Sas Initiator port */
> +	char iport_name[IBMVSCSIS_NAMELEN];
> +	/* Returned by ibmvscsis_make_nodeacl() */
> +	struct se_node_acl se_node_acl;
> +};
> +

<SNIP>

> +
> +static int ibmvscsis_queue_tm_rsp(struct se_cmd *se_cmd)
> +{
> +	return 0;
> +}
> +

The TM request/response logic still needs to be hooked up, right?

> +static u16 ibmvscsis_set_fabric_sense_len(struct se_cmd *se_cmd,
> +					  u32 sense_length)
> +{
> +	return 0;
> +}
> +
> +static u16 ibmvscsis_get_fabric_sense_len(void)
> +{
> +	return 0;
> +}
> +
> +static int ibmvscsis_is_state_remove(struct se_cmd *se_cmd)
> +{
> +	return 0;
> +}
> +
> +static u64 make_lun(unsigned int bus, unsigned int target, unsigned int lun);
> +
> +static u64 ibmvscsis_pack_lun(unsigned int lun)
> +{
> +	return make_lun(0, lun & 0x003f, 0);
> +}
> +
> +/* Local pointer to allocated TCM configfs fabric module */
> +static struct target_fabric_configfs *ibmvscsis_fabric_configfs;
> +
> +static struct se_portal_group *ibmvscsis_make_tpg(struct se_wwn *wwn,
> +						  struct config_group *group,
> +						  const char *name)
> +{
> +	struct ibmvscsis_adapter *adapter =
> +		container_of(wwn, struct ibmvscsis_adapter, tport_wwn);
> +	struct se_node_acl *acl;
> +	int ret;
> +	char *dname = (char *)dev_name(&adapter->dma_dev->dev);
> +
> +	if (strncmp(name, "tpgt_1", 6))
> +		return ERR_PTR(-EINVAL);
> +
> +	ret = core_tpg_register(&ibmvscsis_fabric_configfs->tf_ops, wwn,
> +				&adapter->se_tpg, (void *)adapter,
> +				TRANSPORT_TPG_TYPE_NORMAL);
> +	if (ret)
> +		return ERR_PTR(-ENOMEM);
> +
> +	adapter->se_sess = transport_init_session();
> +	if (!adapter->se_sess) {
> +		core_tpg_deregister(&adapter->se_tpg);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	acl = core_tpg_check_initiator_node_acl(&adapter->se_tpg, dname);
> +	if (!acl) {
> +		transport_free_session(adapter->se_sess);
> +		adapter->se_sess = NULL;
> +		return ERR_PTR(-ENOMEM);
> +	}
> +	adapter->se_sess->se_node_acl = acl;
> +
> +	transport_register_session(&adapter->se_tpg,
> +				   adapter->se_sess->se_node_acl,
> +				   adapter->se_sess, adapter);
> +
> +	return &adapter->se_tpg;
> +}
> +
> +static void ibmvscsis_drop_tpg(struct se_portal_group *se_tpg)
> +{
> +	struct ibmvscsis_adapter *adapter =
> +		container_of(se_tpg, struct ibmvscsis_adapter, se_tpg);
> +	unsigned long flags;
> +
> +
> +	transport_deregister_session_configfs(adapter->se_sess);
> +	transport_free_session(adapter->se_sess);
> +	core_tpg_deregister(se_tpg);
> +
> +	spin_lock_irqsave(&tpg_lock, flags);
> +	adapter->se_sess = NULL;
> +	spin_unlock_irqrestore(&tpg_lock, flags);
> +}
> +
> +static struct se_wwn *ibmvscsis_make_tport(struct target_fabric_configfs *tf,
> +					   struct config_group *group,
> +					   const char *name)
> +{
> +	struct ibmvscsis_adapter *adapter;
> +	unsigned long tpgt, flags;
> +
> +	if (strict_strtoul(name, 10, &tpgt))
> +		return NULL;
> +
> +	spin_lock_irqsave(&tpg_lock, flags);
> +	list_for_each_entry(adapter, &tpg_list, siblings) {
> +		if (tpgt == adapter->tport_tpgt)
> +			goto found;
> +	}
> +
> +	spin_unlock_irqrestore(&tpg_lock, flags);
> +	return NULL;
> +found:
> +	spin_unlock_irqrestore(&tpg_lock, flags);
> +
> +	return &adapter->tport_wwn;
> +}
> +
> +static void ibmvscsis_drop_tport(struct se_wwn *wwn)
> +{
> +}
> +

OK, this should be fine as a no-op, since the adapter->tport_wwn memory
is not being released here.

> +static ssize_t ibmvscsis_wwn_show_attr_version(struct target_fabric_configfs *tf,
> +					       char *page)
> +{
> +	return sprintf(page, "IBMVSCSIS fabric module %s on %s/%s"
> +		"on "UTS_RELEASE"\n", IBMVSCSIS_VERSION, utsname()->sysname,
> +		utsname()->machine);
> +}
> +
> +TF_WWN_ATTR_RO(ibmvscsis, version);
> +
> +static struct configfs_attribute *ibmvscsis_wwn_attrs[] = {
> +	&ibmvscsis_wwn_version.attr,
> +	NULL,
> +};
> +
> +static int ibmvscsis_write_pending(struct se_cmd *se_cmd);
> +static int ibmvscsis_queue_data_in(struct se_cmd *se_cmd);
> +static int ibmvscsis_queue_status(struct se_cmd *se_cmd);
> +static int ibmvscsis_new_cmd_map(struct se_cmd *se_cmd);
> +static void ibmvscsis_check_stop_free(struct se_cmd *se_cmd);
> +
> +static struct target_core_fabric_ops ibmvscsis_ops = {
> +	.task_sg_chaining		= 1,
> +	.get_fabric_name		= ibmvscsis_get_fabric_name,
> +	.get_fabric_proto_ident		= ibmvscsis_get_fabric_proto_ident,
> +	.tpg_get_wwn			= ibmvscsis_get_fabric_wwn,
> +	.tpg_get_tag			= ibmvscsis_get_tag,
> +	.tpg_get_default_depth		= ibmvscsis_get_default_depth,
> +	.tpg_get_pr_transport_id	= ibmvscsis_get_pr_transport_id,
> +	.tpg_get_pr_transport_id_len	= ibmvscsis_get_pr_transport_id_len,
> +	.tpg_parse_pr_out_transport_id	= ibmvscsis_parse_pr_out_transport_id,
> +	.tpg_check_demo_mode		= ibmvscsis_check_true,
> +	.tpg_check_demo_mode_cache	= ibmvscsis_check_true,
> +	.tpg_check_demo_mode_write_protect = ibmvscsis_check_false,
> +	.tpg_check_prod_mode_write_protect = ibmvscsis_check_false,
> +	.tpg_alloc_fabric_acl		= ibmvscsis_alloc_fabric_acl,
> +	.tpg_release_fabric_acl		= ibmvscsis_release_fabric_acl,
> +	.tpg_get_inst_index		= ibmvscsis_tpg_get_inst_index,
> +	.new_cmd_map			= ibmvscsis_new_cmd_map,
> +	.check_stop_free		= ibmvscsis_check_stop_free,
> +	.release_cmd_to_pool		= ibmvscsis_release_cmd,
> +	.release_cmd_direct		= ibmvscsis_release_cmd,
> +	.shutdown_session		= ibmvscsis_shutdown_session,
> +	.close_session			= ibmvscsis_close_session,
> +	.stop_session			= ibmvscsis_stop_session,
> +	.fall_back_to_erl0		= ibmvscsis_reset_nexus,
> +	.sess_logged_in			= ibmvscsis_sess_logged_in,
> +	.sess_get_index			= ibmvscsis_sess_get_index,
> +	.sess_get_initiator_sid		= NULL,
> +	.write_pending			= ibmvscsis_write_pending,
> +	.write_pending_status		= ibmvscsis_write_pending_status,
> +	.set_default_node_attributes	= ibmvscsis_set_default_node_attrs,
> +	.get_task_tag			= ibmvscsis_get_task_tag,
> +	.get_cmd_state			= ibmvscsis_get_cmd_state,
> +	.new_cmd_failure		= ibmvscsis_new_cmd_failure,
> +	.queue_data_in			= ibmvscsis_queue_data_in,
> +	.queue_status			= ibmvscsis_queue_status,
> +	.queue_tm_rsp			= ibmvscsis_queue_tm_rsp,
> +	.get_fabric_sense_len		= ibmvscsis_get_fabric_sense_len,
> +	.set_fabric_sense_len		= ibmvscsis_set_fabric_sense_len,
> +	.is_state_remove		= ibmvscsis_is_state_remove,
> +	.pack_lun			= ibmvscsis_pack_lun,
> +	.fabric_make_wwn		= ibmvscsis_make_tport,
> +	.fabric_drop_wwn		= ibmvscsis_drop_tport,
> +	.fabric_make_tpg		= ibmvscsis_make_tpg,
> +	.fabric_drop_tpg		= ibmvscsis_drop_tpg,
> +	.fabric_post_link		= NULL,
> +	.fabric_pre_unlink		= NULL,
> +	.fabric_make_np			= NULL,
> +	.fabric_drop_np			= NULL,
> +	.fabric_make_nodeacl		= NULL,
> +	.fabric_drop_nodeacl		= NULL,
> +};
> +
> +static inline union viosrp_iu *vio_iu(struct iu_entry *iue)
> +{
> +	return (union viosrp_iu *)(iue->sbuf->buf);
> +}
> +
> +static int send_iu(struct iu_entry *iue, u64 length, u8 format)
> +{
> +	struct srp_target *target = iue->target;
> +	struct ibmvscsis_adapter *adapter = target->ldata;
> +	long rc, rc1;
> +	union {
> +		struct viosrp_crq cooked;
> +		u64 raw[2];
> +	} crq;
> +
> +	/* First copy the SRP */
> +	rc = h_copy_rdma(length, adapter->liobn, iue->sbuf->dma,
> +			 adapter->riobn, iue->remote_token);
> +
> +	if (rc)
> +		printk(KERN_ERR "Error %ld transferring data\n", rc);
> +
> +	crq.cooked.valid = 0x80;
> +	crq.cooked.format = format;
> +	crq.cooked.reserved = 0x00;
> +	crq.cooked.timeout = 0x00;
> +	crq.cooked.IU_length = length;
> +	crq.cooked.IU_data_ptr = vio_iu(iue)->srp.rsp.tag;
> +
> +	if (rc == 0)
> +		crq.cooked.status = 0x99;	/* Just needs to be non-zero */
> +	else
> +		crq.cooked.status = 0x00;
> +
> +	rc1 = h_send_crq(adapter->dma_dev->unit_address, crq.raw[0],
> +			 crq.raw[1]);
> +	if (rc1) {
> +		printk(KERN_ERR "%ld sending response\n", rc1);
> +		return rc1;
> +	}
> +
> +	return rc;
> +}
> +
> +#define SRP_RSP_SENSE_DATA_LEN	18
> +
> +static int send_rsp(struct iu_entry *iue, struct scsi_cmnd *sc,
> +		    unsigned char status, unsigned char asc)
> +{
> +	union viosrp_iu *iu = vio_iu(iue);
> +	uint64_t tag = iu->srp.rsp.tag;
> +
> +	/* If the linked bit is on and status is good */
> +	if (test_bit(V_LINKED, &iue->flags) && (status == NO_SENSE))
> +		status = 0x10;
> +
> +	memset(iu, 0, sizeof(struct srp_rsp));
> +	iu->srp.rsp.opcode = SRP_RSP;
> +	iu->srp.rsp.req_lim_delta = 1;
> +	iu->srp.rsp.tag = tag;
> +
> +	if (test_bit(V_DIOVER, &iue->flags))
> +		iu->srp.rsp.flags |= SRP_RSP_FLAG_DIOVER;
> +
> +	iu->srp.rsp.data_in_res_cnt = 0;
> +	iu->srp.rsp.data_out_res_cnt = 0;
> +
> +	iu->srp.rsp.flags &= ~SRP_RSP_FLAG_RSPVALID;
> +
> +	iu->srp.rsp.resp_data_len = 0;
> +	iu->srp.rsp.status = status;
> +	if (status) {
> +		uint8_t *sense = iu->srp.rsp.data;
> +
> +		if (sc) {
> +			iu->srp.rsp.flags |= SRP_RSP_FLAG_SNSVALID;
> +			iu->srp.rsp.sense_data_len = SCSI_SENSE_BUFFERSIZE;
> +			memcpy(sense, sc->sense_buffer, SCSI_SENSE_BUFFERSIZE);
> +		} else {
> +			iu->srp.rsp.status = SAM_STAT_CHECK_CONDITION;
> +			iu->srp.rsp.flags |= SRP_RSP_FLAG_SNSVALID;
> +			iu->srp.rsp.sense_data_len = SRP_RSP_SENSE_DATA_LEN;
> +
> +			/* Valid bit and 'current errors' */
> +			sense[0] = (0x1 << 7 | 0x70);
> +			/* Sense key */
> +			sense[2] = status;
> +			/* Additional sense length */
> +			sense[7] = 0xa;	/* 10 bytes */
> +			/* Additional sense code */
> +			sense[12] = asc;
> +		}
> +	}
> +
> +	send_iu(iue, sizeof(iu->srp.rsp) + SRP_RSP_SENSE_DATA_LEN,
> +		VIOSRP_SRP_FORMAT);
> +
> +	return 0;
> +}
> +
> +static int send_adapter_info(struct iu_entry *iue,
> +			     dma_addr_t remote_buffer, u16 length)
> +{
> +	struct srp_target *target = iue->target;
> +	struct ibmvscsis_adapter *adapter = target->ldata;
> +	dma_addr_t data_token;
> +	struct mad_adapter_info_data *info;
> +	int err;
> +
> +	info = dma_alloc_coherent(&adapter->dma_dev->dev, sizeof(*info),
> +				  &data_token, GFP_KERNEL);
> +	if (!info) {
> +		printk(KERN_ERR "bad dma_alloc_coherent %p\n", target);
> +		return 1;
> +	}
> +
> +	/* Get remote info */
> +	err = h_copy_rdma(sizeof(*info), adapter->riobn, remote_buffer,
> +			  adapter->liobn, data_token);
> +	if (err == H_SUCCESS) {
> +		printk(KERN_INFO "Client connect: %s (%d)\n",
> +		       info->partition_name, info->partition_number);
> +	}
> +
> +	memset(info, 0, sizeof(*info));
> +
> +	strcpy(info->srp_version, "16.a");
> +	strncpy(info->partition_name, partition_name,
> +		sizeof(info->partition_name));
> +	info->partition_number = partition_number;
> +	info->mad_version = 1;
> +	info->os_type = 2;
> +	info->port_max_txu[0] = DEFAULT_MAX_SECTORS << 9;
> +
> +	/* Send our info to remote */
> +	err = h_copy_rdma(sizeof(*info), adapter->liobn, data_token,
> +			  adapter->riobn, remote_buffer);
> +
> +	dma_free_coherent(&adapter->dma_dev->dev, sizeof(*info), info,
> +			  data_token);
> +	if (err != H_SUCCESS) {
> +		printk(KERN_INFO "Error sending adapter info %d\n", err);
> +		return 1;
> +	}
> +
> +	return 0;
> +}
> +
> +static int process_mad_iu(struct iu_entry *iue)
> +{
> +	union viosrp_iu *iu = vio_iu(iue);
> +	struct viosrp_adapter_info *info;
> +	struct viosrp_host_config *conf;
> +
> +	switch (iu->mad.empty_iu.common.type) {
> +	case VIOSRP_EMPTY_IU_TYPE:
> +		printk(KERN_ERR "%s\n", "Unsupported EMPTY MAD IU");
> +		break;
> +	case VIOSRP_ERROR_LOG_TYPE:
> +		printk(KERN_ERR "%s\n", "Unsupported ERROR LOG MAD IU");
> +		iu->mad.error_log.common.status = 1;
> +		send_iu(iue, sizeof(iu->mad.error_log),	VIOSRP_MAD_FORMAT);
> +		break;
> +	case VIOSRP_ADAPTER_INFO_TYPE:
> +		info = &iu->mad.adapter_info;
> +		info->common.status = send_adapter_info(iue, info->buffer,
> +							info->common.length);
> +		send_iu(iue, sizeof(*info), VIOSRP_MAD_FORMAT);
> +		break;
> +	case VIOSRP_HOST_CONFIG_TYPE:
> +		conf = &iu->mad.host_config;
> +		conf->common.status = 1;
> +		send_iu(iue, sizeof(*conf), VIOSRP_MAD_FORMAT);
> +		break;
> +	default:
> +		printk(KERN_ERR "Unknown type %u\n", iu->srp.rsp.opcode);
> +	}
> +
> +	return 1;
> +}
> +
> +static void process_login(struct iu_entry *iue)
> +{
> +	union viosrp_iu *iu = vio_iu(iue);
> +	struct srp_login_rsp *rsp = &iu->srp.login_rsp;
> +	u64 tag = iu->srp.rsp.tag;
> +
> +	/* TODO handle case that requested size is wrong and
> +	 * buffer format is wrong
> +	 */
> +	memset(iu, 0, sizeof(struct srp_login_rsp));
> +	rsp->opcode = SRP_LOGIN_RSP;
> +	rsp->req_lim_delta = INITIAL_SRP_LIMIT;
> +	rsp->tag = tag;
> +	rsp->max_it_iu_len = sizeof(union srp_iu);
> +	rsp->max_ti_iu_len = sizeof(union srp_iu);
> +	/* direct and indirect */
> +	rsp->buf_fmt = SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT;
> +
> +	send_iu(iue, sizeof(*rsp), VIOSRP_SRP_FORMAT);
> +}
> +
> +static int process_srp_iu(struct iu_entry *iue)
> +{
> +	union viosrp_iu *iu = vio_iu(iue);
> +	struct srp_target *target = iue->target;
> +	int done = 1;
> +	u8 opcode = iu->srp.rsp.opcode;
> +	unsigned long flags;
> +
> +	switch (opcode) {
> +	case SRP_LOGIN_REQ:
> +		process_login(iue);
> +		break;
> +	case SRP_TSK_MGMT:
> +		/* done = process_tsk_mgmt(iue); */
> +		break;
> +	case SRP_CMD:
> +		spin_lock_irqsave(&target->lock, flags);
> +		list_add_tail(&iue->ilist, &target->cmd_queue);
> +		spin_unlock_irqrestore(&target->lock, flags);
> +		done = 0;
> +		break;
> +	case SRP_LOGIN_RSP:
> +	case SRP_I_LOGOUT:
> +	case SRP_T_LOGOUT:
> +	case SRP_RSP:
> +	case SRP_CRED_REQ:
> +	case SRP_CRED_RSP:
> +	case SRP_AER_REQ:
> +	case SRP_AER_RSP:
> +		printk(KERN_ERR "Unsupported type %u\n", opcode);
> +		break;
> +	default:
> +		printk(KERN_ERR "Unknown type %u\n", opcode);
> +	}
> +
> +	return done;
> +}
> +
> +static void process_iu(struct viosrp_crq *crq,
> +		       struct ibmvscsis_adapter *adapter)
> +{
> +	struct iu_entry *iue;
> +	long err;
> +	int done = 1;
> +
> +	iue = srp_iu_get(&adapter->srpt);
> +	if (!iue) {
> +		printk(KERN_ERR "Error getting IU from pool\n");
> +		return;
> +	}
> +
> +	iue->remote_token = crq->IU_data_ptr;
> +
> +	err = h_copy_rdma(crq->IU_length, adapter->riobn,
> +			  iue->remote_token, adapter->liobn, iue->sbuf->dma);
> +
> +	if (err != H_SUCCESS) {
> +		printk(KERN_ERR "%ld transferring data error %p\n", err, iue);
> +		goto out;
> +	}
> +
> +	if (crq->format == VIOSRP_MAD_FORMAT)
> +		done = process_mad_iu(iue);
> +	else
> +		done = process_srp_iu(iue);
> +out:
> +	if (done)
> +		srp_iu_put(iue);
> +}
> +
> +static void process_crq(struct viosrp_crq *crq,
> +			struct ibmvscsis_adapter *adapter)
> +{
> +	switch (crq->valid) {
> +	case 0xC0:
> +		/* initialization */
> +		switch (crq->format) {
> +		case 0x01:
> +			h_send_crq(adapter->dma_dev->unit_address,
> +				   0xC002000000000000, 0);
> +			break;
> +		case 0x02:
> +			break;
> +		default:
> +			printk(KERN_ERR "Unknown format %u\n", crq->format);
> +		}
> +		break;
> +	case 0xFF:
> +		/* transport event */
> +		break;
> +	case 0x80:
> +		/* real payload */
> +		switch (crq->format) {
> +		case VIOSRP_SRP_FORMAT:
> +		case VIOSRP_MAD_FORMAT:
> +			process_iu(crq, adapter);
> +			break;
> +		case VIOSRP_OS400_FORMAT:
> +		case VIOSRP_AIX_FORMAT:
> +		case VIOSRP_LINUX_FORMAT:
> +		case VIOSRP_INLINE_FORMAT:
> +			printk(KERN_ERR "Unsupported format %u\n", crq->format);
> +			break;
> +		default:
> +			printk(KERN_ERR "Unknown format %u\n", crq->format);
> +		}
> +		break;
> +	default:
> +		printk(KERN_ERR "unknown message type 0x%02x!?\n", crq->valid);
> +	}
> +}
> +
> +static inline struct viosrp_crq *next_crq(struct crq_queue *queue)
> +{
> +	struct viosrp_crq *crq;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&queue->lock, flags);
> +	crq = &queue->msgs[queue->cur];
> +	if (crq->valid & 0x80) {
> +		if (++queue->cur == queue->size)
> +			queue->cur = 0;
> +	} else
> +		crq = NULL;
> +	spin_unlock_irqrestore(&queue->lock, flags);
> +
> +	return crq;
> +}
> +
> +static int tcm_queuecommand(struct ibmvscsis_adapter *adapter,
> +			    struct ibmvscsis_cmnd *vsc,
> +			    struct srp_cmd *cmd)
> +{
> +	struct se_cmd *se_cmd;
> +	int attr;
> +	int data_len;
> +	int ret;
> +
> +	switch (cmd->task_attr) {
> +	case SRP_SIMPLE_TASK:
> +		attr = MSG_SIMPLE_TAG;
> +		break;
> +	case SRP_ORDERED_TASK:
> +		attr = MSG_ORDERED_TAG;
> +		break;
> +	case SRP_HEAD_TASK:
> +		attr = MSG_HEAD_TAG;
> +		break;
> +	default:
> +		printk(KERN_WARNING "Task attribute %d not supported\n",
> +		       cmd->task_attr);
> +		attr = MSG_SIMPLE_TAG;
> +	}
> +
> +	data_len = srp_data_length(cmd, srp_cmd_direction(cmd));
> +
> +	se_cmd = &vsc->se_cmd;
> +
> +	transport_init_se_cmd(se_cmd,
> +			      adapter->se_tpg.se_tpg_tfo,
> +			      adapter->se_sess, data_len,
> +			      srp_cmd_direction(cmd),
> +			      attr, vsc->sense_buf);
> +
> +	ret = transport_get_lun_for_cmd(se_cmd, NULL, cmd->lun);
> +	if (ret) {
> +		printk(KERN_ERR "invalid lun %u\n", GETLUN(cmd->lun));
> +		transport_send_check_condition_and_sense(se_cmd,
> +							 se_cmd->scsi_sense_reason,
> +							 0);
> +		return ret;
> +	}
> +
> +	transport_device_setup_cmd(se_cmd);
> +	transport_generic_handle_cdb_map(se_cmd);
> +
> +	return 0;
> +}
> +
> +static int ibmvscsis_new_cmd_map(struct se_cmd *se_cmd)
> +{
> +	struct ibmvscsis_cmnd *cmd =
> +		container_of(se_cmd, struct ibmvscsis_cmnd, se_cmd);
> +	struct scsi_cmnd *sc = &cmd->sc;
> +	struct iu_entry *iue = (struct iu_entry *)sc->SCp.ptr;
> +	struct srp_cmd *scmd = iue->sbuf->buf;
> +	int ret;
> +
> +	/*
> +	 * Allocate the necessary tasks to complete the received CDB+data
> +	 */
> +	ret = transport_generic_allocate_tasks(se_cmd, scmd->cdb);
> +	if (ret == -1) {
> +		/* Out of Resources */
> +		return PYX_TRANSPORT_LU_COMM_FAILURE;
> +	} else if (ret == -2) {
> +		/*
> +		 * Handle case for SAM_STAT_RESERVATION_CONFLICT
> +		 */
> +		if (se_cmd->se_cmd_flags & SCF_SCSI_RESERVATION_CONFLICT)
> +			return PYX_TRANSPORT_RESERVATION_CONFLICT;
> +		/*
> +		 * Otherwise, return SAM_STAT_CHECK_CONDITION and return
> +		 * sense data
> +		 */
> +		return PYX_TRANSPORT_USE_SENSE_REASON;
> +	}
> +
> +	return 0;
> +}
> +
> +static void ibmvscsis_check_stop_free(struct se_cmd *se_cmd)
> +{
> +	if (se_cmd->se_tmr_req)
> +		return;
> +	transport_generic_free_cmd(se_cmd, 0, 1, 0);
> +}
> +
> +static u64 scsi_lun_to_int(u64 lun)
> +{
> +	if (GETBUS(lun) || GETLUN(lun))
> +		return ~0UL;
> +	else
> +		return GETTARGET(lun);
> +}
> +
> +struct inquiry_data {
> +	u8 qual_type;
> +	u8 rmb_reserve;
> +	u8 version;
> +	u8 aerc_naca_hisup_format;
> +	u8 addl_len;
> +	u8 sccs_reserved;
> +	u8 bque_encserv_vs_multip_mchngr_reserved;
> +	u8 reladr_reserved_linked_cmdqueue_vs;
> +	char vendor[8];
> +	char product[16];
> +	char revision[4];
> +	char vendor_specific[20];
> +	char reserved1[2];
> +	char version_descriptor[16];
> +	char reserved2[22];
> +	char unique[158];
> +};
> +
> +static u64 make_lun(unsigned int bus, unsigned int target, unsigned int lun)
> +{
> +	u16 result = (0x8000 |
> +			   ((target & 0x003f) << 8) |
> +			   ((bus & 0x0007) << 5) |
> +			   (lun & 0x001f));
> +	return ((u64) result) << 48;
> +}
> +
> +static int ibmvscsis_inquery(struct ibmvscsis_adapter *adapter,
> +			      struct srp_cmd *cmd, char *data)
> +{
> +	struct se_portal_group *se_tpg = &adapter->se_tpg;
> +	struct inquiry_data *id = (struct inquiry_data *)data;
> +	u64 unpacked_lun, lun = cmd->lun;
> +	u8 *cdb = cmd->cdb;
> +	int len;
> +
> +	if (!data)
> +		printk(KERN_INFO "%s %d: oomu\n", __func__, __LINE__);
> +
> +	if (((cdb[1] & 0x3) == 0x3) || (!(cdb[1] & 0x3) && cdb[2])) {
> +		printk(KERN_INFO "%s %d: invalid req\n", __func__, __LINE__);
> +		return 0;
> +	}
> +
> +	if (cdb[1] & 0x3)
> +		printk(KERN_INFO "%s %d: needs the normal path\n",
> +		       __func__, __LINE__);
> +	else {
> +		id->qual_type = TYPE_DISK;
> +		id->rmb_reserve = 0x00;
> +		id->version = 0x84; /* ISO/IE */
> +		id->aerc_naca_hisup_format = 0x22; /* naca & fmt 0x02 */
> +		id->addl_len = sizeof(*id) - 4;
> +		id->bque_encserv_vs_multip_mchngr_reserved = 0x00;
> +		id->reladr_reserved_linked_cmdqueue_vs = 0x02; /* CMDQ */
> +		memcpy(id->vendor, "IBM	    ", 8);
> +		/*
> +		 * Don't even ask about the next bit.  AIX uses
> +		 * hardcoded device naming to recognize device types
> +		 * and their client won't  work unless we use VOPTA and
> +		 * VDASD.
> +		 */
> +		if (id->qual_type == TYPE_ROM)
> +			memcpy(id->product, "VOPTA blkdev    ", 16);
> +		else
> +			memcpy(id->product, "VDASD blkdev    ", 16);
> +
> +		memcpy(id->revision, "0001", 4);
> +
> +		snprintf(id->unique, sizeof(id->unique),
> +			 "IBM-VSCSI-%s-P%d-%x-%d-%d-%d\n",
> +			 system_id,
> +			 partition_number,
> +			 adapter->dma_dev->unit_address,
> +			 GETBUS(lun),
> +			 GETTARGET(lun),
> +			 GETLUN(lun));
> +	}
> +
> +	len = min_t(int, sizeof(*id), cdb[4]);
> +
> +	unpacked_lun = scsi_lun_to_int(cmd->lun);
> +
> +	spin_lock(&se_tpg->tpg_lun_lock);
> +
> +	if (unpacked_lun < TRANSPORT_MAX_LUNS_PER_TPG &&
> +	    se_tpg->tpg_lun_list[unpacked_lun].lun_status ==
> +	    TRANSPORT_LUN_STATUS_ACTIVE)
> +		;
> +	else
> +		data[0] = TYPE_NO_LUN;
> +
> +	spin_unlock(&se_tpg->tpg_lun_lock);
> +
> +	return len;
> +}
> +
> +static int ibmvscsis_mode_sense(struct ibmvscsis_adapter *adapter,
> +				struct srp_cmd *cmd, char *mode)
> +{
> +	int bytes;
> +	struct se_portal_group *se_tpg = &adapter->se_tpg;
> +	u64 unpacked_lun;
> +	struct se_lun *lun;
> +	u32 blocks;
> +
> +	unpacked_lun = scsi_lun_to_int(cmd->lun);
> +
> +	spin_lock(&se_tpg->tpg_lun_lock);
> +
> +	lun = &se_tpg->tpg_lun_list[unpacked_lun];
> +
> +	blocks = TRANSPORT(lun->lun_se_dev)->get_blocks(lun->lun_se_dev);
> +
> +	spin_unlock(&se_tpg->tpg_lun_lock);
> +
> +	switch (cmd->cdb[2]) {
> +	case 0:
> +	case 0x3f:
> +		mode[1] = 0x00;	/* Default medium */
> +		/* if (iue->req.vd->b.ro) */
> +		if (0)
> +			mode[2] = 0x80;	/* device specific  */
> +		else
> +			mode[2] = 0x00;	/* device specific  */
> +
> +		/* note the DPOFUA bit is set to zero! */
> +		mode[3] = 0x08;	/* block descriptor length */
> +		*((u32 *) &mode[4]) = blocks - 1;
> +		*((u32 *) &mode[8]) = 512;
> +		bytes = mode[0] = 12;	/* length */
> +		break;
> +
> +	case 0x08: /* Cache page */
> +		/* length should be 4 */
> +#if 0
> +		if (cmd->cdb[4] != 4
> +		    && cmd->cdb[4] != 0x20) {
> +			send_rsp(iue, ILLEGAL_REQUEST, 0x20);
> +			dma_free_coherent(iue->adapter->dev,
> +					  MODE_SENSE_BUFFER_SIZE,
> +					  mode, data_token);
> +			return FREE_IU;
> +		}
> +#endif
> +

Can this #if 0 block be dropped?

> +		mode[1] = 0x00;	/* Default medium */
> +		if (0)
> +			mode[2] = 0x80;	/* device specific */
> +		else
> +			mode[2] = 0x00;	/* device specific */
> +
> +		/* note the DPOFUA bit is set to zero! */
> +		mode[3] = 0x08;	/* block descriptor length */
> +		*((u32 *) &mode[4]) = blocks - 1;
> +		*((u32 *) &mode[8]) = 512;
> +
> +		/* Cache page */
> +		mode[12] = 0x08;    /* page */
> +		mode[13] = 0x12;    /* page length */
> +		mode[14] = 0x01;    /* no cache (0x04 for read/write cache) */
> +
> +		bytes = mode[0] = 12 + mode[13];	/* length */
> +		break;
> +	}
> +
> +	return bytes;
> +}
> +
> +static int ibmvscsis_report_luns(struct ibmvscsis_adapter *adapter,
> +				 struct srp_cmd *cmd, u64 *data)
> +{
> +	u64 lun;
> +	struct se_portal_group *se_tpg = &adapter->se_tpg;
> +	int i, idx;
> +	int alen, oalen, nr_luns, rbuflen = 4096;
> +
> +	alen = get_unaligned_be32(&cmd->cdb[6]);
> +
> +	alen &= ~(8 - 1);
> +	oalen = alen;
> +
> +	if (cmd->lun) {
> +		nr_luns = 1;
> +		goto done;
> +	}
> +
> +	alen -= 8;
> +	rbuflen -= 8; /* FIXME */
> +	idx = 2;
> +	nr_luns = 1;
> +
> +	spin_lock(&se_tpg->tpg_lun_lock);
> +	for (i = 0; i < 255; i++) {
> +		if (se_tpg->tpg_lun_list[i].lun_status !=
> +		    TRANSPORT_LUN_STATUS_ACTIVE)
> +			continue;
> +
> +		lun = make_lun(0, i & 0x003f, 0);
> +		data[idx++] = cpu_to_be64(lun);
> +		alen -= 8;
> +		if (!alen)
> +			break;
> +		rbuflen -= 8;
> +		if (!rbuflen)
> +			break;
> +
> +		nr_luns++;
> +	}
> +	spin_unlock(&se_tpg->tpg_lun_lock);
> +done:
> +	put_unaligned_be32(nr_luns * 8, data);
> +	return min(oalen, nr_luns * 8 + 8);
> +}
> +
> +static int ibmvscsis_rdma(struct scsi_cmnd *sc, struct scatterlist *sg, int nsg,
> +			  struct srp_direct_buf *md, int nmd,
> +			  enum dma_data_direction dir, unsigned int rest)
> +{
> +	struct iu_entry *iue = (struct iu_entry *) sc->SCp.ptr;
> +	struct srp_target *target = iue->target;
> +	struct ibmvscsis_adapter *adapter = target->ldata;
> +	dma_addr_t token;
> +	long err;
> +	unsigned int done = 0;
> +	int i, sidx, soff;
> +
> +	sidx = soff = 0;
> +	token = sg_dma_address(sg + sidx);
> +
> +	for (i = 0; i < nmd && rest; i++) {
> +		unsigned int mdone, mlen;
> +
> +		mlen = min(rest, md[i].len);
> +		for (mdone = 0; mlen;) {
> +			int slen = min(sg_dma_len(sg + sidx) - soff, mlen);
> +
> +			if (dir == DMA_TO_DEVICE)
> +				err = h_copy_rdma(slen,
> +						  adapter->riobn,
> +						  md[i].va + mdone,
> +						  adapter->liobn,
> +						  token + soff);
> +			else
> +				err = h_copy_rdma(slen,
> +						  adapter->liobn,
> +						  token + soff,
> +						  adapter->riobn,
> +						  md[i].va + mdone);
> +
> +			if (err != H_SUCCESS) {
> +				printk(KERN_ERR "rdma error %d %d %ld\n",
> +				       dir, slen, err);
> +				return -EIO;
> +			}
> +
> +			mlen -= slen;
> +			mdone += slen;
> +			soff += slen;
> +			done += slen;
> +
> +			if (soff == sg_dma_len(sg + sidx)) {
> +				sidx++;
> +				soff = 0;
> +				token = sg_dma_address(sg + sidx);
> +
> +				if (sidx > nsg) {
> +					printk(KERN_ERR "out of sg %p %d %d\n",
> +						iue, sidx, nsg);
> +					return -EIO;
> +				}
> +			}
> +		};
> +
> +		rest -= mlen;
> +	}
> +	return 0;
> +}
> +
> +static int ibmvscsis_cmd_done(struct scsi_cmnd *sc)
> +{
> +	unsigned long flags;
> +	struct iu_entry *iue = (struct iu_entry *) sc->SCp.ptr;
> +	struct srp_target *target = iue->target;
> +	int err = 0;
> +
> +	if (scsi_sg_count(sc))
> +		err = srp_transfer_data(sc, &vio_iu(iue)->srp.cmd,
> +					ibmvscsis_rdma, 1, 1);
> +
> +	spin_lock_irqsave(&target->lock, flags);
> +	list_del(&iue->ilist);
> +	spin_unlock_irqrestore(&target->lock, flags);
> +
> +	if (err || sc->result != SAM_STAT_GOOD) {
> +		printk(KERN_ERR "operation failed %p %d %x\n",
> +		       iue, sc->result, vio_iu(iue)->srp.cmd.cdb[0]);
> +		send_rsp(iue, sc, HARDWARE_ERROR, 0x00);
> +	} else
> +		send_rsp(iue, sc, NO_SENSE, 0x00);
> +
> +	/* done(sc); */
> +	srp_iu_put(iue);
> +	return 0;
> +}
> +
> +struct ibmvscsis_cmd {
> +	/* Used for libsrp processing callbacks */
> +	struct scsi_cmnd sc;
> +	/* Used for TCM Core operations */
> +	struct se_cmd se_cmd;
> +	/* Sense buffer that will be mapped into outgoing status */
> +	unsigned char sense_buf[TRANSPORT_SENSE_BUFFER];
> +};
> +
> +static int ibmvscsis_write_pending(struct se_cmd *se_cmd)
> +{
> +	struct ibmvscsis_cmnd *cmd = container_of(se_cmd,
> +			struct ibmvscsis_cmnd, se_cmd);
> +	struct scsi_cmnd *sc = &cmd->sc;
> +	struct iu_entry *iue = (struct iu_entry *) sc->SCp.ptr;
> +	int ret;
> +
> +	sc->sdb.length = se_cmd->data_length;
> +
> +	if ((se_cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB) ||
> +	    (se_cmd->se_cmd_flags & SCF_SCSI_CONTROL_SG_IO_CDB)) {
> +		transport_do_task_sg_chain(se_cmd);
> +
> +		sc->sdb.table.nents = T_TASK(se_cmd)->t_tasks_sg_chained_no;
> +		sc->sdb.table.sgl = T_TASK(se_cmd)->t_tasks_sg_chained;
> +	} else if (se_cmd->se_cmd_flags & SCF_SCSI_CONTROL_NONSG_IO_CDB) {
> +		/*
> +		 * Use T_TASK(se_cmd)->t_tasks_sg_bounce for control CDBs
> +		 * using a contigious buffer
> +		 */
> +		sg_init_table(&T_TASK(se_cmd)->t_tasks_sg_bounce, 1);
> +		sg_set_buf(&T_TASK(se_cmd)->t_tasks_sg_bounce,
> +			T_TASK(se_cmd)->t_task_buf, se_cmd->data_length);
> +
> +		sc->sdb.table.nents = 1;
> +		sc->sdb.table.sgl = &T_TASK(se_cmd)->t_tasks_sg_bounce;
> +	}
> +
> +	ret = srp_transfer_data(sc, &vio_iu(iue)->srp.cmd,
> +				ibmvscsis_rdma, 1, 1);
> +	if (ret) {
> +		printk(KERN_ERR "srp_transfer_data() failed: %d\n", ret);
> +		return PYX_TRANSPORT_LU_COMM_FAILURE;
> +	}
> +	/*
> +	 * We now tell TCM to add this WRITE CDB directly into the TCM storage
> +	 * object execution queue.
> +	 */
> +	transport_generic_process_write(se_cmd);
> +	return 0;
> +}
> +
> +static int ibmvscsis_queue_data_in(struct se_cmd *se_cmd)
> +{
> +	struct ibmvscsis_cmnd *cmd = container_of(se_cmd,
> +			struct ibmvscsis_cmnd, se_cmd);
> +	struct scsi_cmnd *sc = &cmd->sc;
> +	/*
> +	 * Check for overflow residual count
> +	 */
> +	if (se_cmd->se_cmd_flags & SCF_OVERFLOW_BIT)
> +		scsi_set_resid(sc, se_cmd->residual_count);
> +
> +	sc->sdb.length = se_cmd->data_length;
> +
> +	/*
> +	 * Setup the struct se_task->task_sg[] chained SG list
> +	 */
> +	if ((se_cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB) ||
> +	    (se_cmd->se_cmd_flags & SCF_SCSI_CONTROL_SG_IO_CDB)) {
> +		transport_do_task_sg_chain(se_cmd);
> +
> +		sc->sdb.table.nents = T_TASK(se_cmd)->t_tasks_sg_chained_no;
> +		sc->sdb.table.sgl = T_TASK(se_cmd)->t_tasks_sg_chained;
> +	} else if (se_cmd->se_cmd_flags & SCF_SCSI_CONTROL_NONSG_IO_CDB) {
> +		/*
> +		 * Use T_TASK(se_cmd)->t_tasks_sg_bounce for control CDBs
> +		 * using a contigious buffer
> +		 */
> +		sg_init_table(&T_TASK(se_cmd)->t_tasks_sg_bounce, 1);
> +		sg_set_buf(&T_TASK(se_cmd)->t_tasks_sg_bounce,
> +			T_TASK(se_cmd)->t_task_buf, se_cmd->data_length);
> +
> +		sc->sdb.table.nents = 1;
> +		sc->sdb.table.sgl = &T_TASK(se_cmd)->t_tasks_sg_bounce;
> +	}
> +	/*
> +	 * Perform the SCSI READ data transfer from sc->sdb.table into
> +	 * VIO LPAR memory.  This will occur via libsrp in the
> +	 * ibmvscsis_rdma() callback
> +	 */
> +#if 0
> +	ret = srp_transfer_data(sc, &vio_iu(iue)->srp.cmd,
> +				ibmvscsis_rdma, 1, 1);
> +	if (ret) {
> +		printk(KERN_ERR "srp_transfer_data() failed: %d, returning"
> +				" DID_ERROR\n", ret);
> +		sc->result = host_byte(DID_ERROR) | se_cmd->scsi_status;
> +	} else
> +		sc->result = host_byte(DID_OK) | se_cmd->scsi_status;
> +#endif

Mmmm, is this really supposed to be disabled..?

> +	/*
> +	 * This will call srp_transfer_data() and post the response
> +	 * to VIO via libsrp.
> +	 */
> +	ibmvscsis_cmd_done(sc);
> +	return 0;
> +}
> +
> +static int ibmvscsis_queue_status(struct se_cmd *se_cmd)
> +{
> +	struct ibmvscsis_cmnd *cmd = container_of(se_cmd,
> +						  struct ibmvscsis_cmnd, se_cmd);
> +	struct scsi_cmnd *sc = &cmd->sc;
> +	/*
> +	 * Copy any generated SENSE data into sc->sense_buffer and
> +	 * set the appropiate sc->result to be translated by
> +	 * ibmvscsis_cmd_done()
> +	 */
> +	if (se_cmd->sense_buffer &&
> +	   ((se_cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) ||
> +	    (se_cmd->se_cmd_flags & SCF_EMULATED_TASK_SENSE))) {
> +		memcpy((void *)sc->sense_buffer, (void *)se_cmd->sense_buffer,
> +				SCSI_SENSE_BUFFERSIZE);
> +		sc->result = host_byte(DID_OK) | driver_byte(DRIVER_SENSE) |
> +				SAM_STAT_CHECK_CONDITION;
> +	} else
> +		sc->result = host_byte(DID_OK) | se_cmd->scsi_status;
> +	/*
> +	 * Finally post the response to VIO via libsrp.
> +	 */
> +	ibmvscsis_cmd_done(sc);
> +	return 0;
> +}
> +
> +static int ibmvscsis_queuecommand(struct ibmvscsis_adapter *adapter,
> +				  struct iu_entry *iue)
> +{
> +	int data_len;
> +	struct srp_cmd *cmd = iue->sbuf->buf;
> +	struct scsi_cmnd *sc;
> +	struct page *pg;
> +	struct ibmvscsis_cmnd *vsc;
> +
> +	data_len = srp_data_length(cmd, srp_cmd_direction(cmd));
> +
> +	vsc = kzalloc(sizeof(*vsc), GFP_KERNEL);
> +	sc = &vsc->sc;
> +	sc->sense_buffer = vsc->sense_buf;
> +	sc->cmnd = cmd->cdb;
> +	sc->SCp.ptr = (char *)iue;
> +
> +	switch (cmd->cdb[0]) {
> +	case INQUIRY:
> +		sg_alloc_table(&sc->sdb.table, 1, GFP_KERNEL);
> +		pg = alloc_page(GFP_KERNEL|__GFP_ZERO);
> +		sc->sdb.length = ibmvscsis_inquery(adapter, cmd,
> +						   page_address(pg));
> +		sg_set_page(sc->sdb.table.sgl, pg, sc->sdb.length, 0);
> +		ibmvscsis_cmd_done(sc);
> +		sg_free_table(&sc->sdb.table);
> +		__free_page(pg);
> +		kfree(vsc);
> +		break;
> +	case REPORT_LUNS:
> +		sg_alloc_table(&sc->sdb.table, 1, GFP_KERNEL);
> +		pg = alloc_page(GFP_KERNEL|__GFP_ZERO);
> +		sc->sdb.length = ibmvscsis_report_luns(adapter, cmd,
> +						       page_address(pg));
> +		sg_set_page(sc->sdb.table.sgl, pg, sc->sdb.length, 0);
> +		ibmvscsis_cmd_done(sc);
> +		sg_free_table(&sc->sdb.table);
> +		__free_page(pg);
> +		kfree(vsc);
> +		break;
> +	case MODE_SENSE:
> +		/* fixme: needs to use tcm */
> +		sg_alloc_table(&sc->sdb.table, 1, GFP_KERNEL);
> +		pg = alloc_page(GFP_KERNEL|__GFP_ZERO);
> +		sc->sdb.length = ibmvscsis_mode_sense(adapter,
> +						      cmd, page_address(pg));
> +		sg_set_page(sc->sdb.table.sgl, pg, sc->sdb.length, 0);
> +		ibmvscsis_cmd_done(sc);
> +		sg_free_table(&sc->sdb.table);
> +		__free_page(pg);
> +		kfree(vsc);
> +		break;

Just to verify again, INQUIRY and REPORT_LUNS are the only CDBs that
have a hard requirement of being intercepted in order for VSCSI to
function, right..?

> +	default:
> +		tcm_queuecommand(adapter, vsc, cmd);
> +		break;
> +	}
> +
> +	return 0;
> +}
> +

I think everything else looks quite reasonable.

Thanks Tomo-san!

--nab


^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-10 12:21 ` [PATCH 3/3] tcm ibmvscsis driver FUJITA Tomonori
  2011-02-10 19:03   ` Nicholas A. Bellinger
@ 2011-02-10 19:15   ` Brian King
  2011-02-10 19:38     ` Nicholas A. Bellinger
  2011-02-14  1:42     ` FUJITA Tomonori
  2011-02-14  7:16   ` Bart Van Assche
  2 siblings, 2 replies; 81+ messages in thread
From: Brian King @ 2011-02-10 19:15 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: linux-scsi, nab

On 02/10/2011 06:21 AM, FUJITA Tomonori wrote:

> +static int process_mad_iu(struct iu_entry *iue)
> +{
> +	union viosrp_iu *iu = vio_iu(iue);
> +	struct viosrp_adapter_info *info;
> +	struct viosrp_host_config *conf;
> +
> +	switch (iu->mad.empty_iu.common.type) {
> +	case VIOSRP_EMPTY_IU_TYPE:
> +		printk(KERN_ERR "%s\n", "Unsupported EMPTY MAD IU");
> +		break;
> +	case VIOSRP_ERROR_LOG_TYPE:
> +		printk(KERN_ERR "%s\n", "Unsupported ERROR LOG MAD IU");
> +		iu->mad.error_log.common.status = 1;
> +		send_iu(iue, sizeof(iu->mad.error_log),	VIOSRP_MAD_FORMAT);
> +		break;
> +	case VIOSRP_ADAPTER_INFO_TYPE:
> +		info = &iu->mad.adapter_info;
> +		info->common.status = send_adapter_info(iue, info->buffer,
> +							info->common.length);
> +		send_iu(iue, sizeof(*info), VIOSRP_MAD_FORMAT);
> +		break;
> +	case VIOSRP_HOST_CONFIG_TYPE:
> +		conf = &iu->mad.host_config;
> +		conf->common.status = 1;
> +		send_iu(iue, sizeof(*conf), VIOSRP_MAD_FORMAT);
> +		break;
> +	default:
> +		printk(KERN_ERR "Unknown type %u\n", iu->srp.rsp.opcode);

We should be sending back the MAD not supported response here (0xF1):

iu->mad.common.status = 0xF1;
send_iu(iue, sizeof(iu->mad.common), VIOSRP_MAD_FORMAT);

Should also be doing this for the unsupported MAD types above, otherwise
the client never receives a response to these commands and times them out.


> +	}
> +
> +	return 1;
> +}
> +


> +
> +static int ibmvscsis_new_cmd_map(struct se_cmd *se_cmd)
> +{
> +	struct ibmvscsis_cmnd *cmd =
> +		container_of(se_cmd, struct ibmvscsis_cmnd, se_cmd);
> +	struct scsi_cmnd *sc = &cmd->sc;
> +	struct iu_entry *iue = (struct iu_entry *)sc->SCp.ptr;
> +	struct srp_cmd *scmd = iue->sbuf->buf;
> +	int ret;
> +
> +	/*
> +	 * Allocate the necessary tasks to complete the received CDB+data
> +	 */
> +	ret = transport_generic_allocate_tasks(se_cmd, scmd->cdb);
> +	if (ret == -1) {
> +		/* Out of Resources */
> +		return PYX_TRANSPORT_LU_COMM_FAILURE;
> +	} else if (ret == -2) {
> +		/*
> +		 * Handle case for SAM_STAT_RESERVATION_CONFLICT
> +		 */
> +		if (se_cmd->se_cmd_flags & SCF_SCSI_RESERVATION_CONFLICT)
> +			return PYX_TRANSPORT_RESERVATION_CONFLICT;

Does this imply the driver supports scsi reservations? If it supports SCSI-2 reservations,
there is a capability that could be set in the capabilities MAD to advertise this
fact such that it can be used by the client.

> +		/*
> +		 * Otherwise, return SAM_STAT_CHECK_CONDITION and return
> +		 * sense data
> +		 */
> +		return PYX_TRANSPORT_USE_SENSE_REASON;
> +	}
> +
> +	return 0;
> +}
> +

> +
> +static int ibmvscsis_inquery(struct ibmvscsis_adapter *adapter,

/inquery/inquiry/

> +			      struct srp_cmd *cmd, char *data)
> +{
> +	struct se_portal_group *se_tpg = &adapter->se_tpg;
> +	struct inquiry_data *id = (struct inquiry_data *)data;
> +	u64 unpacked_lun, lun = cmd->lun;
> +	u8 *cdb = cmd->cdb;
> +	int len;
> +
> +	if (!data)
> +		printk(KERN_INFO "%s %d: oomu\n", __func__, __LINE__);
> +
> +	if (((cdb[1] & 0x3) == 0x3) || (!(cdb[1] & 0x3) && cdb[2])) {
> +		printk(KERN_INFO "%s %d: invalid req\n", __func__, __LINE__);
> +		return 0;
> +	}
> +
> +	if (cdb[1] & 0x3)
> +		printk(KERN_INFO "%s %d: needs the normal path\n",
> +		       __func__, __LINE__);
> +	else {
> +		id->qual_type = TYPE_DISK;
> +		id->rmb_reserve = 0x00;
> +		id->version = 0x84; /* ISO/IE */
> +		id->aerc_naca_hisup_format = 0x22; /* naca & fmt 0x02 */
> +		id->addl_len = sizeof(*id) - 4;
> +		id->bque_encserv_vs_multip_mchngr_reserved = 0x00;
> +		id->reladr_reserved_linked_cmdqueue_vs = 0x02; /* CMDQ */
> +		memcpy(id->vendor, "IBM	    ", 8);
> +		/*
> +		 * Don't even ask about the next bit.  AIX uses
> +		 * hardcoded device naming to recognize device types
> +		 * and their client won't  work unless we use VOPTA and
> +		 * VDASD.
> +		 */
> +		if (id->qual_type == TYPE_ROM)
> +			memcpy(id->product, "VOPTA blkdev    ", 16);
> +		else
> +			memcpy(id->product, "VDASD blkdev    ", 16);
> +
> +		memcpy(id->revision, "0001", 4);
> +
> +		snprintf(id->unique, sizeof(id->unique),
> +			 "IBM-VSCSI-%s-P%d-%x-%d-%d-%d\n",
> +			 system_id,
> +			 partition_number,
> +			 adapter->dma_dev->unit_address,
> +			 GETBUS(lun),
> +			 GETTARGET(lun),
> +			 GETLUN(lun));
> +	}

Do we have any persistent unique identifying data which we could use to
build a page 0x83 response such that persistent naming would work in the client?

> +
> +	len = min_t(int, sizeof(*id), cdb[4]);
> +
> +	unpacked_lun = scsi_lun_to_int(cmd->lun);
> +
> +	spin_lock(&se_tpg->tpg_lun_lock);
> +
> +	if (unpacked_lun < TRANSPORT_MAX_LUNS_PER_TPG &&
> +	    se_tpg->tpg_lun_list[unpacked_lun].lun_status ==
> +	    TRANSPORT_LUN_STATUS_ACTIVE)
> +		;
> +	else
> +		data[0] = TYPE_NO_LUN;
> +
> +	spin_unlock(&se_tpg->tpg_lun_lock);
> +
> +	return len;
> +}

Thanks!

Brian

-- 
Brian King
Linux on Power Virtualization
IBM Linux Technology Center



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-10 19:15   ` Brian King
@ 2011-02-10 19:38     ` Nicholas A. Bellinger
  2011-02-11 21:13       ` Brian King
  2011-02-14  1:42       ` FUJITA Tomonori
  2011-02-14  1:42     ` FUJITA Tomonori
  1 sibling, 2 replies; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-02-10 19:38 UTC (permalink / raw)
  To: Brian King; +Cc: FUJITA Tomonori, linux-scsi

On Thu, 2011-02-10 at 13:15 -0600, Brian King wrote:
> On 02/10/2011 06:21 AM, FUJITA Tomonori wrote:

Hi Brian,

<SNIP>

> > +
> > +static int ibmvscsis_new_cmd_map(struct se_cmd *se_cmd)
> > +{
> > +	struct ibmvscsis_cmnd *cmd =
> > +		container_of(se_cmd, struct ibmvscsis_cmnd, se_cmd);
> > +	struct scsi_cmnd *sc = &cmd->sc;
> > +	struct iu_entry *iue = (struct iu_entry *)sc->SCp.ptr;
> > +	struct srp_cmd *scmd = iue->sbuf->buf;
> > +	int ret;
> > +
> > +	/*
> > +	 * Allocate the necessary tasks to complete the received CDB+data
> > +	 */
> > +	ret = transport_generic_allocate_tasks(se_cmd, scmd->cdb);
> > +	if (ret == -1) {
> > +		/* Out of Resources */
> > +		return PYX_TRANSPORT_LU_COMM_FAILURE;
> > +	} else if (ret == -2) {
> > +		/*
> > +		 * Handle case for SAM_STAT_RESERVATION_CONFLICT
> > +		 */
> > +		if (se_cmd->se_cmd_flags & SCF_SCSI_RESERVATION_CONFLICT)
> > +			return PYX_TRANSPORT_RESERVATION_CONFLICT;
> 
> Does this imply the driver supports scsi reservations? If it supports SCSI-2 reservations,
> there is a capability that could be set in the capabilities MAD to advertise this
> fact such that it can be used by the client.
> 

Absolutely, the target core struct se_device backends being serviced as
VSCSI LUN would normally return SCSI-3 from INQUIRY data, and support
the full set of persistent reservations and CRH=1 (Compatible
Reservations Handling) in drivers/target/target_core_pr.c code.

Also, we are able to explicitly limit individual struct se_device
backends to disable PR and only use the older SCSI-2 reservations, but I
don't think we have a way to do this on a fabric module wide basis just
yet.

Is this something we should be doing for this code..?

--nab



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 0/3] ibmvscsis driver rewrite
       [not found]   ` <1297363312.18212.153.camel@haakon2.linux-iscsi.org>
@ 2011-02-10 21:22     ` Bart Van Assche
  0 siblings, 0 replies; 81+ messages in thread
From: Bart Van Assche @ 2011-02-10 21:22 UTC (permalink / raw)
  To: linux-scsi
  Cc: Hannes Reinecke, FUJITA Tomonori, Benjamin Herrenschmidt,
	Alexander Graf, Stefan Hajnoczi

On Thu, Feb 10, 2011 at 7:41 PM, Nicholas A. Bellinger
<nab@linux-iscsi.org> wrote:
>
> On Thu, 2011-02-10 at 13:48 +0100, Hannes Reinecke wrote:
> >
> > On 02/10/2011 01:21 PM, FUJITA Tomonori wrote:
> > > Finally, I have the working driver. We are ready to remove the old
> > > ibmvstgt driver.
> > >
> > Hmm. Wouldn't it be possible to use it as a template for a
> > virtio-srp target?
> > From what I've seen we 'just' need to modify it to use virtio
> > instead for pSeries rdma callbacks.
> > And then we would have a virtio-srp target onto which any kvm guests
> > could connect to.

One should keep in mind that there exist two kinds of SRP drivers for Linux:
- Those based on synchronous data transfer calls - calls that only
return after the entire transfer has finished. The ibmvscsi, ibmvscsis
drivers and the libsrp library fall in this category.
- Those based on asynchronous data transfers - calls that return
immediately and where a callback function is invoked to report
completion. ib_srp and ib_srpt fall into this category (ib_srp is
upstream, ib_srpt not yet).

As far as I know the virtio interface uses callbacks and hence is an
asynchronous interface ? If so, the InfiniBand drivers might be a
better starting point.

Bart.

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-10 19:38     ` Nicholas A. Bellinger
@ 2011-02-11 21:13       ` Brian King
  2011-02-12 20:27         ` James Bottomley
  2011-02-14  1:42       ` FUJITA Tomonori
  1 sibling, 1 reply; 81+ messages in thread
From: Brian King @ 2011-02-11 21:13 UTC (permalink / raw)
  To: Nicholas A. Bellinger; +Cc: FUJITA Tomonori, linux-scsi

On 02/10/2011 01:38 PM, Nicholas A. Bellinger wrote:
> On Thu, 2011-02-10 at 13:15 -0600, Brian King wrote:
>> On 02/10/2011 06:21 AM, FUJITA Tomonori wrote:
> 
> Hi Brian,
> 
> <SNIP>
> 
>>> +
>>> +static int ibmvscsis_new_cmd_map(struct se_cmd *se_cmd)
>>> +{
>>> +	struct ibmvscsis_cmnd *cmd =
>>> +		container_of(se_cmd, struct ibmvscsis_cmnd, se_cmd);
>>> +	struct scsi_cmnd *sc = &cmd->sc;
>>> +	struct iu_entry *iue = (struct iu_entry *)sc->SCp.ptr;
>>> +	struct srp_cmd *scmd = iue->sbuf->buf;
>>> +	int ret;
>>> +
>>> +	/*
>>> +	 * Allocate the necessary tasks to complete the received CDB+data
>>> +	 */
>>> +	ret = transport_generic_allocate_tasks(se_cmd, scmd->cdb);
>>> +	if (ret == -1) {
>>> +		/* Out of Resources */
>>> +		return PYX_TRANSPORT_LU_COMM_FAILURE;
>>> +	} else if (ret == -2) {
>>> +		/*
>>> +		 * Handle case for SAM_STAT_RESERVATION_CONFLICT
>>> +		 */
>>> +		if (se_cmd->se_cmd_flags & SCF_SCSI_RESERVATION_CONFLICT)
>>> +			return PYX_TRANSPORT_RESERVATION_CONFLICT;
>>
>> Does this imply the driver supports scsi reservations? If it supports SCSI-2 reservations,
>> there is a capability that could be set in the capabilities MAD to advertise this
>> fact such that it can be used by the client.
>>
> 
> Absoulutely, the target core struct se_device backends being serviced as
> VSCSI LUN would normally return SCSI-3 from INQUIRY data, and support
> the full set of persistent reservations and CRH=1 (Compatible
> Reservations Handling) in drivers/target/target_core_pr.c code.
> 
> Also, we are able to explictly limit individual struct se_device
> backends to disable PR and only use the older SCSI-2 reservations, but I
> don't think we have a way to do this on a fabric module wide basis just
> yet.
> 
> Is this something we should be doing for this code..?

Disregard my previous comment. It looks like current client should handle reservations
just fine without any further changes.

Thanks,

Brian

-- 
Brian King
Linux on Power Virtualization
IBM Linux Technology Center



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-11 21:13       ` Brian King
@ 2011-02-12 20:27         ` James Bottomley
  2011-03-07  4:41           ` FUJITA Tomonori
  0 siblings, 1 reply; 81+ messages in thread
From: James Bottomley @ 2011-02-12 20:27 UTC (permalink / raw)
  To: Brian King; +Cc: Nicholas A. Bellinger, FUJITA Tomonori, linux-scsi

On Fri, 2011-02-11 at 15:13 -0600, Brian King wrote:
> On 02/10/2011 01:38 PM, Nicholas A. Bellinger wrote:
> > On Thu, 2011-02-10 at 13:15 -0600, Brian King wrote:
> >> On 02/10/2011 06:21 AM, FUJITA Tomonori wrote:
> > 
> > Hi Brian,
> > 
> > <SNIP>
> > 
> >>> +
> >>> +static int ibmvscsis_new_cmd_map(struct se_cmd *se_cmd)
> >>> +{
> >>> +	struct ibmvscsis_cmnd *cmd =
> >>> +		container_of(se_cmd, struct ibmvscsis_cmnd, se_cmd);
> >>> +	struct scsi_cmnd *sc = &cmd->sc;
> >>> +	struct iu_entry *iue = (struct iu_entry *)sc->SCp.ptr;
> >>> +	struct srp_cmd *scmd = iue->sbuf->buf;
> >>> +	int ret;
> >>> +
> >>> +	/*
> >>> +	 * Allocate the necessary tasks to complete the received CDB+data
> >>> +	 */
> >>> +	ret = transport_generic_allocate_tasks(se_cmd, scmd->cdb);
> >>> +	if (ret == -1) {
> >>> +		/* Out of Resources */
> >>> +		return PYX_TRANSPORT_LU_COMM_FAILURE;
> >>> +	} else if (ret == -2) {
> >>> +		/*
> >>> +		 * Handle case for SAM_STAT_RESERVATION_CONFLICT
> >>> +		 */
> >>> +		if (se_cmd->se_cmd_flags & SCF_SCSI_RESERVATION_CONFLICT)
> >>> +			return PYX_TRANSPORT_RESERVATION_CONFLICT;
> >>
> >> Does this imply the driver supports scsi reservations? If it supports SCSI-2 reservations,
> >> there is a capability that could be set in the capabilities MAD to advertise this
> >> fact such that it can be used by the client.
> >>
> > 
> > Absoulutely, the target core struct se_device backends being serviced as
> > VSCSI LUN would normally return SCSI-3 from INQUIRY data, and support
> > the full set of persistent reservations and CRH=1 (Compatible
> > Reservations Handling) in drivers/target/target_core_pr.c code.
> > 
> > Also, we are able to explictly limit individual struct se_device
> > backends to disable PR and only use the older SCSI-2 reservations, but I
> > don't think we have a way to do this on a fabric module wide basis just
> > yet.
> > 
> > Is this something we should be doing for this code..?
> 
> Disregard my previous comment. It looks like current client should handle reservations
> just fine without any further changes.

So is that an ack for putting this in scsi-misc ... or did you want to
do more testing first?

James



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 0/3] ibmvscsis driver rewrite
  2011-02-10 18:34 ` [PATCH 0/3] ibmvscsis driver rewrite Nicholas A. Bellinger
@ 2011-02-14  1:36   ` FUJITA Tomonori
  2011-02-14  8:48     ` Nicholas A. Bellinger
  0 siblings, 1 reply; 81+ messages in thread
From: FUJITA Tomonori @ 2011-02-14  1:36 UTC (permalink / raw)
  To: nab; +Cc: fujita.tomonori, linux-scsi

On Thu, 10 Feb 2011 10:34:44 -0800
"Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:

> Also, would you mind sending along 'tree', 'dmesg' and 'lsmod' output of
> a functional ibmvscsis system so we can add a proper LIO wiki
> entry..?  :-)

I've uploaded the log files:

http://www.kernel.org/pub/linux/kernel/people/tomo/ibmvscsis/

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-10 19:03   ` Nicholas A. Bellinger
@ 2011-02-14  1:36     ` FUJITA Tomonori
  2011-02-14  3:26     ` FUJITA Tomonori
  1 sibling, 0 replies; 81+ messages in thread
From: FUJITA Tomonori @ 2011-02-14  1:36 UTC (permalink / raw)
  To: nab; +Cc: fujita.tomonori, linux-scsi

On Thu, 10 Feb 2011 11:03:11 -0800
"Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:

> > +#define	INITIAL_SRP_LIMIT	16
> > +#define	DEFAULT_MAX_SECTORS	256
> > +
> 
> Ok, these two hardcoded defs can  be converted into context specific
> fabric dependent configfs attributes (TF_CIT_TMPL(fabric)->tfc_* below)
> using target_core_fabric_configfs.h wrappers..

I think that these values are used when you load the driver. No chance
to configure them via configfs.


> > +static char *ibmvscsis_parse_pr_out_transport_id(struct se_portal_group *se_tpg,
> > +						 const char *buf,
> > +						 u32 *out_tid_len,
> > +						 char **port_nexus_ptr)
> > +{
> > +	return NULL;
> > +}
> > +
> 
> OK, I still need to add generic SCSI_PROTOCOL_SRP handlers for PR
> TransportID processing, and get up and running with TCM_Loop SRP target
> ports.  I will look at getting this resolved in LIO upstream in the near
> future, and make sure this gets converted..

I don't care much.

FYI, ibmvscsis initiators don't share a target device. So PR is
pointless in reality.



> > +static int ibmvscsis_queue_tm_rsp(struct se_cmd *se_cmd)
> > +{
> > +	return 0;
> > +}
> > +
> 
> The TM request/response logic still needs to be hooked up, right..?

Yeah, I'll add soon.


> > +	case 0x08: /* Cache page */
> > +		/* length should be 4 */
> > +#if 0
> > +		if (cmd->cdb[4] != 4
> > +		    && cmd->cdb[4] != 0x20) {
> > +			send_rsp(iue, ILLEGAL_REQUEST, 0x20);
> > +			dma_free_coherent(iue->adapter->dev,
> > +					  MODE_SENSE_BUFFER_SIZE,
> > +					  mode, data_token);
> > +			return FREE_IU;
> > +		}
> > +#endif
> > +
> 
> Can this be dropped..?

Yeah, I had better add the proper error handling though.


> > +static int ibmvscsis_queue_data_in(struct se_cmd *se_cmd)
> > +{
> > +	struct ibmvscsis_cmnd *cmd = container_of(se_cmd,
> > +			struct ibmvscsis_cmnd, se_cmd);
> > +	struct scsi_cmnd *sc = &cmd->sc;
> > +	/*
> > +	 * Check for overflow residual count
> > +	 */
> > +	if (se_cmd->se_cmd_flags & SCF_OVERFLOW_BIT)
> > +		scsi_set_resid(sc, se_cmd->residual_count);
> > +
> > +	sc->sdb.length = se_cmd->data_length;
> > +
> > +	/*
> > +	 * Setup the struct se_task->task_sg[] chained SG list
> > +	 */
> > +	if ((se_cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB) ||
> > +	    (se_cmd->se_cmd_flags & SCF_SCSI_CONTROL_SG_IO_CDB)) {
> > +		transport_do_task_sg_chain(se_cmd);
> > +
> > +		sc->sdb.table.nents = T_TASK(se_cmd)->t_tasks_sg_chained_no;
> > +		sc->sdb.table.sgl = T_TASK(se_cmd)->t_tasks_sg_chained;
> > +	} else if (se_cmd->se_cmd_flags & SCF_SCSI_CONTROL_NONSG_IO_CDB) {
> > +		/*
> > +		 * Use T_TASK(se_cmd)->t_tasks_sg_bounce for control CDBs
> > +		 * using a contigious buffer
> > +		 */
> > +		sg_init_table(&T_TASK(se_cmd)->t_tasks_sg_bounce, 1);
> > +		sg_set_buf(&T_TASK(se_cmd)->t_tasks_sg_bounce,
> > +			T_TASK(se_cmd)->t_task_buf, se_cmd->data_length);
> > +
> > +		sc->sdb.table.nents = 1;
> > +		sc->sdb.table.sgl = &T_TASK(se_cmd)->t_tasks_sg_bounce;
> > +	}
> > +	/*
> > +	 * Perform the SCSI READ data transfer from sc->sdb.table into
> > +	 * VIO LPAR memory.  This will occur via libsrp in the
> > +	 * ibmvscsis_rdma() callback
> > +	 */
> > +#if 0
> > +	ret = srp_transfer_data(sc, &vio_iu(iue)->srp.cmd,
> > +				ibmvscsis_rdma, 1, 1);
> > +	if (ret) {
> > +		printk(KERN_ERR "srp_transfer_data() failed: %d, returning"
> > +				" DID_ERROR\n", ret);
> > +		sc->result = host_byte(DID_ERROR) | se_cmd->scsi_status;
> > +	} else
> > +		sc->result = host_byte(DID_OK) | se_cmd->scsi_status;
> > +#endif
> 
> Mmmm, is this really supposed to be disabled..?

ibmvscsis_cmd_done calls srp_transfer_data(). I should remove the
above.


> > +	/*
> > +	 * This will call srp_transfer_data() and post the response
> > +	 * to VIO via libsrp.
> > +	 */
> > +	ibmvscsis_cmd_done(sc);
> > +	return 0;
> > +}
> > +


> > +	case REPORT_LUNS:
> > +		sg_alloc_table(&sc->sdb.table, 1, GFP_KERNEL);
> > +		pg = alloc_page(GFP_KERNEL|__GFP_ZERO);
> > +		sc->sdb.length = ibmvscsis_report_luns(adapter, cmd,
> > +						       page_address(pg));
> > +		sg_set_page(sc->sdb.table.sgl, pg, sc->sdb.length, 0);
> > +		ibmvscsis_cmd_done(sc);
> > +		sg_free_table(&sc->sdb.table);
> > +		__free_page(pg);
> > +		kfree(vsc);
> > +		break;
> > +	case MODE_SENSE:
> > +		/* fixme: needs to use tcm */
> > +		sg_alloc_table(&sc->sdb.table, 1, GFP_KERNEL);
> > +		pg = alloc_page(GFP_KERNEL|__GFP_ZERO);
> > +		sc->sdb.length = ibmvscsis_mode_sense(adapter,
> > +						      cmd, page_address(pg));
> > +		sg_set_page(sc->sdb.table.sgl, pg, sc->sdb.length, 0);
> > +		ibmvscsis_cmd_done(sc);
> > +		sg_free_table(&sc->sdb.table);
> > +		__free_page(pg);
> > +		kfree(vsc);
> > +		break;
> 
> Just to verify again, INQUIRY and REPORT_LUNS are the only CDBs that
> have a hard requirement of being intercepted in order for VSCSI to
> function, right..?

tcm MODE_SENSE needed to be modified slightly to handle
ibmvscsis. It's a todo item.


^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-10 19:15   ` Brian King
  2011-02-10 19:38     ` Nicholas A. Bellinger
@ 2011-02-14  1:42     ` FUJITA Tomonori
  1 sibling, 0 replies; 81+ messages in thread
From: FUJITA Tomonori @ 2011-02-14  1:42 UTC (permalink / raw)
  To: brking; +Cc: fujita.tomonori, linux-scsi, nab

On Thu, 10 Feb 2011 13:15:29 -0600
Brian King <brking@linux.vnet.ibm.com> wrote:

> On 02/10/2011 06:21 AM, FUJITA Tomonori wrote:
> 
> > +static int process_mad_iu(struct iu_entry *iue)
> > +{
> > +	union viosrp_iu *iu = vio_iu(iue);
> > +	struct viosrp_adapter_info *info;
> > +	struct viosrp_host_config *conf;
> > +
> > +	switch (iu->mad.empty_iu.common.type) {
> > +	case VIOSRP_EMPTY_IU_TYPE:
> > +		printk(KERN_ERR "%s\n", "Unsupported EMPTY MAD IU");
> > +		break;
> > +	case VIOSRP_ERROR_LOG_TYPE:
> > +		printk(KERN_ERR "%s\n", "Unsupported ERROR LOG MAD IU");
> > +		iu->mad.error_log.common.status = 1;
> > +		send_iu(iue, sizeof(iu->mad.error_log),	VIOSRP_MAD_FORMAT);
> > +		break;
> > +	case VIOSRP_ADAPTER_INFO_TYPE:
> > +		info = &iu->mad.adapter_info;
> > +		info->common.status = send_adapter_info(iue, info->buffer,
> > +							info->common.length);
> > +		send_iu(iue, sizeof(*info), VIOSRP_MAD_FORMAT);
> > +		break;
> > +	case VIOSRP_HOST_CONFIG_TYPE:
> > +		conf = &iu->mad.host_config;
> > +		conf->common.status = 1;
> > +		send_iu(iue, sizeof(*conf), VIOSRP_MAD_FORMAT);
> > +		break;
> > +	default:
> > +		printk(KERN_ERR "Unknown type %u\n", iu->srp.rsp.opcode);
> 
> We should be sending back the MAD not supported response here (0xF1):
> 
> iu->mad.common.status = 0xF1;
> send_iu(iue, sizeof(iu->mad.common), VIOSRP_MAD_FORMAT);
> 
> Should also be doing this for the unsupported MAD types above, otherwise
> the client never receives a response to these commands and times them out.

I'll fix.


> > +static int ibmvscsis_inquery(struct ibmvscsis_adapter *adapter,
> 
> /inquery/inquiry/

Oops.


> > +			      struct srp_cmd *cmd, char *data)
> > +{
> > +	struct se_portal_group *se_tpg = &adapter->se_tpg;
> > +	struct inquiry_data *id = (struct inquiry_data *)data;
> > +	u64 unpacked_lun, lun = cmd->lun;
> > +	u8 *cdb = cmd->cdb;
> > +	int len;
> > +
> > +	if (!data)
> > +		printk(KERN_INFO "%s %d: oomu\n", __func__, __LINE__);
> > +
> > +	if (((cdb[1] & 0x3) == 0x3) || (!(cdb[1] & 0x3) && cdb[2])) {
> > +		printk(KERN_INFO "%s %d: invalid req\n", __func__, __LINE__);
> > +		return 0;
> > +	}
> > +
> > +	if (cdb[1] & 0x3)
> > +		printk(KERN_INFO "%s %d: needs the normal path\n",
> > +		       __func__, __LINE__);
> > +	else {
> > +		id->qual_type = TYPE_DISK;
> > +		id->rmb_reserve = 0x00;
> > +		id->version = 0x84; /* ISO/IE */
> > +		id->aerc_naca_hisup_format = 0x22; /* naca & fmt 0x02 */
> > +		id->addl_len = sizeof(*id) - 4;
> > +		id->bque_encserv_vs_multip_mchngr_reserved = 0x00;
> > +		id->reladr_reserved_linked_cmdqueue_vs = 0x02; /* CMDQ */
> > +		memcpy(id->vendor, "IBM	    ", 8);
> > +		/*
> > +		 * Don't even ask about the next bit.  AIX uses
> > +		 * hardcoded device naming to recognize device types
> > +		 * and their client won't  work unless we use VOPTA and
> > +		 * VDASD.
> > +		 */
> > +		if (id->qual_type == TYPE_ROM)
> > +			memcpy(id->product, "VOPTA blkdev    ", 16);
> > +		else
> > +			memcpy(id->product, "VDASD blkdev    ", 16);
> > +
> > +		memcpy(id->revision, "0001", 4);
> > +
> > +		snprintf(id->unique, sizeof(id->unique),
> > +			 "IBM-VSCSI-%s-P%d-%x-%d-%d-%d\n",
> > +			 system_id,
> > +			 partition_number,
> > +			 adapter->dma_dev->unit_address,
> > +			 GETBUS(lun),
> > +			 GETTARGET(lun),
> > +			 GETLUN(lun));
> > +	}
> 
> Do we have any persistent unique identifying data which we could use to
> build a page 0x83 response such that persistent naming would work in the client?

I guess that we could enable it to be configured via configfs.

Thanks,

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-10 19:38     ` Nicholas A. Bellinger
  2011-02-11 21:13       ` Brian King
@ 2011-02-14  1:42       ` FUJITA Tomonori
  1 sibling, 0 replies; 81+ messages in thread
From: FUJITA Tomonori @ 2011-02-14  1:42 UTC (permalink / raw)
  To: nab; +Cc: brking, fujita.tomonori, linux-scsi

On Thu, 10 Feb 2011 11:38:59 -0800
"Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:

> Also, we are able to explictly limit individual struct se_device
> backends to disable PR and only use the older SCSI-2 reservations, but I
> don't think we have a way to do this on a fabric module wide basis just
> yet.

As I wrote in another mail, multiple initiators don't connect to the
same target.

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-10 19:03   ` Nicholas A. Bellinger
  2011-02-14  1:36     ` FUJITA Tomonori
@ 2011-02-14  3:26     ` FUJITA Tomonori
  2011-02-14  9:01       ` Nicholas A. Bellinger
  1 sibling, 1 reply; 81+ messages in thread
From: FUJITA Tomonori @ 2011-02-14  3:26 UTC (permalink / raw)
  To: nab; +Cc: fujita.tomonori, linux-scsi

On Thu, 10 Feb 2011 11:03:11 -0800
"Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:

> > +static int ibmvscsis_queue_data_in(struct se_cmd *se_cmd)
> > +{
> > +	struct ibmvscsis_cmnd *cmd = container_of(se_cmd,
> > +			struct ibmvscsis_cmnd, se_cmd);
> > +	struct scsi_cmnd *sc = &cmd->sc;
> > +	/*
> > +	 * Check for overflow residual count
> > +	 */
> > +	if (se_cmd->se_cmd_flags & SCF_OVERFLOW_BIT)
> > +		scsi_set_resid(sc, se_cmd->residual_count);
> > +
> > +	sc->sdb.length = se_cmd->data_length;
> > +
> > +	/*
> > +	 * Setup the struct se_task->task_sg[] chained SG list
> > +	 */
> > +	if ((se_cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB) ||
> > +	    (se_cmd->se_cmd_flags & SCF_SCSI_CONTROL_SG_IO_CDB)) {
> > +		transport_do_task_sg_chain(se_cmd);
> > +
> > +		sc->sdb.table.nents = T_TASK(se_cmd)->t_tasks_sg_chained_no;
> > +		sc->sdb.table.sgl = T_TASK(se_cmd)->t_tasks_sg_chained;
> > +	} else if (se_cmd->se_cmd_flags & SCF_SCSI_CONTROL_NONSG_IO_CDB) {
> > +		/*
> > +		 * Use T_TASK(se_cmd)->t_tasks_sg_bounce for control CDBs
> > +		 * using a contigious buffer
> > +		 */
> > +		sg_init_table(&T_TASK(se_cmd)->t_tasks_sg_bounce, 1);
> > +		sg_set_buf(&T_TASK(se_cmd)->t_tasks_sg_bounce,
> > +			T_TASK(se_cmd)->t_task_buf, se_cmd->data_length);
> > +
> > +		sc->sdb.table.nents = 1;
> > +		sc->sdb.table.sgl = &T_TASK(se_cmd)->t_tasks_sg_bounce;
> > +	}
> > +	/*
> > +	 * Perform the SCSI READ data transfer from sc->sdb.table into
> > +	 * VIO LPAR memory.  This will occur via libsrp in the
> > +	 * ibmvscsis_rdma() callback

btw, can we kill the non scatter/gather data path? I think that we
should always use the scatter/gather data transfer.

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-10 12:21 ` [PATCH 3/3] tcm ibmvscsis driver FUJITA Tomonori
  2011-02-10 19:03   ` Nicholas A. Bellinger
  2011-02-10 19:15   ` Brian King
@ 2011-02-14  7:16   ` Bart Van Assche
  2011-02-14  9:11     ` FUJITA Tomonori
  2 siblings, 1 reply; 81+ messages in thread
From: Bart Van Assche @ 2011-02-14  7:16 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: linux-scsi, nab

On Thu, Feb 10, 2011 at 1:21 PM, FUJITA Tomonori
<fujita.tomonori@lab.ntt.co.jp> wrote:
> +       switch (cmd->task_attr) {
> +       case SRP_SIMPLE_TASK:
> +               attr = MSG_SIMPLE_TAG;
> +               break;
> +       case SRP_ORDERED_TASK:
> +               attr = MSG_ORDERED_TAG;
> +               break;
> +       case SRP_HEAD_TASK:
> +               attr = MSG_HEAD_TAG;
> +               break;
> +       default:
> +               printk(KERN_WARNING "Task attribute %d not supported\n",
> +                      cmd->task_attr);
> +               attr = MSG_SIMPLE_TAG;
> +       }
> +
> +       data_len = srp_data_length(cmd, srp_cmd_direction(cmd));
> +
> +       se_cmd = &vsc->se_cmd;
> +
> +       transport_init_se_cmd(se_cmd,
> +                             adapter->se_tpg.se_tpg_tfo,
> +                             adapter->se_sess, data_len,
> +                             srp_cmd_direction(cmd),
> +                             attr, vsc->sense_buf);

Hi Tomo,

Are you sure that it is fine to pass values like MSG_SIMPLE_TAG as the
sixth argument of transport_init_se_cmd() ? As far as I know TCM
doesn't support any of the MSG_..._TAG values.

Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 0/3] ibmvscsis driver rewrite
  2011-02-14  1:36   ` FUJITA Tomonori
@ 2011-02-14  8:48     ` Nicholas A. Bellinger
  0 siblings, 0 replies; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-02-14  8:48 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: linux-scsi

On Mon, 2011-02-14 at 10:36 +0900, FUJITA Tomonori wrote:
> On Thu, 10 Feb 2011 10:34:44 -0800
> "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> 
> > Also, would you mind sending along 'tree', 'dmesg' and 'lsmod' output of
> > a functional ibmvscsis system so we can add a proper LIO wiki
> > entry..?  :-)
> 
> I've uploaded the log files:
> 
> http://www.kernel.org/pub/linux/kernel/people/tomo/ibmvscsis/

Thanks Tomo-san!

This information has been added into a new wiki entry here:

http://linux-iscsi.org/wiki/IBM_vSCSI

Please let me know any other related information that should go here.

--nab





^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-14  3:26     ` FUJITA Tomonori
@ 2011-02-14  9:01       ` Nicholas A. Bellinger
  2011-02-14  9:29         ` FUJITA Tomonori
  0 siblings, 1 reply; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-02-14  9:01 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: linux-scsi

On Mon, 2011-02-14 at 12:26 +0900, FUJITA Tomonori wrote:
> On Thu, 10 Feb 2011 11:03:11 -0800
> "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> 
> > > +static int ibmvscsis_queue_data_in(struct se_cmd *se_cmd)
> > > +{
> > > +	struct ibmvscsis_cmnd *cmd = container_of(se_cmd,
> > > +			struct ibmvscsis_cmnd, se_cmd);
> > > +	struct scsi_cmnd *sc = &cmd->sc;
> > > +	/*
> > > +	 * Check for overflow residual count
> > > +	 */
> > > +	if (se_cmd->se_cmd_flags & SCF_OVERFLOW_BIT)
> > > +		scsi_set_resid(sc, se_cmd->residual_count);
> > > +
> > > +	sc->sdb.length = se_cmd->data_length;
> > > +
> > > +	/*
> > > +	 * Setup the struct se_task->task_sg[] chained SG list
> > > +	 */
> > > +	if ((se_cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB) ||
> > > +	    (se_cmd->se_cmd_flags & SCF_SCSI_CONTROL_SG_IO_CDB)) {
> > > +		transport_do_task_sg_chain(se_cmd);
> > > +
> > > +		sc->sdb.table.nents = T_TASK(se_cmd)->t_tasks_sg_chained_no;
> > > +		sc->sdb.table.sgl = T_TASK(se_cmd)->t_tasks_sg_chained;
> > > +	} else if (se_cmd->se_cmd_flags & SCF_SCSI_CONTROL_NONSG_IO_CDB) {
> > > +		/*
> > > +		 * Use T_TASK(se_cmd)->t_tasks_sg_bounce for control CDBs
> > > +		 * using a contigious buffer
> > > +		 */
> > > +		sg_init_table(&T_TASK(se_cmd)->t_tasks_sg_bounce, 1);
> > > +		sg_set_buf(&T_TASK(se_cmd)->t_tasks_sg_bounce,
> > > +			T_TASK(se_cmd)->t_task_buf, se_cmd->data_length);
> > > +
> > > +		sc->sdb.table.nents = 1;
> > > +		sc->sdb.table.sgl = &T_TASK(se_cmd)->t_tasks_sg_bounce;
> > > +	}
> > > +	/*
> > > +	 * Perform the SCSI READ data transfer from sc->sdb.table into
> > > +	 * VIO LPAR memory.  This will occur via libsrp in the
> > > +	 * ibmvscsis_rdma() callback
> 
> btw, can we kill the non scatter/gather data path? I think that we
> should always use the scatter/gather data transfer.

Unfortunately it's not that easy.  The main reason why CDB type
SCF_SCSI_CONTROL_NONSG_IO_CDB was originally added (back in 2.2/2.4
days) was because certain LLDs had a problem with basic control CDBs
using SGLs..

Obviously we are way past that point with drivers/scsi today, but the
main reason today why SCF_SCSI_CONTROL_NONSG_IO_CDB still exists is
because of CDB emulation for complex stuff in target_core_cdb.c.  It has
historically proven much easier to code complex CDB emulation using a
contiguous buffer, than with walking SGL formatted memory.

Converting over the more complex CDB emulation stuff to SGLs would
somewhat painful, at least without adding an extra location allocation +
copy into SGLs (not a big deal for CONTROL CDB stuff), or something else
to obtain a virtually contiguous buffer for building the emulated
response.

--nab



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-14  7:16   ` Bart Van Assche
@ 2011-02-14  9:11     ` FUJITA Tomonori
  2011-02-14  9:18       ` Nicholas A. Bellinger
  2011-02-14 11:50       ` Bart Van Assche
  0 siblings, 2 replies; 81+ messages in thread
From: FUJITA Tomonori @ 2011-02-14  9:11 UTC (permalink / raw)
  To: bvanassche; +Cc: fujita.tomonori, linux-scsi, nab

On Mon, 14 Feb 2011 08:16:45 +0100
Bart Van Assche <bvanassche@acm.org> wrote:

> Are you sure that it is fine to pass values like MSG_SIMPLE_TAG as the
> sixth argument of transport_init_se_cmd() ? As far as I know TCM
> doesn' t support any of the MSG_..._TAG values.

Oops, I'll send the next version soon.

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-14  9:11     ` FUJITA Tomonori
@ 2011-02-14  9:18       ` Nicholas A. Bellinger
  2011-02-14  9:19         ` Nicholas A. Bellinger
  2011-02-14 11:50       ` Bart Van Assche
  1 sibling, 1 reply; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-02-14  9:18 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: bvanassche, linux-scsi

On Mon, 2011-02-14 at 18:11 +0900, FUJITA Tomonori wrote:
> On Mon, 14 Feb 2011 08:16:45 +0100
> Bart Van Assche <bvanassche@acm.org> wrote:
> 
> > Are you sure that it is fine to pass values like MSG_SIMPLE_TAG as the
> > sixth argument of transport_init_se_cmd() ? As far as I know TCM
> > doesn' t support any of the MSG_..._TAG values.
> 
> Oops, I'll send the next version soon.

Hi Tomo,

Target core is actually still using the TASK_ATTR_* defs for SCSI task
attributes from include/scsi/libsas.h..  These should probably be
converted to use something generic, and I think scsi_tcq.h:MSG_* would
sufficient, but appears to be missing some extra SCSI task attrs.

In any event, here is a patch against your rev2 to address the
short-term for lio-core-2.6.git/tcm_ibmvscsis code, please give your
sign-off, and I will get this pushed into LIO upstream.

Thanks!

--nab

diff --git a/drivers/scsi/ibmvscsi/ibmvscsis.c b/drivers/scsi/ibmvscsi/ibmvscsis.c
index 591cedb..36acaa1 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsis.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsis.c
@@ -37,6 +37,7 @@
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_tcq.h>
 #include <scsi/libsrp.h>
+#include <scsi/libsas.h> /* For TASK_ATTR_* */
 #include <generated/utsrelease.h>
 
 #include <target/target_core_base.h>
@@ -855,18 +856,18 @@ static int tcm_queuecommand(struct ibmvscsis_adapter *adapter,
 
        switch (cmd->task_attr) {
        case SRP_SIMPLE_TASK:
-               attr = MSG_SIMPLE_TAG;
+               attr = TASK_ATTR_SIMPLE;
                break;
        case SRP_ORDERED_TASK:
-               attr = MSG_ORDERED_TAG;
+               attr = TASK_ATTR_ORDERED;
                break;
        case SRP_HEAD_TASK:
-               attr = MSG_HEAD_TAG;
+               attr = TASK_ATTR_HOQ;
                break;
        default:
                printk(KERN_WARNING "Task attribute %d not supported\n",
                       cmd->task_attr);
-               attr = MSG_SIMPLE_TAG;
+               attr = TASK_ATTR_ACA;
        }
 
        data_len = srp_data_length(cmd, srp_cmd_direction(cmd));



^ permalink raw reply related	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-14  9:18       ` Nicholas A. Bellinger
@ 2011-02-14  9:19         ` Nicholas A. Bellinger
  2011-02-14  9:31           ` FUJITA Tomonori
  0 siblings, 1 reply; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-02-14  9:19 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: bvanassche, linux-scsi

On Mon, 2011-02-14 at 01:18 -0800, Nicholas A. Bellinger wrote:
> On Mon, 2011-02-14 at 18:11 +0900, FUJITA Tomonori wrote:
> > On Mon, 14 Feb 2011 08:16:45 +0100
> > Bart Van Assche <bvanassche@acm.org> wrote:
> > 
> > > Are you sure that it is fine to pass values like MSG_SIMPLE_TAG as the
> > > sixth argument of transport_init_se_cmd() ? As far as I know TCM
> > > doesn' t support any of the MSG_..._TAG values.
> > 
> > Oops, I'll send the next version soon.
> 
> Hi Tomo,
> 
> Target core is actually still using the TASK_ATTR_* defs for SCSI task
> attributes from include/scsi/libsas.h..  These should probably be
> converted to use something generic, and I think scsi_tcq.h:MSG_* would
> sufficent, but appears to be missing some extra SCSI task attrs.
> 
> In any event, here is a patch against your rev2 to address the
> short-term for lio-core-2.6.git/tcm_ibmvscsis code, please give your
> sign-off, and I will get this pushed into LIO upstream.
> 
> Thanks!
> 
> --nab
> 
> diff --git a/drivers/scsi/ibmvscsi/ibmvscsis.c b/drivers/scsi/ibmvscsi/ibmvscsis.c
> index 591cedb..36acaa1 100644
> --- a/drivers/scsi/ibmvscsi/ibmvscsis.c
> +++ b/drivers/scsi/ibmvscsi/ibmvscsis.c
> @@ -37,6 +37,7 @@
>  #include <scsi/scsi_cmnd.h>
>  #include <scsi/scsi_tcq.h>
>  #include <scsi/libsrp.h>
> +#include <scsi/libsas.h> /* For TASK_ATTR_* */
>  #include <generated/utsrelease.h>
>  
>  #include <target/target_core_base.h>
> @@ -855,18 +856,18 @@ static int tcm_queuecommand(struct ibmvscsis_adapter *adapter,
>  
>         switch (cmd->task_attr) {
>         case SRP_SIMPLE_TASK:
> -               attr = MSG_SIMPLE_TAG;
> +               attr = TASK_ATTR_SIMPLE;
>                 break;
>         case SRP_ORDERED_TASK:
> -               attr = MSG_ORDERED_TAG;
> +               attr = TASK_ATTR_ORDERED;
>                 break;
>         case SRP_HEAD_TASK:
> -               attr = MSG_HEAD_TAG;
> +               attr = TASK_ATTR_HOQ;
>                 break;
>         default:
>                 printk(KERN_WARNING "Task attribute %d not supported\n",
>                        cmd->task_attr);
> -               attr = MSG_SIMPLE_TAG;
> +               attr = TASK_ATTR_ACA;
>         }
>  

Ugh sorry, this last one should be TASK_ATTR_SIMPLE..

--nab


^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-14  9:29         ` FUJITA Tomonori
@ 2011-02-14  9:27           ` Nicholas A. Bellinger
  2011-02-14  9:46             ` FUJITA Tomonori
  0 siblings, 1 reply; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-02-14  9:27 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: linux-scsi

On Mon, 2011-02-14 at 18:29 +0900, FUJITA Tomonori wrote:
> On Mon, 14 Feb 2011 01:01:12 -0800
> "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> 
> > > btw, can we kill the non scatter/gather data path? I think that we
> > > should always use the scatter/gather data transfer.
> > 
> > Unfortuately it's not that easy.  The main reason why CDB type
> > SCF_SCSI_CONTROL_NONSG_IO_CDB was originally added (back in 2.2/2.4
> > days) was because certain LLDs had a problem with basic control CDBs
> > using SGLs..
> > 
> > Obviously we are way past that point with drivers/scsi today, but the
> > main reason today why SCF_SCSI_CONTROL_NONSG_IO_CDB still exists is
> > because of CDB emulation for complex stuff in target_core_cdb.c.  It has
> > historically proven much easier to code complex CDB emulation using a
> > contigious buffer, than with walking SGL formatted memory.
> > 
> > Converting over the more complex CDB emulation stuff to SGLs would
> > somewhat painful, at least without adding an extra location allocation +
> > copy into SGLs (not a big deal for CONTROL CDB stuff),
> 
> lib/scatterlist.c provides nice helper functions to copy data between
> a linear buffer and an SG list. I think that at least, it's more clear
> than now.
> 

Yeah, I think this is going to make the most sense for a proper long
term conversion and removal of SCF_SCSI_CONTROL_NONSG_IO_CDB.

> 
> > or something else
> > to obtain a virtually contigious buffer for building the emulated
> > response.
> 
> Hmm, I don't think that very large contiguous buffer for CONTROL CDB
> so why can't we allocate a physically contiguous buffer for it?
> 

Well, that depends what you consider large..  ;)

> We could play with vmap for a virtually contiguous buffer but it adds
> the complicated cache issue so I don't like that.

Yes, most likely overkill I think..

Best Regards,

--nab




^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-14  9:01       ` Nicholas A. Bellinger
@ 2011-02-14  9:29         ` FUJITA Tomonori
  2011-02-14  9:27           ` Nicholas A. Bellinger
  0 siblings, 1 reply; 81+ messages in thread
From: FUJITA Tomonori @ 2011-02-14  9:29 UTC (permalink / raw)
  To: nab; +Cc: fujita.tomonori, linux-scsi

On Mon, 14 Feb 2011 01:01:12 -0800
"Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:

> > btw, can we kill the non scatter/gather data path? I think that we
> > should always use the scatter/gather data transfer.
> 
> Unfortuately it's not that easy.  The main reason why CDB type
> SCF_SCSI_CONTROL_NONSG_IO_CDB was originally added (back in 2.2/2.4
> days) was because certain LLDs had a problem with basic control CDBs
> using SGLs..
> 
> Obviously we are way past that point with drivers/scsi today, but the
> main reason today why SCF_SCSI_CONTROL_NONSG_IO_CDB still exists is
> because of CDB emulation for complex stuff in target_core_cdb.c.  It has
> historically proven much easier to code complex CDB emulation using a
> contigious buffer, than with walking SGL formatted memory.
> 
> Converting over the more complex CDB emulation stuff to SGLs would
> somewhat painful, at least without adding an extra location allocation +
> copy into SGLs (not a big deal for CONTROL CDB stuff),

lib/scatterlist.c provides nice helper functions to copy data between
a linear buffer and an SG list. I think that at least, it's more clear
than now.


> or something else
> to obtain a virtually contigious buffer for building the emulated
> response.

Hmm, I don't think that a very large contiguous buffer is needed for
CONTROL CDBs, so why can't we allocate a physically contiguous buffer for it?

We could play with vmap for a virtually contiguous buffer but it adds
the complicated cache issue so I don't like that.

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-14  9:31           ` FUJITA Tomonori
@ 2011-02-14  9:29             ` Nicholas A. Bellinger
  0 siblings, 0 replies; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-02-14  9:29 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: bvanassche, linux-scsi

On Mon, 2011-02-14 at 18:31 +0900, FUJITA Tomonori wrote:
> On Mon, 14 Feb 2011 01:19:35 -0800
> "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> 
> > On Mon, 2011-02-14 at 01:18 -0800, Nicholas A. Bellinger wrote:
> > > On Mon, 2011-02-14 at 18:11 +0900, FUJITA Tomonori wrote:
> > > > On Mon, 14 Feb 2011 08:16:45 +0100
> > > > Bart Van Assche <bvanassche@acm.org> wrote:
> > > > 
> > > > > Are you sure that it is fine to pass values like MSG_SIMPLE_TAG as the
> > > > > sixth argument of transport_init_se_cmd() ? As far as I know TCM
> > > > > doesn' t support any of the MSG_..._TAG values.
> > > > 
> > > > Oops, I'll send the next version soon.
> > > 
> > > Hi Tomo,
> > > 
> > > Target core is actually still using the TASK_ATTR_* defs for SCSI task
> > > attributes from include/scsi/libsas.h..  These should probably be
> > > converted to use something generic, and I think scsi_tcq.h:MSG_* would
> > > sufficent, but appears to be missing some extra SCSI task attrs.
> > > 
> > > In any event, here is a patch against your rev2 to address the
> > > short-term for lio-core-2.6.git/tcm_ibmvscsis code, please give your
> > > sign-off, and I will get this pushed into LIO upstream.
> > > 
> > > Thanks!
> > > 
> > > --nab
> > > 
> > > diff --git a/drivers/scsi/ibmvscsi/ibmvscsis.c b/drivers/scsi/ibmvscsi/ibmvscsis.c
> > > index 591cedb..36acaa1 100644
> > > --- a/drivers/scsi/ibmvscsi/ibmvscsis.c
> > > +++ b/drivers/scsi/ibmvscsi/ibmvscsis.c
> > > @@ -37,6 +37,7 @@
> > >  #include <scsi/scsi_cmnd.h>
> > >  #include <scsi/scsi_tcq.h>
> > >  #include <scsi/libsrp.h>
> > > +#include <scsi/libsas.h> /* For TASK_ATTR_* */
> > >  #include <generated/utsrelease.h>
> > >  
> > >  #include <target/target_core_base.h>
> > > @@ -855,18 +856,18 @@ static int tcm_queuecommand(struct ibmvscsis_adapter *adapter,
> > >  
> > >         switch (cmd->task_attr) {
> > >         case SRP_SIMPLE_TASK:
> > > -               attr = MSG_SIMPLE_TAG;
> > > +               attr = TASK_ATTR_SIMPLE;
> > >                 break;
> > >         case SRP_ORDERED_TASK:
> > > -               attr = MSG_ORDERED_TAG;
> > > +               attr = TASK_ATTR_ORDERED;
> > >                 break;
> > >         case SRP_HEAD_TASK:
> > > -               attr = MSG_HEAD_TAG;
> > > +               attr = TASK_ATTR_HOQ;
> > >                 break;
> > >         default:
> > >                 printk(KERN_WARNING "Task attribute %d not supported\n",
> > >                        cmd->task_attr);
> > > -               attr = MSG_SIMPLE_TAG;
> > > +               attr = TASK_ATTR_ACA;
> > >         }
> > >  
> > 
> > Ugh sorry, this last one should be TASK_ATTR_SIMPLE..
> 
> No point to commit this kinda of a fix separately. I'll fold your
> patch and send the third version soon.
> 

Works for me, I will await your rev3 to push into tcm_ibmvscsis with
this particular fix.

Best Regards,

--nab



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-14  9:19         ` Nicholas A. Bellinger
@ 2011-02-14  9:31           ` FUJITA Tomonori
  2011-02-14  9:29             ` Nicholas A. Bellinger
  0 siblings, 1 reply; 81+ messages in thread
From: FUJITA Tomonori @ 2011-02-14  9:31 UTC (permalink / raw)
  To: nab; +Cc: fujita.tomonori, bvanassche, linux-scsi

On Mon, 14 Feb 2011 01:19:35 -0800
"Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:

> On Mon, 2011-02-14 at 01:18 -0800, Nicholas A. Bellinger wrote:
> > On Mon, 2011-02-14 at 18:11 +0900, FUJITA Tomonori wrote:
> > > On Mon, 14 Feb 2011 08:16:45 +0100
> > > Bart Van Assche <bvanassche@acm.org> wrote:
> > > 
> > > > Are you sure that it is fine to pass values like MSG_SIMPLE_TAG as the
> > > > sixth argument of transport_init_se_cmd() ? As far as I know TCM
> > > > doesn' t support any of the MSG_..._TAG values.
> > > 
> > > Oops, I'll send the next version soon.
> > 
> > Hi Tomo,
> > 
> > Target core is actually still using the TASK_ATTR_* defs for SCSI task
> > attributes from include/scsi/libsas.h..  These should probably be
> > converted to use something generic, and I think scsi_tcq.h:MSG_* would
> > sufficent, but appears to be missing some extra SCSI task attrs.
> > 
> > In any event, here is a patch against your rev2 to address the
> > short-term for lio-core-2.6.git/tcm_ibmvscsis code, please give your
> > sign-off, and I will get this pushed into LIO upstream.
> > 
> > Thanks!
> > 
> > --nab
> > 
> > diff --git a/drivers/scsi/ibmvscsi/ibmvscsis.c b/drivers/scsi/ibmvscsi/ibmvscsis.c
> > index 591cedb..36acaa1 100644
> > --- a/drivers/scsi/ibmvscsi/ibmvscsis.c
> > +++ b/drivers/scsi/ibmvscsi/ibmvscsis.c
> > @@ -37,6 +37,7 @@
> >  #include <scsi/scsi_cmnd.h>
> >  #include <scsi/scsi_tcq.h>
> >  #include <scsi/libsrp.h>
> > +#include <scsi/libsas.h> /* For TASK_ATTR_* */
> >  #include <generated/utsrelease.h>
> >  
> >  #include <target/target_core_base.h>
> > @@ -855,18 +856,18 @@ static int tcm_queuecommand(struct ibmvscsis_adapter *adapter,
> >  
> >         switch (cmd->task_attr) {
> >         case SRP_SIMPLE_TASK:
> > -               attr = MSG_SIMPLE_TAG;
> > +               attr = TASK_ATTR_SIMPLE;
> >                 break;
> >         case SRP_ORDERED_TASK:
> > -               attr = MSG_ORDERED_TAG;
> > +               attr = TASK_ATTR_ORDERED;
> >                 break;
> >         case SRP_HEAD_TASK:
> > -               attr = MSG_HEAD_TAG;
> > +               attr = TASK_ATTR_HOQ;
> >                 break;
> >         default:
> >                 printk(KERN_WARNING "Task attribute %d not supported\n",
> >                        cmd->task_attr);
> > -               attr = MSG_SIMPLE_TAG;
> > +               attr = TASK_ATTR_ACA;
> >         }
> >  
> 
> Ugh sorry, this last one should be TASK_ATTR_SIMPLE..

No point in committing this kind of fix separately. I'll fold your
patch and send the third version soon.

Thanks,

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-14  9:27           ` Nicholas A. Bellinger
@ 2011-02-14  9:46             ` FUJITA Tomonori
  2011-02-14  9:51               ` Nicholas A. Bellinger
  0 siblings, 1 reply; 81+ messages in thread
From: FUJITA Tomonori @ 2011-02-14  9:46 UTC (permalink / raw)
  To: nab; +Cc: fujita.tomonori, linux-scsi

On Mon, 14 Feb 2011 01:27:48 -0800
"Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:

> On Mon, 2011-02-14 at 18:29 +0900, FUJITA Tomonori wrote:
> > On Mon, 14 Feb 2011 01:01:12 -0800
> > "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> > 
> > > > btw, can we kill the non scatter/gather data path? I think that we
> > > > should always use the scatter/gather data transfer.
> > > 
> > > Unfortuately it's not that easy.  The main reason why CDB type
> > > SCF_SCSI_CONTROL_NONSG_IO_CDB was originally added (back in 2.2/2.4
> > > days) was because certain LLDs had a problem with basic control CDBs
> > > using SGLs..
> > > 
> > > Obviously we are way past that point with drivers/scsi today, but the
> > > main reason today why SCF_SCSI_CONTROL_NONSG_IO_CDB still exists is
> > > because of CDB emulation for complex stuff in target_core_cdb.c.  It has
> > > historically proven much easier to code complex CDB emulation using a
> > > contigious buffer, than with walking SGL formatted memory.
> > > 
> > > Converting over the more complex CDB emulation stuff to SGLs would
> > > somewhat painful, at least without adding an extra location allocation +
> > > copy into SGLs (not a big deal for CONTROL CDB stuff),
> > 
> > lib/scatterlist.c provides nice helper functions to copy data between
> > a linear buffer and an SG list. I think that at least, it's more clear
> > than now.
> > 
> 
> Yeah, I think this is going to make the most sense for a proper long
> term conversion and removal of SCF_SCSI_CONTROL_NONSG_IO_CDB.

Ok, I take care of this.


> > Hmm, I don't think that very large contiguous buffer for CONTROL CDB
> > so why can't we allocate a physically contiguous buffer for it?
> > 
> 
> Well, that depends what you consider large..  ;)

2 pages is enough for most? REPORT_LUNS might need a large buffer but
I don't think it's so difficult to handle REPORT_LUNS with
scatter/gather.

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-14  9:46             ` FUJITA Tomonori
@ 2011-02-14  9:51               ` Nicholas A. Bellinger
  0 siblings, 0 replies; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-02-14  9:51 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: linux-scsi

On Mon, 2011-02-14 at 18:46 +0900, FUJITA Tomonori wrote:
> On Mon, 14 Feb 2011 01:27:48 -0800
> "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> 
> > On Mon, 2011-02-14 at 18:29 +0900, FUJITA Tomonori wrote:
> > > On Mon, 14 Feb 2011 01:01:12 -0800
> > > "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> > > 
> > > > > btw, can we kill the non scatter/gather data path? I think that we
> > > > > should always use the scatter/gather data transfer.
> > > > 
> > > > Unfortuately it's not that easy.  The main reason why CDB type
> > > > SCF_SCSI_CONTROL_NONSG_IO_CDB was originally added (back in 2.2/2.4
> > > > days) was because certain LLDs had a problem with basic control CDBs
> > > > using SGLs..
> > > > 
> > > > Obviously we are way past that point with drivers/scsi today, but the
> > > > main reason today why SCF_SCSI_CONTROL_NONSG_IO_CDB still exists is
> > > > because of CDB emulation for complex stuff in target_core_cdb.c.  It has
> > > > historically proven much easier to code complex CDB emulation using a
> > > > contigious buffer, than with walking SGL formatted memory.
> > > > 
> > > > Converting over the more complex CDB emulation stuff to SGLs would
> > > > somewhat painful, at least without adding an extra location allocation +
> > > > copy into SGLs (not a big deal for CONTROL CDB stuff),
> > > 
> > > lib/scatterlist.c provides nice helper functions to copy data between
> > > a linear buffer and an SG list. I think that at least, it's more clear
> > > than now.
> > > 
> > 
> > Yeah, I think this is going to make the most sense for a proper long
> > term conversion and removal of SCF_SCSI_CONTROL_NONSG_IO_CDB.
> 
> Ok, I take care of this.
> 
> 

Great, I will await your patches for this particular item..  ;)

> > > Hmm, I don't think that very large contiguous buffer for CONTROL CDB
> > > so why can't we allocate a physically contiguous buffer for it?
> > > 
> > 
> > Well, that depends what you consider large..  ;)
> 
> 2 pages is enough for most? REPORT_LUNS might need a large buffer but
> I don't think it's so difficult to handle REPORT_LUNS with
> scatter/gather.

With existing target_core_cdb.c code I can't really think of a typical
case that would go beyond a 2 page (8192 bytes min) range, unless we are
talking about returning a large number registrations with PRIN
READ_FULL_STATUS or relative target port identifers in
MI_REPORT_TARGET_PGS response payload.

--nab


^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-14  9:11     ` FUJITA Tomonori
  2011-02-14  9:18       ` Nicholas A. Bellinger
@ 2011-02-14 11:50       ` Bart Van Assche
  2011-02-15  3:42         ` FUJITA Tomonori
  1 sibling, 1 reply; 81+ messages in thread
From: Bart Van Assche @ 2011-02-14 11:50 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: linux-scsi, Brian King

On Mon, Feb 14, 2011 at 10:11 AM, FUJITA Tomonori
<fujita.tomonori@lab.ntt.co.jp> wrote:
> On Mon, 14 Feb 2011 08:16:45 +0100
> Bart Van Assche <bvanassche@acm.org> wrote:
>
>> Are you sure that it is fine to pass values like MSG_SIMPLE_TAG as the
>> sixth argument of transport_init_se_cmd() ? As far as I know TCM
>> doesn' t support any of the MSG_..._TAG values.
>
> Oops, I'll send the next version soon.

Hi Tomo,

Two other issues I noticed are:
- send_rsp() sends back an SRP response with req_lim_delta = 1 before
srp_iu_put() is invoked. I think this is a race condition: it can
happen that the SRP initiator has received an SRP_RSP and sends a new
SRP_CMD before srp_iu_put() was invoked. On a heavily loaded system
that can trigger a queue overflow in the target.
- As anyone can see in the driver code (and as specified in the PAPR)
management datagrams (MADs) fall outside the SRP credit mechanism.
Hence not reserving one credit (assuming that the initiator makes sure
there is at most one outstanding MAD) for MAD can - at least in theory
- trigger a queue overflow.

Bart.

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-14 11:50       ` Bart Van Assche
@ 2011-02-15  3:42         ` FUJITA Tomonori
  2011-02-15 19:20           ` Bart Van Assche
  0 siblings, 1 reply; 81+ messages in thread
From: FUJITA Tomonori @ 2011-02-15  3:42 UTC (permalink / raw)
  To: bvanassche; +Cc: fujita.tomonori, linux-scsi, brking

On Mon, 14 Feb 2011 12:50:40 +0100
Bart Van Assche <bvanassche@acm.org> wrote:

> - send_rsp() sends back an SRP response with req_lim_delta = 1 before
> srp_iu_put() is invoked. I think this is a race condition: it can
> happen that the SRP initiator has received an SRP_RSP and sends a new
> SRP_CMD before srp_iu_put() was invoked. On a heavily loaded system
> that can trigger a queue overflow in the target.

Yeah, we had better handle such cases more gracefully.

But I don't think that we hit a critical event such as crash, memory
corruption, etc, with the current code.

In such case, srp_iu_get return NULL, so the target ignores the
request, then eventually the initiator recovers.

Probably, we should set the queue size to INITIAL_SRP_LIMIT + 1.


> - As anyone can see in the driver code (and as specified in the PAPR)
> management datagrams (MADs) fall outside the SRP credit mechanism.
> Hence not reserving one credit (assuming that the initiator makes sure
> there is at most one outstanding MAD) for MAD can - at least in theory
> - trigger a queue overflow.

I think that MAD is used only before the initiator starts real
I/Os. So I don't think that we can hit this issue in reality.


Thanks,

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-15  3:42         ` FUJITA Tomonori
@ 2011-02-15 19:20           ` Bart Van Assche
  2011-02-15 23:21             ` FUJITA Tomonori
  0 siblings, 1 reply; 81+ messages in thread
From: Bart Van Assche @ 2011-02-15 19:20 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: linux-scsi, brking

On Tue, Feb 15, 2011 at 4:42 AM, FUJITA Tomonori
<fujita.tomonori@lab.ntt.co.jp> wrote:
> On Mon, 14 Feb 2011 12:50:40 +0100
> Bart Van Assche <bvanassche@acm.org> wrote:
>
>> - send_rsp() sends back an SRP response with req_lim_delta = 1 before
>> srp_iu_put() is invoked. I think this is a race condition: it can
>> happen that the SRP initiator has received an SRP_RSP and sends a new
>> SRP_CMD before srp_iu_put() was invoked. On a heavily loaded system
>> that can trigger a queue overflow in the target.
>
> Yeah, we had better to handle such case better.
>
> But I don't think that we hit a critical event such as crash, memory
> corruption, etc, with the current code.
>
> In such case, srp_iu_get return NULL, so the target ignores the
> request, then eventually the initiator recovers.

Sorry but I do not agree that hitting this race is harmless. If this
race gets triggered the credit associated with the lost SRP
information unit will never be returned to the initiator. If this
happens frequently enough (INITIAL_SRP_LIMIT times), the initiator
will run out of credits and will lock up forever.

> Probably, we should set the queue size to INITIAL_SRP_LIMIT + 1.

With the current implementation of the SCSI storage target core that's
probably sufficient. But if that core is ever modified such that
multiple SCSI commands can be processed concurrently, that limit
wouldn't be sufficient anymore because then the described race
condition could be triggered in multiple threads simultaneously - at
least in theory.

An alternative solution is to modify send_iu() such that srp_iu_put()
is invoked after all relevant data has been copied via h_copy_rdma()
and to the CRQ and before h_send_crq() is invoked. That approach has
e.g. been implemented in this patch:
http://www.spinics.net/lists/linux-scsi/msg49105.html.

Bart.

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-15 19:20           ` Bart Van Assche
@ 2011-02-15 23:21             ` FUJITA Tomonori
  0 siblings, 0 replies; 81+ messages in thread
From: FUJITA Tomonori @ 2011-02-15 23:21 UTC (permalink / raw)
  To: bvanassche; +Cc: fujita.tomonori, linux-scsi, brking

On Tue, 15 Feb 2011 20:20:53 +0100
Bart Van Assche <bvanassche@acm.org> wrote:

> On Tue, Feb 15, 2011 at 4:42 AM, FUJITA Tomonori
> <fujita.tomonori@lab.ntt.co.jp> wrote:
> > On Mon, 14 Feb 2011 12:50:40 +0100
> > Bart Van Assche <bvanassche@acm.org> wrote:
> >
> >> - send_rsp() sends back an SRP response with req_lim_delta = 1 before
> >> srp_iu_put() is invoked. I think this is a race condition: it can
> >> happen that the SRP initiator has received an SRP_RSP and sends a new
> >> SRP_CMD before srp_iu_put() was invoked. On a heavily loaded system
> >> that can trigger a queue overflow in the target.
> >
> > Yeah, we had better to handle such case better.
> >
> > But I don't think that we hit a critical event such as crash, memory
> > corruption, etc, with the current code.
> >
> > In such case, srp_iu_get return NULL, so the target ignores the
> > request, then eventually the initiator recovers.
> 
> Sorry but I do not agree that hitting this race is harmless. If this
> race gets triggered the credit associated with the lost SRP
> information unit will never be returned to the initiator. If this
> happens frequently enough (INITIAL_SRP_LIMIT times), the initiator
> will run out of credits and will lock up forever.

I'm not sure that happens because the initiator increases
hostdata->request_limit when the request hits the time out.


> > Probably, we should set the queue size to INITIAL_SRP_LIMIT + 1.
> 
> With the current implementation of the SCSI storage target core that's
> probably sufficient. But if that core is ever modified such that
> multiple SCSI commands can be processed concurrently, that limit
> wouldn't be sufficient anymore because then the described race
> condition could be triggered in multiple threads simultaneously - at
> least in theory.

ok.


> An alternative solution is to modify send_iu() such that srp_iu_put()
> is invoked after all relevant data has been copied via h_copy_rdma()
> and to the CRQ and before h_send_crq() is invoked. That approach has
> e.g. been implemented in this patch:
> http://www.spinics.net/lists/linux-scsi/msg49105.html.

Sounds like an option. I'll take a look.

Thanks,

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-02-12 20:27         ` James Bottomley
@ 2011-03-07  4:41           ` FUJITA Tomonori
  2011-03-07  6:17             ` Nicholas A. Bellinger
  2011-03-07 14:40             ` James Bottomley
  0 siblings, 2 replies; 81+ messages in thread
From: FUJITA Tomonori @ 2011-03-07  4:41 UTC (permalink / raw)
  To: James.Bottomley; +Cc: brking, nab, fujita.tomonori, linux-scsi

On Sat, 12 Feb 2011 14:27:26 -0600
James Bottomley <James.Bottomley@suse.de> wrote:

> > Disregard my previous comment. It looks like current client should handle reservations
> > just fine without any further changes.
> 
> So is that an ack for putting this in scsi-misc ... or did you want to
> do more testing first?

Ping,

Brian, James, can we merge this during the next merge window?



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-07  4:41           ` FUJITA Tomonori
@ 2011-03-07  6:17             ` Nicholas A. Bellinger
  2011-03-07  6:24               ` FUJITA Tomonori
  2011-03-07 14:40             ` James Bottomley
  1 sibling, 1 reply; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-03-07  6:17 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: James.Bottomley, brking, linux-scsi

On Mon, 2011-03-07 at 13:41 +0900, FUJITA Tomonori wrote:
> On Sat, 12 Feb 2011 14:27:26 -0600
> James Bottomley <James.Bottomley@suse.de> wrote:
> 
> > > Disregard my previous comment. It looks like current client should handle reservations
> > > just fine without any further changes.
> > 
> > So is that an ack for putting this in scsi-misc ... or did you want to
> > do more testing first?
> 
> Ping,
> 
> Brian, James, can we merge this during the next merge window?
> 

Hi Tomo-san,

Just a heads up that I have merged the lio-core-2.6.git/tcm_ibmvscsis
working branch into lio-4.1+master with your latest IBMVSCSIS code
posted here:

[PATCH 0/3] ibmvscsis driver rewrite
http://marc.info/?l=linux-scsi&m=129734085306633&w=2

At this point there is only one minor change to update parameter usage
of tcm_queuecommand() -> transport_get_lun_for_cmd() for lio-4.1 that
has been pushed here:

commit 39d42814c54de74ab8bf1a59b90331a337a89cd3
Author: Nicholas Bellinger <nab@linux-iscsi.org>
Date:   Mon Mar 7 06:12:33 2011 +0000

    ibmvscsis: Update transport_get_lun_for_cmd() usage for lio-4.1

The patch to change transport_get_lun_for_cmd() parameter usage has not
been submitted for-39 mainline yet, so please ignore this minor patch
for the merge of IBMVSCSIS against mainline v4.0 target code.

Please let me know if you have any more updates to be included in the
working tree.

Thank you!

--nab



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-07  6:17             ` Nicholas A. Bellinger
@ 2011-03-07  6:24               ` FUJITA Tomonori
  2011-03-07  6:55                 ` Nicholas A. Bellinger
  0 siblings, 1 reply; 81+ messages in thread
From: FUJITA Tomonori @ 2011-03-07  6:24 UTC (permalink / raw)
  To: nab; +Cc: fujita.tomonori, James.Bottomley, brking, linux-scsi

On Sun, 06 Mar 2011 22:17:25 -0800
"Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:

> On Mon, 2011-03-07 at 13:41 +0900, FUJITA Tomonori wrote:
> > On Sat, 12 Feb 2011 14:27:26 -0600
> > James Bottomley <James.Bottomley@suse.de> wrote:
> > 
> > > > Disregard my previous comment. It looks like current client should handle reservations
> > > > just fine without any further changes.
> > > 
> > > So is that an ack for putting this in scsi-misc ... or did you want to
> > > do more testing first?
> > 
> > Ping,
> > 
> > Brian, James, can we merge this during the next merge window?
> > 
> 
> Hi Tomo-san,
> 
> Just a heads up that I have merged the lio-core-2.6.git/tcm_ibmvscsis
> working branch into lio-4.1+master with your latest IBMVSCSIS code
> posted here:

Thanks, however, I'm not sure the inclusion of your tree automatically
means the inclusion of mainline via scsi-misc :)

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-07  6:24               ` FUJITA Tomonori
@ 2011-03-07  6:55                 ` Nicholas A. Bellinger
  0 siblings, 0 replies; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-03-07  6:55 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: James.Bottomley, brking, linux-scsi

On Mon, 2011-03-07 at 15:24 +0900, FUJITA Tomonori wrote:
> On Sun, 06 Mar 2011 22:17:25 -0800
> "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> 
> > On Mon, 2011-03-07 at 13:41 +0900, FUJITA Tomonori wrote:
> > > On Sat, 12 Feb 2011 14:27:26 -0600
> > > James Bottomley <James.Bottomley@suse.de> wrote:
> > > 
> > > > > Disregard my previous comment. It looks like current client should handle reservations
> > > > > just fine without any further changes.
> > > > 
> > > > So is that an ack for putting this in scsi-misc ... or did you want to
> > > > do more testing first?
> > > 
> > > Ping,
> > > 
> > > Brian, James, can we merge this during the next merge window?
> > > 
> > 
> > Hi Tomo-san,
> > 
> > Just a heads up that I have merged the lio-core-2.6.git/tcm_ibmvscsis
> > working branch into lio-4.1+master with your latest IBMVSCSIS code
> > posted here:
> 
> Thanks, however, I'm not sure the inclusion of your tree automatically
> means the inclusion of mainline via scsi-misc :)

Unfortunately not, but having IBMVSCSIS included into the main LIO
upstream branch is a good way to ensure a fabric module will stay
current against the latest target code.  ;)

So at this point (for Brian and James) I am happy enough with the
current state of things to give my ack for-39 mainline.

Acked-by: Nicholas A. Bellinger <nab@linux-iscsi.org>

Thanks Tomo!

--nab


^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-07  4:41           ` FUJITA Tomonori
  2011-03-07  6:17             ` Nicholas A. Bellinger
@ 2011-03-07 14:40             ` James Bottomley
  2011-03-18 16:57               ` James Bottomley
  2011-03-18 20:58               ` Brian King
  1 sibling, 2 replies; 81+ messages in thread
From: James Bottomley @ 2011-03-07 14:40 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: brking, nab, linux-scsi

On Mon, 2011-03-07 at 13:41 +0900, FUJITA Tomonori wrote:
> On Sat, 12 Feb 2011 14:27:26 -0600
> James Bottomley <James.Bottomley@suse.de> wrote:
> 
> > > Disregard my previous comment. It looks like current client should handle reservations
> > > just fine without any further changes.
> > 
> > So is that an ack for putting this in scsi-misc ... or did you want to
> > do more testing first?
> 
> Ping,
> 
> Brian, James, can we merge this during the next merge window?

I'm still waiting for an ack from Brian.

James




^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-07 14:40             ` James Bottomley
@ 2011-03-18 16:57               ` James Bottomley
  2011-03-18 20:58               ` Brian King
  1 sibling, 0 replies; 81+ messages in thread
From: James Bottomley @ 2011-03-18 16:57 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: brking, nab, linux-scsi

On Mon, 2011-03-07 at 14:40 +0000, James Bottomley wrote:
> On Mon, 2011-03-07 at 13:41 +0900, FUJITA Tomonori wrote:
> > On Sat, 12 Feb 2011 14:27:26 -0600
> > James Bottomley <James.Bottomley@suse.de> wrote:
> > 
> > > > Disregard my previous comment. It looks like current client should handle reservations
> > > > just fine without any further changes.
> > > 
> > > So is that an ack for putting this in scsi-misc ... or did you want to
> > > do more testing first?
> > 
> > Ping,
> > 
> > Brian, James, can we merge this during the next merge window?
> 
> I'm still waiting for an ack from Brian.

Brian,

Still waiting ... this is going to miss the merge window unless I get an
ack in the next couple of days; what's the holdup?

James



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-07 14:40             ` James Bottomley
  2011-03-18 16:57               ` James Bottomley
@ 2011-03-18 20:58               ` Brian King
  2011-03-18 22:09                 ` Nicholas A. Bellinger
                                   ` (2 more replies)
  1 sibling, 3 replies; 81+ messages in thread
From: Brian King @ 2011-03-18 20:58 UTC (permalink / raw)
  To: James Bottomley; +Cc: FUJITA Tomonori, nab, linux-scsi

On 03/07/2011 08:40 AM, James Bottomley wrote:
> On Mon, 2011-03-07 at 13:41 +0900, FUJITA Tomonori wrote:
>> On Sat, 12 Feb 2011 14:27:26 -0600
>> James Bottomley <James.Bottomley@suse.de> wrote:
>>
>>>> Disregard my previous comment. It looks like current client should handle reservations
>>>> just fine without any further changes.
>>>
>>> So is that an ack for putting this in scsi-misc ... or did you want to
>>> do more testing first?
>>
>> Ping,
>>
>> Brian, James, can we merge this during the next merge window?
> 
> I'm still waiting for an ack from Brian.

Sorry for the delay... I've got this loaded in the lab and have managed to oops
a couple times. The first one was during shutdown, which I wasn't able to collect
any data for. The most recent occurred when a client was trying to login for the
first time:

Modules linked in: target_core_pscsi target_core_file target_core_iblock ip6t_LOG xt_tcpudp xt_pkttype ipt_LOG xt_limit ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_raw xt_NOTRACK ipt_REJECT xt_state iptable_raw iptable_filter ip6table_mangle nf_conntrack_netbios_ns nf_conntrack_ipv4 nf_conntrack nf_defrag_ipv4 ip_tables ip6table_filter ip6_tables x_tables ipv6 fuse loop dm_mod ibmvscsis libsrp scsi_tgt target_core_mod sg configfs ibmveth ses enclosure ext3 jbd mbcache sd_mod crc_t10dif ipr libata scsi_mod
NIP: d000000004a01dc4 LR: d000000004a01db4 CTR: c0000000005b36a0
REGS: c00000033fb139d0 TRAP: 0300   Not tainted  (2.6.38-rc7-0.7-ppc64-00163-gfb62c00-dirty)
MSR: 8000000000009032 <EE,ME,IR,DR>  CR: 28002022  XER: 00000002
DAR: 0000000000000000, DSISR: 40000000
TASK = c00000033fb08d70[89] 'kworker/0:1' THREAD: c00000033fb10000 CPU: 0
GPR00: 0000000000000000 c00000033fb13c50 d000000004a0bff8 c00000033f84de94 
GPR04: d000000004a03c74 0000000000000001 0000000000000002 0000000000000001 
GPR08: fffffffffffffffc 0000000080000000 0000000000000000 0000000000000000 
GPR12: d000000004a02e58 c00000000f190000 0000000000000200 0000000000000008 
GPR16: 0000000000000008 c000000004821110 0000000000000000 0000000000000000 
GPR20: c00000033e9e66d8 c00000033f84ddf8 c00000033f84de00 c00000033f84de94 
GPR24: 000000033f4e0000 c00000033e9e6680 c00000033f84dd80 c00000033bd60000 
GPR28: 0000000000000024 c000000000000000 d000000004a0c008 8000000000000000 
NIP [d000000004a01dc4] .handle_crq+0x7ac/0xa60 [ibmvscsis]
LR [d000000004a01db4] .handle_crq+0x79c/0xa60 [ibmvscsis]
Call Trace:
[c00000033fb13c50] [d000000004a01db4] .handle_crq+0x79c/0xa60 [ibmvscsis] (unreliable)
[c00000033fb13d60] [c0000000000c0e38] .process_one_work+0x198/0x518
[c00000033fb13e10] [c0000000000c1694] .worker_thread+0x1f4/0x518
[c00000033fb13ed0] [c0000000000c9ddc] .kthread+0xb4/0xc0
[c00000033fb13f90] [c00000000001e864] .kernel_thread+0x54/0x70
Instruction dump:
7be05f60 2f800000 409e016c 7be086e0 2f800000 409e0160 7ee3bb78 480010a9 
e8410028 7be046a0 e97a0140 780045e4 <7d2b002e> 2f890001 419e000c 3800007f 

Prior to DLPAR adding a vscsi client adapter to my client LPAR, which caused
the VIOS crash, I had created a single file backed disk:

tcm_node --fileio fileio_0/test  /vdisks/test 1000000
 ConfigFS HBA: fileio_0
Successfully added TCM/ConfigFS HBA: fileio_0
 ConfigFS Device Alias: test
Device Params ['fd_dev_name=/vdisks/test,fd_dev_size=1000000']
Status: DEACTIVATED  Execute/Left/Max Queue Depth: 0/32/32  SectorSize: 512  MaxSectors: 1024
        TCM FILEIO ID: 0        File: /vdisks/test  Size: 1000000  Mode: Synchronous
Set T10 WWN Unit Serial for fileio_0/test to: 092a1bf2-92d9-4bb0-aceb-39ce865c8a80
Successfully created TCM/ConfigFS storage object: /sys/kernel/config/target/core/fileio_0/test

-Brian


-- 
Brian King
Linux on Power Virtualization
IBM Linux Technology Center



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-18 20:58               ` Brian King
@ 2011-03-18 22:09                 ` Nicholas A. Bellinger
  2011-03-19 14:32                 ` James Bottomley
  2011-03-21  1:09                 ` FUJITA Tomonori
  2 siblings, 0 replies; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-03-18 22:09 UTC (permalink / raw)
  To: Brian King; +Cc: James Bottomley, FUJITA Tomonori, linux-scsi

On Fri, 2011-03-18 at 15:58 -0500, Brian King wrote:
> On 03/07/2011 08:40 AM, James Bottomley wrote:
> > On Mon, 2011-03-07 at 13:41 +0900, FUJITA Tomonori wrote:
> >> On Sat, 12 Feb 2011 14:27:26 -0600
> >> James Bottomley <James.Bottomley@suse.de> wrote:
> >>
> >>>> Disregard my previous comment. It looks like current client should handle reservations
> >>>> just fine without any further changes.
> >>>
> >>> So is that an ack for putting this in scsi-misc ... or did you want to
> >>> do more testing first?
> >>
> >> Ping,
> >>
> >> Brian, James, can we merge this during the next merge window?
> > 
> > I'm still waiting for an ack from Brian.
> 
> Sorry for the delay... I've got this loaded in the lab and have managed to oops
> a couple times.

Hi Brian,

Thank you for your testing.  Unfortunately my p505 is still not enabled
with VIOS firmware, so I can't reproduce these issues directly at the
moment..

> The first one was during shutdown, which I wasn't able to collect
> any data for.

Ok, I will take a moment in the next week to review the active I/O
shutdown code for the demo mode TPG case that tcm ibmvscsis is currently
using.

Note that a active I/O shutdown bug was recently fixed and merged
into .38-FINAL here:

target: Fix t_transport_aborted handling in LUN_RESET + active I/O shutdown
http://git.kernel.org/?p=linux/kernel/git/jejb/scsi-rc-fixes-2.6.git;a=commitdiff;h=52208ae3fc60cbcb214c10fb8b82304199e2cc3a

reported via here:

https://bugzilla.kernel.org/show_bug.cgi?id=29442

This was originally involving the shutdown of active I/O to explicit
NodeACLs and MappedLUNs in:

/sys/kernel/config/target/$FABRIC_MOD/$TARGET_WWN/tpgt_$TPGT/acls/

I am fairly certain this is going to apply to modules like tcm ibmvscsis
that do not use explicit NodeACLs or MappedLUNs, but instead use demo
mode TPG access by default for initiator login access.

> The most recent occurred when a client was trying to login for the
> first time:
> 
> Modules linked in: target_core_pscsi target_core_file target_core_iblock ip6t_LOG xt_tcpudp xt_pkttype ipt_LOG xt_limit ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_raw xt_NOTRACK ipt_REJECT xt_state iptable_raw iptable_filter ip6table_mangle nf_conntrack_netbios_ns nf_conntrack_ipv4 nf_conntrack nf_defrag_ipv4 ip_tables ip6table_filter ip6_tables x_tables ipv6 fuse loop dm_mod ibmvscsis libsrp scsi_tgt target_core_mod sg configfs ibmveth ses enclosure ext3 jbd mbcache sd_mod crc_t10dif ipr libata scsi_mod
> NIP: d000000004a01dc4 LR: d000000004a01db4 CTR: c0000000005b36a0
> REGS: c00000033fb139d0 TRAP: 0300   Not tainted  (2.6.38-rc7-0.7-ppc64-00163-gfb62c00-dirty)
> MSR: 8000000000009032 <EE,ME,IR,DR>  CR: 28002022  XER: 00000002
> DAR: 0000000000000000, DSISR: 40000000
> TASK = c00000033fb08d70[89] 'kworker/0:1' THREAD: c00000033fb10000 CPU: 0
> GPR00: 0000000000000000 c00000033fb13c50 d000000004a0bff8 c00000033f84de94 
> GPR04: d000000004a03c74 0000000000000001 0000000000000002 0000000000000001 
> GPR08: fffffffffffffffc 0000000080000000 0000000000000000 0000000000000000 
> GPR12: d000000004a02e58 c00000000f190000 0000000000000200 0000000000000008 
> GPR16: 0000000000000008 c000000004821110 0000000000000000 0000000000000000 
> GPR20: c00000033e9e66d8 c00000033f84ddf8 c00000033f84de00 c00000033f84de94 
> GPR24: 000000033f4e0000 c00000033e9e6680 c00000033f84dd80 c00000033bd60000 
> GPR28: 0000000000000024 c000000000000000 d000000004a0c008 8000000000000000 
> NIP [d000000004a01dc4] .handle_crq+0x7ac/0xa60 [ibmvscsis]
> LR [d000000004a01db4] .handle_crq+0x79c/0xa60 [ibmvscsis]
> Call Trace:
> [c00000033fb13c50] [d000000004a01db4] .handle_crq+0x79c/0xa60 [ibmvscsis] (unreliable)
> [c00000033fb13d60] [c0000000000c0e38] .process_one_work+0x198/0x518
> [c00000033fb13e10] [c0000000000c1694] .worker_thread+0x1f4/0x518
> [c00000033fb13ed0] [c0000000000c9ddc] .kthread+0xb4/0xc0
> [c00000033fb13f90] [c00000000001e864] .kernel_thread+0x54/0x70
> Instruction dump:
> 7be05f60 2f800000 409e016c 7be086e0 2f800000 409e0160 7ee3bb78 480010a9 
> e8410028 7be046a0 e97a0140 780045e4 <7d2b002e> 2f890001 419e000c 3800007f 
> 
> Prior to DLPAR adding a vscsi client adapter to my client LPAR, which caused
> the VIOS crash, I had created a single file backed disk:
> 

Tomo-san, do you have any input on this particular issue..?

Also sending along any gdb output from this backtrace and full dmesg
context to me offline to grok would be greatly appreciated.

FYI, the lio-core-2.6.git/tcm_ibmvscsis branch w/ Tomo-san's latest v3
code has been updated to LIO v4.0.0-rc7 / .38-FINAL code..  Please grab
this code or the .38 bugfix mentioned above for active I/O shutdown case
and retest this case at your earliest convenience.

Best Regards,

--nab


^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-18 20:58               ` Brian King
  2011-03-18 22:09                 ` Nicholas A. Bellinger
@ 2011-03-19 14:32                 ` James Bottomley
  2011-03-21  1:09                 ` FUJITA Tomonori
  2 siblings, 0 replies; 81+ messages in thread
From: James Bottomley @ 2011-03-19 14:32 UTC (permalink / raw)
  To: Brian King; +Cc: FUJITA Tomonori, nab, linux-scsi

On Fri, 2011-03-18 at 15:58 -0500, Brian King wrote:
> On 03/07/2011 08:40 AM, James Bottomley wrote:
> > On Mon, 2011-03-07 at 13:41 +0900, FUJITA Tomonori wrote:
> >> On Sat, 12 Feb 2011 14:27:26 -0600
> >> James Bottomley <James.Bottomley@suse.de> wrote:
> >>
> >>>> Disregard my previous comment. It looks like current client should handle reservations
> >>>> just fine without any further changes.
> >>>
> >>> So is that an ack for putting this in scsi-misc ... or did you want to
> >>> do more testing first?
> >>
> >> Ping,
> >>
> >> Brian, James, can we merge this during the next merge window?
> > 
> > I'm still waiting for an ack from Brian.
> 
> Sorry for the delay... I've got this loaded in the lab and have managed to oops
> a couple times. The first one was during shutdown, which I wasn't able to collect
> any data for. The most recent occurred when a client was trying to login for the
> first time:

OK, that's a bit of a show stopper, then.

> Modules linked in: target_core_pscsi target_core_file target_core_iblock ip6t_LOG xt_tcpudp xt_pkttype ipt_LOG xt_limit ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_raw xt_NOTRACK ipt_REJECT xt_state iptable_raw iptable_filter ip6table_mangle nf_conntrack_netbios_ns nf_conntrack_ipv4 nf_conntrack nf_defrag_ipv4 ip_tables ip6table_filter ip6_tables x_tables ipv6 fuse loop dm_mod ibmvscsis libsrp scsi_tgt target_core_mod sg configfs ibmveth ses enclosure ext3 jbd mbcache sd_mod crc_t10dif ipr libata scsi_mod
> NIP: d000000004a01dc4 LR: d000000004a01db4 CTR: c0000000005b36a0
> REGS: c00000033fb139d0 TRAP: 0300   Not tainted  (2.6.38-rc7-0.7-ppc64-00163-gfb62c00-dirty)
> MSR: 8000000000009032 <EE,ME,IR,DR>  CR: 28002022  XER: 00000002
> DAR: 0000000000000000, DSISR: 40000000
> TASK = c00000033fb08d70[89] 'kworker/0:1' THREAD: c00000033fb10000 CPU: 0
> GPR00: 0000000000000000 c00000033fb13c50 d000000004a0bff8 c00000033f84de94 
> GPR04: d000000004a03c74 0000000000000001 0000000000000002 0000000000000001 
> GPR08: fffffffffffffffc 0000000080000000 0000000000000000 0000000000000000 
> GPR12: d000000004a02e58 c00000000f190000 0000000000000200 0000000000000008 
> GPR16: 0000000000000008 c000000004821110 0000000000000000 0000000000000000 
> GPR20: c00000033e9e66d8 c00000033f84ddf8 c00000033f84de00 c00000033f84de94 
> GPR24: 000000033f4e0000 c00000033e9e6680 c00000033f84dd80 c00000033bd60000 
> GPR28: 0000000000000024 c000000000000000 d000000004a0c008 8000000000000000 
> NIP [d000000004a01dc4] .handle_crq+0x7ac/0xa60 [ibmvscsis]
> LR [d000000004a01db4] .handle_crq+0x79c/0xa60 [ibmvscsis]

Can you get a better handle on this location?  It's clearly inside one
of the expanded static functions, but knowing which one would help Tomo
debug it.

James


> Call Trace:
> [c00000033fb13c50] [d000000004a01db4] .handle_crq+0x79c/0xa60 [ibmvscsis] (unreliable)
> [c00000033fb13d60] [c0000000000c0e38] .process_one_work+0x198/0x518
> [c00000033fb13e10] [c0000000000c1694] .worker_thread+0x1f4/0x518
> [c00000033fb13ed0] [c0000000000c9ddc] .kthread+0xb4/0xc0
> [c00000033fb13f90] [c00000000001e864] .kernel_thread+0x54/0x70
> Instruction dump:
> 7be05f60 2f800000 409e016c 7be086e0 2f800000 409e0160 7ee3bb78 480010a9 
> e8410028 7be046a0 e97a0140 780045e4 <7d2b002e> 2f890001 419e000c 3800007f 
> 
> Prior to DLPAR adding a vscsi client adapter to my client LPAR, which caused
> the VIOS crash, I had created a single file backed disk:
> 
> tcm_node --fileio fileio_0/test  /vdisks/test 1000000
>  ConfigFS HBA: fileio_0
> Successfully added TCM/ConfigFS HBA: fileio_0
>  ConfigFS Device Alias: test
> Device Params ['fd_dev_name=/vdisks/test,fd_dev_size=1000000']
> Status: DEACTIVATED  Execute/Left/Max Queue Depth: 0/32/32  SectorSize: 512  MaxSectors: 1024
>         TCM FILEIO ID: 0        File: /vdisks/test  Size: 1000000  Mode: Synchronous
> Set T10 WWN Unit Serial for fileio_0/test to: 092a1bf2-92d9-4bb0-aceb-39ce865c8a80
> Successfully created TCM/ConfigFS storage object: /sys/kernel/config/target/core/fileio_0/test
> 
> -Brian
> 
> 



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-18 20:58               ` Brian King
  2011-03-18 22:09                 ` Nicholas A. Bellinger
  2011-03-19 14:32                 ` James Bottomley
@ 2011-03-21  1:09                 ` FUJITA Tomonori
  2011-03-21 12:56                   ` Brian King
                                     ` (2 more replies)
  2 siblings, 3 replies; 81+ messages in thread
From: FUJITA Tomonori @ 2011-03-21  1:09 UTC (permalink / raw)
  To: brking; +Cc: James.Bottomley, fujita.tomonori, nab, linux-scsi

Thanks for the testings,

On Fri, 18 Mar 2011 15:58:53 -0500
Brian King <brking@linux.vnet.ibm.com> wrote:

> On 03/07/2011 08:40 AM, James Bottomley wrote:
> > On Mon, 2011-03-07 at 13:41 +0900, FUJITA Tomonori wrote:
> >> On Sat, 12 Feb 2011 14:27:26 -0600
> >> James Bottomley <James.Bottomley@suse.de> wrote:
> >>
> >>>> Disregard my previous comment. It looks like current client should handle reservations
> >>>> just fine without any further changes.
> >>>
> >>> So is that an ack for putting this in scsi-misc ... or did you want to
> >>> do more testing first?
> >>
> >> Ping,
> >>
> >> Brian, James, can we merge this during the next merge window?
> > 
> > I'm still waiting for an ack from Brian.
> 
> Sorry for the delay... I've got this loaded in the lab and have managed to oops
> a couple times. The first one was during shutdown, which I wasn't able to collect
> any data for. The most recent occurred when a client was trying to login for the
> first time:

You mean that the kernel crashes every time when a client logs in?

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-21  1:09                 ` FUJITA Tomonori
@ 2011-03-21 12:56                   ` Brian King
  2011-03-21 21:01                   ` Brian King
  2011-03-21 22:22                   ` Brian King
  2 siblings, 0 replies; 81+ messages in thread
From: Brian King @ 2011-03-21 12:56 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: James.Bottomley, nab, linux-scsi

On 03/20/2011 08:09 PM, FUJITA Tomonori wrote:
> Thanks for the testings,
> 
> On Fri, 18 Mar 2011 15:58:53 -0500
> Brian King <brking@linux.vnet.ibm.com> wrote:
> 
>> On 03/07/2011 08:40 AM, James Bottomley wrote:
>>> On Mon, 2011-03-07 at 13:41 +0900, FUJITA Tomonori wrote:
>>>> On Sat, 12 Feb 2011 14:27:26 -0600
>>>> James Bottomley <James.Bottomley@suse.de> wrote:
>>>>
>>>>>> Disregard my previous comment. It looks like current client should handle reservations
>>>>>> just fine without any further changes.
>>>>>
>>>>> So is that an ack for putting this in scsi-misc ... or did you want to
>>>>> do more testing first?
>>>>
>>>> Ping,
>>>>
>>>> Brian, James, can we merge this during the next merge window?
>>>
>>> I'm still waiting for an ack from Brian.
>>
>> Sorry for the delay... I've got this loaded in the lab and have managed to oops
>> a couple times. The first one was during shutdown, which I wasn't able to collect
>> any data for. The most recent occurred when a client was trying to login for the
>> first time:
> 
> You mean that the kernel crashes every time when a client logs in?

That is correct. This occurs whether or not there are any virtual disks defined. This
is occurring with current linus git head plus the three patches that were posted.

-Brian


-- 
Brian King
Linux on Power Virtualization
IBM Linux Technology Center



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-21  1:09                 ` FUJITA Tomonori
  2011-03-21 12:56                   ` Brian King
@ 2011-03-21 21:01                   ` Brian King
  2011-03-21 21:01                     ` Nicholas A. Bellinger
  2011-03-21 21:05                     ` James Bottomley
  2011-03-21 22:22                   ` Brian King
  2 siblings, 2 replies; 81+ messages in thread
From: Brian King @ 2011-03-21 21:01 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: James.Bottomley, nab, linux-scsi

On 03/20/2011 08:09 PM, FUJITA Tomonori wrote:
> Thanks for the testings,
> 
> On Fri, 18 Mar 2011 15:58:53 -0500
> Brian King <brking@linux.vnet.ibm.com> wrote:
> 
>> On 03/07/2011 08:40 AM, James Bottomley wrote:
>>> On Mon, 2011-03-07 at 13:41 +0900, FUJITA Tomonori wrote:
>>>> On Sat, 12 Feb 2011 14:27:26 -0600
>>>> James Bottomley <James.Bottomley@suse.de> wrote:
>>>>
>>>>>> Disregard my previous comment. It looks like current client should handle reservations
>>>>>> just fine without any further changes.
>>>>>
>>>>> So is that an ack for putting this in scsi-misc ... or did you want to
>>>>> do more testing first?
>>>>
>>>> Ping,
>>>>
>>>> Brian, James, can we merge this during the next merge window?
>>>
>>> I'm still waiting for an ack from Brian.
>>
>> Sorry for the delay... I've got this loaded in the lab and have managed to oops
>> a couple times. The first one was during shutdown, which I wasn't able to collect
>> any data for. The most recent occurred when a client was trying to login for the
>> first time:
> 
> You mean that the kernel crashes every time when a client logs in?

I think the crash I was seeing when the client logs in was just due to the fact that
I had things misconfigured. It took me a bit to figure out the configfs stuff. Didn't
realize at first I had to go off and manually create a lot of the configfs layout
described in the wiki page. Once I did that, I was able to get a LUN to successfully
report in on the client side. I'll start pounding on this a bit and see how things
hold up.

Thanks,

Brian

-- 
Brian King
Linux on Power Virtualization
IBM Linux Technology Center



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-21 21:01                   ` Brian King
@ 2011-03-21 21:01                     ` Nicholas A. Bellinger
  2011-03-21 21:24                       ` Brian King
  2011-03-21 21:05                     ` James Bottomley
  1 sibling, 1 reply; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-03-21 21:01 UTC (permalink / raw)
  To: Brian King; +Cc: FUJITA Tomonori, James.Bottomley, linux-scsi

On Mon, 2011-03-21 at 16:01 -0500, Brian King wrote:
> On 03/20/2011 08:09 PM, FUJITA Tomonori wrote:
> > Thanks for the testings,
> > 
> > On Fri, 18 Mar 2011 15:58:53 -0500
> > Brian King <brking@linux.vnet.ibm.com> wrote:
> > 
> >> On 03/07/2011 08:40 AM, James Bottomley wrote:
> >>> On Mon, 2011-03-07 at 13:41 +0900, FUJITA Tomonori wrote:
> >>>> On Sat, 12 Feb 2011 14:27:26 -0600
> >>>> James Bottomley <James.Bottomley@suse.de> wrote:
> >>>>
> >>>>>> Disregard my previous comment. It looks like current client should handle reservations
> >>>>>> just fine without any further changes.
> >>>>>
> >>>>> So is that an ack for putting this in scsi-misc ... or did you want to
> >>>>> do more testing first?
> >>>>
> >>>> Ping,
> >>>>
> >>>> Brian, James, can we merge this during the next merge window?
> >>>
> >>> I'm still waiting for an ack from Brian.
> >>
> >> Sorry for the delay... I've got this loaded in the lab and have managed to oops
> >> a couple times. The first one was during shutdown, which I wasn't able to collect
> >> any data for. The most recent occurred when a client was trying to login for the
> >> first time:
> > 
> > You mean that the kernel crashes every time when a client logs in?
> 
> I think the crash I was seeing when the client logs in was just due to the fact that
> I had things misconfigured. It took me a bit to figure out the configfs stuff. Didn't
> realize at first I had to go off and manually create a lot of the configfs layout
> described in the wiki page. Once I did that, I was able to get a LUN to successfully
> report in on the client side. I'll start pounding on this a bit and see how things
> hold up.
> 

Hi Brian,

If could send along the original misconfigured configfs layout that is
causing the OOPs in handle_crq(), I would be happy to have a quick look.

Thank you,

--nab


^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-21 21:01                   ` Brian King
  2011-03-21 21:01                     ` Nicholas A. Bellinger
@ 2011-03-21 21:05                     ` James Bottomley
  2011-03-21 22:37                       ` Brian King
  1 sibling, 1 reply; 81+ messages in thread
From: James Bottomley @ 2011-03-21 21:05 UTC (permalink / raw)
  To: Brian King; +Cc: FUJITA Tomonori, nab, linux-scsi

On Mon, 2011-03-21 at 16:01 -0500, Brian King wrote:
> On 03/20/2011 08:09 PM, FUJITA Tomonori wrote:
> > Thanks for the testings,
> > 
> > On Fri, 18 Mar 2011 15:58:53 -0500
> > Brian King <brking@linux.vnet.ibm.com> wrote:
> > 
> >> On 03/07/2011 08:40 AM, James Bottomley wrote:
> >>> On Mon, 2011-03-07 at 13:41 +0900, FUJITA Tomonori wrote:
> >>>> On Sat, 12 Feb 2011 14:27:26 -0600
> >>>> James Bottomley <James.Bottomley@suse.de> wrote:
> >>>>
> >>>>>> Disregard my previous comment. It looks like current client should handle reservations
> >>>>>> just fine without any further changes.
> >>>>>
> >>>>> So is that an ack for putting this in scsi-misc ... or did you want to
> >>>>> do more testing first?
> >>>>
> >>>> Ping,
> >>>>
> >>>> Brian, James, can we merge this during the next merge window?
> >>>
> >>> I'm still waiting for an ack from Brian.
> >>
> >> Sorry for the delay... I've got this loaded in the lab and have managed to oops
> >> a couple times. The first one was during shutdown, which I wasn't able to collect
> >> any data for. The most recent occurred when a client was trying to login for the
> >> first time:
> > 
> > You mean that the kernel crashes every time when a client logs in?
> 
> I think the crash I was seeing when the client logs in was just due to the fact that
> I had things misconfigured. It took me a bit to figure out the configfs stuff. Didn't
> realize at first I had to go off and manually create a lot of the configfs layout
> described in the wiki page. Once I did that, I was able to get a LUN to successfully
> report in on the client side. I'll start pounding on this a bit and see how things
> hold up.

Hmm, so that still indicates the error handling is screwed at this
point.  That, I suppose, should be fixable (plus being a fix, it's merge
window exempt).  Do you want this in now, or wait until next merge
window?

James



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-21 21:01                     ` Nicholas A. Bellinger
@ 2011-03-21 21:24                       ` Brian King
  2011-03-21 22:29                         ` Nicholas A. Bellinger
  2011-03-21 23:20                         ` FUJITA Tomonori
  0 siblings, 2 replies; 81+ messages in thread
From: Brian King @ 2011-03-21 21:24 UTC (permalink / raw)
  To: Nicholas A. Bellinger; +Cc: FUJITA Tomonori, James.Bottomley, linux-scsi

On 03/21/2011 04:01 PM, Nicholas A. Bellinger wrote:
> On Mon, 2011-03-21 at 16:01 -0500, Brian King wrote:
>> On 03/20/2011 08:09 PM, FUJITA Tomonori wrote:
>>> Thanks for the testings,
>>>
>>> On Fri, 18 Mar 2011 15:58:53 -0500
>>> Brian King <brking@linux.vnet.ibm.com> wrote:
>>>
>>>> On 03/07/2011 08:40 AM, James Bottomley wrote:
>>>>> On Mon, 2011-03-07 at 13:41 +0900, FUJITA Tomonori wrote:
>>>>>> On Sat, 12 Feb 2011 14:27:26 -0600
>>>>>> James Bottomley <James.Bottomley@suse.de> wrote:
>>>>>>
>>>>>>>> Disregard my previous comment. It looks like current client should handle reservations
>>>>>>>> just fine without any further changes.
>>>>>>>
>>>>>>> So is that an ack for putting this in scsi-misc ... or did you want to
>>>>>>> do more testing first?
>>>>>>
>>>>>> Ping,
>>>>>>
>>>>>> Brian, James, can we merge this during the next merge window?
>>>>>
>>>>> I'm still waiting for an ack from Brian.
>>>>
>>>> Sorry for the delay... I've got this loaded in the lab and have managed to oops
>>>> a couple times. The first one was during shutdown, which I wasn't able to collect
>>>> any data for. The most recent occurred when a client was trying to login for the
>>>> first time:
>>>
>>> You mean that the kernel crashes every time when a client logs in?
>>
>> I think the crash I was seeing when the client logs in was just due to the fact that
>> I had things misconfigured. It took me a bit to figure out the configfs stuff. Didn't
>> realize at first I had to go off and manually create a lot of the configfs layout
>> described in the wiki page. Once I did that, I was able to get a LUN to successfully
>> report in on the client side. I'll start pounding on this a bit and see how things
>> hold up.
>>
> 
> Hi Brian,
> 
> If could send along the original misconfigured configfs layout that is
> causing the OOPs in handle_crq(), I would be happy to have a quick look.

Here it is. As you can see, there is no ./target/ibmvscsis directory created. In order
to get it to work, I did the following. Please let me know if there is a better way
to do this...

cd /sys/kernel/configfs/target
mkdir -p ibmvscsis/30000003/tpgt_1
mkdir ibmvscsis/30000003/tpgt_1/lun/lun_0
ln -s core/fileio_0/testfvd ibmvscsis/30000003/tpgt_1/lun/lun_0/default

I had previously created a file backed lun via:

tcm_node --fileio fileio_0/testfvd /vdisks/test 100000000

Thanks,

Brian


./target
./target/core
./target/core/fileio_0
./target/core/fileio_0/testfvd
./target/core/fileio_0/testfvd/alua
./target/core/fileio_0/testfvd/alua/default_tg_pt_gp
./target/core/fileio_0/testfvd/alua/default_tg_pt_gp/members
./target/core/fileio_0/testfvd/alua/default_tg_pt_gp/tg_pt_gp_id
./target/core/fileio_0/testfvd/alua/default_tg_pt_gp/preferred
./target/core/fileio_0/testfvd/alua/default_tg_pt_gp/trans_delay_msecs
./target/core/fileio_0/testfvd/alua/default_tg_pt_gp/nonop_delay_msecs
./target/core/fileio_0/testfvd/alua/default_tg_pt_gp/alua_write_metadata
./target/core/fileio_0/testfvd/alua/default_tg_pt_gp/alua_access_type
./target/core/fileio_0/testfvd/alua/default_tg_pt_gp/alua_access_status
./target/core/fileio_0/testfvd/alua/default_tg_pt_gp/alua_access_state
./target/core/fileio_0/testfvd/wwn
./target/core/fileio_0/testfvd/wwn/vpd_assoc_scsi_target_device
./target/core/fileio_0/testfvd/wwn/vpd_assoc_target_port
./target/core/fileio_0/testfvd/wwn/vpd_assoc_logical_unit
./target/core/fileio_0/testfvd/wwn/vpd_protocol_identifier
./target/core/fileio_0/testfvd/wwn/vpd_unit_serial
./target/core/fileio_0/testfvd/pr
./target/core/fileio_0/testfvd/pr/res_aptpl_metadata
./target/core/fileio_0/testfvd/pr/res_aptpl_active
./target/core/fileio_0/testfvd/pr/res_type
./target/core/fileio_0/testfvd/pr/res_pr_type
./target/core/fileio_0/testfvd/pr/res_pr_registered_i_pts
./target/core/fileio_0/testfvd/pr/res_pr_holder_tg_port
./target/core/fileio_0/testfvd/pr/res_pr_generation
./target/core/fileio_0/testfvd/pr/res_pr_all_tgt_pts
./target/core/fileio_0/testfvd/pr/res_holder
./target/core/fileio_0/testfvd/attrib
./target/core/fileio_0/testfvd/attrib/unmap_granularity_alignment
./target/core/fileio_0/testfvd/attrib/unmap_granularity
./target/core/fileio_0/testfvd/attrib/max_unmap_block_desc_count
./target/core/fileio_0/testfvd/attrib/max_unmap_lba_count
./target/core/fileio_0/testfvd/attrib/task_timeout
./target/core/fileio_0/testfvd/attrib/queue_depth
./target/core/fileio_0/testfvd/attrib/hw_queue_depth
./target/core/fileio_0/testfvd/attrib/optimal_sectors
./target/core/fileio_0/testfvd/attrib/max_sectors
./target/core/fileio_0/testfvd/attrib/hw_max_sectors
./target/core/fileio_0/testfvd/attrib/block_size
./target/core/fileio_0/testfvd/attrib/hw_block_size
./target/core/fileio_0/testfvd/attrib/enforce_pr_isids
./target/core/fileio_0/testfvd/attrib/emulate_tpws
./target/core/fileio_0/testfvd/attrib/emulate_tpu
./target/core/fileio_0/testfvd/attrib/emulate_tas
./target/core/fileio_0/testfvd/attrib/emulate_ua_intlck_ctrl
./target/core/fileio_0/testfvd/attrib/emulate_write_cache
./target/core/fileio_0/testfvd/attrib/emulate_fua_read
./target/core/fileio_0/testfvd/attrib/emulate_fua_write
./target/core/fileio_0/testfvd/attrib/emulate_dpo
./target/core/fileio_0/testfvd/alua_lu_gp
./target/core/fileio_0/testfvd/enable
./target/core/fileio_0/testfvd/udev_path
./target/core/fileio_0/testfvd/alias
./target/core/fileio_0/testfvd/control
./target/core/fileio_0/testfvd/info
./target/core/fileio_0/hba_mode
./target/core/fileio_0/hba_info
./target/core/alua
./target/core/alua/lu_gps
./target/core/alua/lu_gps/default_lu_gp
./target/core/alua/lu_gps/default_lu_gp/members
./target/core/alua/lu_gps/default_lu_gp/lu_gp_id
./target/version



-- 
Brian King
Linux on Power Virtualization
IBM Linux Technology Center



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-21  1:09                 ` FUJITA Tomonori
  2011-03-21 12:56                   ` Brian King
  2011-03-21 21:01                   ` Brian King
@ 2011-03-21 22:22                   ` Brian King
  2011-03-21 22:31                     ` Brian King
                                       ` (2 more replies)
  2 siblings, 3 replies; 81+ messages in thread
From: Brian King @ 2011-03-21 22:22 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: James.Bottomley, nab, linux-scsi

Tomo,

Thanks for all the work you've done with this driver. It is much appreciated.

What sort of performance are you seeing with this? With filebacked devices, I
was seeing around 1 MB/sec doing a basic disktest run. I haven't compared this
to ibmvstgt, but I'm pretty sure we did a fair bit better there. Perhaps I've got
some setup issue?

I tried a ramdisk device and saw this on the server:


DMA_TO_DEVICE not supported for RAMDISK_DR with task_sg_chaining=1



All my I/O on the client timed out because of this. 

-Brian

-- 
Brian King
Linux on Power Virtualization
IBM Linux Technology Center



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-21 21:24                       ` Brian King
@ 2011-03-21 22:29                         ` Nicholas A. Bellinger
  2011-03-21 23:20                         ` FUJITA Tomonori
  1 sibling, 0 replies; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-03-21 22:29 UTC (permalink / raw)
  To: Brian King; +Cc: FUJITA Tomonori, James.Bottomley, linux-scsi

On Mon, 2011-03-21 at 16:24 -0500, Brian King wrote:
> On 03/21/2011 04:01 PM, Nicholas A. Bellinger wrote:
> > On Mon, 2011-03-21 at 16:01 -0500, Brian King wrote:
> >> On 03/20/2011 08:09 PM, FUJITA Tomonori wrote:
> >>> Thanks for the testings,
> >>>
> >>> On Fri, 18 Mar 2011 15:58:53 -0500
> >>> Brian King <brking@linux.vnet.ibm.com> wrote:
> >>>
> >>>> On 03/07/2011 08:40 AM, James Bottomley wrote:
> >>>>> On Mon, 2011-03-07 at 13:41 +0900, FUJITA Tomonori wrote:
> >>>>>> On Sat, 12 Feb 2011 14:27:26 -0600
> >>>>>> James Bottomley <James.Bottomley@suse.de> wrote:
> >>>>>>
> >>>>>>>> Disregard my previous comment. It looks like current client should handle reservations
> >>>>>>>> just fine without any further changes.
> >>>>>>>
> >>>>>>> So is that an ack for putting this in scsi-misc ... or did you want to
> >>>>>>> do more testing first?
> >>>>>>
> >>>>>> Ping,
> >>>>>>
> >>>>>> Brian, James, can we merge this during the next merge window?
> >>>>>
> >>>>> I'm still waiting for an ack from Brian.
> >>>>
> >>>> Sorry for the delay... I've got this loaded in the lab and have managed to oops
> >>>> a couple times. The first one was during shutdown, which I wasn't able to collect
> >>>> any data for. The most recent occurred when a client was trying to login for the
> >>>> first time:
> >>>
> >>> You mean that the kernel crashes every time when a client logs in?
> >>
> >> I think the crash I was seeing when the client logs in was just due to the fact that
> >> I had things misconfigured. It took me a bit to figure out the configfs stuff. Didn't
> >> realize at first I had to go off and manually create a lot of the configfs layout
> >> described in the wiki page. Once I did that, I was able to get a LUN to successfully
> >> report in on the client side. I'll start pounding on this a bit and see how things
> >> hold up.
> >>
> > 
> > Hi Brian,
> > 
> > If could send along the original misconfigured configfs layout that is
> > causing the OOPs in handle_crq(), I would be happy to have a quick look.
> 
> Here it is. As you can see, there is no ./target/ibmvscsis directory created. In order
> to get it to work, I did the following. Please let me know if there is a better way
> to do this...
> 
> cd /sys/kernel/configfs/target
> mkdir -p ibmvscsis/30000003/tpgt_1
> mkdir ibmvscsis/30000003/tpgt_1/lun/lun_0
> ln -s core/fileio_0/testfvd ibmvscsis/30000003/tpgt_1/lun/lun_0/default
> 

This is correct..

> I had previously created a file backed lun via:
> 
> tcm_node --fileio fileio_0/testfvd /vdisks/test 100000000
> 
> Thanks,
> 
> Brian
> 
> 

So indeed, the issue appears to be that handle_crq() is running w/o a
configured /sys/kernel/config/target/ibmvscsis/$VSCSI_WWN/tpgt_1/
endpoint, but still receiving incoming I/O via handle_crq() ->
handle_cmd_queue() -> tcm_queuecommand()..

Which means that tcm_queuecommand() and struct ibmvscsis_adapter need to
be aware of some form of state between I/O and control path in order to
prevent adapter->se_tpg and adapter->se_sess from being referenced
before the TPG endpoint and VIO I_T Nexus (TCM Session) has been setup
with the explict mkdir(2) operation via target_core_fabric_configfs.c:
target_fabric_make_tpg() -> ibmvscsis_make_tpg() code.

I recall it being mentioned at some point that certain VIO clients had
issue with the failure of the (initial..?) expected INQUIRY, for which
is handled internally in ibmvscsis_queuecommand() code..

So that said, I think we have a couple of options here..

*) Add a lock to protect adapter->se_sess for I/O and
ibmvscsis_make_tpg() control path code, and find a way to gracefully
handle tcm_queuecommand() exceptions for non-configured targets
endpoints after the internally emulated INQUIRY is completed.

*) Determine if breaking up individual external VIO setup logic makes
more sense from being done in a global manner for all available VIO
target connections via:

    module_init() ->
	ibmvscsis_init() ->
		vio_register_probe() -> 

into a more flexible method via individually configured
target_core_fabric_configfs.c:target_fabric_make_tpg()
-> ibmvscsis_make_tpg(), where we don't actually pull the current
ibmvscsis_probe() -> crq_queue_create() until mkdir -p
$VIO_TARGET_FULLPATH/tpgt_1 has been performed.

Obviously for the long term direction of this code I would prefer the
latter and a more flexible direction (Tomo-san, thoughts on
this..?).

To prevent this from happening with existing code, I think the following
should address the current OOPs.  Please have a look and let me know.

Thanks!

--nab

diff --git a/drivers/scsi/ibmvscsi/ibmvscsis.c b/drivers/scsi/ibmvscsi/ibmvscsis.c
index 714af48..a0cd9b8 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsis.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsis.c
@@ -98,6 +98,9 @@ struct ibmvscsis_adapter {
 
        struct work_struct crq_work;
 
+       /* lock for protecting ->se_sess */
+       spinlock_t sess_lock;
+
        unsigned long liobn;
        unsigned long riobn;
 
@@ -335,6 +338,7 @@ static struct se_portal_group *ibmvscsis_make_tpg(struct se_wwn *wwn,
        struct ibmvscsis_adapter *adapter =
                container_of(wwn, struct ibmvscsis_adapter, tport_wwn);
        struct se_node_acl *acl;
+       struct se_session *se_sess;
        int ret;
        char *dname = (char *)dev_name(&adapter->dma_dev->dev);
 
@@ -347,16 +351,15 @@ static struct se_portal_group *ibmvscsis_make_tpg(struct se_wwn *wwn,
        if (ret)
                return ERR_PTR(-ENOMEM);
 
-       adapter->se_sess = transport_init_session();
-       if (!adapter->se_sess) {
+       se_sess = transport_init_session();
+       if (!se_sess) {
                core_tpg_deregister(&adapter->se_tpg);
                return ERR_PTR(-ENOMEM);
        }
 
        acl = core_tpg_check_initiator_node_acl(&adapter->se_tpg, dname);
        if (!acl) {
-               transport_free_session(adapter->se_sess);
-               adapter->se_sess = NULL;
+               transport_free_session(se_sess);
                return ERR_PTR(-ENOMEM);
        }
        adapter->se_sess->se_node_acl = acl;
@@ -365,6 +368,10 @@ static struct se_portal_group *ibmvscsis_make_tpg(struct se_wwn *wwn,
                                   adapter->se_sess->se_node_acl,
                                   adapter->se_sess, adapter);
 
+       spin_lock_bh(&adapter->sess_lock);
+       adapter->se_sess = se_sess;
+       spin_unlock_bh(&adapter->sess_lock);
+
        return &adapter->se_tpg;
 }
@@ -372,16 +379,19 @@ static void ibmvscsis_drop_tpg(struct se_portal_group *se_tpg)
 {
        struct ibmvscsis_adapter *adapter =
                container_of(se_tpg, struct ibmvscsis_adapter, se_tpg);
+       struct se_session *se_sess;
        unsigned long flags;
+       
+       transport_deregister_session_configfs(adapter->se_sess);
 
+       spin_lock_bh(&adapter->sess_lock);
+       se_sess = adapter->se_sess;
+       adapter->se_sess = NULL;
+       spin_unlock_bh(&adapter->sess_lock);
 
-       transport_deregister_session_configfs(adapter->se_sess);
        transport_free_session(adapter->se_sess);
-       core_tpg_deregister(se_tpg);
 
-       spin_lock_irqsave(&tpg_lock, flags);
-       adapter->se_sess = NULL;
-       spin_unlock_irqrestore(&tpg_lock, flags);
+       core_tpg_deregister(se_tpg);
 }
 
 static struct se_wwn *ibmvscsis_make_tport(struct target_fabric_configfs *tf,
@@ -873,6 +883,17 @@ static int tcm_queuecommand(struct ibmvscsis_adapter *adapter,
        data_len = srp_data_length(cmd, srp_cmd_direction(cmd));
 
        se_cmd = &vsc->se_cmd;
+       /*
+        * Ensure that an adapter->se_sess and TCM TPG endpoint have been
+        * configured via ibmvscsis_make_tpg().
+        */
+       spin_lock_bh(&adapter->sess_lock);
+       if (!adapter->se_sess) {
+               spin_unlock_bh(&adapter->sess_lock);
+               printk(KERN_ERR "struct ibmvscsis_adapter->se_sess has not been configured\n");
+               return -ENODEV;
+       }
+       spin_unlock_bh(&adapter->sess_lock);
 
        transport_init_se_cmd(se_cmd,
                              adapter->se_tpg.se_tpg_tfo,
@@ -1576,6 +1597,7 @@ static int ibmvscsis_probe(struct vio_dev *dev, const struct vio_device_id *id)
        if (!adapter)
                return -ENOMEM;
 
+       spin_lock_init(&adapter->sess_lock);
        adapter->dma_dev = dev;
 
        dma = (unsigned int *)vio_get_attribute(dev, "ibm,my-dma-window",



^ permalink raw reply related	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-21 22:22                   ` Brian King
@ 2011-03-21 22:31                     ` Brian King
  2011-03-21 22:48                       ` Nicholas A. Bellinger
  2011-03-21 22:34                     ` Nicholas A. Bellinger
  2011-03-21 23:30                     ` FUJITA Tomonori
  2 siblings, 1 reply; 81+ messages in thread
From: Brian King @ 2011-03-21 22:31 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: James.Bottomley, nab, linux-scsi

Just hit another potential issue. I was mapping / unmapping disks a couple times,
so that might have helped trigger the issue. I had a file backed disk mapped
to a vscsi lun, then unmapped it, mapped a ramdisk lun, then switched back to
the filebacked lun after running into issues with the ramdisk lun and saw this:


Mar 21 16:25:57 jn30a-lp4 kernel: unexpected fifo state
Mar 21 16:25:57 jn30a-lp4 kernel: ------------[ cut here ]------------
Mar 21 16:25:57 jn30a-lp4 kernel: WARNING: at drivers/scsi/libsrp.c:162
Mar 21 16:25:57 jn30a-lp4 kernel: Modules linked in: target_core_pscsi target_core_file target_core_iblock ip6t_LOG xt_tcpudp xt_pkttype ipt_LOG xt_limit ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_raw xt_NOTRACK ipt_REJECT xt_state iptable_raw iptable_filter ip6table_mangle nf_conntrack_netbios_ns nf_conntrack_broadcast nf_conntrack_ipv4 nf_conntrack nf_defrag_ipv4 ip_tables ip6table_filter ip6_tables x_tables ipv6 fuse loop dm_mod ibmvscsis libsrp scsi_tgt target_core_mod ses enclosure sg ibmveth configfs ext3 jbd mbcache sd_mod crc_t10dif ipr libata scsi_mod
Mar 21 16:25:57 jn30a-lp4 kernel: NIP: d0000000047e0b38 LR: d0000000047e0b34 CTR: 0000000000000000
Mar 21 16:25:57 jn30a-lp4 kernel: REGS: c00000033f4ef860 TRAP: 0700   Not tainted  (2.6.38-0.7-ppc64-06439-g5bab188-dirty)
Mar 21 16:25:57 jn30a-lp4 kernel: MSR: 8000000000029032 <EE,ME,CE,IR,DR>  CR: 24002024  XER: 20000001
Mar 21 16:25:57 jn30a-lp4 kernel: TASK = c00000033f2b39e0[58] 'kworker/4:1' THREAD: c00000033f4ec000 CPU: 4
Mar 21 16:25:57 jn30a-lp4 kernel: GPR00: d0000000047e0b34 c00000033f4efae0 d0000000047e9768 0000000000000018
Mar 21 16:25:57 jn30a-lp4 kernel: GPR04: 0000000000000000 0000000000000004 0000000000000000 c000000000f86610
Mar 21 16:25:57 jn30a-lp4 kernel: GPR08: c000000000f86b20 c0000000008b38b8 000000000007ffff 0000000000000001
Mar 21 16:25:57 jn30a-lp4 kernel: GPR12: 0000000028002082 c00000000f190a00 0000000000000000 0000000002b80610
Mar 21 16:25:57 jn30a-lp4 kernel: GPR16: 0000000001a3fc60 0000000002b80d08 0000000001a3fc70 0000000002c81870
Mar 21 16:25:57 jn30a-lp4 kernel: GPR20: 0000000002b805c8 0000000002c81888 0000000002c81910 0000000000000000
Mar 21 16:25:57 jn30a-lp4 kernel: GPR24: 0000000000000000 0000000000000000 0000000000000000 c00000033f1bacc0
Mar 21 16:25:57 jn30a-lp4 kernel: GPR28: 0000000000000001 0000000000000000 d0000000047e9778 d0000000047e1ba8
Mar 21 16:25:57 jn30a-lp4 kernel: NIP [d0000000047e0b38] .srp_iu_get+0x118/0x130 [libsrp]
Mar 21 16:25:57 jn30a-lp4 kernel: LR [d0000000047e0b34] .srp_iu_get+0x114/0x130 [libsrp]
Mar 21 16:25:57 jn30a-lp4 kernel: Call Trace:
Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efae0] [d0000000047e0b34] .srp_iu_get+0x114/0x130 [libsrp] (unreliable)
Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efb90] [d0000000048f0d6c] .process_crq+0xcc/0x5b8 [ibmvscsis]
Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efc50] [d0000000048f183c] .handle_crq+0x224/0xa60 [ibmvscsis]
Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efd60] [c0000000000c2120] .process_one_work+0x198/0x518
Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efe10] [c0000000000c297c] .worker_thread+0x1f4/0x518
Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efed0] [c0000000000cb4c4] .kthread+0xb4/0xc0
Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4eff90] [c00000000001e864] .kernel_thread+0x54/0x70
Mar 21 16:25:57 jn30a-lp4 kernel: Instruction dump:
Mar 21 16:25:57 jn30a-lp4 kernel: e8010010 eb41ffd0 7c0803a6 eb61ffd8 eb81ffe0 eba1ffe8 ebc1fff0 ebe1fff8
Mar 21 16:25:57 jn30a-lp4 kernel: 4e800020 e87e8058 48000739 e8410028 <0fe00000> 38000001 38600000 981f0000
Mar 21 16:25:57 jn30a-lp4 kernel: ---[ end trace ec6b6139d888a732 ]---
Mar 21 16:25:57 jn30a-lp4 kernel: Error getting IU from pool
Mar 21 16:25:57 jn30a-lp4 kernel: Error getting IU from pool
Mar 21 16:25:57 jn30a-lp4 kernel: Error getting IU from pool
Mar 21 16:25:57 jn30a-lp4 kernel: Error getting IU from pool

I'm also seeing disktest complain on the client about commands taking longer than 120 seconds
on occasion, which may play into the performance issue I mentioned in my previous mail.

-Brian


-- 
Brian King
Linux on Power Virtualization
IBM Linux Technology Center



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-21 22:22                   ` Brian King
  2011-03-21 22:31                     ` Brian King
@ 2011-03-21 22:34                     ` Nicholas A. Bellinger
  2011-03-21 23:06                       ` FUJITA Tomonori
  2011-03-21 23:30                     ` FUJITA Tomonori
  2 siblings, 1 reply; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-03-21 22:34 UTC (permalink / raw)
  To: Brian King; +Cc: FUJITA Tomonori, James.Bottomley, linux-scsi

On Mon, 2011-03-21 at 17:22 -0500, Brian King wrote:
> Tomo,
> 
> Thanks for all the work you've done with this driver. It is much appreciated.
> 
> What sort of performance are you seeing with this? With filebacked devices, I
> was seeing around 1 MB/sec doing a basic disktest run. I haven't compared this
> to ibmvstgt, but I'm pretty sure we did a fair bit better there. Perhaps I've got
> some setup issue?
> 
> I tried a ramdisk device and saw this on the server:
> 
> 
> DMA_TO_DEVICE not supported for RAMDISK_DR with task_sg_chaining=1
> 
> 

So for the RAMDISK_DR (direct) SGL memory mapping case we do not support
WRITEs for HW fabric modules using task_sg_chaining=1.

Please go ahead and use RAMDISK_MCP instead, and I will look at adding a
special case in target_core_fabric_configfs.c code to disallow
RAMDISK_DR symlink creation of configfs ports for target fabric modules
using task_sg_chaining=1.

Thanks!

--nab


^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-21 21:05                     ` James Bottomley
@ 2011-03-21 22:37                       ` Brian King
  0 siblings, 0 replies; 81+ messages in thread
From: Brian King @ 2011-03-21 22:37 UTC (permalink / raw)
  To: James Bottomley; +Cc: FUJITA Tomonori, nab, linux-scsi

On 03/21/2011 04:05 PM, James Bottomley wrote:
> On Mon, 2011-03-21 at 16:01 -0500, Brian King wrote:
>> On 03/20/2011 08:09 PM, FUJITA Tomonori wrote:
>>> Thanks for the testings,
>>>
>>> On Fri, 18 Mar 2011 15:58:53 -0500
>>> Brian King <brking@linux.vnet.ibm.com> wrote:
>>>
>>>> On 03/07/2011 08:40 AM, James Bottomley wrote:
>>>>> On Mon, 2011-03-07 at 13:41 +0900, FUJITA Tomonori wrote:
>>>>>> On Sat, 12 Feb 2011 14:27:26 -0600
>>>>>> James Bottomley <James.Bottomley@suse.de> wrote:
>>>>>>
>>>>>>>> Disregard my previous comment. It looks like current client should handle reservations
>>>>>>>> just fine without any further changes.
>>>>>>>
>>>>>>> So is that an ack for putting this in scsi-misc ... or did you want to
>>>>>>> do more testing first?
>>>>>>
>>>>>> Ping,
>>>>>>
>>>>>> Brian, James, can we merge this during the next merge window?
>>>>>
>>>>> I'm still waiting for an ack from Brian.
>>>>
>>>> Sorry for the delay... I've got this loaded in the lab and have managed to oops
>>>> a couple times. The first one was during shutdown, which I wasn't able to collect
>>>> any data for. The most recent occurred when a client was trying to login for the
>>>> first time:
>>>
>>> You mean that the kernel crashes every time when a client logs in?
>>
>> I think the crash I was seeing when the client logs in was just due to the fact that
>> I had things misconfigured. It took me a bit to figure out the configfs stuff. Didn't
>> realize at first I had to go off and manually create a lot of the configfs layout
>> described in the wiki page. Once I did that, I was able to get a LUN to successfully
>> report in on the client side. I'll start pounding on this a bit and see how things
>> hold up.
> 
> Hmm, so that still indicates the error handling is screwed at this
> point.  That, I suppose, should be fixable (plus being a fix, it's merge
> window exempt).  Do you want this in now, or wait until next merge
> window?

Since this would replace ibmvstgt, and I've been running into issues fairly quickly,
it might be prudent to wait until the next merge window. If it was a new driver
or if there was some way to still build the old ibmvstgt driver, I'd feel more
comfortable with pushing it in.

Thanks,

Brian

-- 
Brian King
Linux on Power Virtualization
IBM Linux Technology Center



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-21 22:31                     ` Brian King
@ 2011-03-21 22:48                       ` Nicholas A. Bellinger
  2011-03-22 12:53                         ` Brian King
  0 siblings, 1 reply; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-03-21 22:48 UTC (permalink / raw)
  To: Brian King; +Cc: FUJITA Tomonori, James.Bottomley, linux-scsi

On Mon, 2011-03-21 at 17:31 -0500, Brian King wrote:
> Just hit another potential issue. I was mapping / unmapping disks a couple times,
> so that might have helped trigger the issue. I had a file backed disk mapped
> to a vscsi lun, then unmapped it, mapped a ramdisk lun, then switched back to
> the filebacked lun after running into issues with the ramdisk lun and saw this:
> 
> 

By mapping/unmapping here do you mean unlinking+linking the Port/LUNs
w/o removing the active VIO I_T Nexus, or actually rmdir'ing the whole
$VIO_TARGET_FULLPATH/tpgt_1/ struct config_group..?

> Mar 21 16:25:57 jn30a-lp4 kernel: unexpected fifo state
> Mar 21 16:25:57 jn30a-lp4 kernel: ------------[ cut here ]------------
> Mar 21 16:25:57 jn30a-lp4 kernel: WARNING: at drivers/scsi/libsrp.c:162
> Mar 21 16:25:57 jn30a-lp4 kernel: Modules linked in: target_core_pscsi target_core_file target_core_iblock ip6t_LOG xt_tcpudp xt_pkttype ipt_LOG xt_limit ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_raw xt_NOTRACK ipt_REJECT xt_state iptable_raw iptable_filter ip6table_mangle nf_conntrack_netbios_ns nf_conntrack_broadcast nf_conntrack_ipv4 nf_conntrack nf_defrag_ipv4 ip_tables ip6table_filter ip6_tables x_tables ipv6 fuse loop dm_mod ibmvscsis libsrp scsi_tgt target_core_mod ses enclosure sg ibmveth configfs ext3 jbd mbcache sd_mod crc_t10dif ipr libata scsi_mod
> Mar 21 16:25:57 jn30a-lp4 kernel: NIP: d0000000047e0b38 LR: d0000000047e0b34 CTR: 0000000000000000
> Mar 21 16:25:57 jn30a-lp4 kernel: REGS: c00000033f4ef860 TRAP: 0700   Not tainted  (2.6.38-0.7-ppc64-06439-g5bab188-dirty)
> Mar 21 16:25:57 jn30a-lp4 kernel: MSR: 8000000000029032 <EE,ME,CE,IR,DR>  CR: 24002024  XER: 20000001
> Mar 21 16:25:57 jn30a-lp4 kernel: TASK = c00000033f2b39e0[58] 'kworker/4:1' THREAD: c00000033f4ec000 CPU: 4
> Mar 21 16:25:57 jn30a-lp4 kernel: GPR00: d0000000047e0b34 c00000033f4efae0 d0000000047e9768 0000000000000018
> Mar 21 16:25:57 jn30a-lp4 kernel: GPR04: 0000000000000000 0000000000000004 0000000000000000 c000000000f86610
> Mar 21 16:25:57 jn30a-lp4 kernel: GPR08: c000000000f86b20 c0000000008b38b8 000000000007ffff 0000000000000001
> Mar 21 16:25:57 jn30a-lp4 kernel: GPR12: 0000000028002082 c00000000f190a00 0000000000000000 0000000002b80610
> Mar 21 16:25:57 jn30a-lp4 kernel: GPR16: 0000000001a3fc60 0000000002b80d08 0000000001a3fc70 0000000002c81870
> Mar 21 16:25:57 jn30a-lp4 kernel: GPR20: 0000000002b805c8 0000000002c81888 0000000002c81910 0000000000000000
> Mar 21 16:25:57 jn30a-lp4 kernel: GPR24: 0000000000000000 0000000000000000 0000000000000000 c00000033f1bacc0
> Mar 21 16:25:57 jn30a-lp4 kernel: GPR28: 0000000000000001 0000000000000000 d0000000047e9778 d0000000047e1ba8
> Mar 21 16:25:57 jn30a-lp4 kernel: NIP [d0000000047e0b38] .srp_iu_get+0x118/0x130 [libsrp]
> Mar 21 16:25:57 jn30a-lp4 kernel: LR [d0000000047e0b34] .srp_iu_get+0x114/0x130 [libsrp]
> Mar 21 16:25:57 jn30a-lp4 kernel: Call Trace:
> Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efae0] [d0000000047e0b34] .srp_iu_get+0x114/0x130 [libsrp] (unreliable)
> Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efb90] [d0000000048f0d6c] .process_crq+0xcc/0x5b8 [ibmvscsis]
> Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efc50] [d0000000048f183c] .handle_crq+0x224/0xa60 [ibmvscsis]
> Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efd60] [c0000000000c2120] .process_one_work+0x198/0x518
> Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efe10] [c0000000000c297c] .worker_thread+0x1f4/0x518
> Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efed0] [c0000000000cb4c4] .kthread+0xb4/0xc0
> Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4eff90] [c00000000001e864] .kernel_thread+0x54/0x70
> Mar 21 16:25:57 jn30a-lp4 kernel: Instruction dump:
> Mar 21 16:25:57 jn30a-lp4 kernel: e8010010 eb41ffd0 7c0803a6 eb61ffd8 eb81ffe0 eba1ffe8 ebc1fff0 ebe1fff8
> Mar 21 16:25:57 jn30a-lp4 kernel: 4e800020 e87e8058 48000739 e8410028 <0fe00000> 38000001 38600000 981f0000
> Mar 21 16:25:57 jn30a-lp4 kernel: ---[ end trace ec6b6139d888a732 ]---
> Mar 21 16:25:57 jn30a-lp4 kernel: Error getting IU from pool
> Mar 21 16:25:57 jn30a-lp4 kernel: Error getting IU from pool
> Mar 21 16:25:57 jn30a-lp4 kernel: Error getting IU from pool
> Mar 21 16:25:57 jn30a-lp4 kernel: Error getting IU from pool
> 

If we are talking about the latter case I think my last patch should
address this with active I_T Nexus I/O and ibmvscsis_drop_tpg(), but I
will followup a bit more and send out a proper patch this evening for
Tomo to comment..

> I'm also seeing disktest complain on the client about commands taking longer than 120 seconds
> on occasion, which may play into the performance issue I mentioned in my previous mail.
> 

Mmmm, please verify with RAMDISK_MCP backends as well, as by default
FILEIO has O_SYNC enabled..  This does seem strange for LTP disktest
however..

--nab



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-21 22:34                     ` Nicholas A. Bellinger
@ 2011-03-21 23:06                       ` FUJITA Tomonori
  2011-03-21 23:13                         ` Nicholas A. Bellinger
  0 siblings, 1 reply; 81+ messages in thread
From: FUJITA Tomonori @ 2011-03-21 23:06 UTC (permalink / raw)
  To: nab; +Cc: brking, fujita.tomonori, James.Bottomley, linux-scsi

On Mon, 21 Mar 2011 15:34:33 -0700
"Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:

> On Mon, 2011-03-21 at 17:22 -0500, Brian King wrote:
> > Tomo,
> > 
> > Thanks for all the work you've done with this driver. It is much appreciated.
> > 
> > What sort of performance are you seeing with this? With filebacked devices, I
> > was seeing around 1 MB/sec doing a basic disktest run. I haven't compared this
> > to ibmvstgt, but I'm pretty sure we did a fair bit better there. Perhaps I've got
> > some setup issue?
> > 
> > I tried a ramdisk device and saw this on the server:
> > 
> > 
> > DMA_TO_DEVICE not supported for RAMDISK_DR with task_sg_chaining=1
> > 
> > 
> 
> So for the RAMDISK_DR (direct) SGL memory mapping case we do not support
> WRITEs for HW fabric modules using task_sg_changing=1.
> 
> Please go ahead and use RAMDISK_MCP instead, and I will look at adding a
> special case in target_core_fabric_configfs.c code to disallow
> RAMDISK_DR symlink creation of configfs ports for target fabric modules
> using task_sg_chaining=1.

I really want to remove 'task_sg_chaining=0'; all the drivers support
sg chaining. It's hacky. What drivers don't support it?

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-21 23:06                       ` FUJITA Tomonori
@ 2011-03-21 23:13                         ` Nicholas A. Bellinger
  2011-03-21 23:22                           ` FUJITA Tomonori
  0 siblings, 1 reply; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-03-21 23:13 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: brking, James.Bottomley, linux-scsi

On Tue, 2011-03-22 at 08:06 +0900, FUJITA Tomonori wrote:
> On Mon, 21 Mar 2011 15:34:33 -0700
> "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> 
> > On Mon, 2011-03-21 at 17:22 -0500, Brian King wrote:
> > > Tomo,
> > > 
> > > Thanks for all the work you've done with this driver. It is much appreciated.
> > > 
> > > What sort of performance are you seeing with this? With filebacked devices, I
> > > was seeing around 1 MB/sec doing a basic disktest run. I haven't compared this
> > > to ibmvstgt, but I'm pretty sure we did a fair bit better there. Perhaps I've got
> > > some setup issue?
> > > 
> > > I tried a ramdisk device and saw this on the server:
> > > 
> > > 
> > > DMA_TO_DEVICE not supported for RAMDISK_DR with task_sg_chaining=1
> > > 
> > > 
> > 
> > So for the RAMDISK_DR (direct) SGL memory mapping case we do not support
> > WRITEs for HW fabric modules using task_sg_changing=1.
> > 
> > Please go ahead and use RAMDISK_MCP instead, and I will look at adding a
> > special case in target_core_fabric_configfs.c code to disallow
> > RAMDISK_DR symlink creation of configfs ports for target fabric modules
> > using task_sg_chaining=1.
> 
> I really want to remove 'task_sg_chaining=0', all the drivers support
> sg chaining. It's a hacky. What drivers doesn't support it?

It's only drivers for which we are not using HW provided scatterlists
into TCM backend memory, which today is just iscsi_target_mod
iscsi_target_rx_thread() -> sock_recvmsg() code.

So that said, dropping TFO->task_sg_chaining is fine with me.  It will
still be useful for TCM to determine this for a fabric module using a
new flag (for the current stable iscsi_target_mod code case) so that non
iscsi_target_mod of RAMDISK_DR Port/LUN export can be -ENOSYS within
target_core_fabric_configfs.c:target_fabric_port_link().

--nab


^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-21 21:24                       ` Brian King
  2011-03-21 22:29                         ` Nicholas A. Bellinger
@ 2011-03-21 23:20                         ` FUJITA Tomonori
  2011-03-21 23:50                           ` Nicholas A. Bellinger
  1 sibling, 1 reply; 81+ messages in thread
From: FUJITA Tomonori @ 2011-03-21 23:20 UTC (permalink / raw)
  To: brking; +Cc: nab, fujita.tomonori, James.Bottomley, linux-scsi

On Mon, 21 Mar 2011 16:24:58 -0500
Brian King <brking@linux.vnet.ibm.com> wrote:

> > If could send along the original misconfigured configfs layout that is
> > causing the OOPs in handle_crq(), I would be happy to have a quick look.
> 
> Here it is. As you can see, there is no ./target/ibmvscsis directory created. In order

I suspected something like that. The driver shouldn't be able to crash
with any configuration, though.


> to get it to work, I did the following. Please let me know if there is a better way
> to do this...

As I said long before, this kind of hardware configuration should
automatically show up when we load the driver module. But the target
core is broken in this regard.

Anyway, I'll fix the driver not to crash with incomplete
configurations.

Thanks,


> cd /sys/kernel/configfs/target
> mkdir -p ibmvscsis/30000003/tpgt_1
> mkdir ibmvscsis/30000003/tpgt_1/lun/lun_0
> ln -s core/fileio_0/testfvd ibmvscsis/30000003/tpgt_1/lun/lun_0/default
> 
> I had previously created a file backed lun via:
> 
> tcm_node --fileio fileio_0/testfvd /vdisks/test 100000000
> 
> Thanks,
> 
> Brian
> 
> 
> ./target
> ./target/core
> ./target/core/fileio_0
> ./target/core/fileio_0/testfvd
> ./target/core/fileio_0/testfvd/alua
> ./target/core/fileio_0/testfvd/alua/default_tg_pt_gp
> ./target/core/fileio_0/testfvd/alua/default_tg_pt_gp/members
> ./target/core/fileio_0/testfvd/alua/default_tg_pt_gp/tg_pt_gp_id
> ./target/core/fileio_0/testfvd/alua/default_tg_pt_gp/preferred
> ./target/core/fileio_0/testfvd/alua/default_tg_pt_gp/trans_delay_msecs
> ./target/core/fileio_0/testfvd/alua/default_tg_pt_gp/nonop_delay_msecs
> ./target/core/fileio_0/testfvd/alua/default_tg_pt_gp/alua_write_metadata
> ./target/core/fileio_0/testfvd/alua/default_tg_pt_gp/alua_access_type
> ./target/core/fileio_0/testfvd/alua/default_tg_pt_gp/alua_access_status
> ./target/core/fileio_0/testfvd/alua/default_tg_pt_gp/alua_access_state
> ./target/core/fileio_0/testfvd/wwn
> ./target/core/fileio_0/testfvd/wwn/vpd_assoc_scsi_target_device
> ./target/core/fileio_0/testfvd/wwn/vpd_assoc_target_port
> ./target/core/fileio_0/testfvd/wwn/vpd_assoc_logical_unit
> ./target/core/fileio_0/testfvd/wwn/vpd_protocol_identifier
> ./target/core/fileio_0/testfvd/wwn/vpd_unit_serial
> ./target/core/fileio_0/testfvd/pr
> ./target/core/fileio_0/testfvd/pr/res_aptpl_metadata
> ./target/core/fileio_0/testfvd/pr/res_aptpl_active
> ./target/core/fileio_0/testfvd/pr/res_type
> ./target/core/fileio_0/testfvd/pr/res_pr_type
> ./target/core/fileio_0/testfvd/pr/res_pr_registered_i_pts
> ./target/core/fileio_0/testfvd/pr/res_pr_holder_tg_port
> ./target/core/fileio_0/testfvd/pr/res_pr_generation
> ./target/core/fileio_0/testfvd/pr/res_pr_all_tgt_pts
> ./target/core/fileio_0/testfvd/pr/res_holder
> ./target/core/fileio_0/testfvd/attrib
> ./target/core/fileio_0/testfvd/attrib/unmap_granularity_alignment
> ./target/core/fileio_0/testfvd/attrib/unmap_granularity
> ./target/core/fileio_0/testfvd/attrib/max_unmap_block_desc_count
> ./target/core/fileio_0/testfvd/attrib/max_unmap_lba_count
> ./target/core/fileio_0/testfvd/attrib/task_timeout
> ./target/core/fileio_0/testfvd/attrib/queue_depth
> ./target/core/fileio_0/testfvd/attrib/hw_queue_depth
> ./target/core/fileio_0/testfvd/attrib/optimal_sectors
> ./target/core/fileio_0/testfvd/attrib/max_sectors
> ./target/core/fileio_0/testfvd/attrib/hw_max_sectors
> ./target/core/fileio_0/testfvd/attrib/block_size
> ./target/core/fileio_0/testfvd/attrib/hw_block_size
> ./target/core/fileio_0/testfvd/attrib/enforce_pr_isids
> ./target/core/fileio_0/testfvd/attrib/emulate_tpws
> ./target/core/fileio_0/testfvd/attrib/emulate_tpu
> ./target/core/fileio_0/testfvd/attrib/emulate_tas
> ./target/core/fileio_0/testfvd/attrib/emulate_ua_intlck_ctrl
> ./target/core/fileio_0/testfvd/attrib/emulate_write_cache
> ./target/core/fileio_0/testfvd/attrib/emulate_fua_read
> ./target/core/fileio_0/testfvd/attrib/emulate_fua_write
> ./target/core/fileio_0/testfvd/attrib/emulate_dpo
> ./target/core/fileio_0/testfvd/alua_lu_gp
> ./target/core/fileio_0/testfvd/enable
> ./target/core/fileio_0/testfvd/udev_path
> ./target/core/fileio_0/testfvd/alias
> ./target/core/fileio_0/testfvd/control
> ./target/core/fileio_0/testfvd/info
> ./target/core/fileio_0/hba_mode
> ./target/core/fileio_0/hba_info
> ./target/core/alua
> ./target/core/alua/lu_gps
> ./target/core/alua/lu_gps/default_lu_gp
> ./target/core/alua/lu_gps/default_lu_gp/members
> ./target/core/alua/lu_gps/default_lu_gp/lu_gp_id
> ./target/version
> 
> 
> 
> -- 
> Brian King
> Linux on Power Virtualization
> IBM Linux Technology Center
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-21 23:13                         ` Nicholas A. Bellinger
@ 2011-03-21 23:22                           ` FUJITA Tomonori
  2011-03-22  0:03                             ` Nicholas A. Bellinger
  0 siblings, 1 reply; 81+ messages in thread
From: FUJITA Tomonori @ 2011-03-21 23:22 UTC (permalink / raw)
  To: nab; +Cc: fujita.tomonori, brking, James.Bottomley, linux-scsi

On Mon, 21 Mar 2011 16:13:07 -0700
"Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:

> On Tue, 2011-03-22 at 08:06 +0900, FUJITA Tomonori wrote:
> > On Mon, 21 Mar 2011 15:34:33 -0700
> > "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> > 
> > > On Mon, 2011-03-21 at 17:22 -0500, Brian King wrote:
> > > > Tomo,
> > > > 
> > > > Thanks for all the work you've done with this driver. It is much appreciated.
> > > > 
> > > > What sort of performance are you seeing with this? With filebacked devices, I
> > > > was seeing around 1 MB/sec doing a basic disktest run. I haven't compared this
> > > > to ibmvstgt, but I'm pretty sure we did a fair bit better there. Perhaps I've got
> > > > some setup issue?
> > > > 
> > > > I tried a ramdisk device and saw this on the server:
> > > > 
> > > > 
> > > > DMA_TO_DEVICE not supported for RAMDISK_DR with task_sg_chaining=1
> > > > 
> > > > 
> > > 
> > > So for the RAMDISK_DR (direct) SGL memory mapping case we do not support
> > > WRITEs for HW fabric modules using task_sg_changing=1.
> > > 
> > > Please go ahead and use RAMDISK_MCP instead, and I will look at adding a
> > > special case in target_core_fabric_configfs.c code to disallow
> > > RAMDISK_DR symlink creation of configfs ports for target fabric modules
> > > using task_sg_chaining=1.
> > 
> > I really want to remove 'task_sg_chaining=0', all the drivers support
> > sg chaining. It's a hacky. What drivers doesn't support it?
> 
> It's only drivers for which we are not using HW provided scatterlists
> into TCM backend memory, which today is just iscsi_target_mod
> iscsi_target_rx_thread() -> sock_recvmsg() code.

Please fix iscsi_target_mod then.

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-21 22:22                   ` Brian King
  2011-03-21 22:31                     ` Brian King
  2011-03-21 22:34                     ` Nicholas A. Bellinger
@ 2011-03-21 23:30                     ` FUJITA Tomonori
  2 siblings, 0 replies; 81+ messages in thread
From: FUJITA Tomonori @ 2011-03-21 23:30 UTC (permalink / raw)
  To: brking; +Cc: fujita.tomonori, James.Bottomley, nab, linux-scsi

On Mon, 21 Mar 2011 17:22:17 -0500
Brian King <brking@linux.vnet.ibm.com> wrote:

> Thanks for all the work you've done with this driver. It is much
> appreciated.

No problem. Thanks for the testing.


> What sort of performance are you seeing with this? With filebacked devices, I
> was seeing around 1 MB/sec doing a basic disktest run. I haven't compared this
> to ibmvstgt, but I'm pretty sure we did a fair bit better there. Perhaps I've got
> some setup issue?

I've not done performance tests here, but surely we should do
better. I'll do so, but we might need to fix the target core too for
better performance.

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-21 23:20                         ` FUJITA Tomonori
@ 2011-03-21 23:50                           ` Nicholas A. Bellinger
  2011-03-21 23:55                             ` FUJITA Tomonori
  0 siblings, 1 reply; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-03-21 23:50 UTC (permalink / raw)
  To: FUJITA Tomonori
  Cc: brking, James.Bottomley, linux-scsi, Jerome Martin, Marc Fleischmann

On Tue, 2011-03-22 at 08:20 +0900, FUJITA Tomonori wrote:
> On Mon, 21 Mar 2011 16:24:58 -0500
> Brian King <brking@linux.vnet.ibm.com> wrote:
> 
> > > If could send along the original misconfigured configfs layout that is
> > > causing the OOPs in handle_crq(), I would be happy to have a quick look.
> > 
> > Here it is. As you can see, there is no ./target/ibmvscsis directory created. In order
> 
> I suspect something like that. The driver can't crash with any
> configurations though.
> 
> 
> > to get it to work, I did the following. Please let me know if there is a better way
> > to do this...
> 
> As I said long before, this kinda hardware configuration should
> automatically show up when we load the driver module. But the target
> core is broken in this regard.
> 

It's possible to move the I_T nexus creation into VIO context code, but
we still want to be able to define the actual target endpoint definition
from userspace to get the proper VFS reference counting for fabric data
structures containing struct config_group that reference the
config_groups from the rest of the target stack.  We can open that
discussion with jlbec again, but at this point I still do agree with
this original statement that in the end this is going to be unnecessary,
and will end up being ugly and more complex in kernel code to handle
both.

I think what we need to make this easy for ibmvscsis users are simple
userspace items like scanning of existing VIO sysfs layouts to output a
usable set of /sys/kernel/config/target/ibmvscsis ops they can run,
and the saving of persistent state to /etc/target/, et al.

That said I will get these userspace items added into lio-utils.git code
in the upcoming weeks to support basic scanning and persistent
init.d/target functionality for ibmvscsis.  From there we (RisingTide
Systems) would also like to get ibmvscsis supported into rtsadmin-gpl v2
in order to provide a proper high level shell to users of ibmvscsis.
(mwf and jxm CC'ed)

Best Regards,

--nab



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-21 23:50                           ` Nicholas A. Bellinger
@ 2011-03-21 23:55                             ` FUJITA Tomonori
  2011-03-22  0:26                               ` Nicholas A. Bellinger
  0 siblings, 1 reply; 81+ messages in thread
From: FUJITA Tomonori @ 2011-03-21 23:55 UTC (permalink / raw)
  To: nab; +Cc: fujita.tomonori, brking, James.Bottomley, linux-scsi, jxm, mwf

On Mon, 21 Mar 2011 16:50:14 -0700
"Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:

> On Tue, 2011-03-22 at 08:20 +0900, FUJITA Tomonori wrote:
> > On Mon, 21 Mar 2011 16:24:58 -0500
> > Brian King <brking@linux.vnet.ibm.com> wrote:
> > 
> > > > If could send along the original misconfigured configfs layout that is
> > > > causing the OOPs in handle_crq(), I would be happy to have a quick look.
> > > 
> > > Here it is. As you can see, there is no ./target/ibmvscsis directory created. In order
> > 
> > I suspect something like that. The driver can't crash with any
> > configurations though.
> > 
> > 
> > > to get it to work, I did the following. Please let me know if there is a better way
> > > to do this...
> > 
> > As I said long before, this kinda hardware configuration should
> > automatically show up when we load the driver module. But the target
> > core is broken in this regard.
> > 
> 
> It's possible to move the I_T nexus creation into VIO context code, but
> we still want to be able to define the actual target endpoint definition
> from userspace to get the proper VFS reference counting for fabric data
> structures containing struct config_group that reference the
> config_groups from the rest of the target stack.

Hmm, what can we configure for the ibmvscsis? I think that there is
nothing. Everything is configured via the firmware before the kernel
boots. So I don't see any point in configuring something from user
space. That just wastes users' time, as it did Brian's.

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-21 23:22                           ` FUJITA Tomonori
@ 2011-03-22  0:03                             ` Nicholas A. Bellinger
  0 siblings, 0 replies; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-03-22  0:03 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: brking, James.Bottomley, linux-scsi

On Tue, 2011-03-22 at 08:22 +0900, FUJITA Tomonori wrote:
> On Mon, 21 Mar 2011 16:13:07 -0700
> "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> 
> > On Tue, 2011-03-22 at 08:06 +0900, FUJITA Tomonori wrote:
> > > On Mon, 21 Mar 2011 15:34:33 -0700
> > > "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> > > 
> > > > On Mon, 2011-03-21 at 17:22 -0500, Brian King wrote:
> > > > > Tomo,
> > > > > 
> > > > > Thanks for all the work you've done with this driver. It is much appreciated.
> > > > > 
> > > > > What sort of performance are you seeing with this? With filebacked devices, I
> > > > > was seeing around 1 MB/sec doing a basic disktest run. I haven't compared this
> > > > > to ibmvstgt, but I'm pretty sure we did a fair bit better there. Perhaps I've got
> > > > > some setup issue?
> > > > > 
> > > > > I tried a ramdisk device and saw this on the server:
> > > > > 
> > > > > 
> > > > > DMA_TO_DEVICE not supported for RAMDISK_DR with task_sg_chaining=1
> > > > > 
> > > > > 
> > > > 
> > > > So for the RAMDISK_DR (direct) SGL memory mapping case we do not support
> > > > WRITEs for HW fabric modules using task_sg_changing=1.
> > > > 
> > > > Please go ahead and use RAMDISK_MCP instead, and I will look at adding a
> > > > special case in target_core_fabric_configfs.c code to disallow
> > > > RAMDISK_DR symlink creation of configfs ports for target fabric modules
> > > > using task_sg_chaining=1.
> > > 
> > > I really want to remove 'task_sg_chaining=0', all the drivers support
> > > sg chaining. It's a hacky. What drivers doesn't support it?
> > 
> > It's only drivers for which we are not using HW provided scatterlists
> > into TCM backend memory, which today is just iscsi_target_mod
> > iscsi_target_rx_thread() -> sock_recvmsg() code.
> 
> Please fix iscsi_target_mod then.

<nod>.

I'll drop this flag for the next round of target core v4.1 code, and just
use iscsi_target_mod's target_core_fabric_ops->alloc_cmd_iovecs() to
know when to prevent RAMDISK_DR export for the typical non
iscsi_target_mod case.

Best Regards,

--nab




^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-21 23:55                             ` FUJITA Tomonori
@ 2011-03-22  0:26                               ` Nicholas A. Bellinger
  2011-03-22  0:32                                 ` FUJITA Tomonori
  0 siblings, 1 reply; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-03-22  0:26 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: brking, James.Bottomley, linux-scsi, jxm, mwf

On Tue, 2011-03-22 at 08:55 +0900, FUJITA Tomonori wrote:
> On Mon, 21 Mar 2011 16:50:14 -0700
> "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> 
> > On Tue, 2011-03-22 at 08:20 +0900, FUJITA Tomonori wrote:
> > > On Mon, 21 Mar 2011 16:24:58 -0500
> > > Brian King <brking@linux.vnet.ibm.com> wrote:
> > > 
> > > > > If could send along the original misconfigured configfs layout that is
> > > > > causing the OOPs in handle_crq(), I would be happy to have a quick look.
> > > > 
> > > > Here it is. As you can see, there is no ./target/ibmvscsis directory created. In order
> > > 
> > > I suspect something like that. The driver can't crash with any
> > > configurations though.
> > > 
> > > 
> > > > to get it to work, I did the following. Please let me know if there is a better way
> > > > to do this...
> > > 
> > > As I said long before, this kinda hardware configuration should
> > > automatically show up when we load the driver module. But the target
> > > core is broken in this regard.
> > > 
> > 
> > It's possible to move the I_T nexus creation into VIO context code, but
> > we still want to be able to define the actual target endpoint definition
> > from userspace to get the proper VFS reference counting for fabric data
> > structures containing struct config_group that reference the
> > config_groups from the rest of the target stack.
> 
> Hmm, what can we configure for the ibmvscsis? I think that there is
> nothing. Everything is configured via the firmware before the kernel
> boots. So I don't see any point to configure something from the user
> space. That just wastes user's time like Brian.
> --

Yes, but the user still needs to configure the fabric Port/LUNs in order
to build the view of the target backends, right..?

In this particular case for ibmvscsis since we already know the target
endpoint names from sysfs attributes, using a small amount of scripting
code in lio-utils to automatically setup the endpoints for the user is
really simple, and we still expect the user to define which backend
devices from /sys/kernel/config/target/$HBA/$DEV will be configured as
Port/LUNs for the fabric endpoint.

--nab


^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-22  0:26                               ` Nicholas A. Bellinger
@ 2011-03-22  0:32                                 ` FUJITA Tomonori
  2011-03-22  2:28                                   ` Nicholas A. Bellinger
  0 siblings, 1 reply; 81+ messages in thread
From: FUJITA Tomonori @ 2011-03-22  0:32 UTC (permalink / raw)
  To: nab; +Cc: fujita.tomonori, brking, James.Bottomley, linux-scsi, jxm, mwf

On Mon, 21 Mar 2011 17:26:17 -0700
"Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:

> On Tue, 2011-03-22 at 08:55 +0900, FUJITA Tomonori wrote:
> > On Mon, 21 Mar 2011 16:50:14 -0700
> > "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> > 
> > > On Tue, 2011-03-22 at 08:20 +0900, FUJITA Tomonori wrote:
> > > > On Mon, 21 Mar 2011 16:24:58 -0500
> > > > Brian King <brking@linux.vnet.ibm.com> wrote:
> > > > 
> > > > > > If could send along the original misconfigured configfs layout that is
> > > > > > causing the OOPs in handle_crq(), I would be happy to have a quick look.
> > > > > 
> > > > > Here it is. As you can see, there is no ./target/ibmvscsis directory created. In order
> > > > 
> > > > I suspect something like that. The driver can't crash with any
> > > > configurations though.
> > > > 
> > > > 
> > > > > to get it to work, I did the following. Please let me know if there is a better way
> > > > > to do this...
> > > > 
> > > > As I said long before, this kinda hardware configuration should
> > > > automatically show up when we load the driver module. But the target
> > > > core is broken in this regard.
> > > > 
> > > 
> > > It's possible to move the I_T nexus creation into VIO context code, but
> > > we still want to be able to define the actual target endpoint definition
> > > from userspace to get the proper VFS reference counting for fabric data
> > > structures containing struct config_group that reference the
> > > config_groups from the rest of the target stack.
> > 
> > Hmm, what can we configure for the ibmvscsis? I think that there is
> > nothing. Everything is configured via the firmware before the kernel
> > boots. So I don't see any point to configure something from the user
> > space. That just wastes user's time like Brian.
> > --
> 
> Yes, but the user still needs to configure the fabric Port/LUNs in order
> to build the the view of the target backends, right..? 

As I said before, there is no 'Port' to configure for the ibmvscsis.

So only LUNs. But before configuring LUNs, the target should work —
that is, telling an initiator that there are no LUNs.


> In this particular case for ibmvscsis since we already know the target
> endpoint names from sysfs attributes, using a small amount of scripting
> code in lio-utils to automatically setup the endpoints for the user is
> really simple, and we still expect the user to define which backend
> devices from /sys/kernel/config/target/$HBA/$DEV will be configured as
> Port/LUNs for the fabric endpoint.

Why should users bother to run the script? We know that the kernel can
configure everything to make the target work.

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-22  0:32                                 ` FUJITA Tomonori
@ 2011-03-22  2:28                                   ` Nicholas A. Bellinger
  2011-03-22  3:26                                     ` FUJITA Tomonori
  0 siblings, 1 reply; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-03-22  2:28 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: brking, James.Bottomley, linux-scsi, jxm, mwf

On Tue, 2011-03-22 at 09:32 +0900, FUJITA Tomonori wrote:
> On Mon, 21 Mar 2011 17:26:17 -0700
> "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> 
> > On Tue, 2011-03-22 at 08:55 +0900, FUJITA Tomonori wrote:
> > > On Mon, 21 Mar 2011 16:50:14 -0700
> > > "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> > > 
> > > > On Tue, 2011-03-22 at 08:20 +0900, FUJITA Tomonori wrote:
> > > > > On Mon, 21 Mar 2011 16:24:58 -0500
> > > > > Brian King <brking@linux.vnet.ibm.com> wrote:
> > > > > 
> > > > > > > If could send along the original misconfigured configfs layout that is
> > > > > > > causing the OOPs in handle_crq(), I would be happy to have a quick look.
> > > > > > 
> > > > > > Here it is. As you can see, there is no ./target/ibmvscsis directory created. In order
> > > > > 
> > > > > I suspect something like that. The driver can't crash with any
> > > > > configurations though.
> > > > > 
> > > > > 
> > > > > > to get it to work, I did the following. Please let me know if there is a better way
> > > > > > to do this...
> > > > > 
> > > > > As I said long before, this kinda hardware configuration should
> > > > > automatically show up when we load the driver module. But the target
> > > > > core is broken in this regard.
> > > > > 
> > > > 
> > > > It's possible to move the I_T nexus creation into VIO context code, but
> > > > we still want to be able to define the actual target endpoint definition
> > > > from userspace to get the proper VFS reference counting for fabric data
> > > > structures containing struct config_group that reference the
> > > > config_groups from the rest of the target stack.
> > > 
> > > Hmm, what can we configure for the ibmvscsis? I think that there is
> > > nothing. Everything is configured via the firmware before the kernel
> > > boots. So I don't see any point to configure something from the user
> > > space. That just wastes user's time like Brian.
> > > --
> > 
> > Yes, but the user still needs to configure the fabric Port/LUNs in order
> > to build the the view of the target backends, right..? 
> 
> As I said before, there is no 'Port' to configure for the ibmvscsis.
> 
> So only LUNs. But before confuging LUNs, the target should work, that
> is, telling an initiator that there is no LUNs.

Ok, this is where I was getting originally confused, thank you for the
clarification.

Then we should be able to do something like this together with the earlier
patch to handle reporting no available LUNs when the target endpoint has
not been configured.  I will revisit my patch from this afternoon to fix
up the REPORT_LUNS emulation to allow this, error out in
ibmvscsis.c:tcm_queuecommand(), and send it out in proper form for review.

> 
> > In this particular case for ibmvscsis since we already know the target
> > endpoint names from sysfs attributes, using a small amount of scripting
> > code in lio-utils to automatically setup the endpoints for the user is
> > really simple, and we still expect the user to define which backend
> > devices from /sys/kernel/config/target/$HBA/$DEV will be configured as
> > Port/LUNs for the fabric endpoint.
> 
> Why does users bother to run the script? We know that the kernel can
> configure everything to make the target work.

The type of script is an example of driving the typical configuration
layout for normal usage (eg: with TCM backends).  For a HW target mode
case like with w/ ibmvscsis, the user should be presented with a list of
available backends and un-configured target HW fabric endpoints, and
selects one of more of the backends to create the running layout and can
optionally save this persistently, have it generated for them based on
sysfs layout+attributes, etc.

But in the end what rtsadmin has done thus far successfully is abstract
away the end-user notion of configfs symlinks and really all direct
configfs interaction from the end user in the high level shell, and use
python library (rtslib) to drive the configuration of that layout.  This
gives us a method to handle potential future configfs layout changes in
target_core_fabric_configfs.c and target_core_configfs.c inside of a
python library, instead of breaking userspace scripts that depend upon a
particular /sys/kernel/config/target/ layout for each major future
target rev.

This is also the method that we are intending to release GPL with
rtsadmin-v2 to allow new fabric module code (and authors :-) to get the
complete userspace package for their target mode developments.

--nab


^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-22  2:28                                   ` Nicholas A. Bellinger
@ 2011-03-22  3:26                                     ` FUJITA Tomonori
  0 siblings, 0 replies; 81+ messages in thread
From: FUJITA Tomonori @ 2011-03-22  3:26 UTC (permalink / raw)
  To: nab; +Cc: fujita.tomonori, brking, James.Bottomley, linux-scsi, jxm, mwf

On Mon, 21 Mar 2011 19:28:41 -0700
"Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:

> On Tue, 2011-03-22 at 09:32 +0900, FUJITA Tomonori wrote:
> > On Mon, 21 Mar 2011 17:26:17 -0700
> > "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> > 
> > > On Tue, 2011-03-22 at 08:55 +0900, FUJITA Tomonori wrote:
> > > > On Mon, 21 Mar 2011 16:50:14 -0700
> > > > "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> > > > 
> > > > > On Tue, 2011-03-22 at 08:20 +0900, FUJITA Tomonori wrote:
> > > > > > On Mon, 21 Mar 2011 16:24:58 -0500
> > > > > > Brian King <brking@linux.vnet.ibm.com> wrote:
> > > > > > 
> > > > > > > > If could send along the original misconfigured configfs layout that is
> > > > > > > > causing the OOPs in handle_crq(), I would be happy to have a quick look.
> > > > > > > 
> > > > > > > Here it is. As you can see, there is no ./target/ibmvscsis directory created. In order
> > > > > > 
> > > > > > I suspect something like that. The driver can't crash with any
> > > > > > configurations though.
> > > > > > 
> > > > > > 
> > > > > > > to get it to work, I did the following. Please let me know if there is a better way
> > > > > > > to do this...
> > > > > > 
> > > > > > As I said long before, this kinda hardware configuration should
> > > > > > automatically show up when we load the driver module. But the target
> > > > > > core is broken in this regard.
> > > > > > 
> > > > > 
> > > > > It's possible to move the I_T nexus creation into VIO context code, but
> > > > > we still want to be able to define the actual target endpoint definition
> > > > > from userspace to get the proper VFS reference counting for fabric data
> > > > > structures containing struct config_group that reference the
> > > > > config_groups from the rest of the target stack.
> > > > 
> > > > Hmm, what can we configure for the ibmvscsis? I think that there is
> > > > nothing. Everything is configured via the firmware before the kernel
> > > > boots. So I don't see any point to configure something from the user
> > > > space. That just wastes user's time like Brian.
> > > > --
> > > 
> > > Yes, but the user still needs to configure the fabric Port/LUNs in order
> > > to build the the view of the target backends, right..? 
> > 
> > As I said before, there is no 'Port' to configure for the ibmvscsis.
> > 
> > So only LUNs. But before confuging LUNs, the target should work, that
> > is, telling an initiator that there is no LUNs.
> 
> Ok, this is where I was getting originally confused, thank you for the
> clarification.
> 
> Then we should be able to do something like this together with earlier
> patch to handle reporting no available LUNs because "target endpoint has
> not been configured.  I will revist my patch from this afternoon to fix
> up the REPORT_LUNs emulation to allow this, and error out in
> ibmvscsis.c:tcm_queuecommand() and send out in proper form to review.
> 
> > 
> > > In this particular case for ibmvscsis since we already know the target
> > > endpoint names from sysfs attributes, using a small amount of scripting
> > > code in lio-utils to automatically setup the endpoints for the user is
> > > really simple, and we still expect the user to define which backend
> > > devices from /sys/kernel/config/target/$HBA/$DEV will be configured as
> > > Port/LUNs for the fabric endpoint.
> > 
> > Why does users bother to run the script? We know that the kernel can
> > configure everything to make the target work.
> 
> The type of script is an example of driving the typical configuration
> layout for normal usage (eg: with TCM backends).  For a HW target mode
> case like with w/ ibmvscsis, the user should be presented with a list of
> available backends and un-configured target HW fabric endpoints

You are still confused. Nothing to configure about fabric endpoints
for ibmvscsis.

I've not used the script. I don't want to.

The hardware information needs to show up automatically. You are
against it?

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-21 22:48                       ` Nicholas A. Bellinger
@ 2011-03-22 12:53                         ` Brian King
  2011-03-22 22:06                           ` Nicholas A. Bellinger
  0 siblings, 1 reply; 81+ messages in thread
From: Brian King @ 2011-03-22 12:53 UTC (permalink / raw)
  To: Nicholas A. Bellinger; +Cc: FUJITA Tomonori, James.Bottomley, linux-scsi

On 03/21/2011 05:48 PM, Nicholas A. Bellinger wrote:
> On Mon, 2011-03-21 at 17:31 -0500, Brian King wrote:
>> Just hit another potential issue. I was mapping / unmapping disks a couple times,
>> so that might have helped trigger the issue. I had a file backed disk mapped
>> to a vscsi lun, then unmapped it, mapped a ramdisk lun, then switched back to
>> the filebacked lun after running into issues with the ramdisk lun and saw this:
>>
>>
> 
> By mapping/unmapping here do you mean unlinking+linking the Port/LUNs
> w/o removing the active VIO I_T Nexus, or actually rmdir'ing the whole
> $VIO_TARGET_FULLPATH/tpgt_1/ struct config_group..?

I just did an rm -r $VIO_TARGET_FULLPATH/tpgt_1/lun/lun_0

> 
>> Mar 21 16:25:57 jn30a-lp4 kernel: unexpected fifo state
>> Mar 21 16:25:57 jn30a-lp4 kernel: ------------[ cut here ]------------
>> Mar 21 16:25:57 jn30a-lp4 kernel: WARNING: at drivers/scsi/libsrp.c:162
>> Mar 21 16:25:57 jn30a-lp4 kernel: Modules linked in: target_core_pscsi target_core_file target_core_iblock ip6t_LOG xt_tcpudp xt_pkttype ipt_LOG xt_limit ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_raw xt_NOTRACK ipt_REJECT xt_state iptable_raw iptable_filter ip6table_mangle nf_conntrack_netbios_ns nf_conntrack_broadcast nf_conntrack_ipv4 nf_conntrack nf_defrag_ipv4 ip_tables ip6table_filter ip6_tables x_tables ipv6 fuse loop dm_mod ibmvscsis libsrp scsi_tgt target_core_mod ses enclosure sg ibmveth configfs ext3 jbd mbcache sd_mod crc_t10dif ipr libata scsi_mod
>> Mar 21 16:25:57 jn30a-lp4 kernel: NIP: d0000000047e0b38 LR: d0000000047e0b34 CTR: 0000000000000000
>> Mar 21 16:25:57 jn30a-lp4 kernel: REGS: c00000033f4ef860 TRAP: 0700   Not tainted  (2.6.38-0.7-ppc64-06439-g5bab188-dirty)
>> Mar 21 16:25:57 jn30a-lp4 kernel: MSR: 8000000000029032 <EE,ME,CE,IR,DR>  CR: 24002024  XER: 20000001
>> Mar 21 16:25:57 jn30a-lp4 kernel: TASK = c00000033f2b39e0[58] 'kworker/4:1' THREAD: c00000033f4ec000 CPU: 4
>> Mar 21 16:25:57 jn30a-lp4 kernel: GPR00: d0000000047e0b34 c00000033f4efae0 d0000000047e9768 0000000000000018
>> Mar 21 16:25:57 jn30a-lp4 kernel: GPR04: 0000000000000000 0000000000000004 0000000000000000 c000000000f86610
>> Mar 21 16:25:57 jn30a-lp4 kernel: GPR08: c000000000f86b20 c0000000008b38b8 000000000007ffff 0000000000000001
>> Mar 21 16:25:57 jn30a-lp4 kernel: GPR12: 0000000028002082 c00000000f190a00 0000000000000000 0000000002b80610
>> Mar 21 16:25:57 jn30a-lp4 kernel: GPR16: 0000000001a3fc60 0000000002b80d08 0000000001a3fc70 0000000002c81870
>> Mar 21 16:25:57 jn30a-lp4 kernel: GPR20: 0000000002b805c8 0000000002c81888 0000000002c81910 0000000000000000
>> Mar 21 16:25:57 jn30a-lp4 kernel: GPR24: 0000000000000000 0000000000000000 0000000000000000 c00000033f1bacc0
>> Mar 21 16:25:57 jn30a-lp4 kernel: GPR28: 0000000000000001 0000000000000000 d0000000047e9778 d0000000047e1ba8
>> Mar 21 16:25:57 jn30a-lp4 kernel: NIP [d0000000047e0b38] .srp_iu_get+0x118/0x130 [libsrp]
>> Mar 21 16:25:57 jn30a-lp4 kernel: LR [d0000000047e0b34] .srp_iu_get+0x114/0x130 [libsrp]
>> Mar 21 16:25:57 jn30a-lp4 kernel: Call Trace:
>> Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efae0] [d0000000047e0b34] .srp_iu_get+0x114/0x130 [libsrp] (unreliable)
>> Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efb90] [d0000000048f0d6c] .process_crq+0xcc/0x5b8 [ibmvscsis]
>> Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efc50] [d0000000048f183c] .handle_crq+0x224/0xa60 [ibmvscsis]
>> Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efd60] [c0000000000c2120] .process_one_work+0x198/0x518
>> Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efe10] [c0000000000c297c] .worker_thread+0x1f4/0x518
>> Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efed0] [c0000000000cb4c4] .kthread+0xb4/0xc0
>> Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4eff90] [c00000000001e864] .kernel_thread+0x54/0x70
>> Mar 21 16:25:57 jn30a-lp4 kernel: Instruction dump:
>> Mar 21 16:25:57 jn30a-lp4 kernel: e8010010 eb41ffd0 7c0803a6 eb61ffd8 eb81ffe0 eba1ffe8 ebc1fff0 ebe1fff8
>> Mar 21 16:25:57 jn30a-lp4 kernel: 4e800020 e87e8058 48000739 e8410028 <0fe00000> 38000001 38600000 981f0000
>> Mar 21 16:25:57 jn30a-lp4 kernel: ---[ end trace ec6b6139d888a732 ]---
>> Mar 21 16:25:57 jn30a-lp4 kernel: Error getting IU from pool
>> Mar 21 16:25:57 jn30a-lp4 kernel: Error getting IU from pool
>> Mar 21 16:25:57 jn30a-lp4 kernel: Error getting IU from pool
>> Mar 21 16:25:57 jn30a-lp4 kernel: Error getting IU from pool
>>
> 
> If we are talking about the latter case I think my last patch should
> address this with active I_T Nexus I/O and ibmvscsis_drop_tpg(), but I
> will followup a bit more and send out a proper patch this evening for
> Tomo to comment..
> 
>> I'm also seeing disktest complain on the client about commands taking longer than 120 seconds
>> on occasion, which may play into the performance issue I mentioned in my previous mail.
>>
> 
> Mmmm, please verify with RAMDISK_MCP backends as well, as by default
> FILEIO has O_SYNC enabled..  This does seem strange for LTP disktest
> however..

How do I specify RAMDISK_MCP? I don't see an option in tcm_node.

Thanks,

Brian


-- 
Brian King
Linux on Power Virtualization
IBM Linux Technology Center



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-22 12:53                         ` Brian King
@ 2011-03-22 22:06                           ` Nicholas A. Bellinger
  2011-03-22 22:49                             ` FUJITA Tomonori
  2011-03-23 15:19                             ` Brian King
  0 siblings, 2 replies; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-03-22 22:06 UTC (permalink / raw)
  To: Brian King; +Cc: FUJITA Tomonori, James.Bottomley, linux-scsi

On Tue, 2011-03-22 at 07:53 -0500, Brian King wrote:
> On 03/21/2011 05:48 PM, Nicholas A. Bellinger wrote:
> > On Mon, 2011-03-21 at 17:31 -0500, Brian King wrote:
> >> Just hit another potential issue. I was mapping / unmapping disks a couple times,
> >> so that might have helped trigger the issue. I had a file backed disk mapped
> >> to a vscsi lun, then unmapped it, mapped a ramdisk lun, then switched back to
> >> the filebacked lun after running into issues with the ramdisk lun and saw this:
> >>
> >>
> > 
> > By mapping/unmapping here do you mean unlinking+linking the Port/LUNs
> > w/o removing the active VIO I_T Nexus, or actually rmdir'ing the whole
> > $VIO_TARGET_FULLPATH/tpgt_1/ struct config_group..?
> 
> I just did an rm -r $VIO_TARGET_FULLPATH/tpgt_1/lun/lun_0
> 

Ok, thanks for the clarification here..

I am pretty certain this backtrace is related to active I/O LUN shutdown
with TPG demo mode operation and ibmvscsis.  I will need to take a
deeper look to determine that this is working as expected w/o explicit
MappedLUN ACLs provided by target_core_fabric_configfs.c make_nodeacl
and drop_nodeacl() struct target_core_fabric_ops vectors, or if there is
some additional ibmvscsis / libsrp specific logic that needs to be made
to address the active I/O TCM backend Port/LUN unlink.

If the latter ends up being the case, this would most likely be using
the optional target_core_fabric_ops ->port_link() and ->port_unlink()
vectors.  These are used today by the tcm_loop LLD to call Linux/SCSI
code via scsi_device_lookup() -> scsi_remove_device() ->
scsi_device_put() to handle fabric level shutdown.   This could be used
for something similar to quiesce I/O for a particular TPG LUN symlink dest
to target core /sys/kernel/config/target/core/$HBA/$DEV symlink src.

> > 
> >> Mar 21 16:25:57 jn30a-lp4 kernel: unexpected fifo state
> >> Mar 21 16:25:57 jn30a-lp4 kernel: ------------[ cut here ]------------
> >> Mar 21 16:25:57 jn30a-lp4 kernel: WARNING: at drivers/scsi/libsrp.c:162
> >> Mar 21 16:25:57 jn30a-lp4 kernel: Modules linked in: target_core_pscsi target_core_file target_core_iblock ip6t_LOG xt_tcpudp xt_pkttype ipt_LOG xt_limit ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_raw xt_NOTRACK ipt_REJECT xt_state iptable_raw iptable_filter ip6table_mangle nf_conntrack_netbios_ns nf_conntrack_broadcast nf_conntrack_ipv4 nf_conntrack nf_defrag_ipv4 ip_tables ip6table_filter ip6_tables x_tables ipv6 fuse loop dm_mod ibmvscsis libsrp scsi_tgt target_core_mod ses enclosure sg ibmveth configfs ext3 jbd mbcache sd_mod crc_t10dif ipr libata scsi_mod
> >> Mar 21 16:25:57 jn30a-lp4 kernel: NIP: d0000000047e0b38 LR: d0000000047e0b34 CTR: 0000000000000000
> >> Mar 21 16:25:57 jn30a-lp4 kernel: REGS: c00000033f4ef860 TRAP: 0700   Not tainted  (2.6.38-0.7-ppc64-06439-g5bab188-dirty)
> >> Mar 21 16:25:57 jn30a-lp4 kernel: MSR: 8000000000029032 <EE,ME,CE,IR,DR>  CR: 24002024  XER: 20000001
> >> Mar 21 16:25:57 jn30a-lp4 kernel: TASK = c00000033f2b39e0[58] 'kworker/4:1' THREAD: c00000033f4ec000 CPU: 4
> >> Mar 21 16:25:57 jn30a-lp4 kernel: GPR00: d0000000047e0b34 c00000033f4efae0 d0000000047e9768 0000000000000018
> >> Mar 21 16:25:57 jn30a-lp4 kernel: GPR04: 0000000000000000 0000000000000004 0000000000000000 c000000000f86610
> >> Mar 21 16:25:57 jn30a-lp4 kernel: GPR08: c000000000f86b20 c0000000008b38b8 000000000007ffff 0000000000000001
> >> Mar 21 16:25:57 jn30a-lp4 kernel: GPR12: 0000000028002082 c00000000f190a00 0000000000000000 0000000002b80610
> >> Mar 21 16:25:57 jn30a-lp4 kernel: GPR16: 0000000001a3fc60 0000000002b80d08 0000000001a3fc70 0000000002c81870
> >> Mar 21 16:25:57 jn30a-lp4 kernel: GPR20: 0000000002b805c8 0000000002c81888 0000000002c81910 0000000000000000
> >> Mar 21 16:25:57 jn30a-lp4 kernel: GPR24: 0000000000000000 0000000000000000 0000000000000000 c00000033f1bacc0
> >> Mar 21 16:25:57 jn30a-lp4 kernel: GPR28: 0000000000000001 0000000000000000 d0000000047e9778 d0000000047e1ba8
> >> Mar 21 16:25:57 jn30a-lp4 kernel: NIP [d0000000047e0b38] .srp_iu_get+0x118/0x130 [libsrp]
> >> Mar 21 16:25:57 jn30a-lp4 kernel: LR [d0000000047e0b34] .srp_iu_get+0x114/0x130 [libsrp]
> >> Mar 21 16:25:57 jn30a-lp4 kernel: Call Trace:
> >> Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efae0] [d0000000047e0b34] .srp_iu_get+0x114/0x130 [libsrp] (unreliable)
> >> Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efb90] [d0000000048f0d6c] .process_crq+0xcc/0x5b8 [ibmvscsis]
> >> Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efc50] [d0000000048f183c] .handle_crq+0x224/0xa60 [ibmvscsis]
> >> Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efd60] [c0000000000c2120] .process_one_work+0x198/0x518
> >> Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efe10] [c0000000000c297c] .worker_thread+0x1f4/0x518
> >> Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4efed0] [c0000000000cb4c4] .kthread+0xb4/0xc0
> >> Mar 21 16:25:57 jn30a-lp4 kernel: [c00000033f4eff90] [c00000000001e864] .kernel_thread+0x54/0x70
> >> Mar 21 16:25:57 jn30a-lp4 kernel: Instruction dump:
> >> Mar 21 16:25:57 jn30a-lp4 kernel: e8010010 eb41ffd0 7c0803a6 eb61ffd8 eb81ffe0 eba1ffe8 ebc1fff0 ebe1fff8
> >> Mar 21 16:25:57 jn30a-lp4 kernel: 4e800020 e87e8058 48000739 e8410028 <0fe00000> 38000001 38600000 981f0000
> >> Mar 21 16:25:57 jn30a-lp4 kernel: ---[ end trace ec6b6139d888a732 ]---
> >> Mar 21 16:25:57 jn30a-lp4 kernel: Error getting IU from pool
> >> Mar 21 16:25:57 jn30a-lp4 kernel: Error getting IU from pool
> >> Mar 21 16:25:57 jn30a-lp4 kernel: Error getting IU from pool
> >> Mar 21 16:25:57 jn30a-lp4 kernel: Error getting IU from pool
> >>
> > 
> > If we are talking about the latter case I think my last patch should
> > address this with active I_T Nexus I/O and ibmvscsis_drop_tpg(), but I
> > will followup a bit more and send out a proper patch this evening for
> > Tomo to comment..
> > 
> >> I'm also seeing disktest complain on the client about commands taking longer than 120 seconds
> >> on occasion, which may play into the performance issue I mentioned in my previous mail.
> >>
> > 
> > Mmmm, please verify with RAMDISK_MCP backends as well, as by default
> > FILEIO has O_SYNC enabled..  This does seem strange for LTP disktest
> > however..
> 
> How do I specify RAMDISK_MCP? I don't see an option in tcm_node.
> 

RAMDISK_DR and RAMDISK_MCP backend are configured with 'rd_dr_0/ramdisk'
and 'rd_mcp_0/ramdisk' for /sys/kernel/config/target/$HBA/$DEV/.  This
is the same with tcm_node --ramdisk $HBA/$DEV usage.

--nab



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-22 22:06                           ` Nicholas A. Bellinger
@ 2011-03-22 22:49                             ` FUJITA Tomonori
  2011-03-23  1:35                               ` Nicholas A. Bellinger
  2011-03-23 15:19                             ` Brian King
  1 sibling, 1 reply; 81+ messages in thread
From: FUJITA Tomonori @ 2011-03-22 22:49 UTC (permalink / raw)
  To: nab; +Cc: brking, fujita.tomonori, James.Bottomley, linux-scsi

On Tue, 22 Mar 2011 15:06:47 -0700
"Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:

> On Tue, 2011-03-22 at 07:53 -0500, Brian King wrote:
> > On 03/21/2011 05:48 PM, Nicholas A. Bellinger wrote:
> > > On Mon, 2011-03-21 at 17:31 -0500, Brian King wrote:
> > >> Just hit another potential issue. I was mapping / unmapping disks a couple times,
> > >> so that might have helped trigger the issue. I had a file backed disk mapped
> > >> to a vscsi lun, then unmapped it, mapped a ramdisk lun, then switched back to
> > >> the filebacked lun after running into issues with the ramdisk lun and saw this:
> > >>
> > >>
> > > 
> > > By mapping/unmapping here do you mean unlinking+linking the Port/LUNs
> > > w/o removing the active VIO I_T Nexus, or actually rmdir'ing the whole
> > > $VIO_TARGET_FULLPATH/tpgt_1/ struct config_group..?
> > 
> > I just did an rm -r $VIO_TARGET_FULLPATH/tpgt_1/lun/lun_0
> > 
> 
> Ok, thanks for the clarification here..
> 
> I am pretty certain this backtrace is related to active I/O LUN shutdown
> with TPG demo mode operation and ibmvscsis.  I will need to take a
> deeper look to determine that this is working as expected w/o explict
> MappedLUN ACLs provided by target_core_fabric_configfs.c make_nodeacl
> and drop_nodeacl() struct target_core_fabric_ops vectors, or if there is
> some additional ibmvscsis / libsrp specific logic that needs to be made
> to address the active I/O TCM backend Port/LUN unlink.

Why does a driver need to take care of this?

I thought that preventing the removal of a logical unit having
outstanding I/Os is the responsibility of the target core.

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-22 22:49                             ` FUJITA Tomonori
@ 2011-03-23  1:35                               ` Nicholas A. Bellinger
  2011-03-23  5:12                                 ` FUJITA Tomonori
  0 siblings, 1 reply; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-03-23  1:35 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: brking, James.Bottomley, linux-scsi

On Wed, 2011-03-23 at 07:49 +0900, FUJITA Tomonori wrote:
> On Tue, 22 Mar 2011 15:06:47 -0700
> "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> 
> > On Tue, 2011-03-22 at 07:53 -0500, Brian King wrote:
> > > On 03/21/2011 05:48 PM, Nicholas A. Bellinger wrote:
> > > > On Mon, 2011-03-21 at 17:31 -0500, Brian King wrote:
> > > >> Just hit another potential issue. I was mapping / unmapping disks a couple times,
> > > >> so that might have helped trigger the issue. I had a file backed disk mapped
> > > >> to a vscsi lun, then unmapped it, mapped a ramdisk lun, then switched back to
> > > >> the filebacked lun after running into issues with the ramdisk lun and saw this:
> > > >>
> > > >>
> > > > 
> > > > By mapping/unmapping here do you mean unlinking+linking the Port/LUNs
> > > > w/o removing the active VIO I_T Nexus, or actually rmdir'ing the whole
> > > > $VIO_TARGET_FULLPATH/tpgt_1/ struct config_group..?
> > > 
> > > I just did an rm -r $VIO_TARGET_FULLPATH/tpgt_1/lun/lun_0
> > > 
> > 
> > Ok, thanks for the clarification here..
> > 
> > I am pretty certain this backtrace is related to active I/O LUN shutdown
> > with TPG demo mode operation and ibmvscsis.  I will need to take a
> > deeper look to determine that this is working as expected w/o explict
> > MappedLUN ACLs provided by target_core_fabric_configfs.c make_nodeacl
> > and drop_nodeacl() struct target_core_fabric_ops vectors, or if there is
> > some additional ibmvscsis / libsrp specific logic that needs to be made
> > to address the active I/O TCM backend Port/LUN unlink.
> 
> Why does a driver need to take care of this?
> 
> I thought that preventing the removal of a logical unit having
> outstanding I/Os is the responsibility of the target core.

Yes this should be the case.  However with TPG demo mode and active I/O
shutdown case w/o ->port_link + ->port_unlink usage, or a fabric
endpoint disable attribute (to clear the I_T Nexus for the endpoint) we
may be running into problems here when removing TPG LUNs in demo-mode
with an active I_T Nexus.

I will take a deeper look and see if there is some breakage wrt to
target core or something else externally, and will try to reproduce with
some other fabric modules that support TPG demo mode.

--nab


^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-23  1:35                               ` Nicholas A. Bellinger
@ 2011-03-23  5:12                                 ` FUJITA Tomonori
  2011-03-23  8:26                                   ` Nicholas A. Bellinger
  0 siblings, 1 reply; 81+ messages in thread
From: FUJITA Tomonori @ 2011-03-23  5:12 UTC (permalink / raw)
  To: nab; +Cc: fujita.tomonori, brking, James.Bottomley, linux-scsi

On Tue, 22 Mar 2011 18:35:36 -0700
"Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:

> On Wed, 2011-03-23 at 07:49 +0900, FUJITA Tomonori wrote:
> > On Tue, 22 Mar 2011 15:06:47 -0700
> > "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> > 
> > > On Tue, 2011-03-22 at 07:53 -0500, Brian King wrote:
> > > > On 03/21/2011 05:48 PM, Nicholas A. Bellinger wrote:
> > > > > On Mon, 2011-03-21 at 17:31 -0500, Brian King wrote:
> > > > >> Just hit another potential issue. I was mapping / unmapping disks a couple times,
> > > > >> so that might have helped trigger the issue. I had a file backed disk mapped
> > > > >> to a vscsi lun, then unmapped it, mapped a ramdisk lun, then switched back to
> > > > >> the filebacked lun after running into issues with the ramdisk lun and saw this:
> > > > >>
> > > > >>
> > > > > 
> > > > > By mapping/unmapping here do you mean unlinking+linking the Port/LUNs
> > > > > w/o removing the active VIO I_T Nexus, or actually rmdir'ing the whole
> > > > > $VIO_TARGET_FULLPATH/tpgt_1/ struct config_group..?
> > > > 
> > > > I just did an rm -r $VIO_TARGET_FULLPATH/tpgt_1/lun/lun_0
> > > > 
> > > 
> > > Ok, thanks for the clarification here..
> > > 
> > > I am pretty certain this backtrace is related to active I/O LUN shutdown
> > > with TPG demo mode operation and ibmvscsis.  I will need to take a
> > > deeper look to determine that this is working as expected w/o explict
> > > MappedLUN ACLs provided by target_core_fabric_configfs.c make_nodeacl
> > > and drop_nodeacl() struct target_core_fabric_ops vectors, or if there is
> > > some additional ibmvscsis / libsrp specific logic that needs to be made
> > > to address the active I/O TCM backend Port/LUN unlink.
> > 
> > Why does a driver need to take care of this?
> > 
> > I thought that preventing the removal of a logical unit having
> > outstanding I/Os is the responsibility of the target core.
> 
> Yes this should be the case.  However with TPG demo mode and active I/O
> shutdown case w/o ->port_link + ->port_unlink usage, or an fabric
> endpoint disable attribute (to clear the I_T Nexus for the endpoint) we
> may be running into problems here when removing TPG LUNs in demo-mode
> with an active I_T Nexus.

What are the differences of the demo mode and the non-demo mode?

I don't know if the similar bug is also in the non-demo mode but why
can't we integrate them well instead of having two totally different
paths?

I just don't want to play with TPG since there is no TPG concept in
SRP (and ibmvscsis). And I also don't play with any security stuff
about it because it's also irrelevant for ibmvscsis.

Also please rename 'demo-mode'. I don't think that it's a good name
since it doesn't tell anything.

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-23  5:12                                 ` FUJITA Tomonori
@ 2011-03-23  8:26                                   ` Nicholas A. Bellinger
  2011-03-23  8:48                                     ` FUJITA Tomonori
  0 siblings, 1 reply; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-03-23  8:26 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: brking, James.Bottomley, linux-scsi, Joel Becker

On Wed, 2011-03-23 at 14:12 +0900, FUJITA Tomonori wrote:
> On Tue, 22 Mar 2011 18:35:36 -0700
> "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> 
> > On Wed, 2011-03-23 at 07:49 +0900, FUJITA Tomonori wrote:
> > > On Tue, 22 Mar 2011 15:06:47 -0700
> > > "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> > > 
> > > > On Tue, 2011-03-22 at 07:53 -0500, Brian King wrote:
> > > > > On 03/21/2011 05:48 PM, Nicholas A. Bellinger wrote:
> > > > > > On Mon, 2011-03-21 at 17:31 -0500, Brian King wrote:
> > > > > >> Just hit another potential issue. I was mapping / unmapping disks a couple times,
> > > > > >> so that might have helped trigger the issue. I had a file backed disk mapped
> > > > > >> to a vscsi lun, then unmapped it, mapped a ramdisk lun, then switched back to
> > > > > >> the filebacked lun after running into issues with the ramdisk lun and saw this:
> > > > > >>
> > > > > >>
> > > > > > 
> > > > > > By mapping/unmapping here do you mean unlinking+linking the Port/LUNs
> > > > > > w/o removing the active VIO I_T Nexus, or actually rmdir'ing the whole
> > > > > > $VIO_TARGET_FULLPATH/tpgt_1/ struct config_group..?
> > > > > 
> > > > > I just did an rm -r $VIO_TARGET_FULLPATH/tpgt_1/lun/lun_0
> > > > > 
> > > > 
> > > > Ok, thanks for the clarification here..
> > > > 
> > > > I am pretty certain this backtrace is related to active I/O LUN shutdown
> > > > with TPG demo mode operation and ibmvscsis.  I will need to take a
> > > > deeper look to determine that this is working as expected w/o explict
> > > > MappedLUN ACLs provided by target_core_fabric_configfs.c make_nodeacl
> > > > and drop_nodeacl() struct target_core_fabric_ops vectors, or if there is
> > > > some additional ibmvscsis / libsrp specific logic that needs to be made
> > > > to address the active I/O TCM backend Port/LUN unlink.
> > > 
> > > Why does a driver need to take care of this?
> > > 
> > > I thought that preventing the removal of a logical unit having
> > > outstanding I/Os is the responsibility of the target core.
> > 
> > Yes this should be the case.  However with TPG demo mode and active I/O
> > shutdown case w/o ->port_link + ->port_unlink usage, or an fabric
> > endpoint disable attribute (to clear the I_T Nexus for the endpoint) we
> > may be running into problems here when removing TPG LUNs in demo-mode
> > with an active I_T Nexus.
> 
> What are the differences of the demo mode and the non-demo mode?
> 

Demo-mode means that a struct se_node_acl will be dynamically allocated
when core_tpg_check_initiator_node_acl() is called for an unknown SCSI
Initiator WWN in the process of creating a new I_T nexus (struct
se_session) when struct target_core_fabric_ops->tpg_check_demo_mode()=1
is set.

Using explicit NodeACLs (eg: non demo-mode) means that each se_node_acl
is created in /sys/kernel/config/target/$FABRIC/$TARGET_WWN/tpgt_1/acls/
before fabric module level call to core_tpg_check_initiator_node_acl(),
and will fail upon the reception of an unknown SCSI Initiator WWN when
the fabric is setting target_core_fabric_ops->tpg_check_demo_mode()=0.

Dynamically created se_node_acls can also be converted to an explicit
NodeACL, which requires that TPG LUN -> MappedLUN layout be configured
as well.  (This can currently happen w/o disruption to the existing I_T
nexus)

> I don't know if the similar bug is also in the non-demo mode but why
> can't we integrate them well instead of having two totally different
> paths?
> 

These are not different codepaths from the perspective of current
I/O path code for access to backend target core struct se_device.  We
still create each struct se_node_acl->device_list[] based upon the
default set of TPG LUN mappings that allows the SCSI Initiator access to
the target core backend devices once the I_T nexus has been established
via transport_get_lun_for_cmd().

With explicit NodeACLs these can be initiator context specific MappedLUNs
that can optionally be different from default TPG LUN layout and have
Write Protected (WP=1) access.

> I just don't want to play with TPG since there is no TPG concept in
> SRP (and ibmvscsis). And I also don't play with any security stuff
> about it because it's also irrelevant for ibmvscsis.
> 

This is where I think we have a misunderstanding.

Currently we use user-space driven TPG LUN configfs symlinks from fabric
module data structures into a separate module (target_core_mod) in order
to represent the backend exports for fabric TPG LUN layout.

In the past we have tried patches for driving the configfs layout via
kernel-space as well, which does function with mkdir and rmdir ops with
some VFS level changes, but has been firmly rejected by the configfs
maintainer back in 2009 and dropped in modern lio-core-2.6.git code.
(jlbec CC'ed)

So that said I don't have an issue with ibmvscsis allowing fabric
dependent TPG data structure's allocation to be driven by kernel-level
code for the special case where no TPG has yet been configured.  However
this still requires the explicit setup of fabric TPG endpoint @
/sys/kernel/config/target/ibmvscsis/$VIO_TARGET/tpgt_1/ in order to
access the $VIO_TARGET/tpgt_1/lun/ group as a destination for TPG LUNs
symlinks into target core configfs backends.

But in the end I think we still want to be able to drive the creation of
configfs symlinks for fabric TPG LUN <-> target core backend usage from
userspace driven code.  We can do the creation of a configfs layout
using a small amount of interpreted userspace code that would require a
larger amount of kernel code complexity in order to function.  I personally
do not see a hard-requirement for doing TPG LUN <-> target core symlink
configuration from kernel space for my own code, but if really think
this is required and convience folks like Joel Greg-KH with patches, I
would be happy to consider a new look at a hybrid target user-level +
kernel-level driven control plane.

> Also please rename 'demo-mode'. I don't think that it's a good name
> since it doesn't tell anything.
> --

With iscsi_target_mod this logic is controlled via a fabric dependent
attribute @ target/iscsi/$TARGET_IQN/tpgt_1/attrib/generate_node_acls.

I am happy to change the *_demo_mode*() suffixes in target core, does a
suffix like *_dynamic_nodeacl*() work for you..?

--nab



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-23  8:26                                   ` Nicholas A. Bellinger
@ 2011-03-23  8:48                                     ` FUJITA Tomonori
  2011-03-23 10:00                                       ` Nicholas A. Bellinger
  0 siblings, 1 reply; 81+ messages in thread
From: FUJITA Tomonori @ 2011-03-23  8:48 UTC (permalink / raw)
  To: nab; +Cc: fujita.tomonori, brking, James.Bottomley, linux-scsi, jlbec

On Wed, 23 Mar 2011 01:26:30 -0700
"Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:

> Demo-mode means that a struct se_node_acl will be dynamically allocated
> when core_tpg_check_initiator_node_acl() is called for an unknown SCSI
> Initiator WWN in the process of creating a new I_T nexus (struct
> se_session) when struct target_core_fabric_ops->tpg_check_demo_mode()=1
> is set.

Again, acl is irrelevant for ibmvscsis. So I shouldn't set it up.

All I am asking for is simply accepting any initiators and exporting all
the luns in a target.


> > I don't know if the similar bug is also in the non-demo mode but why
> > can't we integrate them well instead of having two totally different
> > paths?
> > 
> 
> These are not different codepaths paths from the perspective of current
> I/O path code for access to backend target core struct se_device.  We
> still create each struct se_node_acl->device_list[] based upon the
> default set of TPG LUN mappings that allows the SCSI Initiator access to
> the target core backend devices once the I_T nexus has been established
> via transport_get_lun_for_cmd().
> 
> With explict NodeACLs these can be initiator context specific MappedLUNs
> that can optionally be different from default TPG LUN layout and have
> Write Protected (WP=1) access.

Sounds like that non-demo mode has the similar bug that Brian saw.


> > I just don't want to play with TPG since there is no TPG concept in
> > SRP (and ibmvscsis). And I also don't play with any security stuff
> > about it because it's also irrelevant for ibmvscsis.
> > 
> 
> This is where I think we have a misunderstanding.
> 
> Currently we use user-space driven TPG LUN configfs symlinks from fabric
> module data structures into a separate module (target_core_mod) in order
> to represent the backend exports for fabric TPG LUN layout.
> 
> In the past we have tried patches for driving the configfs layout via
> kernel-space as well, which does function with mkdir and rmdir ops with
> some VFS level changes, but has been firmly reject by the configfs
> maintainer back in 2009 and dropped in modern lio-core-2.6.git code.
> (jlbec CC'ed)
> 
> So that said I don't have an issue with ibmvscsis allowing fabric
> dependent TPG data structure's allocation to be driven by kernel-level
> code for the special case where no TPG has yet been configured.  However
> this still requires the explict setup of fabric TPG endpoint @
> /sys/kernel/config/target/ibmvscsis/$VIO_TARGET/tpgt_1/ in order to
> access the $VIO_TARGET/tpgt_1/lun/ group as a destination for TPG LUNs
> symlinks into target core configfs backends.
> 
> But in the end I think we still want to be able to drive the creation of
> configfs symlinks for fabric TPG LUN <-> target core backend usage from
> userspace driven code.
  We can do the creation of a configfs layout
> using a small amount of interpreted userspace code that would require a
> larger amount kernel code complexity in order to function.  I personally
> do not see a hard-requirement for doing TPG LUN <-> target core symlink
> configuration from kernel space for my own code, but if really think
> this is required and convience folks like Joel Greg-KH with patches, I
> would be happy to consider a new look at a hybrid target user-level +
> kernel-level driven control plane.

We definitely need to set up the hardware information in kernel space.

For example, even after loading the kernel module, creating
/sys/kernel/config/target/ibmvscsi directory by hand makes sense for
you?

If configfs doesn't fit the bill, we need to create something new.

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-23  8:48                                     ` FUJITA Tomonori
@ 2011-03-23 10:00                                       ` Nicholas A. Bellinger
  2011-03-23 12:04                                         ` FUJITA Tomonori
  0 siblings, 1 reply; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-03-23 10:00 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: brking, James.Bottomley, linux-scsi, jlbec

On Wed, 2011-03-23 at 17:48 +0900, FUJITA Tomonori wrote:
> On Wed, 23 Mar 2011 01:26:30 -0700
> "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> 
> > Demo-mode means that a struct se_node_acl will be dynamically allocated
> > when core_tpg_check_initiator_node_acl() is called for an unknown SCSI
> > Initiator WWN in the process of creating a new I_T nexus (struct
> > se_session) when struct target_core_fabric_ops->tpg_check_demo_mode()=1
> > is set.
> 
> Again, acl is not irrelevant for ibmvscsis. So I shoundn't set up
> it.
> 
> All I asking for is simply accepting any initiators and exporting all
> the luns in a target.
> 
> 

Yes, I understand this.  But driving (from kernel-level) a default set
of fabric TPG LUN exports for all available target_core_mod backend
export is not a manageable way for /sys/kernel/config/target/$HBA/$DEV
backend export.  This needs to be driven from python library code for
userspace applications automatically without interaction from end user
for ibmvscsis or any other fabric module.

> > > I don't know if the similar bug is also in the non-demo mode but why
> > > can't we integrate them well instead of having two totally different
> > > paths?
> > > 
> > 
> > These are not different codepaths paths from the perspective of current
> > I/O path code for access to backend target core struct se_device.  We
> > still create each struct se_node_acl->device_list[] based upon the
> > default set of TPG LUN mappings that allows the SCSI Initiator access to
> > the target core backend devices once the I_T nexus has been established
> > via transport_get_lun_for_cmd().
> > 
> > With explict NodeACLs these can be initiator context specific MappedLUNs
> > that can optionally be different from default TPG LUN layout and have
> > Write Protected (WP=1) access.
> 
> Sounds like that non-demo mode has the similar bug that Brian saw.
> 
> 

No, we have been able to verify with the following patch that active I/O
shutdown with Explict NodeACLs and MappedLUNs is now working as expected
for .38 stable:

target: Fix t_transport_aborted handling in LUN_RESET + active I/O shutdown
http://git.kernel.org/?p=linux/kernel/git/jejb/scsi-rc-fixes-2.6.git;a=commitdiff;h=52208ae3fc60cbcb214c10fb8b82304199e2cc3a

> > > I just don't want to play with TPG since there is no TPG concept in
> > > SRP (and ibmvscsis). And I also don't play with any security stuff
> > > about it because it's also irrelevant for ibmvscsis.
> > > 
> > 
> > This is where I think we have an misunderstanding.
> > 
> > Currently we use user-space driven TPG LUN configfs symlinks from fabric
> > module data structures into a seperate module (target_core_mod) in order
> > to represent the backend exports for fabric TPG LUN layout.
> > 
> > In the past we have tried patches for driving the configfs layout via
> > kernel-space as well, which does function with mkdir and rmdir ops with
> > some VFS level changes, but has been firmly reject by the configfs
> > maintainer back in 2009 and dropped in modern lio-core-2.6.git code.
> > (jlbec CC'ed)
> > 
> > So that said I don't have an issue with ibmvscsis allowing fabric
> > dependent TPG data structure's allocation to be driven by kernel-level
> > code for the special case where no TPG has yet been configured.  However
> > this still requires the explict setup of fabric TPG endpoint @
> > /sys/kernel/config/target/ibmvscsis/$VIO_TARGET/tpgt_1/ in order to
> > access the $VIO_TARGET/tpgt_1/lun/ group as a destination for TPG LUNs
> > symlinks into target core configfs backends.
> > 
> > But in the end I think we still want to be able to drive the creation of
> > configfs symlinks for fabric TPG LUN <-> target core backend usage from
> > userspace driven code.
>   We can do the creation of a configfs layout
> > using a small amount of interpreted userspace code that would require a
> > larger amount kernel code complexity in order to function.  I personally
> > do not see a hard-requirement for doing TPG LUN <-> target core symlink
> > configuration from kernel space for my own code, but if really think
> > this is required and convience folks like Joel Greg-KH with patches, I
> > would be happy to consider a new look at a hybrid target user-level +
> > kernel-level driven control plane.
> 
> We definitely need to set up the hardware information in kernel space.
> 
> For example, even after loading the kernel module, creating
> /sys/kernel/config/target/ibmvscsi directory by hand makes sense for
> you?
> 

No, not by hand.  We expect rtslib-gpl python library code to drive all
of this for userspace level applications for all fabric modules using
the generic target_core_fabric_configfs.c control plane.

> If configfs doesn't fit the bill, needs to create something new.
> --

Yes, I am open to suggestions for a hybrid approach for userspace and
kernelspace driven control plane for target mode, and adapting
rtslib-gpl to work with it.  However,  I still think that driving the
creation of target core struct config_group from userspace with configfs
symlinks to an external module target core backends is the cleanest
kernel control plane from a kernel code perspective.

--nab



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-23 10:00                                       ` Nicholas A. Bellinger
@ 2011-03-23 12:04                                         ` FUJITA Tomonori
  2011-03-23 21:17                                           ` Nicholas A. Bellinger
  0 siblings, 1 reply; 81+ messages in thread
From: FUJITA Tomonori @ 2011-03-23 12:04 UTC (permalink / raw)
  To: nab; +Cc: fujita.tomonori, brking, James.Bottomley, linux-scsi, jlbec

On Wed, 23 Mar 2011 03:00:03 -0700
"Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:

> On Wed, 2011-03-23 at 17:48 +0900, FUJITA Tomonori wrote:
> > On Wed, 23 Mar 2011 01:26:30 -0700
> > "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> > 
> > > Demo-mode means that a struct se_node_acl will be dynamically allocated
> > > when core_tpg_check_initiator_node_acl() is called for an unknown SCSI
> > > Initiator WWN in the process of creating a new I_T nexus (struct
> > > se_session) when struct target_core_fabric_ops->tpg_check_demo_mode()=1
> > > is set.
> > 
> > Again, acl is not irrelevant for ibmvscsis. So I shoundn't set up
> > it.
> > 
> > All I asking for is simply accepting any initiators and exporting all
> > the luns in a target.
> > 
> > 
> 
> Yes, I understand this.  But driving (from kernel-level) a default set
> of fabric TPG LUN exports for all available target_core_mod backend
> export is not a manageable way for /sys/kernel/config/target/$HBA/$DEV
> backend export.  This needs to be driven from python library code for
> userspace applications automatically without interaction from end user
> for ibmvscsis or any other fabric module.

Hmm, you still don't understand.

Again, there is no 'TPG' concept in SRP transport.

The current design strangely forces the TPG concept to SRP. Well, the
design is pretty tied to iSCSI.

We need to make TPG concept optional in configfs and the driver design.

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-22 22:06                           ` Nicholas A. Bellinger
  2011-03-22 22:49                             ` FUJITA Tomonori
@ 2011-03-23 15:19                             ` Brian King
  2011-03-23 20:34                               ` Nicholas A. Bellinger
  1 sibling, 1 reply; 81+ messages in thread
From: Brian King @ 2011-03-23 15:19 UTC (permalink / raw)
  To: Nicholas A. Bellinger; +Cc: FUJITA Tomonori, James.Bottomley, linux-scsi

On 03/22/2011 05:06 PM, Nicholas A. Bellinger wrote:
>>>> I'm also seeing disktest complain on the client about commands taking longer than 120 seconds
>>>> on occasion, which may play into the performance issue I mentioned in my previous mail.
>>>>
>>>
>>> Mmmm, please verify with RAMDISK_MCP backends as well, as by default
>>> FILEIO has O_SYNC enabled..  This does seem strange for LTP disktest
>>> however..

With RAMDISK_MCP I don't see any of the problems seen with RAMDISK_DR. Additionally,
disktest is running much snappier. I'm seeing between 30 and 60 MB/sec on the read workload
and between 100 and 300 MB/sec on the writes. 

I'm having some trouble with multiple LUNs, however. Perhaps this is more
configuration issues, but if I create a lun_1 directory and link to a second
device, the client just sees two devices which seems to be both mapped to
the device mapped at lun_0.

Thanks,

Brian


-- 
Brian King
Linux on Power Virtualization
IBM Linux Technology Center



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-23 15:19                             ` Brian King
@ 2011-03-23 20:34                               ` Nicholas A. Bellinger
  2011-03-25 14:33                                 ` Brian King
  0 siblings, 1 reply; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-03-23 20:34 UTC (permalink / raw)
  To: Brian King; +Cc: FUJITA Tomonori, James.Bottomley, linux-scsi

On Wed, 2011-03-23 at 10:19 -0500, Brian King wrote:
> On 03/22/2011 05:06 PM, Nicholas A. Bellinger wrote:
> >>>> I'm also seeing disktest complain on the client about commands taking longer than 120 seconds
> >>>> on occasion, which may play into the performance issue I mentioned in my previous mail.
> >>>>
> >>>
> >>> Mmmm, please verify with RAMDISK_MCP backends as well, as by default
> >>> FILEIO has O_SYNC enabled..  This does seem strange for LTP disktest
> >>> however..
> 
> With RAMDISK_MCP I don't see any of the problems seen with RAMDISK_DR. Additionally,
> disktest is running much snappier. I'm seeing between 30 and 60 MB/sec on the read workload
> and between 100 and 300 MB/sec on the writes. 
> 

Thanks for the update Brian!  I am glad to hear we have a stable
baseline for large block throughput with RAMDISK_MCP.

I would also be interested to see how small block performance looks with
RAMDISK_MCP, and for IBLOCK/FILEIO/PSCSI export on top of some fast
physical storage as well.  :)

> I'm having some trouble with multiple LUNs, however. Perhaps this is more
> configuration issues, but if I create a lun_1 directory and link to a second
> device, the client just sees two devices which seems to be both mapped to
> the device mapped at lun_0.
> 

Mmmm, this sounds like a bug in incoming LUN unpack or outgoing LUN
pack issue.  From a quick look it appears we are missing a
scsi_lun_to_int() call for transport_get_lun_for_cmd which is currently
expecting an unpacked LUN.

@@ -880,7 +901,7 @@ static int tcm_queuecommand(struct ibmvscsis_adapter *adapter,
                              srp_cmd_direction(cmd),
                              attr, vsc->sense_buf);
 
-       ret = transport_get_lun_for_cmd(se_cmd, cmd->lun);
+       ret = transport_get_lun_for_cmd(se_cmd, scsi_lun_to_int(cmd->lun));
        if (ret) {
                printk(KERN_ERR "invalid lun %u\n", GETLUN(cmd->lun));
                transport_send_check_condition_and_sense(se_cmd,

Best Regards,

--nab


^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-23 12:04                                         ` FUJITA Tomonori
@ 2011-03-23 21:17                                           ` Nicholas A. Bellinger
  2011-03-24  1:54                                             ` FUJITA Tomonori
  0 siblings, 1 reply; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-03-23 21:17 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: brking, James.Bottomley, linux-scsi, jlbec

On Wed, 2011-03-23 at 21:04 +0900, FUJITA Tomonori wrote:
> On Wed, 23 Mar 2011 03:00:03 -0700
> "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> 
> > On Wed, 2011-03-23 at 17:48 +0900, FUJITA Tomonori wrote:
> > > On Wed, 23 Mar 2011 01:26:30 -0700
> > > "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> > > 
> > > > Demo-mode means that a struct se_node_acl will be dynamically allocated
> > > > when core_tpg_check_initiator_node_acl() is called for an unknown SCSI
> > > > Initiator WWN in the process of creating a new I_T nexus (struct
> > > > se_session) when struct target_core_fabric_ops->tpg_check_demo_mode()=1
> > > > is set.
> > > 
> > > Again, acl is not irrelevant for ibmvscsis. So I shoundn't set up
> > > it.
> > > 
> > > All I asking for is simply accepting any initiators and exporting all
> > > the luns in a target.
> > > 
> > > 
> > 
> > Yes, I understand this.  But driving (from kernel-level) a default set
> > of fabric TPG LUN exports for all available target_core_mod backend
> > export is not a manageable way for /sys/kernel/config/target/$HBA/$DEV
> > backend export.  This needs to be driven from python library code for
> > userspace applications automatically without interaction from end user
> > for ibmvscsis or any other fabric module.
> 
> Hmm, you still don't understand.
> 
> Again, there is no 'TPG' concept in SRP transport.
> 

Yes, I understand that there is no hard requirement for a fabric level
concept of a TPG in SRP.  But the main reason why this has been done in
target_core_fabric_config.c is to present a single configfs group layout
for userspace code to follow using fabric dependent WWN naming, with an
optional unlimited set of fabric dependent attributes hanging off these
default set of target fabric groups.

I don't have an issue with a fabric module connecting directly to a
struct se_wwn->se_group, but you do realize that this would require both
WWN and TPGT access modes in exported target I/O path code, yes..?

> The current design strangely forces the TPG concept to SRP. Well, the
> design is pretty tied to iSCSI.
> 
> We need to make TPG concept optional in configfs and the driver design.

I think abstracting away target_core_fabric_ops->[make,drop]_tpg() makes
sense for the long term for these types of fabric modules.  But I would
much rather put a struct se_portal_group into the fabric dependent wwn
structure and do both WWN and tpgt_1 setup from ->[make,drop]_wwn() than
requiring WWN and TPG access modes for target core I/O path fabric code.

A good first step would be for modules to signal they are only
interested in a single 'tpgt_1' per target/$FABRIC/$TARGET_WWN/, and
enforce this in target_core_fabric_configfs.c instead of in fabric
module control plane code.  From there we can see if it makes sense to
setup tpgt_1 as a default_group for these fabrics, or if just doing it
transparent in target_fabric_make_wwn() is a possibility.

--nab


^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-23 21:17                                           ` Nicholas A. Bellinger
@ 2011-03-24  1:54                                             ` FUJITA Tomonori
  2011-03-24  7:29                                               ` Nicholas A. Bellinger
  0 siblings, 1 reply; 81+ messages in thread
From: FUJITA Tomonori @ 2011-03-24  1:54 UTC (permalink / raw)
  To: nab; +Cc: fujita.tomonori, brking, James.Bottomley, linux-scsi, jlbec

On Wed, 23 Mar 2011 14:17:10 -0700
"Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:

> On Wed, 2011-03-23 at 21:04 +0900, FUJITA Tomonori wrote:
> > On Wed, 23 Mar 2011 03:00:03 -0700
> > "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> > 
> > > On Wed, 2011-03-23 at 17:48 +0900, FUJITA Tomonori wrote:
> > > > On Wed, 23 Mar 2011 01:26:30 -0700
> > > > "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> > > > 
> > > > > Demo-mode means that a struct se_node_acl will be dynamically allocated
> > > > > when core_tpg_check_initiator_node_acl() is called for an unknown SCSI
> > > > > Initiator WWN in the process of creating a new I_T nexus (struct
> > > > > se_session) when struct target_core_fabric_ops->tpg_check_demo_mode()=1
> > > > > is set.
> > > > 
> > > > Again, acl is not irrelevant for ibmvscsis. So I shoundn't set up
> > > > it.
> > > > 
> > > > All I asking for is simply accepting any initiators and exporting all
> > > > the luns in a target.
> > > > 
> > > > 
> > > 
> > > Yes, I understand this.  But driving (from kernel-level) a default set
> > > of fabric TPG LUN exports for all available target_core_mod backend
> > > export is not a manageable way for /sys/kernel/config/target/$HBA/$DEV
> > > backend export.  This needs to be driven from python library code for
> > > userspace applications automatically without interaction from end user
> > > for ibmvscsis or any other fabric module.
> > 
> > Hmm, you still don't understand.
> > 
> > Again, there is no 'TPG' concept in SRP transport.
> > 
> 
> Yes, I understand that there is no hard requirement for a fabric level
> concept of a TPG in SRP.

Hard requirement? No, please read the SRP spec. There is no such thing.

So forcing TPG is the wrong design. Requirements must be in all the
transport specs. Transport specific things such as TPG shouldn't be
exported to every transports.

Again, the point is that the current design is pretty tied to
iSCSI. Other transports are forced to handle iSCSI specific stuff
strangely.

^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-24  1:54                                             ` FUJITA Tomonori
@ 2011-03-24  7:29                                               ` Nicholas A. Bellinger
  0 siblings, 0 replies; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-03-24  7:29 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: brking, James.Bottomley, linux-scsi, jlbec

On Thu, 2011-03-24 at 10:54 +0900, FUJITA Tomonori wrote:
> On Wed, 23 Mar 2011 14:17:10 -0700
> "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> 
> > On Wed, 2011-03-23 at 21:04 +0900, FUJITA Tomonori wrote:
> > > On Wed, 23 Mar 2011 03:00:03 -0700
> > > "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> > > 
> > > > On Wed, 2011-03-23 at 17:48 +0900, FUJITA Tomonori wrote:
> > > > > On Wed, 23 Mar 2011 01:26:30 -0700
> > > > > "Nicholas A. Bellinger" <nab@linux-iscsi.org> wrote:
> > > > > 
> > > > > > Demo-mode means that a struct se_node_acl will be dynamically allocated
> > > > > > when core_tpg_check_initiator_node_acl() is called for an unknown SCSI
> > > > > > Initiator WWN in the process of creating a new I_T nexus (struct
> > > > > > se_session) when struct target_core_fabric_ops->tpg_check_demo_mode()=1
> > > > > > is set.
> > > > > 
> > > > > Again, acl is not irrelevant for ibmvscsis. So I shoundn't set up
> > > > > it.
> > > > > 
> > > > > All I asking for is simply accepting any initiators and exporting all
> > > > > the luns in a target.
> > > > > 
> > > > > 
> > > > 
> > > > Yes, I understand this.  But driving (from kernel-level) a default set
> > > > of fabric TPG LUN exports for all available target_core_mod backend
> > > > export is not a manageable way for /sys/kernel/config/target/$HBA/$DEV
> > > > backend export.  This needs to be driven from python library code for
> > > > userspace applications automatically without interaction from end user
> > > > for ibmvscsis or any other fabric module.
> > > 
> > > Hmm, you still don't understand.
> > > 
> > > Again, there is no 'TPG' concept in SRP transport.
> > > 
> > 
> > Yes, I understand that there is no hard requirement for a fabric level
> > concept of a TPG in SRP.
> 
> Hard requirement? No, please read SRP spec? There is not such thing.
>
> So forcing TPG is the wrong desing. Requirements must be in all the
> transport specs. Transport specific things such as TPG shouldn't be
> exported to every transports.
> 

I have already explained why this was chosen as the default for the
target_core_fabric_configfs.c design, and that I am open to abstracting
this away from the fabric module control plane for non iSCSI fabrics.

> Again, the point is that the current design is pretty tied to
> iSCSI. Other transports are forced to handle iSCSI specific stuff
> strangely.

This has not been a limiting factor for any of non iSCSI fabric code
that I have developed or seen yet.  I would rather have
target_core_fabric_configfs.c logic handle a default 'tpgt_1'
transparently for a non iSCSI struct se_wwn->wwn_group registration,
than provide two modes of I/O path target endpoint reference for struct
se_wwn and struct se_portal_group.

I am happy to consider patches to address this.

--nab


^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-23 20:34                               ` Nicholas A. Bellinger
@ 2011-03-25 14:33                                 ` Brian King
  2011-03-25 20:13                                   ` Nicholas A. Bellinger
  0 siblings, 1 reply; 81+ messages in thread
From: Brian King @ 2011-03-25 14:33 UTC (permalink / raw)
  To: Nicholas A. Bellinger; +Cc: FUJITA Tomonori, James.Bottomley, linux-scsi

On 03/23/2011 03:34 PM, Nicholas A. Bellinger wrote:
> On Wed, 2011-03-23 at 10:19 -0500, Brian King wrote:
>> On 03/22/2011 05:06 PM, Nicholas A. Bellinger wrote:
>>>>>> I'm also seeing disktest complain on the client about commands taking longer than 120 seconds
>>>>>> on occasion, which may play into the performance issue I mentioned in my previous mail.
>>>>>>
>>>>>
>>>>> Mmmm, please verify with RAMDISK_MCP backends as well, as by default
>>>>> FILEIO has O_SYNC enabled..  This does seem strange for LTP disktest
>>>>> however..
>>
>> With RAMDISK_MCP I don't see any of the problems seen with RAMDISK_DR. Additionally,
>> disktest is running much snappier. I'm seeing between 30 and 60 MB/sec on the read workload
>> and between 100 and 300 MB/sec on the writes. 
>>
> 
> Thanks for the update Brian!  I am glad to hear we have a stable
> baseline for large block throughput with RAMDISK_MCP.
> 
> I would also be interested to see how small block performance looks with
> RAMDISK_MCP, and for IBLOCK/FILEIO/PSCSI export on top of some fast
> physical storage as well.  :)

Not too good. I've tried both FILEIO and IBLOCK and am seeing in the neighborhood
of 1 MB/sec read throughput and 5 MB/sec write throughput. I also continue
to see warnings from disktest indicating I/O's are taking longer than 120 seconds.
This is all with data integrity testing enabled, but I would still expect to see
much better numbers... Not sure where the bottleneck is at this point. If I run with
both a ramdisk LUN and an iblock LUN, I am seeing the ramdisk performance significantly
reduced to be on par with the iblock performance.

>> I'm having some trouble with multiple LUNs, however. Perhaps this is more
>> configuration issues, but if I create a lun_1 directory and link to a second
>> device, the client just sees two devices which seems to be both mapped to
>> the device mapped at lun_0.
>>
> 
> Mmmm, this sounds like some a bug in incoming LUN unpack or outgoing LUN
> pack issue.  From a quick look it appears we are missing a
> scsi_lun_to_int() call for transport_get_lun_for_cmd which is currently
> expecting an unpacked LUN.
> 
> @@ -880,7 +901,7 @@ static int tcm_queuecommand(struct ibmvscsis_adapter *adapter,
>                               srp_cmd_direction(cmd),
>                               attr, vsc->sense_buf);
> 
> -       ret = transport_get_lun_for_cmd(se_cmd, cmd->lun);
> +       ret = transport_get_lun_for_cmd(se_cmd, scsi_lun_to_int(cmd->lun));
>         if (ret) {
>                 printk(KERN_ERR "invalid lun %u\n", GETLUN(cmd->lun));
>                 transport_send_check_condition_and_sense(se_cmd,

This worked. I had to move the scsi_lun_to_int function further up the file in order
to be able to build it, but once I did this multiple LUNs seems to be working.

Thanks,

Brian

-- 
Brian King
Linux on Power Virtualization
IBM Linux Technology Center



^ permalink raw reply	[flat|nested] 81+ messages in thread

* Re: [PATCH 3/3] tcm ibmvscsis driver
  2011-03-25 14:33                                 ` Brian King
@ 2011-03-25 20:13                                   ` Nicholas A. Bellinger
  0 siblings, 0 replies; 81+ messages in thread
From: Nicholas A. Bellinger @ 2011-03-25 20:13 UTC (permalink / raw)
  To: Brian King; +Cc: FUJITA Tomonori, James.Bottomley, linux-scsi

On Fri, 2011-03-25 at 09:33 -0500, Brian King wrote:
> On 03/23/2011 03:34 PM, Nicholas A. Bellinger wrote:
> > On Wed, 2011-03-23 at 10:19 -0500, Brian King wrote:
> >> On 03/22/2011 05:06 PM, Nicholas A. Bellinger wrote:
> >>>>>> I'm also seeing disktest complain on the client about commands taking longer than 120 seconds
> >>>>>> on occasion, which may play into the performance issue I mentioned in my previous mail.
> >>>>>>
> >>>>>
> >>>>> Mmmm, please verify with RAMDISK_MCP backends as well, as by default
> >>>>> FILEIO has O_SYNC enabled..  This does seem strange for LTP disktest
> >>>>> however..
> >>
> >> With RAMDISK_MCP I don't see any of the problems seen with RAMDISK_DR. Additionally,
> >> disktest is running much snappier. I'm seeing between 30 and 60 MB/sec on the read workload
> >> and between 100 and 300 MB/sec on the writes. 
> >>
> > 
> > Thanks for the update Brian!  I am glad to hear we have a stable
> > baseline for large block throughput with RAMDISK_MCP.
> > 
> > I would also be interested to see how small block performance looks with
> > RAMDISK_MCP, and for IBLOCK/FILEIO/PSCSI export on top of some fast
> > physical storage as well.  :)
> 
> Not too good. I've tried both FILEIO and IBLOCK and am seeing in the neighborhood
> of 1 MB/sec read throughput and 5 MB/sec write throughput. I also continue
> to see warnings from disktest indicating I/O's are taking longer than 120 seconds.
> This is all with data integrity testing enabled, but I would still expect to see
> much better numbers... Not sure where the bottleneck is at this point. If I run with
> both a ramdisk LUN and an iblock LUN, I am seeing the ramdisk performance significantly
> reduced to be on par with the iblock performance.
> 

Hmmmm..  There have recently been some reports of poor performance on
bleeding edge .38 target FILEIO+IBLOCK backends with iscsi-target
export.  One tester notes they appear to go away when he went back to
a .36.4 kernel with current stable v3.5.2 target code from the
lio-core-backports.git tree.

I am not sure if these are related yet to what you are observing with
ibmvscsis with .38, but would be interested to see if using the pSCSI
backend passthrough makes any difference for struct block_device
backends, eg: it's a submit_bio() or higher regression with the block
layer, or some form of v3.5 -> v4.0 target regression.

Also just FYI, the plan is to provide an out-of-tree v4.0 target stable
backport build tree back to ~.32 stable code into lio-core-backports.git
in the next weeks that will allow v4 modules like ibmvscsis to function
(which appears is going to require your libsrp bugfix as well) for
current stable distro kernels.

So first trying /sys/kernel/config/target/core/pscsi_0/scsi_dev
passthrough device export on .38, and a backport of ibmvscsis to .36
kernel with v4 code for an IBLOCK/FILEIO control should help diagnose
the issue.  For the latter, it should be really easy to get
drivers/target/ on .36.4 if you want to try this ahead of the official v4
backport to verify.

> >> I'm having some trouble with multiple LUNs, however. Perhaps this is more
> >> configuration issues, but if I create a lun_1 directory and link to a second
> >> device, the client just sees two devices which both seem to be mapped to
> >> the device mapped at lun_0.
> >>
> > 
> > Mmmm, this sounds like a bug in incoming LUN unpack or outgoing LUN
> > pack issue.  From a quick look it appears we are missing a
> > scsi_lun_to_int() call for transport_get_lun_for_cmd which is currently
> > expecting an unpacked LUN.
> > 
> > @@ -880,7 +901,7 @@ static int tcm_queuecommand(struct ibmvscsis_adapter *adapter,
> >                               srp_cmd_direction(cmd),
> >                               attr, vsc->sense_buf);
> > 
> > -       ret = transport_get_lun_for_cmd(se_cmd, cmd->lun);
> > +       ret = transport_get_lun_for_cmd(se_cmd, scsi_lun_to_int(cmd->lun));
> >         if (ret) {
> >                 printk(KERN_ERR "invalid lun %u\n", GETLUN(cmd->lun));
> >                 transport_send_check_condition_and_sense(se_cmd,
> 
> This worked. I had to move the scsi_lun_to_int function further up the file in order
> to be able to build it, but once I did this multiple LUNs seem to be working.
> 

Ok great, thank you for the clarification here.

I still need to review my patch in the next days for Tomo-san for active
I/O shutdown items previously discussed.  Please feel free to send this
tested bugfix to him directly, and please let us know if you find
anything else that needs to be addressed.

Thanks!

--nab




^ permalink raw reply	[flat|nested] 81+ messages in thread

end of thread, other threads:[~2011-03-25 20:20 UTC | newest]

Thread overview: 81+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-02-10 12:21 [PATCH 0/3] ibmvscsis driver rewrite FUJITA Tomonori
2011-02-10 12:21 ` [PATCH 1/3] libsrp: add srp_data_length helper function FUJITA Tomonori
2011-02-10 12:21 ` [PATCH 2/3] libsrp: fix dma_unmap_sg FUJITA Tomonori
2011-02-10 12:21 ` [PATCH 3/3] tcm ibmvscsis driver FUJITA Tomonori
2011-02-10 19:03   ` Nicholas A. Bellinger
2011-02-14  1:36     ` FUJITA Tomonori
2011-02-14  3:26     ` FUJITA Tomonori
2011-02-14  9:01       ` Nicholas A. Bellinger
2011-02-14  9:29         ` FUJITA Tomonori
2011-02-14  9:27           ` Nicholas A. Bellinger
2011-02-14  9:46             ` FUJITA Tomonori
2011-02-14  9:51               ` Nicholas A. Bellinger
2011-02-10 19:15   ` Brian King
2011-02-10 19:38     ` Nicholas A. Bellinger
2011-02-11 21:13       ` Brian King
2011-02-12 20:27         ` James Bottomley
2011-03-07  4:41           ` FUJITA Tomonori
2011-03-07  6:17             ` Nicholas A. Bellinger
2011-03-07  6:24               ` FUJITA Tomonori
2011-03-07  6:55                 ` Nicholas A. Bellinger
2011-03-07 14:40             ` James Bottomley
2011-03-18 16:57               ` James Bottomley
2011-03-18 20:58               ` Brian King
2011-03-18 22:09                 ` Nicholas A. Bellinger
2011-03-19 14:32                 ` James Bottomley
2011-03-21  1:09                 ` FUJITA Tomonori
2011-03-21 12:56                   ` Brian King
2011-03-21 21:01                   ` Brian King
2011-03-21 21:01                     ` Nicholas A. Bellinger
2011-03-21 21:24                       ` Brian King
2011-03-21 22:29                         ` Nicholas A. Bellinger
2011-03-21 23:20                         ` FUJITA Tomonori
2011-03-21 23:50                           ` Nicholas A. Bellinger
2011-03-21 23:55                             ` FUJITA Tomonori
2011-03-22  0:26                               ` Nicholas A. Bellinger
2011-03-22  0:32                                 ` FUJITA Tomonori
2011-03-22  2:28                                   ` Nicholas A. Bellinger
2011-03-22  3:26                                     ` FUJITA Tomonori
2011-03-21 21:05                     ` James Bottomley
2011-03-21 22:37                       ` Brian King
2011-03-21 22:22                   ` Brian King
2011-03-21 22:31                     ` Brian King
2011-03-21 22:48                       ` Nicholas A. Bellinger
2011-03-22 12:53                         ` Brian King
2011-03-22 22:06                           ` Nicholas A. Bellinger
2011-03-22 22:49                             ` FUJITA Tomonori
2011-03-23  1:35                               ` Nicholas A. Bellinger
2011-03-23  5:12                                 ` FUJITA Tomonori
2011-03-23  8:26                                   ` Nicholas A. Bellinger
2011-03-23  8:48                                     ` FUJITA Tomonori
2011-03-23 10:00                                       ` Nicholas A. Bellinger
2011-03-23 12:04                                         ` FUJITA Tomonori
2011-03-23 21:17                                           ` Nicholas A. Bellinger
2011-03-24  1:54                                             ` FUJITA Tomonori
2011-03-24  7:29                                               ` Nicholas A. Bellinger
2011-03-23 15:19                             ` Brian King
2011-03-23 20:34                               ` Nicholas A. Bellinger
2011-03-25 14:33                                 ` Brian King
2011-03-25 20:13                                   ` Nicholas A. Bellinger
2011-03-21 22:34                     ` Nicholas A. Bellinger
2011-03-21 23:06                       ` FUJITA Tomonori
2011-03-21 23:13                         ` Nicholas A. Bellinger
2011-03-21 23:22                           ` FUJITA Tomonori
2011-03-22  0:03                             ` Nicholas A. Bellinger
2011-03-21 23:30                     ` FUJITA Tomonori
2011-02-14  1:42       ` FUJITA Tomonori
2011-02-14  1:42     ` FUJITA Tomonori
2011-02-14  7:16   ` Bart Van Assche
2011-02-14  9:11     ` FUJITA Tomonori
2011-02-14  9:18       ` Nicholas A. Bellinger
2011-02-14  9:19         ` Nicholas A. Bellinger
2011-02-14  9:31           ` FUJITA Tomonori
2011-02-14  9:29             ` Nicholas A. Bellinger
2011-02-14 11:50       ` Bart Van Assche
2011-02-15  3:42         ` FUJITA Tomonori
2011-02-15 19:20           ` Bart Van Assche
2011-02-15 23:21             ` FUJITA Tomonori
2011-02-10 18:34 ` [PATCH 0/3] ibmvscsis driver rewrite Nicholas A. Bellinger
2011-02-14  1:36   ` FUJITA Tomonori
2011-02-14  8:48     ` Nicholas A. Bellinger
     [not found] ` <4D53DE96.2020502@suse.de>
     [not found]   ` <1297363312.18212.153.camel@haakon2.linux-iscsi.org>
2011-02-10 21:22     ` Bart Van Assche

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.