[SPDK] SPDK + user space appliance

From: Shahar Salzman <shahar.salzman at kaminario.com>
To: spdk@lists.01.org
Subject: [SPDK] SPDK + user space appliance
Date: Thu, 25 Jan 2018 14:19:52 +0000	[thread overview]
Message-ID: <AM5PR04MB307492437496A0911FD8F28189E10@AM5PR04MB3074.eurprd04.prod.outlook.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 1479 bytes --]

Hi all,

Sorry for the delay, had to solve a quarantine issue in order to get access to the list.

Some clarifications regarding the user space application:

1. The application is not the nvmf_tgt; we have an entire appliance into which we are integrating spdk

2. We are currently using nvmf_tgt functions in order to activate spdk, and the bdev_user in order to handle IO

3. This is all in user space (I am used to the kernel/user distinction in order to separate protocol/appliance).

4. The bdev_user will also notify spdk of changes to namespaces (e.g. a new namespace has been added, and can be attached to the spdk subsystem)

I am glad that this is your intention, the question is, do you think that it would be useful to create such a bdev_user module which will allow other users to integrate spdk to their appliance using such a simple threading model? Perhaps such a module will allow easier integration of spdk.

I am attaching a reference application which does NULL IO via bdev_user.

Regarding the RPC, we have an implementation of it, and will be happy to push it upstream.

I am not sure that using the RPC for this type of bdev_user namespaces is the correct approach in the long run, since the user appliance is the one adding/removing namespaces (like hot plugging of a new NVME device), so it can just call the "add_namespace_to_subsystem" interface directly, and does not need to use an RPC for it.

Thanks,

Shahar

[-- Attachment #2: attachment.html --]
[-- Type: text/html, Size: 2849 bytes --]

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #3: nvmf_null_tgt.c --]
[-- Type: text/x-csrc, Size: 9465 bytes --]

/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <spdk/stdinc.h>
#include <spdk/env.h>
#include <spdk/event.h>
#include <spdk/log.h>
#include <spdk/nvmf.h>
#include <spdk/bdev.h>
#include <app/nvmf_tgt/nvmf_tgt.h>
#include "../..//lib/bdev/user/blockdev_user.h"
#include <limits.h>

/* Number of elements requested when the IO event mempool is created. */
#define NVMF_NULL_TGT_NUM_IOS (64 * 1024)
/* Size requested for the ring that carries IO events to the IO thread. */
#define NVMF_NULL_TGT_IO_RING_SIZE (NVMF_NULL_TGT_NUM_IOS)
/* Cache size argument passed when the IO event mempool is created. */
#define NVMF_NULL_TGT_IO_POOL_CACHE_SIZE (64)
/* Capacity of the local array used when draining the IO ring. */
#define NVMF_NULL_TGT_HANDLE_REQUEST_BATCH (8)

/* NQN used when -n is not given on the command line. */
#define NVMF_NULL_TGT_DEFAULT_NQN "nqn.2016-06.io.spdk:cnode1"
/* Size of the buffer that holds the rendered configuration file text. */
#define MAX_CONF_FILE_LEN (2048)

/*
 * printf-style template for the generated SPDK configuration file.
 * It contains two %s conversions: the first receives the subsystem NQN and
 * the second the listener IP address (see nvmf_null_tgt_get_conf_file()).
 */
char conf_file_template[] = ""
"[Global]\n"
"  ReactorMask 0x2\n"
"  LogFacility \"local7\"\n"
"[USER]\n"
"  USER User1\n"
"[Nvmf]\n"
"  MaxQueuesPerSession 4\n"
"  AcceptorPollRate 10000\n"
"[Subsystem1]\n"
"  NQN %s\n"
"  Core 1\n"
"  Mode Virtual\n"
"  Listen RDMA %s:1023\n"
"  Namespace User1 \n"
"  SN SPDK00000000000001\n"
"";

/* Path of the temporary configuration file; filled in by mkstemp() and
 * later passed to unlink(). */
char conf_file_temp_name[PATH_MAX];
/* Pre-allocated shutdown event; the signal handler fires it once and then
 * resets this pointer to NULL. */
struct spdk_event *g_shutdown_event = NULL;

/* One queued IO, passed from the submit path to the IO thread via the ring. */
struct nvmf_null_tgt_ring_entry {
	/* IO that the IO thread completes. */
	struct spdk_bdev_io *bdev_io;
	/* Channel the IO was submitted on; stored but not read in this file. */
	struct spdk_io_channel *io_channel;
};

/* State of the IO handler thread. */
struct nvmf_null_tgt_io_thread_ctxt {
	/* Thread id written by pthread_create(). */
	pthread_t tid;
	/* Ring from which the IO thread dequeues ring entries. */
	struct spdk_ring *io_ring;
	/* Pool from which ring entries are taken and to which they are returned. */
	struct spdk_mempool *io_event_pool;
	/* CPU index the IO thread sets in its affinity mask. */
	uint32_t lcore;
	/* Loop flag: the IO thread polls while this is true.
	 * NOTE(review): volatile alone is not a thread-synchronization
	 * primitive; consider an atomic flag - confirm. */
	volatile bool io_thread_run;
};

/* Top-level application state. */
struct nvmf_null_tgt_app {
	/* SPDK application options; config_file points at the generated file. */
	struct spdk_app_opts opts;
	/* State of the IO handler thread. */
	struct nvmf_null_tgt_io_thread_ctxt thread_ctxt;
	/* Listener IP taken from the -i command line option. */
	char *listener_ip;
	/* Subsystem NQN: the -n option or NVMF_NULL_TGT_DEFAULT_NQN. */
	char *nqn;
};

/* IO thread context used by the submit path; assigned in nvmf_null_tgt_setup(). */
struct nvmf_null_tgt_io_thread_ctxt *app_thread_ctxt;

/* Event callback that invokes spdk_nvmf_shutdown_cb(); both event arguments
 * are unused. */
static void
__shutdown_event_cb(void *arg1, void *arg2)
{
	(void)arg1;
	(void)arg2;

	spdk_nvmf_shutdown_cb();
}

/*
 * Signal handler installed by the application itself. It does what
 * lib/event/app would do, which lets the user catch its own signals while
 * still issuing the spdk shutdown event. The event is fired at most once:
 * the global pointer is cleared after the call.
 *
 * NOTE(review): logging from a signal handler is presumably not
 * async-signal-safe - confirm.
 */
static void
nvmf_null_tgt_signal_handler(int signum)
{
	(void)signum;

	SPDK_NOTICELOG("Shutting down spdk\n");

	if (g_shutdown_event == NULL) {
		/* Shutdown already requested, or the event is not allocated yet. */
		return;
	}

	spdk_event_call(g_shutdown_event);
	g_shutdown_event = NULL;
}

/*
 * Render conf_file_template with the application's NQN and listener IP,
 * write the result to a fresh temporary file created with mkstemp(), and
 * point app->opts.config_file at that file.
 *
 * The file name is stored in the global conf_file_temp_name so that it can
 * be unlinked later. Any failure (missing arguments, text that does not fit
 * the buffer, mkstemp()/write() errors) is reported on stderr and terminates
 * the process, because the application cannot start without a configuration.
 */
static void nvmf_null_tgt_get_conf_file(struct nvmf_null_tgt_app *app)
{
	static const char name_template[] = "/tmp/spdk_conf_fileXXXXXX";
	char conf_file_contents[MAX_CONF_FILE_LEN];
	size_t total;
	size_t written = 0;
	ssize_t rc;
	int len;
	int fd;

	/* Passing NULL to a %s conversion is undefined behavior. */
	if (app->nqn == NULL || app->listener_ip == NULL) {
		fprintf(stderr, "Cannot generate configuration: NQN or listener IP missing\n");
		exit(EXIT_FAILURE);
	}

	len = snprintf(conf_file_contents, sizeof(conf_file_contents),
		conf_file_template, app->nqn, app->listener_ip);
	if (len < 0 || (size_t)len >= sizeof(conf_file_contents)) {
		/* A truncated configuration would be silently wrong. */
		fprintf(stderr, "Generated configuration does not fit in %d bytes\n",
			MAX_CONF_FILE_LEN);
		exit(EXIT_FAILURE);
	}
	total = (size_t)len;

	snprintf(conf_file_temp_name, sizeof(conf_file_temp_name), "%s",
		name_template);
	fd = mkstemp(conf_file_temp_name);
	if (fd < 0) {
		perror("mkstemp");
		exit(EXIT_FAILURE);
	}

	/* write() may write fewer bytes than requested; loop until done. */
	while (written < total) {
		rc = write(fd, conf_file_contents + written, total - written);
		if (rc <= 0) {
			perror("write");
			close(fd);
			unlink(conf_file_temp_name);
			exit(EXIT_FAILURE);
		}
		written += (size_t)rc;
	}

	if (close(fd) != 0) {
		perror("close");
		unlink(conf_file_temp_name);
		exit(EXIT_FAILURE);
	}

	app->opts.config_file = conf_file_temp_name;
}

/* Delete the temporary configuration file named by conf_file_temp_name.
 * The opts argument is unused; the result of unlink() is not examined. */
static void
nvmf_null_tgt_remove_conf_file(struct spdk_app_opts *opts)
{
	(void)opts;

	unlink(conf_file_temp_name);
}

/*
 * Pick the IO thread context that will handle a request. Only one handler
 * exists here, so this always yields app_thread_ctxt; with multiple handlers
 * this is the place where requests would be rotated between the receivers.
 */
static struct nvmf_null_tgt_io_thread_ctxt *
nvmf_null_tgt_choose_handler(void)
{
	struct nvmf_null_tgt_io_thread_ctxt *chosen = app_thread_ctxt;

	return chosen;
}

/* Initialize opts through spdk_app_opts_init() and then override the
 * application name and the maximum delay (1000 us). */
static void
nvmf_null_tgt_set_default_opts(struct spdk_app_opts *opts)
{
	spdk_app_opts_init(opts);

	opts->max_delay_us = 1000;
	opts->name = "nvmf_null_tgt";
}

/*
 * Drain up to NVMF_NULL_TGT_HANDLE_REQUEST_BATCH queued IO events from the
 * context's ring. Each dequeued IO is completed via
 * blockdev_user_submit_completion() and its ring entry is returned to the
 * event pool. Returns immediately when the ring is empty.
 */
static void nvmf_null_tgt_handle_request_batch(struct nvmf_null_tgt_io_thread_ctxt *ctxt)
{
	void *events[NVMF_NULL_TGT_HANDLE_REQUEST_BATCH];
	struct nvmf_null_tgt_ring_entry *io_event;
	size_t count;
	size_t i;

	/* Ask for a full batch; the events array has exactly that many slots. */
	count = spdk_ring_dequeue(ctxt->io_ring, events,
		NVMF_NULL_TGT_HANDLE_REQUEST_BATCH);
	if (count == 0) {
		return;
	}

	/* A full batch is legal, so the bound is inclusive. */
	assert(count <= NVMF_NULL_TGT_HANDLE_REQUEST_BATCH);

	for (i = 0; i < count; i++) {
		io_event = events[i];
		blockdev_user_submit_completion(io_event->bdev_io);
		spdk_mempool_put(ctxt->io_event_pool, io_event);
	}
}

/*
 * Submit callback registered in the user bdev function table.
 *
 * Takes a ring entry from the chosen handler's event pool, records the IO
 * and its channel in it, and enqueues the entry on the handler's ring for
 * the IO thread to complete.
 *
 * Pool exhaustion or a full ring cannot be reported back to the caller from
 * here, and dropping the IO would leave it pending forever, so both cases
 * are reported on stderr and abort the process (in every build type, not
 * only when assertions are enabled).
 */
static void nvmf_null_tgt_submit_request(struct spdk_io_channel *io_channel,
	struct spdk_bdev_io *bdev_io)
{
	size_t num_enqueued;
	struct nvmf_null_tgt_ring_entry *io_event;
	struct nvmf_null_tgt_io_thread_ctxt *handler_thread_ctxt;

	handler_thread_ctxt = nvmf_null_tgt_choose_handler();

	io_event = spdk_mempool_get(handler_thread_ctxt->io_event_pool);
	if (io_event == NULL) {
		fprintf(stderr, "nvmf_null_tgt: IO event pool exhausted\n");
		abort();
	}
	io_event->bdev_io = bdev_io;
	io_event->io_channel = io_channel;

	num_enqueued = spdk_ring_enqueue(handler_thread_ctxt->io_ring,
		(void **)&io_event, 1);
	if (num_enqueued != 1) {
		spdk_mempool_put(handler_thread_ctxt->io_event_pool, io_event);
		fprintf(stderr, "nvmf_null_tgt: IO ring is full\n");
		abort();
	}
}

/*
 * Entry point of the IO handler thread.
 *
 * arg is the thread's struct nvmf_null_tgt_io_thread_ctxt. The thread raises
 * itself to the maximum SCHED_RR priority, pins itself to ctxt->lcore and
 * then polls the IO ring until ctxt->io_thread_run is cleared.
 * Always returns NULL.
 */
static void * nvmf_null_tgt_io_thread_fn(void *arg)
{
	struct nvmf_null_tgt_io_thread_ctxt *ctxt = arg;
	/* Use pthread_self(): ctxt->tid is written by pthread_create() in the
	 * parent and may not be stored yet when this thread starts running. */
	pthread_t self = pthread_self();
	struct sched_param param = { 0 };
	cpu_set_t cpu_set;
	int rc;

	param.sched_priority = sched_get_priority_max(SCHED_RR);
	assert(param.sched_priority != -1);
	rc = pthread_setschedparam(self, SCHED_RR, &param);
	assert(rc == 0);

	/* The set must be cleared first, otherwise it holds stack garbage. */
	CPU_ZERO(&cpu_set);
	CPU_SET(ctxt->lcore, &cpu_set);
	rc = pthread_setaffinity_np(self, sizeof(cpu_set), &cpu_set);
	if (rc != 0) {
		/* Not fatal: the thread still works, just without pinning. */
		SPDK_NOTICELOG("Failed to pin IO handler to core %u (rc %d)\n",
			(unsigned)ctxt->lcore, rc);
	}

	SPDK_NOTICELOG("Starting IO handler\n");

	while (ctxt->io_thread_run) {
		nvmf_null_tgt_handle_request_batch(ctxt);
	}

	SPDK_NOTICELOG("Stopping IO handler\n");

	return NULL;
}

/* Mark the IO thread as running, select its core and spawn it with
 * nvmf_null_tgt_io_thread_fn() as the entry point. */
static void
nvmf_null_tgt_io_thread_run(struct nvmf_null_tgt_io_thread_ctxt *ctxt)
{
	int create_rc;

	/* TODO - set this from user input */
	ctxt->lcore = 2;
	ctxt->io_thread_run = true;

	create_rc = pthread_create(&ctxt->tid, NULL,
		nvmf_null_tgt_io_thread_fn, ctxt);
	assert(create_rc == 0);
}

/* Clear the run flag so the IO thread leaves its polling loop, then wait
 * for the thread to terminate. The thread's return value is not used. */
static void
nvmf_null_tgt_io_thread_stop(struct nvmf_null_tgt_io_thread_ctxt *ctxt)
{
	int join_rc;

	ctxt->io_thread_run = false;

	join_rc = pthread_join(ctxt->tid, NULL);
	assert(join_rc == 0);
}

/* Print the command line help text to stdout. */
static void
usage(void)
{
	fputs("nvmf_null_tgt [options]\n", stdout);
	fputs("options:\n", stdout);
	printf(" -n nqn  - target nqn (default %s)\n", NVMF_NULL_TGT_DEFAULT_NQN);
	fputs(" -i IP - listener IP (required)\n", stdout);
}

/*
 * Parse the command line into app.
 *
 *   -i IP   listener IP (required)
 *   -n nqn  subsystem NQN (defaults to NVMF_NULL_TGT_DEFAULT_NQN)
 *   -h      print usage and exit successfully (-H is accepted as well,
 *           since the option string used to list it)
 *
 * An unknown option or a missing -i prints the usage text and exits with
 * EXIT_FAILURE; the function only returns when both the NQN and the
 * listener IP are set.
 */
static void nvmf_null_tgt_cli(struct nvmf_null_tgt_app *app, int argc,
	char *argv[])
{
	int ch;

	app->nqn = NVMF_NULL_TGT_DEFAULT_NQN;

	while ((ch = getopt(argc, argv, "i:n:hH")) != -1) {
		switch (ch) {
		case 'i':
			app->listener_ip = optarg;
			break;
		case 'n':
			app->nqn = optarg;
			break;
		case 'h':
		case 'H':
			usage();
			exit(EXIT_SUCCESS);
		default:
			/* getopt() has already reported the offending option. */
			usage();
			exit(EXIT_FAILURE);
		}
	}

	if (app->listener_ip == NULL) {
		printf("Missing required argument IP (-i)\n");
		printf("\n");
		usage();
		/* Continuing would format a NULL IP into the configuration. */
		exit(EXIT_FAILURE);
	}

	SPDK_NOTICELOG("Using nqn %s IP %s\n", app->nqn, app->listener_ip);
}

/* Callback handed to spdk_app_subsystem_init(); it only logs a notice and
 * ignores both arguments. */
static void
nvmf_shutdown_test_setup_cb(void *arg1, void *arg2)
{
	(void)arg1;
	(void)arg2;

	SPDK_NOTICELOG("NVMF shutdown test ready to roll!\n");
}

/*
 * Bring the application up to the point where the reactors can be started:
 * install the SIGINT/SIGTERM handlers, generate the configuration file,
 * initialize the SPDK app, register the user bdev function table, create the
 * IO ring and IO event pool, allocate the shutdown event and initialize the
 * SPDK subsystems.
 *
 * Any failure is reported on stderr and terminates the process with
 * EXIT_FAILURE; a configuration file that was already generated is removed
 * first.
 */
static void nvmf_null_tgt_setup(struct nvmf_null_tgt_app *app)
{
	/* static: only a pointer is handed to the registration call, so the
	 * table must outlive this function in case that pointer is retained. */
	static struct spdk_user_fn_table spdk_user_kal_fn_table = {
		.submit_request	= nvmf_null_tgt_submit_request,
	};
	const char *failure;

	/* signal() returns the previous handler, or SIG_ERR on failure. */
	if (signal(SIGINT, nvmf_null_tgt_signal_handler) == SIG_ERR ||
	    signal(SIGTERM, nvmf_null_tgt_signal_handler) == SIG_ERR) {
		perror("signal");
		exit(EXIT_FAILURE);
	}

	spdk_log_set_print_level(SPDK_LOG_NOTICE);
	nvmf_null_tgt_set_default_opts(&app->opts);
	nvmf_null_tgt_get_conf_file(app);

	spdk_app_init(&app->opts);

	blockdev_user_register_fn_table(&spdk_user_kal_fn_table);

	app->thread_ctxt.io_ring = spdk_ring_create(SPDK_RING_TYPE_SP_SC,
		NVMF_NULL_TGT_IO_RING_SIZE,
		SPDK_ENV_SOCKET_ID_ANY);
	if (app->thread_ctxt.io_ring == NULL) {
		failure = "cannot create IO ring";
		goto fail;
	}

	app->thread_ctxt.io_event_pool = spdk_mempool_create("null_tgt_io_event",
		NVMF_NULL_TGT_NUM_IOS, sizeof(struct nvmf_null_tgt_ring_entry),
		NVMF_NULL_TGT_IO_POOL_CACHE_SIZE,
		SPDK_ENV_SOCKET_ID_ANY);
	if (app->thread_ctxt.io_event_pool == NULL) {
		failure = "cannot create IO event pool";
		goto fail;
	}
	app_thread_ctxt = &app->thread_ctxt;

	g_shutdown_event = spdk_event_allocate(spdk_env_get_current_core(),
		__shutdown_event_cb, NULL, NULL);
	if (g_shutdown_event == NULL) {
		/* Without this event the signal handler could never stop SPDK. */
		failure = "cannot allocate shutdown event";
		goto fail;
	}

	spdk_app_subsystem_init(nvmf_shutdown_test_setup_cb);
	return;

fail:
	fprintf(stderr, "nvmf_null_tgt setup failed: %s\n", failure);
	nvmf_null_tgt_remove_conf_file(&app->opts);
	exit(EXIT_FAILURE);
}

/*
 * Start the IO handler thread, queue the spdk_nvmf_startup event on the
 * current core and run the reactors. Once spdk_reactors_start() returns,
 * the IO handler thread is stopped and joined.
 */
static void
nvmf_null_tgt_run(struct nvmf_null_tgt_app *app)
{
	struct nvmf_null_tgt_io_thread_ctxt *io_ctxt = &app->thread_ctxt;

	nvmf_null_tgt_io_thread_run(io_ctxt);

	spdk_event_call(spdk_event_allocate(spdk_env_get_current_core(),
		spdk_nvmf_startup, NULL, NULL));

	spdk_reactors_start();

	nvmf_null_tgt_io_thread_stop(io_ctxt);
}

static void nvmf_null_tgt_teardown(struct nvmf_null_tgt_app *app)
{
	spdk_ring_free(app->thread_ctxt.io_ring);
	spdk_mempool_free(app->thread_ctxt.io_event_pool);
	nvmf_null_tgt_remove_conf_file(&app->opts);
}

/* Program entry: parse the command line, set the target up, run it until
 * the signal handler requests shutdown, then tear everything down. */
int
main(int argc, char **argv)
{
	struct nvmf_null_tgt_app app;

	memset(&app, 0, sizeof(app));

	nvmf_null_tgt_cli(&app, argc, argv);
	nvmf_null_tgt_setup(&app);
	/* Stopped by signal handler */
	nvmf_null_tgt_run(&app);
	nvmf_null_tgt_teardown(&app);

	return 0;
}