From mboxrd@z Thu Jan 1 00:00:00 1970 From: jmoyer@redhat.com (Jeff Moyer) Date: Tue, 24 Nov 2015 10:19:54 -0500 Subject: [PATCH 06/47] blk-mq: add a flags parameter to blk_mq_alloc_request In-Reply-To: <1448037342-18384-7-git-send-email-hch@lst.de> (Christoph Hellwig's message of "Fri, 20 Nov 2015 17:35:01 +0100") References: <1448037342-18384-1-git-send-email-hch@lst.de> <1448037342-18384-7-git-send-email-hch@lst.de> Message-ID: Christoph Hellwig writes: > We already have the reserved flag, and a nowait flag awkwardly encoded as > a gfp_t. Add a real flags argument to make the scheme more extensible and > allow for a nicer calling convention. > > Signed-off-by: Christoph Hellwig > --- > block/blk-core.c | 11 +- > block/blk-mq-tag.c | 11 +- > block/blk-mq.c | 20 +- > block/blk-mq.h | 11 +- > block/blk.h | 2 +- > drivers/block/mtip32xx/mtip32xx.c | 2 +- > drivers/nvme/host/core.c | 1172 +++++++++++++++++++++++++++++++++++++ Christoph, I think you included a bit too much in this patch! ;-) -Jeff > drivers/nvme/host/pci.c | 11 +- > include/linux/blk-mq.h | 8 +- > 9 files changed, 1210 insertions(+), 38 deletions(-) > create mode 100644 drivers/nvme/host/core.c > > diff --git a/block/blk-core.c b/block/blk-core.c > index af9c315..d2100aa 100644 > --- a/block/blk-core.c > +++ b/block/blk-core.c > @@ -630,7 +630,7 @@ struct request_queue *blk_alloc_queue(gfp_t gfp_mask) > } > EXPORT_SYMBOL(blk_alloc_queue); > > -int blk_queue_enter(struct request_queue *q, gfp_t gfp) > +int blk_queue_enter(struct request_queue *q, bool nowait) > { > while (true) { > int ret; > @@ -638,7 +638,7 @@ int blk_queue_enter(struct request_queue *q, gfp_t gfp) > if (percpu_ref_tryget_live(&q->q_usage_counter)) > return 0; > > - if (!gfpflags_allow_blocking(gfp)) > + if (nowait) > return -EBUSY; > > ret = wait_event_interruptible(q->mq_freeze_wq, > @@ -1284,7 +1284,9 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw, > struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) > { > if (q->mq_ops) > - return blk_mq_alloc_request(q, rw, gfp_mask, false); > + return blk_mq_alloc_request(q, rw, > + (gfp_mask & __GFP_DIRECT_RECLAIM) ? 
> + 0 : BLK_MQ_REQ_NOWAIT); > else > return blk_old_get_request(q, rw, gfp_mask); > } > @@ -2052,8 +2054,7 @@ blk_qc_t generic_make_request(struct bio *bio) > do { > struct request_queue *q = bdev_get_queue(bio->bi_bdev); > > - if (likely(blk_queue_enter(q, __GFP_DIRECT_RECLAIM) == 0)) { > - > + if (likely(blk_queue_enter(q, false) == 0)) { > ret = q->make_request_fn(q, bio); > > blk_queue_exit(q); > diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c > index a07ca34..abdbb47 100644 > --- a/block/blk-mq-tag.c > +++ b/block/blk-mq-tag.c > @@ -268,7 +268,7 @@ static int bt_get(struct blk_mq_alloc_data *data, > if (tag != -1) > return tag; > > - if (!gfpflags_allow_blocking(data->gfp)) > + if (data->flags & BLK_MQ_REQ_NOWAIT) > return -1; > > bs = bt_wait_ptr(bt, hctx); > @@ -303,7 +303,7 @@ static int bt_get(struct blk_mq_alloc_data *data, > data->ctx = blk_mq_get_ctx(data->q); > data->hctx = data->q->mq_ops->map_queue(data->q, > data->ctx->cpu); > - if (data->reserved) { > + if (data->flags & BLK_MQ_REQ_RESERVED) { > bt = &data->hctx->tags->breserved_tags; > } else { > last_tag = &data->ctx->last_tag; > @@ -349,10 +349,9 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data) > > unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) > { > - if (!data->reserved) > - return __blk_mq_get_tag(data); > - > - return __blk_mq_get_reserved_tag(data); > + if (data->flags & BLK_MQ_REQ_RESERVED) > + return __blk_mq_get_reserved_tag(data); > + return __blk_mq_get_tag(data); > } > > static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt) > diff --git a/block/blk-mq.c b/block/blk-mq.c > index c932605..6da03f1 100644 > --- a/block/blk-mq.c > +++ b/block/blk-mq.c > @@ -230,8 +230,8 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw) > return NULL; > } > > -struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, > - bool reserved) > +struct request *blk_mq_alloc_request(struct request_queue *q, int rw, > + unsigned int flags) > { > struct blk_mq_ctx *ctx; > struct blk_mq_hw_ctx *hctx; > @@ -239,24 +239,22 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, > struct blk_mq_alloc_data alloc_data; > int ret; > > - ret = blk_queue_enter(q, gfp); > + ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT); > if (ret) > return ERR_PTR(ret); > > ctx = blk_mq_get_ctx(q); > hctx = q->mq_ops->map_queue(q, ctx->cpu); > - blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_DIRECT_RECLAIM, > - reserved, ctx, hctx); > + blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); > > rq = __blk_mq_alloc_request(&alloc_data, rw); > - if (!rq && (gfp & __GFP_DIRECT_RECLAIM)) { > + if (!rq && !(flags & BLK_MQ_REQ_NOWAIT)) { > __blk_mq_run_hw_queue(hctx); > blk_mq_put_ctx(ctx); > > ctx = blk_mq_get_ctx(q); > hctx = q->mq_ops->map_queue(q, ctx->cpu); > - blk_mq_set_alloc_data(&alloc_data, q, gfp, reserved, ctx, > - hctx); > + blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); > rq = __blk_mq_alloc_request(&alloc_data, rw); > ctx = alloc_data.ctx; > } > @@ -1181,8 +1179,7 @@ static struct request *blk_mq_map_request(struct request_queue *q, > rw |= REQ_SYNC; > > trace_block_getrq(q, bio, rw); > - blk_mq_set_alloc_data(&alloc_data, q, GFP_ATOMIC, false, ctx, > - hctx); > + blk_mq_set_alloc_data(&alloc_data, q, BLK_MQ_REQ_NOWAIT, ctx, hctx); > rq = __blk_mq_alloc_request(&alloc_data, rw); > if (unlikely(!rq)) { > __blk_mq_run_hw_queue(hctx); > @@ -1191,8 +1188,7 @@ static struct request *blk_mq_map_request(struct 
request_queue *q, > > ctx = blk_mq_get_ctx(q); > hctx = q->mq_ops->map_queue(q, ctx->cpu); > - blk_mq_set_alloc_data(&alloc_data, q, > - __GFP_RECLAIM|__GFP_HIGH, false, ctx, hctx); > + blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx); > rq = __blk_mq_alloc_request(&alloc_data, rw); > ctx = alloc_data.ctx; > hctx = alloc_data.hctx; > diff --git a/block/blk-mq.h b/block/blk-mq.h > index 713820b..eaede8e 100644 > --- a/block/blk-mq.h > +++ b/block/blk-mq.h > @@ -96,8 +96,7 @@ static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx) > struct blk_mq_alloc_data { > /* input parameter */ > struct request_queue *q; > - gfp_t gfp; > - bool reserved; > + unsigned int flags; > > /* input & output parameter */ > struct blk_mq_ctx *ctx; > @@ -105,13 +104,11 @@ struct blk_mq_alloc_data { > }; > > static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data, > - struct request_queue *q, gfp_t gfp, bool reserved, > - struct blk_mq_ctx *ctx, > - struct blk_mq_hw_ctx *hctx) > + struct request_queue *q, unsigned int flags, > + struct blk_mq_ctx *ctx, struct blk_mq_hw_ctx *hctx) > { > data->q = q; > - data->gfp = gfp; > - data->reserved = reserved; > + data->flags = flags; > data->ctx = ctx; > data->hctx = hctx; > } > diff --git a/block/blk.h b/block/blk.h > index 1d95107..38bf997 100644 > --- a/block/blk.h > +++ b/block/blk.h > @@ -72,7 +72,7 @@ void blk_dequeue_request(struct request *rq); > void __blk_queue_free_tags(struct request_queue *q); > bool __blk_end_bidi_request(struct request *rq, int error, > unsigned int nr_bytes, unsigned int bidi_bytes); > -int blk_queue_enter(struct request_queue *q, gfp_t gfp); > +int blk_queue_enter(struct request_queue *q, bool nowait); > void blk_queue_exit(struct request_queue *q); > void blk_freeze_queue(struct request_queue *q); > > diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c > index a28a562..cf3b51a 100644 > --- a/drivers/block/mtip32xx/mtip32xx.c > +++ b/drivers/block/mtip32xx/mtip32xx.c > @@ -173,7 +173,7 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd) > { > struct request *rq; > > - rq = blk_mq_alloc_request(dd->queue, 0, __GFP_RECLAIM, true); > + rq = blk_mq_alloc_request(dd->queue, 0, BLK_MQ_REQ_RESERVED); > return blk_mq_rq_to_pdu(rq); > } > > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c > new file mode 100644 > index 0000000..53cf507 > --- /dev/null > +++ b/drivers/nvme/host/core.c > @@ -0,0 +1,1172 @@ > +/* > + * NVM Express device driver > + * Copyright (c) 2011-2014, Intel Corporation. > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms and conditions of the GNU General Public License, > + * version 2, as published by the Free Software Foundation. > + * > + * This program is distributed in the hope it will be useful, but WITHOUT > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for > + * more details. 
> + */ > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > + > +#include "nvme.h" > + > +#define NVME_MINORS (1U << MINORBITS) > + > +static int nvme_major; > +module_param(nvme_major, int, 0); > + > +static int nvme_char_major; > +module_param(nvme_char_major, int, 0); > + > +static LIST_HEAD(nvme_ctrl_list); > +DEFINE_SPINLOCK(dev_list_lock); > + > +static struct class *nvme_class; > + > +static void nvme_free_ns(struct kref *kref) > +{ > + struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref); > + > + if (ns->type == NVME_NS_LIGHTNVM) > + nvme_nvm_unregister(ns->queue, ns->disk->disk_name); > + > + spin_lock(&dev_list_lock); > + ns->disk->private_data = NULL; > + spin_unlock(&dev_list_lock); > + > + nvme_put_ctrl(ns->ctrl); > + put_disk(ns->disk); > + kfree(ns); > +} > + > +static void nvme_put_ns(struct nvme_ns *ns) > +{ > + kref_put(&ns->kref, nvme_free_ns); > +} > + > +static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk) > +{ > + struct nvme_ns *ns; > + > + spin_lock(&dev_list_lock); > + ns = disk->private_data; > + if (ns && !kref_get_unless_zero(&ns->kref)) > + ns = NULL; > + spin_unlock(&dev_list_lock); > + > + return ns; > +} > + > +static struct request *nvme_alloc_request(struct request_queue *q, > + struct nvme_command *cmd) > +{ > + bool write = cmd->common.opcode & 1; > + struct request *req; > + > + req = blk_mq_alloc_request(q, write, 0); > + if (IS_ERR(req)) > + return req; > + > + req->cmd_type = REQ_TYPE_DRV_PRIV; > + req->cmd_flags |= REQ_FAILFAST_DRIVER; > + req->__data_len = 0; > + req->__sector = (sector_t) -1; > + req->bio = req->biotail = NULL; > + > + req->cmd = (unsigned char *)cmd; > + req->cmd_len = sizeof(struct nvme_command); > + req->special = (void *)0; > + > + return req; > +} > + > +/* > + * Returns 0 on success. If the result is negative, it's a Linux error code; > + * if the result is positive, it's an NVM Express status code > + */ > +int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, > + void *buffer, unsigned bufflen, u32 *result, unsigned timeout) > +{ > + struct request *req; > + int ret; > + > + req = nvme_alloc_request(q, cmd); > + if (IS_ERR(req)) > + return PTR_ERR(req); > + > + req->timeout = timeout ? timeout : ADMIN_TIMEOUT; > + > + if (buffer && bufflen) { > + ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL); > + if (ret) > + goto out; > + } > + > + blk_execute_rq(req->q, NULL, req, 0); > + if (result) > + *result = (u32)(uintptr_t)req->special; > + ret = req->errors; > + out: > + blk_mq_free_request(req); > + return ret; > +} > + > +int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, > + void *buffer, unsigned bufflen) > +{ > + return __nvme_submit_sync_cmd(q, cmd, buffer, bufflen, NULL, 0); > +} > + > +int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, > + void __user *ubuffer, unsigned bufflen, > + void __user *meta_buffer, unsigned meta_len, u32 meta_seed, > + u32 *result, unsigned timeout) > +{ > + bool write = cmd->common.opcode & 1; > + struct nvme_ns *ns = q->queuedata; > + struct gendisk *disk = ns ? ns->disk : NULL; > + struct request *req; > + struct bio *bio = NULL; > + void *meta = NULL; > + int ret; > + > + req = nvme_alloc_request(q, cmd); > + if (IS_ERR(req)) > + return PTR_ERR(req); > + > + req->timeout = timeout ? 
timeout : ADMIN_TIMEOUT; > + > + if (ubuffer && bufflen) { > + ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, > + GFP_KERNEL); > + if (ret) > + goto out; > + bio = req->bio; > + > + if (!disk) > + goto submit; > + bio->bi_bdev = bdget_disk(disk, 0); > + if (!bio->bi_bdev) { > + ret = -ENODEV; > + goto out_unmap; > + } > + > + if (meta_buffer) { > + struct bio_integrity_payload *bip; > + > + meta = kmalloc(meta_len, GFP_KERNEL); > + if (!meta) { > + ret = -ENOMEM; > + goto out_unmap; > + } > + > + if (write) { > + if (copy_from_user(meta, meta_buffer, > + meta_len)) { > + ret = -EFAULT; > + goto out_free_meta; > + } > + } > + > + bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); > + if (!bip) { > + ret = -ENOMEM; > + goto out_free_meta; > + } > + > + bip->bip_iter.bi_size = meta_len; > + bip->bip_iter.bi_sector = meta_seed; > + > + ret = bio_integrity_add_page(bio, virt_to_page(meta), > + meta_len, offset_in_page(meta)); > + if (ret != meta_len) { > + ret = -ENOMEM; > + goto out_free_meta; > + } > + } > + } > + submit: > + blk_execute_rq(req->q, disk, req, 0); > + ret = req->errors; > + if (result) > + *result = (u32)(uintptr_t)req->special; > + if (meta && !ret && !write) { > + if (copy_to_user(meta_buffer, meta, meta_len)) > + ret = -EFAULT; > + } > + out_free_meta: > + kfree(meta); > + out_unmap: > + if (bio) { > + if (disk && bio->bi_bdev) > + bdput(bio->bi_bdev); > + blk_rq_unmap_user(bio); > + } > + out: > + blk_mq_free_request(req); > + return ret; > +} > + > +int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, > + void __user *ubuffer, unsigned bufflen, u32 *result, > + unsigned timeout) > +{ > + return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0, > + result, timeout); > +} > + > +int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) > +{ > + struct nvme_command c = { }; > + int error; > + > + /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ > + c.identify.opcode = nvme_admin_identify; > + c.identify.cns = cpu_to_le32(1); > + > + *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL); > + if (!*id) > + return -ENOMEM; > + > + error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, > + sizeof(struct nvme_id_ctrl)); > + if (error) > + kfree(*id); > + return error; > +} > + > +static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list) > +{ > + struct nvme_command c = { }; > + > + c.identify.opcode = nvme_admin_identify; > + c.identify.cns = cpu_to_le32(2); > + c.identify.nsid = cpu_to_le32(nsid); > + return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000); > +} > + > +int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid, > + struct nvme_id_ns **id) > +{ > + struct nvme_command c = { }; > + int error; > + > + /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ > + c.identify.opcode = nvme_admin_identify, > + c.identify.nsid = cpu_to_le32(nsid), > + > + *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL); > + if (!*id) > + return -ENOMEM; > + > + error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, > + sizeof(struct nvme_id_ns)); > + if (error) > + kfree(*id); > + return error; > +} > + > +int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, > + dma_addr_t dma_addr, u32 *result) > +{ > + struct nvme_command c; > + > + memset(&c, 0, sizeof(c)); > + c.features.opcode = nvme_admin_get_features; > + c.features.nsid = cpu_to_le32(nsid); > + c.features.prp1 = cpu_to_le64(dma_addr); > + c.features.fid = cpu_to_le32(fid); > + > + 
return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0); > +} > + > +int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, > + dma_addr_t dma_addr, u32 *result) > +{ > + struct nvme_command c; > + > + memset(&c, 0, sizeof(c)); > + c.features.opcode = nvme_admin_set_features; > + c.features.prp1 = cpu_to_le64(dma_addr); > + c.features.fid = cpu_to_le32(fid); > + c.features.dword11 = cpu_to_le32(dword11); > + > + return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0); > +} > + > +int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log) > +{ > + struct nvme_command c = { }; > + int error; > + > + c.common.opcode = nvme_admin_get_log_page, > + c.common.nsid = cpu_to_le32(0xFFFFFFFF), > + c.common.cdw10[0] = cpu_to_le32( > + (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) | > + NVME_LOG_SMART), > + > + *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL); > + if (!*log) > + return -ENOMEM; > + > + error = nvme_submit_sync_cmd(dev->admin_q, &c, *log, > + sizeof(struct nvme_smart_log)); > + if (error) > + kfree(*log); > + return error; > +} > + > +static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) > +{ > + struct nvme_user_io io; > + struct nvme_command c; > + unsigned length, meta_len; > + void __user *metadata; > + > + if (copy_from_user(&io, uio, sizeof(io))) > + return -EFAULT; > + > + switch (io.opcode) { > + case nvme_cmd_write: > + case nvme_cmd_read: > + case nvme_cmd_compare: > + break; > + default: > + return -EINVAL; > + } > + > + length = (io.nblocks + 1) << ns->lba_shift; > + meta_len = (io.nblocks + 1) * ns->ms; > + metadata = (void __user *)(uintptr_t)io.metadata; > + > + if (ns->ext) { > + length += meta_len; > + meta_len = 0; > + } else if (meta_len) { > + if ((io.metadata & 3) || !io.metadata) > + return -EINVAL; > + } > + > + memset(&c, 0, sizeof(c)); > + c.rw.opcode = io.opcode; > + c.rw.flags = io.flags; > + c.rw.nsid = cpu_to_le32(ns->ns_id); > + c.rw.slba = cpu_to_le64(io.slba); > + c.rw.length = cpu_to_le16(io.nblocks); > + c.rw.control = cpu_to_le16(io.control); > + c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); > + c.rw.reftag = cpu_to_le32(io.reftag); > + c.rw.apptag = cpu_to_le16(io.apptag); > + c.rw.appmask = cpu_to_le16(io.appmask); > + > + return __nvme_submit_user_cmd(ns->queue, &c, > + (void __user *)(uintptr_t)io.addr, length, > + metadata, meta_len, io.slba, NULL, 0); > +} > + > +static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, > + struct nvme_passthru_cmd __user *ucmd) > +{ > + struct nvme_passthru_cmd cmd; > + struct nvme_command c; > + unsigned timeout = 0; > + int status; > + > + if (!capable(CAP_SYS_ADMIN)) > + return -EACCES; > + if (copy_from_user(&cmd, ucmd, sizeof(cmd))) > + return -EFAULT; > + > + memset(&c, 0, sizeof(c)); > + c.common.opcode = cmd.opcode; > + c.common.flags = cmd.flags; > + c.common.nsid = cpu_to_le32(cmd.nsid); > + c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); > + c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); > + c.common.cdw10[0] = cpu_to_le32(cmd.cdw10); > + c.common.cdw10[1] = cpu_to_le32(cmd.cdw11); > + c.common.cdw10[2] = cpu_to_le32(cmd.cdw12); > + c.common.cdw10[3] = cpu_to_le32(cmd.cdw13); > + c.common.cdw10[4] = cpu_to_le32(cmd.cdw14); > + c.common.cdw10[5] = cpu_to_le32(cmd.cdw15); > + > + if (cmd.timeout_ms) > + timeout = msecs_to_jiffies(cmd.timeout_ms); > + > + status = nvme_submit_user_cmd(ns ? 
ns->queue : ctrl->admin_q, &c, > + (void __user *)cmd.addr, cmd.data_len, > + &cmd.result, timeout); > + if (status >= 0) { > + if (put_user(cmd.result, &ucmd->result)) > + return -EFAULT; > + } > + > + return status; > +} > + > +static int nvme_ioctl(struct block_device *bdev, fmode_t mode, > + unsigned int cmd, unsigned long arg) > +{ > + struct nvme_ns *ns = bdev->bd_disk->private_data; > + > + switch (cmd) { > + case NVME_IOCTL_ID: > + force_successful_syscall_return(); > + return ns->ns_id; > + case NVME_IOCTL_ADMIN_CMD: > + return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg); > + case NVME_IOCTL_IO_CMD: > + return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg); > + case NVME_IOCTL_SUBMIT_IO: > + return nvme_submit_io(ns, (void __user *)arg); > + case SG_GET_VERSION_NUM: > + return nvme_sg_get_version_num((void __user *)arg); > + case SG_IO: > + return nvme_sg_io(ns, (void __user *)arg); > + default: > + return -ENOTTY; > + } > +} > + > +#ifdef CONFIG_COMPAT > +static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, > + unsigned int cmd, unsigned long arg) > +{ > + switch (cmd) { > + case SG_IO: > + return -ENOIOCTLCMD; > + } > + return nvme_ioctl(bdev, mode, cmd, arg); > +} > +#else > +#define nvme_compat_ioctl NULL > +#endif > + > +static int nvme_open(struct block_device *bdev, fmode_t mode) > +{ > + return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO; > +} > + > +static void nvme_release(struct gendisk *disk, fmode_t mode) > +{ > + nvme_put_ns(disk->private_data); > +} > + > +static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) > +{ > + /* some standard values */ > + geo->heads = 1 << 6; > + geo->sectors = 1 << 5; > + geo->cylinders = get_capacity(bdev->bd_disk) >> 11; > + return 0; > +} > + > +#ifdef CONFIG_BLK_DEV_INTEGRITY > +static void nvme_init_integrity(struct nvme_ns *ns) > +{ > + struct blk_integrity integrity; > + > + switch (ns->pi_type) { > + case NVME_NS_DPS_PI_TYPE3: > + integrity.profile = &t10_pi_type3_crc; > + break; > + case NVME_NS_DPS_PI_TYPE1: > + case NVME_NS_DPS_PI_TYPE2: > + integrity.profile = &t10_pi_type1_crc; > + break; > + default: > + integrity.profile = NULL; > + break; > + } > + integrity.tuple_size = ns->ms; > + blk_integrity_register(ns->disk, &integrity); > + blk_queue_max_integrity_segments(ns->queue, 1); > +} > +#else > +static void nvme_init_integrity(struct nvme_ns *ns) > +{ > +} > +#endif /* CONFIG_BLK_DEV_INTEGRITY */ > + > +static void nvme_config_discard(struct nvme_ns *ns) > +{ > + u32 logical_block_size = queue_logical_block_size(ns->queue); > + ns->queue->limits.discard_zeroes_data = 0; > + ns->queue->limits.discard_alignment = logical_block_size; > + ns->queue->limits.discard_granularity = logical_block_size; > + blk_queue_max_discard_sectors(ns->queue, 0xffffffff); > + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); > +} > + > +static int nvme_revalidate_disk(struct gendisk *disk) > +{ > + struct nvme_ns *ns = disk->private_data; > + struct nvme_id_ns *id; > + u8 lbaf, pi_type; > + u16 old_ms; > + unsigned short bs; > + > + if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) { > + dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n", > + __func__, ns->ctrl->instance, ns->ns_id); > + return -ENODEV; > + } > + if (id->ncap == 0) { > + kfree(id); > + return -ENODEV; > + } > + > + if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) { > + if (nvme_nvm_register(ns->queue, disk->disk_name)) { > + dev_warn(ns->ctrl->dev, > + "%s: LightNVM init failure\n", __func__); > + 
kfree(id); > + return -ENODEV; > + } > + ns->type = NVME_NS_LIGHTNVM; > + } > + > + old_ms = ns->ms; > + lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; > + ns->lba_shift = id->lbaf[lbaf].ds; > + ns->ms = le16_to_cpu(id->lbaf[lbaf].ms); > + ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); > + > + /* > + * If identify namespace failed, use default 512 byte block size so > + * block layer can use before failing read/write for 0 capacity. > + */ > + if (ns->lba_shift == 0) > + ns->lba_shift = 9; > + bs = 1 << ns->lba_shift; > + /* XXX: PI implementation requires metadata equal t10 pi tuple size */ > + pi_type = ns->ms == sizeof(struct t10_pi_tuple) ? > + id->dps & NVME_NS_DPS_PI_MASK : 0; > + > + blk_mq_freeze_queue(disk->queue); > + if (blk_get_integrity(disk) && (ns->pi_type != pi_type || > + ns->ms != old_ms || > + bs != queue_logical_block_size(disk->queue) || > + (ns->ms && ns->ext))) > + blk_integrity_unregister(disk); > + > + ns->pi_type = pi_type; > + blk_queue_logical_block_size(ns->queue, bs); > + > + if (ns->ms && !blk_get_integrity(disk) && !ns->ext) > + nvme_init_integrity(ns); > + if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk)) > + set_capacity(disk, 0); > + else > + set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); > + > + if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM) > + nvme_config_discard(ns); > + blk_mq_unfreeze_queue(disk->queue); > + > + kfree(id); > + return 0; > +} > + > +static char nvme_pr_type(enum pr_type type) > +{ > + switch (type) { > + case PR_WRITE_EXCLUSIVE: > + return 1; > + case PR_EXCLUSIVE_ACCESS: > + return 2; > + case PR_WRITE_EXCLUSIVE_REG_ONLY: > + return 3; > + case PR_EXCLUSIVE_ACCESS_REG_ONLY: > + return 4; > + case PR_WRITE_EXCLUSIVE_ALL_REGS: > + return 5; > + case PR_EXCLUSIVE_ACCESS_ALL_REGS: > + return 6; > + default: > + return 0; > + } > +}; > + > +static int nvme_pr_command(struct block_device *bdev, u32 cdw10, > + u64 key, u64 sa_key, u8 op) > +{ > + struct nvme_ns *ns = bdev->bd_disk->private_data; > + struct nvme_command c; > + u8 data[16] = { 0, }; > + > + put_unaligned_le64(key, &data[0]); > + put_unaligned_le64(sa_key, &data[8]); > + > + memset(&c, 0, sizeof(c)); > + c.common.opcode = op; > + c.common.nsid = cpu_to_le32(ns->ns_id); > + c.common.cdw10[0] = cpu_to_le32(cdw10); > + > + return nvme_submit_sync_cmd(ns->queue, &c, data, 16); > +} > + > +static int nvme_pr_register(struct block_device *bdev, u64 old, > + u64 new, unsigned flags) > +{ > + u32 cdw10; > + > + if (flags & ~PR_FL_IGNORE_KEY) > + return -EOPNOTSUPP; > + > + cdw10 = old ? 2 : 0; > + cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0; > + cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */ > + return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register); > +} > + > +static int nvme_pr_reserve(struct block_device *bdev, u64 key, > + enum pr_type type, unsigned flags) > +{ > + u32 cdw10; > + > + if (flags & ~PR_FL_IGNORE_KEY) > + return -EOPNOTSUPP; > + > + cdw10 = nvme_pr_type(type) << 8; > + cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0); > + return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire); > +} > + > +static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, > + enum pr_type type, bool abort) > +{ > + u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1; > + return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire); > +} > + > +static int nvme_pr_clear(struct block_device *bdev, u64 key) > +{ > + u32 cdw10 = 1 | key ? 
1 << 3 : 0; > + return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register); > +} > + > +static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) > +{ > + u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0; > + return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); > +} > + > +static const struct pr_ops nvme_pr_ops = { > + .pr_register = nvme_pr_register, > + .pr_reserve = nvme_pr_reserve, > + .pr_release = nvme_pr_release, > + .pr_preempt = nvme_pr_preempt, > + .pr_clear = nvme_pr_clear, > +}; > + > +static const struct block_device_operations nvme_fops = { > + .owner = THIS_MODULE, > + .ioctl = nvme_ioctl, > + .compat_ioctl = nvme_compat_ioctl, > + .open = nvme_open, > + .release = nvme_release, > + .getgeo = nvme_getgeo, > + .revalidate_disk= nvme_revalidate_disk, > + .pr_ops = &nvme_pr_ops, > +}; > + > +/* > + * Initialize the cached copies of the Identify data and various controller > + * register in our nvme_ctrl structure. This should be called as soon as > + * the admin queue is fully up and running. > + */ > +int nvme_init_identify(struct nvme_ctrl *ctrl) > +{ > + struct nvme_id_ctrl *id; > + u64 cap; > + int ret, page_shift; > + > + ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs); > + if (ret) { > + dev_err(ctrl->dev, "Reading VS failed (%d)\n", ret); > + return ret; > + } > + > + ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap); > + if (ret) { > + dev_err(ctrl->dev, "Reading CAP failed (%d)\n", ret); > + return ret; > + } > + page_shift = NVME_CAP_MPSMIN(cap) + 12; > + ctrl->page_size = 1 << page_shift; > + > + if (ctrl->vs >= NVME_VS(1, 1)) > + ctrl->subsystem = NVME_CAP_NSSRC(cap); > + > + ret = nvme_identify_ctrl(ctrl, &id); > + if (ret) { > + dev_err(ctrl->dev, "Identify Controller failed (%d)\n", ret); > + return -EIO; > + } > + > + ctrl->oncs = le16_to_cpup(&id->oncs); > + atomic_set(&ctrl->abort_limit, id->acl + 1); > + ctrl->vwc = id->vwc; > + memcpy(ctrl->serial, id->sn, sizeof(id->sn)); > + memcpy(ctrl->model, id->mn, sizeof(id->mn)); > + memcpy(ctrl->firmware_rev, id->fr, sizeof(id->fr)); > + if (id->mdts) > + ctrl->max_hw_sectors = 1 << (id->mdts + page_shift - 9); > + else > + ctrl->max_hw_sectors = UINT_MAX; > + > + if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && id->vs[3]) { > + unsigned int max_hw_sectors; > + > + ctrl->stripe_size = 1 << (id->vs[3] + page_shift); > + max_hw_sectors = ctrl->stripe_size >> (page_shift - 9); > + if (ctrl->max_hw_sectors) { > + ctrl->max_hw_sectors = min(max_hw_sectors, > + ctrl->max_hw_sectors); > + } else { > + ctrl->max_hw_sectors = max_hw_sectors; > + } > + } > + > + kfree(id); > + return 0; > +} > + > +static int nvme_dev_open(struct inode *inode, struct file *file) > +{ > + struct nvme_ctrl *ctrl; > + int instance = iminor(inode); > + int ret = -ENODEV; > + > + spin_lock(&dev_list_lock); > + list_for_each_entry(ctrl, &nvme_ctrl_list, node) { > + if (ctrl->instance != instance) > + continue; > + > + if (!ctrl->admin_q) { > + ret = -EWOULDBLOCK; > + break; > + } > + if (!kref_get_unless_zero(&ctrl->kref)) > + break; > + file->private_data = ctrl; > + ret = 0; > + break; > + } > + spin_unlock(&dev_list_lock); > + > + return ret; > +} > + > +static int nvme_dev_release(struct inode *inode, struct file *file) > +{ > + nvme_put_ctrl(file->private_data); > + return 0; > +} > + > +static long nvme_dev_ioctl(struct file *file, unsigned int cmd, > + unsigned long arg) > +{ > + struct nvme_ctrl *ctrl = file->private_data; > + void __user *argp = (void __user *)arg; > + 
struct nvme_ns *ns; > + > + switch (cmd) { > + case NVME_IOCTL_ADMIN_CMD: > + return nvme_user_cmd(ctrl, NULL, argp); > + case NVME_IOCTL_IO_CMD: > + if (list_empty(&ctrl->namespaces)) > + return -ENOTTY; > + ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list); > + return nvme_user_cmd(ctrl, ns, argp); > + case NVME_IOCTL_RESET: > + dev_warn(ctrl->dev, "resetting controller\n"); > + return ctrl->ops->reset_ctrl(ctrl); > + case NVME_IOCTL_SUBSYS_RESET: > + return nvme_reset_subsystem(ctrl); > + default: > + return -ENOTTY; > + } > +} > + > +static const struct file_operations nvme_dev_fops = { > + .owner = THIS_MODULE, > + .open = nvme_dev_open, > + .release = nvme_dev_release, > + .unlocked_ioctl = nvme_dev_ioctl, > + .compat_ioctl = nvme_dev_ioctl, > +}; > + > +static ssize_t nvme_sysfs_reset(struct device *dev, > + struct device_attribute *attr, const char *buf, > + size_t count) > +{ > + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); > + int ret; > + > + ret = ctrl->ops->reset_ctrl(ctrl); > + if (ret < 0) > + return ret; > + return count; > +} > +static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset); > + > +static int ns_cmp(void *priv, struct list_head *a, struct list_head *b) > +{ > + struct nvme_ns *nsa = container_of(a, struct nvme_ns, list); > + struct nvme_ns *nsb = container_of(b, struct nvme_ns, list); > + > + return nsa->ns_id - nsb->ns_id; > +} > + > +static struct nvme_ns *nvme_find_ns(struct nvme_ctrl *ctrl, unsigned nsid) > +{ > + struct nvme_ns *ns; > + > + list_for_each_entry(ns, &ctrl->namespaces, list) { > + if (ns->ns_id == nsid) > + return ns; > + if (ns->ns_id > nsid) > + break; > + } > + return NULL; > +} > + > +static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) > +{ > + struct nvme_ns *ns; > + struct gendisk *disk; > + int node = dev_to_node(ctrl->dev); > + > + ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); > + if (!ns) > + return; > + > + ns->queue = blk_mq_init_queue(ctrl->tagset); > + if (IS_ERR(ns->queue)) > + goto out_free_ns; > + queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue); > + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue); > + ns->queue->queuedata = ns; > + ns->ctrl = ctrl; > + > + disk = alloc_disk_node(0, node); > + if (!disk) > + goto out_free_queue; > + > + kref_init(&ns->kref); > + ns->ns_id = nsid; > + ns->disk = disk; > + ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */ > + > + blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); > + if (ctrl->max_hw_sectors) { > + blk_queue_max_hw_sectors(ns->queue, ctrl->max_hw_sectors); > + blk_queue_max_segments(ns->queue, > + (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1); > + } > + if (ctrl->stripe_size) > + blk_queue_chunk_sectors(ns->queue, ctrl->stripe_size >> 9); > + if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) > + blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA); > + blk_queue_virt_boundary(ns->queue, ctrl->page_size - 1); > + > + disk->major = nvme_major; > + disk->first_minor = 0; > + disk->fops = &nvme_fops; > + disk->private_data = ns; > + disk->queue = ns->queue; > + disk->driverfs_dev = ctrl->device; > + disk->flags = GENHD_FL_EXT_DEVT; > + sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, nsid); > + > + if (nvme_revalidate_disk(ns->disk)) > + goto out_free_disk; > + > + list_add_tail(&ns->list, &ctrl->namespaces); > + kref_get(&ctrl->kref); > + if (ns->type != NVME_NS_LIGHTNVM) > + add_disk(ns->disk); > + > + return; > + out_free_disk: > + kfree(disk); > + out_free_queue: > + 
blk_cleanup_queue(ns->queue); > + out_free_ns: > + kfree(ns); > +} > + > +static void nvme_ns_remove(struct nvme_ns *ns) > +{ > + bool kill = nvme_io_incapable(ns->ctrl) && > + !blk_queue_dying(ns->queue); > + > + if (kill) > + blk_set_queue_dying(ns->queue); > + if (ns->disk->flags & GENHD_FL_UP) { > + if (blk_get_integrity(ns->disk)) > + blk_integrity_unregister(ns->disk); > + del_gendisk(ns->disk); > + } > + if (kill || !blk_queue_dying(ns->queue)) { > + blk_mq_abort_requeue_list(ns->queue); > + blk_cleanup_queue(ns->queue); > + } > + list_del_init(&ns->list); > + nvme_put_ns(ns); > +} > + > +static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid) > +{ > + struct nvme_ns *ns; > + > + ns = nvme_find_ns(ctrl, nsid); > + if (ns) { > + if (revalidate_disk(ns->disk)) > + nvme_ns_remove(ns); > + } else > + nvme_alloc_ns(ctrl, nsid); > +} > + > +static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) > +{ > + struct nvme_ns *ns; > + __le32 *ns_list; > + unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024); > + int ret = 0; > + > + ns_list = kzalloc(0x1000, GFP_KERNEL); > + if (!ns_list) > + return -ENOMEM; > + > + for (i = 0; i < num_lists; i++) { > + ret = nvme_identify_ns_list(ctrl, prev, ns_list); > + if (ret) > + goto out; > + > + for (j = 0; j < min(nn, 1024U); j++) { > + nsid = le32_to_cpu(ns_list[j]); > + if (!nsid) > + goto out; > + > + nvme_validate_ns(ctrl, nsid); > + > + while (++prev < nsid) { > + ns = nvme_find_ns(ctrl, prev); > + if (ns) > + nvme_ns_remove(ns); > + } > + } > + nn -= j; > + } > + out: > + kfree(ns_list); > + return ret; > +} > + > +static void __nvme_scan_namespaces(struct nvme_ctrl *ctrl, unsigned nn) > +{ > + struct nvme_ns *ns, *next; > + unsigned i; > + > + for (i = 1; i <= nn; i++) > + nvme_validate_ns(ctrl, i); > + > + list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) { > + if (ns->ns_id > nn) > + nvme_ns_remove(ns); > + } > +} > + > +void nvme_scan_namespaces(struct nvme_ctrl *ctrl) > +{ > + struct nvme_id_ctrl *id; > + unsigned nn; > + > + if (nvme_identify_ctrl(ctrl, &id)) > + return; > + > + nn = le32_to_cpu(id->nn); > + if (ctrl->vs >= NVME_VS(1, 1)) { > + if (!nvme_scan_ns_list(ctrl, nn)) > + goto done; > + } > + __nvme_scan_namespaces(ctrl, le32_to_cpup(&id->nn)); > + done: > + list_sort(NULL, &ctrl->namespaces, ns_cmp); > + kfree(id); > +} > + > +void nvme_remove_namespaces(struct nvme_ctrl *ctrl) > +{ > + struct nvme_ns *ns, *next; > + > + list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) > + nvme_ns_remove(ns); > +} > + > +static DEFINE_IDA(nvme_instance_ida); > + > +static int nvme_set_instance(struct nvme_ctrl *ctrl) > +{ > + int instance, error; > + > + do { > + if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL)) > + return -ENODEV; > + > + spin_lock(&dev_list_lock); > + error = ida_get_new(&nvme_instance_ida, &instance); > + spin_unlock(&dev_list_lock); > + } while (error == -EAGAIN); > + > + if (error) > + return -ENODEV; > + > + ctrl->instance = instance; > + return 0; > +} > + > +static void nvme_release_instance(struct nvme_ctrl *ctrl) > +{ > + spin_lock(&dev_list_lock); > + ida_remove(&nvme_instance_ida, ctrl->instance); > + spin_unlock(&dev_list_lock); > +} > + > +void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) > + { > + device_remove_file(ctrl->device, &dev_attr_reset_controller); > + device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance)); > + > + spin_lock(&dev_list_lock); > + list_del(&ctrl->node); > + spin_unlock(&dev_list_lock); > +} > + > +static void 
nvme_free_ctrl(struct kref *kref) > +{ > + struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref); > + > + put_device(ctrl->device); > + nvme_release_instance(ctrl); > + > + ctrl->ops->free_ctrl(ctrl); > +} > + > +void nvme_put_ctrl(struct nvme_ctrl *ctrl) > +{ > + kref_put(&ctrl->kref, nvme_free_ctrl); > +} > + > +/* > + * Initialize a NVMe controller structures. This needs to be called during > + * earliest initialization so that we have the initialized structured around > + * during probing. > + */ > +int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, > + const struct nvme_ctrl_ops *ops, u16 vendor, > + unsigned long quirks) > +{ > + int ret; > + > + INIT_LIST_HEAD(&ctrl->namespaces); > + kref_init(&ctrl->kref); > + ctrl->dev = dev; > + ctrl->ops = ops; > + ctrl->vendor = vendor; > + ctrl->quirks = quirks; > + > + ret = nvme_set_instance(ctrl); > + if (ret) > + goto out; > + > + ctrl->device = device_create(nvme_class, ctrl->dev, > + MKDEV(nvme_char_major, ctrl->instance), > + dev, "nvme%d", ctrl->instance); > + if (IS_ERR(ctrl->device)) { > + ret = PTR_ERR(ctrl->device); > + goto out_release_instance; > + } > + get_device(ctrl->device); > + dev_set_drvdata(ctrl->device, ctrl); > + > + ret = device_create_file(ctrl->device, &dev_attr_reset_controller); > + if (ret) > + goto out_put_device; > + > + spin_lock(&dev_list_lock); > + list_add_tail(&ctrl->node, &nvme_ctrl_list); > + spin_unlock(&dev_list_lock); > + > + return 0; > + > +out_put_device: > + put_device(ctrl->device); > + device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance)); > +out_release_instance: > + nvme_release_instance(ctrl); > +out: > + return ret; > +} > + > +int __init nvme_core_init(void) > +{ > + int result; > + > + result = register_blkdev(nvme_major, "nvme"); > + if (result < 0) > + return result; > + else if (result > 0) > + nvme_major = result; > + > + result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme", > + &nvme_dev_fops); > + if (result < 0) > + goto unregister_blkdev; > + else if (result > 0) > + nvme_char_major = result; > + > + nvme_class = class_create(THIS_MODULE, "nvme"); > + if (IS_ERR(nvme_class)) { > + result = PTR_ERR(nvme_class); > + goto unregister_chrdev; > + } > + > + return 0; > + > + unregister_chrdev: > + __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); > + unregister_blkdev: > + unregister_blkdev(nvme_major, "nvme"); > + return result; > +} > + > +void nvme_core_exit(void) > +{ > + unregister_blkdev(nvme_major, "nvme"); > + class_destroy(nvme_class); > + __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); > +} > diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c > index 9444884..5c5f455 100644 > --- a/drivers/nvme/host/pci.c > +++ b/drivers/nvme/host/pci.c > @@ -1040,7 +1040,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, > struct request *req; > int ret; > > - req = blk_mq_alloc_request(q, write, GFP_KERNEL, false); > + req = blk_mq_alloc_request(q, write, 0); > if (IS_ERR(req)) > return PTR_ERR(req); > > @@ -1093,7 +1093,8 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev) > struct nvme_cmd_info *cmd_info; > struct request *req; > > - req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, true); > + req = blk_mq_alloc_request(dev->admin_q, WRITE, > + BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED); > if (IS_ERR(req)) > return PTR_ERR(req); > > @@ -1118,7 +1119,7 @@ static int nvme_submit_admin_async_cmd(struct nvme_dev *dev, > struct request *req; > struct 
nvme_cmd_info *cmd_rq; > > - req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_KERNEL, false); > + req = blk_mq_alloc_request(dev->admin_q, WRITE, 0); > if (IS_ERR(req)) > return PTR_ERR(req); > > @@ -1319,8 +1320,8 @@ static void nvme_abort_req(struct request *req) > if (!dev->abort_limit) > return; > > - abort_req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, > - false); > + abort_req = blk_mq_alloc_request(dev->admin_q, WRITE, > + BLK_MQ_REQ_NOWAIT); > if (IS_ERR(abort_req)) > return; > > diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h > index daf17d7..7fc9296 100644 > --- a/include/linux/blk-mq.h > +++ b/include/linux/blk-mq.h > @@ -188,8 +188,14 @@ void blk_mq_insert_request(struct request *, bool, bool, bool); > void blk_mq_free_request(struct request *rq); > void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *, struct request *rq); > bool blk_mq_can_queue(struct blk_mq_hw_ctx *); > + > +enum { > + BLK_MQ_REQ_NOWAIT = (1 << 0), /* return when out of requests */ > + BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */ > +}; > + > struct request *blk_mq_alloc_request(struct request_queue *q, int rw, > - gfp_t gfp, bool reserved); > + unsigned int flags); > struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); > struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags);
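
For reference, the calling convention this patch ends up with, as exercised by the
hunks above (an illustrative sketch of a caller only, not part of the patch; error
handling abbreviated):

    /*
     * Old convention: blocking vs. nowait was encoded in a gfp_t and the
     * reserved pool was selected by a bool, e.g.
     *
     *     rq = blk_mq_alloc_request(q, WRITE, GFP_KERNEL, false);
     *     rq = blk_mq_alloc_request(q, WRITE, GFP_ATOMIC, true);
     *
     * New convention: a single flags word.
     */
    struct request *rq;

    /* Default: may sleep waiting for a tag. */
    rq = blk_mq_alloc_request(q, WRITE, 0);
    if (IS_ERR(rq))
        return PTR_ERR(rq);

    /* Fail immediately instead of sleeping, and allocate from the reserved pool. */
    rq = blk_mq_alloc_request(q, WRITE,
            BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED);
    if (IS_ERR(rq))
        return PTR_ERR(rq);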