Skip to content

Commit

Permalink
Merge branch 'nvme-5.3' of git://git.infradead.org/nvme into for-linus
Browse files Browse the repository at this point in the history
Pull NVMe fixes from Christoph:

"Lot of fixes all over the place, and two very minor features that
 were in the nvme tree by the end of the merge window, but hadn't made
 it out to Jens yet."

* 'nvme-5.3' of git://git.infradead.org/nvme:
  nvme: fix regression upon hot device removal and insertion
  nvme-fc: fix module unloads while lports still pending
  nvme-tcp: don't use sendpage for SLAB pages
  nvme-tcp: set the STABLE_WRITES flag when data digests are enabled
  nvmet: print a hint while rejecting NSID 0 or 0xffffffff
  nvme-multipath: do not select namespaces which are about to be removed
  nvme-multipath: also check for a disabled path if there is a single sibling
  nvme-multipath: factor out a nvme_path_is_disabled helper
  nvme: set physical block size and optimal I/O size
  nvme: add I/O characteristics fields
  nvmet: export I/O characteristics attributes in Identify
  nvme-trace: add delete completion and submission queue to admin cmds tracer
  nvme-trace: fix spelling mistake "spcecific" -> "specific"
  nvme-pci: limit max_hw_sectors based on the DMA max mapping size
  nvme-pci: check for NULL return from pci_alloc_p2pmem()
  nvme-pci: don't create a read hctx mapping without read queues
  nvme-pci: don't fall back to a 32-bit DMA mask
  nvme-pci: make nvme_dev_pm_ops static
  nvme-fcloop: resolve warnings on RCU usage and sleep warnings
  nvme-fcloop: fix inconsistent lock state warnings
  • Loading branch information
axboe committed Jul 11, 2019
2 parents 4ddeaae + 420dc73 commit b740306
Show file tree
Hide file tree
Showing 14 changed files with 237 additions and 51 deletions.
43 changes: 39 additions & 4 deletions drivers/nvme/host/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <linux/hdreg.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/list_sort.h>
#include <linux/slab.h>
#include <linux/types.h>
Expand Down Expand Up @@ -1626,6 +1627,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
{
sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9);
unsigned short bs = 1 << ns->lba_shift;
u32 atomic_bs, phys_bs, io_opt;

if (ns->lba_shift > PAGE_SHIFT) {
/* unsupported block size, set capacity to 0 later */
Expand All @@ -1634,9 +1636,37 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_mq_freeze_queue(disk->queue);
blk_integrity_unregister(disk);

if (id->nabo == 0) {
/*
* Bit 1 indicates whether NAWUPF is defined for this namespace
* and whether it should be used instead of AWUPF. If NAWUPF ==
* 0 then AWUPF must be used instead.
*/
if (id->nsfeat & (1 << 1) && id->nawupf)
atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
else
atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
} else {
atomic_bs = bs;
}
phys_bs = bs;
io_opt = bs;
if (id->nsfeat & (1 << 4)) {
/* NPWG = Namespace Preferred Write Granularity */
phys_bs *= 1 + le16_to_cpu(id->npwg);
/* NOWS = Namespace Optimal Write Size */
io_opt *= 1 + le16_to_cpu(id->nows);
}

blk_queue_logical_block_size(disk->queue, bs);
blk_queue_physical_block_size(disk->queue, bs);
blk_queue_io_min(disk->queue, bs);
/*
* Linux filesystems assume writing a single physical block is
* an atomic operation. Hence limit the physical block size to the
* value of the Atomic Write Unit Power Fail parameter.
*/
blk_queue_physical_block_size(disk->queue, min(phys_bs, atomic_bs));
blk_queue_io_min(disk->queue, phys_bs);
blk_queue_io_opt(disk->queue, io_opt);

if (ns->ms && !ns->ext &&
(ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
Expand Down Expand Up @@ -2386,8 +2416,8 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
lockdep_assert_held(&nvme_subsystems_lock);

list_for_each_entry(tmp, &subsys->ctrls, subsys_entry) {
if (ctrl->state == NVME_CTRL_DELETING ||
ctrl->state == NVME_CTRL_DEAD)
if (tmp->state == NVME_CTRL_DELETING ||
tmp->state == NVME_CTRL_DEAD)
continue;

if (tmp->cntlid == ctrl->cntlid) {
Expand Down Expand Up @@ -2433,6 +2463,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev));
subsys->vendor_id = le16_to_cpu(id->vid);
subsys->cmic = id->cmic;
subsys->awupf = le16_to_cpu(id->awupf);
#ifdef CONFIG_NVME_MULTIPATH
subsys->iopolicy = NVME_IOPOLICY_NUMA;
#endif
Expand Down Expand Up @@ -3274,6 +3305,10 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
goto out_free_ns;
}

if (ctrl->opts->data_digest)
ns->queue->backing_dev_info->capabilities
|= BDI_CAP_STABLE_WRITES;

blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue);
if (ctrl->ops->flags & NVME_F_PCI_P2PDMA)
blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue);
Expand Down
51 changes: 48 additions & 3 deletions drivers/nvme/host/fc.c
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,9 @@ static DEFINE_IDA(nvme_fc_ctrl_cnt);

static struct workqueue_struct *nvme_fc_wq;

static bool nvme_fc_waiting_to_unload;
static DECLARE_COMPLETION(nvme_fc_unload_proceed);

/*
* These items are short-term. They will eventually be moved into
* a generic FC class. See comments in module init.
Expand All @@ -229,6 +232,8 @@ nvme_fc_free_lport(struct kref *ref)
/* remove from transport list */
spin_lock_irqsave(&nvme_fc_lock, flags);
list_del(&lport->port_list);
if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list))
complete(&nvme_fc_unload_proceed);
spin_unlock_irqrestore(&nvme_fc_lock, flags);

ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
Expand Down Expand Up @@ -3456,11 +3461,51 @@ static int __init nvme_fc_init_module(void)
return ret;
}

/*
 * Request deletion of every controller attached to @rport.
 *
 * The remote port's ctrl_list is walked with rport->lock held; a warning
 * is logged for each controller before nvme_delete_ctrl() is called on it.
 */
static void nvme_fc_delete_controllers(struct nvme_fc_rport *rport)
{
	struct nvme_fc_ctrl *fc_ctrl;

	spin_lock(&rport->lock);
	list_for_each_entry(fc_ctrl, &rport->ctrl_list, ctrl_list) {
		/* Tell the admin why this controller is going away. */
		dev_warn(fc_ctrl->ctrl.device,
			 "NVME-FC{%d}: transport unloading: deleting ctrl\n",
			 fc_ctrl->cnum);
		nvme_delete_ctrl(&fc_ctrl->ctrl);
	}
	spin_unlock(&rport->lock);
}

static void
nvme_fc_cleanup_for_unload(void)
{
struct nvme_fc_lport *lport;
struct nvme_fc_rport *rport;

list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
list_for_each_entry(rport, &lport->endp_list, endp_list) {
nvme_fc_delete_controllers(rport);
}
}
}

static void __exit nvme_fc_exit_module(void)
{
/* sanity check - all lports should be removed */
if (!list_empty(&nvme_fc_lport_list))
pr_warn("%s: localport list not empty\n", __func__);
unsigned long flags;
bool need_cleanup = false;

spin_lock_irqsave(&nvme_fc_lock, flags);
nvme_fc_waiting_to_unload = true;
if (!list_empty(&nvme_fc_lport_list)) {
need_cleanup = true;
nvme_fc_cleanup_for_unload();
}
spin_unlock_irqrestore(&nvme_fc_lock, flags);
if (need_cleanup) {
pr_info("%s: waiting for ctlr deletes\n", __func__);
wait_for_completion(&nvme_fc_unload_proceed);
pr_info("%s: ctrl deletes complete\n", __func__);
}

nvmf_unregister_transport(&nvme_fc_transport);

Expand Down
18 changes: 13 additions & 5 deletions drivers/nvme/host/multipath.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,14 +123,20 @@ void nvme_mpath_clear_current_path(struct nvme_ns *ns)
}
}

static bool nvme_path_is_disabled(struct nvme_ns *ns)
{
return ns->ctrl->state != NVME_CTRL_LIVE ||
test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
test_bit(NVME_NS_REMOVING, &ns->flags);
}

static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
{
int found_distance = INT_MAX, fallback_distance = INT_MAX, distance;
struct nvme_ns *found = NULL, *fallback = NULL, *ns;

list_for_each_entry_rcu(ns, &head->list, siblings) {
if (ns->ctrl->state != NVME_CTRL_LIVE ||
test_bit(NVME_NS_ANA_PENDING, &ns->flags))
if (nvme_path_is_disabled(ns))
continue;

if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA)
Expand Down Expand Up @@ -178,14 +184,16 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head,
{
struct nvme_ns *ns, *found, *fallback = NULL;

if (list_is_singular(&head->list))
if (list_is_singular(&head->list)) {
if (nvme_path_is_disabled(old))
return NULL;
return old;
}

for (ns = nvme_next_ns(head, old);
ns != old;
ns = nvme_next_ns(head, ns)) {
if (ns->ctrl->state != NVME_CTRL_LIVE ||
test_bit(NVME_NS_ANA_PENDING, &ns->flags))
if (nvme_path_is_disabled(ns))
continue;

if (ns->ana_state == NVME_ANA_OPTIMIZED) {
Expand Down
1 change: 1 addition & 0 deletions drivers/nvme/host/nvme.h
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ struct nvme_subsystem {
char firmware_rev[8];
u8 cmic;
u16 vendor_id;
u16 awupf; /* 0's based awupf value. */
struct ida ns_ida;
#ifdef CONFIG_NVME_MULTIPATH
enum nvme_iopolicy iopolicy;
Expand Down
26 changes: 16 additions & 10 deletions drivers/nvme/host/pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -1439,11 +1439,15 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,

if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth));
nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev,
nvmeq->sq_cmds);
if (nvmeq->sq_dma_addr) {
set_bit(NVMEQ_SQ_CMB, &nvmeq->flags);
return 0;
if (nvmeq->sq_cmds) {
nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev,
nvmeq->sq_cmds);
if (nvmeq->sq_dma_addr) {
set_bit(NVMEQ_SQ_CMB, &nvmeq->flags);
return 0;
}

pci_free_p2pmem(pdev, nvmeq->sq_cmds, SQ_SIZE(depth));
}
}

Expand Down Expand Up @@ -2250,7 +2254,9 @@ static int nvme_dev_add(struct nvme_dev *dev)
if (!dev->ctrl.tagset) {
dev->tagset.ops = &nvme_mq_ops;
dev->tagset.nr_hw_queues = dev->online_queues - 1;
dev->tagset.nr_maps = 2; /* default + read */
dev->tagset.nr_maps = 1; /* default */
if (dev->io_queues[HCTX_TYPE_READ])
dev->tagset.nr_maps++;
if (dev->io_queues[HCTX_TYPE_POLL])
dev->tagset.nr_maps++;
dev->tagset.timeout = NVME_IO_TIMEOUT;
Expand Down Expand Up @@ -2289,8 +2295,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)

pci_set_master(pdev);

if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) &&
dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32)))
if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)))
goto disable;

if (readl(dev->bar + NVME_REG_CSTS) == -1) {
Expand Down Expand Up @@ -2498,7 +2503,8 @@ static void nvme_reset_work(struct work_struct *work)
* Limit the max command size to prevent iod->sg allocations going
* over a single page.
*/
dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;
dev->ctrl.max_hw_sectors = min_t(u32,
NVME_MAX_KB_SZ << 1, dma_max_mapping_size(dev->dev) >> 9);
dev->ctrl.max_segments = NVME_MAX_SEGS;

/*
Expand Down Expand Up @@ -2923,7 +2929,7 @@ static int nvme_simple_resume(struct device *dev)
return 0;
}

const struct dev_pm_ops nvme_dev_pm_ops = {
static const struct dev_pm_ops nvme_dev_pm_ops = {
.suspend = nvme_suspend,
.resume = nvme_resume,
.freeze = nvme_simple_suspend,
Expand Down
9 changes: 8 additions & 1 deletion drivers/nvme/host/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -860,7 +860,14 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
else
flags |= MSG_MORE;

ret = kernel_sendpage(queue->sock, page, offset, len, flags);
/* can't zcopy slab pages */
if (unlikely(PageSlab(page))) {
ret = sock_no_sendpage(queue->sock, page, offset, len,
flags);
} else {
ret = kernel_sendpage(queue->sock, page, offset, len,
flags);
}
if (ret <= 0)
return ret;

Expand Down
28 changes: 27 additions & 1 deletion drivers/nvme/host/trace.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@
#include <asm/unaligned.h>
#include "trace.h"

/*
 * Format the Delete Submission Queue admin command for tracing.
 *
 * The queue identifier is read as an unaligned little-endian 16-bit value
 * from the start of @cdw10 and written to @p as "sqid=<n>", followed by a
 * terminating zero byte.  Returns the buffer pointer of @p as it was
 * before anything was written.
 */
static const char *nvme_trace_delete_sq(struct trace_seq *p, u8 *cdw10)
{
	const char *start = trace_seq_buffer_ptr(p);

	trace_seq_printf(p, "sqid=%u", get_unaligned_le16(cdw10));
	trace_seq_putc(p, 0);	/* terminate the string */

	return start;
}

static const char *nvme_trace_create_sq(struct trace_seq *p, u8 *cdw10)
{
const char *ret = trace_seq_buffer_ptr(p);
Expand All @@ -23,6 +34,17 @@ static const char *nvme_trace_create_sq(struct trace_seq *p, u8 *cdw10)
return ret;
}

/*
 * Format the Delete Completion Queue admin command for tracing.
 *
 * The queue identifier is read as an unaligned little-endian 16-bit value
 * from the start of @cdw10 and written to @p as "cqid=<n>", followed by a
 * terminating zero byte.  Returns the buffer pointer of @p as it was
 * before anything was written.
 */
static const char *nvme_trace_delete_cq(struct trace_seq *p, u8 *cdw10)
{
	const char *start = trace_seq_buffer_ptr(p);

	trace_seq_printf(p, "cqid=%u", get_unaligned_le16(cdw10));
	trace_seq_putc(p, 0);	/* terminate the string */

	return start;
}

static const char *nvme_trace_create_cq(struct trace_seq *p, u8 *cdw10)
{
const char *ret = trace_seq_buffer_ptr(p);
Expand Down Expand Up @@ -107,8 +129,12 @@ const char *nvme_trace_parse_admin_cmd(struct trace_seq *p,
u8 opcode, u8 *cdw10)
{
switch (opcode) {
case nvme_admin_delete_sq:
return nvme_trace_delete_sq(p, cdw10);
case nvme_admin_create_sq:
return nvme_trace_create_sq(p, cdw10);
case nvme_admin_delete_cq:
return nvme_trace_delete_cq(p, cdw10);
case nvme_admin_create_cq:
return nvme_trace_create_cq(p, cdw10);
case nvme_admin_identify:
Expand Down Expand Up @@ -178,7 +204,7 @@ static const char *nvme_trace_fabrics_common(struct trace_seq *p, u8 *spc)
{
const char *ret = trace_seq_buffer_ptr(p);

trace_seq_printf(p, "spcecific=%*ph", 24, spc);
trace_seq_printf(p, "specific=%*ph", 24, spc);
trace_seq_putc(p, 0);
return ret;
}
Expand Down
3 changes: 3 additions & 0 deletions drivers/nvme/target/admin-cmd.c
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,9 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
break;
}

if (ns->bdev)
nvmet_bdev_set_limits(ns->bdev, id);

/*
* We just provide a single LBA format that matches what the
* underlying device reports.
Expand Down
4 changes: 3 additions & 1 deletion drivers/nvme/target/configfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -588,8 +588,10 @@ static struct config_group *nvmet_ns_make(struct config_group *group,
goto out;

ret = -EINVAL;
if (nsid == 0 || nsid == NVME_NSID_ALL)
if (nsid == 0 || nsid == NVME_NSID_ALL) {
pr_err("invalid nsid %#x", nsid);
goto out;
}

ret = -ENOMEM;
ns = nvmet_ns_alloc(subsys, nsid);
Expand Down
Loading

0 comments on commit b740306

Please sign in to comment.