nvme: Prevent resets during paused controller state
author: Keith Busch <kbusch@kernel.org>
Fri, 6 Sep 2019 17:23:08 +0000 (11:23 -0600)
committer: Keith Busch <kbusch@kernel.org>
Mon, 14 Oct 2019 14:21:54 +0000 (23:21 +0900)
A paused controller is doing critical internal activation work in the
background. Prevent subsequent controller resets from occurring during
this period by setting the controller state to RESETTING first. A helper
function, nvme_try_sched_reset(), is introduced for these paths so
they may continue with scheduling the reset_work after they've completed
their uninterruptible critical section.

Tested-by: Edmund Nadolski <edmund.nadolski@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
drivers/nvme/host/core.c

index e451e77..e94fa69 100644 (file)
@@ -120,6 +120,21 @@ static void nvme_queue_scan(struct nvme_ctrl *ctrl)
                queue_work(nvme_wq, &ctrl->scan_work);
 }
 
+/*
+ * Queue reset_work for a controller that a caller already moved to the
+ * RESETTING state to keep other reset attempts from interrupting it. Returns
+ * 0 once queued, or -EBUSY if the state is no longer RESETTING or queue_work()
+ * did not queue the work. A hot removal may prevent this from succeeding.
+ */
+static int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
+{
+       if (ctrl->state != NVME_CTRL_RESETTING)
+               return -EBUSY;
+       if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
+               return -EBUSY;
+       return 0;
+}
+
 int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
 {
        if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
@@ -3828,13 +3843,13 @@ static void nvme_fw_act_work(struct work_struct *work)
                if (time_after(jiffies, fw_act_timeout)) {
                        dev_warn(ctrl->device,
                                "Fw activation timeout, reset controller\n");
-                       nvme_reset_ctrl(ctrl);
-                       break;
+                       nvme_try_sched_reset(ctrl);
+                       return;
                }
                msleep(100);
        }
 
-       if (ctrl->state != NVME_CTRL_LIVE)
+       if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
                return;
 
        nvme_start_queues(ctrl);
@@ -3854,7 +3869,13 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
                nvme_queue_scan(ctrl);
                break;
        case NVME_AER_NOTICE_FW_ACT_STARTING:
-               queue_work(nvme_wq, &ctrl->fw_act_work);
+               /*
+                * We are (ab)using the RESETTING state to prevent subsequent
+                * recovery actions from interfering with the controller's
+                * firmware activation.
+                */
+               if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
+                       queue_work(nvme_wq, &ctrl->fw_act_work);
                break;
 #ifdef CONFIG_NVME_MULTIPATH
        case NVME_AER_NOTICE_ANA: