scsi: storvsc: Re-init stor_chns when a channel interrupt is re-assigned
authorAndrea Parri (Microsoft) <parri.andrea@gmail.com>
Mon, 6 Apr 2020 00:15:14 +0000 (02:15 +0200)
committerWei Liu <wei.liu@kernel.org>
Wed, 20 May 2020 09:13:19 +0000 (09:13 +0000)
For each storvsc_device, storvsc keeps track of the channel target CPUs
associated with the device (alloced_cpus) and it uses this information to
fill a "cache" (stor_chns) mapping CPU->channel according to a certain
heuristic.  Update the alloced_cpus mask and the stor_chns array when a
channel of the storvsc device is re-assigned to a different CPU.

Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
Cc: "James E.J. Bottomley" <jejb@linux.ibm.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: <linux-scsi@vger.kernel.org>
Link: https://lore.kernel.org/r/20200406001514.19876-12-parri.andrea@gmail.com
Reviewed-by: Long Li <longli@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
[ wei: fix a small issue reported by kbuild test robot <lkp@intel.com> ]
Signed-off-by: Wei Liu <wei.liu@kernel.org>
drivers/hv/vmbus_drv.c
drivers/scsi/storvsc_drv.c
include/linux/hyperv.h

index 5d24b25..28c009c 100644 (file)
@@ -1777,6 +1777,10 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
         * in on a CPU that is different from the channel target_cpu value.
         */
 
+       if (channel->change_target_cpu_callback)
+               (*channel->change_target_cpu_callback)(channel,
+                               channel->target_cpu, target_cpu);
+
        channel->target_cpu = target_cpu;
        channel->target_vp = hv_cpu_number_to_vp_number(target_cpu);
        channel->numa_node = cpu_to_node(target_cpu);
index fb41636..072ed87 100644 (file)
@@ -621,6 +621,64 @@ get_in_err:
 
 }
 
+static void storvsc_change_target_cpu(struct vmbus_channel *channel, u32 old,
+                                     u32 new)
+{
+       struct storvsc_device *stor_device;
+       struct vmbus_channel *cur_chn;
+       bool old_is_alloced = false;
+       struct hv_device *device;
+       unsigned long flags;
+       int cpu;
+
+       device = channel->primary_channel ?
+                       channel->primary_channel->device_obj
+                               : channel->device_obj;
+       stor_device = get_out_stor_device(device);
+       if (!stor_device)
+               return;
+
+       /* See storvsc_do_io() -> get_og_chn(). */
+       spin_lock_irqsave(&device->channel->lock, flags);
+
+       /*
+        * Determines if the storvsc device has other channels assigned to
+        * the "old" CPU to update the alloced_cpus mask and the stor_chns
+        * array.
+        */
+       if (device->channel != channel && device->channel->target_cpu == old) {
+               cur_chn = device->channel;
+               old_is_alloced = true;
+               goto old_is_alloced;
+       }
+       list_for_each_entry(cur_chn, &device->channel->sc_list, sc_list) {
+               if (cur_chn == channel)
+                       continue;
+               if (cur_chn->target_cpu == old) {
+                       old_is_alloced = true;
+                       goto old_is_alloced;
+               }
+       }
+
+old_is_alloced:
+       if (old_is_alloced)
+               WRITE_ONCE(stor_device->stor_chns[old], cur_chn);
+       else
+               cpumask_clear_cpu(old, &stor_device->alloced_cpus);
+
+       /* "Flush" the stor_chns array. */
+       for_each_possible_cpu(cpu) {
+               if (stor_device->stor_chns[cpu] && !cpumask_test_cpu(
+                                       cpu, &stor_device->alloced_cpus))
+                       WRITE_ONCE(stor_device->stor_chns[cpu], NULL);
+       }
+
+       WRITE_ONCE(stor_device->stor_chns[new], channel);
+       cpumask_set_cpu(new, &stor_device->alloced_cpus);
+
+       spin_unlock_irqrestore(&device->channel->lock, flags);
+}
+
 static void handle_sc_creation(struct vmbus_channel *new_sc)
 {
        struct hv_device *device = new_sc->primary_channel->device_obj;
@@ -648,6 +706,8 @@ static void handle_sc_creation(struct vmbus_channel *new_sc)
                return;
        }
 
+       new_sc->change_target_cpu_callback = storvsc_change_target_cpu;
+
        /* Add the sub-channel to the array of available channels. */
        stor_device->stor_chns[new_sc->target_cpu] = new_sc;
        cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
@@ -876,6 +936,8 @@ static int storvsc_channel_init(struct hv_device *device, bool is_fc)
        if (stor_device->stor_chns == NULL)
                return -ENOMEM;
 
+       device->channel->change_target_cpu_callback = storvsc_change_target_cpu;
+
        stor_device->stor_chns[device->channel->target_cpu] = device->channel;
        cpumask_set_cpu(device->channel->target_cpu,
                        &stor_device->alloced_cpus);
@@ -1248,8 +1310,10 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
        const struct cpumask *node_mask;
        int num_channels, tgt_cpu;
 
-       if (stor_device->num_sc == 0)
+       if (stor_device->num_sc == 0) {
+               stor_device->stor_chns[q_num] = stor_device->device->channel;
                return stor_device->device->channel;
+       }
 
        /*
         * Our channel array is sparsley populated and we
@@ -1258,7 +1322,6 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
         * The strategy is simple:
         * I. Ensure NUMA locality
         * II. Distribute evenly (best effort)
-        * III. Mapping is persistent.
         */
 
        node_mask = cpumask_of_node(cpu_to_node(q_num));
@@ -1268,8 +1331,10 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
                if (cpumask_test_cpu(tgt_cpu, node_mask))
                        num_channels++;
        }
-       if (num_channels == 0)
+       if (num_channels == 0) {
+               stor_device->stor_chns[q_num] = stor_device->device->channel;
                return stor_device->device->channel;
+       }
 
        hash_qnum = q_num;
        while (hash_qnum >= num_channels)
@@ -1295,6 +1360,7 @@ static int storvsc_do_io(struct hv_device *device,
        struct storvsc_device *stor_device;
        struct vstor_packet *vstor_packet;
        struct vmbus_channel *outgoing_channel, *channel;
+       unsigned long flags;
        int ret = 0;
        const struct cpumask *node_mask;
        int tgt_cpu;
@@ -1308,10 +1374,11 @@ static int storvsc_do_io(struct hv_device *device,
 
        request->device  = device;
        /*
-        * Select an an appropriate channel to send the request out.
+        * Select an appropriate channel to send the request out.
         */
-       if (stor_device->stor_chns[q_num] != NULL) {
-               outgoing_channel = stor_device->stor_chns[q_num];
+       /* See storvsc_change_target_cpu(). */
+       outgoing_channel = READ_ONCE(stor_device->stor_chns[q_num]);
+       if (outgoing_channel != NULL) {
                if (outgoing_channel->target_cpu == q_num) {
                        /*
                         * Ideally, we want to pick a different channel if
@@ -1324,7 +1391,10 @@ static int storvsc_do_io(struct hv_device *device,
                                        continue;
                                if (tgt_cpu == q_num)
                                        continue;
-                               channel = stor_device->stor_chns[tgt_cpu];
+                               channel = READ_ONCE(
+                                       stor_device->stor_chns[tgt_cpu]);
+                               if (channel == NULL)
+                                       continue;
                                if (hv_get_avail_to_write_percent(
                                                        &channel->outbound)
                                                > ring_avail_percent_lowater) {
@@ -1350,7 +1420,10 @@ static int storvsc_do_io(struct hv_device *device,
                        for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) {
                                if (cpumask_test_cpu(tgt_cpu, node_mask))
                                        continue;
-                               channel = stor_device->stor_chns[tgt_cpu];
+                               channel = READ_ONCE(
+                                       stor_device->stor_chns[tgt_cpu]);
+                               if (channel == NULL)
+                                       continue;
                                if (hv_get_avail_to_write_percent(
                                                        &channel->outbound)
                                                > ring_avail_percent_lowater) {
@@ -1360,7 +1433,14 @@ static int storvsc_do_io(struct hv_device *device,
                        }
                }
        } else {
+               spin_lock_irqsave(&device->channel->lock, flags);
+               outgoing_channel = stor_device->stor_chns[q_num];
+               if (outgoing_channel != NULL) {
+                       spin_unlock_irqrestore(&device->channel->lock, flags);
+                       goto found_channel;
+               }
                outgoing_channel = get_og_chn(stor_device, q_num);
+               spin_unlock_irqrestore(&device->channel->lock, flags);
        }
 
 found_channel:
index b85d758..cd64ab7 100644 (file)
@@ -773,6 +773,9 @@ struct vmbus_channel {
        void (*onchannel_callback)(void *context);
        void *channel_callback_context;
 
+       void (*change_target_cpu_callback)(struct vmbus_channel *channel,
+                       u32 old, u32 new);
+
        /*
         * Synchronize channel scheduling and channel removal; see the inline
         * comments in vmbus_chan_sched() and vmbus_reset_channel_cb().