mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-24 02:20:54 +07:00
Drivers: hv: vmbus: Resolve more races involving init_vp_index()
init_vp_index() uses the (per-node) hv_numa_map[] masks to record the
CPUs allocated for channel interrupts at a given time, and to distribute
the performance-critical channels across the available CPUs: in particular,
the mask of "candidate" target CPUs in a given NUMA node, for a newly
offered channel, is determined by XOR-ing the node's CPU mask and the
node's hv_numa_map. This operation/mechanism assumes that no offline
CPU is set in the hv_numa_map mask, an assumption that does not hold
since that mask is currently not updated when a channel is removed or
assigned to a different CPU.
To address the issues described above, this patch adds hooks in the channel
removal path (hv_process_channel_removal()) and in target_cpu_store()
in order to clear and, respectively, update the hv_numa_map[] masks as needed.
It also adds a (missed) update of the masks in init_vp_index() (cf.,
e.g., the memory-allocation failure path in this function).
As in the case of init_vp_index(), such hooks need to determine
whether the given channel is performance critical. init_vp_index() does
this by parsing the channel's offer; it cannot rely on the device
data structure (device_obj) to retrieve such information because the
device data structure has not been allocated/linked with the channel
by the time that init_vp_index() executes. A similar situation may
hold in hv_is_alloced_cpu() (defined below); the adopted approach is
to "cache" the device type of the channel, as computed by parsing the
channel's offer, in the channel structure itself.
Fixes: 7527810573 ("Drivers: hv: vmbus: Introduce the CHANNELMSG_MODIFYCHANNEL message type")
Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lore.kernel.org/r/20200522171901.204127-3-parri.andrea@gmail.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
This commit is contained in:
parent
a949e86c0d
commit
afaa33da08
@ -24,9 +24,9 @@
|
|||||||
|
|
||||||
#include "hyperv_vmbus.h"
|
#include "hyperv_vmbus.h"
|
||||||
|
|
||||||
static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);
|
static void init_vp_index(struct vmbus_channel *channel);
|
||||||
|
|
||||||
static const struct vmbus_device vmbus_devs[] = {
|
const struct vmbus_device vmbus_devs[] = {
|
||||||
/* IDE */
|
/* IDE */
|
||||||
{ .dev_type = HV_IDE,
|
{ .dev_type = HV_IDE,
|
||||||
HV_IDE_GUID,
|
HV_IDE_GUID,
|
||||||
@ -431,6 +431,13 @@ void hv_process_channel_removal(struct vmbus_channel *channel)
|
|||||||
spin_unlock_irqrestore(&primary_channel->lock, flags);
|
spin_unlock_irqrestore(&primary_channel->lock, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If this is a "perf" channel, updates the hv_numa_map[] masks so that
|
||||||
|
* init_vp_index() can (re-)use the CPU.
|
||||||
|
*/
|
||||||
|
if (hv_is_perf_channel(channel))
|
||||||
|
hv_clear_alloced_cpu(channel->target_cpu);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
|
* Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
|
||||||
* the relid is invalidated; after hibernation, when the user-space app
|
* the relid is invalidated; after hibernation, when the user-space app
|
||||||
@ -497,7 +504,7 @@ static void vmbus_add_channel_work(struct work_struct *work)
|
|||||||
if (!newchannel->device_obj)
|
if (!newchannel->device_obj)
|
||||||
goto err_deq_chan;
|
goto err_deq_chan;
|
||||||
|
|
||||||
newchannel->device_obj->device_id = hv_get_dev_type(newchannel);
|
newchannel->device_obj->device_id = newchannel->device_id;
|
||||||
/*
|
/*
|
||||||
* Add the new device to the bus. This will kick off device-driver
|
* Add the new device to the bus. This will kick off device-driver
|
||||||
* binding which eventually invokes the device driver's AddDevice()
|
* binding which eventually invokes the device driver's AddDevice()
|
||||||
@ -580,7 +587,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
|
|||||||
*/
|
*/
|
||||||
mutex_lock(&vmbus_connection.channel_mutex);
|
mutex_lock(&vmbus_connection.channel_mutex);
|
||||||
|
|
||||||
init_vp_index(newchannel, hv_get_dev_type(newchannel));
|
init_vp_index(newchannel);
|
||||||
|
|
||||||
/* Remember the channels that should be cleaned up upon suspend. */
|
/* Remember the channels that should be cleaned up upon suspend. */
|
||||||
if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
|
if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
|
||||||
@ -676,9 +683,9 @@ static int next_numa_node_id;
|
|||||||
* evenly among all the available NUMA nodes. Once the node is assigned,
|
* evenly among all the available NUMA nodes. Once the node is assigned,
|
||||||
* we will assign the CPU based on a simple round robin scheme.
|
* we will assign the CPU based on a simple round robin scheme.
|
||||||
*/
|
*/
|
||||||
static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
|
static void init_vp_index(struct vmbus_channel *channel)
|
||||||
{
|
{
|
||||||
bool perf_chn = vmbus_devs[dev_type].perf_device;
|
bool perf_chn = hv_is_perf_channel(channel);
|
||||||
cpumask_var_t available_mask;
|
cpumask_var_t available_mask;
|
||||||
struct cpumask *alloced_mask;
|
struct cpumask *alloced_mask;
|
||||||
u32 target_cpu;
|
u32 target_cpu;
|
||||||
@ -699,6 +706,8 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
|
|||||||
channel->target_cpu = VMBUS_CONNECT_CPU;
|
channel->target_cpu = VMBUS_CONNECT_CPU;
|
||||||
channel->target_vp =
|
channel->target_vp =
|
||||||
hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU);
|
hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU);
|
||||||
|
if (perf_chn)
|
||||||
|
hv_set_alloced_cpu(VMBUS_CONNECT_CPU);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -862,6 +871,7 @@ static void vmbus_setup_channel_state(struct vmbus_channel *channel,
|
|||||||
sizeof(struct vmbus_channel_offer_channel));
|
sizeof(struct vmbus_channel_offer_channel));
|
||||||
channel->monitor_grp = (u8)offer->monitorid / 32;
|
channel->monitor_grp = (u8)offer->monitorid / 32;
|
||||||
channel->monitor_bit = (u8)offer->monitorid % 32;
|
channel->monitor_bit = (u8)offer->monitorid % 32;
|
||||||
|
channel->device_id = hv_get_dev_type(channel);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -395,6 +395,54 @@ enum delay {
|
|||||||
MESSAGE_DELAY = 1,
|
MESSAGE_DELAY = 1,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
extern const struct vmbus_device vmbus_devs[];
|
||||||
|
|
||||||
|
static inline bool hv_is_perf_channel(struct vmbus_channel *channel)
|
||||||
|
{
|
||||||
|
return vmbus_devs[channel->device_id].perf_device;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool hv_is_alloced_cpu(unsigned int cpu)
|
||||||
|
{
|
||||||
|
struct vmbus_channel *channel, *sc;
|
||||||
|
|
||||||
|
lockdep_assert_held(&vmbus_connection.channel_mutex);
|
||||||
|
/*
|
||||||
|
* List additions/deletions as well as updates of the target CPUs are
|
||||||
|
* protected by channel_mutex.
|
||||||
|
*/
|
||||||
|
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
|
||||||
|
if (!hv_is_perf_channel(channel))
|
||||||
|
continue;
|
||||||
|
if (channel->target_cpu == cpu)
|
||||||
|
return true;
|
||||||
|
list_for_each_entry(sc, &channel->sc_list, sc_list) {
|
||||||
|
if (sc->target_cpu == cpu)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void hv_set_alloced_cpu(unsigned int cpu)
|
||||||
|
{
|
||||||
|
cpumask_set_cpu(cpu, &hv_context.hv_numa_map[cpu_to_node(cpu)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void hv_clear_alloced_cpu(unsigned int cpu)
|
||||||
|
{
|
||||||
|
if (hv_is_alloced_cpu(cpu))
|
||||||
|
return;
|
||||||
|
cpumask_clear_cpu(cpu, &hv_context.hv_numa_map[cpu_to_node(cpu)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void hv_update_alloced_cpus(unsigned int old_cpu,
|
||||||
|
unsigned int new_cpu)
|
||||||
|
{
|
||||||
|
hv_set_alloced_cpu(new_cpu);
|
||||||
|
hv_clear_alloced_cpu(old_cpu);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_HYPERV_TESTING
|
#ifdef CONFIG_HYPERV_TESTING
|
||||||
|
|
||||||
int hv_debug_add_dev_dir(struct hv_device *dev);
|
int hv_debug_add_dev_dir(struct hv_device *dev);
|
||||||
|
@ -1687,8 +1687,8 @@ static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf)
|
|||||||
static ssize_t target_cpu_store(struct vmbus_channel *channel,
|
static ssize_t target_cpu_store(struct vmbus_channel *channel,
|
||||||
const char *buf, size_t count)
|
const char *buf, size_t count)
|
||||||
{
|
{
|
||||||
|
u32 target_cpu, origin_cpu;
|
||||||
ssize_t ret = count;
|
ssize_t ret = count;
|
||||||
u32 target_cpu;
|
|
||||||
|
|
||||||
if (vmbus_proto_version < VERSION_WIN10_V4_1)
|
if (vmbus_proto_version < VERSION_WIN10_V4_1)
|
||||||
return -EIO;
|
return -EIO;
|
||||||
@ -1741,7 +1741,8 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
|
|||||||
goto cpu_store_unlock;
|
goto cpu_store_unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (channel->target_cpu == target_cpu)
|
origin_cpu = channel->target_cpu;
|
||||||
|
if (target_cpu == origin_cpu)
|
||||||
goto cpu_store_unlock;
|
goto cpu_store_unlock;
|
||||||
|
|
||||||
if (vmbus_send_modifychannel(channel->offermsg.child_relid,
|
if (vmbus_send_modifychannel(channel->offermsg.child_relid,
|
||||||
@ -1763,14 +1764,20 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
|
|||||||
* in on a CPU that is different from the channel target_cpu value.
|
* in on a CPU that is different from the channel target_cpu value.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (channel->change_target_cpu_callback)
|
|
||||||
(*channel->change_target_cpu_callback)(channel,
|
|
||||||
channel->target_cpu, target_cpu);
|
|
||||||
|
|
||||||
channel->target_cpu = target_cpu;
|
channel->target_cpu = target_cpu;
|
||||||
channel->target_vp = hv_cpu_number_to_vp_number(target_cpu);
|
channel->target_vp = hv_cpu_number_to_vp_number(target_cpu);
|
||||||
channel->numa_node = cpu_to_node(target_cpu);
|
channel->numa_node = cpu_to_node(target_cpu);
|
||||||
|
|
||||||
|
/* See init_vp_index(). */
|
||||||
|
if (hv_is_perf_channel(channel))
|
||||||
|
hv_update_alloced_cpus(origin_cpu, target_cpu);
|
||||||
|
|
||||||
|
/* Currently set only for storvsc channels. */
|
||||||
|
if (channel->change_target_cpu_callback) {
|
||||||
|
(*channel->change_target_cpu_callback)(channel,
|
||||||
|
origin_cpu, target_cpu);
|
||||||
|
}
|
||||||
|
|
||||||
cpu_store_unlock:
|
cpu_store_unlock:
|
||||||
mutex_unlock(&vmbus_connection.channel_mutex);
|
mutex_unlock(&vmbus_connection.channel_mutex);
|
||||||
cpus_read_unlock();
|
cpus_read_unlock();
|
||||||
|
@ -901,6 +901,13 @@ struct vmbus_channel {
|
|||||||
|
|
||||||
bool probe_done;
|
bool probe_done;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Cache the device ID here for easy access; this is useful, in
|
||||||
|
* particular, in situations where the channel's device_obj has
|
||||||
|
* not been allocated/initialized yet.
|
||||||
|
*/
|
||||||
|
u16 device_id;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We must offload the handling of the primary/sub channels
|
* We must offload the handling of the primary/sub channels
|
||||||
* from the single-threaded vmbus_connection.work_queue to
|
* from the single-threaded vmbus_connection.work_queue to
|
||||||
|
Loading…
Reference in New Issue
Block a user