mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
1a3fc2c402
There has been a report about a deadlock in the xenbus driver:
[ 247.979498] ======================================================
[ 247.985688] WARNING: possible circular locking dependency detected
[ 247.991882] 4.12.0-rc4-00022-gc4b25c0 #575 Not tainted
[ 247.997040] ------------------------------------------------------
[ 248.003232] xenbus/91 is trying to acquire lock:
[ 248.007875] (&u->msgbuffer_mutex){+.+.+.}, at: [<ffff00000863e904>]
xenbus_dev_queue_reply+0x3c/0x230
[ 248.017163]
[ 248.017163] but task is already holding lock:
[ 248.023096] (xb_write_mutex){+.+...}, at: [<ffff00000863a940>]
xenbus_thread+0x5f0/0x798
[ 248.031267]
[ 248.031267] which lock already depends on the new lock.
[ 248.031267]
[ 248.039615]
[ 248.039615] the existing dependency chain (in reverse order) is:
[ 248.047176]
[ 248.047176] -> #1 (xb_write_mutex){+.+...}:
[ 248.052943] __lock_acquire+0x1728/0x1778
[ 248.057498] lock_acquire+0xc4/0x288
[ 248.061630] __mutex_lock+0x84/0x868
[ 248.065755] mutex_lock_nested+0x3c/0x50
[ 248.070227] xs_send+0x164/0x1f8
[ 248.074015] xenbus_dev_request_and_reply+0x6c/0x88
[ 248.079427] xenbus_file_write+0x260/0x420
[ 248.084073] __vfs_write+0x48/0x138
[ 248.088113] vfs_write+0xa8/0x1b8
[ 248.091983] SyS_write+0x54/0xb0
[ 248.095768] el0_svc_naked+0x24/0x28
[ 248.099897]
[ 248.099897] -> #0 (&u->msgbuffer_mutex){+.+.+.}:
[ 248.106088] print_circular_bug+0x80/0x2e0
[ 248.110730] __lock_acquire+0x1768/0x1778
[ 248.115288] lock_acquire+0xc4/0x288
[ 248.119417] __mutex_lock+0x84/0x868
[ 248.123545] mutex_lock_nested+0x3c/0x50
[ 248.128016] xenbus_dev_queue_reply+0x3c/0x230
[ 248.133005] xenbus_thread+0x788/0x798
[ 248.137306] kthread+0x110/0x140
[ 248.141087] ret_from_fork+0x10/0x40
It is rather easy to avoid by dropping xb_write_mutex before calling
xenbus_dev_queue_reply().
Fixes: fd8aa9095a ("xen: optimize xenbus driver for multiple concurrent xenstore accesses").
Cc: <stable@vger.kernel.org> # 4.11
Reported-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Tested-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
488 lines
11 KiB
C
488 lines
11 KiB
C
/******************************************************************************
|
|
* xenbus_comms.c
|
|
*
|
|
* Low level code to talk to Xen Store: ringbuffer and event channel.
|
|
*
|
|
* Copyright (C) 2005 Rusty Russell, IBM Corporation
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License version 2
|
|
* as published by the Free Software Foundation; or, when distributed
|
|
* separately from the Linux kernel or incorporated into other
|
|
* software packages, subject to the following license:
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this source file (the "Software"), to deal in the Software without
|
|
* restriction, including without limitation the rights to use, copy, modify,
|
|
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
* and to permit persons to whom the Software is furnished to do so, subject to
|
|
* the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
#include <linux/wait.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/kthread.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/err.h>
|
|
#include <xen/xenbus.h>
|
|
#include <asm/xen/hypervisor.h>
|
|
#include <xen/events.h>
|
|
#include <xen/page.h>
|
|
#include "xenbus.h"
|
|
|
|
/*
 * Requests that process_writes() has fully written to the ring and that
 * now wait for process_msg() to match a reply by request id.
 */
LIST_HEAD(xs_reply_list);

/* Requests still waiting to be written to the ring by process_writes(). */
LIST_HEAD(xb_write_list);

/* The xenbus thread sleeps here; wake_waiting() wakes it on every event. */
DECLARE_WAIT_QUEUE_HEAD(xb_waitq);

/* Held in this file while the two request lists are walked or modified. */
DEFINE_MUTEX(xb_write_mutex);

/* Protect xenbus reader thread against save/restore. */
DEFINE_MUTEX(xs_response_mutex);

/* irq bound to the store event channel; 0 while no irq is bound. */
static int xenbus_irq;

/* Task running xenbus_thread(); NULL while no such thread exists. */
static struct task_struct *xenbus_task;

/* Work item that runs xenbus_probe(); queued once by wake_waiting(). */
static DECLARE_WORK(probe_work, xenbus_probe);
|
|
|
|
|
|
static irqreturn_t wake_waiting(int irq, void *unused)
|
|
{
|
|
if (unlikely(xenstored_ready == 0)) {
|
|
xenstored_ready = 1;
|
|
schedule_work(&probe_work);
|
|
}
|
|
|
|
wake_up(&xb_waitq);
|
|
return IRQ_HANDLED;
|
|
}
|
|
|
|
/*
 * Sanity check a consumer/producer index pair: the amount of data between
 * them must never exceed the ring size. Returns non-zero when the pair is
 * plausible, 0 otherwise.
 */
static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
{
	XENSTORE_RING_IDX in_flight = prod - cons;

	return in_flight <= XENSTORE_RING_SIZE;
}
|
|
|
|
/*
 * Locate the next contiguous writable region of a ring.
 *
 * Stores in *len the smaller of "bytes up to the end of the buffer" and
 * "free bytes in the ring", and returns the address inside buf that
 * corresponds to the producer index.
 */
static void *get_output_chunk(XENSTORE_RING_IDX cons,
			      XENSTORE_RING_IDX prod,
			      char *buf, uint32_t *len)
{
	uint32_t until_wrap = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
	uint32_t free_bytes = XENSTORE_RING_SIZE - (prod - cons);

	*len = (free_bytes < until_wrap) ? free_bytes : until_wrap;

	return buf + MASK_XENSTORE_IDX(prod);
}
|
|
|
|
/*
 * Locate the next contiguous readable region of a ring.
 *
 * Stores in *len the smaller of "bytes up to the end of the buffer" and
 * "bytes produced but not yet consumed", and returns the address inside
 * buf that corresponds to the consumer index.
 */
static const void *get_input_chunk(XENSTORE_RING_IDX cons,
				   XENSTORE_RING_IDX prod,
				   const char *buf, uint32_t *len)
{
	uint32_t until_wrap = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
	uint32_t pending = prod - cons;

	*len = (pending < until_wrap) ? pending : until_wrap;

	return buf + MASK_XENSTORE_IDX(cons);
}
|
|
|
|
static int xb_data_to_write(void)
|
|
{
|
|
struct xenstore_domain_interface *intf = xen_store_interface;
|
|
|
|
return (intf->req_prod - intf->req_cons) != XENSTORE_RING_SIZE &&
|
|
!list_empty(&xb_write_list);
|
|
}
|
|
|
|
/**
|
|
* xb_write - low level write
|
|
* @data: buffer to send
|
|
* @len: length of buffer
|
|
*
|
|
* Returns number of bytes written or -err.
|
|
*/
|
|
static int xb_write(const void *data, unsigned int len)
|
|
{
|
|
struct xenstore_domain_interface *intf = xen_store_interface;
|
|
XENSTORE_RING_IDX cons, prod;
|
|
unsigned int bytes = 0;
|
|
|
|
while (len != 0) {
|
|
void *dst;
|
|
unsigned int avail;
|
|
|
|
/* Read indexes, then verify. */
|
|
cons = intf->req_cons;
|
|
prod = intf->req_prod;
|
|
if (!check_indexes(cons, prod)) {
|
|
intf->req_cons = intf->req_prod = 0;
|
|
return -EIO;
|
|
}
|
|
if (!xb_data_to_write())
|
|
return bytes;
|
|
|
|
/* Must write data /after/ reading the consumer index. */
|
|
virt_mb();
|
|
|
|
dst = get_output_chunk(cons, prod, intf->req, &avail);
|
|
if (avail == 0)
|
|
continue;
|
|
if (avail > len)
|
|
avail = len;
|
|
|
|
memcpy(dst, data, avail);
|
|
data += avail;
|
|
len -= avail;
|
|
bytes += avail;
|
|
|
|
/* Other side must not see new producer until data is there. */
|
|
virt_wmb();
|
|
intf->req_prod += avail;
|
|
|
|
/* Implies mb(): other side will see the updated producer. */
|
|
if (prod <= intf->req_cons)
|
|
notify_remote_via_evtchn(xen_store_evtchn);
|
|
}
|
|
|
|
return bytes;
|
|
}
|
|
|
|
static int xb_data_to_read(void)
|
|
{
|
|
struct xenstore_domain_interface *intf = xen_store_interface;
|
|
return (intf->rsp_cons != intf->rsp_prod);
|
|
}
|
|
|
|
static int xb_read(void *data, unsigned int len)
|
|
{
|
|
struct xenstore_domain_interface *intf = xen_store_interface;
|
|
XENSTORE_RING_IDX cons, prod;
|
|
unsigned int bytes = 0;
|
|
|
|
while (len != 0) {
|
|
unsigned int avail;
|
|
const char *src;
|
|
|
|
/* Read indexes, then verify. */
|
|
cons = intf->rsp_cons;
|
|
prod = intf->rsp_prod;
|
|
if (cons == prod)
|
|
return bytes;
|
|
|
|
if (!check_indexes(cons, prod)) {
|
|
intf->rsp_cons = intf->rsp_prod = 0;
|
|
return -EIO;
|
|
}
|
|
|
|
src = get_input_chunk(cons, prod, intf->rsp, &avail);
|
|
if (avail == 0)
|
|
continue;
|
|
if (avail > len)
|
|
avail = len;
|
|
|
|
/* Must read data /after/ reading the producer index. */
|
|
virt_rmb();
|
|
|
|
memcpy(data, src, avail);
|
|
data += avail;
|
|
len -= avail;
|
|
bytes += avail;
|
|
|
|
/* Other side must not see free space until we've copied out */
|
|
virt_mb();
|
|
intf->rsp_cons += avail;
|
|
|
|
/* Implies mb(): other side will see the updated consumer. */
|
|
if (intf->rsp_prod - cons >= XENSTORE_RING_SIZE)
|
|
notify_remote_via_evtchn(xen_store_evtchn);
|
|
}
|
|
|
|
return bytes;
|
|
}
|
|
|
|
static int process_msg(void)
|
|
{
|
|
static struct {
|
|
struct xsd_sockmsg msg;
|
|
char *body;
|
|
union {
|
|
void *alloc;
|
|
struct xs_watch_event *watch;
|
|
};
|
|
bool in_msg;
|
|
bool in_hdr;
|
|
unsigned int read;
|
|
} state;
|
|
struct xb_req_data *req;
|
|
int err;
|
|
unsigned int len;
|
|
|
|
if (!state.in_msg) {
|
|
state.in_msg = true;
|
|
state.in_hdr = true;
|
|
state.read = 0;
|
|
|
|
/*
|
|
* We must disallow save/restore while reading a message.
|
|
* A partial read across s/r leaves us out of sync with
|
|
* xenstored.
|
|
* xs_response_mutex is locked as long as we are processing one
|
|
* message. state.in_msg will be true as long as we are holding
|
|
* the lock here.
|
|
*/
|
|
mutex_lock(&xs_response_mutex);
|
|
|
|
if (!xb_data_to_read()) {
|
|
/* We raced with save/restore: pending data 'gone'. */
|
|
mutex_unlock(&xs_response_mutex);
|
|
state.in_msg = false;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
if (state.in_hdr) {
|
|
if (state.read != sizeof(state.msg)) {
|
|
err = xb_read((void *)&state.msg + state.read,
|
|
sizeof(state.msg) - state.read);
|
|
if (err < 0)
|
|
goto out;
|
|
state.read += err;
|
|
if (state.read != sizeof(state.msg))
|
|
return 0;
|
|
if (state.msg.len > XENSTORE_PAYLOAD_MAX) {
|
|
err = -EINVAL;
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
len = state.msg.len + 1;
|
|
if (state.msg.type == XS_WATCH_EVENT)
|
|
len += sizeof(*state.watch);
|
|
|
|
state.alloc = kmalloc(len, GFP_NOIO | __GFP_HIGH);
|
|
if (!state.alloc)
|
|
return -ENOMEM;
|
|
|
|
if (state.msg.type == XS_WATCH_EVENT)
|
|
state.body = state.watch->body;
|
|
else
|
|
state.body = state.alloc;
|
|
state.in_hdr = false;
|
|
state.read = 0;
|
|
}
|
|
|
|
err = xb_read(state.body + state.read, state.msg.len - state.read);
|
|
if (err < 0)
|
|
goto out;
|
|
|
|
state.read += err;
|
|
if (state.read != state.msg.len)
|
|
return 0;
|
|
|
|
state.body[state.msg.len] = '\0';
|
|
|
|
if (state.msg.type == XS_WATCH_EVENT) {
|
|
state.watch->len = state.msg.len;
|
|
err = xs_watch_msg(state.watch);
|
|
} else {
|
|
err = -ENOENT;
|
|
mutex_lock(&xb_write_mutex);
|
|
list_for_each_entry(req, &xs_reply_list, list) {
|
|
if (req->msg.req_id == state.msg.req_id) {
|
|
list_del(&req->list);
|
|
err = 0;
|
|
break;
|
|
}
|
|
}
|
|
mutex_unlock(&xb_write_mutex);
|
|
if (err)
|
|
goto out;
|
|
|
|
if (req->state == xb_req_state_wait_reply) {
|
|
req->msg.type = state.msg.type;
|
|
req->msg.len = state.msg.len;
|
|
req->body = state.body;
|
|
req->state = xb_req_state_got_reply;
|
|
req->cb(req);
|
|
} else
|
|
kfree(req);
|
|
}
|
|
|
|
mutex_unlock(&xs_response_mutex);
|
|
|
|
state.in_msg = false;
|
|
state.alloc = NULL;
|
|
return err;
|
|
|
|
out:
|
|
mutex_unlock(&xs_response_mutex);
|
|
state.in_msg = false;
|
|
kfree(state.alloc);
|
|
state.alloc = NULL;
|
|
return err;
|
|
}
|
|
|
|
static int process_writes(void)
|
|
{
|
|
static struct {
|
|
struct xb_req_data *req;
|
|
int idx;
|
|
unsigned int written;
|
|
} state;
|
|
void *base;
|
|
unsigned int len;
|
|
int err = 0;
|
|
|
|
if (!xb_data_to_write())
|
|
return 0;
|
|
|
|
mutex_lock(&xb_write_mutex);
|
|
|
|
if (!state.req) {
|
|
state.req = list_first_entry(&xb_write_list,
|
|
struct xb_req_data, list);
|
|
state.idx = -1;
|
|
state.written = 0;
|
|
}
|
|
|
|
if (state.req->state == xb_req_state_aborted)
|
|
goto out_err;
|
|
|
|
while (state.idx < state.req->num_vecs) {
|
|
if (state.idx < 0) {
|
|
base = &state.req->msg;
|
|
len = sizeof(state.req->msg);
|
|
} else {
|
|
base = state.req->vec[state.idx].iov_base;
|
|
len = state.req->vec[state.idx].iov_len;
|
|
}
|
|
err = xb_write(base + state.written, len - state.written);
|
|
if (err < 0)
|
|
goto out_err;
|
|
state.written += err;
|
|
if (state.written != len)
|
|
goto out;
|
|
|
|
state.idx++;
|
|
state.written = 0;
|
|
}
|
|
|
|
list_del(&state.req->list);
|
|
state.req->state = xb_req_state_wait_reply;
|
|
list_add_tail(&state.req->list, &xs_reply_list);
|
|
state.req = NULL;
|
|
|
|
out:
|
|
mutex_unlock(&xb_write_mutex);
|
|
|
|
return 0;
|
|
|
|
out_err:
|
|
state.req->msg.type = XS_ERROR;
|
|
state.req->err = err;
|
|
list_del(&state.req->list);
|
|
if (state.req->state == xb_req_state_aborted)
|
|
kfree(state.req);
|
|
else {
|
|
state.req->state = xb_req_state_got_reply;
|
|
wake_up(&state.req->wq);
|
|
}
|
|
|
|
mutex_unlock(&xb_write_mutex);
|
|
|
|
state.req = NULL;
|
|
|
|
return err;
|
|
}
|
|
|
|
/*
 * Wake-up condition of the xenbus thread: there is something to read from
 * the response ring or something that can be written to the request ring.
 * Returns 1 if so, 0 otherwise.
 */
static int xb_thread_work(void)
{
	if (xb_data_to_read())
		return 1;

	return xb_data_to_write() ? 1 : 0;
}
|
|
|
|
static int xenbus_thread(void *unused)
|
|
{
|
|
int err;
|
|
|
|
while (!kthread_should_stop()) {
|
|
if (wait_event_interruptible(xb_waitq, xb_thread_work()))
|
|
continue;
|
|
|
|
err = process_msg();
|
|
if (err == -ENOMEM)
|
|
schedule();
|
|
else if (err)
|
|
pr_warn_ratelimited("error %d while reading message\n",
|
|
err);
|
|
|
|
err = process_writes();
|
|
if (err)
|
|
pr_warn_ratelimited("error %d while writing message\n",
|
|
err);
|
|
}
|
|
|
|
xenbus_task = NULL;
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* xb_init_comms - Set up interrupt handler off store event channel.
|
|
*/
|
|
int xb_init_comms(void)
|
|
{
|
|
struct xenstore_domain_interface *intf = xen_store_interface;
|
|
|
|
if (intf->req_prod != intf->req_cons)
|
|
pr_err("request ring is not quiescent (%08x:%08x)!\n",
|
|
intf->req_cons, intf->req_prod);
|
|
|
|
if (intf->rsp_prod != intf->rsp_cons) {
|
|
pr_warn("response ring is not quiescent (%08x:%08x): fixing up\n",
|
|
intf->rsp_cons, intf->rsp_prod);
|
|
/* breaks kdump */
|
|
if (!reset_devices)
|
|
intf->rsp_cons = intf->rsp_prod;
|
|
}
|
|
|
|
if (xenbus_irq) {
|
|
/* Already have an irq; assume we're resuming */
|
|
rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
|
|
} else {
|
|
int err;
|
|
|
|
err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
|
|
0, "xenbus", &xb_waitq);
|
|
if (err < 0) {
|
|
pr_err("request irq failed %i\n", err);
|
|
return err;
|
|
}
|
|
|
|
xenbus_irq = err;
|
|
|
|
if (!xenbus_task) {
|
|
xenbus_task = kthread_run(xenbus_thread, NULL,
|
|
"xenbus");
|
|
if (IS_ERR(xenbus_task))
|
|
return PTR_ERR(xenbus_task);
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void xb_deinit_comms(void)
|
|
{
|
|
unbind_from_irqhandler(xenbus_irq, &xb_waitq);
|
|
xenbus_irq = 0;
|
|
}
|