mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-24 04:20:53 +07:00
Tools: hv: Reopen the devices if read() or write() returns errors
The state machine in the hv_utils driver can run out of order in some corner cases. For example, if the kvp daemon doesn't call write() fast enough for some reason, kvp_timeout_func() can run first and move the state to HVUTIL_READY; next, when kvp_on_msg() is called, it returns -EINVAL since kvp_transaction.state is smaller than HVUTIL_USERSPACE_REQ; later, the daemon's write() gets the error -EINVAL, and the daemon will exit(). We can reproduce the issue by sending a SIGSTOP signal to the daemon, waiting for 1 minute, and then sending a SIGCONT signal to the daemon: the daemon will exit() quickly. We can fix the issue by forcing a reset of the device (which means the daemon can close() and open() the device again) and doing the extra necessary clean-up.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent
3a6fb6c425
commit
9fc3c01a1f
@ -80,6 +80,8 @@ static int hv_start_fcopy(struct hv_start_fcopy *smsg)
|
|||||||
|
|
||||||
error = 0;
|
error = 0;
|
||||||
done:
|
done:
|
||||||
|
if (error)
|
||||||
|
target_fname[0] = '\0';
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -108,15 +110,29 @@ static int hv_copy_data(struct hv_do_fcopy *cpmsg)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Reset target_fname to "" in the two below functions for hibernation: if
|
||||||
|
* the fcopy operation is aborted by hibernation, the daemon should remove the
|
||||||
|
* partially-copied file; to achieve this, the hv_utils driver always fakes a
|
||||||
|
* CANCEL_FCOPY message upon suspend, and later when the VM resumes back,
|
||||||
|
* the daemon calls hv_copy_cancel() to remove the file; if a file is copied
|
||||||
|
* successfully before suspend, hv_copy_finished() must reset target_fname to
|
||||||
|
* avoid that the file can be incorrectly removed upon resume, since the faked
|
||||||
|
* CANCEL_FCOPY message is spurious in this case.
|
||||||
|
*/
|
||||||
static int hv_copy_finished(void)
|
static int hv_copy_finished(void)
|
||||||
{
|
{
|
||||||
close(target_fd);
|
close(target_fd);
|
||||||
|
target_fname[0] = '\0';
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
static int hv_copy_cancel(void)
|
static int hv_copy_cancel(void)
|
||||||
{
|
{
|
||||||
close(target_fd);
|
close(target_fd);
|
||||||
|
if (strlen(target_fname) > 0) {
|
||||||
unlink(target_fname);
|
unlink(target_fname);
|
||||||
|
target_fname[0] = '\0';
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -131,7 +147,7 @@ void print_usage(char *argv[])
|
|||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
int fcopy_fd;
|
int fcopy_fd = -1;
|
||||||
int error;
|
int error;
|
||||||
int daemonize = 1, long_index = 0, opt;
|
int daemonize = 1, long_index = 0, opt;
|
||||||
int version = FCOPY_CURRENT_VERSION;
|
int version = FCOPY_CURRENT_VERSION;
|
||||||
@ -141,7 +157,7 @@ int main(int argc, char *argv[])
|
|||||||
struct hv_do_fcopy copy;
|
struct hv_do_fcopy copy;
|
||||||
__u32 kernel_modver;
|
__u32 kernel_modver;
|
||||||
} buffer = { };
|
} buffer = { };
|
||||||
int in_handshake = 1;
|
int in_handshake;
|
||||||
|
|
||||||
static struct option long_options[] = {
|
static struct option long_options[] = {
|
||||||
{"help", no_argument, 0, 'h' },
|
{"help", no_argument, 0, 'h' },
|
||||||
@ -170,6 +186,12 @@ int main(int argc, char *argv[])
|
|||||||
openlog("HV_FCOPY", 0, LOG_USER);
|
openlog("HV_FCOPY", 0, LOG_USER);
|
||||||
syslog(LOG_INFO, "starting; pid is:%d", getpid());
|
syslog(LOG_INFO, "starting; pid is:%d", getpid());
|
||||||
|
|
||||||
|
reopen_fcopy_fd:
|
||||||
|
if (fcopy_fd != -1)
|
||||||
|
close(fcopy_fd);
|
||||||
|
/* Remove any possible partially-copied file on error */
|
||||||
|
hv_copy_cancel();
|
||||||
|
in_handshake = 1;
|
||||||
fcopy_fd = open("/dev/vmbus/hv_fcopy", O_RDWR);
|
fcopy_fd = open("/dev/vmbus/hv_fcopy", O_RDWR);
|
||||||
|
|
||||||
if (fcopy_fd < 0) {
|
if (fcopy_fd < 0) {
|
||||||
@ -196,7 +218,7 @@ int main(int argc, char *argv[])
|
|||||||
len = pread(fcopy_fd, &buffer, sizeof(buffer), 0);
|
len = pread(fcopy_fd, &buffer, sizeof(buffer), 0);
|
||||||
if (len < 0) {
|
if (len < 0) {
|
||||||
syslog(LOG_ERR, "pread failed: %s", strerror(errno));
|
syslog(LOG_ERR, "pread failed: %s", strerror(errno));
|
||||||
exit(EXIT_FAILURE);
|
goto reopen_fcopy_fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (in_handshake) {
|
if (in_handshake) {
|
||||||
@ -231,9 +253,14 @@ int main(int argc, char *argv[])
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* pwrite() may return an error due to the faked CANCEL_FCOPY
|
||||||
|
* message upon hibernation. Ignore the error by resetting the
|
||||||
|
* dev file, i.e. closing and re-opening it.
|
||||||
|
*/
|
||||||
if (pwrite(fcopy_fd, &error, sizeof(int), 0) != sizeof(int)) {
|
if (pwrite(fcopy_fd, &error, sizeof(int), 0) != sizeof(int)) {
|
||||||
syslog(LOG_ERR, "pwrite failed: %s", strerror(errno));
|
syslog(LOG_ERR, "pwrite failed: %s", strerror(errno));
|
||||||
exit(EXIT_FAILURE);
|
goto reopen_fcopy_fd;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -76,7 +76,7 @@ enum {
|
|||||||
DNS
|
DNS
|
||||||
};
|
};
|
||||||
|
|
||||||
static int in_hand_shake = 1;
|
static int in_hand_shake;
|
||||||
|
|
||||||
static char *os_name = "";
|
static char *os_name = "";
|
||||||
static char *os_major = "";
|
static char *os_major = "";
|
||||||
@ -1360,7 +1360,7 @@ void print_usage(char *argv[])
|
|||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
int kvp_fd, len;
|
int kvp_fd = -1, len;
|
||||||
int error;
|
int error;
|
||||||
struct pollfd pfd;
|
struct pollfd pfd;
|
||||||
char *p;
|
char *p;
|
||||||
@ -1400,14 +1400,6 @@ int main(int argc, char *argv[])
|
|||||||
openlog("KVP", 0, LOG_USER);
|
openlog("KVP", 0, LOG_USER);
|
||||||
syslog(LOG_INFO, "KVP starting; pid is:%d", getpid());
|
syslog(LOG_INFO, "KVP starting; pid is:%d", getpid());
|
||||||
|
|
||||||
kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR | O_CLOEXEC);
|
|
||||||
|
|
||||||
if (kvp_fd < 0) {
|
|
||||||
syslog(LOG_ERR, "open /dev/vmbus/hv_kvp failed; error: %d %s",
|
|
||||||
errno, strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Retrieve OS release information.
|
* Retrieve OS release information.
|
||||||
*/
|
*/
|
||||||
@ -1423,6 +1415,18 @@ int main(int argc, char *argv[])
|
|||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
reopen_kvp_fd:
|
||||||
|
if (kvp_fd != -1)
|
||||||
|
close(kvp_fd);
|
||||||
|
in_hand_shake = 1;
|
||||||
|
kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR | O_CLOEXEC);
|
||||||
|
|
||||||
|
if (kvp_fd < 0) {
|
||||||
|
syslog(LOG_ERR, "open /dev/vmbus/hv_kvp failed; error: %d %s",
|
||||||
|
errno, strerror(errno));
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Register ourselves with the kernel.
|
* Register ourselves with the kernel.
|
||||||
*/
|
*/
|
||||||
@ -1456,9 +1460,7 @@ int main(int argc, char *argv[])
|
|||||||
if (len != sizeof(struct hv_kvp_msg)) {
|
if (len != sizeof(struct hv_kvp_msg)) {
|
||||||
syslog(LOG_ERR, "read failed; error:%d %s",
|
syslog(LOG_ERR, "read failed; error:%d %s",
|
||||||
errno, strerror(errno));
|
errno, strerror(errno));
|
||||||
|
goto reopen_kvp_fd;
|
||||||
close(kvp_fd);
|
|
||||||
return EXIT_FAILURE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1617,13 +1619,17 @@ int main(int argc, char *argv[])
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Send the value back to the kernel. */
|
/*
|
||||||
|
* Send the value back to the kernel. Note: the write() may
|
||||||
|
* return an error due to hibernation; we can ignore the error
|
||||||
|
* by resetting the dev file, i.e. closing and re-opening it.
|
||||||
|
*/
|
||||||
kvp_done:
|
kvp_done:
|
||||||
len = write(kvp_fd, hv_msg, sizeof(struct hv_kvp_msg));
|
len = write(kvp_fd, hv_msg, sizeof(struct hv_kvp_msg));
|
||||||
if (len != sizeof(struct hv_kvp_msg)) {
|
if (len != sizeof(struct hv_kvp_msg)) {
|
||||||
syslog(LOG_ERR, "write failed; error: %d %s", errno,
|
syslog(LOG_ERR, "write failed; error: %d %s", errno,
|
||||||
strerror(errno));
|
strerror(errno));
|
||||||
exit(EXIT_FAILURE);
|
goto reopen_kvp_fd;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -28,6 +28,8 @@
|
|||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <dirent.h>
|
#include <dirent.h>
|
||||||
|
|
||||||
|
static bool fs_frozen;
|
||||||
|
|
||||||
/* Don't use syslog() in the function since that can cause write to disk */
|
/* Don't use syslog() in the function since that can cause write to disk */
|
||||||
static int vss_do_freeze(char *dir, unsigned int cmd)
|
static int vss_do_freeze(char *dir, unsigned int cmd)
|
||||||
{
|
{
|
||||||
@ -155,17 +157,26 @@ static int vss_operate(int operation)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
error |= vss_do_freeze(ent->mnt_dir, cmd);
|
error |= vss_do_freeze(ent->mnt_dir, cmd);
|
||||||
if (error && operation == VSS_OP_FREEZE)
|
if (operation == VSS_OP_FREEZE) {
|
||||||
|
if (error)
|
||||||
goto err;
|
goto err;
|
||||||
|
fs_frozen = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
endmntent(mounts);
|
endmntent(mounts);
|
||||||
|
|
||||||
if (root_seen) {
|
if (root_seen) {
|
||||||
error |= vss_do_freeze("/", cmd);
|
error |= vss_do_freeze("/", cmd);
|
||||||
if (error && operation == VSS_OP_FREEZE)
|
if (operation == VSS_OP_FREEZE) {
|
||||||
|
if (error)
|
||||||
goto err;
|
goto err;
|
||||||
|
fs_frozen = true;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (operation == VSS_OP_THAW && !error)
|
||||||
|
fs_frozen = false;
|
||||||
|
|
||||||
goto out;
|
goto out;
|
||||||
err:
|
err:
|
||||||
@ -175,6 +186,7 @@ static int vss_operate(int operation)
|
|||||||
endmntent(mounts);
|
endmntent(mounts);
|
||||||
}
|
}
|
||||||
vss_operate(VSS_OP_THAW);
|
vss_operate(VSS_OP_THAW);
|
||||||
|
fs_frozen = false;
|
||||||
/* Call syslog after we thaw all filesystems */
|
/* Call syslog after we thaw all filesystems */
|
||||||
if (ent)
|
if (ent)
|
||||||
syslog(LOG_ERR, "FREEZE of %s failed; error:%d %s",
|
syslog(LOG_ERR, "FREEZE of %s failed; error:%d %s",
|
||||||
@ -196,13 +208,13 @@ void print_usage(char *argv[])
|
|||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
int vss_fd, len;
|
int vss_fd = -1, len;
|
||||||
int error;
|
int error;
|
||||||
struct pollfd pfd;
|
struct pollfd pfd;
|
||||||
int op;
|
int op;
|
||||||
struct hv_vss_msg vss_msg[1];
|
struct hv_vss_msg vss_msg[1];
|
||||||
int daemonize = 1, long_index = 0, opt;
|
int daemonize = 1, long_index = 0, opt;
|
||||||
int in_handshake = 1;
|
int in_handshake;
|
||||||
__u32 kernel_modver;
|
__u32 kernel_modver;
|
||||||
|
|
||||||
static struct option long_options[] = {
|
static struct option long_options[] = {
|
||||||
@ -232,6 +244,18 @@ int main(int argc, char *argv[])
|
|||||||
openlog("Hyper-V VSS", 0, LOG_USER);
|
openlog("Hyper-V VSS", 0, LOG_USER);
|
||||||
syslog(LOG_INFO, "VSS starting; pid is:%d", getpid());
|
syslog(LOG_INFO, "VSS starting; pid is:%d", getpid());
|
||||||
|
|
||||||
|
reopen_vss_fd:
|
||||||
|
if (vss_fd != -1)
|
||||||
|
close(vss_fd);
|
||||||
|
if (fs_frozen) {
|
||||||
|
if (vss_operate(VSS_OP_THAW) || fs_frozen) {
|
||||||
|
syslog(LOG_ERR, "failed to thaw file system: err=%d",
|
||||||
|
errno);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
in_handshake = 1;
|
||||||
vss_fd = open("/dev/vmbus/hv_vss", O_RDWR);
|
vss_fd = open("/dev/vmbus/hv_vss", O_RDWR);
|
||||||
if (vss_fd < 0) {
|
if (vss_fd < 0) {
|
||||||
syslog(LOG_ERR, "open /dev/vmbus/hv_vss failed; error: %d %s",
|
syslog(LOG_ERR, "open /dev/vmbus/hv_vss failed; error: %d %s",
|
||||||
@ -284,8 +308,7 @@ int main(int argc, char *argv[])
|
|||||||
if (len != sizeof(struct hv_vss_msg)) {
|
if (len != sizeof(struct hv_vss_msg)) {
|
||||||
syslog(LOG_ERR, "read failed; error:%d %s",
|
syslog(LOG_ERR, "read failed; error:%d %s",
|
||||||
errno, strerror(errno));
|
errno, strerror(errno));
|
||||||
close(vss_fd);
|
goto reopen_vss_fd;
|
||||||
return EXIT_FAILURE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
op = vss_msg->vss_hdr.operation;
|
op = vss_msg->vss_hdr.operation;
|
||||||
@ -312,14 +335,18 @@ int main(int argc, char *argv[])
|
|||||||
default:
|
default:
|
||||||
syslog(LOG_ERR, "Illegal op:%d\n", op);
|
syslog(LOG_ERR, "Illegal op:%d\n", op);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The write() may return an error due to the faked VSS_OP_THAW
|
||||||
|
* message upon hibernation. Ignore the error by resetting the
|
||||||
|
* dev file, i.e. closing and re-opening it.
|
||||||
|
*/
|
||||||
vss_msg->error = error;
|
vss_msg->error = error;
|
||||||
len = write(vss_fd, vss_msg, sizeof(struct hv_vss_msg));
|
len = write(vss_fd, vss_msg, sizeof(struct hv_vss_msg));
|
||||||
if (len != sizeof(struct hv_vss_msg)) {
|
if (len != sizeof(struct hv_vss_msg)) {
|
||||||
syslog(LOG_ERR, "write failed; error: %d %s", errno,
|
syslog(LOG_ERR, "write failed; error: %d %s", errno,
|
||||||
strerror(errno));
|
strerror(errno));
|
||||||
|
goto reopen_vss_fd;
|
||||||
if (op == VSS_OP_FREEZE)
|
|
||||||
vss_operate(VSS_OP_THAW);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user