qrap: Add probe retry on connection reset from passt for KubeVirt integration
KubeVirt uses libvirt to start qrap in its current draft integration (https://github.com/kubevirt/kubevirt/pull/7849/), and libvirtd starts qrap three times every time a new virtual machine is created: once on domain creation, and twice on domain start (for "probing") and to finally start it for real. Very often, a subsequent invocation of qrap happen before the previously running instance of qemu terminates, which means that passt will refuse the new connection as the old one is still active. Introduce a single retry with a 100ms delay to work around this. This should be checked again once native libvirt support is there, and that point qrap will have no reason to exist anymore. Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
parent
721fa1bf5d
commit
fca5e11773
1 changed files with 33 additions and 1 deletions
34
qrap.c
34
qrap.c
|
@ -25,6 +25,7 @@
|
||||||
#include <net/if_arp.h>
|
#include <net/if_arp.h>
|
||||||
#include <netinet/in.h>
|
#include <netinet/in.h>
|
||||||
#include <netinet/if_ether.h>
|
#include <netinet/if_ether.h>
|
||||||
|
#include <time.h>
|
||||||
|
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
#include "passt.h"
|
#include "passt.h"
|
||||||
|
@ -112,7 +113,7 @@ void usage(const char *name)
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
struct timeval tv = { .tv_sec = 0, .tv_usec = (long)(500 * 1000) };
|
struct timeval tv = { .tv_sec = 0, .tv_usec = (long)(500 * 1000) };
|
||||||
int i, s, qemu_argc = 0, addr_map = 0, has_dev = 0;
|
int i, s, qemu_argc = 0, addr_map = 0, has_dev = 0, retry_on_reset;
|
||||||
char *qemu_argv[ARG_MAX], dev_str[ARG_MAX];
|
char *qemu_argv[ARG_MAX], dev_str[ARG_MAX];
|
||||||
struct sockaddr_un addr = {
|
struct sockaddr_un addr = {
|
||||||
.sun_family = AF_UNIX,
|
.sun_family = AF_UNIX,
|
||||||
|
@ -233,6 +234,9 @@ int main(int argc, char **argv)
|
||||||
|
|
||||||
valid_args:
|
valid_args:
|
||||||
for (i = 1; i < UNIX_SOCK_MAX; i++) {
|
for (i = 1; i < UNIX_SOCK_MAX; i++) {
|
||||||
|
retry_on_reset = 1;
|
||||||
|
|
||||||
|
retry:
|
||||||
s = socket(AF_UNIX, SOCK_STREAM, 0);
|
s = socket(AF_UNIX, SOCK_STREAM, 0);
|
||||||
if (s < 0) {
|
if (s < 0) {
|
||||||
perror("socket");
|
perror("socket");
|
||||||
|
@ -254,6 +258,34 @@ valid_args:
|
||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
/* FIXME: in a KubeVirt environment, libvirtd invokes qrap three
|
||||||
|
* times in a strict sequence when a virtual machine needs to
|
||||||
|
* be started, namely, when:
|
||||||
|
* - the domain XML is saved
|
||||||
|
* - the domain is started (for "probing")
|
||||||
|
* - the virtual machine is started for real
|
||||||
|
* and it often happens that the qemu process is still running
|
||||||
|
* when qrap is invoked again, so passt will refuse the new
|
||||||
|
* connection because the previous one is still active. This
|
||||||
|
* overlap seems to be anywhere between 0 and 3ms.
|
||||||
|
*
|
||||||
|
* If we get a connection reset, retry, just once, after 100ms,
|
||||||
|
* to allow for the previous qemu instance to terminate and, in
|
||||||
|
* turn, for the connection to passt to be closed.
|
||||||
|
*
|
||||||
|
* This should be fixed in libvirt instead. It probably makes
|
||||||
|
* sense to check this behaviour once native libvirt support is
|
||||||
|
* there, and this implies native qemu support too, so at that
|
||||||
|
* point qrap will have no reason to exist anymore -- that is,
|
||||||
|
* this FIXME will probably remain until the tool itself is
|
||||||
|
* obsoleted.
|
||||||
|
*/
|
||||||
|
if (retry_on_reset && errno == ECONNRESET) {
|
||||||
|
retry_on_reset = 0;
|
||||||
|
usleep(100 * 1000);
|
||||||
|
goto retry;
|
||||||
|
}
|
||||||
|
|
||||||
fprintf(stderr, "Probe of %s failed\n", addr.sun_path);
|
fprintf(stderr, "Probe of %s failed\n", addr.sun_path);
|
||||||
|
|
||||||
close(s);
|
close(s);
|
||||||
|
|
Loading…
Reference in a new issue