icmp: Use 'flowside' epoll references for ping sockets
Currently ping sockets use a custom epoll reference type which includes the ICMP id. However, now that we have entries in the flow table for ping flows, finding the flow entry is sufficient to get everything else we want, including the id. Therefore remove the icmp_epoll_ref type and use the general 'flowside' field for ping sockets.

Having done this, we no longer need separate EPOLL_TYPE_ICMP and EPOLL_TYPE_ICMPV6 reference types, because we can easily determine which case we have from the flow type. Merge both types into EPOLL_TYPE_PING.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
parent
02cbdb0b86
commit
4779dfe12f
5 changed files with 24 additions and 44 deletions
34
icmp.c
34
icmp.c
|
@ -49,19 +49,19 @@
|
||||||
#define SOCKSIDE 0
|
#define SOCKSIDE 0
|
||||||
#define TAPSIDE 1
|
#define TAPSIDE 1
|
||||||
|
|
||||||
|
#define PINGF(idx) (&(FLOW(idx)->ping))
|
||||||
|
|
||||||
/* Indexed by ICMP echo identifier */
|
/* Indexed by ICMP echo identifier */
|
||||||
static struct icmp_ping_flow *icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
|
static struct icmp_ping_flow *icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* icmp_sock_handler() - Handle new data from ICMP or ICMPv6 socket
|
* icmp_sock_handler() - Handle new data from ICMP or ICMPv6 socket
|
||||||
* @c: Execution context
|
* @c: Execution context
|
||||||
* @af: Address family (AF_INET or AF_INET6)
|
|
||||||
* @ref: epoll reference
|
* @ref: epoll reference
|
||||||
*/
|
*/
|
||||||
void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
|
void icmp_sock_handler(const struct ctx *c, union epoll_ref ref)
|
||||||
{
|
{
|
||||||
struct icmp_ping_flow *pingf = af == AF_INET
|
struct icmp_ping_flow *pingf = PINGF(ref.flowside.flow);
|
||||||
? icmp_id_map[V4][ref.icmp.id] : icmp_id_map[V6][ref.icmp.id];
|
|
||||||
union sockaddr_inany sr;
|
union sockaddr_inany sr;
|
||||||
socklen_t sl = sizeof(sr);
|
socklen_t sl = sizeof(sr);
|
||||||
char buf[USHRT_MAX];
|
char buf[USHRT_MAX];
|
||||||
|
@ -78,27 +78,26 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
|
||||||
flow_err(pingf, "recvfrom() error: %s", strerror(errno));
|
flow_err(pingf, "recvfrom() error: %s", strerror(errno));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (sr.sa_family != af)
|
|
||||||
goto unexpected;
|
|
||||||
|
|
||||||
if (af == AF_INET) {
|
if (pingf->f.type == FLOW_PING4) {
|
||||||
struct icmphdr *ih4 = (struct icmphdr *)buf;
|
struct icmphdr *ih4 = (struct icmphdr *)buf;
|
||||||
|
|
||||||
if ((size_t)n < sizeof(*ih4) || ih4->type != ICMP_ECHOREPLY)
|
if (sr.sa_family != AF_INET || (size_t)n < sizeof(*ih4) ||
|
||||||
|
ih4->type != ICMP_ECHOREPLY)
|
||||||
goto unexpected;
|
goto unexpected;
|
||||||
|
|
||||||
/* Adjust packet back to guest-side ID */
|
/* Adjust packet back to guest-side ID */
|
||||||
ih4->un.echo.id = htons(ref.icmp.id);
|
ih4->un.echo.id = htons(pingf->id);
|
||||||
seq = ntohs(ih4->un.echo.sequence);
|
seq = ntohs(ih4->un.echo.sequence);
|
||||||
} else if (af == AF_INET6) {
|
} else if (pingf->f.type == FLOW_PING6) {
|
||||||
struct icmp6hdr *ih6 = (struct icmp6hdr *)buf;
|
struct icmp6hdr *ih6 = (struct icmp6hdr *)buf;
|
||||||
|
|
||||||
if ((size_t)n < sizeof(*ih6) ||
|
if (sr.sa_family != AF_INET6 || (size_t)n < sizeof(*ih6) ||
|
||||||
ih6->icmp6_type != ICMPV6_ECHO_REPLY)
|
ih6->icmp6_type != ICMPV6_ECHO_REPLY)
|
||||||
goto unexpected;
|
goto unexpected;
|
||||||
|
|
||||||
/* Adjust packet back to guest-side ID */
|
/* Adjust packet back to guest-side ID */
|
||||||
ih6->icmp6_identifier = htons(ref.icmp.id);
|
ih6->icmp6_identifier = htons(pingf->id);
|
||||||
seq = ntohs(ih6->icmp6_sequence);
|
seq = ntohs(ih6->icmp6_sequence);
|
||||||
} else {
|
} else {
|
||||||
ASSERT(0);
|
ASSERT(0);
|
||||||
|
@ -113,11 +112,11 @@ void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref)
|
||||||
}
|
}
|
||||||
|
|
||||||
flow_dbg(pingf, "echo reply to tap, ID: %"PRIu16", seq: %"PRIu16,
|
flow_dbg(pingf, "echo reply to tap, ID: %"PRIu16", seq: %"PRIu16,
|
||||||
ref.icmp.id, seq);
|
pingf->id, seq);
|
||||||
|
|
||||||
if (af == AF_INET)
|
if (pingf->f.type == FLOW_PING4)
|
||||||
tap_icmp4_send(c, sr.sa4.sin_addr, tap_ip4_daddr(c), buf, n);
|
tap_icmp4_send(c, sr.sa4.sin_addr, tap_ip4_daddr(c), buf, n);
|
||||||
else if (af == AF_INET6)
|
else if (pingf->f.type == FLOW_PING6)
|
||||||
tap_icmp6_send(c, &sr.sa6.sin6_addr,
|
tap_icmp6_send(c, &sr.sa6.sin6_addr,
|
||||||
tap_ip6_daddr(c, &sr.sa6.sin6_addr), buf, n);
|
tap_ip6_daddr(c, &sr.sa6.sin6_addr), buf, n);
|
||||||
return;
|
return;
|
||||||
|
@ -159,7 +158,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
|
||||||
sa_family_t af, uint16_t id)
|
sa_family_t af, uint16_t id)
|
||||||
{
|
{
|
||||||
uint8_t flowtype = af == AF_INET ? FLOW_PING4 : FLOW_PING6;
|
uint8_t flowtype = af == AF_INET ? FLOW_PING4 : FLOW_PING6;
|
||||||
union icmp_epoll_ref iref = { .id = id };
|
union epoll_ref ref = { .type = EPOLL_TYPE_PING };
|
||||||
union flow *flow = flow_alloc();
|
union flow *flow = flow_alloc();
|
||||||
struct icmp_ping_flow *pingf;
|
struct icmp_ping_flow *pingf;
|
||||||
const void *bind_addr;
|
const void *bind_addr;
|
||||||
|
@ -181,8 +180,9 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
|
||||||
bind_if = c->ip6.ifname_out;
|
bind_if = c->ip6.ifname_out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ref.flowside = FLOW_SIDX(flow, SOCKSIDE);
|
||||||
pingf->sock = sock_l4(c, af, flow_proto[flowtype], bind_addr, bind_if,
|
pingf->sock = sock_l4(c, af, flow_proto[flowtype], bind_addr, bind_if,
|
||||||
0, iref.u32);
|
0, ref.data);
|
||||||
|
|
||||||
if (pingf->sock < 0) {
|
if (pingf->sock < 0) {
|
||||||
warn("Cannot open \"ping\" socket. You might need to:");
|
warn("Cannot open \"ping\" socket. You might need to:");
|
||||||
|
|
13
icmp.h
13
icmp.h
|
@ -11,23 +11,12 @@
|
||||||
struct ctx;
|
struct ctx;
|
||||||
struct icmp_ping_flow;
|
struct icmp_ping_flow;
|
||||||
|
|
||||||
void icmp_sock_handler(const struct ctx *c, sa_family_t af, union epoll_ref ref);
|
void icmp_sock_handler(const struct ctx *c, union epoll_ref ref);
|
||||||
int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
|
int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
|
||||||
const void *saddr, const void *daddr,
|
const void *saddr, const void *daddr,
|
||||||
const struct pool *p, const struct timespec *now);
|
const struct pool *p, const struct timespec *now);
|
||||||
void icmp_init(void);
|
void icmp_init(void);
|
||||||
|
|
||||||
/**
|
|
||||||
* union icmp_epoll_ref - epoll reference portion for ICMP tracking
|
|
||||||
* @v6: Set for IPv6 sockets or connections
|
|
||||||
* @u32: Opaque u32 value of reference
|
|
||||||
* @id: Associated echo identifier, needed if bind() fails
|
|
||||||
*/
|
|
||||||
union icmp_epoll_ref {
|
|
||||||
uint16_t id;
|
|
||||||
uint32_t u32;
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct icmp_ctx - Execution context for ICMP routines
|
* struct icmp_ctx - Execution context for ICMP routines
|
||||||
* @timer_run: Timestamp of most recent timer run
|
* @timer_run: Timestamp of most recent timer run
|
||||||
|
|
10
passt.c
10
passt.c
|
@ -66,8 +66,7 @@ char *epoll_type_str[] = {
|
||||||
[EPOLL_TYPE_TCP_LISTEN] = "listening TCP socket",
|
[EPOLL_TYPE_TCP_LISTEN] = "listening TCP socket",
|
||||||
[EPOLL_TYPE_TCP_TIMER] = "TCP timer",
|
[EPOLL_TYPE_TCP_TIMER] = "TCP timer",
|
||||||
[EPOLL_TYPE_UDP] = "UDP socket",
|
[EPOLL_TYPE_UDP] = "UDP socket",
|
||||||
[EPOLL_TYPE_ICMP] = "ICMP socket",
|
[EPOLL_TYPE_PING] = "ICMP/ICMPv6 ping socket",
|
||||||
[EPOLL_TYPE_ICMPV6] = "ICMPv6 socket",
|
|
||||||
[EPOLL_TYPE_NSQUIT_INOTIFY] = "namespace inotify watch",
|
[EPOLL_TYPE_NSQUIT_INOTIFY] = "namespace inotify watch",
|
||||||
[EPOLL_TYPE_NSQUIT_TIMER] = "namespace timer watch",
|
[EPOLL_TYPE_NSQUIT_TIMER] = "namespace timer watch",
|
||||||
[EPOLL_TYPE_TAP_PASTA] = "/dev/net/tun device",
|
[EPOLL_TYPE_TAP_PASTA] = "/dev/net/tun device",
|
||||||
|
@ -377,11 +376,8 @@ loop:
|
||||||
case EPOLL_TYPE_UDP:
|
case EPOLL_TYPE_UDP:
|
||||||
udp_sock_handler(&c, ref, eventmask, &now);
|
udp_sock_handler(&c, ref, eventmask, &now);
|
||||||
break;
|
break;
|
||||||
case EPOLL_TYPE_ICMP:
|
case EPOLL_TYPE_PING:
|
||||||
icmp_sock_handler(&c, AF_INET, ref);
|
icmp_sock_handler(&c, ref);
|
||||||
break;
|
|
||||||
case EPOLL_TYPE_ICMPV6:
|
|
||||||
icmp_sock_handler(&c, AF_INET6, ref);
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
/* Can't happen */
|
/* Can't happen */
|
||||||
|
|
7
passt.h
7
passt.h
|
@ -59,10 +59,8 @@ enum epoll_type {
|
||||||
EPOLL_TYPE_TCP_TIMER,
|
EPOLL_TYPE_TCP_TIMER,
|
||||||
/* UDP sockets */
|
/* UDP sockets */
|
||||||
EPOLL_TYPE_UDP,
|
EPOLL_TYPE_UDP,
|
||||||
/* IPv4 ICMP sockets */
|
/* ICMP/ICMPv6 ping sockets */
|
||||||
EPOLL_TYPE_ICMP,
|
EPOLL_TYPE_PING,
|
||||||
/* ICMPv6 sockets */
|
|
||||||
EPOLL_TYPE_ICMPV6,
|
|
||||||
/* inotify fd watching for end of netns (pasta) */
|
/* inotify fd watching for end of netns (pasta) */
|
||||||
EPOLL_TYPE_NSQUIT_INOTIFY,
|
EPOLL_TYPE_NSQUIT_INOTIFY,
|
||||||
/* timer fd watching for end of netns, fallback for inotify (pasta) */
|
/* timer fd watching for end of netns, fallback for inotify (pasta) */
|
||||||
|
@ -100,7 +98,6 @@ union epoll_ref {
|
||||||
flow_sidx_t flowside;
|
flow_sidx_t flowside;
|
||||||
union tcp_listen_epoll_ref tcp_listen;
|
union tcp_listen_epoll_ref tcp_listen;
|
||||||
union udp_epoll_ref udp;
|
union udp_epoll_ref udp;
|
||||||
union icmp_epoll_ref icmp;
|
|
||||||
uint32_t data;
|
uint32_t data;
|
||||||
int nsdir_fd;
|
int nsdir_fd;
|
||||||
};
|
};
|
||||||
|
|
4
util.c
4
util.c
|
@ -72,10 +72,8 @@ int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
|
||||||
ref.type = EPOLL_TYPE_UDP;
|
ref.type = EPOLL_TYPE_UDP;
|
||||||
break;
|
break;
|
||||||
case IPPROTO_ICMP:
|
case IPPROTO_ICMP:
|
||||||
ref.type = EPOLL_TYPE_ICMP;
|
|
||||||
break;
|
|
||||||
case IPPROTO_ICMPV6:
|
case IPPROTO_ICMPV6:
|
||||||
ref.type = EPOLL_TYPE_ICMPV6;
|
ref.type = EPOLL_TYPE_PING;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
return -EPFNOSUPPORT; /* Not implemented. */
|
return -EPFNOSUPPORT; /* Not implemented. */
|
||||||
|
|
Loading…
Reference in a new issue