tap, tcp, udp, icmp: Cut down on some oversized buffers
The existing sizes provide no measurable difference in throughput or packet rates at this point. They were probably needed while the batched implementations were not yet complete, but they can be decreased quite a bit now.
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
parent
1f4b7fa0d7
commit
37c228ada8
6 changed files with 72 additions and 31 deletions
12
icmp.c
12
icmp.c
|
@ -43,13 +43,13 @@
|
||||||
/**
|
/**
|
||||||
* struct icmp_id_sock - Tracking information for single ICMP echo identifier
|
* struct icmp_id_sock - Tracking information for single ICMP echo identifier
|
||||||
* @sock: Bound socket for identifier
|
* @sock: Bound socket for identifier
|
||||||
* @ts: Last associated activity from tap, seconds
|
|
||||||
* @seq: Last sequence number sent to tap, host order
|
* @seq: Last sequence number sent to tap, host order
|
||||||
|
* @ts: Last associated activity from tap, seconds
|
||||||
*/
|
*/
|
||||||
struct icmp_id_sock {
|
struct icmp_id_sock {
|
||||||
int sock;
|
int sock;
|
||||||
time_t ts;
|
|
||||||
uint16_t seq;
|
uint16_t seq;
|
||||||
|
time_t ts;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Indexed by ICMP echo identifier */
|
/* Indexed by ICMP echo identifier */
|
||||||
|
@ -168,6 +168,10 @@ int icmp_tap_handler(const struct ctx *c, int af, const void *addr,
|
||||||
s = sock_l4(c, AF_INET, IPPROTO_ICMP, id, 0, iref.u32);
|
s = sock_l4(c, AF_INET, IPPROTO_ICMP, id, 0, iref.u32);
|
||||||
if (s < 0)
|
if (s < 0)
|
||||||
goto fail_sock;
|
goto fail_sock;
|
||||||
|
if (s > SOCKET_MAX) {
|
||||||
|
close(s);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
icmp_id_map[V4][id].sock = s;
|
icmp_id_map[V4][id].sock = s;
|
||||||
}
|
}
|
||||||
|
@ -201,6 +205,10 @@ int icmp_tap_handler(const struct ctx *c, int af, const void *addr,
|
||||||
iref.u32);
|
iref.u32);
|
||||||
if (s < 0)
|
if (s < 0)
|
||||||
goto fail_sock;
|
goto fail_sock;
|
||||||
|
if (s > SOCKET_MAX) {
|
||||||
|
close(s);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
icmp_id_map[V6][id].sock = s;
|
icmp_id_map[V6][id].sock = s;
|
||||||
}
|
}
|
||||||
|
|
2
passt.h
2
passt.h
|
@ -63,7 +63,7 @@ union epoll_ref {
|
||||||
};
|
};
|
||||||
|
|
||||||
#define TAP_BUF_BYTES \
|
#define TAP_BUF_BYTES \
|
||||||
ROUND_DOWN(((ETH_MAX_MTU + sizeof(uint32_t)) * 256), PAGE_SIZE)
|
ROUND_DOWN(((ETH_MAX_MTU + sizeof(uint32_t)) * 128), PAGE_SIZE)
|
||||||
#define TAP_BUF_FILL (TAP_BUF_BYTES - ETH_MAX_MTU - sizeof(uint32_t))
|
#define TAP_BUF_FILL (TAP_BUF_BYTES - ETH_MAX_MTU - sizeof(uint32_t))
|
||||||
#define TAP_MSGS \
|
#define TAP_MSGS \
|
||||||
DIV_ROUND_UP(TAP_BUF_BYTES, ETH_ZLEN - 2 * ETH_ALEN + sizeof(uint32_t))
|
DIV_ROUND_UP(TAP_BUF_BYTES, ETH_ZLEN - 2 * ETH_ALEN + sizeof(uint32_t))
|
||||||
|
|
24
tap.c
24
tap.c
|
@ -57,6 +57,8 @@
|
||||||
static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS, pkt_buf);
|
static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS, pkt_buf);
|
||||||
static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS, pkt_buf);
|
static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS, pkt_buf);
|
||||||
|
|
||||||
|
#define TAP_SEQS 128 /* Different L4 tuples in one batch */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* tap_send() - Send frame, with qemu socket header if needed
|
* tap_send() - Send frame, with qemu socket header if needed
|
||||||
* @c: Execution context
|
* @c: Execution context
|
||||||
|
@ -225,7 +227,7 @@ static struct tap4_l4_t {
|
||||||
uint32_t daddr;
|
uint32_t daddr;
|
||||||
|
|
||||||
struct pool_l4_t p;
|
struct pool_l4_t p;
|
||||||
} tap4_l4[UIO_MAXIOV /* Arbitrary: TAP_MSGS in theory, so limit in users */];
|
} tap4_l4[TAP_SEQS /* Arbitrary: TAP_MSGS in theory, so limit in users */];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct l4_seq6_t - Message sequence for one protocol handler call, IPv6
|
* struct l4_seq6_t - Message sequence for one protocol handler call, IPv6
|
||||||
|
@ -247,7 +249,7 @@ static struct tap6_l4_t {
|
||||||
struct in6_addr daddr;
|
struct in6_addr daddr;
|
||||||
|
|
||||||
struct pool_l4_t p;
|
struct pool_l4_t p;
|
||||||
} tap6_l4[UIO_MAXIOV /* Arbitrary: TAP_MSGS in theory, so limit in users */];
|
} tap6_l4[TAP_SEQS /* Arbitrary: TAP_MSGS in theory, so limit in users */];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* tap_packet_debug() - Print debug message for packet(s) from guest/tap
|
* tap_packet_debug() - Print debug message for packet(s) from guest/tap
|
||||||
|
@ -401,12 +403,12 @@ resume:
|
||||||
seq->daddr = iph->daddr; \
|
seq->daddr = iph->daddr; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
if (seq && L4_MATCH(iph, uh, seq) && seq->p.count < UIO_MAXIOV)
|
if (seq && L4_MATCH(iph, uh, seq) && seq->p.count < TAP_SEQS)
|
||||||
goto append;
|
goto append;
|
||||||
|
|
||||||
for (seq = tap4_l4 + seq_count - 1; seq >= tap4_l4; seq--) {
|
for (seq = tap4_l4 + seq_count - 1; seq >= tap4_l4; seq--) {
|
||||||
if (L4_MATCH(iph, uh, seq)) {
|
if (L4_MATCH(iph, uh, seq)) {
|
||||||
if (seq->p.count >= UIO_MAXIOV)
|
if (seq->p.count >= TAP_SEQS)
|
||||||
seq = NULL;
|
seq = NULL;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -424,7 +426,7 @@ resume:
|
||||||
append:
|
append:
|
||||||
packet_add((struct pool *)&seq->p, l4_len, l4h);
|
packet_add((struct pool *)&seq->p, l4_len, l4h);
|
||||||
|
|
||||||
if (seq_count == UIO_MAXIOV)
|
if (seq_count == TAP_SEQS)
|
||||||
break; /* Resume after flushing if i < count */
|
break; /* Resume after flushing if i < count */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -563,12 +565,12 @@ resume:
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
if (seq && L4_MATCH(ip6h, proto, uh, seq) &&
|
if (seq && L4_MATCH(ip6h, proto, uh, seq) &&
|
||||||
seq->p.count < UIO_MAXIOV)
|
seq->p.count < TAP_SEQS)
|
||||||
goto append;
|
goto append;
|
||||||
|
|
||||||
for (seq = tap6_l4 + seq_count - 1; seq >= tap6_l4; seq--) {
|
for (seq = tap6_l4 + seq_count - 1; seq >= tap6_l4; seq--) {
|
||||||
if (L4_MATCH(ip6h, proto, uh, seq)) {
|
if (L4_MATCH(ip6h, proto, uh, seq)) {
|
||||||
if (seq->p.count >= UIO_MAXIOV)
|
if (seq->p.count >= TAP_SEQS)
|
||||||
seq = NULL;
|
seq = NULL;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -586,7 +588,7 @@ resume:
|
||||||
append:
|
append:
|
||||||
packet_add((struct pool *)&seq->p, l4_len, l4h);
|
packet_add((struct pool *)&seq->p, l4_len, l4h);
|
||||||
|
|
||||||
if (seq_count == UIO_MAXIOV)
|
if (seq_count == TAP_SEQS)
|
||||||
break; /* Resume after flushing if i < count */
|
break; /* Resume after flushing if i < count */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -924,9 +926,9 @@ void tap_sock_init(struct ctx *c)
|
||||||
pool_tap4_storage = PACKET_INIT(pool_tap4, TAP_MSGS, pkt_buf, sz);
|
pool_tap4_storage = PACKET_INIT(pool_tap4, TAP_MSGS, pkt_buf, sz);
|
||||||
pool_tap6_storage = PACKET_INIT(pool_tap6, TAP_MSGS, pkt_buf, sz);
|
pool_tap6_storage = PACKET_INIT(pool_tap6, TAP_MSGS, pkt_buf, sz);
|
||||||
|
|
||||||
for (i = 0; i < UIO_MAXIOV; i++) {
|
for (i = 0; i < TAP_SEQS; i++) {
|
||||||
tap4_l4[i].p = PACKET_INIT(pool_l4, UIO_MAXIOV, pkt_buf, sz);
|
tap4_l4[i].p = PACKET_INIT(pool_l4, TAP_SEQS, pkt_buf, sz);
|
||||||
tap6_l4[i].p = PACKET_INIT(pool_l4, UIO_MAXIOV, pkt_buf, sz);
|
tap6_l4[i].p = PACKET_INIT(pool_l4, TAP_SEQS, pkt_buf, sz);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (c->fd_tap != -1) {
|
if (c->fd_tap != -1) {
|
||||||
|
|
51
tcp.c
51
tcp.c
|
@ -70,9 +70,9 @@
|
||||||
*
|
*
|
||||||
* Data needs to linger on sockets as long as it's not acknowledged by the
|
* Data needs to linger on sockets as long as it's not acknowledged by the
|
||||||
* guest, and is read using MSG_PEEK into preallocated static buffers sized
|
* guest, and is read using MSG_PEEK into preallocated static buffers sized
|
||||||
* to the maximum supported window, 64MiB ("discard" buffer, for already-sent
|
* to the maximum supported window, 16 MiB ("discard" buffer, for already-sent
|
||||||
* data) plus a number of maximum-MSS-sized buffers. This imposes a practical
|
* data) plus a number of maximum-MSS-sized buffers. This imposes a practical
|
||||||
* limitation on window scaling, that is, the maximum factor is 1024. Larger
|
* limitation on window scaling, that is, the maximum factor is 256. Larger
|
||||||
* factors will be accepted, but resulting, larger values are never advertised
|
* factors will be accepted, but resulting, larger values are never advertised
|
||||||
* to the other side, and not used while queueing data.
|
* to the other side, and not used while queueing data.
|
||||||
*
|
*
|
||||||
|
@ -299,7 +299,7 @@
|
||||||
#include "conf.h"
|
#include "conf.h"
|
||||||
#include "tcp_splice.h"
|
#include "tcp_splice.h"
|
||||||
|
|
||||||
#define TCP_FRAMES_MEM 256
|
#define TCP_FRAMES_MEM 128
|
||||||
#define TCP_FRAMES \
|
#define TCP_FRAMES \
|
||||||
(c->mode == MODE_PASST ? TCP_FRAMES_MEM : 1)
|
(c->mode == MODE_PASST ? TCP_FRAMES_MEM : 1)
|
||||||
|
|
||||||
|
@ -311,17 +311,48 @@
|
||||||
#define TCP_HASH_TABLE_SIZE (TCP_MAX_CONNS * 100 / \
|
#define TCP_HASH_TABLE_SIZE (TCP_MAX_CONNS * 100 / \
|
||||||
TCP_HASH_TABLE_LOAD)
|
TCP_HASH_TABLE_LOAD)
|
||||||
|
|
||||||
#define MAX_WS 10
|
#define MAX_WS 8
|
||||||
#define MAX_WINDOW (1 << (16 + (MAX_WS)))
|
#define MAX_WINDOW (1 << (16 + (MAX_WS)))
|
||||||
|
|
||||||
/* MSS rounding: see SET_MSS() */
|
/* MSS rounding: see SET_MSS() */
|
||||||
#define MSS_DEFAULT 536
|
#define MSS_DEFAULT 536
|
||||||
#define MSS4 ROUND_DOWN(USHRT_MAX - \
|
|
||||||
sizeof(uint32_t) - sizeof(struct ethhdr) - \
|
struct tcp4_l2_head { /* For MSS4 macro: keep in sync with tcp4_l2_buf_t */
|
||||||
sizeof(struct iphdr) - sizeof(struct tcphdr), 4)
|
uint32_t psum;
|
||||||
#define MSS6 ROUND_DOWN(USHRT_MAX - \
|
uint32_t tsum;
|
||||||
sizeof(uint32_t) - sizeof(struct ethhdr) - \
|
#ifdef __AVX2__
|
||||||
sizeof(struct ipv6hdr) - sizeof(struct tcphdr), 4)
|
uint8_t pad[18];
|
||||||
|
#else
|
||||||
|
uint8_t pad[2];
|
||||||
|
#endif
|
||||||
|
uint32_t vnet_len;
|
||||||
|
struct ethhdr eh;
|
||||||
|
struct iphdr iph;
|
||||||
|
struct tcphdr th;
|
||||||
|
#ifdef __AVX2__
|
||||||
|
} __attribute__ ((packed, aligned(32)));
|
||||||
|
#else
|
||||||
|
} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
struct tcp6_l2_head { /* For MSS6 macro: keep in sync with tcp6_l2_buf_t */
|
||||||
|
#ifdef __AVX2__
|
||||||
|
uint8_t pad[14];
|
||||||
|
#else
|
||||||
|
uint8_t pad[2];
|
||||||
|
#endif
|
||||||
|
uint32_t vnet_len;
|
||||||
|
struct ethhdr eh;
|
||||||
|
struct ipv6hdr ip6h;
|
||||||
|
struct tcphdr th;
|
||||||
|
#ifdef __AVX2__
|
||||||
|
} __attribute__ ((packed, aligned(32)));
|
||||||
|
#else
|
||||||
|
} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define MSS4 ROUND_DOWN(USHRT_MAX - sizeof(struct tcp4_l2_head), 4)
|
||||||
|
#define MSS6 ROUND_DOWN(USHRT_MAX - sizeof(struct tcp6_l2_head), 4)
|
||||||
|
|
||||||
#define WINDOW_DEFAULT 14600 /* RFC 6928 */
|
#define WINDOW_DEFAULT 14600 /* RFC 6928 */
|
||||||
#ifdef HAS_SND_WND
|
#ifdef HAS_SND_WND
|
||||||
|
|
10
tcp_splice.c
10
tcp_splice.c
|
@ -102,10 +102,10 @@ struct tcp_splice_conn {
|
||||||
#define RCVLOWAT_ACT_B BIT(5)
|
#define RCVLOWAT_ACT_B BIT(5)
|
||||||
#define CLOSING BIT(6)
|
#define CLOSING BIT(6)
|
||||||
|
|
||||||
uint64_t a_read;
|
uint32_t a_read;
|
||||||
uint64_t a_written;
|
uint32_t a_written;
|
||||||
uint64_t b_read;
|
uint32_t b_read;
|
||||||
uint64_t b_written;
|
uint32_t b_written;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define CONN_V6(x) (x->flags & SOCK_V6)
|
#define CONN_V6(x) (x->flags & SOCK_V6)
|
||||||
|
@ -553,7 +553,7 @@ void tcp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
|
||||||
{
|
{
|
||||||
uint8_t lowat_set_flag, lowat_act_flag;
|
uint8_t lowat_set_flag, lowat_act_flag;
|
||||||
int from, to, *pipes, eof, never_read;
|
int from, to, *pipes, eof, never_read;
|
||||||
uint64_t *seq_read, *seq_write;
|
uint32_t *seq_read, *seq_write;
|
||||||
struct tcp_splice_conn *conn;
|
struct tcp_splice_conn *conn;
|
||||||
|
|
||||||
if (ref.r.p.tcp.tcp.listen) {
|
if (ref.r.p.tcp.tcp.listen) {
|
||||||
|
|
4
udp.c
4
udp.c
|
@ -117,8 +117,8 @@
|
||||||
#include "pcap.h"
|
#include "pcap.h"
|
||||||
|
|
||||||
#define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */
|
#define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */
|
||||||
#define UDP_SPLICE_FRAMES 128
|
#define UDP_SPLICE_FRAMES 32
|
||||||
#define UDP_TAP_FRAMES_MEM 128
|
#define UDP_TAP_FRAMES_MEM 32
|
||||||
#define UDP_TAP_FRAMES (c->mode == MODE_PASST ? UDP_TAP_FRAMES_MEM : 1)
|
#define UDP_TAP_FRAMES (c->mode == MODE_PASST ? UDP_TAP_FRAMES_MEM : 1)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in a new issue