tcp, udp: Don't include destination address in partially precomputed csums
We partially prepopulate IP and TCP header structures including, amongst other things, the destination address, which for IPv4 is always the known address of the guest/namespace. We partially precompute both the IPv4 header checksum and the TCP checksum based on this. In future we're going to want more flexibility with controlling the destination for IPv4 (as we already do for IPv6), so this precomputed value gets in the way. Therefore remove the IPv4 destination from the precomputed checksum and fold it into the checksum update when we actually send a packet. Doing this means we no longer need to recompute those partial sums when the destination address changes ({tcp,udp}_update_l2_buf()) and instead the computation can be moved to compile time. This means that, while we perform slightly more computations on each packet, we slightly reduce the amount of memory we need to access. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
parent
8aa32009ed
commit
5bf200ae8a
3 changed files with 27 additions and 52 deletions
61
tcp.c
61
tcp.c
|
@ -323,10 +323,8 @@
|
||||||
#define MSS_DEFAULT 536
|
#define MSS_DEFAULT 536
|
||||||
|
|
||||||
struct tcp4_l2_head { /* For MSS4 macro: keep in sync with tcp4_l2_buf_t */
|
struct tcp4_l2_head { /* For MSS4 macro: keep in sync with tcp4_l2_buf_t */
|
||||||
uint32_t psum;
|
|
||||||
uint32_t tsum;
|
|
||||||
#ifdef __AVX2__
|
#ifdef __AVX2__
|
||||||
uint8_t pad[18];
|
uint8_t pad[26];
|
||||||
#else
|
#else
|
||||||
uint8_t pad[2];
|
uint8_t pad[2];
|
||||||
#endif
|
#endif
|
||||||
|
@ -443,8 +441,6 @@ static union inany_addr low_rtt_dst[LOW_RTT_TABLE_SIZE];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* tcp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections
|
* tcp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections
|
||||||
* @psum: Partial IP header checksum (excluding tot_len and saddr)
|
|
||||||
* @tsum: Partial TCP header checksum (excluding length and saddr)
|
|
||||||
* @pad: Align TCP header to 32 bytes, for AVX2 checksum calculation only
|
* @pad: Align TCP header to 32 bytes, for AVX2 checksum calculation only
|
||||||
* @taph: Tap-level headers (partially pre-filled)
|
* @taph: Tap-level headers (partially pre-filled)
|
||||||
* @iph: Pre-filled IP header (except for tot_len and saddr)
|
* @iph: Pre-filled IP header (except for tot_len and saddr)
|
||||||
|
@ -452,17 +448,15 @@ static union inany_addr low_rtt_dst[LOW_RTT_TABLE_SIZE];
|
||||||
* @data: Storage for TCP payload
|
* @data: Storage for TCP payload
|
||||||
*/
|
*/
|
||||||
static struct tcp4_l2_buf_t {
|
static struct tcp4_l2_buf_t {
|
||||||
uint32_t psum; /* 0 */
|
|
||||||
uint32_t tsum; /* 4 */
|
|
||||||
#ifdef __AVX2__
|
#ifdef __AVX2__
|
||||||
uint8_t pad[18]; /* 8, align th to 32 bytes */
|
uint8_t pad[26]; /* 0, align th to 32 bytes */
|
||||||
#else
|
#else
|
||||||
uint8_t pad[2]; /* align iph to 4 bytes 8 */
|
uint8_t pad[2]; /* align iph to 4 bytes 0 */
|
||||||
#endif
|
#endif
|
||||||
struct tap_hdr taph; /* 26 10 */
|
struct tap_hdr taph; /* 26 2 */
|
||||||
struct iphdr iph; /* 44 28 */
|
struct iphdr iph; /* 44 20 */
|
||||||
struct tcphdr th; /* 64 48 */
|
struct tcphdr th; /* 64 40 */
|
||||||
uint8_t data[MSS4]; /* 84 68 */
|
uint8_t data[MSS4]; /* 84 60 */
|
||||||
/* 65536 65532 */
|
/* 65536 65532 */
|
||||||
#ifdef __AVX2__
|
#ifdef __AVX2__
|
||||||
} __attribute__ ((packed, aligned(32)))
|
} __attribute__ ((packed, aligned(32)))
|
||||||
|
@ -517,8 +511,6 @@ static struct iovec tcp_iov [UIO_MAXIOV];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* tcp4_l2_flags_buf_t - IPv4 packet buffers for segments without data (flags)
|
* tcp4_l2_flags_buf_t - IPv4 packet buffers for segments without data (flags)
|
||||||
* @psum: Partial IP header checksum (excluding tot_len and saddr)
|
|
||||||
* @tsum: Partial TCP header checksum (excluding length and saddr)
|
|
||||||
* @pad: Align TCP header to 32 bytes, for AVX2 checksum calculation only
|
* @pad: Align TCP header to 32 bytes, for AVX2 checksum calculation only
|
||||||
* @taph: Tap-level headers (partially pre-filled)
|
* @taph: Tap-level headers (partially pre-filled)
|
||||||
* @iph: Pre-filled IP header (except for tot_len and saddr)
|
* @iph: Pre-filled IP header (except for tot_len and saddr)
|
||||||
|
@ -526,16 +518,14 @@ static struct iovec tcp_iov [UIO_MAXIOV];
|
||||||
* @opts: Headroom for TCP options
|
* @opts: Headroom for TCP options
|
||||||
*/
|
*/
|
||||||
static struct tcp4_l2_flags_buf_t {
|
static struct tcp4_l2_flags_buf_t {
|
||||||
uint32_t psum; /* 0 */
|
|
||||||
uint32_t tsum; /* 4 */
|
|
||||||
#ifdef __AVX2__
|
#ifdef __AVX2__
|
||||||
uint8_t pad[18]; /* 8, align th to 32 bytes */
|
uint8_t pad[26]; /* 0, align th to 32 bytes */
|
||||||
#else
|
#else
|
||||||
uint8_t pad[2]; /* align iph to 4 bytes 8 */
|
uint8_t pad[2]; /* align iph to 4 bytes 0 */
|
||||||
#endif
|
#endif
|
||||||
struct tap_hdr taph; /* 26 10 */
|
struct tap_hdr taph; /* 26 2 */
|
||||||
struct iphdr iph; /* 44 28 */
|
struct iphdr iph; /* 44 20 */
|
||||||
struct tcphdr th; /* 64 48 */
|
struct tcphdr th; /* 64 40 */
|
||||||
char opts[OPT_MSS_LEN + OPT_WS_LEN + 1];
|
char opts[OPT_MSS_LEN + OPT_WS_LEN + 1];
|
||||||
#ifdef __AVX2__
|
#ifdef __AVX2__
|
||||||
} __attribute__ ((packed, aligned(32)))
|
} __attribute__ ((packed, aligned(32)))
|
||||||
|
@ -953,11 +943,13 @@ void tcp_sock_set_bufsize(const struct ctx *c, int s)
|
||||||
*/
|
*/
|
||||||
static void tcp_update_check_ip4(struct tcp4_l2_buf_t *buf)
|
static void tcp_update_check_ip4(struct tcp4_l2_buf_t *buf)
|
||||||
{
|
{
|
||||||
uint32_t sum = buf->psum;
|
uint32_t sum = L2_BUF_IP4_PSUM(IPPROTO_TCP);
|
||||||
|
|
||||||
sum += buf->iph.tot_len;
|
sum += buf->iph.tot_len;
|
||||||
sum += (buf->iph.saddr >> 16) & 0xffff;
|
sum += (buf->iph.saddr >> 16) & 0xffff;
|
||||||
sum += buf->iph.saddr & 0xffff;
|
sum += buf->iph.saddr & 0xffff;
|
||||||
|
sum += (buf->iph.daddr >> 16) & 0xffff;
|
||||||
|
sum += buf->iph.daddr & 0xffff;
|
||||||
|
|
||||||
buf->iph.check = (uint16_t)~csum_fold(sum);
|
buf->iph.check = (uint16_t)~csum_fold(sum);
|
||||||
}
|
}
|
||||||
|
@ -969,10 +961,12 @@ static void tcp_update_check_ip4(struct tcp4_l2_buf_t *buf)
|
||||||
static void tcp_update_check_tcp4(struct tcp4_l2_buf_t *buf)
|
static void tcp_update_check_tcp4(struct tcp4_l2_buf_t *buf)
|
||||||
{
|
{
|
||||||
uint16_t tlen = ntohs(buf->iph.tot_len) - 20;
|
uint16_t tlen = ntohs(buf->iph.tot_len) - 20;
|
||||||
uint32_t sum = buf->tsum;
|
uint32_t sum = htons(IPPROTO_TCP);
|
||||||
|
|
||||||
sum += (buf->iph.saddr >> 16) & 0xffff;
|
sum += (buf->iph.saddr >> 16) & 0xffff;
|
||||||
sum += buf->iph.saddr & 0xffff;
|
sum += buf->iph.saddr & 0xffff;
|
||||||
|
sum += (buf->iph.daddr >> 16) & 0xffff;
|
||||||
|
sum += buf->iph.daddr & 0xffff;
|
||||||
sum += htons(ntohs(buf->iph.tot_len) - 20);
|
sum += htons(ntohs(buf->iph.tot_len) - 20);
|
||||||
|
|
||||||
buf->th.check = 0;
|
buf->th.check = 0;
|
||||||
|
@ -1023,20 +1017,6 @@ void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,
|
||||||
|
|
||||||
if (ip_da) {
|
if (ip_da) {
|
||||||
b4f->iph.daddr = b4->iph.daddr = ip_da->s_addr;
|
b4f->iph.daddr = b4->iph.daddr = ip_da->s_addr;
|
||||||
if (!i) {
|
|
||||||
b4f->iph.saddr = b4->iph.saddr = 0;
|
|
||||||
b4f->iph.tot_len = b4->iph.tot_len = 0;
|
|
||||||
b4f->iph.check = b4->iph.check = 0;
|
|
||||||
b4f->psum = b4->psum = sum_16b(&b4->iph, 20);
|
|
||||||
|
|
||||||
b4->tsum = ((ip_da->s_addr >> 16) & 0xffff) +
|
|
||||||
(ip_da->s_addr & 0xffff) +
|
|
||||||
htons(IPPROTO_TCP);
|
|
||||||
b4f->tsum = b4->tsum;
|
|
||||||
} else {
|
|
||||||
b4f->psum = b4->psum = tcp4_l2_buf[0].psum;
|
|
||||||
b4f->tsum = b4->tsum = tcp4_l2_buf[0].tsum;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1045,15 +1025,16 @@ void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,
|
||||||
* tcp_sock4_iov_init() - Initialise scatter-gather L2 buffers for IPv4 sockets
|
* tcp_sock4_iov_init() - Initialise scatter-gather L2 buffers for IPv4 sockets
|
||||||
* @c: Execution context
|
* @c: Execution context
|
||||||
*/
|
*/
|
||||||
static void tcp_sock4_iov_init(const struct ctx *c)
|
static void tcp_sock4_iov_init(struct ctx *c)
|
||||||
{
|
{
|
||||||
|
struct iphdr iph = L2_BUF_IP4_INIT(IPPROTO_TCP);
|
||||||
struct iovec *iov;
|
struct iovec *iov;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < ARRAY_SIZE(tcp4_l2_buf); i++) {
|
for (i = 0; i < ARRAY_SIZE(tcp4_l2_buf); i++) {
|
||||||
tcp4_l2_buf[i] = (struct tcp4_l2_buf_t) {
|
tcp4_l2_buf[i] = (struct tcp4_l2_buf_t) {
|
||||||
.taph = TAP_HDR_INIT(ETH_P_IP),
|
.taph = TAP_HDR_INIT(ETH_P_IP),
|
||||||
.iph = L2_BUF_IP4_INIT(IPPROTO_TCP),
|
.iph = iph,
|
||||||
.th = { .doff = sizeof(struct tcphdr) / 4, .ack = 1 }
|
.th = { .doff = sizeof(struct tcphdr) / 4, .ack = 1 }
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
14
udp.c
14
udp.c
|
@ -168,7 +168,6 @@ static uint8_t udp_act[IP_VERSIONS][UDP_ACT_TYPE_MAX][DIV_ROUND_UP(NUM_PORTS, 8)
|
||||||
/**
|
/**
|
||||||
* udp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections
|
* udp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections
|
||||||
* @s_in: Source socket address, filled in by recvmmsg()
|
* @s_in: Source socket address, filled in by recvmmsg()
|
||||||
* @psum: Partial IP header checksum (excluding tot_len and saddr)
|
|
||||||
* @taph: Tap-level headers (partially pre-filled)
|
* @taph: Tap-level headers (partially pre-filled)
|
||||||
* @iph: Pre-filled IP header (except for tot_len and saddr)
|
* @iph: Pre-filled IP header (except for tot_len and saddr)
|
||||||
* @uh: Headroom for UDP header
|
* @uh: Headroom for UDP header
|
||||||
|
@ -176,7 +175,6 @@ static uint8_t udp_act[IP_VERSIONS][UDP_ACT_TYPE_MAX][DIV_ROUND_UP(NUM_PORTS, 8)
|
||||||
*/
|
*/
|
||||||
static struct udp4_l2_buf_t {
|
static struct udp4_l2_buf_t {
|
||||||
struct sockaddr_in s_in;
|
struct sockaddr_in s_in;
|
||||||
uint32_t psum;
|
|
||||||
|
|
||||||
struct tap_hdr taph;
|
struct tap_hdr taph;
|
||||||
struct iphdr iph;
|
struct iphdr iph;
|
||||||
|
@ -263,11 +261,13 @@ static void udp_invert_portmap(struct udp_port_fwd *fwd)
|
||||||
*/
|
*/
|
||||||
static void udp_update_check4(struct udp4_l2_buf_t *buf)
|
static void udp_update_check4(struct udp4_l2_buf_t *buf)
|
||||||
{
|
{
|
||||||
uint32_t sum = buf->psum;
|
uint32_t sum = L2_BUF_IP4_PSUM(IPPROTO_UDP);
|
||||||
|
|
||||||
sum += buf->iph.tot_len;
|
sum += buf->iph.tot_len;
|
||||||
sum += (buf->iph.saddr >> 16) & 0xffff;
|
sum += (buf->iph.saddr >> 16) & 0xffff;
|
||||||
sum += buf->iph.saddr & 0xffff;
|
sum += buf->iph.saddr & 0xffff;
|
||||||
|
sum += (buf->iph.daddr >> 16) & 0xffff;
|
||||||
|
sum += buf->iph.daddr & 0xffff;
|
||||||
|
|
||||||
buf->iph.check = (uint16_t)~csum_fold(sum);
|
buf->iph.check = (uint16_t)~csum_fold(sum);
|
||||||
}
|
}
|
||||||
|
@ -292,14 +292,6 @@ void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,
|
||||||
|
|
||||||
if (ip_da) {
|
if (ip_da) {
|
||||||
b4->iph.daddr = ip_da->s_addr;
|
b4->iph.daddr = ip_da->s_addr;
|
||||||
if (!i) {
|
|
||||||
b4->iph.saddr = 0;
|
|
||||||
b4->iph.tot_len = 0;
|
|
||||||
b4->iph.check = 0;
|
|
||||||
b4->psum = sum_16b(&b4->iph, 20);
|
|
||||||
} else {
|
|
||||||
b4->psum = udp4_l2_buf[0].psum;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
4
util.h
4
util.h
|
@ -141,11 +141,13 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
|
||||||
.tot_len = 0, \
|
.tot_len = 0, \
|
||||||
.id = 0, \
|
.id = 0, \
|
||||||
.frag_off = 0, \
|
.frag_off = 0, \
|
||||||
.ttl = 255, \
|
.ttl = 0xff, \
|
||||||
.protocol = (proto), \
|
.protocol = (proto), \
|
||||||
.saddr = 0, \
|
.saddr = 0, \
|
||||||
.daddr = 0, \
|
.daddr = 0, \
|
||||||
}
|
}
|
||||||
|
#define L2_BUF_IP4_PSUM(proto) ((uint32_t)htons_constant(0x4500) + \
|
||||||
|
(uint32_t)htons_constant(0xff00 | (proto)))
|
||||||
|
|
||||||
#define L2_BUF_IP6_INIT(proto) \
|
#define L2_BUF_IP6_INIT(proto) \
|
||||||
{ \
|
{ \
|
||||||
|
|
Loading…
Reference in a new issue