tcp: Correctly update SO_PEEK_OFF when tap_send_frames() drops frames

When using the new SO_PEEK_OFF feature on TCP sockets, we must adjust
the SO_PEEK_OFF value whenever we move conn->seq_to_tap backwards.
Although it was discussed during development, somewhere during the shuffles
we missed the case where we move the pointer backwards because frames were
dropped while sending them to the guest.  This can happen, for example, if
the socket buffer on the Unix socket to qemu overflows.

Fixing this is slightly complicated because we need to pass a non-const
context pointer to some places we previously didn't need it.  While we're
there also fix a small stylistic issue in the function comment for
tcp_revert_seq() - it was using spaces instead of tabs.

Fixes: e63d281871 ("tcp: leverage support of SO_PEEK_OFF socket option when available")
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
David Gibson 2024-07-24 13:31:09 +10:00 committed by Stefano Brivio
parent 9cb6b50815
commit a09aeb4bd6
2 changed files with 15 additions and 10 deletions

View file

@ -235,12 +235,13 @@ void tcp_flags_flush(const struct ctx *c)
/** /**
* tcp_revert_seq() - Revert affected conn->seq_to_tap after failed transmission * tcp_revert_seq() - Revert affected conn->seq_to_tap after failed transmission
* @c: Execution context
* @conns: Array of connection pointers corresponding to queued frames * @conns: Array of connection pointers corresponding to queued frames
* @frames: Two-dimensional array containing queued frames with sub-iovs * @frames: Two-dimensional array containing queued frames with sub-iovs
* @num_frames: Number of entries in the two arrays to be compared * @num_frames: Number of entries in the two arrays to be compared
*/ */
static void tcp_revert_seq(struct tcp_tap_conn **conns, struct iovec (*frames)[TCP_NUM_IOVS], static void tcp_revert_seq(struct ctx *c, struct tcp_tap_conn **conns,
int num_frames) struct iovec (*frames)[TCP_NUM_IOVS], int num_frames)
{ {
int i; int i;
@ -248,11 +249,15 @@ static void tcp_revert_seq(struct tcp_tap_conn **conns, struct iovec (*frames)[T
const struct tcphdr *th = frames[i][TCP_IOV_PAYLOAD].iov_base; const struct tcphdr *th = frames[i][TCP_IOV_PAYLOAD].iov_base;
struct tcp_tap_conn *conn = conns[i]; struct tcp_tap_conn *conn = conns[i];
uint32_t seq = ntohl(th->seq); uint32_t seq = ntohl(th->seq);
uint32_t peek_offset;
if (SEQ_LE(conn->seq_to_tap, seq)) if (SEQ_LE(conn->seq_to_tap, seq))
continue; continue;
conn->seq_to_tap = seq; conn->seq_to_tap = seq;
peek_offset = conn->seq_to_tap - conn->seq_ack_from_tap;
if (tcp_set_peek_offset(conn->sock, peek_offset))
tcp_rst(c, conn);
} }
} }
@ -260,14 +265,14 @@ static void tcp_revert_seq(struct tcp_tap_conn **conns, struct iovec (*frames)[T
* tcp_payload_flush() - Send out buffers for segments with data * tcp_payload_flush() - Send out buffers for segments with data
* @c: Execution context * @c: Execution context
*/ */
void tcp_payload_flush(const struct ctx *c) void tcp_payload_flush(struct ctx *c)
{ {
size_t m; size_t m;
m = tap_send_frames(c, &tcp6_l2_iov[0][0], TCP_NUM_IOVS, m = tap_send_frames(c, &tcp6_l2_iov[0][0], TCP_NUM_IOVS,
tcp6_payload_used); tcp6_payload_used);
if (m != tcp6_payload_used) { if (m != tcp6_payload_used) {
tcp_revert_seq(&tcp6_frame_conns[m], &tcp6_l2_iov[m], tcp_revert_seq(c, &tcp6_frame_conns[m], &tcp6_l2_iov[m],
tcp6_payload_used - m); tcp6_payload_used - m);
} }
tcp6_payload_used = 0; tcp6_payload_used = 0;
@ -275,7 +280,7 @@ void tcp_payload_flush(const struct ctx *c)
m = tap_send_frames(c, &tcp4_l2_iov[0][0], TCP_NUM_IOVS, m = tap_send_frames(c, &tcp4_l2_iov[0][0], TCP_NUM_IOVS,
tcp4_payload_used); tcp4_payload_used);
if (m != tcp4_payload_used) { if (m != tcp4_payload_used) {
tcp_revert_seq(&tcp4_frame_conns[m], &tcp4_l2_iov[m], tcp_revert_seq(c, &tcp4_frame_conns[m], &tcp4_l2_iov[m],
tcp4_payload_used - m); tcp4_payload_used - m);
} }
tcp4_payload_used = 0; tcp4_payload_used = 0;
@ -353,7 +358,7 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
* @no_csum: Don't compute IPv4 checksum, use the one from previous buffer * @no_csum: Don't compute IPv4 checksum, use the one from previous buffer
* @seq: Sequence number to be sent * @seq: Sequence number to be sent
*/ */
static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn, static void tcp_data_to_tap(struct ctx *c, struct tcp_tap_conn *conn,
ssize_t dlen, int no_csum, uint32_t seq) ssize_t dlen, int no_csum, uint32_t seq)
{ {
struct iovec *iov; struct iovec *iov;

View file

@ -9,7 +9,7 @@
void tcp_sock4_iov_init(const struct ctx *c); void tcp_sock4_iov_init(const struct ctx *c);
void tcp_sock6_iov_init(const struct ctx *c); void tcp_sock6_iov_init(const struct ctx *c);
void tcp_flags_flush(const struct ctx *c); void tcp_flags_flush(const struct ctx *c);
void tcp_payload_flush(const struct ctx *c); void tcp_payload_flush(struct ctx *c);
int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn); int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn);
int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags); int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags);