diff --git a/tcp_buf.c b/tcp_buf.c
index cbefa42..72d99c5 100644
--- a/tcp_buf.c
+++ b/tcp_buf.c
@@ -239,9 +239,10 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
  * @dlen:	TCP payload length
  * @no_csum:	Don't compute IPv4 checksum, use the one from previous buffer
  * @seq:	Sequence number to be sent
+ * @push:	Set the PSH flag (last segment in a batch)
  */
 static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
-			    ssize_t dlen, int no_csum, uint32_t seq)
+			    ssize_t dlen, int no_csum, uint32_t seq, bool push)
 {
 	struct tcp_payload_t *payload;
 	const uint16_t *check = NULL;
@@ -268,6 +269,7 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 	payload->th.th_x2 = 0;
 	payload->th.th_flags = 0;
 	payload->th.ack = 1;
+	payload->th.psh = push;
 	iov[TCP_IOV_PAYLOAD].iov_len = dlen + sizeof(struct tcphdr);
 	tcp_l2_buf_fill_headers(conn, iov, check, seq, false);
 	if (++tcp_payload_used > TCP_FRAMES_MEM - 1)
@@ -402,11 +404,14 @@ int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
 	seq = conn->seq_to_tap;
 	for (i = 0; i < send_bufs; i++) {
 		int no_csum = i && i != send_bufs - 1 && tcp_payload_used;
+		bool push = false;
 
-		if (i == send_bufs - 1)
+		if (i == send_bufs - 1) {
 			dlen = last_len;
+			push = true;
+		}
 
-		tcp_data_to_tap(c, conn, dlen, no_csum, seq);
+		tcp_data_to_tap(c, conn, dlen, no_csum, seq, push);
 		seq += dlen;
 	}
 
diff --git a/tcp_vu.c b/tcp_vu.c
index a216bb1..fad7065 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -289,10 +289,11 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c,
  * @iov_cnt:		Number of entries in @iov
  * @check:		Checksum, if already known
  * @no_tcp_csum:	Do not set TCP checksum
+ * @push:		Set the PSH flag (last segment in a batch)
  */
 static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
 			   struct iovec *iov, size_t iov_cnt,
-			   const uint16_t **check, bool no_tcp_csum)
+			   const uint16_t **check, bool no_tcp_csum, bool push)
 {
 	const struct flowside *toside = TAPFLOW(conn);
 	bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
@@ -334,6 +335,7 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
 	memset(th, 0, sizeof(*th));
 	th->doff = sizeof(*th) / 4;
 	th->ack = 1;
+	th->psh = push;
 
 	tcp_fill_headers(conn, NULL, ip4h, ip6h, th, &payload,
 			 *check, conn->seq_to_tap, no_tcp_csum);
@@ -443,6 +445,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
 		struct iovec *iov = &elem[head[i]].in_sg[0];
 		int buf_cnt = head[i + 1] - head[i];
 		ssize_t dlen = iov_size(iov, buf_cnt) - hdrlen;
+		bool push = i == head_cnt - 1;
 
 		vu_set_vnethdr(vdev, iov->iov_base, buf_cnt);
 
@@ -451,7 +454,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
 			check = NULL;
 		previous_dlen = dlen;
 
-		tcp_vu_prepare(c, conn, iov, buf_cnt, &check, !*c->pcap);
+		tcp_vu_prepare(c, conn, iov, buf_cnt, &check, !*c->pcap, push);
 
 		if (*c->pcap) {
 			pcap_iov(iov, buf_cnt,