Add cleaner line-by-line reading primitives

Two places in passt need to read files line by line (one parsing
resolv.conf, the other parsing /proc/net/*).  They can't use fgets()
because in glibc that can allocate memory.  Instead they use an
implementation line_read() in util.c.  This has some problems:

 * It has two completely separate modes of operation, one buffering
   and one not, the relation between these and how they're activated
   is subtle and confusing
 * At least in non-buffered mode, it will mishandle empty lines,
   folding them onto the start of the next non-empty line
 * In non-buffered mode it will use lseek() which prevents using this
   on non-regular files (we don't need that at present, but it's a
   surprising limitation)
 * It has a lot of difficult to read pointer mangling

Add a new cleaner implementation of allocation-free line-by-line
reading in lineread.c.  This one always buffers, using a state
structure to keep track of what we need.  This is larger than I'd
like, but it turns out handling all the edge cases of line-by-line
reading in C is surprisingly hard.

This just adds the code, subsequent patches will change the existing
users of line_read() to the new implementation.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
This commit is contained in:
David Gibson 2022-06-24 12:17:29 +10:00 committed by Stefano Brivio
parent 20c418f1f9
commit dab2c6ee1f
3 changed files with 150 additions and 4 deletions

View file

@ -32,16 +32,16 @@ CFLAGS += -DRLIMIT_STACK_VAL=$(RLIMIT_STACK_VAL)
CFLAGS += -DARCH=\"$(TARGET_ARCH)\"
PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c icmp.c igmp.c \
mld.c ndp.c netlink.c packet.c passt.c pasta.c pcap.c siphash.c \
tap.c tcp.c tcp_splice.c udp.c util.c
lineread.c mld.c ndp.c netlink.c packet.c passt.c pasta.c pcap.c \
siphash.c tap.c tcp.c tcp_splice.c udp.c util.c
QRAP_SRCS = qrap.c
SRCS = $(PASST_SRCS) $(QRAP_SRCS)
MANPAGES = passt.1 pasta.1 qrap.1
PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h icmp.h \
ndp.h netlink.h packet.h passt.h pasta.h pcap.h siphash.h \
tap.h tcp.h tcp_splice.h udp.h util.h
lineread.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h \
siphash.h tap.h tcp.h tcp_splice.h udp.h util.h
HEADERS = $(PASST_HEADERS)
# On gcc 11.2, with -O2 and -flto, tcp_hash() and siphash_20b(), if inlined,

115
lineread.c Normal file
View file

@ -0,0 +1,115 @@
// SPDX-License-Identifier: AGPL-3.0-or-later
/* PASST - Plug A Simple Socket Transport
* for qemu/UNIX domain socket mode
*
* PASTA - Pack A Subtle Tap Abstraction
* for network namespace/tap device mode
*
* lineread.c - Allocation free line-by-line buffered file input
*
* Copyright Red Hat
* Author: David Gibson <david@gibson.dropbear.id.au>
*/
#include <stddef.h>
#include <fcntl.h>
#include <string.h>
#include <stdbool.h>
#include <assert.h>
#include <unistd.h>
#include "lineread.h"
/**
 * lineread_init() - Set up a line reader on top of a file descriptor
 * @lr:		Line reader state structure to initialize
 * @fd:		File descriptor that lines will be read from
 *
 * Only the bookkeeping fields are reset: nothing is read from @fd here, and
 * the contents of the buffer are left untouched.
 */
void lineread_init(struct lineread *lr, int fd)
{
	/* Start with an empty buffer: no pending bytes, first line at offset 0 */
	lr->count = 0;
	lr->next_line = 0;

	lr->fd = fd;
}
/**
 * peek_line() - Find and NUL-terminate next line in buffer
 * @lr:		Line reader state structure
 * @eof:	Caller indicates end-of-file was already found by read()
 *
 * Looks for a newline among the @lr->count buffered bytes starting at
 * @lr->next_line.  If one is found, it is overwritten in place with '\0'.
 * If none is found but @eof is set, a '\0' is stored right after the buffered
 * bytes and all of them are treated as the last line.  The offsets in @lr are
 * not advanced here: that is left to the caller.
 *
 * Return: length of line in bytes (counting the newline, if there was one),
 *	   0 if @eof is set and no bytes remain, -1 if no line was found
 */
static int peek_line(struct lineread *lr, bool eof)
{
	char *start, *nl;

	/* Sanity checks (which also document invariants).  Each field is
	 * bounded on its own and the combined bound is expressed as a
	 * subtraction, so that none of the checks relies on signed overflow
	 * (which is undefined, and lets the compiler drop the check).
	 */
	assert(lr->next_line >= 0);
	assert(lr->next_line <= LINEREAD_BUFFER_SIZE);
	assert(lr->count >= 0);
	assert(lr->count <= LINEREAD_BUFFER_SIZE - lr->next_line);

	start = lr->buf + lr->next_line;

	nl = memchr(start, '\n', lr->count);
	if (nl) {
		*nl = '\0';
		/* Length includes the (now overwritten) newline */
		return (int)(nl - start) + 1;
	}

	if (eof) {
		/* No trailing newline, so treat all remaining bytes as the
		 * last line.  The buffer has one spare byte past
		 * LINEREAD_BUFFER_SIZE, so this store is always in bounds.
		 */
		start[lr->count] = '\0';
		return lr->count;
	}

	return -1;
}
/**
 * lineread_get() - Fetch the next line from the file, without allocating
 * @lr:		Line reader state structure
 * @line:	Set to point at the start of the next line on return
 *
 * The pointer stored in @line refers to storage inside @lr, which later calls
 * may move or overwrite.
 *
 * Return: Length of line read on success, 0 on EOF, negative on error
 */
int lineread_get(struct lineread *lr, char **line)
{
	bool at_eof = false;
	int len;

	for (;;) {
		int used, nread;

		len = peek_line(lr, at_eof);
		if (len >= 0)
			break;

		/* No complete line buffered yet: make room, then read more */
		used = lr->next_line + lr->count;
		if (used == LINEREAD_BUFFER_SIZE) {
			/* No space at end */
			if (lr->next_line == 0) {
				/* The pending data already fills the whole
				 * buffer, so this line is too long for us to
				 * process.  FIXME: report error better
				 */
				return -1;
			}

			/* Slide the pending bytes down to the start */
			memmove(lr->buf, lr->buf + lr->next_line, lr->count);
			lr->next_line = 0;
			used = lr->count;
		}

		/* Append new data right after what we already hold */
		nread = read(lr->fd, lr->buf + used,
			     LINEREAD_BUFFER_SIZE - used);
		if (nread < 0)
			return nread;

		if (nread > 0)
			lr->count += nread;
		else
			at_eof = true;
	}

	/* Hand out the line and step past it */
	*line = lr->buf + lr->next_line;
	lr->next_line += len;
	lr->count -= len;

	return len;
}

31
lineread.h Normal file
View file

@ -0,0 +1,31 @@
/* SPDX-License-Identifier: AGPL-3.0-or-later
* Copyright Red Hat
* Author: David Gibson <david@gibson.dropbear.id.au>
*/
#ifndef LINEREAD_H
#define LINEREAD_H
/* Number of data bytes the line buffer can hold (terminator not included) */
#define LINEREAD_BUFFER_SIZE	8192

/**
 * struct lineread - State of an allocation-free line reader
 * @fd:		File descriptor the data is read from
 * @next_line:	Offset in @buf where the first line not yet handed out by
 *		lineread_get() begins
 * @count:	Number of bytes held in @buf, from @next_line onwards, that
 *		were read from the file but not yet returned by lineread_get()
 * @buf:	Storage for data read from the file
 */
struct lineread {
	int fd;
	int next_line;
	int count;

	/* LINEREAD_BUFFER_SIZE data bytes, plus one extra byte for a
	 * possible trailing \0
	 */
	char buf[LINEREAD_BUFFER_SIZE + 1];
};
/* Reset @lr to an empty buffer and record @fd as the descriptor to read from */
void lineread_init(struct lineread *lr, int fd);
/* Point *@line at the next line: returns its length, 0 on EOF, negative on error */
int lineread_get(struct lineread *lr, char **line);
#endif /* LINEREAD_H */