isolation: Prevent any child processes gaining capabilities

We drop our own capabilities, but it's possible that processes we exec()
could gain extra privilege via file capabilities.  It shouldn't be possible
for us to exec() anyway due to seccomp() and our filesystem isolation.  But
just in case, zero the bounding and inheritable capability sets to prevent
any such child from gainin privilege.

Note that we do this *after* spawning the pasta shell/command (if any),
because we do want the user to be able to give that privilege if they want.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
David Gibson 2022-10-14 15:25:35 +11:00 committed by Stefano Brivio
parent c22ebccba8
commit fb449b16bd

View file

@ -121,6 +121,61 @@ static void drop_caps_ep_except(uint64_t keep)
} }
} }
/**
* clamp_caps() - Prevent any children from gaining caps
*
* This drops all capabilities from both the inheritable and the
* bounding set. This means that any exec()ed processes can't gain
* capabilities, even if they have file capabilities which would grant
* them. We shouldn't ever exec() in any case, but this provides an
* additional layer of protection. Executing this requires
* CAP_SETPCAP, which we will have within our userns.
*
* Note that dropping capabilites from the bounding set limits
* exec()ed processes, but does not remove them from the effective or
* permitted sets, so it doesn't reduce our own capabilities.
*/
static void clamp_caps(void)
{
struct __user_cap_data_struct data[CAP_WORDS];
struct __user_cap_header_struct hdr = {
.version = CAP_VERSION,
.pid = 0,
};
int i;
for (i = 0; i < 64; i++) {
/* Some errors can be ignored:
* - EINVAL, we'll get this for all values in 0..63
* that are not actually allocated caps
* - EPERM, we'll get this if we don't have
* CAP_SETPCAP, which can happen if using
* --netns-only. We don't need CAP_SETPCAP for
* normal operation, so carry on without it.
*/
if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0) &&
errno != EINVAL && errno != EPERM) {
err("Couldn't drop cap %i from bounding set: %s",
i, strerror(errno));
exit(EXIT_FAILURE);
}
}
if (syscall(SYS_capget, &hdr, data)) {
err("Couldn't get current capabilities: %s", strerror(errno));
exit(EXIT_FAILURE);
}
for (i = 0; i < CAP_WORDS; i++)
data[i].inheritable = 0;
if (syscall(SYS_capset, &hdr, data)) {
err("Couldn't drop inheritable capabilities: %s",
strerror(errno));
exit(EXIT_FAILURE);
}
}
/** /**
* isolate_initial() - Early, config independent self isolation * isolate_initial() - Early, config independent self isolation
* *
@ -324,6 +379,7 @@ int isolate_prefork(struct ctx *c)
ns_caps |= BIT(CAP_NET_BIND_SERVICE); ns_caps |= BIT(CAP_NET_BIND_SERVICE);
} }
clamp_caps();
drop_caps_ep_except(ns_caps); drop_caps_ep_except(ns_caps);
return 0; return 0;