isolation: Initially keep CAP_SETFCAP if running as UID 0 in non-init namespace
If pasta spawns a child process while running as UID 0, which is only allowed from a non-init namespace, we need to keep CAP_SETFCAP before pasta_start_ns() is called: otherwise, starting from Linux 5.12, we won't be able to update /proc/self/uid_map with the intended mapping (from 0 to 0). See user_namespaces(7).

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
This commit is contained in:
parent
b0e450aa85
commit
770d1a4502
1 changed file with 14 additions and 3 deletions
17
isolation.c
17
isolation.c
|
@ -177,6 +177,8 @@ static void clamp_caps(void)
|
|||
*/
|
||||
void isolate_initial(void)
|
||||
{
|
||||
uint64_t keep;
|
||||
|
||||
/* We want to keep CAP_NET_BIND_SERVICE in the initial
|
||||
* namespace if we have it, so that we can forward low ports
|
||||
* into the guest/namespace
|
||||
|
@ -193,9 +195,18 @@ void isolate_initial(void)
|
|||
* further capabilities in isolate_user() and
|
||||
* isolate_prefork().
|
||||
*/
|
||||
drop_caps_ep_except(BIT(CAP_NET_BIND_SERVICE) |
|
||||
BIT(CAP_SETUID) | BIT(CAP_SETGID) |
|
||||
BIT(CAP_SYS_ADMIN) | BIT(CAP_NET_ADMIN));
|
||||
keep = BIT(CAP_NET_BIND_SERVICE) | BIT(CAP_SETUID) | BIT(CAP_SETGID) |
|
||||
BIT(CAP_SYS_ADMIN) | BIT(CAP_NET_ADMIN);
|
||||
|
||||
/* Since Linux 5.12, if we want to update /proc/self/uid_map to create
|
||||
* a mapping from UID 0, which only happens with pasta spawning a child
|
||||
* from a non-init user namespace (pasta can't run as root), we need to
|
||||
* retain CAP_SETFCAP too.
|
||||
*/
|
||||
if (!ns_is_init() && !geteuid())
|
||||
keep |= BIT(CAP_SETFCAP);
|
||||
|
||||
drop_caps_ep_except(keep);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in a new issue