Linux capabilities

setcap/getcap/setfacl/getfacl/attr

$ dumpcap
-bash: /usr/sbin/dumpcap: Permission denied

$ ll -Z /usr/sbin/dumpcap
-rwxr-x---+ root wireshark system_u:object_r:bin_t:s0       /usr/sbin/dumpcap

$ getcap /usr/sbin/dumpcap
/usr/sbin/dumpcap = cap_net_admin,cap_net_raw+ep

$ getfacl /usr/sbin/dumpcap
getfacl: Removing leading '/' from absolute path names
# file: usr/sbin/dumpcap
# owner: root
# group: wireshark
user::rwx
group::r-x
other::---

$ attr -l /usr/sbin/dumpcap
Attribute "selinux" has a 27 byte value for /usr/sbin/dumpcap
Attribute "capability" has a 20 byte value for /usr/sbin/dumpcap
$ sudo setfacl -m u:runsisi:rx /usr/sbin/dumpcap

$ getfacl /usr/sbin/dumpcap
getfacl: Removing leading '/' from absolute path names
# file: usr/sbin/dumpcap
# owner: root
# group: wireshark
user::rwx
user:runsisi:r-x
group::r-x
mask::r-x
other::---
$ dumpcap
Capturing on 'nflog'

A textual representation of capability sets consists of one or more whitespace-separated clauses.

Each clause consists of a list of comma-separated capability names (or the word 'all'), followed by an action-list. An action-list consists of a sequence of operator flag pairs.

cap_net_admin+pe-i cap_net_raw=+pe cap_net_admin=ep

$ getcap /usr/bin/ping
/usr/bin/ping = cap_net_admin,cap_net_raw+p
$ sudo setcap -r /usr/bin/ping
$ getcap /usr/bin/ping
$ sudo setcap cap_net_admin,cap_net_raw+p /usr/bin/ping
$ getcap /usr/bin/ping
/usr/bin/ping = cap_net_admin,cap_net_raw+p

libcap-ng-utils

$ rpm -ql libcap-ng-utils
/usr/bin/captest
/usr/bin/filecap
/usr/bin/netcap
/usr/bin/pscap

$ pscap | grep systemd-journal
1     643   root        systemd-journal   chown, dac_override, dac_read_search, fowner, setgid, setuid, sys_ptrace, sys_admin, audit_control, mac_override, syslog

$ getpcaps 643
Capabilities for `643': = cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_setgid,cap_setuid,cap_sys_ptrace,cap_sys_admin,cap_audit_control,cap_mac_override,cap_syslog+ep

systemd AmbientCapabilities

$ systemd-run -t -p "AmbientCapabilities=~" capsh --print
==== AUTHENTICATING FOR org.freedesktop.systemd1.manage-units ===
Authentication is required to manage system services or other units.
Authenticating as: runsisi,,, (runsisi)
Password:
==== AUTHENTICATION COMPLETE ===
Running as unit: run-u800.service
Press ^] three times within 1s to disconnect TTY.
Current: =eip
Bounding set =cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,cap_wake_alarm,cap_block_suspend,cap_audit_read
Ambient set =cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,cap_wake_alarm,cap_block_suspend,cap_audit_read
Securebits: 00/0x0/1'b0
 secure-noroot: no (unlocked)
 secure-no-suid-fixup: no (unlocked)
 secure-keep-caps: no (unlocked)
 secure-no-ambient-raise: no (unlocked)
uid=0(root) euid=0(root)
gid=0(root)
groups=
Guessed mode: UNCERTAIN (0)

$ systemd-run -t -p "AmbientCapabilities=~" -p "SecureBits=no-setuid-fixup"  capsh --print
==== AUTHENTICATING FOR org.freedesktop.systemd1.manage-units ===
Authentication is required to manage system services or other units.
Authenticating as: runsisi,,, (runsisi)
Password:
==== AUTHENTICATION COMPLETE ===
Running as unit: run-u803.service
Press ^] three times within 1s to disconnect TTY.
Current: =eip
Bounding set =cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,cap_wake_alarm,cap_block_suspend,cap_audit_read
Ambient set =cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,cap_wake_alarm,cap_block_suspend,cap_audit_read
Securebits: 04/0x4/3'b100
 secure-noroot: no (unlocked)
 secure-no-suid-fixup: yes (unlocked)
 secure-keep-caps: no (unlocked)
 secure-no-ambient-raise: no (unlocked)
uid=0(root) euid=0(root)
gid=0(root)
groups=
Guessed mode: UNCERTAIN (0)

代码实现

// fs/proc/array.c
task_cap

// security/security.c
security_bprm_set_creds
  security_ops->bprm_set_creds(bprm) // i.e., apparmor_bprm_set_creds / selinux_bprm_set_creds / smack_bprm_set_creds / tomoyo_bprm_set_creds
    cap_bprm_set_creds

// security/commoncap.c
cap_bprm_set_creds
cap_task_fix_setuid
  cap_emulate_setxuid

// fs/exec.c
SYSCALL_DEFINE3(execve)

// fs/namei.c
/**
 * generic_permission -  check for access rights on a Posix-like filesystem
 * @inode:	inode to check access rights for
 * @mask:	right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...)
 *
 * Used to check for read/write/execute permissions on a file.
 * We use "fsuid" for this, letting us set arbitrary permissions
 * for filesystem access without changing the "normal" uids which
 * are used for other things.
 *
 * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
 * request cannot be satisfied (eg. requires blocking or too much complexity).
 * It would then be called again in ref-walk mode.
 */
int generic_permission(struct inode *inode, int mask)

set-ambient

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#include <unistd.h>
#include <sys/prctl.h>
#include <sys/wait.h>

#include <linux/capability.h>
#include <linux/securebits.h>

#include <cap-ng.h>

// g++ -o set-ambient set-ambient.cc -lcap-ng

// sudo setcap 'cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,
// cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,
// cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,
// cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,
// cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,
// cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,
// cap_wake_alarm,cap_block_suspend,cap_audit_read+p' set-ambient

/*
 * Raise a single capability into the calling process's ambient set.
 *
 * The capability is first added to the inheritable set through libcap-ng,
 * then raised with prctl(PR_CAP_AMBIENT_RAISE). Per capabilities(7), the
 * kernel only allows an ambient raise for a capability that is present in
 * both the permitted and the inheritable sets, which is why the binary is
 * expected to carry the capability as a permitted file capability.
 *
 * @cap: capability number (one of the CAP_* constants).
 *
 * Does not return on failure: exits with status 2 when libcap-ng cannot
 * read, update or apply the capability state, and with status 1 when the
 * prctl() call is rejected.
 */
static void set_ambient_cap(int cap)
{
    /* Load the current process capabilities into libcap-ng's state. */
    if (capng_get_caps_process()) {
        fprintf(stderr, "Cannot get process caps\n");
        exit(2);
    }

    if (capng_update(CAPNG_ADD, CAPNG_INHERITABLE, cap)) {
        fprintf(stderr, "Cannot add inheritable cap\n");
        exit(2);
    }

    /*
     * Push the updated sets to the kernel. If this fails the inheritable
     * bit is not set and the ambient raise below could only fail with a
     * less helpful error, so stop here.
     */
    if (capng_apply(CAPNG_SELECT_CAPS)) {
        fprintf(stderr, "Cannot apply caps\n");
        exit(2);
    }

    /* Note the two 0s at the end. Kernel checks for these */
    if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0)) {
        perror("Cannot set cap");
        exit(1);
    }
}

/*
 * Usage: set-ambient PROGRAM [ARG...]
 *
 * Raises a fixed list of capabilities into the ambient set, sets the
 * SECBIT_NO_SETUID_FIXUP secure bit, and then replaces this process with
 * PROGRAM via execv(). PROGRAM must be a path; no PATH search is done.
 *
 * Returns 2 when no program is given and 1 when execv() fails; on success
 * execv() does not return.
 */
int main(int argc, char **argv)
{
    /* Without this check argv[1] would be NULL and be handed to execv(). */
    if (argc < 2) {
        fprintf(stderr, "Usage: %s program [args...]\n",
                argc > 0 ? argv[0] : "set-ambient");
        return 2;
    }

    set_ambient_cap(CAP_CHOWN);
    set_ambient_cap(CAP_DAC_OVERRIDE);
    set_ambient_cap(CAP_DAC_READ_SEARCH);
    set_ambient_cap(CAP_FOWNER);
    set_ambient_cap(CAP_FSETID);

    set_ambient_cap(CAP_KILL);
    set_ambient_cap(CAP_SETGID);
    set_ambient_cap(CAP_SETUID);
    set_ambient_cap(CAP_SETPCAP);
    set_ambient_cap(CAP_LINUX_IMMUTABLE);
    set_ambient_cap(CAP_NET_BIND_SERVICE);

    set_ambient_cap(CAP_NET_BROADCAST);
    set_ambient_cap(CAP_NET_ADMIN);
    set_ambient_cap(CAP_NET_RAW);
    set_ambient_cap(CAP_IPC_LOCK);
    set_ambient_cap(CAP_IPC_OWNER);
    set_ambient_cap(CAP_SYS_MODULE);

    set_ambient_cap(CAP_SYS_RAWIO);
    set_ambient_cap(CAP_SYS_CHROOT);
    /* NOTE(review): the capabilities commented out here and below were
     * disabled by the original author; the reason is not recorded. */
//    set_ambient_cap(CAP_SYS_PTRACE);
    set_ambient_cap(CAP_SYS_PACCT);
    set_ambient_cap(CAP_SYS_ADMIN);
    set_ambient_cap(CAP_SYS_BOOT);

    set_ambient_cap(CAP_SYS_NICE);
    set_ambient_cap(CAP_SYS_RESOURCE);
    set_ambient_cap(CAP_SYS_TIME);
    set_ambient_cap(CAP_SYS_TTY_CONFIG);
    set_ambient_cap(CAP_MKNOD);
    set_ambient_cap(CAP_LEASE);

    set_ambient_cap(CAP_AUDIT_WRITE);
    set_ambient_cap(CAP_AUDIT_CONTROL);
    set_ambient_cap(CAP_SETFCAP);
    set_ambient_cap(CAP_MAC_OVERRIDE);
    set_ambient_cap(CAP_MAC_ADMIN);
    set_ambient_cap(CAP_SYSLOG);

//    set_ambient_cap(CAP_WAKE_ALARM);
//    set_ambient_cap(CAP_BLOCK_SUSPEND);
//    set_ambient_cap(CAP_AUDIT_READ);

    /*
     * Secure-bits experiments; exactly one variant is active. A failure is
     * reported but deliberately not fatal, so the target program still runs
     * (with default secure bits) and the difference can be observed.
     */
//    prctl(PR_SET_SECUREBITS, SECBIT_KEEP_CAPS);
    if (prctl(PR_SET_SECUREBITS, SECBIT_NO_SETUID_FIXUP)) {
        perror("Cannot set securebits");
    }
//    prctl(PR_SET_SECUREBITS, SECBIT_NO_SETUID_FIXUP | SECBIT_NO_SETUID_FIXUP_LOCKED);

//    setuid(1000);
//    setgid(1000);

    /* Alternative kept for experimentation: exec in a child and wait. */
//    pid_t pid = fork();
//    if (pid < 0) {
//        perror("Cannot fork");
//        exit(1);
//    }
//    if (pid == 0) {
//        if (execv(argv[1], argv + 1)) {
//            perror("Cannot exec");
//        }
//    } else {
//        int r;
//        waitpid(pid, &r, 0);
//    }

    /* execv() only returns on failure, so reaching the next line is an error. */
    execv(argv[1], argv + 1);
    perror("Cannot exec");

    return 1;
}

参考资料

man 手册页

https://man7.org/linux/man-pages/man7/capabilities.7.html

Linux 内核 CAP 宏定义

https://github.com/torvalds/linux/blob/v5.9/include/uapi/linux/capability.h

Linux Capabilities in a nutshell

https://k3a.me/linux-capabilities-in-a-nutshell/

Linux Capabilities: Why They Exist and How They Work

https://blog.container-solutions.com/linux-capabilities-why-they-exist-and-how-they-work

Linux Capabilities In Practice

https://blog.container-solutions.com/linux-capabilities-in-practice

Secure Your Containers with this One Weird Trick

https://www.redhat.com/en/blog/secure-your-containers-one-weird-trick

systemd.directives – AmbientCapabilities

https://www.freedesktop.org/software/systemd/man/systemd.directives.html

capabilities: ambient capabilities

https://github.com/torvalds/linux/commit/58319057b7847667f0c

Linux capabilities 101

https://linux-audit.com/linux-capabilities-101/

LINUX CONTAINER BASICS: CAPABILITIES

https://www.schutzwerk.com/en/43/posts/linux_container_capabilities/

Understanding Capabilities in Linux

https://blog.ploetzli.ch/2014/understanding-linux-capabilities/

Making an RPM which sets POSIX files capabilities

https://stackoverflow.com/questions/26898007/making-an-rpm-which-sets-posix-files-capabilities

Linux Capabilities and when to drop all

https://raesene.github.io/blog/2017/08/27/Linux-capabilities-and-when-to-drop-all/

dist/redhat: Disables ambient capabilities when systemd/kernel doesn’t support it

https://github.com/scylladb/scylla/commit/fcc1a9f6bb35dc99c0f5d0d1a9eae2f3b5218fbd


最后修改于 2020-11-22