mirror of
https://github.com/opencontainers/runc.git
synced 2026-02-05 18:45:28 +01:00
If an attacker were to make the target of a device inode creation be a symlink to some host path, os.Create would happily truncate the target, which could lead to all sorts of issues. This issue is probably not very exploitable in practice, because device inodes are usually only bind-mounted for rootless containers, which cannot overwrite important host files (though user files would still be up for grabs). The regular inode creation logic could also theoretically be tricked into changing the access mode and ownership of host files if the newly-created device inode was swapped with a symlink to a host path. Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
183 lines
5.0 KiB
Go
183 lines
5.0 KiB
Go
//go:build linux
|
|
|
|
package system
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"unsafe"
|
|
|
|
"github.com/sirupsen/logrus"
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
// ParentDeathSignal is a signal number as used with prctl(PR_SET_PDEATHSIG)
// and prctl(PR_GET_PDEATHSIG). Restore treats the zero value as "nothing to
// restore".
type ParentDeathSignal int
|
|
|
|
func (p ParentDeathSignal) Restore() error {
|
|
if p == 0 {
|
|
return nil
|
|
}
|
|
current, err := GetParentDeathSignal()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if p == current {
|
|
return nil
|
|
}
|
|
return p.Set()
|
|
}
|
|
|
|
func (p ParentDeathSignal) Set() error {
|
|
return SetParentDeathSignal(uintptr(p))
|
|
}
|
|
|
|
func SetParentDeathSignal(sig uintptr) error {
|
|
if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func GetParentDeathSignal() (ParentDeathSignal, error) {
|
|
var sig int
|
|
if err := unix.Prctl(unix.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0, 0, 0); err != nil {
|
|
return -1, err
|
|
}
|
|
return ParentDeathSignal(sig), nil
|
|
}
|
|
|
|
func SetKeepCaps() error {
|
|
if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 1, 0, 0, 0); err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func ClearKeepCaps() error {
|
|
if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 0, 0, 0, 0); err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func Setctty() error {
|
|
if err := unix.IoctlSetInt(0, unix.TIOCSCTTY, 0); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// SetSubreaper sets the value i as the subreaper setting for the calling process
|
|
func SetSubreaper(i int) error {
|
|
return unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
|
|
}
|
|
|
|
// GetSubreaper returns the subreaper setting for the calling process
|
|
func GetSubreaper() (int, error) {
|
|
var i uintptr
|
|
|
|
if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil {
|
|
return -1, err
|
|
}
|
|
|
|
return int(i), nil
|
|
}
|
|
|
|
func ExecutableMemfd(comment string, flags int) (*os.File, error) {
|
|
// Try to use MFD_EXEC first. On pre-6.3 kernels we get -EINVAL for this
|
|
// flag. On post-6.3 kernels, with vm.memfd_noexec=1 this ensures we get an
|
|
// executable memfd. For vm.memfd_noexec=2 this is a bit more complicated.
|
|
// The original vm.memfd_noexec=2 implementation incorrectly silently
|
|
// allowed MFD_EXEC[1] -- this should be fixed in 6.6. On 6.6 and newer
|
|
// kernels, we will get -EACCES if we try to use MFD_EXEC with
|
|
// vm.memfd_noexec=2 (for 6.3-6.5, -EINVAL was the intended return value).
|
|
//
|
|
// The upshot is we only need to retry without MFD_EXEC on -EINVAL because
|
|
// it just so happens that passing MFD_EXEC bypasses vm.memfd_noexec=2 on
|
|
// kernels where -EINVAL is actually a security denial.
|
|
memfd, err := unix.MemfdCreate(comment, flags|unix.MFD_EXEC)
|
|
if err == unix.EINVAL {
|
|
memfd, err = unix.MemfdCreate(comment, flags)
|
|
}
|
|
if err != nil {
|
|
if err == unix.EACCES {
|
|
logrus.Info("memfd_create(MFD_EXEC) failed, possibly due to vm.memfd_noexec=2 -- falling back to less secure O_TMPFILE")
|
|
}
|
|
err := os.NewSyscallError("memfd_create", err)
|
|
return nil, fmt.Errorf("failed to create executable memfd: %w", err)
|
|
}
|
|
return os.NewFile(uintptr(memfd), "/memfd:"+comment), nil
|
|
}
|
|
|
|
// Copy is like io.Copy except it uses sendfile(2) if the source and sink are
|
|
// both (*os.File) as an optimisation to make copies faster.
|
|
func Copy(dst io.Writer, src io.Reader) (copied int64, err error) {
|
|
dstFile, _ := dst.(*os.File)
|
|
srcFile, _ := src.(*os.File)
|
|
|
|
if dstFile != nil && srcFile != nil {
|
|
fi, err := srcFile.Stat()
|
|
if err != nil {
|
|
goto fallback
|
|
}
|
|
size := fi.Size()
|
|
for size > 0 {
|
|
n, err := unix.Sendfile(int(dstFile.Fd()), int(srcFile.Fd()), nil, int(size))
|
|
if n > 0 {
|
|
size -= int64(n)
|
|
copied += int64(n)
|
|
}
|
|
if err == unix.EINTR {
|
|
continue
|
|
}
|
|
if err != nil {
|
|
if copied == 0 {
|
|
// If we haven't copied anything so far, we can safely just
|
|
// fallback to io.Copy. We could always do the fallback but
|
|
// it's safer to error out in the case of a partial copy
|
|
// followed by an error (which should never happen).
|
|
goto fallback
|
|
}
|
|
return copied, fmt.Errorf("partial sendfile copy: %w", err)
|
|
}
|
|
}
|
|
return copied, nil
|
|
}
|
|
|
|
fallback:
|
|
return io.Copy(dst, src)
|
|
}
|
|
|
|
// SetLinuxPersonality sets the Linux execution personality. For more information see the personality syscall documentation.
|
|
// checkout getLinuxPersonalityFromStr() from libcontainer/specconv/spec_linux.go for type conversion.
|
|
func SetLinuxPersonality(personality int) error {
|
|
_, _, errno := unix.Syscall(unix.SYS_PERSONALITY, uintptr(personality), 0, 0)
|
|
if errno != 0 {
|
|
return &os.SyscallError{Syscall: "set_personality", Err: errno}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// GetPtyPeer is a wrapper for ioctl(TIOCGPTPEER).
|
|
func GetPtyPeer(ptyFd uintptr, unsafePeerPath string, flags int) (*os.File, error) {
|
|
// Make sure O_NOCTTY is always set -- otherwise runc might accidentally
|
|
// gain it as a controlling terminal. O_CLOEXEC also needs to be set to
|
|
// make sure we don't leak the handle either.
|
|
flags |= unix.O_NOCTTY | unix.O_CLOEXEC
|
|
|
|
// There is no nice wrapper for this kind of ioctl in unix.
|
|
peerFd, _, errno := unix.Syscall(
|
|
unix.SYS_IOCTL,
|
|
ptyFd,
|
|
uintptr(unix.TIOCGPTPEER),
|
|
uintptr(flags),
|
|
)
|
|
if errno != 0 {
|
|
return nil, os.NewSyscallError("ioctl TIOCGPTPEER", errno)
|
|
}
|
|
return os.NewFile(peerFd, unsafePeerPath), nil
|
|
}
|