[Midnightbsd-cvs] src [9957] trunk/sys/kern: sync with freebsd
laffer1 at midnightbsd.org
Sat May 26 10:27:48 EDT 2018
Revision: 9957
http://svnweb.midnightbsd.org/src/?rev=9957
Author: laffer1
Date: 2018-05-26 10:27:48 -0400 (Sat, 26 May 2018)
Log Message:
-----------
sync with freebsd
Modified Paths:
--------------
trunk/sys/kern/sys_capability.c
trunk/sys/kern/sys_generic.c
trunk/sys/kern/sys_pipe.c
trunk/sys/kern/sys_procdesc.c
trunk/sys/kern/sys_process.c
trunk/sys/kern/sys_socket.c
Modified: trunk/sys/kern/sys_capability.c
===================================================================
--- trunk/sys/kern/sys_capability.c 2018-05-26 14:27:13 UTC (rev 9956)
+++ trunk/sys/kern/sys_capability.c 2018-05-26 14:27:48 UTC (rev 9957)
@@ -1,11 +1,16 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2008-2011 Robert N. M. Watson
* Copyright (c) 2010-2011 Jonathan Anderson
+ * Copyright (c) 2012 FreeBSD Foundation
* All rights reserved.
*
* This software was developed at the University of Cambridge Computer
* Laboratory with support from a grant from Google, Inc.
*
+ * Portions of this software were developed by Pawel Jakub Dawidek under
+ * sponsorship from the FreeBSD Foundation.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -51,23 +56,28 @@
* anonymous, rather than named, POSIX shared memory objects.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/kern/sys_capability.c 302229 2016-06-27 21:25:01Z bdrewery $");
+
#include "opt_capsicum.h"
+#include "opt_ktrace.h"
-#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
-
#include <sys/param.h>
-#include <sys/capability.h>
+#include <sys/capsicum.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
+#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/syscallsubr.h>
#include <sys/sysproto.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/ucred.h>
+#include <sys/uio.h>
+#include <sys/ktrace.h>
#include <security/audit/audit.h>
@@ -96,7 +106,7 @@
oldcred = p->p_ucred;
crcopy(newcred, oldcred);
newcred->cr_flags |= CRED_FLAG_CAPMODE;
- p->p_ucred = newcred;
+ proc_set_cred(p, newcred);
PROC_UNLOCK(p);
crfree(oldcred);
return (0);
@@ -110,7 +120,7 @@
{
u_int i;
- i = (IN_CAPABILITY_MODE(td)) ? 1 : 0;
+ i = IN_CAPABILITY_MODE(td) ? 1 : 0;
return (copyout(&i, uap->modep, sizeof(i)));
}
@@ -136,85 +146,53 @@
FEATURE(security_capabilities, "Capsicum Capabilities");
-/*
- * struct capability describes a capability, and is hung off of its struct
- * file f_data field. cap_file and cap_rightss are static once hooked up, as
- * neither the object it references nor the rights it encapsulates are
- * permitted to change.
- */
-struct capability {
- struct file *cap_object; /* Underlying object's file. */
- struct file *cap_file; /* Back-pointer to cap's file. */
- cap_rights_t cap_rights; /* Mask of rights on object. */
-};
+MALLOC_DECLARE(M_FILECAPS);
+static inline int
+_cap_check(const cap_rights_t *havep, const cap_rights_t *needp,
+ enum ktr_cap_fail_type type)
+{
+ int i;
+
+ for (i = 0; i < nitems(havep->cr_rights); i++) {
+ if (!cap_rights_contains(havep, needp)) {
+#ifdef KTRACE
+ if (KTRPOINT(curthread, KTR_CAPFAIL))
+ ktrcapfail(type, needp, havep);
+#endif
+ return (ENOTCAPABLE);
+ }
+ }
+ return (0);
+}
+
/*
- * Capabilities have a fileops vector, but in practice none should ever be
- * called except for fo_close, as the capability will normally not be
- * returned during a file descriptor lookup in the system call code.
+ * Test whether a capability grants the requested rights.
*/
-static fo_rdwr_t capability_read;
-static fo_rdwr_t capability_write;
-static fo_truncate_t capability_truncate;
-static fo_ioctl_t capability_ioctl;
-static fo_poll_t capability_poll;
-static fo_kqfilter_t capability_kqfilter;
-static fo_stat_t capability_stat;
-static fo_close_t capability_close;
-static fo_chmod_t capability_chmod;
-static fo_chown_t capability_chown;
-
-static struct fileops capability_ops = {
- .fo_read = capability_read,
- .fo_write = capability_write,
- .fo_truncate = capability_truncate,
- .fo_ioctl = capability_ioctl,
- .fo_poll = capability_poll,
- .fo_kqfilter = capability_kqfilter,
- .fo_stat = capability_stat,
- .fo_close = capability_close,
- .fo_chmod = capability_chmod,
- .fo_chown = capability_chown,
- .fo_flags = DFLAG_PASSABLE,
-};
-
-static struct fileops capability_ops_unpassable = {
- .fo_read = capability_read,
- .fo_write = capability_write,
- .fo_truncate = capability_truncate,
- .fo_ioctl = capability_ioctl,
- .fo_poll = capability_poll,
- .fo_kqfilter = capability_kqfilter,
- .fo_stat = capability_stat,
- .fo_close = capability_close,
- .fo_chmod = capability_chmod,
- .fo_chown = capability_chown,
- .fo_flags = 0,
-};
-
-static uma_zone_t capability_zone;
-
-static void
-capability_init(void *dummy __unused)
+int
+cap_check(const cap_rights_t *havep, const cap_rights_t *needp)
{
- capability_zone = uma_zcreate("capability", sizeof(struct capability),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
- if (capability_zone == NULL)
- panic("capability_init: capability_zone not initialized");
+ return (_cap_check(havep, needp, CAPFAIL_NOTCAPABLE));
}
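
A minimal kernel-side sketch of the new interface (hypothetical caller;
fdp/fd and the locking context are assumptions, not part of this diff):

    cap_rights_t need;
    int error;

    cap_rights_init(&need, CAP_READ);
    error = cap_check(cap_rights(fdp, fd), &need);
    if (error != 0)
            return (error); /* ENOTCAPABLE if CAP_READ was limited away */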
-SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, capability_init, NULL);
/*
- * Test whether a capability grants the requested rights.
+ * Convert capability rights into VM access flags.
*/
-static int
-cap_check(struct capability *c, cap_rights_t rights)
+u_char
+cap_rights_to_vmprot(cap_rights_t *havep)
{
+ u_char maxprot;
- if ((c->cap_rights | rights) != c->cap_rights)
- return (ENOTCAPABLE);
- return (0);
+ maxprot = VM_PROT_NONE;
+ if (cap_rights_is_set(havep, CAP_MMAP_R))
+ maxprot |= VM_PROT_READ;
+ if (cap_rights_is_set(havep, CAP_MMAP_W))
+ maxprot |= VM_PROT_WRITE;
+ if (cap_rights_is_set(havep, CAP_MMAP_X))
+ maxprot |= VM_PROT_EXECUTE;
+
+ return (maxprot);
}
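
As an illustration (not part of the diff), a descriptor whose rights were
limited to CAP_MMAP_R and CAP_MMAP_W yields read/write mapping protection:

    cap_rights_t rights;

    cap_rights_init(&rights, CAP_MMAP_R, CAP_MMAP_W);
    /* cap_rights_to_vmprot(&rights) == (VM_PROT_READ | VM_PROT_WRITE) */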
/*
@@ -222,44 +200,83 @@
* any other way, as we want to keep all capability permission evaluation in
* this one file.
*/
-cap_rights_t
-cap_rights(struct file *fp_cap)
+
+cap_rights_t *
+cap_rights_fde(struct filedescent *fde)
{
- struct capability *c;
- KASSERT(fp_cap->f_type == DTYPE_CAPABILITY,
- ("cap_rights: !capability"));
+ return (&fde->fde_rights);
+}
- c = fp_cap->f_data;
- return (c->cap_rights);
+cap_rights_t *
+cap_rights(struct filedesc *fdp, int fd)
+{
+
+ return (cap_rights_fde(&fdp->fd_ofiles[fd]));
}
/*
- * System call to create a new capability reference to either an existing
- * file object or an an existing capability.
+ * System call to limit rights of the given capability.
*/
int
-sys_cap_new(struct thread *td, struct cap_new_args *uap)
+sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap)
{
- int error, capfd;
- int fd = uap->fd;
- struct file *fp;
- cap_rights_t rights = uap->rights;
+ struct filedesc *fdp;
+ cap_rights_t rights;
+ int error, fd, version;
+ cap_rights_init(&rights);
+
+ error = copyin(uap->rightsp, &rights, sizeof(rights.cr_rights[0]));
+ if (error != 0)
+ return (error);
+ version = CAPVER(&rights);
+ if (version != CAP_RIGHTS_VERSION_00)
+ return (EINVAL);
+
+ error = copyin(uap->rightsp, &rights,
+ sizeof(rights.cr_rights[0]) * CAPARSIZE(&rights));
+ if (error != 0)
+ return (error);
+ /* Check for race. */
+ if (CAPVER(&rights) != version)
+ return (EINVAL);
+
+ if (!cap_rights_is_valid(&rights))
+ return (EINVAL);
+
+ if (version != CAP_RIGHTS_VERSION) {
+ rights.cr_rights[0] &= ~(0x3ULL << 62);
+ rights.cr_rights[0] |= ((uint64_t)CAP_RIGHTS_VERSION << 62);
+ }
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_STRUCT))
+ ktrcaprights(&rights);
+#endif
+
+ fd = uap->fd;
+
AUDIT_ARG_FD(fd);
- AUDIT_ARG_RIGHTS(rights);
- error = fget(td, fd, rights, &fp);
- if (error)
- return (error);
- AUDIT_ARG_FILE(td->td_proc, fp);
- error = kern_capwrap(td, fp, rights, &capfd);
- /*
- * Release our reference to the file (kern_capwrap has held a reference
- * for the filedesc array).
- */
- fdrop(fp, td);
- if (error == 0)
- td->td_retval[0] = capfd;
+ AUDIT_ARG_RIGHTS(&rights);
+
+ fdp = td->td_proc->p_fd;
+ FILEDESC_XLOCK(fdp);
+ if (fget_locked(fdp, fd) == NULL) {
+ FILEDESC_XUNLOCK(fdp);
+ return (EBADF);
+ }
+ error = _cap_check(cap_rights(fdp, fd), &rights, CAPFAIL_INCREASE);
+ if (error == 0) {
+ fdp->fd_ofiles[fd].fde_rights = rights;
+ if (!cap_rights_is_set(&rights, CAP_IOCTL)) {
+ free(fdp->fd_ofiles[fd].fde_ioctls, M_FILECAPS);
+ fdp->fd_ofiles[fd].fde_ioctls = NULL;
+ fdp->fd_ofiles[fd].fde_nioctls = 0;
+ }
+ if (!cap_rights_is_set(&rights, CAP_FCNTL))
+ fdp->fd_ofiles[fd].fde_fcntls = 0;
+ }
+ FILEDESC_XUNLOCK(fdp);
return (error);
}
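
From userspace this is reached through cap_rights_limit(2); a minimal
sketch, assuming fd is an already-open descriptor:

    #include <sys/capsicum.h>
    #include <err.h>

    cap_rights_t rights;

    cap_rights_init(&rights, CAP_READ, CAP_FSTAT);
    if (cap_rights_limit(fd, &rights) < 0)
            err(1, "cap_rights_limit");
    /* A later attempt to widen the rights fails with ENOTCAPABLE. */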
@@ -267,241 +284,297 @@
* System call to query the rights mask associated with a capability.
*/
int
-sys_cap_getrights(struct thread *td, struct cap_getrights_args *uap)
+sys___cap_rights_get(struct thread *td, struct __cap_rights_get_args *uap)
{
- struct capability *cp;
- struct file *fp;
- int error;
+ struct filedesc *fdp;
+ cap_rights_t rights;
+ int error, fd, i, n;
- AUDIT_ARG_FD(uap->fd);
- error = fgetcap(td, uap->fd, &fp);
- if (error)
- return (error);
- cp = fp->f_data;
- error = copyout(&cp->cap_rights, uap->rightsp, sizeof(*uap->rightsp));
- fdrop(fp, td);
+ if (uap->version != CAP_RIGHTS_VERSION_00)
+ return (EINVAL);
+
+ fd = uap->fd;
+
+ AUDIT_ARG_FD(fd);
+
+ fdp = td->td_proc->p_fd;
+ FILEDESC_SLOCK(fdp);
+ if (fget_locked(fdp, fd) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ return (EBADF);
+ }
+ rights = *cap_rights(fdp, fd);
+ FILEDESC_SUNLOCK(fdp);
+ n = uap->version + 2;
+ if (uap->version != CAPVER(&rights)) {
+ /*
+ * For older versions we need to check if the descriptor
+ * doesn't contain rights not understood by the caller.
+ * If it does, we have to return an error.
+ */
+ for (i = n; i < CAPARSIZE(&rights); i++) {
+ if ((rights.cr_rights[i] & ~(0x7FULL << 57)) != 0)
+ return (EINVAL);
+ }
+ }
+ error = copyout(&rights, uap->rightsp, sizeof(rights.cr_rights[0]) * n);
+#ifdef KTRACE
+ if (error == 0 && KTRPOINT(td, KTR_STRUCT))
+ ktrcaprights(&rights);
+#endif
return (error);
}
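
Userspace reaches this through the cap_rights_get(3) wrapper; a minimal
sketch, again assuming an open descriptor fd:

    #include <sys/capsicum.h>
    #include <err.h>

    cap_rights_t rights;

    if (cap_rights_get(fd, &rights) < 0)
            err(1, "cap_rights_get");
    if (!cap_rights_is_set(&rights, CAP_WRITE))
            errx(1, "write(2) on fd would return ENOTCAPABLE");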
/*
- * Create a capability to wrap around an existing file.
+ * Test whether a capability grants the given ioctl command.
+ * If the descriptor doesn't have CAP_IOCTL, the ioctl list is empty and
+ * ENOTCAPABLE will be returned.
*/
int
-kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights,
- int *capfdp)
+cap_ioctl_check(struct filedesc *fdp, int fd, u_long cmd)
{
- struct capability *cp, *cp_old;
- struct file *fp_object, *fcapp;
- int error;
+ u_long *cmds;
+ ssize_t ncmds;
+ long i;
- if ((rights | CAP_MASK_VALID) != CAP_MASK_VALID)
- return (EINVAL);
+ FILEDESC_LOCK_ASSERT(fdp);
+ KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
+ ("%s: invalid fd=%d", __func__, fd));
- /*
- * If a new capability is being derived from an existing capability,
- * then the new capability rights must be a subset of the existing
- * rights.
- */
- if (fp->f_type == DTYPE_CAPABILITY) {
- cp_old = fp->f_data;
- if ((cp_old->cap_rights | rights) != cp_old->cap_rights)
- return (ENOTCAPABLE);
+ ncmds = fdp->fd_ofiles[fd].fde_nioctls;
+ if (ncmds == -1)
+ return (0);
+
+ cmds = fdp->fd_ofiles[fd].fde_ioctls;
+ for (i = 0; i < ncmds; i++) {
+ if (cmds[i] == cmd)
+ return (0);
}
- /*
- * Allocate a new file descriptor to hang the capability off of.
- */
- error = falloc(td, &fcapp, capfdp, fp->f_flag);
- if (error)
- return (error);
-
- /*
- * Rather than nesting capabilities, directly reference the object an
- * existing capability references. There's nothing else interesting
- * to preserve for future use, as we've incorporated the previous
- * rights mask into the new one. This prevents us from having to
- * deal with capability chains.
- */
- if (fp->f_type == DTYPE_CAPABILITY)
- fp_object = ((struct capability *)fp->f_data)->cap_object;
- else
- fp_object = fp;
- fhold(fp_object);
- cp = uma_zalloc(capability_zone, M_WAITOK | M_ZERO);
- cp->cap_rights = rights;
- cp->cap_object = fp_object;
- cp->cap_file = fcapp;
- if (fp->f_flag & DFLAG_PASSABLE)
- finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp,
- &capability_ops);
- else
- finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp,
- &capability_ops_unpassable);
-
- /*
- * Release our private reference (the proc filedesc still has one).
- */
- fdrop(fcapp, td);
- return (0);
+ return (ENOTCAPABLE);
}
/*
- * Given a file descriptor, test it against a capability rights mask and then
- * return the file descriptor on which to actually perform the requested
- * operation. As long as the reference to fp_cap remains valid, the returned
- * pointer in *fp will remain valid, so no extra reference management is
- * required, and the caller should fdrop() fp_cap as normal when done with
- * both.
+ * Check if the current ioctls list can be replaced by the new one.
*/
-int
-cap_funwrap(struct file *fp_cap, cap_rights_t rights, struct file **fpp)
+static int
+cap_ioctl_limit_check(struct filedesc *fdp, int fd, const u_long *cmds,
+ size_t ncmds)
{
- struct capability *c;
- int error;
+ u_long *ocmds;
+ ssize_t oncmds;
+ u_long i;
+ long j;
- if (fp_cap->f_type != DTYPE_CAPABILITY) {
- *fpp = fp_cap;
+ oncmds = fdp->fd_ofiles[fd].fde_nioctls;
+ if (oncmds == -1)
return (0);
+ if (oncmds < (ssize_t)ncmds)
+ return (ENOTCAPABLE);
+
+ ocmds = fdp->fd_ofiles[fd].fde_ioctls;
+ for (i = 0; i < ncmds; i++) {
+ for (j = 0; j < oncmds; j++) {
+ if (cmds[i] == ocmds[j])
+ break;
+ }
+ if (j == oncmds)
+ return (ENOTCAPABLE);
}
- c = fp_cap->f_data;
- error = cap_check(c, rights);
- if (error)
- return (error);
- *fpp = c->cap_object;
+
return (0);
}
-/*
- * Slightly different routine for memory mapping file descriptors: unwrap the
- * capability and check CAP_MMAP, but also return a bitmask representing the
- * maximum mapping rights the capability allows on the object.
- */
int
-cap_funwrap_mmap(struct file *fp_cap, cap_rights_t rights, u_char *maxprotp,
- struct file **fpp)
+kern_cap_ioctls_limit(struct thread *td, int fd, u_long *cmds, size_t ncmds)
{
- struct capability *c;
- u_char maxprot;
+ struct filedesc *fdp;
+ u_long *ocmds;
int error;
- if (fp_cap->f_type != DTYPE_CAPABILITY) {
- *fpp = fp_cap;
- *maxprotp = VM_PROT_ALL;
- return (0);
+ AUDIT_ARG_FD(fd);
+
+ fdp = td->td_proc->p_fd;
+ FILEDESC_XLOCK(fdp);
+
+ if (fget_locked(fdp, fd) == NULL) {
+ error = EBADF;
+ goto out;
}
- c = fp_cap->f_data;
- error = cap_check(c, rights | CAP_MMAP);
- if (error)
- return (error);
- *fpp = c->cap_object;
- maxprot = 0;
- if (c->cap_rights & CAP_READ)
- maxprot |= VM_PROT_READ;
- if (c->cap_rights & CAP_WRITE)
- maxprot |= VM_PROT_WRITE;
- if (c->cap_rights & CAP_MAPEXEC)
- maxprot |= VM_PROT_EXECUTE;
- *maxprotp = maxprot;
- return (0);
-}
-/*
- * When a capability is closed, simply drop the reference on the underlying
- * object and free the capability. fdrop() will handle the case where the
- * underlying object also needs to close, and the caller will have already
- * performed any object-specific lock or mqueue handling.
- */
-static int
-capability_close(struct file *fp, struct thread *td)
-{
- struct capability *c;
- struct file *fp_object;
+ error = cap_ioctl_limit_check(fdp, fd, cmds, ncmds);
+ if (error != 0)
+ goto out;
- KASSERT(fp->f_type == DTYPE_CAPABILITY,
- ("capability_close: !capability"));
+ ocmds = fdp->fd_ofiles[fd].fde_ioctls;
+ fdp->fd_ofiles[fd].fde_ioctls = cmds;
+ fdp->fd_ofiles[fd].fde_nioctls = ncmds;
- c = fp->f_data;
- fp->f_ops = &badfileops;
- fp->f_data = NULL;
- fp_object = c->cap_object;
- uma_zfree(capability_zone, c);
- return (fdrop(fp_object, td));
+ cmds = ocmds;
+ error = 0;
+out:
+ FILEDESC_XUNLOCK(fdp);
+ free(cmds, M_FILECAPS);
+ return (error);
}
-/*
- * In general, file descriptor operations should never make it to the
- * capability, only the underlying file descriptor operation vector, so panic
- * if we do turn up here.
- */
-static int
-capability_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
- int flags, struct thread *td)
+int
+sys_cap_ioctls_limit(struct thread *td, struct cap_ioctls_limit_args *uap)
{
+ u_long *cmds;
+ size_t ncmds;
+ int error;
- panic("capability_read");
-}
+ ncmds = uap->ncmds;
-static int
-capability_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
- int flags, struct thread *td)
-{
+ if (ncmds > 256) /* XXX: Is 256 sane? */
+ return (EINVAL);
- panic("capability_write");
+ if (ncmds == 0) {
+ cmds = NULL;
+ } else {
+ cmds = malloc(sizeof(cmds[0]) * ncmds, M_FILECAPS, M_WAITOK);
+ error = copyin(uap->cmds, cmds, sizeof(cmds[0]) * ncmds);
+ if (error != 0) {
+ free(cmds, M_FILECAPS);
+ return (error);
+ }
+ }
+
+ return (kern_cap_ioctls_limit(td, uap->fd, cmds, ncmds));
}
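
A hedged userspace sketch of the matching cap_ioctls_limit(2) call (the
command list is illustrative):

    #include <sys/param.h>          /* nitems() */
    #include <sys/capsicum.h>
    #include <sys/filio.h>          /* FIONREAD */
    #include <err.h>

    const unsigned long cmds[] = { FIONREAD };

    if (cap_ioctls_limit(fd, cmds, nitems(cmds)) < 0)
            err(1, "cap_ioctls_limit");
    /* Re-allowing ioctls outside this list fails with ENOTCAPABLE. */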
-static int
-capability_truncate(struct file *fp, off_t length, struct ucred *active_cred,
- struct thread *td)
+int
+sys_cap_ioctls_get(struct thread *td, struct cap_ioctls_get_args *uap)
{
+ struct filedesc *fdp;
+ struct filedescent *fdep;
+ u_long *cmds;
+ size_t maxcmds;
+ int error, fd;
- panic("capability_truncate");
-}
+ fd = uap->fd;
+ cmds = uap->cmds;
+ maxcmds = uap->maxcmds;
-static int
-capability_ioctl(struct file *fp, u_long com, void *data,
- struct ucred *active_cred, struct thread *td)
-{
+ AUDIT_ARG_FD(fd);
- panic("capability_ioctl");
-}
+ fdp = td->td_proc->p_fd;
+ FILEDESC_SLOCK(fdp);
-static int
-capability_poll(struct file *fp, int events, struct ucred *active_cred,
- struct thread *td)
-{
+ if (fget_locked(fdp, fd) == NULL) {
+ error = EBADF;
+ goto out;
+ }
- panic("capability_poll");
+ /*
+ * If all ioctls are allowed (fde_nioctls == -1 && fde_ioctls == NULL)
+ * the only sane thing we can do is to not populate the given array and
+ * return CAP_IOCTLS_ALL.
+ */
+
+ fdep = &fdp->fd_ofiles[fd];
+ if (cmds != NULL && fdep->fde_ioctls != NULL) {
+ error = copyout(fdep->fde_ioctls, cmds,
+ sizeof(cmds[0]) * MIN(fdep->fde_nioctls, maxcmds));
+ if (error != 0)
+ goto out;
+ }
+ if (fdep->fde_nioctls == -1)
+ td->td_retval[0] = CAP_IOCTLS_ALL;
+ else
+ td->td_retval[0] = fdep->fde_nioctls;
+
+ error = 0;
+out:
+ FILEDESC_SUNLOCK(fdp);
+ return (error);
}
-static int
-capability_kqfilter(struct file *fp, struct knote *kn)
+/*
+ * Test whether a capability grants the given fcntl command.
+ */
+int
+cap_fcntl_check_fde(struct filedescent *fde, int cmd)
{
+ uint32_t fcntlcap;
- panic("capability_kqfilter");
+ fcntlcap = (1 << cmd);
+ KASSERT((CAP_FCNTL_ALL & fcntlcap) != 0,
+ ("Unsupported fcntl=%d.", cmd));
+
+ if ((fde->fde_fcntls & fcntlcap) != 0)
+ return (0);
+
+ return (ENOTCAPABLE);
}
-static int
-capability_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
- struct thread *td)
+int
+cap_fcntl_check(struct filedesc *fdp, int fd, int cmd)
{
- panic("capability_stat");
+ KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
+ ("%s: invalid fd=%d", __func__, fd));
+
+ return (cap_fcntl_check_fde(&fdp->fd_ofiles[fd], cmd));
}
int
-capability_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
- struct thread *td)
+sys_cap_fcntls_limit(struct thread *td, struct cap_fcntls_limit_args *uap)
{
+ struct filedesc *fdp;
+ uint32_t fcntlrights;
+ int fd;
- panic("capability_chmod");
+ fd = uap->fd;
+ fcntlrights = uap->fcntlrights;
+
+ AUDIT_ARG_FD(fd);
+ AUDIT_ARG_FCNTL_RIGHTS(fcntlrights);
+
+ if ((fcntlrights & ~CAP_FCNTL_ALL) != 0)
+ return (EINVAL);
+
+ fdp = td->td_proc->p_fd;
+ FILEDESC_XLOCK(fdp);
+
+ if (fget_locked(fdp, fd) == NULL) {
+ FILEDESC_XUNLOCK(fdp);
+ return (EBADF);
+ }
+
+ if ((fcntlrights & ~fdp->fd_ofiles[fd].fde_fcntls) != 0) {
+ FILEDESC_XUNLOCK(fdp);
+ return (ENOTCAPABLE);
+ }
+
+ fdp->fd_ofiles[fd].fde_fcntls = fcntlrights;
+ FILEDESC_XUNLOCK(fdp);
+
+ return (0);
}
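
The fcntl(2) analogue from userspace; a minimal sketch:

    #include <sys/capsicum.h>
    #include <err.h>

    /* Keep only F_GETFL; fcntl(fd, F_SETFL, ...) then fails
       with ENOTCAPABLE. */
    if (cap_fcntls_limit(fd, CAP_FCNTL_GETFL) < 0)
            err(1, "cap_fcntls_limit");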
int
-capability_chown(struct file *fp, uid_t uid, gid_t gid,
- struct ucred *active_cred, struct thread *td)
+sys_cap_fcntls_get(struct thread *td, struct cap_fcntls_get_args *uap)
{
+ struct filedesc *fdp;
+ uint32_t rights;
+ int fd;
- panic("capability_chown");
+ fd = uap->fd;
+
+ AUDIT_ARG_FD(fd);
+
+ fdp = td->td_proc->p_fd;
+ FILEDESC_SLOCK(fdp);
+ if (fget_locked(fdp, fd) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ return (EBADF);
+ }
+ rights = fdp->fd_ofiles[fd].fde_fcntls;
+ FILEDESC_SUNLOCK(fdp);
+
+ return (copyout(&rights, uap->fcntlrightsp, sizeof(rights)));
}
#else /* !CAPABILITIES */
@@ -510,8 +583,9 @@
* Stub Capability functions for when options CAPABILITIES isn't compiled
* into the kernel.
*/
+
int
-sys_cap_new(struct thread *td, struct cap_new_args *uap)
+sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap)
{
return (ENOSYS);
@@ -518,7 +592,7 @@
}
int
-sys_cap_getrights(struct thread *td, struct cap_getrights_args *uap)
+sys___cap_rights_get(struct thread *td, struct __cap_rights_get_args *uap)
{
return (ENOSYS);
@@ -525,27 +599,31 @@
}
int
-cap_funwrap(struct file *fp_cap, cap_rights_t rights, struct file **fpp)
+sys_cap_ioctls_limit(struct thread *td, struct cap_ioctls_limit_args *uap)
{
- KASSERT(fp_cap->f_type != DTYPE_CAPABILITY,
- ("cap_funwrap: saw capability"));
+ return (ENOSYS);
+}
- *fpp = fp_cap;
- return (0);
+int
+sys_cap_ioctls_get(struct thread *td, struct cap_ioctls_get_args *uap)
+{
+
+ return (ENOSYS);
}
int
-cap_funwrap_mmap(struct file *fp_cap, cap_rights_t rights, u_char *maxprotp,
- struct file **fpp)
+sys_cap_fcntls_limit(struct thread *td, struct cap_fcntls_limit_args *uap)
{
- KASSERT(fp_cap->f_type != DTYPE_CAPABILITY,
- ("cap_funwrap_mmap: saw capability"));
+ return (ENOSYS);
+}
- *fpp = fp_cap;
- *maxprotp = VM_PROT_ALL;
- return (0);
+int
+sys_cap_fcntls_get(struct thread *td, struct cap_fcntls_get_args *uap)
+{
+
+ return (ENOSYS);
}
#endif /* CAPABILITIES */
Modified: trunk/sys/kern/sys_generic.c
===================================================================
--- trunk/sys/kern/sys_generic.c 2018-05-26 14:27:13 UTC (rev 9956)
+++ trunk/sys/kern/sys_generic.c 2018-05-26 14:27:48 UTC (rev 9957)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
@@ -35,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/sys_generic.c 315481 2017-03-18 12:39:24Z mmokhi $");
#include "opt_capsicum.h"
#include "opt_compat.h"
@@ -44,11 +45,12 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
-#include <sys/capability.h>
+#include <sys/capsicum.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/file.h>
+#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/socketvar.h>
@@ -74,9 +76,27 @@
#include <security/audit/audit.h>
+/*
+ * The following macro defines how many bytes will be allocated on
+ * the stack, instead of with malloc(), when passing IOCTL data
+ * structures between userspace and the kernel. Some IOCTLs with
+ * small data structures are issued very frequently, and this small
+ * on-stack buffer gives a significant speedup for those requests.
+ * The value of this define should be greater than or equal to 64
+ * bytes and should also be a power of two. The data structure is
+ * currently hard-aligned to an 8-byte boundary on the stack, which
+ * should currently be sufficient for all supported platforms.
+ */
+#define SYS_IOCTL_SMALL_SIZE 128 /* bytes */
+#define SYS_IOCTL_SMALL_ALIGN 8 /* bytes */
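
Distilled from the sys_ioctl() hunks below, the resulting allocation
pattern is essentially:

    u_char smalldata[SYS_IOCTL_SMALL_SIZE] __aligned(SYS_IOCTL_SMALL_ALIGN);
    void *data;

    if (size > SYS_IOCTL_SMALL_SIZE)
            data = malloc((u_long)size, M_IOCTLOPS, M_WAITOK); /* rare */
    else
            data = smalldata;       /* common case: no allocation at all */
    /* ... copyin, fo_ioctl(), copyout ... */
    if (size > SYS_IOCTL_SMALL_SIZE)
            free(data, M_IOCTLOPS);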
+
int iosize_max_clamp = 1;
SYSCTL_INT(_debug, OID_AUTO, iosize_max_clamp, CTLFLAG_RW,
&iosize_max_clamp, 0, "Clamp max i/o size to INT_MAX");
+int devfs_iosize_max_clamp = 1;
+SYSCTL_INT(_debug, OID_AUTO, devfs_iosize_max_clamp, CTLFLAG_RW,
+ &devfs_iosize_max_clamp, 0, "Clamp max i/o size to INT_MAX for devices");
+
/*
* Assert that the return value of read(2) and write(2) syscalls fits
* into a register. If not, an architecture will need to provide the
@@ -102,7 +122,7 @@
off_t, int);
static void doselwakeup(struct selinfo *, int);
static void seltdinit(struct thread *);
-static int seltdwait(struct thread *, int);
+static int seltdwait(struct thread *, sbintime_t, sbintime_t);
static void seltdclear(struct thread *);
/*
@@ -242,9 +262,10 @@
kern_readv(struct thread *td, int fd, struct uio *auio)
{
struct file *fp;
+ cap_rights_t rights;
int error;
- error = fget_read(td, fd, CAP_READ | CAP_SEEK, &fp);
+ error = fget_read(td, fd, cap_rights_init(&rights, CAP_READ), &fp);
if (error)
return (error);
error = dofileread(td, fd, fp, auio, (off_t)-1, 0);
@@ -285,14 +306,16 @@
off_t offset;
{
struct file *fp;
+ cap_rights_t rights;
int error;
- error = fget_read(td, fd, CAP_READ, &fp);
+ error = fget_read(td, fd, cap_rights_init(&rights, CAP_PREAD), &fp);
if (error)
return (error);
if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE))
error = ESPIPE;
- else if (offset < 0 && fp->f_vnode->v_type != VCHR)
+ else if (offset < 0 &&
+ (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR))
error = EINVAL;
else
error = dofileread(td, fd, fp, auio, offset, FOF_OFFSET);
@@ -451,9 +474,10 @@
kern_writev(struct thread *td, int fd, struct uio *auio)
{
struct file *fp;
+ cap_rights_t rights;
int error;
- error = fget_write(td, fd, CAP_WRITE | CAP_SEEK, &fp);
+ error = fget_write(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp);
if (error)
return (error);
error = dofilewrite(td, fd, fp, auio, (off_t)-1, 0);
@@ -494,14 +518,16 @@
off_t offset;
{
struct file *fp;
+ cap_rights_t rights;
int error;
- error = fget_write(td, fd, CAP_WRITE, &fp);
+ error = fget_write(td, fd, cap_rights_init(&rights, CAP_PWRITE), &fp);
if (error)
return (error);
if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE))
error = ESPIPE;
- else if (offset < 0 && fp->f_vnode->v_type != VCHR)
+ else if (offset < 0 &&
+ (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR))
error = EINVAL;
else
error = dofilewrite(td, fd, fp, auio, offset, FOF_OFFSET);
@@ -574,12 +600,13 @@
off_t length;
{
struct file *fp;
+ cap_rights_t rights;
int error;
AUDIT_ARG_FD(fd);
if (length < 0)
return (EINVAL);
- error = fget(td, fd, CAP_FTRUNCATE, &fp);
+ error = fget(td, fd, cap_rights_init(&rights, CAP_FTRUNCATE), &fp);
if (error)
return (error);
AUDIT_ARG_FILE(td->td_proc, fp);
@@ -636,6 +663,7 @@
int
sys_ioctl(struct thread *td, struct ioctl_args *uap)
{
+ u_char smalldata[SYS_IOCTL_SMALL_SIZE] __aligned(SYS_IOCTL_SMALL_ALIGN);
u_long com;
int arg, error;
u_int size;
@@ -670,17 +698,18 @@
arg = (intptr_t)uap->data;
data = (void *)&arg;
size = 0;
- } else
- data = malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
+ } else {
+ if (size > SYS_IOCTL_SMALL_SIZE)
+ data = malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
+ else
+ data = smalldata;
+ }
} else
data = (void *)&uap->data;
if (com & IOC_IN) {
error = copyin(uap->data, data, (u_int)size);
- if (error) {
- if (size > 0)
- free(data, M_IOCTLOPS);
- return (error);
- }
+ if (error != 0)
+ goto out;
} else if (com & IOC_OUT) {
/*
* Zero the buffer so the user always
@@ -694,7 +723,8 @@
if (error == 0 && (com & IOC_OUT))
error = copyout(data, uap->data, (u_int)size);
- if (size > 0)
+out:
+ if (size > SYS_IOCTL_SMALL_SIZE)
free(data, M_IOCTLOPS);
return (error);
}
@@ -704,28 +734,64 @@
{
struct file *fp;
struct filedesc *fdp;
- int error;
- int tmp;
+#ifndef CAPABILITIES
+ cap_rights_t rights;
+#endif
+ int error, tmp, locked;
AUDIT_ARG_FD(fd);
AUDIT_ARG_CMD(com);
- if ((error = fget(td, fd, CAP_IOCTL, &fp)) != 0)
- return (error);
- if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
- fdrop(fp, td);
- return (EBADF);
- }
+
fdp = td->td_proc->p_fd;
+
switch (com) {
case FIONCLEX:
+ case FIOCLEX:
FILEDESC_XLOCK(fdp);
- fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
- FILEDESC_XUNLOCK(fdp);
+ locked = LA_XLOCKED;
+ break;
+ default:
+#ifdef CAPABILITIES
+ FILEDESC_SLOCK(fdp);
+ locked = LA_SLOCKED;
+#else
+ locked = LA_UNLOCKED;
+#endif
+ break;
+ }
+
+#ifdef CAPABILITIES
+ if ((fp = fget_locked(fdp, fd)) == NULL) {
+ error = EBADF;
goto out;
+ }
+ if ((error = cap_ioctl_check(fdp, fd, com)) != 0) {
+ fp = NULL; /* fhold() was not called yet */
+ goto out;
+ }
+ fhold(fp);
+ if (locked == LA_SLOCKED) {
+ FILEDESC_SUNLOCK(fdp);
+ locked = LA_UNLOCKED;
+ }
+#else
+ error = fget(td, fd, cap_rights_init(&rights, CAP_IOCTL), &fp);
+ if (error != 0) {
+ fp = NULL;
+ goto out;
+ }
+#endif
+ if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
+ error = EBADF;
+ goto out;
+ }
+
+ switch (com) {
+ case FIONCLEX:
+ fdp->fd_ofiles[fd].fde_flags &= ~UF_EXCLOSE;
+ goto out;
case FIOCLEX:
- FILEDESC_XLOCK(fdp);
- fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
- FILEDESC_XUNLOCK(fdp);
+ fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE;
goto out;
case FIONBIO:
if ((tmp = *(int *)data))
@@ -745,7 +811,21 @@
error = fo_ioctl(fp, com, data, td->td_ucred, td);
out:
- fdrop(fp, td);
+ switch (locked) {
+ case LA_XLOCKED:
+ FILEDESC_XUNLOCK(fdp);
+ break;
+#ifdef CAPABILITIES
+ case LA_SLOCKED:
+ FILEDESC_SUNLOCK(fdp);
+ break;
+#endif
+ default:
+ FILEDESC_UNLOCK_ASSERT(fdp);
+ break;
+ }
+ if (fp != NULL)
+ fdrop(fp, td);
return (error);
}
@@ -903,9 +983,10 @@
*/
fd_mask s_selbits[howmany(2048, NFDBITS)];
fd_mask *ibits[3], *obits[3], *selbits, *sbp;
- struct timeval atv, rtv, ttv;
- int error, lf, ndu, timo;
+ struct timeval rtv;
+ sbintime_t asbt, precision, rsbt;
u_int nbufbytes, ncpbytes, ncpubytes, nfdbits;
+ int error, lf, ndu;
if (nd < 0)
return (EINVAL);
@@ -995,19 +1076,30 @@
if (nbufbytes != 0)
bzero(selbits, nbufbytes / 2);
+ precision = 0;
if (tvp != NULL) {
- atv = *tvp;
- if (itimerfix(&atv)) {
+ rtv = *tvp;
+ if (rtv.tv_sec < 0 || rtv.tv_usec < 0 ||
+ rtv.tv_usec >= 1000000) {
error = EINVAL;
goto done;
}
- getmicrouptime(&rtv);
- timevaladd(&atv, &rtv);
- } else {
- atv.tv_sec = 0;
- atv.tv_usec = 0;
- }
- timo = 0;
+ if (!timevalisset(&rtv))
+ asbt = 0;
+ else if (rtv.tv_sec <= INT32_MAX) {
+ rsbt = tvtosbt(rtv);
+ precision = rsbt;
+ precision >>= tc_precexp;
+ if (TIMESEL(&asbt, rsbt))
+ asbt += tc_tick_sbt;
+ if (asbt <= SBT_MAX - rsbt)
+ asbt += rsbt;
+ else
+ asbt = -1;
+ } else
+ asbt = -1;
+ } else
+ asbt = -1;
seltdinit(td);
/* Iterate until the timeout expires or descriptors become ready. */
for (;;) {
@@ -1014,16 +1106,7 @@
error = selscan(td, ibits, obits, nd);
if (error || td->td_retval[0] != 0)
break;
- if (atv.tv_sec || atv.tv_usec) {
- getmicrouptime(&rtv);
- if (timevalcmp(&rtv, &atv, >=))
- break;
- ttv = atv;
- timevalsub(&ttv, &rtv);
- timo = ttv.tv_sec > 24 * 60 * 60 ?
- 24 * 60 * 60 * hz : tvtohz(&ttv);
- }
- error = seltdwait(td, timo);
+ error = seltdwait(td, asbt, precision);
if (error)
break;
error = selrescan(td, ibits, obits);
@@ -1130,32 +1213,11 @@
static __inline int
getselfd_cap(struct filedesc *fdp, int fd, struct file **fpp)
{
- struct file *fp;
-#ifdef CAPABILITIES
- struct file *fp_fromcap;
- int error;
-#endif
+ cap_rights_t rights;
- if ((fp = fget_unlocked(fdp, fd)) == NULL)
- return (EBADF);
-#ifdef CAPABILITIES
- /*
- * If the file descriptor is for a capability, test rights and use
- * the file descriptor references by the capability.
- */
- error = cap_funwrap(fp, CAP_POLL_EVENT, &fp_fromcap);
- if (error) {
- fdrop(fp, curthread);
- return (error);
- }
- if (fp != fp_fromcap) {
- fhold(fp_fromcap);
- fdrop(fp, curthread);
- fp = fp_fromcap;
- }
-#endif /* CAPABILITIES */
- *fpp = fp;
- return (0);
+ cap_rights_init(&rights, CAP_EVENT);
+
+ return (fget_unlocked(fdp, fd, &rights, 0, fpp, NULL));
}
/*
@@ -1241,26 +1303,60 @@
return (0);
}
-#ifndef _SYS_SYSPROTO_H_
-struct poll_args {
- struct pollfd *fds;
- u_int nfds;
- int timeout;
-};
-#endif
int
-sys_poll(td, uap)
- struct thread *td;
- struct poll_args *uap;
+sys_poll(struct thread *td, struct poll_args *uap)
{
+ struct timespec ts, *tsp;
+
+ if (uap->timeout != INFTIM) {
+ if (uap->timeout < 0)
+ return (EINVAL);
+ ts.tv_sec = uap->timeout / 1000;
+ ts.tv_nsec = (uap->timeout % 1000) * 1000000;
+ tsp = &ts;
+ } else
+ tsp = NULL;
+
+ return (kern_poll(td, uap->fds, uap->nfds, tsp, NULL));
+}
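
For example, a 1500 ms timeout argument becomes:

    /* uap->timeout == 1500 */
    ts.tv_sec  = 1500 / 1000;               /* 1 second */
    ts.tv_nsec = (1500 % 1000) * 1000000;   /* 500000000 ns */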
+
+int
+kern_poll(struct thread *td, struct pollfd *fds, u_int nfds,
+ struct timespec *tsp, sigset_t *uset)
+{
struct pollfd *bits;
struct pollfd smallbits[32];
- struct timeval atv, rtv, ttv;
- int error, timo;
- u_int nfds;
+ sbintime_t sbt, precision, tmp;
+ time_t over;
+ struct timespec ts;
+ int error;
size_t ni;
- nfds = uap->nfds;
+ precision = 0;
+ if (tsp != NULL) {
+ if (tsp->tv_sec < 0)
+ return (EINVAL);
+ if (tsp->tv_nsec < 0 || tsp->tv_nsec >= 1000000000)
+ return (EINVAL);
+ if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
+ sbt = 0;
+ else {
+ ts = *tsp;
+ if (ts.tv_sec > INT32_MAX / 2) {
+ over = ts.tv_sec - INT32_MAX / 2;
+ ts.tv_sec -= over;
+ } else
+ over = 0;
+ tmp = tstosbt(ts);
+ precision = tmp;
+ precision >>= tc_precexp;
+ if (TIMESEL(&sbt, tmp))
+ sbt += tc_tick_sbt;
+ sbt += tmp;
+ }
+ } else
+ sbt = -1;
+
if (nfds > maxfilesperproc && nfds > FD_SETSIZE)
return (EINVAL);
ni = nfds * sizeof(struct pollfd);
@@ -1268,23 +1364,26 @@
bits = malloc(ni, M_TEMP, M_WAITOK);
else
bits = smallbits;
- error = copyin(uap->fds, bits, ni);
+ error = copyin(fds, bits, ni);
if (error)
goto done;
- if (uap->timeout != INFTIM) {
- atv.tv_sec = uap->timeout / 1000;
- atv.tv_usec = (uap->timeout % 1000) * 1000;
- if (itimerfix(&atv)) {
- error = EINVAL;
+
+ if (uset != NULL) {
+ error = kern_sigprocmask(td, SIG_SETMASK, uset,
+ &td->td_oldsigmask, 0);
+ if (error)
goto done;
- }
- getmicrouptime(&rtv);
- timevaladd(&atv, &rtv);
- } else {
- atv.tv_sec = 0;
- atv.tv_usec = 0;
+ td->td_pflags |= TDP_OLDMASK;
+ /*
+ * Make sure that ast() is called on return to
+ * usermode and TDP_OLDMASK is cleared, restoring old
+ * sigmask.
+ */
+ thread_lock(td);
+ td->td_flags |= TDF_ASTPENDING;
+ thread_unlock(td);
}
- timo = 0;
+
seltdinit(td);
/* Iterate until the timeout expires or descriptors become ready. */
for (;;) {
@@ -1291,16 +1390,7 @@
error = pollscan(td, bits, nfds);
if (error || td->td_retval[0] != 0)
break;
- if (atv.tv_sec || atv.tv_usec) {
- getmicrouptime(&rtv);
- if (timevalcmp(&rtv, &atv, >=))
- break;
- ttv = atv;
- timevalsub(&ttv, &rtv);
- timo = ttv.tv_sec > 24 * 60 * 60 ?
- 24 * 60 * 60 * hz : tvtohz(&ttv);
- }
- error = seltdwait(td, timo);
+ error = seltdwait(td, sbt, precision);
if (error)
break;
error = pollrescan(td);
@@ -1316,7 +1406,7 @@
if (error == EWOULDBLOCK)
error = 0;
if (error == 0) {
- error = pollout(td, bits, uap->fds, nfds);
+ error = pollout(td, bits, fds, nfds);
if (error)
goto out;
}
@@ -1326,6 +1416,35 @@
return (error);
}
+int
+sys_ppoll(struct thread *td, struct ppoll_args *uap)
+{
+ struct timespec ts, *tsp;
+ sigset_t set, *ssp;
+ int error;
+
+ if (uap->ts != NULL) {
+ error = copyin(uap->ts, &ts, sizeof(ts));
+ if (error)
+ return (error);
+ tsp = &ts;
+ } else
+ tsp = NULL;
+ if (uap->set != NULL) {
+ error = copyin(uap->set, &set, sizeof(set));
+ if (error)
+ return (error);
+ ssp = &set;
+ } else
+ ssp = NULL;
+ /*
+ * fds is still a pointer into user space. kern_poll() will
+ * take care of copying that array into kernel space.
+ */
+
+ return (kern_poll(td, uap->fds, uap->nfds, tsp, ssp));
+}
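
A userspace sketch of the new ppoll(2) entry point (descriptor and mask
are illustrative):

    #include <poll.h>
    #include <signal.h>
    #include <err.h>

    struct pollfd pfd = { .fd = sock, .events = POLLIN };
    struct timespec ts = { .tv_sec = 2, .tv_nsec = 0 };
    sigset_t mask;

    sigemptyset(&mask);     /* mask installed while waiting */
    if (ppoll(&pfd, 1, &ts, &mask) < 0)
            err(1, "ppoll");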
+
static int
pollrescan(struct thread *td)
{
@@ -1336,6 +1455,9 @@
struct filedesc *fdp;
struct file *fp;
struct pollfd *fd;
+#ifdef CAPABILITIES
+ cap_rights_t rights;
+#endif
int n;
n = 0;
@@ -1349,13 +1471,15 @@
/* If the selinfo wasn't cleared the event didn't fire. */
if (si != NULL)
continue;
- fp = fdp->fd_ofiles[fd->fd];
+ fp = fdp->fd_ofiles[fd->fd].fde_file;
#ifdef CAPABILITIES
- if ((fp == NULL)
- || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) {
+ if (fp == NULL ||
+ cap_check(cap_rights(fdp, fd->fd),
+ cap_rights_init(&rights, CAP_EVENT)) != 0)
#else
- if (fp == NULL) {
+ if (fp == NULL)
#endif
+ {
fd->revents = POLLNVAL;
n++;
continue;
@@ -1408,25 +1532,29 @@
u_int nfd;
{
struct filedesc *fdp = td->td_proc->p_fd;
- int i;
struct file *fp;
- int n = 0;
+#ifdef CAPABILITIES
+ cap_rights_t rights;
+#endif
+ int i, n = 0;
FILEDESC_SLOCK(fdp);
for (i = 0; i < nfd; i++, fds++) {
- if (fds->fd >= fdp->fd_nfiles) {
+ if (fds->fd > fdp->fd_lastfile) {
fds->revents = POLLNVAL;
n++;
} else if (fds->fd < 0) {
fds->revents = 0;
} else {
- fp = fdp->fd_ofiles[fds->fd];
+ fp = fdp->fd_ofiles[fds->fd].fde_file;
#ifdef CAPABILITIES
- if ((fp == NULL)
- || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) {
+ if (fp == NULL ||
+ cap_check(cap_rights(fdp, fds->fd),
+ cap_rights_init(&rights, CAP_EVENT)) != 0)
#else
- if (fp == NULL) {
+ if (fp == NULL)
#endif
+ {
fds->revents = POLLNVAL;
n++;
} else {
@@ -1483,21 +1611,32 @@
int
selsocket(struct socket *so, int events, struct timeval *tvp, struct thread *td)
{
- struct timeval atv, rtv, ttv;
- int error, timo;
+ struct timeval rtv;
+ sbintime_t asbt, precision, rsbt;
+ int error;
+ precision = 0; /* stupid gcc! */
if (tvp != NULL) {
- atv = *tvp;
- if (itimerfix(&atv))
+ rtv = *tvp;
+ if (rtv.tv_sec < 0 || rtv.tv_usec < 0 ||
+ rtv.tv_usec >= 1000000)
return (EINVAL);
- getmicrouptime(&rtv);
- timevaladd(&atv, &rtv);
- } else {
- atv.tv_sec = 0;
- atv.tv_usec = 0;
- }
-
- timo = 0;
+ if (!timevalisset(&rtv))
+ asbt = 0;
+ else if (rtv.tv_sec <= INT32_MAX) {
+ rsbt = tvtosbt(rtv);
+ precision = rsbt;
+ precision >>= tc_precexp;
+ if (TIMESEL(&asbt, rsbt))
+ asbt += tc_tick_sbt;
+ if (asbt <= SBT_MAX - rsbt)
+ asbt += rsbt;
+ else
+ asbt = -1;
+ } else
+ asbt = -1;
+ } else
+ asbt = -1;
seltdinit(td);
/*
* Iterate until the timeout expires or the socket becomes ready.
@@ -1508,22 +1647,11 @@
/* error here is actually the ready events. */
if (error)
return (0);
- if (atv.tv_sec || atv.tv_usec) {
- getmicrouptime(&rtv);
- if (timevalcmp(&rtv, &atv, >=)) {
- seltdclear(td);
- return (EWOULDBLOCK);
- }
- ttv = atv;
- timevalsub(&ttv, &rtv);
- timo = ttv.tv_sec > 24 * 60 * 60 ?
- 24 * 60 * 60 * hz : tvtohz(&ttv);
- }
- error = seltdwait(td, timo);
- seltdclear(td);
+ error = seltdwait(td, asbt, precision);
if (error)
break;
}
+ seltdclear(td);
/* XXX Duplicates ncp/smb behavior. */
if (error == ERESTART)
error = 0;
@@ -1698,7 +1826,7 @@
}
static int
-seltdwait(struct thread *td, int timo)
+seltdwait(struct thread *td, sbintime_t sbt, sbintime_t precision)
{
struct seltd *stp;
int error;
@@ -1717,8 +1845,11 @@
mtx_unlock(&stp->st_mtx);
return (0);
}
- if (timo > 0)
- error = cv_timedwait_sig(&stp->st_wait, &stp->st_mtx, timo);
+ if (sbt == 0)
+ error = EWOULDBLOCK;
+ else if (sbt != -1)
+ error = cv_timedwait_sig_sbt(&stp->st_wait, &stp->st_mtx,
+ sbt, precision, C_ABSOLUTE);
else
error = cv_wait_sig(&stp->st_wait, &stp->st_mtx);
mtx_unlock(&stp->st_mtx);
Modified: trunk/sys/kern/sys_pipe.c
===================================================================
--- trunk/sys/kern/sys_pipe.c 2018-05-26 14:27:13 UTC (rev 9956)
+++ trunk/sys/kern/sys_pipe.c 2018-05-26 14:27:48 UTC (rev 9957)
@@ -1,5 +1,7 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1996 John S. Dyson
+ * Copyright (c) 2012 Giovanni Trematerra
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -89,7 +91,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/sys_pipe.c 321020 2017-07-15 17:25:40Z dchagin $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -128,8 +130,6 @@
#include <vm/vm_page.h>
#include <vm/uma.h>
-int do_pipe(struct thread *td, int fildes[2], int flags);
-
/*
* Use this define if you want to disable *fancy* VM things. Expect an
* approx 30% decrease in transfer rate. This could be useful for
@@ -137,6 +137,9 @@
*/
/* #define PIPE_NODIRECT */
+#define PIPE_PEER(pipe) \
+ (((pipe)->pipe_state & PIPE_NAMED) ? (pipe) : ((pipe)->pipe_peer))
+
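
In other words, a fifo (PIPE_NAMED set) sends both directions through the
same struct pipe, while an anonymous pipe targets its opposite endpoint:

    /* Illustrative only. */
    wpipe = PIPE_PEER(rpipe); /* fifo: rpipe itself; pipe(2): rpipe->pipe_peer */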
/*
* interfaces to the outside world
*/
@@ -148,8 +151,10 @@
static fo_kqfilter_t pipe_kqfilter;
static fo_stat_t pipe_stat;
static fo_close_t pipe_close;
+static fo_chmod_t pipe_chmod;
+static fo_chown_t pipe_chown;
-static struct fileops pipeops = {
+struct fileops pipeops = {
.fo_read = pipe_read,
.fo_write = pipe_write,
.fo_truncate = pipe_truncate,
@@ -158,15 +163,23 @@
.fo_kqfilter = pipe_kqfilter,
.fo_stat = pipe_stat,
.fo_close = pipe_close,
- .fo_chmod = invfo_chmod,
- .fo_chown = invfo_chown,
+ .fo_chmod = pipe_chmod,
+ .fo_chown = pipe_chown,
+ .fo_sendfile = invfo_sendfile,
.fo_flags = DFLAG_PASSABLE
};
static void filt_pipedetach(struct knote *kn);
+static void filt_pipedetach_notsup(struct knote *kn);
+static int filt_pipenotsup(struct knote *kn, long hint);
static int filt_piperead(struct knote *kn, long hint);
static int filt_pipewrite(struct knote *kn, long hint);
+static struct filterops pipe_nfiltops = {
+ .f_isfd = 1,
+ .f_detach = filt_pipedetach_notsup,
+ .f_event = filt_pipenotsup
+};
static struct filterops pipe_rfiltops = {
.f_isfd = 1,
.f_detach = filt_pipedetach,
@@ -209,10 +222,10 @@
static void pipeinit(void *dummy __unused);
static void pipeclose(struct pipe *cpipe);
static void pipe_free_kmem(struct pipe *cpipe);
-static int pipe_create(struct pipe *pipe, int backing);
+static void pipe_create(struct pipe *pipe, int backing);
+static void pipe_paircreate(struct thread *td, struct pipepair **p_pp);
static __inline int pipelock(struct pipe *cpipe, int catch);
static __inline void pipeunlock(struct pipe *cpipe);
-static __inline void pipeselwakeup(struct pipe *cpipe);
#ifndef PIPE_NODIRECT
static int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
static void pipe_destroy_write_buffer(struct pipe *wpipe);
@@ -303,7 +316,7 @@
pp = (struct pipepair *)mem;
- mtx_init(&pp->pp_mtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE);
+ mtx_init(&pp->pp_mtx, "pipe mutex", NULL, MTX_DEF);
return (0);
}
@@ -319,26 +332,13 @@
mtx_destroy(&pp->pp_mtx);
}
-/*
- * The pipe system call for the DTYPE_PIPE type of pipes. If we fail, let
- * the zone pick up the pieces via pipeclose().
- */
-int
-kern_pipe(struct thread *td, int fildes[2])
+static void
+pipe_paircreate(struct thread *td, struct pipepair **p_pp)
{
- return (do_pipe(td, fildes, 0));
-}
-
-int
-do_pipe(struct thread *td, int fildes[2], int flags)
-{
- struct filedesc *fdp = td->td_proc->p_fd;
- struct file *rf, *wf;
struct pipepair *pp;
struct pipe *rpipe, *wpipe;
- int fd, fflags, error;
- pp = uma_zalloc(pipe_zone, M_WAITOK);
+ *p_pp = pp = uma_zalloc(pipe_zone, M_WAITOK);
#ifdef MAC
/*
* The MAC label is shared between the connected endpoints. As a
@@ -355,16 +355,63 @@
knlist_init_mtx(&wpipe->pipe_sel.si_note, PIPE_MTX(wpipe));
/* Only the forward direction pipe is backed by default */
- if ((error = pipe_create(rpipe, 1)) != 0 ||
- (error = pipe_create(wpipe, 0)) != 0) {
- pipeclose(rpipe);
- pipeclose(wpipe);
- return (error);
- }
+ pipe_create(rpipe, 1);
+ pipe_create(wpipe, 0);
rpipe->pipe_state |= PIPE_DIRECTOK;
wpipe->pipe_state |= PIPE_DIRECTOK;
+}
+void
+pipe_named_ctor(struct pipe **ppipe, struct thread *td)
+{
+ struct pipepair *pp;
+
+ pipe_paircreate(td, &pp);
+ pp->pp_rpipe.pipe_state |= PIPE_NAMED;
+ *ppipe = &pp->pp_rpipe;
+}
+
+void
+pipe_dtor(struct pipe *dpipe)
+{
+ struct pipe *peer;
+ ino_t ino;
+
+ ino = dpipe->pipe_ino;
+ peer = (dpipe->pipe_state & PIPE_NAMED) != 0 ? dpipe->pipe_peer : NULL;
+ funsetown(&dpipe->pipe_sigio);
+ pipeclose(dpipe);
+ if (peer != NULL) {
+ funsetown(&peer->pipe_sigio);
+ pipeclose(peer);
+ }
+ if (ino != 0 && ino != (ino_t)-1)
+ free_unr(pipeino_unr, ino);
+}
+
+/*
+ * The pipe system call for the DTYPE_PIPE type of pipes. If we fail, let
+ * the zone pick up the pieces via pipeclose().
+ */
+int
+kern_pipe(struct thread *td, int fildes[2])
+{
+
+ return (kern_pipe2(td, fildes, 0));
+}
+
+int
+kern_pipe2(struct thread *td, int fildes[2], int flags)
+{
+ struct file *rf, *wf;
+ struct pipe *rpipe, *wpipe;
+ struct pipepair *pp;
+ int fd, fflags, error;
+
+ pipe_paircreate(td, &pp);
+ rpipe = &pp->pp_rpipe;
+ wpipe = &pp->pp_wpipe;
error = falloc(td, &rf, &fd, flags);
if (error) {
pipeclose(rpipe);
@@ -387,7 +434,7 @@
finit(rf, fflags, DTYPE_PIPE, rpipe, &pipeops);
error = falloc(td, &wf, &fd, flags);
if (error) {
- fdclose(fdp, rf, fildes[0], td);
+ fdclose(td, rf, fildes[0]);
fdrop(rf, td);
/* rpipe has been closed by fdrop(). */
pipeclose(wpipe);
@@ -412,7 +459,7 @@
error = kern_pipe(td, fildes);
if (error)
return (error);
-
+
td->td_retval[0] = fildes[0];
td->td_retval[1] = fildes[1];
@@ -419,6 +466,24 @@
return (0);
}
+int
+sys_pipe2(struct thread *td, struct pipe2_args *uap)
+{
+ int error, fildes[2];
+
+ if (uap->flags & ~(O_CLOEXEC | O_NONBLOCK))
+ return (EINVAL);
+ error = kern_pipe2(td, fildes, uap->flags);
+ if (error)
+ return (error);
+ error = copyout(fildes, uap->fildes, 2 * sizeof(int));
+ if (error) {
+ (void)kern_close(td, fildes[0]);
+ (void)kern_close(td, fildes[1]);
+ }
+ return (error);
+}
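
The matching userspace call; a minimal sketch:

    #include <fcntl.h>
    #include <unistd.h>
    #include <err.h>

    int fds[2];

    if (pipe2(fds, O_CLOEXEC | O_NONBLOCK) < 0)
            err(1, "pipe2");
    /* Any other flag bit fails with EINVAL, per the check above. */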
+
/*
* Allocate kva for pipe circular buffer, the space is pageable
* This routine will 'realloc' the size of a pipe safely, if it fails
@@ -447,7 +512,7 @@
buffer = (caddr_t) vm_map_min(pipe_map);
error = vm_map_find(pipe_map, NULL, 0,
- (vm_offset_t *) &buffer, size, 1,
+ (vm_offset_t *) &buffer, size, 0, VMFS_ANY_SPACE,
VM_PROT_ALL, VM_PROT_ALL, 0);
if (error != KERN_SUCCESS) {
if ((cpipe->pipe_buffer.buffer == NULL) &&
@@ -545,7 +610,7 @@
}
}
-static __inline void
+void
pipeselwakeup(cpipe)
struct pipe *cpipe;
{
@@ -565,24 +630,27 @@
* Initialize and allocate VM and memory for pipe. The structure
* will start out zero'd from the ctor, so we just manage the kmem.
*/
-static int
+static void
pipe_create(pipe, backing)
struct pipe *pipe;
int backing;
{
- int error;
if (backing) {
+ /*
+ * Note that these functions can fail if the pipe map is exhausted
+ * (as a result of too many pipes being created), but we ignore the
+ * error as it is not fatal and could be provoked by
+ * unprivileged users. The only consequence is worse performance
+ * with the given pipe.
+ */
if (amountpipekva > maxpipekva / 2)
- error = pipespace_new(pipe, SMALL_PIPE_SIZE);
+ (void)pipespace_new(pipe, SMALL_PIPE_SIZE);
else
- error = pipespace_new(pipe, PIPE_SIZE);
- } else {
- /* If we're not backing this pipe, no need to do anything. */
- error = 0;
+ (void)pipespace_new(pipe, PIPE_SIZE);
}
+
pipe->pipe_ino = -1;
- return (error);
}
/* ARGSUSED */
@@ -594,11 +662,12 @@
struct thread *td;
int flags;
{
- struct pipe *rpipe = fp->f_data;
+ struct pipe *rpipe;
int error;
int nread = 0;
int size;
+ rpipe = fp->f_data;
PIPE_LOCK(rpipe);
++rpipe->pipe_busy;
error = pipelock(rpipe, 1);
@@ -675,7 +744,7 @@
rpipe->pipe_map.pos += size;
rpipe->pipe_map.cnt -= size;
if (rpipe->pipe_map.cnt == 0) {
- rpipe->pipe_state &= ~PIPE_DIRECTW;
+ rpipe->pipe_state &= ~(PIPE_DIRECTW|PIPE_WANTW);
wakeup(rpipe);
}
#endif
@@ -875,9 +944,10 @@
retry:
PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
error = pipelock(wpipe, 1);
- if (wpipe->pipe_state & PIPE_EOF)
+ if (error != 0)
+ goto error1;
+ if ((wpipe->pipe_state & PIPE_EOF) != 0) {
error = EPIPE;
- if (error) {
pipeunlock(wpipe);
goto error1;
}
@@ -938,6 +1008,7 @@
wakeup(wpipe);
}
pipeselwakeup(wpipe);
+ wpipe->pipe_state |= PIPE_WANTW;
pipeunlock(wpipe);
error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
"pipdwt", 0);
@@ -978,8 +1049,7 @@
struct pipe *wpipe, *rpipe;
rpipe = fp->f_data;
- wpipe = rpipe->pipe_peer;
-
+ wpipe = PIPE_PEER(rpipe);
PIPE_LOCK(rpipe);
error = pipelock(wpipe, 1);
if (error) {
@@ -1224,13 +1294,13 @@
}
/*
- * Don't return EPIPE if I/O was successful
+ * Don't return EPIPE if any byte was written.
+ * EINTR and other interrupts are handled by the generic I/O layer.
+ * Do not pretend that I/O succeeded for obvious user errors
+ * like EFAULT.
*/
- if ((wpipe->pipe_buffer.cnt == 0) &&
- (uio->uio_resid == 0) &&
- (error == EPIPE)) {
+ if (uio->uio_resid != orig_resid && error == EPIPE)
error = 0;
- }
if (error == 0)
vfs_timestamp(&wpipe->pipe_mtime);
@@ -1256,6 +1326,9 @@
struct thread *td;
{
+ /* For named pipes call the vnode operation. */
+ if (fp->f_vnode != NULL)
+ return (vnops.fo_truncate(fp, length, active_cred, td));
return (EINVAL);
}
@@ -1298,6 +1371,11 @@
break;
case FIONREAD:
+ if (!(fp->f_flag & FREAD)) {
+ *(int *)data = 0;
+ PIPE_UNLOCK(mpipe);
+ return (0);
+ }
if (mpipe->pipe_state & PIPE_DIRECTW)
*(int *)data = mpipe->pipe_map.cnt;
else
@@ -1340,14 +1418,16 @@
struct ucred *active_cred;
struct thread *td;
{
- struct pipe *rpipe = fp->f_data;
+ struct pipe *rpipe;
struct pipe *wpipe;
- int revents = 0;
+ int levents, revents;
#ifdef MAC
int error;
#endif
- wpipe = rpipe->pipe_peer;
+ revents = 0;
+ rpipe = fp->f_data;
+ wpipe = PIPE_PEER(rpipe);
PIPE_LOCK(rpipe);
#ifdef MAC
error = mac_pipe_check_poll(active_cred, rpipe->pipe_pair);
@@ -1354,12 +1434,12 @@
if (error)
goto locked_error;
#endif
- if (events & (POLLIN | POLLRDNORM))
+ if (fp->f_flag & FREAD && events & (POLLIN | POLLRDNORM))
if ((rpipe->pipe_state & PIPE_DIRECTW) ||
(rpipe->pipe_buffer.cnt > 0))
revents |= events & (POLLIN | POLLRDNORM);
- if (events & (POLLOUT | POLLWRNORM))
+ if (fp->f_flag & FWRITE && events & (POLLOUT | POLLWRNORM))
if (wpipe->pipe_present != PIPE_ACTIVE ||
(wpipe->pipe_state & PIPE_EOF) ||
(((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
@@ -1367,6 +1447,12 @@
wpipe->pipe_buffer.size == 0)))
revents |= events & (POLLOUT | POLLWRNORM);
+ levents = events &
+ (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM | POLLRDBAND);
+ if (rpipe->pipe_state & PIPE_NAMED && fp->f_flag & FREAD && levents &&
+ fp->f_seqcount == rpipe->pipe_wgen)
+ events |= POLLINIGNEOF;
+
if ((events & POLLINIGNEOF) == 0) {
if (rpipe->pipe_state & PIPE_EOF) {
revents |= (events & (POLLIN | POLLRDNORM));
@@ -1377,13 +1463,13 @@
}
if (revents == 0) {
- if (events & (POLLIN | POLLRDNORM)) {
+ if (fp->f_flag & FREAD && events & (POLLIN | POLLRDNORM)) {
selrecord(td, &rpipe->pipe_sel);
if (SEL_WAITING(&rpipe->pipe_sel))
rpipe->pipe_state |= PIPE_SEL;
}
- if (events & (POLLOUT | POLLWRNORM)) {
+ if (fp->f_flag & FWRITE && events & (POLLOUT | POLLWRNORM)) {
selrecord(td, &wpipe->pipe_sel);
if (SEL_WAITING(&wpipe->pipe_sel))
wpipe->pipe_state |= PIPE_SEL;
@@ -1423,6 +1509,13 @@
return (error);
}
#endif
+
+ /* For named pipes ask the underlying filesystem. */
+ if (pipe->pipe_state & PIPE_NAMED) {
+ PIPE_UNLOCK(pipe);
+ return (vnops.fo_stat(fp, ub, active_cred, td));
+ }
+
/*
* Lazily allocate an inode number for the pipe. Most pipe
* users do not call fstat(2) on the pipe, which means that
@@ -1469,15 +1562,48 @@
struct file *fp;
struct thread *td;
{
- struct pipe *cpipe = fp->f_data;
+ if (fp->f_vnode != NULL)
+ return vnops.fo_close(fp, td);
fp->f_ops = &badfileops;
+ pipe_dtor(fp->f_data);
fp->f_data = NULL;
- funsetown(&cpipe->pipe_sigio);
- pipeclose(cpipe);
return (0);
}
+static int
+pipe_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td)
+{
+ struct pipe *cpipe;
+ int error;
+
+ cpipe = fp->f_data;
+ if (cpipe->pipe_state & PIPE_NAMED)
+ error = vn_chmod(fp, mode, active_cred, td);
+ else
+ error = invfo_chmod(fp, mode, active_cred, td);
+ return (error);
+}
+
+static int
+pipe_chown(fp, uid, gid, active_cred, td)
+ struct file *fp;
+ uid_t uid;
+ gid_t gid;
+ struct ucred *active_cred;
+ struct thread *td;
+{
+ struct pipe *cpipe;
+ int error;
+
+ cpipe = fp->f_data;
+ if (cpipe->pipe_state & PIPE_NAMED)
+ error = vn_chown(fp, uid, gid, active_cred, td);
+ else
+ error = invfo_chown(fp, uid, gid, active_cred, td);
+ return (error);
+}
+
static void
pipe_free_kmem(cpipe)
struct pipe *cpipe;
@@ -1511,7 +1637,6 @@
{
struct pipepair *pp;
struct pipe *ppipe;
- ino_t ino;
KASSERT(cpipe != NULL, ("pipeclose: cpipe == NULL"));
@@ -1570,12 +1695,6 @@
knlist_destroy(&cpipe->pipe_sel.si_note);
/*
- * Postpone the destroy of the fake inode number allocated for
- * our end, until pipe mtx is unlocked.
- */
- ino = cpipe->pipe_ino;
-
- /*
* If both endpoints are now closed, release the memory for the
* pipe pair. If not, unlock.
*/
@@ -1587,9 +1706,6 @@
uma_zfree(pipe_zone, cpipe->pipe_pair);
} else
PIPE_UNLOCK(cpipe);
-
- if (ino != 0 && ino != (ino_t)-1)
- free_unr(pipeino_unr, ino);
}
/*ARGSUSED*/
@@ -1598,7 +1714,20 @@
{
struct pipe *cpipe;
- cpipe = kn->kn_fp->f_data;
+ /*
+ * If a filter is requested that is not supported by this file
+ * descriptor, don't return an error, but also don't ever generate an
+ * event.
+ */
+ if ((kn->kn_filter == EVFILT_READ) && !(fp->f_flag & FREAD)) {
+ kn->kn_fop = &pipe_nfiltops;
+ return (0);
+ }
+ if ((kn->kn_filter == EVFILT_WRITE) && !(fp->f_flag & FWRITE)) {
+ kn->kn_fop = &pipe_nfiltops;
+ return (0);
+ }
+ cpipe = fp->f_data;
PIPE_LOCK(cpipe);
switch (kn->kn_filter) {
case EVFILT_READ:
@@ -1611,7 +1740,7 @@
PIPE_UNLOCK(cpipe);
return (EPIPE);
}
- cpipe = cpipe->pipe_peer;
+ cpipe = PIPE_PEER(cpipe);
break;
default:
PIPE_UNLOCK(cpipe);
@@ -1618,6 +1747,7 @@
return (EINVAL);
}
+ kn->kn_hook = cpipe;
knlist_add(&cpipe->pipe_sel.si_note, kn, 1);
PIPE_UNLOCK(cpipe);
return (0);
@@ -1626,11 +1756,9 @@
static void
filt_pipedetach(struct knote *kn)
{
- struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;
+ struct pipe *cpipe = kn->kn_hook;
PIPE_LOCK(cpipe);
- if (kn->kn_filter == EVFILT_WRITE)
- cpipe = cpipe->pipe_peer;
knlist_remove(&cpipe->pipe_sel.si_note, kn, 1);
PIPE_UNLOCK(cpipe);
}
@@ -1639,11 +1767,11 @@
static int
filt_piperead(struct knote *kn, long hint)
{
- struct pipe *rpipe = kn->kn_fp->f_data;
+ struct pipe *rpipe = kn->kn_hook;
struct pipe *wpipe = rpipe->pipe_peer;
int ret;
- PIPE_LOCK(rpipe);
+ PIPE_LOCK_ASSERT(rpipe, MA_OWNED);
kn->kn_data = rpipe->pipe_buffer.cnt;
if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
kn->kn_data = rpipe->pipe_map.cnt;
@@ -1652,11 +1780,9 @@
wpipe->pipe_present != PIPE_ACTIVE ||
(wpipe->pipe_state & PIPE_EOF)) {
kn->kn_flags |= EV_EOF;
- PIPE_UNLOCK(rpipe);
return (1);
}
ret = kn->kn_data > 0;
- PIPE_UNLOCK(rpipe);
return ret;
}
@@ -1664,15 +1790,14 @@
static int
filt_pipewrite(struct knote *kn, long hint)
{
- struct pipe *rpipe = kn->kn_fp->f_data;
- struct pipe *wpipe = rpipe->pipe_peer;
-
- PIPE_LOCK(rpipe);
+ struct pipe *wpipe;
+
+ wpipe = kn->kn_hook;
+ PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
if (wpipe->pipe_present != PIPE_ACTIVE ||
(wpipe->pipe_state & PIPE_EOF)) {
kn->kn_data = 0;
kn->kn_flags |= EV_EOF;
- PIPE_UNLOCK(rpipe);
return (1);
}
kn->kn_data = (wpipe->pipe_buffer.size > 0) ?
@@ -1680,6 +1805,18 @@
if (wpipe->pipe_state & PIPE_DIRECTW)
kn->kn_data = 0;
- PIPE_UNLOCK(rpipe);
return (kn->kn_data >= PIPE_BUF);
}
+
+static void
+filt_pipedetach_notsup(struct knote *kn)
+{
+
+}
+
+static int
+filt_pipenotsup(struct knote *kn, long hint)
+{
+
+ return (0);
+}
Modified: trunk/sys/kern/sys_procdesc.c
===================================================================
--- trunk/sys/kern/sys_procdesc.c 2018-05-26 14:27:13 UTC (rev 9956)
+++ trunk/sys/kern/sys_procdesc.c 2018-05-26 14:27:48 UTC (rev 9957)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2009 Robert N. M. Watson
* All rights reserved.
@@ -59,12 +60,12 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/sys_procdesc.c 280258 2015-03-19 13:37:36Z rwatson $");
#include "opt_procdesc.h"
#include <sys/param.h>
-#include <sys/capability.h>
+#include <sys/capsicum.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
@@ -113,6 +114,7 @@
.fo_close = procdesc_close,
.fo_chmod = procdesc_chmod,
.fo_chown = procdesc_chown,
+ .fo_sendfile = invfo_sendfile,
.fo_flags = DFLAG_PASSABLE,
};
@@ -137,7 +139,7 @@
* died.
*/
int
-procdesc_find(struct thread *td, int fd, cap_rights_t rights,
+procdesc_find(struct thread *td, int fd, cap_rights_t *rightsp,
struct proc **p)
{
struct procdesc *pd;
@@ -144,7 +146,7 @@
struct file *fp;
int error;
- error = fget(td, fd, rights, &fp);
+ error = fget(td, fd, rightsp, &fp);
if (error)
return (error);
if (fp->f_type != DTYPE_PROCDESC) {
@@ -184,12 +186,12 @@
* Retrieve the PID associated with a process descriptor.
*/
int
-kern_pdgetpid(struct thread *td, int fd, cap_rights_t rights, pid_t *pidp)
+kern_pdgetpid(struct thread *td, int fd, cap_rights_t *rightsp, pid_t *pidp)
{
struct file *fp;
int error;
- error = fget(td, fd, rights, &fp);
+ error = fget(td, fd, rightsp, &fp);
if (error)
return (error);
if (fp->f_type != DTYPE_PROCDESC) {
@@ -208,11 +210,13 @@
int
sys_pdgetpid(struct thread *td, struct pdgetpid_args *uap)
{
+ cap_rights_t rights;
pid_t pid;
int error;
AUDIT_ARG_FD(uap->fd);
- error = kern_pdgetpid(td, uap->fd, CAP_PDGETPID, &pid);
+ error = kern_pdgetpid(td, uap->fd,
+ cap_rights_init(&rights, CAP_PDGETPID), &pid);
if (error == 0)
error = copyout(&pid, uap->pidp, sizeof(pid));
return (error);
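
A userspace sketch of pdgetpid(2), assuming pd was obtained from
pdfork(2):

    #include <sys/procdesc.h>
    #include <err.h>

    pid_t pid;

    if (pdgetpid(pd, &pid) < 0)
            err(1, "pdgetpid");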
@@ -333,12 +337,13 @@
pd = p->p_procdesc;
pd->pd_proc = NULL;
+ p->p_procdesc = NULL;
procdesc_free(pd);
}
/*
* procdesc_close() - last close on a process descriptor. If the process is
- * still running, terminate with SIGKILL (unless PD_DAEMON is set) and let
+ * still running, terminate with SIGKILL (unless PDF_DAEMON is set) and let
* init(8) clean up the mess; if not, we have to clean up the zombie ourselves.
*/
static int
@@ -358,14 +363,20 @@
pd->pd_flags |= PDF_CLOSED;
PROCDESC_UNLOCK(pd);
p = pd->pd_proc;
- PROC_LOCK(p);
- if (p->p_state == PRS_ZOMBIE) {
+ if (p == NULL) {
/*
+	 * This is the case where the process' exit status was already
+ * collected and procdesc_reap() was already called.
+ */
+ sx_xunlock(&proctree_lock);
+ } else if (p->p_state == PRS_ZOMBIE) {
+ /*
* If the process is already dead and just awaiting reaping,
* do that now. This will release the process's reference to
* the process descriptor when it calls back into
* procdesc_reap().
*/
+ PROC_LOCK(p);
PROC_SLOCK(p);
proc_reap(curthread, p, NULL, 0);
} else {
@@ -376,6 +387,7 @@
* process from its descriptor so that its exit status will
* be reported normally.
*/
+ PROC_LOCK(p);
pd->pd_proc = NULL;
p->p_procdesc = NULL;
procdesc_free(pd);
@@ -386,7 +398,7 @@
*/
p->p_sigparent = SIGCHLD;
proc_reparent(p, initproc);
- if ((pd->pd_flags & PD_DAEMON) == 0)
+ if ((pd->pd_flags & PDF_DAEMON) == 0)
kern_psignal(p, SIGKILL);
PROC_UNLOCK(p);
sx_xunlock(&proctree_lock);
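
All of the fget() call sites above follow the same new pattern: cap_rights_t has grown from a scalar into a multi-word structure, so rights are constructed with cap_rights_init() and passed by pointer. The userland analogue, sketched here under the assumption of a FreeBSD 10 Capsicum-enabled kernel (options CAPABILITIES):

#include <sys/capsicum.h>
#include <err.h>
#include <fcntl.h>
#include <unistd.h>

int
main(void)
{
	cap_rights_t rights;
	int fd;

	fd = open("/etc/motd", O_RDONLY);
	if (fd == -1)
		err(1, "open");
	/*
	 * Initialize the rights structure and pass it by pointer,
	 * mirroring the kernel's cap_rights_init() usage above.
	 */
	cap_rights_init(&rights, CAP_READ, CAP_FSTAT);
	if (cap_rights_limit(fd, &rights) == -1)
		err(1, "cap_rights_limit");
	close(fd);
	return (0);
}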
Modified: trunk/sys/kern/sys_process.c
===================================================================
--- trunk/sys/kern/sys_process.c 2018-05-26 14:27:13 UTC (rev 9956)
+++ trunk/sys/kern/sys_process.c 2018-05-26 14:27:48 UTC (rev 9957)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1994, Sean Eric Fagan
* All rights reserved.
@@ -30,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/sys_process.c 328379 2018-01-24 21:48:39Z jhb $");
#include "opt_compat.h"
@@ -41,9 +42,11 @@
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/ptrace.h>
+#include <sys/rwlock.h>
#include <sys/sx.h>
#include <sys/malloc.h>
#include <sys/signalvar.h>
@@ -59,7 +62,6 @@
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
-#include <vm/vm_pager.h>
#include <vm/vm_param.h>
#ifdef COMPAT_FREEBSD32
@@ -94,7 +96,9 @@
sigset_t pl_siglist; /* LWP pending signal */
struct siginfo32 pl_siginfo; /* siginfo for signal */
char pl_tdname[MAXCOMLEN + 1]; /* LWP name. */
- int pl_child_pid; /* New child pid */
+ pid_t pl_child_pid; /* New child pid */
+ u_int pl_syscall_code;
+ u_int pl_syscall_narg;
};
#endif
@@ -335,7 +339,7 @@
struct vnode *vp;
char *freepath, *fullpath;
u_int pathlen;
- int error, index, vfslocked;
+ int error, index;
error = 0;
obj = NULL;
@@ -382,11 +386,10 @@
obj = entry->object.vm_object;
if (obj != NULL)
- VM_OBJECT_LOCK(obj);
+ VM_OBJECT_RLOCK(obj);
} while (0);
vm_map_unlock_read(map);
- vmspace_free(vm);
pve->pve_fsid = VNOVAL;
pve->pve_fileid = VNOVAL;
@@ -395,24 +398,23 @@
lobj = obj;
for (tobj = obj; tobj != NULL; tobj = tobj->backing_object) {
if (tobj != obj)
- VM_OBJECT_LOCK(tobj);
+ VM_OBJECT_RLOCK(tobj);
if (lobj != obj)
- VM_OBJECT_UNLOCK(lobj);
+ VM_OBJECT_RUNLOCK(lobj);
lobj = tobj;
pve->pve_offset += tobj->backing_object_offset;
}
- vp = (lobj->type == OBJT_VNODE) ? lobj->handle : NULL;
+ vp = vm_object_vnode(lobj);
if (vp != NULL)
vref(vp);
if (lobj != obj)
- VM_OBJECT_UNLOCK(lobj);
- VM_OBJECT_UNLOCK(obj);
+ VM_OBJECT_RUNLOCK(lobj);
+ VM_OBJECT_RUNLOCK(obj);
if (vp != NULL) {
freepath = NULL;
fullpath = NULL;
vn_fullpath(td, vp, &fullpath, &freepath);
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
vn_lock(vp, LK_SHARED | LK_RETRY);
if (VOP_GETATTR(vp, &vattr, td->td_ucred) == 0) {
pve->pve_fileid = vattr.va_fileid;
@@ -419,7 +421,6 @@
pve->pve_fsid = vattr.va_fsid;
}
vput(vp);
- VFS_UNLOCK_GIANT(vfslocked);
if (fullpath != NULL) {
pve->pve_pathlen = strlen(fullpath) + 1;
@@ -433,12 +434,16 @@
free(freepath, M_TEMP);
}
}
+ vmspace_free(vm);
+ if (error == 0)
+ CTR3(KTR_PTRACE, "PT_VM_ENTRY: pid %d, entry %d, start %p",
+ p->p_pid, pve->pve_entry, pve->pve_start);
return (error);
}
#ifdef COMPAT_FREEBSD32
-static int
+static int
ptrace_vm_entry32(struct thread *td, struct proc *p,
struct ptrace_vm_entry32 *pve32)
{
@@ -470,6 +475,7 @@
struct ptrace_lwpinfo32 *pl32)
{
+ bzero(pl32, sizeof(*pl32));
pl32->pl_lwpid = pl->pl_lwpid;
pl32->pl_event = pl->pl_event;
pl32->pl_flags = pl->pl_flags;
@@ -478,6 +484,8 @@
siginfo_to_siginfo32(&pl->pl_siginfo, &pl32->pl_siginfo);
strcpy(pl32->pl_tdname, pl->pl_tdname);
pl32->pl_child_pid = pl->pl_child_pid;
+ pl32->pl_syscall_code = pl->pl_syscall_code;
+ pl32->pl_syscall_narg = pl->pl_syscall_narg;
}
#endif /* COMPAT_FREEBSD32 */
@@ -536,6 +544,7 @@
struct ptrace_lwpinfo32 pl32;
struct ptrace_vm_entry32 pve32;
#endif
+ int ptevents;
} r;
void *addr;
int error = 0;
@@ -550,6 +559,7 @@
AUDIT_ARG_VALUE(uap->data);
addr = &r;
switch (uap->req) {
+ case PT_GET_EVENT_MASK:
case PT_GETREGS:
case PT_GETFPREGS:
case PT_GETDBREGS:
@@ -564,6 +574,12 @@
case PT_SETDBREGS:
error = COPYIN(uap->addr, &r.dbreg, sizeof r.dbreg);
break;
+ case PT_SET_EVENT_MASK:
+ if (uap->data != sizeof(r.ptevents))
+ error = EINVAL;
+ else
+ error = copyin(uap->addr, &r.ptevents, uap->data);
+ break;
case PT_IO:
error = COPYIN(uap->addr, &r.piod, sizeof r.piod);
break;
@@ -597,7 +613,12 @@
case PT_GETDBREGS:
error = COPYOUT(&r.dbreg, uap->addr, sizeof r.dbreg);
break;
+ case PT_GET_EVENT_MASK:
+ /* NB: The size in uap->data is validated in kern_ptrace(). */
+ error = copyout(&r.ptevents, uap->addr, uap->data);
+ break;
case PT_LWPINFO:
+ /* NB: The size in uap->data is validated in kern_ptrace(). */
error = copyout(&r.pl, uap->addr, uap->data);
break;
}
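
The new requests round-trip an int-sized mask through copyin()/copyout(), and kern_ptrace() rejects any other size. A hedged userspace sketch of toggling fork tracing with the mask requests added here (PTRACE_FORK and friends come from <sys/ptrace.h>):

#include <sys/types.h>
#include <sys/ptrace.h>
#include <err.h>

/* Enable fork event reporting for an already-attached debuggee. */
static void
enable_fork_events(pid_t pid)
{
	int mask;

	/* data must be sizeof(int); anything else yields EINVAL. */
	if (ptrace(PT_GET_EVENT_MASK, pid, (caddr_t)&mask,
	    sizeof(mask)) == -1)
		err(1, "PT_GET_EVENT_MASK");
	mask |= PTRACE_FORK;
	if (ptrace(PT_SET_EVENT_MASK, pid, (caddr_t)&mask,
	    sizeof(mask)) == -1)
		err(1, "PT_SET_EVENT_MASK");
}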
@@ -629,6 +650,18 @@
#define PROC_WRITE(w, t, a) proc_write_ ## w (t, a)
#endif
+void
+proc_set_traced(struct proc *p, bool stop)
+{
+
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ p->p_flag |= P_TRACED;
+ if (stop)
+ p->p_flag2 |= P2_PTRACE_FSTP;
+ p->p_ptevents = PTRACE_DEFAULT;
+ p->p_oppid = p->p_pptr->p_pid;
+}
+
int
kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
{
@@ -635,7 +668,7 @@
struct iovec iov;
struct uio uio;
struct proc *curp, *p, *pp;
- struct thread *td2 = NULL;
+ struct thread *td2 = NULL, *td3;
struct ptrace_io_desc *piod = NULL;
struct ptrace_lwpinfo *pl;
int error, write, tmp, num;
@@ -660,6 +693,9 @@
case PT_TO_SCX:
case PT_SYSCALL:
case PT_FOLLOW_FORK:
+ case PT_LWP_EVENTS:
+ case PT_GET_EVENT_MASK:
+ case PT_SET_EVENT_MASK:
case PT_DETACH:
sx_xlock(&proctree_lock);
proctree_locked = 1;
@@ -737,12 +773,23 @@
*/
switch (req) {
case PT_TRACE_ME:
- /* Always legal. */
+ /*
+			 * Always legal when there is a parent process that
+			 * could trace us; otherwise, reject.
+ */
+ if ((p->p_flag & P_TRACED) != 0) {
+ error = EBUSY;
+ goto fail;
+ }
+ if (p->p_pptr == initproc) {
+ error = EPERM;
+ goto fail;
+ }
break;
case PT_ATTACH:
/* Self */
- if (p->p_pid == td->td_proc->p_pid) {
+ if (p == td->td_proc) {
error = EINVAL;
goto fail;
}
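
A minimal sketch of the PT_TRACE_ME path these checks guard (standard ptrace(2) usage, not part of the diff): an already-traced child now gets EBUSY, and a child whose parent is init gets EPERM.

#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	pid_t pid;
	int status;

	if ((pid = fork()) == -1)
		err(1, "fork");
	if (pid == 0) {
		/*
		 * Child: ask to be traced.  With the checks above this
		 * fails with EBUSY if we are already traced and EPERM
		 * if our parent is init.
		 */
		if (ptrace(PT_TRACE_ME, 0, NULL, 0) == -1)
			err(1, "PT_TRACE_ME");
		execlp("ls", "ls", (char *)NULL);
		err(1, "execlp");
	}
	/* Parent: the child stops on exec; resume it and reap. */
	if (waitpid(pid, &status, 0) == -1)
		err(1, "waitpid");
	if (WIFSTOPPED(status))
		printf("child stopped, sig %d\n", WSTOPSIG(status));
	if (ptrace(PT_CONTINUE, pid, (caddr_t)1, 0) == -1)
		err(1, "PT_CONTINUE");
	waitpid(pid, &status, 0);
	return (0);
}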
@@ -823,10 +870,10 @@
switch (req) {
case PT_TRACE_ME:
/* set my trace flag and "owner" so it can read/write me */
- p->p_flag |= P_TRACED;
+ proc_set_traced(p, false);
if (p->p_flag & P_PPWAIT)
p->p_flag |= P_PPTRACE;
- p->p_oppid = p->p_pptr->p_pid;
+ CTR1(KTR_PTRACE, "PT_TRACE_ME: pid %d", p->p_pid);
break;
case PT_ATTACH:
@@ -840,23 +887,30 @@
* The old parent is remembered so we can put things back
* on a "detach".
*/
- p->p_flag |= P_TRACED;
- p->p_oppid = p->p_pptr->p_pid;
+ proc_set_traced(p, true);
if (p->p_pptr != td->td_proc) {
proc_reparent(p, td->td_proc);
}
data = SIGSTOP;
+ CTR2(KTR_PTRACE, "PT_ATTACH: pid %d, oppid %d", p->p_pid,
+ p->p_oppid);
goto sendsig; /* in PT_CONTINUE below */
case PT_CLEARSTEP:
+ CTR2(KTR_PTRACE, "PT_CLEARSTEP: tid %d (pid %d)", td2->td_tid,
+ p->p_pid);
error = ptrace_clear_single_step(td2);
break;
case PT_SETSTEP:
+ CTR2(KTR_PTRACE, "PT_SETSTEP: tid %d (pid %d)", td2->td_tid,
+ p->p_pid);
error = ptrace_single_step(td2);
break;
case PT_SUSPEND:
+ CTR2(KTR_PTRACE, "PT_SUSPEND: tid %d (pid %d)", td2->td_tid,
+ p->p_pid);
td2->td_dbgflags |= TDB_SUSPEND;
thread_lock(td2);
td2->td_flags |= TDF_NEEDSUSPCHK;
@@ -864,16 +918,57 @@
break;
case PT_RESUME:
+ CTR2(KTR_PTRACE, "PT_RESUME: tid %d (pid %d)", td2->td_tid,
+ p->p_pid);
td2->td_dbgflags &= ~TDB_SUSPEND;
break;
case PT_FOLLOW_FORK:
+ CTR3(KTR_PTRACE, "PT_FOLLOW_FORK: pid %d %s -> %s", p->p_pid,
+ p->p_ptevents & PTRACE_FORK ? "enabled" : "disabled",
+ data ? "enabled" : "disabled");
if (data)
- p->p_flag |= P_FOLLOWFORK;
+ p->p_ptevents |= PTRACE_FORK;
else
- p->p_flag &= ~P_FOLLOWFORK;
+ p->p_ptevents &= ~PTRACE_FORK;
break;
+ case PT_LWP_EVENTS:
+ CTR3(KTR_PTRACE, "PT_LWP_EVENTS: pid %d %s -> %s", p->p_pid,
+ p->p_ptevents & PTRACE_LWP ? "enabled" : "disabled",
+ data ? "enabled" : "disabled");
+ if (data)
+ p->p_ptevents |= PTRACE_LWP;
+ else
+ p->p_ptevents &= ~PTRACE_LWP;
+ break;
+
+ case PT_GET_EVENT_MASK:
+ if (data != sizeof(p->p_ptevents)) {
+ error = EINVAL;
+ break;
+ }
+ CTR2(KTR_PTRACE, "PT_GET_EVENT_MASK: pid %d mask %#x", p->p_pid,
+ p->p_ptevents);
+ *(int *)addr = p->p_ptevents;
+ break;
+
+ case PT_SET_EVENT_MASK:
+ if (data != sizeof(p->p_ptevents)) {
+ error = EINVAL;
+ break;
+ }
+ tmp = *(int *)addr;
+ if ((tmp & ~(PTRACE_EXEC | PTRACE_SCE | PTRACE_SCX |
+ PTRACE_FORK | PTRACE_LWP | PTRACE_VFORK)) != 0) {
+ error = EINVAL;
+ break;
+ }
+ CTR3(KTR_PTRACE, "PT_SET_EVENT_MASK: pid %d mask %#x -> %#x",
+ p->p_pid, p->p_ptevents, tmp);
+ p->p_ptevents = tmp;
+ break;
+
case PT_STEP:
case PT_CONTINUE:
case PT_TO_SCE:
@@ -888,6 +983,8 @@
switch (req) {
case PT_STEP:
+ CTR3(KTR_PTRACE, "PT_STEP: tid %d (pid %d), sig = %d",
+ td2->td_tid, p->p_pid, data);
error = ptrace_single_step(td2);
if (error)
goto out;
@@ -904,38 +1001,71 @@
}
switch (req) {
case PT_TO_SCE:
- p->p_stops |= S_PT_SCE;
+ p->p_ptevents |= PTRACE_SCE;
+ CTR4(KTR_PTRACE,
+ "PT_TO_SCE: pid %d, events = %#x, PC = %#lx, sig = %d",
+ p->p_pid, p->p_ptevents,
+ (u_long)(uintfptr_t)addr, data);
break;
case PT_TO_SCX:
- p->p_stops |= S_PT_SCX;
+ p->p_ptevents |= PTRACE_SCX;
+ CTR4(KTR_PTRACE,
+ "PT_TO_SCX: pid %d, events = %#x, PC = %#lx, sig = %d",
+ p->p_pid, p->p_ptevents,
+ (u_long)(uintfptr_t)addr, data);
break;
case PT_SYSCALL:
- p->p_stops |= S_PT_SCE | S_PT_SCX;
+ p->p_ptevents |= PTRACE_SYSCALL;
+ CTR4(KTR_PTRACE,
+ "PT_SYSCALL: pid %d, events = %#x, PC = %#lx, sig = %d",
+ p->p_pid, p->p_ptevents,
+ (u_long)(uintfptr_t)addr, data);
break;
+ case PT_CONTINUE:
+ CTR3(KTR_PTRACE,
+ "PT_CONTINUE: pid %d, PC = %#lx, sig = %d",
+ p->p_pid, (u_long)(uintfptr_t)addr, data);
+ break;
}
break;
case PT_DETACH:
- /* reset process parent */
+ /*
+ * Reset the process parent.
+ *
+ * NB: This clears P_TRACED before reparenting
+ * a detached process back to its original
+			 * parent.  Otherwise the debuggee will be set
+ * as an orphan of the debugger.
+ */
+ p->p_flag &= ~(P_TRACED | P_WAITED);
if (p->p_oppid != p->p_pptr->p_pid) {
- struct proc *pp;
-
PROC_LOCK(p->p_pptr);
sigqueue_take(p->p_ksi);
PROC_UNLOCK(p->p_pptr);
- PROC_UNLOCK(p);
- pp = pfind(p->p_oppid);
- if (pp == NULL)
- pp = initproc;
- else
- PROC_UNLOCK(pp);
- PROC_LOCK(p);
+ pp = proc_realparent(p);
proc_reparent(p, pp);
if (pp == initproc)
p->p_sigparent = SIGCHLD;
+ CTR3(KTR_PTRACE,
+ "PT_DETACH: pid %d reparented to pid %d, sig %d",
+ p->p_pid, pp->p_pid, data);
+ } else
+ CTR2(KTR_PTRACE, "PT_DETACH: pid %d, sig %d",
+ p->p_pid, data);
+ p->p_oppid = 0;
+ p->p_ptevents = 0;
+ FOREACH_THREAD_IN_PROC(p, td3) {
+ if ((td3->td_dbgflags & TDB_FSTP) != 0) {
+ sigqueue_delete(&td3->td_sigqueue,
+ SIGSTOP);
+ }
+ td3->td_dbgflags &= ~(TDB_XSIG | TDB_FSTP);
}
- p->p_oppid = 0;
- p->p_flag &= ~(P_TRACED | P_WAITED | P_FOLLOWFORK);
+ if ((p->p_flag2 & P2_PTRACE_FSTP) != 0) {
+ sigqueue_delete(&p->p_sigqueue, SIGSTOP);
+ p->p_flag2 &= ~P2_PTRACE_FSTP;
+ }
/* should we send SIGCHLD? */
/* childproc_continued(p); */
@@ -943,6 +1073,13 @@
}
sendsig:
+ /*
+ * Clear the pending event for the thread that just
+ * reported its event (p_xthread). This may not be
+ * the thread passed to PT_CONTINUE, PT_STEP, etc. if
+ * the debugger is resuming a different thread.
+ */
+ td2 = p->p_xthread;
if (proctree_locked) {
sx_xunlock(&proctree_lock);
proctree_locked = 0;
@@ -954,11 +1091,19 @@
td2->td_dbgflags &= ~TDB_XSIG;
td2->td_xsig = data;
+ /*
+ * P_WKILLED is insurance that a PT_KILL/SIGKILL always
+ * works immediately, even if another thread is
+ * unsuspended first and attempts to handle a different
+ * signal or if the POSIX.1b style signal queue cannot
+ * accommodate any new signals.
+ */
+ if (data == SIGKILL)
+ p->p_flag |= P_WKILLED;
+
if (req == PT_DETACH) {
- struct thread *td3;
- FOREACH_THREAD_IN_PROC(p, td3) {
- td3->td_dbgflags &= ~TDB_SUSPEND;
- }
+ FOREACH_THREAD_IN_PROC(p, td3)
+ td3->td_dbgflags &= ~TDB_SUSPEND;
}
/*
* unsuspend all threads, to not let a thread run,
@@ -969,6 +1114,8 @@
p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SIG|P_WAITED);
thread_unsuspend(p);
PROC_SUNLOCK(p);
+ if (req == PT_ATTACH)
+ kern_psignal(p, data);
} else {
if (data)
kern_psignal(p, data);
@@ -1010,6 +1157,14 @@
}
if (!write)
td->td_retval[0] = tmp;
+ if (error == 0) {
+ if (write)
+ CTR3(KTR_PTRACE, "PT_WRITE: pid %d: %p <= %#x",
+ p->p_pid, addr, data);
+ else
+ CTR3(KTR_PTRACE, "PT_READ: pid %d: %p >= %#x",
+ p->p_pid, addr, tmp);
+ }
PROC_LOCK(p);
break;
@@ -1042,10 +1197,14 @@
switch (tmp) {
case PIOD_READ_D:
case PIOD_READ_I:
+ CTR3(KTR_PTRACE, "PT_IO: pid %d: READ (%p, %#x)",
+ p->p_pid, (uintptr_t)uio.uio_offset, uio.uio_resid);
uio.uio_rw = UIO_READ;
break;
case PIOD_WRITE_D:
case PIOD_WRITE_I:
+ CTR3(KTR_PTRACE, "PT_IO: pid %d: WRITE (%p, %#x)",
+ p->p_pid, (uintptr_t)uio.uio_offset, uio.uio_resid);
td2->td_dbgflags |= TDB_USERWR;
uio.uio_rw = UIO_WRITE;
break;
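
The new PT_IO tracepoints log the remote offset and residual count out of the struct ptrace_io_desc the debugger supplies. A sketch of the corresponding userland call (names here are illustrative):

#include <sys/types.h>
#include <sys/ptrace.h>
#include <err.h>
#include <string.h>

/* Copy 'len' bytes out of the debuggee's data space. */
static void
read_debuggee(pid_t pid, void *remote, void *local, size_t len)
{
	struct ptrace_io_desc piod;

	memset(&piod, 0, sizeof(piod));
	piod.piod_op = PIOD_READ_D;	/* as in the READ case above */
	piod.piod_offs = remote;	/* address in the traced process */
	piod.piod_addr = local;		/* buffer in this process */
	piod.piod_len = len;
	if (ptrace(PT_IO, pid, (caddr_t)&piod, 0) == -1)
		err(1, "PT_IO");
	/* On return, piod_len holds the number of bytes transferred. */
}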
@@ -1065,33 +1224,46 @@
break;
case PT_KILL:
+ CTR1(KTR_PTRACE, "PT_KILL: pid %d", p->p_pid);
data = SIGKILL;
goto sendsig; /* in PT_CONTINUE above */
case PT_SETREGS:
+ CTR2(KTR_PTRACE, "PT_SETREGS: tid %d (pid %d)", td2->td_tid,
+ p->p_pid);
td2->td_dbgflags |= TDB_USERWR;
error = PROC_WRITE(regs, td2, addr);
break;
case PT_GETREGS:
+ CTR2(KTR_PTRACE, "PT_GETREGS: tid %d (pid %d)", td2->td_tid,
+ p->p_pid);
error = PROC_READ(regs, td2, addr);
break;
case PT_SETFPREGS:
+ CTR2(KTR_PTRACE, "PT_SETFPREGS: tid %d (pid %d)", td2->td_tid,
+ p->p_pid);
td2->td_dbgflags |= TDB_USERWR;
error = PROC_WRITE(fpregs, td2, addr);
break;
case PT_GETFPREGS:
+ CTR2(KTR_PTRACE, "PT_GETFPREGS: tid %d (pid %d)", td2->td_tid,
+ p->p_pid);
error = PROC_READ(fpregs, td2, addr);
break;
case PT_SETDBREGS:
+ CTR2(KTR_PTRACE, "PT_SETDBREGS: tid %d (pid %d)", td2->td_tid,
+ p->p_pid);
td2->td_dbgflags |= TDB_USERWR;
error = PROC_WRITE(dbregs, td2, addr);
break;
case PT_GETDBREGS:
+ CTR2(KTR_PTRACE, "PT_GETDBREGS: tid %d (pid %d)", td2->td_tid,
+ p->p_pid);
error = PROC_READ(dbregs, td2, addr);
break;
@@ -1113,6 +1285,7 @@
} else
#endif
pl = addr;
+ bzero(pl, sizeof(*pl));
pl->pl_lwpid = td2->td_tid;
pl->pl_event = PL_EVENT_NONE;
pl->pl_flags = 0;
@@ -1133,8 +1306,6 @@
pl->pl_siginfo = td2->td_dbgksi.ksi_info;
}
}
- if ((pl->pl_flags & PL_FLAG_SI) == 0)
- bzero(&pl->pl_siginfo, sizeof(pl->pl_siginfo));
if (td2->td_dbgflags & TDB_SCE)
pl->pl_flags |= PL_FLAG_SCE;
else if (td2->td_dbgflags & TDB_SCX)
@@ -1144,23 +1315,46 @@
if (td2->td_dbgflags & TDB_FORK) {
pl->pl_flags |= PL_FLAG_FORKED;
pl->pl_child_pid = td2->td_dbg_forked;
- }
+ if (td2->td_dbgflags & TDB_VFORK)
+ pl->pl_flags |= PL_FLAG_VFORKED;
+ } else if ((td2->td_dbgflags & (TDB_SCX | TDB_VFORK)) ==
+ TDB_VFORK)
+ pl->pl_flags |= PL_FLAG_VFORK_DONE;
if (td2->td_dbgflags & TDB_CHILD)
pl->pl_flags |= PL_FLAG_CHILD;
+ if (td2->td_dbgflags & TDB_BORN)
+ pl->pl_flags |= PL_FLAG_BORN;
+ if (td2->td_dbgflags & TDB_EXIT)
+ pl->pl_flags |= PL_FLAG_EXITED;
pl->pl_sigmask = td2->td_sigmask;
pl->pl_siglist = td2->td_siglist;
strcpy(pl->pl_tdname, td2->td_name);
+ if ((td2->td_dbgflags & (TDB_SCE | TDB_SCX)) != 0) {
+ pl->pl_syscall_code = td2->td_dbg_sc_code;
+ pl->pl_syscall_narg = td2->td_dbg_sc_narg;
+ } else {
+ pl->pl_syscall_code = 0;
+ pl->pl_syscall_narg = 0;
+ }
#ifdef COMPAT_FREEBSD32
if (wrap32)
ptrace_lwpinfo_to32(pl, pl32);
#endif
+ CTR6(KTR_PTRACE,
+ "PT_LWPINFO: tid %d (pid %d) event %d flags %#x child pid %d syscall %d",
+ td2->td_tid, p->p_pid, pl->pl_event, pl->pl_flags,
+ pl->pl_child_pid, pl->pl_syscall_code);
break;
case PT_GETNUMLWPS:
+ CTR2(KTR_PTRACE, "PT_GETNUMLWPS: pid %d: %d threads", p->p_pid,
+ p->p_numthreads);
td->td_retval[0] = p->p_numthreads;
break;
case PT_GETLWPLIST:
+ CTR3(KTR_PTRACE, "PT_GETLWPLIST: pid %d: data %d, actual %d",
+ p->p_pid, data, p->p_numthreads);
if (data <= 0) {
error = EINVAL;
break;
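
With the two new fields, a debugger stopped at a syscall boundary can read the syscall number and argument count straight out of PT_LWPINFO. A hedged sketch:

#include <sys/types.h>
#include <sys/ptrace.h>
#include <err.h>
#include <stdio.h>

/* Report the syscall a stopped debuggee is entering or leaving. */
static void
show_syscall(pid_t pid)
{
	struct ptrace_lwpinfo pl;

	if (ptrace(PT_LWPINFO, pid, (caddr_t)&pl, sizeof(pl)) == -1)
		err(1, "PT_LWPINFO");
	if (pl.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX))
		printf("tid %d: syscall %u with %u args\n",
		    (int)pl.pl_lwpid, pl.pl_syscall_code,
		    pl.pl_syscall_narg);
}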
@@ -1184,6 +1378,8 @@
break;
case PT_VM_TIMESTAMP:
+ CTR2(KTR_PTRACE, "PT_VM_TIMESTAMP: pid %d: timestamp %d",
+ p->p_pid, p->p_vmspace->vm_map.timestamp);
td->td_retval[0] = p->p_vmspace->vm_map.timestamp;
break;
@@ -1234,6 +1430,8 @@
PROC_LOCK_ASSERT(p, MA_OWNED);
p->p_step = 1;
+ CTR3(KTR_PTRACE, "stopevent: pid %d event %u val %u", p->p_pid, event,
+ val);
do {
p->p_xstat = val;
p->p_xthread = NULL;
Modified: trunk/sys/kern/sys_socket.c
===================================================================
--- trunk/sys/kern/sys_socket.c 2018-05-26 14:27:13 UTC (rev 9956)
+++ trunk/sys/kern/sys_socket.c 2018-05-26 14:27:48 UTC (rev 9957)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1990, 1993
* The Regents of the University of California. All rights reserved.
@@ -30,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/sys_socket.c 254356 2013-08-15 07:54:31Z glebius $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -66,6 +67,7 @@
.fo_close = soo_close,
.fo_chmod = invfo_chmod,
.fo_chown = invfo_chown,
+ .fo_sendfile = invfo_sendfile,
.fo_flags = DFLAG_PASSABLE
};