[Midnightbsd-cvs] src [9954] trunk/sys/kern: sync with freebsd
laffer1 at midnightbsd.org
laffer1 at midnightbsd.org
Sat May 26 10:25:55 EDT 2018
Revision: 9954
http://svnweb.midnightbsd.org/src/?rev=9954
Author: laffer1
Date: 2018-05-26 10:25:55 -0400 (Sat, 26 May 2018)
Log Message:
-----------
sync with freebsd
Modified Paths:
--------------
trunk/sys/kern/uipc_accf.c
trunk/sys/kern/uipc_debug.c
trunk/sys/kern/uipc_domain.c
trunk/sys/kern/uipc_mbuf.c
trunk/sys/kern/uipc_mbuf2.c
trunk/sys/kern/uipc_mqueue.c
trunk/sys/kern/uipc_sem.c
trunk/sys/kern/uipc_shm.c
trunk/sys/kern/uipc_sockbuf.c
trunk/sys/kern/uipc_socket.c
trunk/sys/kern/uipc_syscalls.c
trunk/sys/kern/uipc_usrreq.c
Modified: trunk/sys/kern/uipc_accf.c
===================================================================
--- trunk/sys/kern/uipc_accf.c 2018-05-26 14:25:17 UTC (rev 9953)
+++ trunk/sys/kern/uipc_accf.c 2018-05-26 14:25:55 UTC (rev 9954)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2000 Paycounter, Inc.
* Copyright (c) 2005 Robert N. M. Watson
@@ -27,7 +28,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/uipc_accf.c 201145 2009-12-28 22:56:30Z antoine $");
#define ACCEPT_FILTER_MOD
Modified: trunk/sys/kern/uipc_debug.c
===================================================================
--- trunk/sys/kern/uipc_debug.c 2018-05-26 14:25:17 UTC (rev 9953)
+++ trunk/sys/kern/uipc_debug.c 2018-05-26 14:25:55 UTC (rev 9954)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2007 Robert N. M. Watson
* All rights reserved.
@@ -29,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/uipc_debug.c 255138 2013-09-01 23:34:53Z davide $");
#include "opt_ddb.h"
@@ -411,7 +412,7 @@
db_print_indent(indent);
db_printf("sb_ctl: %u ", sb->sb_ctl);
db_printf("sb_lowat: %d ", sb->sb_lowat);
- db_printf("sb_timeo: %d\n", sb->sb_timeo);
+ db_printf("sb_timeo: %jd\n", sb->sb_timeo);
db_print_indent(indent);
db_printf("sb_flags: 0x%x (", sb->sb_flags);
Modified: trunk/sys/kern/uipc_domain.c
===================================================================
--- trunk/sys/kern/uipc_domain.c 2018-05-26 14:25:17 UTC (rev 9953)
+++ trunk/sys/kern/uipc_domain.c 2018-05-26 14:25:55 UTC (rev 9954)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
@@ -30,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/uipc_domain.c 314667 2017-03-04 13:03:31Z avg $");
#include <sys/param.h>
#include <sys/socket.h>
@@ -46,8 +47,6 @@
#include <net/vnet.h>
-#include <vm/uma.h>
-
/*
* System initialization
*
@@ -138,8 +137,10 @@
#define DEFAULT(foo, bar) if ((foo) == NULL) (foo) = (bar)
DEFAULT(pu->pru_accept, pru_accept_notsupp);
DEFAULT(pu->pru_bind, pru_bind_notsupp);
+ DEFAULT(pu->pru_bindat, pru_bindat_notsupp);
DEFAULT(pu->pru_connect, pru_connect_notsupp);
DEFAULT(pu->pru_connect2, pru_connect2_notsupp);
+ DEFAULT(pu->pru_connectat, pru_connectat_notsupp);
DEFAULT(pu->pru_control, pru_control_notsupp);
DEFAULT(pu->pru_disconnect, pru_disconnect_notsupp);
DEFAULT(pu->pru_listen, pru_listen_notsupp);
@@ -239,33 +240,16 @@
mtx_unlock(&dom_mtx);
}
-static void
-socket_zone_change(void *tag)
-{
-
- uma_zone_set_max(socket_zone, maxsockets);
-}
-
/* ARGSUSED*/
static void
domaininit(void *dummy)
{
- /*
- * Before we do any setup, make sure to initialize the
- * zone allocator we get struct sockets from.
- */
- socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL,
- NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
- uma_zone_set_max(socket_zone, maxsockets);
- EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL,
- EVENTHANDLER_PRI_FIRST);
-
if (max_linkhdr < 16) /* XXX */
max_linkhdr = 16;
- callout_init(&pffast_callout, CALLOUT_MPSAFE);
- callout_init(&pfslow_callout, CALLOUT_MPSAFE);
+ callout_init(&pffast_callout, 1);
+ callout_init(&pfslow_callout, 1);
mtx_lock(&dom_mtx);
KASSERT(domain_init_status == 0, ("domaininit called too late!"));
@@ -287,6 +271,17 @@
callout_reset(&pfslow_callout, 1, pfslowtimo, NULL);
}
+struct domain *
+pffinddomain(int family)
+{
+ struct domain *dp;
+
+ for (dp = domains; dp != NULL; dp = dp->dom_next)
+ if (dp->dom_family == family)
+ return (dp);
+ return (NULL);
+}
+
struct protosw *
pffindtype(int family, int type)
{
@@ -293,15 +288,14 @@
struct domain *dp;
struct protosw *pr;
- for (dp = domains; dp; dp = dp->dom_next)
- if (dp->dom_family == family)
- goto found;
- return (0);
-found:
+ dp = pffinddomain(family);
+ if (dp == NULL)
+ return (NULL);
+
for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
if (pr->pr_type && pr->pr_type == type)
return (pr);
- return (0);
+ return (NULL);
}
struct protosw *
@@ -309,21 +303,22 @@
{
struct domain *dp;
struct protosw *pr;
- struct protosw *maybe = 0;
+ struct protosw *maybe;
+ maybe = NULL;
if (family == 0)
- return (0);
- for (dp = domains; dp; dp = dp->dom_next)
- if (dp->dom_family == family)
- goto found;
- return (0);
-found:
+ return (NULL);
+
+ dp = pffinddomain(family);
+ if (dp == NULL)
+ return (NULL);
+
for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
if ((pr->pr_protocol == protocol) && (pr->pr_type == type))
return (pr);
if (type == SOCK_RAW && pr->pr_type == SOCK_RAW &&
- pr->pr_protocol == 0 && maybe == (struct protosw *)0)
+ pr->pr_protocol == 0 && maybe == NULL)
maybe = pr;
}
return (maybe);
@@ -351,12 +346,10 @@
return (ENXIO);
/* Try to find the specified domain based on the family. */
- for (dp = domains; dp; dp = dp->dom_next)
- if (dp->dom_family == family)
- goto found;
- return (EPFNOSUPPORT);
+ dp = pffinddomain(family);
+ if (dp == NULL)
+ return (EPFNOSUPPORT);
-found:
/* Initialize backpointer to struct domain. */
npr->pr_domain = dp;
fpr = NULL;
@@ -422,12 +415,10 @@
return (EPROTOTYPE);
/* Try to find the specified domain based on the family type. */
- for (dp = domains; dp; dp = dp->dom_next)
- if (dp->dom_family == family)
- goto found;
- return (EPFNOSUPPORT);
+ dp = pffinddomain(family);
+ if (dp == NULL)
+ return (EPFNOSUPPORT);
-found:
dpr = NULL;
/* Lock out everyone else while we are manipulating the protosw. */
Modified: trunk/sys/kern/uipc_mbuf.c
===================================================================
--- trunk/sys/kern/uipc_mbuf.c 2018-05-26 14:25:17 UTC (rev 9953)
+++ trunk/sys/kern/uipc_mbuf.c 2018-05-26 14:25:55 UTC (rev 9954)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1988, 1991, 1993
* The Regents of the University of California. All rights reserved.
@@ -30,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/uipc_mbuf.c 308376 2016-11-06 16:44:33Z avos $");
#include "opt_param.h"
#include "opt_mbuf_stress_test.h"
@@ -85,6 +86,77 @@
#endif
/*
+ * Ensure the correct size of various mbuf parameters. It could be off due
+ * to compiler-induced padding and alignment artifacts.
+ */
+CTASSERT(sizeof(struct mbuf) == MSIZE);
+CTASSERT(MSIZE - offsetof(struct mbuf, m_dat) == MLEN);
+CTASSERT(MSIZE - offsetof(struct mbuf, m_pktdat) == MHLEN);
+
+/*
+ * m_get2() allocates minimum mbuf that would fit "size" argument.
+ */
+struct mbuf *
+m_get2(int size, int how, short type, int flags)
+{
+ struct mb_args args;
+ struct mbuf *m, *n;
+
+ args.flags = flags;
+ args.type = type;
+
+ if (size <= MHLEN || (size <= MLEN && (flags & M_PKTHDR) == 0))
+ return (uma_zalloc_arg(zone_mbuf, &args, how));
+ if (size <= MCLBYTES)
+ return (uma_zalloc_arg(zone_pack, &args, how));
+
+ if (size > MJUMPAGESIZE)
+ return (NULL);
+
+ m = uma_zalloc_arg(zone_mbuf, &args, how);
+ if (m == NULL)
+ return (NULL);
+
+ n = uma_zalloc_arg(zone_jumbop, m, how);
+ if (n == NULL) {
+ uma_zfree(zone_mbuf, m);
+ return (NULL);
+ }
+
+ return (m);
+}
+
+/*
+ * m_getjcl() returns an mbuf with a cluster of the specified size attached.
+ * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
+ */
+struct mbuf *
+m_getjcl(int how, short type, int flags, int size)
+{
+ struct mb_args args;
+ struct mbuf *m, *n;
+ uma_zone_t zone;
+
+ if (size == MCLBYTES)
+ return m_getcl(how, type, flags);
+
+ args.flags = flags;
+ args.type = type;
+
+ m = uma_zalloc_arg(zone_mbuf, &args, how);
+ if (m == NULL)
+ return (NULL);
+
+ zone = m_getzone(size);
+ n = uma_zalloc_arg(zone, m, how);
+ if (n == NULL) {
+ uma_zfree(zone_mbuf, m);
+ return (NULL);
+ }
+ return (m);
+}
+
+/*
* Allocate a given length worth of mbufs and/or clusters (whatever fits
* best) and return a pointer to the top of the allocated chain. If an
* existing mbuf chain is provided, then we will append the new chain
@@ -182,25 +254,31 @@
* Returns:
* Nothing.
*/
-void
+int
m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
- void (*freef)(void *, void *), void *arg1, void *arg2, int flags, int type)
+ int (*freef)(struct mbuf *, void *, void *), void *arg1, void *arg2,
+ int flags, int type, int wait)
{
KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__));
if (type != EXT_EXTREF)
- mb->m_ext.ref_cnt = (u_int *)uma_zalloc(zone_ext_refcnt, M_NOWAIT);
- if (mb->m_ext.ref_cnt != NULL) {
- *(mb->m_ext.ref_cnt) = 1;
- mb->m_flags |= (M_EXT | flags);
- mb->m_ext.ext_buf = buf;
- mb->m_data = mb->m_ext.ext_buf;
- mb->m_ext.ext_size = size;
- mb->m_ext.ext_free = freef;
- mb->m_ext.ext_arg1 = arg1;
- mb->m_ext.ext_arg2 = arg2;
- mb->m_ext.ext_type = type;
- }
+ mb->m_ext.ref_cnt = uma_zalloc(zone_ext_refcnt, wait);
+
+ if (mb->m_ext.ref_cnt == NULL)
+ return (ENOMEM);
+
+ *(mb->m_ext.ref_cnt) = 1;
+ mb->m_flags |= (M_EXT | flags);
+ mb->m_ext.ext_buf = buf;
+ mb->m_data = mb->m_ext.ext_buf;
+ mb->m_ext.ext_size = size;
+ mb->m_ext.ext_free = freef;
+ mb->m_ext.ext_arg1 = arg1;
+ mb->m_ext.ext_arg2 = arg2;
+ mb->m_ext.ext_type = type;
+ mb->m_ext.ext_flags = 0;
+
+ return (0);
}
/*
@@ -215,12 +293,11 @@
KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__));
-
/*
* check if the header is embedded in the cluster
- */
+ */
skipmbuf = (m->m_flags & M_NOFREE);
-
+
/* Free attached storage if this mbuf is the only reference to it. */
if (*(m->m_ext.ref_cnt) == 1 ||
atomic_fetchadd_int(m->m_ext.ref_cnt, -1) == 1) {
@@ -253,7 +330,7 @@
case EXT_EXTREF:
KASSERT(m->m_ext.ext_free != NULL,
("%s: ext_free not set", __func__));
- (*(m->m_ext.ext_free))(m->m_ext.ext_arg1,
+ (void)(*(m->m_ext.ext_free))(m, m->m_ext.ext_arg1,
m->m_ext.ext_arg2);
break;
default:
@@ -275,6 +352,7 @@
m->m_ext.ref_cnt = NULL;
m->m_ext.ext_size = 0;
m->m_ext.ext_type = 0;
+ m->m_ext.ext_flags = 0;
m->m_flags &= ~M_EXT;
uma_zfree(zone_mbuf, m);
}
@@ -301,6 +379,7 @@
n->m_ext.ext_size = m->m_ext.ext_size;
n->m_ext.ref_cnt = m->m_ext.ref_cnt;
n->m_ext.ext_type = m->m_ext.ext_type;
+ n->m_ext.ext_flags = m->m_ext.ext_flags;
n->m_flags |= M_EXT;
n->m_flags |= m->m_flags & M_RDONLY;
}
@@ -327,7 +406,7 @@
m_freem(m->m_nextpkt);
m->m_nextpkt = NULL;
}
- m->m_flags = m->m_flags & (M_EXT|M_RDONLY|M_FREELIST|M_NOFREE);
+ m->m_flags = m->m_flags & (M_EXT|M_RDONLY|M_NOFREE);
}
}
@@ -368,11 +447,6 @@
M_SANITY_ACTION("m_data outside mbuf data range right");
if ((caddr_t)m->m_data + m->m_len > b)
M_SANITY_ACTION("m_data + m_len exeeds mbuf space");
- if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.header) {
- if ((caddr_t)m->m_pkthdr.header < a ||
- (caddr_t)m->m_pkthdr.header > b)
- M_SANITY_ACTION("m_pkthdr.header outside mbuf data range");
- }
/* m->m_nextpkt may only be set on first mbuf in chain. */
if (m != m0 && m->m_nextpkt != NULL) {
@@ -462,8 +536,8 @@
#if 0
/*
* The mbuf allocator only initializes the pkthdr
- * when the mbuf is allocated with MGETHDR. Many users
- * (e.g. m_copy*, m_prepend) use MGET and then
+ * when the mbuf is allocated with m_gethdr(). Many users
+ * (e.g. m_copy*, m_prepend) use m_get() and then
* smash the pkthdr as needed causing these
* assertions to trip. For now just disable them.
*/
@@ -495,15 +569,15 @@
struct mbuf *mn;
if (m->m_flags & M_PKTHDR)
- MGETHDR(mn, how, m->m_type);
+ mn = m_gethdr(how, m->m_type);
else
- MGET(mn, how, m->m_type);
+ mn = m_get(how, m->m_type);
if (mn == NULL) {
m_freem(m);
return (NULL);
}
if (m->m_flags & M_PKTHDR)
- M_MOVE_PKTHDR(mn, m);
+ m_move_pkthdr(mn, m);
mn->m_next = m;
m = mn;
if(m->m_flags & M_PKTHDR) {
@@ -520,7 +594,7 @@
/*
* Make a copy of an mbuf chain starting "off0" bytes from the beginning,
* continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf.
- * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
+ * The wait parameter is a choice of M_WAITOK/M_NOWAIT from caller.
* Note that the copy is read-only, because clusters are not copied,
* only their reference counts are incremented.
*/
@@ -553,9 +627,9 @@
break;
}
if (copyhdr)
- MGETHDR(n, wait, m->m_type);
+ n = m_gethdr(wait, m->m_type);
else
- MGET(n, wait, m->m_type);
+ n = m_get(wait, m->m_type);
*np = n;
if (n == NULL)
goto nospace;
@@ -581,13 +655,10 @@
m = m->m_next;
np = &n->m_next;
}
- if (top == NULL)
- mbstat.m_mcfail++; /* XXX: No consistency. */
return (top);
nospace:
m_freem(top);
- mbstat.m_mcfail++; /* XXX: No consistency. */
return (NULL);
}
@@ -679,7 +750,6 @@
return NULL;
bcopy(&buf, mm->m_ext.ext_buf, mm->m_len);
mm->m_data = mm->m_ext.ext_buf;
- mm->m_pkthdr.header = NULL;
}
if (prep && !(mm->m_flags & M_EXT) && len > M_LEADINGSPACE(mm)) {
bcopy(mm->m_data, &buf, mm->m_len);
@@ -690,7 +760,6 @@
mm->m_ext.ext_size - mm->m_len, mm->m_len);
mm->m_data = (caddr_t)mm->m_ext.ext_buf +
mm->m_ext.ext_size - mm->m_len;
- mm->m_pkthdr.header = NULL;
}
/* Append/prepend as many mbuf (clusters) as necessary to fit len. */
@@ -754,7 +823,7 @@
struct mbuf *top, *n, *o;
MBUF_CHECKSLEEP(how);
- MGET(n, how, m->m_type);
+ n = m_get(how, m->m_type);
top = n;
if (n == NULL)
goto nospace;
@@ -772,7 +841,7 @@
m = m->m_next;
while (m) {
- MGET(o, how, m->m_type);
+ o = m_get(how, m->m_type);
if (o == NULL)
goto nospace;
@@ -792,7 +861,6 @@
return top;
nospace:
m_freem(top);
- mbstat.m_mcfail++; /* XXX: No consistency. */
return (NULL);
}
@@ -867,6 +935,7 @@
}
if ((n->m_flags & M_EXT) == 0)
nsize = MHLEN;
+ n->m_flags &= ~M_RDONLY;
}
n->m_len = 0;
@@ -896,7 +965,6 @@
nospace:
m_freem(top);
- mbstat.m_mcfail++; /* XXX: No consistency. */
return (NULL);
}
@@ -911,8 +979,8 @@
while (m->m_next)
m = m->m_next;
while (n) {
- if (m->m_flags & M_EXT ||
- m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
+ if (!M_WRITABLE(m) ||
+ M_TRAILINGSPACE(m) < n->m_len) {
/* just join the two chains */
m->m_next = n;
return;
@@ -1000,8 +1068,8 @@
/*
* Rearange an mbuf chain so that len bytes are contiguous
- * and in the data area of an mbuf (so that mtod and dtom
- * will work for a structure of size len). Returns the resulting
+ * and in the data area of an mbuf (so that mtod will work
+ * for a structure of size len). Returns the resulting
* mbuf chain on success, frees it and returns null on failure.
* If there is room, it will add up to max_protohdr-len extra bytes to the
* contiguous region in an attempt to avoid being called next time.
@@ -1028,12 +1096,11 @@
} else {
if (len > MHLEN)
goto bad;
- MGET(m, M_DONTWAIT, n->m_type);
+ m = m_get(M_NOWAIT, n->m_type);
if (m == NULL)
goto bad;
- m->m_len = 0;
if (n->m_flags & M_PKTHDR)
- M_MOVE_PKTHDR(m, n);
+ m_move_pkthdr(m, n);
}
space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
do {
@@ -1057,7 +1124,6 @@
return (m);
bad:
m_freem(n);
- mbstat.m_mpfail++; /* XXX: No consistency. */
return (NULL);
}
@@ -1076,12 +1142,11 @@
if (len > (MHLEN - dstoff))
goto bad;
- MGET(m, M_DONTWAIT, n->m_type);
+ m = m_get(M_NOWAIT, n->m_type);
if (m == NULL)
goto bad;
- m->m_len = 0;
if (n->m_flags & M_PKTHDR)
- M_MOVE_PKTHDR(m, n);
+ m_move_pkthdr(m, n);
m->m_data += dstoff;
space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
do {
@@ -1131,13 +1196,23 @@
if (m == NULL)
return (NULL);
remain = m->m_len - len;
- if (m0->m_flags & M_PKTHDR) {
- MGETHDR(n, wait, m0->m_type);
+ if (m0->m_flags & M_PKTHDR && remain == 0) {
+ n = m_gethdr(wait, m0->m_type);
if (n == NULL)
return (NULL);
+ n->m_next = m->m_next;
+ m->m_next = NULL;
n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
n->m_pkthdr.len = m0->m_pkthdr.len - len0;
m0->m_pkthdr.len = len0;
+ return (n);
+ } else if (m0->m_flags & M_PKTHDR) {
+ n = m_gethdr(wait, m0->m_type);
+ if (n == NULL)
+ return (NULL);
+ n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
+ n->m_pkthdr.len = m0->m_pkthdr.len - len0;
+ m0->m_pkthdr.len = len0;
if (m->m_flags & M_EXT)
goto extpacket;
if (remain > MHLEN) {
@@ -1158,7 +1233,7 @@
m->m_next = NULL;
return (n);
} else {
- MGET(n, wait, m->m_type);
+ n = m_get(wait, m->m_type);
if (n == NULL)
return (NULL);
M_ALIGN(n, remain);
@@ -1195,10 +1270,10 @@
while (totlen > 0) {
if (top == NULL) { /* First one, must be PKTHDR */
if (totlen + off >= MINCLSIZE) {
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
len = MCLBYTES;
} else {
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
len = MHLEN;
/* Place initial small packet/header at end of mbuf */
@@ -1213,10 +1288,10 @@
m->m_pkthdr.len = totlen;
} else {
if (totlen + off >= MINCLSIZE) {
- m = m_getcl(M_DONTWAIT, MT_DATA, 0);
+ m = m_getcl(M_NOWAIT, MT_DATA, 0);
len = MCLBYTES;
} else {
- m = m_get(M_DONTWAIT, MT_DATA);
+ m = m_get(M_NOWAIT, MT_DATA);
len = MLEN;
}
if (m == NULL) {
@@ -1260,7 +1335,7 @@
off -= mlen;
totlen += mlen;
if (m->m_next == NULL) {
- n = m_get(M_DONTWAIT, m->m_type);
+ n = m_get(M_NOWAIT, m->m_type);
if (n == NULL)
goto out;
bzero(mtod(n, caddr_t), MLEN);
@@ -1284,7 +1359,7 @@
if (len == 0)
break;
if (m->m_next == NULL) {
- n = m_get(M_DONTWAIT, m->m_type);
+ n = m_get(M_NOWAIT, m->m_type);
if (n == NULL)
break;
n->m_len = min(MLEN, len);
@@ -1328,7 +1403,7 @@
* Allocate a new mbuf; could check space
* and allocate a cluster instead.
*/
- n = m_get(M_DONTWAIT, m->m_type);
+ n = m_get(M_NOWAIT, m->m_type);
if (n == NULL)
break;
n->m_len = min(MLEN, remainder);
@@ -1584,7 +1659,7 @@
n = m->m_next;
if (n == NULL)
break;
- if ((m->m_flags & M_RDONLY) == 0 &&
+ if (M_WRITABLE(m) &&
n->m_len < M_TRAILINGSPACE(m)) {
bcopy(mtod(n, void *), mtod(m, char *) + m->m_len,
n->m_len);
@@ -1811,14 +1886,22 @@
void
m_align(struct mbuf *m, int len)
{
+#ifdef INVARIANTS
+ const char *msg = "%s: not a virgin mbuf";
+#endif
int adjust;
- if (m->m_flags & M_EXT)
+ if (m->m_flags & M_EXT) {
+ KASSERT(m->m_data == m->m_ext.ext_buf, (msg, __func__));
adjust = m->m_ext.ext_size - len;
- else if (m->m_flags & M_PKTHDR)
+ } else if (m->m_flags & M_PKTHDR) {
+ KASSERT(m->m_data == m->m_pktdat, (msg, __func__));
adjust = MHLEN - len;
- else
+ } else {
+ KASSERT(m->m_data == m->m_dat, (msg, __func__));
adjust = MLEN - len;
+ }
+
m->m_data += adjust &~ (sizeof(long)-1);
}
@@ -1900,43 +1983,23 @@
}
/*
- * Allocate new space to hold the copy...
+ * Allocate new space to hold the copy and copy the data.
+ * We deal with jumbo mbufs (i.e. m_len > MCLBYTES) by
+ * splitting them into clusters. We could just malloc a
+ * buffer and make it external but too many device drivers
+ * don't know how to break up the non-contiguous memory when
+ * doing DMA.
*/
- /* XXX why can M_PKTHDR be set past the first mbuf? */
- if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
- /*
- * NB: if a packet header is present we must
- * allocate the mbuf separately from any cluster
- * because M_MOVE_PKTHDR will smash the data
- * pointer and drop the M_EXT marker.
- */
- MGETHDR(n, how, m->m_type);
- if (n == NULL) {
- m_freem(m0);
- return (NULL);
- }
- M_MOVE_PKTHDR(n, m);
- MCLGET(n, how);
- if ((n->m_flags & M_EXT) == 0) {
- m_free(n);
- m_freem(m0);
- return (NULL);
- }
- } else {
- n = m_getcl(how, m->m_type, m->m_flags);
- if (n == NULL) {
- m_freem(m0);
- return (NULL);
- }
+ n = m_getcl(how, m->m_type, m->m_flags & M_COPYFLAGS);
+ if (n == NULL) {
+ m_freem(m0);
+ return (NULL);
}
- /*
- * ... and copy the data. We deal with jumbo mbufs
- * (i.e. m_len > MCLBYTES) by splitting them into
- * clusters. We could just malloc a buffer and make
- * it external but too many device drivers don't know
- * how to break up the non-contiguous memory when
- * doing DMA.
- */
+ if (m->m_flags & M_PKTHDR) {
+ KASSERT(mprev == NULL, ("%s: m0 %p, m %p has M_PKTHDR",
+ __func__, m0, m));
+ m_move_pkthdr(n, m);
+ }
len = m->m_len;
off = 0;
mfirst = n;
@@ -1957,7 +2020,7 @@
break;
off += cc;
- n = m_getcl(how, m->m_type, m->m_flags);
+ n = m_getcl(how, m->m_type, m->m_flags & M_COPYFLAGS);
if (n == NULL) {
m_freem(mfirst);
m_freem(m0);
Modified: trunk/sys/kern/uipc_mbuf2.c
===================================================================
--- trunk/sys/kern/uipc_mbuf2.c 2018-05-26 14:25:17 UTC (rev 9953)
+++ trunk/sys/kern/uipc_mbuf2.c 2018-05-26 14:25:55 UTC (rev 9954)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/* $KAME: uipc_mbuf2.c,v 1.31 2001/11/28 11:08:53 itojun Exp $ */
/* $NetBSD: uipc_mbuf.c,v 1.40 1999/04/01 00:23:25 thorpej Exp $ */
@@ -61,7 +62,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/uipc_mbuf2.c 312442 2017-01-19 23:44:05Z rpokala $");
/*#define PULLDOWN_DEBUG*/
@@ -141,7 +142,7 @@
* Ideally, the requirement should only be (iii).
*
* If we're writable, we're sure we're writable, because the ref. count
- * cannot increase from 1, as that would require posession of mbuf
+ * cannot increase from 1, as that would require possession of mbuf
* n by someone else (which is impossible). However, if we're _not_
* writable, we may eventually become writable )if the ref. count drops
* to 1), but we'll fail to notice it unless we re-evaluate
@@ -161,7 +162,7 @@
* the target data is on <n, off>.
* if we got enough data on the mbuf "n", we're done.
*/
- if ((off == 0 || offp) && len <= n->m_len - off && writable)
+ if ((off == 0 || offp) && len <= n->m_len - off)
goto ok;
/*
@@ -171,7 +172,7 @@
* chop the current mbuf into two pieces, set off to 0.
*/
if (len <= n->m_len - off) {
- o = m_dup1(n, off, n->m_len - off, M_DONTWAIT);
+ o = m_dup1(n, off, n->m_len - off, M_NOWAIT);
if (o == NULL) {
m_freem(m);
return NULL; /* ENOBUFS */
@@ -231,9 +232,9 @@
* on both end.
*/
if (len > MLEN)
- o = m_getcl(M_DONTWAIT, m->m_type, 0);
+ o = m_getcl(M_NOWAIT, m->m_type, 0);
else
- o = m_get(M_DONTWAIT, m->m_type);
+ o = m_get(M_NOWAIT, m->m_type);
if (!o) {
m_freem(m);
return NULL; /* ENOBUFS */
Modified: trunk/sys/kern/uipc_mqueue.c
===================================================================
--- trunk/sys/kern/uipc_mqueue.c 2018-05-26 14:25:17 UTC (rev 9953)
+++ trunk/sys/kern/uipc_mqueue.c 2018-05-26 14:25:55 UTC (rev 9954)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2005 David Xu <davidxu at freebsd.org>
* All rights reserved.
@@ -43,8 +44,9 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/uipc_mqueue.c 325783 2017-11-13 23:21:17Z jamie $");
+#include "opt_capsicum.h"
#include "opt_compat.h"
#include <sys/param.h>
@@ -51,8 +53,9 @@
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/limits.h>
+#include <sys/malloc.h>
#include <sys/buf.h>
-#include <sys/capability.h>
+#include <sys/capsicum.h>
#include <sys/dirent.h>
#include <sys/event.h>
#include <sys/eventhandler.h>
@@ -59,8 +62,8 @@
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
+#include <sys/jail.h>
#include <sys/lock.h>
-#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/mqueue.h>
@@ -130,6 +133,7 @@
LIST_HEAD(,mqfs_node) mn_children;
LIST_ENTRY(mqfs_node) mn_sibling;
LIST_HEAD(,mqfs_vdata) mn_vnodes;
+ const void *mn_pr_root;
int mn_refcount;
mqfs_type_t mn_type;
int mn_deleted;
@@ -217,6 +221,7 @@
static uma_zone_t mqnoti_zone;
static struct vop_vector mqfs_vnodeops;
static struct fileops mqueueops;
+static unsigned mqfs_osd_jail_slot;
/*
* Directory structure construction and manipulation
@@ -234,6 +239,7 @@
static void mqfs_fileno_alloc(struct mqfs_info *mi, struct mqfs_node *mn);
static void mqfs_fileno_free(struct mqfs_info *mi, struct mqfs_node *mn);
static int mqfs_allocv(struct mount *mp, struct vnode **vpp, struct mqfs_node *pn);
+static int mqfs_prison_remove(void *obj, void *data);
/*
* Message queue construction and maniplation
@@ -434,6 +440,7 @@
node = mqnode_alloc();
strncpy(node->mn_name, name, namelen);
+ node->mn_pr_root = cred->cr_prison->pr_root;
node->mn_type = nodetype;
node->mn_refcount = 1;
vfs_timestamp(&node->mn_birth);
@@ -582,7 +589,6 @@
mp->mnt_data = &mqfs_data;
MNT_ILOCK(mp);
mp->mnt_flag |= MNT_LOCAL;
- mp->mnt_kern_flag |= MNTK_MPSAFE;
MNT_IUNLOCK(mp);
vfs_getnewfsid(mp);
@@ -643,6 +649,9 @@
{
struct mqfs_node *root;
struct mqfs_info *mi;
+ osd_method_t methods[PR_MAXMETHOD] = {
+ [PR_METHOD_REMOVE] = mqfs_prison_remove,
+ };
mqnode_zone = uma_zcreate("mqnode", sizeof(struct mqfs_node),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
@@ -669,6 +678,7 @@
EVENTHANDLER_PRI_ANY);
mq_fdclose = mqueue_fdclose;
p31b_setcfg(CTL_P1003_1B_MESSAGE_PASSING, _POSIX_MESSAGE_PASSING);
+ mqfs_osd_jail_slot = osd_jail_register(NULL, methods);
return (0);
}
@@ -682,6 +692,7 @@
if (!unloadable)
return (EOPNOTSUPP);
+ osd_jail_deregister(mqfs_osd_jail_slot);
EVENTHANDLER_DEREGISTER(process_exit, exit_tag);
mi = &mqfs_data;
mqfs_destroy(mi->mi_root);
@@ -703,7 +714,9 @@
{
struct vnode *vp = (struct vnode *)context;
- vrecycle(vp, curthread);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ vrecycle(vp);
+ VOP_UNLOCK(vp, 0);
vdrop(vp);
}
@@ -799,13 +812,17 @@
* Search a directory entry
*/
static struct mqfs_node *
-mqfs_search(struct mqfs_node *pd, const char *name, int len)
+mqfs_search(struct mqfs_node *pd, const char *name, int len, struct ucred *cred)
{
struct mqfs_node *pn;
+ const void *pr_root;
sx_assert(&pd->mn_info->mi_lock, SX_LOCKED);
+ pr_root = cred->cr_prison->pr_root;
LIST_FOREACH(pn, &pd->mn_children, mn_sibling) {
- if (strncmp(pn->mn_name, name, len) == 0 &&
+ /* Only match names within the same prison root directory */
+ if ((pn->mn_pr_root == NULL || pn->mn_pr_root == pr_root) &&
+ strncmp(pn->mn_name, name, len) == 0 &&
pn->mn_name[len] == '\0')
return (pn);
}
@@ -877,7 +894,7 @@
/* named node */
sx_xlock(&mqfs->mi_lock);
- pn = mqfs_search(pd, pname, namelen);
+ pn = mqfs_search(pd, pname, namelen, cnp->cn_cred);
if (pn != NULL)
mqnode_addref(pn);
sx_xunlock(&mqfs->mi_lock);
@@ -1065,7 +1082,7 @@
struct mqfs_node *pn = VTON(ap->a_vp);
if (pn->mn_deleted)
- vrecycle(ap->a_vp, ap->a_td);
+ vrecycle(ap->a_vp);
return (0);
}
@@ -1362,6 +1379,7 @@
struct mqfs_node *pn;
struct dirent entry;
struct uio *uio;
+ const void *pr_root;
int *tmp_ncookies = NULL;
off_t offset;
int error, i;
@@ -1386,10 +1404,18 @@
error = 0;
offset = 0;
+ pr_root = ap->a_cred->cr_prison->pr_root;
sx_xlock(&mi->mi_lock);
LIST_FOREACH(pn, &pd->mn_children, mn_sibling) {
entry.d_reclen = sizeof(entry);
+
+ /*
+ * Only show names within the same prison root directory
+ * (or not associated with a prison, e.g. "." and "..").
+ */
+ if (pn->mn_pr_root != NULL && pn->mn_pr_root != pr_root)
+ continue;
if (!pn->mn_fileno)
mqfs_fileno_alloc(mi, pn);
entry.d_fileno = pn->mn_fileno;
@@ -1523,6 +1549,38 @@
#endif /* notyet */
/*
+ * See if this prison root is obsolete, and clean up associated queues if it is.
+ */
+static int
+mqfs_prison_remove(void *obj, void *data __unused)
+{
+ const struct prison *pr = obj;
+ const struct prison *tpr;
+ struct mqfs_node *pn, *tpn;
+ int found;
+
+ found = 0;
+ TAILQ_FOREACH(tpr, &allprison, pr_list) {
+ if (tpr->pr_root == pr->pr_root && tpr != pr && tpr->pr_ref > 0)
+ found = 1;
+ }
+ if (!found) {
+ /*
+ * No jails are rooted in this directory anymore,
+ * so no queues should be either.
+ */
+ sx_xlock(&mqfs_data.mi_lock);
+ LIST_FOREACH_SAFE(pn, &mqfs_data.mi_root->mn_children,
+ mn_sibling, tpn) {
+ if (pn->mn_pr_root == pr->pr_root)
+ (void)do_unlink(pn, curthread->td_ucred);
+ }
+ sx_xunlock(&mqfs_data.mi_lock);
+ }
+ return (0);
+}
+
+/*
* Allocate a message queue
*/
static struct mqueue *
@@ -1974,7 +2032,7 @@
* characters.
*/
len = strlen(path);
- if (len < 2 || path[0] != '/' || index(path + 1, '/') != NULL)
+ if (len < 2 || path[0] != '/' || strchr(path + 1, '/') != NULL)
return (EINVAL);
error = falloc(td, &fp, &fd, O_CLOEXEC);
@@ -1982,7 +2040,7 @@
return (error);
sx_xlock(&mqfs_data.mi_lock);
- pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1);
+ pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1, td->td_ucred);
if (pn == NULL) {
if (!(flags & O_CREAT)) {
error = ENOENT;
@@ -2021,7 +2079,7 @@
if (error) {
sx_xunlock(&mqfs_data.mi_lock);
- fdclose(fdp, fp, fd, td);
+ fdclose(td, fp, fd);
fdrop(fp, td);
return (error);
}
@@ -2046,7 +2104,7 @@
struct mq_attr attr;
int flags, error;
- if ((uap->flags & O_ACCMODE) == O_ACCMODE)
+ if ((uap->flags & O_ACCMODE) == O_ACCMODE || uap->flags & O_EXEC)
return (EINVAL);
flags = FFLAGS(uap->flags);
if ((flags & O_CREAT) != 0 && uap->attr != NULL) {
@@ -2073,11 +2131,11 @@
return (error);
len = strlen(path);
- if (len < 2 || path[0] != '/' || index(path + 1, '/') != NULL)
+ if (len < 2 || path[0] != '/' || strchr(path + 1, '/') != NULL)
return (EINVAL);
sx_xlock(&mqfs_data.mi_lock);
- pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1);
+ pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1, td->td_ucred);
if (pn != NULL)
error = do_unlink(pn, td->td_ucred);
else
@@ -2086,19 +2144,19 @@
return (error);
}
-typedef int (*_fgetf)(struct thread *, int, cap_rights_t, struct file **);
+typedef int (*_fgetf)(struct thread *, int, cap_rights_t *, struct file **);
/*
* Get message queue by giving file slot
*/
static int
-_getmq(struct thread *td, int fd, cap_rights_t rights, _fgetf func,
+_getmq(struct thread *td, int fd, cap_rights_t *rightsp, _fgetf func,
struct file **fpp, struct mqfs_node **ppn, struct mqueue **pmq)
{
struct mqfs_node *pn;
int error;
- error = func(td, fd, rights, fpp);
+ error = func(td, fd, rightsp, fpp);
if (error)
return (error);
if (&mqueueops != (*fpp)->f_ops) {
@@ -2117,7 +2175,10 @@
getmq(struct thread *td, int fd, struct file **fpp, struct mqfs_node **ppn,
struct mqueue **pmq)
{
- return _getmq(td, fd, CAP_POLL_EVENT, fget, fpp, ppn, pmq);
+ cap_rights_t rights;
+
+ return _getmq(td, fd, cap_rights_init(&rights, CAP_EVENT), fget,
+ fpp, ppn, pmq);
}
static __inline int
@@ -2124,7 +2185,10 @@
getmq_read(struct thread *td, int fd, struct file **fpp,
struct mqfs_node **ppn, struct mqueue **pmq)
{
- return _getmq(td, fd, CAP_READ, fget_read, fpp, ppn, pmq);
+ cap_rights_t rights;
+
+ return _getmq(td, fd, cap_rights_init(&rights, CAP_READ), fget_read,
+ fpp, ppn, pmq);
}
static __inline int
@@ -2131,7 +2195,10 @@
getmq_write(struct thread *td, int fd, struct file **fpp,
struct mqfs_node **ppn, struct mqueue **pmq)
{
- return _getmq(td, fd, CAP_WRITE, fget_write, fpp, ppn, pmq);
+ cap_rights_t rights;
+
+ return _getmq(td, fd, cap_rights_init(&rights, CAP_WRITE), fget_write,
+ fpp, ppn, pmq);
}
static int
@@ -2177,10 +2244,10 @@
}
error = kern_kmq_setattr(td, uap->mqd, uap->attr != NULL ? &attr : NULL,
&oattr);
- if (error != 0)
- return (error);
- if (uap->oattr != NULL)
+ if (error == 0 && uap->oattr != NULL) {
+ bzero(oattr.__reserved, sizeof(oattr.__reserved));
error = copyout(&oattr, uap->oattr, sizeof(oattr));
+ }
return (error);
}
@@ -2235,10 +2302,12 @@
return (error);
}
-int
-sys_kmq_notify(struct thread *td, struct kmq_notify_args *uap)
+static int
+kern_kmq_notify(struct thread *td, int mqd, struct sigevent *sigev)
{
- struct sigevent ev;
+#ifdef CAPABILITIES
+ cap_rights_t rights;
+#endif
struct filedesc *fdp;
struct proc *p;
struct mqueue *mq;
@@ -2246,37 +2315,37 @@
struct mqueue_notifier *nt, *newnt = NULL;
int error;
- p = td->td_proc;
- fdp = td->td_proc->p_fd;
- if (uap->sigev) {
- error = copyin(uap->sigev, &ev, sizeof(ev));
- if (error)
- return (error);
- if (ev.sigev_notify != SIGEV_SIGNAL &&
- ev.sigev_notify != SIGEV_THREAD_ID &&
- ev.sigev_notify != SIGEV_NONE)
+ if (sigev != NULL) {
+ if (sigev->sigev_notify != SIGEV_SIGNAL &&
+ sigev->sigev_notify != SIGEV_THREAD_ID &&
+ sigev->sigev_notify != SIGEV_NONE)
return (EINVAL);
- if ((ev.sigev_notify == SIGEV_SIGNAL ||
- ev.sigev_notify == SIGEV_THREAD_ID) &&
- !_SIG_VALID(ev.sigev_signo))
+ if ((sigev->sigev_notify == SIGEV_SIGNAL ||
+ sigev->sigev_notify == SIGEV_THREAD_ID) &&
+ !_SIG_VALID(sigev->sigev_signo))
return (EINVAL);
}
- error = getmq(td, uap->mqd, &fp, NULL, &mq);
+ p = td->td_proc;
+ fdp = td->td_proc->p_fd;
+ error = getmq(td, mqd, &fp, NULL, &mq);
if (error)
return (error);
again:
FILEDESC_SLOCK(fdp);
- fp2 = fget_locked(fdp, uap->mqd);
+ fp2 = fget_locked(fdp, mqd);
if (fp2 == NULL) {
FILEDESC_SUNLOCK(fdp);
error = EBADF;
goto out;
}
- error = cap_funwrap(fp2, CAP_POLL_EVENT, &fp2);
+#ifdef CAPABILITIES
+ error = cap_check(cap_rights(fdp, mqd),
+ cap_rights_init(&rights, CAP_EVENT));
if (error) {
FILEDESC_SUNLOCK(fdp);
goto out;
}
+#endif
if (fp2 != fp) {
FILEDESC_SUNLOCK(fdp);
error = EBADF;
@@ -2284,12 +2353,12 @@
}
mtx_lock(&mq->mq_mutex);
FILEDESC_SUNLOCK(fdp);
- if (uap->sigev != NULL) {
+ if (sigev != NULL) {
if (mq->mq_notifier != NULL) {
error = EBUSY;
} else {
PROC_LOCK(p);
- nt = notifier_search(p, uap->mqd);
+ nt = notifier_search(p, mqd);
if (nt == NULL) {
if (newnt == NULL) {
PROC_UNLOCK(p);
@@ -2312,10 +2381,10 @@
nt->nt_ksi.ksi_flags |= KSI_INS | KSI_EXT;
nt->nt_ksi.ksi_code = SI_MESGQ;
nt->nt_proc = p;
- nt->nt_ksi.ksi_mqd = uap->mqd;
+ nt->nt_ksi.ksi_mqd = mqd;
notifier_insert(p, nt);
}
- nt->nt_sigev = ev;
+ nt->nt_sigev = *sigev;
mq->mq_notifier = nt;
PROC_UNLOCK(p);
/*
@@ -2328,7 +2397,7 @@
mqueue_send_notification(mq);
}
} else {
- notifier_remove(p, mq, uap->mqd);
+ notifier_remove(p, mq, mqd);
}
mtx_unlock(&mq->mq_mutex);
@@ -2339,6 +2408,23 @@
return (error);
}
+int
+sys_kmq_notify(struct thread *td, struct kmq_notify_args *uap)
+{
+ struct sigevent ev, *evp;
+ int error;
+
+ if (uap->sigev == NULL) {
+ evp = NULL;
+ } else {
+ error = copyin(uap->sigev, &ev, sizeof(ev));
+ if (error != 0)
+ return (error);
+ evp = &ev;
+ }
+ return (kern_kmq_notify(td, uap->mqd, evp));
+}
+
static void
mqueue_fdclose(struct thread *td, int fd, struct file *fp)
{
@@ -2582,7 +2668,8 @@
.fo_stat = mqf_stat,
.fo_chmod = mqf_chmod,
.fo_chown = mqf_chown,
- .fo_close = mqf_close
+ .fo_close = mqf_close,
+ .fo_sendfile = invfo_sendfile,
};
static struct vop_vector mqfs_vnodeops = {
@@ -2635,6 +2722,7 @@
#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32.h>
#include <compat/freebsd32/freebsd32_proto.h>
+#include <compat/freebsd32/freebsd32_signal.h>
#include <compat/freebsd32/freebsd32_syscall.h>
#include <compat/freebsd32/freebsd32_util.h>
@@ -2665,7 +2753,7 @@
struct mq_attr32 attr32;
int flags, error;
- if ((uap->flags & O_ACCMODE) == O_ACCMODE)
+ if ((uap->flags & O_ACCMODE) == O_ACCMODE || uap->flags & O_EXEC)
return (EINVAL);
flags = FFLAGS(uap->flags);
if ((flags & O_CREAT) != 0 && uap->attr != NULL) {
@@ -2693,10 +2781,9 @@
}
error = kern_kmq_setattr(td, uap->mqd, uap->attr != NULL ? &attr : NULL,
&oattr);
- if (error != 0)
- return (error);
- if (uap->oattr != NULL) {
+ if (error == 0 && uap->oattr != NULL) {
mq_attr_to32(&oattr, &oattr32);
+ bzero(oattr32.__reserved, sizeof(oattr32.__reserved));
error = copyout(&oattr32, uap->oattr, sizeof(oattr32));
}
return (error);
@@ -2713,7 +2800,7 @@
int error;
int waitok;
- error = getmq_read(td, uap->mqd, &fp, NULL, &mq);
+ error = getmq_write(td, uap->mqd, &fp, NULL, &mq);
if (error)
return (error);
if (uap->abs_timeout != NULL) {
@@ -2742,7 +2829,7 @@
struct timespec *abs_timeout, ets;
int error, waitok;
- error = getmq_write(td, uap->mqd, &fp, NULL, &mq);
+ error = getmq_read(td, uap->mqd, &fp, NULL, &mq);
if (error)
return (error);
if (uap->abs_timeout != NULL) {
@@ -2761,12 +2848,33 @@
return (error);
}
+int
+freebsd32_kmq_notify(struct thread *td, struct freebsd32_kmq_notify_args *uap)
+{
+ struct sigevent ev, *evp;
+ struct sigevent32 ev32;
+ int error;
+
+ if (uap->sigev == NULL) {
+ evp = NULL;
+ } else {
+ error = copyin(uap->sigev, &ev32, sizeof(ev32));
+ if (error != 0)
+ return (error);
+ error = convert_sigevent32(&ev32, &ev);
+ if (error != 0)
+ return (error);
+ evp = &ev;
+ }
+ return (kern_kmq_notify(td, uap->mqd, evp));
+}
+
static struct syscall_helper_data mq32_syscalls[] = {
SYSCALL32_INIT_HELPER(freebsd32_kmq_open),
SYSCALL32_INIT_HELPER(freebsd32_kmq_setattr),
SYSCALL32_INIT_HELPER(freebsd32_kmq_timedsend),
SYSCALL32_INIT_HELPER(freebsd32_kmq_timedreceive),
- SYSCALL32_INIT_HELPER_COMPAT(kmq_notify),
+ SYSCALL32_INIT_HELPER(freebsd32_kmq_notify),
SYSCALL32_INIT_HELPER_COMPAT(kmq_unlink),
SYSCALL_INIT_LAST
};
Modified: trunk/sys/kern/uipc_sem.c
===================================================================
--- trunk/sys/kern/uipc_sem.c 2018-05-26 14:25:17 UTC (rev 9953)
+++ trunk/sys/kern/uipc_sem.c 2018-05-26 14:25:55 UTC (rev 9954)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2002 Alfred Perlstein <alfred at FreeBSD.org>
* Copyright (c) 2003-2005 SPARTA, Inc.
@@ -32,18 +33,19 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/uipc_sem.c 325783 2017-11-13 23:21:17Z jamie $");
#include "opt_compat.h"
#include "opt_posix.h"
#include <sys/param.h>
-#include <sys/capability.h>
+#include <sys/capsicum.h>
#include <sys/condvar.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fnv_hash.h>
+#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/ksem.h>
#include <sys/lock.h>
@@ -71,7 +73,6 @@
* TODO
*
* - Resource limits?
- * - Update fstat(1)
* - Replace global sem_lock with mtx_pool locks?
* - Add a MAC check_create() hook for creating new named semaphores.
*/
@@ -117,7 +118,7 @@
semid_t *semidp, mode_t mode, unsigned int value,
int flags, int compat32);
static void ksem_drop(struct ksem *ks);
-static int ksem_get(struct thread *td, semid_t id, cap_rights_t rights,
+static int ksem_get(struct thread *td, semid_t id, cap_rights_t *rightsp,
struct file **fpp);
static struct ksem *ksem_hold(struct ksem *ks);
static void ksem_insert(char *path, Fnv32_t fnv, struct ksem *ks);
@@ -150,6 +151,7 @@
.fo_close = ksem_closef,
.fo_chmod = ksem_chmod,
.fo_chown = ksem_chown,
+ .fo_sendfile = invfo_sendfile,
.fo_flags = DFLAG_PASSABLE
};
@@ -407,6 +409,7 @@
map->km_path = path;
map->km_fnv = fnv;
map->km_ksem = ksem_hold(ks);
+ ks->ks_path = path;
LIST_INSERT_HEAD(KSEM_HASH(fnv), map, km_link);
}
@@ -428,6 +431,7 @@
error = ksem_access(map->km_ksem, ucred);
if (error)
return (error);
+ map->km_ksem->ks_path = NULL;
LIST_REMOVE(map, km_link);
ksem_drop(map->km_ksem);
free(map->km_path, M_KSEM);
@@ -439,6 +443,32 @@
return (ENOENT);
}
+static void
+ksem_info_impl(struct ksem *ks, char *path, size_t size, uint32_t *value)
+{
+ const char *ks_path, *pr_path;
+ size_t pr_pathlen;
+
+ if (ks->ks_path == NULL)
+ return;
+ sx_slock(&ksem_dict_lock);
+ ks_path = ks->ks_path;
+ if (ks_path != NULL) {
+ pr_path = curthread->td_ucred->cr_prison->pr_path;
+ if (strcmp(pr_path, "/") != 0) {
+ /* Return the jail-rooted pathname. */
+ pr_pathlen = strlen(pr_path);
+ if (strncmp(ks_path, pr_path, pr_pathlen) == 0 &&
+ ks_path[pr_pathlen] == '/')
+ ks_path += pr_pathlen;
+ }
+ strlcpy(path, ks_path, size);
+ }
+ if (value != NULL)
+ *value = ks->ks_value;
+ sx_sunlock(&ksem_dict_lock);
+}
+
static int
ksem_create_copyout_semid(struct thread *td, semid_t *semidp, int fd,
int compat32)
@@ -477,6 +507,8 @@
struct ksem *ks;
struct file *fp;
char *path;
+ const char *pr_path;
+ size_t pr_pathlen;
Fnv32_t fnv;
int error, fd;
@@ -499,7 +531,7 @@
*/
error = ksem_create_copyout_semid(td, semidp, fd, compat32);
if (error) {
- fdclose(fdp, fp, fd, td);
+ fdclose(td, fp, fd);
fdrop(fp, td);
return (error);
}
@@ -513,13 +545,19 @@
ks->ks_flags |= KS_ANONYMOUS;
} else {
path = malloc(MAXPATHLEN, M_KSEM, M_WAITOK);
- error = copyinstr(name, path, MAXPATHLEN, NULL);
+ pr_path = td->td_ucred->cr_prison->pr_path;
+ /* Construct a full pathname for jailed callers. */
+ pr_pathlen = strcmp(pr_path, "/") == 0 ? 0
+ : strlcpy(path, pr_path, MAXPATHLEN);
+ error = copyinstr(name, path + pr_pathlen,
+ MAXPATHLEN - pr_pathlen, NULL);
+
/* Require paths to start with a '/' character. */
- if (error == 0 && path[0] != '/')
+ if (error == 0 && path[pr_pathlen] != '/')
error = EINVAL;
if (error) {
- fdclose(fdp, fp, fd, td);
+ fdclose(td, fp, fd);
fdrop(fp, td);
free(path, M_KSEM);
return (error);
@@ -570,7 +608,7 @@
if (error) {
KASSERT(ks == NULL, ("ksem_create error with a ksem"));
- fdclose(fdp, fp, fd, td);
+ fdclose(td, fp, fd);
fdrop(fp, td);
return (error);
}
@@ -584,13 +622,14 @@
}
static int
-ksem_get(struct thread *td, semid_t id, cap_rights_t rights, struct file **fpp)
+ksem_get(struct thread *td, semid_t id, cap_rights_t *rightsp,
+ struct file **fpp)
{
struct ksem *ks;
struct file *fp;
int error;
- error = fget(td, id, rights, &fp);
+ error = fget(td, id, rightsp, &fp);
if (error)
return (EINVAL);
if (fp->f_type != DTYPE_SEM) {
@@ -651,11 +690,17 @@
sys_ksem_unlink(struct thread *td, struct ksem_unlink_args *uap)
{
char *path;
+ const char *pr_path;
+ size_t pr_pathlen;
Fnv32_t fnv;
int error;
path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
- error = copyinstr(uap->name, path, MAXPATHLEN, NULL);
+ pr_path = td->td_ucred->cr_prison->pr_path;
+ pr_pathlen = strcmp(pr_path, "/") == 0 ? 0
+ : strlcpy(path, pr_path, MAXPATHLEN);
+ error = copyinstr(uap->name, path + pr_pathlen, MAXPATHLEN - pr_pathlen,
+ NULL);
if (error) {
free(path, M_TEMP);
return (error);
@@ -704,11 +749,13 @@
int
sys_ksem_post(struct thread *td, struct ksem_post_args *uap)
{
+ cap_rights_t rights;
struct file *fp;
struct ksem *ks;
int error;
- error = ksem_get(td, uap->id, CAP_SEM_POST, &fp);
+ error = ksem_get(td, uap->id,
+ cap_rights_init(&rights, CAP_SEM_POST), &fp);
if (error)
return (error);
ks = fp->f_data;
@@ -793,12 +840,13 @@
{
struct timespec ts1, ts2;
struct timeval tv;
+ cap_rights_t rights;
struct file *fp;
struct ksem *ks;
int error;
DP((">>> kern_sem_wait entered! pid=%d\n", (int)td->td_proc->p_pid));
- error = ksem_get(td, id, CAP_SEM_WAIT, &fp);
+ error = ksem_get(td, id, cap_rights_init(&rights, CAP_SEM_WAIT), &fp);
if (error)
return (error);
ks = fp->f_data;
@@ -860,11 +908,13 @@
int
sys_ksem_getvalue(struct thread *td, struct ksem_getvalue_args *uap)
{
+ cap_rights_t rights;
struct file *fp;
struct ksem *ks;
int error, val;
- error = ksem_get(td, uap->id, CAP_SEM_GETVALUE, &fp);
+ error = ksem_get(td, uap->id,
+ cap_rights_init(&rights, CAP_SEM_GETVALUE), &fp);
if (error)
return (error);
ks = fp->f_data;
@@ -1014,6 +1064,7 @@
p31b_setcfg(CTL_P1003_1B_SEMAPHORES, 200112L);
p31b_setcfg(CTL_P1003_1B_SEM_NSEMS_MAX, SEM_MAX);
p31b_setcfg(CTL_P1003_1B_SEM_VALUE_MAX, SEM_VALUE_MAX);
+ ksem_info = ksem_info_impl;
error = syscall_helper_register(ksem_syscalls);
if (error)
@@ -1035,6 +1086,7 @@
#endif
syscall_helper_unregister(ksem_syscalls);
+ ksem_info = NULL;
p31b_setcfg(CTL_P1003_1B_SEMAPHORES, 0);
hashdestroy(ksem_dictionary, M_KSEM, ksem_hash);
sx_destroy(&ksem_dict_lock);
Modified: trunk/sys/kern/uipc_shm.c
===================================================================
--- trunk/sys/kern/uipc_shm.c 2018-05-26 14:25:17 UTC (rev 9953)
+++ trunk/sys/kern/uipc_shm.c 2018-05-26 14:25:55 UTC (rev 9954)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2006, 2011 Robert N. M. Watson
* All rights reserved.
@@ -42,17 +43,23 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/uipc_shm.c 325783 2017-11-13 23:21:17Z jamie $");
#include "opt_capsicum.h"
+#include "opt_ktrace.h"
#include <sys/param.h>
-#include <sys/capability.h>
+#include <sys/capsicum.h>
+#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fnv_hash.h>
#include <sys/kernel.h>
+#include <sys/uio.h>
+#include <sys/signal.h>
+#include <sys/jail.h>
+#include <sys/ktrace.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
@@ -61,6 +68,7 @@
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/resourcevar.h>
+#include <sys/rwlock.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
@@ -68,6 +76,7 @@
#include <sys/sx.h>
#include <sys/time.h>
#include <sys/vnode.h>
+#include <sys/unistd.h>
#include <security/mac/mac_framework.h>
@@ -95,12 +104,14 @@
static struct sx shm_dict_lock;
static struct mtx shm_timestamp_lock;
static u_long shm_hash;
+static struct unrhdr *shm_ino_unr;
+static dev_t shm_dev_ino;
#define SHM_HASH(fnv) (&shm_dictionary[(fnv) & shm_hash])
static int shm_access(struct shmfd *shmfd, struct ucred *ucred, int flags);
static struct shmfd *shm_alloc(struct ucred *ucred, mode_t mode);
-static void shm_dict_init(void *arg);
+static void shm_init(void *arg);
static void shm_drop(struct shmfd *shmfd);
static struct shmfd *shm_hold(struct shmfd *shmfd);
static void shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd);
@@ -118,6 +129,7 @@
static fo_close_t shm_close;
static fo_chmod_t shm_chmod;
static fo_chown_t shm_chown;
+static fo_seek_t shm_seek;
/* File descriptor operations. */
static struct fileops shm_ops = {
@@ -131,17 +143,170 @@
.fo_close = shm_close,
.fo_chmod = shm_chmod,
.fo_chown = shm_chown,
- .fo_flags = DFLAG_PASSABLE
+ .fo_sendfile = vn_sendfile,
+ .fo_seek = shm_seek,
+ .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
};
FEATURE(posix_shm, "POSIX shared memory");
static int
+uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio)
+{
+ vm_page_t m;
+ vm_pindex_t idx;
+ size_t tlen;
+ int error, offset, rv;
+
+ idx = OFF_TO_IDX(uio->uio_offset);
+ offset = uio->uio_offset & PAGE_MASK;
+ tlen = MIN(PAGE_SIZE - offset, len);
+
+ VM_OBJECT_WLOCK(obj);
+
+ /*
+ * Parallel reads of the page content from disk are prevented
+ * by exclusive busy.
+ *
+ * Although the tmpfs vnode lock is held here, it is
+ * nonetheless safe to sleep waiting for a free page. The
+ * pageout daemon does not need to acquire the tmpfs vnode
+ * lock to page out tobj's pages because tobj is a OBJT_SWAP
+ * type object.
+ */
+ m = vm_page_grab(obj, idx, VM_ALLOC_NORMAL);
+ if (m->valid != VM_PAGE_BITS_ALL) {
+ if (vm_pager_has_page(obj, idx, NULL, NULL)) {
+ rv = vm_pager_get_pages(obj, &m, 1, 0);
+ m = vm_page_lookup(obj, idx);
+ if (m == NULL) {
+ printf(
+ "uiomove_object: vm_obj %p idx %jd null lookup rv %d\n",
+ obj, idx, rv);
+ VM_OBJECT_WUNLOCK(obj);
+ return (EIO);
+ }
+ if (rv != VM_PAGER_OK) {
+ printf(
+ "uiomove_object: vm_obj %p idx %jd valid %x pager error %d\n",
+ obj, idx, m->valid, rv);
+ vm_page_lock(m);
+ vm_page_free(m);
+ vm_page_unlock(m);
+ VM_OBJECT_WUNLOCK(obj);
+ return (EIO);
+ }
+ } else
+ vm_page_zero_invalid(m, TRUE);
+ }
+ vm_page_xunbusy(m);
+ vm_page_lock(m);
+ vm_page_hold(m);
+ if (m->queue == PQ_NONE) {
+ vm_page_deactivate(m);
+ } else {
+ /* Requeue to maintain LRU ordering. */
+ vm_page_requeue(m);
+ }
+ vm_page_unlock(m);
+ VM_OBJECT_WUNLOCK(obj);
+ error = uiomove_fromphys(&m, offset, tlen, uio);
+ if (uio->uio_rw == UIO_WRITE && error == 0) {
+ VM_OBJECT_WLOCK(obj);
+ vm_page_dirty(m);
+ vm_pager_page_unswapped(m);
+ VM_OBJECT_WUNLOCK(obj);
+ }
+ vm_page_lock(m);
+ vm_page_unhold(m);
+ vm_page_unlock(m);
+
+ return (error);
+}
+
+int
+uiomove_object(vm_object_t obj, off_t obj_size, struct uio *uio)
+{
+ ssize_t resid;
+ size_t len;
+ int error;
+
+ error = 0;
+ while ((resid = uio->uio_resid) > 0) {
+ if (obj_size <= uio->uio_offset)
+ break;
+ len = MIN(obj_size - uio->uio_offset, resid);
+ if (len == 0)
+ break;
+ error = uiomove_object_page(obj, len, uio);
+ if (error != 0 || resid == uio->uio_resid)
+ break;
+ }
+ return (error);
+}
+
+static int
+shm_seek(struct file *fp, off_t offset, int whence, struct thread *td)
+{
+ struct shmfd *shmfd;
+ off_t foffset;
+ int error;
+
+ shmfd = fp->f_data;
+ foffset = foffset_lock(fp, 0);
+ error = 0;
+ switch (whence) {
+ case L_INCR:
+ if (foffset < 0 ||
+ (offset > 0 && foffset > OFF_MAX - offset)) {
+ error = EOVERFLOW;
+ break;
+ }
+ offset += foffset;
+ break;
+ case L_XTND:
+ if (offset > 0 && shmfd->shm_size > OFF_MAX - offset) {
+ error = EOVERFLOW;
+ break;
+ }
+ offset += shmfd->shm_size;
+ break;
+ case L_SET:
+ break;
+ default:
+ error = EINVAL;
+ }
+ if (error == 0) {
+ if (offset < 0 || offset > shmfd->shm_size)
+ error = EINVAL;
+ else
+ *(off_t *)(td->td_retval) = offset;
+ }
+ foffset_unlock(fp, offset, error != 0 ? FOF_NOUPDATE : 0);
+ return (error);
+}
+
+static int
shm_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
int flags, struct thread *td)
{
+ struct shmfd *shmfd;
+ void *rl_cookie;
+ int error;
- return (EOPNOTSUPP);
+ shmfd = fp->f_data;
+#ifdef MAC
+ error = mac_posixshm_check_read(active_cred, fp->f_cred, shmfd);
+ if (error)
+ return (error);
+#endif
+ foffset_lock_uio(fp, uio, flags);
+ rl_cookie = rangelock_rlock(&shmfd->shm_rl, uio->uio_offset,
+ uio->uio_offset + uio->uio_resid, &shmfd->shm_mtx);
+ error = uiomove_object(shmfd->shm_object, shmfd->shm_size, uio);
+ rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx);
+ foffset_unlock_uio(fp, uio, flags);
+ return (error);
}
static int
@@ -148,8 +313,29 @@
shm_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
int flags, struct thread *td)
{
+ struct shmfd *shmfd;
+ void *rl_cookie;
+ int error;
- return (EOPNOTSUPP);
+ shmfd = fp->f_data;
+#ifdef MAC
+ error = mac_posixshm_check_write(active_cred, fp->f_cred, shmfd);
+ if (error)
+ return (error);
+#endif
+ foffset_lock_uio(fp, uio, flags);
+ if ((flags & FOF_OFFSET) == 0) {
+ rl_cookie = rangelock_wlock(&shmfd->shm_rl, 0, OFF_MAX,
+ &shmfd->shm_mtx);
+ } else {
+ rl_cookie = rangelock_wlock(&shmfd->shm_rl, uio->uio_offset,
+ uio->uio_offset + uio->uio_resid, &shmfd->shm_mtx);
+ }
+
+ error = uiomove_object(shmfd->shm_object, shmfd->shm_size, uio);
+ rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx);
+ foffset_unlock_uio(fp, uio, flags);
+ return (error);
}
static int
@@ -227,6 +413,8 @@
sb->st_uid = shmfd->shm_uid;
sb->st_gid = shmfd->shm_gid;
mtx_unlock(&shm_timestamp_lock);
+ sb->st_dev = shm_dev_ino;
+ sb->st_ino = shmfd->shm_ino;
return (0);
}
@@ -252,10 +440,11 @@
vm_ooffset_t delta;
int base, rv;
+ KASSERT(length >= 0, ("shm_dotruncate: length < 0"));
object = shmfd->shm_object;
- VM_OBJECT_LOCK(object);
+ VM_OBJECT_WLOCK(object);
if (length == shmfd->shm_size) {
- VM_OBJECT_UNLOCK(object);
+ VM_OBJECT_WUNLOCK(object);
return (0);
}
nobjsize = OFF_TO_IDX(length + PAGE_MASK);
@@ -267,7 +456,7 @@
* object is mapped into the kernel.
*/
if (shmfd->shm_kmappings > 0) {
- VM_OBJECT_UNLOCK(object);
+ VM_OBJECT_WUNLOCK(object);
return (EBUSY);
}
@@ -280,17 +469,14 @@
retry:
m = vm_page_lookup(object, idx);
if (m != NULL) {
- if ((m->oflags & VPO_BUSY) != 0 ||
- m->busy != 0) {
- vm_page_sleep(m, "shmtrc");
+ if (vm_page_sleep_if_busy(m, "shmtrc"))
goto retry;
- }
} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL);
if (m == NULL) {
- VM_OBJECT_UNLOCK(object);
+ VM_OBJECT_WUNLOCK(object);
VM_WAIT;
- VM_OBJECT_LOCK(object);
+ VM_OBJECT_WLOCK(object);
goto retry;
} else if (m->valid != VM_PAGE_BITS_ALL) {
ma[0] = m;
@@ -304,11 +490,11 @@
if (rv == VM_PAGER_OK) {
vm_page_deactivate(m);
vm_page_unlock(m);
- vm_page_wakeup(m);
+ vm_page_xunbusy(m);
} else {
vm_page_free(m);
vm_page_unlock(m);
- VM_OBJECT_UNLOCK(object);
+ VM_OBJECT_WUNLOCK(object);
return (EIO);
}
}
@@ -320,7 +506,7 @@
vm_pager_page_unswapped(m);
}
}
- delta = ptoa(object->size - nobjsize);
+ delta = IDX_TO_OFF(object->size - nobjsize);
/* Toss in memory pages. */
if (nobjsize < object->size)
@@ -335,10 +521,10 @@
swap_release_by_cred(delta, object->cred);
object->charge -= delta;
} else {
- /* Attempt to reserve the swap */
- delta = ptoa(nobjsize - object->size);
+ /* Try to reserve additional swap space. */
+ delta = IDX_TO_OFF(nobjsize - object->size);
if (!swap_reserve_by_cred(delta, object->cred)) {
- VM_OBJECT_UNLOCK(object);
+ VM_OBJECT_WUNLOCK(object);
return (ENOMEM);
}
object->charge += delta;
@@ -349,7 +535,7 @@
shmfd->shm_mtime = shmfd->shm_ctime;
mtx_unlock(&shm_timestamp_lock);
object->size = nobjsize;
- VM_OBJECT_UNLOCK(object);
+ VM_OBJECT_WUNLOCK(object);
return (0);
}
@@ -361,6 +547,7 @@
shm_alloc(struct ucred *ucred, mode_t mode)
{
struct shmfd *shmfd;
+ int ino;
shmfd = malloc(sizeof(*shmfd), M_SHMFD, M_WAITOK | M_ZERO);
shmfd->shm_size = 0;
@@ -370,14 +557,22 @@
shmfd->shm_object = vm_pager_allocate(OBJT_DEFAULT, NULL,
shmfd->shm_size, VM_PROT_DEFAULT, 0, ucred);
KASSERT(shmfd->shm_object != NULL, ("shm_create: vm_pager_allocate"));
- VM_OBJECT_LOCK(shmfd->shm_object);
+ shmfd->shm_object->pg_color = 0;
+ VM_OBJECT_WLOCK(shmfd->shm_object);
vm_object_clear_flag(shmfd->shm_object, OBJ_ONEMAPPING);
- vm_object_set_flag(shmfd->shm_object, OBJ_NOSPLIT);
- VM_OBJECT_UNLOCK(shmfd->shm_object);
+ vm_object_set_flag(shmfd->shm_object, OBJ_COLORED | OBJ_NOSPLIT);
+ VM_OBJECT_WUNLOCK(shmfd->shm_object);
vfs_timestamp(&shmfd->shm_birthtime);
shmfd->shm_atime = shmfd->shm_mtime = shmfd->shm_ctime =
shmfd->shm_birthtime;
+ ino = alloc_unr(shm_ino_unr);
+ if (ino == -1)
+ shmfd->shm_ino = 0;
+ else
+ shmfd->shm_ino = ino;
refcount_init(&shmfd->shm_refs, 1);
+ mtx_init(&shmfd->shm_mtx, "shmrl", NULL, MTX_DEF);
+ rangelock_init(&shmfd->shm_rl);
#ifdef MAC
mac_posixshm_init(shmfd);
mac_posixshm_create(ucred, shmfd);
@@ -402,7 +597,11 @@
#ifdef MAC
mac_posixshm_destroy(shmfd);
#endif
+ rangelock_destroy(&shmfd->shm_rl);
+ mtx_destroy(&shmfd->shm_mtx);
vm_object_deallocate(shmfd->shm_object);
+ if (shmfd->shm_ino != 0)
+ free_unr(shm_ino_unr, shmfd->shm_ino);
free(shmfd, M_SHMFD);
}
}
@@ -435,14 +634,18 @@
* the mappings in a hash table.
*/
static void
-shm_dict_init(void *arg)
+shm_init(void *arg)
{
mtx_init(&shm_timestamp_lock, "shm timestamps", NULL, MTX_DEF);
sx_init(&shm_dict_lock, "shm dictionary");
shm_dictionary = hashinit(1024, M_SHMFD, &shm_hash);
+ shm_ino_unr = new_unrhdr(1, INT32_MAX, NULL);
+ KASSERT(shm_ino_unr != NULL, ("shm fake inodes not initialized"));
+ shm_dev_ino = devfs_alloc_cdp_inode();
+ KASSERT(shm_dev_ino > 0, ("shm dev inode not initialized"));
}
-SYSINIT(shm_dict_init, SI_SUB_SYSV_SHM, SI_ORDER_ANY, shm_dict_init, NULL);
+SYSINIT(shm_init, SI_SUB_SYSV_SHM, SI_ORDER_ANY, shm_init, NULL);
static struct shmfd *
shm_lookup(char *path, Fnv32_t fnv)
@@ -511,6 +714,8 @@
struct shmfd *shmfd;
struct file *fp;
char *path;
+ const char *pr_path;
+ size_t pr_pathlen;
Fnv32_t fnv;
mode_t cmode;
int fd, error;
@@ -527,7 +732,7 @@
(uap->flags & O_ACCMODE) != O_RDWR)
return (EINVAL);
- if ((uap->flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC)) != 0)
+ if ((uap->flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC)) != 0)
return (EINVAL);
fdp = td->td_proc->p_fd;
@@ -541,7 +746,7 @@
if (uap->path == SHM_ANON) {
/* A read-only anonymous object is pointless. */
if ((uap->flags & O_ACCMODE) == O_RDONLY) {
- fdclose(fdp, fp, fd, td);
+ fdclose(td, fp, fd);
fdrop(fp, td);
return (EINVAL);
}
@@ -548,13 +753,22 @@
shmfd = shm_alloc(td->td_ucred, cmode);
} else {
path = malloc(MAXPATHLEN, M_SHMFD, M_WAITOK);
- error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
+ pr_path = td->td_ucred->cr_prison->pr_path;
+ /* Construct a full pathname for jailed callers. */
+ pr_pathlen = strcmp(pr_path, "/") == 0 ? 0
+ : strlcpy(path, pr_path, MAXPATHLEN);
+ error = copyinstr(uap->path, path + pr_pathlen,
+ MAXPATHLEN - pr_pathlen, NULL);
+#ifdef KTRACE
+ if (error == 0 && KTRPOINT(curthread, KTR_NAMEI))
+ ktrnamei(path);
+#endif
/* Require paths to start with a '/' character. */
- if (error == 0 && path[0] != '/')
+ if (error == 0 && path[pr_pathlen] != '/')
error = EINVAL;
if (error) {
- fdclose(fdp, fp, fd, td);
+ fdclose(td, fp, fd);
fdrop(fp, td);
free(path, M_SHMFD);
return (error);
@@ -620,7 +834,7 @@
sx_xunlock(&shm_dict_lock);
if (error) {
- fdclose(fdp, fp, fd, td);
+ fdclose(td, fp, fd);
fdrop(fp, td);
return (error);
}
@@ -638,16 +852,25 @@
sys_shm_unlink(struct thread *td, struct shm_unlink_args *uap)
{
char *path;
+ const char *pr_path;
+ size_t pr_pathlen;
Fnv32_t fnv;
int error;
path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
- error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
+ pr_path = td->td_ucred->cr_prison->pr_path;
+ pr_pathlen = strcmp(pr_path, "/") == 0 ? 0
+ : strlcpy(path, pr_path, MAXPATHLEN);
+ error = copyinstr(uap->path, path + pr_pathlen, MAXPATHLEN - pr_pathlen,
+ NULL);
if (error) {
free(path, M_TEMP);
return (error);
}
-
+#ifdef KTRACE
+ if (KTRPOINT(curthread, KTR_NAMEI))
+ ktrnamei(path);
+#endif
fnv = fnv_32_str(path, FNV1_32_INIT);
sx_xlock(&shm_dict_lock);
error = shm_remove(path, fnv, td->td_ucred);
@@ -757,7 +980,7 @@
return (EINVAL);
shmfd = fp->f_data;
obj = shmfd->shm_object;
- VM_OBJECT_LOCK(obj);
+ VM_OBJECT_WLOCK(obj);
/*
* XXXRW: This validation is probably insufficient, and subject to
* sign errors. It should be fixed.
@@ -764,13 +987,13 @@
*/
if (offset >= shmfd->shm_size ||
offset + size > round_page(shmfd->shm_size)) {
- VM_OBJECT_UNLOCK(obj);
+ VM_OBJECT_WUNLOCK(obj);
return (EINVAL);
}
shmfd->shm_kmappings++;
vm_object_reference_locked(obj);
- VM_OBJECT_UNLOCK(obj);
+ VM_OBJECT_WUNLOCK(obj);
/* Map the object into the kernel_map and wire it. */
kva = vm_map_min(kernel_map);
@@ -777,8 +1000,8 @@
ofs = offset & PAGE_MASK;
offset = trunc_page(offset);
size = round_page(size + ofs);
- rv = vm_map_find(kernel_map, obj, offset, &kva, size,
- VMFS_ALIGNED_SPACE, VM_PROT_READ | VM_PROT_WRITE,
+ rv = vm_map_find(kernel_map, obj, offset, &kva, size, 0,
+ VMFS_OPTIMAL_SPACE, VM_PROT_READ | VM_PROT_WRITE,
VM_PROT_READ | VM_PROT_WRITE, 0);
if (rv == KERN_SUCCESS) {
rv = vm_map_wire(kernel_map, kva, kva + size,
@@ -792,9 +1015,9 @@
vm_object_deallocate(obj);
/* On failure, drop our mapping reference. */
- VM_OBJECT_LOCK(obj);
+ VM_OBJECT_WLOCK(obj);
shmfd->shm_kmappings--;
- VM_OBJECT_UNLOCK(obj);
+ VM_OBJECT_WUNLOCK(obj);
return (vm_mmap_to_errno(rv));
}
@@ -836,10 +1059,10 @@
if (obj != shmfd->shm_object)
return (EINVAL);
vm_map_remove(map, kva, kva + size);
- VM_OBJECT_LOCK(obj);
+ VM_OBJECT_WLOCK(obj);
KASSERT(shmfd->shm_kmappings > 0, ("shm_unmap: object not mapped"));
shmfd->shm_kmappings--;
- VM_OBJECT_UNLOCK(obj);
+ VM_OBJECT_WUNLOCK(obj);
return (0);
}
@@ -846,11 +1069,23 @@
void
shm_path(struct shmfd *shmfd, char *path, size_t size)
{
+ const char *shm_path, *pr_path;
+ size_t pr_pathlen;
if (shmfd->shm_path == NULL)
return;
sx_slock(&shm_dict_lock);
- if (shmfd->shm_path != NULL)
- strlcpy(path, shmfd->shm_path, size);
+ shm_path = shmfd->shm_path;
+ if (shm_path != NULL) {
+ pr_path = curthread->td_ucred->cr_prison->pr_path;
+ if (strcmp(pr_path, "/") != 0) {
+ /* Return the jail-rooted pathname. */
+ pr_pathlen = strlen(pr_path);
+ if (strncmp(shm_path, pr_path, pr_pathlen) == 0 &&
+ shm_path[pr_pathlen] == '/')
+ shm_path += pr_pathlen;
+ }
+ strlcpy(path, shm_path, size);
+ }
sx_sunlock(&shm_dict_lock);
}
Modified: trunk/sys/kern/uipc_sockbuf.c
===================================================================
--- trunk/sys/kern/uipc_sockbuf.c 2018-05-26 14:25:17 UTC (rev 9953)
+++ trunk/sys/kern/uipc_sockbuf.c 2018-05-26 14:25:55 UTC (rev 9954)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1988, 1990, 1993
* The Regents of the University of California. All rights reserved.
@@ -30,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/uipc_sockbuf.c 274043 2014-11-03 12:38:29Z hselasky $");
#include "opt_param.h"
@@ -65,7 +66,7 @@
static u_long sb_efficiency = 8; /* parameter for sbreserve() */
-static void sbdrop_internal(struct sockbuf *sb, int len);
+static struct mbuf *sbcut_internal(struct sockbuf *sb, int len);
static void sbflush_internal(struct sockbuf *sb);
/*
@@ -127,9 +128,9 @@
SOCKBUF_LOCK_ASSERT(sb);
sb->sb_flags |= SB_WAIT;
- return (msleep(&sb->sb_cc, &sb->sb_mtx,
+ return (msleep_sbt(&sb->sb_cc, &sb->sb_mtx,
(sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
- sb->sb_timeo));
+ sb->sb_timeo, 0, 0));
}
int
@@ -188,7 +189,7 @@
}
KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
if (sb->sb_upcall != NULL) {
- ret = sb->sb_upcall(so, sb->sb_upcallarg, M_DONTWAIT);
+ ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT);
if (ret == SU_ISCONNECTED) {
KASSERT(sb == &so->so_rcv,
("SO_SND upcall returned SU_ISCONNECTED"));
@@ -528,6 +529,9 @@
SBLASTMBUFCHK(sb);
+ /* Remove all packet headers and mbuf tags to get a pure data chain. */
+ m_demote(m, 1);
+
sbcompress(sb, m, sb->sb_mbtail);
sb->sb_lastrecord = sb->sb_mb;
@@ -617,40 +621,23 @@
SOCKBUF_UNLOCK(sb);
}
-/*
- * Append address and data, and optionally, control (ancillary) data to the
- * receive queue of a socket. If present, m0 must include a packet header
- * with total length. Returns 0 if no space in sockbuf or insufficient
- * mbufs.
- */
-int
-sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
- struct mbuf *m0, struct mbuf *control)
+/* Helper routine that appends data, control, and address to a sockbuf. */
+static int
+sbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa,
+ struct mbuf *m0, struct mbuf *control, struct mbuf *ctrl_last)
{
struct mbuf *m, *n, *nlast;
- int space = asa->sa_len;
-
- SOCKBUF_LOCK_ASSERT(sb);
-
- if (m0 && (m0->m_flags & M_PKTHDR) == 0)
- panic("sbappendaddr_locked");
- if (m0)
- space += m0->m_pkthdr.len;
- space += m_length(control, &n);
-
- if (space > sbspace(sb))
- return (0);
#if MSIZE <= 256
if (asa->sa_len > MLEN)
return (0);
#endif
- MGET(m, M_DONTWAIT, MT_SONAME);
- if (m == 0)
+ m = m_get(M_NOWAIT, MT_SONAME);
+ if (m == NULL)
return (0);
m->m_len = asa->sa_len;
bcopy(asa, mtod(m, caddr_t), asa->sa_len);
- if (n)
- n->m_next = m0; /* concatenate data to control */
+ if (ctrl_last)
+ ctrl_last->m_next = m0; /* concatenate data to control */
else
control = m0;
m->m_next = control;
@@ -674,6 +661,50 @@
* mbufs.
*/
int
+sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
+ struct mbuf *m0, struct mbuf *control)
+{
+ struct mbuf *ctrl_last;
+ int space = asa->sa_len;
+
+ SOCKBUF_LOCK_ASSERT(sb);
+
+ if (m0 && (m0->m_flags & M_PKTHDR) == 0)
+ panic("sbappendaddr_locked");
+ if (m0)
+ space += m0->m_pkthdr.len;
+ space += m_length(control, &ctrl_last);
+
+ if (space > sbspace(sb))
+ return (0);
+ return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last));
+}
+
+/*
+ * Append address and data, and optionally, control (ancillary) data to the
+ * receive queue of a socket. If present, m0 must include a packet header
+ * with total length. Returns 0 if insufficient mbufs. Does not validate space
+ * on the receiving sockbuf.
+ */
+int
+sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa,
+ struct mbuf *m0, struct mbuf *control)
+{
+ struct mbuf *ctrl_last;
+
+ SOCKBUF_LOCK_ASSERT(sb);
+
+ ctrl_last = (control == NULL) ? NULL : m_last(control);
+ return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last));
+}
+
+/*
+ * Append address and data, and optionally, control (ancillary) data to the
+ * receive queue of a socket. If present, m0 must include a packet header
+ * with total length. Returns 0 if no space in sockbuf or insufficient
+ * mbufs.
+ */
+int
sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
struct mbuf *m0, struct mbuf *control)
{
@@ -815,7 +846,7 @@
*/
if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
break;
- sbdrop_internal(sb, (int)sb->sb_cc);
+ m_freem(sbcut_internal(sb, (int)sb->sb_cc));
}
if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
panic("sbflush_internal: cc %u || mb %p || mbcnt %u",
@@ -840,15 +871,16 @@
}
/*
- * Drop data from (the front of) a sockbuf.
+ * Cut data from (the front of) a sockbuf.
*/
-static void
-sbdrop_internal(struct sockbuf *sb, int len)
+static struct mbuf *
+sbcut_internal(struct sockbuf *sb, int len)
{
- struct mbuf *m;
- struct mbuf *next;
+ struct mbuf *m, *n, *next, *mfree;
next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
+ mfree = NULL;
+
while (len > 0) {
if (m == 0) {
if (next == 0)
@@ -869,11 +901,17 @@
}
len -= m->m_len;
sbfree(sb, m);
- m = m_free(m);
+ n = m->m_next;
+ m->m_next = mfree;
+ mfree = m;
+ m = n;
}
while (m && m->m_len == 0) {
sbfree(sb, m);
- m = m_free(m);
+ n = m->m_next;
+ m->m_next = mfree;
+ mfree = m;
+ m = n;
}
if (m) {
sb->sb_mb = m;
@@ -891,6 +929,8 @@
} else if (m->m_nextpkt == NULL) {
sb->sb_lastrecord = m;
}
+
+ return (mfree);
}
/*
@@ -901,17 +941,31 @@
{
SOCKBUF_LOCK_ASSERT(sb);
+ m_freem(sbcut_internal(sb, len));
+}
- sbdrop_internal(sb, len);
+/*
+ * Drop data from (the front of) a sockbuf,
+ * and return it to caller.
+ */
+struct mbuf *
+sbcut_locked(struct sockbuf *sb, int len)
+{
+
+ SOCKBUF_LOCK_ASSERT(sb);
+ return (sbcut_internal(sb, len));
}
void
sbdrop(struct sockbuf *sb, int len)
{
+ struct mbuf *mfree;
SOCKBUF_LOCK(sb);
- sbdrop_locked(sb, len);
+ mfree = sbcut_internal(sb, len);
SOCKBUF_UNLOCK(sb);
+
+ m_freem(mfree);
}
/*
@@ -939,6 +993,13 @@
/* Return closest mbuf in chain for current offset. */
*moff = off - sb->sb_sndptroff;
m = ret = sb->sb_sndptr ? sb->sb_sndptr : sb->sb_mb;
+ if (*moff == m->m_len) {
+ *moff = 0;
+ sb->sb_sndptroff += m->m_len;
+ m = ret = m->m_next;
+ KASSERT(ret->m_len > 0,
+ ("mbuf %p in sockbuf %p chain has no valid data", ret, sb));
+ }
/* Advance by len to be as close as possible for the next transmit. */
for (off = off - sb->sb_sndptroff + len - 1;
@@ -955,6 +1016,37 @@
}
/*
+ * Return the first mbuf and the mbuf data offset for the provided
+ * send offset without changing the "sb_sndptroff" field.
+ */
+struct mbuf *
+sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff)
+{
+ struct mbuf *m;
+
+ KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
+
+ /*
+ * If the "off" is below the stored offset, which happens on
+ * retransmits, just use "sb_mb":
+ */
+ if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) {
+ m = sb->sb_mb;
+ } else {
+ m = sb->sb_sndptr;
+ off -= sb->sb_sndptroff;
+ }
+ while (off > 0 && m != NULL) {
+ if (off < m->m_len)
+ break;
+ off -= m->m_len;
+ m = m->m_next;
+ }
+ *moff = off;
+ return (m);
+}
+
+/*
* Drop a record off the front of a sockbuf and move the next record to the
* front.
*/
@@ -1002,9 +1094,9 @@
if (CMSG_SPACE((u_int)size) > MCLBYTES)
return ((struct mbuf *) NULL);
if (CMSG_SPACE((u_int)size) > MLEN)
- m = m_getcl(M_DONTWAIT, MT_CONTROL, 0);
+ m = m_getcl(M_NOWAIT, MT_CONTROL, 0);
else
- m = m_get(M_DONTWAIT, MT_CONTROL);
+ m = m_get(M_NOWAIT, MT_CONTROL);
if (m == NULL)
return ((struct mbuf *) NULL);
cp = mtod(m, struct cmsghdr *);
@@ -1053,4 +1145,4 @@
SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG|CTLFLAG_RW,
&sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size");
SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
- &sb_efficiency, 0, "");
+ &sb_efficiency, 0, "Socket buffer size waste factor");
Modified: trunk/sys/kern/uipc_socket.c
===================================================================
--- trunk/sys/kern/uipc_socket.c 2018-05-26 14:25:17 UTC (rev 9953)
+++ trunk/sys/kern/uipc_socket.c 2018-05-26 14:25:55 UTC (rev 9954)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1988, 1990, 1993
* The Regents of the University of California.
@@ -92,7 +93,7 @@
* from a listen queue to a file descriptor, in order to prevent garbage
* collection of the socket at an untimely moment. For a number of reasons,
* these interfaces are not preferred, and should be avoided.
- *
+ *
* NOTE: With regard to VNETs the general rule is that callers do not set
* curvnet. Exceptions to this rule include soabort(), sodisconnect(),
* sofree() (and with that sorele(), sotryfree()), as well as sonewconn()
@@ -101,11 +102,10 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/uipc_socket.c 305261 2016-09-02 00:14:28Z markj $");
#include "opt_inet.h"
#include "opt_inet6.h"
-#include "opt_zero.h"
#include "opt_compat.h"
#include <sys/param.h>
@@ -135,6 +135,8 @@
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/jail.h>
+#include <sys/syslog.h>
+#include <netinet/in.h>
#include <net/vnet.h>
@@ -173,11 +175,8 @@
.f_event = filt_sowrite,
};
-uma_zone_t socket_zone;
so_gen_t so_gencnt; /* generation count for sockets */
-int maxsockets;
-
MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
@@ -185,26 +184,42 @@
VNET_ASSERT(curvnet != NULL, \
("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so)));
+/*
+ * Limit on the number of connections in the listen queue waiting
+ * for accept(2).
+ * NB: The original sysctl somaxconn is still available but hidden
+ * to prevent confusion about the actual purpose of this number.
+ */
static int somaxconn = SOMAXCONN;
-static int sysctl_somaxconn(SYSCTL_HANDLER_ARGS);
-/* XXX: we dont have SYSCTL_USHORT */
-SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLTYPE_UINT | CTLFLAG_RW,
- 0, sizeof(int), sysctl_somaxconn, "I", "Maximum pending socket connection "
- "queue size");
+
+static int
+sysctl_somaxconn(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ int val;
+
+ val = somaxconn;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error || !req->newptr )
+ return (error);
+
+ if (val < 1 || val > USHRT_MAX)
+ return (EINVAL);
+
+ somaxconn = val;
+ return (0);
+}
+SYSCTL_PROC(_kern_ipc, OID_AUTO, soacceptqueue, CTLTYPE_UINT | CTLFLAG_RW,
+ 0, sizeof(int), sysctl_somaxconn, "I",
+ "Maximum listen socket pending connection accept queue size");
+SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn,
+ CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_SKIP,
+ 0, sizeof(int), sysctl_somaxconn, "I",
+ "Maximum listen socket pending connection accept queue size (compat)");
+
static int numopensockets;
SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD,
&numopensockets, 0, "Number of open sockets");
-#ifdef ZERO_COPY_SOCKETS
-/* These aren't static because they're used in other files. */
-int so_zero_copy_send = 1;
-int so_zero_copy_receive = 1;
-SYSCTL_NODE(_kern_ipc, OID_AUTO, zero_copy, CTLFLAG_RD, 0,
- "Zero copy controls");
-SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, receive, CTLFLAG_RW,
- &so_zero_copy_receive, 0, "Enable zero copy receive");
-SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, send, CTLFLAG_RW,
- &so_zero_copy_send, 0, "Enable zero copy send");
-#endif /* ZERO_COPY_SOCKETS */
/*
* accept_mtx locks down per-socket fields relating to accept queues. See
@@ -227,6 +242,46 @@
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");
/*
+ * Initialize the socket subsystem and set up the socket
+ * memory allocator.
+ */
+static uma_zone_t socket_zone;
+int maxsockets;
+
+static void
+socket_zone_change(void *tag)
+{
+
+ maxsockets = uma_zone_set_max(socket_zone, maxsockets);
+}
+
+static void
+socket_init(void *tag)
+{
+
+ socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL,
+ NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ maxsockets = uma_zone_set_max(socket_zone, maxsockets);
+ uma_zone_set_warning(socket_zone, "kern.ipc.maxsockets limit reached");
+ EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL,
+ EVENTHANDLER_PRI_FIRST);
+}
+SYSINIT(socket, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_init, NULL);
+
+/*
+ * Initialise maxsockets. This SYSINIT must be run after
+ * tunable_mbinit().
+ */
+static void
+init_maxsockets(void *ignored)
+{
+
+ TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets);
+ maxsockets = imax(maxsockets, maxfiles);
+}
+SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);
+
+/*
* Sysctl to get and set the maximum global sockets limit. Notify protocols
* of the change so that they can update their dependent limits as required.
*/
@@ -238,12 +293,9 @@
newmaxsockets = maxsockets;
error = sysctl_handle_int(oidp, &newmaxsockets, 0, req);
if (error == 0 && req->newptr) {
- if (newmaxsockets > maxsockets) {
+ if (newmaxsockets > maxsockets &&
+ newmaxsockets <= maxfiles) {
maxsockets = newmaxsockets;
- if (maxsockets > ((maxfiles / 4) * 3)) {
- maxfiles = (maxsockets * 5) / 4;
- maxfilesperproc = (maxfiles * 9) / 10;
- }
EVENTHANDLER_INVOKE(maxsockets_change);
} else
error = EINVAL;
@@ -250,25 +302,11 @@
}
return (error);
}
-
SYSCTL_PROC(_kern_ipc, OID_AUTO, maxsockets, CTLTYPE_INT|CTLFLAG_RW,
&maxsockets, 0, sysctl_maxsockets, "IU",
"Maximum number of sockets avaliable");
/*
- * Initialise maxsockets. This SYSINIT must be run after
- * tunable_mbinit().
- */
-static void
-init_maxsockets(void *ignored)
-{
-
- TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets);
- maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters));
-}
-SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);
-
-/*
* Socket operation routines. These routines are called by the routines in
* sys_socket.c or from a system process, and implement the semantics of
* socket operations by switching out to the protocol specific routines.
@@ -374,7 +412,16 @@
else
prp = pffindtype(dom, type);
- if (prp == NULL || prp->pr_usrreqs->pru_attach == NULL ||
+ if (prp == NULL) {
+ /* No support for domain. */
+ if (pffinddomain(dom) == NULL)
+ return (EAFNOSUPPORT);
+ /* No support for socket type. */
+ if (proto == 0 && type != 0)
+ return (EPROTOTYPE);
+ return (EPROTONOSUPPORT);
+ }
+ if (prp->pr_usrreqs->pru_attach == NULL ||
prp->pr_usrreqs->pru_attach == pru_attach_notsupp)
return (EPROTONOSUPPORT);
@@ -440,6 +487,10 @@
struct socket *
sonewconn(struct socket *head, int connstatus)
{
+ static struct timeval lastover;
+ static struct timeval overinterval = { 60, 0 };
+ static int overcount;
+
struct socket *so;
int over;
@@ -447,16 +498,32 @@
over = (head->so_qlen > 3 * head->so_qlimit / 2);
ACCEPT_UNLOCK();
#ifdef REGRESSION
- if (regression_sonewconn_earlytest && over)
+ if (regression_sonewconn_earlytest && over) {
#else
- if (over)
+ if (over) {
#endif
+ overcount++;
+
+ if (ratecheck(&lastover, &overinterval)) {
+ log(LOG_DEBUG, "%s: pcb %p: Listen queue overflow: "
+ "%i already in queue awaiting acceptance "
+ "(%d occurrences)\n",
+ __func__, head->so_pcb, head->so_qlen, overcount);
+
+ overcount = 0;
+ }
+
return (NULL);
+ }
VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p",
__func__, __LINE__, head));
so = soalloc(head->so_vnet);
- if (so == NULL)
+ if (so == NULL) {
+ log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: "
+ "limit reached or out of memory\n",
+ __func__, head->so_pcb);
return (NULL);
+ }
if ((head->so_options & SO_ACCEPTFILTER) != 0)
connstatus = 0;
so->so_head = head;
@@ -473,11 +540,18 @@
knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv));
knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd));
VNET_SO_ASSERT(head);
- if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) ||
- (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
+ if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
sodealloc(so);
+ log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n",
+ __func__, head->so_pcb);
return (NULL);
}
+ if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
+ sodealloc(so);
+ log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n",
+ __func__, head->so_pcb);
+ return (NULL);
+ }
so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
so->so_snd.sb_lowat = head->so_snd.sb_lowat;
so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
@@ -486,6 +560,20 @@
so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
so->so_state |= connstatus;
ACCEPT_LOCK();
+ /*
+ * The accept socket may be tearing down but we just
+ * won a race on the ACCEPT_LOCK.
+ * However, if sctp_peeloff() is called on a 1-to-many
+ * style socket, the SO_ACCEPTCONN doesn't need to be set.
+ */
+ if (!(head->so_options & SO_ACCEPTCONN) &&
+ ((head->so_proto->pr_protocol != IPPROTO_SCTP) ||
+ (head->so_type != SOCK_SEQPACKET))) {
+ SOCK_LOCK(so);
+ so->so_head = NULL;
+ sofree(so); /* NB: returns ACCEPT_UNLOCK'ed. */
+ return (NULL);
+ }
if (connstatus) {
TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
so->so_qstate |= SQ_COMP;
@@ -529,9 +617,20 @@
CURVNET_SET(so->so_vnet);
error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td);
CURVNET_RESTORE();
- return error;
+ return (error);
}
+int
+sobindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+ int error;
+
+ CURVNET_SET(so->so_vnet);
+ error = (*so->so_proto->pr_usrreqs->pru_bindat)(fd, so, nam, td);
+ CURVNET_RESTORE();
+ return (error);
+}
+
/*
* solisten() transitions a socket from a non-listening state to a listening
* state, but can also be used to update the listen queue depth on an
@@ -552,7 +651,7 @@
CURVNET_SET(so->so_vnet);
error = (*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td);
CURVNET_RESTORE();
- return error;
+ return (error);
}
int
@@ -634,15 +733,21 @@
("sofree: so_head == NULL, but still SQ_COMP(%d) or SQ_INCOMP(%d)",
so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP));
if (so->so_options & SO_ACCEPTCONN) {
- KASSERT((TAILQ_EMPTY(&so->so_comp)), ("sofree: so_comp populated"));
- KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("sofree: so_incomp populated"));
+ KASSERT((TAILQ_EMPTY(&so->so_comp)),
+ ("sofree: so_comp populated"));
+ KASSERT((TAILQ_EMPTY(&so->so_incomp)),
+ ("sofree: so_incomp populated"));
}
SOCK_UNLOCK(so);
ACCEPT_UNLOCK();
VNET_SO_ASSERT(so);
- if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
- (*pr->pr_domain->dom_dispose)(so->so_rcv.sb_mb);
+ if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) {
+ if (pr->pr_domain->dom_family == AF_LOCAL)
+ unp_dispose_so(so);
+ else
+ (*pr->pr_domain->dom_dispose)(so->so_rcv.sb_mb);
+ }
if (pr->pr_usrreqs->pru_detach != NULL)
(*pr->pr_usrreqs->pru_detach)(so);
@@ -701,7 +806,8 @@
goto drop;
while (so->so_state & SS_ISCONNECTED) {
error = tsleep(&so->so_timeo,
- PSOCK | PCATCH, "soclos", so->so_linger * hz);
+ PSOCK | PCATCH, "soclos",
+ so->so_linger * hz);
if (error)
break;
}
@@ -711,9 +817,14 @@
drop:
if (so->so_proto->pr_usrreqs->pru_close != NULL)
(*so->so_proto->pr_usrreqs->pru_close)(so);
+ ACCEPT_LOCK();
if (so->so_options & SO_ACCEPTCONN) {
struct socket *sp;
- ACCEPT_LOCK();
+ /*
+ * Prevent new additions to the accept queues due
+ * to ACCEPT_LOCK races while we are draining them.
+ */
+ so->so_options &= ~SO_ACCEPTCONN;
while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
TAILQ_REMOVE(&so->so_incomp, sp, so_list);
so->so_incqlen--;
@@ -732,13 +843,15 @@
soabort(sp);
ACCEPT_LOCK();
}
- ACCEPT_UNLOCK();
+ KASSERT((TAILQ_EMPTY(&so->so_comp)),
+ ("%s: so_comp populated", __func__));
+ KASSERT((TAILQ_EMPTY(&so->so_incomp)),
+ ("%s: so_incomp populated", __func__));
}
- ACCEPT_LOCK();
SOCK_LOCK(so);
KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF"));
so->so_state |= SS_NOFDREF;
- sorele(so);
+ sorele(so); /* NB: Returns with ACCEPT_UNLOCK(). */
CURVNET_RESTORE();
return (error);
}
@@ -800,6 +913,13 @@
int
soconnect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
+
+ return (soconnectat(AT_FDCWD, so, nam, td));
+}
+
+int
+soconnectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
+{
int error;
if (so->so_options & SO_ACCEPTCONN)
@@ -821,7 +941,13 @@
* biting us.
*/
so->so_error = 0;
- error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td);
+ if (fd == AT_FDCWD) {
+ error = (*so->so_proto->pr_usrreqs->pru_connect)(so,
+ nam, td);
+ } else {
+ error = (*so->so_proto->pr_usrreqs->pru_connectat)(fd,
+ so, nam, td);
+ }
}
CURVNET_RESTORE();
@@ -853,135 +979,6 @@
return (error);
}
-#ifdef ZERO_COPY_SOCKETS
-struct so_zerocopy_stats{
- int size_ok;
- int align_ok;
- int found_ifp;
-};
-struct so_zerocopy_stats so_zerocp_stats = {0,0,0};
-#include <netinet/in.h>
-#include <net/route.h>
-#include <netinet/in_pcb.h>
-#include <vm/vm.h>
-#include <vm/vm_page.h>
-#include <vm/vm_object.h>
-
-/*
- * sosend_copyin() is only used if zero copy sockets are enabled. Otherwise
- * sosend_dgram() and sosend_generic() use m_uiotombuf().
- *
- * sosend_copyin() accepts a uio and prepares an mbuf chain holding part or
- * all of the data referenced by the uio. If desired, it uses zero-copy.
- * *space will be updated to reflect data copied in.
- *
- * NB: If atomic I/O is requested, the caller must already have checked that
- * space can hold resid bytes.
- *
- * NB: In the event of an error, the caller may need to free the partial
- * chain pointed to by *mpp. The contents of both *uio and *space may be
- * modified even in the case of an error.
- */
-static int
-sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space,
- int flags)
-{
- struct mbuf *m, **mp, *top;
- long len;
- ssize_t resid;
- int error;
-#ifdef ZERO_COPY_SOCKETS
- int cow_send;
-#endif
-
- *retmp = top = NULL;
- mp = &top;
- len = 0;
- resid = uio->uio_resid;
- error = 0;
- do {
-#ifdef ZERO_COPY_SOCKETS
- cow_send = 0;
-#endif /* ZERO_COPY_SOCKETS */
- if (resid >= MINCLSIZE) {
-#ifdef ZERO_COPY_SOCKETS
- if (top == NULL) {
- m = m_gethdr(M_WAITOK, MT_DATA);
- m->m_pkthdr.len = 0;
- m->m_pkthdr.rcvif = NULL;
- } else
- m = m_get(M_WAITOK, MT_DATA);
- if (so_zero_copy_send &&
- resid>=PAGE_SIZE &&
- *space>=PAGE_SIZE &&
- uio->uio_iov->iov_len>=PAGE_SIZE) {
- so_zerocp_stats.size_ok++;
- so_zerocp_stats.align_ok++;
- cow_send = socow_setup(m, uio);
- len = cow_send;
- }
- if (!cow_send) {
- m_clget(m, M_WAITOK);
- len = min(min(MCLBYTES, resid), *space);
- }
-#else /* ZERO_COPY_SOCKETS */
- if (top == NULL) {
- m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR);
- m->m_pkthdr.len = 0;
- m->m_pkthdr.rcvif = NULL;
- } else
- m = m_getcl(M_WAIT, MT_DATA, 0);
- len = min(min(MCLBYTES, resid), *space);
-#endif /* ZERO_COPY_SOCKETS */
- } else {
- if (top == NULL) {
- m = m_gethdr(M_WAIT, MT_DATA);
- m->m_pkthdr.len = 0;
- m->m_pkthdr.rcvif = NULL;
-
- len = min(min(MHLEN, resid), *space);
- /*
- * For datagram protocols, leave room
- * for protocol headers in first mbuf.
- */
- if (atomic && m && len < MHLEN)
- MH_ALIGN(m, len);
- } else {
- m = m_get(M_WAIT, MT_DATA);
- len = min(min(MLEN, resid), *space);
- }
- }
- if (m == NULL) {
- error = ENOBUFS;
- goto out;
- }
-
- *space -= len;
-#ifdef ZERO_COPY_SOCKETS
- if (cow_send)
- error = 0;
- else
-#endif /* ZERO_COPY_SOCKETS */
- error = uiomove(mtod(m, void *), (int)len, uio);
- resid = uio->uio_resid;
- m->m_len = len;
- *mp = m;
- top->m_pkthdr.len += len;
- if (error)
- goto out;
- mp = &m->m_next;
- if (resid <= 0) {
- if (flags & MSG_EOR)
- top->m_flags |= M_EOR;
- break;
- }
- } while (*space > 0 && atomic);
-out:
- *retmp = top;
- return (error);
-}
-#endif /*ZERO_COPY_SOCKETS*/
-
#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
int
@@ -991,13 +988,10 @@
long space;
ssize_t resid;
int clen = 0, error, dontroute;
-#ifdef ZERO_COPY_SOCKETS
- int atomic = sosendallatonce(so) || top;
-#endif
- KASSERT(so->so_type == SOCK_DGRAM, ("sodgram_send: !SOCK_DGRAM"));
+ KASSERT(so->so_type == SOCK_DGRAM, ("sosend_dgram: !SOCK_DGRAM"));
KASSERT(so->so_proto->pr_flags & PR_ATOMIC,
- ("sodgram_send: !PR_ATOMIC"));
+ ("sosend_dgram: !PR_ATOMIC"));
if (uio != NULL)
resid = uio->uio_resid;
@@ -1076,11 +1070,6 @@
if (flags & MSG_EOR)
top->m_flags |= M_EOR;
} else {
-#ifdef ZERO_COPY_SOCKETS
- error = sosend_copyin(uio, &top, atomic, &space, flags);
- if (error)
- goto out;
-#else
/*
* Copy the data from userland into a mbuf chain.
* If no data is to be copied in, a single empty mbuf
@@ -1093,7 +1082,6 @@
goto out;
}
space -= resid - uio->uio_resid;
-#endif
resid = uio->uio_resid;
}
KASSERT(resid == 0, ("sosend_dgram: resid != 0"));
@@ -1108,7 +1096,7 @@
}
/*
* XXX all the SBS_CANTSENDMORE checks previously done could be out
- * of date. We could have recieved a reset packet in an interrupt or
+ * of date. We could have received a reset packet in an interrupt or
* maybe we slept while doing page faults in uiomove() etc. We could
* probably recheck again inside the locking protection here, but
* there are probably other places that this also happens. We must
@@ -1265,12 +1253,6 @@
if (flags & MSG_EOR)
top->m_flags |= M_EOR;
} else {
-#ifdef ZERO_COPY_SOCKETS
- error = sosend_copyin(uio, &top, atomic,
- &space, flags);
- if (error != 0)
- goto release;
-#else
/*
* Copy the data from userland into a mbuf
* chain. If no data is to be copied in,
@@ -1285,7 +1267,6 @@
goto release;
}
space -= resid - uio->uio_resid;
-#endif
resid = uio->uio_resid;
}
if (dontroute) {
@@ -1295,7 +1276,7 @@
}
/*
* XXX all the SBS_CANTSENDMORE checks previously
- * done could be out of date. We could have recieved
+ * done could be out of date. We could have received
* a reset packet in an interrupt or maybe we slept
* while doing page faults in uiomove() etc. We
* could probably recheck again inside the locking
@@ -1372,26 +1353,11 @@
KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));
VNET_SO_ASSERT(so);
- m = m_get(M_WAIT, MT_DATA);
+ m = m_get(M_WAITOK, MT_DATA);
error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
if (error)
goto bad;
do {
-#ifdef ZERO_COPY_SOCKETS
- if (so_zero_copy_receive) {
- int disposable;
-
- if ((m->m_flags & M_EXT)
- && (m->m_ext.ext_type == EXT_DISPOSABLE))
- disposable = 1;
- else
- disposable = 0;
-
- error = uiomoveco(mtod(m, void *),
- min(uio->uio_resid, m->m_len),
- uio, disposable);
- } else
-#endif /* ZERO_COPY_SOCKETS */
error = uiomove(mtod(m, void *),
(int) min(uio->uio_resid, m->m_len), uio);
m = m_free(m);
@@ -1424,20 +1390,19 @@
else
sb->sb_mb = nextrecord;
- /*
- * Now update any dependent socket buffer fields to reflect the new
- * state. This is an expanded inline of SB_EMPTY_FIXUP(), with the
+ /*
+ * Now update any dependent socket buffer fields to reflect the new
+ * state. This is an expanded inline of SB_EMPTY_FIXUP(), with the
* addition of a second clause that takes care of the case where
* sb_mb has been updated, but remains the last record.
- */
- if (sb->sb_mb == NULL) {
- sb->sb_mbtail = NULL;
- sb->sb_lastrecord = NULL;
- } else if (sb->sb_mb->m_nextpkt == NULL)
- sb->sb_lastrecord = sb->sb_mb;
+ */
+ if (sb->sb_mb == NULL) {
+ sb->sb_mbtail = NULL;
+ sb->sb_lastrecord = NULL;
+ } else if (sb->sb_mb->m_nextpkt == NULL)
+ sb->sb_lastrecord = sb->sb_mb;
}
-
/*
* Implement receive operations on a socket. We depend on the way that
* records are added to the sockbuf by sbappend. In particular, each record
@@ -1626,7 +1591,7 @@
SOCKBUF_UNLOCK(&so->so_rcv);
VNET_SO_ASSERT(so);
error = (*pr->pr_domain->dom_externalize)
- (cm, controlp);
+ (cm, controlp, flags);
SOCKBUF_LOCK(&so->so_rcv);
} else if (controlp != NULL)
*controlp = cm;
@@ -1687,7 +1652,7 @@
/*
* If the type of mbuf has changed since the last mbuf
* examined ('type'), end the receive operation.
- */
+ */
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
if (m->m_type == MT_OOBDATA || m->m_type == MT_CONTROL) {
if (type != m->m_type)
@@ -1715,21 +1680,6 @@
SBLASTRECORDCHK(&so->so_rcv);
SBLASTMBUFCHK(&so->so_rcv);
SOCKBUF_UNLOCK(&so->so_rcv);
-#ifdef ZERO_COPY_SOCKETS
- if (so_zero_copy_receive) {
- int disposable;
-
- if ((m->m_flags & M_EXT)
- && (m->m_ext.ext_type == EXT_DISPOSABLE))
- disposable = 1;
- else
- disposable = 0;
-
- error = uiomoveco(mtod(m, char *) + moff,
- (int)len, uio,
- disposable);
- } else
-#endif /* ZERO_COPY_SOCKETS */
error = uiomove(mtod(m, char *) + moff, (int)len, uio);
SOCKBUF_LOCK(&so->so_rcv);
if (error) {
@@ -1760,6 +1710,7 @@
nextrecord = m->m_nextpkt;
sbfree(&so->so_rcv, m);
if (mp != NULL) {
+ m->m_nextpkt = NULL;
*mp = m;
mp = &m->m_next;
so->so_rcv.sb_mb = m = m->m_next;
@@ -1780,26 +1731,26 @@
int copy_flag;
if (flags & MSG_DONTWAIT)
- copy_flag = M_DONTWAIT;
+ copy_flag = M_NOWAIT;
else
copy_flag = M_WAIT;
- if (copy_flag == M_WAIT)
+ if (copy_flag == M_WAITOK)
SOCKBUF_UNLOCK(&so->so_rcv);
*mp = m_copym(m, 0, len, copy_flag);
- if (copy_flag == M_WAIT)
+ if (copy_flag == M_WAITOK)
SOCKBUF_LOCK(&so->so_rcv);
- if (*mp == NULL) {
- /*
- * m_copym() couldn't
+ if (*mp == NULL) {
+ /*
+ * m_copym() couldn't
* allocate an mbuf. Adjust
* uio_resid back (it was
* adjusted down by len
* bytes, which we didn't end
* up "copying" over).
- */
- uio->uio_resid += len;
- break;
- }
+ */
+ uio->uio_resid += len;
+ break;
+ }
}
m->m_data += len;
m->m_len -= len;
@@ -1832,7 +1783,8 @@
while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
!sosendallatonce(so) && nextrecord == NULL) {
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
- if (so->so_error || so->so_rcv.sb_state & SBS_CANTRCVMORE)
+ if (so->so_error ||
+ so->so_rcv.sb_state & SBS_CANTRCVMORE)
break;
/*
* Notify the protocol that some data has been
@@ -1915,6 +1867,7 @@
/*
* Optimized version of soreceive() for stream (TCP) sockets.
+ * XXXAO: (MSG_WAITALL | MSG_PEEK) isn't properly handled.
*/
int
soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio,
@@ -1993,7 +1946,7 @@
/* Socket buffer got some data that we shall deliver now. */
if (sb->sb_cc > 0 && !(flags & MSG_WAITALL) &&
- ((sb->sb_flags & SS_NBIO) ||
+ ((so->so_state & SS_NBIO) ||
(flags & (MSG_DONTWAIT|MSG_NBIO)) ||
sb->sb_cc >= sb->sb_lowat ||
sb->sb_cc >= uio->uio_resid ||
@@ -2003,7 +1956,7 @@
/* On MSG_WAITALL we must wait until all data or error arrives. */
if ((flags & MSG_WAITALL) &&
- (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_lowat))
+ (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_hiwat))
goto deliver;
/*
@@ -2029,7 +1982,11 @@
if (mp0 != NULL) {
/* Dequeue as many mbufs as possible. */
if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
- for (*mp0 = m = sb->sb_mb;
+ if (*mp0 == NULL)
+ *mp0 = sb->sb_mb;
+ else
+ m_cat(*mp0, sb->sb_mb);
+ for (m = sb->sb_mb;
m != NULL && m->m_len <= len;
m = m->m_next) {
len -= m->m_len;
@@ -2037,10 +1994,11 @@
sbfree(sb, m);
n = m;
}
+ n->m_next = NULL;
sb->sb_mb = m;
+ sb->sb_lastrecord = sb->sb_mb;
if (sb->sb_mb == NULL)
SB_EMPTY_FIXUP(sb);
- n->m_next = NULL;
}
/* Copy the remainder. */
if (len > 0) {
@@ -2047,13 +2005,13 @@
KASSERT(sb->sb_mb != NULL,
("%s: len > 0 && sb->sb_mb empty", __func__));
- m = m_copym(sb->sb_mb, 0, len, M_DONTWAIT);
+ m = m_copym(sb->sb_mb, 0, len, M_NOWAIT);
if (m == NULL)
len = 0; /* Don't flush data from sockbuf. */
else
- uio->uio_resid -= m->m_len;
+ uio->uio_resid -= len;
if (*mp0 != NULL)
- n->m_next = m;
+ m_cat(*mp0, m);
else
*mp0 = m;
if (*mp0 == NULL) {
@@ -2236,7 +2194,8 @@
* Process one or more MT_CONTROL mbufs present before any data mbufs
* in the first mbuf chain on the socket buffer. We call into the
* protocol to perform externalization (or freeing if controlp ==
- * NULL).
+ * NULL). In some cases there can be only MT_CONTROL mbufs without
+ * MT_DATA mbufs.
*/
if (m->m_type == MT_CONTROL) {
struct mbuf *cm = NULL, *cmn;
@@ -2254,7 +2213,7 @@
cm->m_next = NULL;
if (pr->pr_domain->dom_externalize != NULL) {
error = (*pr->pr_domain->dom_externalize)
- (cm, controlp);
+ (cm, controlp, flags);
} else if (controlp != NULL)
*controlp = cm;
else
@@ -2266,8 +2225,8 @@
cm = cmn;
}
}
- KASSERT(m->m_type == MT_DATA, ("soreceive_dgram: !data"));
-
+ KASSERT(m == NULL || m->m_type == MT_DATA,
+ ("soreceive_dgram: !data"));
while (m != NULL && uio->uio_resid > 0) {
len = uio->uio_resid;
if (len > m->m_len)
@@ -2284,9 +2243,10 @@
m->m_len -= len;
}
}
- if (m != NULL)
+ if (m != NULL) {
flags |= MSG_TRUNC;
- m_freem(m);
+ m_freem(m);
+ }
if (flagsp != NULL)
*flagsp |= flags;
return (0);
@@ -2315,16 +2275,17 @@
return (EINVAL);
CURVNET_SET(so->so_vnet);
- if (pr->pr_usrreqs->pru_flush != NULL) {
- (*pr->pr_usrreqs->pru_flush)(so, how);
- }
+ if (pr->pr_usrreqs->pru_flush != NULL)
+ (*pr->pr_usrreqs->pru_flush)(so, how);
if (how != SHUT_WR)
sorflush(so);
if (how != SHUT_RD) {
error = (*pr->pr_usrreqs->pru_shutdown)(so);
+ wakeup(&so->so_timeo);
CURVNET_RESTORE();
return (error);
}
+ wakeup(&so->so_timeo);
CURVNET_RESTORE();
return (0);
}
@@ -2334,7 +2295,7 @@
{
struct sockbuf *sb = &so->so_rcv;
struct protosw *pr = so->so_proto;
- struct sockbuf asb;
+ struct socket aso;
VNET_SO_ASSERT(so);
@@ -2359,8 +2320,9 @@
* and mutex data unchanged.
*/
SOCKBUF_LOCK(sb);
- bzero(&asb, offsetof(struct sockbuf, sb_startzero));
- bcopy(&sb->sb_startzero, &asb.sb_startzero,
+ bzero(&aso, sizeof(aso));
+ aso.so_pcb = so->so_pcb;
+ bcopy(&sb->sb_startzero, &aso.so_rcv.sb_startzero,
sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
bzero(&sb->sb_startzero,
sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
@@ -2368,12 +2330,16 @@
sbunlock(sb);
/*
- * Dispose of special rights and flush the socket buffer. Don't call
- * any unsafe routines (that rely on locks being initialized) on asb.
+ * Dispose of special rights and flush the copied socket. Don't call
+ * any unsafe routines (that rely on locks being initialized) on aso.
*/
- if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
- (*pr->pr_domain->dom_dispose)(asb.sb_mb);
- sbrelease_internal(&asb, so);
+ if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) {
+ if (pr->pr_domain->dom_family == AF_LOCAL)
+ unp_dispose_so(&aso);
+ else
+ (*pr->pr_domain->dom_dispose)(aso.so_rcv.sb_mb);
+ }
+ sbrelease_internal(&aso.so_rcv, so);
}
/*
@@ -2432,7 +2398,7 @@
int error, optval;
struct linger l;
struct timeval tv;
- u_long val;
+ sbintime_t val;
uint32_t val32;
#ifdef MAC
struct mac extmac;
@@ -2484,7 +2450,7 @@
case SO_NO_DDP:
case SO_NO_OFFLOAD:
error = sooptcopyin(sopt, &optval, sizeof optval,
- sizeof optval);
+ sizeof optval);
if (error)
goto bad;
SOCK_LOCK(so);
@@ -2497,7 +2463,7 @@
case SO_SETFIB:
error = sooptcopyin(sopt, &optval, sizeof optval,
- sizeof optval);
+ sizeof optval);
if (error)
goto bad;
@@ -2515,7 +2481,7 @@
case SO_USER_COOKIE:
error = sooptcopyin(sopt, &val32, sizeof val32,
- sizeof val32);
+ sizeof val32);
if (error)
goto bad;
so->so_user_cookie = val32;
@@ -2526,7 +2492,7 @@
case SO_SNDLOWAT:
case SO_RCVLOWAT:
error = sooptcopyin(sopt, &optval, sizeof optval,
- sizeof optval);
+ sizeof optval);
if (error)
goto bad;
@@ -2589,23 +2555,15 @@
sizeof tv);
if (error)
goto bad;
-
- /* assert(hz > 0); */
- if (tv.tv_sec < 0 || tv.tv_sec > INT_MAX / hz ||
- tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
+ if (tv.tv_sec < 0 || tv.tv_usec < 0 ||
+ tv.tv_usec >= 1000000) {
error = EDOM;
goto bad;
}
- /* assert(tick > 0); */
- /* assert(ULONG_MAX - INT_MAX >= 1000000); */
- val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick;
- if (val > INT_MAX) {
- error = EDOM;
- goto bad;
- }
- if (val == 0 && tv.tv_usec != 0)
- val = 1;
-
+ if (tv.tv_sec > INT32_MAX)
+ val = SBT_MAX;
+ else
+ val = tvtosbt(tv);
switch (sopt->sopt_name) {
case SO_SNDTIMEO:
so->so_snd.sb_timeo = val;
@@ -2755,11 +2713,8 @@
case SO_SNDTIMEO:
case SO_RCVTIMEO:
- optval = (sopt->sopt_name == SO_SNDTIMEO ?
- so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
-
- tv.tv_sec = optval / hz;
- tv.tv_usec = (optval % hz) * tick;
+ tv = sbttotv(sopt->sopt_name == SO_SNDTIMEO ?
+ so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
#ifdef COMPAT_FREEBSD32
if (SV_CURPROC_FLAG(SV_ILP32)) {
struct timeval32 tv32;
@@ -2828,7 +2783,6 @@
return (error);
}
-/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
int
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
{
@@ -2835,11 +2789,11 @@
struct mbuf *m, *m_prev;
int sopt_size = sopt->sopt_valsize;
- MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
+ MGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA);
if (m == NULL)
return ENOBUFS;
if (sopt_size > MLEN) {
- MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
+ MCLGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT);
if ((m->m_flags & M_EXT) == 0) {
m_free(m);
return ENOBUFS;
@@ -2853,14 +2807,14 @@
m_prev = m;
while (sopt_size) {
- MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
+ MGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA);
if (m == NULL) {
m_freem(*mp);
return ENOBUFS;
}
if (sopt_size > MLEN) {
- MCLGET(m, sopt->sopt_td != NULL ? M_WAIT :
- M_DONTWAIT);
+ MCLGET(m, sopt->sopt_td != NULL ? M_WAITOK :
+ M_NOWAIT);
if ((m->m_flags & M_EXT) == 0) {
m_freem(m);
m_freem(*mp);
@@ -2877,7 +2831,6 @@
return (0);
}
-/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
{
@@ -2890,7 +2843,7 @@
int error;
error = copyin(sopt->sopt_val, mtod(m, char *),
- m->m_len);
+ m->m_len);
if (error != 0) {
m_freem(m0);
return(error);
@@ -2906,7 +2859,6 @@
return (0);
}
-/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
{
@@ -2920,7 +2872,7 @@
int error;
error = copyout(mtod(m, char *), sopt->sopt_val,
- m->m_len);
+ m->m_len);
if (error != 0) {
m_freem(m0);
return(error);
@@ -2927,10 +2879,10 @@
}
} else
bcopy(mtod(m, char *), sopt->sopt_val, m->m_len);
- sopt->sopt_valsize -= m->m_len;
- sopt->sopt_val = (char *)sopt->sopt_val + m->m_len;
- valsize += m->m_len;
- m = m->m_next;
+ sopt->sopt_valsize -= m->m_len;
+ sopt->sopt_val = (char *)sopt->sopt_val + m->m_len;
+ valsize += m->m_len;
+ m = m->m_next;
}
if (m != NULL) {
/* enough soopt buffer should be given from user-land */
@@ -3067,6 +3019,14 @@
}
int
+pru_bindat_notsupp(int fd, struct socket *so, struct sockaddr *nam,
+ struct thread *td)
+{
+
+ return EOPNOTSUPP;
+}
+
+int
pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td)
{
@@ -3074,6 +3034,14 @@
}
int
+pru_connectat_notsupp(int fd, struct socket *so, struct sockaddr *nam,
+ struct thread *td)
+{
+
+ return EOPNOTSUPP;
+}
+
+int
pru_connect2_notsupp(struct socket *so1, struct socket *so2)
{
@@ -3258,7 +3226,7 @@
struct socket *so = kn->kn_fp->f_data;
kn->kn_data = so->so_qlen;
- return (! TAILQ_EMPTY(&so->so_comp));
+ return (!TAILQ_EMPTY(&so->so_comp));
}
int
@@ -3272,24 +3240,6 @@
return (0);
}
-static int
-sysctl_somaxconn(SYSCTL_HANDLER_ARGS)
-{
- int error;
- int val;
-
- val = somaxconn;
- error = sysctl_handle_int(oidp, &val, 0, req);
- if (error || !req->newptr )
- return (error);
-
- if (val < 1 || val > USHRT_MAX)
- return (EINVAL);
-
- somaxconn = val;
- return (0);
-}
-
/*
* These functions are used by protocols to notify the socket layer (and its
* consumers) of state changes in the sockets driven by protocol-side events.
@@ -3335,7 +3285,7 @@
void
soisconnected(struct socket *so)
{
- struct socket *head;
+ struct socket *head;
int ret;
restart:
@@ -3363,7 +3313,7 @@
head->so_accf->so_accept_filter_arg);
so->so_options &= ~SO_ACCEPTFILTER;
ret = head->so_accf->so_accept_filter->accf_callback(so,
- head->so_accf->so_accept_filter_arg, M_DONTWAIT);
+ head->so_accf->so_accept_filter_arg, M_NOWAIT);
if (ret == SU_ISCONNECTED)
soupcall_clear(so, SO_RCV);
SOCK_UNLOCK(so);
@@ -3440,7 +3390,7 @@
int (*func)(struct socket *, void *, int), void *arg)
{
struct sockbuf *sb;
-
+
switch (which) {
case SO_RCV:
sb = &so->so_rcv;
@@ -3524,9 +3474,10 @@
*/
void
-so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *), void *arg)
+so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *),
+ void *arg)
{
-
+
TAILQ_FOREACH(so, &so->so_comp, so_list)
func(so, arg);
}
@@ -3646,6 +3597,7 @@
void
so_lock(struct socket *so)
{
+
SOCK_LOCK(so);
}
@@ -3652,5 +3604,6 @@
void
so_unlock(struct socket *so)
{
+
SOCK_UNLOCK(so);
}
Modified: trunk/sys/kern/uipc_syscalls.c
===================================================================
--- trunk/sys/kern/uipc_syscalls.c 2018-05-26 14:25:17 UTC (rev 9953)
+++ trunk/sys/kern/uipc_syscalls.c 2018-05-26 14:25:55 UTC (rev 9954)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1989, 1990, 1993
* The Regents of the University of California. All rights reserved.
@@ -33,18 +34,18 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/uipc_syscalls.c 321021 2017-07-15 17:28:03Z dchagin $");
#include "opt_capsicum.h"
#include "opt_inet.h"
#include "opt_inet6.h"
-#include "opt_sctp.h"
#include "opt_compat.h"
#include "opt_ktrace.h"
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/capability.h>
+#include <sys/capsicum.h>
+#include <sys/condvar.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
@@ -57,9 +58,11 @@
#include <sys/file.h>
#include <sys/filio.h>
#include <sys/jail.h>
+#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
+#include <sys/rwlock.h>
#include <sys/sf_buf.h>
#include <sys/sysent.h>
#include <sys/socket.h>
@@ -85,77 +88,80 @@
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
-#include <vm/vm_pageout.h>
+#include <vm/vm_pager.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
-#if defined(INET) || defined(INET6)
-#ifdef SCTP
-#include <netinet/sctp.h>
-#include <netinet/sctp_peeloff.h>
-#endif /* SCTP */
-#endif /* INET || INET6 */
+/*
+ * Flags for accept1() and kern_accept4(), in addition to SOCK_CLOEXEC
+ * and SOCK_NONBLOCK.
+ */
+#define ACCEPT4_INHERIT 0x1
+#define ACCEPT4_COMPAT 0x2
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
-static int accept1(struct thread *td, struct accept_args *uap, int compat);
-static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
+static int accept1(struct thread *td, int s, struct sockaddr *uname,
+ socklen_t *anamelen, int flags);
+static int do_sendfile(struct thread *td, struct sendfile_args *uap,
+ int compat);
static int getsockname1(struct thread *td, struct getsockname_args *uap,
int compat);
static int getpeername1(struct thread *td, struct getpeername_args *uap,
int compat);
+counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];
+
/*
- * NSFBUFS-related variables and associated sysctls
+ * sendfile(2)-related variables and associated sysctls
*/
-int nsfbufs;
-int nsfbufspeak;
-int nsfbufsused;
+static SYSCTL_NODE(_kern_ipc, OID_AUTO, sendfile, CTLFLAG_RW, 0,
+ "sendfile(2) tunables");
+static int sfreadahead = 1;
+SYSCTL_INT(_kern_ipc_sendfile, OID_AUTO, readahead, CTLFLAG_RW,
+ &sfreadahead, 0, "Number of sendfile(2) read-ahead MAXBSIZE blocks");
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
- "Maximum number of sendfile(2) sf_bufs available");
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
- "Number of sendfile(2) sf_bufs at peak usage");
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
- "Number of sendfile(2) sf_bufs in use");
+static void
+sfstat_init(const void *unused)
+{
+
+ COUNTER_ARRAY_ALLOC(sfstat, sizeof(struct sfstat) / sizeof(uint64_t),
+ M_WAITOK);
+}
+SYSINIT(sfstat, SI_SUB_MBUF, SI_ORDER_FIRST, sfstat_init, NULL);
+
+static int
+sfstat_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct sfstat s;
+
+ COUNTER_ARRAY_COPY(sfstat, &s, sizeof(s) / sizeof(uint64_t));
+ if (req->newptr)
+ COUNTER_ARRAY_ZERO(sfstat, sizeof(s) / sizeof(uint64_t));
+ return (SYSCTL_OUT(req, &s, sizeof(s)));
+}
+SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW,
+ NULL, 0, sfstat_sysctl, "I", "sendfile statistics");
+
/*
- * Convert a user file descriptor to a kernel file entry and check that, if
- * it is a capability, the right rights are present. A reference on the file
- * entry is held upon returning.
+ * Convert a user file descriptor to a kernel file entry and check if required
+ * capability rights are present.
+ * A reference on the file entry is held upon returning.
*/
-static int
-getsock_cap(struct filedesc *fdp, int fd, cap_rights_t rights,
+int
+getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp,
struct file **fpp, u_int *fflagp)
{
struct file *fp;
-#ifdef CAPABILITIES
- struct file *fp_fromcap;
int error;
-#endif
- fp = NULL;
- if ((fdp == NULL) || ((fp = fget_unlocked(fdp, fd)) == NULL))
- return (EBADF);
-#ifdef CAPABILITIES
- /*
- * If the file descriptor is for a capability, test rights and use
- * the file descriptor referenced by the capability.
- */
- error = cap_funwrap(fp, rights, &fp_fromcap);
- if (error) {
- fdrop(fp, curthread);
+ error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, 0, &fp, NULL);
+ if (error != 0)
return (error);
- }
- if (fp != fp_fromcap) {
- fhold(fp_fromcap);
- fdrop(fp, curthread);
- fp = fp_fromcap;
- }
-#endif /* CAPABILITIES */
if (fp->f_type != DTYPE_SOCKET) {
- fdrop(fp, curthread);
+ fdrop(fp, td);
return (ENOTSOCK);
}
if (fflagp != NULL)
@@ -180,29 +186,42 @@
int protocol;
} */ *uap;
{
- struct filedesc *fdp;
struct socket *so;
struct file *fp;
- int fd, error;
+ int fd, error, type, oflag, fflag;
AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol);
+
+ type = uap->type;
+ oflag = 0;
+ fflag = 0;
+ if ((type & SOCK_CLOEXEC) != 0) {
+ type &= ~SOCK_CLOEXEC;
+ oflag |= O_CLOEXEC;
+ }
+ if ((type & SOCK_NONBLOCK) != 0) {
+ type &= ~SOCK_NONBLOCK;
+ fflag |= FNONBLOCK;
+ }
+
#ifdef MAC
- error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type,
+ error = mac_socket_check_create(td->td_ucred, uap->domain, type,
uap->protocol);
- if (error)
+ if (error != 0)
return (error);
#endif
- fdp = td->td_proc->p_fd;
- error = falloc(td, &fp, &fd, 0);
- if (error)
+ error = falloc(td, &fp, &fd, oflag);
+ if (error != 0)
return (error);
/* An extra reference on `fp' has been held for us by falloc(). */
- error = socreate(uap->domain, &so, uap->type, uap->protocol,
+ error = socreate(uap->domain, &so, type, uap->protocol,
td->td_ucred, td);
- if (error) {
- fdclose(fdp, fp, fd, td);
+ if (error != 0) {
+ fdclose(td, fp, fd);
} else {
- finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops);
+ finit(fp, FREAD | FWRITE | fflag, DTYPE_SOCKET, so, &socketops);
+ if ((fflag & FNONBLOCK) != 0)
+ (void) fo_ioctl(fp, FIONBIO, &fflag, td->td_ucred, td);
td->td_retval[0] = fd;
}
fdrop(fp, td);
@@ -222,27 +241,27 @@
struct sockaddr *sa;
int error;
- if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
- return (error);
-
- error = kern_bind(td, uap->s, sa);
- free(sa, M_SONAME);
+ error = getsockaddr(&sa, uap->name, uap->namelen);
+ if (error == 0) {
+ error = kern_bind(td, uap->s, sa);
+ free(sa, M_SONAME);
+ }
return (error);
}
-int
-kern_bind(td, fd, sa)
- struct thread *td;
- int fd;
- struct sockaddr *sa;
+static int
+kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa)
{
struct socket *so;
struct file *fp;
+ cap_rights_t rights;
int error;
AUDIT_ARG_FD(fd);
- error = getsock_cap(td->td_proc->p_fd, fd, CAP_BIND, &fp, NULL);
- if (error)
+ AUDIT_ARG_SOCKADDR(td, dirfd, sa);
+ error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_BIND),
+ &fp, NULL);
+ if (error != 0)
return (error);
so = fp->f_data;
#ifdef KTRACE
@@ -251,15 +270,50 @@
#endif
#ifdef MAC
error = mac_socket_check_bind(td->td_ucred, so, sa);
- if (error == 0)
+ if (error == 0) {
#endif
- error = sobind(so, sa, td);
+ if (dirfd == AT_FDCWD)
+ error = sobind(so, sa, td);
+ else
+ error = sobindat(dirfd, so, sa, td);
+#ifdef MAC
+ }
+#endif
fdrop(fp, td);
return (error);
}
+int
+kern_bind(struct thread *td, int fd, struct sockaddr *sa)
+{
+
+ return (kern_bindat(td, AT_FDCWD, fd, sa));
+}
+
/* ARGSUSED */
int
+sys_bindat(td, uap)
+ struct thread *td;
+ struct bindat_args /* {
+ int fd;
+ int s;
+ caddr_t name;
+ int namelen;
+ } */ *uap;
+{
+ struct sockaddr *sa;
+ int error;
+
+ error = getsockaddr(&sa, uap->name, uap->namelen);
+ if (error == 0) {
+ error = kern_bindat(td, uap->fd, uap->s, sa);
+ free(sa, M_SONAME);
+ }
+ return (error);
+}
+
+/* ARGSUSED */
+int
sys_listen(td, uap)
struct thread *td;
struct listen_args /* {
@@ -269,10 +323,12 @@
{
struct socket *so;
struct file *fp;
+ cap_rights_t rights;
int error;
AUDIT_ARG_FD(uap->s);
- error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_LISTEN, &fp, NULL);
+ error = getsock_cap(td, uap->s, cap_rights_init(&rights, CAP_LISTEN),
+ &fp, NULL);
if (error == 0) {
so = fp->f_data;
#ifdef MAC
@@ -289,14 +345,12 @@
* accept1()
*/
static int
-accept1(td, uap, compat)
+accept1(td, s, uname, anamelen, flags)
struct thread *td;
- struct accept_args /* {
- int s;
- struct sockaddr * __restrict name;
- socklen_t * __restrict anamelen;
- } */ *uap;
- int compat;
+ int s;
+ struct sockaddr *uname;
+ socklen_t *anamelen;
+ int flags;
{
struct sockaddr *name;
socklen_t namelen;
@@ -303,38 +357,37 @@
struct file *fp;
int error;
- if (uap->name == NULL)
- return (kern_accept(td, uap->s, NULL, NULL, NULL));
+ if (uname == NULL)
+ return (kern_accept4(td, s, NULL, NULL, flags, NULL));
- error = copyin(uap->anamelen, &namelen, sizeof (namelen));
- if (error)
+ error = copyin(anamelen, &namelen, sizeof (namelen));
+ if (error != 0)
return (error);
- error = kern_accept(td, uap->s, &name, &namelen, &fp);
+ error = kern_accept4(td, s, &name, &namelen, flags, &fp);
/*
* return a namelen of zero for older code which might
* ignore the return value from accept.
*/
- if (error) {
- (void) copyout(&namelen,
- uap->anamelen, sizeof(*uap->anamelen));
+ if (error != 0) {
+ (void) copyout(&namelen, anamelen, sizeof(*anamelen));
return (error);
}
- if (error == 0 && name != NULL) {
+ if (error == 0 && uname != NULL) {
#ifdef COMPAT_OLDSOCK
- if (compat)
+ if (flags & ACCEPT4_COMPAT)
((struct osockaddr *)name)->sa_family =
name->sa_family;
#endif
- error = copyout(name, uap->name, namelen);
+ error = copyout(name, uname, namelen);
}
if (error == 0)
- error = copyout(&namelen, uap->anamelen,
+ error = copyout(&namelen, anamelen,
sizeof(namelen));
- if (error)
- fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
+ if (error != 0)
+ fdclose(td, fp, td->td_retval[0]);
fdrop(fp, td);
free(name, M_SONAME);
return (error);
@@ -344,26 +397,28 @@
kern_accept(struct thread *td, int s, struct sockaddr **name,
socklen_t *namelen, struct file **fp)
{
- struct filedesc *fdp;
+ return (kern_accept4(td, s, name, namelen, ACCEPT4_INHERIT, fp));
+}
+
+int
+kern_accept4(struct thread *td, int s, struct sockaddr **name,
+ socklen_t *namelen, int flags, struct file **fp)
+{
struct file *headfp, *nfp = NULL;
struct sockaddr *sa = NULL;
- int error;
struct socket *head, *so;
- int fd;
+ cap_rights_t rights;
u_int fflag;
pid_t pgid;
- int tmp;
+ int error, fd, tmp;
- if (name) {
+ if (name != NULL)
*name = NULL;
- if (*namelen < 0)
- return (EINVAL);
- }
AUDIT_ARG_FD(s);
- fdp = td->td_proc->p_fd;
- error = getsock_cap(fdp, s, CAP_ACCEPT, &headfp, &fflag);
- if (error)
+ error = getsock_cap(td, s, cap_rights_init(&rights, CAP_ACCEPT),
+ &headfp, &fflag);
+ if (error != 0)
return (error);
head = headfp->f_data;
if ((head->so_options & SO_ACCEPTCONN) == 0) {
@@ -375,8 +430,8 @@
if (error != 0)
goto done;
#endif
- error = falloc(td, &nfp, &fd, 0);
- if (error)
+ error = falloc(td, &nfp, &fd, (flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0);
+ if (error != 0)
goto done;
ACCEPT_LOCK();
if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
@@ -391,7 +446,7 @@
}
error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
"accept", 0);
- if (error) {
+ if (error != 0) {
ACCEPT_UNLOCK();
goto noconnection;
}
@@ -416,7 +471,10 @@
TAILQ_REMOVE(&head->so_comp, so, so_list);
head->so_qlen--;
- so->so_state |= (head->so_state & SS_NBIO);
+ if (flags & ACCEPT4_INHERIT)
+ so->so_state |= (head->so_state & SS_NBIO);
+ else
+ so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0;
so->so_qstate &= ~SQ_COMP;
so->so_head = NULL;
@@ -429,9 +487,15 @@
/* connection has been removed from the listen queue */
KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
- pgid = fgetown(&head->so_sigio);
- if (pgid != 0)
- fsetown(pgid, &so->so_sigio);
+ if (flags & ACCEPT4_INHERIT) {
+ pgid = fgetown(&head->so_sigio);
+ if (pgid != 0)
+ fsetown(pgid, &so->so_sigio);
+ } else {
+ fflag &= ~(FNONBLOCK | FASYNC);
+ if (flags & SOCK_NONBLOCK)
+ fflag |= FNONBLOCK;
+ }
finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
/* Sync socket nonblocking/async state with file flags */
@@ -441,7 +505,7 @@
(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
sa = 0;
error = soaccept(so, &sa);
- if (error) {
+ if (error != 0) {
/*
* return a namelen of zero for older code which might
* ignore the return value from accept.
@@ -455,6 +519,7 @@
*namelen = 0;
goto done;
}
+ AUDIT_ARG_SOCKADDR(td, AT_FDCWD, sa);
if (name) {
/* check sa_len before it is destroyed */
if (*namelen > sa->sa_len)
@@ -467,15 +532,14 @@
sa = NULL;
}
noconnection:
- if (sa)
- free(sa, M_SONAME);
+ free(sa, M_SONAME);
/*
* close the new descriptor, assuming someone hasn't ripped it
* out from under us.
*/
- if (error)
- fdclose(fdp, nfp, fd, td);
+ if (error != 0)
+ fdclose(td, nfp, fd);
/*
* Release explicitly held references before returning. We return
@@ -501,9 +565,21 @@
struct accept_args *uap;
{
- return (accept1(td, uap, 0));
+ return (accept1(td, uap->s, uap->name, uap->anamelen, ACCEPT4_INHERIT));
}
+int
+sys_accept4(td, uap)
+ struct thread *td;
+ struct accept4_args *uap;
+{
+
+ if (uap->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+ return (EINVAL);
+
+ return (accept1(td, uap->s, uap->name, uap->anamelen, uap->flags));
+}
+
#ifdef COMPAT_OLDSOCK
int
oaccept(td, uap)
@@ -511,7 +587,8 @@
struct accept_args *uap;
{
- return (accept1(td, uap, 1));
+ return (accept1(td, uap->s, uap->name, uap->anamelen,
+ ACCEPT4_INHERIT | ACCEPT4_COMPAT));
}
#endif /* COMPAT_OLDSOCK */
@@ -529,29 +606,26 @@
int error;
error = getsockaddr(&sa, uap->name, uap->namelen);
- if (error)
- return (error);
-
- error = kern_connect(td, uap->s, sa);
- free(sa, M_SONAME);
+ if (error == 0) {
+ error = kern_connect(td, uap->s, sa);
+ free(sa, M_SONAME);
+ }
return (error);
}
-
-int
-kern_connect(td, fd, sa)
- struct thread *td;
- int fd;
- struct sockaddr *sa;
+static int
+kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa)
{
struct socket *so;
struct file *fp;
- int error;
- int interrupted = 0;
+ cap_rights_t rights;
+ int error, interrupted = 0;
AUDIT_ARG_FD(fd);
- error = getsock_cap(td->td_proc->p_fd, fd, CAP_CONNECT, &fp, NULL);
- if (error)
+ AUDIT_ARG_SOCKADDR(td, dirfd, sa);
+ error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_CONNECT),
+ &fp, NULL);
+ if (error != 0)
return (error);
so = fp->f_data;
if (so->so_state & SS_ISCONNECTING) {
@@ -564,11 +638,14 @@
#endif
#ifdef MAC
error = mac_socket_check_connect(td->td_ucred, so, sa);
- if (error)
+ if (error != 0)
goto bad;
#endif
- error = soconnect(so, sa, td);
- if (error)
+ if (dirfd == AT_FDCWD)
+ error = soconnect(so, sa, td);
+ else
+ error = soconnectat(dirfd, so, sa, td);
+ if (error != 0)
goto bad;
if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
error = EINPROGRESS;
@@ -578,7 +655,7 @@
while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
"connec", 0);
- if (error) {
+ if (error != 0) {
if (error == EINTR || error == ERESTART)
interrupted = 1;
break;
@@ -600,41 +677,80 @@
}
int
+kern_connect(struct thread *td, int fd, struct sockaddr *sa)
+{
+
+ return (kern_connectat(td, AT_FDCWD, fd, sa));
+}
+
+/* ARGSUSED */
+int
+sys_connectat(td, uap)
+ struct thread *td;
+ struct connectat_args /* {
+ int fd;
+ int s;
+ caddr_t name;
+ int namelen;
+ } */ *uap;
+{
+ struct sockaddr *sa;
+ int error;
+
+ error = getsockaddr(&sa, uap->name, uap->namelen);
+ if (error == 0) {
+ error = kern_connectat(td, uap->fd, uap->s, sa);
+ free(sa, M_SONAME);
+ }
+ return (error);
+}
+
+int
kern_socketpair(struct thread *td, int domain, int type, int protocol,
int *rsv)
{
- struct filedesc *fdp = td->td_proc->p_fd;
struct file *fp1, *fp2;
struct socket *so1, *so2;
- int fd, error;
+ int fd, error, oflag, fflag;
AUDIT_ARG_SOCKET(domain, type, protocol);
+
+ oflag = 0;
+ fflag = 0;
+ if ((type & SOCK_CLOEXEC) != 0) {
+ type &= ~SOCK_CLOEXEC;
+ oflag |= O_CLOEXEC;
+ }
+ if ((type & SOCK_NONBLOCK) != 0) {
+ type &= ~SOCK_NONBLOCK;
+ fflag |= FNONBLOCK;
+ }
#ifdef MAC
/* We might want to have a separate check for socket pairs. */
error = mac_socket_check_create(td->td_ucred, domain, type,
protocol);
- if (error)
+ if (error != 0)
return (error);
#endif
error = socreate(domain, &so1, type, protocol, td->td_ucred, td);
- if (error)
+ if (error != 0)
return (error);
error = socreate(domain, &so2, type, protocol, td->td_ucred, td);
- if (error)
+ if (error != 0)
goto free1;
/* On success extra reference to `fp1' and 'fp2' is set by falloc. */
- error = falloc(td, &fp1, &fd, 0);
- if (error)
+ error = falloc(td, &fp1, &fd, oflag);
+ if (error != 0)
goto free2;
rsv[0] = fd;
fp1->f_data = so1; /* so1 already has ref count */
- error = falloc(td, &fp2, &fd, 0);
- if (error)
+ error = falloc(td, &fp2, &fd, oflag);
+ if (error != 0)
goto free3;
fp2->f_data = so2; /* so2 already has ref count */
rsv[1] = fd;
error = soconnect2(so1, so2);
- if (error)
+ if (error != 0)
goto free4;
if (type == SOCK_DGRAM) {
/*
@@ -641,19 +757,25 @@
* Datagram socket connection is asymmetric.
*/
error = soconnect2(so2, so1);
- if (error)
+ if (error != 0)
goto free4;
}
- finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops);
- finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops);
+ finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data,
+ &socketops);
+ finit(fp2, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp2->f_data,
+ &socketops);
+ if ((fflag & FNONBLOCK) != 0) {
+ (void) fo_ioctl(fp1, FIONBIO, &fflag, td->td_ucred, td);
+ (void) fo_ioctl(fp2, FIONBIO, &fflag, td->td_ucred, td);
+ }
fdrop(fp1, td);
fdrop(fp2, td);
return (0);
free4:
- fdclose(fdp, fp2, rsv[1], td);
+ fdclose(td, fp2, rsv[1]);
fdrop(fp2, td);
free3:
- fdclose(fdp, fp1, rsv[0], td);
+ fdclose(td, fp1, rsv[0]);
fdrop(fp1, td);
free2:
if (so2 != NULL)
@@ -671,10 +793,10 @@
error = kern_socketpair(td, uap->domain, uap->type,
uap->protocol, sv);
- if (error)
+ if (error != 0)
return (error);
error = copyout(sv, uap->rsv, 2 * sizeof(int));
- if (error) {
+ if (error != 0) {
(void)kern_close(td, sv[0]);
(void)kern_close(td, sv[1]);
}
@@ -699,7 +821,7 @@
if (mp->msg_name != NULL) {
error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
- if (error) {
+ if (error != 0) {
to = NULL;
goto bad;
}
@@ -719,13 +841,13 @@
}
error = sockargs(&control, mp->msg_control,
mp->msg_controllen, MT_CONTROL);
- if (error)
+ if (error != 0)
goto bad;
#ifdef COMPAT_OLDSOCK
if (mp->msg_flags == MSG_COMPAT) {
struct cmsghdr *cm;
- M_PREPEND(control, sizeof(*cm), M_WAIT);
+ M_PREPEND(control, sizeof(*cm), M_WAITOK);
cm = mtod(control, struct cmsghdr *);
cm->cmsg_len = control->m_len;
cm->cmsg_level = SOL_SOCKET;
@@ -739,8 +861,7 @@
error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
bad:
- if (to)
- free(to, M_SONAME);
+ free(to, M_SONAME);
return (error);
}
@@ -757,19 +878,21 @@
struct uio auio;
struct iovec *iov;
struct socket *so;
- int i, error;
- ssize_t len;
cap_rights_t rights;
#ifdef KTRACE
struct uio *ktruio = NULL;
#endif
+ ssize_t len;
+ int i, error;
AUDIT_ARG_FD(s);
- rights = CAP_WRITE;
- if (mp->msg_name != NULL)
- rights |= CAP_CONNECT;
- error = getsock_cap(td->td_proc->p_fd, s, rights, &fp, NULL);
- if (error)
+ cap_rights_init(&rights, CAP_SEND);
+ if (mp->msg_name != NULL) {
+ AUDIT_ARG_SOCKADDR(td, AT_FDCWD, mp->msg_name);
+ cap_rights_set(&rights, CAP_CONNECT);
+ }
+ error = getsock_cap(td, s, &rights, &fp, NULL);
+ if (error != 0)
return (error);
so = (struct socket *)fp->f_data;
@@ -781,11 +904,11 @@
if (mp->msg_name != NULL) {
error = mac_socket_check_connect(td->td_ucred, so,
mp->msg_name);
- if (error)
+ if (error != 0)
goto bad;
}
error = mac_socket_check_send(td->td_ucred, so);
- if (error)
+ if (error != 0)
goto bad;
#endif
@@ -809,7 +932,7 @@
#endif
len = auio.uio_resid;
error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
- if (error) {
+ if (error != 0) {
if (auio.uio_resid != len && (error == ERESTART ||
error == EINTR || error == EWOULDBLOCK))
error = 0;
@@ -848,7 +971,6 @@
{
struct msghdr msg;
struct iovec aiov;
- int error;
msg.msg_name = uap->to;
msg.msg_namelen = uap->tolen;
@@ -860,8 +982,7 @@
#endif
aiov.iov_base = uap->buf;
aiov.iov_len = uap->len;
- error = sendit(td, uap->s, &msg, uap->flags);
- return (error);
+ return (sendit(td, uap->s, &msg, uap->flags));
}
#ifdef COMPAT_OLDSOCK
@@ -877,7 +998,6 @@
{
struct msghdr msg;
struct iovec aiov;
- int error;
msg.msg_name = 0;
msg.msg_namelen = 0;
@@ -887,8 +1007,7 @@
aiov.iov_len = uap->len;
msg.msg_control = 0;
msg.msg_flags = 0;
- error = sendit(td, uap->s, &msg, uap->flags);
- return (error);
+ return (sendit(td, uap->s, &msg, uap->flags));
}
int
@@ -905,10 +1024,10 @@
int error;
error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
- if (error)
+ if (error != 0)
return (error);
error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
- if (error)
+ if (error != 0)
return (error);
msg.msg_iov = iov;
msg.msg_flags = MSG_COMPAT;
@@ -932,10 +1051,10 @@
int error;
error = copyin(uap->msg, &msg, sizeof (msg));
- if (error)
+ if (error != 0)
return (error);
error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
- if (error)
+ if (error != 0)
return (error);
msg.msg_iov = iov;
#ifdef COMPAT_OLDSOCK
@@ -956,30 +1075,31 @@
{
struct uio auio;
struct iovec *iov;
- int i;
- ssize_t len;
- int error;
- struct mbuf *m, *control = 0;
+ struct mbuf *m, *control = NULL;
caddr_t ctlbuf;
struct file *fp;
struct socket *so;
- struct sockaddr *fromsa = 0;
+ struct sockaddr *fromsa = NULL;
+ cap_rights_t rights;
#ifdef KTRACE
struct uio *ktruio = NULL;
#endif
+ ssize_t len;
+ int error, i;
if (controlp != NULL)
*controlp = NULL;
AUDIT_ARG_FD(s);
- error = getsock_cap(td->td_proc->p_fd, s, CAP_READ, &fp, NULL);
- if (error)
+ error = getsock_cap(td, s, cap_rights_init(&rights, CAP_RECV),
+ &fp, NULL);
+ if (error != 0)
return (error);
so = fp->f_data;
#ifdef MAC
error = mac_socket_check_receive(td->td_ucred, so);
- if (error) {
+ if (error != 0) {
fdrop(fp, td);
return (error);
}
@@ -1004,14 +1124,16 @@
ktruio = cloneuio(&auio);
#endif
len = auio.uio_resid;
- error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
- (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
+ error = soreceive(so, &fromsa, &auio, NULL,
+ (mp->msg_control || controlp) ? &control : NULL,
&mp->msg_flags);
- if (error) {
+ if (error != 0) {
if (auio.uio_resid != len && (error == ERESTART ||
error == EINTR || error == EWOULDBLOCK))
error = 0;
}
+ if (fromsa != NULL)
+ AUDIT_ARG_SOCKADDR(td, AT_FDCWD, fromsa);
#ifdef KTRACE
if (ktruio != NULL) {
ktruio->uio_resid = len - auio.uio_resid;
@@ -1018,12 +1140,12 @@
ktrgenio(s, UIO_READ, ktruio, error);
}
#endif
- if (error)
+ if (error != 0)
goto out;
td->td_retval[0] = len - auio.uio_resid;
if (mp->msg_name) {
len = mp->msg_namelen;
- if (len <= 0 || fromsa == 0)
+ if (len <= 0 || fromsa == NULL)
len = 0;
else {
/* save sa_len before it is destroyed by MSG_COMPAT */
@@ -1036,7 +1158,7 @@
if (fromseg == UIO_USERSPACE) {
error = copyout(fromsa, mp->msg_name,
(unsigned)len);
- if (error)
+ if (error != 0)
goto out;
} else
bcopy(fromsa, mp->msg_name, len);
@@ -1095,10 +1217,9 @@
if (fromsa && KTRPOINT(td, KTR_STRUCT))
ktrsockaddr(fromsa);
#endif
- if (fromsa)
- free(fromsa, M_SONAME);
+ free(fromsa, M_SONAME);
- if (error == 0 && controlp != NULL)
+ if (error == 0 && controlp != NULL)
*controlp = control;
else if (control)
m_freem(control);
@@ -1116,9 +1237,9 @@
int error;
error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
- if (error)
+ if (error != 0)
return (error);
- if (namelenp) {
+ if (namelenp != NULL) {
error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
#ifdef COMPAT_OLDSOCK
if (mp->msg_flags & MSG_COMPAT)
@@ -1147,7 +1268,7 @@
if (uap->fromlenaddr) {
error = copyin(uap->fromlenaddr,
&msg.msg_namelen, sizeof (msg.msg_namelen));
- if (error)
+ if (error != 0)
goto done2;
} else {
msg.msg_namelen = 0;
@@ -1161,7 +1282,7 @@
msg.msg_flags = uap->flags;
error = recvit(td, uap->s, &msg, uap->fromlenaddr);
done2:
- return(error);
+ return (error);
}
#ifdef COMPAT_OLDSOCK
@@ -1189,7 +1310,6 @@
{
struct msghdr msg;
struct iovec aiov;
- int error;
msg.msg_name = 0;
msg.msg_namelen = 0;
@@ -1199,8 +1319,7 @@
aiov.iov_len = uap->len;
msg.msg_control = 0;
msg.msg_flags = uap->flags;
- error = recvit(td, uap->s, &msg, NULL);
- return (error);
+ return (recvit(td, uap->s, &msg, NULL));
}
/*
@@ -1222,10 +1341,10 @@
int error;
error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
- if (error)
+ if (error != 0)
return (error);
error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
- if (error)
+ if (error != 0)
return (error);
msg.msg_flags = uap->flags | MSG_COMPAT;
msg.msg_iov = iov;
@@ -1252,10 +1371,10 @@
int error;
error = copyin(uap->msg, &msg, sizeof (msg));
- if (error)
+ if (error != 0)
return (error);
error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
- if (error)
+ if (error != 0)
return (error);
msg.msg_flags = uap->flags;
#ifdef COMPAT_OLDSOCK
@@ -1283,11 +1402,12 @@
{
struct socket *so;
struct file *fp;
+ cap_rights_t rights;
int error;
AUDIT_ARG_FD(uap->s);
- error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SHUTDOWN, &fp,
- NULL);
+ error = getsock_cap(td, uap->s, cap_rights_init(&rights, CAP_SHUTDOWN),
+ &fp, NULL);
if (error == 0) {
so = fp->f_data;
error = soshutdown(so, uap->how);
@@ -1323,10 +1443,11 @@
enum uio_seg valseg;
socklen_t valsize;
{
- int error;
struct socket *so;
struct file *fp;
struct sockopt sopt;
+ cap_rights_t rights;
+ int error;
if (val == NULL && valsize != 0)
return (EFAULT);
@@ -1350,7 +1471,8 @@
}
AUDIT_ARG_FD(s);
- error = getsock_cap(td->td_proc->p_fd, s, CAP_SETSOCKOPT, &fp, NULL);
+ error = getsock_cap(td, s, cap_rights_init(&rights, CAP_SETSOCKOPT),
+ &fp, NULL);
if (error == 0) {
so = fp->f_data;
error = sosetopt(so, &sopt);
@@ -1372,11 +1494,11 @@
} */ *uap;
{
socklen_t valsize;
- int error;
+ int error;
if (uap->val) {
error = copyin(uap->avalsize, &valsize, sizeof (valsize));
- if (error)
+ if (error != 0)
return (error);
}
@@ -1402,10 +1524,11 @@
enum uio_seg valseg;
socklen_t *valsize;
{
+ struct socket *so;
+ struct file *fp;
+ struct sockopt sopt;
+ cap_rights_t rights;
int error;
- struct socket *so;
- struct file *fp;
- struct sockopt sopt;
if (val == NULL)
*valsize = 0;
@@ -1429,7 +1552,8 @@
}
AUDIT_ARG_FD(s);
- error = getsock_cap(td->td_proc->p_fd, s, CAP_GETSOCKOPT, &fp, NULL);
+ error = getsock_cap(td, s, cap_rights_init(&rights, CAP_GETSOCKOPT),
+ &fp, NULL);
if (error == 0) {
so = fp->f_data;
error = sogetopt(so, &sopt);
@@ -1458,11 +1582,11 @@
int error;
error = copyin(uap->alen, &len, sizeof(len));
- if (error)
+ if (error != 0)
return (error);
error = kern_getsockname(td, uap->fdes, &sa, &len);
- if (error)
+ if (error != 0)
return (error);
if (len != 0) {
@@ -1484,15 +1608,14 @@
{
struct socket *so;
struct file *fp;
+ cap_rights_t rights;
socklen_t len;
int error;
- if (*alen < 0)
- return (EINVAL);
-
AUDIT_ARG_FD(fd);
- error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETSOCKNAME, &fp, NULL);
- if (error)
+ error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_GETSOCKNAME),
+ &fp, NULL);
+ if (error != 0)
return (error);
so = fp->f_data;
*sa = NULL;
@@ -1499,7 +1622,7 @@
CURVNET_SET(so->so_vnet);
error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
CURVNET_RESTORE();
- if (error)
+ if (error != 0)
goto bad;
if (*sa == NULL)
len = 0;
@@ -1512,7 +1635,7 @@
#endif
bad:
fdrop(fp, td);
- if (error && *sa) {
+ if (error != 0 && *sa != NULL) {
free(*sa, M_SONAME);
*sa = NULL;
}
@@ -1558,11 +1681,11 @@
int error;
error = copyin(uap->alen, &len, sizeof (len));
- if (error)
+ if (error != 0)
return (error);
error = kern_getpeername(td, uap->fdes, &sa, &len);
- if (error)
+ if (error != 0)
return (error);
if (len != 0) {
@@ -1584,15 +1707,14 @@
{
struct socket *so;
struct file *fp;
+ cap_rights_t rights;
socklen_t len;
int error;
- if (*alen < 0)
- return (EINVAL);
-
AUDIT_ARG_FD(fd);
- error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETPEERNAME, &fp, NULL);
- if (error)
+ error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_GETPEERNAME),
+ &fp, NULL);
+ if (error != 0)
return (error);
so = fp->f_data;
if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
@@ -1603,7 +1725,7 @@
CURVNET_SET(so->so_vnet);
error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
CURVNET_RESTORE();
- if (error)
+ if (error != 0)
goto bad;
if (*sa == NULL)
len = 0;
@@ -1615,7 +1737,7 @@
ktrsockaddr(*sa);
#endif
bad:
- if (error && *sa) {
+ if (error != 0 && *sa != NULL) {
free(*sa, M_SONAME);
*sa = NULL;
}
@@ -1646,27 +1768,31 @@
#endif /* COMPAT_OLDSOCK */
int
-sockargs(struct mbuf **mp, char *buf, int buflen, int type)
+sockargs(mp, buf, buflen, type)
+ struct mbuf **mp;
+ caddr_t buf;
+ int buflen, type;
{
struct sockaddr *sa;
struct mbuf *m;
int error;
- if ((u_int)buflen > MLEN) {
+ if (buflen < 0)
+ return (EINVAL);
+
+ if (buflen > MLEN) {
#ifdef COMPAT_OLDSOCK
- if (type == MT_SONAME && (u_int)buflen <= 112)
+ if (type == MT_SONAME && buflen <= 112)
buflen = MLEN; /* unix domain compat. hack */
else
#endif
- if ((u_int)buflen > MCLBYTES)
+ if (buflen > MCLBYTES)
return (EINVAL);
}
- m = m_get(M_WAIT, type);
- if ((u_int)buflen > MLEN)
- MCLGET(m, M_WAIT);
+ m = m_get2(buflen, M_WAITOK, type, 0);
m->m_len = buflen;
- error = copyin(buf, mtod(m, void *), buflen);
- if (error)
+ error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
+ if (error != 0)
(void) m_free(m);
else {
*mp = m;
@@ -1698,7 +1824,7 @@
return (EINVAL);
sa = malloc(len, M_SONAME, M_WAITOK);
error = copyin(uaddr, sa, len);
- if (error) {
+ if (error != 0) {
free(sa, M_SONAME);
} else {
#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
@@ -1711,19 +1837,17 @@
return (error);
}
-#include <sys/condvar.h>
-
struct sendfile_sync {
struct mtx mtx;
struct cv cv;
- unsigned count;
+ unsigned count;
};
/*
* Detach mapped page and release resources back to the system.
*/
-void
-sf_buf_mext(void *addr, void *args)
+int
+sf_buf_mext(struct mbuf *mb, void *addr, void *args)
{
vm_page_t m;
struct sendfile_sync *sfs;
@@ -1741,7 +1865,7 @@
vm_page_free(m);
vm_page_unlock(m);
if (addr == NULL)
- return;
+ return (EXT_FREE_OK);
sfs = addr;
mtx_lock(&sfs->mtx);
KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0"));
@@ -1748,6 +1872,7 @@
if (--sfs->count == 0)
cv_signal(&sfs->cv);
mtx_unlock(&sfs->mtx);
+ return (EXT_FREE_OK);
}
/*
@@ -1773,33 +1898,54 @@
{
struct sf_hdtr hdtr;
struct uio *hdr_uio, *trl_uio;
+ struct file *fp;
+ cap_rights_t rights;
int error;
+ /*
+ * File offset must be positive. If it goes beyond EOF
+ * we send only the header/trailer and no payload data.
+ */
+ if (uap->offset < 0)
+ return (EINVAL);
+
hdr_uio = trl_uio = NULL;
if (uap->hdtr != NULL) {
error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
- if (error)
+ if (error != 0)
goto out;
if (hdtr.headers != NULL) {
error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
- if (error)
+ if (error != 0)
goto out;
}
if (hdtr.trailers != NULL) {
error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
- if (error)
+ if (error != 0)
goto out;
}
}
- error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
+ AUDIT_ARG_FD(uap->fd);
+
+ /*
+ * sendfile(2) can start at any offset within a file so we require
+ * CAP_READ+CAP_SEEK = CAP_PREAD.
+ */
+ if ((error = fget_read(td, uap->fd,
+ cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) {
+ goto out;
+ }
+
+ error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, uap->offset,
+ uap->nbytes, uap->sbytes, uap->flags, compat ? SFK_COMPAT : 0, td);
+ fdrop(fp, td);
+
out:
- if (hdr_uio)
- free(hdr_uio, M_IOV);
- if (trl_uio)
- free(trl_uio, M_IOV);
+ free(hdr_uio, M_IOV);
+ free(trl_uio, M_IOV);
return (error);
}
@@ -1821,103 +1967,253 @@
}
#endif /* COMPAT_FREEBSD4 */
-int
-kern_sendfile(struct thread *td, struct sendfile_args *uap,
- struct uio *hdr_uio, struct uio *trl_uio, int compat)
+static int
+sendfile_readpage(vm_object_t obj, struct vnode *vp, int nd,
+ off_t off, int xfsize, int bsize, struct thread *td, vm_page_t *res)
{
- struct file *sock_fp;
+ vm_page_t m;
+ vm_pindex_t pindex;
+ ssize_t resid;
+ int error, readahead, rv;
+
+ pindex = OFF_TO_IDX(off);
+ VM_OBJECT_WLOCK(obj);
+ m = vm_page_grab(obj, pindex, (vp != NULL ? VM_ALLOC_NOBUSY |
+ VM_ALLOC_IGN_SBUSY : 0) | VM_ALLOC_WIRED | VM_ALLOC_NORMAL);
+
+ /*
+ * Check if page is valid for what we need, otherwise initiate I/O.
+ *
+ * The non-zero nd argument prevents disk I/O, instead we
+ * return the caller what he specified in nd. In particular,
+ * if we already turned some pages into mbufs, nd == EAGAIN
+ * and the main function send them the pages before we come
+ * here again and block.
+ */
+ if (m->valid != 0 && vm_page_is_valid(m, off & PAGE_MASK, xfsize)) {
+ if (vp == NULL)
+ vm_page_xunbusy(m);
+ VM_OBJECT_WUNLOCK(obj);
+ *res = m;
+ return (0);
+ } else if (nd != 0) {
+ if (vp == NULL)
+ vm_page_xunbusy(m);
+ error = nd;
+ goto free_page;
+ }
+
+ /*
+ * Get the page from backing store.
+ */
+ error = 0;
+ if (vp != NULL) {
+ VM_OBJECT_WUNLOCK(obj);
+ readahead = sfreadahead * MAXBSIZE;
+
+ /*
+ * Use vn_rdwr() instead of the pager interface for
+ * the vnode, to allow the read-ahead.
+ *
+ * XXXMAC: Because we don't have fp->f_cred here, we
+ * pass in NOCRED. This is probably wrong, but is
+ * consistent with our original implementation.
+ */
+ error = vn_rdwr(UIO_READ, vp, NULL, readahead, trunc_page(off),
+ UIO_NOCOPY, IO_NODELOCKED | IO_VMIO | ((readahead /
+ bsize) << IO_SEQSHIFT), td->td_ucred, NOCRED, &resid, td);
+ SFSTAT_INC(sf_iocnt);
+ VM_OBJECT_WLOCK(obj);
+ } else {
+ if (vm_pager_has_page(obj, pindex, NULL, NULL)) {
+ rv = vm_pager_get_pages(obj, &m, 1, 0);
+ SFSTAT_INC(sf_iocnt);
+ m = vm_page_lookup(obj, pindex);
+ if (m == NULL)
+ error = EIO;
+ else if (rv != VM_PAGER_OK) {
+ vm_page_lock(m);
+ vm_page_free(m);
+ vm_page_unlock(m);
+ m = NULL;
+ error = EIO;
+ }
+ } else {
+ pmap_zero_page(m);
+ m->valid = VM_PAGE_BITS_ALL;
+ m->dirty = 0;
+ }
+ if (m != NULL)
+ vm_page_xunbusy(m);
+ }
+ if (error == 0) {
+ *res = m;
+ } else if (m != NULL) {
+free_page:
+ vm_page_lock(m);
+ vm_page_unwire(m, 0);
+
+ /*
+ * See if anyone else might know about this page. If
+ * not and it is not valid, then free it.
+ */
+ if (m->wire_count == 0 && m->valid == 0 && !vm_page_busied(m))
+ vm_page_free(m);
+ vm_page_unlock(m);
+ }
+ KASSERT(error != 0 || (m->wire_count > 0 &&
+ vm_page_is_valid(m, off & PAGE_MASK, xfsize)),
+ ("wrong page state m %p off %#jx xfsize %d", m, (uintmax_t)off,
+ xfsize));
+ VM_OBJECT_WUNLOCK(obj);
+ return (error);
+}
+
+static int
+sendfile_getobj(struct thread *td, struct file *fp, vm_object_t *obj_res,
+ struct vnode **vp_res, struct shmfd **shmfd_res, off_t *obj_size,
+ int *bsize)
+{
+ struct vattr va;
+ vm_object_t obj;
struct vnode *vp;
- struct vm_object *obj = NULL;
- struct socket *so = NULL;
- struct mbuf *m = NULL;
- struct sf_buf *sf;
- struct vm_page *pg;
- struct vattr va;
- off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
- int error, hdrlen = 0, mnw = 0;
- int vfslocked;
- int bsize;
- struct sendfile_sync *sfs = NULL;
+ struct shmfd *shmfd;
+ int error;
+ vp = *vp_res = NULL;
+ obj = NULL;
+ shmfd = *shmfd_res = NULL;
+ *bsize = 0;
+
/*
* The file descriptor must be a regular file and have a
* backing VM object.
- * File offset must be positive. If it goes beyond EOF
- * we send only the header/trailer and no payload data.
*/
- AUDIT_ARG_FD(uap->fd);
- if ((error = fgetvp_read(td, uap->fd, CAP_READ, &vp)) != 0)
- goto out;
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
- vn_lock(vp, LK_SHARED | LK_RETRY);
- if (vp->v_type == VREG) {
- bsize = vp->v_mount->mnt_stat.f_iosize;
- if (uap->nbytes == 0) {
- error = VOP_GETATTR(vp, &va, td->td_ucred);
- if (error != 0) {
- VOP_UNLOCK(vp, 0);
- VFS_UNLOCK_GIANT(vfslocked);
- obj = NULL;
- goto out;
- }
- rem = va.va_size;
- } else
- rem = uap->nbytes;
+ if (fp->f_type == DTYPE_VNODE) {
+ vp = fp->f_vnode;
+ vn_lock(vp, LK_SHARED | LK_RETRY);
+ if (vp->v_type != VREG) {
+ error = EINVAL;
+ goto out;
+ }
+ *bsize = vp->v_mount->mnt_stat.f_iosize;
+ error = VOP_GETATTR(vp, &va, td->td_ucred);
+ if (error != 0)
+ goto out;
+ *obj_size = va.va_size;
obj = vp->v_object;
- if (obj != NULL) {
- /*
- * Temporarily increase the backing VM
- * object's reference count so that a forced
- * reclamation of its vnode does not
- * immediately destroy it.
- */
- VM_OBJECT_LOCK(obj);
- if ((obj->flags & OBJ_DEAD) == 0) {
- vm_object_reference_locked(obj);
- VM_OBJECT_UNLOCK(obj);
- } else {
- VM_OBJECT_UNLOCK(obj);
- obj = NULL;
- }
+ if (obj == NULL) {
+ error = EINVAL;
+ goto out;
}
- } else
- bsize = 0; /* silence gcc */
- VOP_UNLOCK(vp, 0);
- VFS_UNLOCK_GIANT(vfslocked);
- if (obj == NULL) {
+ } else if (fp->f_type == DTYPE_SHM) {
+ error = 0;
+ shmfd = fp->f_data;
+ obj = shmfd->shm_object;
+ *obj_size = shmfd->shm_size;
+ } else {
error = EINVAL;
goto out;
}
- if (uap->offset < 0) {
- error = EINVAL;
+
+ VM_OBJECT_WLOCK(obj);
+ if ((obj->flags & OBJ_DEAD) != 0) {
+ VM_OBJECT_WUNLOCK(obj);
+ error = EBADF;
goto out;
}
/*
+ * Temporarily increase the backing VM object's reference
+ * count so that a forced reclamation of its vnode does not
+ * immediately destroy it.
+ */
+ vm_object_reference_locked(obj);
+ VM_OBJECT_WUNLOCK(obj);
+ *obj_res = obj;
+ *vp_res = vp;
+ *shmfd_res = shmfd;
+
+out:
+ if (vp != NULL)
+ VOP_UNLOCK(vp, 0);
+ return (error);
+}
+
+static int
+kern_sendfile_getsock(struct thread *td, int s, struct file **sock_fp,
+ struct socket **so)
+{
+ cap_rights_t rights;
+ int error;
+
+ *sock_fp = NULL;
+ *so = NULL;
+
+ /*
* The socket must be a stream socket and connected.
- * Remember if it a blocking or non-blocking socket.
*/
- if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_WRITE,
- &sock_fp, NULL)) != 0)
+ error = getsock_cap(td, s, cap_rights_init(&rights, CAP_SEND),
+ sock_fp, NULL);
+ if (error != 0)
+ return (error);
+ *so = (*sock_fp)->f_data;
+ if ((*so)->so_type != SOCK_STREAM)
+ return (EINVAL);
+ if (((*so)->so_state & SS_ISCONNECTED) == 0)
+ return (ENOTCONN);
+ return (0);
+}
+
+int
+vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
+ struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
+ int kflags, struct thread *td)
+{
+ struct file *sock_fp;
+ struct vnode *vp;
+ struct vm_object *obj;
+ struct socket *so;
+ struct mbuf *m;
+ struct sf_buf *sf;
+ struct vm_page *pg;
+ struct shmfd *shmfd;
+ struct sendfile_sync *sfs;
+ struct vattr va;
+ off_t off, xfsize, fsbytes, sbytes, rem, obj_size;
+ int error, bsize, nd, hdrlen, mnw;
+ bool inflight_called;
+
+ pg = NULL;
+ obj = NULL;
+ so = NULL;
+ m = NULL;
+ sfs = NULL;
+ fsbytes = sbytes = 0;
+ hdrlen = mnw = 0;
+ rem = nbytes;
+ obj_size = 0;
+ inflight_called = false;
+
+ error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize);
+ if (error != 0)
+ return (error);
+ if (rem == 0)
+ rem = obj_size;
+
+ error = kern_sendfile_getsock(td, sockfd, &sock_fp, &so);
+ if (error != 0)
goto out;
- so = sock_fp->f_data;
- if (so->so_type != SOCK_STREAM) {
- error = EINVAL;
- goto out;
- }
- if ((so->so_state & SS_ISCONNECTED) == 0) {
- error = ENOTCONN;
- goto out;
- }
+
/*
* Do not wait on memory allocations but return ENOMEM for
* caller to retry later.
* XXX: Experimental.
*/
- if (uap->flags & SF_MNOWAIT)
+ if (flags & SF_MNOWAIT)
mnw = 1;
- if (uap->flags & SF_SYNC) {
+ if (flags & SF_SYNC) {
sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO);
mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
cv_init(&sfs->cv, "sendfile");
@@ -1925,7 +2221,7 @@
#ifdef MAC
error = mac_socket_check_send(td->td_ucred, so);
- if (error)
+ if (error != 0)
goto out;
#endif
@@ -1939,11 +2235,11 @@
* the header. If compat is specified subtract the
* header size from nbytes.
*/
- if (compat) {
- if (uap->nbytes > hdr_uio->uio_resid)
- uap->nbytes -= hdr_uio->uio_resid;
+ if (kflags & SFK_COMPAT) {
+ if (nbytes > hdr_uio->uio_resid)
+ nbytes -= hdr_uio->uio_resid;
else
- uap->nbytes = 0;
+ nbytes = 0;
}
m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
0, 0, 0);
@@ -1974,14 +2270,14 @@
* The outer loop checks the state and available space of the socket
* and takes care of the overall progress.
*/
- for (off = uap->offset; ; ) {
+ for (off = offset; ; ) {
struct mbuf *mtail;
int loopbytes;
int space;
int done;
- if ((uap->nbytes != 0 && uap->nbytes == fsbytes) ||
- (uap->nbytes == 0 && va.va_size == fsbytes))
+ if ((nbytes != 0 && nbytes == fsbytes) ||
+ (nbytes == 0 && obj_size == fsbytes))
break;
mtail = NULL;
@@ -2040,7 +2336,7 @@
* been interrupted by a signal. If we've sent anything
* then return bytes sent, otherwise return the error.
*/
- if (error) {
+ if (error != 0) {
SOCKBUF_UNLOCK(&so->so_snd);
goto done;
}
@@ -2055,19 +2351,17 @@
*/
space -= hdrlen;
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
- error = vn_lock(vp, LK_SHARED);
- if (error != 0) {
- VFS_UNLOCK_GIANT(vfslocked);
- goto done;
+ if (vp != NULL) {
+ error = vn_lock(vp, LK_SHARED);
+ if (error != 0)
+ goto done;
+ error = VOP_GETATTR(vp, &va, td->td_ucred);
+ if (error != 0 || off >= va.va_size) {
+ VOP_UNLOCK(vp, 0);
+ goto done;
+ }
+ obj_size = va.va_size;
}
- error = VOP_GETATTR(vp, &va, td->td_ucred);
- if (error != 0 || off >= va.va_size) {
- VOP_UNLOCK(vp, 0);
- VFS_UNLOCK_GIANT(vfslocked);
- goto done;
- }
- VFS_UNLOCK_GIANT(vfslocked);
/*
* Loop and construct maximum sized mbuf chain to be bulk
@@ -2074,7 +2368,6 @@
* dumped into socket buffer.
*/
while (space > loopbytes) {
- vm_pindex_t pindex;
vm_offset_t pgoff;
struct mbuf *m0;
@@ -2084,11 +2377,10 @@
* or the passed in nbytes.
*/
pgoff = (vm_offset_t)(off & PAGE_MASK);
- if (uap->nbytes != 0)
- rem = (uap->nbytes - fsbytes - loopbytes);
- else
- rem = va.va_size -
- uap->offset - fsbytes - loopbytes;
+ rem = obj_size - offset;
+ if (nbytes != 0)
+ rem = omin(rem, nbytes);
+ rem -= fsbytes + loopbytes;
xfsize = omin(PAGE_SIZE - pgoff, rem);
xfsize = omin(space - loopbytes, xfsize);
if (xfsize <= 0) {
@@ -2100,66 +2392,15 @@
* Attempt to look up the page. Allocate
* if not found or wait and loop if busy.
*/
- pindex = OFF_TO_IDX(off);
- VM_OBJECT_LOCK(obj);
- pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
- VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
-
- /*
- * Check if page is valid for what we need,
- * otherwise initiate I/O.
- * If we already turned some pages into mbufs,
- * send them off before we come here again and
- * block.
- */
- if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
- VM_OBJECT_UNLOCK(obj);
- else if (m != NULL)
- error = EAGAIN; /* send what we already got */
- else if (uap->flags & SF_NODISKIO)
- error = EBUSY;
- else {
- ssize_t resid;
-
- /*
- * Ensure that our page is still around
- * when the I/O completes.
- */
- vm_page_io_start(pg);
- VM_OBJECT_UNLOCK(obj);
-
- /*
- * Get the page from backing store.
- * XXXMAC: Because we don't have fp->f_cred
- * here, we pass in NOCRED. This is probably
- * wrong, but is consistent with our original
- * implementation.
- */
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
- error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
- trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
- IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
- td->td_ucred, NOCRED, &resid, td);
- VFS_UNLOCK_GIANT(vfslocked);
- VM_OBJECT_LOCK(obj);
- vm_page_io_finish(pg);
- if (!error)
- VM_OBJECT_UNLOCK(obj);
- mbstat.sf_iocnt++;
- }
- if (error) {
- vm_page_lock(pg);
- vm_page_unwire(pg, 0);
- /*
- * See if anyone else might know about
- * this page. If not and it is not valid,
- * then free it.
- */
- if (pg->wire_count == 0 && pg->valid == 0 &&
- pg->busy == 0 && !(pg->oflags & VPO_BUSY))
- vm_page_free(pg);
- vm_page_unlock(pg);
- VM_OBJECT_UNLOCK(obj);
+ if (m != NULL)
+ nd = EAGAIN; /* send what we already got */
+ else if ((flags & SF_NODISKIO) != 0)
+ nd = EBUSY;
+ else
+ nd = 0;
+ error = sendfile_readpage(obj, vp, nd, off,
+ xfsize, bsize, td, &pg);
+ if (error != 0) {
if (error == EAGAIN)
error = 0; /* not a real error */
break;
@@ -2177,11 +2418,11 @@
sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT :
SFB_CATCH);
if (sf == NULL) {
- mbstat.sf_allocfail++;
+ SFSTAT_INC(sf_allocfail);
vm_page_lock(pg);
vm_page_unwire(pg, 0);
KASSERT(pg->object != NULL,
- ("kern_sendfile: object disappeared"));
+ ("%s: object disappeared", __func__));
vm_page_unlock(pg);
if (m == NULL)
error = (mnw ? EAGAIN : EINTR);
@@ -2195,11 +2436,17 @@
m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
if (m0 == NULL) {
error = (mnw ? EAGAIN : ENOBUFS);
- sf_buf_mext(NULL, sf);
+ (void)sf_buf_mext(NULL, NULL, sf);
break;
}
- MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
- sfs, sf, M_RDONLY, EXT_SFBUF);
+ if (m_extadd(m0, (caddr_t )sf_buf_kva(sf), PAGE_SIZE,
+ sf_buf_mext, sfs, sf, M_RDONLY, EXT_SFBUF,
+ (mnw ? M_NOWAIT : M_WAITOK)) != 0) {
+ error = (mnw ? EAGAIN : ENOBUFS);
+ (void)sf_buf_mext(NULL, NULL, sf);
+ m_freem(m0);
+ break;
+ }
m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
m0->m_len = xfsize;
@@ -2223,7 +2470,8 @@
}
}
- VOP_UNLOCK(vp, 0);
+ if (vp != NULL)
+ VOP_UNLOCK(vp, 0);
/* Add the buffer chain to the socket buffer. */
if (m != NULL) {
@@ -2264,9 +2512,9 @@
}
/* Quit outer loop on error or when we're done. */
- if (done)
+ if (done)
break;
- if (error)
+ if (error != 0)
goto done;
}
@@ -2275,7 +2523,7 @@
*/
if (trl_uio != NULL) {
sbunlock(&so->so_snd);
- error = kern_writev(td, uap->s, trl_uio);
+ error = kern_writev(td, sockfd, trl_uio);
if (error == 0)
sbytes += td->td_retval[0];
goto out;
@@ -2291,16 +2539,11 @@
if (error == 0) {
td->td_retval[0] = 0;
}
- if (uap->sbytes != NULL) {
- copyout(&sbytes, uap->sbytes, sizeof(off_t));
+ if (sent != NULL) {
+ copyout(&sbytes, sent, sizeof(off_t));
}
if (obj != NULL)
vm_object_deallocate(obj);
- if (vp != NULL) {
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
- vrele(vp);
- VFS_UNLOCK_GIANT(vfslocked);
- }
if (so)
fdrop(sock_fp, td);
if (m)
@@ -2321,493 +2564,3 @@
return (error);
}
-
-/*
- * SCTP syscalls.
- * Functionality only compiled in if SCTP is defined in the kernel Makefile,
- * otherwise all return EOPNOTSUPP.
- * XXX: We should make this loadable one day.
- */
-int
-sys_sctp_peeloff(td, uap)
- struct thread *td;
- struct sctp_peeloff_args /* {
- int sd;
- caddr_t name;
- } */ *uap;
-{
-#if (defined(INET) || defined(INET6)) && defined(SCTP)
- struct filedesc *fdp;
- struct file *nfp = NULL;
- int error;
- struct socket *head, *so;
- int fd;
- u_int fflag;
-
- fdp = td->td_proc->p_fd;
- AUDIT_ARG_FD(uap->sd);
- error = fgetsock(td, uap->sd, CAP_PEELOFF, &head, &fflag);
- if (error)
- goto done2;
- if (head->so_proto->pr_protocol != IPPROTO_SCTP) {
- error = EOPNOTSUPP;
- goto done;
- }
- error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
- if (error)
- goto done;
- /*
- * At this point we know we do have a assoc to pull
- * we proceed to get the fd setup. This may block
- * but that is ok.
- */
-
- error = falloc(td, &nfp, &fd, 0);
- if (error)
- goto done;
- td->td_retval[0] = fd;
-
- CURVNET_SET(head->so_vnet);
- so = sonewconn(head, SS_ISCONNECTED);
- if (so == NULL)
- goto noconnection;
- /*
- * Before changing the flags on the socket, we have to bump the
- * reference count. Otherwise, if the protocol calls sofree(),
- * the socket will be released due to a zero refcount.
- */
- SOCK_LOCK(so);
- soref(so); /* file descriptor reference */
- SOCK_UNLOCK(so);
-
- ACCEPT_LOCK();
-
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- so->so_state |= (head->so_state & SS_NBIO);
- so->so_state &= ~SS_NOFDREF;
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
- ACCEPT_UNLOCK();
- finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
- error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
- if (error)
- goto noconnection;
- if (head->so_sigio != NULL)
- fsetown(fgetown(&head->so_sigio), &so->so_sigio);
-
-noconnection:
- /*
- * close the new descriptor, assuming someone hasn't ripped it
- * out from under us.
- */
- if (error)
- fdclose(fdp, nfp, fd, td);
-
- /*
- * Release explicitly held references before returning.
- */
- CURVNET_RESTORE();
-done:
- if (nfp != NULL)
- fdrop(nfp, td);
- fputsock(head);
-done2:
- return (error);
-#else /* SCTP */
- return (EOPNOTSUPP);
-#endif /* SCTP */
-}
-
-int
-sys_sctp_generic_sendmsg (td, uap)
- struct thread *td;
- struct sctp_generic_sendmsg_args /* {
- int sd,
- caddr_t msg,
- int mlen,
- caddr_t to,
- __socklen_t tolen,
- struct sctp_sndrcvinfo *sinfo,
- int flags
- } */ *uap;
-{
-#if (defined(INET) || defined(INET6)) && defined(SCTP)
- struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
- struct socket *so;
- struct file *fp = NULL;
- int error = 0, len;
- struct sockaddr *to = NULL;
-#ifdef KTRACE
- struct uio *ktruio = NULL;
-#endif
- struct uio auio;
- struct iovec iov[1];
- cap_rights_t rights;
-
- if (uap->sinfo) {
- error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
- if (error)
- return (error);
- u_sinfo = &sinfo;
- }
-
- rights = CAP_WRITE;
- if (uap->tolen) {
- error = getsockaddr(&to, uap->to, uap->tolen);
- if (error) {
- to = NULL;
- goto sctp_bad2;
- }
- rights |= CAP_CONNECT;
- }
-
- AUDIT_ARG_FD(uap->sd);
- error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL);
- if (error)
- goto sctp_bad;
-#ifdef KTRACE
- if (to && (KTRPOINT(td, KTR_STRUCT)))
- ktrsockaddr(to);
-#endif
-
- iov[0].iov_base = uap->msg;
- iov[0].iov_len = uap->mlen;
-
- so = (struct socket *)fp->f_data;
- if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
- error = EOPNOTSUPP;
- goto sctp_bad;
- }
-#ifdef MAC
- error = mac_socket_check_send(td->td_ucred, so);
- if (error)
- goto sctp_bad;
-#endif /* MAC */
-
- auio.uio_iov = iov;
- auio.uio_iovcnt = 1;
- auio.uio_segflg = UIO_USERSPACE;
- auio.uio_rw = UIO_WRITE;
- auio.uio_td = td;
- auio.uio_offset = 0; /* XXX */
- auio.uio_resid = 0;
- len = auio.uio_resid = uap->mlen;
- CURVNET_SET(so->so_vnet);
- error = sctp_lower_sosend(so, to, &auio,
- (struct mbuf *)NULL, (struct mbuf *)NULL,
- uap->flags, u_sinfo, td);
- CURVNET_RESTORE();
- if (error) {
- if (auio.uio_resid != len && (error == ERESTART ||
- error == EINTR || error == EWOULDBLOCK))
- error = 0;
- /* Generation of SIGPIPE can be controlled per socket. */
- if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
- !(uap->flags & MSG_NOSIGNAL)) {
- PROC_LOCK(td->td_proc);
- tdsignal(td, SIGPIPE);
- PROC_UNLOCK(td->td_proc);
- }
- }
- if (error == 0)
- td->td_retval[0] = len - auio.uio_resid;
-#ifdef KTRACE
- if (ktruio != NULL) {
- ktruio->uio_resid = td->td_retval[0];
- ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
- }
-#endif /* KTRACE */
-sctp_bad:
- if (fp)
- fdrop(fp, td);
-sctp_bad2:
- if (to)
- free(to, M_SONAME);
- return (error);
-#else /* SCTP */
- return (EOPNOTSUPP);
-#endif /* SCTP */
-}
-
-int
-sys_sctp_generic_sendmsg_iov(td, uap)
- struct thread *td;
- struct sctp_generic_sendmsg_iov_args /* {
- int sd,
- struct iovec *iov,
- int iovlen,
- caddr_t to,
- __socklen_t tolen,
- struct sctp_sndrcvinfo *sinfo,
- int flags
- } */ *uap;
-{
-#if (defined(INET) || defined(INET6)) && defined(SCTP)
- struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
- struct socket *so;
- struct file *fp = NULL;
- int error=0, i;
- ssize_t len;
- struct sockaddr *to = NULL;
-#ifdef KTRACE
- struct uio *ktruio = NULL;
-#endif
- struct uio auio;
- struct iovec *iov, *tiov;
- cap_rights_t rights;
-
- if (uap->sinfo) {
- error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
- if (error)
- return (error);
- u_sinfo = &sinfo;
- }
- rights = CAP_WRITE;
- if (uap->tolen) {
- error = getsockaddr(&to, uap->to, uap->tolen);
- if (error) {
- to = NULL;
- goto sctp_bad2;
- }
- rights |= CAP_CONNECT;
- }
-
- AUDIT_ARG_FD(uap->sd);
- error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL);
- if (error)
- goto sctp_bad1;
-
-#ifdef COMPAT_FREEBSD32
- if (SV_CURPROC_FLAG(SV_ILP32))
- error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
- uap->iovlen, &iov, EMSGSIZE);
- else
-#endif
- error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
- if (error)
- goto sctp_bad1;
-#ifdef KTRACE
- if (to && (KTRPOINT(td, KTR_STRUCT)))
- ktrsockaddr(to);
-#endif
-
- so = (struct socket *)fp->f_data;
- if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
- error = EOPNOTSUPP;
- goto sctp_bad;
- }
-#ifdef MAC
- error = mac_socket_check_send(td->td_ucred, so);
- if (error)
- goto sctp_bad;
-#endif /* MAC */
-
- auio.uio_iov = iov;
- auio.uio_iovcnt = uap->iovlen;
- auio.uio_segflg = UIO_USERSPACE;
- auio.uio_rw = UIO_WRITE;
- auio.uio_td = td;
- auio.uio_offset = 0; /* XXX */
- auio.uio_resid = 0;
- tiov = iov;
- for (i = 0; i <uap->iovlen; i++, tiov++) {
- if ((auio.uio_resid += tiov->iov_len) < 0) {
- error = EINVAL;
- goto sctp_bad;
- }
- }
- len = auio.uio_resid;
- CURVNET_SET(so->so_vnet);
- error = sctp_lower_sosend(so, to, &auio,
- (struct mbuf *)NULL, (struct mbuf *)NULL,
- uap->flags, u_sinfo, td);
- CURVNET_RESTORE();
- if (error) {
- if (auio.uio_resid != len && (error == ERESTART ||
- error == EINTR || error == EWOULDBLOCK))
- error = 0;
- /* Generation of SIGPIPE can be controlled per socket */
- if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
- !(uap->flags & MSG_NOSIGNAL)) {
- PROC_LOCK(td->td_proc);
- tdsignal(td, SIGPIPE);
- PROC_UNLOCK(td->td_proc);
- }
- }
- if (error == 0)
- td->td_retval[0] = len - auio.uio_resid;
-#ifdef KTRACE
- if (ktruio != NULL) {
- ktruio->uio_resid = td->td_retval[0];
- ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
- }
-#endif /* KTRACE */
-sctp_bad:
- free(iov, M_IOV);
-sctp_bad1:
- if (fp)
- fdrop(fp, td);
-sctp_bad2:
- if (to)
- free(to, M_SONAME);
- return (error);
-#else /* SCTP */
- return (EOPNOTSUPP);
-#endif /* SCTP */
-}
-
-int
-sys_sctp_generic_recvmsg(td, uap)
- struct thread *td;
- struct sctp_generic_recvmsg_args /* {
- int sd,
- struct iovec *iov,
- int iovlen,
- struct sockaddr *from,
- __socklen_t *fromlenaddr,
- struct sctp_sndrcvinfo *sinfo,
- int *msg_flags
- } */ *uap;
-{
-#if (defined(INET) || defined(INET6)) && defined(SCTP)
- uint8_t sockbufstore[256];
- struct uio auio;
- struct iovec *iov, *tiov;
- struct sctp_sndrcvinfo sinfo;
- struct socket *so;
- struct file *fp = NULL;
- struct sockaddr *fromsa;
- int fromlen;
- ssize_t len;
- int i, msg_flags;
- int error = 0;
-#ifdef KTRACE
- struct uio *ktruio = NULL;
-#endif
-
- AUDIT_ARG_FD(uap->sd);
- error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_READ, &fp, NULL);
- if (error) {
- return (error);
- }
-#ifdef COMPAT_FREEBSD32
- if (SV_CURPROC_FLAG(SV_ILP32))
- error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
- uap->iovlen, &iov, EMSGSIZE);
- else
-#endif
- error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
- if (error)
- goto out1;
-
- so = fp->f_data;
- if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
- error = EOPNOTSUPP;
- goto out;
- }
-#ifdef MAC
- error = mac_socket_check_receive(td->td_ucred, so);
- if (error) {
- goto out;
- }
-#endif /* MAC */
-
- if (uap->fromlenaddr) {
- error = copyin(uap->fromlenaddr,
- &fromlen, sizeof (fromlen));
- if (error) {
- goto out;
- }
- } else {
- fromlen = 0;
- }
- if (uap->msg_flags) {
- error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
- if (error) {
- goto out;
- }
- } else {
- msg_flags = 0;
- }
- auio.uio_iov = iov;
- auio.uio_iovcnt = uap->iovlen;
- auio.uio_segflg = UIO_USERSPACE;
- auio.uio_rw = UIO_READ;
- auio.uio_td = td;
- auio.uio_offset = 0; /* XXX */
- auio.uio_resid = 0;
- tiov = iov;
- for (i = 0; i <uap->iovlen; i++, tiov++) {
- if ((auio.uio_resid += tiov->iov_len) < 0) {
- error = EINVAL;
- goto out;
- }
- }
- len = auio.uio_resid;
- fromsa = (struct sockaddr *)sockbufstore;
-
-#ifdef KTRACE
- if (KTRPOINT(td, KTR_GENIO))
- ktruio = cloneuio(&auio);
-#endif /* KTRACE */
- memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo));
- CURVNET_SET(so->so_vnet);
- error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
- fromsa, fromlen, &msg_flags,
- (struct sctp_sndrcvinfo *)&sinfo, 1);
- CURVNET_RESTORE();
- if (error) {
- if (auio.uio_resid != len && (error == ERESTART ||
- error == EINTR || error == EWOULDBLOCK))
- error = 0;
- } else {
- if (uap->sinfo)
- error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
- }
-#ifdef KTRACE
- if (ktruio != NULL) {
- ktruio->uio_resid = len - auio.uio_resid;
- ktrgenio(uap->sd, UIO_READ, ktruio, error);
- }
-#endif /* KTRACE */
- if (error)
- goto out;
- td->td_retval[0] = len - auio.uio_resid;
-
- if (fromlen && uap->from) {
- len = fromlen;
- if (len <= 0 || fromsa == 0)
- len = 0;
- else {
- len = MIN(len, fromsa->sa_len);
- error = copyout(fromsa, uap->from, (size_t)len);
- if (error)
- goto out;
- }
- error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
- if (error) {
- goto out;
- }
- }
-#ifdef KTRACE
- if (KTRPOINT(td, KTR_STRUCT))
- ktrsockaddr(fromsa);
-#endif
- if (uap->msg_flags) {
- error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
- if (error) {
- goto out;
- }
- }
-out:
- free(iov, M_IOV);
-out1:
- if (fp)
- fdrop(fp, td);
-
- return (error);
-#else /* SCTP */
- return (EOPNOTSUPP);
-#endif /* SCTP */
-}
Modified: trunk/sys/kern/uipc_usrreq.c
===================================================================
--- trunk/sys/kern/uipc_usrreq.c 2018-05-26 14:25:17 UTC (rev 9953)
+++ trunk/sys/kern/uipc_usrreq.c 2018-05-26 14:25:55 UTC (rev 9954)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1989, 1991, 1993
* The Regents of the University of California.
@@ -51,17 +52,17 @@
*
* TODO:
* RDM
- * distinguish datagram size limits from flow control limits in SEQPACKET
* rethink name space problems
* need a proper out-of-band
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/uipc_usrreq.c 305261 2016-09-02 00:14:28Z markj $");
#include "opt_ddb.h"
#include <sys/param.h>
+#include <sys/capsicum.h>
#include <sys/domain.h>
#include <sys/fcntl.h>
#include <sys/malloc.h> /* XXX must be before <sys/file.h> */
@@ -101,6 +102,8 @@
#include <vm/uma.h>
+MALLOC_DECLARE(M_FILECAPS);
+
/*
* Locking key:
* (l) Locked using list lock
@@ -271,6 +274,8 @@
static int uipc_ctloutput(struct socket *, struct sockopt *);
static int unp_connect(struct socket *, struct sockaddr *,
struct thread *);
+static int unp_connectat(int, struct socket *, struct sockaddr *,
+ struct thread *);
static int unp_connect2(struct socket *so, struct socket *so2, int);
static void unp_disconnect(struct unpcb *unp, struct unpcb *unp2);
static void unp_dispose(struct mbuf *);
@@ -277,13 +282,13 @@
static void unp_shutdown(struct unpcb *);
static void unp_drop(struct unpcb *, int);
static void unp_gc(__unused void *, int);
-static void unp_scan(struct mbuf *, void (*)(struct file *));
+static void unp_scan(struct mbuf *, void (*)(struct filedescent **, int));
static void unp_discard(struct file *);
-static void unp_freerights(struct file **, int);
+static void unp_freerights(struct filedescent **, int);
static void unp_init(void);
static int unp_internalize(struct mbuf **, struct thread *);
static void unp_internalize_fp(struct file *);
-static int unp_externalize(struct mbuf *, struct mbuf **);
+static int unp_externalize(struct mbuf *, struct mbuf **, int);
static int unp_externalize_fp(struct file *);
static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *);
static void unp_process_defers(void * __unused, int);
@@ -320,6 +325,7 @@
*/
.pr_flags = PR_ADDR|PR_ATOMIC|PR_CONNREQUIRED|PR_WANTRCVD|
PR_RIGHTS,
+ .pr_ctloutput = &uipc_ctloutput,
.pr_usrreqs = &uipc_usrreqs_seqpacket,
},
};
@@ -424,6 +430,8 @@
unp->unp_socket = so;
so->so_pcb = unp;
unp->unp_refcount = 1;
+ if (so->so_head != NULL)
+ unp->unp_flags |= UNP_NASCENT;
UNP_LIST_LOCK();
unp->unp_gencnt = ++unp_gencnt;
@@ -450,17 +458,21 @@
}
static int
-uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
+uipc_bindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
{
struct sockaddr_un *soun = (struct sockaddr_un *)nam;
struct vattr vattr;
- int error, namelen, vfslocked;
+ int error, namelen;
struct nameidata nd;
struct unpcb *unp;
struct vnode *vp;
struct mount *mp;
+ cap_rights_t rights;
char *buf;
+ if (nam->sa_family != AF_UNIX)
+ return (EAFNOSUPPORT);
+
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_bind: unp == NULL"));
@@ -496,15 +508,13 @@
buf[namelen] = 0;
restart:
- vfslocked = 0;
- NDINIT(&nd, CREATE, MPSAFE | NOFOLLOW | LOCKPARENT | SAVENAME,
- UIO_SYSSPACE, buf, td);
+ NDINIT_ATRIGHTS(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME | NOCACHE,
+ UIO_SYSSPACE, buf, fd, cap_rights_init(&rights, CAP_BINDAT), td);
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
error = namei(&nd);
if (error)
goto error;
vp = nd.ni_vp;
- vfslocked = NDHASGIANT(&nd);
if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
NDFREE(&nd, NDF_ONLY_PNBUF);
if (nd.ni_dvp == vp)
@@ -519,7 +529,6 @@
error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
if (error)
goto error;
- VFS_UNLOCK_GIANT(vfslocked);
goto restart;
}
VATTR_NULL(&vattr);
@@ -551,12 +560,10 @@
UNP_LINK_WUNLOCK();
VOP_UNLOCK(vp, 0);
vn_finished_write(mp);
- VFS_UNLOCK_GIANT(vfslocked);
free(buf, M_TEMP);
return (0);
error:
- VFS_UNLOCK_GIANT(vfslocked);
UNP_PCB_LOCK(unp);
unp->unp_flags &= ~UNP_BINDING;
UNP_PCB_UNLOCK(unp);
@@ -565,6 +572,13 @@
}
static int
+uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+
+ return (uipc_bindat(AT_FDCWD, so, nam, td));
+}
+
+static int
uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
int error;
@@ -576,6 +590,19 @@
return (error);
}
+static int
+uipc_connectat(int fd, struct socket *so, struct sockaddr *nam,
+ struct thread *td)
+{
+ int error;
+
+ KASSERT(td == curthread, ("uipc_connectat: td != curthread"));
+ UNP_LINK_WLOCK();
+ error = unp_connectat(fd, so, nam, td);
+ UNP_LINK_WUNLOCK();
+ return (error);
+}
+
static void
uipc_close(struct socket *so)
{
@@ -627,14 +654,22 @@
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_detach: unp == NULL"));
- UNP_LINK_WLOCK();
+ vp = NULL;
+ local_unp_rights = 0;
+
UNP_LIST_LOCK();
- UNP_PCB_LOCK(unp);
LIST_REMOVE(unp, unp_link);
unp->unp_gencnt = ++unp_gencnt;
--unp_count;
UNP_LIST_UNLOCK();
+ if ((unp->unp_flags & UNP_NASCENT) != 0) {
+ UNP_PCB_LOCK(unp);
+ goto teardown;
+ }
+ UNP_LINK_WLOCK();
+ UNP_PCB_LOCK(unp);
+
/*
* XXXRW: Should assert vp->v_socket == so.
*/
@@ -662,6 +697,7 @@
}
local_unp_rights = unp_rights;
UNP_LINK_WUNLOCK();
+teardown:
unp->unp_socket->so_pcb = NULL;
saved_unp_addr = unp->unp_addr;
unp->unp_addr = NULL;
@@ -674,13 +710,8 @@
uma_zfree(unp_zone, unp);
} else
UNP_PCB_UNLOCK(unp);
- if (vp) {
- int vfslocked;
-
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ if (vp)
vrele(vp);
- VFS_UNLOCK_GIANT(vfslocked);
- }
if (local_unp_rights)
taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1);
}
@@ -772,7 +803,6 @@
struct unpcb *unp, *unp2;
struct socket *so2;
u_int mbcnt, sbcc;
- u_long newhiwat;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_rcvd: unp == NULL"));
@@ -794,6 +824,15 @@
mbcnt = so->so_rcv.sb_mbcnt;
sbcc = so->so_rcv.sb_cc;
SOCKBUF_UNLOCK(&so->so_rcv);
+ /*
+ * There is a benign race condition at this point. If we're planning to
+ * clear SB_STOP, but uipc_send is called on the connected socket at
+ * this instant, it might add data to the sockbuf and set SB_STOP. Then
+ * we would erroneously clear SB_STOP below, even though the sockbuf is
+ * full. The race is benign because the only ill effect is to allow the
+ * sockbuf to exceed its size limit, and the size limits are not
+ * strictly guaranteed anyway.
+ */
UNP_PCB_LOCK(unp);
unp2 = unp->unp_conn;
if (unp2 == NULL) {
@@ -802,13 +841,9 @@
}
so2 = unp2->unp_socket;
SOCKBUF_LOCK(&so2->so_snd);
- so2->so_snd.sb_mbmax += unp->unp_mbcnt - mbcnt;
- newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - sbcc;
- (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat,
- newhiwat, RLIM_INFINITY);
+ if (sbcc < so2->so_snd.sb_hiwat && mbcnt < so2->so_snd.sb_mbmax)
+ so2->so_snd.sb_flags &= ~SB_STOP;
sowwakeup_locked(so2);
- unp->unp_mbcnt = mbcnt;
- unp->unp_cc = sbcc;
UNP_PCB_UNLOCK(unp);
return (0);
}
@@ -819,8 +854,7 @@
{
struct unpcb *unp, *unp2;
struct socket *so2;
- u_int mbcnt_delta, sbcc;
- u_int newhiwat;
+ u_int mbcnt, sbcc;
int error = 0;
unp = sotounpcb(so);
@@ -875,7 +909,8 @@
from = &sun_noname;
so2 = unp2->unp_socket;
SOCKBUF_LOCK(&so2->so_rcv);
- if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) {
+ if (sbappendaddr_locked(&so2->so_rcv, from, m,
+ control)) {
sorwakeup_locked(so2);
m = NULL;
control = NULL;
@@ -936,7 +971,8 @@
SOCKBUF_LOCK(&so2->so_rcv);
if (unp2->unp_flags & UNP_WANTCRED) {
/*
- * Credentials are passed only once on SOCK_STREAM.
+ * Credentials are passed only once on SOCK_STREAM
+ * and SOCK_SEQPACKET.
*/
unp2->unp_flags &= ~UNP_WANTCRED;
control = unp_addsockcred(td, control);
@@ -959,34 +995,34 @@
const struct sockaddr *from;
from = &sun_noname;
- if (sbappendaddr_locked(&so2->so_rcv, from, m,
- control))
+ /*
+ * Don't check for space available in so2->so_rcv.
+ * Unix domain sockets only check for space in the
+ * sending sockbuf, and that check is performed one
+ * level up the stack.
+ */
+ if (sbappendaddr_nospacecheck_locked(&so2->so_rcv,
+ from, m, control))
control = NULL;
break;
}
}
- /*
- * XXXRW: While fine for SOCK_STREAM, this conflates maximum
- * datagram size and back-pressure for SOCK_SEQPACKET, which
- * can lead to undesired return of EMSGSIZE on send instead
- * of more desirable blocking.
- */
- mbcnt_delta = so2->so_rcv.sb_mbcnt - unp2->unp_mbcnt;
- unp2->unp_mbcnt = so2->so_rcv.sb_mbcnt;
+ mbcnt = so2->so_rcv.sb_mbcnt;
sbcc = so2->so_rcv.sb_cc;
sorwakeup_locked(so2);
+ /*
+ * The PCB lock on unp2 protects the SB_STOP flag. Without it,
+ * it would be possible for uipc_rcvd to be called at this
+ * point, drain the receiving sockbuf, clear SB_STOP, and then
+ * we would set SB_STOP below. That could lead to an empty
+ * sockbuf having SB_STOP set
+ */
SOCKBUF_LOCK(&so->so_snd);
- if ((int)so->so_snd.sb_hiwat >= (int)(sbcc - unp2->unp_cc))
- newhiwat = so->so_snd.sb_hiwat - (sbcc - unp2->unp_cc);
- else
- newhiwat = 0;
- (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
- newhiwat, RLIM_INFINITY);
- so->so_snd.sb_mbmax -= mbcnt_delta;
+ if (sbcc >= so->so_snd.sb_hiwat || mbcnt >= so->so_snd.sb_mbmax)
+ so->so_snd.sb_flags |= SB_STOP;
SOCKBUF_UNLOCK(&so->so_snd);
- unp2->unp_cc = sbcc;
UNP_PCB_UNLOCK(unp2);
m = NULL;
break;
@@ -1024,27 +1060,18 @@
static int
uipc_sense(struct socket *so, struct stat *sb)
{
- struct unpcb *unp, *unp2;
- struct socket *so2;
+ struct unpcb *unp;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_sense: unp == NULL"));
sb->st_blksize = so->so_snd.sb_hiwat;
- UNP_LINK_RLOCK();
UNP_PCB_LOCK(unp);
- unp2 = unp->unp_conn;
- if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) &&
- unp2 != NULL) {
- so2 = unp2->unp_socket;
- sb->st_blksize += so2->so_rcv.sb_cc;
- }
sb->st_dev = NODEV;
if (unp->unp_ino == 0)
unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
sb->st_ino = unp->unp_ino;
UNP_PCB_UNLOCK(unp);
- UNP_LINK_RUNLOCK();
return (0);
}
@@ -1090,7 +1117,9 @@
.pru_accept = uipc_accept,
.pru_attach = uipc_attach,
.pru_bind = uipc_bind,
+ .pru_bindat = uipc_bindat,
.pru_connect = uipc_connect,
+ .pru_connectat = uipc_connectat,
.pru_connect2 = uipc_connect2,
.pru_detach = uipc_detach,
.pru_disconnect = uipc_disconnect,
@@ -1110,7 +1139,9 @@
.pru_accept = uipc_accept,
.pru_attach = uipc_attach,
.pru_bind = uipc_bind,
+ .pru_bindat = uipc_bindat,
.pru_connect = uipc_connect,
+ .pru_connectat = uipc_connectat,
.pru_connect2 = uipc_connect2,
.pru_detach = uipc_detach,
.pru_disconnect = uipc_disconnect,
@@ -1130,7 +1161,9 @@
.pru_accept = uipc_accept,
.pru_attach = uipc_attach,
.pru_bind = uipc_bind,
+ .pru_bindat = uipc_bindat,
.pru_connect = uipc_connect,
+ .pru_connectat = uipc_connectat,
.pru_connect2 = uipc_connect2,
.pru_detach = uipc_detach,
.pru_disconnect = uipc_disconnect,
@@ -1242,15 +1275,27 @@
static int
unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
+
+ return (unp_connectat(AT_FDCWD, so, nam, td));
+}
+
+static int
+unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
+ struct thread *td)
+{
struct sockaddr_un *soun = (struct sockaddr_un *)nam;
struct vnode *vp;
struct socket *so2, *so3;
struct unpcb *unp, *unp2, *unp3;
- int error, len, vfslocked;
struct nameidata nd;
char buf[SOCK_MAXADDRLEN];
struct sockaddr *sa;
+ cap_rights_t rights;
+ int error, len;
+ if (nam->sa_family != AF_UNIX)
+ return (EAFNOSUPPORT);
+
UNP_LINK_WLOCK_ASSERT();
unp = sotounpcb(so);
@@ -1274,8 +1319,8 @@
UNP_PCB_UNLOCK(unp);
sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
- NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKSHARED | LOCKLEAF,
- UIO_SYSSPACE, buf, td);
+ NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
+ UIO_SYSSPACE, buf, fd, cap_rights_init(&rights, CAP_CONNECTAT), td);
error = namei(&nd);
if (error)
vp = NULL;
@@ -1282,7 +1327,6 @@
else
vp = nd.ni_vp;
ASSERT_VOP_LOCKED(vp, "unp_connect");
- vfslocked = NDHASGIANT(&nd);
NDFREE(&nd, NDF_ONLY_PNBUF);
if (error)
goto bad;
@@ -1299,7 +1343,6 @@
error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
if (error)
goto bad;
- VFS_UNLOCK_GIANT(vfslocked);
unp = sotounpcb(so);
KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
@@ -1342,7 +1385,7 @@
}
/*
- * The connecter's (client's) credentials are copied from its
+ * The connector's (client's) credentials are copied from its
* process structure at the time of connect() (which is now).
*/
cru2x(td->td_ucred, &unp3->unp_peercred);
@@ -1382,16 +1425,9 @@
UNP_PCB_UNLOCK(unp);
bad2:
UNP_LINK_WUNLOCK();
- if (vfslocked)
- /*
- * Giant has been previously acquired. This means filesystem
- * isn't MPSAFE. Do it once again.
- */
- mtx_lock(&Giant);
bad:
if (vp != NULL)
vput(vp);
- VFS_UNLOCK_GIANT(vfslocked);
free(sa, M_SONAME);
UNP_LINK_WLOCK();
UNP_PCB_LOCK(unp);
@@ -1417,6 +1453,7 @@
if (so2->so_type != so->so_type)
return (EPROTOTYPE);
+ unp2->unp_flags &= ~UNP_NASCENT;
unp->unp_conn = unp2;
switch (so->so_type) {
@@ -1660,31 +1697,33 @@
}
static void
-unp_freerights(struct file **rp, int fdcount)
+unp_freerights(struct filedescent **fdep, int fdcount)
{
+ struct file *fp;
int i;
- struct file *fp;
+ KASSERT(fdcount > 0, ("%s: fdcount %d", __func__, fdcount));
+
for (i = 0; i < fdcount; i++) {
- fp = *rp;
- *rp++ = NULL;
+ fp = fdep[i]->fde_file;
+ filecaps_free(&fdep[i]->fde_caps);
unp_discard(fp);
}
+ free(fdep[0], M_FILECAPS);
}
static int
-unp_externalize(struct mbuf *control, struct mbuf **controlp)
+unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags)
{
struct thread *td = curthread; /* XXX */
struct cmsghdr *cm = mtod(control, struct cmsghdr *);
int i;
int *fdp;
- struct file **rp;
- struct file *fp;
+ struct filedesc *fdesc = td->td_proc->p_fd;
+ struct filedescent *fde, **fdep;
void *data;
socklen_t clen = control->m_len, datalen;
int error, newfds;
- int f;
u_int newlen;
UNP_LINK_UNLOCK_ASSERT();
@@ -1701,22 +1740,17 @@
datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
if (cm->cmsg_level == SOL_SOCKET
&& cm->cmsg_type == SCM_RIGHTS) {
- newfds = datalen / sizeof(struct file *);
- rp = data;
+ newfds = datalen / sizeof(*fdep);
+ if (newfds == 0)
+ goto next;
+ fdep = data;
/* If we're not outputting the descriptors free them. */
if (error || controlp == NULL) {
- unp_freerights(rp, newfds);
+ unp_freerights(fdep, newfds);
goto next;
}
- FILEDESC_XLOCK(td->td_proc->p_fd);
- /* if the new FD's will not fit free them. */
- if (!fdavail(td, newfds)) {
- FILEDESC_XUNLOCK(td->td_proc->p_fd);
- error = EMSGSIZE;
- unp_freerights(rp, newfds);
- goto next;
- }
+ FILEDESC_XLOCK(fdesc);
/*
* Now change each pointer to an fd in the global
@@ -1728,23 +1762,33 @@
*controlp = sbcreatecontrol(NULL, newlen,
SCM_RIGHTS, SOL_SOCKET);
if (*controlp == NULL) {
- FILEDESC_XUNLOCK(td->td_proc->p_fd);
+ FILEDESC_XUNLOCK(fdesc);
error = E2BIG;
- unp_freerights(rp, newfds);
+ unp_freerights(fdep, newfds);
goto next;
}
fdp = (int *)
CMSG_DATA(mtod(*controlp, struct cmsghdr *));
- for (i = 0; i < newfds; i++) {
- if (fdalloc(td, 0, &f))
- panic("unp_externalize fdalloc failed");
- fp = *rp++;
- td->td_proc->p_fd->fd_ofiles[f] = fp;
- unp_externalize_fp(fp);
- *fdp++ = f;
+ if (fdallocn(td, 0, fdp, newfds) != 0) {
+ FILEDESC_XUNLOCK(td->td_proc->p_fd);
+ error = EMSGSIZE;
+ unp_freerights(fdep, newfds);
+ m_freem(*controlp);
+ *controlp = NULL;
+ goto next;
}
- FILEDESC_XUNLOCK(td->td_proc->p_fd);
+ for (i = 0; i < newfds; i++, fdp++) {
+ fde = &fdesc->fd_ofiles[*fdp];
+ fde->fde_file = fdep[i]->fde_file;
+ filecaps_move(&fdep[i]->fde_caps,
+ &fde->fde_caps);
+ if ((flags & MSG_CMSG_CLOEXEC) != 0)
+ fde->fde_flags |= UF_EXCLOSE;
+ unp_externalize_fp(fde->fde_file);
+ }
+ FILEDESC_XUNLOCK(fdesc);
+ free(fdep[0], M_FILECAPS);
} else {
/* We can just copy anything else across. */
if (error || controlp == NULL)
@@ -1796,6 +1840,7 @@
if (unp_zone == NULL)
panic("unp_init");
uma_zone_set_max(unp_zone, maxsockets);
+ uma_zone_set_warning(unp_zone, "kern.ipc.maxsockets limit reached");
EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change,
NULL, EVENTHANDLER_PRI_ANY);
LIST_INIT(&unp_dhead);
@@ -1814,14 +1859,14 @@
{
struct mbuf *control = *controlp;
struct proc *p = td->td_proc;
- struct filedesc *fdescp = p->p_fd;
+ struct filedesc *fdesc = p->p_fd;
struct bintime *bt;
struct cmsghdr *cm = mtod(control, struct cmsghdr *);
struct cmsgcred *cmcred;
- struct file **rp;
+ struct filedescent *fde, **fdep, *fdev;
struct file *fp;
struct timeval *tv;
- int i, fd, *fdp;
+ int i, *fdp;
void *data;
socklen_t clen = control->m_len, datalen;
int error, oldfds;
@@ -1833,7 +1878,7 @@
*controlp = NULL;
while (cm != NULL) {
if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
- || cm->cmsg_len > clen) {
+ || cm->cmsg_len > clen || cm->cmsg_len < sizeof(*cm)) {
error = EINVAL;
goto out;
}
@@ -1866,23 +1911,23 @@
case SCM_RIGHTS:
oldfds = datalen / sizeof (int);
+ if (oldfds == 0)
+ break;
/*
* Check that all the FDs passed in refer to legal
* files. If not, reject the entire operation.
*/
fdp = data;
- FILEDESC_SLOCK(fdescp);
- for (i = 0; i < oldfds; i++) {
- fd = *fdp++;
- if ((unsigned)fd >= fdescp->fd_nfiles ||
- fdescp->fd_ofiles[fd] == NULL) {
- FILEDESC_SUNLOCK(fdescp);
+ FILEDESC_SLOCK(fdesc);
+ for (i = 0; i < oldfds; i++, fdp++) {
+ fp = fget_locked(fdesc, *fdp);
+ if (fp == NULL) {
+ FILEDESC_SUNLOCK(fdesc);
error = EBADF;
goto out;
}
- fp = fdescp->fd_ofiles[fd];
if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
- FILEDESC_SUNLOCK(fdescp);
+ FILEDESC_SUNLOCK(fdesc);
error = EOPNOTSUPP;
goto out;
}
@@ -1891,25 +1936,30 @@
/*
* Now replace the integer FDs with pointers to the
- * associated global file table entry..
+ * file structure and capability rights.
*/
- newlen = oldfds * sizeof(struct file *);
+ newlen = oldfds * sizeof(fdep[0]);
*controlp = sbcreatecontrol(NULL, newlen,
SCM_RIGHTS, SOL_SOCKET);
if (*controlp == NULL) {
- FILEDESC_SUNLOCK(fdescp);
+ FILEDESC_SUNLOCK(fdesc);
error = E2BIG;
goto out;
}
fdp = data;
- rp = (struct file **)
+ fdep = (struct filedescent **)
CMSG_DATA(mtod(*controlp, struct cmsghdr *));
- for (i = 0; i < oldfds; i++) {
- fp = fdescp->fd_ofiles[*fdp++];
- *rp++ = fp;
- unp_internalize_fp(fp);
+ fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS,
+ M_WAITOK);
+ for (i = 0; i < oldfds; i++, fdev++, fdp++) {
+ fde = &fdesc->fd_ofiles[*fdp];
+ fdep[i] = fdev;
+ fdep[i]->fde_file = fde->fde_file;
+ filecaps_copy(&fde->fde_caps,
+ &fdep[i]->fde_caps);
+ unp_internalize_fp(fdep[i]->fde_file);
}
- FILEDESC_SUNLOCK(fdescp);
+ FILEDESC_SUNLOCK(fdesc);
break;
case SCM_TIMESTAMP:
@@ -2105,17 +2155,22 @@
static int unp_unreachable;
static void
-unp_accessable(struct file *fp)
+unp_accessable(struct filedescent **fdep, int fdcount)
{
struct unpcb *unp;
+ struct file *fp;
+ int i;
- if ((unp = fptounp(fp)) == NULL)
- return;
- if (unp->unp_gcflag & UNPGC_REF)
- return;
- unp->unp_gcflag &= ~UNPGC_DEAD;
- unp->unp_gcflag |= UNPGC_REF;
- unp_marked++;
+ for (i = 0; i < fdcount; i++) {
+ fp = fdep[i]->fde_file;
+ if ((unp = fptounp(fp)) == NULL)
+ continue;
+ if (unp->unp_gcflag & UNPGC_REF)
+ continue;
+ unp->unp_gcflag &= ~UNPGC_DEAD;
+ unp->unp_gcflag |= UNPGC_REF;
+ unp_marked++;
+ }
}
static void
@@ -2146,9 +2201,11 @@
* Mark all sockets we reference with RIGHTS.
*/
so = unp->unp_socket;
- SOCKBUF_LOCK(&so->so_rcv);
- unp_scan(so->so_rcv.sb_mb, unp_accessable);
- SOCKBUF_UNLOCK(&so->so_rcv);
+ if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ unp_scan(so->so_rcv.sb_mb, unp_accessable);
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ }
/*
* Mark all sockets in our accept queue.
@@ -2155,6 +2212,8 @@
*/
ACCEPT_LOCK();
TAILQ_FOREACH(soa, &so->so_comp, so_list) {
+ if ((sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS) != 0)
+ continue;
SOCKBUF_LOCK(&soa->so_rcv);
unp_scan(soa->so_rcv.sb_mb, unp_accessable);
SOCKBUF_UNLOCK(&soa->so_rcv);
@@ -2184,11 +2243,13 @@
unp_taskcount++;
UNP_LIST_LOCK();
/*
- * First clear all gc flags from previous runs.
+ * First clear all gc flags from previous runs, apart from
+ * UNPGC_IGNORE_RIGHTS.
*/
for (head = heads; *head != NULL; head++)
LIST_FOREACH(unp, *head, unp_link)
- unp->unp_gcflag = 0;
+ unp->unp_gcflag =
+ (unp->unp_gcflag & UNPGC_IGNORE_RIGHTS);
/*
* Scan marking all reachable sockets with UNPGC_REF. Once a socket
@@ -2262,19 +2323,31 @@
{
if (m)
- unp_scan(m, unp_discard);
+ unp_scan(m, unp_freerights);
}
+/*
+ * Synchronize against unp_gc, which can trip over data as we are freeing it.
+ */
+void
+unp_dispose_so(struct socket *so)
+{
+ struct unpcb *unp;
+
+ unp = sotounpcb(so);
+ UNP_LIST_LOCK();
+ unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS;
+ UNP_LIST_UNLOCK();
+ unp_dispose(so->so_rcv.sb_mb);
+}
+
static void
-unp_scan(struct mbuf *m0, void (*op)(struct file *))
+unp_scan(struct mbuf *m0, void (*op)(struct filedescent **, int))
{
struct mbuf *m;
- struct file **rp;
struct cmsghdr *cm;
void *data;
- int i;
socklen_t clen, datalen;
- int qfds;
while (m0 != NULL) {
for (m = m0; m; m = m->m_next) {
@@ -2294,10 +2367,8 @@
if (cm->cmsg_level == SOL_SOCKET &&
cm->cmsg_type == SCM_RIGHTS) {
- qfds = datalen / sizeof (struct file *);
- rp = data;
- for (i = 0; i < qfds; i++)
- (*op)(*rp++);
+ (*op)(data, datalen /
+ sizeof(struct filedescent *));
}
if (CMSG_SPACE(datalen) < clen) {
@@ -2310,7 +2381,7 @@
}
}
}
- m0 = m0->m_act;
+ m0 = m0->m_nextpkt;
}
}
@@ -2443,7 +2514,7 @@
db_printf("unp_socket: %p unp_vnode: %p\n", unp->unp_socket,
unp->unp_vnode);
- db_printf("unp_ino: %d unp_conn: %p\n", unp->unp_ino,
+ db_printf("unp_ino: %ju unp_conn: %p\n", (uintmax_t)unp->unp_ino,
unp->unp_conn);
db_printf("unp_refs:\n");
@@ -2452,8 +2523,7 @@
/* XXXRW: Would be nice to print the full address, if any. */
db_printf("unp_addr: %p\n", unp->unp_addr);
- db_printf("unp_cc: %d unp_mbcnt: %d unp_gencnt: %llu\n",
- unp->unp_cc, unp->unp_mbcnt,
+ db_printf("unp_gencnt: %llu\n",
(unsigned long long)unp->unp_gencnt);
db_printf("unp_flags: %x (", unp->unp_flags);
More information about the Midnightbsd-cvs mailing list