[Midnightbsd-cvs] src [9136] trunk: Merge libzfs_core and other ZFS bugfixes from FreeBSD and Illumos

Sat Oct 1 20:51:03 EDT 2016

Revision: 9136
          http://svnweb.midnightbsd.org/src/?rev=9136
Author:   laffer1
Date:     2016-10-01 20:51:03 -0400 (Sat, 01 Oct 2016)
Log Message:
-----------
Merge libzfs_core and other ZFS bugfixes from FreeBSD and Illumos

Modified Paths:
--------------
    trunk/Makefile.inc1
    trunk/cddl/contrib/opensolaris/cmd/zdb/zdb.c
    trunk/cddl/contrib/opensolaris/cmd/zfs/zfs.8
    trunk/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
    trunk/cddl/contrib/opensolaris/cmd/zhack/zhack.c
    trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
    trunk/cddl/contrib/opensolaris/cmd/ztest/ztest.c
    trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
    trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c
    trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h
    trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_iter.c
    trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
    trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
    trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c
    trunk/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c
    trunk/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
    trunk/cddl/lib/Makefile
    trunk/cddl/lib/libzfs/Makefile
    trunk/cddl/sbin/zfs/Makefile
    trunk/cddl/sbin/zpool/Makefile
    trunk/cddl/usr.bin/zinject/Makefile
    trunk/cddl/usr.bin/ztest/Makefile
    trunk/cddl/usr.sbin/zdb/Makefile
    trunk/rescue/rescue/Makefile
    trunk/sys/cddl/compat/opensolaris/sys/cred.h
    trunk/sys/cddl/compat/opensolaris/sys/sdt.h
    trunk/sys/cddl/contrib/opensolaris/common/nvpair/fnvpair.c
    trunk/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.c
    trunk/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.h
    trunk/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c
    trunk/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.h
    trunk/sys/cddl/contrib/opensolaris/common/zfs/zprop_common.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/Makefile.files
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bplist.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/refcount.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_tx.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_deleg.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_synctask.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/refcount.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/rrwlock.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfeature.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/uberblock.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_replay.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_checksum.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c
    trunk/sys/cddl/contrib/opensolaris/uts/common/sys/feature_tests.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
    trunk/sys/cddl/contrib/opensolaris/uts/common/sys/nvpair.h

Modified: trunk/Makefile.inc1
===================================================================

--- trunk/Makefile.inc1	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/Makefile.inc1	2016-10-02 00:51:03 UTC (rev 9136)
@@ -1310,6 +1310,7 @@
 		lib/libopie lib/libpam ${_lib_libthr} \
 		lib/libradius lib/libsbuf lib/libtacplus \
 		${_cddl_lib_libumem} ${_cddl_lib_libnvpair} \
+		${_cddl_lib_libzfs_core} \
 		lib/libutil ${_lib_libypclnt} lib/libz lib/msun \
 		${_secure_lib_libcrypto} ${_secure_lib_libssh} \
 		${_secure_lib_libssl} lib/libsqlite3 \
@@ -1334,7 +1335,9 @@
 .if ${MK_CDDL} != "no"
 _cddl_lib_libumem= cddl/lib/libumem
 _cddl_lib_libnvpair= cddl/lib/libnvpair
+_cddl_lib_libzfs_core= cddl/lib/libzfs_core
 _cddl_lib= cddl/lib
+cddl/lib/libzfs_core__L: cddl/lib/libnvpair__L
 .endif
 
 .if ${MK_CRYPT} != "no"

Modified: trunk/cddl/contrib/opensolaris/cmd/zdb/zdb.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zdb/zdb.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/contrib/opensolaris/cmd/zdb/zdb.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -21,10 +21,11 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <stdio.h>
+#include <unistd.h>
 #include <stdio_ext.h>
 #include <stdlib.h>
 #include <ctype.h>
@@ -57,6 +58,7 @@
 #include <sys/arc.h>
 #include <sys/ddt.h>
 #include <sys/zfeature.h>
+#include <zfs_comutil.h>
 #undef ZFS_MAXNAMELEN
 #undef verify
 #include <libzfs.h>
@@ -206,7 +208,28 @@
 	nvlist_free(nv);
 }
 
+/* ARGSUSED */
 static void
+dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size)
+{
+	spa_history_phys_t *shp = data;
+
+	if (shp == NULL)
+		return;
+
+	(void) printf("\t\tpool_create_len = %llu\n",
+	    (u_longlong_t)shp->sh_pool_create_len);
+	(void) printf("\t\tphys_max_off = %llu\n",
+	    (u_longlong_t)shp->sh_phys_max_off);
+	(void) printf("\t\tbof = %llu\n",
+	    (u_longlong_t)shp->sh_bof);
+	(void) printf("\t\teof = %llu\n",
+	    (u_longlong_t)shp->sh_eof);
+	(void) printf("\t\trecords_lost = %llu\n",
+	    (u_longlong_t)shp->sh_records_lost);
+}
+
+static void
 zdb_nicenum(uint64_t num, char *buf)
 {
 	if (dump_opt['P'])
@@ -215,18 +238,18 @@
 		nicenum(num, buf);
 }
 
-const char dump_zap_stars[] = "****************************************";
-const int dump_zap_width = sizeof (dump_zap_stars) - 1;
+const char histo_stars[] = "****************************************";
+const int histo_width = sizeof (histo_stars) - 1;
 
 static void
-dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE])
+dump_histogram(const uint64_t *histo, int size)
 {
 	int i;
-	int minidx = ZAP_HISTOGRAM_SIZE - 1;
+	int minidx = size - 1;
 	int maxidx = 0;
 	uint64_t max = 0;
 
-	for (i = 0; i < ZAP_HISTOGRAM_SIZE; i++) {
+	for (i = 0; i < size; i++) {
 		if (histo[i] > max)
 			max = histo[i];
 		if (histo[i] > 0 && i > maxidx)
@@ -235,12 +258,14 @@
 			minidx = i;
 	}
 
-	if (max < dump_zap_width)
-		max = dump_zap_width;
+	if (max < histo_width)
+		max = histo_width;
 
-	for (i = minidx; i <= maxidx; i++)
-		(void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i],
-		    &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]);
+	for (i = minidx; i <= maxidx; i++) {
+		(void) printf("\t\t\t%3u: %6llu %s\n",
+		    i, (u_longlong_t)histo[i],
+		    &histo_stars[(max - histo[i]) * histo_width / max]);
+	}
 }
 
 static void
@@ -291,19 +316,19 @@
 	    (u_longlong_t)zs.zs_salt);
 
 	(void) printf("\t\tLeafs with 2^n pointers:\n");
-	dump_zap_histogram(zs.zs_leafs_with_2n_pointers);
+	dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE);
 
 	(void) printf("\t\tBlocks with n*5 entries:\n");
-	dump_zap_histogram(zs.zs_blocks_with_n5_entries);
+	dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE);
 
 	(void) printf("\t\tBlocks n/10 full:\n");
-	dump_zap_histogram(zs.zs_blocks_n_tenths_full);
+	dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE);
 
 	(void) printf("\t\tEntries with n chunks:\n");
-	dump_zap_histogram(zs.zs_entries_using_n_chunks);
+	dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE);
 
 	(void) printf("\t\tBuckets with n entries:\n");
-	dump_zap_histogram(zs.zs_buckets_with_n_entries);
+	dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE);
 }
 
 /*ARGSUSED*/
@@ -857,21 +882,22 @@
 	for (int i = 0; i < num; i++) {
 		uint64_t time, txg, ievent;
 		char *cmd, *intstr;
+		boolean_t printed = B_FALSE;
 
 		if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
 		    &time) != 0)
-			continue;
+			goto next;
 		if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
 		    &cmd) != 0) {
 			if (nvlist_lookup_uint64(events[i],
 			    ZPOOL_HIST_INT_EVENT, &ievent) != 0)
-				continue;
+				goto next;
 			verify(nvlist_lookup_uint64(events[i],
 			    ZPOOL_HIST_TXG, &txg) == 0);
 			verify(nvlist_lookup_string(events[i],
 			    ZPOOL_HIST_INT_STR, &intstr) == 0);
-			if (ievent >= LOG_END)
-				continue;
+			if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS)
+				goto next;
 
 			(void) snprintf(internalstr,
 			    sizeof (internalstr),
@@ -884,6 +910,14 @@
 		(void) localtime_r(&tsec, &t);
 		(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
 		(void) printf("%s %s\n", tbuf, cmd);
+		printed = B_TRUE;
+
+next:
+		if (dump_opt['h'] > 1) {
+			if (!printed)
+				(void) printf("unrecognized record:\n");
+			dump_nvlist(events[i], 2);
+		}
 	}
 }
 
@@ -916,7 +950,7 @@
 	const dva_t *dva = bp->blk_dva;
 	int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
 
-	if (dump_opt['b'] >= 5) {
+	if (dump_opt['b'] >= 6) {
 		sprintf_blkptr(blkbuf, bp);
 		return;
 	}
@@ -1496,7 +1530,7 @@
 	dump_zap,		/* other ZAP			*/
 	dump_zap,		/* persistent error log		*/
 	dump_uint8,		/* SPA history			*/
-	dump_uint64,		/* SPA history offsets		*/
+	dump_history_offsets,	/* SPA history offsets		*/
 	dump_zap,		/* Pool properties		*/
 	dump_zap,		/* DSL permissions		*/
 	dump_acl,		/* ZFS ACL			*/
@@ -1661,7 +1695,9 @@
 	int print_header = 1;
 	int i, error;
 
+	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
 	dmu_objset_fast_stat(os, &dds);
+	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
 
 	if (dds.dds_type < DMU_OST_NUMTYPES)
 		type = objset_types[dds.dds_type];
@@ -1953,11 +1989,13 @@
 /*
  * Block statistics.
  */
+#define	PSIZE_HISTO_SIZE (SPA_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1)
 typedef struct zdb_blkstats {
-	uint64_t	zb_asize;
-	uint64_t	zb_lsize;
-	uint64_t	zb_psize;
-	uint64_t	zb_count;
+	uint64_t zb_asize;
+	uint64_t zb_lsize;
+	uint64_t zb_psize;
+	uint64_t zb_count;
+	uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE];
 } zdb_blkstats_t;
 
 /*
@@ -1981,6 +2019,9 @@
 	zdb_blkstats_t	zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
 	uint64_t	zcb_dedup_asize;
 	uint64_t	zcb_dedup_blocks;
+	uint64_t	zcb_start;
+	uint64_t	zcb_lastprint;
+	uint64_t	zcb_totalasize;
 	uint64_t	zcb_errors[256];
 	int		zcb_readfails;
 	int		zcb_haderrors;
@@ -2007,6 +2048,7 @@
 		zb->zb_lsize += BP_GET_LSIZE(bp);
 		zb->zb_psize += BP_GET_PSIZE(bp);
 		zb->zb_count++;
+		zb->zb_psize_histogram[BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT]++;
 	}
 
 	if (dump_opt['L'])
@@ -2070,7 +2112,6 @@
 		    NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));
 
 		free(data);
-
 		if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
 			zcb->zcb_haderrors = 1;
 			zcb->zcb_errors[ioerr]++;
@@ -2094,7 +2135,7 @@
 
 	zcb->zcb_readfails = 0;
 
-	if (dump_opt['b'] >= 4) {
+	if (dump_opt['b'] >= 5) {
 		sprintf_blkptr(blkbuf, bp);
 		(void) printf("objset %llu object %llu "
 		    "level %lld offset 0x%llx %s\n",
@@ -2105,6 +2146,28 @@
 		    blkbuf);
 	}
 
+	if (dump_opt['b'] < 5 && isatty(STDERR_FILENO) &&
+	    gethrtime() > zcb->zcb_lastprint + NANOSEC) {
+		uint64_t now = gethrtime();
+		char buf[10];
+		uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize;
+		int kb_per_sec =
+		    1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000));
+		int sec_remaining =
+		    (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec;
+
+		zfs_nicenum(bytes, buf, sizeof (buf));
+		(void) fprintf(stderr,
+		    "\r%5s completed (%4dMB/s) "
+		    "estimated time remaining: %uhr %02umin %02usec        ",
+		    buf, kb_per_sec / 1024,
+		    sec_remaining / 60 / 60,
+		    sec_remaining / 60 % 60,
+		    sec_remaining % 60);
+
+		zcb->zcb_lastprint = now;
+	}
+
 	return (0);
 }
 
@@ -2236,7 +2299,7 @@
 {
 	zdb_cb_t *zcb = arg;
 
-	if (dump_opt['b'] >= 4) {
+	if (dump_opt['b'] >= 5) {
 		char blkbuf[BP_SPRINTF_LEN];
 		sprintf_blkptr(blkbuf, bp);
 		(void) printf("[%s] %s\n",
@@ -2255,7 +2318,7 @@
 	int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
 	int leaks = 0;
 
-	(void) printf("\nTraversing all blocks %s%s%s%s%s...\n",
+	(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
 	    (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
 	    (dump_opt['c'] == 1) ? "metadata " : "",
 	    dump_opt['c'] ? "checksums " : "",
@@ -2291,6 +2354,8 @@
 	if (dump_opt['c'] > 1)
 		flags |= TRAVERSE_PREFETCH_DATA;
 
+	zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa));
+	zcb.zcb_start = zcb.zcb_lastprint = gethrtime();
 	zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
 
 	if (zcb.zcb_haderrors) {
@@ -2418,6 +2483,14 @@
 				else
 					(void) printf("    L%d %s\n",
 					    level, typename);
+
+				if (dump_opt['b'] >= 4) {
+					(void) printf("psize "
+					    "(in 512-byte sectors): "
+					    "number of blocks\n");
+					dump_histogram(zb->zb_psize_histogram,
+					    PSIZE_HISTO_SIZE);
+				}
 			}
 		}
 	}

Modified: trunk/cddl/contrib/opensolaris/cmd/zfs/zfs.8
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zfs/zfs.8	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/contrib/opensolaris/cmd/zfs/zfs.8	2016-10-02 00:51:03 UTC (rev 9136)
@@ -25,7 +25,7 @@
 .\"
 .\" $MidnightBSD$
 .\"
-.Dd November 26, 2012
+.Dd March 31, 2013
 .Dt ZFS 8
 .Os
 .Sh NAME
@@ -62,6 +62,7 @@
 .Op Fl r
 .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
 .Ar filesystem at snapname Ns | Ns Ar volume at snapname
+.Ar filesystem at snapname Ns | Ns Ar volume at snapname Ns ...
 .Nm
 .Cm rollback
 .Op Fl rRf
@@ -1606,7 +1607,11 @@
 Destroy (or mark for deferred deletion) all snapshots with this name in
 descendent file systems.
 .It Fl R
-Recursively destroy all dependents.
+Recursively destroy all clones of these snapshots, including the clones,
+snapshots, and children.
+If this flag is specified, the
+.Op fl d
+flag will have no effect.
 .It Fl n
 Do a dry-run ("No-op") deletion. No data will be deleted. This is useful in
 conjunction with the
@@ -1634,17 +1639,18 @@
 .Op Fl r
 .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
 .Ar filesystem at snapname Ns | Ns volume at snapname
+.Ar filesystem at snapname Ns | Ns volume at snapname Ns ...
 .Xc
 .Pp
-Creates a snapshot with the given name. All previous modifications by
-successful system calls to the file system are part of the snapshot. See the
+Creates snapshots with the given names. All previous modifications by
+successful system calls to the file system are part of the snapshots.
+Snapshots are taken atomically, so that all snapshots correspond to the same
+moment in time. See the
 .Qq Sx Snapshots
 section for details.
 .Bl -tag -width indent
 .It Fl r
-Recursively create snapshots of all descendent datasets. Snapshots are taken
-atomically, so that all recursive snapshots correspond to the same moment in
-time.
+Recursively create snapshots of all descendent datasets
 .It Fl o Ar property Ns = Ns Ar value
 Sets the specified property; see
 .Qq Nm Cm create

Modified: trunk/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -58,6 +58,7 @@
 #include <time.h>
 
 #include <libzfs.h>
+#include <libzfs_core.h>
 #include <zfs_prop.h>
 #include <zfs_deleg.h>
 #include <libuutil.h>
@@ -74,6 +75,7 @@
 
 static FILE *mnttab_file;
 static char history_str[HIS_MAX_RECORD_LEN];
+static boolean_t log_history = B_TRUE;
 
 static int zfs_do_clone(int argc, char **argv);
 static int zfs_do_create(int argc, char **argv);
@@ -276,7 +278,7 @@
 		return (gettext("\tshare <-a | filesystem>\n"));
 	case HELP_SNAPSHOT:
 		return (gettext("\tsnapshot [-r] [-o property=value] ... "
-		    "<filesystem at snapname|volume at snapname>\n"));
+		    "<filesystem at snapname|volume at snapname> ...\n"));
 	case HELP_UNMOUNT:
 		return (gettext("\tunmount [-f] "
 		    "<-a | filesystem|mountpoint>\n"));
@@ -903,11 +905,12 @@
 	boolean_t	cb_parsable;
 	boolean_t	cb_dryrun;
 	nvlist_t	*cb_nvl;
+	nvlist_t	*cb_batchedsnaps;
 
 	/* first snap in contiguous run */
-	zfs_handle_t	*cb_firstsnap;
+	char		*cb_firstsnap;
 	/* previous snap in contiguous run */
-	zfs_handle_t	*cb_prevsnap;
+	char		*cb_prevsnap;
 	int64_t		cb_snapused;
 	char		*cb_snapspec;
 } destroy_cbdata_t;
@@ -999,9 +1002,27 @@
 		zfs_close(zhp);
 		return (0);
 	}
+	if (cb->cb_dryrun) {
+		zfs_close(zhp);
+		return (0);
+	}
 
-	if (!cb->cb_dryrun) {
-		if (zfs_unmount(zhp, NULL, cb->cb_force ? MS_FORCE : 0) != 0 ||
+	/*
+	 * We batch up all contiguous snapshots (even of different
+	 * filesystems) and destroy them with one ioctl.  We can't
+	 * simply do all snap deletions and then all fs deletions,
+	 * because we must delete a clone before its origin.
+	 */
+	if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT) {
+		fnvlist_add_boolean(cb->cb_batchedsnaps, name);
+	} else {
+		int error = zfs_destroy_snaps_nvl(g_zfs,
+		    cb->cb_batchedsnaps, B_FALSE);
+		fnvlist_free(cb->cb_batchedsnaps);
+		cb->cb_batchedsnaps = fnvlist_alloc();
+
+		if (error != 0 ||
+		    zfs_unmount(zhp, NULL, cb->cb_force ? MS_FORCE : 0) != 0 ||
 		    zfs_destroy(zhp, cb->cb_defer_destroy) != 0) {
 			zfs_close(zhp);
 			return (-1);
@@ -1021,11 +1042,13 @@
 
 	if (nvlist_exists(cb->cb_nvl, name)) {
 		if (cb->cb_firstsnap == NULL)
-			cb->cb_firstsnap = zfs_handle_dup(zhp);
+			cb->cb_firstsnap = strdup(name);
 		if (cb->cb_prevsnap != NULL)
-			zfs_close(cb->cb_prevsnap);
+			free(cb->cb_prevsnap);
 		/* this snap continues the current range */
-		cb->cb_prevsnap = zfs_handle_dup(zhp);
+		cb->cb_prevsnap = strdup(name);
+		if (cb->cb_firstsnap == NULL || cb->cb_prevsnap == NULL)
+			nomem();
 		if (cb->cb_verbose) {
 			if (cb->cb_parsable) {
 				(void) printf("destroy\t%s\n", name);
@@ -1040,12 +1063,12 @@
 	} else if (cb->cb_firstsnap != NULL) {
 		/* end of this range */
 		uint64_t used = 0;
-		err = zfs_get_snapused_int(cb->cb_firstsnap,
+		err = lzc_snaprange_space(cb->cb_firstsnap,
 		    cb->cb_prevsnap, &used);
 		cb->cb_snapused += used;
-		zfs_close(cb->cb_firstsnap);
+		free(cb->cb_firstsnap);
 		cb->cb_firstsnap = NULL;
-		zfs_close(cb->cb_prevsnap);
+		free(cb->cb_prevsnap);
 		cb->cb_prevsnap = NULL;
 	}
 	zfs_close(zhp);
@@ -1062,13 +1085,13 @@
 	if (cb->cb_firstsnap != NULL) {
 		uint64_t used = 0;
 		if (err == 0) {
-			err = zfs_get_snapused_int(cb->cb_firstsnap,
+			err = lzc_snaprange_space(cb->cb_firstsnap,
 			    cb->cb_prevsnap, &used);
 		}
 		cb->cb_snapused += used;
-		zfs_close(cb->cb_firstsnap);
+		free(cb->cb_firstsnap);
 		cb->cb_firstsnap = NULL;
-		zfs_close(cb->cb_prevsnap);
+		free(cb->cb_prevsnap);
 		cb->cb_prevsnap = NULL;
 	}
 	return (err);
@@ -1155,8 +1178,10 @@
 zfs_do_destroy(int argc, char **argv)
 {
 	destroy_cbdata_t cb = { 0 };
+	int rv = 0;
+	int err = 0;
 	int c;
-	zfs_handle_t *zhp;
+	zfs_handle_t *zhp = NULL;
 	char *at;
 	zfs_type_t type = ZFS_TYPE_DATASET;
 
@@ -1210,11 +1235,9 @@
 
 	at = strchr(argv[0], '@');
 	if (at != NULL) {
-		int err = 0;
 
 		/* Build the list of snaps to destroy in cb_nvl. */
-		if (nvlist_alloc(&cb.cb_nvl, NV_UNIQUE_NAME, 0) != 0)
-			nomem();
+		cb.cb_nvl = fnvlist_alloc();
 
 		*at = '\0';
 		zhp = zfs_open(g_zfs, argv[0],
@@ -1225,17 +1248,15 @@
 		cb.cb_snapspec = at + 1;
 		if (gather_snapshots(zfs_handle_dup(zhp), &cb) != 0 ||
 		    cb.cb_error) {
-			zfs_close(zhp);
-			nvlist_free(cb.cb_nvl);
-			return (1);
+			rv = 1;
+			goto out;
 		}
 
 		if (nvlist_empty(cb.cb_nvl)) {
 			(void) fprintf(stderr, gettext("could not find any "
 			    "snapshots to destroy; check snapshot names.\n"));
-			zfs_close(zhp);
-			nvlist_free(cb.cb_nvl);
-			return (1);
+			rv = 1;
+			goto out;
 		}
 
 		if (cb.cb_verbose) {
@@ -1254,18 +1275,26 @@
 		}
 
 		if (!cb.cb_dryrun) {
-			if (cb.cb_doclones)
+			if (cb.cb_doclones) {
+				cb.cb_batchedsnaps = fnvlist_alloc();
 				err = destroy_clones(&cb);
+				if (err == 0) {
+					err = zfs_destroy_snaps_nvl(g_zfs,
+					    cb.cb_batchedsnaps, B_FALSE);
+				}
+				if (err != 0) {
+					rv = 1;
+					goto out;
+				}
+			}
 			if (err == 0) {
-				err = zfs_destroy_snaps_nvl(zhp, cb.cb_nvl,
+				err = zfs_destroy_snaps_nvl(g_zfs, cb.cb_nvl,
 				    cb.cb_defer_destroy);
 			}
 		}
 
-		zfs_close(zhp);
-		nvlist_free(cb.cb_nvl);
 		if (err != 0)
-			return (1);
+			rv = 1;
 	} else {
 		/* Open the given dataset */
 		if ((zhp = zfs_open(g_zfs, argv[0], type)) == NULL)
@@ -1286,8 +1315,8 @@
 			    zfs_get_name(zhp));
 			(void) fprintf(stderr, gettext("use 'zpool destroy %s' "
 			    "to destroy the pool itself\n"), zfs_get_name(zhp));
-			zfs_close(zhp);
-			return (1);
+			rv = 1;
+			goto out;
 		}
 
 		/*
@@ -1297,19 +1326,20 @@
 		if (!cb.cb_doclones &&
 		    zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent,
 		    &cb) != 0) {
-			zfs_close(zhp);
-			return (1);
+			rv = 1;
+			goto out;
 		}
 
 		if (cb.cb_error) {
-			zfs_close(zhp);
-			return (1);
+			rv = 1;
+			goto out;
 		}
 
+		cb.cb_batchedsnaps = fnvlist_alloc();
 		if (zfs_iter_dependents(zhp, B_FALSE, destroy_callback,
 		    &cb) != 0) {
-			zfs_close(zhp);
-			return (1);
+			rv = 1;
+			goto out;
 		}
 
 		/*
@@ -1316,11 +1346,22 @@
 		 * Do the real thing.  The callback will close the
 		 * handle regardless of whether it succeeds or not.
 		 */
-		if (destroy_callback(zhp, &cb) != 0)
-			return (1);
+		err = destroy_callback(zhp, &cb);
+		zhp = NULL;
+		if (err == 0) {
+			err = zfs_destroy_snaps_nvl(g_zfs,
+			    cb.cb_batchedsnaps, cb.cb_defer_destroy);
+		}
+		if (err != 0)
+			rv = 1;
 	}
 
-	return (0);
+out:
+	fnvlist_free(cb.cb_batchedsnaps);
+	fnvlist_free(cb.cb_nvl);
+	if (zhp != NULL)
+		zfs_close(zhp);
+	return (rv);
 }
 
 static boolean_t
@@ -1921,9 +1962,11 @@
 			/*
 			 * If they did "zfs upgrade -a", then we could
 			 * be doing ioctls to different pools.  We need
-			 * to log this history once to each pool.
+			 * to log this history once to each pool, and bypass
+			 * the normal history logging that happens in main().
 			 */
-			verify(zpool_stage_history(g_zfs, history_str) == 0);
+			(void) zpool_log_history(g_zfs, history_str);
+			log_history = B_FALSE;
 		}
 		if (zfs_prop_set(zhp, "version", verstr) == 0)
 			cb->cb_numupgraded++;
@@ -3461,6 +3504,32 @@
 	return (ret);
 }
 
+typedef struct snap_cbdata {
+	nvlist_t *sd_nvl;
+	boolean_t sd_recursive;
+	const char *sd_snapname;
+} snap_cbdata_t;
+
+static int
+zfs_snapshot_cb(zfs_handle_t *zhp, void *arg)
+{
+	snap_cbdata_t *sd = arg;
+	char *name;
+	int rv = 0;
+	int error;
+
+	error = asprintf(&name, "%s@%s", zfs_get_name(zhp), sd->sd_snapname);
+	if (error == -1)
+		nomem();
+	fnvlist_add_boolean(sd->sd_nvl, name);
+	free(name);
+
+	if (sd->sd_recursive)
+		rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd);
+	zfs_close(zhp);
+	return (rv);
+}
+
 /*
  * zfs snapshot [-r] [-o prop=value] ... <fs at snap>
  *
@@ -3470,13 +3539,16 @@
 static int
 zfs_do_snapshot(int argc, char **argv)
 {
-	boolean_t recursive = B_FALSE;
 	int ret = 0;
 	char c;
 	nvlist_t *props;
+	snap_cbdata_t sd = { 0 };
+	boolean_t multiple_snaps = B_FALSE;
 
 	if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
 		nomem();
+	if (nvlist_alloc(&sd.sd_nvl, NV_UNIQUE_NAME, 0) != 0)
+		nomem();
 
 	/* check options */
 	while ((c = getopt(argc, argv, "ro:")) != -1) {
@@ -3486,7 +3558,8 @@
 				return (1);
 			break;
 		case 'r':
-			recursive = B_TRUE;
+			sd.sd_recursive = B_TRUE;
+			multiple_snaps = B_TRUE;
 			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
@@ -3503,18 +3576,35 @@
 		(void) fprintf(stderr, gettext("missing snapshot argument\n"));
 		goto usage;
 	}
-	if (argc > 1) {
-		(void) fprintf(stderr, gettext("too many arguments\n"));
-		goto usage;
+
+	if (argc > 1)
+		multiple_snaps = B_TRUE;
+	for (; argc > 0; argc--, argv++) {
+		char *atp;
+		zfs_handle_t *zhp;
+
+		atp = strchr(argv[0], '@');
+		if (atp == NULL)
+			goto usage;
+		*atp = '\0';
+		sd.sd_snapname = atp + 1;
+		zhp = zfs_open(g_zfs, argv[0],
+		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+		if (zhp == NULL)
+			goto usage;
+		if (zfs_snapshot_cb(zhp, &sd) != 0)
+			goto usage;
 	}
 
-	ret = zfs_snapshot(g_zfs, argv[0], recursive, props);
+	ret = zfs_snapshot_nvl(g_zfs, sd.sd_nvl, props);
+	nvlist_free(sd.sd_nvl);
 	nvlist_free(props);
-	if (ret && recursive)
+	if (ret != 0 && multiple_snaps)
 		(void) fprintf(stderr, gettext("no snapshots were created\n"));
 	return (ret != 0);
 
 usage:
+	nvlist_free(sd.sd_nvl);
 	nvlist_free(props);
 	usage(B_FALSE);
 	return (-1);
@@ -5057,14 +5147,6 @@
 	return (error);
 }
 
-/*
- * zfs allow [-r] [-t] <tag> <snap> ...
- *
- *	-r	Recursively hold
- *	-t	Temporary hold (hidden option)
- *
- * Apply a user-hold with the given tag to the list of snapshots.
- */
 static int
 zfs_do_allow(int argc, char **argv)
 {
@@ -5071,14 +5153,6 @@
 	return (zfs_do_allow_unallow_impl(argc, argv, B_FALSE));
 }
 
-/*
- * zfs unallow [-r] [-t] <tag> <snap> ...
- *
- *	-r	Recursively hold
- *	-t	Temporary hold (hidden option)
- *
- * Apply a user-hold with the given tag to the list of snapshots.
- */
 static int
 zfs_do_unallow(int argc, char **argv)
 {
@@ -5092,7 +5166,6 @@
 	int i;
 	const char *tag;
 	boolean_t recursive = B_FALSE;
-	boolean_t temphold = B_FALSE;
 	const char *opts = holding ? "rt" : "r";
 	int c;
 
@@ -5102,9 +5175,6 @@
 		case 'r':
 			recursive = B_TRUE;
 			break;
-		case 't':
-			temphold = B_TRUE;
-			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
@@ -5153,7 +5223,7 @@
 		}
 		if (holding) {
 			if (zfs_hold(zhp, delim+1, tag, recursive,
-			    temphold, B_FALSE, -1, 0, 0) != 0)
+			    B_FALSE, -1) != 0)
 				++errors;
 		} else {
 			if (zfs_release(zhp, delim+1, tag, recursive) != 0)
@@ -5169,7 +5239,6 @@
  * zfs hold [-r] [-t] <tag> <snap> ...
  *
  *	-r	Recursively hold
- *	-t	Temporary hold (hidden option)
  *
  * Apply a user-hold with the given tag to the list of snapshots.
  */
@@ -6591,8 +6660,7 @@
 		return (1);
 	}
 
-	zpool_set_history_str("zfs", argc, argv, history_str);
-	verify(zpool_stage_history(g_zfs, history_str) == 0);
+	zfs_save_arguments(argc, argv, history_str, sizeof (history_str));
 
 	libzfs_print_on_error(g_zfs, B_TRUE);
 
@@ -6661,6 +6729,9 @@
 
 	(void) fclose(mnttab_file);
 
+	if (ret == 0 && log_history)
+		(void) zpool_log_history(g_zfs, history_str);
+
 	libzfs_fini(g_zfs);
 
 	/*

Modified: trunk/cddl/contrib/opensolaris/cmd/zhack/zhack.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zhack/zhack.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/contrib/opensolaris/cmd/zhack/zhack.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -46,6 +46,7 @@
 #include <sys/zio_checksum.h>
 #include <sys/zio_compress.h>
 #include <sys/zfeature.h>
+#include <sys/dmu_tx.h>
 #undef ZFS_MAXNAMELEN
 #undef verify
 #include <libzfs.h>
@@ -273,12 +274,15 @@
 }
 
 static void
-feature_enable_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+feature_enable_sync(void *arg, dmu_tx_t *tx)
 {
-	spa_t *spa = arg1;
-	zfeature_info_t *feature = arg2;
+	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+	zfeature_info_t *feature = arg;
 
 	spa_feature_enable(spa, feature, tx);
+	spa_history_log_internal(spa, "zhack enable feature", tx,
+	    "name=%s can_readonly=%u",
+	    feature->fi_guid, feature->fi_can_readonly);
 }
 
 static void
@@ -341,8 +345,8 @@
 	if (0 == zap_contains(mos, spa->spa_feat_desc_obj, feature.fi_guid))
 		fatal("feature already enabled: %s", feature.fi_guid);
 
-	VERIFY3U(0, ==, dsl_sync_task_do(spa->spa_dsl_pool, NULL,
-	    feature_enable_sync, spa, &feature, 5));
+	VERIFY0(dsl_sync_task(spa_name(spa), NULL,
+	    feature_enable_sync, &feature, 5));
 
 	spa_close(spa, FTAG);
 
@@ -350,21 +354,25 @@
 }
 
 static void
-feature_incr_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+feature_incr_sync(void *arg, dmu_tx_t *tx)
 {
-	spa_t *spa = arg1;
-	zfeature_info_t *feature = arg2;
+	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+	zfeature_info_t *feature = arg;
 
 	spa_feature_incr(spa, feature, tx);
+	spa_history_log_internal(spa, "zhack feature incr", tx,
+	    "name=%s", feature->fi_guid);
 }
 
 static void
-feature_decr_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+feature_decr_sync(void *arg, dmu_tx_t *tx)
 {
-	spa_t *spa = arg1;
-	zfeature_info_t *feature = arg2;
+	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+	zfeature_info_t *feature = arg;
 
 	spa_feature_decr(spa, feature, tx);
+	spa_history_log_internal(spa, "zhack feature decr", tx,
+	    "name=%s", feature->fi_guid);
 }
 
 static void
@@ -435,8 +443,8 @@
 	if (decr && !spa_feature_is_active(spa, &feature))
 		fatal("feature refcount already 0: %s", feature.fi_guid);
 
-	VERIFY3U(0, ==, dsl_sync_task_do(spa->spa_dsl_pool, NULL,
-	    decr ? feature_decr_sync : feature_incr_sync, spa, &feature, 5));
+	VERIFY0(dsl_sync_task(spa_name(spa), NULL,
+	    decr ? feature_decr_sync : feature_incr_sync, &feature, 5));
 
 	spa_close(spa, FTAG);
 }

Modified: trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -192,9 +192,9 @@
 
 #define	NCOMMAND	(sizeof (command_table) / sizeof (command_table[0]))
 
-zpool_command_t *current_command;
+static zpool_command_t *current_command;
 static char history_str[HIS_MAX_RECORD_LEN];
-
+static boolean_t log_history = B_TRUE;
 static uint_t timestamp_fmt = NODATE;
 
 static const char *
@@ -1092,8 +1092,11 @@
 		return (1);
 	}
 
-	ret = (zpool_destroy(zhp) != 0);
+	/* The history must be logged as part of the export */
+	log_history = B_FALSE;
 
+	ret = (zpool_destroy(zhp, history_str) != 0);
+
 	zpool_close(zhp);
 
 	return (ret);
@@ -1156,10 +1159,13 @@
 			continue;
 		}
 
+		/* The history must be logged as part of the export */
+		log_history = B_FALSE;
+
 		if (hardforce) {
-			if (zpool_export_force(zhp) != 0)
+			if (zpool_export_force(zhp, history_str) != 0)
 				ret = 1;
-		} else if (zpool_export(zhp, force) != 0) {
+		} else if (zpool_export(zhp, force, history_str) != 0) {
 			ret = 1;
 		}
 
@@ -4562,6 +4568,14 @@
 		if (count > 0) {
 			cbp->cb_first = B_FALSE;
 			printnl = B_TRUE;
+			/*
+			 * If they did "zpool upgrade -a", then we could
+			 * be doing ioctls to different pools.  We need
+			 * to log this history once to each pool, and bypass
+			 * the normal history logging that happens in main().
+			 */
+			(void) zpool_log_history(g_zfs, history_str);
+			log_history = B_FALSE;
 		}
 	}
 
@@ -4923,8 +4937,8 @@
 
 typedef struct hist_cbdata {
 	boolean_t first;
-	int longfmt;
-	int internal;
+	boolean_t longfmt;
+	boolean_t internal;
 } hist_cbdata_t;
 
 /*
@@ -4936,21 +4950,8 @@
 	nvlist_t *nvhis;
 	nvlist_t **records;
 	uint_t numrecords;
-	char *cmdstr;
-	char *pathstr;
-	uint64_t dst_time;
-	time_t tsec;
-	struct tm t;
-	char tbuf[30];
 	int ret, i;
-	uint64_t who;
-	struct passwd *pwd;
-	char *hostname;
-	char *zonename;
-	char internalstr[MAXPATHLEN];
 	hist_cbdata_t *cb = (hist_cbdata_t *)data;
-	uint64_t txg;
-	uint64_t ievent;
 
 	cb->first = B_FALSE;
 
@@ -4962,36 +4963,73 @@
 	verify(nvlist_lookup_nvlist_array(nvhis, ZPOOL_HIST_RECORD,
 	    &records, &numrecords) == 0);
 	for (i = 0; i < numrecords; i++) {
-		if (nvlist_lookup_uint64(records[i], ZPOOL_HIST_TIME,
-		    &dst_time) != 0)
-			continue;
+		nvlist_t *rec = records[i];
+		char tbuf[30] = "";
 
-		/* is it an internal event or a standard event? */
-		if (nvlist_lookup_string(records[i], ZPOOL_HIST_CMD,
-		    &cmdstr) != 0) {
-			if (cb->internal == 0)
-				continue;
+		if (nvlist_exists(rec, ZPOOL_HIST_TIME)) {
+			time_t tsec;
+			struct tm t;
 
-			if (nvlist_lookup_uint64(records[i],
-			    ZPOOL_HIST_INT_EVENT, &ievent) != 0)
+			tsec = fnvlist_lookup_uint64(records[i],
+			    ZPOOL_HIST_TIME);
+			(void) localtime_r(&tsec, &t);
+			(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
+		}
+
+		if (nvlist_exists(rec, ZPOOL_HIST_CMD)) {
+			(void) printf("%s %s", tbuf,
+			    fnvlist_lookup_string(rec, ZPOOL_HIST_CMD));
+		} else if (nvlist_exists(rec, ZPOOL_HIST_INT_EVENT)) {
+			int ievent =
+			    fnvlist_lookup_uint64(rec, ZPOOL_HIST_INT_EVENT);
+			if (!cb->internal)
 				continue;
-			verify(nvlist_lookup_uint64(records[i],
-			    ZPOOL_HIST_TXG, &txg) == 0);
-			verify(nvlist_lookup_string(records[i],
-			    ZPOOL_HIST_INT_STR, &pathstr) == 0);
-			if (ievent >= LOG_END)
+			if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS) {
+				(void) printf("%s unrecognized record:\n",
+				    tbuf);
+				dump_nvlist(rec, 4);
 				continue;
-			(void) snprintf(internalstr,
-			    sizeof (internalstr),
-			    "[internal %s txg:%lld] %s",
-			    zfs_history_event_names[ievent], txg,
-			    pathstr);
-			cmdstr = internalstr;
+			}
+			(void) printf("%s [internal %s txg:%lld] %s", tbuf,
+			    zfs_history_event_names[ievent],
+			    fnvlist_lookup_uint64(rec, ZPOOL_HIST_TXG),
+			    fnvlist_lookup_string(rec, ZPOOL_HIST_INT_STR));
+		} else if (nvlist_exists(rec, ZPOOL_HIST_INT_NAME)) {
+			if (!cb->internal)
+				continue;
+			(void) printf("%s [txg:%lld] %s", tbuf,
+			    fnvlist_lookup_uint64(rec, ZPOOL_HIST_TXG),
+			    fnvlist_lookup_string(rec, ZPOOL_HIST_INT_NAME));
+			if (nvlist_exists(rec, ZPOOL_HIST_DSNAME)) {
+				(void) printf(" %s (%llu)",
+				    fnvlist_lookup_string(rec,
+				    ZPOOL_HIST_DSNAME),
+				    fnvlist_lookup_uint64(rec,
+				    ZPOOL_HIST_DSID));
+			}
+			(void) printf(" %s", fnvlist_lookup_string(rec,
+			    ZPOOL_HIST_INT_STR));
+		} else if (nvlist_exists(rec, ZPOOL_HIST_IOCTL)) {
+			if (!cb->internal)
+				continue;
+			(void) printf("%s ioctl %s\n", tbuf,
+			    fnvlist_lookup_string(rec, ZPOOL_HIST_IOCTL));
+			if (nvlist_exists(rec, ZPOOL_HIST_INPUT_NVL)) {
+				(void) printf("    input:\n");
+				dump_nvlist(fnvlist_lookup_nvlist(rec,
+				    ZPOOL_HIST_INPUT_NVL), 8);
+			}
+			if (nvlist_exists(rec, ZPOOL_HIST_OUTPUT_NVL)) {
+				(void) printf("    output:\n");
+				dump_nvlist(fnvlist_lookup_nvlist(rec,
+				    ZPOOL_HIST_OUTPUT_NVL), 8);
+			}
+		} else {
+			if (!cb->internal)
+				continue;
+			(void) printf("%s unrecognized record:\n", tbuf);
+			dump_nvlist(rec, 4);
 		}
-		tsec = dst_time;
-		(void) localtime_r(&tsec, &t);
-		(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
-		(void) printf("%s %s", tbuf, cmdstr);
 
 		if (!cb->longfmt) {
 			(void) printf("\n");
@@ -4998,28 +5036,21 @@
 			continue;
 		}
 		(void) printf(" [");
-		if (nvlist_lookup_uint64(records[i],
-		    ZPOOL_HIST_WHO, &who) == 0) {
-			pwd = getpwuid((uid_t)who);
-			if (pwd)
-				(void) printf("user %s on",
-				    pwd->pw_name);
-			else
-				(void) printf("user %d on",
-				    (int)who);
-		} else {
-			(void) printf(gettext("no info]\n"));
-			continue;
+		if (nvlist_exists(rec, ZPOOL_HIST_WHO)) {
+			uid_t who = fnvlist_lookup_uint64(rec, ZPOOL_HIST_WHO);
+			struct passwd *pwd = getpwuid(who);
+			(void) printf("user %d ", (int)who);
+			if (pwd != NULL)
+				(void) printf("(%s) ", pwd->pw_name);
 		}
-		if (nvlist_lookup_string(records[i],
-		    ZPOOL_HIST_HOST, &hostname) == 0) {
-			(void) printf(" %s", hostname);
+		if (nvlist_exists(rec, ZPOOL_HIST_HOST)) {
+			(void) printf("on %s",
+			    fnvlist_lookup_string(rec, ZPOOL_HIST_HOST));
 		}
-		if (nvlist_lookup_string(records[i],
-		    ZPOOL_HIST_ZONE, &zonename) == 0) {
-			(void) printf(":%s", zonename);
+		if (nvlist_exists(rec, ZPOOL_HIST_ZONE)) {
+			(void) printf(":%s",
+			    fnvlist_lookup_string(rec, ZPOOL_HIST_ZONE));
 		}
-
 		(void) printf("]");
 		(void) printf("\n");
 	}
@@ -5034,8 +5065,6 @@
  *
  * Displays the history of commands that modified pools.
  */
-
-
 int
 zpool_do_history(int argc, char **argv)
 {
@@ -5048,10 +5077,10 @@
 	while ((c = getopt(argc, argv, "li")) != -1) {
 		switch (c) {
 		case 'l':
-			cbdata.longfmt = 1;
+			cbdata.longfmt = B_TRUE;
 			break;
 		case 'i':
-			cbdata.internal = 1;
+			cbdata.internal = B_TRUE;
 			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
@@ -5276,8 +5305,7 @@
 	if (strcmp(cmdname, "-?") == 0)
 		usage(B_TRUE);
 
-	zpool_set_history_str("zpool", argc, argv, history_str);
-	verify(zpool_stage_history(g_zfs, history_str) == 0);
+	zfs_save_arguments(argc, argv, history_str, sizeof (history_str));
 
 	/*
 	 * Run the appropriate command.
@@ -5304,6 +5332,9 @@
 		usage(B_FALSE);
 	}
 
+	if (ret == 0 && log_history)
+		(void) zpool_log_history(g_zfs, history_str);
+
 	libzfs_fini(g_zfs);
 
 	/*

Modified: trunk/cddl/contrib/opensolaris/cmd/ztest/ztest.c
===================================================================
--- trunk/cddl/contrib/opensolaris/cmd/ztest/ztest.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/contrib/opensolaris/cmd/ztest/ztest.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -104,10 +104,12 @@
 #include <sys/metaslab_impl.h>
 #include <sys/dsl_prop.h>
 #include <sys/dsl_dataset.h>
+#include <sys/dsl_destroy.h>
 #include <sys/dsl_scan.h>
 #include <sys/zio_checksum.h>
 #include <sys/refcount.h>
 #include <sys/zfeature.h>
+#include <sys/dsl_userhold.h>
 #include <stdio.h>
 #include <stdio_ext.h>
 #include <stdlib.h>
@@ -367,7 +369,7 @@
 	{ ztest_scrub,				1,	&zopt_rarely	},
 	{ ztest_spa_upgrade,			1,	&zopt_rarely	},
 	{ ztest_dsl_dataset_promote_busy,	1,	&zopt_rarely	},
-	{ ztest_vdev_attach_detach,		1,	&zopt_rarely	},
+	{ ztest_vdev_attach_detach,		1,	&zopt_sometimes	},
 	{ ztest_vdev_LUN_growth,		1,	&zopt_rarely	},
 	{ ztest_vdev_add_remove,		1,
 	    &ztest_opts.zo_vdevtime				},
@@ -1008,9 +1010,8 @@
 	uint64_t curval;
 	int error;
 
-	error = dsl_prop_set(osname, propname,
-	    (inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL),
-	    sizeof (value), 1, &value);
+	error = dsl_prop_set_int(osname, propname,
+	    (inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL), value);
 
 	if (error == ENOSPC) {
 		ztest_record_enospc(FTAG);
@@ -1018,8 +1019,7 @@
 	}
 	ASSERT0(error);
 
-	VERIFY3U(dsl_prop_get(osname, propname, sizeof (curval),
-	    1, &curval, setpoint), ==, 0);
+	VERIFY0(dsl_prop_get_integer(osname, propname, &curval, setpoint));
 
 	if (ztest_opts.zo_verbose >= 6) {
 		VERIFY(zfs_prop_index_to_string(prop, curval, &valname) == 0);
@@ -2332,7 +2332,7 @@
 	 */
 	nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
 	VERIFY3U(ENOENT, ==,
-	    spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL));
+	    spa_create("ztest_bad_file", nvroot, NULL, NULL));
 	nvlist_free(nvroot);
 
 	/*
@@ -2340,7 +2340,7 @@
 	 */
 	nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1);
 	VERIFY3U(ENOENT, ==,
-	    spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL));
+	    spa_create("ztest_bad_mirror", nvroot, NULL, NULL));
 	nvlist_free(nvroot);
 
 	/*
@@ -2349,7 +2349,7 @@
 	 */
 	(void) rw_rdlock(&ztest_name_lock);
 	nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
-	VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL));
+	VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL));
 	nvlist_free(nvroot);
 	VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG));
 	VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool));
@@ -2407,7 +2407,7 @@
 	props = fnvlist_alloc();
 	fnvlist_add_uint64(props,
 	    zpool_prop_to_name(ZPOOL_PROP_VERSION), version);
-	VERIFY0(spa_create(name, nvroot, props, NULL, NULL));
+	VERIFY0(spa_create(name, nvroot, props, NULL));
 	fnvlist_free(nvroot);
 	fnvlist_free(props);
 
@@ -2481,8 +2481,7 @@
 	int error;
 
 	VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
-	leaves =
-	    MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz;
+	leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz;
 
 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
 
@@ -3182,7 +3181,7 @@
 	/*
 	 * Verify that the dataset contains a directory object.
 	 */
-	VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os));
+	VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, FTAG, &os));
 	error = dmu_object_info(os, ZTEST_DIROBJ, &doi);
 	if (error != ENOENT) {
 		/* We could have crashed in the middle of destroying it */
@@ -3190,12 +3189,16 @@
 		ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER);
 		ASSERT3S(doi.doi_physical_blocks_512, >=, 0);
 	}
-	dmu_objset_rele(os, FTAG);
+	dmu_objset_disown(os, FTAG);
 
 	/*
 	 * Destroy the dataset.
 	 */
-	VERIFY3U(0, ==, dmu_objset_destroy(name, B_FALSE));
+	if (strchr(name, '@') != NULL) {
+		VERIFY0(dsl_destroy_snapshot(name, B_FALSE));
+	} else {
+		VERIFY0(dsl_destroy_head(name));
+	}
 	return (0);
 }
 
@@ -3205,17 +3208,17 @@
 	char snapname[MAXNAMELEN];
 	int error;
 
-	(void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname,
-	    (u_longlong_t)id);
+	(void) snprintf(snapname, sizeof (snapname), "%llu", (u_longlong_t)id);
 
-	error = dmu_objset_snapshot(osname, strchr(snapname, '@') + 1,
-	    NULL, NULL, B_FALSE, B_FALSE, -1);
+	error = dmu_objset_snapshot_one(osname, snapname);
 	if (error == ENOSPC) {
 		ztest_record_enospc(FTAG);
 		return (B_FALSE);
 	}
-	if (error != 0 && error != EEXIST)
-		fatal(0, "ztest_snapshot_create(%s) = %d", snapname, error);
+	if (error != 0 && error != EEXIST) {
+		fatal(0, "ztest_snapshot_create(%s@%s) = %d", osname,
+		    snapname, error);
+	}
 	return (B_TRUE);
 }
 
@@ -3228,7 +3231,7 @@
 	(void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname,
 	    (u_longlong_t)id);
 
-	error = dmu_objset_destroy(snapname, B_FALSE);
+	error = dsl_destroy_snapshot(snapname, B_FALSE);
 	if (error != 0 && error != ENOENT)
 		fatal(0, "ztest_snapshot_destroy(%s) = %d", snapname, error);
 	return (B_TRUE);
@@ -3274,7 +3277,8 @@
 	/*
 	 * Verify that the destroyed dataset is no longer in the namespace.
 	 */
-	VERIFY3U(ENOENT, ==, dmu_objset_hold(name, FTAG, &os));
+	VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE,
+	    FTAG, &os));
 
 	/*
 	 * Verify that we can create a new dataset.
@@ -3289,8 +3293,7 @@
 		fatal(0, "dmu_objset_create(%s) = %d", name, error);
 	}
 
-	VERIFY3U(0, ==,
-	    dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os));
+	VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os));
 
 	ztest_zd_init(&zdtmp, NULL, os);
 
@@ -3366,21 +3369,21 @@
 	(void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id);
 	(void) snprintf(snap3name, MAXNAMELEN, "%s at s3_%llu", clone1name, id);
 
-	error = dmu_objset_destroy(clone2name, B_FALSE);
+	error = dsl_destroy_head(clone2name);
 	if (error && error != ENOENT)
-		fatal(0, "dmu_objset_destroy(%s) = %d", clone2name, error);
-	error = dmu_objset_destroy(snap3name, B_FALSE);
+		fatal(0, "dsl_destroy_head(%s) = %d", clone2name, error);
+	error = dsl_destroy_snapshot(snap3name, B_FALSE);
 	if (error && error != ENOENT)
-		fatal(0, "dmu_objset_destroy(%s) = %d", snap3name, error);
-	error = dmu_objset_destroy(snap2name, B_FALSE);
+		fatal(0, "dsl_destroy_snapshot(%s) = %d", snap3name, error);
+	error = dsl_destroy_snapshot(snap2name, B_FALSE);
 	if (error && error != ENOENT)
-		fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error);
-	error = dmu_objset_destroy(clone1name, B_FALSE);
+		fatal(0, "dsl_destroy_snapshot(%s) = %d", snap2name, error);
+	error = dsl_destroy_head(clone1name);
 	if (error && error != ENOENT)
-		fatal(0, "dmu_objset_destroy(%s) = %d", clone1name, error);
-	error = dmu_objset_destroy(snap1name, B_FALSE);
+		fatal(0, "dsl_destroy_head(%s) = %d", clone1name, error);
+	error = dsl_destroy_snapshot(snap1name, B_FALSE);
 	if (error && error != ENOENT)
-		fatal(0, "dmu_objset_destroy(%s) = %d", snap1name, error);
+		fatal(0, "dsl_destroy_snapshot(%s) = %d", snap1name, error);
 }
 
 /*
@@ -3389,8 +3392,7 @@
 void
 ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
 {
-	objset_t *clone;
-	dsl_dataset_t *ds;
+	objset_t *os;
 	char snap1name[MAXNAMELEN];
 	char clone1name[MAXNAMELEN];
 	char snap2name[MAXNAMELEN];
@@ -3409,8 +3411,7 @@
 	(void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id);
 	(void) snprintf(snap3name, MAXNAMELEN, "%s at s3_%llu", clone1name, id);
 
-	error = dmu_objset_snapshot(osname, strchr(snap1name, '@')+1,
-	    NULL, NULL, B_FALSE, B_FALSE, -1);
+	error = dmu_objset_snapshot_one(osname, strchr(snap1name, '@') + 1);
 	if (error && error != EEXIST) {
 		if (error == ENOSPC) {
 			ztest_record_enospc(FTAG);
@@ -3419,12 +3420,7 @@
 		fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error);
 	}
 
-	error = dmu_objset_hold(snap1name, FTAG, &clone);
-	if (error)
-		fatal(0, "dmu_open_snapshot(%s) = %d", snap1name, error);
-
-	error = dmu_objset_clone(clone1name, dmu_objset_ds(clone), 0);
-	dmu_objset_rele(clone, FTAG);
+	error = dmu_objset_clone(clone1name, snap1name);
 	if (error) {
 		if (error == ENOSPC) {
 			ztest_record_enospc(FTAG);
@@ -3433,8 +3429,7 @@
 		fatal(0, "dmu_objset_create(%s) = %d", clone1name, error);
 	}
 
-	error = dmu_objset_snapshot(clone1name, strchr(snap2name, '@')+1,
-	    NULL, NULL, B_FALSE, B_FALSE, -1);
+	error = dmu_objset_snapshot_one(clone1name, strchr(snap2name, '@') + 1);
 	if (error && error != EEXIST) {
 		if (error == ENOSPC) {
 			ztest_record_enospc(FTAG);
@@ -3443,8 +3438,7 @@
 		fatal(0, "dmu_open_snapshot(%s) = %d", snap2name, error);
 	}
 
-	error = dmu_objset_snapshot(clone1name, strchr(snap3name, '@')+1,
-	    NULL, NULL, B_FALSE, B_FALSE, -1);
+	error = dmu_objset_snapshot_one(clone1name, strchr(snap3name, '@') + 1);
 	if (error && error != EEXIST) {
 		if (error == ENOSPC) {
 			ztest_record_enospc(FTAG);
@@ -3453,12 +3447,7 @@
 		fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error);
 	}
 
-	error = dmu_objset_hold(snap3name, FTAG, &clone);
-	if (error)
-		fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error);
-
-	error = dmu_objset_clone(clone2name, dmu_objset_ds(clone), 0);
-	dmu_objset_rele(clone, FTAG);
+	error = dmu_objset_clone(clone2name, snap3name);
 	if (error) {
 		if (error == ENOSPC) {
 			ztest_record_enospc(FTAG);
@@ -3467,14 +3456,14 @@
 		fatal(0, "dmu_objset_create(%s) = %d", clone2name, error);
 	}
 
-	error = dsl_dataset_own(snap2name, B_FALSE, FTAG, &ds);
+	error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, FTAG, &os);
 	if (error)
-		fatal(0, "dsl_dataset_own(%s) = %d", snap2name, error);
+		fatal(0, "dmu_objset_own(%s) = %d", snap2name, error);
 	error = dsl_dataset_promote(clone2name, NULL);
 	if (error != EBUSY)
 		fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name,
 		    error);
-	dsl_dataset_disown(ds, FTAG);
+	dmu_objset_disown(os, FTAG);
 
 out:
 	ztest_dsl_dataset_cleanup(osname, id);
@@ -4286,7 +4275,7 @@
 	}
 
 	count = -1ULL;
-	VERIFY(zap_count(os, object, &count) == 0);
+	VERIFY0(zap_count(os, object, &count));
 	ASSERT(count != -1ULL);
 
 	/*
@@ -4597,6 +4586,22 @@
 	(void) rw_unlock(&ztest_name_lock);
 }
 
+static int
+user_release_one(const char *snapname, const char *holdname)
+{
+	nvlist_t *snaps, *holds;
+	int error;
+
+	snaps = fnvlist_alloc();
+	holds = fnvlist_alloc();
+	fnvlist_add_boolean(holds, holdname);
+	fnvlist_add_nvlist(snaps, snapname, holds);
+	fnvlist_free(holds);
+	error = dsl_dataset_user_release(snaps, NULL);
+	fnvlist_free(snaps);
+	return (error);
+}
+
 /*
  * Test snapshot hold/release and deferred destroy.
  */
@@ -4611,29 +4616,36 @@
 	char clonename[100];
 	char tag[100];
 	char osname[MAXNAMELEN];
+	nvlist_t *holds;
 
 	(void) rw_rdlock(&ztest_name_lock);
 
 	dmu_objset_name(os, osname);
 
-	(void) snprintf(snapname, 100, "sh1_%llu", id);
-	(void) snprintf(fullname, 100, "%s@%s", osname, snapname);
-	(void) snprintf(clonename, 100, "%s/ch1_%llu", osname, id);
-	(void) snprintf(tag, 100, "%tag_%llu", id);
+	(void) snprintf(snapname, sizeof (snapname), "sh1_%llu", id);
+	(void) snprintf(fullname, sizeof (fullname), "%s@%s", osname, snapname);
+	(void) snprintf(clonename, sizeof (clonename),
+	    "%s/ch1_%llu", osname, id);
+	(void) snprintf(tag, sizeof (tag), "tag_%llu", id);
 
 	/*
 	 * Clean up from any previous run.
 	 */
-	(void) dmu_objset_destroy(clonename, B_FALSE);
-	(void) dsl_dataset_user_release(osname, snapname, tag, B_FALSE);
-	(void) dmu_objset_destroy(fullname, B_FALSE);
+	error = dsl_destroy_head(clonename);
+	if (error != ENOENT)
+		ASSERT0(error);
+	error = user_release_one(fullname, tag);
+	if (error != ESRCH && error != ENOENT)
+		ASSERT0(error);
+	error = dsl_destroy_snapshot(fullname, B_FALSE);
+	if (error != ENOENT)
+		ASSERT0(error);
 
 	/*
 	 * Create snapshot, clone it, mark snap for deferred destroy,
 	 * destroy clone, verify snap was also destroyed.
 	 */
-	error = dmu_objset_snapshot(osname, snapname, NULL, NULL, FALSE,
-	    FALSE, -1);
+	error = dmu_objset_snapshot_one(osname, snapname);
 	if (error) {
 		if (error == ENOSPC) {
 			ztest_record_enospc("dmu_objset_snapshot");
@@ -4642,12 +4654,7 @@
 		fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error);
 	}
 
-	error = dmu_objset_hold(fullname, FTAG, &origin);
-	if (error)
-		fatal(0, "dmu_objset_hold(%s) = %d", fullname, error);
-
-	error = dmu_objset_clone(clonename, dmu_objset_ds(origin), 0);
-	dmu_objset_rele(origin, FTAG);
+	error = dmu_objset_clone(clonename, fullname);
 	if (error) {
 		if (error == ENOSPC) {
 			ztest_record_enospc("dmu_objset_clone");
@@ -4656,15 +4663,15 @@
 		fatal(0, "dmu_objset_clone(%s) = %d", clonename, error);
 	}
 
-	error = dmu_objset_destroy(fullname, B_TRUE);
+	error = dsl_destroy_snapshot(fullname, B_TRUE);
 	if (error) {
-		fatal(0, "dmu_objset_destroy(%s, B_TRUE) = %d",
+		fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d",
 		    fullname, error);
 	}
 
-	error = dmu_objset_destroy(clonename, B_FALSE);
+	error = dsl_destroy_head(clonename);
 	if (error)
-		fatal(0, "dmu_objset_destroy(%s) = %d", clonename, error);
+		fatal(0, "dsl_destroy_head(%s) = %d", clonename, error);
 
 	error = dmu_objset_hold(fullname, FTAG, &origin);
 	if (error != ENOENT)
@@ -4675,8 +4682,7 @@
 	 * destroy a held snapshot, mark for deferred destroy,
 	 * release hold, verify snapshot was destroyed.
 	 */
-	error = dmu_objset_snapshot(osname, snapname, NULL, NULL, FALSE,
-	    FALSE, -1);
+	error = dmu_objset_snapshot_one(osname, snapname);
 	if (error) {
 		if (error == ENOSPC) {
 			ztest_record_enospc("dmu_objset_snapshot");
@@ -4685,28 +4691,31 @@
 		fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error);
 	}
 
-	error = dsl_dataset_user_hold(osname, snapname, tag, B_FALSE,
-	    B_TRUE, -1);
+	holds = fnvlist_alloc();
+	fnvlist_add_string(holds, fullname, tag);
+	error = dsl_dataset_user_hold(holds, 0, NULL);
+	fnvlist_free(holds);
+
 	if (error)
 		fatal(0, "dsl_dataset_user_hold(%s)", fullname, tag);
 
-	error = dmu_objset_destroy(fullname, B_FALSE);
+	error = dsl_destroy_snapshot(fullname, B_FALSE);
 	if (error != EBUSY) {
-		fatal(0, "dmu_objset_destroy(%s, B_FALSE) = %d",
+		fatal(0, "dsl_destroy_snapshot(%s, B_FALSE) = %d",
 		    fullname, error);
 	}
 
-	error = dmu_objset_destroy(fullname, B_TRUE);
+	error = dsl_destroy_snapshot(fullname, B_TRUE);
 	if (error) {
-		fatal(0, "dmu_objset_destroy(%s, B_TRUE) = %d",
+		fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d",
 		    fullname, error);
 	}
 
-	error = dsl_dataset_user_release(osname, snapname, tag, B_FALSE);
+	error = user_release_one(fullname, tag);
 	if (error)
-		fatal(0, "dsl_dataset_user_release(%s)", fullname, tag);
+		fatal(0, "user_release_one(%s)", fullname, tag);
 
-	VERIFY(dmu_objset_hold(fullname, FTAG, &origin) == ENOENT);
+	VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT);
 
 out:
 	(void) rw_unlock(&ztest_name_lock);
@@ -4960,8 +4969,12 @@
 	 */
 	for (int i = 0; i < copies; i++) {
 		uint64_t offset = i * blocksize;
-		VERIFY0(dmu_buf_hold(os, object, offset, FTAG, &db,
-		    DMU_READ_NO_PREFETCH));
+		int error = dmu_buf_hold(os, object, offset, FTAG, &db,
+		    DMU_READ_NO_PREFETCH);
+		if (error != 0) {
+			fatal(B_FALSE, "dmu_buf_hold(%p, %llu, %llu) = %u",
+			    os, (long long)object, (long long) offset, error);
+		}
 		ASSERT(db->db_offset == offset);
 		ASSERT(db->db_size == blocksize);
 		ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) ||
@@ -5172,6 +5185,7 @@
 	nvlist_t *config, *newconfig;
 	uint64_t pool_guid;
 	spa_t *spa;
+	int error;
 
 	if (ztest_opts.zo_verbose >= 4) {
 		(void) printf("import/export: old = %s, new = %s\n",
@@ -5216,7 +5230,12 @@
 	/*
 	 * Import it under the new name.
 	 */
-	VERIFY3U(0, ==, spa_import(newname, config, NULL, 0));
+	error = spa_import(newname, config, NULL, 0);
+	if (error != 0) {
+		dump_nvlist(config, 0);
+		fatal(B_FALSE, "couldn't import pool %s as %s: error %u",
+		    oldname, newname, error);
+	}
 
 	ztest_walk_pool_directory("pools after import");
 
@@ -5423,7 +5442,7 @@
 	}
 	ASSERT(error == 0 || error == EEXIST);
 
-	VERIFY0(dmu_objset_hold(name, zd, &os));
+	VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os));
 	(void) rw_unlock(&ztest_name_lock);
 
 	ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os);
@@ -5464,7 +5483,7 @@
 	ztest_ds_t *zd = &ztest_ds[d];
 
 	zil_close(zd->zd_zilog);
-	dmu_objset_rele(zd->zd_os, zd);
+	dmu_objset_disown(zd->zd_os, zd);
 
 	ztest_zd_fini(zd);
 }
@@ -5508,13 +5527,14 @@
 	 * Open our pool.
 	 */
 	kernel_init(FREAD | FWRITE);
-	VERIFY(spa_open(ztest_opts.zo_pool, &spa, FTAG) == 0);
+	VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
 	spa->spa_debug = B_TRUE;
 	ztest_spa = spa;
 
-	VERIFY3U(0, ==, dmu_objset_hold(ztest_opts.zo_pool, FTAG, &os));
+	VERIFY0(dmu_objset_own(ztest_opts.zo_pool,
+	    DMU_OST_ANY, B_TRUE, FTAG, &os));
 	zs->zs_guid = dmu_objset_fsid_guid(os);
-	dmu_objset_rele(os, FTAG);
+	dmu_objset_disown(os, FTAG);
 
 	spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN;
 
@@ -5791,8 +5811,7 @@
 		    spa_feature_table[i].fi_uname);
 		VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0));
 	}
-	VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props,
-	    NULL, NULL));
+	VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL));
 	nvlist_free(nvroot);
 
 	VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG));

Modified: trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
===================================================================
--- trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -57,7 +57,8 @@
 /*
  * libzfs errors
  */
-enum {
+typedef enum zfs_error {
+	EZFS_SUCCESS = 0,	/* no error -- success */
 	EZFS_NOMEM = 2000,	/* out of memory */
 	EZFS_BADPROP,		/* invalid property value */
 	EZFS_PROPREADONLY,	/* cannot set readonly property */
@@ -129,7 +130,7 @@
 	EZFS_DIFFDATA,		/* bad zfs diff data */
 	EZFS_POOLREADONLY,	/* pool is in read-only mode */
 	EZFS_UNKNOWN
-};
+} zfs_error_t;
 
 /*
  * The following data structures are all part
@@ -185,6 +186,9 @@
 
 extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t);
 
+extern void zfs_save_arguments(int argc, char **, char *, int);
+extern int zpool_log_history(libzfs_handle_t *, const char *);
+
 extern int libzfs_errno(libzfs_handle_t *);
 extern const char *libzfs_error_action(libzfs_handle_t *);
 extern const char *libzfs_error_description(libzfs_handle_t *);
@@ -220,7 +224,7 @@
  */
 extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *,
     nvlist_t *, nvlist_t *);
-extern int zpool_destroy(zpool_handle_t *);
+extern int zpool_destroy(zpool_handle_t *, const char *);
 extern int zpool_add(zpool_handle_t *, nvlist_t *);
 
 typedef struct splitflags {
@@ -343,8 +347,8 @@
 /*
  * Import and export functions
  */
-extern int zpool_export(zpool_handle_t *, boolean_t);
-extern int zpool_export_force(zpool_handle_t *);
+extern int zpool_export(zpool_handle_t *, boolean_t, const char *);
+extern int zpool_export_force(zpool_handle_t *, const char *);
 extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *,
     char *altroot);
 extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *,
@@ -378,7 +382,7 @@
  */
 struct zfs_cmd;
 
-extern const char *zfs_history_event_names[LOG_END];
+extern const char *zfs_history_event_names[];
 
 extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *,
     boolean_t verbose);
@@ -386,12 +390,9 @@
 extern int zpool_get_history(zpool_handle_t *, nvlist_t **);
 extern int zpool_history_unpack(char *, uint64_t, uint64_t *,
     nvlist_t ***, uint_t *);
-extern void zpool_set_history_str(const char *subcommand, int argc,
-    char **argv, char *history_str);
-extern int zpool_stage_history(libzfs_handle_t *, const char *);
 extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *,
     size_t len);
-extern int zfs_ioctl(libzfs_handle_t *, unsigned long, struct zfs_cmd *);
+extern int zfs_ioctl(libzfs_handle_t *, int request, struct zfs_cmd *);
 extern int zpool_get_physpath(zpool_handle_t *, char *, size_t);
 extern void zpool_explain_recover(libzfs_handle_t *, const char *, int,
     nvlist_t *);
@@ -441,8 +442,6 @@
     char *propbuf, int proplen, boolean_t literal);
 extern int zfs_prop_get_feature(zfs_handle_t *zhp, const char *propname,
     char *buf, size_t len);
-extern int zfs_get_snapused_int(zfs_handle_t *firstsnap, zfs_handle_t *lastsnap,
-    uint64_t *usedp);
 extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t);
 extern int zfs_prop_inherit(zfs_handle_t *, const char *, boolean_t);
 extern const char *zfs_prop_values(zfs_prop_t);
@@ -555,9 +554,11 @@
 extern int zfs_create_ancestors(libzfs_handle_t *, const char *);
 extern int zfs_destroy(zfs_handle_t *, boolean_t);
 extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t);
-extern int zfs_destroy_snaps_nvl(zfs_handle_t *, nvlist_t *, boolean_t);
+extern int zfs_destroy_snaps_nvl(libzfs_handle_t *, nvlist_t *, boolean_t);
 extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *);
 extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *);
+extern int zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps,
+    nvlist_t *props);
 extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t);
 
 typedef struct renameflags {
@@ -609,8 +610,8 @@
     sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **);
 
 extern int zfs_promote(zfs_handle_t *);
-extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t,
-    boolean_t, boolean_t, int, uint64_t, uint64_t);
+extern int zfs_hold(zfs_handle_t *, const char *, const char *,
+    boolean_t, boolean_t, int);
 extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t);
 extern int zfs_get_holds(zfs_handle_t *, nvlist_t **);
 extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *);

Modified: trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c
===================================================================
--- trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -1447,7 +1447,6 @@
 	nvlist_t *nvl = NULL, *realprops;
 	zfs_prop_t prop;
 	boolean_t do_prefix = B_TRUE;
-	uint64_t idx;
 	int added_resv;
 
 	(void) snprintf(errbuf, sizeof (errbuf),
@@ -2017,10 +2016,7 @@
 	    NULL, NULL, 0, B_TRUE) != 0)
 		goto out;
 	if (strcmp(gca->buf, gca->origin) == 0) {
-		if (nvlist_add_boolean(gca->value, zfs_get_name(zhp)) != 0) {
-			zfs_close(zhp);
-			return (no_memory(zhp->zfs_hdl));
-		}
+		fnvlist_add_boolean(gca->value, zfs_get_name(zhp));
 		gca->numclones--;
 	}
 
@@ -2711,25 +2707,6 @@
 	return (0);
 }
 
-int
-zfs_get_snapused_int(zfs_handle_t *firstsnap, zfs_handle_t *lastsnap,
-    uint64_t *usedp)
-{
-	int err;
-	zfs_cmd_t zc = { 0 };
-
-	(void) strlcpy(zc.zc_name, lastsnap->zfs_name, sizeof (zc.zc_name));
-	(void) strlcpy(zc.zc_value, firstsnap->zfs_name, sizeof (zc.zc_value));
-
-	err = ioctl(lastsnap->zfs_hdl->libzfs_fd, ZFS_IOC_SPACE_SNAPS, &zc);
-	if (err)
-		return (err);
-
-	*usedp = zc.zc_cookie;
-
-	return (0);
-}
-
 /*
  * Returns the name of the given zfs handle.
  */
@@ -2930,7 +2907,6 @@
 	 */
 	for (cp = target + prefixlen + 1;
 	    cp = strchr(cp, '/'); *cp = '/', cp++) {
-		char *logstr;
 
 		*cp = '\0';
 
@@ -2941,16 +2917,12 @@
 			continue;
 		}
 
-		logstr = hdl->libzfs_log_str;
-		hdl->libzfs_log_str = NULL;
 		if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM,
 		    NULL) != 0) {
-			hdl->libzfs_log_str = logstr;
 			opname = dgettext(TEXT_DOMAIN, "create");
 			goto ancestorerr;
 		}
 
-		hdl->libzfs_log_str = logstr;
 		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
 		if (h == NULL) {
 			opname = dgettext(TEXT_DOMAIN, "open");
@@ -3008,12 +2980,12 @@
 zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
     nvlist_t *props)
 {
-	zfs_cmd_t zc = { 0 };
 	int ret;
 	uint64_t size = 0;
 	uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
 	char errbuf[1024];
 	uint64_t zoned;
+	dmu_objset_type_t ost;
 
 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 	    "cannot create '%s'"), path);
@@ -3033,8 +3005,7 @@
 	 * will return ENOENT, not EEXIST.  To prevent this from happening, we
 	 * first try to see if the dataset exists.
 	 */
-	(void) strlcpy(zc.zc_name, path, sizeof (zc.zc_name));
-	if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
+	if (zfs_dataset_exists(hdl, path, ZFS_TYPE_DATASET)) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "dataset already exists"));
 		return (zfs_error(hdl, EZFS_EXISTS, errbuf));
@@ -3041,9 +3012,9 @@
 	}
 
 	if (type == ZFS_TYPE_VOLUME)
-		zc.zc_objset_type = DMU_OST_ZVOL;
+		ost = DMU_OST_ZVOL;
 	else
-		zc.zc_objset_type = DMU_OST_ZFS;
+		ost = DMU_OST_ZFS;
 
 	if (props && (props = zfs_valid_proplist(hdl, type, props,
 	    zoned, NULL, errbuf)) == 0)
@@ -3095,15 +3066,10 @@
 		}
 	}
 
-	if (props && zcmd_write_src_nvlist(hdl, &zc, props) != 0)
-		return (-1);
+	/* create the dataset */
+	ret = lzc_create(path, ost, props);
 	nvlist_free(props);
 
-	/* create the dataset */
-	ret = zfs_ioctl(hdl, ZFS_IOC_CREATE, &zc);
-
-	zcmd_free_nvlists(&zc);
-
 	/* check for failure */
 	if (ret != 0) {
 		char parent[ZFS_MAXNAMELEN];
@@ -3228,7 +3194,7 @@
 		    dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"),
 		    zhp->zfs_name, snapname);
 	} else {
-		ret = zfs_destroy_snaps_nvl(zhp, dd.nvl, defer);
+		ret = zfs_destroy_snaps_nvl(zhp->zfs_hdl, dd.nvl, defer);
 	}
 	nvlist_free(dd.nvl);
 	return (ret);
@@ -3235,40 +3201,46 @@
 }
 
 /*
- * Destroys all the snapshots named in the nvlist.  They must be underneath
- * the zhp (either snapshots of it, or snapshots of its descendants).
+ * Destroys all the snapshots named in the nvlist.
  */
 int
-zfs_destroy_snaps_nvl(zfs_handle_t *zhp, nvlist_t *snaps, boolean_t defer)
+zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer)
 {
 	int ret;
-	zfs_cmd_t zc = { 0 };
+	nvlist_t *errlist;
 
-	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-	if (zcmd_write_src_nvlist(zhp->zfs_hdl, &zc, snaps) != 0)
-		return (-1);
-	zc.zc_defer_destroy = defer;
+	ret = lzc_destroy_snaps(snaps, defer, &errlist);
 
-	ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY_SNAPS_NVL, &zc);
-	if (ret != 0) {
+	if (ret == 0)
+		return (0);
+
+	if (nvlist_next_nvpair(errlist, NULL) == NULL) {
 		char errbuf[1024];
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN, "cannot destroy snapshots"));
 
-		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-		    "cannot destroy snapshots in %s"), zc.zc_name);
+		ret = zfs_standard_error(hdl, ret, errbuf);
+	}
+	for (nvpair_t *pair = nvlist_next_nvpair(errlist, NULL);
+	    pair != NULL; pair = nvlist_next_nvpair(errlist, pair)) {
+		char errbuf[1024];
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN, "cannot destroy snapshot %s"),
+		    nvpair_name(pair));
 
-		switch (errno) {
+		switch (fnvpair_value_int32(pair)) {
 		case EEXIST:
-			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
-			    "snapshot is cloned"));
-			return (zfs_error(zhp->zfs_hdl, EZFS_EXISTS, errbuf));
-
+			zfs_error_aux(hdl,
+			    dgettext(TEXT_DOMAIN, "snapshot is cloned"));
+			ret = zfs_error(hdl, EZFS_EXISTS, errbuf);
+			break;
 		default:
-			return (zfs_standard_error(zhp->zfs_hdl, errno,
-			    errbuf));
+			ret = zfs_standard_error(hdl, errno, errbuf);
+			break;
 		}
 	}
 
-	return (0);
+	return (ret);
 }
 
 /*
@@ -3277,12 +3249,10 @@
 int
 zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)
 {
-	zfs_cmd_t zc = { 0 };
 	char parent[ZFS_MAXNAMELEN];
 	int ret;
 	char errbuf[1024];
 	libzfs_handle_t *hdl = zhp->zfs_hdl;
-	zfs_type_t type;
 	uint64_t zoned;
 
 	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
@@ -3301,33 +3271,22 @@
 	(void) parent_name(target, parent, sizeof (parent));
 
 	/* do the clone */
-	if (ZFS_IS_VOLUME(zhp)) {
-		zc.zc_objset_type = DMU_OST_ZVOL;
-		type = ZFS_TYPE_VOLUME;
-	} else {
-		zc.zc_objset_type = DMU_OST_ZFS;
-		type = ZFS_TYPE_FILESYSTEM;
-	}
 
 	if (props) {
+		zfs_type_t type;
+		if (ZFS_IS_VOLUME(zhp)) {
+			type = ZFS_TYPE_VOLUME;
+		} else {
+			type = ZFS_TYPE_FILESYSTEM;
+		}
 		if ((props = zfs_valid_proplist(hdl, type, props, zoned,
 		    zhp, errbuf)) == NULL)
 			return (-1);
-
-		if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
-			nvlist_free(props);
-			return (-1);
-		}
-
-		nvlist_free(props);
 	}
 
-	(void) strlcpy(zc.zc_name, target, sizeof (zc.zc_name));
-	(void) strlcpy(zc.zc_value, zhp->zfs_name, sizeof (zc.zc_value));
-	ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_CREATE, &zc);
+	ret = lzc_clone(target, zhp->zfs_name, props);
+	nvlist_free(props);
 
-	zcmd_free_nvlists(&zc);
-
 	if (ret != 0) {
 		switch (errno) {
 
@@ -3411,74 +3370,134 @@
 	return (ret);
 }
 
+typedef struct snapdata {
+	nvlist_t *sd_nvl;
+	const char *sd_snapname;
+} snapdata_t;
+
+static int
+zfs_snapshot_cb(zfs_handle_t *zhp, void *arg)
+{
+	snapdata_t *sd = arg;
+	char name[ZFS_MAXNAMELEN];
+	int rv = 0;
+
+	(void) snprintf(name, sizeof (name),
+	    "%s@%s", zfs_get_name(zhp), sd->sd_snapname);
+
+	fnvlist_add_boolean(sd->sd_nvl, name);
+
+	rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd);
+	zfs_close(zhp);
+	return (rv);
+}
+
 /*
- * Takes a snapshot of the given dataset.
+ * Creates snapshots.  The keys in the snaps nvlist are the snapshots to be
+ * created.
  */
 int
-zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive,
-    nvlist_t *props)
+zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props)
 {
-	const char *delim;
-	char parent[ZFS_MAXNAMELEN];
-	zfs_handle_t *zhp;
-	zfs_cmd_t zc = { 0 };
 	int ret;
 	char errbuf[1024];
+	nvpair_t *elem;
+	nvlist_t *errors;
 
 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-	    "cannot snapshot '%s'"), path);
+	    "cannot create snapshots "));
 
-	/* validate the target name */
-	if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT, B_TRUE))
-		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+	elem = NULL;
+	while ((elem = nvlist_next_nvpair(snaps, elem)) != NULL) {
+		const char *snapname = nvpair_name(elem);
 
-	if (props) {
-		if ((props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT,
-		    props, B_FALSE, NULL, errbuf)) == NULL)
-			return (-1);
+		/* validate the target name */
+		if (!zfs_validate_name(hdl, snapname, ZFS_TYPE_SNAPSHOT,
+		    B_TRUE)) {
+			(void) snprintf(errbuf, sizeof (errbuf),
+			    dgettext(TEXT_DOMAIN,
+			    "cannot create snapshot '%s'"), snapname);
+			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+		}
+	}
 
-		if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
-			nvlist_free(props);
-			return (-1);
+	if (props != NULL &&
+	    (props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT,
+	    props, B_FALSE, NULL, errbuf)) == NULL) {
+		return (-1);
+	}
+
+	ret = lzc_snapshot(snaps, props, &errors);
+
+	if (ret != 0) {
+		boolean_t printed = B_FALSE;
+		for (elem = nvlist_next_nvpair(errors, NULL);
+		    elem != NULL;
+		    elem = nvlist_next_nvpair(errors, elem)) {
+			(void) snprintf(errbuf, sizeof (errbuf),
+			    dgettext(TEXT_DOMAIN,
+			    "cannot create snapshot '%s'"), nvpair_name(elem));
+			(void) zfs_standard_error(hdl,
+			    fnvpair_value_int32(elem), errbuf);
+			printed = B_TRUE;
 		}
+		if (!printed) {
+			switch (ret) {
+			case EXDEV:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "multiple snapshots of same "
+				    "fs not allowed"));
+				(void) zfs_error(hdl, EZFS_EXISTS, errbuf);
 
-		nvlist_free(props);
+				break;
+			default:
+				(void) zfs_standard_error(hdl, ret, errbuf);
+			}
+		}
 	}
 
-	/* make sure the parent exists and is of the appropriate type */
-	delim = strchr(path, '@');
-	(void) strncpy(parent, path, delim - path);
-	parent[delim - path] = '\0';
+	nvlist_free(props);
+	nvlist_free(errors);
+	return (ret);
+}
 
-	if ((zhp = zfs_open(hdl, parent, ZFS_TYPE_FILESYSTEM |
+int
+zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive,
+    nvlist_t *props)
+{
+	int ret;
+	snapdata_t sd = { 0 };
+	char fsname[ZFS_MAXNAMELEN];
+	char *cp;
+	zfs_handle_t *zhp;
+	char errbuf[1024];
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot snapshot %s"), path);
+
+	if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT, B_TRUE))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+
+	(void) strlcpy(fsname, path, sizeof (fsname));
+	cp = strchr(fsname, '@');
+	*cp = '\0';
+	sd.sd_snapname = cp + 1;
+
+	if ((zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM |
 	    ZFS_TYPE_VOLUME)) == NULL) {
-		zcmd_free_nvlists(&zc);
 		return (-1);
 	}
 
-	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-	(void) strlcpy(zc.zc_value, delim+1, sizeof (zc.zc_value));
-	if (ZFS_IS_VOLUME(zhp))
-		zc.zc_objset_type = DMU_OST_ZVOL;
-	else
-		zc.zc_objset_type = DMU_OST_ZFS;
-	zc.zc_cookie = recursive;
-	ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SNAPSHOT, &zc);
-
-	zcmd_free_nvlists(&zc);
-
-	/*
-	 * if it was recursive, the one that actually failed will be in
-	 * zc.zc_name.
-	 */
-	if (ret != 0) {
-		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-		    "cannot create snapshot '%s@%s'"), zc.zc_name, zc.zc_value);
-		(void) zfs_standard_error(hdl, errno, errbuf);
+	verify(nvlist_alloc(&sd.sd_nvl, NV_UNIQUE_NAME, 0) == 0);
+	if (recursive) {
+		(void) zfs_snapshot_cb(zfs_handle_dup(zhp), &sd);
+	} else {
+		fnvlist_add_boolean(sd.sd_nvl, path);
 	}
 
+	ret = zfs_snapshot_nvl(hdl, sd.sd_nvl, props);
+	nvlist_free(sd.sd_nvl);
 	zfs_close(zhp);
-
 	return (ret);
 }
 
@@ -3506,7 +3525,6 @@
 		    zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
 		    zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) >
 		    cbp->cb_create) {
-			char *logstr;
 
 			cbp->cb_dependent = B_TRUE;
 			cbp->cb_error |= zfs_iter_dependents(zhp, B_FALSE,
@@ -3513,10 +3531,7 @@
 			    rollback_destroy, cbp);
 			cbp->cb_dependent = B_FALSE;
 
-			logstr = zhp->zfs_hdl->libzfs_log_str;
-			zhp->zfs_hdl->libzfs_log_str = NULL;
 			cbp->cb_error |= zfs_destroy(zhp, B_FALSE);
-			zhp->zfs_hdl->libzfs_log_str = logstr;
 		}
 	} else {
 		/* We must destroy this clone; first unmount it */
@@ -4120,7 +4135,7 @@
 
 		zc.zc_nvlist_dst_size = sizeof (buf);
 		if (zfs_ioctl(hdl, ZFS_IOC_USERSPACE_MANY, &zc) != 0) {
-			char errbuf[ZFS_MAXNAMELEN + 32];
+			char errbuf[1024];
 
 			(void) snprintf(errbuf, sizeof (errbuf),
 			    dgettext(TEXT_DOMAIN,
@@ -4142,37 +4157,94 @@
 	return (0);
 }
 
+struct holdarg {
+	nvlist_t *nvl;
+	const char *snapname;
+	const char *tag;
+	boolean_t recursive;
+};
+
+static int
+zfs_hold_one(zfs_handle_t *zhp, void *arg)
+{
+	struct holdarg *ha = arg;
+	zfs_handle_t *szhp;
+	char name[ZFS_MAXNAMELEN];
+	int rv = 0;
+
+	(void) snprintf(name, sizeof (name),
+	    "%s@%s", zhp->zfs_name, ha->snapname);
+
+	szhp = make_dataset_handle(zhp->zfs_hdl, name);
+	if (szhp) {
+		fnvlist_add_string(ha->nvl, name, ha->tag);
+		zfs_close(szhp);
+	}
+
+	if (ha->recursive)
+		rv = zfs_iter_filesystems(zhp, zfs_hold_one, ha);
+	zfs_close(zhp);
+	return (rv);
+}
+
 int
 zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
-    boolean_t recursive, boolean_t temphold, boolean_t enoent_ok,
-    int cleanup_fd, uint64_t dsobj, uint64_t createtxg)
+    boolean_t recursive, boolean_t enoent_ok, int cleanup_fd)
 {
-	zfs_cmd_t zc = { 0 };
+	int ret;
+	struct holdarg ha;
+	nvlist_t *errors;
 	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	char errbuf[1024];
+	nvpair_t *elem;
 
-	ASSERT(!recursive || dsobj == 0);
+	ha.nvl = fnvlist_alloc();
+	ha.snapname = snapname;
+	ha.tag = tag;
+	ha.recursive = recursive;
+	(void) zfs_hold_one(zfs_handle_dup(zhp), &ha);
 
-	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
-	if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string))
-	    >= sizeof (zc.zc_string))
-		return (zfs_error(hdl, EZFS_TAGTOOLONG, tag));
-	zc.zc_cookie = recursive;
-	zc.zc_temphold = temphold;
-	zc.zc_cleanup_fd = cleanup_fd;
-	zc.zc_sendobj = dsobj;
-	zc.zc_createtxg = createtxg;
+	if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) {
+		fnvlist_free(ha.nvl);
+		ret = ENOENT;
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN, "cannot hold snapshot '%s@%s'"),
+		    zhp->zfs_name, snapname);
+		(void) zfs_standard_error(hdl, ret, errbuf);
+		return (ret);
+	}
 
-	if (zfs_ioctl(hdl, ZFS_IOC_HOLD, &zc) != 0) {
-		char errbuf[ZFS_MAXNAMELEN+32];
+	ret = lzc_hold(ha.nvl, cleanup_fd, &errors);
+	fnvlist_free(ha.nvl);
 
-		/*
-		 * if it was recursive, the one that actually failed will be in
-		 * zc.zc_name.
-		 */
-		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-		    "cannot hold '%s@%s'"), zc.zc_name, snapname);
-		switch (errno) {
+	if (ret == 0)
+		return (0);
+
+	if (nvlist_next_nvpair(errors, NULL) == NULL) {
+		/* no hold-specific errors */
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN, "cannot hold"));
+		switch (ret) {
+		case ENOTSUP:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "pool must be upgraded"));
+			(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
+			break;
+		case EINVAL:
+			(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
+			break;
+		default:
+			(void) zfs_standard_error(hdl, ret, errbuf);
+		}
+	}
+
+	for (elem = nvlist_next_nvpair(errors, NULL);
+	    elem != NULL;
+	    elem = nvlist_next_nvpair(errors, elem)) {
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN,
+		    "cannot hold snapshot '%s'"), nvpair_name(elem));
+		switch (fnvpair_value_int32(elem)) {
 		case E2BIG:
 			/*
 			 * Temporary tags wind up having the ds object id
@@ -4180,66 +4252,131 @@
 			 * above, it's still possible for the tag to wind
 			 * up being slightly too long.
 			 */
-			return (zfs_error(hdl, EZFS_TAGTOOLONG, errbuf));
-		case ENOTSUP:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "pool must be upgraded"));
-			return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
+			(void) zfs_error(hdl, EZFS_TAGTOOLONG, errbuf);
+			break;
 		case EINVAL:
-			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+			(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
+			break;
 		case EEXIST:
-			return (zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf));
+			(void) zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf);
+			break;
 		case ENOENT:
 			if (enoent_ok)
 				return (ENOENT);
 			/* FALLTHROUGH */
 		default:
-			return (zfs_standard_error_fmt(hdl, errno, errbuf));
+			(void) zfs_standard_error(hdl,
+			    fnvpair_value_int32(elem), errbuf);
 		}
 	}
 
-	return (0);
+	fnvlist_free(errors);
+	return (ret);
 }
 
+struct releasearg {
+	nvlist_t *nvl;
+	const char *snapname;
+	const char *tag;
+	boolean_t recursive;
+};
+
+static int
+zfs_release_one(zfs_handle_t *zhp, void *arg)
+{
+	struct holdarg *ha = arg;
+	zfs_handle_t *szhp;
+	char name[ZFS_MAXNAMELEN];
+	int rv = 0;
+
+	(void) snprintf(name, sizeof (name),
+	    "%s@%s", zhp->zfs_name, ha->snapname);
+
+	szhp = make_dataset_handle(zhp->zfs_hdl, name);
+	if (szhp) {
+		nvlist_t *holds = fnvlist_alloc();
+		fnvlist_add_boolean(holds, ha->tag);
+		fnvlist_add_nvlist(ha->nvl, name, holds);
+		zfs_close(szhp);
+	}
+
+	if (ha->recursive)
+		rv = zfs_iter_filesystems(zhp, zfs_release_one, ha);
+	zfs_close(zhp);
+	return (rv);
+}
+
 int
 zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
     boolean_t recursive)
 {
-	zfs_cmd_t zc = { 0 };
+	int ret;
+	struct holdarg ha;
+	nvlist_t *errors;
+	nvpair_t *elem;
 	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	char errbuf[1024];
 
-	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
-	if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string))
-	    >= sizeof (zc.zc_string))
-		return (zfs_error(hdl, EZFS_TAGTOOLONG, tag));
-	zc.zc_cookie = recursive;
+	ha.nvl = fnvlist_alloc();
+	ha.snapname = snapname;
+	ha.tag = tag;
+	ha.recursive = recursive;
+	(void) zfs_release_one(zfs_handle_dup(zhp), &ha);
 
-	if (zfs_ioctl(hdl, ZFS_IOC_RELEASE, &zc) != 0) {
-		char errbuf[ZFS_MAXNAMELEN+32];
+	if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) {
+		fnvlist_free(ha.nvl);
+		ret = ENOENT;
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN,
+		    "cannot release hold from snapshot '%s@%s'"),
+		    zhp->zfs_name, snapname);
+		(void) zfs_standard_error(hdl, ret, errbuf);
+		return (ret);
+	}
 
-		/*
-		 * if it was recursive, the one that actually failed will be in
-		 * zc.zc_name.
-		 */
+	ret = lzc_release(ha.nvl, &errors);
+	fnvlist_free(ha.nvl);
+
+	if (ret == 0)
+		return (0);
+
+	if (nvlist_next_nvpair(errors, NULL) == NULL) {
+		/* no hold-specific errors */
 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-		    "cannot release '%s' from '%s@%s'"), tag, zc.zc_name,
-		    snapname);
+		    "cannot release"));
 		switch (errno) {
-		case ESRCH:
-			return (zfs_error(hdl, EZFS_REFTAG_RELE, errbuf));
 		case ENOTSUP:
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "pool must be upgraded"));
-			return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
+			(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
+			break;
+		default:
+			(void) zfs_standard_error_fmt(hdl, errno, errbuf);
+		}
+	}
+
+	for (elem = nvlist_next_nvpair(errors, NULL);
+	    elem != NULL;
+	    elem = nvlist_next_nvpair(errors, elem)) {
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN,
+		    "cannot release hold from snapshot '%s'"),
+		    nvpair_name(elem));
+		switch (fnvpair_value_int32(elem)) {
+		case ESRCH:
+			(void) zfs_error(hdl, EZFS_REFTAG_RELE, errbuf);
+			break;
 		case EINVAL:
-			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+			(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
+			break;
 		default:
-			return (zfs_standard_error_fmt(hdl, errno, errbuf));
+			(void) zfs_standard_error_fmt(hdl,
+			    fnvpair_value_int32(elem), errbuf);
 		}
 	}
 
-	return (0);
+	fnvlist_free(errors);
+	return (ret);
 }
 
 int
@@ -4250,7 +4387,7 @@
 	int nvsz = 2048;
 	void *nvbuf;
 	int err = 0;
-	char errbuf[ZFS_MAXNAMELEN+32];
+	char errbuf[1024];
 
 	assert(zhp->zfs_type == ZFS_TYPE_VOLUME ||
 	    zhp->zfs_type == ZFS_TYPE_FILESYSTEM);
@@ -4315,7 +4452,7 @@
 	zfs_cmd_t zc = { 0 };
 	libzfs_handle_t *hdl = zhp->zfs_hdl;
 	char *nvbuf;
-	char errbuf[ZFS_MAXNAMELEN+32];
+	char errbuf[1024];
 	size_t nvsz;
 	int err;
 
@@ -4366,38 +4503,18 @@
 int
 zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl)
 {
-	zfs_cmd_t zc = { 0 };
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-	int nvsz = 2048;
-	void *nvbuf;
-	int err = 0;
-	char errbuf[ZFS_MAXNAMELEN+32];
+	int err;
+	char errbuf[1024];
 
-	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
+	err = lzc_get_holds(zhp->zfs_name, nvl);
 
-tryagain:
+	if (err != 0) {
+		libzfs_handle_t *hdl = zhp->zfs_hdl;
 
-	nvbuf = malloc(nvsz);
-	if (nvbuf == NULL) {
-		err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno)));
-		goto out;
-	}
-
-	zc.zc_nvlist_dst_size = nvsz;
-	zc.zc_nvlist_dst = (uintptr_t)nvbuf;
-
-	(void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN);
-
-	if (zfs_ioctl(hdl, ZFS_IOC_GET_HOLDS, &zc) != 0) {
 		(void) snprintf(errbuf, sizeof (errbuf),
 		    dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"),
-		    zc.zc_name);
-		switch (errno) {
-		case ENOMEM:
-			free(nvbuf);
-			nvsz = zc.zc_nvlist_dst_size;
-			goto tryagain;
-
+		    zhp->zfs_name);
+		switch (err) {
 		case ENOTSUP:
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "pool must be upgraded"));
@@ -4413,19 +4530,8 @@
 			err = zfs_standard_error_fmt(hdl, errno, errbuf);
 			break;
 		}
-	} else {
-		/* success */
-		int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0);
-		if (rc) {
-			(void) snprintf(errbuf, sizeof (errbuf),
-			    dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"),
-			    zc.zc_name);
-			err = zfs_standard_error_fmt(hdl, rc, errbuf);
-		}
 	}
 
-	free(nvbuf);
-out:
 	return (err);
 }
 

Modified: trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h
===================================================================
--- trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -23,12 +23,12 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011 Pawel Jakub Dawidek <pawel at dawidek.net>.
  * All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright (c) 2013 Martin Matuska <mm at FreeBSD.org>. All rights reserved.
  */
 
-#ifndef	_LIBFS_IMPL_H
-#define	_LIBFS_IMPL_H
+#ifndef	_LIBZFS_IMPL_H
+#define	_LIBZFS_IMPL_H
 
 #include <sys/dmu.h>
 #include <sys/fs/zfs.h>
@@ -39,9 +39,9 @@
 #include <libshare.h>
 #include <libuutil.h>
 #include <libzfs.h>
+#include <libzfs_core.h>
+#include <libzfs_compat.h>
 
-#include "zfs_ioctl_compat.h"
-
 #ifdef	__cplusplus
 extern "C" {
 #endif
@@ -70,7 +70,6 @@
 	int libzfs_desc_active;
 	char libzfs_action[1024];
 	char libzfs_desc[1024];
-	char *libzfs_log_str;
 	int libzfs_printerr;
 	int libzfs_storeerr; /* stuff error messages into buffer */
 	void *libzfs_sharehdl; /* libshare handle */
@@ -215,62 +214,8 @@
 
 extern void libzfs_fru_clear(libzfs_handle_t *, boolean_t);
 
-#ifndef sun
-static int zfs_kernel_version = 0;
-static int zfs_ioctl_version = 0;
-
-/*
- * This is FreeBSD version of ioctl, because Solaris' ioctl() updates
- * zc_nvlist_dst_size even if an error is returned, on FreeBSD if an
- * error is returned zc_nvlist_dst_size won't be updated.
- */
-static __inline int
-zcmd_ioctl(int fd, unsigned long cmd, zfs_cmd_t *zc)
-{
-	size_t oldsize, zfs_kernel_version_size, zfs_ioctl_version_size;
-	int version, ret, cflag = ZFS_CMD_COMPAT_NONE;
-
-	zfs_ioctl_version_size = sizeof(zfs_ioctl_version);
-	if (zfs_ioctl_version == 0) {
-		sysctlbyname("vfs.zfs.version.ioctl", &zfs_ioctl_version,
-		    &zfs_ioctl_version_size, NULL, 0);
-	}
-
-	/*
-	 * If vfs.zfs.version.ioctl is not defined, assume we have v28
-	 * compatible binaries and use vfs.zfs.version.spa to test for v15
-	 */
-	if (zfs_ioctl_version < ZFS_IOCVER_DEADMAN) {
-		cflag = ZFS_CMD_COMPAT_V28;
-		zfs_kernel_version_size = sizeof(zfs_kernel_version);
-
-		if (zfs_kernel_version == 0) {
-			sysctlbyname("vfs.zfs.version.spa",
-			    &zfs_kernel_version,
-			    &zfs_kernel_version_size, NULL, 0);
-		}
-
-		if (zfs_kernel_version == SPA_VERSION_15 ||
-		    zfs_kernel_version == SPA_VERSION_14 ||
-		    zfs_kernel_version == SPA_VERSION_13)
-			cflag = ZFS_CMD_COMPAT_V15;
-	}
-
-	oldsize = zc->zc_nvlist_dst_size;
-	ret = zcmd_ioctl_compat(fd, cmd, zc, cflag);
-
-	if (ret == 0 && oldsize < zc->zc_nvlist_dst_size) {
-		ret = -1;
-		errno = ENOMEM;
-	}
-
-	return (ret);
-}
-#define	ioctl(fd, cmd, zc)	zcmd_ioctl((fd), (cmd), (zc))
-#endif	/* !sun */
-
 #ifdef	__cplusplus
 }
 #endif
 
-#endif	/* _LIBFS_IMPL_H */
+#endif	/* _LIBZFS_IMPL_H */

Modified: trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_iter.c
===================================================================
--- trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_iter.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_iter.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -22,7 +22,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright (c) 2012 Pawel Jakub Dawidek <pawel at dawidek.net>.
  * All rights reserved.
  */
@@ -308,12 +308,11 @@
 zfs_iter_snapspec(zfs_handle_t *fs_zhp, const char *spec_orig,
     zfs_iter_f func, void *arg)
 {
-	char buf[ZFS_MAXNAMELEN];
-	char *comma_separated, *cp;
+	char *buf, *comma_separated, *cp;
 	int err = 0;
 	int ret = 0;
 
-	(void) strlcpy(buf, spec_orig, sizeof (buf));
+	buf = zfs_strdup(fs_zhp->zfs_hdl, spec_orig);
 	cp = buf;
 
 	while ((comma_separated = strsep(&cp, ",")) != NULL) {
@@ -371,6 +370,7 @@
 		}
 	}
 
+	free(buf);
 	return (ret);
 }
 

Modified: trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
===================================================================
--- trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -36,6 +36,7 @@
 #include <stdlib.h>
 #include <strings.h>
 #include <unistd.h>
+#include <libgen.h>
 #include <sys/zfs_ioctl.h>
 #include <dlfcn.h>
 
@@ -1237,7 +1238,7 @@
  * datasets left in the pool.
  */
 int
-zpool_destroy(zpool_handle_t *zhp)
+zpool_destroy(zpool_handle_t *zhp, const char *log_str)
 {
 	zfs_cmd_t zc = { 0 };
 	zfs_handle_t *zfp = NULL;
@@ -1249,6 +1250,7 @@
 		return (-1);
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	zc.zc_history = (uint64_t)(uintptr_t)log_str;
 
 	if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
@@ -1403,8 +1405,9 @@
  * Exports the pool from the system.  The caller must ensure that there are no
  * mounted datasets in the pool.
  */
-int
-zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce)
+static int
+zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce,
+    const char *log_str)
 {
 	zfs_cmd_t zc = { 0 };
 	char msg[1024];
@@ -1415,6 +1418,7 @@
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	zc.zc_cookie = force;
 	zc.zc_guid = hardforce;
+	zc.zc_history = (uint64_t)(uintptr_t)log_str;
 
 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
 		switch (errno) {
@@ -1436,15 +1440,15 @@
 }
 
 int
-zpool_export(zpool_handle_t *zhp, boolean_t force)
+zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str)
 {
-	return (zpool_export_common(zhp, force, B_FALSE));
+	return (zpool_export_common(zhp, force, B_FALSE, log_str));
 }
 
 int
-zpool_export_force(zpool_handle_t *zhp)
+zpool_export_force(zpool_handle_t *zhp, const char *log_str)
 {
-	return (zpool_export_common(zhp, B_TRUE, B_TRUE));
+	return (zpool_export_common(zhp, B_TRUE, B_TRUE, log_str));
 }
 
 static void
@@ -3632,40 +3636,30 @@
 }
 
 void
-zpool_set_history_str(const char *subcommand, int argc, char **argv,
-    char *history_str)
+zfs_save_arguments(int argc, char **argv, char *string, int len)
 {
-	int i;
-
-	(void) strlcpy(history_str, subcommand, HIS_MAX_RECORD_LEN);
-	for (i = 1; i < argc; i++) {
-		if (strlen(history_str) + 1 + strlen(argv[i]) >
-		    HIS_MAX_RECORD_LEN)
-			break;
-		(void) strlcat(history_str, " ", HIS_MAX_RECORD_LEN);
-		(void) strlcat(history_str, argv[i], HIS_MAX_RECORD_LEN);
+	(void) strlcpy(string, basename(argv[0]), len);
+	for (int i = 1; i < argc; i++) {
+		(void) strlcat(string, " ", len);
+		(void) strlcat(string, argv[i], len);
 	}
 }
 
-/*
- * Stage command history for logging.
- */
 int
-zpool_stage_history(libzfs_handle_t *hdl, const char *history_str)
+zpool_log_history(libzfs_handle_t *hdl, const char *message)
 {
-	if (history_str == NULL)
-		return (EINVAL);
+	zfs_cmd_t zc = { 0 };
+	nvlist_t *args;
+	int err;
 
-	if (strlen(history_str) > HIS_MAX_RECORD_LEN)
-		return (EINVAL);
-
-	if (hdl->libzfs_log_str != NULL)
-		free(hdl->libzfs_log_str);
-
-	if ((hdl->libzfs_log_str = strdup(history_str)) == NULL)
-		return (no_memory(hdl));
-
-	return (0);
+	args = fnvlist_alloc();
+	fnvlist_add_string(args, "message", message);
+	err = zcmd_write_src_nvlist(hdl, &zc, args);
+	if (err == 0)
+		err = ioctl(hdl->libzfs_fd, ZFS_IOC_LOG_HISTORY, &zc);
+	nvlist_free(args);
+	zcmd_free_nvlists(&zc);
+	return (err);
 }
 
 /*

Modified: trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
===================================================================
--- trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -53,6 +53,10 @@
 #include <sys/zio_checksum.h>
 #include <sys/ddt.h>
 
+#ifdef __FreeBSD__
+extern int zfs_ioctl_version;
+#endif
+
 /* in libzfs_dataset.c */
 extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
 /* We need to use something for ENODATA. */
@@ -978,9 +982,7 @@
 	 */
 	if (pzhp) {
 		error = zfs_hold(pzhp, thissnap, sdd->holdtag,
-		    B_FALSE, B_TRUE, B_TRUE, sdd->cleanup_fd,
-		    zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID),
-		    zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG));
+		    B_FALSE, B_TRUE, sdd->cleanup_fd);
 		zfs_close(pzhp);
 	}
 
@@ -1719,12 +1721,11 @@
 		err = ENOENT;
 	}
 
-	if (err != 0 && strncmp(name+baselen, "recv-", 5) != 0) {
+	if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) {
 		seq++;
 
-		(void) strncpy(newname, name, baselen);
-		(void) snprintf(newname+baselen, ZFS_MAXNAMELEN-baselen,
-		    "recv-%u-%u", getpid(), seq);
+		(void) snprintf(newname, ZFS_MAXNAMELEN, "%.*srecv-%u-%u",
+		    baselen, name, getpid(), seq);
 		(void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
 
 		if (flags->verbose) {
@@ -2649,9 +2650,17 @@
 	/*
 	 * Determine name of destination snapshot, store in zc_value.
 	 */
-	(void) strcpy(zc.zc_top_ds, tosnap);
 	(void) strcpy(zc.zc_value, tosnap);
 	(void) strncat(zc.zc_value, chopprefix, sizeof (zc.zc_value));
+#ifdef __FreeBSD__
+	if (zfs_ioctl_version == ZFS_IOCVER_UNDEF)
+		zfs_ioctl_version = get_zfs_ioctl_version();
+	/*
+	 * For forward compatibility hide tosnap in zc_value
+	 */
+	if (zfs_ioctl_version < ZFS_IOCVER_LZC)
+		(void) strcpy(zc.zc_value + strlen(zc.zc_value) + 1, tosnap);
+#endif
 	free(cp);
 	if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
 		zcmd_free_nvlists(&zc);

Modified: trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c
===================================================================
--- trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -48,6 +48,7 @@
 #include <sys/types.h>
 
 #include <libzfs.h>
+#include <libzfs_core.h>
 
 #include "libzfs_impl.h"
 #include "zfs_prop.h"
@@ -657,6 +658,14 @@
 
 	hdl->libzfs_sharetab = fopen(ZFS_EXPORTS_PATH, "r");
 
+	if (libzfs_core_init() != 0) {
+		(void) close(hdl->libzfs_fd);
+		(void) fclose(hdl->libzfs_mnttab);
+		(void) fclose(hdl->libzfs_sharetab);
+		free(hdl);
+		return (NULL);
+	}
+
 	zfs_prop_init();
 	zpool_prop_init();
 	zpool_feature_init();
@@ -674,8 +683,6 @@
 	if (hdl->libzfs_sharetab)
 		(void) fclose(hdl->libzfs_sharetab);
 	zfs_uninit_libshare(hdl);
-	if (hdl->libzfs_log_str)
-		(void) free(hdl->libzfs_log_str);
 	zpool_free_handles(hdl);
 #ifdef sun
 	libzfs_fru_clear(hdl, B_TRUE);
@@ -682,6 +689,7 @@
 #endif
 	namespace_clear(hdl);
 	libzfs_mnttab_fini(hdl);
+	libzfs_core_fini();
 	free(hdl);
 }
 
@@ -855,19 +863,9 @@
 }
 
 int
-zfs_ioctl(libzfs_handle_t *hdl, unsigned long request, zfs_cmd_t *zc)
+zfs_ioctl(libzfs_handle_t *hdl, int request, zfs_cmd_t *zc)
 {
-	int error;
-
-	zc->zc_history = (uint64_t)(uintptr_t)hdl->libzfs_log_str;
-	error = ioctl(hdl->libzfs_fd, request, zc);
-	if (hdl->libzfs_log_str) {
-		free(hdl->libzfs_log_str);
-		hdl->libzfs_log_str = NULL;
-	}
-	zc->zc_history = 0;
-
-	return (error);
+	return (ioctl(hdl->libzfs_fd, request, zc));
 }
 
 /*

Modified: trunk/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c
===================================================================
--- trunk/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -33,6 +33,7 @@
 #include <sys/stat.h>
 #include <sys/processor.h>
 #include <sys/zfs_context.h>
+#include <sys/rrwlock.h>
 #include <sys/zmod.h>
 #include <sys/utsname.h>
 #include <sys/systeminfo.h>
@@ -885,6 +886,8 @@
 void
 kernel_init(int mode)
 {
+	extern uint_t rrw_tsd_key;
+
 	umem_nofail_callback(umem_out_of_memory);
 
 	physmem = sysconf(_SC_PHYS_PAGES);
@@ -905,6 +908,8 @@
 #endif
 
 	spa_init(mode);
+
+	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
 }
 
 void
@@ -952,6 +957,12 @@
 	return (0);
 }
 
+uid_t
+crgetruid(cred_t *cr)
+{
+	return (0);
+}
+
 gid_t
 crgetgid(cred_t *cr)
 {

Modified: trunk/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
===================================================================
--- trunk/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
 
@@ -60,6 +61,8 @@
 #include <umem.h>
 #include <inttypes.h>
 #include <fsshare.h>
+#include <pthread.h>
+#include <sys/debug.h>
 #include <sys/note.h>
 #include <sys/types.h>
 #include <sys/cred.h>
@@ -84,6 +87,9 @@
 #include <sys/sysevent/dev.h>
 #include <machine/atomic.h>
 #include <sys/debug.h>
+#ifdef illumos
+#include "zfs.h"
+#endif
 
 #define	ZFS_EXPORTS_PATH	"/etc/zfs/exports"
 
@@ -131,30 +137,66 @@
 
 #ifdef DTRACE_PROBE
 #undef	DTRACE_PROBE
-#define	DTRACE_PROBE(a)	((void)0)
 #endif	/* DTRACE_PROBE */
+#ifdef illumos
+#define	DTRACE_PROBE(a) \
+	ZFS_PROBE0(#a)
+#endif
 
 #ifdef DTRACE_PROBE1
 #undef	DTRACE_PROBE1
-#define	DTRACE_PROBE1(a, b, c)	((void)0)
 #endif	/* DTRACE_PROBE1 */
+#ifdef illumos
+#define	DTRACE_PROBE1(a, b, c) \
+	ZFS_PROBE1(#a, (unsigned long)c)
+#endif
 
 #ifdef DTRACE_PROBE2
 #undef	DTRACE_PROBE2
-#define	DTRACE_PROBE2(a, b, c, d, e)	((void)0)
 #endif	/* DTRACE_PROBE2 */
+#ifdef illumos
+#define	DTRACE_PROBE2(a, b, c, d, e) \
+	ZFS_PROBE2(#a, (unsigned long)c, (unsigned long)e)
+#endif
 
 #ifdef DTRACE_PROBE3
 #undef	DTRACE_PROBE3
-#define	DTRACE_PROBE3(a, b, c, d, e, f, g)	((void)0)
 #endif	/* DTRACE_PROBE3 */
+#ifdef illumos
+#define	DTRACE_PROBE3(a, b, c, d, e, f, g) \
+	ZFS_PROBE3(#a, (unsigned long)c, (unsigned long)e, (unsigned long)g)
+#endif
 
 #ifdef DTRACE_PROBE4
 #undef	DTRACE_PROBE4
-#define	DTRACE_PROBE4(a, b, c, d, e, f, g, h, i)	((void)0)
 #endif	/* DTRACE_PROBE4 */
+#ifdef illumos
+#define	DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) \
+	ZFS_PROBE4(#a, (unsigned long)c, (unsigned long)e, (unsigned long)g, \
+	(unsigned long)i)
+#endif
 
+#ifdef illumos
 /*
+ * We use the comma operator so that this macro can be used without much
+ * additional code.  For example, "return (EINVAL);" becomes
+ * "return (SET_ERROR(EINVAL));".  Note that the argument will be evaluated
+ * twice, so it should not have side effects (e.g. something like:
+ * "return (SET_ERROR(log_error(EINVAL, info)));" would log the error twice).
+ */
+#define	SET_ERROR(err)	(ZFS_SET_ERROR(err), err)
+#else	/* !illumos */
+
+#define	DTRACE_PROBE(a)	((void)0)
+#define	DTRACE_PROBE1(a, b, c)	((void)0)
+#define	DTRACE_PROBE2(a, b, c, d, e)	((void)0)
+#define	DTRACE_PROBE3(a, b, c, d, e, f, g)	((void)0)
+#define	DTRACE_PROBE4(a, b, c, d, e, f, g, h, i)	((void)0)
+
+#define SET_ERROR(err) (err)
+#endif	/* !illumos */
+
+/*
  * Threads
  */
 #define	curthread	((void *)(uintptr_t)thr_self())
@@ -242,6 +284,9 @@
 #define	RW_WRITE_HELD(x)	((x)->rw_owner == curthread)
 #define	RW_LOCK_HELD(x)		rw_lock_held(x)
 
+#undef RW_LOCK_HELD
+#define	RW_LOCK_HELD(x)		(RW_READ_HELD(x) || RW_WRITE_HELD(x))
+
 extern void rw_init(krwlock_t *rwlp, char *name, int type, void *arg);
 extern void rw_destroy(krwlock_t *rwlp);
 extern void rw_enter(krwlock_t *rwlp, krw_t rw);
@@ -252,6 +297,7 @@
 #define	rw_downgrade(rwlp) do { } while (0)
 
 extern uid_t crgetuid(cred_t *cr);
+extern uid_t crgetruid(cred_t *cr);
 extern gid_t crgetgid(cred_t *cr);
 extern int crgetngroups(cred_t *cr);
 extern gid_t *crgetgroups(cred_t *cr);
@@ -271,6 +317,14 @@
 extern void cv_broadcast(kcondvar_t *cv);
 
 /*
+ * Thread-specific data
+ */
+#define	tsd_get(k) pthread_getspecific(k)
+#define	tsd_set(k, v) pthread_setspecific(k, v)
+#define	tsd_create(kp, d) pthread_key_create(kp, d)
+#define	tsd_destroy(kp) /* nothing */
+
+/*
  * Kernel memory
  */
 #define	KM_SLEEP		UMEM_NOFAIL
@@ -527,7 +581,7 @@
 #define	INGLOBALZONE(z)			(1)
 
 extern char *kmem_asprintf(const char *fmt, ...);
-#define	strfree(str) kmem_free((str), strlen(str)+1)
+#define	strfree(str) kmem_free((str), strlen(str) + 1)
 
 /*
  * Hostname information

Modified: trunk/cddl/lib/Makefile
===================================================================
--- trunk/cddl/lib/Makefile	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/lib/Makefile	2016-10-02 00:51:03 UTC (rev 9136)
@@ -9,10 +9,12 @@
 	libnvpair \
 	libumem \
 	libuutil \
+	${_libzfs_core} \
 	${_libzfs} \
 	${_libzpool}
 
 .if ${MK_ZFS} != "no"
+_libzfs_core=	libzfs_core
 _libzfs=	libzfs
 .if ${MK_LIBTHR} != "no"
 _libzpool=	libzpool

Modified: trunk/cddl/lib/libzfs/Makefile
===================================================================
--- trunk/cddl/lib/libzfs/Makefile	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/lib/libzfs/Makefile	2016-10-02 00:51:03 UTC (rev 9136)
@@ -6,8 +6,9 @@
 .PATH: ${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common
 
 LIB=	zfs
-DPADD=	${LIBMD} ${LIBPTHREAD} ${LIBUMEM} ${LIBUTIL} ${LIBM} ${LIBNVPAIR}
-LDADD=	-lmd -lpthread -lumem -lutil -lm -lnvpair
+DPADD=	${LIBMD} ${LIBPTHREAD} ${LIBUMEM} ${LIBUTIL} ${LIBM} ${LIBNVPAIR} \
+	${LIBZFS_CORE}
+LDADD=	-lmd -lpthread -lumem -lutil -lm -lnvpair -lzfs_core
 
 SRCS=	deviceid.c \
 	fsshare.c \
@@ -17,6 +18,7 @@
 	zone.c
 
 SRCS+=	libzfs_changelist.c \
+	libzfs_compat.c \
 	libzfs_config.c \
 	libzfs_dataset.c \
 	libzfs_diff.c \
@@ -54,5 +56,6 @@
 CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libnvpair
 CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libuutil/common
 CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common
+CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs_core/common
 
 .include <bsd.lib.mk>

Modified: trunk/cddl/sbin/zfs/Makefile
===================================================================
--- trunk/cddl/sbin/zfs/Makefile	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/sbin/zfs/Makefile	2016-10-02 00:51:03 UTC (rev 9136)
@@ -15,6 +15,7 @@
 CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/head
 CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libuutil/common
 CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common
+CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs_core/common
 CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libumem/common
 CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libnvpair
 CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common
@@ -23,7 +24,7 @@
 CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs
 
 DPADD=	${LIBGEOM} ${LIBJAIL} ${LIBNVPAIR} ${LIBUMEM} \
-	${LIBUTIL} ${LIBUUTIL} ${LIBZFS}
-LDADD=	-lgeom -ljail -lnvpair -lumem -lutil -luutil -lzfs
+	${LIBUTIL} ${LIBUUTIL} ${LIBZFS_CORE} ${LIBZFS}
+LDADD=	-lgeom -ljail -lnvpair -lumem -lutil -luutil -lzfs_core -lzfs
 
 .include <bsd.prog.mk>

Modified: trunk/cddl/sbin/zpool/Makefile
===================================================================
--- trunk/cddl/sbin/zpool/Makefile	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/sbin/zpool/Makefile	2016-10-02 00:51:03 UTC (rev 9136)
@@ -28,7 +28,7 @@
 CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/cmd/stat/common
 
 DPADD=	${LIBAVL} ${LIBGEOM} ${LIBNVPAIR} \
-	${LIBUMEM} ${LIBUTIL} ${LIBUUTIL} ${LIBZFS}
-LDADD=	-lavl -lgeom -lnvpair -lumem -lutil -luutil -lzfs
+	${LIBUMEM} ${LIBUTIL} ${LIBUUTIL} ${LIBZFS_CORE} ${LIBZFS}
+LDADD=	-lavl -lgeom -lnvpair -lumem -lutil -luutil -lzfs_core -lzfs
 
 .include <bsd.prog.mk>

Modified: trunk/cddl/usr.bin/zinject/Makefile
===================================================================
--- trunk/cddl/usr.bin/zinject/Makefile	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/usr.bin/zinject/Makefile	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,7 +20,7 @@
 CFLAGS+= -I${.CURDIR}/../../lib/libumem
 
 DPADD=	${LIBGEOM} ${LIBM} ${LIBNVPAIR} ${LIBUMEM} ${LIBUUTIL} \
-	${LIBZFS} ${LIBZPOOL}
-LDADD=	-lgeom -lm -lnvpair -lumem -luutil -lzfs -lzpool
+	${LIBZFS_CORE} ${LIBZFS} ${LIBZPOOL}
+LDADD=	-lgeom -lm -lnvpair -lumem -luutil -lzfs_core -lzfs -lzpool
 
 .include <bsd.prog.mk>

Modified: trunk/cddl/usr.bin/ztest/Makefile
===================================================================
--- trunk/cddl/usr.bin/ztest/Makefile	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/usr.bin/ztest/Makefile	2016-10-02 00:51:03 UTC (rev 9136)
@@ -19,8 +19,9 @@
 CFLAGS+= -I${.CURDIR}/../../lib/libumem
 
 DPADD=	${LIBGEOM} ${LIBM} ${LIBNVPAIR} ${LIBUMEM} ${LIBZPOOL} \
-	${LIBPTHREAD} ${LIBAVL} ${LIBZFS} ${LIBUUTIL}
-LDADD=	-lgeom -lm -lnvpair -lumem -lzpool -lpthread -lavl -lzfs -luutil
+	${LIBPTHREAD} ${LIBAVL} ${LIBZFS_CORE} ${LIBZFS} ${LIBUUTIL}
+LDADD=	-lgeom -lm -lnvpair -lumem -lzpool -lpthread -lavl -lzfs_core -lzfs \
+	-luutil
 
 CSTD=	c99
 

Modified: trunk/cddl/usr.sbin/zdb/Makefile
===================================================================
--- trunk/cddl/usr.sbin/zdb/Makefile	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/cddl/usr.sbin/zdb/Makefile	2016-10-02 00:51:03 UTC (rev 9136)
@@ -24,8 +24,8 @@
 CFLAGS+= -I${.CURDIR}/../../lib/libumem
 
 DPADD=	${LIBGEOM} ${LIBM} ${LIBNVPAIR} ${LIBPTHREAD} ${LIBUMEM} \
-	${LIBUUTIL} ${LIBZFS} ${LIBZPOOL}
-LDADD=	-lgeom -lm -lnvpair -lpthread -lumem -luutil -lzfs -lzpool
+	${LIBUUTIL} ${LIBZFS_CORE} ${LIBZFS} ${LIBZPOOL}
+LDADD=	-lgeom -lm -lnvpair -lpthread -lumem -luutil -lzfs_core -lzfs -lzpool
 
 CFLAGS+=	-DDEBUG=1
 #DEBUG_FLAGS+=	-g

Modified: trunk/rescue/rescue/Makefile
===================================================================
--- trunk/rescue/rescue/Makefile	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/rescue/rescue/Makefile	2016-10-02 00:51:03 UTC (rev 9136)
@@ -123,7 +123,7 @@
 CRUNCH_LIBS+= -lipx
 .endif
 .if ${MK_ZFS} != "no"
-CRUNCH_LIBS+= -lavl -ljail -lzfs -lnvpair -lpthread -luutil -lumem
+CRUNCH_LIBS+= -lavl -ljail -lzfs_core -lzfs -lnvpair -lpthread -luutil -lumem
 .endif
 CRUNCH_LIBS+= -lgeom -lbsdxml -ljail -lkiconv -lmd -lreadline -lsbuf -lufs -lz
 

Modified: trunk/sys/cddl/compat/opensolaris/sys/cred.h
===================================================================
--- trunk/sys/cddl/compat/opensolaris/sys/cred.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/compat/opensolaris/sys/cred.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -46,6 +46,7 @@
 #define	kcred	(thread0.td_ucred)
 
 #define	crgetuid(cred)		((cred)->cr_uid)
+#define	crgetruid(cred)		((cred)->cr_ruid)
 #define	crgetgid(cred)		((cred)->cr_gid)
 #define	crgetgroups(cred)	((cred)->cr_groups)
 #define	crgetngroups(cred)	((cred)->cr_ngroups)

Modified: trunk/sys/cddl/compat/opensolaris/sys/sdt.h
===================================================================
--- trunk/sys/cddl/compat/opensolaris/sys/sdt.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/compat/opensolaris/sys/sdt.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -41,6 +41,8 @@
 #define	DTRACE_PROBE1(name, type1, arg1)
 #define	DTRACE_PROBE2(name, type1, arg1, type2, arg2)
 #define	DTRACE_PROBE3(name, type1, arg1, type2, arg2, type3, arg3)
-#define	DTRACE_PROBE4(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) 
+#define	DTRACE_PROBE4(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4)
 
+#define	SET_ERROR(err)	(err)
+
 #endif	/* _OPENSOLARIS_SYS_SDT_H_ */

Modified: trunk/sys/cddl/contrib/opensolaris/common/nvpair/fnvpair.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/common/nvpair/fnvpair.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/common/nvpair/fnvpair.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -30,6 +30,8 @@
 #else
 #include <sys/debug.h>
 #include <sys/kmem.h>
+#include <sys/param.h>
+#include <sys/debug.h>
 #endif
 
 /*
@@ -116,6 +118,18 @@
 	VERIFY0(nvlist_merge(dst, src, KM_SLEEP));
 }
 
+size_t
+fnvlist_num_pairs(nvlist_t *nvl)
+{
+	size_t count = 0;
+	nvpair_t *pair;
+
+	for (pair = nvlist_next_nvpair(nvl, 0); pair != NULL;
+	    pair = nvlist_next_nvpair(nvl, pair))
+		count++;
+	return (count);
+}
+
 void
 fnvlist_add_boolean(nvlist_t *nvl, const char *name)
 {

Modified: trunk/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 /*
@@ -156,7 +157,11 @@
 	return (version);
 }
 
-const char *zfs_history_event_names[LOG_END] = {
+/*
+ * This is the table of legacy internal event names; it should not be modified.
+ * The internal events are now stored in the history log as strings.
+ */
+const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS] = {
 	"invalid event",
 	"pool create",
 	"vdev add",

Modified: trunk/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #ifndef	_ZFS_COMUTIL_H
@@ -37,7 +38,8 @@
 
 extern int zfs_zpl_version_map(int spa_version);
 extern int zfs_spa_version_map(int zpl_version);
-extern const char *zfs_history_event_names[LOG_END];
+#define	ZFS_NUM_LEGACY_HISTORY_EVENTS 41
+extern const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS];
 
 #ifdef	__cplusplus
 }

Modified: trunk/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -33,6 +33,7 @@
 #include <sys/nvpair.h>
 #include <sys/dsl_deleg.h>
 #include <sys/zfs_ioctl.h>
+#include "zfs_namecheck.h"
 #include "zfs_ioctl_compat.h"
 
 static int zfs_version_ioctl = ZFS_IOCVER_CURRENT;
@@ -49,8 +50,52 @@
 {
 	zfs_cmd_v15_t *zc_c;
 	zfs_cmd_v28_t *zc28_c;
+	zfs_cmd_deadman_t *zcdm_c;
 
 	switch (cflag) {
+	case ZFS_CMD_COMPAT_DEADMAN:
+		zcdm_c = (void *)addr;
+		/* zc */
+		strlcpy(zc->zc_name, zcdm_c->zc_name, MAXPATHLEN);
+		strlcpy(zc->zc_value, zcdm_c->zc_value, MAXPATHLEN * 2);
+		strlcpy(zc->zc_string, zcdm_c->zc_string, MAXPATHLEN);
+		zc->zc_guid = zcdm_c->zc_guid;
+		zc->zc_nvlist_conf = zcdm_c->zc_nvlist_conf;
+		zc->zc_nvlist_conf_size = zcdm_c->zc_nvlist_conf_size;
+		zc->zc_nvlist_src = zcdm_c->zc_nvlist_src;
+		zc->zc_nvlist_src_size = zcdm_c->zc_nvlist_src_size;
+		zc->zc_nvlist_dst = zcdm_c->zc_nvlist_dst;
+		zc->zc_nvlist_dst_size = zcdm_c->zc_nvlist_dst_size;
+		zc->zc_cookie = zcdm_c->zc_cookie;
+		zc->zc_objset_type = zcdm_c->zc_objset_type;
+		zc->zc_perm_action = zcdm_c->zc_perm_action;
+		zc->zc_history = zcdm_c->zc_history;
+		zc->zc_history_len = zcdm_c->zc_history_len;
+		zc->zc_history_offset = zcdm_c->zc_history_offset;
+		zc->zc_obj = zcdm_c->zc_obj;
+		zc->zc_iflags = zcdm_c->zc_iflags;
+		zc->zc_share = zcdm_c->zc_share;
+		zc->zc_jailid = zcdm_c->zc_jailid;
+		zc->zc_objset_stats = zcdm_c->zc_objset_stats;
+		zc->zc_begin_record = zcdm_c->zc_begin_record;
+		zc->zc_defer_destroy = zcdm_c->zc_defer_destroy;
+		zc->zc_temphold = zcdm_c->zc_temphold;
+		zc->zc_action_handle = zcdm_c->zc_action_handle;
+		zc->zc_cleanup_fd = zcdm_c->zc_cleanup_fd;
+		zc->zc_simple = zcdm_c->zc_simple;
+		bcopy(zcdm_c->zc_pad, zc->zc_pad, sizeof(zc->zc_pad));
+		zc->zc_sendobj = zcdm_c->zc_sendobj;
+		zc->zc_fromobj = zcdm_c->zc_fromobj;
+		zc->zc_createtxg = zcdm_c->zc_createtxg;
+		zc->zc_stat = zcdm_c->zc_stat;
+
+		/* zc_inject_record doesn't change in libzfs_core */
+		zcdm_c->zc_inject_record = zc->zc_inject_record;
+
+		/* we always assume zc_nvlist_dst_filled is true */
+		zc->zc_nvlist_dst_filled = B_TRUE;
+	break;
+
 	case ZFS_CMD_COMPAT_V28:
 		zc28_c = (void *)addr;
 
@@ -58,7 +103,6 @@
 		strlcpy(zc->zc_name, zc28_c->zc_name, MAXPATHLEN);
 		strlcpy(zc->zc_value, zc28_c->zc_value, MAXPATHLEN * 2);
 		strlcpy(zc->zc_string, zc28_c->zc_string, MAXPATHLEN);
-		strlcpy(zc->zc_top_ds, zc28_c->zc_top_ds, MAXPATHLEN);
 		zc->zc_guid = zc28_c->zc_guid;
 		zc->zc_nvlist_conf = zc28_c->zc_nvlist_conf;
 		zc->zc_nvlist_conf_size = zc28_c->zc_nvlist_conf_size;
@@ -174,12 +218,60 @@
 }
 
 void
-zfs_cmd_compat_put(zfs_cmd_t *zc, caddr_t addr, const int cflag)
+zfs_cmd_compat_put(zfs_cmd_t *zc, caddr_t addr, const int request,
+    const int cflag)
 {
 	zfs_cmd_v15_t *zc_c;
 	zfs_cmd_v28_t *zc28_c;
+	zfs_cmd_deadman_t *zcdm_c;
 
 	switch (cflag) {
+	case ZFS_CMD_COMPAT_DEADMAN:
+		zcdm_c = (void *)addr;
+
+		strlcpy(zcdm_c->zc_name, zc->zc_name, MAXPATHLEN);
+		strlcpy(zcdm_c->zc_value, zc->zc_value, MAXPATHLEN * 2);
+		strlcpy(zcdm_c->zc_string, zc->zc_string, MAXPATHLEN);
+		zcdm_c->zc_guid = zc->zc_guid;
+		zcdm_c->zc_nvlist_conf = zc->zc_nvlist_conf;
+		zcdm_c->zc_nvlist_conf_size = zc->zc_nvlist_conf_size;
+		zcdm_c->zc_nvlist_src = zc->zc_nvlist_src;
+		zcdm_c->zc_nvlist_src_size = zc->zc_nvlist_src_size;
+		zcdm_c->zc_nvlist_dst = zc->zc_nvlist_dst;
+		zcdm_c->zc_nvlist_dst_size = zc->zc_nvlist_dst_size;
+		zcdm_c->zc_cookie = zc->zc_cookie;
+		zcdm_c->zc_objset_type = zc->zc_objset_type;
+		zcdm_c->zc_perm_action = zc->zc_perm_action;
+		zcdm_c->zc_history = zc->zc_history;
+		zcdm_c->zc_history_len = zc->zc_history_len;
+		zcdm_c->zc_history_offset = zc->zc_history_offset;
+		zcdm_c->zc_obj = zc->zc_obj;
+		zcdm_c->zc_iflags = zc->zc_iflags;
+		zcdm_c->zc_share = zc->zc_share;
+		zcdm_c->zc_jailid = zc->zc_jailid;
+		zcdm_c->zc_objset_stats = zc->zc_objset_stats;
+		zcdm_c->zc_begin_record = zc->zc_begin_record;
+		zcdm_c->zc_defer_destroy = zc->zc_defer_destroy;
+		zcdm_c->zc_temphold = zc->zc_temphold;
+		zcdm_c->zc_action_handle = zc->zc_action_handle;
+		zcdm_c->zc_cleanup_fd = zc->zc_cleanup_fd;
+		zcdm_c->zc_simple = zc->zc_simple;
+		bcopy(zc->zc_pad, zcdm_c->zc_pad, sizeof(zcdm_c->zc_pad));
+		zcdm_c->zc_sendobj = zc->zc_sendobj;
+		zcdm_c->zc_fromobj = zc->zc_fromobj;
+		zcdm_c->zc_createtxg = zc->zc_createtxg;
+		zcdm_c->zc_stat = zc->zc_stat;
+
+		/* zc_inject_record doesn't change in libzfs_core */
+		zc->zc_inject_record = zcdm_c->zc_inject_record;
+#ifndef _KERNEL
+		if (request == ZFS_IOC_RECV)
+			strlcpy(zcdm_c->zc_top_ds,
+			    zc->zc_value + strlen(zc->zc_value) + 1,
+			    (MAXPATHLEN * 2) - strlen(zc->zc_value) - 1);
+#endif
+	break;
+
 	case ZFS_CMD_COMPAT_V28:
 		zc28_c = (void *)addr;
 
@@ -186,7 +278,6 @@
 		strlcpy(zc28_c->zc_name, zc->zc_name, MAXPATHLEN);
 		strlcpy(zc28_c->zc_value, zc->zc_value, MAXPATHLEN * 2);
 		strlcpy(zc28_c->zc_string, zc->zc_string, MAXPATHLEN);
-		strlcpy(zc28_c->zc_top_ds, zc->zc_top_ds, MAXPATHLEN);
 		zc28_c->zc_guid = zc->zc_guid;
 		zc28_c->zc_nvlist_conf = zc->zc_nvlist_conf;
 		zc28_c->zc_nvlist_conf_size = zc->zc_nvlist_conf_size;
@@ -216,7 +307,12 @@
 		zc28_c->zc_fromobj = zc->zc_fromobj;
 		zc28_c->zc_createtxg = zc->zc_createtxg;
 		zc28_c->zc_stat = zc->zc_stat;
-
+#ifndef _KERNEL
+		if (request == ZFS_IOC_RECV)
+			strlcpy(zc28_c->zc_top_ds,
+			    zc->zc_value + strlen(zc->zc_value) + 1,
+			    MAXPATHLEN * 2 - strlen(zc->zc_value) - 1);
+#endif
 		/* zc_inject_record */
 		zc28_c->zc_inject_record.zi_objset =
 		    zc->zc_inject_record.zi_objset;
@@ -476,22 +572,33 @@
 
 #ifndef _KERNEL
 int
-zcmd_ioctl_compat(int fd, unsigned long cmd, zfs_cmd_t *zc, const int cflag)
+zcmd_ioctl_compat(int fd, int request, zfs_cmd_t *zc, const int cflag)
 {
 	int nc, ret;
 	void *zc_c;
 	unsigned long ncmd;
+	zfs_iocparm_t zp;
 
 	switch (cflag) {
 	case ZFS_CMD_COMPAT_NONE:
-		ret = ioctl(fd, cmd, zc);
-		return (ret);
+		ncmd = _IOWR('Z', request, struct zfs_iocparm);
+		zp.zfs_cmd = (uint64_t)zc;
+		zp.zfs_cmd_size = sizeof(zfs_cmd_t);
+		zp.zfs_ioctl_version = ZFS_IOCVER_CURRENT;
+		return (ioctl(fd, ncmd, &zp));
+	case ZFS_CMD_COMPAT_LZC:
+		ncmd = _IOWR('Z', request, struct zfs_cmd);
+		return (ioctl(fd, ncmd, zc));
+	case ZFS_CMD_COMPAT_DEADMAN:
+		zc_c = malloc(sizeof(zfs_cmd_deadman_t));
+		ncmd = _IOWR('Z', request, struct zfs_cmd_deadman);
+		break;
 	case ZFS_CMD_COMPAT_V28:
 		zc_c = malloc(sizeof(zfs_cmd_v28_t));
-		ncmd = _IOWR('Z', ZFS_IOC(cmd), struct zfs_cmd_v28);
+		ncmd = _IOWR('Z', request, struct zfs_cmd_v28);
 		break;
 	case ZFS_CMD_COMPAT_V15:
-		nc = zfs_ioctl_v28_to_v15[ZFS_IOC(cmd)];
+		nc = zfs_ioctl_v28_to_v15[request];
 		zc_c = malloc(sizeof(zfs_cmd_v15_t));
 		ncmd = _IOWR('Z', nc, struct zfs_cmd_v15);
 		break;
@@ -499,14 +606,15 @@
 		return (EINVAL);
 	}
 
-	if (ZFS_IOC(ncmd) == ZFS_IOC_COMPAT_FAIL)
+	if (ZFS_IOCREQ(ncmd) == ZFS_IOC_COMPAT_FAIL)
 		return (ENOTSUP);
 
-	zfs_cmd_compat_put(zc, (caddr_t)zc_c, cflag);
+	zfs_cmd_compat_put(zc, (caddr_t)zc_c, request, cflag);
+
 	ret = ioctl(fd, ncmd, zc_c);
 	if (cflag == ZFS_CMD_COMPAT_V15 &&
-	    nc == 2 /* ZFS_IOC_POOL_IMPORT */)
-		ret = ioctl(fd, _IOWR('Z', 4 /* ZFS_IOC_POOL_CONFIGS */,
+	    nc == ZFS_IOC_POOL_IMPORT)
+		ret = ioctl(fd, _IOWR('Z', ZFS_IOC_POOL_CONFIGS,
 		    struct zfs_cmd_v15), zc_c);
 	zfs_cmd_compat_get(zc, (caddr_t)zc_c, cflag);
 	free(zc_c);
@@ -513,10 +621,10 @@
 
 	if (cflag == ZFS_CMD_COMPAT_V15) {
 		switch (nc) {
-		case 2:	/* ZFS_IOC_POOL_IMPORT */
-		case 4: /* ZFS_IOC_POOL_CONFIGS */
-		case 5: /* ZFS_IOC_POOL_STATS */
-		case 6: /* ZFS_IOC_POOL_TRYIMPORT */
+		case ZFS_IOC_POOL_IMPORT:
+		case ZFS_IOC_POOL_CONFIGS:
+		case ZFS_IOC_POOL_STATS:
+		case ZFS_IOC_POOL_TRYIMPORT:
 			zfs_ioctl_compat_fix_stats(zc, nc);
 			break;
 		case 41: /* ZFS_IOC_POOL_GET_PROPS (v15) */
@@ -528,10 +636,16 @@
 	return (ret);
 }
 #else /* _KERNEL */
-void
+int
 zfs_ioctl_compat_pre(zfs_cmd_t *zc, int *vec, const int cflag)
 {
-	if (cflag == ZFS_CMD_COMPAT_V15)
+	int error = 0;
+
+	/* are we creating a clone? */
+	if (*vec == ZFS_IOC_CREATE && zc->zc_value[0] != '\0')
+		*vec = ZFS_IOC_CLONE;
+
+	if (cflag == ZFS_CMD_COMPAT_V15) {
 		switch (*vec) {
 
 		case 7: /* ZFS_IOC_POOL_SCRUB (v15) */
@@ -538,6 +652,9 @@
 			zc->zc_cookie = POOL_SCAN_SCRUB;
 			break;
 		}
+	}
+
+	return (error);
 }
 
 void
@@ -545,9 +662,9 @@
 {
 	if (cflag == ZFS_CMD_COMPAT_V15) {
 		switch (vec) {
-		case 4:	/* ZFS_IOC_POOL_CONFIGS */
-		case 5:	/* ZFS_IOC_POOL_STATS */
-		case 6:	/* ZFS_IOC_POOL_TRYIMPORT */
+		case ZFS_IOC_POOL_CONFIGS:
+		case ZFS_IOC_POOL_STATS:
+		case ZFS_IOC_POOL_TRYIMPORT:
 			zfs_ioctl_compat_fix_stats(zc, vec);
 			break;
 		case 41: /* ZFS_IOC_POOL_GET_PROPS (v15) */
@@ -556,4 +673,193 @@
 		}
 	}
 }
+
+nvlist_t *
+zfs_ioctl_compat_innvl(zfs_cmd_t *zc, nvlist_t * innvl, const int vec,
+    const int cflag)
+{
+	nvlist_t *nvl, *tmpnvl, *hnvl;
+	nvpair_t *elem;
+	char *poolname, *snapname;
+	int err;
+
+	if (cflag == ZFS_CMD_COMPAT_NONE || cflag == ZFS_CMD_COMPAT_LZC)
+		goto out;
+
+	switch (vec) {
+	case ZFS_IOC_CREATE:
+		nvl = fnvlist_alloc();
+		fnvlist_add_int32(nvl, "type", zc->zc_objset_type);
+		if (innvl != NULL) {
+			fnvlist_add_nvlist(nvl, "props", innvl);
+			nvlist_free(innvl);
+		}
+		return (nvl);
+	break;
+	case ZFS_IOC_CLONE:
+		nvl = fnvlist_alloc();
+		fnvlist_add_string(nvl, "origin", zc->zc_value);
+		if (innvl != NULL) {
+			fnvlist_add_nvlist(nvl, "props", innvl);
+			nvlist_free(innvl);
+		}
+		return (nvl);
+	break;
+	case ZFS_IOC_SNAPSHOT:
+		if (innvl == NULL)
+			goto out;
+		nvl = fnvlist_alloc();
+		fnvlist_add_nvlist(nvl, "props", innvl);
+		tmpnvl = fnvlist_alloc();
+		snapname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value);
+		fnvlist_add_boolean(tmpnvl, snapname);
+		kmem_free(snapname, strlen(snapname + 1));
+		/* check if we are doing a recursive snapshot */
+		if (zc->zc_cookie)
+			dmu_get_recursive_snaps_nvl(zc->zc_name, zc->zc_value,
+			    tmpnvl);
+		fnvlist_add_nvlist(nvl, "snaps", tmpnvl);
+		fnvlist_free(tmpnvl);
+		nvlist_free(innvl);
+		/* strip dataset part from zc->zc_name */
+		zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0';
+		return (nvl);
+	break;
+	case ZFS_IOC_SPACE_SNAPS:
+		nvl = fnvlist_alloc();
+		fnvlist_add_string(nvl, "firstsnap", zc->zc_value);
+		if (innvl != NULL)
+			nvlist_free(innvl);
+		return (nvl);
+	break;
+	case ZFS_IOC_DESTROY_SNAPS:
+		if (innvl == NULL && cflag == ZFS_CMD_COMPAT_DEADMAN)
+			goto out;
+		nvl = fnvlist_alloc();
+		if (innvl != NULL) {
+			fnvlist_add_nvlist(nvl, "snaps", innvl);
+		} else {
+			/*
+			 * We are probably called by even older binaries,
+			 * allocate and populate nvlist with recursive
+			 * snapshots
+			 */
+			if (snapshot_namecheck(zc->zc_value, NULL,
+			    NULL) == 0) {
+				tmpnvl = fnvlist_alloc();
+				if (dmu_get_recursive_snaps_nvl(zc->zc_name,
+				    zc->zc_value, tmpnvl) == 0)
+					fnvlist_add_nvlist(nvl, "snaps",
+					    tmpnvl);
+				nvlist_free(tmpnvl);
+			}
+		}
+		if (innvl != NULL)
+			nvlist_free(innvl);
+		/* strip dataset part from zc->zc_name */
+		zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0';
+		return (nvl);
+	break;
+	case ZFS_IOC_HOLD:
+		nvl = fnvlist_alloc();
+		tmpnvl = fnvlist_alloc();
+		if (zc->zc_cleanup_fd != -1)
+			fnvlist_add_int32(nvl, "cleanup_fd",
+			    (int32_t)zc->zc_cleanup_fd);
+		if (zc->zc_cookie) {
+			hnvl = fnvlist_alloc();
+			if (dmu_get_recursive_snaps_nvl(zc->zc_name,
+			    zc->zc_value, hnvl) == 0) {
+				elem = NULL;
+				while ((elem = nvlist_next_nvpair(hnvl,
+				    elem)) != NULL) {
+					nvlist_add_string(tmpnvl,
+					    nvpair_name(elem), zc->zc_string);
+				}
+			}
+			nvlist_free(hnvl);
+		} else {
+			snapname = kmem_asprintf("%s@%s", zc->zc_name,
+			    zc->zc_value);
+			nvlist_add_string(tmpnvl, snapname, zc->zc_string);
+			kmem_free(snapname, strlen(snapname + 1));
+		}
+		fnvlist_add_nvlist(nvl, "holds", tmpnvl);
+		nvlist_free(tmpnvl);
+		if (innvl != NULL)
+			nvlist_free(innvl);
+		/* strip dataset part from zc->zc_name */
+		zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0';
+		return (nvl);
+	break;
+	case ZFS_IOC_RELEASE:
+		nvl = fnvlist_alloc();
+		tmpnvl = fnvlist_alloc();
+		if (zc->zc_cookie) {
+			hnvl = fnvlist_alloc();
+			if (dmu_get_recursive_snaps_nvl(zc->zc_name,
+			    zc->zc_value, hnvl) == 0) {
+				elem = NULL;
+				while ((elem = nvlist_next_nvpair(hnvl,
+				    elem)) != NULL) {
+					fnvlist_add_boolean(tmpnvl,
+					    zc->zc_string);
+					fnvlist_add_nvlist(nvl,
+					    nvpair_name(elem), tmpnvl);
+				}
+			}
+			nvlist_free(hnvl);
+		} else {
+			snapname = kmem_asprintf("%s@%s", zc->zc_name,
+			    zc->zc_value);
+			fnvlist_add_boolean(tmpnvl, zc->zc_string);
+			fnvlist_add_nvlist(nvl, snapname, tmpnvl);
+			kmem_free(snapname, strlen(snapname + 1));
+		}
+		nvlist_free(tmpnvl);
+		if (innvl != NULL)
+			nvlist_free(innvl);
+		/* strip dataset part from zc->zc_name */
+		zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0';
+		return (nvl);
+	break;
+	}
+out:
+	return (innvl);
+}
+
+nvlist_t *
+zfs_ioctl_compat_outnvl(zfs_cmd_t *zc, nvlist_t * outnvl, const int vec,
+    const int cflag)
+{
+	nvlist_t *tmpnvl;
+
+	if (cflag == ZFS_CMD_COMPAT_NONE || cflag == ZFS_CMD_COMPAT_LZC)
+		return (outnvl);
+
+	switch (vec) {
+	case ZFS_IOC_SPACE_SNAPS:
+		(void) nvlist_lookup_uint64(outnvl, "used", &zc->zc_cookie);
+		(void) nvlist_lookup_uint64(outnvl, "compressed",
+		    &zc->zc_objset_type);
+		(void) nvlist_lookup_uint64(outnvl, "uncompressed",
+		    &zc->zc_perm_action);
+		nvlist_free(outnvl);
+		/* return empty outnvl */
+		tmpnvl = fnvlist_alloc();
+		return (tmpnvl);
+	break;
+	case ZFS_IOC_CREATE:
+	case ZFS_IOC_CLONE:
+	case ZFS_IOC_HOLD:
+	case ZFS_IOC_RELEASE:
+		nvlist_free(outnvl);
+		/* return empty outnvl */
+		tmpnvl = fnvlist_alloc();
+		return (tmpnvl);
+	break;
+	}
+
+	return (outnvl);
+}
 #endif /* KERNEL */

Modified: trunk/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -45,17 +45,31 @@
  */
 
 /* ioctl versions for vfs.zfs.version.ioctl */
+#define	ZFS_IOCVER_UNDEF	-1
+#define	ZFS_IOCVER_NONE		0
 #define	ZFS_IOCVER_DEADMAN	1
-#define	ZFS_IOCVER_CURRENT	ZFS_IOCVER_DEADMAN
+#define	ZFS_IOCVER_LZC		2
+#define	ZFS_IOCVER_ZCMD		3
+#define	ZFS_IOCVER_CURRENT	ZFS_IOCVER_ZCMD
 
 /* compatibility conversion flag */
 #define	ZFS_CMD_COMPAT_NONE	0
 #define	ZFS_CMD_COMPAT_V15	1
 #define	ZFS_CMD_COMPAT_V28	2
+#define	ZFS_CMD_COMPAT_DEADMAN	3
+#define	ZFS_CMD_COMPAT_LZC	4
 
 #define	ZFS_IOC_COMPAT_PASS	254
 #define	ZFS_IOC_COMPAT_FAIL	255
 
+#define	ZFS_IOCREQ(ioreq)	((ioreq) & 0xff)
+
+typedef struct zfs_iocparm {
+	uint32_t	zfs_ioctl_version;
+	uint64_t	zfs_cmd;
+	uint64_t	zfs_cmd_size;
+} zfs_iocparm_t;
+
 typedef struct zinject_record_v15 {
 	uint64_t	zi_objset;
 	uint64_t	zi_object;
@@ -148,6 +162,44 @@
 	zfs_stat_t	zc_stat;
 } zfs_cmd_v28_t;
 
+typedef struct zfs_cmd_deadman {
+	char		zc_name[MAXPATHLEN];
+	char		zc_value[MAXPATHLEN * 2];
+	char		zc_string[MAXNAMELEN];
+	char		zc_top_ds[MAXPATHLEN];
+	uint64_t	zc_guid;
+	uint64_t	zc_nvlist_conf;		/* really (char *) */
+	uint64_t	zc_nvlist_conf_size;
+	uint64_t	zc_nvlist_src;		/* really (char *) */
+	uint64_t	zc_nvlist_src_size;
+	uint64_t	zc_nvlist_dst;		/* really (char *) */
+	uint64_t	zc_nvlist_dst_size;
+	uint64_t	zc_cookie;
+	uint64_t	zc_objset_type;
+	uint64_t	zc_perm_action;
+	uint64_t 	zc_history;		/* really (char *) */
+	uint64_t 	zc_history_len;
+	uint64_t	zc_history_offset;
+	uint64_t	zc_obj;
+	uint64_t	zc_iflags;		/* internal to zfs(7fs) */
+	zfs_share_t	zc_share;
+	uint64_t	zc_jailid;
+	dmu_objset_stats_t zc_objset_stats;
+	struct drr_begin zc_begin_record;
+	/* zc_inject_record doesn't change in libzfs_core */
+	zinject_record_t zc_inject_record;
+	boolean_t	zc_defer_destroy;
+	boolean_t	zc_temphold;
+	uint64_t	zc_action_handle;
+	int		zc_cleanup_fd;
+	uint8_t		zc_simple;
+	uint8_t		zc_pad[3];		/* alignment */
+	uint64_t	zc_sendobj;
+	uint64_t	zc_fromobj;
+	uint64_t	zc_createtxg;
+	zfs_stat_t	zc_stat;
+} zfs_cmd_deadman_t;
+
 #ifdef _KERNEL
 unsigned static long zfs_ioctl_v15_to_v28[] = {
 	0,	/*  0 ZFS_IOC_POOL_CREATE */
@@ -272,13 +324,17 @@
 #endif	/* ! _KERNEL */
 
 #ifdef _KERNEL
-void zfs_ioctl_compat_pre(zfs_cmd_t *, int *, const int);
+int zfs_ioctl_compat_pre(zfs_cmd_t *, int *, const int);
 void zfs_ioctl_compat_post(zfs_cmd_t *, const int, const int);
+nvlist_t *zfs_ioctl_compat_innvl(zfs_cmd_t *, nvlist_t *, const int,
+    const int);
+nvlist_t *zfs_ioctl_compat_outnvl(zfs_cmd_t *, nvlist_t *, const int,
+    const int);
 #else
-int zcmd_ioctl_compat(int, unsigned long, zfs_cmd_t *, const int);
+int zcmd_ioctl_compat(int, int, zfs_cmd_t *, const int);
 #endif	/* _KERNEL */
 void zfs_cmd_compat_get(zfs_cmd_t *, caddr_t, const int);
-void zfs_cmd_compat_put(zfs_cmd_t *, caddr_t, const int);
+void zfs_cmd_compat_put(zfs_cmd_t *, caddr_t, const int, const int);
 
 #ifdef	__cplusplus
 }

Modified: trunk/sys/cddl/contrib/opensolaris/common/zfs/zprop_common.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/common/zfs/zprop_common.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/common/zfs/zprop_common.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -22,6 +22,9 @@
  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
 
 /*
  * Common routines used by zfs and zpool property management.
@@ -129,7 +132,8 @@
     zprop_attr_t attr, int objset_types, const char *colname)
 {
 	zprop_register_impl(prop, name, type, 0, NULL, attr,
-	    objset_types, NULL, colname, B_FALSE, B_FALSE, NULL);
+	    objset_types, NULL, colname,
+	    type == PROP_TYPE_NUMBER, B_FALSE, NULL);
 }
 
 

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/Makefile.files
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/Makefile.files	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/Makefile.files	2016-10-02 00:51:03 UTC (rev 9136)
@@ -49,8 +49,10 @@
 	dsl_dir.o		\
 	dsl_dataset.o		\
 	dsl_deadlist.o		\
+	dsl_destroy.o		\
 	dsl_pool.o		\
 	dsl_synctask.o		\
+	dsl_userhold.o		\
 	dmu_zfetch.o		\
 	dsl_deleg.o		\
 	dsl_prop.o		\
@@ -61,6 +63,7 @@
 	lzjb.o			\
 	metaslab.o		\
 	refcount.o		\
+	rrwlock.o		\
 	sa.o			\
 	sha256.o		\
 	spa.o			\
@@ -120,7 +123,6 @@
 	zfs_onexit.o		\
 	zfs_replay.o		\
 	zfs_rlock.o		\
-	rrwlock.o		\
 	zfs_vfsops.o		\
 	zfs_vnops.o		\
 	zvol.o

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -21,7 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 /*
@@ -1790,12 +1790,12 @@
 	}
 }
 
-int
+boolean_t
 arc_buf_remove_ref(arc_buf_t *buf, void* tag)
 {
 	arc_buf_hdr_t *hdr = buf->b_hdr;
 	kmutex_t *hash_lock = HDR_LOCK(hdr);
-	int no_callback = (buf->b_efunc == NULL);
+	boolean_t no_callback = (buf->b_efunc == NULL);
 
 	if (hdr->b_state == arc_anon) {
 		ASSERT(hdr->b_datacnt == 1);
@@ -2045,7 +2045,7 @@
 		ARCSTAT_INCR(arcstat_mutex_miss, missed);
 
 	/*
-	 * We have just evicted some date into the ghost state, make
+	 * We have just evicted some data into the ghost state, make
 	 * sure we also adjust the ghost state size if necessary.
 	 */
 	if (arc_no_grow &&
@@ -2878,7 +2878,7 @@
 {
 	if (zio == NULL || zio->io_error == 0)
 		bcopy(buf->b_data, arg, buf->b_hdr->b_size);
-	VERIFY(arc_buf_remove_ref(buf, arg) == 1);
+	VERIFY(arc_buf_remove_ref(buf, arg));
 }
 
 /* a generic arc_done_func_t */
@@ -2887,7 +2887,7 @@
 {
 	arc_buf_t **bufp = arg;
 	if (zio && zio->io_error) {
-		VERIFY(arc_buf_remove_ref(buf, arg) == 1);
+		VERIFY(arc_buf_remove_ref(buf, arg));
 		*bufp = NULL;
 	} else {
 		*bufp = buf;
@@ -3738,7 +3738,7 @@
 	 */
 	if (curproc == pageproc) {
 		if (page_load > available_memory / 4)
-			return (ERESTART);
+			return (SET_ERROR(ERESTART));
 		/* Note: reserve is inflated, so we deflate */
 		page_load += reserve / 8;
 		return (0);
@@ -3745,7 +3745,7 @@
 	} else if (page_load > 0 && arc_reclaim_needed()) {
 		/* memory is low, delay before restarting */
 		ARCSTAT_INCR(arcstat_memory_throttle_count, 1);
-		return (EAGAIN);
+		return (SET_ERROR(EAGAIN));
 	}
 	page_load = 0;
 
@@ -3760,7 +3760,7 @@
 
 	if (inflight_data > available_memory / 4) {
 		ARCSTAT_INCR(arcstat_memory_throttle_count, 1);
-		return (ERESTART);
+		return (SET_ERROR(ERESTART));
 	}
 #endif
 	return (0);
@@ -3785,13 +3785,13 @@
 	 */
 	if (spa_get_random(10000) == 0) {
 		dprintf("forcing random failure\n");
-		return (ERESTART);
+		return (SET_ERROR(ERESTART));
 	}
 #endif
 	if (reserve > arc_c/4 && !arc_no_grow)
 		arc_c = MIN(arc_c_max, reserve * 4);
 	if (reserve > arc_c)
-		return (ENOMEM);
+		return (SET_ERROR(ENOMEM));
 
 	/*
 	 * Don't count loaned bufs as in flight dirty data to prevent long
@@ -3824,7 +3824,7 @@
 		    arc_anon->arcs_lsize[ARC_BUFC_METADATA]>>10,
 		    arc_anon->arcs_lsize[ARC_BUFC_DATA]>>10,
 		    reserve>>10, arc_c>>10);
-		return (ERESTART);
+		return (SET_ERROR(ERESTART));
 	}
 	atomic_add_64(&arc_tempreserve, reserve);
 	return (0);
@@ -4518,7 +4518,7 @@
 		if (zio->io_error != 0) {
 			ARCSTAT_BUMP(arcstat_l2_io_error);
 		} else {
-			zio->io_error = EIO;
+			zio->io_error = SET_ERROR(EIO);
 		}
 		if (!equal)
 			ARCSTAT_BUMP(arcstat_l2_cksum_bad);

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bplist.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bplist.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bplist.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #include <sys/bplist.h>
@@ -52,6 +53,12 @@
 	mutex_exit(&bpl->bpl_lock);
 }
 
+/*
+ * To aid debugging, we keep the most recently removed entry.  This way if
+ * we are in the callback, we can easily locate the entry.
+ */
+static bplist_entry_t *bplist_iterate_last_removed;
+
 void
 bplist_iterate(bplist_t *bpl, bplist_itor_t *func, void *arg, dmu_tx_t *tx)
 {
@@ -59,6 +66,7 @@
 
 	mutex_enter(&bpl->bpl_lock);
 	while (bpe = list_head(&bpl->bpl_list)) {
+		bplist_iterate_last_removed = bpe;
 		list_remove(&bpl->bpl_list, bpe);
 		mutex_exit(&bpl->bpl_lock);
 		func(arg, &bpe->bpe_blk, tx);

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -392,6 +392,10 @@
 		    DMU_OT_BPOBJ_SUBOBJ, SPA_MAXBLOCKSIZE, DMU_OT_NONE, 0, tx);
 	}
 
+	dmu_object_info_t doi;
+	ASSERT0(dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi));
+	ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ);
+
 	mutex_enter(&bpo->bpo_lock);
 	dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
 	    bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -21,7 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -39,7 +39,7 @@
 #include <sys/sa_impl.h>
 
 static void dbuf_destroy(dmu_buf_impl_t *db);
-static int dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
+static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
 static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
 
 /*
@@ -499,7 +499,7 @@
 	} else {
 		ASSERT(db->db_blkid != DMU_BONUS_BLKID);
 		ASSERT3P(db->db_buf, ==, NULL);
-		VERIFY(arc_buf_remove_ref(buf, db) == 1);
+		VERIFY(arc_buf_remove_ref(buf, db));
 		db->db_state = DB_UNCACHED;
 	}
 	cv_broadcast(&db->db_changed);
@@ -598,7 +598,7 @@
 	ASSERT(!refcount_is_zero(&db->db_holds));
 
 	if (db->db_state == DB_NOFILL)
-		return (EIO);
+		return (SET_ERROR(EIO));
 
 	DB_DNODE_ENTER(db);
 	dn = DB_DNODE(db);
@@ -655,7 +655,7 @@
 				cv_wait(&db->db_changed, &db->db_mtx);
 			}
 			if (db->db_state == DB_UNCACHED)
-				err = EIO;
+				err = SET_ERROR(EIO);
 		}
 		mutex_exit(&db->db_mtx);
 	}
@@ -828,10 +828,12 @@
 			continue;
 
 		/* found a level 0 buffer in the range */
-		if (dbuf_undirty(db, tx))
+		mutex_enter(&db->db_mtx);
+		if (dbuf_undirty(db, tx)) {
+			/* mutex has been dropped and dbuf destroyed */
 			continue;
+		}
 
-		mutex_enter(&db->db_mtx);
 		if (db->db_state == DB_UNCACHED ||
 		    db->db_state == DB_NOFILL ||
 		    db->db_state == DB_EVICTING) {
@@ -958,7 +960,7 @@
 
 	mutex_enter(&db->db_mtx);
 	dbuf_set_data(db, buf);
-	VERIFY(arc_buf_remove_ref(obuf, db) == 1);
+	VERIFY(arc_buf_remove_ref(obuf, db));
 	db->db.db_size = size;
 
 	if (db->db_level == 0) {
@@ -1258,7 +1260,10 @@
 	return (dr);
 }
 
-static int
+/*
+ * Return TRUE if this evicted the dbuf.
+ */
+static boolean_t
 dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 {
 	dnode_t *dn;
@@ -1267,8 +1272,9 @@
 
 	ASSERT(txg != 0);
 	ASSERT(db->db_blkid != DMU_BONUS_BLKID);
+	ASSERT0(db->db_level);
+	ASSERT(MUTEX_HELD(&db->db_mtx));
 
-	mutex_enter(&db->db_mtx);
 	/*
 	 * If this buffer is not dirty, we're done.
 	 */
@@ -1275,10 +1281,8 @@
 	for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next)
 		if (dr->dr_txg <= txg)
 			break;
-	if (dr == NULL || dr->dr_txg < txg) {
-		mutex_exit(&db->db_mtx);
-		return (0);
-	}
+	if (dr == NULL || dr->dr_txg < txg)
+		return (B_FALSE);
 	ASSERT(dr->dr_txg == txg);
 	ASSERT(dr->dr_dbuf == db);
 
@@ -1286,24 +1290,12 @@
 	dn = DB_DNODE(db);
 
 	/*
-	 * If this buffer is currently held, we cannot undirty
-	 * it, since one of the current holders may be in the
-	 * middle of an update.  Note that users of dbuf_undirty()
-	 * should not place a hold on the dbuf before the call.
-	 * Also note: we can get here with a spill block, so
-	 * test for that similar to how dbuf_dirty does.
+	 * Note:  This code will probably work even if there are concurrent
+	 * holders, but it is untested in that scenerio, as the ZPL and
+	 * ztest have additional locking (the range locks) that prevents
+	 * that type of concurrent access.
 	 */
-	if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
-		mutex_exit(&db->db_mtx);
-		/* Make sure we don't toss this buffer at sync phase */
-		if (db->db_blkid != DMU_SPILL_BLKID) {
-			mutex_enter(&dn->dn_mtx);
-			dnode_clear_range(dn, db->db_blkid, 1, tx);
-			mutex_exit(&dn->dn_mtx);
-		}
-		DB_DNODE_EXIT(db);
-		return (0);
-	}
+	ASSERT3U(refcount_count(&db->db_holds), ==, db->db_dirtycnt);
 
 	dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
 
@@ -1332,21 +1324,13 @@
 	}
 	DB_DNODE_EXIT(db);
 
-	if (db->db_level == 0) {
-		if (db->db_state != DB_NOFILL) {
-			dbuf_unoverride(dr);
+	if (db->db_state != DB_NOFILL) {
+		dbuf_unoverride(dr);
 
-			ASSERT(db->db_buf != NULL);
-			ASSERT(dr->dt.dl.dr_data != NULL);
-			if (dr->dt.dl.dr_data != db->db_buf)
-				VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data,
-				    db) == 1);
-		}
-	} else {
 		ASSERT(db->db_buf != NULL);
-		ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
-		mutex_destroy(&dr->dt.di.dr_mtx);
-		list_destroy(&dr->dt.di.dr_children);
+		ASSERT(dr->dt.dl.dr_data != NULL);
+		if (dr->dt.dl.dr_data != db->db_buf)
+			VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db));
 	}
 	kmem_free(dr, sizeof (dbuf_dirty_record_t));
 
@@ -1358,13 +1342,12 @@
 
 		ASSERT(db->db_state == DB_NOFILL || arc_released(buf));
 		dbuf_set_data(db, NULL);
-		VERIFY(arc_buf_remove_ref(buf, db) == 1);
+		VERIFY(arc_buf_remove_ref(buf, db));
 		dbuf_evict(db);
-		return (1);
+		return (B_TRUE);
 	}
 
-	mutex_exit(&db->db_mtx);
-	return (0);
+	return (B_FALSE);
 }
 
 #pragma weak dmu_buf_will_dirty = dbuf_will_dirty
@@ -1463,7 +1446,7 @@
 		mutex_exit(&db->db_mtx);
 		(void) dbuf_dirty(db, tx);
 		bcopy(buf->b_data, db->db.db_data, db->db.db_size);
-		VERIFY(arc_buf_remove_ref(buf, db) == 1);
+		VERIFY(arc_buf_remove_ref(buf, db));
 		xuio_stat_wbuf_copied();
 		return;
 	}
@@ -1481,10 +1464,10 @@
 				arc_release(db->db_buf, db);
 			}
 			dr->dt.dl.dr_data = buf;
-			VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1);
+			VERIFY(arc_buf_remove_ref(db->db_buf, db));
 		} else if (dr == NULL || dr->dt.dl.dr_data != db->db_buf) {
 			arc_release(db->db_buf, db);
-			VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1);
+			VERIFY(arc_buf_remove_ref(db->db_buf, db));
 		}
 		db->db_buf = NULL;
 	}
@@ -1610,7 +1593,7 @@
 	if (level >= nlevels ||
 	    (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))) {
 		/* the buffer has no parent yet */
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 	} else if (level < nlevels-1) {
 		/* this block is referenced from an indirect block */
 		int err = dbuf_hold_impl(dn, level+1,
@@ -1861,7 +1844,7 @@
 		err = dbuf_findbp(dn, level, blkid, fail_sparse, &parent, &bp);
 		if (fail_sparse) {
 			if (err == 0 && bp && BP_IS_HOLE(bp))
-				err = ENOENT;
+				err = SET_ERROR(ENOENT);
 			if (err) {
 				if (parent)
 					dbuf_rele(parent, NULL);
@@ -1958,7 +1941,7 @@
 	dnode_t *dn;
 
 	if (db->db_blkid != DMU_SPILL_BLKID)
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	if (blksz == 0)
 		blksz = SPA_MINBLOCKSIZE;
 	if (blksz > SPA_MAXBLOCKSIZE)
@@ -2067,10 +2050,10 @@
 			 * This dbuf has anonymous data associated with it.
 			 */
 			dbuf_set_data(db, NULL);
-			VERIFY(arc_buf_remove_ref(buf, db) == 1);
+			VERIFY(arc_buf_remove_ref(buf, db));
 			dbuf_evict(db);
 		} else {
-			VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0);
+			VERIFY(!arc_buf_remove_ref(db->db_buf, db));
 
 			/*
 			 * A dbuf will be eligible for eviction if either the
@@ -2571,7 +2554,7 @@
 		if (db->db_state != DB_NOFILL) {
 			if (dr->dt.dl.dr_data != db->db_buf)
 				VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data,
-				    db) == 1);
+				    db));
 			else if (!arc_released(db->db_buf))
 				arc_set_callback(db->db_buf, dbuf_do_evict, db);
 		}

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -174,7 +174,7 @@
     ddt_entry_t *dde)
 {
 	if (!ddt_object_exists(ddt, type, class))
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 
 	return (ddt_ops[type]->ddt_op_lookup(ddt->ddt_os,
 	    ddt->ddt_object[type][class], dde));
@@ -235,7 +235,7 @@
     dmu_object_info_t *doi)
 {
 	if (!ddt_object_exists(ddt, type, class))
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 
 	return (dmu_object_info(ddt->ddt_os, ddt->ddt_object[type][class],
 	    doi));
@@ -1157,5 +1157,5 @@
 		ddb->ddb_type = 0;
 	} while (++ddb->ddb_class < DDT_CLASSES);
 
-	return (ENOENT);
+	return (SET_ERROR(ENOENT));
 }

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/dmu.h>
@@ -146,7 +146,7 @@
 	db = dbuf_hold(dn, blkid, tag);
 	rw_exit(&dn->dn_struct_rwlock);
 	if (db == NULL) {
-		err = EIO;
+		err = SET_ERROR(EIO);
 	} else {
 		err = dbuf_read(db, NULL, db_flags);
 		if (err) {
@@ -177,9 +177,9 @@
 	dn = DB_DNODE(db);
 
 	if (dn->dn_bonus != db) {
-		error = EINVAL;
+		error = SET_ERROR(EINVAL);
 	} else if (newsize < 0 || newsize > db_fake->db_size) {
-		error = EINVAL;
+		error = SET_ERROR(EINVAL);
 	} else {
 		dnode_setbonuslen(dn, newsize, tx);
 		error = 0;
@@ -200,9 +200,9 @@
 	dn = DB_DNODE(db);
 
 	if (!DMU_OT_IS_VALID(type)) {
-		error = EINVAL;
+		error = SET_ERROR(EINVAL);
 	} else if (dn->dn_bonus != db) {
-		error = EINVAL;
+		error = SET_ERROR(EINVAL);
 	} else {
 		dnode_setbonus_type(dn, type, tx);
 		error = 0;
@@ -329,12 +329,12 @@
 	dn = DB_DNODE(db);
 
 	if (spa_version(dn->dn_objset->os_spa) < SPA_VERSION_SA) {
-		err = EINVAL;
+		err = SET_ERROR(EINVAL);
 	} else {
 		rw_enter(&dn->dn_struct_rwlock, RW_READER);
 
 		if (!dn->dn_have_spill) {
-			err = ENOENT;
+			err = SET_ERROR(ENOENT);
 		} else {
 			err = dmu_spill_hold_by_dnode(dn,
 			    DB_RF_HAVESTRUCT | DB_RF_CANFAIL, tag, dbp);
@@ -400,7 +400,7 @@
 			    (longlong_t)dn->dn_object, dn->dn_datablksz,
 			    (longlong_t)offset, (longlong_t)length);
 			rw_exit(&dn->dn_struct_rwlock);
-			return (EIO);
+			return (SET_ERROR(EIO));
 		}
 		nblks = 1;
 	}
@@ -417,7 +417,7 @@
 			rw_exit(&dn->dn_struct_rwlock);
 			dmu_buf_rele_array(dbp, nblks, tag);
 			zio_nowait(zio);
-			return (EIO);
+			return (SET_ERROR(EIO));
 		}
 		/* initiate async i/o */
 		if (read)
@@ -449,7 +449,7 @@
 			    db->db_state == DB_FILL)
 				cv_wait(&db->db_changed, &db->db_mtx);
 			if (db->db_state == DB_UNCACHED)
-				err = EIO;
+				err = SET_ERROR(EIO);
 			mutex_exit(&db->db_mtx);
 			if (err) {
 				dmu_buf_rele_array(dbp, nblks, tag);
@@ -1204,7 +1204,7 @@
 dmu_return_arcbuf(arc_buf_t *buf)
 {
 	arc_return_buf(buf, FTAG);
-	VERIFY(arc_buf_remove_ref(buf, FTAG) == 1);
+	VERIFY(arc_buf_remove_ref(buf, FTAG));
 }
 
 /*
@@ -1363,7 +1363,8 @@
 	dmu_tx_hold_space(tx, zgd->zgd_db->db_size);
 	if (dmu_tx_assign(tx, TXG_WAIT) != 0) {
 		dmu_tx_abort(tx);
-		return (EIO);	/* Make zl_get_data do txg_waited_synced() */
+		/* Make zl_get_data do txg_waited_synced() */
+		return (SET_ERROR(EIO));
 	}
 
 	dsa = kmem_alloc(sizeof (dmu_sync_arg_t), KM_SLEEP);
@@ -1448,7 +1449,7 @@
 		 * This txg has already synced.  There's nothing to do.
 		 */
 		mutex_exit(&db->db_mtx);
-		return (EEXIST);
+		return (SET_ERROR(EEXIST));
 	}
 
 	if (txg <= spa_syncing_txg(os->os_spa)) {
@@ -1470,7 +1471,7 @@
 		 * There's no need to log writes to freed blocks, so we're done.
 		 */
 		mutex_exit(&db->db_mtx);
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 	}
 
 	ASSERT(dr->dr_next == NULL || dr->dr_next->dr_txg < txg);
@@ -1499,7 +1500,7 @@
 		 * have been dirtied since, or we would have cleared the state.
 		 */
 		mutex_exit(&db->db_mtx);
-		return (EALREADY);
+		return (SET_ERROR(EALREADY));
 	}
 
 	ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN);

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/dmu.h>
@@ -135,7 +136,7 @@
 	int err = 0;
 
 	if (issig(JUSTLOOKING) && issig(FORREAL))
-		return (EINTR);
+		return (SET_ERROR(EINTR));
 
 	if (zb->zb_object != DMU_META_DNODE_OBJECT)
 		return (0);
@@ -158,7 +159,7 @@
 		if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
 		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
 		    &aflags, zb) != 0)
-			return (EIO);
+			return (SET_ERROR(EIO));
 
 		blk = abuf->b_data;
 		for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
@@ -178,52 +179,54 @@
 }
 
 int
-dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct file *fp, offset_t *offp)
+dmu_diff(const char *tosnap_name, const char *fromsnap_name,
+#ifdef illumos
+    struct vnode *vp, offset_t *offp)
+#else
+    struct file *fp, offset_t *offp)
+#endif
 {
 	struct diffarg da;
-	dsl_dataset_t *ds = tosnap->os_dsl_dataset;
-	dsl_dataset_t *fromds = fromsnap->os_dsl_dataset;
-	dsl_dataset_t *findds;
-	dsl_dataset_t *relds;
-	int err = 0;
+	dsl_dataset_t *fromsnap;
+	dsl_dataset_t *tosnap;
+	dsl_pool_t *dp;
+	int error;
+	uint64_t fromtxg;
 
-	/* make certain we are looking at snapshots */
-	if (!dsl_dataset_is_snapshot(ds) || !dsl_dataset_is_snapshot(fromds))
-		return (EINVAL);
+	if (strchr(tosnap_name, '@') == NULL ||
+	    strchr(fromsnap_name, '@') == NULL)
+		return (SET_ERROR(EINVAL));
 
-	/* fromsnap must be earlier and from the same lineage as tosnap */
-	if (fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)
-		return (EXDEV);
+	error = dsl_pool_hold(tosnap_name, FTAG, &dp);
+	if (error != 0)
+		return (error);
 
-	relds = NULL;
-	findds = ds;
+	error = dsl_dataset_hold(dp, tosnap_name, FTAG, &tosnap);
+	if (error != 0) {
+		dsl_pool_rele(dp, FTAG);
+		return (error);
+	}
 
-	while (fromds->ds_dir != findds->ds_dir) {
-		dsl_pool_t *dp = ds->ds_dir->dd_pool;
+	error = dsl_dataset_hold(dp, fromsnap_name, FTAG, &fromsnap);
+	if (error != 0) {
+		dsl_dataset_rele(tosnap, FTAG);
+		dsl_pool_rele(dp, FTAG);
+		return (error);
+	}
 
-		if (!dsl_dir_is_clone(findds->ds_dir)) {
-			if (relds)
-				dsl_dataset_rele(relds, FTAG);
-			return (EXDEV);
-		}
+	if (!dsl_dataset_is_before(tosnap, fromsnap)) {
+		dsl_dataset_rele(fromsnap, FTAG);
+		dsl_dataset_rele(tosnap, FTAG);
+		dsl_pool_rele(dp, FTAG);
+		return (SET_ERROR(EXDEV));
+	}
 
-		rw_enter(&dp->dp_config_rwlock, RW_READER);
-		err = dsl_dataset_hold_obj(dp,
-		    findds->ds_dir->dd_phys->dd_origin_obj, FTAG, &findds);
-		rw_exit(&dp->dp_config_rwlock);
+	fromtxg = fromsnap->ds_phys->ds_creation_txg;
+	dsl_dataset_rele(fromsnap, FTAG);
 
-		if (relds)
-			dsl_dataset_rele(relds, FTAG);
+	dsl_dataset_long_hold(tosnap, FTAG);
+	dsl_pool_rele(dp, FTAG);
 
-		if (err)
-			return (EXDEV);
-
-		relds = findds;
-	}
-
-	if (relds)
-		dsl_dataset_rele(relds, FTAG);
-
 	da.da_fp = fp;
 	da.da_offp = offp;
 	da.da_ddr.ddr_type = DDR_NONE;
@@ -231,15 +234,18 @@
 	da.da_err = 0;
 	da.da_td = curthread;
 
-	err = traverse_dataset(ds, fromds->ds_phys->ds_creation_txg,
+	error = traverse_dataset(tosnap, fromtxg,
 	    TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, diff_cb, &da);
 
-	if (err) {
-		da.da_err = err;
+	if (error != 0) {
+		da.da_err = error;
 	} else {
 		/* we set the da.da_err we return as side-effect */
 		(void) write_record(&da);
 	}
 
+	dsl_dataset_long_rele(tosnap, FTAG);
+	dsl_dataset_rele(tosnap, FTAG);
+
 	return (da.da_err);
 }

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/dmu.h>
@@ -90,7 +91,7 @@
 	int err;
 
 	if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx))
-		return (EBADF);
+		return (SET_ERROR(EBADF));
 
 	err = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, FTAG, &dn);
 	if (err)
@@ -112,7 +113,7 @@
 	int err;
 
 	if (object == DMU_META_DNODE_OBJECT)
-		return (EBADF);
+		return (SET_ERROR(EBADF));
 
 	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED,
 	    FTAG, &dn);

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
@@ -43,6 +44,7 @@
 #include <sys/zfs_ioctl.h>
 #include <sys/sa.h>
 #include <sys/zfs_onexit.h>
+#include <sys/dsl_destroy.h>
 
 /*
  * Needed to close a window in dnode_move() that allows the objset to be freed
@@ -279,11 +281,11 @@
 		err = arc_read(NULL, spa, os->os_rootbp,
 		    arc_getbuf_func, &os->os_phys_buf,
 		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
-		if (err) {
+		if (err != 0) {
 			kmem_free(os, sizeof (objset_t));
 			/* convert checksum errors into IO errors */
 			if (err == ECKSUM)
-				err = EIO;
+				err = SET_ERROR(EIO);
 			return (err);
 		}
 
@@ -319,34 +321,49 @@
 	 * checksum/compression/copies.
 	 */
 	if (ds) {
-		err = dsl_prop_register(ds, "primarycache",
+		err = dsl_prop_register(ds,
+		    zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
 		    primary_cache_changed_cb, os);
-		if (err == 0)
-			err = dsl_prop_register(ds, "secondarycache",
+		if (err == 0) {
+			err = dsl_prop_register(ds,
+			    zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
 			    secondary_cache_changed_cb, os);
+		}
 		if (!dsl_dataset_is_snapshot(ds)) {
-			if (err == 0)
-				err = dsl_prop_register(ds, "checksum",
+			if (err == 0) {
+				err = dsl_prop_register(ds,
+				    zfs_prop_to_name(ZFS_PROP_CHECKSUM),
 				    checksum_changed_cb, os);
-			if (err == 0)
-				err = dsl_prop_register(ds, "compression",
+			}
+			if (err == 0) {
+				err = dsl_prop_register(ds,
+				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
 				    compression_changed_cb, os);
-			if (err == 0)
-				err = dsl_prop_register(ds, "copies",
+			}
+			if (err == 0) {
+				err = dsl_prop_register(ds,
+				    zfs_prop_to_name(ZFS_PROP_COPIES),
 				    copies_changed_cb, os);
-			if (err == 0)
-				err = dsl_prop_register(ds, "dedup",
+			}
+			if (err == 0) {
+				err = dsl_prop_register(ds,
+				    zfs_prop_to_name(ZFS_PROP_DEDUP),
 				    dedup_changed_cb, os);
-			if (err == 0)
-				err = dsl_prop_register(ds, "logbias",
+			}
+			if (err == 0) {
+				err = dsl_prop_register(ds,
+				    zfs_prop_to_name(ZFS_PROP_LOGBIAS),
 				    logbias_changed_cb, os);
-			if (err == 0)
-				err = dsl_prop_register(ds, "sync",
+			}
+			if (err == 0) {
+				err = dsl_prop_register(ds,
+				    zfs_prop_to_name(ZFS_PROP_SYNC),
 				    sync_changed_cb, os);
+			}
 		}
-		if (err) {
+		if (err != 0) {
 			VERIFY(arc_buf_remove_ref(os->os_phys_buf,
-			    &os->os_phys_buf) == 1);
+			    &os->os_phys_buf));
 			kmem_free(os, sizeof (objset_t));
 			return (err);
 		}
@@ -424,45 +441,67 @@
 	return (err);
 }
 
-/* called from zpl */
+/*
+ * Holds the pool while the objset is held.  Therefore only one objset
+ * can be held at a time.
+ */
 int
 dmu_objset_hold(const char *name, void *tag, objset_t **osp)
 {
+	dsl_pool_t *dp;
 	dsl_dataset_t *ds;
 	int err;
 
-	err = dsl_dataset_hold(name, tag, &ds);
-	if (err)
+	err = dsl_pool_hold(name, tag, &dp);
+	if (err != 0)
 		return (err);
+	err = dsl_dataset_hold(dp, name, tag, &ds);
+	if (err != 0) {
+		dsl_pool_rele(dp, tag);
+		return (err);
+	}
 
 	err = dmu_objset_from_ds(ds, osp);
-	if (err)
+	if (err != 0) {
 		dsl_dataset_rele(ds, tag);
+		dsl_pool_rele(dp, tag);
+	}
 
 	return (err);
 }
 
-/* called from zpl */
+/*
+ * dsl_pool must not be held when this is called.
+ * Upon successful return, there will be a longhold on the dataset,
+ * and the dsl_pool will not be held.
+ */
 int
 dmu_objset_own(const char *name, dmu_objset_type_t type,
     boolean_t readonly, void *tag, objset_t **osp)
 {
+	dsl_pool_t *dp;
 	dsl_dataset_t *ds;
 	int err;
 
-	err = dsl_dataset_own(name, B_FALSE, tag, &ds);
-	if (err)
+	err = dsl_pool_hold(name, FTAG, &dp);
+	if (err != 0)
 		return (err);
+	err = dsl_dataset_own(dp, name, tag, &ds);
+	if (err != 0) {
+		dsl_pool_rele(dp, FTAG);
+		return (err);
+	}
 
 	err = dmu_objset_from_ds(ds, osp);
-	if (err) {
+	dsl_pool_rele(dp, FTAG);
+	if (err != 0) {
 		dsl_dataset_disown(ds, tag);
 	} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
-		dmu_objset_disown(*osp, tag);
-		return (EINVAL);
+		dsl_dataset_disown(ds, tag);
+		return (SET_ERROR(EINVAL));
 	} else if (!readonly && dsl_dataset_is_snapshot(ds)) {
-		dmu_objset_disown(*osp, tag);
-		return (EROFS);
+		dsl_dataset_disown(ds, tag);
+		return (SET_ERROR(EROFS));
 	}
 	return (err);
 }
@@ -470,7 +509,9 @@
 void
 dmu_objset_rele(objset_t *os, void *tag)
 {
+	dsl_pool_t *dp = dmu_objset_pool(os);
 	dsl_dataset_rele(os->os_dsl_dataset, tag);
+	dsl_pool_rele(dp, tag);
 }
 
 void
@@ -479,7 +520,7 @@
 	dsl_dataset_disown(os->os_dsl_dataset, tag);
 }
 
-int
+void
 dmu_objset_evict_dbufs(objset_t *os)
 {
 	dnode_t *dn;
@@ -514,9 +555,7 @@
 		mutex_enter(&os->os_lock);
 		dn = next_dn;
 	}
-	dn = list_head(&os->os_dnodes);
 	mutex_exit(&os->os_lock);
-	return (dn != DMU_META_DNODE(os));
 }
 
 void
@@ -529,22 +568,30 @@
 
 	if (ds) {
 		if (!dsl_dataset_is_snapshot(ds)) {
-			VERIFY(0 == dsl_prop_unregister(ds, "checksum",
+			VERIFY0(dsl_prop_unregister(ds,
+			    zfs_prop_to_name(ZFS_PROP_CHECKSUM),
 			    checksum_changed_cb, os));
-			VERIFY(0 == dsl_prop_unregister(ds, "compression",
+			VERIFY0(dsl_prop_unregister(ds,
+			    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
 			    compression_changed_cb, os));
-			VERIFY(0 == dsl_prop_unregister(ds, "copies",
+			VERIFY0(dsl_prop_unregister(ds,
+			    zfs_prop_to_name(ZFS_PROP_COPIES),
 			    copies_changed_cb, os));
-			VERIFY(0 == dsl_prop_unregister(ds, "dedup",
+			VERIFY0(dsl_prop_unregister(ds,
+			    zfs_prop_to_name(ZFS_PROP_DEDUP),
 			    dedup_changed_cb, os));
-			VERIFY(0 == dsl_prop_unregister(ds, "logbias",
+			VERIFY0(dsl_prop_unregister(ds,
+			    zfs_prop_to_name(ZFS_PROP_LOGBIAS),
 			    logbias_changed_cb, os));
-			VERIFY(0 == dsl_prop_unregister(ds, "sync",
+			VERIFY0(dsl_prop_unregister(ds,
+			    zfs_prop_to_name(ZFS_PROP_SYNC),
 			    sync_changed_cb, os));
 		}
-		VERIFY(0 == dsl_prop_unregister(ds, "primarycache",
+		VERIFY0(dsl_prop_unregister(ds,
+		    zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
 		    primary_cache_changed_cb, os));
-		VERIFY(0 == dsl_prop_unregister(ds, "secondarycache",
+		VERIFY0(dsl_prop_unregister(ds,
+		    zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
 		    secondary_cache_changed_cb, os));
 	}
 
@@ -551,11 +598,7 @@
 	if (os->os_sa)
 		sa_tear_down(os);
 
-	/*
-	 * We should need only a single pass over the dnode list, since
-	 * nothing can be added to the list at this point.
-	 */
-	(void) dmu_objset_evict_dbufs(os);
+	dmu_objset_evict_dbufs(os);
 
 	dnode_special_close(&os->os_meta_dnode);
 	if (DMU_USERUSED_DNODE(os)) {
@@ -566,7 +609,7 @@
 
 	ASSERT3P(list_head(&os->os_dnodes), ==, NULL);
 
-	VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf) == 1);
+	VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf));
 
 	/*
 	 * This is a barrier to prevent the objset from going away in
@@ -598,10 +641,11 @@
 	dnode_t *mdn;
 
 	ASSERT(dmu_tx_is_syncing(tx));
+
 	if (ds != NULL)
-		VERIFY(0 == dmu_objset_from_ds(ds, &os));
+		VERIFY0(dmu_objset_from_ds(ds, &os));
 	else
-		VERIFY(0 == dmu_objset_open_impl(spa, NULL, bp, &os));
+		VERIFY0(dmu_objset_open_impl(spa, NULL, bp, &os));
 
 	mdn = DMU_META_DNODE(os);
 
@@ -649,75 +693,71 @@
 	return (os);
 }
 
-struct oscarg {
-	void (*userfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
-	void *userarg;
-	dsl_dataset_t *clone_origin;
-	const char *lastname;
-	dmu_objset_type_t type;
-	uint64_t flags;
-	cred_t *cr;
-};
+typedef struct dmu_objset_create_arg {
+	const char *doca_name;
+	cred_t *doca_cred;
+	void (*doca_userfunc)(objset_t *os, void *arg,
+	    cred_t *cr, dmu_tx_t *tx);
+	void *doca_userarg;
+	dmu_objset_type_t doca_type;
+	uint64_t doca_flags;
+} dmu_objset_create_arg_t;
 
 /*ARGSUSED*/
 static int
-dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dmu_objset_create_check(void *arg, dmu_tx_t *tx)
 {
-	dsl_dir_t *dd = arg1;
-	struct oscarg *oa = arg2;
-	objset_t *mos = dd->dd_pool->dp_meta_objset;
-	int err;
-	uint64_t ddobj;
+	dmu_objset_create_arg_t *doca = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dir_t *pdd;
+	const char *tail;
+	int error;
 
-	err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj,
-	    oa->lastname, sizeof (uint64_t), 1, &ddobj);
-	if (err != ENOENT)
-		return (err ? err : EEXIST);
+	if (strchr(doca->doca_name, '@') != NULL)
+		return (SET_ERROR(EINVAL));
 
-	if (oa->clone_origin != NULL) {
-		/* You can't clone across pools. */
-		if (oa->clone_origin->ds_dir->dd_pool != dd->dd_pool)
-			return (EXDEV);
-
-		/* You can only clone snapshots, not the head datasets. */
-		if (!dsl_dataset_is_snapshot(oa->clone_origin))
-			return (EINVAL);
+	error = dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail);
+	if (error != 0)
+		return (error);
+	if (tail == NULL) {
+		dsl_dir_rele(pdd, FTAG);
+		return (SET_ERROR(EEXIST));
 	}
+	dsl_dir_rele(pdd, FTAG);
 
 	return (0);
 }
 
 static void
-dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
 {
-	dsl_dir_t *dd = arg1;
-	spa_t *spa = dd->dd_pool->dp_spa;
-	struct oscarg *oa = arg2;
+	dmu_objset_create_arg_t *doca = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dir_t *pdd;
+	const char *tail;
+	dsl_dataset_t *ds;
 	uint64_t obj;
+	blkptr_t *bp;
+	objset_t *os;
 
-	ASSERT(dmu_tx_is_syncing(tx));
+	VERIFY0(dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail));
 
-	obj = dsl_dataset_create_sync(dd, oa->lastname,
-	    oa->clone_origin, oa->flags, oa->cr, tx);
+	obj = dsl_dataset_create_sync(pdd, tail, NULL, doca->doca_flags,
+	    doca->doca_cred, tx);
 
-	if (oa->clone_origin == NULL) {
-		dsl_pool_t *dp = dd->dd_pool;
-		dsl_dataset_t *ds;
-		blkptr_t *bp;
-		objset_t *os;
+	VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
+	bp = dsl_dataset_get_blkptr(ds);
+	os = dmu_objset_create_impl(pdd->dd_pool->dp_spa,
+	    ds, bp, doca->doca_type, tx);
 
-		VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
-		bp = dsl_dataset_get_blkptr(ds);
-		ASSERT(BP_IS_HOLE(bp));
-
-		os = dmu_objset_create_impl(spa, ds, bp, oa->type, tx);
-
-		if (oa->userfunc)
-			oa->userfunc(os, oa->userarg, oa->cr, tx);
-		dsl_dataset_rele(ds, FTAG);
+	if (doca->doca_userfunc != NULL) {
+		doca->doca_userfunc(os, doca->doca_userarg,
+		    doca->doca_cred, tx);
 	}
 
-	spa_history_log_internal(LOG_DS_CREATE, spa, tx, "dataset = %llu", obj);
+	spa_history_log_internal_ds(ds, "create", tx, "");
+	dsl_dataset_rele(ds, FTAG);
+	dsl_dir_rele(pdd, FTAG);
 }
 
 int
@@ -724,296 +764,123 @@
 dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
     void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg)
 {
-	dsl_dir_t *pdd;
-	const char *tail;
-	int err = 0;
-	struct oscarg oa = { 0 };
+	dmu_objset_create_arg_t doca;
 
-	ASSERT(strchr(name, '@') == NULL);
-	err = dsl_dir_open(name, FTAG, &pdd, &tail);
-	if (err)
-		return (err);
-	if (tail == NULL) {
-		dsl_dir_close(pdd, FTAG);
-		return (EEXIST);
-	}
+	doca.doca_name = name;
+	doca.doca_cred = CRED();
+	doca.doca_flags = flags;
+	doca.doca_userfunc = func;
+	doca.doca_userarg = arg;
+	doca.doca_type = type;
 
-	oa.userfunc = func;
-	oa.userarg = arg;
-	oa.lastname = tail;
-	oa.type = type;
-	oa.flags = flags;
-	oa.cr = CRED();
-
-	err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check,
-	    dmu_objset_create_sync, pdd, &oa, 5);
-	dsl_dir_close(pdd, FTAG);
-	return (err);
+	return (dsl_sync_task(name,
+	    dmu_objset_create_check, dmu_objset_create_sync, &doca, 5));
 }
 
-int
-dmu_objset_clone(const char *name, dsl_dataset_t *clone_origin, uint64_t flags)
+typedef struct dmu_objset_clone_arg {
+	const char *doca_clone;
+	const char *doca_origin;
+	cred_t *doca_cred;
+} dmu_objset_clone_arg_t;
+
+/*ARGSUSED*/
+static int
+dmu_objset_clone_check(void *arg, dmu_tx_t *tx)
 {
+	dmu_objset_clone_arg_t *doca = arg;
 	dsl_dir_t *pdd;
 	const char *tail;
-	int err = 0;
-	struct oscarg oa = { 0 };
+	int error;
+	dsl_dataset_t *origin;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
 
-	ASSERT(strchr(name, '@') == NULL);
-	err = dsl_dir_open(name, FTAG, &pdd, &tail);
-	if (err)
-		return (err);
+	if (strchr(doca->doca_clone, '@') != NULL)
+		return (SET_ERROR(EINVAL));
+
+	error = dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail);
+	if (error != 0)
+		return (error);
 	if (tail == NULL) {
-		dsl_dir_close(pdd, FTAG);
-		return (EEXIST);
+		dsl_dir_rele(pdd, FTAG);
+		return (SET_ERROR(EEXIST));
 	}
-
-	oa.lastname = tail;
-	oa.clone_origin = clone_origin;
-	oa.flags = flags;
-	oa.cr = CRED();
-
-	err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check,
-	    dmu_objset_create_sync, pdd, &oa, 5);
-	dsl_dir_close(pdd, FTAG);
-	return (err);
-}
-
-int
-dmu_objset_destroy(const char *name, boolean_t defer)
-{
-	dsl_dataset_t *ds;
-	int error;
-
-	error = dsl_dataset_own(name, B_TRUE, FTAG, &ds);
-	if (error == 0) {
-		error = dsl_dataset_destroy(ds, FTAG, defer);
-		/* dsl_dataset_destroy() closes the ds. */
+	/* You can't clone across pools. */
+	if (pdd->dd_pool != dp) {
+		dsl_dir_rele(pdd, FTAG);
+		return (SET_ERROR(EXDEV));
 	}
+	dsl_dir_rele(pdd, FTAG);
 
-	return (error);
-}
-
-struct snaparg {
-	dsl_sync_task_group_t *dstg;
-	char *snapname;
-	char *htag;
-	char failed[MAXPATHLEN];
-	boolean_t recursive;
-	boolean_t needsuspend;
-	boolean_t temporary;
-	nvlist_t *props;
-	struct dsl_ds_holdarg *ha;	/* only needed in the temporary case */
-	dsl_dataset_t *newds;
-};
-
-static int
-snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
-	objset_t *os = arg1;
-	struct snaparg *sn = arg2;
-	int error;
-
-	/* The props have already been checked by zfs_check_userprops(). */
-
-	error = dsl_dataset_snapshot_check(os->os_dsl_dataset,
-	    sn->snapname, tx);
-	if (error)
+	error = dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin);
+	if (error != 0)
 		return (error);
 
-	if (sn->temporary) {
-		/*
-		 * Ideally we would just call
-		 * dsl_dataset_user_hold_check() and
-		 * dsl_dataset_destroy_check() here.  However the
-		 * dataset we want to hold and destroy is the snapshot
-		 * that we just confirmed we can create, but it won't
-		 * exist until after these checks are run.  Do any
-		 * checks we can here and if more checks are added to
-		 * those routines in the future, similar checks may be
-		 * necessary here.
-		 */
-		if (spa_version(os->os_spa) < SPA_VERSION_USERREFS)
-			return (ENOTSUP);
-		/*
-		 * Not checking number of tags because the tag will be
-		 * unique, as it will be the only tag.
-		 */
-		if (strlen(sn->htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
-			return (E2BIG);
+	/* You can't clone across pools. */
+	if (origin->ds_dir->dd_pool != dp) {
+		dsl_dataset_rele(origin, FTAG);
+		return (SET_ERROR(EXDEV));
+	}
 
-		sn->ha = kmem_alloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
-		sn->ha->temphold = B_TRUE;
-		sn->ha->htag = sn->htag;
+	/* You can only clone snapshots, not the head datasets. */
+	if (!dsl_dataset_is_snapshot(origin)) {
+		dsl_dataset_rele(origin, FTAG);
+		return (SET_ERROR(EINVAL));
 	}
-	return (error);
+	dsl_dataset_rele(origin, FTAG);
+
+	return (0);
 }
 
 static void
-snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dmu_objset_clone_sync(void *arg, dmu_tx_t *tx)
 {
-	objset_t *os = arg1;
-	dsl_dataset_t *ds = os->os_dsl_dataset;
-	struct snaparg *sn = arg2;
+	dmu_objset_clone_arg_t *doca = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dir_t *pdd;
+	const char *tail;
+	dsl_dataset_t *origin, *ds;
+	uint64_t obj;
+	char namebuf[MAXNAMELEN];
 
-	dsl_dataset_snapshot_sync(ds, sn->snapname, tx);
+	VERIFY0(dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail));
+	VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin));
 
-	if (sn->props) {
-		dsl_props_arg_t pa;
-		pa.pa_props = sn->props;
-		pa.pa_source = ZPROP_SRC_LOCAL;
-		dsl_props_set_sync(ds->ds_prev, &pa, tx);
-	}
+	obj = dsl_dataset_create_sync(pdd, tail, origin, 0,
+	    doca->doca_cred, tx);
 
-	if (sn->temporary) {
-		struct dsl_ds_destroyarg da;
-
-		dsl_dataset_user_hold_sync(ds->ds_prev, sn->ha, tx);
-		kmem_free(sn->ha, sizeof (struct dsl_ds_holdarg));
-		sn->ha = NULL;
-		sn->newds = ds->ds_prev;
-
-		da.ds = ds->ds_prev;
-		da.defer = B_TRUE;
-		dsl_dataset_destroy_sync(&da, FTAG, tx);
-	}
+	VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
+	dsl_dataset_name(origin, namebuf);
+	spa_history_log_internal_ds(ds, "clone", tx,
+	    "origin=%s (%llu)", namebuf, origin->ds_object);
+	dsl_dataset_rele(ds, FTAG);
+	dsl_dataset_rele(origin, FTAG);
+	dsl_dir_rele(pdd, FTAG);
 }
 
-static int
-dmu_objset_snapshot_one(const char *name, void *arg)
+int
+dmu_objset_clone(const char *clone, const char *origin)
 {
-	struct snaparg *sn = arg;
-	objset_t *os;
-	int err;
-	char *cp;
+	dmu_objset_clone_arg_t doca;
 
-	/*
-	 * If the objset starts with a '%', then ignore it unless it was
-	 * explicitly named (ie, not recursive).  These hidden datasets
-	 * are always inconsistent, and by not opening them here, we can
-	 * avoid a race with dsl_dir_destroy_check().
-	 */
-	cp = strrchr(name, '/');
-	if (cp && cp[1] == '%' && sn->recursive)
-		return (0);
+	doca.doca_clone = clone;
+	doca.doca_origin = origin;
+	doca.doca_cred = CRED();
 
-	(void) strcpy(sn->failed, name);
-
-	/*
-	 * Check permissions if we are doing a recursive snapshot.  The
-	 * permission checks for the starting dataset have already been
-	 * performed in zfs_secpolicy_snapshot()
-	 */
-	if (sn->recursive && (err = zfs_secpolicy_snapshot_perms(name, CRED())))
-		return (err);
-
-	err = dmu_objset_hold(name, sn, &os);
-	if (err != 0)
-		return (err);
-
-	/*
-	 * If the objset is in an inconsistent state (eg, in the process
-	 * of being destroyed), don't snapshot it.  As with %hidden
-	 * datasets, we return EBUSY if this name was explicitly
-	 * requested (ie, not recursive), and otherwise ignore it.
-	 */
-	if (os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) {
-		dmu_objset_rele(os, sn);
-		return (sn->recursive ? 0 : EBUSY);
-	}
-
-	if (sn->needsuspend) {
-		err = zil_suspend(dmu_objset_zil(os));
-		if (err) {
-			dmu_objset_rele(os, sn);
-			return (err);
-		}
-	}
-	dsl_sync_task_create(sn->dstg, snapshot_check, snapshot_sync,
-	    os, sn, 3);
-
-	return (0);
+	return (dsl_sync_task(clone,
+	    dmu_objset_clone_check, dmu_objset_clone_sync, &doca, 5));
 }
 
 int
-dmu_objset_snapshot(char *fsname, char *snapname, char *tag,
-    nvlist_t *props, boolean_t recursive, boolean_t temporary, int cleanup_fd)
+dmu_objset_snapshot_one(const char *fsname, const char *snapname)
 {
-	dsl_sync_task_t *dst;
-	struct snaparg sn;
-	spa_t *spa;
-	minor_t minor;
 	int err;
+	char *longsnap = kmem_asprintf("%s@%s", fsname, snapname);
+	nvlist_t *snaps = fnvlist_alloc();
 
-	(void) strcpy(sn.failed, fsname);
-
-	err = spa_open(fsname, &spa, FTAG);
-	if (err)
-		return (err);
-
-	if (temporary) {
-		if (cleanup_fd < 0) {
-			spa_close(spa, FTAG);
-			return (EINVAL);
-		}
-		if ((err = zfs_onexit_fd_hold(cleanup_fd, &minor)) != 0) {
-			spa_close(spa, FTAG);
-			return (err);
-		}
-	}
-
-	sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
-	sn.snapname = snapname;
-	sn.htag = tag;
-	sn.props = props;
-	sn.recursive = recursive;
-	sn.needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
-	sn.temporary = temporary;
-	sn.ha = NULL;
-	sn.newds = NULL;
-
-	if (recursive) {
-		err = dmu_objset_find(fsname,
-		    dmu_objset_snapshot_one, &sn, DS_FIND_CHILDREN);
-	} else {
-		err = dmu_objset_snapshot_one(fsname, &sn);
-	}
-
-	if (err == 0)
-		err = dsl_sync_task_group_wait(sn.dstg);
-
-	for (dst = list_head(&sn.dstg->dstg_tasks); dst;
-	    dst = list_next(&sn.dstg->dstg_tasks, dst)) {
-		objset_t *os = dst->dst_arg1;
-		dsl_dataset_t *ds = os->os_dsl_dataset;
-		if (dst->dst_err) {
-			dsl_dataset_name(ds, sn.failed);
-		} else if (temporary) {
-			dsl_register_onexit_hold_cleanup(sn.newds, tag, minor);
-		}
-		if (sn.needsuspend)
-			zil_resume(dmu_objset_zil(os));
-#ifdef __FreeBSD__
-#ifdef _KERNEL
-		if (dst->dst_err == 0 && dmu_objset_type(os) == DMU_OST_ZVOL) {
-			char name[MAXNAMELEN];
-
-			dmu_objset_name(os, name);
-			strlcat(name, "@", sizeof(name));
-			strlcat(name, snapname, sizeof(name));
-			zvol_create_minors(name);
-		}
-#endif
-#endif
-		dmu_objset_rele(os, &sn);
-	}
-
-	if (err)
-		(void) strcpy(fsname, sn.failed);
-	if (temporary)
-		zfs_onexit_fd_rele(cleanup_fd);
-	dsl_sync_task_group_destroy(sn.dstg);
-	spa_close(spa, FTAG);
+	fnvlist_add_boolean(snaps, longsnap);
+	strfree(longsnap);
+	err = dsl_dataset_snapshot(snaps, NULL, NULL);
+	fnvlist_free(snaps);
 	return (err);
 }
 
@@ -1052,9 +919,9 @@
 	objset_t *os = arg;
 	dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
 
-	ASSERT(bp == os->os_rootbp);
-	ASSERT(BP_GET_TYPE(bp) == DMU_OT_OBJSET);
-	ASSERT(BP_GET_LEVEL(bp) == 0);
+	ASSERT3P(bp, ==, os->os_rootbp);
+	ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
+	ASSERT0(BP_GET_LEVEL(bp));
 
 	/*
 	 * Update rootbp fill count: it should be the number of objects
@@ -1161,7 +1028,7 @@
 
 	list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
 	while (dr = list_head(list)) {
-		ASSERT(dr->dr_dbuf->db_level == 0);
+		ASSERT0(dr->dr_dbuf->db_level);
 		list_remove(list, dr);
 		if (dr->dr_zio)
 			zio_nowait(dr->dr_zio);
@@ -1435,9 +1302,9 @@
 	if (dmu_objset_userspace_present(os))
 		return (0);
 	if (!dmu_objset_userused_enabled(os))
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	if (dmu_objset_is_snapshot(os))
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	/*
 	 * We simply need to mark every object dirty, so that it will be
@@ -1453,15 +1320,15 @@
 		int objerr;
 
 		if (issig(JUSTLOOKING) && issig(FORREAL))
-			return (EINTR);
+			return (SET_ERROR(EINTR));
 
 		objerr = dmu_bonus_hold(os, obj, FTAG, &db);
-		if (objerr)
+		if (objerr != 0)
 			continue;
 		tx = dmu_tx_create(os);
 		dmu_tx_hold_bonus(tx, obj);
 		objerr = dmu_tx_assign(tx, TXG_WAIT);
-		if (objerr) {
+		if (objerr != 0) {
 			dmu_tx_abort(tx);
 			continue;
 		}
@@ -1529,7 +1396,7 @@
 	uint64_t ignored;
 
 	if (ds->ds_phys->ds_snapnames_zapobj == 0)
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 
 	return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset,
 	    ds->ds_phys->ds_snapnames_zapobj, name, 8, 1, &ignored, MT_FIRST,
@@ -1544,8 +1411,10 @@
 	zap_cursor_t cursor;
 	zap_attribute_t attr;
 
+	ASSERT(dsl_pool_config_held(dmu_objset_pool(os)));
+
 	if (ds->ds_phys->ds_snapnames_zapobj == 0)
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 
 	zap_cursor_init_serialized(&cursor,
 	    ds->ds_dir->dd_pool->dp_meta_objset,
@@ -1553,12 +1422,12 @@
 
 	if (zap_cursor_retrieve(&cursor, &attr) != 0) {
 		zap_cursor_fini(&cursor);
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 	}
 
 	if (strlen(attr.za_name) + 1 > namelen) {
 		zap_cursor_fini(&cursor);
-		return (ENAMETOOLONG);
+		return (SET_ERROR(ENAMETOOLONG));
 	}
 
 	(void) strcpy(name, attr.za_name);
@@ -1584,7 +1453,7 @@
 	/* there is no next dir on a snapshot! */
 	if (os->os_dsl_dataset->ds_object !=
 	    dd->dd_phys->dd_head_dataset_obj)
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 
 	zap_cursor_init_serialized(&cursor,
 	    dd->dd_pool->dp_meta_objset,
@@ -1592,12 +1461,12 @@
 
 	if (zap_cursor_retrieve(&cursor, &attr) != 0) {
 		zap_cursor_fini(&cursor);
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 	}
 
 	if (strlen(attr.za_name) + 1 > namelen) {
 		zap_cursor_fini(&cursor);
-		return (ENAMETOOLONG);
+		return (SET_ERROR(ENAMETOOLONG));
 	}
 
 	(void) strcpy(name, attr.za_name);
@@ -1610,42 +1479,122 @@
 	return (0);
 }
 
-struct findarg {
-	int (*func)(const char *, void *);
-	void *arg;
-};
-
-/* ARGSUSED */
-static int
-findfunc(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
-{
-	struct findarg *fa = arg;
-	return (fa->func(dsname, fa->arg));
-}
-
 /*
- * Find all objsets under name, and for each, call 'func(child_name, arg)'.
- * Perhaps change all callers to use dmu_objset_find_spa()?
+ * Find objsets under and including ddobj, call func(ds) on each.
  */
 int
-dmu_objset_find(const char *name, int func(const char *, void *), void *arg,
-    int flags)
+dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
+    int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags)
 {
-	struct findarg fa;
-	fa.func = func;
-	fa.arg = arg;
-	return (dmu_objset_find_spa(NULL, name, findfunc, &fa, flags));
+	dsl_dir_t *dd;
+	dsl_dataset_t *ds;
+	zap_cursor_t zc;
+	zap_attribute_t *attr;
+	uint64_t thisobj;
+	int err;
+
+	ASSERT(dsl_pool_config_held(dp));
+
+	err = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd);
+	if (err != 0)
+		return (err);
+
+	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
+	if (dd->dd_myname[0] == '$') {
+		dsl_dir_rele(dd, FTAG);
+		return (0);
+	}
+
+	thisobj = dd->dd_phys->dd_head_dataset_obj;
+	attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
+
+	/*
+	 * Iterate over all children.
+	 */
+	if (flags & DS_FIND_CHILDREN) {
+		for (zap_cursor_init(&zc, dp->dp_meta_objset,
+		    dd->dd_phys->dd_child_dir_zapobj);
+		    zap_cursor_retrieve(&zc, attr) == 0;
+		    (void) zap_cursor_advance(&zc)) {
+			ASSERT3U(attr->za_integer_length, ==,
+			    sizeof (uint64_t));
+			ASSERT3U(attr->za_num_integers, ==, 1);
+
+			err = dmu_objset_find_dp(dp, attr->za_first_integer,
+			    func, arg, flags);
+			if (err != 0)
+				break;
+		}
+		zap_cursor_fini(&zc);
+
+		if (err != 0) {
+			dsl_dir_rele(dd, FTAG);
+			kmem_free(attr, sizeof (zap_attribute_t));
+			return (err);
+		}
+	}
+
+	/*
+	 * Iterate over all snapshots.
+	 */
+	if (flags & DS_FIND_SNAPSHOTS) {
+		dsl_dataset_t *ds;
+		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
+
+		if (err == 0) {
+			uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
+			dsl_dataset_rele(ds, FTAG);
+
+			for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
+			    zap_cursor_retrieve(&zc, attr) == 0;
+			    (void) zap_cursor_advance(&zc)) {
+				ASSERT3U(attr->za_integer_length, ==,
+				    sizeof (uint64_t));
+				ASSERT3U(attr->za_num_integers, ==, 1);
+
+				err = dsl_dataset_hold_obj(dp,
+				    attr->za_first_integer, FTAG, &ds);
+				if (err != 0)
+					break;
+				err = func(dp, ds, arg);
+				dsl_dataset_rele(ds, FTAG);
+				if (err != 0)
+					break;
+			}
+			zap_cursor_fini(&zc);
+		}
+	}
+
+	dsl_dir_rele(dd, FTAG);
+	kmem_free(attr, sizeof (zap_attribute_t));
+
+	if (err != 0)
+		return (err);
+
+	/*
+	 * Apply to self.
+	 */
+	err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
+	if (err != 0)
+		return (err);
+	err = func(dp, ds, arg);
+	dsl_dataset_rele(ds, FTAG);
+	return (err);
 }
 
 /*
- * Find all objsets under name, call func on each
+ * Find all objsets under name, and for each, call 'func(child_name, arg)'.
+ * The dp_config_rwlock must not be held when this is called, and it
+ * will not be held when the callback is called.
+ * Therefore this function should only be used when the pool is not changing
+ * (e.g. in syncing context), or the callback can deal with the possible races.
  */
-int
-dmu_objset_find_spa(spa_t *spa, const char *name,
-    int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags)
+static int
+dmu_objset_find_impl(spa_t *spa, const char *name,
+    int func(const char *, void *), void *arg, int flags)
 {
 	dsl_dir_t *dd;
-	dsl_pool_t *dp;
+	dsl_pool_t *dp = spa_get_dsl(spa);
 	dsl_dataset_t *ds;
 	zap_cursor_t zc;
 	zap_attribute_t *attr;
@@ -1653,21 +1602,23 @@
 	uint64_t thisobj;
 	int err;
 
-	if (name == NULL)
-		name = spa_name(spa);
-	err = dsl_dir_open_spa(spa, name, FTAG, &dd, NULL);
-	if (err)
+	dsl_pool_config_enter(dp, FTAG);
+
+	err = dsl_dir_hold(dp, name, FTAG, &dd, NULL);
+	if (err != 0) {
+		dsl_pool_config_exit(dp, FTAG);
 		return (err);
+	}
 
 	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
 	if (dd->dd_myname[0] == '$') {
-		dsl_dir_close(dd, FTAG);
+		dsl_dir_rele(dd, FTAG);
+		dsl_pool_config_exit(dp, FTAG);
 		return (0);
 	}
 
 	thisobj = dd->dd_phys->dd_head_dataset_obj;
 	attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
-	dp = dd->dd_pool;
 
 	/*
 	 * Iterate over all children.
@@ -1677,19 +1628,24 @@
 		    dd->dd_phys->dd_child_dir_zapobj);
 		    zap_cursor_retrieve(&zc, attr) == 0;
 		    (void) zap_cursor_advance(&zc)) {
-			ASSERT(attr->za_integer_length == sizeof (uint64_t));
-			ASSERT(attr->za_num_integers == 1);
+			ASSERT3U(attr->za_integer_length, ==,
+			    sizeof (uint64_t));
+			ASSERT3U(attr->za_num_integers, ==, 1);
 
 			child = kmem_asprintf("%s/%s", name, attr->za_name);
-			err = dmu_objset_find_spa(spa, child, func, arg, flags);
+			dsl_pool_config_exit(dp, FTAG);
+			err = dmu_objset_find_impl(spa, child,
+			    func, arg, flags);
+			dsl_pool_config_enter(dp, FTAG);
 			strfree(child);
-			if (err)
+			if (err != 0)
 				break;
 		}
 		zap_cursor_fini(&zc);
 
-		if (err) {
-			dsl_dir_close(dd, FTAG);
+		if (err != 0) {
+			dsl_dir_rele(dd, FTAG);
+			dsl_pool_config_exit(dp, FTAG);
 			kmem_free(attr, sizeof (zap_attribute_t));
 			return (err);
 		}
@@ -1699,11 +1655,7 @@
 	 * Iterate over all snapshots.
 	 */
 	if (flags & DS_FIND_SNAPSHOTS) {
-		if (!dsl_pool_sync_context(dp))
-			rw_enter(&dp->dp_config_rwlock, RW_READER);
 		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
-		if (!dsl_pool_sync_context(dp))
-			rw_exit(&dp->dp_config_rwlock);
 
 		if (err == 0) {
 			uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
@@ -1712,16 +1664,17 @@
 			for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
 			    zap_cursor_retrieve(&zc, attr) == 0;
 			    (void) zap_cursor_advance(&zc)) {
-				ASSERT(attr->za_integer_length ==
+				ASSERT3U(attr->za_integer_length, ==,
 				    sizeof (uint64_t));
-				ASSERT(attr->za_num_integers == 1);
+				ASSERT3U(attr->za_num_integers, ==, 1);
 
 				child = kmem_asprintf("%s@%s",
 				    name, attr->za_name);
-				err = func(spa, attr->za_first_integer,
-				    child, arg);
+				dsl_pool_config_exit(dp, FTAG);
+				err = func(child, arg);
+				dsl_pool_config_enter(dp, FTAG);
 				strfree(child);
-				if (err)
+				if (err != 0)
 					break;
 			}
 			zap_cursor_fini(&zc);
@@ -1728,48 +1681,33 @@
 		}
 	}
 
-	dsl_dir_close(dd, FTAG);
+	dsl_dir_rele(dd, FTAG);
 	kmem_free(attr, sizeof (zap_attribute_t));
+	dsl_pool_config_exit(dp, FTAG);
 
-	if (err)
+	if (err != 0)
 		return (err);
 
-	/*
-	 * Apply to self if appropriate.
-	 */
-	err = func(spa, thisobj, name, arg);
-	return (err);
+	/* Apply to self. */
+	return (func(name, arg));
 }
 
-/* ARGSUSED */
+/*
+ * See comment above dmu_objset_find_impl().
+ */
 int
-dmu_objset_prefetch(const char *name, void *arg)
+dmu_objset_find(char *name, int func(const char *, void *), void *arg,
+    int flags)
 {
-	dsl_dataset_t *ds;
+	spa_t *spa;
+	int error;
 
-	if (dsl_dataset_hold(name, FTAG, &ds))
-		return (0);
-
-	if (!BP_IS_HOLE(&ds->ds_phys->ds_bp)) {
-		mutex_enter(&ds->ds_opening_lock);
-		if (ds->ds_objset == NULL) {
-			uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
-			zbookmark_t zb;
-
-			SET_BOOKMARK(&zb, ds->ds_object, ZB_ROOT_OBJECT,
-			    ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
-
-			(void) arc_read(NULL, dsl_dataset_get_spa(ds),
-			    &ds->ds_phys->ds_bp, NULL, NULL,
-			    ZIO_PRIORITY_ASYNC_READ,
-			    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
-			    &aflags, &zb);
-		}
-		mutex_exit(&ds->ds_opening_lock);
-	}
-
-	dsl_dataset_rele(ds, FTAG);
-	return (0);
+	error = spa_open(name, &spa, FTAG);
+	if (error != 0)
+		return (error);
+	error = dmu_objset_find_impl(spa, name, func, arg, flags);
+	spa_close(spa, FTAG);
+	return (error);
 }
 
 void
@@ -1785,3 +1723,19 @@
 	ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
 	return (os->os_user_ptr);
 }
+
+/*
+ * Determine name of filesystem, given name of snapshot.
+ * buf must be at least MAXNAMELEN bytes
+ */
+int
+dmu_fsname(const char *snapname, char *buf)
+{
+	char *atp = strchr(snapname, '@');
+	if (atp == NULL)
+		return (SET_ERROR(EINVAL));
+	if (atp - snapname >= MAXNAMELEN)
+		return (SET_ERROR(ENAMETOOLONG));
+	(void) strlcpy(buf, snapname, atp - snapname + 1);
+	return (0);
+}

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -21,7 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  * Copyright (c) 2012, Martin Matuska <mm at FreeBSD.org>. All rights reserved.
  */
@@ -47,11 +47,14 @@
 #include <sys/avl.h>
 #include <sys/ddt.h>
 #include <sys/zfs_onexit.h>
+#include <sys/dmu_send.h>
+#include <sys/dsl_destroy.h>
 
 /* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
 int zfs_send_corrupt_data = B_FALSE;
 
 static char *dmu_recv_tag = "dmu_recv_tag";
+static const char *recv_clone_name = "%recv";
 
 static int
 dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
@@ -107,7 +110,7 @@
 	    dsp->dsa_pending_op != PENDING_FREE) {
 		if (dump_bytes(dsp, dsp->dsa_drr,
 		    sizeof (dmu_replay_record_t)) != 0)
-			return (EINTR);
+			return (SET_ERROR(EINTR));
 		dsp->dsa_pending_op = PENDING_NONE;
 	}
 
@@ -131,7 +134,7 @@
 			/* not a continuation.  Push out pending record */
 			if (dump_bytes(dsp, dsp->dsa_drr,
 			    sizeof (dmu_replay_record_t)) != 0)
-				return (EINTR);
+				return (SET_ERROR(EINTR));
 			dsp->dsa_pending_op = PENDING_NONE;
 		}
 	}
@@ -145,7 +148,7 @@
 	if (length == -1ULL) {
 		if (dump_bytes(dsp, dsp->dsa_drr,
 		    sizeof (dmu_replay_record_t)) != 0)
-			return (EINTR);
+			return (SET_ERROR(EINTR));
 	} else {
 		dsp->dsa_pending_op = PENDING_FREE;
 	}
@@ -169,7 +172,7 @@
 	if (dsp->dsa_pending_op != PENDING_NONE) {
 		if (dump_bytes(dsp, dsp->dsa_drr,
 		    sizeof (dmu_replay_record_t)) != 0)
-			return (EINTR);
+			return (SET_ERROR(EINTR));
 		dsp->dsa_pending_op = PENDING_NONE;
 	}
 	/* write a DATA record */
@@ -189,9 +192,9 @@
 	drrw->drr_key.ddk_cksum = bp->blk_cksum;
 
 	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
-		return (EINTR);
+		return (SET_ERROR(EINTR));
 	if (dump_bytes(dsp, data, blksz) != 0)
-		return (EINTR);
+		return (SET_ERROR(EINTR));
 	return (0);
 }
 
@@ -203,7 +206,7 @@
 	if (dsp->dsa_pending_op != PENDING_NONE) {
 		if (dump_bytes(dsp, dsp->dsa_drr,
 		    sizeof (dmu_replay_record_t)) != 0)
-			return (EINTR);
+			return (SET_ERROR(EINTR));
 		dsp->dsa_pending_op = PENDING_NONE;
 	}
 
@@ -215,9 +218,9 @@
 	drrs->drr_toguid = dsp->dsa_toguid;
 
 	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)))
-		return (EINTR);
+		return (SET_ERROR(EINTR));
 	if (dump_bytes(dsp, data, blksz))
-		return (EINTR);
+		return (SET_ERROR(EINTR));
 	return (0);
 }
 
@@ -237,7 +240,7 @@
 	    dsp->dsa_pending_op != PENDING_FREEOBJECTS) {
 		if (dump_bytes(dsp, dsp->dsa_drr,
 		    sizeof (dmu_replay_record_t)) != 0)
-			return (EINTR);
+			return (SET_ERROR(EINTR));
 		dsp->dsa_pending_op = PENDING_NONE;
 	}
 	if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) {
@@ -252,7 +255,7 @@
 			/* can't be aggregated.  Push out pending record */
 			if (dump_bytes(dsp, dsp->dsa_drr,
 			    sizeof (dmu_replay_record_t)) != 0)
-				return (EINTR);
+				return (SET_ERROR(EINTR));
 			dsp->dsa_pending_op = PENDING_NONE;
 		}
 	}
@@ -280,7 +283,7 @@
 	if (dsp->dsa_pending_op != PENDING_NONE) {
 		if (dump_bytes(dsp, dsp->dsa_drr,
 		    sizeof (dmu_replay_record_t)) != 0)
-			return (EINTR);
+			return (SET_ERROR(EINTR));
 		dsp->dsa_pending_op = PENDING_NONE;
 	}
 
@@ -297,17 +300,17 @@
 	drro->drr_toguid = dsp->dsa_toguid;
 
 	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
-		return (EINTR);
+		return (SET_ERROR(EINTR));
 
 	if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0)
-		return (EINTR);
+		return (SET_ERROR(EINTR));
 
 	/* free anything past the end of the file */
 	if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
 	    (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL))
-		return (EINTR);
-	if (dsp->dsa_err)
-		return (EINTR);
+		return (SET_ERROR(EINTR));
+	if (dsp->dsa_err != 0)
+		return (SET_ERROR(EINTR));
 	return (0);
 }
 
@@ -325,7 +328,7 @@
 	int err = 0;
 
 	if (issig(JUSTLOOKING) && issig(FORREAL))
-		return (EINTR);
+		return (SET_ERROR(EINTR));
 
 	if (zb->zb_object != DMU_META_DNODE_OBJECT &&
 	    DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
@@ -349,7 +352,7 @@
 		if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
 		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
 		    &aflags, zb) != 0)
-			return (EIO);
+			return (SET_ERROR(EIO));
 
 		blk = abuf->b_data;
 		for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
@@ -356,7 +359,7 @@
 			uint64_t dnobj = (zb->zb_blkid <<
 			    (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
 			err = dump_dnode(dsp, dnobj, blk+i);
-			if (err)
+			if (err != 0)
 				break;
 		}
 		(void) arc_buf_remove_ref(abuf, &abuf);
@@ -368,7 +371,7 @@
 		if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
 		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
 		    &aflags, zb) != 0)
-			return (EIO);
+			return (SET_ERROR(EIO));
 
 		err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data);
 		(void) arc_buf_remove_ref(abuf, &abuf);
@@ -390,7 +393,7 @@
 				    ptr++)
 					*ptr = 0x2f5baddb10c;
 			} else {
-				return (EIO);
+				return (SET_ERROR(EIO));
 			}
 		}
 
@@ -403,45 +406,39 @@
 	return (err);
 }
 
-int
-dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
-    int outfd, struct file *fp, offset_t *off)
+/*
+ * Releases dp, ds, and fromds, using the specified tag.
+ */
+static int
+dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
+#ifdef illumos
+    dsl_dataset_t *fromds, int outfd, vnode_t *vp, offset_t *off)
+#else
+    dsl_dataset_t *fromds, int outfd, struct file *fp, offset_t *off)
+#endif
 {
-	dsl_dataset_t *ds = tosnap->os_dsl_dataset;
-	dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL;
+	objset_t *os;
 	dmu_replay_record_t *drr;
 	dmu_sendarg_t *dsp;
 	int err;
 	uint64_t fromtxg = 0;
 
-	/* tosnap must be a snapshot */
-	if (ds->ds_phys->ds_next_snap_obj == 0)
-		return (EINVAL);
+	if (fromds != NULL && !dsl_dataset_is_before(ds, fromds)) {
+		dsl_dataset_rele(fromds, tag);
+		dsl_dataset_rele(ds, tag);
+		dsl_pool_rele(dp, tag);
+		return (SET_ERROR(EXDEV));
+	}
 
-	/* fromsnap must be an earlier snapshot from the same fs as tosnap */
-	if (fromds && (ds->ds_dir != fromds->ds_dir ||
-	    fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg))
-		return (EXDEV);
-
-	if (fromorigin) {
-		dsl_pool_t *dp = ds->ds_dir->dd_pool;
-
-		if (fromsnap)
-			return (EINVAL);
-
-		if (dsl_dir_is_clone(ds->ds_dir)) {
-			rw_enter(&dp->dp_config_rwlock, RW_READER);
-			err = dsl_dataset_hold_obj(dp,
-			    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds);
-			rw_exit(&dp->dp_config_rwlock);
-			if (err)
-				return (err);
-		} else {
-			fromorigin = B_FALSE;
-		}
+	err = dmu_objset_from_ds(ds, &os);
+	if (err != 0) {
+		if (fromds != NULL)
+			dsl_dataset_rele(fromds, tag);
+		dsl_dataset_rele(ds, tag);
+		dsl_pool_rele(dp, tag);
+		return (err);
 	}
 
-
 	drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
 	drr->drr_type = DRR_BEGIN;
 	drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
@@ -449,13 +446,17 @@
 	    DMU_SUBSTREAM);
 
 #ifdef _KERNEL
-	if (dmu_objset_type(tosnap) == DMU_OST_ZFS) {
+	if (dmu_objset_type(os) == DMU_OST_ZFS) {
 		uint64_t version;
-		if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0) {
+		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &version) != 0) {
 			kmem_free(drr, sizeof (dmu_replay_record_t));
-			return (EINVAL);
+			if (fromds != NULL)
+				dsl_dataset_rele(fromds, tag);
+			dsl_dataset_rele(ds, tag);
+			dsl_pool_rele(dp, tag);
+			return (SET_ERROR(EINVAL));
 		}
-		if (version == ZPL_VERSION_SA) {
+		if (version >= ZPL_VERSION_SA) {
 			DMU_SET_FEATUREFLAGS(
 			    drr->drr_u.drr_begin.drr_versioninfo,
 			    DMU_BACKUP_FEATURE_SA_SPILL);
@@ -465,21 +466,22 @@
 
 	drr->drr_u.drr_begin.drr_creation_time =
 	    ds->ds_phys->ds_creation_time;
-	drr->drr_u.drr_begin.drr_type = tosnap->os_phys->os_type;
-	if (fromorigin)
+	drr->drr_u.drr_begin.drr_type = dmu_objset_type(os);
+	if (fromds != NULL && ds->ds_dir != fromds->ds_dir)
 		drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE;
 	drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid;
 	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
 		drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA;
 
-	if (fromds)
+	if (fromds != NULL)
 		drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid;
 	dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname);
 
-	if (fromds)
+	if (fromds != NULL) {
 		fromtxg = fromds->ds_phys->ds_creation_txg;
-	if (fromorigin)
-		dsl_dataset_rele(fromds, FTAG);
+		dsl_dataset_rele(fromds, tag);
+		fromds = NULL;
+	}
 
 	dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP);
 
@@ -488,7 +490,7 @@
 	dsp->dsa_proc = curproc;
 	dsp->dsa_td = curthread;
 	dsp->dsa_fp = fp;
-	dsp->dsa_os = tosnap;
+	dsp->dsa_os = os;
 	dsp->dsa_off = off;
 	dsp->dsa_toguid = ds->ds_phys->ds_guid;
 	ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0);
@@ -498,6 +500,9 @@
 	list_insert_head(&ds->ds_sendstreams, dsp);
 	mutex_exit(&ds->ds_sendstream_lock);
 
+	dsl_dataset_long_hold(ds, FTAG);
+	dsl_pool_rele(dp, tag);
+
 	if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
 		err = dsp->dsa_err;
 		goto out;
@@ -508,10 +513,10 @@
 
 	if (dsp->dsa_pending_op != PENDING_NONE)
 		if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0)
-			err = EINTR;
+			err = SET_ERROR(EINTR);
 
-	if (err) {
-		if (err == EINTR && dsp->dsa_err)
+	if (err != 0) {
+		if (err == EINTR && dsp->dsa_err != 0)
 			err = dsp->dsa_err;
 		goto out;
 	}
@@ -534,44 +539,106 @@
 	kmem_free(drr, sizeof (dmu_replay_record_t));
 	kmem_free(dsp, sizeof (dmu_sendarg_t));
 
+	dsl_dataset_long_rele(ds, FTAG);
+	dsl_dataset_rele(ds, tag);
+
 	return (err);
 }
 
 int
-dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
-    uint64_t *sizep)
+dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
+#ifdef illumos
+    int outfd, vnode_t *vp, offset_t *off)
+#else
+    int outfd, struct file *fp, offset_t *off)
+#endif
 {
-	dsl_dataset_t *ds = tosnap->os_dsl_dataset;
-	dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL;
-	dsl_pool_t *dp = ds->ds_dir->dd_pool;
+	dsl_pool_t *dp;
+	dsl_dataset_t *ds;
+	dsl_dataset_t *fromds = NULL;
 	int err;
-	uint64_t size;
 
-	/* tosnap must be a snapshot */
-	if (ds->ds_phys->ds_next_snap_obj == 0)
-		return (EINVAL);
+	err = dsl_pool_hold(pool, FTAG, &dp);
+	if (err != 0)
+		return (err);
 
-	/* fromsnap must be an earlier snapshot from the same fs as tosnap */
-	if (fromds && (ds->ds_dir != fromds->ds_dir ||
-	    fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg))
-		return (EXDEV);
+	err = dsl_dataset_hold_obj(dp, tosnap, FTAG, &ds);
+	if (err != 0) {
+		dsl_pool_rele(dp, FTAG);
+		return (err);
+	}
 
-	if (fromorigin) {
-		if (fromsnap)
-			return (EINVAL);
+	if (fromsnap != 0) {
+		err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds);
+		if (err != 0) {
+			dsl_dataset_rele(ds, FTAG);
+			dsl_pool_rele(dp, FTAG);
+			return (err);
+		}
+	}
 
-		if (dsl_dir_is_clone(ds->ds_dir)) {
-			rw_enter(&dp->dp_config_rwlock, RW_READER);
-			err = dsl_dataset_hold_obj(dp,
-			    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds);
-			rw_exit(&dp->dp_config_rwlock);
-			if (err)
-				return (err);
-		} else {
-			fromorigin = B_FALSE;
+	return (dmu_send_impl(FTAG, dp, ds, fromds, outfd, fp, off));
+}
+
+int
+dmu_send(const char *tosnap, const char *fromsnap,
+#ifdef illumos
+    int outfd, vnode_t *vp, offset_t *off)
+#else
+    int outfd, struct file *fp, offset_t *off)
+#endif
+{
+	dsl_pool_t *dp;
+	dsl_dataset_t *ds;
+	dsl_dataset_t *fromds = NULL;
+	int err;
+
+	if (strchr(tosnap, '@') == NULL)
+		return (SET_ERROR(EINVAL));
+	if (fromsnap != NULL && strchr(fromsnap, '@') == NULL)
+		return (SET_ERROR(EINVAL));
+
+	err = dsl_pool_hold(tosnap, FTAG, &dp);
+	if (err != 0)
+		return (err);
+
+	err = dsl_dataset_hold(dp, tosnap, FTAG, &ds);
+	if (err != 0) {
+		dsl_pool_rele(dp, FTAG);
+		return (err);
+	}
+
+	if (fromsnap != NULL) {
+		err = dsl_dataset_hold(dp, fromsnap, FTAG, &fromds);
+		if (err != 0) {
+			dsl_dataset_rele(ds, FTAG);
+			dsl_pool_rele(dp, FTAG);
+			return (err);
 		}
 	}
+	return (dmu_send_impl(FTAG, dp, ds, fromds, outfd, fp, off));
+}
 
+int
+dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
+{
+	dsl_pool_t *dp = ds->ds_dir->dd_pool;
+	int err;
+	uint64_t size;
+
+	ASSERT(dsl_pool_config_held(dp));
+
+	/* tosnap must be a snapshot */
+	if (!dsl_dataset_is_snapshot(ds))
+		return (SET_ERROR(EINVAL));
+
+	/*
+	 * fromsnap must be an earlier snapshot from the same fs as tosnap,
+	 * or the origin's fs.
+	 */
+	if (fromds != NULL && !dsl_dataset_is_before(ds, fromds))
+		return (SET_ERROR(EXDEV));
+
 	/* Get uncompressed size estimate of changed data. */
 	if (fromds == NULL) {
 		size = ds->ds_phys->ds_uncompressed_bytes;
@@ -579,9 +646,7 @@
 		uint64_t used, comp;
 		err = dsl_dataset_space_written(fromds, ds,
 		    &used, &comp, &size);
-		if (fromorigin)
-			dsl_dataset_rele(fromds, FTAG);
-		if (err)
+		if (err != 0)
 			return (err);
 	}
 
@@ -601,11 +666,8 @@
 	 * block, which we observe in practice.
 	 */
 	uint64_t recordsize;
-	rw_enter(&dp->dp_config_rwlock, RW_READER);
-	err = dsl_prop_get_ds(ds, "recordsize",
-	    sizeof (recordsize), 1, &recordsize, NULL);
-	rw_exit(&dp->dp_config_rwlock);
-	if (err)
+	err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize);
+	if (err != 0)
 		return (err);
 	size -= size / recordsize * sizeof (blkptr_t);
 
@@ -617,116 +679,62 @@
 	return (0);
 }
 
-struct recvbeginsyncarg {
-	const char *tofs;
-	const char *tosnap;
-	dsl_dataset_t *origin;
-	uint64_t fromguid;
-	dmu_objset_type_t type;
-	void *tag;
-	boolean_t force;
-	uint64_t dsflags;
-	char clonelastname[MAXNAMELEN];
-	dsl_dataset_t *ds; /* the ds to recv into; returned from the syncfunc */
-	cred_t *cr;
-};
+typedef struct dmu_recv_begin_arg {
+	const char *drba_origin;
+	dmu_recv_cookie_t *drba_cookie;
+	cred_t *drba_cred;
+} dmu_recv_begin_arg_t;
 
-/* ARGSUSED */
 static int
-recv_new_check(void *arg1, void *arg2, dmu_tx_t *tx)
+recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
+    uint64_t fromguid)
 {
-	dsl_dir_t *dd = arg1;
-	struct recvbeginsyncarg *rbsa = arg2;
-	objset_t *mos = dd->dd_pool->dp_meta_objset;
 	uint64_t val;
-	int err;
+	int error;
+	dsl_pool_t *dp = ds->ds_dir->dd_pool;
 
-	err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj,
-	    strrchr(rbsa->tofs, '/') + 1, sizeof (uint64_t), 1, &val);
+	/* must not have any changes since most recent snapshot */
+	if (!drba->drba_cookie->drc_force &&
+	    dsl_dataset_modified_since_lastsnap(ds))
+		return (SET_ERROR(ETXTBSY));
 
-	if (err != ENOENT)
-		return (err ? err : EEXIST);
+	/* temporary clone name must not exist */
+	error = zap_lookup(dp->dp_meta_objset,
+	    ds->ds_dir->dd_phys->dd_child_dir_zapobj, recv_clone_name,
+	    8, 1, &val);
+	if (error != ENOENT)
+		return (error == 0 ? EBUSY : error);
 
-	if (rbsa->origin) {
-		/* make sure it's a snap in the same pool */
-		if (rbsa->origin->ds_dir->dd_pool != dd->dd_pool)
-			return (EXDEV);
-		if (!dsl_dataset_is_snapshot(rbsa->origin))
-			return (EINVAL);
-		if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid)
-			return (ENODEV);
-	}
-
-	return (0);
-}
-
-static void
-recv_new_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
-	dsl_dir_t *dd = arg1;
-	struct recvbeginsyncarg *rbsa = arg2;
-	uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags;
-	uint64_t dsobj;
-
-	/* Create and open new dataset. */
-	dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1,
-	    rbsa->origin, flags, rbsa->cr, tx);
-	VERIFY(0 == dsl_dataset_own_obj(dd->dd_pool, dsobj,
-	    B_TRUE, dmu_recv_tag, &rbsa->ds));
-
-	if (rbsa->origin == NULL) {
-		(void) dmu_objset_create_impl(dd->dd_pool->dp_spa,
-		    rbsa->ds, &rbsa->ds->ds_phys->ds_bp, rbsa->type, tx);
-	}
-
-	spa_history_log_internal(LOG_DS_REPLAY_FULL_SYNC,
-	    dd->dd_pool->dp_spa, tx, "dataset = %lld", dsobj);
-}
-
-/* ARGSUSED */
-static int
-recv_existing_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
-	dsl_dataset_t *ds = arg1;
-	struct recvbeginsyncarg *rbsa = arg2;
-	int err;
-	uint64_t val;
-
-	/* must not have any changes since most recent snapshot */
-	if (!rbsa->force && dsl_dataset_modified_since_lastsnap(ds))
-		return (ETXTBSY);
-
 	/* new snapshot name must not exist */
-	err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset,
-	    ds->ds_phys->ds_snapnames_zapobj, rbsa->tosnap, 8, 1, &val);
-	if (err == 0)
-		return (EEXIST);
-	if (err != ENOENT)
-		return (err);
+	error = zap_lookup(dp->dp_meta_objset,
+	    ds->ds_phys->ds_snapnames_zapobj, drba->drba_cookie->drc_tosnap,
+	    8, 1, &val);
+	if (error != ENOENT)
+		return (error == 0 ? EEXIST : error);
 
-	if (rbsa->fromguid) {
+	if (fromguid != 0) {
 		/* if incremental, most recent snapshot must match fromguid */
 		if (ds->ds_prev == NULL)
-			return (ENODEV);
+			return (SET_ERROR(ENODEV));
 
 		/*
 		 * most recent snapshot must match fromguid, or there are no
 		 * changes since the fromguid one
 		 */
-		if (ds->ds_prev->ds_phys->ds_guid != rbsa->fromguid) {
+		if (ds->ds_prev->ds_phys->ds_guid != fromguid) {
 			uint64_t birth = ds->ds_prev->ds_phys->ds_bp.blk_birth;
 			uint64_t obj = ds->ds_prev->ds_phys->ds_prev_snap_obj;
 			while (obj != 0) {
 				dsl_dataset_t *snap;
-				err = dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
-				    obj, FTAG, &snap);
-				if (err)
-					return (ENODEV);
+				error = dsl_dataset_hold_obj(dp, obj, FTAG,
+				    &snap);
+				if (error != 0)
+					return (SET_ERROR(ENODEV));
 				if (snap->ds_phys->ds_creation_txg < birth) {
 					dsl_dataset_rele(snap, FTAG);
-					return (ENODEV);
+					return (SET_ERROR(ENODEV));
 				}
-				if (snap->ds_phys->ds_guid == rbsa->fromguid) {
+				if (snap->ds_phys->ds_guid == fromguid) {
 					dsl_dataset_rele(snap, FTAG);
 					break; /* it's ok */
 				}
@@ -734,200 +742,217 @@
 				dsl_dataset_rele(snap, FTAG);
 			}
 			if (obj == 0)
-				return (ENODEV);
+				return (SET_ERROR(ENODEV));
 		}
 	} else {
 		/* if full, most recent snapshot must be $ORIGIN */
 		if (ds->ds_phys->ds_prev_snap_txg >= TXG_INITIAL)
-			return (ENODEV);
+			return (SET_ERROR(ENODEV));
 	}
 
-	/* temporary clone name must not exist */
-	err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset,
-	    ds->ds_dir->dd_phys->dd_child_dir_zapobj,
-	    rbsa->clonelastname, 8, 1, &val);
-	if (err == 0)
-		return (EEXIST);
-	if (err != ENOENT)
-		return (err);
+	return (0);
 
-	return (0);
 }
 
-/* ARGSUSED */
+static int
+dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
+{
+	dmu_recv_begin_arg_t *drba = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
+	uint64_t fromguid = drrb->drr_fromguid;
+	int flags = drrb->drr_flags;
+	int error;
+	dsl_dataset_t *ds;
+	const char *tofs = drba->drba_cookie->drc_tofs;
+
+	/* already checked */
+	ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
+
+	if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
+	    DMU_COMPOUNDSTREAM ||
+	    drrb->drr_type >= DMU_OST_NUMTYPES ||
+	    ((flags & DRR_FLAG_CLONE) && drba->drba_origin == NULL))
+		return (SET_ERROR(EINVAL));
+
+	/* Verify pool version supports SA if SA_SPILL feature set */
+	if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
+	    DMU_BACKUP_FEATURE_SA_SPILL) &&
+	    spa_version(dp->dp_spa) < SPA_VERSION_SA) {
+		return (SET_ERROR(ENOTSUP));
+	}
+
+	error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
+	if (error == 0) {
+		/* target fs already exists; recv into temp clone */
+
+		/* Can't recv a clone into an existing fs */
+		if (flags & DRR_FLAG_CLONE) {
+			dsl_dataset_rele(ds, FTAG);
+			return (SET_ERROR(EINVAL));
+		}
+
+		error = recv_begin_check_existing_impl(drba, ds, fromguid);
+		dsl_dataset_rele(ds, FTAG);
+	} else if (error == ENOENT) {
+		/* target fs does not exist; must be a full backup or clone */
+		char buf[MAXNAMELEN];
+
+		/*
+		 * If it's a non-clone incremental, we are missing the
+		 * target fs, so fail the recv.
+		 */
+		if (fromguid != 0 && !(flags & DRR_FLAG_CLONE))
+			return (SET_ERROR(ENOENT));
+
+		/* Open the parent of tofs */
+		ASSERT3U(strlen(tofs), <, MAXNAMELEN);
+		(void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1);
+		error = dsl_dataset_hold(dp, buf, FTAG, &ds);
+		if (error != 0)
+			return (error);
+
+		if (drba->drba_origin != NULL) {
+			dsl_dataset_t *origin;
+			error = dsl_dataset_hold(dp, drba->drba_origin,
+			    FTAG, &origin);
+			if (error != 0) {
+				dsl_dataset_rele(ds, FTAG);
+				return (error);
+			}
+			if (!dsl_dataset_is_snapshot(origin)) {
+				dsl_dataset_rele(origin, FTAG);
+				dsl_dataset_rele(ds, FTAG);
+				return (SET_ERROR(EINVAL));
+			}
+			if (origin->ds_phys->ds_guid != fromguid) {
+				dsl_dataset_rele(origin, FTAG);
+				dsl_dataset_rele(ds, FTAG);
+				return (SET_ERROR(ENODEV));
+			}
+			dsl_dataset_rele(origin, FTAG);
+		}
+		dsl_dataset_rele(ds, FTAG);
+		error = 0;
+	}
+	return (error);
+}
+
 static void
-recv_existing_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
 {
-	dsl_dataset_t *ohds = arg1;
-	struct recvbeginsyncarg *rbsa = arg2;
-	dsl_pool_t *dp = ohds->ds_dir->dd_pool;
-	dsl_dataset_t *cds;
-	uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags;
+	dmu_recv_begin_arg_t *drba = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
+	const char *tofs = drba->drba_cookie->drc_tofs;
+	dsl_dataset_t *ds, *newds;
 	uint64_t dsobj;
+	int error;
+	uint64_t crflags;
 
-	/* create and open the temporary clone */
-	dsobj = dsl_dataset_create_sync(ohds->ds_dir, rbsa->clonelastname,
-	    ohds->ds_prev, flags, rbsa->cr, tx);
-	VERIFY(0 == dsl_dataset_own_obj(dp, dsobj, B_TRUE, dmu_recv_tag, &cds));
+	crflags = (drrb->drr_flags & DRR_FLAG_CI_DATA) ?
+	    DS_FLAG_CI_DATASET : 0;
 
+	error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
+	if (error == 0) {
+		/* create temporary clone */
+		dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name,
+		    ds->ds_prev, crflags, drba->drba_cred, tx);
+		dsl_dataset_rele(ds, FTAG);
+	} else {
+		dsl_dir_t *dd;
+		const char *tail;
+		dsl_dataset_t *origin = NULL;
+
+		VERIFY0(dsl_dir_hold(dp, tofs, FTAG, &dd, &tail));
+
+		if (drba->drba_origin != NULL) {
+			VERIFY0(dsl_dataset_hold(dp, drba->drba_origin,
+			    FTAG, &origin));
+		}
+
+		/* Create new dataset. */
+		dsobj = dsl_dataset_create_sync(dd,
+		    strrchr(tofs, '/') + 1,
+		    origin, crflags, drba->drba_cred, tx);
+		if (origin != NULL)
+			dsl_dataset_rele(origin, FTAG);
+		dsl_dir_rele(dd, FTAG);
+		drba->drba_cookie->drc_newfs = B_TRUE;
+	}
+	VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds));
+
+	dmu_buf_will_dirty(newds->ds_dbuf, tx);
+	newds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
+
 	/*
 	 * If we actually created a non-clone, we need to create the
 	 * objset in our new dataset.
 	 */
-	if (BP_IS_HOLE(dsl_dataset_get_blkptr(cds))) {
+	if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) {
 		(void) dmu_objset_create_impl(dp->dp_spa,
-		    cds, dsl_dataset_get_blkptr(cds), rbsa->type, tx);
+		    newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx);
 	}
 
-	rbsa->ds = cds;
+	drba->drba_cookie->drc_ds = newds;
 
-	spa_history_log_internal(LOG_DS_REPLAY_INC_SYNC,
-	    dp->dp_spa, tx, "dataset = %lld", dsobj);
+	spa_history_log_internal_ds(newds, "receive", tx, "");
 }
 
-static boolean_t
-dmu_recv_verify_features(dsl_dataset_t *ds, struct drr_begin *drrb)
-{
-	int featureflags;
-
-	featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
-
-	/* Verify pool version supports SA if SA_SPILL feature set */
-	return ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) &&
-	    (spa_version(dsl_dataset_get_spa(ds)) < SPA_VERSION_SA));
-}
-
 /*
  * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin()
  * succeeds; otherwise we will leak the holds on the datasets.
  */
 int
-dmu_recv_begin(char *tofs, char *tosnap, char *top_ds, struct drr_begin *drrb,
-    boolean_t force, objset_t *origin, dmu_recv_cookie_t *drc)
+dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
+    boolean_t force, char *origin, dmu_recv_cookie_t *drc)
 {
-	int err = 0;
-	boolean_t byteswap;
-	struct recvbeginsyncarg rbsa = { 0 };
-	uint64_t versioninfo;
-	int flags;
-	dsl_dataset_t *ds;
+	dmu_recv_begin_arg_t drba = { 0 };
+	dmu_replay_record_t *drr;
 
-	if (drrb->drr_magic == DMU_BACKUP_MAGIC)
-		byteswap = FALSE;
-	else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
-		byteswap = TRUE;
-	else
-		return (EINVAL);
-
-	rbsa.tofs = tofs;
-	rbsa.tosnap = tosnap;
-	rbsa.origin = origin ? origin->os_dsl_dataset : NULL;
-	rbsa.fromguid = drrb->drr_fromguid;
-	rbsa.type = drrb->drr_type;
-	rbsa.tag = FTAG;
-	rbsa.dsflags = 0;
-	rbsa.cr = CRED();
-	versioninfo = drrb->drr_versioninfo;
-	flags = drrb->drr_flags;
-
-	if (byteswap) {
-		rbsa.type = BSWAP_32(rbsa.type);
-		rbsa.fromguid = BSWAP_64(rbsa.fromguid);
-		versioninfo = BSWAP_64(versioninfo);
-		flags = BSWAP_32(flags);
-	}
-
-	if (DMU_GET_STREAM_HDRTYPE(versioninfo) == DMU_COMPOUNDSTREAM ||
-	    rbsa.type >= DMU_OST_NUMTYPES ||
-	    ((flags & DRR_FLAG_CLONE) && origin == NULL))
-		return (EINVAL);
-
-	if (flags & DRR_FLAG_CI_DATA)
-		rbsa.dsflags = DS_FLAG_CI_DATASET;
-
 	bzero(drc, sizeof (dmu_recv_cookie_t));
 	drc->drc_drrb = drrb;
 	drc->drc_tosnap = tosnap;
-	drc->drc_top_ds = top_ds;
+	drc->drc_tofs = tofs;
 	drc->drc_force = force;
 
-	/*
-	 * Process the begin in syncing context.
-	 */
+	if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
+		drc->drc_byteswap = B_TRUE;
+	else if (drrb->drr_magic != DMU_BACKUP_MAGIC)
+		return (SET_ERROR(EINVAL));
 
-	/* open the dataset we are logically receiving into */
-	err = dsl_dataset_hold(tofs, dmu_recv_tag, &ds);
-	if (err == 0) {
-		if (dmu_recv_verify_features(ds, drrb)) {
-			dsl_dataset_rele(ds, dmu_recv_tag);
-			return (ENOTSUP);
-		}
-		/* target fs already exists; recv into temp clone */
+	drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
+	drr->drr_type = DRR_BEGIN;
+	drr->drr_u.drr_begin = *drc->drc_drrb;
+	if (drc->drc_byteswap) {
+		fletcher_4_incremental_byteswap(drr,
+		    sizeof (dmu_replay_record_t), &drc->drc_cksum);
+	} else {
+		fletcher_4_incremental_native(drr,
+		    sizeof (dmu_replay_record_t), &drc->drc_cksum);
+	}
+	kmem_free(drr, sizeof (dmu_replay_record_t));
 
-		/* Can't recv a clone into an existing fs */
-		if (flags & DRR_FLAG_CLONE) {
-			dsl_dataset_rele(ds, dmu_recv_tag);
-			return (EINVAL);
-		}
+	if (drc->drc_byteswap) {
+		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
+		drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
+		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
+		drrb->drr_type = BSWAP_32(drrb->drr_type);
+		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
+		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
+	}
 
-		/* must not have an incremental recv already in progress */
-		if (!mutex_tryenter(&ds->ds_recvlock)) {
-			dsl_dataset_rele(ds, dmu_recv_tag);
-			return (EBUSY);
-		}
+	drba.drba_origin = origin;
+	drba.drba_cookie = drc;
+	drba.drba_cred = CRED();
 
-		/* tmp clone name is: tofs/%tosnap" */
-		(void) snprintf(rbsa.clonelastname, sizeof (rbsa.clonelastname),
-		    "%%%s", tosnap);
-		rbsa.force = force;
-		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
-		    recv_existing_check, recv_existing_sync, ds, &rbsa, 5);
-		if (err) {
-			mutex_exit(&ds->ds_recvlock);
-			dsl_dataset_rele(ds, dmu_recv_tag);
-			return (err);
-		}
-		drc->drc_logical_ds = ds;
-		drc->drc_real_ds = rbsa.ds;
-	} else if (err == ENOENT) {
-		/* target fs does not exist; must be a full backup or clone */
-		char *cp;
-
-		/*
-		 * If it's a non-clone incremental, we are missing the
-		 * target fs, so fail the recv.
-		 */
-		if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE))
-			return (ENOENT);
-
-		/* Open the parent of tofs */
-		cp = strrchr(tofs, '/');
-		*cp = '\0';
-		err = dsl_dataset_hold(tofs, FTAG, &ds);
-		*cp = '/';
-		if (err)
-			return (err);
-
-		if (dmu_recv_verify_features(ds, drrb)) {
-			dsl_dataset_rele(ds, FTAG);
-			return (ENOTSUP);
-		}
-
-		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
-		    recv_new_check, recv_new_sync, ds->ds_dir, &rbsa, 5);
-		dsl_dataset_rele(ds, FTAG);
-		if (err)
-			return (err);
-		drc->drc_logical_ds = drc->drc_real_ds = rbsa.ds;
-		drc->drc_newfs = B_TRUE;
-	}
-
-	return (err);
+	return (dsl_sync_task(tofs, dmu_recv_begin_check, dmu_recv_begin_sync,
+	    &drba, 5));
 }
 
 struct restorearg {
 	int err;
-	int byteswap;
+	boolean_t byteswap;
 	kthread_t *td;
 	struct file *fp;
 	char *buf;
@@ -964,7 +989,8 @@
 	guid_map_entry_t *gmep;
 
 	while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) {
-		dsl_dataset_rele(gmep->gme_ds, ca);
+		dsl_dataset_long_rele(gmep->gme_ds, gmep);
+		dsl_dataset_rele(gmep->gme_ds, gmep);
 		kmem_free(gmep, sizeof (guid_map_entry_t));
 	}
 	avl_destroy(ca);
@@ -1013,10 +1039,10 @@
 		    len - done, ra->voff, &resid);
 
 		if (resid == len - done)
-			ra->err = EINVAL;
+			ra->err = SET_ERROR(EINVAL);
 		ra->voff += len - done - resid;
 		done = len - resid;
-		if (ra->err)
+		if (ra->err != 0)
 			return (NULL);
 	}
 
@@ -1125,17 +1151,17 @@
 	    drro->drr_blksz < SPA_MINBLOCKSIZE ||
 	    drro->drr_blksz > SPA_MAXBLOCKSIZE ||
 	    drro->drr_bonuslen > DN_MAX_BONUSLEN) {
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	err = dmu_object_info(os, drro->drr_object, NULL);
 
 	if (err != 0 && err != ENOENT)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	if (drro->drr_bonuslen) {
 		data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8));
-		if (ra->err)
+		if (ra->err != 0)
 			return (ra->err);
 	}
 
@@ -1144,7 +1170,7 @@
 		tx = dmu_tx_create(os);
 		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
 		err = dmu_tx_assign(tx, TXG_WAIT);
-		if (err) {
+		if (err != 0) {
 			dmu_tx_abort(tx);
 			return (err);
 		}
@@ -1158,14 +1184,14 @@
 		    drro->drr_type, drro->drr_blksz,
 		    drro->drr_bonustype, drro->drr_bonuslen);
 	}
-	if (err) {
-		return (EINVAL);
+	if (err != 0) {
+		return (SET_ERROR(EINVAL));
 	}
 
 	tx = dmu_tx_create(os);
 	dmu_tx_hold_bonus(tx, drro->drr_object);
 	err = dmu_tx_assign(tx, TXG_WAIT);
-	if (err) {
+	if (err != 0) {
 		dmu_tx_abort(tx);
 		return (err);
 	}
@@ -1202,7 +1228,7 @@
 	uint64_t obj;
 
 	if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	for (obj = drrfo->drr_firstobj;
 	    obj < drrfo->drr_firstobj + drrfo->drr_numobjs;
@@ -1213,7 +1239,7 @@
 			continue;
 
 		err = dmu_free_object(os, obj);
-		if (err)
+		if (err != 0)
 			return (err);
 	}
 	return (0);
@@ -1229,7 +1255,7 @@
 
 	if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset ||
 	    !DMU_OT_IS_VALID(drrw->drr_type))
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	data = restore_read(ra, drrw->drr_length);
 	if (data == NULL)
@@ -1236,7 +1262,7 @@
 		return (ra->err);
 
 	if (dmu_object_info(os, drrw->drr_object, NULL) != 0)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	tx = dmu_tx_create(os);
 
@@ -1243,7 +1269,7 @@
 	dmu_tx_hold_write(tx, drrw->drr_object,
 	    drrw->drr_offset, drrw->drr_length);
 	err = dmu_tx_assign(tx, TXG_WAIT);
-	if (err) {
+	if (err != 0) {
 		dmu_tx_abort(tx);
 		return (err);
 	}
@@ -1278,7 +1304,7 @@
 	dmu_buf_t *dbp;
 
 	if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	/*
 	 * If the GUID of the referenced dataset is different from the
@@ -1288,10 +1314,10 @@
 		gmesrch.guid = drrwbr->drr_refguid;
 		if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch,
 		    &where)) == NULL) {
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 		}
 		if (dmu_objset_from_ds(gmep->gme_ds, &ref_os))
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 	} else {
 		ref_os = os;
 	}
@@ -1305,7 +1331,7 @@
 	dmu_tx_hold_write(tx, drrwbr->drr_object,
 	    drrwbr->drr_offset, drrwbr->drr_length);
 	err = dmu_tx_assign(tx, TXG_WAIT);
-	if (err) {
+	if (err != 0) {
 		dmu_tx_abort(tx);
 		return (err);
 	}
@@ -1326,7 +1352,7 @@
 
 	if (drrs->drr_length < SPA_MINBLOCKSIZE ||
 	    drrs->drr_length > SPA_MAXBLOCKSIZE)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	data = restore_read(ra, drrs->drr_length);
 	if (data == NULL)
@@ -1333,7 +1359,7 @@
 		return (ra->err);
 
 	if (dmu_object_info(os, drrs->drr_object, NULL) != 0)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db));
 	if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) {
@@ -1346,7 +1372,7 @@
 	dmu_tx_hold_spill(tx, db->db_object);
 
 	err = dmu_tx_assign(tx, TXG_WAIT);
-	if (err) {
+	if (err != 0) {
 		dmu_buf_rele(db, FTAG);
 		dmu_buf_rele(db_spill, FTAG);
 		dmu_tx_abort(tx);
@@ -1375,10 +1401,10 @@
 
 	if (drrf->drr_length != -1ULL &&
 	    drrf->drr_offset + drrf->drr_length < drrf->drr_offset)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	if (dmu_object_info(os, drrf->drr_object, NULL) != 0)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	err = dmu_free_long_range(os, drrf->drr_object,
 	    drrf->drr_offset, drrf->drr_length);
@@ -1385,6 +1411,16 @@
 	return (err);
 }
 
+/* used to destroy the drc_ds on error */
+static void
+dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc)
+{
+	char name[MAXNAMELEN];
+	dsl_dataset_name(drc->drc_ds, name);
+	dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
+	(void) dsl_destroy_head(name);
+}
+
 /*
  * NB: callers *must* call dmu_recv_end() if this succeeds.
  */
@@ -1398,36 +1434,8 @@
 	zio_cksum_t pcksum;
 	int featureflags;
 
-	if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
-		ra.byteswap = TRUE;
-
-	{
-		/* compute checksum of drr_begin record */
-		dmu_replay_record_t *drr;
-		drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
-
-		drr->drr_type = DRR_BEGIN;
-		drr->drr_u.drr_begin = *drc->drc_drrb;
-		if (ra.byteswap) {
-			fletcher_4_incremental_byteswap(drr,
-			    sizeof (dmu_replay_record_t), &ra.cksum);
-		} else {
-			fletcher_4_incremental_native(drr,
-			    sizeof (dmu_replay_record_t), &ra.cksum);
-		}
-		kmem_free(drr, sizeof (dmu_replay_record_t));
-	}
-
-	if (ra.byteswap) {
-		struct drr_begin *drrb = drc->drc_drrb;
-		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
-		drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
-		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
-		drrb->drr_type = BSWAP_32(drrb->drr_type);
-		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
-		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
-	}
-
+	ra.byteswap = drc->drc_byteswap;
+	ra.cksum = drc->drc_cksum;
 	ra.td = curthread;
 	ra.fp = fp;
 	ra.voff = *voffp;
@@ -1435,16 +1443,16 @@
 	ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP);
 
 	/* these were verified in dmu_recv_begin */
-	ASSERT(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo) ==
+	ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==,
 	    DMU_SUBSTREAM);
-	ASSERT(drc->drc_drrb->drr_type < DMU_OST_NUMTYPES);
+	ASSERT3U(drc->drc_drrb->drr_type, <, DMU_OST_NUMTYPES);
 
 	/*
 	 * Open the objset we are modifying.
 	 */
-	VERIFY(dmu_objset_from_ds(drc->drc_real_ds, &os) == 0);
+	VERIFY0(dmu_objset_from_ds(drc->drc_ds, &os));
 
-	ASSERT(drc->drc_real_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT);
+	ASSERT(drc->drc_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT);
 
 	featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo);
 
@@ -1453,11 +1461,11 @@
 		minor_t minor;
 
 		if (cleanup_fd == -1) {
-			ra.err = EBADF;
+			ra.err = SET_ERROR(EBADF);
 			goto out;
 		}
 		ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor);
-		if (ra.err) {
+		if (ra.err != 0) {
 			cleanup_fd = -1;
 			goto out;
 		}
@@ -1471,12 +1479,12 @@
 			ra.err = zfs_onexit_add_cb(minor,
 			    free_guid_map_onexit, ra.guid_to_ds_map,
 			    action_handlep);
-			if (ra.err)
+			if (ra.err != 0)
 				goto out;
 		} else {
 			ra.err = zfs_onexit_cb_data(minor, *action_handlep,
 			    (void **)&ra.guid_to_ds_map);
-			if (ra.err)
+			if (ra.err != 0)
 				goto out;
 		}
 
@@ -1490,7 +1498,7 @@
 	while (ra.err == 0 &&
 	    NULL != (drr = restore_read(&ra, sizeof (*drr)))) {
 		if (issig(JUSTLOOKING) && issig(FORREAL)) {
-			ra.err = EINTR;
+			ra.err = SET_ERROR(EINTR);
 			goto out;
 		}
 
@@ -1544,7 +1552,7 @@
 			 * everything before the DRR_END record.
 			 */
 			if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum))
-				ra.err = ECKSUM;
+				ra.err = SET_ERROR(ECKSUM);
 			goto out;
 		}
 		case DRR_SPILL:
@@ -1554,7 +1562,7 @@
 			break;
 		}
 		default:
-			ra.err = EINVAL;
+			ra.err = SET_ERROR(EINVAL);
 			goto out;
 		}
 		pcksum = ra.cksum;
@@ -1570,14 +1578,7 @@
 		 * destroy what we created, so we don't leave it in the
 		 * inconsistent restoring state.
 		 */
-		txg_wait_synced(drc->drc_real_ds->ds_dir->dd_pool, 0);
-
-		(void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag,
-		    B_FALSE);
-		if (drc->drc_real_ds != drc->drc_logical_ds) {
-			mutex_exit(&drc->drc_logical_ds->ds_recvlock);
-			dsl_dataset_rele(drc->drc_logical_ds, dmu_recv_tag);
-		}
+		dmu_recv_cleanup_ds(drc);
 	}
 
 	kmem_free(ra.buf, ra.bufsize);
@@ -1585,44 +1586,103 @@
 	return (ra.err);
 }
 
-struct recvendsyncarg {
-	char *tosnap;
-	uint64_t creation_time;
-	uint64_t toguid;
-};
-
 static int
-recv_end_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dmu_recv_end_check(void *arg, dmu_tx_t *tx)
 {
-	dsl_dataset_t *ds = arg1;
-	struct recvendsyncarg *resa = arg2;
+	dmu_recv_cookie_t *drc = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	int error;
 
-	return (dsl_dataset_snapshot_check(ds, resa->tosnap, tx));
+	ASSERT3P(drc->drc_ds->ds_owner, ==, dmu_recv_tag);
+
+	if (!drc->drc_newfs) {
+		dsl_dataset_t *origin_head;
+
+		error = dsl_dataset_hold(dp, drc->drc_tofs, FTAG, &origin_head);
+		if (error != 0)
+			return (error);
+		error = dsl_dataset_clone_swap_check_impl(drc->drc_ds,
+		    origin_head, drc->drc_force);
+		if (error != 0) {
+			dsl_dataset_rele(origin_head, FTAG);
+			return (error);
+		}
+		error = dsl_dataset_snapshot_check_impl(origin_head,
+		    drc->drc_tosnap, tx);
+		dsl_dataset_rele(origin_head, FTAG);
+		if (error != 0)
+			return (error);
+
+		error = dsl_destroy_head_check_impl(drc->drc_ds, 1);
+	} else {
+		error = dsl_dataset_snapshot_check_impl(drc->drc_ds,
+		    drc->drc_tosnap, tx);
+	}
+	return (error);
 }
 
 static void
-recv_end_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
 {
-	dsl_dataset_t *ds = arg1;
-	struct recvendsyncarg *resa = arg2;
+	dmu_recv_cookie_t *drc = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
 
-	dsl_dataset_snapshot_sync(ds, resa->tosnap, tx);
+	spa_history_log_internal_ds(drc->drc_ds, "finish receiving",
+	    tx, "snap=%s", drc->drc_tosnap);
 
-	/* set snapshot's creation time and guid */
-	dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
-	ds->ds_prev->ds_phys->ds_creation_time = resa->creation_time;
-	ds->ds_prev->ds_phys->ds_guid = resa->toguid;
-	ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
+	if (!drc->drc_newfs) {
+		dsl_dataset_t *origin_head;
 
-	dmu_buf_will_dirty(ds->ds_dbuf, tx);
-	ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
+		VERIFY0(dsl_dataset_hold(dp, drc->drc_tofs, FTAG,
+		    &origin_head));
+		dsl_dataset_clone_swap_sync_impl(drc->drc_ds,
+		    origin_head, tx);
+		dsl_dataset_snapshot_sync_impl(origin_head,
+		    drc->drc_tosnap, tx);
+
+		/* set snapshot's creation time and guid */
+		dmu_buf_will_dirty(origin_head->ds_prev->ds_dbuf, tx);
+		origin_head->ds_prev->ds_phys->ds_creation_time =
+		    drc->drc_drrb->drr_creation_time;
+		origin_head->ds_prev->ds_phys->ds_guid =
+		    drc->drc_drrb->drr_toguid;
+		origin_head->ds_prev->ds_phys->ds_flags &=
+		    ~DS_FLAG_INCONSISTENT;
+
+		dmu_buf_will_dirty(origin_head->ds_dbuf, tx);
+		origin_head->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
+
+		dsl_dataset_rele(origin_head, FTAG);
+		dsl_destroy_head_sync_impl(drc->drc_ds, tx);
+	} else {
+		dsl_dataset_t *ds = drc->drc_ds;
+
+		dsl_dataset_snapshot_sync_impl(ds, drc->drc_tosnap, tx);
+
+		/* set snapshot's creation time and guid */
+		dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
+		ds->ds_prev->ds_phys->ds_creation_time =
+		    drc->drc_drrb->drr_creation_time;
+		ds->ds_prev->ds_phys->ds_guid = drc->drc_drrb->drr_toguid;
+		ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
+
+		dmu_buf_will_dirty(ds->ds_dbuf, tx);
+		ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
+	}
+	drc->drc_newsnapobj = drc->drc_ds->ds_phys->ds_prev_snap_obj;
+	/*
+	 * Release the hold from dmu_recv_begin.  This must be done before
+	 * we return to open context, so that when we free the dataset's dnode,
+	 * we can evict its bonus buffer.
+	 */
+	dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
+	drc->drc_ds = NULL;
 }
 
 static int
-add_ds_to_guidmap(avl_tree_t *guid_map, dsl_dataset_t *ds)
+add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj)
 {
-	dsl_pool_t *dp = ds->ds_dir->dd_pool;
-	uint64_t snapobj = ds->ds_phys->ds_prev_snap_obj;
+	dsl_pool_t *dp;
 	dsl_dataset_t *snapds;
 	guid_map_entry_t *gmep;
 	int err;
@@ -1629,97 +1689,73 @@
 
 	ASSERT(guid_map != NULL);
 
-	rw_enter(&dp->dp_config_rwlock, RW_READER);
-	err = dsl_dataset_hold_obj(dp, snapobj, guid_map, &snapds);
+	err = dsl_pool_hold(name, FTAG, &dp);
+	if (err != 0)
+		return (err);
+	gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP);
+	err = dsl_dataset_hold_obj(dp, snapobj, gmep, &snapds);
 	if (err == 0) {
-		gmep = kmem_alloc(sizeof (guid_map_entry_t), KM_SLEEP);
 		gmep->guid = snapds->ds_phys->ds_guid;
 		gmep->gme_ds = snapds;
 		avl_add(guid_map, gmep);
-	}
+		dsl_dataset_long_hold(snapds, gmep);
+	} else
+		kmem_free(gmep, sizeof (*gmep));
 
-	rw_exit(&dp->dp_config_rwlock);
+	dsl_pool_rele(dp, FTAG);
 	return (err);
 }
 
+static int dmu_recv_end_modified_blocks = 3;
+
 static int
 dmu_recv_existing_end(dmu_recv_cookie_t *drc)
 {
-	struct recvendsyncarg resa;
-	dsl_dataset_t *ds = drc->drc_logical_ds;
-	int err, myerr;
+	int error;
+	char name[MAXNAMELEN];
 
-	if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) {
-		err = dsl_dataset_clone_swap(drc->drc_real_ds, ds,
-		    drc->drc_force);
-		if (err)
-			goto out;
-	} else {
-		mutex_exit(&ds->ds_recvlock);
-		dsl_dataset_rele(ds, dmu_recv_tag);
-		(void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag,
-		    B_FALSE);
-		return (EBUSY);
-	}
+#ifdef _KERNEL
+	/*
+	 * We will be destroying the ds; make sure its origin is unmounted if
+	 * necessary.
+	 */
+	dsl_dataset_name(drc->drc_ds, name);
+	zfs_destroy_unmount_origin(name);
+#endif
 
-	resa.creation_time = drc->drc_drrb->drr_creation_time;
-	resa.toguid = drc->drc_drrb->drr_toguid;
-	resa.tosnap = drc->drc_tosnap;
+	error = dsl_sync_task(drc->drc_tofs,
+	    dmu_recv_end_check, dmu_recv_end_sync, drc,
+	    dmu_recv_end_modified_blocks);
 
-	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
-	    recv_end_check, recv_end_sync, ds, &resa, 3);
-	if (err) {
-		/* swap back */
-		(void) dsl_dataset_clone_swap(drc->drc_real_ds, ds, B_TRUE);
-	}
-
-out:
-	mutex_exit(&ds->ds_recvlock);
-	if (err == 0 && drc->drc_guid_to_ds_map != NULL)
-		(void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds);
-	dsl_dataset_disown(ds, dmu_recv_tag);
-	myerr = dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE);
-	ASSERT0(myerr);
-	return (err);
+	if (error != 0)
+		dmu_recv_cleanup_ds(drc);
+	return (error);
 }
 
 static int
 dmu_recv_new_end(dmu_recv_cookie_t *drc)
 {
-	struct recvendsyncarg resa;
-	dsl_dataset_t *ds = drc->drc_logical_ds;
-	int err;
+	int error;
 
-	/*
-	 * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean()
-	 * expects it to have a ds_user_ptr (and zil), but clone_swap()
-	 * can close it.
-	 */
-	txg_wait_synced(ds->ds_dir->dd_pool, 0);
+	error = dsl_sync_task(drc->drc_tofs,
+	    dmu_recv_end_check, dmu_recv_end_sync, drc,
+	    dmu_recv_end_modified_blocks);
 
-	resa.creation_time = drc->drc_drrb->drr_creation_time;
-	resa.toguid = drc->drc_drrb->drr_toguid;
-	resa.tosnap = drc->drc_tosnap;
-
-	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
-	    recv_end_check, recv_end_sync, ds, &resa, 3);
-	if (err) {
-		/* clean up the fs we just recv'd into */
-		(void) dsl_dataset_destroy(ds, dmu_recv_tag, B_FALSE);
-	} else {
-		if (drc->drc_guid_to_ds_map != NULL)
-			(void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds);
-		/* release the hold from dmu_recv_begin */
-		dsl_dataset_disown(ds, dmu_recv_tag);
+	if (error != 0) {
+		dmu_recv_cleanup_ds(drc);
+	} else if (drc->drc_guid_to_ds_map != NULL) {
+		(void) add_ds_to_guidmap(drc->drc_tofs,
+		    drc->drc_guid_to_ds_map,
+		    drc->drc_newsnapobj);
 	}
-	return (err);
+	return (error);
 }
 
 int
 dmu_recv_end(dmu_recv_cookie_t *drc)
 {
-	if (drc->drc_logical_ds != drc->drc_real_ds)
+	if (drc->drc_newfs)
+		return (dmu_recv_new_end(drc));
+	else
 		return (dmu_recv_existing_end(drc));
-	else
-		return (dmu_recv_new_end(drc));
 }

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -265,7 +265,7 @@
 
 		err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
 		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
-		if (err)
+		if (err != 0)
 			return (err);
 		cbp = buf->b_data;
 
@@ -282,7 +282,7 @@
 			    zb->zb_level - 1,
 			    zb->zb_blkid * epb + i);
 			err = traverse_visitbp(td, dnp, &cbp[i], &czb);
-			if (err) {
+			if (err != 0) {
 				if (!hard)
 					break;
 				lasterr = err;
@@ -295,7 +295,7 @@
 
 		err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
 		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
-		if (err)
+		if (err != 0)
 			return (err);
 		dnp = buf->b_data;
 
@@ -308,7 +308,7 @@
 		for (i = 0; i < epb; i++) {
 			err = traverse_dnode(td, &dnp[i], zb->zb_objset,
 			    zb->zb_blkid * epb + i);
-			if (err) {
+			if (err != 0) {
 				if (!hard)
 					break;
 				lasterr = err;
@@ -321,7 +321,7 @@
 
 		err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
 		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
-		if (err)
+		if (err != 0)
 			return (err);
 
 		osp = buf->b_data;
@@ -405,7 +405,7 @@
 	for (j = 0; j < dnp->dn_nblkptr; j++) {
 		SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
 		err = traverse_visitbp(td, dnp, &dnp->dn_blkptr[j], &czb);
-		if (err) {
+		if (err != 0) {
 			if (!hard)
 				break;
 			lasterr = err;
@@ -415,7 +415,7 @@
 	if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
 		SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID);
 		err = traverse_visitbp(td, dnp, &dnp->dn_spill, &czb);
-		if (err) {
+		if (err != 0) {
 			if (!hard)
 				return (err);
 			lasterr = err;
@@ -434,7 +434,7 @@
 
 	ASSERT(pfd->pd_blks_fetched >= 0);
 	if (pfd->pd_cancel)
-		return (EINTR);
+		return (SET_ERROR(EINTR));
 
 	if (bp == NULL || !((pfd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
 	    BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0) ||
@@ -514,14 +514,20 @@
 	cv_init(&pd.pd_cv, NULL, CV_DEFAULT, NULL);
 
 	/* See comment on ZIL traversal in dsl_scan_visitds. */
-	if (ds != NULL && !dsl_dataset_is_snapshot(ds)) {
-		objset_t *os;
+	if (ds != NULL && !dsl_dataset_is_snapshot(ds) && !BP_IS_HOLE(rootbp)) {
+		uint32_t flags = ARC_WAIT;
+		objset_phys_t *osp;
+		arc_buf_t *buf;
 
-		err = dmu_objset_from_ds(ds, &os);
-		if (err)
+		err = arc_read(NULL, td.td_spa, rootbp,
+		    arc_getbuf_func, &buf,
+		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, NULL);
+		if (err != 0)
 			return (err);
 
-		traverse_zil(&td, &os->os_zil_header);
+		osp = buf->b_data;
+		traverse_zil(&td, &osp->os_zil_header);
+		(void) arc_buf_remove_ref(buf, &buf);
 	}
 
 	if (!(flags & TRAVERSE_PREFETCH_DATA) ||
@@ -583,7 +589,7 @@
 	/* visit the MOS */
 	err = traverse_impl(spa, NULL, 0, spa_get_rootblkptr(spa),
 	    txg_start, NULL, flags, func, arg);
-	if (err)
+	if (err != 0)
 		return (err);
 
 	/* visit each dataset */
@@ -592,7 +598,7 @@
 		dmu_object_info_t doi;
 
 		err = dmu_object_info(mos, obj, &doi);
-		if (err) {
+		if (err != 0) {
 			if (!hard)
 				return (err);
 			lasterr = err;
@@ -603,10 +609,10 @@
 			dsl_dataset_t *ds;
 			uint64_t txg = txg_start;
 
-			rw_enter(&dp->dp_config_rwlock, RW_READER);
+			dsl_pool_config_enter(dp, FTAG);
 			err = dsl_dataset_hold_obj(dp, obj, FTAG, &ds);
-			rw_exit(&dp->dp_config_rwlock);
-			if (err) {
+			dsl_pool_config_exit(dp, FTAG);
+			if (err != 0) {
 				if (!hard)
 					return (err);
 				lasterr = err;
@@ -616,7 +622,7 @@
 				txg = ds->ds_phys->ds_prev_snap_txg;
 			err = traverse_dataset(ds, txg, flags, func, arg);
 			dsl_dataset_rele(ds, FTAG);
-			if (err) {
+			if (err != 0) {
 				if (!hard)
 					return (err);
 				lasterr = err;

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -48,7 +48,7 @@
 {
 	dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_SLEEP);
 	tx->tx_dir = dd;
-	if (dd)
+	if (dd != NULL)
 		tx->tx_pool = dd->dd_pool;
 	list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t),
 	    offsetof(dmu_tx_hold_t, txh_node));
@@ -160,7 +160,7 @@
 	db = dbuf_hold_level(dn, level, blkid, FTAG);
 	rw_exit(&dn->dn_struct_rwlock);
 	if (db == NULL)
-		return (EIO);
+		return (SET_ERROR(EIO));
 	err = dbuf_read(db, zio, DB_RF_CANFAIL | DB_RF_NOPREFETCH);
 	dbuf_rele(db, FTAG);
 	return (err);
@@ -370,7 +370,7 @@
 out:
 	if (txh->txh_space_towrite + txh->txh_space_tooverwrite >
 	    2 * DMU_MAX_ACCESS)
-		err = EFBIG;
+		err = SET_ERROR(EFBIG);
 
 	if (err)
 		txh->txh_tx->tx_err = err;
@@ -898,7 +898,7 @@
 #endif
 
 static int
-dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
+dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how)
 {
 	dmu_tx_hold_t *txh;
 	spa_t *spa = tx->tx_pool->dp_spa;
@@ -922,9 +922,9 @@
 		 */
 		if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE &&
 		    txg_how != TXG_WAIT)
-			return (EIO);
+			return (SET_ERROR(EIO));
 
-		return (ERESTART);
+		return (SET_ERROR(ERESTART));
 	}
 
 	tx->tx_txg = txg_hold_open(tx->tx_pool, &tx->tx_txgh);
@@ -945,7 +945,7 @@
 			if (dn->dn_assigned_txg == tx->tx_txg - 1) {
 				mutex_exit(&dn->dn_mtx);
 				tx->tx_needassign_txh = txh;
-				return (ERESTART);
+				return (SET_ERROR(ERESTART));
 			}
 			if (dn->dn_assigned_txg == 0)
 				dn->dn_assigned_txg = tx->tx_txg;
@@ -962,13 +962,6 @@
 	}
 
 	/*
-	 * NB: This check must be after we've held the dnodes, so that
-	 * the dmu_tx_unassign() logic will work properly
-	 */
-	if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg)
-		return (ERESTART);
-
-	/*
 	 * If a snapshot has been taken since we made our estimates,
 	 * assume that we won't be able to free or overwrite anything.
 	 */
@@ -1048,26 +1041,25 @@
  *
  * (1)	TXG_WAIT.  If the current open txg is full, waits until there's
  *	a new one.  This should be used when you're not holding locks.
- *	If will only fail if we're truly out of space (or over quota).
+ *	It will only fail if we're truly out of space (or over quota).
  *
  * (2)	TXG_NOWAIT.  If we can't assign into the current open txg without
  *	blocking, returns immediately with ERESTART.  This should be used
  *	whenever you're holding locks.  On an ERESTART error, the caller
  *	should drop locks, do a dmu_tx_wait(tx), and try again.
- *
- * (3)	A specific txg.  Use this if you need to ensure that multiple
- *	transactions all sync in the same txg.  Like TXG_NOWAIT, it
- *	returns ERESTART if it can't assign you into the requested txg.
  */
 int
-dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
+dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how)
 {
 	int err;
 
 	ASSERT(tx->tx_txg == 0);
-	ASSERT(txg_how != 0);
+	ASSERT(txg_how == TXG_WAIT || txg_how == TXG_NOWAIT);
 	ASSERT(!dsl_pool_sync_context(tx->tx_pool));
 
+	/* If we might wait, we must not hold the config lock. */
+	ASSERT(txg_how != TXG_WAIT || !dsl_pool_config_held(tx->tx_pool));
+
 	while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) {
 		dmu_tx_unassign(tx);
 
@@ -1088,6 +1080,7 @@
 	spa_t *spa = tx->tx_pool->dp_spa;
 
 	ASSERT(tx->tx_txg == 0);
+	ASSERT(!dsl_pool_config_held(tx->tx_pool));
 
 	/*
 	 * It's possible that the pool has become active after this thread
@@ -1214,6 +1207,14 @@
 	return (tx->tx_txg);
 }
 
+dsl_pool_t *
+dmu_tx_pool(dmu_tx_t *tx)
+{
+	ASSERT(tx->tx_pool != NULL);
+	return (tx->tx_pool);
+}
+
+
 void
 dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data)
 {

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -74,7 +74,11 @@
 	mutex_init(&dn->dn_dbufs_mtx, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&dn->dn_notxholds, NULL, CV_DEFAULT, NULL);
 
-	refcount_create(&dn->dn_holds);
+	/*
+	 * Every dbuf has a reference, and dropping a tracked reference is
+	 * O(number of references), so don't track dn_holds.
+	 */
+	refcount_create_untracked(&dn->dn_holds);
 	refcount_create(&dn->dn_tx_holds);
 	list_link_init(&dn->dn_link);
 
@@ -1032,12 +1036,12 @@
 		dn = (object == DMU_USERUSED_OBJECT) ?
 		    DMU_USERUSED_DNODE(os) : DMU_GROUPUSED_DNODE(os);
 		if (dn == NULL)
-			return (ENOENT);
+			return (SET_ERROR(ENOENT));
 		type = dn->dn_type;
 		if ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE)
-			return (ENOENT);
+			return (SET_ERROR(ENOENT));
 		if ((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE)
-			return (EEXIST);
+			return (SET_ERROR(EEXIST));
 		DNODE_VERIFY(dn);
 		(void) refcount_add(&dn->dn_holds, tag);
 		*dnp = dn;
@@ -1045,7 +1049,7 @@
 	}
 
 	if (object == 0 || object >= DN_MAX_OBJECT)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	mdn = DMU_META_DNODE(os);
 	ASSERT(mdn->dn_object == DMU_META_DNODE_OBJECT);
@@ -1063,7 +1067,7 @@
 	if (drop_struct_lock)
 		rw_exit(&mdn->dn_struct_rwlock);
 	if (db == NULL)
-		return (EIO);
+		return (SET_ERROR(EIO));
 	err = dbuf_read(db, NULL, DB_RF_CANFAIL);
 	if (err) {
 		dbuf_rele(db, FTAG);
@@ -1371,7 +1375,7 @@
 
 fail:
 	rw_exit(&dn->dn_struct_rwlock);
-	return (ENOTSUP);
+	return (SET_ERROR(ENOTSUP));
 }
 
 /* read-holding callers must not rely on the lock being continuously held */
@@ -1857,7 +1861,7 @@
 			 * at the pointer to this block in its parent, and its
 			 * going to be unallocated, so we will skip over it.
 			 */
-			return (ESRCH);
+			return (SET_ERROR(ESRCH));
 		}
 		error = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_HAVESTRUCT);
 		if (error) {
@@ -1873,7 +1877,7 @@
 		 * This can only happen when we are searching up the tree
 		 * and these conditions mean that we need to keep climbing.
 		 */
-		error = ESRCH;
+		error = SET_ERROR(ESRCH);
 	} else if (lvl == 0) {
 		dnode_phys_t *dnp = data;
 		span = DNODE_SHIFT;
@@ -1886,7 +1890,7 @@
 			*offset += (1ULL << span) * inc;
 		}
 		if (i < 0 || i == blkfill)
-			error = ESRCH;
+			error = SET_ERROR(ESRCH);
 	} else {
 		blkptr_t *bp = data;
 		uint64_t start = *offset;
@@ -1918,7 +1922,7 @@
 			*offset = start;
 		}
 		if (i < 0 || i >= epb)
-			error = ESRCH;
+			error = SET_ERROR(ESRCH);
 	}
 
 	if (db)
@@ -1962,7 +1966,7 @@
 		rw_enter(&dn->dn_struct_rwlock, RW_READER);
 
 	if (dn->dn_phys->dn_nlevels == 0) {
-		error = ESRCH;
+		error = SET_ERROR(ESRCH);
 		goto out;
 	}
 
@@ -1971,7 +1975,7 @@
 			if (flags & DNODE_FIND_HOLE)
 				*offset = dn->dn_datablksz;
 		} else {
-			error = ESRCH;
+			error = SET_ERROR(ESRCH);
 		}
 		goto out;
 	}
@@ -1992,7 +1996,7 @@
 
 	if (error == 0 && (flags & DNODE_FIND_BACKWARDS ?
 	    initial_offset < *offset : initial_offset > *offset))
-		error = ESRCH;
+		error = SET_ERROR(ESRCH);
 out:
 	if (!(flags & DNODE_FIND_HAVELOCK))
 		rw_exit(&dn->dn_struct_rwlock);

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -480,6 +480,7 @@
 	dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
 	dnode_evict_dbufs(dn);
 	ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL);
+	ASSERT3P(dn->dn_bonus, ==, NULL);
 
 	/*
 	 * XXX - It would be nice to assert this, but we may still

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,10 +20,8 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
- * Copyright (c) 2011 Pawel Jakub Dawidek <pawel at dawidek.net>.
- * All rights reserved.
  * Portions Copyright (c) 2011 Martin Matuska <mm at FreeBSD.org>
  */
 
@@ -48,13 +46,9 @@
 #include <sys/zvol.h>
 #include <sys/dsl_scan.h>
 #include <sys/dsl_deadlist.h>
+#include <sys/dsl_destroy.h>
+#include <sys/dsl_userhold.h>
 
-static char *dsl_reaper = "the grim reaper";
-
-static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
-static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
-static dsl_syncfunc_t dsl_dataset_set_reservation_sync;
-
 #define	SWITCH64(x, y) \
 	{ \
 		uint64_t __tmp = (x); \
@@ -66,9 +60,6 @@
 
 #define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE
 
-#define	DSL_DATASET_IS_DESTROYED(ds)	((ds)->ds_owner == dsl_reaper)
-
-
 /*
  * Figure out how much of this delta should be propogated to the dsl_dir
  * layer.  If there's a refreservation, that space has already been
@@ -255,7 +246,7 @@
 {
 	dsl_dataset_t *ds = dsv;
 
-	ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds));
+	ASSERT(ds->ds_owner == NULL);
 
 	unique_remove(ds->ds_fsid_guid);
 
@@ -263,19 +254,15 @@
 		dmu_objset_evict(ds->ds_objset);
 
 	if (ds->ds_prev) {
-		dsl_dataset_drop_ref(ds->ds_prev, ds);
+		dsl_dataset_rele(ds->ds_prev, ds);
 		ds->ds_prev = NULL;
 	}
 
 	bplist_destroy(&ds->ds_pending_deadlist);
-	if (db != NULL) {
+	if (ds->ds_phys->ds_deadlist_obj != 0)
 		dsl_deadlist_close(&ds->ds_deadlist);
-	} else {
-		ASSERT(ds->ds_deadlist.dl_dbuf == NULL);
-		ASSERT(!ds->ds_deadlist.dl_oldfmt);
-	}
 	if (ds->ds_dir)
-		dsl_dir_close(ds->ds_dir, ds);
+		dsl_dir_rele(ds->ds_dir, ds);
 
 	ASSERT(!list_link_active(&ds->ds_synced_link));
 
@@ -282,17 +269,15 @@
 	if (mutex_owned(&ds->ds_lock))
 		mutex_exit(&ds->ds_lock);
 	mutex_destroy(&ds->ds_lock);
-	mutex_destroy(&ds->ds_recvlock);
 	if (mutex_owned(&ds->ds_opening_lock))
 		mutex_exit(&ds->ds_opening_lock);
 	mutex_destroy(&ds->ds_opening_lock);
-	rw_destroy(&ds->ds_rwlock);
-	cv_destroy(&ds->ds_exclusive_cv);
+	refcount_destroy(&ds->ds_longholds);
 
 	kmem_free(ds, sizeof (dsl_dataset_t));
 }
 
-static int
+int
 dsl_dataset_get_snapname(dsl_dataset_t *ds)
 {
 	dsl_dataset_phys_t *headphys;
@@ -308,7 +293,7 @@
 
 	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
 	    FTAG, &headdbuf);
-	if (err)
+	if (err != 0)
 		return (err);
 	headphys = headdbuf->db_data;
 	err = zap_value_search(dp->dp_meta_objset,
@@ -317,7 +302,7 @@
 	return (err);
 }
 
-static int
+int
 dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
 {
 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
@@ -337,8 +322,8 @@
 	return (err);
 }
 
-static int
-dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)
+int
+dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx)
 {
 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
@@ -358,8 +343,8 @@
 	return (err);
 }
 
-static int
-dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
+int
+dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
     dsl_dataset_t **dsp)
 {
 	objset_t *mos = dp->dp_meta_objset;
@@ -368,17 +353,16 @@
 	int err;
 	dmu_object_info_t doi;
 
-	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
-	    dsl_pool_sync_context(dp));
+	ASSERT(dsl_pool_config_held(dp));
 
 	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
-	if (err)
+	if (err != 0)
 		return (err);
 
 	/* Make sure dsobj has the correct object type. */
 	dmu_object_info_from_db(dbuf, &doi);
 	if (doi.doi_type != DMU_OT_DSL_DATASET)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	ds = dmu_buf_get_user(dbuf);
 	if (ds == NULL) {
@@ -390,13 +374,10 @@
 		ds->ds_phys = dbuf->db_data;
 
 		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
-		mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL);
 		mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
 		mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
+		refcount_create(&ds->ds_longholds);
 
-		rw_init(&ds->ds_rwlock, 0, 0, 0);
-		cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL);
-
 		bplist_create(&ds->ds_pending_deadlist);
 		dsl_deadlist_open(&ds->ds_deadlist,
 		    mos, ds->ds_phys->ds_deadlist_obj);
@@ -405,15 +386,13 @@
 		    offsetof(dmu_sendarg_t, dsa_link));
 
 		if (err == 0) {
-			err = dsl_dir_open_obj(dp,
+			err = dsl_dir_hold_obj(dp,
 			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
 		}
-		if (err) {
+		if (err != 0) {
 			mutex_destroy(&ds->ds_lock);
-			mutex_destroy(&ds->ds_recvlock);
 			mutex_destroy(&ds->ds_opening_lock);
-			rw_destroy(&ds->ds_rwlock);
-			cv_destroy(&ds->ds_exclusive_cv);
+			refcount_destroy(&ds->ds_longholds);
 			bplist_destroy(&ds->ds_pending_deadlist);
 			dsl_deadlist_close(&ds->ds_deadlist);
 			kmem_free(ds, sizeof (dsl_dataset_t));
@@ -423,8 +402,8 @@
 
 		if (!dsl_dataset_is_snapshot(ds)) {
 			ds->ds_snapname[0] = '\0';
-			if (ds->ds_phys->ds_prev_snap_obj) {
-				err = dsl_dataset_get_ref(dp,
+			if (ds->ds_phys->ds_prev_snap_obj != 0) {
+				err = dsl_dataset_hold_obj(dp,
 				    ds->ds_phys->ds_prev_snap_obj,
 				    ds, &ds->ds_prev);
 			}
@@ -440,29 +419,14 @@
 		}
 
 		if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
-			/*
-			 * In sync context, we're called with either no lock
-			 * or with the write lock.  If we're not syncing,
-			 * we're always called with the read lock held.
-			 */
-			boolean_t need_lock =
-			    !RW_WRITE_HELD(&dp->dp_config_rwlock) &&
-			    dsl_pool_sync_context(dp);
-
-			if (need_lock)
-				rw_enter(&dp->dp_config_rwlock, RW_READER);
-
-			err = dsl_prop_get_ds(ds,
-			    "refreservation", sizeof (uint64_t), 1,
-			    &ds->ds_reserved, NULL);
+			err = dsl_prop_get_int_ds(ds,
+			    zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
+			    &ds->ds_reserved);
 			if (err == 0) {
-				err = dsl_prop_get_ds(ds,
-				    "refquota", sizeof (uint64_t), 1,
-				    &ds->ds_quota, NULL);
+				err = dsl_prop_get_int_ds(ds,
+				    zfs_prop_to_name(ZFS_PROP_REFQUOTA),
+				    &ds->ds_quota);
 			}
-
-			if (need_lock)
-				rw_exit(&dp->dp_config_rwlock);
 		} else {
 			ds->ds_reserved = ds->ds_quota = 0;
 		}
@@ -472,15 +436,13 @@
 			bplist_destroy(&ds->ds_pending_deadlist);
 			dsl_deadlist_close(&ds->ds_deadlist);
 			if (ds->ds_prev)
-				dsl_dataset_drop_ref(ds->ds_prev, ds);
-			dsl_dir_close(ds->ds_dir, ds);
+				dsl_dataset_rele(ds->ds_prev, ds);
+			dsl_dir_rele(ds->ds_dir, ds);
 			mutex_destroy(&ds->ds_lock);
-			mutex_destroy(&ds->ds_recvlock);
 			mutex_destroy(&ds->ds_opening_lock);
-			rw_destroy(&ds->ds_rwlock);
-			cv_destroy(&ds->ds_exclusive_cv);
+			refcount_destroy(&ds->ds_longholds);
 			kmem_free(ds, sizeof (dsl_dataset_t));
-			if (err) {
+			if (err != 0) {
 				dmu_buf_rele(dbuf, tag);
 				return (err);
 			}
@@ -495,171 +457,119 @@
 	ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 ||
 	    spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||
 	    dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
-	mutex_enter(&ds->ds_lock);
-	if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) {
-		mutex_exit(&ds->ds_lock);
-		dmu_buf_rele(ds->ds_dbuf, tag);
-		return (ENOENT);
-	}
-	mutex_exit(&ds->ds_lock);
 	*dsp = ds;
 	return (0);
 }
 
-static int
-dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag)
-{
-	dsl_pool_t *dp = ds->ds_dir->dd_pool;
-
-	/*
-	 * In syncing context we don't want the rwlock lock: there
-	 * may be an existing writer waiting for sync phase to
-	 * finish.  We don't need to worry about such writers, since
-	 * sync phase is single-threaded, so the writer can't be
-	 * doing anything while we are active.
-	 */
-	if (dsl_pool_sync_context(dp)) {
-		ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
-		return (0);
-	}
-
-	/*
-	 * Normal users will hold the ds_rwlock as a READER until they
-	 * are finished (i.e., call dsl_dataset_rele()).  "Owners" will
-	 * drop their READER lock after they set the ds_owner field.
-	 *
-	 * If the dataset is being destroyed, the destroy thread will
-	 * obtain a WRITER lock for exclusive access after it's done its
-	 * open-context work and then change the ds_owner to
-	 * dsl_reaper once destruction is assured.  So threads
-	 * may block here temporarily, until the "destructability" of
-	 * the dataset is determined.
-	 */
-	ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock));
-	mutex_enter(&ds->ds_lock);
-	while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) {
-		rw_exit(&dp->dp_config_rwlock);
-		cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock);
-		if (DSL_DATASET_IS_DESTROYED(ds)) {
-			mutex_exit(&ds->ds_lock);
-			dsl_dataset_drop_ref(ds, tag);
-			rw_enter(&dp->dp_config_rwlock, RW_READER);
-			return (ENOENT);
-		}
-		/*
-		 * The dp_config_rwlock lives above the ds_lock. And
-		 * we need to check DSL_DATASET_IS_DESTROYED() while
-		 * holding the ds_lock, so we have to drop and reacquire
-		 * the ds_lock here.
-		 */
-		mutex_exit(&ds->ds_lock);
-		rw_enter(&dp->dp_config_rwlock, RW_READER);
-		mutex_enter(&ds->ds_lock);
-	}
-	mutex_exit(&ds->ds_lock);
-	return (0);
-}
-
 int
-dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
-    dsl_dataset_t **dsp)
-{
-	int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp);
-
-	if (err)
-		return (err);
-	return (dsl_dataset_hold_ref(*dsp, tag));
-}
-
-int
-dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, boolean_t inconsistentok,
+dsl_dataset_hold(dsl_pool_t *dp, const char *name,
     void *tag, dsl_dataset_t **dsp)
 {
-	int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
-	if (err)
-		return (err);
-	if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) {
-		dsl_dataset_rele(*dsp, tag);
-		*dsp = NULL;
-		return (EBUSY);
-	}
-	return (0);
-}
-
-int
-dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp)
-{
 	dsl_dir_t *dd;
-	dsl_pool_t *dp;
 	const char *snapname;
 	uint64_t obj;
 	int err = 0;
 
-	err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname);
-	if (err)
+	err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname);
+	if (err != 0)
 		return (err);
 
-	dp = dd->dd_pool;
+	ASSERT(dsl_pool_config_held(dp));
 	obj = dd->dd_phys->dd_head_dataset_obj;
-	rw_enter(&dp->dp_config_rwlock, RW_READER);
-	if (obj)
-		err = dsl_dataset_get_ref(dp, obj, tag, dsp);
+	if (obj != 0)
+		err = dsl_dataset_hold_obj(dp, obj, tag, dsp);
 	else
-		err = ENOENT;
-	if (err)
-		goto out;
+		err = SET_ERROR(ENOENT);
 
-	err = dsl_dataset_hold_ref(*dsp, tag);
-
 	/* we may be looking for a snapshot */
 	if (err == 0 && snapname != NULL) {
-		dsl_dataset_t *ds = NULL;
+		dsl_dataset_t *ds;
 
 		if (*snapname++ != '@') {
 			dsl_dataset_rele(*dsp, tag);
-			err = ENOENT;
-			goto out;
+			dsl_dir_rele(dd, FTAG);
+			return (SET_ERROR(ENOENT));
 		}
 
 		dprintf("looking for snapshot '%s'\n", snapname);
 		err = dsl_dataset_snap_lookup(*dsp, snapname, &obj);
 		if (err == 0)
-			err = dsl_dataset_get_ref(dp, obj, tag, &ds);
+			err = dsl_dataset_hold_obj(dp, obj, tag, &ds);
 		dsl_dataset_rele(*dsp, tag);
 
-		ASSERT3U((err == 0), ==, (ds != NULL));
-
-		if (ds) {
+		if (err == 0) {
 			mutex_enter(&ds->ds_lock);
 			if (ds->ds_snapname[0] == 0)
 				(void) strlcpy(ds->ds_snapname, snapname,
 				    sizeof (ds->ds_snapname));
 			mutex_exit(&ds->ds_lock);
-			err = dsl_dataset_hold_ref(ds, tag);
-			*dsp = err ? NULL : ds;
+			*dsp = ds;
 		}
 	}
-out:
-	rw_exit(&dp->dp_config_rwlock);
-	dsl_dir_close(dd, FTAG);
+
+	dsl_dir_rele(dd, FTAG);
 	return (err);
 }
 
 int
-dsl_dataset_own(const char *name, boolean_t inconsistentok,
+dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj,
     void *tag, dsl_dataset_t **dsp)
 {
-	int err = dsl_dataset_hold(name, tag, dsp);
-	if (err)
+	int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
+	if (err != 0)
 		return (err);
-	if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) {
+	if (!dsl_dataset_tryown(*dsp, tag)) {
 		dsl_dataset_rele(*dsp, tag);
-		return (EBUSY);
+		*dsp = NULL;
+		return (SET_ERROR(EBUSY));
 	}
 	return (0);
 }
 
+int
+dsl_dataset_own(dsl_pool_t *dp, const char *name,
+    void *tag, dsl_dataset_t **dsp)
+{
+	int err = dsl_dataset_hold(dp, name, tag, dsp);
+	if (err != 0)
+		return (err);
+	if (!dsl_dataset_tryown(*dsp, tag)) {
+		dsl_dataset_rele(*dsp, tag);
+		return (SET_ERROR(EBUSY));
+	}
+	return (0);
+}
+
+/*
+ * See the comment above dsl_pool_hold() for details.  In summary, a long
+ * hold is used to prevent destruction of a dataset while the pool hold
+ * is dropped, allowing other concurrent operations (e.g. spa_sync()).
+ *
+ * The dataset and pool must be held when this function is called.  After it
+ * is called, the pool hold may be released while the dataset is still held
+ * and accessed.
+ */
 void
+dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag)
+{
+	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
+	(void) refcount_add(&ds->ds_longholds, tag);
+}
+
+void
+dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag)
+{
+	(void) refcount_remove(&ds->ds_longholds, tag);
+}
+
+/* Return B_TRUE if there are any long holds on this dataset. */
+boolean_t
+dsl_dataset_long_held(dsl_dataset_t *ds)
+{
+	return (!refcount_is_zero(&ds->ds_longholds));
+}
+
+void
 dsl_dataset_name(dsl_dataset_t *ds, char *name)
 {
 	if (ds == NULL) {
@@ -666,7 +576,7 @@
 		(void) strcpy(name, "mos");
 	} else {
 		dsl_dir_name(ds->ds_dir, name);
-		VERIFY(0 == dsl_dataset_get_snapname(ds));
+		VERIFY0(dsl_dataset_get_snapname(ds));
 		if (ds->ds_snapname[0]) {
 			(void) strcat(name, "@");
 			/*
@@ -693,7 +603,7 @@
 		result = 3;	/* "mos" */
 	} else {
 		result = dsl_dir_namelen(ds->ds_dir);
-		VERIFY(0 == dsl_dataset_get_snapname(ds));
+		VERIFY0(dsl_dataset_get_snapname(ds));
 		if (ds->ds_snapname[0]) {
 			++result;	/* adding one for the @-sign */
 			if (!MUTEX_HELD(&ds->ds_lock)) {
@@ -710,50 +620,35 @@
 }
 
 void
-dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag)
+dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
 {
 	dmu_buf_rele(ds->ds_dbuf, tag);
 }
 
 void
-dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
-{
-	if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) {
-		rw_exit(&ds->ds_rwlock);
-	}
-	dsl_dataset_drop_ref(ds, tag);
-}
-
-void
 dsl_dataset_disown(dsl_dataset_t *ds, void *tag)
 {
-	ASSERT((ds->ds_owner == tag && ds->ds_dbuf) ||
-	    (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL));
+	ASSERT(ds->ds_owner == tag && ds->ds_dbuf != NULL);
 
 	mutex_enter(&ds->ds_lock);
 	ds->ds_owner = NULL;
-	if (RW_WRITE_HELD(&ds->ds_rwlock)) {
-		rw_exit(&ds->ds_rwlock);
-		cv_broadcast(&ds->ds_exclusive_cv);
-	}
 	mutex_exit(&ds->ds_lock);
-	if (ds->ds_dbuf)
-		dsl_dataset_drop_ref(ds, tag);
+	dsl_dataset_long_rele(ds, tag);
+	if (ds->ds_dbuf != NULL)
+		dsl_dataset_rele(ds, tag);
 	else
 		dsl_dataset_evict(NULL, ds);
 }
 
 boolean_t
-dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag)
+dsl_dataset_tryown(dsl_dataset_t *ds, void *tag)
 {
 	boolean_t gotit = FALSE;
 
 	mutex_enter(&ds->ds_lock);
-	if (ds->ds_owner == NULL &&
-	    (!DS_IS_INCONSISTENT(ds) || inconsistentok)) {
+	if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) {
 		ds->ds_owner = tag;
-		if (!dsl_pool_sync_context(ds->ds_dir->dd_pool))
-			rw_exit(&ds->ds_rwlock);
+		dsl_dataset_long_hold(ds, tag);
 		gotit = TRUE;
 	}
 	mutex_exit(&ds->ds_lock);
@@ -760,14 +655,6 @@
 	return (gotit);
 }
 
-void
-dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner)
-{
-	ASSERT3P(owner, ==, ds->ds_owner);
-	if (!RW_WRITE_HELD(&ds->ds_rwlock))
-		rw_enter(&ds->ds_rwlock, RW_WRITER);
-}
-
 uint64_t
 dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
     uint64_t flags, dmu_tx_t *tx)
@@ -788,7 +675,7 @@
 
 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
-	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
+	VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
 	dmu_buf_will_dirty(dbuf, tx);
 	dsphys = dbuf->db_data;
 	bzero(dsphys, sizeof (dsl_dataset_phys_t));
@@ -808,7 +695,7 @@
 	if (origin == NULL) {
 		dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx);
 	} else {
-		dsl_dataset_t *ohds;
+		dsl_dataset_t *ohds; /* head of the origin snapshot */
 
 		dsphys->ds_prev_snap_obj = origin->ds_object;
 		dsphys->ds_prev_snap_txg =
@@ -825,7 +712,7 @@
 		dmu_buf_will_dirty(origin->ds_dbuf, tx);
 		origin->ds_phys->ds_num_children++;
 
-		VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
+		VERIFY0(dsl_dataset_hold_obj(dp,
 		    origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds));
 		dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist,
 		    dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx);
@@ -837,9 +724,8 @@
 				    zap_create(mos,
 				    DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
 			}
-			VERIFY(0 == zap_add_int(mos,
-			    origin->ds_phys->ds_next_clones_obj,
-			    dsobj, tx));
+			VERIFY0(zap_add_int(mos,
+			    origin->ds_phys->ds_next_clones_obj, dsobj, tx));
 		}
 
 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
@@ -851,7 +737,7 @@
 				    zap_create(mos,
 				    DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
 			}
-			VERIFY3U(0, ==, zap_add_int(mos,
+			VERIFY0(zap_add_int(mos,
 			    origin->ds_dir->dd_phys->dd_clones, dsobj, tx));
 		}
 	}
@@ -867,6 +753,16 @@
 	return (dsobj);
 }
 
+static void
+dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx)
+{
+	objset_t *os;
+
+	VERIFY0(dmu_objset_from_ds(ds, &os));
+	bzero(&os->os_zil_header, sizeof (os->os_zil_header));
+	dsl_dataset_dirty(ds, tx);
+}
+
 uint64_t
 dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
     dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx)
@@ -875,29 +771,28 @@
 	uint64_t dsobj, ddobj;
 	dsl_dir_t *dd;
 
+	ASSERT(dmu_tx_is_syncing(tx));
 	ASSERT(lastname[0] != '@');
 
 	ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
-	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));
+	VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd));
 
-	dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx);
+	dsobj = dsl_dataset_create_sync_dd(dd, origin,
+	    flags & ~DS_CREATE_FLAG_NODIRTY, tx);
 
 	dsl_deleg_set_create_perms(dd, tx, cr);
 
-	dsl_dir_close(dd, FTAG);
+	dsl_dir_rele(dd, FTAG);
 
 	/*
 	 * If we are creating a clone, make sure we zero out any stale
 	 * data from the origin snapshots zil header.
 	 */
-	if (origin != NULL) {
+	if (origin != NULL && !(flags & DS_CREATE_FLAG_NODIRTY)) {
 		dsl_dataset_t *ds;
-		objset_t *os;
 
-		VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
-		VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os));
-		bzero(&os->os_zil_header, sizeof (os->os_zil_header));
-		dsl_dataset_dirty(ds, tx);
+		VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
+		dsl_dataset_zero_zil(ds, tx);
 		dsl_dataset_rele(ds, FTAG);
 	}
 
@@ -926,7 +821,7 @@
 }
 
 int
-dmu_get_recursive_snaps_nvl(const char *fsname, const char *snapname,
+dmu_get_recursive_snaps_nvl(char *fsname, const char *snapname,
     nvlist_t *snaps)
 {
 	struct destroyarg *da;
@@ -945,329 +840,6 @@
 #endif /* __FreeBSD__ */
 
 /*
- * The snapshots must all be in the same pool.
- */
-int
-dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed)
-{
-	int err;
-	dsl_sync_task_t *dst;
-	spa_t *spa;
-	nvpair_t *pair;
-	dsl_sync_task_group_t *dstg;
-
-	pair = nvlist_next_nvpair(snaps, NULL);
-	if (pair == NULL)
-		return (0);
-
-	err = spa_open(nvpair_name(pair), &spa, FTAG);
-	if (err)
-		return (err);
-	dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
-
-	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
-	    pair = nvlist_next_nvpair(snaps, pair)) {
-		dsl_dataset_t *ds;
-
-		err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds);
-		if (err == 0) {
-			struct dsl_ds_destroyarg *dsda;
-
-			dsl_dataset_make_exclusive(ds, dstg);
-			dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg),
-			    KM_SLEEP);
-			dsda->ds = ds;
-			dsda->defer = defer;
-			dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
-			    dsl_dataset_destroy_sync, dsda, dstg, 0);
-		} else if (err == ENOENT) {
-			err = 0;
-		} else {
-			(void) strcpy(failed, nvpair_name(pair));
-			break;
-		}
-	}
-
-	if (err == 0)
-		err = dsl_sync_task_group_wait(dstg);
-
-	for (dst = list_head(&dstg->dstg_tasks); dst;
-	    dst = list_next(&dstg->dstg_tasks, dst)) {
-		struct dsl_ds_destroyarg *dsda = dst->dst_arg1;
-		dsl_dataset_t *ds = dsda->ds;
-
-		/*
-		 * Return the file system name that triggered the error
-		 */
-		if (dst->dst_err) {
-			dsl_dataset_name(ds, failed);
-		}
-		ASSERT3P(dsda->rm_origin, ==, NULL);
-		dsl_dataset_disown(ds, dstg);
-		kmem_free(dsda, sizeof (struct dsl_ds_destroyarg));
-	}
-
-	dsl_sync_task_group_destroy(dstg);
-	spa_close(spa, FTAG);
-	return (err);
-
-}
-
-static boolean_t
-dsl_dataset_might_destroy_origin(dsl_dataset_t *ds)
-{
-	boolean_t might_destroy = B_FALSE;
-
-	mutex_enter(&ds->ds_lock);
-	if (ds->ds_phys->ds_num_children == 2 && ds->ds_userrefs == 0 &&
-	    DS_IS_DEFER_DESTROY(ds))
-		might_destroy = B_TRUE;
-	mutex_exit(&ds->ds_lock);
-
-	return (might_destroy);
-}
-
-/*
- * If we're removing a clone, and these three conditions are true:
- *	1) the clone's origin has no other children
- *	2) the clone's origin has no user references
- *	3) the clone's origin has been marked for deferred destruction
- * Then, prepare to remove the origin as part of this sync task group.
- */
-static int
-dsl_dataset_origin_rm_prep(struct dsl_ds_destroyarg *dsda, void *tag)
-{
-	dsl_dataset_t *ds = dsda->ds;
-	dsl_dataset_t *origin = ds->ds_prev;
-
-	if (dsl_dataset_might_destroy_origin(origin)) {
-		char *name;
-		int namelen;
-		int error;
-
-		namelen = dsl_dataset_namelen(origin) + 1;
-		name = kmem_alloc(namelen, KM_SLEEP);
-		dsl_dataset_name(origin, name);
-#ifdef _KERNEL
-		error = zfs_unmount_snap(name, NULL);
-		if (error) {
-			kmem_free(name, namelen);
-			return (error);
-		}
-#endif
-		error = dsl_dataset_own(name, B_TRUE, tag, &origin);
-		kmem_free(name, namelen);
-		if (error)
-			return (error);
-		dsda->rm_origin = origin;
-		dsl_dataset_make_exclusive(origin, tag);
-	}
-
-	return (0);
-}
-
-/*
- * ds must be opened as OWNER.  On return (whether successful or not),
- * ds will be closed and caller can no longer dereference it.
- */
-int
-dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
-{
-	int err;
-	dsl_sync_task_group_t *dstg;
-	objset_t *os;
-	dsl_dir_t *dd;
-	uint64_t obj;
-	struct dsl_ds_destroyarg dsda = { 0 };
-	dsl_dataset_t dummy_ds = { 0 };
-
-	dsda.ds = ds;
-
-	if (dsl_dataset_is_snapshot(ds)) {
-		/* Destroying a snapshot is simpler */
-		dsl_dataset_make_exclusive(ds, tag);
-
-		dsda.defer = defer;
-		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
-		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
-		    &dsda, tag, 0);
-		ASSERT3P(dsda.rm_origin, ==, NULL);
-		goto out;
-	} else if (defer) {
-		err = EINVAL;
-		goto out;
-	}
-
-	dd = ds->ds_dir;
-	dummy_ds.ds_dir = dd;
-	dummy_ds.ds_object = ds->ds_object;
-
-	if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds),
-	    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
-		/*
-		 * Check for errors and mark this ds as inconsistent, in
-		 * case we crash while freeing the objects.
-		 */
-		err = dsl_sync_task_do(dd->dd_pool,
-		    dsl_dataset_destroy_begin_check,
-		    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
-		if (err)
-			goto out;
-
-		err = dmu_objset_from_ds(ds, &os);
-		if (err)
-			goto out;
-
-		/*
-		 * Remove all objects while in the open context so that
-		 * there is less work to do in the syncing context.
-		 */
-		for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
-		    ds->ds_phys->ds_prev_snap_txg)) {
-			/*
-			 * Ignore errors, if there is not enough disk space
-			 * we will deal with it in dsl_dataset_destroy_sync().
-			 */
-			(void) dmu_free_object(os, obj);
-		}
-		if (err != ESRCH)
-			goto out;
-
-		/*
-		 * Sync out all in-flight IO.
-		 */
-		txg_wait_synced(dd->dd_pool, 0);
-
-		/*
-		 * If we managed to free all the objects in open
-		 * context, the user space accounting should be zero.
-		 */
-		if (ds->ds_phys->ds_bp.blk_fill == 0 &&
-		    dmu_objset_userused_enabled(os)) {
-			uint64_t count;
-
-			ASSERT(zap_count(os, DMU_USERUSED_OBJECT,
-			    &count) != 0 || count == 0);
-			ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT,
-			    &count) != 0 || count == 0);
-		}
-	}
-
-	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
-	err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
-	rw_exit(&dd->dd_pool->dp_config_rwlock);
-
-	if (err)
-		goto out;
-
-	/*
-	 * Blow away the dsl_dir + head dataset.
-	 */
-	dsl_dataset_make_exclusive(ds, tag);
-	/*
-	 * If we're removing a clone, we might also need to remove its
-	 * origin.
-	 */
-	do {
-		dsda.need_prep = B_FALSE;
-		if (dsl_dir_is_clone(dd)) {
-			err = dsl_dataset_origin_rm_prep(&dsda, tag);
-			if (err) {
-				dsl_dir_close(dd, FTAG);
-				goto out;
-			}
-		}
-
-		dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
-		dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
-		    dsl_dataset_destroy_sync, &dsda, tag, 0);
-		dsl_sync_task_create(dstg, dsl_dir_destroy_check,
-		    dsl_dir_destroy_sync, &dummy_ds, FTAG, 0);
-		err = dsl_sync_task_group_wait(dstg);
-		dsl_sync_task_group_destroy(dstg);
-
-		/*
-		 * We could be racing against 'zfs release' or 'zfs destroy -d'
-		 * on the origin snap, in which case we can get EBUSY if we
-		 * needed to destroy the origin snap but were not ready to
-		 * do so.
-		 */
-		if (dsda.need_prep) {
-			ASSERT(err == EBUSY);
-			ASSERT(dsl_dir_is_clone(dd));
-			ASSERT(dsda.rm_origin == NULL);
-		}
-	} while (dsda.need_prep);
-
-	if (dsda.rm_origin != NULL)
-		dsl_dataset_disown(dsda.rm_origin, tag);
-
-	/* if it is successful, dsl_dir_destroy_sync will close the dd */
-	if (err)
-		dsl_dir_close(dd, FTAG);
-out:
-	dsl_dataset_disown(ds, tag);
-	return (err);
-}
-
-blkptr_t *
-dsl_dataset_get_blkptr(dsl_dataset_t *ds)
-{
-	return (&ds->ds_phys->ds_bp);
-}
-
-void
-dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
-{
-	ASSERT(dmu_tx_is_syncing(tx));
-	/* If it's the meta-objset, set dp_meta_rootbp */
-	if (ds == NULL) {
-		tx->tx_pool->dp_meta_rootbp = *bp;
-	} else {
-		dmu_buf_will_dirty(ds->ds_dbuf, tx);
-		ds->ds_phys->ds_bp = *bp;
-	}
-}
-
-spa_t *
-dsl_dataset_get_spa(dsl_dataset_t *ds)
-{
-	return (ds->ds_dir->dd_pool->dp_spa);
-}
-
-void
-dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
-{
-	dsl_pool_t *dp;
-
-	if (ds == NULL) /* this is the meta-objset */
-		return;
-
-	ASSERT(ds->ds_objset != NULL);
-
-	if (ds->ds_phys->ds_next_snap_obj != 0)
-		panic("dirtying snapshot!");
-
-	dp = ds->ds_dir->dd_pool;
-
-	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
-		/* up the hold count until we can be written out */
-		dmu_buf_add_ref(ds->ds_dbuf, ds);
-	}
-}
-
-boolean_t
-dsl_dataset_is_dirty(dsl_dataset_t *ds)
-{
-	for (int t = 0; t < TXG_SIZE; t++) {
-		if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets,
-		    ds, t))
-			return (B_TRUE);
-	}
-	return (B_FALSE);
-}
-
-/*
  * The unique space in the head dataset can be calculated by subtracting
  * the space used in the most recent snapshot, that is still being used
  * in this file system, from the space currently in use.  To figure out
@@ -1275,7 +847,7 @@
  * the total space used in the snapshot and subtract out the space that
  * has been freed up since the snapshot was taken.
  */
-static void
+void
 dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
 {
 	uint64_t mrs_used;
@@ -1299,237 +871,10 @@
 		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
 }
 
-struct killarg {
-	dsl_dataset_t *ds;
-	dmu_tx_t *tx;
-};
-
-/* ARGSUSED */
-static int
-kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
-    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
-{
-	struct killarg *ka = arg;
-	dmu_tx_t *tx = ka->tx;
-
-	if (bp == NULL)
-		return (0);
-
-	if (zb->zb_level == ZB_ZIL_LEVEL) {
-		ASSERT(zilog != NULL);
-		/*
-		 * It's a block in the intent log.  It has no
-		 * accounting, so just free it.
-		 */
-		dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
-	} else {
-		ASSERT(zilog == NULL);
-		ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
-		(void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
-	}
-
-	return (0);
-}
-
-/* ARGSUSED */
-static int
-dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
-	dsl_dataset_t *ds = arg1;
-	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
-	uint64_t count;
-	int err;
-
-	/*
-	 * Can't delete a head dataset if there are snapshots of it.
-	 * (Except if the only snapshots are from the branch we cloned
-	 * from.)
-	 */
-	if (ds->ds_prev != NULL &&
-	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
-		return (EBUSY);
-
-	/*
-	 * This is really a dsl_dir thing, but check it here so that
-	 * we'll be less likely to leave this dataset inconsistent &
-	 * nearly destroyed.
-	 */
-	err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
-	if (err)
-		return (err);
-	if (count != 0)
-		return (EEXIST);
-
-	return (0);
-}
-
-/* ARGSUSED */
-static void
-dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
-	dsl_dataset_t *ds = arg1;
-	dsl_pool_t *dp = ds->ds_dir->dd_pool;
-
-	/* Mark it as inconsistent on-disk, in case we crash */
-	dmu_buf_will_dirty(ds->ds_dbuf, tx);
-	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
-
-	spa_history_log_internal(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx,
-	    "dataset = %llu", ds->ds_object);
-}
-
-static int
-dsl_dataset_origin_check(struct dsl_ds_destroyarg *dsda, void *tag,
+void
+dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj,
     dmu_tx_t *tx)
 {
-	dsl_dataset_t *ds = dsda->ds;
-	dsl_dataset_t *ds_prev = ds->ds_prev;
-
-	if (dsl_dataset_might_destroy_origin(ds_prev)) {
-		struct dsl_ds_destroyarg ndsda = {0};
-
-		/*
-		 * If we're not prepared to remove the origin, don't remove
-		 * the clone either.
-		 */
-		if (dsda->rm_origin == NULL) {
-			dsda->need_prep = B_TRUE;
-			return (EBUSY);
-		}
-
-		ndsda.ds = ds_prev;
-		ndsda.is_origin_rm = B_TRUE;
-		return (dsl_dataset_destroy_check(&ndsda, tag, tx));
-	}
-
-	/*
-	 * If we're not going to remove the origin after all,
-	 * undo the open context setup.
-	 */
-	if (dsda->rm_origin != NULL) {
-		dsl_dataset_disown(dsda->rm_origin, tag);
-		dsda->rm_origin = NULL;
-	}
-
-	return (0);
-}
-
-/*
- * If you add new checks here, you may need to add
- * additional checks to the "temporary" case in
- * snapshot_check() in dmu_objset.c.
- */
-/* ARGSUSED */
-int
-dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
-	struct dsl_ds_destroyarg *dsda = arg1;
-	dsl_dataset_t *ds = dsda->ds;
-
-	/* we have an owner hold, so noone else can destroy us */
-	ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
-
-	/*
-	 * Only allow deferred destroy on pools that support it.
-	 * NOTE: deferred destroy is only supported on snapshots.
-	 */
-	if (dsda->defer) {
-		if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
-		    SPA_VERSION_USERREFS)
-			return (ENOTSUP);
-		ASSERT(dsl_dataset_is_snapshot(ds));
-		return (0);
-	}
-
-	/*
-	 * Can't delete a head dataset if there are snapshots of it.
-	 * (Except if the only snapshots are from the branch we cloned
-	 * from.)
-	 */
-	if (ds->ds_prev != NULL &&
-	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
-		return (EBUSY);
-
-	/*
-	 * If we made changes this txg, traverse_dsl_dataset won't find
-	 * them.  Try again.
-	 */
-	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
-		return (EAGAIN);
-
-	if (dsl_dataset_is_snapshot(ds)) {
-		/*
-		 * If this snapshot has an elevated user reference count,
-		 * we can't destroy it yet.
-		 */
-		if (ds->ds_userrefs > 0 && !dsda->releasing)
-			return (EBUSY);
-
-		mutex_enter(&ds->ds_lock);
-		/*
-		 * Can't delete a branch point. However, if we're destroying
-		 * a clone and removing its origin due to it having a user
-		 * hold count of 0 and having been marked for deferred destroy,
-		 * it's OK for the origin to have a single clone.
-		 */
-		if (ds->ds_phys->ds_num_children >
-		    (dsda->is_origin_rm ? 2 : 1)) {
-			mutex_exit(&ds->ds_lock);
-			return (EEXIST);
-		}
-		mutex_exit(&ds->ds_lock);
-	} else if (dsl_dir_is_clone(ds->ds_dir)) {
-		return (dsl_dataset_origin_check(dsda, arg2, tx));
-	}
-
-	/* XXX we should do some i/o error checking... */
-	return (0);
-}
-
-struct refsarg {
-	kmutex_t lock;
-	boolean_t gone;
-	kcondvar_t cv;
-};
-
-/* ARGSUSED */
-static void
-dsl_dataset_refs_gone(dmu_buf_t *db, void *argv)
-{
-	struct refsarg *arg = argv;
-
-	mutex_enter(&arg->lock);
-	arg->gone = TRUE;
-	cv_signal(&arg->cv);
-	mutex_exit(&arg->lock);
-}
-
-static void
-dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag)
-{
-	struct refsarg arg;
-
-	bzero(&arg, sizeof(arg));
-	mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL);
-	cv_init(&arg.cv, NULL, CV_DEFAULT, NULL);
-	arg.gone = FALSE;
-	(void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys,
-	    dsl_dataset_refs_gone);
-	dmu_buf_rele(ds->ds_dbuf, tag);
-	mutex_enter(&arg.lock);
-	while (!arg.gone)
-		cv_wait(&arg.cv, &arg.lock);
-	ASSERT(arg.gone);
-	mutex_exit(&arg.lock);
-	ds->ds_dbuf = NULL;
-	ds->ds_phys = NULL;
-	mutex_destroy(&arg.lock);
-	cv_destroy(&arg.cv);
-}
-
-static void
-remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx)
-{
 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 	uint64_t count;
 	int err;
@@ -1546,489 +891,69 @@
 	 * too many entries in the next_clones_obj even after failing to
 	 * remove this one.
 	 */
-	if (err != ENOENT) {
+	if (err != ENOENT)
 		VERIFY0(err);
-	}
-	ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj,
+	ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj,
 	    &count));
 	ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2);
 }
 
-static void
-dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
+
+blkptr_t *
+dsl_dataset_get_blkptr(dsl_dataset_t *ds)
 {
-	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
-	zap_cursor_t zc;
-	zap_attribute_t za;
-
-	/*
-	 * If it is the old version, dd_clones doesn't exist so we can't
-	 * find the clones, but deadlist_remove_key() is a no-op so it
-	 * doesn't matter.
-	 */
-	if (ds->ds_dir->dd_phys->dd_clones == 0)
-		return;
-
-	for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones);
-	    zap_cursor_retrieve(&zc, &za) == 0;
-	    zap_cursor_advance(&zc)) {
-		dsl_dataset_t *clone;
-
-		VERIFY3U(0, ==, dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
-		    za.za_first_integer, FTAG, &clone));
-		if (clone->ds_dir->dd_origin_txg > mintxg) {
-			dsl_deadlist_remove_key(&clone->ds_deadlist,
-			    mintxg, tx);
-			dsl_dataset_remove_clones_key(clone, mintxg, tx);
-		}
-		dsl_dataset_rele(clone, FTAG);
-	}
-	zap_cursor_fini(&zc);
+	return (&ds->ds_phys->ds_bp);
 }
 
-struct process_old_arg {
-	dsl_dataset_t *ds;
-	dsl_dataset_t *ds_prev;
-	boolean_t after_branch_point;
-	zio_t *pio;
-	uint64_t used, comp, uncomp;
-};
-
-static int
-process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+void
+dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
 {
-	struct process_old_arg *poa = arg;
-	dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;
-
-	if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) {
-		dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx);
-		if (poa->ds_prev && !poa->after_branch_point &&
-		    bp->blk_birth >
-		    poa->ds_prev->ds_phys->ds_prev_snap_txg) {
-			poa->ds_prev->ds_phys->ds_unique_bytes +=
-			    bp_get_dsize_sync(dp->dp_spa, bp);
-		}
+	ASSERT(dmu_tx_is_syncing(tx));
+	/* If it's the meta-objset, set dp_meta_rootbp */
+	if (ds == NULL) {
+		tx->tx_pool->dp_meta_rootbp = *bp;
 	} else {
-		poa->used += bp_get_dsize_sync(dp->dp_spa, bp);
-		poa->comp += BP_GET_PSIZE(bp);
-		poa->uncomp += BP_GET_UCSIZE(bp);
-		dsl_free_sync(poa->pio, dp, tx->tx_txg, bp);
+		dmu_buf_will_dirty(ds->ds_dbuf, tx);
+		ds->ds_phys->ds_bp = *bp;
 	}
-	return (0);
 }
 
-static void
-process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
-    dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx)
+spa_t *
+dsl_dataset_get_spa(dsl_dataset_t *ds)
 {
-	struct process_old_arg poa = { 0 };
-	dsl_pool_t *dp = ds->ds_dir->dd_pool;
-	objset_t *mos = dp->dp_meta_objset;
-
-	ASSERT(ds->ds_deadlist.dl_oldfmt);
-	ASSERT(ds_next->ds_deadlist.dl_oldfmt);
-
-	poa.ds = ds;
-	poa.ds_prev = ds_prev;
-	poa.after_branch_point = after_branch_point;
-	poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
-	VERIFY3U(0, ==, bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
-	    process_old_cb, &poa, tx));
-	VERIFY0(zio_wait(poa.pio));
-	ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes);
-
-	/* change snapused */
-	dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
-	    -poa.used, -poa.comp, -poa.uncomp, tx);
-
-	/* swap next's deadlist to our deadlist */
-	dsl_deadlist_close(&ds->ds_deadlist);
-	dsl_deadlist_close(&ds_next->ds_deadlist);
-	SWITCH64(ds_next->ds_phys->ds_deadlist_obj,
-	    ds->ds_phys->ds_deadlist_obj);
-	dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
-	dsl_deadlist_open(&ds_next->ds_deadlist, mos,
-	    ds_next->ds_phys->ds_deadlist_obj);
+	return (ds->ds_dir->dd_pool->dp_spa);
 }
 
-static int
-old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
-{
-	int err;
-	struct killarg ka;
-
-	/*
-	 * Free everything that we point to (that's born after
-	 * the previous snapshot, if we are a clone)
-	 *
-	 * NB: this should be very quick, because we already
-	 * freed all the objects in open context.
-	 */
-	ka.ds = ds;
-	ka.tx = tx;
-	err = traverse_dataset(ds,
-	    ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
-	    kill_blkptr, &ka);
-	ASSERT0(err);
-	ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
-
-	return (err);
-}
-
 void
-dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
+dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
 {
-	struct dsl_ds_destroyarg *dsda = arg1;
-	dsl_dataset_t *ds = dsda->ds;
-	int err;
-	int after_branch_point = FALSE;
-	dsl_pool_t *dp = ds->ds_dir->dd_pool;
-	objset_t *mos = dp->dp_meta_objset;
-	dsl_dataset_t *ds_prev = NULL;
-	boolean_t wont_destroy;
-	uint64_t obj;
+	dsl_pool_t *dp;
 
-	wont_destroy = (dsda->defer &&
-	    (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1));
-
-	ASSERT(ds->ds_owner || wont_destroy);
-	ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1);
-	ASSERT(ds->ds_prev == NULL ||
-	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
-	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
-
-	if (wont_destroy) {
-		ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
-		dmu_buf_will_dirty(ds->ds_dbuf, tx);
-		ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
+	if (ds == NULL) /* this is the meta-objset */
 		return;
-	}
 
-	/* signal any waiters that this dataset is going away */
-	mutex_enter(&ds->ds_lock);
-	ds->ds_owner = dsl_reaper;
-	cv_broadcast(&ds->ds_exclusive_cv);
-	mutex_exit(&ds->ds_lock);
+	ASSERT(ds->ds_objset != NULL);
 
-	/* Remove our reservation */
-	if (ds->ds_reserved != 0) {
-		dsl_prop_setarg_t psa;
-		uint64_t value = 0;
+	if (ds->ds_phys->ds_next_snap_obj != 0)
+		panic("dirtying snapshot!");
 
-		dsl_prop_setarg_init_uint64(&psa, "refreservation",
-		    (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
-		    &value);
-		psa.psa_effective_value = 0;	/* predict default value */
+	dp = ds->ds_dir->dd_pool;
 
-		dsl_dataset_set_reservation_sync(ds, &psa, tx);
-		ASSERT0(ds->ds_reserved);
+	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
+		/* up the hold count until we can be written out */
+		dmu_buf_add_ref(ds->ds_dbuf, ds);
 	}
+}
 
-	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
-
-	dsl_scan_ds_destroyed(ds, tx);
-
-	obj = ds->ds_object;
-
-	if (ds->ds_phys->ds_prev_snap_obj != 0) {
-		if (ds->ds_prev) {
-			ds_prev = ds->ds_prev;
-		} else {
-			VERIFY(0 == dsl_dataset_hold_obj(dp,
-			    ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
-		}
-		after_branch_point =
-		    (ds_prev->ds_phys->ds_next_snap_obj != obj);
-
-		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
-		if (after_branch_point &&
-		    ds_prev->ds_phys->ds_next_clones_obj != 0) {
-			remove_from_next_clones(ds_prev, obj, tx);
-			if (ds->ds_phys->ds_next_snap_obj != 0) {
-				VERIFY(0 == zap_add_int(mos,
-				    ds_prev->ds_phys->ds_next_clones_obj,
-				    ds->ds_phys->ds_next_snap_obj, tx));
-			}
-		}
-		if (after_branch_point &&
-		    ds->ds_phys->ds_next_snap_obj == 0) {
-			/* This clone is toast. */
-			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
-			ds_prev->ds_phys->ds_num_children--;
-
-			/*
-			 * If the clone's origin has no other clones, no
-			 * user holds, and has been marked for deferred
-			 * deletion, then we should have done the necessary
-			 * destroy setup for it.
-			 */
-			if (ds_prev->ds_phys->ds_num_children == 1 &&
-			    ds_prev->ds_userrefs == 0 &&
-			    DS_IS_DEFER_DESTROY(ds_prev)) {
-				ASSERT3P(dsda->rm_origin, !=, NULL);
-			} else {
-				ASSERT3P(dsda->rm_origin, ==, NULL);
-			}
-		} else if (!after_branch_point) {
-			ds_prev->ds_phys->ds_next_snap_obj =
-			    ds->ds_phys->ds_next_snap_obj;
-		}
+boolean_t
+dsl_dataset_is_dirty(dsl_dataset_t *ds)
+{
+	for (int t = 0; t < TXG_SIZE; t++) {
+		if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets,
+		    ds, t))
+			return (B_TRUE);
 	}
-
-	if (dsl_dataset_is_snapshot(ds)) {
-		dsl_dataset_t *ds_next;
-		uint64_t old_unique;
-		uint64_t used = 0, comp = 0, uncomp = 0;
-
-		VERIFY(0 == dsl_dataset_hold_obj(dp,
-		    ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
-		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
-
-		old_unique = ds_next->ds_phys->ds_unique_bytes;
-
-		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
-		ds_next->ds_phys->ds_prev_snap_obj =
-		    ds->ds_phys->ds_prev_snap_obj;
-		ds_next->ds_phys->ds_prev_snap_txg =
-		    ds->ds_phys->ds_prev_snap_txg;
-		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
-		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
-
-
-		if (ds_next->ds_deadlist.dl_oldfmt) {
-			process_old_deadlist(ds, ds_prev, ds_next,
-			    after_branch_point, tx);
-		} else {
-			/* Adjust prev's unique space. */
-			if (ds_prev && !after_branch_point) {
-				dsl_deadlist_space_range(&ds_next->ds_deadlist,
-				    ds_prev->ds_phys->ds_prev_snap_txg,
-				    ds->ds_phys->ds_prev_snap_txg,
-				    &used, &comp, &uncomp);
-				ds_prev->ds_phys->ds_unique_bytes += used;
-			}
-
-			/* Adjust snapused. */
-			dsl_deadlist_space_range(&ds_next->ds_deadlist,
-			    ds->ds_phys->ds_prev_snap_txg, UINT64_MAX,
-			    &used, &comp, &uncomp);
-			dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
-			    -used, -comp, -uncomp, tx);
-
-			/* Move blocks to be freed to pool's free list. */
-			dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
-			    &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg,
-			    tx);
-			dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
-			    DD_USED_HEAD, used, comp, uncomp, tx);
-
-			/* Merge our deadlist into next's and free it. */
-			dsl_deadlist_merge(&ds_next->ds_deadlist,
-			    ds->ds_phys->ds_deadlist_obj, tx);
-		}
-		dsl_deadlist_close(&ds->ds_deadlist);
-		dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
-
-		/* Collapse range in clone heads */
-		dsl_dataset_remove_clones_key(ds,
-		    ds->ds_phys->ds_creation_txg, tx);
-
-		if (dsl_dataset_is_snapshot(ds_next)) {
-			dsl_dataset_t *ds_nextnext;
-
-			/*
-			 * Update next's unique to include blocks which
-			 * were previously shared by only this snapshot
-			 * and it.  Those blocks will be born after the
-			 * prev snap and before this snap, and will have
-			 * died after the next snap and before the one
-			 * after that (ie. be on the snap after next's
-			 * deadlist).
-			 */
-			VERIFY(0 == dsl_dataset_hold_obj(dp,
-			    ds_next->ds_phys->ds_next_snap_obj,
-			    FTAG, &ds_nextnext));
-			dsl_deadlist_space_range(&ds_nextnext->ds_deadlist,
-			    ds->ds_phys->ds_prev_snap_txg,
-			    ds->ds_phys->ds_creation_txg,
-			    &used, &comp, &uncomp);
-			ds_next->ds_phys->ds_unique_bytes += used;
-			dsl_dataset_rele(ds_nextnext, FTAG);
-			ASSERT3P(ds_next->ds_prev, ==, NULL);
-
-			/* Collapse range in this head. */
-			dsl_dataset_t *hds;
-			VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
-			    ds->ds_dir->dd_phys->dd_head_dataset_obj,
-			    FTAG, &hds));
-			dsl_deadlist_remove_key(&hds->ds_deadlist,
-			    ds->ds_phys->ds_creation_txg, tx);
-			dsl_dataset_rele(hds, FTAG);
-
-		} else {
-			ASSERT3P(ds_next->ds_prev, ==, ds);
-			dsl_dataset_drop_ref(ds_next->ds_prev, ds_next);
-			ds_next->ds_prev = NULL;
-			if (ds_prev) {
-				VERIFY(0 == dsl_dataset_get_ref(dp,
-				    ds->ds_phys->ds_prev_snap_obj,
-				    ds_next, &ds_next->ds_prev));
-			}
-
-			dsl_dataset_recalc_head_uniq(ds_next);
-
-			/*
-			 * Reduce the amount of our unconsmed refreservation
-			 * being charged to our parent by the amount of
-			 * new unique data we have gained.
-			 */
-			if (old_unique < ds_next->ds_reserved) {
-				int64_t mrsdelta;
-				uint64_t new_unique =
-				    ds_next->ds_phys->ds_unique_bytes;
-
-				ASSERT(old_unique <= new_unique);
-				mrsdelta = MIN(new_unique - old_unique,
-				    ds_next->ds_reserved - old_unique);
-				dsl_dir_diduse_space(ds->ds_dir,
-				    DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
-			}
-		}
-		dsl_dataset_rele(ds_next, FTAG);
-	} else {
-		zfeature_info_t *async_destroy =
-		    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
-		objset_t *os;
-
-		/*
-		 * There's no next snapshot, so this is a head dataset.
-		 * Destroy the deadlist.  Unless it's a clone, the
-		 * deadlist should be empty.  (If it's a clone, it's
-		 * safe to ignore the deadlist contents.)
-		 */
-		dsl_deadlist_close(&ds->ds_deadlist);
-		dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
-		ds->ds_phys->ds_deadlist_obj = 0;
-
-		VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os));
-
-		if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
-			err = old_synchronous_dataset_destroy(ds, tx);
-		} else {
-			/*
-			 * Move the bptree into the pool's list of trees to
-			 * clean up and update space accounting information.
-			 */
-			uint64_t used, comp, uncomp;
-
-			zil_destroy_sync(dmu_objset_zil(os), tx);
-
-			if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
-				spa_feature_incr(dp->dp_spa, async_destroy, tx);
-				dp->dp_bptree_obj = bptree_alloc(mos, tx);
-				VERIFY(zap_add(mos,
-				    DMU_POOL_DIRECTORY_OBJECT,
-				    DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
-				    &dp->dp_bptree_obj, tx) == 0);
-			}
-
-			used = ds->ds_dir->dd_phys->dd_used_bytes;
-			comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
-			uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;
-
-			ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
-			    ds->ds_phys->ds_unique_bytes == used);
-
-			bptree_add(mos, dp->dp_bptree_obj,
-			    &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
-			    used, comp, uncomp, tx);
-			dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
-			    -used, -comp, -uncomp, tx);
-			dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
-			    used, comp, uncomp, tx);
-		}
-
-		if (ds->ds_prev != NULL) {
-			if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
-				VERIFY3U(0, ==, zap_remove_int(mos,
-				    ds->ds_prev->ds_dir->dd_phys->dd_clones,
-				    ds->ds_object, tx));
-			}
-			dsl_dataset_rele(ds->ds_prev, ds);
-			ds->ds_prev = ds_prev = NULL;
-		}
-	}
-
-	/*
-	 * This must be done after the dsl_traverse(), because it will
-	 * re-open the objset.
-	 */
-	if (ds->ds_objset) {
-		dmu_objset_evict(ds->ds_objset);
-		ds->ds_objset = NULL;
-	}
-
-	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
-		/* Erase the link in the dir */
-		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
-		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
-		ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
-		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
-		ASSERT(err == 0);
-	} else {
-		/* remove from snapshot namespace */
-		dsl_dataset_t *ds_head;
-		ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
-		VERIFY(0 == dsl_dataset_hold_obj(dp,
-		    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
-		VERIFY(0 == dsl_dataset_get_snapname(ds));
-#ifdef ZFS_DEBUG
-		{
-			uint64_t val;
-
-			err = dsl_dataset_snap_lookup(ds_head,
-			    ds->ds_snapname, &val);
-			ASSERT0(err);
-			ASSERT3U(val, ==, obj);
-		}
-#endif
-		err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx);
-		ASSERT(err == 0);
-		dsl_dataset_rele(ds_head, FTAG);
-	}
-
-	if (ds_prev && ds->ds_prev != ds_prev)
-		dsl_dataset_rele(ds_prev, FTAG);
-
-	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
-	spa_history_log_internal(LOG_DS_DESTROY, dp->dp_spa, tx,
-	    "dataset = %llu", ds->ds_object);
-
-	if (ds->ds_phys->ds_next_clones_obj != 0) {
-		uint64_t count;
-		ASSERT(0 == zap_count(mos,
-		    ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
-		VERIFY(0 == dmu_object_free(mos,
-		    ds->ds_phys->ds_next_clones_obj, tx));
-	}
-	if (ds->ds_phys->ds_props_obj != 0)
-		VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
-	if (ds->ds_phys->ds_userrefs_obj != 0)
-		VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx));
-	dsl_dir_close(ds->ds_dir, ds);
-	ds->ds_dir = NULL;
-	dsl_dataset_drain_refs(ds, tag);
-	VERIFY(0 == dmu_object_free(mos, obj, tx));
-
-	if (dsda->rm_origin) {
-		/*
-		 * Remove the origin of the clone we just destroyed.
-		 */
-		struct dsl_ds_destroyarg ndsda = {0};
-
-		ndsda.ds = dsda->rm_origin;
-		dsl_dataset_destroy_sync(&ndsda, tag, tx);
-	}
+	return (B_FALSE);
 }
 
 static int
@@ -2047,10 +972,10 @@
 	ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds));
 	asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
 	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
-		return (ENOSPC);
+		return (SET_ERROR(ENOSPC));
 
 	/*
-	 * Propogate any reserved space for this snapshot to other
+	 * Propagate any reserved space for this snapshot to other
 	 * snapshot checks in this sync group.
 	 */
 	if (asize > 0)
@@ -2059,60 +984,117 @@
 	return (0);
 }
 
+typedef struct dsl_dataset_snapshot_arg {
+	nvlist_t *ddsa_snaps;
+	nvlist_t *ddsa_props;
+	nvlist_t *ddsa_errors;
+} dsl_dataset_snapshot_arg_t;
+
 int
-dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
+    dmu_tx_t *tx)
 {
-	dsl_dataset_t *ds = arg1;
-	const char *snapname = arg2;
-	int err;
+	int error;
 	uint64_t value;
 
+	ds->ds_trysnap_txg = tx->tx_txg;
+
+	if (!dmu_tx_is_syncing(tx))
+		return (0);
+
 	/*
 	 * We don't allow multiple snapshots of the same txg.  If there
 	 * is already one, try again.
 	 */
 	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
-		return (EAGAIN);
+		return (SET_ERROR(EAGAIN));
 
 	/*
-	 * Check for conflicting name snapshot name.
+	 * Check for conflicting snapshot name.
 	 */
-	err = dsl_dataset_snap_lookup(ds, snapname, &value);
-	if (err == 0)
-		return (EEXIST);
-	if (err != ENOENT)
-		return (err);
+	error = dsl_dataset_snap_lookup(ds, snapname, &value);
+	if (error == 0)
+		return (SET_ERROR(EEXIST));
+	if (error != ENOENT)
+		return (error);
 
-	/*
-	 * Check that the dataset's name is not too long.  Name consists
-	 * of the dataset's length + 1 for the @-sign + snapshot name's length
-	 */
-	if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
-		return (ENAMETOOLONG);
+	error = dsl_dataset_snapshot_reserve_space(ds, tx);
+	if (error != 0)
+		return (error);
 
-	err = dsl_dataset_snapshot_reserve_space(ds, tx);
-	if (err)
-		return (err);
-
-	ds->ds_trysnap_txg = tx->tx_txg;
 	return (0);
 }
 
+static int
+dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx)
+{
+	dsl_dataset_snapshot_arg_t *ddsa = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	nvpair_t *pair;
+	int rv = 0;
+
+	for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);
+	    pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
+		int error = 0;
+		dsl_dataset_t *ds;
+		char *name, *atp;
+		char dsname[MAXNAMELEN];
+
+		name = nvpair_name(pair);
+		if (strlen(name) >= MAXNAMELEN)
+			error = SET_ERROR(ENAMETOOLONG);
+		if (error == 0) {
+			atp = strchr(name, '@');
+			if (atp == NULL)
+				error = SET_ERROR(EINVAL);
+			if (error == 0)
+				(void) strlcpy(dsname, name, atp - name + 1);
+		}
+		if (error == 0)
+			error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
+		if (error == 0) {
+			error = dsl_dataset_snapshot_check_impl(ds,
+			    atp + 1, tx);
+			dsl_dataset_rele(ds, FTAG);
+		}
+
+		if (error != 0) {
+			if (ddsa->ddsa_errors != NULL) {
+				fnvlist_add_int32(ddsa->ddsa_errors,
+				    name, error);
+			}
+			rv = error;
+		}
+	}
+	return (rv);
+}
+
 void
-dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
+    dmu_tx_t *tx)
 {
-	dsl_dataset_t *ds = arg1;
-	const char *snapname = arg2;
+	static zil_header_t zero_zil;
+
 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
 	dmu_buf_t *dbuf;
 	dsl_dataset_phys_t *dsphys;
 	uint64_t dsobj, crtxg;
 	objset_t *mos = dp->dp_meta_objset;
-	int err;
+	objset_t *os;
 
-	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
+	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
 
 	/*
+	 * If we are on an old pool, the zil must not be active, in which
+	 * case it will be zeroed.  Usually zil_suspend() accomplishes this.
+	 */
+	ASSERT(spa_version(dmu_tx_pool(tx)->dp_spa) >= SPA_VERSION_FAST_SNAP ||
+	    dmu_objset_from_ds(ds, &os) != 0 ||
+	    bcmp(&os->os_phys->os_zil_header, &zero_zil,
+	    sizeof (zero_zil)) == 0);
+
+
+	/*
 	 * The origin's ds_creation_txg has to be < TXG_INITIAL
 	 */
 	if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
@@ -2122,7 +1104,7 @@
 
 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
-	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
+	VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
 	dmu_buf_will_dirty(dbuf, tx);
 	dsphys = dbuf->db_data;
 	bzero(dsphys, sizeof (dsl_dataset_phys_t));
@@ -2159,9 +1141,9 @@
 			    ds->ds_prev->ds_phys->ds_creation_txg);
 			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
 		} else if (next_clones_obj != 0) {
-			remove_from_next_clones(ds->ds_prev,
+			dsl_dataset_remove_from_next_clones(ds->ds_prev,
 			    dsphys->ds_next_snap_obj, tx);
-			VERIFY3U(0, ==, zap_add_int(mos,
+			VERIFY0(zap_add_int(mos,
 			    next_clones_obj, dsobj, tx));
 		}
 	}
@@ -2180,9 +1162,6 @@
 	}
 
 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
-	zfs_dbgmsg("taking snapshot %s@%s/%llu; newkey=%llu",
-	    ds->ds_dir->dd_myname, snapname, dsobj,
-	    ds->ds_phys->ds_prev_snap_txg);
 	ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist,
 	    UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx);
 	dsl_deadlist_close(&ds->ds_deadlist);
@@ -2197,13 +1176,12 @@
 	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
 		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
 
-	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
-	    snapname, 8, 1, &dsobj, tx);
-	ASSERT(err == 0);
+	VERIFY0(zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
+	    snapname, 8, 1, &dsobj, tx));
 
 	if (ds->ds_prev)
-		dsl_dataset_drop_ref(ds->ds_prev, ds);
-	VERIFY(0 == dsl_dataset_get_ref(dp,
+		dsl_dataset_rele(ds->ds_prev, ds);
+	VERIFY0(dsl_dataset_hold_obj(dp,
 	    ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));
 
 	dsl_scan_ds_snapshotted(ds, tx);
@@ -2210,10 +1188,212 @@
 
 	dsl_dir_snap_cmtime_update(ds->ds_dir);
 
-	spa_history_log_internal(LOG_DS_SNAPSHOT, dp->dp_spa, tx,
-	    "dataset = %llu", dsobj);
+	spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, "");
 }
 
+static void
+dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx)
+{
+	dsl_dataset_snapshot_arg_t *ddsa = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	nvpair_t *pair;
+
+	for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);
+	    pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
+		dsl_dataset_t *ds;
+		char *name, *atp;
+		char dsname[MAXNAMELEN];
+
+		name = nvpair_name(pair);
+		atp = strchr(name, '@');
+		(void) strlcpy(dsname, name, atp - name + 1);
+		VERIFY0(dsl_dataset_hold(dp, dsname, FTAG, &ds));
+
+		dsl_dataset_snapshot_sync_impl(ds, atp + 1, tx);
+		if (ddsa->ddsa_props != NULL) {
+			dsl_props_set_sync_impl(ds->ds_prev,
+			    ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx);
+		}
+		dsl_dataset_rele(ds, FTAG);
+	}
+}
+
+/*
+ * The snapshots must all be in the same pool.
+ * All-or-nothing: if there are any failures, nothing will be modified.
+ */
+int
+dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
+{
+	dsl_dataset_snapshot_arg_t ddsa;
+	nvpair_t *pair;
+	boolean_t needsuspend;
+	int error;
+	spa_t *spa;
+	char *firstname;
+	nvlist_t *suspended = NULL;
+
+	pair = nvlist_next_nvpair(snaps, NULL);
+	if (pair == NULL)
+		return (0);
+	firstname = nvpair_name(pair);
+
+	error = spa_open(firstname, &spa, FTAG);
+	if (error != 0)
+		return (error);
+	needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
+	spa_close(spa, FTAG);
+
+	if (needsuspend) {
+		suspended = fnvlist_alloc();
+		for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
+		    pair = nvlist_next_nvpair(snaps, pair)) {
+			char fsname[MAXNAMELEN];
+			char *snapname = nvpair_name(pair);
+			char *atp;
+			void *cookie;
+
+			atp = strchr(snapname, '@');
+			if (atp == NULL) {
+				error = SET_ERROR(EINVAL);
+				break;
+			}
+			(void) strlcpy(fsname, snapname, atp - snapname + 1);
+
+			error = zil_suspend(fsname, &cookie);
+			if (error != 0)
+				break;
+			fnvlist_add_uint64(suspended, fsname,
+			    (uintptr_t)cookie);
+		}
+	}
+
+	ddsa.ddsa_snaps = snaps;
+	ddsa.ddsa_props = props;
+	ddsa.ddsa_errors = errors;
+
+	if (error == 0) {
+		error = dsl_sync_task(firstname, dsl_dataset_snapshot_check,
+		    dsl_dataset_snapshot_sync, &ddsa,
+		    fnvlist_num_pairs(snaps) * 3);
+	}
+
+	if (suspended != NULL) {
+		for (pair = nvlist_next_nvpair(suspended, NULL); pair != NULL;
+		    pair = nvlist_next_nvpair(suspended, pair)) {
+			zil_resume((void *)(uintptr_t)
+			    fnvpair_value_uint64(pair));
+		}
+		fnvlist_free(suspended);
+	}
+
+#ifdef __FreeBSD__
+#ifdef _KERNEL
+	if (error == 0) {
+		for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
+		    pair = nvlist_next_nvpair(snaps, pair)) {
+			char *snapname = nvpair_name(pair);
+			zvol_create_minors(snapname);
+		}
+	}
+#endif
+#endif
+	return (error);
+}
+
+typedef struct dsl_dataset_snapshot_tmp_arg {
+	const char *ddsta_fsname;
+	const char *ddsta_snapname;
+	minor_t ddsta_cleanup_minor;
+	const char *ddsta_htag;
+} dsl_dataset_snapshot_tmp_arg_t;
+
+static int
+dsl_dataset_snapshot_tmp_check(void *arg, dmu_tx_t *tx)
+{
+	dsl_dataset_snapshot_tmp_arg_t *ddsta = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dataset_t *ds;
+	int error;
+
+	error = dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds);
+	if (error != 0)
+		return (error);
+
+	error = dsl_dataset_snapshot_check_impl(ds, ddsta->ddsta_snapname, tx);
+	if (error != 0) {
+		dsl_dataset_rele(ds, FTAG);
+		return (error);
+	}
+
+	if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) {
+		dsl_dataset_rele(ds, FTAG);
+		return (SET_ERROR(ENOTSUP));
+	}
+	error = dsl_dataset_user_hold_check_one(NULL, ddsta->ddsta_htag,
+	    B_TRUE, tx);
+	if (error != 0) {
+		dsl_dataset_rele(ds, FTAG);
+		return (error);
+	}
+
+	dsl_dataset_rele(ds, FTAG);
+	return (0);
+}
+
+static void
+dsl_dataset_snapshot_tmp_sync(void *arg, dmu_tx_t *tx)
+{
+	dsl_dataset_snapshot_tmp_arg_t *ddsta = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dataset_t *ds;
+
+	VERIFY0(dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds));
+
+	dsl_dataset_snapshot_sync_impl(ds, ddsta->ddsta_snapname, tx);
+	dsl_dataset_user_hold_sync_one(ds->ds_prev, ddsta->ddsta_htag,
+	    ddsta->ddsta_cleanup_minor, gethrestime_sec(), tx);
+	dsl_destroy_snapshot_sync_impl(ds->ds_prev, B_TRUE, tx);
+
+	dsl_dataset_rele(ds, FTAG);
+}
+
+int
+dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,
+    minor_t cleanup_minor, const char *htag)
+{
+	dsl_dataset_snapshot_tmp_arg_t ddsta;
+	int error;
+	spa_t *spa;
+	boolean_t needsuspend;
+	void *cookie;
+
+	ddsta.ddsta_fsname = fsname;
+	ddsta.ddsta_snapname = snapname;
+	ddsta.ddsta_cleanup_minor = cleanup_minor;
+	ddsta.ddsta_htag = htag;
+
+	error = spa_open(fsname, &spa, FTAG);
+	if (error != 0)
+		return (error);
+	needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
+	spa_close(spa, FTAG);
+
+	if (needsuspend) {
+		error = zil_suspend(fsname, &cookie);
+		if (error != 0)
+			return (error);
+	}
+
+	error = dsl_sync_task(fsname, dsl_dataset_snapshot_tmp_check,
+	    dsl_dataset_snapshot_tmp_sync, &ddsta, 3);
+
+	if (needsuspend)
+		zil_resume(cookie);
+	return (error);
+}
+
+
 void
 dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
 {
@@ -2238,67 +1418,66 @@
 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 	zap_cursor_t zc;
 	zap_attribute_t za;
-	nvlist_t *propval;
-	nvlist_t *val;
+	nvlist_t *propval = fnvlist_alloc();
+	nvlist_t *val = fnvlist_alloc();
 
-	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
-	VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-	VERIFY(nvlist_alloc(&val, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
 
 	/*
-	 * There may me missing entries in ds_next_clones_obj
+	 * There may be missing entries in ds_next_clones_obj
 	 * due to a bug in a previous version of the code.
 	 * Only trust it if it has the right number of entries.
 	 */
 	if (ds->ds_phys->ds_next_clones_obj != 0) {
-		ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj,
+		ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj,
 		    &count));
 	}
-	if (count != ds->ds_phys->ds_num_children - 1) {
+	if (count != ds->ds_phys->ds_num_children - 1)
 		goto fail;
-	}
 	for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj);
 	    zap_cursor_retrieve(&zc, &za) == 0;
 	    zap_cursor_advance(&zc)) {
 		dsl_dataset_t *clone;
 		char buf[ZFS_MAXNAMELEN];
-		/*
-		 * Even though we hold the dp_config_rwlock, the dataset
-		 * may fail to open, returning ENOENT.  If there is a
-		 * thread concurrently attempting to destroy this
-		 * dataset, it will have the ds_rwlock held for
-		 * RW_WRITER.  Our call to dsl_dataset_hold_obj() ->
-		 * dsl_dataset_hold_ref() will fail its
-		 * rw_tryenter(&ds->ds_rwlock, RW_READER), drop the
-		 * dp_config_rwlock, and wait for the destroy progress
-		 * and signal ds_exclusive_cv.  If the destroy was
-		 * successful, we will see that
-		 * DSL_DATASET_IS_DESTROYED(), and return ENOENT.
-		 */
-		if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
-		    za.za_first_integer, FTAG, &clone) != 0)
-			continue;
+		VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
+		    za.za_first_integer, FTAG, &clone));
 		dsl_dir_name(clone->ds_dir, buf);
-		VERIFY(nvlist_add_boolean(val, buf) == 0);
+		fnvlist_add_boolean(val, buf);
 		dsl_dataset_rele(clone, FTAG);
 	}
 	zap_cursor_fini(&zc);
-	VERIFY(nvlist_add_nvlist(propval, ZPROP_VALUE, val) == 0);
-	VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES),
-	    propval) == 0);
+	fnvlist_add_nvlist(propval, ZPROP_VALUE, val);
+	fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), propval);
 fail:
 	nvlist_free(val);
 	nvlist_free(propval);
-	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
 }
 
 void
 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
 {
+	dsl_pool_t *dp = ds->ds_dir->dd_pool;
 	uint64_t refd, avail, uobjs, aobjs, ratio;
 
-	dsl_dir_stats(ds->ds_dir, nv);
+	ASSERT(dsl_pool_config_held(dp));
 
+	ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
+	    (ds->ds_phys->ds_uncompressed_bytes * 100 /
+	    ds->ds_phys->ds_compressed_bytes);
+
+	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio);
+	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED,
+	    ds->ds_phys->ds_uncompressed_bytes);
+
+	if (dsl_dataset_is_snapshot(ds)) {
+		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
+		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
+		    ds->ds_phys->ds_unique_bytes);
+		get_clones_stat(ds, nv);
+	} else {
+		dsl_dir_stats(ds->ds_dir, nv);
+	}
+
 	dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);
@@ -2327,10 +1506,8 @@
 		dsl_pool_t *dp = ds->ds_dir->dd_pool;
 		dsl_dataset_t *prev;
 
-		rw_enter(&dp->dp_config_rwlock, RW_READER);
 		int err = dsl_dataset_hold_obj(dp,
 		    ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
-		rw_exit(&dp->dp_config_rwlock);
 		if (err == 0) {
 			err = dsl_dataset_space_written(prev, ds, &written,
 			    &comp, &uncomp);
@@ -2341,53 +1518,34 @@
 			}
 		}
 	}
-	ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
-	    (ds->ds_phys->ds_uncompressed_bytes * 100 /
-	    ds->ds_phys->ds_compressed_bytes);
-	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio);
-	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED,
-	    ds->ds_phys->ds_uncompressed_bytes);
-
-	if (ds->ds_phys->ds_next_snap_obj) {
-		/*
-		 * This is a snapshot; override the dd's space used with
-		 * our unique space and compression ratio.
-		 */
-		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
-		    ds->ds_phys->ds_unique_bytes);
-		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
-
-		get_clones_stat(ds, nv);
-	}
 }
 
 void
 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
 {
+	dsl_pool_t *dp = ds->ds_dir->dd_pool;
+	ASSERT(dsl_pool_config_held(dp));
+
 	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
 	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
 	stat->dds_guid = ds->ds_phys->ds_guid;
-	if (ds->ds_phys->ds_next_snap_obj) {
+	stat->dds_origin[0] = '\0';
+	if (dsl_dataset_is_snapshot(ds)) {
 		stat->dds_is_snapshot = B_TRUE;
 		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
 	} else {
 		stat->dds_is_snapshot = B_FALSE;
 		stat->dds_num_clones = 0;
-	}
 
-	/* clone origin is really a dsl_dir thing... */
-	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
-	if (dsl_dir_is_clone(ds->ds_dir)) {
-		dsl_dataset_t *ods;
+		if (dsl_dir_is_clone(ds->ds_dir)) {
+			dsl_dataset_t *ods;
 
-		VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool,
-		    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
-		dsl_dataset_name(ods, stat->dds_origin);
-		dsl_dataset_drop_ref(ods, FTAG);
-	} else {
-		stat->dds_origin[0] = '\0';
+			VERIFY0(dsl_dataset_hold_obj(dp,
+			    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
+			dsl_dataset_name(ods, stat->dds_origin);
+			dsl_dataset_rele(ods, FTAG);
+		}
 	}
-	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
 }
 
 uint64_t
@@ -2424,8 +1582,7 @@
 {
 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
 
-	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
-	    dsl_pool_sync_context(dp));
+	ASSERT(dsl_pool_config_held(dp));
 	if (ds->ds_prev == NULL)
 		return (B_FALSE);
 	if (ds->ds_phys->ds_bp.blk_birth >
@@ -2447,249 +1604,245 @@
 	return (B_FALSE);
 }
 
+typedef struct dsl_dataset_rename_snapshot_arg {
+	const char *ddrsa_fsname;
+	const char *ddrsa_oldsnapname;
+	const char *ddrsa_newsnapname;
+	boolean_t ddrsa_recursive;
+	dmu_tx_t *ddrsa_tx;
+} dsl_dataset_rename_snapshot_arg_t;
+
 /* ARGSUSED */
 static int
-dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp,
+    dsl_dataset_t *hds, void *arg)
 {
-	dsl_dataset_t *ds = arg1;
-	char *newsnapname = arg2;
-	dsl_dir_t *dd = ds->ds_dir;
-	dsl_dataset_t *hds;
+	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
+	int error;
 	uint64_t val;
-	int err;
 
-	err = dsl_dataset_hold_obj(dd->dd_pool,
-	    dd->dd_phys->dd_head_dataset_obj, FTAG, &hds);
-	if (err)
-		return (err);
+	error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val);
+	if (error != 0) {
+		/* ignore nonexistent snapshots */
+		return (error == ENOENT ? 0 : error);
+	}
 
-	/* new name better not be in use */
-	err = dsl_dataset_snap_lookup(hds, newsnapname, &val);
-	dsl_dataset_rele(hds, FTAG);
+	/* new name should not exist */
+	error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_newsnapname, &val);
+	if (error == 0)
+		error = SET_ERROR(EEXIST);
+	else if (error == ENOENT)
+		error = 0;
 
-	if (err == 0)
-		err = EEXIST;
-	else if (err == ENOENT)
-		err = 0;
-
 	/* dataset name + 1 for the "@" + the new snapshot name must fit */
-	if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN)
-		err = ENAMETOOLONG;
+	if (dsl_dir_namelen(hds->ds_dir) + 1 +
+	    strlen(ddrsa->ddrsa_newsnapname) >= MAXNAMELEN)
+		error = SET_ERROR(ENAMETOOLONG);
 
-	return (err);
+	return (error);
 }
 
-static void
-dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+static int
+dsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx)
 {
-	char oldname[MAXPATHLEN], newname[MAXPATHLEN];
-	dsl_dataset_t *ds = arg1;
-	const char *newsnapname = arg2;
-	dsl_dir_t *dd = ds->ds_dir;
-	objset_t *mos = dd->dd_pool->dp_meta_objset;
+	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
 	dsl_dataset_t *hds;
-	int err;
+	int error;
 
-	ASSERT(ds->ds_phys->ds_next_snap_obj != 0);
+	error = dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds);
+	if (error != 0)
+		return (error);
 
-	VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
-	    dd->dd_phys->dd_head_dataset_obj, FTAG, &hds));
+	if (ddrsa->ddrsa_recursive) {
+		error = dmu_objset_find_dp(dp, hds->ds_dir->dd_object,
+		    dsl_dataset_rename_snapshot_check_impl, ddrsa,
+		    DS_FIND_CHILDREN);
+	} else {
+		error = dsl_dataset_rename_snapshot_check_impl(dp, hds, ddrsa);
+	}
+	dsl_dataset_rele(hds, FTAG);
+	return (error);
+}
 
-	VERIFY(0 == dsl_dataset_get_snapname(ds));
-	err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx);
-	ASSERT0(err);
-	dsl_dataset_name(ds, oldname);
+static int
+dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
+    dsl_dataset_t *hds, void *arg)
+{
+#ifdef __FreeBSD__
+#ifdef _KERNEL
+	char *oldname, *newname;
+#endif
+#endif
+	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
+	dsl_dataset_t *ds;
+	uint64_t val;
+	dmu_tx_t *tx = ddrsa->ddrsa_tx;
+	int error;
+
+	error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val);
+	ASSERT(error == 0 || error == ENOENT);
+	if (error == ENOENT) {
+		/* ignore nonexistent snapshots */
+		return (0);
+	}
+
+	VERIFY0(dsl_dataset_hold_obj(dp, val, FTAG, &ds));
+
+	/* log before we change the name */
+	spa_history_log_internal_ds(ds, "rename", tx,
+	    "-> @%s", ddrsa->ddrsa_newsnapname);
+
+	VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, tx));
 	mutex_enter(&ds->ds_lock);
-	(void) strcpy(ds->ds_snapname, newsnapname);
+	(void) strcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname);
 	mutex_exit(&ds->ds_lock);
-	err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
-	    ds->ds_snapname, 8, 1, &ds->ds_object, tx);
-	ASSERT0(err);
-	dsl_dataset_name(ds, newname);
+	VERIFY0(zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj,
+	    ds->ds_snapname, 8, 1, &ds->ds_object, tx));
+
+#ifdef __FreeBSD__
 #ifdef _KERNEL
+	oldname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+	newname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+	snprintf(oldname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname,
+	    ddrsa->ddrsa_oldsnapname);
+	snprintf(newname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname,
+	    ddrsa->ddrsa_newsnapname);
+	zfsvfs_update_fromname(oldname, newname);
 	zvol_rename_minors(oldname, newname);
+	kmem_free(newname, MAXPATHLEN);
+	kmem_free(oldname, MAXPATHLEN);
 #endif
+#endif
+	dsl_dataset_rele(ds, FTAG);
 
-	spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx,
-	    "dataset = %llu", ds->ds_object);
-	dsl_dataset_rele(hds, FTAG);
+	return (0);
 }
 
-struct renamesnaparg {
-	dsl_sync_task_group_t *dstg;
-	char failed[MAXPATHLEN];
-	char *oldsnap;
-	char *newsnap;
-	int error;
-};
-
-static int
-dsl_snapshot_rename_one(const char *name, void *arg)
+static void
+dsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx)
 {
-	struct renamesnaparg *ra = arg;
-	dsl_dataset_t *ds = NULL;
-	char *snapname;
-	int err;
+	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dataset_t *hds;
 
-	snapname = kmem_asprintf("%s@%s", name, ra->oldsnap);
-	(void) strlcpy(ra->failed, snapname, sizeof (ra->failed));
-
-	/*
-	 * For recursive snapshot renames the parent won't be changing
-	 * so we just pass name for both the to/from argument.
-	 */
-	err = zfs_secpolicy_rename_perms(snapname, snapname, CRED());
-	if (err != 0) {
-		strfree(snapname);
-		return (err == ENOENT ? 0 : err);
+	VERIFY0(dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds));
+	ddrsa->ddrsa_tx = tx;
+	if (ddrsa->ddrsa_recursive) {
+		VERIFY0(dmu_objset_find_dp(dp, hds->ds_dir->dd_object,
+		    dsl_dataset_rename_snapshot_sync_impl, ddrsa,
+		    DS_FIND_CHILDREN));
+	} else {
+		VERIFY0(dsl_dataset_rename_snapshot_sync_impl(dp, hds, ddrsa));
 	}
+	dsl_dataset_rele(hds, FTAG);
+}
 
-#ifdef _KERNEL
-	/*
-	 * For all filesystems undergoing rename, we'll need to unmount it.
-	 */
-	(void) zfs_unmount_snap(snapname, NULL);
-#endif
-	err = dsl_dataset_hold(snapname, ra->dstg, &ds);
-	strfree(snapname);
-	if (err != 0)
-		return (err == ENOENT ? 0 : err);
+int
+dsl_dataset_rename_snapshot(const char *fsname,
+    const char *oldsnapname, const char *newsnapname, boolean_t recursive)
+{
+	dsl_dataset_rename_snapshot_arg_t ddrsa;
 
-	dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check,
-	    dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0);
+	ddrsa.ddrsa_fsname = fsname;
+	ddrsa.ddrsa_oldsnapname = oldsnapname;
+	ddrsa.ddrsa_newsnapname = newsnapname;
+	ddrsa.ddrsa_recursive = recursive;
 
-	/* First successful rename clears the error. */
-	ra->error = 0;
-
-	return (0);
+	return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check,
+	    dsl_dataset_rename_snapshot_sync, &ddrsa, 1));
 }
 
 static int
-dsl_recursive_rename(char *oldname, const char *newname)
+dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
 {
-	int err;
-	struct renamesnaparg *ra;
-	dsl_sync_task_t *dst;
-	spa_t *spa;
-	char *cp, *fsname = spa_strdup(oldname);
-	int len = strlen(oldname) + 1;
+	const char *fsname = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dataset_t *ds;
+	int64_t unused_refres_delta;
+	int error;
 
-	/* truncate the snapshot name to get the fsname */
-	cp = strchr(fsname, '@');
-	*cp = '\0';
+	error = dsl_dataset_hold(dp, fsname, FTAG, &ds);
+	if (error != 0)
+		return (error);
 
-	err = spa_open(fsname, &spa, FTAG);
-	if (err) {
-		kmem_free(fsname, len);
-		return (err);
+	/* must not be a snapshot */
+	if (dsl_dataset_is_snapshot(ds)) {
+		dsl_dataset_rele(ds, FTAG);
+		return (SET_ERROR(EINVAL));
 	}
-	ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP);
-	ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
 
-	ra->oldsnap = strchr(oldname, '@') + 1;
-	ra->newsnap = strchr(newname, '@') + 1;
-	*ra->failed = '\0';
-	ra->error = ENOENT;
+	/* must have a most recent snapshot */
+	if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) {
+		dsl_dataset_rele(ds, FTAG);
+		return (SET_ERROR(EINVAL));
+	}
 
-	err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra,
-	    DS_FIND_CHILDREN);
-	kmem_free(fsname, len);
-	if (err == 0)
-		err = ra->error;
+	if (dsl_dataset_long_held(ds)) {
+		dsl_dataset_rele(ds, FTAG);
+		return (SET_ERROR(EBUSY));
+	}
 
-	if (err == 0)
-		err = dsl_sync_task_group_wait(ra->dstg);
-
-	for (dst = list_head(&ra->dstg->dstg_tasks); dst;
-	    dst = list_next(&ra->dstg->dstg_tasks, dst)) {
-		dsl_dataset_t *ds = dst->dst_arg1;
-		if (dst->dst_err) {
-			dsl_dir_name(ds->ds_dir, ra->failed);
-			(void) strlcat(ra->failed, "@", sizeof (ra->failed));
-			(void) strlcat(ra->failed, ra->newsnap,
-			    sizeof (ra->failed));
-		}
-		dsl_dataset_rele(ds, ra->dstg);
+	/*
+	 * Check if the snap we are rolling back to uses more than
+	 * the refquota.
+	 */
+	if (ds->ds_quota != 0 &&
+	    ds->ds_prev->ds_phys->ds_referenced_bytes > ds->ds_quota) {
+		dsl_dataset_rele(ds, FTAG);
+		return (SET_ERROR(EDQUOT));
 	}
 
-	if (err)
-		(void) strlcpy(oldname, ra->failed, sizeof (ra->failed));
+	/*
+	 * When we do the clone swap, we will temporarily use more space
+	 * due to the refreservation (the head will no longer have any
+	 * unique space, so the entire amount of the refreservation will need
+	 * to be free).  We will immediately destroy the clone, freeing
+	 * this space, but the freeing happens over many txg's.
+	 */
+	unused_refres_delta = (int64_t)MIN(ds->ds_reserved,
+	    ds->ds_phys->ds_unique_bytes);
 
-	dsl_sync_task_group_destroy(ra->dstg);
-	kmem_free(ra, sizeof (struct renamesnaparg));
-	spa_close(spa, FTAG);
-	return (err);
-}
+	if (unused_refres_delta > 0 &&
+	    unused_refres_delta >
+	    dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) {
+		dsl_dataset_rele(ds, FTAG);
+		return (SET_ERROR(ENOSPC));
+	}
 
-static int
-dsl_valid_rename(const char *oldname, void *arg)
-{
-	int delta = *(int *)arg;
-
-	if (strlen(oldname) + delta >= MAXNAMELEN)
-		return (ENAMETOOLONG);
-
+	dsl_dataset_rele(ds, FTAG);
 	return (0);
 }
 
-#pragma weak dmu_objset_rename = dsl_dataset_rename
-int
-dsl_dataset_rename(char *oldname, const char *newname, int flags)
+static void
+dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx)
 {
-	dsl_dir_t *dd;
-	dsl_dataset_t *ds;
-	const char *tail;
-	int err;
+	const char *fsname = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dataset_t *ds, *clone;
+	uint64_t cloneobj;
 
-	err = dsl_dir_open(oldname, FTAG, &dd, &tail);
-	if (err)
-		return (err);
+	VERIFY0(dsl_dataset_hold(dp, fsname, FTAG, &ds));
 
-	if (tail == NULL) {
-		int delta = strlen(newname) - strlen(oldname);
+	cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback",
+	    ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx);
 
-		/* if we're growing, validate child name lengths */
-		if (delta > 0)
-			err = dmu_objset_find(oldname, dsl_valid_rename,
-			    &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
+	VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone));
 
-		if (err == 0)
-			err = dsl_dir_rename(dd, newname, flags);
-		dsl_dir_close(dd, FTAG);
-		return (err);
-	}
+	dsl_dataset_clone_swap_sync_impl(clone, ds, tx);
+	dsl_dataset_zero_zil(ds, tx);
 
-	if (tail[0] != '@') {
-		/* the name ended in a nonexistent component */
-		dsl_dir_close(dd, FTAG);
-		return (ENOENT);
-	}
+	dsl_destroy_head_sync_impl(clone, tx);
 
-	dsl_dir_close(dd, FTAG);
+	dsl_dataset_rele(clone, FTAG);
+	dsl_dataset_rele(ds, FTAG);
+}
 
-	/* new name must be snapshot in same filesystem */
-	tail = strchr(newname, '@');
-	if (tail == NULL)
-		return (EINVAL);
-	tail++;
-	if (strncmp(oldname, newname, tail - newname) != 0)
-		return (EXDEV);
-
-	if (flags & ZFS_RENAME_RECURSIVE) {
-		err = dsl_recursive_rename(oldname, newname);
-	} else {
-		err = dsl_dataset_hold(oldname, FTAG, &ds);
-		if (err)
-			return (err);
-
-		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
-		    dsl_dataset_snapshot_rename_check,
-		    dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);
-
-		dsl_dataset_rele(ds, FTAG);
-	}
-
-	return (err);
+int
+dsl_dataset_rollback(const char *fsname)
+{
+	return (dsl_sync_task(fsname, dsl_dataset_rollback_check,
+	    dsl_dataset_rollback_sync, (void *)fsname, 1));
 }
 
 struct promotenode {
@@ -2697,49 +1850,66 @@
 	dsl_dataset_t *ds;
 };
 
-struct promotearg {
+typedef struct dsl_dataset_promote_arg {
+	const char *ddpa_clonename;
+	dsl_dataset_t *ddpa_clone;
 	list_t shared_snaps, origin_snaps, clone_snaps;
-	dsl_dataset_t *origin_origin;
+	dsl_dataset_t *origin_origin; /* origin of the origin */
 	uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
 	char *err_ds;
-};
+} dsl_dataset_promote_arg_t;
 
 static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
-static boolean_t snaplist_unstable(list_t *l);
+static int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp,
+    void *tag);
+static void promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag);
 
 static int
-dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
 {
-	dsl_dataset_t *hds = arg1;
-	struct promotearg *pa = arg2;
-	struct promotenode *snap = list_head(&pa->shared_snaps);
-	dsl_dataset_t *origin_ds = snap->ds;
+	dsl_dataset_promote_arg_t *ddpa = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dataset_t *hds;
+	struct promotenode *snap;
+	dsl_dataset_t *origin_ds;
 	int err;
 	uint64_t unused;
 
-	/* Check that it is a real clone */
-	if (!dsl_dir_is_clone(hds->ds_dir))
-		return (EINVAL);
+	err = promote_hold(ddpa, dp, FTAG);
+	if (err != 0)
+		return (err);
 
-	/* Since this is so expensive, don't do the preliminary check */
-	if (!dmu_tx_is_syncing(tx))
+	hds = ddpa->ddpa_clone;
+
+	if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) {
+		promote_rele(ddpa, FTAG);
+		return (SET_ERROR(EXDEV));
+	}
+
+	/*
+	 * Compute and check the amount of space to transfer.  Since this is
+	 * so expensive, don't do the preliminary check.
+	 */
+	if (!dmu_tx_is_syncing(tx)) {
+		promote_rele(ddpa, FTAG);
 		return (0);
+	}
 
-	if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)
-		return (EXDEV);
+	snap = list_head(&ddpa->shared_snaps);
+	origin_ds = snap->ds;
 
 	/* compute origin's new unique space */
-	snap = list_tail(&pa->clone_snaps);
+	snap = list_tail(&ddpa->clone_snaps);
 	ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
 	dsl_deadlist_space_range(&snap->ds->ds_deadlist,
 	    origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX,
-	    &pa->unique, &unused, &unused);
+	    &ddpa->unique, &unused, &unused);
 
 	/*
 	 * Walk the snapshots that we are moving
 	 *
 	 * Compute space to transfer.  Consider the incremental changes
-	 * to used for each snapshot:
+	 * to used by each snapshot:
 	 * (my used) = (prev's used) + (blocks born) - (blocks killed)
 	 * So each snapshot gave birth to:
 	 * (blocks born) = (my used) - (prev's used) + (blocks killed)
@@ -2750,19 +1920,29 @@
 	 * Note however, if we stop before we reach the ORIGIN we get:
 	 * uN + kN + kN-1 + ... + kM - uM-1
 	 */
-	pa->used = origin_ds->ds_phys->ds_referenced_bytes;
-	pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
-	pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
-	for (snap = list_head(&pa->shared_snaps); snap;
-	    snap = list_next(&pa->shared_snaps, snap)) {
+	ddpa->used = origin_ds->ds_phys->ds_referenced_bytes;
+	ddpa->comp = origin_ds->ds_phys->ds_compressed_bytes;
+	ddpa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
+	for (snap = list_head(&ddpa->shared_snaps); snap;
+	    snap = list_next(&ddpa->shared_snaps, snap)) {
 		uint64_t val, dlused, dlcomp, dluncomp;
 		dsl_dataset_t *ds = snap->ds;
 
+		/*
+		 * If there are long holds, we won't be able to evict
+		 * the objset.
+		 */
+		if (dsl_dataset_long_held(ds)) {
+			err = SET_ERROR(EBUSY);
+			goto out;
+		}
+
 		/* Check that the snapshot name does not conflict */
-		VERIFY(0 == dsl_dataset_get_snapname(ds));
+		VERIFY0(dsl_dataset_get_snapname(ds));
 		err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val);
 		if (err == 0) {
-			err = EEXIST;
+			(void) strcpy(ddpa->err_ds, snap->ds->ds_snapname);
+			err = SET_ERROR(EEXIST);
 			goto out;
 		}
 		if (err != ENOENT)
@@ -2774,9 +1954,9 @@
 
 		dsl_deadlist_space(&ds->ds_deadlist,
 		    &dlused, &dlcomp, &dluncomp);
-		pa->used += dlused;
-		pa->comp += dlcomp;
-		pa->uncomp += dluncomp;
+		ddpa->used += dlused;
+		ddpa->comp += dlcomp;
+		ddpa->uncomp += dluncomp;
 	}
 
 	/*
@@ -2783,17 +1963,18 @@
 	 * If we are a clone of a clone then we never reached ORIGIN,
 	 * so we need to subtract out the clone origin's used space.
 	 */
-	if (pa->origin_origin) {
-		pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
-		pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
-		pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
+	if (ddpa->origin_origin) {
+		ddpa->used -= ddpa->origin_origin->ds_phys->ds_referenced_bytes;
+		ddpa->comp -= ddpa->origin_origin->ds_phys->ds_compressed_bytes;
+		ddpa->uncomp -=
+		    ddpa->origin_origin->ds_phys->ds_uncompressed_bytes;
 	}
 
 	/* Check that there is enough space here */
 	err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
-	    pa->used);
-	if (err)
-		return (err);
+	    ddpa->used);
+	if (err != 0)
+		goto out;
 
 	/*
 	 * Compute the amounts of space that will be used by snapshots
@@ -2811,48 +1992,54 @@
 		 * calls will be fast because they do not have to
 		 * iterate over all bps.
 		 */
-		snap = list_head(&pa->origin_snaps);
-		err = snaplist_space(&pa->shared_snaps,
-		    snap->ds->ds_dir->dd_origin_txg, &pa->cloneusedsnap);
-		if (err)
-			return (err);
+		snap = list_head(&ddpa->origin_snaps);
+		err = snaplist_space(&ddpa->shared_snaps,
+		    snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap);
+		if (err != 0)
+			goto out;
 
-		err = snaplist_space(&pa->clone_snaps,
+		err = snaplist_space(&ddpa->clone_snaps,
 		    snap->ds->ds_dir->dd_origin_txg, &space);
-		if (err)
-			return (err);
-		pa->cloneusedsnap += space;
+		if (err != 0)
+			goto out;
+		ddpa->cloneusedsnap += space;
 	}
 	if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
-		err = snaplist_space(&pa->origin_snaps,
-		    origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap);
-		if (err)
-			return (err);
+		err = snaplist_space(&ddpa->origin_snaps,
+		    origin_ds->ds_phys->ds_creation_txg, &ddpa->originusedsnap);
+		if (err != 0)
+			goto out;
 	}
 
-	return (0);
 out:
-	pa->err_ds =  snap->ds->ds_snapname;
+	promote_rele(ddpa, FTAG);
 	return (err);
 }
 
 static void
-dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx)
 {
-	dsl_dataset_t *hds = arg1;
-	struct promotearg *pa = arg2;
-	struct promotenode *snap = list_head(&pa->shared_snaps);
-	dsl_dataset_t *origin_ds = snap->ds;
+	dsl_dataset_promote_arg_t *ddpa = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dataset_t *hds;
+	struct promotenode *snap;
+	dsl_dataset_t *origin_ds;
 	dsl_dataset_t *origin_head;
-	dsl_dir_t *dd = hds->ds_dir;
-	dsl_pool_t *dp = hds->ds_dir->dd_pool;
+	dsl_dir_t *dd;
 	dsl_dir_t *odd = NULL;
 	uint64_t oldnext_obj;
 	int64_t delta;
 
-	ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));
+	VERIFY0(promote_hold(ddpa, dp, FTAG));
+	hds = ddpa->ddpa_clone;
 
-	snap = list_head(&pa->origin_snaps);
+	ASSERT0(hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE);
+
+	snap = list_head(&ddpa->shared_snaps);
+	origin_ds = snap->ds;
+	dd = hds->ds_dir;
+
+	snap = list_head(&ddpa->origin_snaps);
 	origin_head = snap->ds;
 
 	/*
@@ -2859,20 +2046,21 @@
 	 * We need to explicitly open odd, since origin_ds's dd will be
 	 * changing.
 	 */
-	VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object,
+	VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object,
 	    NULL, FTAG, &odd));
 
 	/* change origin's next snap */
 	dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
 	oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj;
-	snap = list_tail(&pa->clone_snaps);
+	snap = list_tail(&ddpa->clone_snaps);
 	ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
 	origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object;
 
 	/* change the origin's next clone */
 	if (origin_ds->ds_phys->ds_next_clones_obj) {
-		remove_from_next_clones(origin_ds, snap->ds->ds_object, tx);
-		VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
+		dsl_dataset_remove_from_next_clones(origin_ds,
+		    snap->ds->ds_object, tx);
+		VERIFY0(zap_add_int(dp->dp_meta_objset,
 		    origin_ds->ds_phys->ds_next_clones_obj,
 		    oldnext_obj, tx));
 	}
@@ -2889,39 +2077,43 @@
 
 	/* change dd_clone entries */
 	if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
-		VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
+		VERIFY0(zap_remove_int(dp->dp_meta_objset,
 		    odd->dd_phys->dd_clones, hds->ds_object, tx));
-		VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
-		    pa->origin_origin->ds_dir->dd_phys->dd_clones,
+		VERIFY0(zap_add_int(dp->dp_meta_objset,
+		    ddpa->origin_origin->ds_dir->dd_phys->dd_clones,
 		    hds->ds_object, tx));
 
-		VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
-		    pa->origin_origin->ds_dir->dd_phys->dd_clones,
+		VERIFY0(zap_remove_int(dp->dp_meta_objset,
+		    ddpa->origin_origin->ds_dir->dd_phys->dd_clones,
 		    origin_head->ds_object, tx));
 		if (dd->dd_phys->dd_clones == 0) {
 			dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset,
 			    DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
 		}
-		VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
+		VERIFY0(zap_add_int(dp->dp_meta_objset,
 		    dd->dd_phys->dd_clones, origin_head->ds_object, tx));
-
 	}
 
 	/* move snapshots to this dir */
-	for (snap = list_head(&pa->shared_snaps); snap;
-	    snap = list_next(&pa->shared_snaps, snap)) {
+	for (snap = list_head(&ddpa->shared_snaps); snap;
+	    snap = list_next(&ddpa->shared_snaps, snap)) {
 		dsl_dataset_t *ds = snap->ds;
 
-		/* unregister props as dsl_dir is changing */
+		/*
+		 * Property callbacks are registered to a particular
+		 * dsl_dir.  Since ours is changing, evict the objset
+		 * so that they will be unregistered from the old dsl_dir.
+		 */
 		if (ds->ds_objset) {
 			dmu_objset_evict(ds->ds_objset);
 			ds->ds_objset = NULL;
 		}
+
 		/* move snap name entry */
-		VERIFY(0 == dsl_dataset_get_snapname(ds));
-		VERIFY(0 == dsl_dataset_snap_remove(origin_head,
+		VERIFY0(dsl_dataset_get_snapname(ds));
+		VERIFY0(dsl_dataset_snap_remove(origin_head,
 		    ds->ds_snapname, tx));
-		VERIFY(0 == zap_add(dp->dp_meta_objset,
+		VERIFY0(zap_add(dp->dp_meta_objset,
 		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
 		    8, 1, &ds->ds_object, tx));
 
@@ -2930,8 +2122,8 @@
 		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
 		ds->ds_phys->ds_dir_obj = dd->dd_object;
 		ASSERT3P(ds->ds_dir, ==, odd);
-		dsl_dir_close(ds->ds_dir, ds);
-		VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
+		dsl_dir_rele(ds->ds_dir, ds);
+		VERIFY0(dsl_dir_hold_obj(dp, dd->dd_object,
 		    NULL, ds, &ds->ds_dir));
 
 		/* move any clone references */
@@ -2955,20 +2147,20 @@
 					continue;
 				}
 
-				VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
+				VERIFY0(dsl_dataset_hold_obj(dp,
 				    za.za_first_integer, FTAG, &cnds));
 				o = cnds->ds_dir->dd_phys->dd_head_dataset_obj;
 
-				VERIFY3U(zap_remove_int(dp->dp_meta_objset,
-				    odd->dd_phys->dd_clones, o, tx), ==, 0);
-				VERIFY3U(zap_add_int(dp->dp_meta_objset,
-				    dd->dd_phys->dd_clones, o, tx), ==, 0);
+				VERIFY0(zap_remove_int(dp->dp_meta_objset,
+				    odd->dd_phys->dd_clones, o, tx));
+				VERIFY0(zap_add_int(dp->dp_meta_objset,
+				    dd->dd_phys->dd_clones, o, tx));
 				dsl_dataset_rele(cnds, FTAG);
 			}
 			zap_cursor_fini(&zc);
 		}
 
-		ASSERT0(dsl_prop_numcb(ds));
+		ASSERT(!dsl_prop_hascb(ds));
 	}
 
 	/*
@@ -2978,32 +2170,31 @@
 	 * is true for each of {clone,origin} independently.
 	 */
 
-	delta = pa->cloneusedsnap -
+	delta = ddpa->cloneusedsnap -
 	    dd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
 	ASSERT3S(delta, >=, 0);
-	ASSERT3U(pa->used, >=, delta);
+	ASSERT3U(ddpa->used, >=, delta);
 	dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx);
 	dsl_dir_diduse_space(dd, DD_USED_HEAD,
-	    pa->used - delta, pa->comp, pa->uncomp, tx);
+	    ddpa->used - delta, ddpa->comp, ddpa->uncomp, tx);
 
-	delta = pa->originusedsnap -
+	delta = ddpa->originusedsnap -
 	    odd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
 	ASSERT3S(delta, <=, 0);
-	ASSERT3U(pa->used, >=, -delta);
+	ASSERT3U(ddpa->used, >=, -delta);
 	dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx);
 	dsl_dir_diduse_space(odd, DD_USED_HEAD,
-	    -pa->used - delta, -pa->comp, -pa->uncomp, tx);
+	    -ddpa->used - delta, -ddpa->comp, -ddpa->uncomp, tx);
 
-	origin_ds->ds_phys->ds_unique_bytes = pa->unique;
+	origin_ds->ds_phys->ds_unique_bytes = ddpa->unique;
 
 	/* log history record */
-	spa_history_log_internal(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx,
-	    "dataset = %llu", hds->ds_object);
+	spa_history_log_internal_ds(hds, "promote", tx, "");
 
-	dsl_dir_close(odd, FTAG);
+	dsl_dir_rele(odd, FTAG);
+	promote_rele(ddpa, FTAG);
 }
 
-static char *snaplist_tag = "snaplist";
 /*
  * Make a list of dsl_dataset_t's for the snapshots between first_obj
  * (exclusive) and last_obj (inclusive).  The list will be in reverse
@@ -3011,13 +2202,11 @@
  * snapshots back to this dataset's origin.
  */
 static int
-snaplist_make(dsl_pool_t *dp, boolean_t own,
-    uint64_t first_obj, uint64_t last_obj, list_t *l)
+snaplist_make(dsl_pool_t *dp,
+    uint64_t first_obj, uint64_t last_obj, list_t *l, void *tag)
 {
 	uint64_t obj = last_obj;
 
-	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock));
-
 	list_create(l, sizeof (struct promotenode),
 	    offsetof(struct promotenode, link));
 
@@ -3026,28 +2215,15 @@
 		struct promotenode *snap;
 		int err;
 
-		if (own) {
-			err = dsl_dataset_own_obj(dp, obj,
-			    0, snaplist_tag, &ds);
-			if (err == 0)
-				dsl_dataset_make_exclusive(ds, snaplist_tag);
-		} else {
-			err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds);
-		}
-		if (err == ENOENT) {
-			/* lost race with snapshot destroy */
-			struct promotenode *last = list_tail(l);
-			ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj);
-			obj = last->ds->ds_phys->ds_prev_snap_obj;
-			continue;
-		} else if (err) {
+		err = dsl_dataset_hold_obj(dp, obj, tag, &ds);
+		ASSERT(err != ENOENT);
+		if (err != 0)
 			return (err);
-		}
 
 		if (first_obj == 0)
 			first_obj = ds->ds_dir->dd_phys->dd_origin_obj;
 
-		snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP);
+		snap = kmem_alloc(sizeof (*snap), KM_SLEEP);
 		snap->ds = ds;
 		list_insert_tail(l, snap);
 		obj = ds->ds_phys->ds_prev_snap_obj;
@@ -3072,208 +2248,209 @@
 }
 
 static void
-snaplist_destroy(list_t *l, boolean_t own)
+snaplist_destroy(list_t *l, void *tag)
 {
 	struct promotenode *snap;
 
-	if (!l || !list_link_active(&l->list_head))
+	if (l == NULL || !list_link_active(&l->list_head))
 		return;
 
 	while ((snap = list_tail(l)) != NULL) {
 		list_remove(l, snap);
-		if (own)
-			dsl_dataset_disown(snap->ds, snaplist_tag);
-		else
-			dsl_dataset_rele(snap->ds, snaplist_tag);
-		kmem_free(snap, sizeof (struct promotenode));
+		dsl_dataset_rele(snap->ds, tag);
+		kmem_free(snap, sizeof (*snap));
 	}
 	list_destroy(l);
 }
 
-/*
- * Promote a clone.  Nomenclature note:
- * "clone" or "cds": the original clone which is being promoted
- * "origin" or "ods": the snapshot which is originally clone's origin
- * "origin head" or "ohds": the dataset which is the head
- * (filesystem/volume) for the origin
- * "origin origin": the origin of the origin's filesystem (typically
- * NULL, indicating that the clone is not a clone of a clone).
- */
-int
-dsl_dataset_promote(const char *name, char *conflsnap)
+static int
+promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag)
 {
-	dsl_dataset_t *ds;
+	int error;
 	dsl_dir_t *dd;
-	dsl_pool_t *dp;
-	dmu_object_info_t doi;
-	struct promotearg pa = { 0 };
 	struct promotenode *snap;
-	int err;
 
-	err = dsl_dataset_hold(name, FTAG, &ds);
-	if (err)
-		return (err);
-	dd = ds->ds_dir;
-	dp = dd->dd_pool;
+	error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag,
+	    &ddpa->ddpa_clone);
+	if (error != 0)
+		return (error);
+	dd = ddpa->ddpa_clone->ds_dir;
 
-	err = dmu_object_info(dp->dp_meta_objset,
-	    ds->ds_phys->ds_snapnames_zapobj, &doi);
-	if (err) {
-		dsl_dataset_rele(ds, FTAG);
-		return (err);
+	if (dsl_dataset_is_snapshot(ddpa->ddpa_clone) ||
+	    !dsl_dir_is_clone(dd)) {
+		dsl_dataset_rele(ddpa->ddpa_clone, tag);
+		return (SET_ERROR(EINVAL));
 	}
 
-	if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) {
-		dsl_dataset_rele(ds, FTAG);
-		return (EINVAL);
-	}
-
-	/*
-	 * We are going to inherit all the snapshots taken before our
-	 * origin (i.e., our new origin will be our parent's origin).
-	 * Take ownership of them so that we can rename them into our
-	 * namespace.
-	 */
-	rw_enter(&dp->dp_config_rwlock, RW_READER);
-
-	err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj,
-	    &pa.shared_snaps);
-	if (err != 0)
+	error = snaplist_make(dp, 0, dd->dd_phys->dd_origin_obj,
+	    &ddpa->shared_snaps, tag);
+	if (error != 0)
 		goto out;
 
-	err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps);
-	if (err != 0)
+	error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object,
+	    &ddpa->clone_snaps, tag);
+	if (error != 0)
 		goto out;
 
-	snap = list_head(&pa.shared_snaps);
+	snap = list_head(&ddpa->shared_snaps);
 	ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj);
-	err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj,
-	    snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps);
-	if (err != 0)
+	error = snaplist_make(dp, dd->dd_phys->dd_origin_obj,
+	    snap->ds->ds_dir->dd_phys->dd_head_dataset_obj,
+	    &ddpa->origin_snaps, tag);
+	if (error != 0)
 		goto out;
 
 	if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) {
-		err = dsl_dataset_hold_obj(dp,
+		error = dsl_dataset_hold_obj(dp,
 		    snap->ds->ds_dir->dd_phys->dd_origin_obj,
-		    FTAG, &pa.origin_origin);
-		if (err != 0)
+		    tag, &ddpa->origin_origin);
+		if (error != 0)
 			goto out;
 	}
-
 out:
-	rw_exit(&dp->dp_config_rwlock);
+	if (error != 0)
+		promote_rele(ddpa, tag);
+	return (error);
+}
 
+static void
+promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag)
+{
+	snaplist_destroy(&ddpa->shared_snaps, tag);
+	snaplist_destroy(&ddpa->clone_snaps, tag);
+	snaplist_destroy(&ddpa->origin_snaps, tag);
+	if (ddpa->origin_origin != NULL)
+		dsl_dataset_rele(ddpa->origin_origin, tag);
+	dsl_dataset_rele(ddpa->ddpa_clone, tag);
+}
+
+/*
+ * Promote a clone.
+ *
+ * If it fails due to a conflicting snapshot name, "conflsnap" will be filled
+ * in with the name.  (It must be at least MAXNAMELEN bytes long.)
+ */
+int
+dsl_dataset_promote(const char *name, char *conflsnap)
+{
+	dsl_dataset_promote_arg_t ddpa = { 0 };
+	uint64_t numsnaps;
+	int error;
+	objset_t *os;
+
 	/*
-	 * Add in 128x the snapnames zapobj size, since we will be moving
-	 * a bunch of snapnames to the promoted ds, and dirtying their
-	 * bonus buffers.
+	 * We will modify space proportional to the number of
+	 * snapshots.  Compute numsnaps.
 	 */
-	if (err == 0) {
-		err = dsl_sync_task_do(dp, dsl_dataset_promote_check,
-		    dsl_dataset_promote_sync, ds, &pa,
-		    2 + 2 * doi.doi_physical_blocks_512);
-		if (err && pa.err_ds && conflsnap)
-			(void) strncpy(conflsnap, pa.err_ds, MAXNAMELEN);
-	}
+	error = dmu_objset_hold(name, FTAG, &os);
+	if (error != 0)
+		return (error);
+	error = zap_count(dmu_objset_pool(os)->dp_meta_objset,
+	    dmu_objset_ds(os)->ds_phys->ds_snapnames_zapobj, &numsnaps);
+	dmu_objset_rele(os, FTAG);
+	if (error != 0)
+		return (error);
 
-	snaplist_destroy(&pa.shared_snaps, B_TRUE);
-	snaplist_destroy(&pa.clone_snaps, B_FALSE);
-	snaplist_destroy(&pa.origin_snaps, B_FALSE);
-	if (pa.origin_origin)
-		dsl_dataset_rele(pa.origin_origin, FTAG);
-	dsl_dataset_rele(ds, FTAG);
-	return (err);
+	ddpa.ddpa_clonename = name;
+	ddpa.err_ds = conflsnap;
+
+	return (dsl_sync_task(name, dsl_dataset_promote_check,
+	    dsl_dataset_promote_sync, &ddpa, 2 + numsnaps));
 }
 
-struct cloneswaparg {
-	dsl_dataset_t *cds; /* clone dataset */
-	dsl_dataset_t *ohds; /* origin's head dataset */
-	boolean_t force;
-	int64_t unused_refres_delta; /* change in unconsumed refreservation */
-};
-
-/* ARGSUSED */
-static int
-dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx)
+int
+dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
+    dsl_dataset_t *origin_head, boolean_t force)
 {
-	struct cloneswaparg *csa = arg1;
+	int64_t unused_refres_delta;
 
 	/* they should both be heads */
-	if (dsl_dataset_is_snapshot(csa->cds) ||
-	    dsl_dataset_is_snapshot(csa->ohds))
-		return (EINVAL);
+	if (dsl_dataset_is_snapshot(clone) ||
+	    dsl_dataset_is_snapshot(origin_head))
+		return (SET_ERROR(EINVAL));
 
 	/* the branch point should be just before them */
-	if (csa->cds->ds_prev != csa->ohds->ds_prev)
-		return (EINVAL);
+	if (clone->ds_prev != origin_head->ds_prev)
+		return (SET_ERROR(EINVAL));
 
-	/* cds should be the clone (unless they are unrelated) */
-	if (csa->cds->ds_prev != NULL &&
-	    csa->cds->ds_prev != csa->cds->ds_dir->dd_pool->dp_origin_snap &&
-	    csa->ohds->ds_object !=
-	    csa->cds->ds_prev->ds_phys->ds_next_snap_obj)
-		return (EINVAL);
+	/* clone should be the clone (unless they are unrelated) */
+	if (clone->ds_prev != NULL &&
+	    clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap &&
+	    origin_head->ds_object !=
+	    clone->ds_prev->ds_phys->ds_next_snap_obj)
+		return (SET_ERROR(EINVAL));
 
 	/* the clone should be a child of the origin */
-	if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir)
-		return (EINVAL);
+	if (clone->ds_dir->dd_parent != origin_head->ds_dir)
+		return (SET_ERROR(EINVAL));
 
-	/* ohds shouldn't be modified unless 'force' */
-	if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds))
-		return (ETXTBSY);
+	/* origin_head shouldn't be modified unless 'force' */
+	if (!force && dsl_dataset_modified_since_lastsnap(origin_head))
+		return (SET_ERROR(ETXTBSY));
 
-	/* adjust amount of any unconsumed refreservation */
-	csa->unused_refres_delta =
-	    (int64_t)MIN(csa->ohds->ds_reserved,
-	    csa->ohds->ds_phys->ds_unique_bytes) -
-	    (int64_t)MIN(csa->ohds->ds_reserved,
-	    csa->cds->ds_phys->ds_unique_bytes);
+	/* origin_head should have no long holds (e.g. is not mounted) */
+	if (dsl_dataset_long_held(origin_head))
+		return (SET_ERROR(EBUSY));
 
-	if (csa->unused_refres_delta > 0 &&
-	    csa->unused_refres_delta >
-	    dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE))
-		return (ENOSPC);
+	/* check amount of any unconsumed refreservation */
+	unused_refres_delta =
+	    (int64_t)MIN(origin_head->ds_reserved,
+	    origin_head->ds_phys->ds_unique_bytes) -
+	    (int64_t)MIN(origin_head->ds_reserved,
+	    clone->ds_phys->ds_unique_bytes);
 
-	if (csa->ohds->ds_quota != 0 &&
-	    csa->cds->ds_phys->ds_unique_bytes > csa->ohds->ds_quota)
-		return (EDQUOT);
+	if (unused_refres_delta > 0 &&
+	    unused_refres_delta >
+	    dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE))
+		return (SET_ERROR(ENOSPC));
 
+	/* clone can't be over the head's refquota */
+	if (origin_head->ds_quota != 0 &&
+	    clone->ds_phys->ds_referenced_bytes > origin_head->ds_quota)
+		return (SET_ERROR(EDQUOT));
+
 	return (0);
 }
 
-/* ARGSUSED */
-static void
-dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+void
+dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
+    dsl_dataset_t *origin_head, dmu_tx_t *tx)
 {
-	struct cloneswaparg *csa = arg1;
-	dsl_pool_t *dp = csa->cds->ds_dir->dd_pool;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	int64_t unused_refres_delta;
 
-	ASSERT(csa->cds->ds_reserved == 0);
-	ASSERT(csa->ohds->ds_quota == 0 ||
-	    csa->cds->ds_phys->ds_unique_bytes <= csa->ohds->ds_quota);
+	ASSERT(clone->ds_reserved == 0);
+	ASSERT(origin_head->ds_quota == 0 ||
+	    clone->ds_phys->ds_unique_bytes <= origin_head->ds_quota);
 
-	dmu_buf_will_dirty(csa->cds->ds_dbuf, tx);
-	dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx);
+	dmu_buf_will_dirty(clone->ds_dbuf, tx);
+	dmu_buf_will_dirty(origin_head->ds_dbuf, tx);
 
-	if (csa->cds->ds_objset != NULL) {
-		dmu_objset_evict(csa->cds->ds_objset);
-		csa->cds->ds_objset = NULL;
+	if (clone->ds_objset != NULL) {
+		dmu_objset_evict(clone->ds_objset);
+		clone->ds_objset = NULL;
 	}
 
-	if (csa->ohds->ds_objset != NULL) {
-		dmu_objset_evict(csa->ohds->ds_objset);
-		csa->ohds->ds_objset = NULL;
+	if (origin_head->ds_objset != NULL) {
+		dmu_objset_evict(origin_head->ds_objset);
+		origin_head->ds_objset = NULL;
 	}
 
+	unused_refres_delta =
+	    (int64_t)MIN(origin_head->ds_reserved,
+	    origin_head->ds_phys->ds_unique_bytes) -
+	    (int64_t)MIN(origin_head->ds_reserved,
+	    clone->ds_phys->ds_unique_bytes);
+
 	/*
 	 * Reset origin's unique bytes, if it exists.
 	 */
-	if (csa->cds->ds_prev) {
-		dsl_dataset_t *origin = csa->cds->ds_prev;
+	if (clone->ds_prev) {
+		dsl_dataset_t *origin = clone->ds_prev;
 		uint64_t comp, uncomp;
 
 		dmu_buf_will_dirty(origin->ds_dbuf, tx);
-		dsl_deadlist_space_range(&csa->cds->ds_deadlist,
+		dsl_deadlist_space_range(&clone->ds_deadlist,
 		    origin->ds_phys->ds_prev_snap_txg, UINT64_MAX,
 		    &origin->ds_phys->ds_unique_bytes, &comp, &uncomp);
 	}
@@ -3281,9 +2458,9 @@
 	/* swap blkptrs */
 	{
 		blkptr_t tmp;
-		tmp = csa->ohds->ds_phys->ds_bp;
-		csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp;
-		csa->cds->ds_phys->ds_bp = tmp;
+		tmp = origin_head->ds_phys->ds_bp;
+		origin_head->ds_phys->ds_bp = clone->ds_phys->ds_bp;
+		clone->ds_phys->ds_bp = tmp;
 	}
 
 	/* set dd_*_bytes */
@@ -3292,25 +2469,25 @@
 		uint64_t cdl_used, cdl_comp, cdl_uncomp;
 		uint64_t odl_used, odl_comp, odl_uncomp;
 
-		ASSERT3U(csa->cds->ds_dir->dd_phys->
+		ASSERT3U(clone->ds_dir->dd_phys->
 		    dd_used_breakdown[DD_USED_SNAP], ==, 0);
 
-		dsl_deadlist_space(&csa->cds->ds_deadlist,
+		dsl_deadlist_space(&clone->ds_deadlist,
 		    &cdl_used, &cdl_comp, &cdl_uncomp);
-		dsl_deadlist_space(&csa->ohds->ds_deadlist,
+		dsl_deadlist_space(&origin_head->ds_deadlist,
 		    &odl_used, &odl_comp, &odl_uncomp);
 
-		dused = csa->cds->ds_phys->ds_referenced_bytes + cdl_used -
-		    (csa->ohds->ds_phys->ds_referenced_bytes + odl_used);
-		dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
-		    (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
-		duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
+		dused = clone->ds_phys->ds_referenced_bytes + cdl_used -
+		    (origin_head->ds_phys->ds_referenced_bytes + odl_used);
+		dcomp = clone->ds_phys->ds_compressed_bytes + cdl_comp -
+		    (origin_head->ds_phys->ds_compressed_bytes + odl_comp);
+		duncomp = clone->ds_phys->ds_uncompressed_bytes +
 		    cdl_uncomp -
-		    (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);
+		    (origin_head->ds_phys->ds_uncompressed_bytes + odl_uncomp);
 
-		dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD,
+		dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_HEAD,
 		    dused, dcomp, duncomp, tx);
-		dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD,
+		dsl_dir_diduse_space(clone->ds_dir, DD_USED_HEAD,
 		    -dused, -dcomp, -duncomp, tx);
 
 		/*
@@ -3319,83 +2496,46 @@
 		 * deadlist (since that's the only thing that's
 		 * changing that affects the snapused).
 		 */
-		dsl_deadlist_space_range(&csa->cds->ds_deadlist,
-		    csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX,
+		dsl_deadlist_space_range(&clone->ds_deadlist,
+		    origin_head->ds_dir->dd_origin_txg, UINT64_MAX,
 		    &cdl_used, &cdl_comp, &cdl_uncomp);
-		dsl_deadlist_space_range(&csa->ohds->ds_deadlist,
-		    csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX,
+		dsl_deadlist_space_range(&origin_head->ds_deadlist,
+		    origin_head->ds_dir->dd_origin_txg, UINT64_MAX,
 		    &odl_used, &odl_comp, &odl_uncomp);
-		dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used,
+		dsl_dir_transfer_space(origin_head->ds_dir, cdl_used - odl_used,
 		    DD_USED_HEAD, DD_USED_SNAP, tx);
 	}
 
 	/* swap ds_*_bytes */
-	SWITCH64(csa->ohds->ds_phys->ds_referenced_bytes,
-	    csa->cds->ds_phys->ds_referenced_bytes);
-	SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
-	    csa->cds->ds_phys->ds_compressed_bytes);
-	SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
-	    csa->cds->ds_phys->ds_uncompressed_bytes);
-	SWITCH64(csa->ohds->ds_phys->ds_unique_bytes,
-	    csa->cds->ds_phys->ds_unique_bytes);
+	SWITCH64(origin_head->ds_phys->ds_referenced_bytes,
+	    clone->ds_phys->ds_referenced_bytes);
+	SWITCH64(origin_head->ds_phys->ds_compressed_bytes,
+	    clone->ds_phys->ds_compressed_bytes);
+	SWITCH64(origin_head->ds_phys->ds_uncompressed_bytes,
+	    clone->ds_phys->ds_uncompressed_bytes);
+	SWITCH64(origin_head->ds_phys->ds_unique_bytes,
+	    clone->ds_phys->ds_unique_bytes);
 
 	/* apply any parent delta for change in unconsumed refreservation */
-	dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV,
-	    csa->unused_refres_delta, 0, 0, tx);
+	dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_REFRSRV,
+	    unused_refres_delta, 0, 0, tx);
 
 	/*
 	 * Swap deadlists.
 	 */
-	dsl_deadlist_close(&csa->cds->ds_deadlist);
-	dsl_deadlist_close(&csa->ohds->ds_deadlist);
-	SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj,
-	    csa->cds->ds_phys->ds_deadlist_obj);
-	dsl_deadlist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset,
-	    csa->cds->ds_phys->ds_deadlist_obj);
-	dsl_deadlist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset,
-	    csa->ohds->ds_phys->ds_deadlist_obj);
+	dsl_deadlist_close(&clone->ds_deadlist);
+	dsl_deadlist_close(&origin_head->ds_deadlist);
+	SWITCH64(origin_head->ds_phys->ds_deadlist_obj,
+	    clone->ds_phys->ds_deadlist_obj);
+	dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset,
+	    clone->ds_phys->ds_deadlist_obj);
+	dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset,
+	    origin_head->ds_phys->ds_deadlist_obj);
 
-	dsl_scan_ds_clone_swapped(csa->ohds, csa->cds, tx);
-}
+	dsl_scan_ds_clone_swapped(origin_head, clone, tx);
 
-/*
- * Swap 'clone' with its origin head datasets.  Used at the end of "zfs
- * recv" into an existing fs to swizzle the file system to the new
- * version, and by "zfs rollback".  Can also be used to swap two
- * independent head datasets if neither has any snapshots.
- */
-int
-dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
-    boolean_t force)
-{
-	struct cloneswaparg csa;
-	int error;
-
-	ASSERT(clone->ds_owner);
-	ASSERT(origin_head->ds_owner);
-retry:
-	/*
-	 * Need exclusive access for the swap. If we're swapping these
-	 * datasets back after an error, we already hold the locks.
-	 */
-	if (!RW_WRITE_HELD(&clone->ds_rwlock))
-		rw_enter(&clone->ds_rwlock, RW_WRITER);
-	if (!RW_WRITE_HELD(&origin_head->ds_rwlock) &&
-	    !rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) {
-		rw_exit(&clone->ds_rwlock);
-		rw_enter(&origin_head->ds_rwlock, RW_WRITER);
-		if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) {
-			rw_exit(&origin_head->ds_rwlock);
-			goto retry;
-		}
-	}
-	csa.cds = clone;
-	csa.ohds = origin_head;
-	csa.force = force;
-	error = dsl_sync_task_do(clone->ds_dir->dd_pool,
-	    dsl_dataset_clone_swap_check,
-	    dsl_dataset_clone_swap_sync, &csa, NULL, 9);
-	return (error);
+	spa_history_log_internal_ds(clone, "clone swap", tx,
+	    "parent=%s", origin_head->ds_dir->dd_myname);
 }
 
 /*
@@ -3405,21 +2545,20 @@
 int
 dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf)
 {
-	spa_t *spa;
 	dsl_pool_t *dp;
 	dsl_dataset_t *ds;
 	int error;
 
-	if ((error = spa_open(pname, &spa, FTAG)) != 0)
+	error = dsl_pool_hold(pname, FTAG, &dp);
+	if (error != 0)
 		return (error);
-	dp = spa_get_dsl(spa);
-	rw_enter(&dp->dp_config_rwlock, RW_READER);
-	if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) {
+
+	error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds);
+	if (error == 0) {
 		dsl_dataset_name(ds, buf);
 		dsl_dataset_rele(ds, FTAG);
 	}
-	rw_exit(&dp->dp_config_rwlock);
-	spa_close(spa, FTAG);
+	dsl_pool_rele(dp, FTAG);
 
 	return (error);
 }
@@ -3463,9 +2602,9 @@
 	if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) {
 		if (inflight > 0 ||
 		    ds->ds_phys->ds_referenced_bytes < ds->ds_quota)
-			error = ERESTART;
+			error = SET_ERROR(ERESTART);
 		else
-			error = EDQUOT;
+			error = SET_ERROR(EDQUOT);
 	}
 	mutex_exit(&ds->ds_lock);
 
@@ -3472,102 +2611,134 @@
 	return (error);
 }
 
+typedef struct dsl_dataset_set_qr_arg {
+	const char *ddsqra_name;
+	zprop_source_t ddsqra_source;
+	uint64_t ddsqra_value;
+} dsl_dataset_set_qr_arg_t;
+
+
 /* ARGSUSED */
 static int
-dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx)
 {
-	dsl_dataset_t *ds = arg1;
-	dsl_prop_setarg_t *psa = arg2;
-	int err;
+	dsl_dataset_set_qr_arg_t *ddsqra = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dataset_t *ds;
+	int error;
+	uint64_t newval;
 
-	if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA)
-		return (ENOTSUP);
+	if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA)
+		return (SET_ERROR(ENOTSUP));
 
-	if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
-		return (err);
+	error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
+	if (error != 0)
+		return (error);
 
-	if (psa->psa_effective_value == 0)
+	if (dsl_dataset_is_snapshot(ds)) {
+		dsl_dataset_rele(ds, FTAG);
+		return (SET_ERROR(EINVAL));
+	}
+
+	error = dsl_prop_predict(ds->ds_dir,
+	    zfs_prop_to_name(ZFS_PROP_REFQUOTA),
+	    ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
+	if (error != 0) {
+		dsl_dataset_rele(ds, FTAG);
+		return (error);
+	}
+
+	if (newval == 0) {
+		dsl_dataset_rele(ds, FTAG);
 		return (0);
+	}
 
-	if (psa->psa_effective_value < ds->ds_phys->ds_referenced_bytes ||
-	    psa->psa_effective_value < ds->ds_reserved)
-		return (ENOSPC);
+	if (newval < ds->ds_phys->ds_referenced_bytes ||
+	    newval < ds->ds_reserved) {
+		dsl_dataset_rele(ds, FTAG);
+		return (SET_ERROR(ENOSPC));
+	}
 
+	dsl_dataset_rele(ds, FTAG);
 	return (0);
 }
 
-extern void dsl_prop_set_sync(void *, void *, dmu_tx_t *);
-
-void
-dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+static void
+dsl_dataset_set_refquota_sync(void *arg, dmu_tx_t *tx)
 {
-	dsl_dataset_t *ds = arg1;
-	dsl_prop_setarg_t *psa = arg2;
-	uint64_t effective_value = psa->psa_effective_value;
+	dsl_dataset_set_qr_arg_t *ddsqra = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dataset_t *ds;
+	uint64_t newval;
 
-	dsl_prop_set_sync(ds, psa, tx);
-	DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa);
+	VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
 
-	if (ds->ds_quota != effective_value) {
+	dsl_prop_set_sync_impl(ds,
+	    zfs_prop_to_name(ZFS_PROP_REFQUOTA),
+	    ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
+	    &ddsqra->ddsqra_value, tx);
+
+	VERIFY0(dsl_prop_get_int_ds(ds,
+	    zfs_prop_to_name(ZFS_PROP_REFQUOTA), &newval));
+
+	if (ds->ds_quota != newval) {
 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
-		ds->ds_quota = effective_value;
+		ds->ds_quota = newval;
 	}
+	dsl_dataset_rele(ds, FTAG);
 }
 
 int
-dsl_dataset_set_quota(const char *dsname, zprop_source_t source, uint64_t quota)
+dsl_dataset_set_refquota(const char *dsname, zprop_source_t source,
+    uint64_t refquota)
 {
-	dsl_dataset_t *ds;
-	dsl_prop_setarg_t psa;
-	int err;
+	dsl_dataset_set_qr_arg_t ddsqra;
 
-	dsl_prop_setarg_init_uint64(&psa, "refquota", source, &quota);
+	ddsqra.ddsqra_name = dsname;
+	ddsqra.ddsqra_source = source;
+	ddsqra.ddsqra_value = refquota;
 
-	err = dsl_dataset_hold(dsname, FTAG, &ds);
-	if (err)
-		return (err);
-
-	/*
-	 * If someone removes a file, then tries to set the quota, we
-	 * want to make sure the file freeing takes effect.
-	 */
-	txg_wait_open(ds->ds_dir->dd_pool, 0);
-
-	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
-	    dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync,
-	    ds, &psa, 0);
-
-	dsl_dataset_rele(ds, FTAG);
-	return (err);
+	return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check,
+	    dsl_dataset_set_refquota_sync, &ddsqra, 0));
 }
 
 static int
-dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx)
 {
-	dsl_dataset_t *ds = arg1;
-	dsl_prop_setarg_t *psa = arg2;
-	uint64_t effective_value;
-	uint64_t unique;
-	int err;
+	dsl_dataset_set_qr_arg_t *ddsqra = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dataset_t *ds;
+	int error;
+	uint64_t newval, unique;
 
-	if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
-	    SPA_VERSION_REFRESERVATION)
-		return (ENOTSUP);
+	if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION)
+		return (SET_ERROR(ENOTSUP));
 
-	if (dsl_dataset_is_snapshot(ds))
-		return (EINVAL);
+	error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
+	if (error != 0)
+		return (error);
 
-	if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
-		return (err);
+	if (dsl_dataset_is_snapshot(ds)) {
+		dsl_dataset_rele(ds, FTAG);
+		return (SET_ERROR(EINVAL));
+	}
 
-	effective_value = psa->psa_effective_value;
+	error = dsl_prop_predict(ds->ds_dir,
+	    zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
+	    ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
+	if (error != 0) {
+		dsl_dataset_rele(ds, FTAG);
+		return (error);
+	}
 
 	/*
 	 * If we are doing the preliminary check in open context, the
 	 * space estimates may be inaccurate.
 	 */
-	if (!dmu_tx_is_syncing(tx))
+	if (!dmu_tx_is_syncing(tx)) {
+		dsl_dataset_rele(ds, FTAG);
 		return (0);
+	}
 
 	mutex_enter(&ds->ds_lock);
 	if (!DS_UNIQUE_IS_ACCURATE(ds))
@@ -3575,41 +2746,44 @@
 	unique = ds->ds_phys->ds_unique_bytes;
 	mutex_exit(&ds->ds_lock);
 
-	if (MAX(unique, effective_value) > MAX(unique, ds->ds_reserved)) {
-		uint64_t delta = MAX(unique, effective_value) -
+	if (MAX(unique, newval) > MAX(unique, ds->ds_reserved)) {
+		uint64_t delta = MAX(unique, newval) -
 		    MAX(unique, ds->ds_reserved);
 
-		if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
-			return (ENOSPC);
-		if (ds->ds_quota > 0 &&
-		    effective_value > ds->ds_quota)
-			return (ENOSPC);
+		if (delta >
+		    dsl_dir_space_available(ds->ds_dir, NULL, 0, B_TRUE) ||
+		    (ds->ds_quota > 0 && newval > ds->ds_quota)) {
+			dsl_dataset_rele(ds, FTAG);
+			return (SET_ERROR(ENOSPC));
+		}
 	}
 
+	dsl_dataset_rele(ds, FTAG);
 	return (0);
 }
 
-static void
-dsl_dataset_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+void
+dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds,
+    zprop_source_t source, uint64_t value, dmu_tx_t *tx)
 {
-	dsl_dataset_t *ds = arg1;
-	dsl_prop_setarg_t *psa = arg2;
-	uint64_t effective_value = psa->psa_effective_value;
+	uint64_t newval;
 	uint64_t unique;
 	int64_t delta;
 
-	dsl_prop_set_sync(ds, psa, tx);
-	DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa);
+	dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
+	    source, sizeof (value), 1, &value, tx);
 
+	VERIFY0(dsl_prop_get_int_ds(ds,
+	    zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &newval));
+
 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
-
 	mutex_enter(&ds->ds_dir->dd_lock);
 	mutex_enter(&ds->ds_lock);
 	ASSERT(DS_UNIQUE_IS_ACCURATE(ds));
 	unique = ds->ds_phys->ds_unique_bytes;
-	delta = MAX(0, (int64_t)(effective_value - unique)) -
+	delta = MAX(0, (int64_t)(newval - unique)) -
 	    MAX(0, (int64_t)(ds->ds_reserved - unique));
-	ds->ds_reserved = effective_value;
+	ds->ds_reserved = newval;
 	mutex_exit(&ds->ds_lock);
 
 	dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx);
@@ -3616,601 +2790,34 @@
 	mutex_exit(&ds->ds_dir->dd_lock);
 }
 
-int
-dsl_dataset_set_reservation(const char *dsname, zprop_source_t source,
-    uint64_t reservation)
+static void
+dsl_dataset_set_refreservation_sync(void *arg, dmu_tx_t *tx)
 {
+	dsl_dataset_set_qr_arg_t *ddsqra = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
 	dsl_dataset_t *ds;
-	dsl_prop_setarg_t psa;
-	int err;
 
-	dsl_prop_setarg_init_uint64(&psa, "refreservation", source,
-	    &reservation);
-
-	err = dsl_dataset_hold(dsname, FTAG, &ds);
-	if (err)
-		return (err);
-
-	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
-	    dsl_dataset_set_reservation_check,
-	    dsl_dataset_set_reservation_sync, ds, &psa, 0);
-
+	VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
+	dsl_dataset_set_refreservation_sync_impl(ds,
+	    ddsqra->ddsqra_source, ddsqra->ddsqra_value, tx);
 	dsl_dataset_rele(ds, FTAG);
-	return (err);
 }
 
-typedef struct zfs_hold_cleanup_arg {
-	dsl_pool_t *dp;
-	uint64_t dsobj;
-	char htag[MAXNAMELEN];
-} zfs_hold_cleanup_arg_t;
-
-static void
-dsl_dataset_user_release_onexit(void *arg)
-{
-	zfs_hold_cleanup_arg_t *ca = arg;
-
-	(void) dsl_dataset_user_release_tmp(ca->dp, ca->dsobj, ca->htag,
-	    B_TRUE);
-	kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
-}
-
-void
-dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
-    minor_t minor)
-{
-	zfs_hold_cleanup_arg_t *ca;
-
-	ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP);
-	ca->dp = ds->ds_dir->dd_pool;
-	ca->dsobj = ds->ds_object;
-	(void) strlcpy(ca->htag, htag, sizeof (ca->htag));
-	VERIFY3U(0, ==, zfs_onexit_add_cb(minor,
-	    dsl_dataset_user_release_onexit, ca, NULL));
-}
-
-/*
- * If you add new checks here, you may need to add
- * additional checks to the "temporary" case in
- * snapshot_check() in dmu_objset.c.
- */
-static int
-dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
-	dsl_dataset_t *ds = arg1;
-	struct dsl_ds_holdarg *ha = arg2;
-	char *htag = ha->htag;
-	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
-	int error = 0;
-
-	if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS)
-		return (ENOTSUP);
-
-	if (!dsl_dataset_is_snapshot(ds))
-		return (EINVAL);
-
-	/* tags must be unique */
-	mutex_enter(&ds->ds_lock);
-	if (ds->ds_phys->ds_userrefs_obj) {
-		error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, htag,
-		    8, 1, tx);
-		if (error == 0)
-			error = EEXIST;
-		else if (error == ENOENT)
-			error = 0;
-	}
-	mutex_exit(&ds->ds_lock);
-
-	if (error == 0 && ha->temphold &&
-	    strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
-		error = E2BIG;
-
-	return (error);
-}
-
-void
-dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
-	dsl_dataset_t *ds = arg1;
-	struct dsl_ds_holdarg *ha = arg2;
-	char *htag = ha->htag;
-	dsl_pool_t *dp = ds->ds_dir->dd_pool;
-	objset_t *mos = dp->dp_meta_objset;
-	uint64_t now = gethrestime_sec();
-	uint64_t zapobj;
-
-	mutex_enter(&ds->ds_lock);
-	if (ds->ds_phys->ds_userrefs_obj == 0) {
-		/*
-		 * This is the first user hold for this dataset.  Create
-		 * the userrefs zap object.
-		 */
-		dmu_buf_will_dirty(ds->ds_dbuf, tx);
-		zapobj = ds->ds_phys->ds_userrefs_obj =
-		    zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
-	} else {
-		zapobj = ds->ds_phys->ds_userrefs_obj;
-	}
-	ds->ds_userrefs++;
-	mutex_exit(&ds->ds_lock);
-
-	VERIFY(0 == zap_add(mos, zapobj, htag, 8, 1, &now, tx));
-
-	if (ha->temphold) {
-		VERIFY(0 == dsl_pool_user_hold(dp, ds->ds_object,
-		    htag, &now, tx));
-	}
-
-	spa_history_log_internal(LOG_DS_USER_HOLD,
-	    dp->dp_spa, tx, "<%s> temp = %d dataset = %llu", htag,
-	    (int)ha->temphold, ds->ds_object);
-}
-
-static int
-dsl_dataset_user_hold_one(const char *dsname, void *arg)
-{
-	struct dsl_ds_holdarg *ha = arg;
-	dsl_dataset_t *ds;
-	int error;
-	char *name;
-
-	/* alloc a buffer to hold dsname at snapname plus terminating NULL */
-	name = kmem_asprintf("%s@%s", dsname, ha->snapname);
-	error = dsl_dataset_hold(name, ha->dstg, &ds);
-	strfree(name);
-	if (error == 0) {
-		ha->gotone = B_TRUE;
-		dsl_sync_task_create(ha->dstg, dsl_dataset_user_hold_check,
-		    dsl_dataset_user_hold_sync, ds, ha, 0);
-	} else if (error == ENOENT && ha->recursive) {
-		error = 0;
-	} else {
-		(void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
-	}
-	return (error);
-}
-
 int
-dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag,
-    boolean_t temphold)
+dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source,
+    uint64_t refreservation)
 {
-	struct dsl_ds_holdarg *ha;
-	int error;
+	dsl_dataset_set_qr_arg_t ddsqra;
 
-	ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
-	ha->htag = htag;
-	ha->temphold = temphold;
-	error = dsl_sync_task_do(ds->ds_dir->dd_pool,
-	    dsl_dataset_user_hold_check, dsl_dataset_user_hold_sync,
-	    ds, ha, 0);
-	kmem_free(ha, sizeof (struct dsl_ds_holdarg));
+	ddsqra.ddsqra_name = dsname;
+	ddsqra.ddsqra_source = source;
+	ddsqra.ddsqra_value = refreservation;
 
-	return (error);
+	return (dsl_sync_task(dsname, dsl_dataset_set_refreservation_check,
+	    dsl_dataset_set_refreservation_sync, &ddsqra, 0));
 }
 
-int
-dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
-    boolean_t recursive, boolean_t temphold, int cleanup_fd)
-{
-	struct dsl_ds_holdarg *ha;
-	dsl_sync_task_t *dst;
-	spa_t *spa;
-	int error;
-	minor_t minor = 0;
-
-	if (cleanup_fd != -1) {
-		/* Currently we only support cleanup-on-exit of tempholds. */
-		if (!temphold)
-			return (EINVAL);
-		error = zfs_onexit_fd_hold(cleanup_fd, &minor);
-		if (error)
-			return (error);
-	}
-
-	ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
-
-	(void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
-
-	error = spa_open(dsname, &spa, FTAG);
-	if (error) {
-		kmem_free(ha, sizeof (struct dsl_ds_holdarg));
-		if (cleanup_fd != -1)
-			zfs_onexit_fd_rele(cleanup_fd);
-		return (error);
-	}
-
-	ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
-	ha->htag = htag;
-	ha->snapname = snapname;
-	ha->recursive = recursive;
-	ha->temphold = temphold;
-
-	if (recursive) {
-		error = dmu_objset_find(dsname, dsl_dataset_user_hold_one,
-		    ha, DS_FIND_CHILDREN);
-	} else {
-		error = dsl_dataset_user_hold_one(dsname, ha);
-	}
-	if (error == 0)
-		error = dsl_sync_task_group_wait(ha->dstg);
-
-	for (dst = list_head(&ha->dstg->dstg_tasks); dst;
-	    dst = list_next(&ha->dstg->dstg_tasks, dst)) {
-		dsl_dataset_t *ds = dst->dst_arg1;
-
-		if (dst->dst_err) {
-			dsl_dataset_name(ds, ha->failed);
-			*strchr(ha->failed, '@') = '\0';
-		} else if (error == 0 && minor != 0 && temphold) {
-			/*
-			 * If this hold is to be released upon process exit,
-			 * register that action now.
-			 */
-			dsl_register_onexit_hold_cleanup(ds, htag, minor);
-		}
-		dsl_dataset_rele(ds, ha->dstg);
-	}
-
-	if (error == 0 && recursive && !ha->gotone)
-		error = ENOENT;
-
-	if (error)
-		(void) strlcpy(dsname, ha->failed, sizeof (ha->failed));
-
-	dsl_sync_task_group_destroy(ha->dstg);
-
-	kmem_free(ha, sizeof (struct dsl_ds_holdarg));
-	spa_close(spa, FTAG);
-	if (cleanup_fd != -1)
-		zfs_onexit_fd_rele(cleanup_fd);
-	return (error);
-}
-
-struct dsl_ds_releasearg {
-	dsl_dataset_t *ds;
-	const char *htag;
-	boolean_t own;		/* do we own or just hold ds? */
-};
-
-static int
-dsl_dataset_release_might_destroy(dsl_dataset_t *ds, const char *htag,
-    boolean_t *might_destroy)
-{
-	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
-	uint64_t zapobj;
-	uint64_t tmp;
-	int error;
-
-	*might_destroy = B_FALSE;
-
-	mutex_enter(&ds->ds_lock);
-	zapobj = ds->ds_phys->ds_userrefs_obj;
-	if (zapobj == 0) {
-		/* The tag can't possibly exist */
-		mutex_exit(&ds->ds_lock);
-		return (ESRCH);
-	}
-
-	/* Make sure the tag exists */
-	error = zap_lookup(mos, zapobj, htag, 8, 1, &tmp);
-	if (error) {
-		mutex_exit(&ds->ds_lock);
-		if (error == ENOENT)
-			error = ESRCH;
-		return (error);
-	}
-
-	if (ds->ds_userrefs == 1 && ds->ds_phys->ds_num_children == 1 &&
-	    DS_IS_DEFER_DESTROY(ds))
-		*might_destroy = B_TRUE;
-
-	mutex_exit(&ds->ds_lock);
-	return (0);
-}
-
-static int
-dsl_dataset_user_release_check(void *arg1, void *tag, dmu_tx_t *tx)
-{
-	struct dsl_ds_releasearg *ra = arg1;
-	dsl_dataset_t *ds = ra->ds;
-	boolean_t might_destroy;
-	int error;
-
-	if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS)
-		return (ENOTSUP);
-
-	error = dsl_dataset_release_might_destroy(ds, ra->htag, &might_destroy);
-	if (error)
-		return (error);
-
-	if (might_destroy) {
-		struct dsl_ds_destroyarg dsda = {0};
-
-		if (dmu_tx_is_syncing(tx)) {
-			/*
-			 * If we're not prepared to remove the snapshot,
-			 * we can't allow the release to happen right now.
-			 */
-			if (!ra->own)
-				return (EBUSY);
-		}
-		dsda.ds = ds;
-		dsda.releasing = B_TRUE;
-		return (dsl_dataset_destroy_check(&dsda, tag, tx));
-	}
-
-	return (0);
-}
-
-static void
-dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx)
-{
-	struct dsl_ds_releasearg *ra = arg1;
-	dsl_dataset_t *ds = ra->ds;
-	dsl_pool_t *dp = ds->ds_dir->dd_pool;
-	objset_t *mos = dp->dp_meta_objset;
-	uint64_t zapobj;
-	uint64_t dsobj = ds->ds_object;
-	uint64_t refs;
-	int error;
-
-	mutex_enter(&ds->ds_lock);
-	ds->ds_userrefs--;
-	refs = ds->ds_userrefs;
-	mutex_exit(&ds->ds_lock);
-	error = dsl_pool_user_release(dp, ds->ds_object, ra->htag, tx);
-	VERIFY(error == 0 || error == ENOENT);
-	zapobj = ds->ds_phys->ds_userrefs_obj;
-	VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx));
-
-	spa_history_log_internal(LOG_DS_USER_RELEASE,
-	    dp->dp_spa, tx, "<%s> %lld dataset = %llu",
-	    ra->htag, (longlong_t)refs, dsobj);
-
-	if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 &&
-	    DS_IS_DEFER_DESTROY(ds)) {
-		struct dsl_ds_destroyarg dsda = {0};
-
-		ASSERT(ra->own);
-		dsda.ds = ds;
-		dsda.releasing = B_TRUE;
-		/* We already did the destroy_check */
-		dsl_dataset_destroy_sync(&dsda, tag, tx);
-	}
-}
-
-static int
-dsl_dataset_user_release_one(const char *dsname, void *arg)
-{
-	struct dsl_ds_holdarg *ha = arg;
-	struct dsl_ds_releasearg *ra;
-	dsl_dataset_t *ds;
-	int error;
-	void *dtag = ha->dstg;
-	char *name;
-	boolean_t own = B_FALSE;
-	boolean_t might_destroy;
-
-	/* alloc a buffer to hold dsname at snapname, plus the terminating NULL */
-	name = kmem_asprintf("%s@%s", dsname, ha->snapname);
-	error = dsl_dataset_hold(name, dtag, &ds);
-	strfree(name);
-	if (error == ENOENT && ha->recursive)
-		return (0);
-	(void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
-	if (error)
-		return (error);
-
-	ha->gotone = B_TRUE;
-
-	ASSERT(dsl_dataset_is_snapshot(ds));
-
-	error = dsl_dataset_release_might_destroy(ds, ha->htag, &might_destroy);
-	if (error) {
-		dsl_dataset_rele(ds, dtag);
-		return (error);
-	}
-
-	if (might_destroy) {
-#ifdef _KERNEL
-		name = kmem_asprintf("%s@%s", dsname, ha->snapname);
-		error = zfs_unmount_snap(name, NULL);
-		strfree(name);
-		if (error) {
-			dsl_dataset_rele(ds, dtag);
-			return (error);
-		}
-#endif
-		if (!dsl_dataset_tryown(ds, B_TRUE, dtag)) {
-			dsl_dataset_rele(ds, dtag);
-			return (EBUSY);
-		} else {
-			own = B_TRUE;
-			dsl_dataset_make_exclusive(ds, dtag);
-		}
-	}
-
-	ra = kmem_alloc(sizeof (struct dsl_ds_releasearg), KM_SLEEP);
-	ra->ds = ds;
-	ra->htag = ha->htag;
-	ra->own = own;
-	dsl_sync_task_create(ha->dstg, dsl_dataset_user_release_check,
-	    dsl_dataset_user_release_sync, ra, dtag, 0);
-
-	return (0);
-}
-
-int
-dsl_dataset_user_release(char *dsname, char *snapname, char *htag,
-    boolean_t recursive)
-{
-	struct dsl_ds_holdarg *ha;
-	dsl_sync_task_t *dst;
-	spa_t *spa;
-	int error;
-
-top:
-	ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
-
-	(void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
-
-	error = spa_open(dsname, &spa, FTAG);
-	if (error) {
-		kmem_free(ha, sizeof (struct dsl_ds_holdarg));
-		return (error);
-	}
-
-	ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
-	ha->htag = htag;
-	ha->snapname = snapname;
-	ha->recursive = recursive;
-	if (recursive) {
-		error = dmu_objset_find(dsname, dsl_dataset_user_release_one,
-		    ha, DS_FIND_CHILDREN);
-	} else {
-		error = dsl_dataset_user_release_one(dsname, ha);
-	}
-	if (error == 0)
-		error = dsl_sync_task_group_wait(ha->dstg);
-
-	for (dst = list_head(&ha->dstg->dstg_tasks); dst;
-	    dst = list_next(&ha->dstg->dstg_tasks, dst)) {
-		struct dsl_ds_releasearg *ra = dst->dst_arg1;
-		dsl_dataset_t *ds = ra->ds;
-
-		if (dst->dst_err)
-			dsl_dataset_name(ds, ha->failed);
-
-		if (ra->own)
-			dsl_dataset_disown(ds, ha->dstg);
-		else
-			dsl_dataset_rele(ds, ha->dstg);
-
-		kmem_free(ra, sizeof (struct dsl_ds_releasearg));
-	}
-
-	if (error == 0 && recursive && !ha->gotone)
-		error = ENOENT;
-
-	if (error && error != EBUSY)
-		(void) strlcpy(dsname, ha->failed, sizeof (ha->failed));
-
-	dsl_sync_task_group_destroy(ha->dstg);
-	kmem_free(ha, sizeof (struct dsl_ds_holdarg));
-	spa_close(spa, FTAG);
-
-	/*
-	 * We can get EBUSY if we were racing with deferred destroy and
-	 * dsl_dataset_user_release_check() hadn't done the necessary
-	 * open context setup.  We can also get EBUSY if we're racing
-	 * with destroy and that thread is the ds_owner.  Either way
-	 * the busy condition should be transient, and we should retry
-	 * the release operation.
-	 */
-	if (error == EBUSY)
-		goto top;
-
-	return (error);
-}
-
 /*
- * Called at spa_load time (with retry == B_FALSE) to release a stale
- * temporary user hold. Also called by the onexit code (with retry == B_TRUE).
- */
-int
-dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag,
-    boolean_t retry)
-{
-	dsl_dataset_t *ds;
-	char *snap;
-	char *name;
-	int namelen;
-	int error;
-
-	do {
-		rw_enter(&dp->dp_config_rwlock, RW_READER);
-		error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
-		rw_exit(&dp->dp_config_rwlock);
-		if (error)
-			return (error);
-		namelen = dsl_dataset_namelen(ds)+1;
-		name = kmem_alloc(namelen, KM_SLEEP);
-		dsl_dataset_name(ds, name);
-		dsl_dataset_rele(ds, FTAG);
-
-		snap = strchr(name, '@');
-		*snap = '\0';
-		++snap;
-		error = dsl_dataset_user_release(name, snap, htag, B_FALSE);
-		kmem_free(name, namelen);
-
-		/*
-		 * The object can't have been destroyed because we have a hold,
-		 * but it might have been renamed, resulting in ENOENT.  Retry
-		 * if we've been requested to do so.
-		 *
-		 * It would be nice if we could use the dsobj all the way
-		 * through and avoid ENOENT entirely.  But we might need to
-		 * unmount the snapshot, and there's currently no way to lookup
-		 * a vfsp using a ZFS object id.
-		 */
-	} while ((error == ENOENT) && retry);
-
-	return (error);
-}
-
-int
-dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp)
-{
-	dsl_dataset_t *ds;
-	int err;
-
-	err = dsl_dataset_hold(dsname, FTAG, &ds);
-	if (err)
-		return (err);
-
-	VERIFY(0 == nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP));
-	if (ds->ds_phys->ds_userrefs_obj != 0) {
-		zap_attribute_t *za;
-		zap_cursor_t zc;
-
-		za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
-		for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset,
-		    ds->ds_phys->ds_userrefs_obj);
-		    zap_cursor_retrieve(&zc, za) == 0;
-		    zap_cursor_advance(&zc)) {
-			VERIFY(0 == nvlist_add_uint64(*nvp, za->za_name,
-			    za->za_first_integer));
-		}
-		zap_cursor_fini(&zc);
-		kmem_free(za, sizeof (zap_attribute_t));
-	}
-	dsl_dataset_rele(ds, FTAG);
-	return (0);
-}
-
-/*
- * Note, this function is used as the callback for dmu_objset_find().  We
- * always return 0 so that we will continue to find and process
- * inconsistent datasets, even if we encounter an error trying to
- * process one of them.
- */
-/* ARGSUSED */
-int
-dsl_destroy_inconsistent(const char *dsname, void *arg)
-{
-	dsl_dataset_t *ds;
-
-	if (dsl_dataset_own(dsname, B_TRUE, FTAG, &ds) == 0) {
-		if (DS_IS_INCONSISTENT(ds))
-			(void) dsl_dataset_destroy(ds, FTAG, B_FALSE);
-		else
-			dsl_dataset_disown(ds, FTAG);
-	}
-	return (0);
-}
-
-/*
  * Return (in *usedp) the amount of space written in new that is not
  * present in oldsnap.  New may be a snapshot or the head.  Old must be
  * a snapshot before new, in new's filesystem (or its origin).  If not then
@@ -4235,6 +2842,8 @@
 	uint64_t snapobj;
 	dsl_pool_t *dp = new->ds_dir->dd_pool;
 
+	ASSERT(dsl_pool_config_held(dp));
+
 	*usedp = 0;
 	*usedp += new->ds_phys->ds_referenced_bytes;
 	*usedp -= oldsnap->ds_phys->ds_referenced_bytes;
@@ -4247,7 +2856,6 @@
 	*uncompp += new->ds_phys->ds_uncompressed_bytes;
 	*uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes;
 
-	rw_enter(&dp->dp_config_rwlock, RW_READER);
 	snapobj = new->ds_object;
 	while (snapobj != oldsnap->ds_object) {
 		dsl_dataset_t *snap;
@@ -4291,12 +2899,11 @@
 		if (snap != new)
 			dsl_dataset_rele(snap, FTAG);
 		if (snapobj == 0) {
-			err = EINVAL;
+			err = SET_ERROR(EINVAL);
 			break;
 		}
 
 	}
-	rw_exit(&dp->dp_config_rwlock);
 	return (err);
 }
 
@@ -4334,11 +2941,10 @@
 	if (firstsnap->ds_dir != lastsnap->ds_dir ||
 	    firstsnap->ds_phys->ds_creation_txg >
 	    lastsnap->ds_phys->ds_creation_txg)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	*usedp = *compp = *uncompp = 0;
 
-	rw_enter(&dp->dp_config_rwlock, RW_READER);
 	snapobj = lastsnap->ds_phys->ds_next_snap_obj;
 	while (snapobj != firstsnap->ds_object) {
 		dsl_dataset_t *ds;
@@ -4359,6 +2965,42 @@
 		ASSERT3U(snapobj, !=, 0);
 		dsl_dataset_rele(ds, FTAG);
 	}
-	rw_exit(&dp->dp_config_rwlock);
 	return (err);
 }
+
+/*
+ * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline.
+ * For example, they could both be snapshots of the same filesystem, and
+ * 'earlier' is before 'later'.  Or 'earlier' could be the origin of
+ * 'later's filesystem.  Or 'earlier' could be an older snapshot in the origin's
+ * filesystem.  Or 'earlier' could be the origin's origin.
+ */
+boolean_t
+dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier)
+{
+	dsl_pool_t *dp = later->ds_dir->dd_pool;
+	int error;
+	boolean_t ret;
+
+	ASSERT(dsl_pool_config_held(dp));
+
+	if (earlier->ds_phys->ds_creation_txg >=
+	    later->ds_phys->ds_creation_txg)
+		return (B_FALSE);
+
+	if (later->ds_dir == earlier->ds_dir)
+		return (B_TRUE);
+	if (!dsl_dir_is_clone(later->ds_dir))
+		return (B_FALSE);
+
+	if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object)
+		return (B_TRUE);
+	dsl_dataset_t *origin;
+	error = dsl_dataset_hold_obj(dp,
+	    later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin);
+	if (error != 0)
+		return (B_FALSE);
+	ret = dsl_dataset_is_before(origin, earlier);
+	dsl_dataset_rele(origin, FTAG);
+	return (ret);
+}

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 /*
@@ -107,7 +107,7 @@
 			const char *perm = nvpair_name(permpair);
 
 			if (strcmp(perm, ZFS_DELEG_PERM_ALLOW) == 0)
-				return (EPERM);
+				return (SET_ERROR(EPERM));
 
 			if ((error = dsl_deleg_access(ddname, perm, cr)) != 0)
 				return (error);
@@ -139,23 +139,32 @@
 
 		if (type != ZFS_DELEG_USER &&
 		    type != ZFS_DELEG_USER_SETS)
-			return (EPERM);
+			return (SET_ERROR(EPERM));
 
 		if (strcmp(idstr, &nvpair_name(whopair)[3]) != 0)
-			return (EPERM);
+			return (SET_ERROR(EPERM));
 	}
 	return (0);
 }
 
+typedef struct dsl_deleg_arg {
+	const char *dda_name;
+	nvlist_t *dda_nvlist;
+} dsl_deleg_arg_t;
+
 static void
-dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_deleg_set_sync(void *arg, dmu_tx_t *tx)
 {
-	dsl_dir_t *dd = arg1;
-	nvlist_t *nvp = arg2;
-	objset_t *mos = dd->dd_pool->dp_meta_objset;
+	dsl_deleg_arg_t *dda = arg;
+	dsl_dir_t *dd;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	objset_t *mos = dp->dp_meta_objset;
 	nvpair_t *whopair = NULL;
-	uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj;
+	uint64_t zapobj;
 
+	VERIFY0(dsl_dir_hold(dp, dda->dda_name, FTAG, &dd, NULL));
+
+	zapobj = dd->dd_phys->dd_deleg_zapobj;
 	if (zapobj == 0) {
 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
 		zapobj = dd->dd_phys->dd_deleg_zapobj = zap_create(mos,
@@ -162,13 +171,13 @@
 		    DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx);
 	}
 
-	while (whopair = nvlist_next_nvpair(nvp, whopair)) {
+	while (whopair = nvlist_next_nvpair(dda->dda_nvlist, whopair)) {
 		const char *whokey = nvpair_name(whopair);
 		nvlist_t *perms;
 		nvpair_t *permpair = NULL;
 		uint64_t jumpobj;
 
-		VERIFY(nvpair_value_nvlist(whopair, &perms) == 0);
+		perms = fnvpair_value_nvlist(whopair);
 
 		if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) {
 			jumpobj = zap_create_link(mos, DMU_OT_DSL_PERMS,
@@ -181,27 +190,31 @@
 
 			VERIFY(zap_update(mos, jumpobj,
 			    perm, 8, 1, &n, tx) == 0);
-			spa_history_log_internal(LOG_DS_PERM_UPDATE,
-			    dd->dd_pool->dp_spa, tx,
-			    "%s %s dataset = %llu", whokey, perm,
-			    dd->dd_phys->dd_head_dataset_obj);
+			spa_history_log_internal_dd(dd, "permission update", tx,
+			    "%s %s", whokey, perm);
 		}
 	}
+	dsl_dir_rele(dd, FTAG);
 }
 
 static void
-dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_deleg_unset_sync(void *arg, dmu_tx_t *tx)
 {
-	dsl_dir_t *dd = arg1;
-	nvlist_t *nvp = arg2;
-	objset_t *mos = dd->dd_pool->dp_meta_objset;
+	dsl_deleg_arg_t *dda = arg;
+	dsl_dir_t *dd;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	objset_t *mos = dp->dp_meta_objset;
 	nvpair_t *whopair = NULL;
-	uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj;
+	uint64_t zapobj;
 
-	if (zapobj == 0)
+	VERIFY0(dsl_dir_hold(dp, dda->dda_name, FTAG, &dd, NULL));
+	zapobj = dd->dd_phys->dd_deleg_zapobj;
+	if (zapobj == 0) {
+		dsl_dir_rele(dd, FTAG);
 		return;
+	}
 
-	while (whopair = nvlist_next_nvpair(nvp, whopair)) {
+	while (whopair = nvlist_next_nvpair(dda->dda_nvlist, whopair)) {
 		const char *whokey = nvpair_name(whopair);
 		nvlist_t *perms;
 		nvpair_t *permpair = NULL;
@@ -213,10 +226,8 @@
 				(void) zap_remove(mos, zapobj, whokey, tx);
 				VERIFY(0 == zap_destroy(mos, jumpobj, tx));
 			}
-			spa_history_log_internal(LOG_DS_PERM_WHO_REMOVE,
-			    dd->dd_pool->dp_spa, tx,
-			    "%s dataset = %llu", whokey,
-			    dd->dd_phys->dd_head_dataset_obj);
+			spa_history_log_internal_dd(dd, "permission who remove",
+			    tx, "%s", whokey);
 			continue;
 		}
 
@@ -234,41 +245,44 @@
 				VERIFY(0 == zap_destroy(mos,
 				    jumpobj, tx));
 			}
-			spa_history_log_internal(LOG_DS_PERM_REMOVE,
-			    dd->dd_pool->dp_spa, tx,
-			    "%s %s dataset = %llu", whokey, perm,
-			    dd->dd_phys->dd_head_dataset_obj);
+			spa_history_log_internal_dd(dd, "permission remove", tx,
+			    "%s %s", whokey, perm);
 		}
 	}
+	dsl_dir_rele(dd, FTAG);
 }
 
-int
-dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset)
+static int
+dsl_deleg_check(void *arg, dmu_tx_t *tx)
 {
+	dsl_deleg_arg_t *dda = arg;
 	dsl_dir_t *dd;
 	int error;
-	nvpair_t *whopair = NULL;
-	int blocks_modified = 0;
 
-	error = dsl_dir_open(ddname, FTAG, &dd, NULL);
-	if (error)
-		return (error);
-
-	if (spa_version(dmu_objset_spa(dd->dd_pool->dp_meta_objset)) <
+	if (spa_version(dmu_tx_pool(tx)->dp_spa) <
 	    SPA_VERSION_DELEGATED_PERMS) {
-		dsl_dir_close(dd, FTAG);
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	}
 
-	while (whopair = nvlist_next_nvpair(nvp, whopair))
-		blocks_modified++;
+	error = dsl_dir_hold(dmu_tx_pool(tx), dda->dda_name, FTAG, &dd, NULL);
+	if (error == 0)
+		dsl_dir_rele(dd, FTAG);
+	return (error);
+}
 
-	error = dsl_sync_task_do(dd->dd_pool, NULL,
+int
+dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset)
+{
+	dsl_deleg_arg_t dda;
+
+	/* nvp must already have been verified to be valid */
+
+	dda.dda_name = ddname;
+	dda.dda_nvlist = nvp;
+
+	return (dsl_sync_task(ddname, dsl_deleg_check,
 	    unset ? dsl_deleg_unset_sync : dsl_deleg_set_sync,
-	    dd, nvp, blocks_modified);
-	dsl_dir_close(dd, FTAG);
-
-	return (error);
+	    &dda, fnvlist_num_pairs(nvp)));
 }
 
 /*
@@ -296,16 +310,21 @@
 	int error;
 	objset_t *mos;
 
-	error = dsl_dir_open(ddname, FTAG, &startdd, NULL);
-	if (error)
+	error = dsl_pool_hold(ddname, FTAG, &dp);
+	if (error != 0)
 		return (error);
 
+	error = dsl_dir_hold(dp, ddname, FTAG, &startdd, NULL);
+	if (error != 0) {
+		dsl_pool_rele(dp, FTAG);
+		return (error);
+	}
+
 	dp = startdd->dd_pool;
 	mos = dp->dp_meta_objset;
 
 	VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 
-	rw_enter(&dp->dp_config_rwlock, RW_READER);
 	for (dd = startdd; dd != NULL; dd = dd->dd_parent) {
 		zap_cursor_t basezc;
 		zap_attribute_t baseza;
@@ -313,15 +332,12 @@
 		uint64_t n;
 		char source[MAXNAMELEN];
 
-		if (dd->dd_phys->dd_deleg_zapobj &&
-		    (zap_count(mos, dd->dd_phys->dd_deleg_zapobj,
-		    &n) == 0) && n) {
-			VERIFY(nvlist_alloc(&sp_nvp,
-			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
-		} else {
+		if (dd->dd_phys->dd_deleg_zapobj == 0 ||
+		    zap_count(mos, dd->dd_phys->dd_deleg_zapobj, &n) != 0 ||
+		    n == 0)
 			continue;
-		}
 
+		sp_nvp = fnvlist_alloc();
 		for (zap_cursor_init(&basezc, mos,
 		    dd->dd_phys->dd_deleg_zapobj);
 		    zap_cursor_retrieve(&basezc, &baseza) == 0;
@@ -333,29 +349,26 @@
 			ASSERT(baseza.za_integer_length == 8);
 			ASSERT(baseza.za_num_integers == 1);
 
-			VERIFY(nvlist_alloc(&perms_nvp,
-			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
+			perms_nvp = fnvlist_alloc();
 			for (zap_cursor_init(&zc, mos, baseza.za_first_integer);
 			    zap_cursor_retrieve(&zc, &za) == 0;
 			    zap_cursor_advance(&zc)) {
-				VERIFY(nvlist_add_boolean(perms_nvp,
-				    za.za_name) == 0);
+				fnvlist_add_boolean(perms_nvp, za.za_name);
 			}
 			zap_cursor_fini(&zc);
-			VERIFY(nvlist_add_nvlist(sp_nvp, baseza.za_name,
-			    perms_nvp) == 0);
-			nvlist_free(perms_nvp);
+			fnvlist_add_nvlist(sp_nvp, baseza.za_name, perms_nvp);
+			fnvlist_free(perms_nvp);
 		}
 
 		zap_cursor_fini(&basezc);
 
 		dsl_dir_name(dd, source);
-		VERIFY(nvlist_add_nvlist(*nvp, source, sp_nvp) == 0);
+		fnvlist_add_nvlist(*nvp, source, sp_nvp);
 		nvlist_free(sp_nvp);
 	}
-	rw_exit(&dp->dp_config_rwlock);
 
-	dsl_dir_close(startdd, FTAG);
+	dsl_dir_rele(startdd, FTAG);
+	dsl_pool_rele(dp, FTAG);
 	return (0);
 }
 
@@ -404,7 +417,7 @@
 	if (error == 0) {
 		error = zap_lookup(mos, jumpobj, perm, 8, 1, &zero);
 		if (error == ENOENT)
-			error = EPERM;
+			error = SET_ERROR(EPERM);
 	}
 	return (error);
 }
@@ -449,7 +462,7 @@
 			return (0);
 	}
 
-	return (EPERM);
+	return (SET_ERROR(EPERM));
 }
 
 /*
@@ -524,12 +537,10 @@
 }
 
 /*
- * Check if user has requested permission.  If descendent is set, must have
- * descendent perms.
+ * Check if user has requested permission.
  */
 int
-dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm,
-    cred_t *cr)
+dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr)
 {
 	dsl_dir_t *dd;
 	dsl_pool_t *dp;
@@ -544,13 +555,13 @@
 	mos = dp->dp_meta_objset;
 
 	if (dsl_delegation_on(mos) == B_FALSE)
-		return (ECANCELED);
+		return (SET_ERROR(ECANCELED));
 
 	if (spa_version(dmu_objset_spa(dp->dp_meta_objset)) <
 	    SPA_VERSION_DELEGATED_PERMS)
-		return (EPERM);
+		return (SET_ERROR(EPERM));
 
-	if (dsl_dataset_is_snapshot(ds) || descendent) {
+	if (dsl_dataset_is_snapshot(ds)) {
 		/*
 		 * Snapshots are treated as descendents only,
 		 * local permissions do not apply.
@@ -563,7 +574,7 @@
 	avl_create(&permsets, perm_set_compare, sizeof (perm_set_t),
 	    offsetof(perm_set_t, p_node));
 
-	rw_enter(&dp->dp_config_rwlock, RW_READER);
+	ASSERT(dsl_pool_config_held(dp));
 	for (dd = ds->ds_dir; dd != NULL; dd = dd->dd_parent,
 	    checkflag = ZFS_DELEG_DESCENDENT) {
 		uint64_t zapobj;
@@ -622,9 +633,8 @@
 		if (error == 0)
 			goto success;
 	}
-	error = EPERM;
+	error = SET_ERROR(EPERM);
 success:
-	rw_exit(&dp->dp_config_rwlock);
 
 	cookie = NULL;
 	while ((setnode = avl_destroy_nodes(&permsets, &cookie)) != NULL)
@@ -636,16 +646,20 @@
 int
 dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr)
 {
+	dsl_pool_t *dp;
 	dsl_dataset_t *ds;
 	int error;
 
-	error = dsl_dataset_hold(dsname, FTAG, &ds);
-	if (error)
+	error = dsl_pool_hold(dsname, FTAG, &dp);
+	if (error != 0)
 		return (error);
+	error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
+	if (error == 0) {
+		error = dsl_deleg_access_impl(ds, perm, cr);
+		dsl_dataset_rele(ds, FTAG);
+	}
+	dsl_pool_rele(dp, FTAG);
 
-	error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr);
-	dsl_dataset_rele(ds, FTAG);
-
 	return (error);
 }
 

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -22,6 +22,7 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011 Pawel Jakub Dawidek <pawel at dawidek.net>.
  * All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/dmu.h>
@@ -45,9 +46,7 @@
 #include "zfs_namecheck.h"
 
 static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
-static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx);
 
-
 /* ARGSUSED */
 static void
 dsl_dir_evict(dmu_buf_t *db, void *arg)
@@ -63,7 +62,7 @@
 	}
 
 	if (dd->dd_parent)
-		dsl_dir_close(dd->dd_parent, dd);
+		dsl_dir_rele(dd->dd_parent, dd);
 
 	spa_close(dd->dd_pool->dp_spa, dd);
 
@@ -77,7 +76,7 @@
 }
 
 int
-dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
+dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
     const char *tail, void *tag, dsl_dir_t **ddp)
 {
 	dmu_buf_t *dbuf;
@@ -84,11 +83,10 @@
 	dsl_dir_t *dd;
 	int err;
 
-	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
-	    dsl_pool_sync_context(dp));
+	ASSERT(dsl_pool_config_held(dp));
 
 	err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
-	if (err)
+	if (err != 0)
 		return (err);
 	dd = dmu_buf_get_user(dbuf);
 #ifdef ZFS_DEBUG
@@ -115,9 +113,9 @@
 		dsl_dir_snap_cmtime_update(dd);
 
 		if (dd->dd_phys->dd_parent_obj) {
-			err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj,
+			err = dsl_dir_hold_obj(dp, dd->dd_phys->dd_parent_obj,
 			    NULL, dd, &dd->dd_parent);
-			if (err)
+			if (err != 0)
 				goto errout;
 			if (tail) {
 #ifdef ZFS_DEBUG
@@ -134,7 +132,7 @@
 				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
 				    ddobj, 0, dd->dd_myname);
 			}
-			if (err)
+			if (err != 0)
 				goto errout;
 		} else {
 			(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
@@ -151,7 +149,7 @@
 			 */
 			err = dmu_bonus_hold(dp->dp_meta_objset,
 			    dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus);
-			if (err)
+			if (err != 0)
 				goto errout;
 			origin_phys = origin_bonus->db_data;
 			dd->dd_origin_txg =
@@ -163,7 +161,7 @@
 		    dsl_dir_evict);
 		if (winner) {
 			if (dd->dd_parent)
-				dsl_dir_close(dd->dd_parent, dd);
+				dsl_dir_rele(dd->dd_parent, dd);
 			mutex_destroy(&dd->dd_lock);
 			kmem_free(dd, sizeof (dsl_dir_t));
 			dd = winner;
@@ -190,7 +188,7 @@
 
 errout:
 	if (dd->dd_parent)
-		dsl_dir_close(dd->dd_parent, dd);
+		dsl_dir_rele(dd->dd_parent, dd);
 	mutex_destroy(&dd->dd_lock);
 	kmem_free(dd, sizeof (dsl_dir_t));
 	dmu_buf_rele(dbuf, tag);
@@ -198,7 +196,7 @@
 }
 
 void
-dsl_dir_close(dsl_dir_t *dd, void *tag)
+dsl_dir_rele(dsl_dir_t *dd, void *tag)
 {
 	dprintf_dd(dd, "%s\n", "");
 	spa_close(dd->dd_pool->dp_spa, tag);
@@ -255,13 +253,14 @@
 getcomponent(const char *path, char *component, const char **nextp)
 {
 	char *p;
+
 	if ((path == NULL) || (path[0] == '\0'))
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 	/* This would be a good place to reserve some namespace... */
 	p = strpbrk(path, "/@");
 	if (p && (p[1] == '/' || p[1] == '@')) {
 		/* two separators in a row */
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 	if (p == NULL || p == path) {
 		/*
@@ -271,16 +270,16 @@
 		 */
 		if (p != NULL &&
 		    (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0'))
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 		if (strlen(path) >= MAXNAMELEN)
-			return (ENAMETOOLONG);
+			return (SET_ERROR(ENAMETOOLONG));
 		(void) strcpy(component, path);
 		p = NULL;
 	} else if (p[0] == '/') {
-		if (p-path >= MAXNAMELEN)
-			return (ENAMETOOLONG);
+		if (p - path >= MAXNAMELEN)
+			return (SET_ERROR(ENAMETOOLONG));
 		(void) strncpy(component, path, p - path);
-		component[p-path] = '\0';
+		component[p - path] = '\0';
 		p++;
 	} else if (p[0] == '@') {
 		/*
@@ -288,13 +287,13 @@
 		 * any more slashes.
 		 */
 		if (strchr(path, '/'))
-			return (EINVAL);
-		if (p-path >= MAXNAMELEN)
-			return (ENAMETOOLONG);
+			return (SET_ERROR(EINVAL));
+		if (p - path >= MAXNAMELEN)
+			return (SET_ERROR(ENAMETOOLONG));
 		(void) strncpy(component, path, p - path);
-		component[p-path] = '\0';
+		component[p - path] = '\0';
 	} else {
-		ASSERT(!"invalid p");
+		panic("invalid p=%p", (void *)p);
 	}
 	*nextp = p;
 	return (0);
@@ -301,46 +300,35 @@
 }
 
 /*
- * same as dsl_open_dir, ignore the first component of name and use the
- * spa instead
+ * Return the dsl_dir_t, and possibly the last component which couldn't
+ * be found in *tail.  The name must be in the specified dsl_pool_t.  This
+ * thread must hold the dp_config_rwlock for the pool.  Returns NULL if the
+ * path is bogus, or if tail==NULL and we couldn't parse the whole name.
+ * (*tail)[0] == '@' means that the last component is a snapshot.
  */
 int
-dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
+dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
     dsl_dir_t **ddp, const char **tailp)
 {
 	char buf[MAXNAMELEN];
-	const char *next, *nextnext = NULL;
+	const char *spaname, *next, *nextnext = NULL;
 	int err;
 	dsl_dir_t *dd;
-	dsl_pool_t *dp;
 	uint64_t ddobj;
-	int openedspa = FALSE;
 
-	dprintf("%s\n", name);
-
 	err = getcomponent(name, buf, &next);
-	if (err)
+	if (err != 0)
 		return (err);
-	if (spa == NULL) {
-		err = spa_open(buf, &spa, FTAG);
-		if (err) {
-			dprintf("spa_open(%s) failed\n", buf);
-			return (err);
-		}
-		openedspa = TRUE;
 
-		/* XXX this assertion belongs in spa_open */
-		ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa)));
-	}
+	/* Make sure the name is in the specified pool. */
+	spaname = spa_name(dp->dp_spa);
+	if (strcmp(buf, spaname) != 0)
+		return (SET_ERROR(EINVAL));
 
-	dp = spa_get_dsl(spa);
+	ASSERT(dsl_pool_config_held(dp));
 
-	rw_enter(&dp->dp_config_rwlock, RW_READER);
-	err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
-	if (err) {
-		rw_exit(&dp->dp_config_rwlock);
-		if (openedspa)
-			spa_close(spa, FTAG);
+	err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
+	if (err != 0) {
 		return (err);
 	}
 
@@ -347,7 +335,7 @@
 	while (next != NULL) {
 		dsl_dir_t *child_ds;
 		err = getcomponent(next, buf, &nextnext);
-		if (err)
+		if (err != 0)
 			break;
 		ASSERT(next[0] != '\0');
 		if (next[0] == '@')
@@ -358,25 +346,22 @@
 		err = zap_lookup(dp->dp_meta_objset,
 		    dd->dd_phys->dd_child_dir_zapobj,
 		    buf, sizeof (ddobj), 1, &ddobj);
-		if (err) {
+		if (err != 0) {
 			if (err == ENOENT)
 				err = 0;
 			break;
 		}
 
-		err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds);
-		if (err)
+		err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_ds);
+		if (err != 0)
 			break;
-		dsl_dir_close(dd, tag);
+		dsl_dir_rele(dd, tag);
 		dd = child_ds;
 		next = nextnext;
 	}
-	rw_exit(&dp->dp_config_rwlock);
 
-	if (err) {
-		dsl_dir_close(dd, tag);
-		if (openedspa)
-			spa_close(spa, FTAG);
+	if (err != 0) {
+		dsl_dir_rele(dd, tag);
 		return (err);
 	}
 
@@ -387,30 +372,16 @@
 	if (next != NULL &&
 	    (tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
 		/* bad path name */
-		dsl_dir_close(dd, tag);
+		dsl_dir_rele(dd, tag);
 		dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
-		err = ENOENT;
+		err = SET_ERROR(ENOENT);
 	}
-	if (tailp)
+	if (tailp != NULL)
 		*tailp = next;
-	if (openedspa)
-		spa_close(spa, FTAG);
 	*ddp = dd;
 	return (err);
 }
 
-/*
- * Return the dsl_dir_t, and possibly the last component which couldn't
- * be found in *tail.  Return NULL if the path is bogus, or if
- * tail==NULL and we couldn't parse the whole name.  (*tail)[0] == '@'
- * means that the last component is a snapshot.
- */
-int
-dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp)
-{
-	return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp));
-}
-
 uint64_t
 dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
     dmu_tx_t *tx)
@@ -448,77 +419,6 @@
 	return (ddobj);
 }
 
-/* ARGSUSED */
-int
-dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
-	dsl_dataset_t *ds = arg1;
-	dsl_dir_t *dd = ds->ds_dir;
-	dsl_pool_t *dp = dd->dd_pool;
-	objset_t *mos = dp->dp_meta_objset;
-	int err;
-	uint64_t count;
-
-	/*
-	 * There should be exactly two holds, both from
-	 * dsl_dataset_destroy: one on the dd directory, and one on its
-	 * head ds.  If there are more holds, then a concurrent thread is
-	 * performing a lookup inside this dir while we're trying to destroy
-	 * it.  To minimize this possibility, we perform this check only
-	 * in syncing context and fail the operation if we encounter
-	 * additional holds.  The dp_config_rwlock ensures that nobody else
-	 * opens it after we check.
-	 */
-	if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 2)
-		return (EBUSY);
-
-	err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count);
-	if (err)
-		return (err);
-	if (count != 0)
-		return (EEXIST);
-
-	return (0);
-}
-
-void
-dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
-{
-	dsl_dataset_t *ds = arg1;
-	dsl_dir_t *dd = ds->ds_dir;
-	objset_t *mos = dd->dd_pool->dp_meta_objset;
-	dsl_prop_setarg_t psa;
-	uint64_t value = 0;
-	uint64_t obj;
-	dd_used_t t;
-
-	ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock));
-	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
-
-	/* Remove our reservation. */
-	dsl_prop_setarg_init_uint64(&psa, "reservation",
-	    (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
-	    &value);
-	psa.psa_effective_value = 0;	/* predict default value */
-
-	dsl_dir_set_reservation_sync(ds, &psa, tx);
-
-	ASSERT0(dd->dd_phys->dd_used_bytes);
-	ASSERT0(dd->dd_phys->dd_reserved);
-	for (t = 0; t < DD_USED_NUM; t++)
-		ASSERT0(dd->dd_phys->dd_used_breakdown[t]);
-
-	VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx));
-	VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx));
-	VERIFY(0 == dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx));
-	VERIFY(0 == zap_remove(mos,
-	    dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx));
-
-	obj = dd->dd_object;
-	dsl_dir_close(dd, tag);
-	VERIFY(0 == dmu_object_free(mos, obj, tx));
-}
-
 boolean_t
 dsl_dir_is_clone(dsl_dir_t *dd)
 {
@@ -556,18 +456,16 @@
 	}
 	mutex_exit(&dd->dd_lock);
 
-	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
 	if (dsl_dir_is_clone(dd)) {
 		dsl_dataset_t *ds;
 		char buf[MAXNAMELEN];
 
-		VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
+		VERIFY0(dsl_dataset_hold_obj(dd->dd_pool,
 		    dd->dd_phys->dd_origin_obj, FTAG, &ds));
 		dsl_dataset_name(ds, buf);
 		dsl_dataset_rele(ds, FTAG);
 		dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf);
 	}
-	rw_exit(&dd->dd_pool->dp_config_rwlock);
 }
 
 void
@@ -577,7 +475,7 @@
 
 	ASSERT(dd->dd_phys);
 
-	if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) {
+	if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg)) {
 		/* up the hold count until we can be written out */
 		dmu_buf_add_ref(dd->dd_dbuf, dd);
 	}
@@ -785,7 +683,7 @@
 		    used_on_disk>>10, est_inflight>>10,
 		    quota>>10, asize>>10, retval);
 		mutex_exit(&dd->dd_lock);
-		return (retval);
+		return (SET_ERROR(retval));
 	}
 
 	/* We need to up our estimated delta before dropping dd_lock */
@@ -847,7 +745,7 @@
 	} else {
 		if (err == EAGAIN) {
 			txg_delay(dd->dd_pool, tx->tx_txg, 1);
-			err = ERESTART;
+			err = SET_ERROR(ERESTART);
 		}
 		dsl_pool_memory_pressure(dd->dd_pool);
 	}
@@ -864,7 +762,7 @@
 		    FALSE, asize > usize, tr_list, tx, TRUE);
 	}
 
-	if (err)
+	if (err != 0)
 		dsl_dir_tempreserve_clear(tr_list, tx);
 	else
 		*tr_cookiep = tr_list;
@@ -1015,22 +913,38 @@
 		mutex_exit(&dd->dd_lock);
 }
 
+typedef struct dsl_dir_set_qr_arg {
+	const char *ddsqra_name;
+	zprop_source_t ddsqra_source;
+	uint64_t ddsqra_value;
+} dsl_dir_set_qr_arg_t;
+
 static int
-dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dir_set_quota_check(void *arg, dmu_tx_t *tx)
 {
-	dsl_dataset_t *ds = arg1;
-	dsl_dir_t *dd = ds->ds_dir;
-	dsl_prop_setarg_t *psa = arg2;
-	int err;
-	uint64_t towrite;
+	dsl_dir_set_qr_arg_t *ddsqra = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dataset_t *ds;
+	int error;
+	uint64_t towrite, newval;
 
-	if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
-		return (err);
+	error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
+	if (error != 0)
+		return (error);
 
-	if (psa->psa_effective_value == 0)
+	error = dsl_prop_predict(ds->ds_dir, "quota",
+	    ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
+	if (error != 0) {
+		dsl_dataset_rele(ds, FTAG);
+		return (error);
+	}
+
+	if (newval == 0) {
+		dsl_dataset_rele(ds, FTAG);
 		return (0);
+	}
 
-	mutex_enter(&dd->dd_lock);
+	mutex_enter(&ds->ds_dir->dd_lock);
 	/*
 	 * If we are doing the preliminary check in open context, and
 	 * there are pending changes, then don't fail it, since the
@@ -1037,94 +951,86 @@
 	 * pending changes could under-estimate the amount of space to be
 	 * freed up.
 	 */
-	towrite = dsl_dir_space_towrite(dd);
+	towrite = dsl_dir_space_towrite(ds->ds_dir);
 	if ((dmu_tx_is_syncing(tx) || towrite == 0) &&
-	    (psa->psa_effective_value < dd->dd_phys->dd_reserved ||
-	    psa->psa_effective_value < dd->dd_phys->dd_used_bytes + towrite)) {
-		err = ENOSPC;
+	    (newval < ds->ds_dir->dd_phys->dd_reserved ||
+	    newval < ds->ds_dir->dd_phys->dd_used_bytes + towrite)) {
+		error = SET_ERROR(ENOSPC);
 	}
-	mutex_exit(&dd->dd_lock);
-	return (err);
+	mutex_exit(&ds->ds_dir->dd_lock);
+	dsl_dataset_rele(ds, FTAG);
+	return (error);
 }
 
-extern dsl_syncfunc_t dsl_prop_set_sync;
-
 static void
-dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dir_set_quota_sync(void *arg, dmu_tx_t *tx)
 {
-	dsl_dataset_t *ds = arg1;
-	dsl_dir_t *dd = ds->ds_dir;
-	dsl_prop_setarg_t *psa = arg2;
-	uint64_t effective_value = psa->psa_effective_value;
+	dsl_dir_set_qr_arg_t *ddsqra = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dataset_t *ds;
+	uint64_t newval;
 
-	dsl_prop_set_sync(ds, psa, tx);
-	DSL_PROP_CHECK_PREDICTION(dd, psa);
+	VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
 
-	dmu_buf_will_dirty(dd->dd_dbuf, tx);
+	dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_QUOTA),
+	    ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
+	    &ddsqra->ddsqra_value, tx);
 
-	mutex_enter(&dd->dd_lock);
-	dd->dd_phys->dd_quota = effective_value;
-	mutex_exit(&dd->dd_lock);
+	VERIFY0(dsl_prop_get_int_ds(ds,
+	    zfs_prop_to_name(ZFS_PROP_QUOTA), &newval));
+
+	dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
+	mutex_enter(&ds->ds_dir->dd_lock);
+	ds->ds_dir->dd_phys->dd_quota = newval;
+	mutex_exit(&ds->ds_dir->dd_lock);
+	dsl_dataset_rele(ds, FTAG);
 }
 
 int
 dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota)
 {
-	dsl_dir_t *dd;
-	dsl_dataset_t *ds;
-	dsl_prop_setarg_t psa;
-	int err;
+	dsl_dir_set_qr_arg_t ddsqra;
 
-	dsl_prop_setarg_init_uint64(&psa, "quota", source, &quota);
+	ddsqra.ddsqra_name = ddname;
+	ddsqra.ddsqra_source = source;
+	ddsqra.ddsqra_value = quota;
 
-	err = dsl_dataset_hold(ddname, FTAG, &ds);
-	if (err)
-		return (err);
-
-	err = dsl_dir_open(ddname, FTAG, &dd, NULL);
-	if (err) {
-		dsl_dataset_rele(ds, FTAG);
-		return (err);
-	}
-
-	ASSERT(ds->ds_dir == dd);
-
-	/*
-	 * If someone removes a file, then tries to set the quota, we want to
-	 * make sure the file freeing takes effect.
-	 */
-	txg_wait_open(dd->dd_pool, 0);
-
-	err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check,
-	    dsl_dir_set_quota_sync, ds, &psa, 0);
-
-	dsl_dir_close(dd, FTAG);
-	dsl_dataset_rele(ds, FTAG);
-	return (err);
+	return (dsl_sync_task(ddname, dsl_dir_set_quota_check,
+	    dsl_dir_set_quota_sync, &ddsqra, 0));
 }
 
 int
-dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dir_set_reservation_check(void *arg, dmu_tx_t *tx)
 {
-	dsl_dataset_t *ds = arg1;
-	dsl_dir_t *dd = ds->ds_dir;
-	dsl_prop_setarg_t *psa = arg2;
-	uint64_t effective_value;
-	uint64_t used, avail;
-	int err;
+	dsl_dir_set_qr_arg_t *ddsqra = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dataset_t *ds;
+	dsl_dir_t *dd;
+	uint64_t newval, used, avail;
+	int error;
 
-	if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
-		return (err);
+	error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
+	if (error != 0)
+		return (error);
+	dd = ds->ds_dir;
 
-	effective_value = psa->psa_effective_value;
-
 	/*
 	 * If we are doing the preliminary check in open context, the
 	 * space estimates may be inaccurate.
 	 */
-	if (!dmu_tx_is_syncing(tx))
+	if (!dmu_tx_is_syncing(tx)) {
+		dsl_dataset_rele(ds, FTAG);
 		return (0);
+	}
 
+	error = dsl_prop_predict(ds->ds_dir,
+	    zfs_prop_to_name(ZFS_PROP_RESERVATION),
+	    ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
+	if (error != 0) {
+		dsl_dataset_rele(ds, FTAG);
+		return (error);
+	}
+
 	mutex_enter(&dd->dd_lock);
 	used = dd->dd_phys->dd_used_bytes;
 	mutex_exit(&dd->dd_lock);
@@ -1136,40 +1042,32 @@
 		avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used;
 	}
 
-	if (MAX(used, effective_value) > MAX(used, dd->dd_phys->dd_reserved)) {
-		uint64_t delta = MAX(used, effective_value) -
+	if (MAX(used, newval) > MAX(used, dd->dd_phys->dd_reserved)) {
+		uint64_t delta = MAX(used, newval) -
 		    MAX(used, dd->dd_phys->dd_reserved);
 
-		if (delta > avail)
-			return (ENOSPC);
-		if (dd->dd_phys->dd_quota > 0 &&
-		    effective_value > dd->dd_phys->dd_quota)
-			return (ENOSPC);
+		if (delta > avail ||
+		    (dd->dd_phys->dd_quota > 0 &&
+		    newval > dd->dd_phys->dd_quota))
+			error = SET_ERROR(ENOSPC);
 	}
 
-	return (0);
+	dsl_dataset_rele(ds, FTAG);
+	return (error);
 }
 
-static void
-dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+void
+dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx)
 {
-	dsl_dataset_t *ds = arg1;
-	dsl_dir_t *dd = ds->ds_dir;
-	dsl_prop_setarg_t *psa = arg2;
-	uint64_t effective_value = psa->psa_effective_value;
 	uint64_t used;
 	int64_t delta;
 
-	dsl_prop_set_sync(ds, psa, tx);
-	DSL_PROP_CHECK_PREDICTION(dd, psa);
-
 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
 
 	mutex_enter(&dd->dd_lock);
 	used = dd->dd_phys->dd_used_bytes;
-	delta = MAX(used, effective_value) -
-	    MAX(used, dd->dd_phys->dd_reserved);
-	dd->dd_phys->dd_reserved = effective_value;
+	delta = MAX(used, value) - MAX(used, dd->dd_phys->dd_reserved);
+	dd->dd_phys->dd_reserved = value;
 
 	if (dd->dd_parent != NULL) {
 		/* Roll up this additional usage into our ancestors */
@@ -1179,35 +1077,39 @@
 	mutex_exit(&dd->dd_lock);
 }
 
-int
-dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
-    uint64_t reservation)
+static void
+dsl_dir_set_reservation_sync(void *arg, dmu_tx_t *tx)
 {
-	dsl_dir_t *dd;
+	dsl_dir_set_qr_arg_t *ddsqra = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
 	dsl_dataset_t *ds;
-	dsl_prop_setarg_t psa;
-	int err;
+	uint64_t newval;
 
-	dsl_prop_setarg_init_uint64(&psa, "reservation", source, &reservation);
+	VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
 
-	err = dsl_dataset_hold(ddname, FTAG, &ds);
-	if (err)
-		return (err);
+	dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_RESERVATION),
+	    ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
+	    &ddsqra->ddsqra_value, tx);
 
-	err = dsl_dir_open(ddname, FTAG, &dd, NULL);
-	if (err) {
-		dsl_dataset_rele(ds, FTAG);
-		return (err);
-	}
+	VERIFY0(dsl_prop_get_int_ds(ds,
+	    zfs_prop_to_name(ZFS_PROP_RESERVATION), &newval));
 
-	ASSERT(ds->ds_dir == dd);
+	dsl_dir_set_reservation_sync_impl(ds->ds_dir, newval, tx);
+	dsl_dataset_rele(ds, FTAG);
+}
 
-	err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check,
-	    dsl_dir_set_reservation_sync, ds, &psa, 0);
+int
+dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
+    uint64_t reservation)
+{
+	dsl_dir_set_qr_arg_t ddsqra;
 
-	dsl_dir_close(dd, FTAG);
-	dsl_dataset_rele(ds, FTAG);
-	return (err);
+	ddsqra.ddsqra_name = ddname;
+	ddsqra.ddsqra_source = source;
+	ddsqra.ddsqra_value = reservation;
+
+	return (dsl_sync_task(ddname, dsl_dir_set_reservation_check,
+	    dsl_dir_set_reservation_sync, &ddsqra, 0));
 }
 
 static dsl_dir_t *
@@ -1239,78 +1141,125 @@
 	return (would_change(dd->dd_parent, delta, ancestor));
 }
 
-struct renamearg {
-	dsl_dir_t *newparent;
-	const char *mynewname;
-	boolean_t allowmounted;
-};
+typedef struct dsl_dir_rename_arg {
+	const char *ddra_oldname;
+	const char *ddra_newname;
+} dsl_dir_rename_arg_t;
 
+/* ARGSUSED */
 static int
-dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
 {
-	dsl_dir_t *dd = arg1;
-	struct renamearg *ra = arg2;
-	dsl_pool_t *dp = dd->dd_pool;
-	objset_t *mos = dp->dp_meta_objset;
-	int err;
-	uint64_t val;
+	int *deltap = arg;
+	char namebuf[MAXNAMELEN];
 
-	/*
-	 * There should only be one reference, from dmu_objset_rename().
-	 * Fleeting holds are also possible (eg, from "zfs list" getting
-	 * stats), but any that are present in open context will likely
-	 * be gone by syncing context, so only fail from syncing
-	 * context.
-	 * Don't check if we allow renaming of busy (mounted) dataset.
-	 */
-	if (!ra->allowmounted && dmu_tx_is_syncing(tx) &&
-	    dmu_buf_refcount(dd->dd_dbuf) > 1) {
-		return (EBUSY);
+	dsl_dataset_name(ds, namebuf);
+
+	if (strlen(namebuf) + *deltap >= MAXNAMELEN)
+		return (SET_ERROR(ENAMETOOLONG));
+	return (0);
+}
+
+static int
+dsl_dir_rename_check(void *arg, dmu_tx_t *tx)
+{
+	dsl_dir_rename_arg_t *ddra = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dir_t *dd, *newparent;
+	const char *mynewname;
+	int error;
+	int delta = strlen(ddra->ddra_newname) - strlen(ddra->ddra_oldname);
+
+	/* target dir should exist */
+	error = dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL);
+	if (error != 0)
+		return (error);
+
+	/* new parent should exist */
+	error = dsl_dir_hold(dp, ddra->ddra_newname, FTAG,
+	    &newparent, &mynewname);
+	if (error != 0) {
+		dsl_dir_rele(dd, FTAG);
+		return (error);
 	}
 
-	/* check for existing name */
-	err = zap_lookup(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
-	    ra->mynewname, 8, 1, &val);
-	if (err == 0)
-		return (EEXIST);
-	if (err != ENOENT)
-		return (err);
+	/* can't rename to different pool */
+	if (dd->dd_pool != newparent->dd_pool) {
+		dsl_dir_rele(newparent, FTAG);
+		dsl_dir_rele(dd, FTAG);
+		return (SET_ERROR(ENXIO));
+	}
 
-	if (ra->newparent != dd->dd_parent) {
+	/* new name should not already exist */
+	if (mynewname == NULL) {
+		dsl_dir_rele(newparent, FTAG);
+		dsl_dir_rele(dd, FTAG);
+		return (SET_ERROR(EEXIST));
+	}
+
+	/* if the name length is growing, validate child name lengths */
+	if (delta > 0) {
+		error = dmu_objset_find_dp(dp, dd->dd_object, dsl_valid_rename,
+		    &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
+		if (error != 0) {
+			dsl_dir_rele(newparent, FTAG);
+			dsl_dir_rele(dd, FTAG);
+			return (error);
+		}
+	}
+
+	if (newparent != dd->dd_parent) {
 		/* is there enough space? */
 		uint64_t myspace =
 		    MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved);
 
 		/* no rename into our descendant */
-		if (closest_common_ancestor(dd, ra->newparent) == dd)
-			return (EINVAL);
+		if (closest_common_ancestor(dd, newparent) == dd) {
+			dsl_dir_rele(newparent, FTAG);
+			dsl_dir_rele(dd, FTAG);
+			return (SET_ERROR(EINVAL));
+		}
 
-		if (err = dsl_dir_transfer_possible(dd->dd_parent,
-		    ra->newparent, myspace))
-			return (err);
+		error = dsl_dir_transfer_possible(dd->dd_parent,
+		    newparent, myspace);
+		if (error != 0) {
+			dsl_dir_rele(newparent, FTAG);
+			dsl_dir_rele(dd, FTAG);
+			return (error);
+		}
 	}
 
+	dsl_dir_rele(newparent, FTAG);
+	dsl_dir_rele(dd, FTAG);
 	return (0);
 }
 
 static void
-dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dir_rename_sync(void *arg, dmu_tx_t *tx)
 {
-	char oldname[MAXPATHLEN], newname[MAXPATHLEN];
-	dsl_dir_t *dd = arg1;
-	struct renamearg *ra = arg2;
-	dsl_pool_t *dp = dd->dd_pool;
+	dsl_dir_rename_arg_t *ddra = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dir_t *dd, *newparent;
+	const char *mynewname;
+	int error;
 	objset_t *mos = dp->dp_meta_objset;
-	int err;
 
-	ASSERT(ra->allowmounted || dmu_buf_refcount(dd->dd_dbuf) <= 2);
+	ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2);
 
-	if (ra->newparent != dd->dd_parent) {
+	VERIFY0(dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL));
+	VERIFY0(dsl_dir_hold(dp, ddra->ddra_newname, FTAG, &newparent,
+	    &mynewname));
+
+	/* Log this before we change the name. */
+	spa_history_log_internal_dd(dd, "rename", tx,
+	    "-> %s", ddra->ddra_newname);
+
+	if (newparent != dd->dd_parent) {
 		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
 		    -dd->dd_phys->dd_used_bytes,
 		    -dd->dd_phys->dd_compressed_bytes,
 		    -dd->dd_phys->dd_uncompressed_bytes, tx);
-		dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD,
+		dsl_dir_diduse_space(newparent, DD_USED_CHILD,
 		    dd->dd_phys->dd_used_bytes,
 		    dd->dd_phys->dd_compressed_bytes,
 		    dd->dd_phys->dd_uncompressed_bytes, tx);
@@ -1321,7 +1270,7 @@
 
 			dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
 			    -unused_rsrv, 0, 0, tx);
-			dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD_RSRV,
+			dsl_dir_diduse_space(newparent, DD_USED_CHILD_RSRV,
 			    unused_rsrv, 0, 0, tx);
 		}
 	}
@@ -1329,62 +1278,43 @@
 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
 
 	/* remove from old parent zapobj */
-	dsl_dir_name(dd, oldname);
-	err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj,
+	error = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj,
 	    dd->dd_myname, tx);
-	ASSERT0(err);
+	ASSERT0(error);
 
-	(void) strcpy(dd->dd_myname, ra->mynewname);
-	dsl_dir_close(dd->dd_parent, dd);
-	dd->dd_phys->dd_parent_obj = ra->newparent->dd_object;
-	VERIFY(0 == dsl_dir_open_obj(dd->dd_pool,
-	    ra->newparent->dd_object, NULL, dd, &dd->dd_parent));
+	(void) strcpy(dd->dd_myname, mynewname);
+	dsl_dir_rele(dd->dd_parent, dd);
+	dd->dd_phys->dd_parent_obj = newparent->dd_object;
+	VERIFY0(dsl_dir_hold_obj(dp,
+	    newparent->dd_object, NULL, dd, &dd->dd_parent));
 
 	/* add to new parent zapobj */
-	err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
-	    dd->dd_myname, 8, 1, &dd->dd_object, tx);
-	ASSERT0(err);
-	dsl_dir_name(dd, newname);
+	VERIFY0(zap_add(mos, newparent->dd_phys->dd_child_dir_zapobj,
+	    dd->dd_myname, 8, 1, &dd->dd_object, tx));
+
+#ifdef __FreeBSD__
 #ifdef _KERNEL
-	zfsvfs_update_fromname(oldname, newname);
-	zvol_rename_minors(oldname, newname);
+	zfsvfs_update_fromname(ddra->ddra_oldname, ddra->ddra_newname);
+	zvol_rename_minors(ddra->ddra_oldname, ddra->ddra_newname);
 #endif
+#endif
 
-	spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa,
-	    tx, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj);
+	dsl_prop_notify_all(dd);
+
+	dsl_dir_rele(newparent, FTAG);
+	dsl_dir_rele(dd, FTAG);
 }
 
 int
-dsl_dir_rename(dsl_dir_t *dd, const char *newname, int flags)
+dsl_dir_rename(const char *oldname, const char *newname)
 {
-	struct renamearg ra;
-	int err;
+	dsl_dir_rename_arg_t ddra;
 
-	/* new parent should exist */
-	err = dsl_dir_open(newname, FTAG, &ra.newparent, &ra.mynewname);
-	if (err)
-		return (err);
+	ddra.ddra_oldname = oldname;
+	ddra.ddra_newname = newname;
 
-	/* can't rename to different pool */
-	if (dd->dd_pool != ra.newparent->dd_pool) {
-		err = ENXIO;
-		goto out;
-	}
-
-	/* new name should not already exist */
-	if (ra.mynewname == NULL) {
-		err = EEXIST;
-		goto out;
-	}
-
-	ra.allowmounted = !!(flags & ZFS_RENAME_ALLOW_MOUNTED);
-
-	err = dsl_sync_task_do(dd->dd_pool,
-	    dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3);
-
-out:
-	dsl_dir_close(ra.newparent, FTAG);
-	return (err);
+	return (dsl_sync_task(oldname,
+	    dsl_dir_rename_check, dsl_dir_rename_sync, &ddra, 3));
 }
 
 int
@@ -1398,7 +1328,7 @@
 	adelta = would_change(sdd, -space, ancestor);
 	avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE);
 	if (avail < space)
-		return (ENOSPC);
+		return (SET_ERROR(ENOSPC));
 
 	return (0);
 }

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/dsl_pool.h>
@@ -43,6 +43,7 @@
 #include <sys/bptree.h>
 #include <sys/zfeature.h>
 #include <sys/zil_impl.h>
+#include <sys/dsl_userhold.h>
 
 int zfs_no_write_throttle = 0;
 int zfs_write_limit_shift = 3;			/* 1/8th of physical memory */
@@ -94,7 +95,7 @@
 	if (err)
 		return (err);
 
-	return (dsl_dir_open_obj(dp, obj, name, dp, ddp));
+	return (dsl_dir_hold_obj(dp, obj, name, dp, ddp));
 }
 
 static dsl_pool_t *
@@ -106,7 +107,7 @@
 	dp = kmem_zalloc(sizeof (dsl_pool_t), KM_SLEEP);
 	dp->dp_spa = spa;
 	dp->dp_meta_rootbp = *bp;
-	rw_init(&dp->dp_config_rwlock, NULL, RW_DEFAULT, NULL);
+	rrw_init(&dp->dp_config_rwlock, B_TRUE);
 	dp->dp_write_limit = zfs_write_limit_min;
 	txg_init(dp, txg);
 
@@ -117,7 +118,7 @@
 	txg_list_create(&dp->dp_dirty_dirs,
 	    offsetof(dsl_dir_t, dd_dirty_link));
 	txg_list_create(&dp->dp_sync_tasks,
-	    offsetof(dsl_sync_task_group_t, dstg_node));
+	    offsetof(dsl_sync_task_t, dst_node));
 
 	mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
 
@@ -151,7 +152,7 @@
 	dsl_dataset_t *ds;
 	uint64_t obj;
 
-	rw_enter(&dp->dp_config_rwlock, RW_WRITER);
+	rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
 	err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
 	    DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1,
 	    &dp->dp_root_dir_obj);
@@ -158,7 +159,7 @@
 	if (err)
 		goto out;
 
-	err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj,
+	err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj,
 	    NULL, dp, &dp->dp_root_dir);
 	if (err)
 		goto out;
@@ -179,7 +180,7 @@
 			    &dp->dp_origin_snap);
 			dsl_dataset_rele(ds, FTAG);
 		}
-		dsl_dir_close(dd, dp);
+		dsl_dir_rele(dd, dp);
 		if (err)
 			goto out;
 	}
@@ -194,7 +195,7 @@
 		    DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj);
 		if (err)
 			goto out;
-		VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj,
+		VERIFY0(bpobj_open(&dp->dp_free_bpobj,
 		    dp->dp_meta_objset, obj));
 	}
 
@@ -227,7 +228,7 @@
 	err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg);
 
 out:
-	rw_exit(&dp->dp_config_rwlock);
+	rrw_exit(&dp->dp_config_rwlock, FTAG);
 	return (err);
 }
 
@@ -242,13 +243,13 @@
 	 * and not a hold, so just drop that here.
 	 */
 	if (dp->dp_origin_snap)
-		dsl_dataset_drop_ref(dp->dp_origin_snap, dp);
+		dsl_dataset_rele(dp->dp_origin_snap, dp);
 	if (dp->dp_mos_dir)
-		dsl_dir_close(dp->dp_mos_dir, dp);
+		dsl_dir_rele(dp->dp_mos_dir, dp);
 	if (dp->dp_free_dir)
-		dsl_dir_close(dp->dp_free_dir, dp);
+		dsl_dir_rele(dp->dp_free_dir, dp);
 	if (dp->dp_root_dir)
-		dsl_dir_close(dp->dp_root_dir, dp);
+		dsl_dir_rele(dp->dp_root_dir, dp);
 
 	bpobj_close(&dp->dp_free_bpobj);
 
@@ -264,7 +265,7 @@
 	arc_flush(dp->dp_spa);
 	txg_fini(dp);
 	dsl_scan_fini(dp);
-	rw_destroy(&dp->dp_config_rwlock);
+	rrw_destroy(&dp->dp_config_rwlock);
 	mutex_destroy(&dp->dp_lock);
 	taskq_destroy(dp->dp_vnrele_taskq);
 	if (dp->dp_blkstats)
@@ -282,6 +283,8 @@
 	dsl_dataset_t *ds;
 	uint64_t obj;
 
+	rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
+
 	/* create and open the MOS (meta-objset) */
 	dp->dp_meta_objset = dmu_objset_create_impl(spa,
 	    NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx);
@@ -292,16 +295,16 @@
 	ASSERT0(err);
 
 	/* Initialize scan structures */
-	VERIFY3U(0, ==, dsl_scan_init(dp, txg));
+	VERIFY0(dsl_scan_init(dp, txg));
 
 	/* create and open the root dir */
 	dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx);
-	VERIFY(0 == dsl_dir_open_obj(dp, dp->dp_root_dir_obj,
+	VERIFY0(dsl_dir_hold_obj(dp, dp->dp_root_dir_obj,
 	    NULL, dp, &dp->dp_root_dir));
 
 	/* create and open the meta-objset dir */
 	(void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx);
-	VERIFY(0 == dsl_pool_open_special_dir(dp,
+	VERIFY0(dsl_pool_open_special_dir(dp,
 	    MOS_DIR_NAME, &dp->dp_mos_dir));
 
 	if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
@@ -308,7 +311,7 @@
 		/* create and open the free dir */
 		(void) dsl_dir_create_sync(dp, dp->dp_root_dir,
 		    FREE_DIR_NAME, tx);
-		VERIFY(0 == dsl_pool_open_special_dir(dp,
+		VERIFY0(dsl_pool_open_special_dir(dp,
 		    FREE_DIR_NAME, &dp->dp_free_dir));
 
 		/* create and open the free_bplist */
@@ -315,7 +318,7 @@
 		obj = bpobj_alloc(dp->dp_meta_objset, SPA_MAXBLOCKSIZE, tx);
 		VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
 		    DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0);
-		VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj,
+		VERIFY0(bpobj_open(&dp->dp_free_bpobj,
 		    dp->dp_meta_objset, obj));
 	}
 
@@ -326,7 +329,7 @@
 	obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx);
 
 	/* create the root objset */
-	VERIFY(0 == dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
+	VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
 	os = dmu_objset_create_impl(dp->dp_spa, ds,
 	    dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx);
 #ifdef _KERNEL
@@ -336,6 +339,8 @@
 
 	dmu_tx_commit(tx);
 
+	rrw_exit(&dp->dp_config_rwlock, FTAG);
+
 	return (dp);
 }
 
@@ -358,10 +363,7 @@
 deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
 {
 	dsl_deadlist_t *dl = arg;
-	dsl_pool_t *dp = dmu_objset_pool(dl->dl_os);
-	rw_enter(&dp->dp_config_rwlock, RW_READER);
 	dsl_deadlist_insert(dl, bp, tx);
-	rw_exit(&dp->dp_config_rwlock);
 	return (0);
 }
 
@@ -383,7 +385,7 @@
 
 	/*
 	 * We need to copy dp_space_towrite() before doing
-	 * dsl_sync_task_group_sync(), because
+	 * dsl_sync_task_sync(), because
 	 * dsl_dataset_snapshot_reserve_space() will increase
 	 * dp_space_towrite but not actually write anything.
 	 */
@@ -498,14 +500,14 @@
 	 */
 	DTRACE_PROBE(pool_sync__3task);
 	if (!txg_list_empty(&dp->dp_sync_tasks, txg)) {
-		dsl_sync_task_group_t *dstg;
+		dsl_sync_task_t *dst;
 		/*
 		 * No more sync tasks should have been added while we
 		 * were syncing.
 		 */
 		ASSERT(spa_sync_pass(dp->dp_spa) == 1);
-		while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg))
-			dsl_sync_task_group_sync(dstg, tx);
+		while (dst = txg_list_remove(&dp->dp_sync_tasks, txg))
+			dsl_sync_task_sync(dst, tx);
 	}
 
 	dmu_tx_commit(tx);
@@ -628,7 +630,7 @@
 		    + dp->dp_tempreserved[tx->tx_txg & TXG_MASK] / 2;
 
 		if (reserved && reserved > write_limit)
-			return (ERESTART);
+			return (SET_ERROR(ERESTART));
 	}
 
 	atomic_add_64(&dp->dp_tempreserved[tx->tx_txg & TXG_MASK], space);
@@ -680,14 +682,13 @@
 
 /* ARGSUSED */
 static int
-upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
+upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
 {
 	dmu_tx_t *tx = arg;
 	dsl_dataset_t *ds, *prev = NULL;
 	int err;
-	dsl_pool_t *dp = spa_get_dsl(spa);
 
-	err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
+	err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds);
 	if (err)
 		return (err);
 
@@ -713,7 +714,7 @@
 		 * The $ORIGIN can't have any data, or the accounting
 		 * will be wrong.
 		 */
-		ASSERT(prev->ds_phys->ds_bp.blk_birth == 0);
+		ASSERT0(prev->ds_phys->ds_bp.blk_birth);
 
 		/* The origin doesn't get attached to itself */
 		if (ds->ds_object == prev->ds_object) {
@@ -733,13 +734,13 @@
 
 		if (ds->ds_phys->ds_next_snap_obj == 0) {
 			ASSERT(ds->ds_prev == NULL);
-			VERIFY(0 == dsl_dataset_hold_obj(dp,
+			VERIFY0(dsl_dataset_hold_obj(dp,
 			    ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));
 		}
 	}
 
-	ASSERT(ds->ds_dir->dd_phys->dd_origin_obj == prev->ds_object);
-	ASSERT(ds->ds_phys->ds_prev_snap_obj == prev->ds_object);
+	ASSERT3U(ds->ds_dir->dd_phys->dd_origin_obj, ==, prev->ds_object);
+	ASSERT3U(ds->ds_phys->ds_prev_snap_obj, ==, prev->ds_object);
 
 	if (prev->ds_phys->ds_next_clones_obj == 0) {
 		dmu_buf_will_dirty(prev->ds_dbuf, tx);
@@ -747,7 +748,7 @@
 		    zap_create(dp->dp_meta_objset,
 		    DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
 	}
-	VERIFY(0 == zap_add_int(dp->dp_meta_objset,
+	VERIFY0(zap_add_int(dp->dp_meta_objset,
 	    prev->ds_phys->ds_next_clones_obj, ds->ds_object, tx));
 
 	dsl_dataset_rele(ds, FTAG);
@@ -762,25 +763,21 @@
 	ASSERT(dmu_tx_is_syncing(tx));
 	ASSERT(dp->dp_origin_snap != NULL);
 
-	VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL, upgrade_clones_cb,
+	VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, upgrade_clones_cb,
 	    tx, DS_FIND_CHILDREN));
 }
 
 /* ARGSUSED */
 static int
-upgrade_dir_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
+upgrade_dir_clones_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
 {
 	dmu_tx_t *tx = arg;
-	dsl_dataset_t *ds;
-	dsl_pool_t *dp = spa_get_dsl(spa);
 	objset_t *mos = dp->dp_meta_objset;
 
-	VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
-
-	if (ds->ds_dir->dd_phys->dd_origin_obj) {
+	if (ds->ds_dir->dd_phys->dd_origin_obj != 0) {
 		dsl_dataset_t *origin;
 
-		VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
+		VERIFY0(dsl_dataset_hold_obj(dp,
 		    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin));
 
 		if (origin->ds_dir->dd_phys->dd_clones == 0) {
@@ -789,13 +786,11 @@
 			    DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
 		}
 
-		VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
-		    origin->ds_dir->dd_phys->dd_clones, dsobj, tx));
+		VERIFY0(zap_add_int(dp->dp_meta_objset,
+		    origin->ds_dir->dd_phys->dd_clones, ds->ds_object, tx));
 
 		dsl_dataset_rele(origin, FTAG);
 	}
-
-	dsl_dataset_rele(ds, FTAG);
 	return (0);
 }
 
@@ -806,7 +801,7 @@
 	uint64_t obj;
 
 	(void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx);
-	VERIFY(0 == dsl_pool_open_special_dir(dp,
+	VERIFY0(dsl_pool_open_special_dir(dp,
 	    FREE_DIR_NAME, &dp->dp_free_dir));
 
 	/*
@@ -816,12 +811,11 @@
 	 */
 	obj = dmu_object_alloc(dp->dp_meta_objset, DMU_OT_BPOBJ,
 	    SPA_MAXBLOCKSIZE, DMU_OT_BPOBJ_HDR, sizeof (bpobj_phys_t), tx);
-	VERIFY3U(0, ==, zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+	VERIFY0(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
 	    DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx));
-	VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj,
-	    dp->dp_meta_objset, obj));
+	VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj));
 
-	VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL,
+	VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
 	    upgrade_dir_clones_cb, tx, DS_FIND_CHILDREN));
 }
 
@@ -833,17 +827,16 @@
 
 	ASSERT(dmu_tx_is_syncing(tx));
 	ASSERT(dp->dp_origin_snap == NULL);
+	ASSERT(rrw_held(&dp->dp_config_rwlock, RW_WRITER));
 
 	/* create the origin dir, ds, & snap-ds */
-	rw_enter(&dp->dp_config_rwlock, RW_WRITER);
 	dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME,
 	    NULL, 0, kcred, tx);
-	VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
-	dsl_dataset_snapshot_sync(ds, ORIGIN_DIR_NAME, tx);
-	VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj,
+	VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
+	dsl_dataset_snapshot_sync_impl(ds, ORIGIN_DIR_NAME, tx);
+	VERIFY0(dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj,
 	    dp, &dp->dp_origin_snap));
 	dsl_dataset_rele(ds, FTAG);
-	rw_exit(&dp->dp_config_rwlock);
 }
 
 taskq_t *
@@ -878,7 +871,7 @@
 		*htag = '\0';
 		++htag;
 		dsobj = strtonum(za.za_name, NULL);
-		(void) dsl_dataset_user_release_tmp(dp, dsobj, htag, B_FALSE);
+		dsl_dataset_user_release_tmp(dp, dsobj, htag);
 	}
 	zap_cursor_fini(&zc);
 }
@@ -900,7 +893,7 @@
 
 static int
 dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj,
-    const char *tag, uint64_t *now, dmu_tx_t *tx, boolean_t holding)
+    const char *tag, uint64_t now, dmu_tx_t *tx, boolean_t holding)
 {
 	objset_t *mos = dp->dp_meta_objset;
 	uint64_t zapobj = dp->dp_tmp_userrefs_obj;
@@ -919,13 +912,13 @@
 			dsl_pool_user_hold_create_obj(dp, tx);
 			zapobj = dp->dp_tmp_userrefs_obj;
 		} else {
-			return (ENOENT);
+			return (SET_ERROR(ENOENT));
 		}
 	}
 
 	name = kmem_asprintf("%llx-%s", (u_longlong_t)dsobj, tag);
 	if (holding)
-		error = zap_add(mos, zapobj, name, 8, 1, now, tx);
+		error = zap_add(mos, zapobj, name, 8, 1, &now, tx);
 	else
 		error = zap_remove(mos, zapobj, name, tx);
 	strfree(name);
@@ -938,7 +931,7 @@
  */
 int
 dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, const char *tag,
-    uint64_t *now, dmu_tx_t *tx)
+    uint64_t now, dmu_tx_t *tx)
 {
 	return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, now, tx, B_TRUE));
 }
@@ -950,6 +943,109 @@
 dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, const char *tag,
     dmu_tx_t *tx)
 {
-	return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, NULL,
+	return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, 0,
 	    tx, B_FALSE));
 }
+
+/*
+ * DSL Pool Configuration Lock
+ *
+ * The dp_config_rwlock protects against changes to DSL state (e.g. dataset
+ * creation / destruction / rename / property setting).  It must be held for
+ * read to hold a dataset or dsl_dir.  I.e. you must call
+ * dsl_pool_config_enter() or dsl_pool_hold() before calling
+ * dsl_{dataset,dir}_hold{_obj}.  In most circumstances, the dp_config_rwlock
+ * must be held continuously until all datasets and dsl_dirs are released.
+ *
+ * The only exception to this rule is that if a "long hold" is placed on
+ * a dataset, then the dp_config_rwlock may be dropped while the dataset
+ * is still held.  The long hold will prevent the dataset from being
+ * destroyed -- the destroy will fail with EBUSY.  A long hold can be
+ * obtained by calling dsl_dataset_long_hold(), or by "owning" a dataset
+ * (by calling dsl_{dataset,objset}_{try}own{_obj}).
+ *
+ * Legitimate long-holders (including owners) should be long-running, cancelable
+ * tasks that should cause "zfs destroy" to fail.  This includes DMU
+ * consumers (i.e. a ZPL filesystem being mounted or ZVOL being open),
+ * "zfs send", and "zfs diff".  There are several other long-holders whose
+ * uses are suboptimal (e.g. "zfs promote", and zil_suspend()).
+ *
+ * The usual formula for long-holding would be:
+ * dsl_pool_hold()
+ * dsl_dataset_hold()
+ * ... perform checks ...
+ * dsl_dataset_long_hold()
+ * dsl_pool_rele()
+ * ... perform long-running task ...
+ * dsl_dataset_long_rele()
+ * dsl_dataset_rele()
+ *
+ * Note that when the long hold is released, the dataset is still held but
+ * the pool is not held.  The dataset may change arbitrarily during this time
+ * (e.g. it could be destroyed).  Therefore you shouldn't do anything to the
+ * dataset except release it.
+ *
+ * User-initiated operations (e.g. ioctls, zfs_ioc_*()) are either read-only
+ * or modifying operations.
+ *
+ * Modifying operations should generally use dsl_sync_task().  The synctask
+ * infrastructure enforces proper locking strategy with respect to the
+ * dp_config_rwlock.  See the comment above dsl_sync_task() for details.
+ *
+ * Read-only operations will manually hold the pool, then the dataset, obtain
+ * information from the dataset, then release the pool and dataset.
+ * dmu_objset_{hold,rele}() are convenience routines that also do the pool
+ * hold/rele.
+ */
+
+int
+dsl_pool_hold(const char *name, void *tag, dsl_pool_t **dp)
+{
+	spa_t *spa;
+	int error;
+
+	error = spa_open(name, &spa, tag);
+	if (error == 0) {
+		*dp = spa_get_dsl(spa);
+		dsl_pool_config_enter(*dp, tag);
+	}
+	return (error);
+}
+
+void
+dsl_pool_rele(dsl_pool_t *dp, void *tag)
+{
+	dsl_pool_config_exit(dp, tag);
+	spa_close(dp->dp_spa, tag);
+}
+
+void
+dsl_pool_config_enter(dsl_pool_t *dp, void *tag)
+{
+	/*
+	 * We use a "reentrant" reader-writer lock, but not reentrantly.
+	 *
+	 * The rrwlock can (with the track_all flag) track all reading threads,
+	 * which is very useful for debugging which code path failed to release
+	 * the lock, and for verifying that the *current* thread does hold
+	 * the lock.
+	 *
+	 * (Unlike a rwlock, which knows that N threads hold it for
+	 * read, but not *which* threads, so rw_held(RW_READER) returns TRUE
+	 * if any thread holds it for read, even if this thread doesn't).
+	 */
+	ASSERT(!rrw_held(&dp->dp_config_rwlock, RW_READER));
+	rrw_enter(&dp->dp_config_rwlock, RW_READER, tag);
+}
+
+void
+dsl_pool_config_exit(dsl_pool_t *dp, void *tag)
+{
+	rrw_exit(&dp->dp_config_rwlock, tag);
+}
+
+boolean_t
+dsl_pool_config_held(dsl_pool_t *dp)
+{
+	return (RRW_LOCK_HELD(&dp->dp_config_rwlock));
+}

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -51,16 +52,16 @@
 	 */
 	if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL ||
 	    (zfs_prop_readonly(prop) && !zfs_prop_setonce(prop)))
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 
 	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
 		if (intsz != 1)
-			return (EOVERFLOW);
+			return (SET_ERROR(EOVERFLOW));
 		(void) strncpy(buf, zfs_prop_default_string(prop),
 		    numints);
 	} else {
 		if (intsz != 8 || numints < 1)
-			return (EOVERFLOW);
+			return (SET_ERROR(EOVERFLOW));
 
 		*(uint64_t *)buf = zfs_prop_default_numeric(prop);
 	}
@@ -81,7 +82,7 @@
 	char *inheritstr;
 	char *recvdstr;
 
-	ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock));
+	ASSERT(dsl_pool_config_held(dd->dd_pool));
 
 	if (setpoint)
 		setpoint[0] = '\0';
@@ -96,8 +97,6 @@
 	 * after this loop.
 	 */
 	for (; dd != NULL; dd = dd->dd_parent) {
-		ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock));
-
 		if (dd != target || snapshot) {
 			if (!inheritable)
 				break;
@@ -145,7 +144,7 @@
 		 * at the end of the loop (instead of at the beginning) ensures
 		 * that err has a valid post-loop value.
 		 */
-		err = ENOENT;
+		err = SET_ERROR(ENOENT);
 	}
 
 	if (err == ENOENT)
@@ -166,7 +165,7 @@
 	boolean_t snapshot;
 	uint64_t zapobj;
 
-	ASSERT(RW_LOCK_HELD(&ds->ds_dir->dd_pool->dp_config_rwlock));
+	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
 	inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop));
 	snapshot = (ds->ds_phys != NULL && dsl_dataset_is_snapshot(ds));
 	zapobj = (ds->ds_phys == NULL ? 0 : ds->ds_phys->ds_props_obj);
@@ -234,18 +233,12 @@
 	uint64_t value;
 	dsl_prop_cb_record_t *cbr;
 	int err;
-	int need_rwlock;
 
-	need_rwlock = !RW_WRITE_HELD(&dp->dp_config_rwlock);
-	if (need_rwlock)
-		rw_enter(&dp->dp_config_rwlock, RW_READER);
+	ASSERT(dsl_pool_config_held(dp));
 
-	err = dsl_prop_get_ds(ds, propname, 8, 1, &value, NULL);
-	if (err != 0) {
-		if (need_rwlock)
-			rw_exit(&dp->dp_config_rwlock);
+	err = dsl_prop_get_int_ds(ds, propname, &value);
+	if (err != 0)
 		return (err);
-	}
 
 	cbr = kmem_alloc(sizeof (dsl_prop_cb_record_t), KM_SLEEP);
 	cbr->cbr_ds = ds;
@@ -258,9 +251,6 @@
 	mutex_exit(&dd->dd_lock);
 
 	cbr->cbr_func(cbr->cbr_arg, value);
-
-	if (need_rwlock)
-		rw_exit(&dp->dp_config_rwlock);
 	return (0);
 }
 
@@ -268,19 +258,18 @@
 dsl_prop_get(const char *dsname, const char *propname,
     int intsz, int numints, void *buf, char *setpoint)
 {
-	dsl_dataset_t *ds;
-	int err;
+	objset_t *os;
+	int error;
 
-	err = dsl_dataset_hold(dsname, FTAG, &ds);
-	if (err)
-		return (err);
+	error = dmu_objset_hold(dsname, FTAG, &os);
+	if (error != 0)
+		return (error);
 
-	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
-	err = dsl_prop_get_ds(ds, propname, intsz, numints, buf, setpoint);
-	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
+	error = dsl_prop_get_ds(dmu_objset_ds(os), propname,
+	    intsz, numints, buf, setpoint);
 
-	dsl_dataset_rele(ds, FTAG);
-	return (err);
+	dmu_objset_rele(os, FTAG);
+	return (error);
 }
 
 /*
@@ -298,17 +287,11 @@
 	return (dsl_prop_get(ddname, propname, 8, 1, valuep, setpoint));
 }
 
-void
-dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname,
-    zprop_source_t source, uint64_t *value)
+int
+dsl_prop_get_int_ds(dsl_dataset_t *ds, const char *propname,
+    uint64_t *valuep)
 {
-	psa->psa_name = propname;
-	psa->psa_source = source;
-	psa->psa_intsz = 8;
-	psa->psa_numints = 1;
-	psa->psa_value = value;
-
-	psa->psa_effective_value = -1ULL;
+	return (dsl_prop_get_ds(ds, propname, 8, 1, valuep, NULL));
 }
 
 /*
@@ -322,11 +305,10 @@
  * a property not handled by this function.
  */
 int
-dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa)
+dsl_prop_predict(dsl_dir_t *dd, const char *propname,
+    zprop_source_t source, uint64_t value, uint64_t *newvalp)
 {
-	const char *propname = psa->psa_name;
 	zfs_prop_t prop = zfs_name_to_prop(propname);
-	zprop_source_t source = psa->psa_source;
 	objset_t *mos;
 	uint64_t zapobj;
 	uint64_t version;
@@ -358,13 +340,12 @@
 	switch (source) {
 	case ZPROP_SRC_NONE:
 		/* Revert to the received value, if any. */
-		err = zap_lookup(mos, zapobj, recvdstr, 8, 1,
-		    &psa->psa_effective_value);
+		err = zap_lookup(mos, zapobj, recvdstr, 8, 1, newvalp);
 		if (err == ENOENT)
-			psa->psa_effective_value = 0;
+			*newvalp = 0;
 		break;
 	case ZPROP_SRC_LOCAL:
-		psa->psa_effective_value = *(uint64_t *)psa->psa_value;
+		*newvalp = value;
 		break;
 	case ZPROP_SRC_RECEIVED:
 		/*
@@ -371,10 +352,9 @@
 		 * If there's no local setting, then the new received value will
 		 * be the effective value.
 		 */
-		err = zap_lookup(mos, zapobj, propname, 8, 1,
-		    &psa->psa_effective_value);
+		err = zap_lookup(mos, zapobj, propname, 8, 1, newvalp);
 		if (err == ENOENT)
-			psa->psa_effective_value = *(uint64_t *)psa->psa_value;
+			*newvalp = value;
 		break;
 	case (ZPROP_SRC_NONE | ZPROP_SRC_RECEIVED):
 		/*
@@ -381,13 +361,12 @@
 		 * We're clearing the received value, so the local setting (if
 		 * it exists) remains the effective value.
 		 */
-		err = zap_lookup(mos, zapobj, propname, 8, 1,
-		    &psa->psa_effective_value);
+		err = zap_lookup(mos, zapobj, propname, 8, 1, newvalp);
 		if (err == ENOENT)
-			psa->psa_effective_value = 0;
+			*newvalp = 0;
 		break;
 	default:
-		cmn_err(CE_PANIC, "unexpected property source: %d", source);
+		panic("unexpected property source: %d", source);
 	}
 
 	strfree(recvdstr);
@@ -398,37 +377,6 @@
 	return (err);
 }
 
-#ifdef	ZFS_DEBUG
-void
-dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa)
-{
-	zfs_prop_t prop = zfs_name_to_prop(psa->psa_name);
-	uint64_t intval;
-	char setpoint[MAXNAMELEN];
-	uint64_t version = spa_version(dd->dd_pool->dp_spa);
-	int err;
-
-	if (version < SPA_VERSION_RECVD_PROPS) {
-		switch (prop) {
-		case ZFS_PROP_QUOTA:
-		case ZFS_PROP_RESERVATION:
-			return;
-		}
-	}
-
-	err = dsl_prop_get_dd(dd, psa->psa_name, 8, 1, &intval,
-	    setpoint, B_FALSE);
-	if (err == 0 && intval != psa->psa_effective_value) {
-		cmn_err(CE_PANIC, "%s property, source: %x, "
-		    "predicted effective value: %llu, "
-		    "actual effective value: %llu (setpoint: %s)",
-		    psa->psa_name, psa->psa_source,
-		    (unsigned long long)psa->psa_effective_value,
-		    (unsigned long long)intval, setpoint);
-	}
-}
-#endif
-
 /*
  * Unregister this callback.  Return 0 on success, ENOENT if ddname is
  * invalid, ENOMSG if no matching callback registered.
@@ -452,7 +400,7 @@
 
 	if (cbr == NULL) {
 		mutex_exit(&dd->dd_lock);
-		return (ENOMSG);
+		return (SET_ERROR(ENOMSG));
 	}
 
 	list_remove(&dd->dd_prop_cbs, cbr);
@@ -463,27 +411,59 @@
 	return (0);
 }
 
-/*
- * Return the number of callbacks that are registered for this dataset.
- */
-int
-dsl_prop_numcb(dsl_dataset_t *ds)
+boolean_t
+dsl_prop_hascb(dsl_dataset_t *ds)
 {
 	dsl_dir_t *dd = ds->ds_dir;
+	boolean_t rv = B_FALSE;
 	dsl_prop_cb_record_t *cbr;
-	int num = 0;
 
 	mutex_enter(&dd->dd_lock);
-	for (cbr = list_head(&dd->dd_prop_cbs);
-	    cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) {
-		if (cbr->cbr_ds == ds)
-			num++;
+	for (cbr = list_head(&dd->dd_prop_cbs); cbr;
+	    cbr = list_next(&dd->dd_prop_cbs, cbr)) {
+		if (cbr->cbr_ds == ds) {
+			rv = B_TRUE;
+			break;
+		}
 	}
 	mutex_exit(&dd->dd_lock);
+	return (rv);
+}
 
-	return (num);
+/* ARGSUSED */
+static int
+dsl_prop_notify_all_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
+{
+	dsl_dir_t *dd = ds->ds_dir;
+	dsl_prop_cb_record_t *cbr;
+
+	mutex_enter(&dd->dd_lock);
+	for (cbr = list_head(&dd->dd_prop_cbs); cbr;
+	    cbr = list_next(&dd->dd_prop_cbs, cbr)) {
+		uint64_t value;
+
+		if (dsl_prop_get_ds(cbr->cbr_ds, cbr->cbr_propname,
+		    sizeof (value), 1, &value, NULL) == 0)
+			cbr->cbr_func(cbr->cbr_arg, value);
+	}
+	mutex_exit(&dd->dd_lock);
+
+	return (0);
 }
 
+/*
+ * Update all property values for ddobj & its descendants.  This is used
+ * when renaming the dir.
+ */
+void
+dsl_prop_notify_all(dsl_dir_t *dd)
+{
+	dsl_pool_t *dp = dd->dd_pool;
+	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
+	(void) dmu_objset_find_dp(dp, dd->dd_object, dsl_prop_notify_all_cb,
+	    NULL, DS_FIND_CHILDREN);
+}
+
 static void
 dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
     const char *propname, uint64_t value, int first)
@@ -495,8 +475,8 @@
 	zap_attribute_t *za;
 	int err;
 
-	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
-	err = dsl_dir_open_obj(dp, ddobj, NULL, FTAG, &dd);
+	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
+	err = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd);
 	if (err)
 		return;
 
@@ -507,7 +487,7 @@
 		 */
 		err = zap_contains(mos, dd->dd_phys->dd_props_zapobj, propname);
 		if (err == 0) {
-			dsl_dir_close(dd, FTAG);
+			dsl_dir_rele(dd, FTAG);
 			return;
 		}
 		ASSERT3U(err, ==, ENOENT);
@@ -542,26 +522,24 @@
 	}
 	kmem_free(za, sizeof (zap_attribute_t));
 	zap_cursor_fini(&zc);
-	dsl_dir_close(dd, FTAG);
+	dsl_dir_rele(dd, FTAG);
 }
 
 void
-dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
+    zprop_source_t source, int intsz, int numints, const void *value,
+    dmu_tx_t *tx)
 {
-	dsl_dataset_t *ds = arg1;
-	dsl_prop_setarg_t *psa = arg2;
 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 	uint64_t zapobj, intval, dummy;
 	int isint;
 	char valbuf[32];
-	char *valstr = NULL;
+	const char *valstr = NULL;
 	char *inheritstr;
 	char *recvdstr;
 	char *tbuf = NULL;
 	int err;
 	uint64_t version = spa_version(ds->ds_dir->dd_pool->dp_spa);
-	const char *propname = psa->psa_name;
-	zprop_source_t source = psa->psa_source;
 
 	isint = (dodefault(propname, 8, 1, &intval) == 0);
 
@@ -611,8 +589,8 @@
 		 */
 		err = zap_remove(mos, zapobj, inheritstr, tx);
 		ASSERT(err == 0 || err == ENOENT);
-		VERIFY(0 == zap_update(mos, zapobj, propname,
-		    psa->psa_intsz, psa->psa_numints, psa->psa_value, tx));
+		VERIFY0(zap_update(mos, zapobj, propname,
+		    intsz, numints, value, tx));
 		break;
 	case ZPROP_SRC_INHERITED:
 		/*
@@ -623,12 +601,10 @@
 		err = zap_remove(mos, zapobj, propname, tx);
 		ASSERT(err == 0 || err == ENOENT);
 		if (version >= SPA_VERSION_RECVD_PROPS &&
-		    dsl_prop_get_ds(ds, ZPROP_HAS_RECVD, 8, 1, &dummy,
-		    NULL) == 0) {
+		    dsl_prop_get_int_ds(ds, ZPROP_HAS_RECVD, &dummy) == 0) {
 			dummy = 0;
-			err = zap_update(mos, zapobj, inheritstr,
-			    8, 1, &dummy, tx);
-			ASSERT(err == 0);
+			VERIFY0(zap_update(mos, zapobj, inheritstr,
+			    8, 1, &dummy, tx));
 		}
 		break;
 	case ZPROP_SRC_RECEIVED:
@@ -636,7 +612,7 @@
 		 * set propname$recvd -> value
 		 */
 		err = zap_update(mos, zapobj, recvdstr,
-		    psa->psa_intsz, psa->psa_numints, psa->psa_value, tx);
+		    intsz, numints, value, tx);
 		ASSERT(err == 0);
 		break;
 	case (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED):
@@ -666,7 +642,7 @@
 	strfree(recvdstr);
 
 	if (isint) {
-		VERIFY(0 == dsl_prop_get_ds(ds, propname, 8, 1, &intval, NULL));
+		VERIFY0(dsl_prop_get_int_ds(ds, propname, &intval));
 
 		if (ds->ds_phys != NULL && dsl_dataset_is_snapshot(ds)) {
 			dsl_prop_cb_record_t *cbr;
@@ -693,7 +669,7 @@
 		valstr = valbuf;
 	} else {
 		if (source == ZPROP_SRC_LOCAL) {
-			valstr = (char *)psa->psa_value;
+			valstr = value;
 		} else {
 			tbuf = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP);
 			if (dsl_prop_get_ds(ds, propname, 1,
@@ -702,146 +678,81 @@
 		}
 	}
 
-	spa_history_log_internal((source == ZPROP_SRC_NONE ||
-	    source == ZPROP_SRC_INHERITED) ? LOG_DS_INHERIT :
-	    LOG_DS_PROPSET, ds->ds_dir->dd_pool->dp_spa, tx,
-	    "%s=%s dataset = %llu", propname,
-	    (valstr == NULL ? "" : valstr), ds->ds_object);
+	spa_history_log_internal_ds(ds, (source == ZPROP_SRC_NONE ||
+	    source == ZPROP_SRC_INHERITED) ? "inherit" : "set", tx,
+	    "%s=%s", propname, (valstr == NULL ? "" : valstr));
 
 	if (tbuf != NULL)
 		kmem_free(tbuf, ZAP_MAXVALUELEN);
 }
 
-void
-dsl_props_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+int
+dsl_prop_set_int(const char *dsname, const char *propname,
+    zprop_source_t source, uint64_t value)
 {
-	dsl_dataset_t *ds = arg1;
-	dsl_props_arg_t *pa = arg2;
-	nvlist_t *props = pa->pa_props;
-	dsl_prop_setarg_t psa;
-	nvpair_t *elem = NULL;
+	nvlist_t *nvl = fnvlist_alloc();
+	int error;
 
-	psa.psa_source = pa->pa_source;
-
-	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
-		nvpair_t *pair = elem;
-
-		psa.psa_name = nvpair_name(pair);
-
-		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
-			/*
-			 * dsl_prop_get_all_impl() returns properties in this
-			 * format.
-			 */
-			nvlist_t *attrs;
-			VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
-			VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
-			    &pair) == 0);
-		}
-
-		if (nvpair_type(pair) == DATA_TYPE_STRING) {
-			VERIFY(nvpair_value_string(pair,
-			    (char **)&psa.psa_value) == 0);
-			psa.psa_intsz = 1;
-			psa.psa_numints = strlen(psa.psa_value) + 1;
-		} else {
-			uint64_t intval;
-			VERIFY(nvpair_value_uint64(pair, &intval) == 0);
-			psa.psa_intsz = sizeof (intval);
-			psa.psa_numints = 1;
-			psa.psa_value = &intval;
-		}
-		dsl_prop_set_sync(ds, &psa, tx);
-	}
+	fnvlist_add_uint64(nvl, propname, value);
+	error = dsl_props_set(dsname, source, nvl);
+	fnvlist_free(nvl);
+	return (error);
 }
 
-void
-dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
-    dmu_tx_t *tx)
+int
+dsl_prop_set_string(const char *dsname, const char *propname,
+    zprop_source_t source, const char *value)
 {
-	objset_t *mos = dd->dd_pool->dp_meta_objset;
-	uint64_t zapobj = dd->dd_phys->dd_props_zapobj;
+	nvlist_t *nvl = fnvlist_alloc();
+	int error;
 
-	ASSERT(dmu_tx_is_syncing(tx));
-
-	VERIFY(0 == zap_update(mos, zapobj, name, sizeof (val), 1, &val, tx));
-
-	dsl_prop_changed_notify(dd->dd_pool, dd->dd_object, name, val, TRUE);
-
-	spa_history_log_internal(LOG_DS_PROPSET, dd->dd_pool->dp_spa, tx,
-	    "%s=%llu dataset = %llu", name, (u_longlong_t)val,
-	    dd->dd_phys->dd_head_dataset_obj);
+	fnvlist_add_string(nvl, propname, value);
+	error = dsl_props_set(dsname, source, nvl);
+	fnvlist_free(nvl);
+	return (error);
 }
 
 int
-dsl_prop_set(const char *dsname, const char *propname, zprop_source_t source,
-    int intsz, int numints, const void *buf)
+dsl_prop_inherit(const char *dsname, const char *propname,
+    zprop_source_t source)
 {
-	dsl_dataset_t *ds;
-	uint64_t version;
-	int err;
-	dsl_prop_setarg_t psa;
+	nvlist_t *nvl = fnvlist_alloc();
+	int error;
 
-	/*
-	 * We must do these checks before we get to the syncfunc, since
-	 * it can't fail.
-	 */
-	if (strlen(propname) >= ZAP_MAXNAMELEN)
-		return (ENAMETOOLONG);
+	fnvlist_add_boolean(nvl, propname);
+	error = dsl_props_set(dsname, source, nvl);
+	fnvlist_free(nvl);
+	return (error);
+}
 
-	err = dsl_dataset_hold(dsname, FTAG, &ds);
-	if (err)
-		return (err);
+typedef struct dsl_props_set_arg {
+	const char *dpsa_dsname;
+	zprop_source_t dpsa_source;
+	nvlist_t *dpsa_props;
+} dsl_props_set_arg_t;
 
-	version = spa_version(ds->ds_dir->dd_pool->dp_spa);
-	if (intsz * numints >= (version < SPA_VERSION_STMF_PROP ?
-	    ZAP_OLDMAXVALUELEN : ZAP_MAXVALUELEN)) {
-		dsl_dataset_rele(ds, FTAG);
-		return (E2BIG);
-	}
-	if (dsl_dataset_is_snapshot(ds) &&
-	    version < SPA_VERSION_SNAP_PROPS) {
-		dsl_dataset_rele(ds, FTAG);
-		return (ENOTSUP);
-	}
-
-	psa.psa_name = propname;
-	psa.psa_source = source;
-	psa.psa_intsz = intsz;
-	psa.psa_numints = numints;
-	psa.psa_value = buf;
-	psa.psa_effective_value = -1ULL;
-
-	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
-	    NULL, dsl_prop_set_sync, ds, &psa, 2);
-
-	dsl_dataset_rele(ds, FTAG);
-	return (err);
-}
-
-int
-dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props)
+static int
+dsl_props_set_check(void *arg, dmu_tx_t *tx)
 {
+	dsl_props_set_arg_t *dpsa = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
 	dsl_dataset_t *ds;
 	uint64_t version;
 	nvpair_t *elem = NULL;
-	dsl_props_arg_t pa;
 	int err;
 
-	if (err = dsl_dataset_hold(dsname, FTAG, &ds))
+	err = dsl_dataset_hold(dp, dpsa->dpsa_dsname, FTAG, &ds);
+	if (err != 0)
 		return (err);
-	/*
-	 * Do these checks before the syncfunc, since it can't fail.
-	 */
+
 	version = spa_version(ds->ds_dir->dd_pool->dp_spa);
-	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
+	while ((elem = nvlist_next_nvpair(dpsa->dpsa_props, elem)) != NULL) {
 		if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN) {
 			dsl_dataset_rele(ds, FTAG);
-			return (ENAMETOOLONG);
+			return (SET_ERROR(ENAMETOOLONG));
 		}
 		if (nvpair_type(elem) == DATA_TYPE_STRING) {
-			char *valstr;
-			VERIFY(nvpair_value_string(elem, &valstr) == 0);
+			char *valstr = fnvpair_value_string(elem);
 			if (strlen(valstr) >= (version <
 			    SPA_VERSION_STMF_PROP ?
 			    ZAP_OLDMAXVALUELEN : ZAP_MAXVALUELEN)) {
@@ -851,22 +762,85 @@
 		}
 	}
 
-	if (dsl_dataset_is_snapshot(ds) &&
-	    version < SPA_VERSION_SNAP_PROPS) {
+	if (dsl_dataset_is_snapshot(ds) && version < SPA_VERSION_SNAP_PROPS) {
 		dsl_dataset_rele(ds, FTAG);
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	}
+	dsl_dataset_rele(ds, FTAG);
+	return (0);
+}
 
-	pa.pa_props = props;
-	pa.pa_source = source;
+void
+dsl_props_set_sync_impl(dsl_dataset_t *ds, zprop_source_t source,
+    nvlist_t *props, dmu_tx_t *tx)
+{
+	nvpair_t *elem = NULL;
 
-	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
-	    NULL, dsl_props_set_sync, ds, &pa, 2);
+	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
+		nvpair_t *pair = elem;
 
+		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
+			/*
+			 * dsl_prop_get_all_impl() returns properties in this
+			 * format.
+			 */
+			nvlist_t *attrs = fnvpair_value_nvlist(pair);
+			pair = fnvlist_lookup_nvpair(attrs, ZPROP_VALUE);
+		}
+
+		if (nvpair_type(pair) == DATA_TYPE_STRING) {
+			const char *value = fnvpair_value_string(pair);
+			dsl_prop_set_sync_impl(ds, nvpair_name(pair),
+			    source, 1, strlen(value) + 1, value, tx);
+		} else if (nvpair_type(pair) == DATA_TYPE_UINT64) {
+			uint64_t intval = fnvpair_value_uint64(pair);
+			dsl_prop_set_sync_impl(ds, nvpair_name(pair),
+			    source, sizeof (intval), 1, &intval, tx);
+		} else if (nvpair_type(pair) == DATA_TYPE_BOOLEAN) {
+			dsl_prop_set_sync_impl(ds, nvpair_name(pair),
+			    source, 0, 0, NULL, tx);
+		} else {
+			panic("invalid nvpair type");
+		}
+	}
+}
+
+static void
+dsl_props_set_sync(void *arg, dmu_tx_t *tx)
+{
+	dsl_props_set_arg_t *dpsa = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dataset_t *ds;
+
+	VERIFY0(dsl_dataset_hold(dp, dpsa->dpsa_dsname, FTAG, &ds));
+	dsl_props_set_sync_impl(ds, dpsa->dpsa_source, dpsa->dpsa_props, tx);
 	dsl_dataset_rele(ds, FTAG);
-	return (err);
 }
 
+/*
+ * All-or-nothing; if any prop can't be set, nothing will be modified.
+ */
+int
+dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props)
+{
+	dsl_props_set_arg_t dpsa;
+	int nblks = 0;
+
+	dpsa.dpsa_dsname = dsname;
+	dpsa.dpsa_source = source;
+	dpsa.dpsa_props = props;
+
+	/*
+	 * If the source includes NONE, then we will only be removing entries
+	 * from the ZAP object.  In that case don't check for ENOSPC.
+	 */
+	if ((source & ZPROP_SRC_NONE) == 0)
+		nblks = 2 * fnvlist_num_pairs(props);
+
+	return (dsl_sync_task(dsname, dsl_props_set_check, dsl_props_set_sync,
+	    &dpsa, nblks));
+}
+
 typedef enum dsl_prop_getflags {
 	DSL_PROP_GET_INHERITING = 0x1,	/* searching parent of target ds */
 	DSL_PROP_GET_SNAPSHOT = 0x2,	/* snapshot dataset */
@@ -1014,7 +988,7 @@
 	if (dsl_dataset_is_snapshot(ds))
 		flags |= DSL_PROP_GET_SNAPSHOT;
 
-	rw_enter(&dp->dp_config_rwlock, RW_READER);
+	ASSERT(dsl_pool_config_held(dp));
 
 	if (ds->ds_phys->ds_props_obj != 0) {
 		ASSERT(flags & DSL_PROP_GET_SNAPSHOT);
@@ -1039,38 +1013,32 @@
 			break;
 	}
 out:
-	rw_exit(&dp->dp_config_rwlock);
 	return (err);
 }
 
 boolean_t
-dsl_prop_get_hasrecvd(objset_t *os)
+dsl_prop_get_hasrecvd(const char *dsname)
 {
-	dsl_dataset_t *ds = os->os_dsl_dataset;
-	int rc;
 	uint64_t dummy;
 
-	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
-	rc = dsl_prop_get_ds(ds, ZPROP_HAS_RECVD, 8, 1, &dummy, NULL);
-	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
-	ASSERT(rc != 0 || spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS);
-	return (rc == 0);
+	return (0 ==
+	    dsl_prop_get_integer(dsname, ZPROP_HAS_RECVD, &dummy, NULL));
 }
 
-static void
-dsl_prop_set_hasrecvd_impl(objset_t *os, zprop_source_t source)
+static int
+dsl_prop_set_hasrecvd_impl(const char *dsname, zprop_source_t source)
 {
-	dsl_dataset_t *ds = os->os_dsl_dataset;
-	uint64_t dummy = 0;
-	dsl_prop_setarg_t psa;
+	uint64_t version;
+	spa_t *spa;
+	int error = 0;
 
-	if (spa_version(os->os_spa) < SPA_VERSION_RECVD_PROPS)
-		return;
+	VERIFY0(spa_open(dsname, &spa, FTAG));
+	version = spa_version(spa);
+	spa_close(spa, FTAG);
 
-	dsl_prop_setarg_init_uint64(&psa, ZPROP_HAS_RECVD, source, &dummy);
-
-	(void) dsl_sync_task_do(ds->ds_dir->dd_pool, NULL,
-	    dsl_prop_set_sync, ds, &psa, 2);
+	if (version >= SPA_VERSION_RECVD_PROPS)
+		error = dsl_prop_set_int(dsname, ZPROP_HAS_RECVD, source, 0);
+	return (error);
 }
 
 /*
@@ -1077,20 +1045,19 @@
  * Call after successfully receiving properties to ensure that only the first
  * receive on or after SPA_VERSION_RECVD_PROPS blows away local properties.
  */
-void
-dsl_prop_set_hasrecvd(objset_t *os)
+int
+dsl_prop_set_hasrecvd(const char *dsname)
 {
-	if (dsl_prop_get_hasrecvd(os)) {
-		ASSERT(spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS);
-		return;
-	}
-	dsl_prop_set_hasrecvd_impl(os, ZPROP_SRC_LOCAL);
+	int error = 0;
+	if (!dsl_prop_get_hasrecvd(dsname))
+		error = dsl_prop_set_hasrecvd_impl(dsname, ZPROP_SRC_LOCAL);
+	return (error);
 }
 
 void
-dsl_prop_unset_hasrecvd(objset_t *os)
+dsl_prop_unset_hasrecvd(const char *dsname)
 {
-	dsl_prop_set_hasrecvd_impl(os, ZPROP_SRC_NONE);
+	VERIFY0(dsl_prop_set_hasrecvd_impl(dsname, ZPROP_SRC_NONE));
 }
 
 int
@@ -1100,16 +1067,25 @@
 }
 
 int
-dsl_prop_get_received(objset_t *os, nvlist_t **nvp)
+dsl_prop_get_received(const char *dsname, nvlist_t **nvp)
 {
+	objset_t *os;
+	int error;
+
 	/*
 	 * Received properties are not distinguishable from local properties
 	 * until the dataset has received properties on or after
 	 * SPA_VERSION_RECVD_PROPS.
 	 */
-	dsl_prop_getflags_t flags = (dsl_prop_get_hasrecvd(os) ?
+	dsl_prop_getflags_t flags = (dsl_prop_get_hasrecvd(dsname) ?
 	    DSL_PROP_GET_RECEIVED : DSL_PROP_GET_LOCAL);
-	return (dsl_prop_get_all_ds(os->os_dsl_dataset, nvp, flags));
+
+	error = dmu_objset_hold(dsname, FTAG, &os);
+	if (error != 0)
+		return (error);
+	error = dsl_prop_get_all_ds(os->os_dsl_dataset, nvp, flags);
+	dmu_objset_rele(os, FTAG);
+	return (error);
 }
 
 void

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/dsl_scan.h>
@@ -55,7 +55,7 @@
 static scan_cb_t dsl_scan_defrag_cb;
 static scan_cb_t dsl_scan_scrub_cb;
 static scan_cb_t dsl_scan_remove_cb;
-static dsl_syncfunc_t dsl_scan_cancel_sync;
+static void dsl_scan_cancel_sync(void *, dmu_tx_t *);
 static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx);
 
 unsigned int zfs_top_maxinflight = 32;	/* maximum I/Os per top-level */
@@ -184,22 +184,21 @@
 
 /* ARGSUSED */
 static int
-dsl_scan_setup_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_scan_setup_check(void *arg, dmu_tx_t *tx)
 {
-	dsl_scan_t *scn = arg1;
+	dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
 
 	if (scn->scn_phys.scn_state == DSS_SCANNING)
-		return (EBUSY);
+		return (SET_ERROR(EBUSY));
 
 	return (0);
 }
 
-/* ARGSUSED */
 static void
-dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
 {
-	dsl_scan_t *scn = arg1;
-	pool_scan_func_t *funcp = arg2;
+	dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
+	pool_scan_func_t *funcp = arg;
 	dmu_object_type_t ot = 0;
 	dsl_pool_t *dp = scn->scn_dp;
 	spa_t *spa = dp->dp_spa;
@@ -258,7 +257,7 @@
 
 	dsl_scan_sync_state(scn, tx);
 
-	spa_history_log_internal(LOG_POOL_SCAN, spa, tx,
+	spa_history_log_internal(spa, "scan setup", tx,
 	    "func=%u mintxg=%llu maxtxg=%llu",
 	    *funcp, scn->scn_phys.scn_min_txg, scn->scn_phys.scn_max_txg);
 }
@@ -307,7 +306,7 @@
 	else
 		scn->scn_phys.scn_state = DSS_CANCELED;
 
-	spa_history_log_internal(LOG_POOL_SCAN_DONE, spa, tx,
+	spa_history_log_internal(spa, "scan done", tx,
 	    "complete=%u", complete);
 
 	if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
@@ -345,20 +344,20 @@
 
 /* ARGSUSED */
 static int
-dsl_scan_cancel_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_scan_cancel_check(void *arg, dmu_tx_t *tx)
 {
-	dsl_scan_t *scn = arg1;
+	dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
 
 	if (scn->scn_phys.scn_state != DSS_SCANNING)
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 	return (0);
 }
 
 /* ARGSUSED */
 static void
-dsl_scan_cancel_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_scan_cancel_sync(void *arg, dmu_tx_t *tx)
 {
-	dsl_scan_t *scn = arg1;
+	dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
 
 	dsl_scan_done(scn, B_FALSE, tx);
 	dsl_scan_sync_state(scn, tx);
@@ -367,12 +366,8 @@
 int
 dsl_scan_cancel(dsl_pool_t *dp)
 {
-	boolean_t complete = B_FALSE;
-	int err;
-
-	err = dsl_sync_task_do(dp, dsl_scan_cancel_check,
-	    dsl_scan_cancel_sync, dp->dp_scan, &complete, 3);
-	return (err);
+	return (dsl_sync_task(spa_name(dp->dp_spa), dsl_scan_cancel_check,
+	    dsl_scan_cancel_sync, NULL, 3));
 }
 
 static void dsl_scan_visitbp(blkptr_t *bp,
@@ -409,7 +404,7 @@
 static void
 dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx)
 {
-	VERIFY(0 == zap_update(scn->scn_dp->dp_meta_objset,
+	VERIFY0(zap_update(scn->scn_dp->dp_meta_objset,
 	    DMU_POOL_DIRECTORY_OBJECT,
 	    DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS,
 	    &scn->scn_phys, tx));
@@ -981,33 +976,33 @@
 
 /* ARGSUSED */
 static int
-enqueue_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
+enqueue_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
 {
 	struct enqueue_clones_arg *eca = arg;
 	dsl_dataset_t *ds;
 	int err;
-	dsl_pool_t *dp = spa->spa_dsl_pool;
 	dsl_scan_t *scn = dp->dp_scan;
 
-	err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
+	if (hds->ds_dir->dd_phys->dd_origin_obj != eca->originobj)
+		return (0);
+
+	err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds);
 	if (err)
 		return (err);
 
-	if (ds->ds_dir->dd_phys->dd_origin_obj == eca->originobj) {
-		while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) {
-			dsl_dataset_t *prev;
-			err = dsl_dataset_hold_obj(dp,
-			    ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
+	while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) {
+		dsl_dataset_t *prev;
+		err = dsl_dataset_hold_obj(dp,
+		    ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
 
-			dsl_dataset_rele(ds, FTAG);
-			if (err)
-				return (err);
-			ds = prev;
-		}
-		VERIFY(zap_add_int_key(dp->dp_meta_objset,
-		    scn->scn_phys.scn_queue_obj, ds->ds_object,
-		    ds->ds_phys->ds_prev_snap_txg, eca->tx) == 0);
+		dsl_dataset_rele(ds, FTAG);
+		if (err)
+			return (err);
+		ds = prev;
 	}
+	VERIFY(zap_add_int_key(dp->dp_meta_objset,
+	    scn->scn_phys.scn_queue_obj, ds->ds_object,
+	    ds->ds_phys->ds_prev_snap_txg, eca->tx) == 0);
 	dsl_dataset_rele(ds, FTAG);
 	return (0);
 }
@@ -1096,17 +1091,17 @@
 		}
 
 		if (usenext) {
-			VERIFY(zap_join_key(dp->dp_meta_objset,
+			VERIFY0(zap_join_key(dp->dp_meta_objset,
 			    ds->ds_phys->ds_next_clones_obj,
 			    scn->scn_phys.scn_queue_obj,
-			    ds->ds_phys->ds_creation_txg, tx) == 0);
+			    ds->ds_phys->ds_creation_txg, tx));
 		} else {
 			struct enqueue_clones_arg eca;
 			eca.tx = tx;
 			eca.originobj = ds->ds_object;
 
-			(void) dmu_objset_find_spa(ds->ds_dir->dd_pool->dp_spa,
-			    NULL, enqueue_clones_cb, &eca, DS_FIND_CHILDREN);
+			VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
+			    enqueue_clones_cb, &eca, DS_FIND_CHILDREN));
 		}
 	}
 
@@ -1116,15 +1111,14 @@
 
 /* ARGSUSED */
 static int
-enqueue_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
+enqueue_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
 {
 	dmu_tx_t *tx = arg;
 	dsl_dataset_t *ds;
 	int err;
-	dsl_pool_t *dp = spa->spa_dsl_pool;
 	dsl_scan_t *scn = dp->dp_scan;
 
-	err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
+	err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds);
 	if (err)
 		return (err);
 
@@ -1279,8 +1273,8 @@
 			return;
 
 		if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) {
-			VERIFY(0 == dmu_objset_find_spa(dp->dp_spa,
-			    NULL, enqueue_cb, tx, DS_FIND_CHILDREN));
+			VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
+			    enqueue_cb, tx, DS_FIND_CHILDREN));
 		} else {
 			dsl_scan_visitds(scn,
 			    dp->dp_origin_snap->ds_object, tx);
@@ -1358,7 +1352,7 @@
 	if (!scn->scn_is_bptree ||
 	    (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) {
 		if (dsl_scan_free_should_pause(scn))
-			return (ERESTART);
+			return (SET_ERROR(ERESTART));
 	}
 
 	zio_nowait(zio_free_sync(scn->scn_zio_root, scn->scn_dp->dp_spa,
@@ -1415,7 +1409,7 @@
 			func = POOL_SCAN_RESILVER;
 		zfs_dbgmsg("restarting scan func=%u txg=%llu",
 		    func, tx->tx_txg);
-		dsl_scan_setup_sync(scn, &func, tx);
+		dsl_scan_setup_sync(&func, tx);
 	}
 
 	if (!dsl_scan_active(scn) ||
@@ -1449,21 +1443,21 @@
 			err = bptree_iterate(dp->dp_meta_objset,
 			    dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb,
 			    scn, tx);
-			VERIFY3U(0, ==, zio_wait(scn->scn_zio_root));
-			if (err != 0)
-				return;
+			VERIFY0(zio_wait(scn->scn_zio_root));
 
-			/* disable async destroy feature */
-			spa_feature_decr(spa,
-			    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY], tx);
-			ASSERT(!spa_feature_is_active(spa,
-			    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]));
-			VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset,
-			    DMU_POOL_DIRECTORY_OBJECT,
-			    DMU_POOL_BPTREE_OBJ, tx));
-			VERIFY3U(0, ==, bptree_free(dp->dp_meta_objset,
-			    dp->dp_bptree_obj, tx));
-			dp->dp_bptree_obj = 0;
+			if (err == 0) {
+				zfeature_info_t *feat = &spa_feature_table
+				    [SPA_FEATURE_ASYNC_DESTROY];
+				/* finished; deactivate async destroy feature */
+				spa_feature_decr(spa, feat, tx);
+				ASSERT(!spa_feature_is_active(spa, feat));
+				VERIFY0(zap_remove(dp->dp_meta_objset,
+				    DMU_POOL_DIRECTORY_OBJECT,
+				    DMU_POOL_BPTREE_OBJ, tx));
+				VERIFY0(bptree_free(dp->dp_meta_objset,
+				    dp->dp_bptree_obj, tx));
+				dp->dp_bptree_obj = 0;
+			}
 		}
 		if (scn->scn_visited_this_txg) {
 			zfs_dbgmsg("freed %llu blocks in %llums from "
@@ -1510,7 +1504,9 @@
 
 	scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
 	    NULL, ZIO_FLAG_CANFAIL);
+	dsl_pool_config_enter(dp, FTAG);
 	dsl_scan_visit(scn, tx);
+	dsl_pool_config_exit(dp, FTAG);
 	(void) zio_wait(scn->scn_zio_root);
 	scn->scn_zio_root = NULL;
 
@@ -1746,6 +1742,6 @@
 	spa->spa_scrub_reopen = B_FALSE;
 	(void) spa_vdev_state_exit(spa, NULL, 0);
 
-	return (dsl_sync_task_do(dp, dsl_scan_setup_check,
-	    dsl_scan_setup_sync, dp->dp_scan, &func, 0));
+	return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check,
+	    dsl_scan_setup_sync, &func, 0));
 }

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/dmu.h>
@@ -33,135 +34,115 @@
 
 /* ARGSUSED */
 static int
-dsl_null_checkfunc(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_null_checkfunc(void *arg, dmu_tx_t *tx)
 {
 	return (0);
 }
 
-dsl_sync_task_group_t *
-dsl_sync_task_group_create(dsl_pool_t *dp)
-{
-	dsl_sync_task_group_t *dstg;
-
-	dstg = kmem_zalloc(sizeof (dsl_sync_task_group_t), KM_SLEEP);
-	list_create(&dstg->dstg_tasks, sizeof (dsl_sync_task_t),
-	    offsetof(dsl_sync_task_t, dst_node));
-	dstg->dstg_pool = dp;
-
-	return (dstg);
-}
-
-void
-dsl_sync_task_create(dsl_sync_task_group_t *dstg,
-    dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
-    void *arg1, void *arg2, int blocks_modified)
-{
-	dsl_sync_task_t *dst;
-
-	if (checkfunc == NULL)
-		checkfunc = dsl_null_checkfunc;
-	dst = kmem_zalloc(sizeof (dsl_sync_task_t), KM_SLEEP);
-	dst->dst_checkfunc = checkfunc;
-	dst->dst_syncfunc = syncfunc;
-	dst->dst_arg1 = arg1;
-	dst->dst_arg2 = arg2;
-	list_insert_tail(&dstg->dstg_tasks, dst);
-
-	dstg->dstg_space += blocks_modified << DST_AVG_BLKSHIFT;
-}
-
+/*
+ * Called from open context to perform a callback in syncing context.  Waits
+ * for the operation to complete.
+ *
+ * The checkfunc will be called from open context as a preliminary check
+ * which can quickly fail.  If it succeeds, it will be called again from
+ * syncing context.  The checkfunc should generally be designed to work
+ * properly in either context, but if necessary it can check
+ * dmu_tx_is_syncing(tx).
+ *
+ * The synctask infrastructure enforces proper locking strategy with respect
+ * to the dp_config_rwlock -- the lock will always be held when the callbacks
+ * are called.  It will be held for read during the open-context (preliminary)
+ * call to the checkfunc, and then held for write from syncing context during
+ * the calls to the check and sync funcs.
+ *
+ * A dataset or pool name can be passed as the first argument.  Typically,
+ * the check func will hold, check the return value of the hold, and then
+ * release the dataset.  The sync func will VERIFYO(hold()) the dataset.
+ * This is safe because no changes can be made between the check and sync funcs,
+ * and the sync func will only be called if the check func successfully opened
+ * the dataset.
+ */
 int
-dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg)
+dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc,
+    dsl_syncfunc_t *syncfunc, void *arg, int blocks_modified)
 {
+	spa_t *spa;
 	dmu_tx_t *tx;
-	uint64_t txg;
-	dsl_sync_task_t *dst;
+	int err;
+	dsl_sync_task_t dst = { 0 };
+	dsl_pool_t *dp;
 
+	err = spa_open(pool, &spa, FTAG);
+	if (err != 0)
+		return (err);
+	dp = spa_get_dsl(spa);
+
 top:
-	tx = dmu_tx_create_dd(dstg->dstg_pool->dp_mos_dir);
-	VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT));
+	tx = dmu_tx_create_dd(dp->dp_mos_dir);
+	VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
 
-	txg = dmu_tx_get_txg(tx);
+	dst.dst_pool = dp;
+	dst.dst_txg = dmu_tx_get_txg(tx);
+	dst.dst_space = blocks_modified << DST_AVG_BLKSHIFT;
+	dst.dst_checkfunc = checkfunc != NULL ? checkfunc : dsl_null_checkfunc;
+	dst.dst_syncfunc = syncfunc;
+	dst.dst_arg = arg;
+	dst.dst_error = 0;
+	dst.dst_nowaiter = B_FALSE;
 
-	/* Do a preliminary error check. */
-	dstg->dstg_err = 0;
-	rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER);
-	for (dst = list_head(&dstg->dstg_tasks); dst;
-	    dst = list_next(&dstg->dstg_tasks, dst)) {
-#ifdef ZFS_DEBUG
-		/*
-		 * Only check half the time, otherwise, the sync-context
-		 * check will almost never fail.
-		 */
-		if (spa_get_random(2) == 0)
-			continue;
-#endif
-		dst->dst_err =
-		    dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
-		if (dst->dst_err)
-			dstg->dstg_err = dst->dst_err;
-	}
-	rw_exit(&dstg->dstg_pool->dp_config_rwlock);
+	dsl_pool_config_enter(dp, FTAG);
+	err = dst.dst_checkfunc(arg, tx);
+	dsl_pool_config_exit(dp, FTAG);
 
-	if (dstg->dstg_err) {
+	if (err != 0) {
 		dmu_tx_commit(tx);
-		return (dstg->dstg_err);
+		spa_close(spa, FTAG);
+		return (err);
 	}
 
-	/*
-	 * We don't generally have many sync tasks, so pay the price of
-	 * add_tail to get the tasks executed in the right order.
-	 */
-	VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
-	    dstg, txg));
+	VERIFY(txg_list_add_tail(&dp->dp_sync_tasks, &dst, dst.dst_txg));
 
 	dmu_tx_commit(tx);
 
-	txg_wait_synced(dstg->dstg_pool, txg);
+	txg_wait_synced(dp, dst.dst_txg);
 
-	if (dstg->dstg_err == EAGAIN) {
-		txg_wait_synced(dstg->dstg_pool, txg + TXG_DEFER_SIZE);
+	if (dst.dst_error == EAGAIN) {
+		txg_wait_synced(dp, dst.dst_txg + TXG_DEFER_SIZE);
 		goto top;
 	}
 
-	return (dstg->dstg_err);
+	spa_close(spa, FTAG);
+	return (dst.dst_error);
 }
 
 void
-dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
+dsl_sync_task_nowait(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
+    int blocks_modified, dmu_tx_t *tx)
 {
-	uint64_t txg;
+	dsl_sync_task_t *dst = kmem_zalloc(sizeof (*dst), KM_SLEEP);
 
-	dstg->dstg_nowaiter = B_TRUE;
-	txg = dmu_tx_get_txg(tx);
-	/*
-	 * We don't generally have many sync tasks, so pay the price of
-	 * add_tail to get the tasks executed in the right order.
-	 */
-	VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
-	    dstg, txg));
-}
+	dst->dst_pool = dp;
+	dst->dst_txg = dmu_tx_get_txg(tx);
+	dst->dst_space = blocks_modified << DST_AVG_BLKSHIFT;
+	dst->dst_checkfunc = dsl_null_checkfunc;
+	dst->dst_syncfunc = syncfunc;
+	dst->dst_arg = arg;
+	dst->dst_error = 0;
+	dst->dst_nowaiter = B_TRUE;
 
-void
-dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg)
-{
-	dsl_sync_task_t *dst;
-
-	while (dst = list_head(&dstg->dstg_tasks)) {
-		list_remove(&dstg->dstg_tasks, dst);
-		kmem_free(dst, sizeof (dsl_sync_task_t));
-	}
-	kmem_free(dstg, sizeof (dsl_sync_task_group_t));
+	VERIFY(txg_list_add_tail(&dp->dp_sync_tasks, dst, dst->dst_txg));
 }
 
+/*
+ * Called in syncing context to execute the synctask.
+ */
 void
-dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
+dsl_sync_task_sync(dsl_sync_task_t *dst, dmu_tx_t *tx)
 {
-	dsl_sync_task_t *dst;
-	dsl_pool_t *dp = dstg->dstg_pool;
+	dsl_pool_t *dp = dst->dst_pool;
 	uint64_t quota, used;
 
-	ASSERT0(dstg->dstg_err);
+	ASSERT0(dst->dst_error);
 
 	/*
 	 * Check for sufficient space.  We just check against what's
@@ -173,63 +154,21 @@
 	    metaslab_class_get_deferred(spa_normal_class(dp->dp_spa));
 	used = dp->dp_root_dir->dd_phys->dd_used_bytes;
 	/* MOS space is triple-dittoed, so we multiply by 3. */
-	if (dstg->dstg_space > 0 && used + dstg->dstg_space * 3 > quota) {
-		dstg->dstg_err = ENOSPC;
+	if (dst->dst_space > 0 && used + dst->dst_space * 3 > quota) {
+		dst->dst_error = SET_ERROR(ENOSPC);
+		if (dst->dst_nowaiter)
+			kmem_free(dst, sizeof (*dst));
 		return;
 	}
 
 	/*
-	 * Check for errors by calling checkfuncs.
+	 * Check for errors by calling checkfunc.
 	 */
-	rw_enter(&dp->dp_config_rwlock, RW_WRITER);
-	for (dst = list_head(&dstg->dstg_tasks); dst;
-	    dst = list_next(&dstg->dstg_tasks, dst)) {
-		dst->dst_err =
-		    dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
-		if (dst->dst_err)
-			dstg->dstg_err = dst->dst_err;
-	}
-
-	if (dstg->dstg_err == 0) {
-		/*
-		 * Execute sync tasks.
-		 */
-		for (dst = list_head(&dstg->dstg_tasks); dst;
-		    dst = list_next(&dstg->dstg_tasks, dst)) {
-			dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2, tx);
-		}
-	}
-	rw_exit(&dp->dp_config_rwlock);
-
-	if (dstg->dstg_nowaiter)
-		dsl_sync_task_group_destroy(dstg);
+	rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
+	dst->dst_error = dst->dst_checkfunc(dst->dst_arg, tx);
+	if (dst->dst_error == 0)
+		dst->dst_syncfunc(dst->dst_arg, tx);
+	rrw_exit(&dp->dp_config_rwlock, FTAG);
+	if (dst->dst_nowaiter)
+		kmem_free(dst, sizeof (*dst));
 }
-
-int
-dsl_sync_task_do(dsl_pool_t *dp,
-    dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
-    void *arg1, void *arg2, int blocks_modified)
-{
-	dsl_sync_task_group_t *dstg;
-	int err;
-
-	ASSERT(spa_writeable(dp->dp_spa));
-
-	dstg = dsl_sync_task_group_create(dp);
-	dsl_sync_task_create(dstg, checkfunc, syncfunc,
-	    arg1, arg2, blocks_modified);
-	err = dsl_sync_task_group_wait(dstg);
-	dsl_sync_task_group_destroy(dstg);
-	return (err);
-}
-
-void
-dsl_sync_task_do_nowait(dsl_pool_t *dp,
-    dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
-    void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx)
-{
-	dsl_sync_task_group_t *dstg = dsl_sync_task_group_create(dp);
-	dsl_sync_task_create(dstg, checkfunc, syncfunc,
-	    arg1, arg2, blocks_modified);
-	dsl_sync_task_group_nowait(dstg, tx);
-}

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  */
 
@@ -1518,7 +1518,7 @@
 	 * For testing, make some blocks above a certain size be gang blocks.
 	 */
 	if (psize >= metaslab_gang_bang && (ddi_get_lbolt() & 3) == 0)
-		return (ENOSPC);
+		return (SET_ERROR(ENOSPC));
 
 	/*
 	 * Start at the rotor and loop through all mgs until we find something.
@@ -1683,7 +1683,7 @@
 
 	bzero(&dva[d], sizeof (dva_t));
 
-	return (ENOSPC);
+	return (SET_ERROR(ENOSPC));
 }
 
 /*
@@ -1752,7 +1752,7 @@
 
 	if ((vd = vdev_lookup_top(spa, vdev)) == NULL ||
 	    (offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count)
-		return (ENXIO);
+		return (SET_ERROR(ENXIO));
 
 	msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
 
@@ -1765,7 +1765,7 @@
 		error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY);
 
 	if (error == 0 && !space_map_contains(msp->ms_map, offset, size))
-		error = ENOENT;
+		error = SET_ERROR(ENOENT);
 
 	if (error || txg == 0) {	/* txg == 0 indicates dry run */
 		mutex_exit(&msp->ms_lock);
@@ -1800,7 +1800,7 @@
 
 	if (mc->mc_rotor == NULL) {	/* no vdevs in this class */
 		spa_config_exit(spa, SCL_ALLOC, FTAG);
-		return (ENOSPC);
+		return (SET_ERROR(ENOSPC));
 	}
 
 	ASSERT(ndvas > 0 && ndvas <= spa_max_replication(spa));
@@ -1876,3 +1876,41 @@
 
 	return (error);
 }
+
+static void
+checkmap(space_map_t *sm, uint64_t off, uint64_t size)
+{
+	space_seg_t *ss;
+	avl_index_t where;
+
+	mutex_enter(sm->sm_lock);
+	ss = space_map_find(sm, off, size, &where);
+	if (ss != NULL)
+		panic("freeing free block; ss=%p", (void *)ss);
+	mutex_exit(sm->sm_lock);
+}
+
+void
+metaslab_check_free(spa_t *spa, const blkptr_t *bp)
+{
+	if ((zfs_flags & ZFS_DEBUG_ZIO_FREE) == 0)
+		return;
+
+	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
+	for (int i = 0; i < BP_GET_NDVAS(bp); i++) {
+		uint64_t vdid = DVA_GET_VDEV(&bp->blk_dva[i]);
+		vdev_t *vd = vdev_lookup_top(spa, vdid);
+		uint64_t off = DVA_GET_OFFSET(&bp->blk_dva[i]);
+		uint64_t size = DVA_GET_ASIZE(&bp->blk_dva[i]);
+		metaslab_t *ms = vd->vdev_ms[off >> vd->vdev_ms_shift];
+
+		if (ms->ms_map->sm_loaded)
+			checkmap(ms->ms_map, off, size);
+
+		for (int j = 0; j < TXG_SIZE; j++)
+			checkmap(ms->ms_freemap[j], off, size);
+		for (int j = 0; j < TXG_DEFER_SIZE; j++)
+			checkmap(ms->ms_defermap[j], off, size);
+	}
+	spa_config_exit(spa, SCL_VDEV, FTAG);
+}

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/refcount.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/refcount.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/refcount.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -32,7 +33,7 @@
 #else
 int reference_tracking_enable = TRUE;
 #endif
-int reference_history = 4; /* tunable */
+int reference_history = 3; /* tunable */
 
 static kmem_cache_t *reference_cache;
 static kmem_cache_t *reference_history_cache;
@@ -64,9 +65,17 @@
 	    offsetof(reference_t, ref_link));
 	rc->rc_count = 0;
 	rc->rc_removed_count = 0;
+	rc->rc_tracked = reference_tracking_enable;
 }
 
 void
+refcount_create_untracked(refcount_t *rc)
+{
+	refcount_create(rc);
+	rc->rc_tracked = B_FALSE;
+}
+
+void
 refcount_destroy_many(refcount_t *rc, uint64_t number)
 {
 	reference_t *ref;
@@ -96,7 +105,6 @@
 int
 refcount_is_zero(refcount_t *rc)
 {
-	ASSERT(rc->rc_count >= 0);
 	return (rc->rc_count == 0);
 }
 
@@ -103,7 +111,6 @@
 int64_t
 refcount_count(refcount_t *rc)
 {
-	ASSERT(rc->rc_count >= 0);
 	return (rc->rc_count);
 }
 
@@ -113,7 +120,7 @@
 	reference_t *ref = NULL;
 	int64_t count;
 
-	if (reference_tracking_enable) {
+	if (rc->rc_tracked) {
 		ref = kmem_cache_alloc(reference_cache, KM_SLEEP);
 		ref->ref_holder = holder;
 		ref->ref_number = number;
@@ -120,7 +127,7 @@
 	}
 	mutex_enter(&rc->rc_mtx);
 	ASSERT(rc->rc_count >= 0);
-	if (reference_tracking_enable)
+	if (rc->rc_tracked)
 		list_insert_head(&rc->rc_list, ref);
 	rc->rc_count += number;
 	count = rc->rc_count;
@@ -144,7 +151,7 @@
 	mutex_enter(&rc->rc_mtx);
 	ASSERT(rc->rc_count >= number);
 
-	if (!reference_tracking_enable) {
+	if (!rc->rc_tracked) {
 		rc->rc_count -= number;
 		count = rc->rc_count;
 		mutex_exit(&rc->rc_mtx);
@@ -161,7 +168,7 @@
 				    KM_SLEEP);
 				list_insert_head(&rc->rc_removed, ref);
 				rc->rc_removed_count++;
-				if (rc->rc_removed_count >= reference_history) {
+				if (rc->rc_removed_count > reference_history) {
 					ref = list_tail(&rc->rc_removed);
 					list_remove(&rc->rc_removed, ref);
 					kmem_cache_free(reference_history_cache,

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -22,6 +22,9 @@
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
 
 #include <sys/refcount.h>
 #include <sys/rrwlock.h>
@@ -72,8 +75,9 @@
 uint_t rrw_tsd_key;
 
 typedef struct rrw_node {
-	struct rrw_node	*rn_next;
-	rrwlock_t	*rn_rrl;
+	struct rrw_node *rn_next;
+	rrwlock_t *rn_rrl;
+	void *rn_tag;
 } rrw_node_t;
 
 static rrw_node_t *
@@ -95,7 +99,7 @@
  * Add a node to the head of the singly linked list.
  */
 static void
-rrn_add(rrwlock_t *rrl)
+rrn_add(rrwlock_t *rrl, void *tag)
 {
 	rrw_node_t *rn;
 
@@ -102,6 +106,7 @@
 	rn = kmem_alloc(sizeof (*rn), KM_SLEEP);
 	rn->rn_rrl = rrl;
 	rn->rn_next = tsd_get(rrw_tsd_key);
+	rn->rn_tag = tag;
 	VERIFY(tsd_set(rrw_tsd_key, rn) == 0);
 }
 
@@ -110,7 +115,7 @@
  * thread's list and return TRUE; otherwise return FALSE.
  */
 static boolean_t
-rrn_find_and_remove(rrwlock_t *rrl)
+rrn_find_and_remove(rrwlock_t *rrl, void *tag)
 {
 	rrw_node_t *rn;
 	rrw_node_t *prev = NULL;
@@ -119,7 +124,7 @@
 		return (B_FALSE);
 
 	for (rn = tsd_get(rrw_tsd_key); rn != NULL; rn = rn->rn_next) {
-		if (rn->rn_rrl == rrl) {
+		if (rn->rn_rrl == rrl && rn->rn_tag == tag) {
 			if (prev)
 				prev->rn_next = rn->rn_next;
 			else
@@ -133,7 +138,7 @@
 }
 
 void
-rrw_init(rrwlock_t *rrl)
+rrw_init(rrwlock_t *rrl, boolean_t track_all)
 {
 	mutex_init(&rrl->rr_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&rrl->rr_cv, NULL, CV_DEFAULT, NULL);
@@ -141,6 +146,7 @@
 	refcount_create(&rrl->rr_anon_rcount);
 	refcount_create(&rrl->rr_linked_rcount);
 	rrl->rr_writer_wanted = B_FALSE;
+	rrl->rr_track_all = track_all;
 }
 
 void
@@ -153,12 +159,13 @@
 	refcount_destroy(&rrl->rr_linked_rcount);
 }
 
-static void
+void
 rrw_enter_read(rrwlock_t *rrl, void *tag)
 {
 	mutex_enter(&rrl->rr_lock);
 #if !defined(DEBUG) && defined(_KERNEL)
-	if (!rrl->rr_writer && !rrl->rr_writer_wanted) {
+	if (rrl->rr_writer == NULL && !rrl->rr_writer_wanted &&
+	    !rrl->rr_track_all) {
 		rrl->rr_anon_rcount.rc_count++;
 		mutex_exit(&rrl->rr_lock);
 		return;
@@ -168,14 +175,14 @@
 	ASSERT(rrl->rr_writer != curthread);
 	ASSERT(refcount_count(&rrl->rr_anon_rcount) >= 0);
 
-	while (rrl->rr_writer || (rrl->rr_writer_wanted &&
+	while (rrl->rr_writer != NULL || (rrl->rr_writer_wanted &&
 	    refcount_is_zero(&rrl->rr_anon_rcount) &&
 	    rrn_find(rrl) == NULL))
 		cv_wait(&rrl->rr_cv, &rrl->rr_lock);
 
-	if (rrl->rr_writer_wanted) {
+	if (rrl->rr_writer_wanted || rrl->rr_track_all) {
 		/* may or may not be a re-entrant enter */
-		rrn_add(rrl);
+		rrn_add(rrl, tag);
 		(void) refcount_add(&rrl->rr_linked_rcount, tag);
 	} else {
 		(void) refcount_add(&rrl->rr_anon_rcount, tag);
@@ -184,7 +191,7 @@
 	mutex_exit(&rrl->rr_lock);
 }
 
-static void
+void
 rrw_enter_write(rrwlock_t *rrl)
 {
 	mutex_enter(&rrl->rr_lock);
@@ -230,10 +237,12 @@
 
 	if (rrl->rr_writer == NULL) {
 		int64_t count;
-		if (rrn_find_and_remove(rrl))
+		if (rrn_find_and_remove(rrl, tag)) {
 			count = refcount_remove(&rrl->rr_linked_rcount, tag);
-		else
+		} else {
+			ASSERT(!rrl->rr_track_all);
 			count = refcount_remove(&rrl->rr_anon_rcount, tag);
+		}
 		if (count == 0)
 			cv_broadcast(&rrl->rr_cv);
 	} else {
@@ -246,6 +255,11 @@
 	mutex_exit(&rrl->rr_lock);
 }
 
+/*
+ * If the lock was created with track_all, rrw_held(RW_READER) will return
+ * B_TRUE iff the current thread has the lock for reader.  Otherwise it may
+ * return B_TRUE if any thread has the lock for reader.
+ */
 boolean_t
 rrw_held(rrwlock_t *rrl, krw_t rw)
 {
@@ -256,9 +270,19 @@
 		held = (rrl->rr_writer == curthread);
 	} else {
 		held = (!refcount_is_zero(&rrl->rr_anon_rcount) ||
-		    !refcount_is_zero(&rrl->rr_linked_rcount));
+		    rrn_find(rrl) != NULL);
 	}
 	mutex_exit(&rrl->rr_lock);
 
 	return (held);
 }
+
+void
+rrw_tsd_destroy(void *arg)
+{
+	rrw_node_t *rn = arg;
+	if (rn != NULL) {
+		panic("thread %p terminating with rrw lock %p held",
+		    (void *)curthread, (void *)rn->rn_rrl);
+	}
+}

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -22,7 +22,7 @@
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  * Portions Copyright 2011 iXsystems, Inc
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -373,7 +373,7 @@
 		switch (data_op) {
 		case SA_LOOKUP:
 			if (bulk[i].sa_addr == NULL)
-				return (ENOENT);
+				return (SET_ERROR(ENOENT));
 			if (bulk[i].sa_data) {
 				SA_COPY_DATA(bulk[i].sa_data_func,
 				    bulk[i].sa_addr, bulk[i].sa_data,
@@ -503,7 +503,7 @@
 		blocksize = SPA_MINBLOCKSIZE;
 	} else if (size > SPA_MAXBLOCKSIZE) {
 		ASSERT(0);
-		return (EFBIG);
+		return (SET_ERROR(EFBIG));
 	} else {
 		blocksize = P2ROUNDUP_TYPED(size, SPA_MINBLOCKSIZE, uint32_t);
 	}
@@ -677,7 +677,7 @@
 	    SA_BONUS, &i, &used, &spilling);
 
 	if (used > SPA_MAXBLOCKSIZE)
-		return (EFBIG);
+		return (SET_ERROR(EFBIG));
 
 	VERIFY(0 == dmu_set_bonus(hdl->sa_bonus, spilling ?
 	    MIN(DN_MAX_BONUSLEN - sizeof (blkptr_t), used + hdrsize) :
@@ -701,7 +701,7 @@
 		    &spill_used, &dummy);
 
 		if (spill_used > SPA_MAXBLOCKSIZE)
-			return (EFBIG);
+			return (SET_ERROR(EFBIG));
 
 		buf_space = hdl->sa_spill->db_size - spillhdrsize;
 		if (BUF_SPACE_NEEDED(spill_used, spillhdrsize) >
@@ -861,7 +861,7 @@
 		 */
 		if (error || (error == 0 && sa_attr_count == 0)) {
 			if (error == 0)
-				error = EINVAL;
+				error = SET_ERROR(EINVAL);
 			goto bail;
 		}
 		sa_reg_count = sa_attr_count;
@@ -892,7 +892,7 @@
 			error = zap_lookup(os, sa->sa_reg_attr_obj,
 			    reg_attrs[i].sa_name, 8, 1, &attr_value);
 		else
-			error = ENOENT;
+			error = SET_ERROR(ENOENT);
 		switch (error) {
 		case ENOENT:
 			sa->sa_user_table[i] = (sa_attr_type_t)sa_attr_count;
@@ -1004,10 +1004,10 @@
 	sa_attr_type_t *tb;
 	int error;
 
-	mutex_enter(&os->os_lock);
+	mutex_enter(&os->os_user_ptr_lock);
 	if (os->os_sa) {
 		mutex_enter(&os->os_sa->sa_lock);
-		mutex_exit(&os->os_lock);
+		mutex_exit(&os->os_user_ptr_lock);
 		tb = os->os_sa->sa_user_table;
 		mutex_exit(&os->os_sa->sa_lock);
 		*user_table = tb;
@@ -1020,7 +1020,7 @@
 
 	os->os_sa = sa;
 	mutex_enter(&sa->sa_lock);
-	mutex_exit(&os->os_lock);
+	mutex_exit(&os->os_user_ptr_lock);
 	avl_create(&sa->sa_layout_num_tree, layout_num_compare,
 	    sizeof (sa_lot_t), offsetof(sa_lot_t, lot_num_node));
 	avl_create(&sa->sa_layout_hash_tree, layout_hash_compare,
@@ -1051,7 +1051,7 @@
 		 */
 		if (error || (error == 0 && layout_count == 0)) {
 			if (error == 0)
-				error = EINVAL;
+				error = SET_ERROR(EINVAL);
 			goto fail;
 		}
 

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -21,7 +21,8 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2013 Martin Matuska <mm at FreeBSD.org>. All rights reserved.
  */
 
@@ -61,6 +62,9 @@
 #include <sys/spa_boot.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/dsl_scan.h>
+#include <sys/dmu_send.h>
+#include <sys/dsl_destroy.h>
+#include <sys/dsl_userhold.h>
 #include <sys/zfeature.h>
 #include <sys/zvol.h>
 #include <sys/trim_map.h>
@@ -120,10 +124,8 @@
 	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
 };
 
-static dsl_syncfunc_t spa_sync_version;
-static dsl_syncfunc_t spa_sync_props;
-static dsl_checkfunc_t spa_change_guid_check;
-static dsl_syncfunc_t spa_change_guid_sync;
+static void spa_sync_version(void *arg, dmu_tx_t *tx);
+static void spa_sync_props(void *arg, dmu_tx_t *tx);
 static boolean_t spa_has_active_shared_spare(spa_t *spa);
 static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
     spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
@@ -324,10 +326,10 @@
 				dsl_dataset_t *ds = NULL;
 
 				dp = spa_get_dsl(spa);
-				rw_enter(&dp->dp_config_rwlock, RW_READER);
+				dsl_pool_config_enter(dp, FTAG);
 				if (err = dsl_dataset_hold_obj(dp,
 				    za.za_first_integer, FTAG, &ds)) {
-					rw_exit(&dp->dp_config_rwlock);
+					dsl_pool_config_exit(dp, FTAG);
 					break;
 				}
 
@@ -336,7 +338,7 @@
 				    KM_SLEEP);
 				dsl_dataset_name(ds, strval);
 				dsl_dataset_rele(ds, FTAG);
-				rw_exit(&dp->dp_config_rwlock);
+				dsl_pool_config_exit(dp, FTAG);
 			} else {
 				strval = NULL;
 				intval = za.za_first_integer;
@@ -401,7 +403,7 @@
 		switch (prop) {
 		case ZPROP_INVAL:
 			if (!zpool_prop_feature(propname)) {
-				error = EINVAL;
+				error = SET_ERROR(EINVAL);
 				break;
 			}
 
@@ -409,23 +411,23 @@
 			 * Sanitize the input.
 			 */
 			if (nvpair_type(elem) != DATA_TYPE_UINT64) {
-				error = EINVAL;
+				error = SET_ERROR(EINVAL);
 				break;
 			}
 
 			if (nvpair_value_uint64(elem, &intval) != 0) {
-				error = EINVAL;
+				error = SET_ERROR(EINVAL);
 				break;
 			}
 
 			if (intval != 0) {
-				error = EINVAL;
+				error = SET_ERROR(EINVAL);
 				break;
 			}
 
 			fname = strchr(propname, '@') + 1;
 			if (zfeature_lookup_name(fname, NULL) != 0) {
-				error = EINVAL;
+				error = SET_ERROR(EINVAL);
 				break;
 			}
 
@@ -438,7 +440,7 @@
 			    (intval < spa_version(spa) ||
 			    intval > SPA_VERSION_BEFORE_FEATURES ||
 			    has_feature))
-				error = EINVAL;
+				error = SET_ERROR(EINVAL);
 			break;
 
 		case ZPOOL_PROP_DELEGATION:
@@ -447,7 +449,7 @@
 		case ZPOOL_PROP_AUTOEXPAND:
 			error = nvpair_value_uint64(elem, &intval);
 			if (!error && intval > 1)
-				error = EINVAL;
+				error = SET_ERROR(EINVAL);
 			break;
 
 		case ZPOOL_PROP_BOOTFS:
@@ -457,7 +459,7 @@
 			 * the bootfs property cannot be set.
 			 */
 			if (spa_version(spa) < SPA_VERSION_BOOTFS) {
-				error = ENOTSUP;
+				error = SET_ERROR(ENOTSUP);
 				break;
 			}
 
@@ -465,7 +467,7 @@
 			 * Make sure the vdev config is bootable
 			 */
 			if (!vdev_is_bootable(spa->spa_root_vdev)) {
-				error = ENOTSUP;
+				error = SET_ERROR(ENOTSUP);
 				break;
 			}
 
@@ -489,12 +491,13 @@
 				/* Must be ZPL and not gzip compressed. */
 
 				if (dmu_objset_type(os) != DMU_OST_ZFS) {
-					error = ENOTSUP;
-				} else if ((error = dsl_prop_get_integer(strval,
+					error = SET_ERROR(ENOTSUP);
+				} else if ((error =
+				    dsl_prop_get_int_ds(dmu_objset_ds(os),
 				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
-				    &compress, NULL)) == 0 &&
+				    &compress)) == 0 &&
 				    !BOOTFS_COMPRESS_VALID(compress)) {
-					error = ENOTSUP;
+					error = SET_ERROR(ENOTSUP);
 				} else {
 					objnum = dmu_objset_id(os);
 				}
@@ -506,7 +509,7 @@
 			error = nvpair_value_uint64(elem, &intval);
 			if (!error && (intval < ZIO_FAILURE_MODE_WAIT ||
 			    intval > ZIO_FAILURE_MODE_PANIC))
-				error = EINVAL;
+				error = SET_ERROR(EINVAL);
 
 			/*
 			 * This is a special case which only occurs when
@@ -520,7 +523,7 @@
 			 */
 			if (!error && spa_suspended(spa)) {
 				spa->spa_failmode = intval;
-				error = EIO;
+				error = SET_ERROR(EIO);
 			}
 			break;
 
@@ -535,7 +538,7 @@
 				break;
 
 			if (strval[0] != '/') {
-				error = EINVAL;
+				error = SET_ERROR(EINVAL);
 				break;
 			}
 
@@ -544,7 +547,7 @@
 
 			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
 			    strcmp(slash, "/..") == 0)
-				error = EINVAL;
+				error = SET_ERROR(EINVAL);
 			break;
 
 		case ZPOOL_PROP_COMMENT:
@@ -558,7 +561,7 @@
 				 * there is an easy-to-use kernel isprint().
 				 */
 				if (*check >= 0x7f) {
-					error = EINVAL;
+					error = SET_ERROR(EINVAL);
 					break;
 				}
 				check++;
@@ -569,12 +572,12 @@
 
 		case ZPOOL_PROP_DEDUPDITTO:
 			if (spa_version(spa) < SPA_VERSION_DEDUP)
-				error = ENOTSUP;
+				error = SET_ERROR(ENOTSUP);
 			else
 				error = nvpair_value_uint64(elem, &intval);
 			if (error == 0 &&
 			    intval != 0 && intval < ZIO_DEDUPDITTO_MIN)
-				error = EINVAL;
+				error = SET_ERROR(EINVAL);
 			break;
 		}
 
@@ -659,8 +662,8 @@
 			 * read object, the features for write object, or the
 			 * feature descriptions object.
 			 */
-			error = dsl_sync_task_do(spa_get_dsl(spa), NULL,
-			    spa_sync_version, spa, &ver, 6);
+			error = dsl_sync_task(spa->spa_name, NULL,
+			    spa_sync_version, &ver, 6);
 			if (error)
 				return (error);
 			continue;
@@ -671,8 +674,8 @@
 	}
 
 	if (need_sync) {
-		return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
-		    spa, nvp, 6));
+		return (dsl_sync_task(spa->spa_name, NULL, spa_sync_props,
+		    nvp, 6));
 	}
 
 	return (0);
@@ -694,10 +697,10 @@
 
 /*ARGSUSED*/
 static int
-spa_change_guid_check(void *arg1, void *arg2, dmu_tx_t *tx)
+spa_change_guid_check(void *arg, dmu_tx_t *tx)
 {
-	spa_t *spa = arg1;
-	uint64_t *newguid = arg2;
+	uint64_t *newguid = arg;
+	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
 	vdev_t *rvd = spa->spa_root_vdev;
 	uint64_t vdev_state;
 
@@ -706,7 +709,7 @@
 	spa_config_exit(spa, SCL_STATE, FTAG);
 
 	if (vdev_state != VDEV_STATE_HEALTHY)
-		return (ENXIO);
+		return (SET_ERROR(ENXIO));
 
 	ASSERT3U(spa_guid(spa), !=, *newguid);
 
@@ -714,10 +717,10 @@
 }
 
 static void
-spa_change_guid_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+spa_change_guid_sync(void *arg, dmu_tx_t *tx)
 {
-	spa_t *spa = arg1;
-	uint64_t *newguid = arg2;
+	uint64_t *newguid = arg;
+	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
 	uint64_t oldguid;
 	vdev_t *rvd = spa->spa_root_vdev;
 
@@ -729,17 +732,8 @@
 	vdev_config_dirty(rvd);
 	spa_config_exit(spa, SCL_STATE, FTAG);
 
-#ifdef __FreeBSD__
-	/*
-	 * TODO: until recent illumos logging changes are merged
-	 *       log reguid as pool property change
-	 */
-	spa_history_log_internal(LOG_POOL_PROPSET, spa, tx,
-	    "guid change old=%llu new=%llu", oldguid, *newguid);
-#else
-	spa_history_log_internal(spa, "guid change", tx, "old=%lld new=%lld",
+	spa_history_log_internal(spa, "guid change", tx, "old=%llu new=%llu",
 	    oldguid, *newguid);
-#endif
 }
 
 /*
@@ -760,8 +754,8 @@
 	mutex_enter(&spa_namespace_lock);
 	guid = spa_generate_guid(NULL);
 
-	error = dsl_sync_task_do(spa_get_dsl(spa), spa_change_guid_check,
-	    spa_change_guid_sync, spa, &guid, 5);
+	error = dsl_sync_task(spa->spa_name, spa_change_guid_check,
+	    spa_change_guid_sync, &guid, 5);
 
 	if (error == 0) {
 		spa_config_sync(spa, B_FALSE, B_TRUE);
@@ -1134,7 +1128,7 @@
 	if (error) {
 		vdev_free(*vdp);
 		*vdp = NULL;
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	for (int c = 0; c < children; c++) {
@@ -1532,7 +1526,8 @@
 	for (int c = 0; c < vd->vdev_children; c++)
 		spa_check_removed(vd->vdev_child[c]);
 
-	if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) {
+	if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd) &&
+	    !vd->vdev_ishole) {
 		zfs_post_autoreplace(vd->vdev_spa, vd);
 		spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK);
 	}
@@ -1663,21 +1658,22 @@
 /*
  * Check for missing log devices
  */
-static int
+static boolean_t
 spa_check_logs(spa_t *spa)
 {
+	boolean_t rv = B_FALSE;
+
 	switch (spa->spa_log_state) {
 	case SPA_LOG_MISSING:
 		/* need to recheck in case slog has been restored */
 	case SPA_LOG_UNKNOWN:
-		if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL,
-		    DS_FIND_CHILDREN)) {
+		rv = (dmu_objset_find(spa->spa_name, zil_check_log_chain,
+		    NULL, DS_FIND_CHILDREN) != 0);
+		if (rv)
 			spa_set_log_state(spa, SPA_LOG_MISSING);
-			return (1);
-		}
 		break;
 	}
-	return (0);
+	return (rv);
 }
 
 static boolean_t
@@ -1723,11 +1719,11 @@
 int
 spa_offline_log(spa_t *spa)
 {
-	int error = 0;
+	int error;
 
-	if ((error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
-	    NULL, DS_FIND_CHILDREN)) == 0) {
-
+	error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
+	    NULL, DS_FIND_CHILDREN);
+	if (error == 0) {
 		/*
 		 * We successfully offlined the log device, sync out the
 		 * current txg so that the "stubby" block can be removed
@@ -1848,7 +1844,7 @@
 
 	if (error) {
 		if (error != ENXIO && error != EIO)
-			error = EIO;
+			error = SET_ERROR(EIO);
 		return (error);
 	}
 
@@ -1976,7 +1972,7 @@
 	nvlist_t *nvl;
 
 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid))
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	ASSERT(spa->spa_comment == NULL);
 	if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0)
@@ -1995,7 +1991,7 @@
 
 	if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) &&
 	    spa_guid_exists(pool_guid, 0)) {
-		error = EEXIST;
+		error = SET_ERROR(EEXIST);
 	} else {
 		spa->spa_config_guid = pool_guid;
 
@@ -2061,7 +2057,7 @@
 	spa->spa_load_state = state;
 
 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot))
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	parse = (type == SPA_IMPORT_EXISTING ?
 	    VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT);
@@ -2121,7 +2117,7 @@
 			return (error);
 
 		if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN)
-			return (ENXIO);
+			return (SET_ERROR(ENXIO));
 	}
 
 	/*
@@ -2356,7 +2352,7 @@
 				    "See: http://illumos.org/msg/ZFS-8000-EY",
 				    spa_name(spa), hostname,
 				    (unsigned long)hostid);
-				return (EBADF);
+				return (SET_ERROR(EBADF));
 			}
 		}
 		if (nvlist_lookup_nvlist(spa->spa_config,
@@ -2545,7 +2541,7 @@
 		 * more toplevel vdevs are faulted.
 		 */
 		if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN)
-			return (ENXIO);
+			return (SET_ERROR(ENXIO));
 
 		if (spa_check_logs(spa)) {
 			*ereport = FM_EREPORT_ZFS_LOG_REPLAY;
@@ -2643,6 +2639,12 @@
 			spa_async_request(spa, SPA_ASYNC_RESILVER);
 
 		/*
+		 * Log the fact that we booted up (so that we can detect if
+		 * we rebooted in the middle of an operation).
+		 */
+		spa_history_log_version(spa, "open");
+
+		/*
 		 * Delete any inconsistent datasets.
 		 */
 		(void) dmu_objset_find(spa_name(spa),
@@ -2802,7 +2804,7 @@
 	if ((spa = spa_lookup(pool)) == NULL) {
 		if (locked)
 			mutex_exit(&spa_namespace_lock);
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 	}
 
 	if (spa->spa_state == POOL_STATE_UNINITIALIZED) {
@@ -2837,7 +2839,7 @@
 			spa_remove(spa);
 			if (locked)
 				mutex_exit(&spa_namespace_lock);
-			return (ENOENT);
+			return (SET_ERROR(ENOENT));
 		}
 
 		if (error) {
@@ -3174,7 +3176,7 @@
 		return (0);
 
 	if (ndev == 0)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	/*
 	 * Make sure the pool is formatted with a version that supports this
@@ -3181,7 +3183,7 @@
 	 * device type.
 	 */
 	if (spa_version(spa) < version)
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 
 	/*
 	 * Set the pending device list so we correctly handle device in-use
@@ -3197,7 +3199,7 @@
 
 		if (!vd->vdev_ops->vdev_op_leaf) {
 			vdev_free(vd);
-			error = EINVAL;
+			error = SET_ERROR(EINVAL);
 			goto out;
 		}
 
@@ -3208,7 +3210,7 @@
 #ifdef _KERNEL
 		if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) &&
 		    strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) {
-			error = ENOTBLK;
+			error = SET_ERROR(ENOTBLK);
 			vdev_free(vd);
 			goto out;
 		}
@@ -3327,7 +3329,7 @@
  */
 int
 spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
-    const char *history_str, nvlist_t *zplprops)
+    nvlist_t *zplprops)
 {
 	spa_t *spa;
 	char *altroot = NULL;
@@ -3347,7 +3349,7 @@
 	mutex_enter(&spa_namespace_lock);
 	if (spa_lookup(pool) != NULL) {
 		mutex_exit(&spa_namespace_lock);
-		return (EEXIST);
+		return (SET_ERROR(EEXIST));
 	}
 
 	/*
@@ -3400,7 +3402,7 @@
 	ASSERT(error != 0 || spa->spa_root_vdev == rvd);
 
 	if (error == 0 && !zfs_allocatable_devs(nvroot))
-		error = EINVAL;
+		error = SET_ERROR(EINVAL);
 
 	if (error == 0 &&
 	    (error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
@@ -3530,7 +3532,7 @@
 
 	if (props != NULL) {
 		spa_configfile_set(spa, props, B_FALSE);
-		spa_sync_props(spa, props, tx);
+		spa_sync_props(props, tx);
 	}
 
 	dmu_tx_commit(tx);
@@ -3546,9 +3548,7 @@
 
 	spa_config_sync(spa, B_FALSE, B_TRUE);
 
-	if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL)
-		(void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE);
-	spa_history_log_version(spa, LOG_POOL_CREATE);
+	spa_history_log_version(spa, "create");
 
 	spa->spa_minref = refcount_count(&spa->spa_refcount);
 
@@ -3675,7 +3675,7 @@
 	if (config == NULL) {
 		cmn_err(CE_NOTE, "Cannot read the pool label from '%s'",
 		    devpath);
-		return (EIO);
+		return (SET_ERROR(EIO));
 	}
 
 	VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
@@ -3718,7 +3718,7 @@
 	if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) {
 		cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu",
 		    (u_longlong_t)guid);
-		error = ENOENT;
+		error = SET_ERROR(ENOENT);
 		goto out;
 	}
 
@@ -3730,7 +3730,7 @@
 	if (avd != bvd) {
 		cmn_err(CE_NOTE, "The boot device is 'degraded'. Please "
 		    "try booting from '%s'", avd->vdev_path);
-		error = EINVAL;
+		error = SET_ERROR(EINVAL);
 		goto out;
 	}
 
@@ -3744,12 +3744,11 @@
 		    "try booting from '%s'",
 		    bvd->vdev_parent->
 		    vdev_child[bvd->vdev_parent->vdev_children - 1]->vdev_path);
-		error = EINVAL;
+		error = SET_ERROR(EINVAL);
 		goto out;
 	}
 
 	error = 0;
-	spa_history_log_version(spa, LOG_POOL_IMPORT);
 out:
 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
 	vdev_free(rvd);
@@ -3980,7 +3979,7 @@
 	mutex_enter(&spa_namespace_lock);
 	if (spa_lookup(pool) != NULL) {
 		mutex_exit(&spa_namespace_lock);
-		return (EEXIST);
+		return (SET_ERROR(EEXIST));
 	}
 
 	/*
@@ -4006,7 +4005,7 @@
 		spa_config_sync(spa, B_FALSE, B_TRUE);
 
 		mutex_exit(&spa_namespace_lock);
-		spa_history_log_version(spa, LOG_POOL_IMPORT);
+		spa_history_log_version(spa, "import");
 
 		return (0);
 	}
@@ -4137,7 +4136,7 @@
 	spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);
 
 	mutex_exit(&spa_namespace_lock);
-	spa_history_log_version(spa, LOG_POOL_IMPORT);
+	spa_history_log_version(spa, "import");
 
 #ifdef __FreeBSD__
 #ifdef _KERNEL
@@ -4258,12 +4257,12 @@
 		*oldconfig = NULL;
 
 	if (!(spa_mode_global & FWRITE))
-		return (EROFS);
+		return (SET_ERROR(EROFS));
 
 	mutex_enter(&spa_namespace_lock);
 	if ((spa = spa_lookup(pool)) == NULL) {
 		mutex_exit(&spa_namespace_lock);
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 	}
 
 	/*
@@ -4297,7 +4296,7 @@
 		    new_state != POOL_STATE_UNINITIALIZED)) {
 			spa_async_resume(spa);
 			mutex_exit(&spa_namespace_lock);
-			return (EBUSY);
+			return (SET_ERROR(EBUSY));
 		}
 
 		/*
@@ -4310,7 +4309,7 @@
 		    spa_has_active_shared_spare(spa)) {
 			spa_async_resume(spa);
 			mutex_exit(&spa_namespace_lock);
-			return (EXDEV);
+			return (SET_ERROR(EXDEV));
 		}
 
 		/*
@@ -4680,7 +4679,7 @@
 	 */
 	(void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0);
 
-	spa_history_log_internal(LOG_POOL_VDEV_ATTACH, spa, NULL,
+	spa_history_log_internal(spa, "vdev attach", NULL,
 	    "%s vdev=%s %s vdev=%s",
 	    replacing && newvd_isspare ? "spare in" :
 	    replacing ? "replace" : "attach", newvdpath,
@@ -4897,7 +4896,7 @@
 
 	error = spa_vdev_exit(spa, vd, txg, 0);
 
-	spa_history_log_internal(LOG_POOL_VDEV_DETACH, spa, NULL,
+	spa_history_log_internal(spa, "detach", NULL,
 	    "vdev=%s", vdpath);
 	spa_strfree(vdpath);
 
@@ -5018,7 +5017,7 @@
 			    spa->spa_root_vdev->vdev_child[c]->vdev_islog) {
 				continue;
 			} else {
-				error = EINVAL;
+				error = SET_ERROR(EINVAL);
 				break;
 			}
 		}
@@ -5026,7 +5025,7 @@
 		/* which disk is going to be split? */
 		if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_GUID,
 		    &glist[c]) != 0) {
-			error = EINVAL;
+			error = SET_ERROR(EINVAL);
 			break;
 		}
 
@@ -5033,7 +5032,7 @@
 		/* look it up in the spa */
 		vml[c] = spa_lookup_by_guid(spa, glist[c], B_FALSE);
 		if (vml[c] == NULL) {
-			error = ENODEV;
+			error = SET_ERROR(ENODEV);
 			break;
 		}
 
@@ -5047,12 +5046,12 @@
 		    vml[c]->vdev_children != 0 ||
 		    vml[c]->vdev_state != VDEV_STATE_HEALTHY ||
 		    c != spa->spa_root_vdev->vdev_child[c]->vdev_id) {
-			error = EINVAL;
+			error = SET_ERROR(EINVAL);
 			break;
 		}
 
 		if (vdev_dtl_required(vml[c])) {
-			error = EBUSY;
+			error = SET_ERROR(EBUSY);
 			break;
 		}
 
@@ -5173,9 +5172,8 @@
 		if (vml[c] != NULL) {
 			vdev_split(vml[c]);
 			if (error == 0)
-				spa_history_log_internal(LOG_POOL_VDEV_DETACH,
-				    spa, tx, "vdev=%s",
-				    vml[c]->vdev_path);
+				spa_history_log_internal(spa, "detach", tx,
+				    "vdev=%s", vml[c]->vdev_path);
 			vdev_free(vml[c]);
 		}
 	}
@@ -5190,8 +5188,8 @@
 		zio_handle_panic_injection(spa, FTAG, 3);
 
 	/* split is complete; log a history record */
-	spa_history_log_internal(LOG_POOL_SPLIT, newspa, NULL,
-	    "split new pool %s from pool %s", newname, spa_name(spa));
+	spa_history_log_internal(newspa, "split", NULL,
+	    "from pool %s", spa_name(spa));
 
 	kmem_free(vml, children * sizeof (vdev_t *));
 
@@ -5288,7 +5286,7 @@
 		if (vd->vdev_stat.vs_alloc != 0)
 			error = spa_offline_log(spa);
 	} else {
-		error = ENOTSUP;
+		error = SET_ERROR(ENOTSUP);
 	}
 
 	if (error)
@@ -5397,7 +5395,7 @@
 			spa_load_spares(spa);
 			spa->spa_spares.sav_sync = B_TRUE;
 		} else {
-			error = EBUSY;
+			error = SET_ERROR(EBUSY);
 		}
 	} else if (spa->spa_l2cache.sav_vdevs != NULL &&
 	    nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
@@ -5457,12 +5455,12 @@
 		/*
 		 * Normal vdevs cannot be removed (yet).
 		 */
-		error = ENOTSUP;
+		error = SET_ERROR(ENOTSUP);
 	} else {
 		/*
 		 * There is no vdev of any kind with the specified guid.
 		 */
-		error = ENOENT;
+		error = SET_ERROR(ENOENT);
 	}
 
 	if (!locked)
@@ -5649,7 +5647,7 @@
 {
 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
 	if (dsl_scan_resilvering(spa->spa_dsl_pool))
-		return (EBUSY);
+		return (SET_ERROR(EBUSY));
 	return (dsl_scan_cancel(spa->spa_dsl_pool));
 }
 
@@ -5659,7 +5657,7 @@
 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
 
 	if (func >= POOL_SCAN_FUNCS || func == POOL_SCAN_NONE)
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 
 	/*
 	 * If a resilver was requested, but there is no DTL on a
@@ -5778,8 +5776,7 @@
 		 * then log an internal history event.
 		 */
 		if (new_space != old_space) {
-			spa_history_log_internal(LOG_POOL_VDEV_ONLINE,
-			    spa, NULL,
+			spa_history_log_internal(spa, "vdev online", NULL,
 			    "pool '%s' size: %llu(+%llu)",
 			    spa_name(spa), new_space, new_space - old_space);
 		}
@@ -6008,10 +6005,11 @@
 }
 
 static void
-spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx)
+spa_sync_version(void *arg, dmu_tx_t *tx)
 {
-	spa_t *spa = arg1;
-	uint64_t version = *(uint64_t *)arg2;
+	uint64_t *versionp = arg;
+	uint64_t version = *versionp;
+	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
 
 	/*
 	 * Setting the version is special cased when first creating the pool.
@@ -6023,6 +6021,7 @@
 
 	spa->spa_uberblock.ub_version = version;
 	vdev_config_dirty(spa->spa_root_vdev);
+	spa_history_log_internal(spa, "set", tx, "version=%lld", version);
 }
 
 /*
@@ -6029,11 +6028,11 @@
  * Set zpool properties.
  */
 static void
-spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
+spa_sync_props(void *arg, dmu_tx_t *tx)
 {
-	spa_t *spa = arg1;
+	nvlist_t *nvp = arg;
+	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
 	objset_t *mos = spa->spa_meta_objset;
-	nvlist_t *nvp = arg2;
 	nvpair_t *elem = NULL;
 
 	mutex_enter(&spa->spa_props_lock);
@@ -6057,6 +6056,8 @@
 			VERIFY3U(0, ==, zfeature_lookup_name(fname, &feature));
 
 			spa_feature_enable(spa, feature, tx);
+			spa_history_log_internal(spa, "set", tx,
+			    "%s=enabled", nvpair_name(elem));
 			break;
 
 		case ZPOOL_PROP_VERSION:
@@ -6096,6 +6097,8 @@
 			 */
 			if (tx->tx_txg != TXG_INITIAL)
 				vdev_config_dirty(spa->spa_root_vdev);
+			spa_history_log_internal(spa, "set", tx,
+			    "%s=%s", nvpair_name(elem), strval);
 			break;
 		default:
 			/*
@@ -6118,7 +6121,8 @@
 				VERIFY(zap_update(mos,
 				    spa->spa_pool_props_object, propname,
 				    1, strlen(strval) + 1, strval, tx) == 0);
-
+				spa_history_log_internal(spa, "set", tx,
+				    "%s=%s", nvpair_name(elem), strval);
 			} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
 				VERIFY(nvpair_value_uint64(elem, &intval) == 0);
 
@@ -6130,6 +6134,8 @@
 				VERIFY(zap_update(mos,
 				    spa->spa_pool_props_object, propname,
 				    8, 1, &intval, tx) == 0);
+				spa_history_log_internal(spa, "set", tx,
+				    "%s=%lld", nvpair_name(elem), intval);
 			} else {
 				ASSERT(0); /* not allowed */
 			}
@@ -6158,13 +6164,6 @@
 			}
 		}
 
-		/* log internal history if this is not a zpool create */
-		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY &&
-		    tx->tx_txg != TXG_INITIAL) {
-			spa_history_log_internal(LOG_POOL_PROPSET,
-			    spa, tx, "%s %lld %s",
-			    nvpair_name(elem), intval, spa_name(spa));
-		}
 	}
 
 	mutex_exit(&spa->spa_props_lock);
@@ -6184,6 +6183,8 @@
 
 	ASSERT(spa->spa_sync_pass == 1);
 
+	rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
+
 	if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&
 	    spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) {
 		dsl_pool_create_origin(dp, tx);
@@ -6209,6 +6210,7 @@
 	    spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) {
 		spa_feature_create_zap_objects(spa, tx);
 	}
+	rrw_exit(&dp->dp_config_rwlock, FTAG);
 }
 
 /*

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -220,7 +220,15 @@
 		 */
 		nvl = NULL;
 		while ((spa = spa_next(spa)) != NULL) {
-			if (spa == target && removing)
+			/*
+			 * Skip over our own pool if we're about to remove
+			 * ourselves from the spa namespace or any pool that
+			 * is readonly. Since we cannot guarantee that a
+			 * readonly pool would successfully import upon reboot,
+			 * we don't allow them to be written to the cache file.
+			 */
+			if ((spa == target && removing) ||
+			    !spa_writeable(spa))
 				continue;
 
 			mutex_enter(&spa->spa_props_lock);

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 /*
@@ -175,7 +176,7 @@
 
 		if (*count == 0) {
 			zap_cursor_fini(&zc);
-			return (ENOMEM);
+			return (SET_ERROR(ENOMEM));
 		}
 
 		name_to_bookmark(za.za_name, &zb);
@@ -183,7 +184,7 @@
 		if (copyout(&zb, (char *)addr +
 		    (*count - 1) * sizeof (zbookmark_t),
 		    sizeof (zbookmark_t)) != 0)
-			return (EFAULT);
+			return (SET_ERROR(EFAULT));
 
 		*count -= 1;
 	}
@@ -201,12 +202,12 @@
 	for (se = avl_first(list); se != NULL; se = AVL_NEXT(list, se)) {
 
 		if (*count == 0)
-			return (ENOMEM);
+			return (SET_ERROR(ENOMEM));
 
 		if (copyout(&se->se_bookmark, (char *)addr +
 		    (*count - 1) * sizeof (zbookmark_t),
 		    sizeof (zbookmark_t)) != 0)
-			return (EFAULT);
+			return (SET_ERROR(EFAULT));
 
 		*count -= 1;
 	}

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/spa.h>
@@ -30,8 +30,11 @@
 #include <sys/dsl_synctask.h>
 #include <sys/dmu_tx.h>
 #include <sys/dmu_objset.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_dir.h>
 #include <sys/utsname.h>
 #include <sys/sunddi.h>
+#include <sys/cred.h>
 #include "zfs_comutil.h"
 #ifdef _KERNEL
 #include <sys/cmn_err.h>
@@ -176,7 +179,7 @@
 }
 
 static char *
-spa_history_zone()
+spa_history_zone(void)
 {
 #ifdef _KERNEL
 	/* XXX: pr_hostname can be changed by default from within a jail! */
@@ -183,7 +186,7 @@
 	if (jailed(curthread->td_ucred))
 		return (curthread->td_ucred->cr_prison->pr_hostname);
 #endif
-	return ("global");
+	return (NULL);
 }
 
 /*
@@ -191,17 +194,15 @@
  */
 /*ARGSUSED*/
 static void
-spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+spa_history_log_sync(void *arg, dmu_tx_t *tx)
 {
-	spa_t		*spa = arg1;
-	history_arg_t	*hap = arg2;
-	const char	*history_str = hap->ha_history_str;
+	nvlist_t	*nvl = arg;
+	spa_t		*spa = dmu_tx_pool(tx)->dp_spa;
 	objset_t	*mos = spa->spa_meta_objset;
 	dmu_buf_t	*dbp;
 	spa_history_phys_t *shpp;
 	size_t		reclen;
 	uint64_t	le_len;
-	nvlist_t	*nvrecord;
 	char		*record_packed = NULL;
 	int		ret;
 
@@ -218,7 +219,7 @@
 	 * Get the offset of where we need to write via the bonus buffer.
 	 * Update the offset when the write completes.
 	 */
-	VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
+	VERIFY0(dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
 	shpp = dbp->db_data;
 
 	dmu_buf_will_dirty(dbp, tx);
@@ -231,46 +232,35 @@
 	}
 #endif
 
-	VERIFY(nvlist_alloc(&nvrecord, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-	VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TIME,
-	    gethrestime_sec()) == 0);
-	VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_WHO, hap->ha_uid) == 0);
-	if (hap->ha_zone != NULL)
-		VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_ZONE,
-		    hap->ha_zone) == 0);
+	fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec());
 #ifdef _KERNEL
-	VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_HOST,
-	    utsname.nodename) == 0);
+	fnvlist_add_string(nvl, ZPOOL_HIST_HOST, utsname.nodename);
 #endif
-	if (hap->ha_log_type == LOG_CMD_POOL_CREATE ||
-	    hap->ha_log_type == LOG_CMD_NORMAL) {
-		VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_CMD,
-		    history_str) == 0);
-
-		zfs_dbgmsg("command: %s", history_str);
-	} else {
-		VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_INT_EVENT,
-		    hap->ha_event) == 0);
-		VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TXG,
-		    tx->tx_txg) == 0);
-		VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_INT_STR,
-		    history_str) == 0);
-
-		zfs_dbgmsg("internal %s pool:%s txg:%llu %s",
-		    zfs_history_event_names[hap->ha_event], spa_name(spa),
-		    (longlong_t)tx->tx_txg, history_str);
-
+	if (nvlist_exists(nvl, ZPOOL_HIST_CMD)) {
+		zfs_dbgmsg("command: %s",
+		    fnvlist_lookup_string(nvl, ZPOOL_HIST_CMD));
+	} else if (nvlist_exists(nvl, ZPOOL_HIST_INT_NAME)) {
+		if (nvlist_exists(nvl, ZPOOL_HIST_DSNAME)) {
+			zfs_dbgmsg("txg %lld %s %s (id %llu) %s",
+			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG),
+			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME),
+			    fnvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME),
+			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID),
+			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR));
+		} else {
+			zfs_dbgmsg("txg %lld %s %s",
+			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG),
+			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME),
+			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR));
+		}
+	} else if (nvlist_exists(nvl, ZPOOL_HIST_IOCTL)) {
+		zfs_dbgmsg("ioctl %s",
+		    fnvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL));
 	}
 
-	VERIFY(nvlist_size(nvrecord, &reclen, NV_ENCODE_XDR) == 0);
-	record_packed = kmem_alloc(reclen, KM_SLEEP);
+	record_packed = fnvlist_pack(nvl, &reclen);
 
-	VERIFY(nvlist_pack(nvrecord, &record_packed, &reclen,
-	    NV_ENCODE_XDR, KM_SLEEP) == 0);
-
 	mutex_enter(&spa->spa_history_lock);
-	if (hap->ha_log_type == LOG_CMD_POOL_CREATE)
-		VERIFY(shpp->sh_eof == shpp->sh_pool_create_len);
 
 	/* write out the packed length as little endian */
 	le_len = LE_64((uint64_t)reclen);
@@ -278,20 +268,16 @@
 	if (!ret)
 		ret = spa_history_write(spa, record_packed, reclen, shpp, tx);
 
-	if (!ret && hap->ha_log_type == LOG_CMD_POOL_CREATE) {
-		shpp->sh_pool_create_len += sizeof (le_len) + reclen;
-		shpp->sh_bof = shpp->sh_pool_create_len;
+	/* The first command is the create, which we keep forever */
+	if (ret == 0 && shpp->sh_pool_create_len == 0 &&
+	    nvlist_exists(nvl, ZPOOL_HIST_CMD)) {
+		shpp->sh_pool_create_len = shpp->sh_bof = shpp->sh_eof;
 	}
 
 	mutex_exit(&spa->spa_history_lock);
-	nvlist_free(nvrecord);
-	kmem_free(record_packed, reclen);
+	fnvlist_pack_free(record_packed, reclen);
 	dmu_buf_rele(dbp, FTAG);
-
-	strfree(hap->ha_history_str);
-	if (hap->ha_zone != NULL)
-		strfree(hap->ha_zone);
-	kmem_free(hap, sizeof (history_arg_t));
+	fnvlist_free(nvl);
 }
 
 /*
@@ -298,16 +284,29 @@
  * Write out a history event.
  */
 int
-spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what)
+spa_history_log(spa_t *spa, const char *msg)
 {
-	history_arg_t *ha;
+	int err;
+	nvlist_t *nvl = fnvlist_alloc();
+
+	fnvlist_add_string(nvl, ZPOOL_HIST_CMD, msg);
+	err = spa_history_log_nvl(spa, nvl);
+	fnvlist_free(nvl);
+	return (err);
+}
+
+int
+spa_history_log_nvl(spa_t *spa, nvlist_t *nvl)
+{
 	int err = 0;
 	dmu_tx_t *tx;
+	nvlist_t *nvarg;
 
-	ASSERT(what != LOG_INTERNAL);
+	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY)
+		return (EINVAL);
 
 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa))
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
 	err = dmu_tx_assign(tx, TXG_WAIT);
@@ -316,19 +315,21 @@
 		return (err);
 	}
 
-	ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP);
-	ha->ha_history_str = strdup(history_str);
-	ha->ha_zone = strdup(spa_history_zone());
-	ha->ha_log_type = what;
-	ha->ha_uid = crgetuid(CRED());
+	nvarg = fnvlist_dup(nvl);
+	if (spa_history_zone() != NULL) {
+		fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE,
+		    spa_history_zone());
+	}
+	fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED()));
 
 	/* Kick this off asynchronously; errors are ignored. */
-	dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL,
-	    spa_history_log_sync, spa, ha, 0, tx);
+	dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync,
+	    nvarg, 0, tx);
 	dmu_tx_commit(tx);
 
-	/* spa_history_log_sync will free ha and strings */
+	/* spa_history_log_sync will free nvl */
 	return (err);
+
 }
 
 /*
@@ -345,11 +346,11 @@
 	int err;
 
 	/*
-	 * If the command history  doesn't exist (older pool),
+	 * If the command history doesn't exist (older pool),
 	 * that's ok, just return ENOENT.
 	 */
 	if (!spa->spa_history)
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 
 	/*
 	 * The history is logged asynchronously, so when they request
@@ -428,11 +429,14 @@
 	return (err);
 }
 
+/*
+ * The nvlist will be consumed by this call.
+ */
 static void
-log_internal(history_internal_events_t event, spa_t *spa,
+log_internal(nvlist_t *nvl, const char *operation, spa_t *spa,
     dmu_tx_t *tx, const char *fmt, va_list adx)
 {
-	history_arg_t *ha;
+	char *msg;
 	va_list adx2;
 
 	/*
@@ -440,35 +444,34 @@
 	 * initialized yet, so don't bother logging the internal events.
 	 * Likewise if the pool is not writeable.
 	 */
-	if (tx->tx_txg == TXG_INITIAL || !spa_writeable(spa))
+	if (tx->tx_txg == TXG_INITIAL || !spa_writeable(spa)) {
+		fnvlist_free(nvl);
 		return;
+	}
 
 	va_copy(adx2, adx);
 
-	ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP);
-	ha->ha_history_str = kmem_alloc(vsnprintf(NULL, 0, fmt, adx2) + 1,
-	    KM_SLEEP);
+	msg = kmem_alloc(vsnprintf(NULL, 0, fmt, adx) + 1, KM_SLEEP);
+	(void) vsprintf(msg, fmt, adx2);
+	fnvlist_add_string(nvl, ZPOOL_HIST_INT_STR, msg);
+	strfree(msg);
 
 	va_end(adx2);
 
-	(void) vsprintf(ha->ha_history_str, fmt, adx);
+	fnvlist_add_string(nvl, ZPOOL_HIST_INT_NAME, operation);
+	fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg);
 
-	ha->ha_log_type = LOG_INTERNAL;
-	ha->ha_event = event;
-	ha->ha_zone = NULL;
-	ha->ha_uid = 0;
-
 	if (dmu_tx_is_syncing(tx)) {
-		spa_history_log_sync(spa, ha, tx);
+		spa_history_log_sync(nvl, tx);
 	} else {
-		dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL,
-		    spa_history_log_sync, spa, ha, 0, tx);
+		dsl_sync_task_nowait(spa_get_dsl(spa),
+		    spa_history_log_sync, nvl, 0, tx);
 	}
-	/* spa_history_log_sync() will free ha and strings */
+	/* spa_history_log_sync() will free nvl */
 }
 
 void
-spa_history_log_internal(history_internal_events_t event, spa_t *spa,
+spa_history_log_internal(spa_t *spa, const char *operation,
     dmu_tx_t *tx, const char *fmt, ...)
 {
 	dmu_tx_t *htx = tx;
@@ -484,7 +487,7 @@
 	}
 
 	va_start(adx, fmt);
-	log_internal(event, spa, htx, fmt, adx);
+	log_internal(fnvlist_alloc(), operation, spa, htx, fmt, adx);
 	va_end(adx);
 
 	/* if we didn't get a tx from the caller, commit the one we made */
@@ -493,23 +496,50 @@
 }
 
 void
-spa_history_log_version(spa_t *spa, history_internal_events_t event)
+spa_history_log_internal_ds(dsl_dataset_t *ds, const char *operation,
+    dmu_tx_t *tx, const char *fmt, ...)
 {
-#ifdef _KERNEL
-	uint64_t current_vers = spa_version(spa);
+	va_list adx;
+	char namebuf[MAXNAMELEN];
+	nvlist_t *nvl = fnvlist_alloc();
 
-	if (current_vers >= SPA_VERSION_ZPOOL_HISTORY) {
-		spa_history_log_internal(event, spa, NULL,
-		    "pool spa %llu; zfs spa %llu; zpl %d; uts %s %s %s %s",
-		    (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION,
-		    utsname.nodename, utsname.release, utsname.version,
-		    utsname.machine);
-	}
-#if 0
-	cmn_err(CE_CONT, "!%s version %llu pool %s using %llu",
-	    event == LOG_POOL_IMPORT ? "imported" :
-	    event == LOG_POOL_CREATE ? "created" : "accessed",
-	    (u_longlong_t)current_vers, spa_name(spa), SPA_VERSION);
-#endif
-#endif
+	ASSERT(tx != NULL);
+
+	dsl_dataset_name(ds, namebuf);
+	fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf);
+	fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, ds->ds_object);
+
+	va_start(adx, fmt);
+	log_internal(nvl, operation, dsl_dataset_get_spa(ds), tx, fmt, adx);
+	va_end(adx);
 }
+
+void
+spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
+    dmu_tx_t *tx, const char *fmt, ...)
+{
+	va_list adx;
+	char namebuf[MAXNAMELEN];
+	nvlist_t *nvl = fnvlist_alloc();
+
+	ASSERT(tx != NULL);
+
+	dsl_dir_name(dd, namebuf);
+	fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf);
+	fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID,
+	    dd->dd_phys->dd_head_dataset_obj);
+
+	va_start(adx, fmt);
+	log_internal(nvl, operation, dd->dd_pool->dp_spa, tx, fmt, adx);
+	va_end(adx);
+}
+
+void
+spa_history_log_version(spa_t *spa, const char *operation)
+{
+	spa_history_log_internal(spa, operation, NULL,
+	    "pool version %llu; software version %llu/%d; uts %s %s %s %s",
+	    (u_longlong_t)spa_version(spa), SPA_VERSION, ZPL_VERSION,
+	    utsname.nodename, utsname.release, utsname.version,
+	    utsname.machine);
+}

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright 2013 Martin Matuska <mm at FreeBSD.org>. All rights reserved.
  */
@@ -238,8 +238,8 @@
 int spa_mode_global;
 
 #ifdef ZFS_DEBUG
-/* Everything except dprintf is on by default in debug builds */
-int zfs_flags = ~ZFS_DEBUG_DPRINTF;
+/* Everything except dprintf and spa is on by default in debug builds */
+int zfs_flags = ~(ZFS_DEBUG_DPRINTF | ZFS_DEBUG_SPA);
 #else
 int zfs_flags = 0;
 #endif
@@ -314,7 +314,7 @@
 		spa_config_lock_t *scl = &spa->spa_config_lock[i];
 		mutex_init(&scl->scl_lock, NULL, MUTEX_DEFAULT, NULL);
 		cv_init(&scl->scl_cv, NULL, CV_DEFAULT, NULL);
-		refcount_create(&scl->scl_count);
+		refcount_create_untracked(&scl->scl_count);
 		scl->scl_writer = NULL;
 		scl->scl_write_wanted = 0;
 	}
@@ -367,6 +367,8 @@
 {
 	int wlocks_held = 0;
 
+	ASSERT3U(SCL_LOCKS, <, sizeof (wlocks_held) * NBBY);
+
 	for (int i = 0; i < SCL_LOCKS; i++) {
 		spa_config_lock_t *scl = &spa->spa_config_lock[i];
 		if (scl->scl_writer == curthread)
@@ -445,27 +447,22 @@
 	static spa_t search;	/* spa_t is large; don't allocate on stack */
 	spa_t *spa;
 	avl_index_t where;
-	char c;
 	char *cp;
 
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
+	(void) strlcpy(search.spa_name, name, sizeof (search.spa_name));
+
 	/*
 	 * If it's a full dataset name, figure out the pool name and
 	 * just use that.
 	 */
-	cp = strpbrk(name, "/@");
-	if (cp) {
-		c = *cp;
+	cp = strpbrk(search.spa_name, "/@");
+	if (cp != NULL)
 		*cp = '\0';
-	}
 
-	(void) strlcpy(search.spa_name, name, sizeof (search.spa_name));
 	spa = avl_find(&spa_namespace_avl, &search, &where);
 
-	if (cp)
-		*cp = c;
-
 	return (spa);
 }
 
@@ -600,6 +597,8 @@
 		    KM_SLEEP) == 0);
 	}
 
+	spa->spa_debug = ((zfs_flags & ZFS_DEBUG_SPA) != 0);
+
 	return (spa);
 }
 
@@ -1862,7 +1861,7 @@
 	dsl_scan_t *scn = spa->spa_dsl_pool ? spa->spa_dsl_pool->dp_scan : NULL;
 
 	if (scn == NULL || scn->scn_phys.scn_func == POOL_SCAN_NONE)
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 	bzero(ps, sizeof (pool_scan_stat_t));
 
 	/* data stored on disk */

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -102,7 +102,7 @@
 space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
 {
 	avl_index_t where;
-	space_seg_t ssearch, *ss_before, *ss_after, *ss;
+	space_seg_t *ss_before, *ss_after, *ss;
 	uint64_t end = start + size;
 	int merge_before, merge_after;
 
@@ -115,11 +115,8 @@
 	VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0);
 	VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0);
 
-	ssearch.ss_start = start;
-	ssearch.ss_end = end;
-	ss = avl_find(&sm->sm_root, &ssearch, &where);
-
-	if (ss != NULL && ss->ss_start <= start && ss->ss_end >= end) {
+	ss = space_map_find(sm, start, size, &where);
+	if (ss != NULL) {
 		zfs_panic_recover("zfs: allocating allocated segment"
 		    "(offset=%llu size=%llu)\n",
 		    (longlong_t)start, (longlong_t)size);
@@ -170,20 +167,20 @@
 void
 space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
 {
-	space_seg_t ssearch, *ss, *newseg;
+#ifdef illumos
+	avl_index_t where;
+#endif
+	space_seg_t *ss, *newseg;
 	uint64_t end = start + size;
 	int left_over, right_over;
 
-	ASSERT(MUTEX_HELD(sm->sm_lock));
 	VERIFY(!sm->sm_condensing);
-	VERIFY(size != 0);
-	VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0);
-	VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0);
+#ifdef illumos
+	ss = space_map_find(sm, start, size, &where);
+#else
+	ss = space_map_find(sm, start, size, NULL);
+#endif
 
-	ssearch.ss_start = start;
-	ssearch.ss_end = end;
-	ss = avl_find(&sm->sm_root, &ssearch, NULL);
-
 	/* Make sure we completely overlap with someone */
 	if (ss == NULL) {
 		zfs_panic_recover("zfs: freeing free segment "
@@ -225,12 +222,11 @@
 	sm->sm_space -= size;
 }
 
-boolean_t
-space_map_contains(space_map_t *sm, uint64_t start, uint64_t size)
+space_seg_t *
+space_map_find(space_map_t *sm, uint64_t start, uint64_t size,
+    avl_index_t *wherep)
 {
-	avl_index_t where;
 	space_seg_t ssearch, *ss;
-	uint64_t end = start + size;
 
 	ASSERT(MUTEX_HELD(sm->sm_lock));
 	VERIFY(size != 0);
@@ -238,12 +234,22 @@
 	VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0);
 
 	ssearch.ss_start = start;
-	ssearch.ss_end = end;
-	ss = avl_find(&sm->sm_root, &ssearch, &where);
+	ssearch.ss_end = start + size;
+	ss = avl_find(&sm->sm_root, &ssearch, wherep);
 
-	return (ss != NULL && ss->ss_start <= start && ss->ss_end >= end);
+	if (ss != NULL && ss->ss_start <= start && ss->ss_end >= start + size)
+		return (ss);
+	return (NULL);
 }
 
+boolean_t
+space_map_contains(space_map_t *sm, uint64_t start, uint64_t size)
+{
+	avl_index_t where;
+
+	return (space_map_find(sm, start, size, &where) != 0);
+}
+
 void
 space_map_swap(space_map_t **msrc, space_map_t **mdst)
 {

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -89,7 +89,7 @@
 void arc_return_buf(arc_buf_t *buf, void *tag);
 void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
 void arc_buf_add_ref(arc_buf_t *buf, void *tag);
-int arc_buf_remove_ref(arc_buf_t *buf, void *tag);
+boolean_t arc_buf_remove_ref(arc_buf_t *buf, void *tag);
 int arc_buf_size(arc_buf_t *buf);
 void arc_release(arc_buf_t *buf, void *tag);
 int arc_released(arc_buf_t *buf);

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -311,20 +311,17 @@
 
 boolean_t dbuf_is_metadata(dmu_buf_impl_t *db);
 
-#define	DBUF_IS_METADATA(_db)	\
-	(dbuf_is_metadata(_db))
-
 #define	DBUF_GET_BUFC_TYPE(_db)	\
-	(DBUF_IS_METADATA(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)
+	(dbuf_is_metadata(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)
 
 #define	DBUF_IS_CACHEABLE(_db)						\
 	((_db)->db_objset->os_primary_cache == ZFS_CACHE_ALL ||		\
-	(DBUF_IS_METADATA(_db) &&					\
+	(dbuf_is_metadata(_db) &&					\
 	((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA)))
 
 #define	DBUF_IS_L2CACHEABLE(_db)					\
 	((_db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL ||	\
-	(DBUF_IS_METADATA(_db) &&					\
+	(dbuf_is_metadata(_db) &&					\
 	((_db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
 
 #ifdef ZFS_DEBUG

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -43,6 +43,7 @@
 #include <sys/param.h>
 #include <sys/cred.h>
 #include <sys/time.h>
+#include <sys/fs/zfs.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -216,15 +217,10 @@
 	DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE),
 } dmu_object_type_t;
 
-typedef enum dmu_objset_type {
-	DMU_OST_NONE,
-	DMU_OST_META,
-	DMU_OST_ZFS,
-	DMU_OST_ZVOL,
-	DMU_OST_OTHER,			/* For testing only! */
-	DMU_OST_ANY,			/* Be careful! */
-	DMU_OST_NUMTYPES
-} dmu_objset_type_t;
+typedef enum txg_how {
+	TXG_WAIT = 1,
+	TXG_NOWAIT,
+} txg_how_t;
 
 void byteswap_uint64_array(void *buf, size_t size);
 void byteswap_uint32_array(void *buf, size_t size);
@@ -264,22 +260,21 @@
 void dmu_objset_disown(objset_t *os, void *tag);
 int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp);
 
-int dmu_objset_evict_dbufs(objset_t *os);
+void dmu_objset_evict_dbufs(objset_t *os);
 int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
     void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
-int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin,
-    uint64_t flags);
-int dmu_objset_destroy(const char *name, boolean_t defer);
-int dmu_get_recursive_snaps_nvl(const char *fsname, const char *snapname,
+int dmu_get_recursive_snaps_nvl(char *fsname, const char *snapname,
     struct nvlist *snaps);
-int dmu_snapshots_destroy_nvl(struct nvlist *snaps, boolean_t defer, char *);
-int dmu_objset_snapshot(char *fsname, char *snapname, char *tag,
-    struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd);
-int dmu_objset_rename(const char *name, const char *newname,
-    boolean_t recursive);
-int dmu_objset_find(const char *name, int func(const char *, void *), void *arg,
+int dmu_objset_clone(const char *name, const char *origin);
+int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer,
+    struct nvlist *errlist);
+int dmu_objset_snapshot_one(const char *fsname, const char *snapname);
+int dmu_objset_snapshot_tmp(const char *, const char *, int);
+int dmu_objset_find(char *name, int func(const char *, void *), void *arg,
     int flags);
 void dmu_objset_byteswap(void *buf, size_t size);
+int dsl_dataset_rename_snapshot(const char *fsname,
+    const char *oldsnapname, const char *newsnapname, boolean_t recursive);
 
 typedef struct dmu_buf {
 	uint64_t db_object;		/* object that this buffer is part of */
@@ -554,7 +549,7 @@
 void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow);
 void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size);
 void dmu_tx_abort(dmu_tx_t *tx);
-int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
+int dmu_tx_assign(dmu_tx_t *tx, enum txg_how txg_how);
 void dmu_tx_wait(dmu_tx_t *tx);
 void dmu_tx_commit(dmu_tx_t *tx);
 
@@ -796,39 +791,9 @@
     uint64_t object, uint64_t offset, int len);
 void dmu_traverse_objset(objset_t *os, uint64_t txg_start,
     dmu_traverse_cb_t cb, void *arg);
+int dmu_diff(const char *tosnap_name, const char *fromsnap_name,
+    struct file *fp, offset_t *offp);
 
-int dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
-    int outfd, struct file *fp, offset_t *off);
-int dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap,
-    boolean_t fromorigin, uint64_t *sizep);
-
-typedef struct dmu_recv_cookie {
-	/*
-	 * This structure is opaque!
-	 *
-	 * If logical and real are different, we are recving the stream
-	 * into the "real" temporary clone, and then switching it with
-	 * the "logical" target.
-	 */
-	struct dsl_dataset *drc_logical_ds;
-	struct dsl_dataset *drc_real_ds;
-	struct drr_begin *drc_drrb;
-	char *drc_tosnap;
-	char *drc_top_ds;
-	boolean_t drc_newfs;
-	boolean_t drc_force;
-	struct avl_tree *drc_guid_to_ds_map;
-} dmu_recv_cookie_t;
-
-int dmu_recv_begin(char *tofs, char *tosnap, char *topds, struct drr_begin *,
-    boolean_t force, objset_t *origin, dmu_recv_cookie_t *);
-int dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp,
-    int cleanup_fd, uint64_t *action_handlep);
-int dmu_recv_end(dmu_recv_cookie_t *drc);
-
-int dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct file *fp,
-    offset_t *off);
-
 /* CRC64 table */
 #define	ZFS_CRC64_POLY	0xC96C5795D7870F42ULL	/* ECMA-182, reflected form */
 extern uint64_t zfs_crc64_table[256];

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
@@ -42,6 +43,7 @@
 
 extern krwlock_t os_lock;
 
+struct dsl_pool;
 struct dsl_dataset;
 struct dmu_tx;
 
@@ -113,8 +115,6 @@
 	/* stuff we store for the user */
 	kmutex_t os_user_ptr_lock;
 	void *os_user_ptr;
-
-	/* SA layout/attribute registration */
 	sa_os_t *os_sa;
 };
 
@@ -137,25 +137,16 @@
 void dmu_objset_disown(objset_t *os, void *tag);
 int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp);
 
-int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
-    void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
-int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin,
-    uint64_t flags);
-int dmu_objset_destroy(const char *name, boolean_t defer);
-int dmu_objset_snapshot(char *fsname, char *snapname, char *tag,
-    struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd);
 void dmu_objset_stats(objset_t *os, nvlist_t *nv);
 void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
 void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
     uint64_t *usedobjsp, uint64_t *availobjsp);
 uint64_t dmu_objset_fsid_guid(objset_t *os);
-int dmu_objset_find(const char *name, int func(const char *, void *), void *arg,
-    int flags);
-int dmu_objset_find_spa(spa_t *spa, const char *name,
-    int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags);
+int dmu_objset_find_dp(struct dsl_pool *dp, uint64_t ddobj,
+    int func(struct dsl_pool *, struct dsl_dataset *, void *),
+    void *arg, int flags);
 int dmu_objset_prefetch(const char *name, void *arg);
-void dmu_objset_byteswap(void *buf, size_t size);
-int dmu_objset_evict_dbufs(objset_t *os);
+void dmu_objset_evict_dbufs(objset_t *os);
 timestruc_t dmu_objset_snap_cmtime(objset_t *os);
 
 /* called from dsl */
@@ -171,6 +162,7 @@
 boolean_t dmu_objset_userused_enabled(objset_t *os);
 int dmu_objset_userspace_upgrade(objset_t *os);
 boolean_t dmu_objset_userspace_present(objset_t *os);
+int dmu_fsname(const char *snapname, char *buf);
 
 void dmu_objset_init(void);
 void dmu_objset_fini(void);

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_tx.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_tx.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_tx.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -22,6 +22,9 @@
  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
 
 #ifndef	_SYS_DMU_TX_H
 #define	_SYS_DMU_TX_H
@@ -107,10 +110,11 @@
  * These routines are defined in dmu.h, and are called by the user.
  */
 dmu_tx_t *dmu_tx_create(objset_t *dd);
-int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
+int dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how);
 void dmu_tx_commit(dmu_tx_t *tx);
 void dmu_tx_abort(dmu_tx_t *tx);
 uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
+struct dsl_pool *dmu_tx_pool(dmu_tx_t *tx);
 void dmu_tx_wait(dmu_tx_t *tx);
 
 void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,8 +20,6 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 Pawel Jakub Dawidek <pawel at dawidek.net>.
- * All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
@@ -37,6 +35,7 @@
 #include <sys/dsl_synctask.h>
 #include <sys/zfs_context.h>
 #include <sys/dsl_deadlist.h>
+#include <sys/refcount.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -50,10 +49,8 @@
 #define	DS_IS_INCONSISTENT(ds)	\
 	((ds)->ds_phys->ds_flags & DS_FLAG_INCONSISTENT)
 /*
- * NB: nopromote can not yet be set, but we want support for it in this
- * on-disk version, so that we don't need to upgrade for it later.  It
- * will be needed when we implement 'zfs split' (where the split off
- * clone should not be promoted).
+ * Note: nopromote can not yet be set, but we want support for it in this
+ * on-disk version, so that we don't need to upgrade for it later.
  */
 #define	DS_FLAG_NOPROMOTE	(1ULL<<1)
 
@@ -78,6 +75,8 @@
  */
 #define	DS_FLAG_CI_DATASET	(1ULL<<16)
 
+#define	DS_CREATE_FLAG_NODIRTY	(1ULL<<24)
+
 typedef struct dsl_dataset_phys {
 	uint64_t ds_dir_obj;		/* DMU_OT_DSL_DIR */
 	uint64_t ds_prev_snap_obj;	/* DMU_OT_DSL_DATASET */
@@ -127,9 +126,6 @@
 	dsl_deadlist_t ds_deadlist;
 	bplist_t ds_pending_deadlist;
 
-	/* to protect against multiple concurrent incremental recv */
-	kmutex_t ds_recvlock;
-
 	/* protected by lock on pool's dp_dirty_datasets list */
 	txg_node_t ds_dirty_link;
 	list_node_t ds_synced_link;
@@ -141,13 +137,15 @@
 	kmutex_t ds_lock;
 	objset_t *ds_objset;
 	uint64_t ds_userrefs;
+	void *ds_owner;
 
 	/*
-	 * ds_owner is protected by the ds_rwlock and the ds_lock
+	 * Long holds prevent the ds from being destroyed; they allow the
+	 * ds to remain held even after dropping the dp_config_rwlock.
+	 * Owning counts as a long hold.  See the comments above
+	 * dsl_pool_hold() for details.
 	 */
-	krwlock_t ds_rwlock;
-	kcondvar_t ds_exclusive_cv;
-	void *ds_owner;
+	refcount_t ds_longholds;
 
 	/* no locking; only for making guesses */
 	uint64_t ds_trysnap_txg;
@@ -165,15 +163,6 @@
 	char ds_snapname[MAXNAMELEN];
 } dsl_dataset_t;
 
-struct dsl_ds_destroyarg {
-	dsl_dataset_t *ds;		/* ds to destroy */
-	dsl_dataset_t *rm_origin;	/* also remove our origin? */
-	boolean_t is_origin_rm;		/* set if removing origin snap */
-	boolean_t defer;		/* destroy -d requested? */
-	boolean_t releasing;		/* destroying due to release? */
-	boolean_t need_prep;		/* do we need to retry due to EBUSY? */
-};
-
 /*
  * The max length of a temporary tag prefix is the number of hex digits
  * required to express UINT64_MAX plus one for the hyphen.
@@ -180,22 +169,6 @@
  */
 #define	MAX_TAG_PREFIX_LEN	17
 
-struct dsl_ds_holdarg {
-	dsl_sync_task_group_t *dstg;
-	char *htag;
-	char *snapname;
-	boolean_t recursive;
-	boolean_t gotone;
-	boolean_t temphold;
-	char failed[MAXPATHLEN];
-};
-
-/*
- * Flags for dsl_dataset_rename().
- */
-#define	ZFS_RENAME_RECURSIVE		0x01
-#define	ZFS_RENAME_ALLOW_MOUNTED	0x02
-
 #define	dsl_dataset_is_snapshot(ds) \
 	((ds)->ds_phys->ds_num_children != 0)
 
@@ -202,20 +175,18 @@
 #define	DS_UNIQUE_IS_ACCURATE(ds)	\
 	(((ds)->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
 
-int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp);
-int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj,
-    void *tag, dsl_dataset_t **);
-int dsl_dataset_own(const char *name, boolean_t inconsistentok,
+int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag,
+    dsl_dataset_t **dsp);
+int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag,
+    dsl_dataset_t **);
+void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
+int dsl_dataset_own(struct dsl_pool *dp, const char *name,
     void *tag, dsl_dataset_t **dsp);
 int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
-    boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp);
+    void *tag, dsl_dataset_t **dsp);
+void dsl_dataset_disown(dsl_dataset_t *ds, void *tag);
 void dsl_dataset_name(dsl_dataset_t *ds, char *name);
-void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
-void dsl_dataset_disown(dsl_dataset_t *ds, void *tag);
-void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag);
-boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok,
-    void *tag);
-void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *tag);
+boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
 void dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
     minor_t minor);
 uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
@@ -222,26 +193,14 @@
     dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
 uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
     uint64_t flags, dmu_tx_t *tx);
-int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer);
-int dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer);
-dsl_checkfunc_t dsl_dataset_destroy_check;
-dsl_syncfunc_t dsl_dataset_destroy_sync;
-dsl_checkfunc_t dsl_dataset_snapshot_check;
-dsl_syncfunc_t dsl_dataset_snapshot_sync;
-dsl_syncfunc_t dsl_dataset_user_hold_sync;
-int dsl_dataset_rename(char *name, const char *newname, int flags);
+int dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors);
 int dsl_dataset_promote(const char *name, char *conflsnap);
 int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
     boolean_t force);
-int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
-    boolean_t recursive, boolean_t temphold, int cleanup_fd);
-int dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag,
-    boolean_t temphold);
-int dsl_dataset_user_release(char *dsname, char *snapname, char *htag,
-    boolean_t recursive);
-int dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj,
-    char *htag, boolean_t retry);
-int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp);
+int dsl_dataset_rename_snapshot(const char *fsname,
+    const char *oldsnapname, const char *newsnapname, boolean_t recursive);
+int dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,
+    minor_t cleanup_minor, const char *htag);
 
 blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
 void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
@@ -278,14 +237,36 @@
 int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
     uint64_t asize, uint64_t inflight, uint64_t *used,
     uint64_t *ref_rsrv);
-int dsl_dataset_set_quota(const char *dsname, zprop_source_t source,
+int dsl_dataset_set_refquota(const char *dsname, zprop_source_t source,
     uint64_t quota);
-dsl_syncfunc_t dsl_dataset_set_quota_sync;
-int dsl_dataset_set_reservation(const char *dsname, zprop_source_t source,
+int dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source,
     uint64_t reservation);
 
-int dsl_destroy_inconsistent(const char *dsname, void *arg);
+boolean_t dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier);
+void dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag);
+void dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag);
+boolean_t dsl_dataset_long_held(dsl_dataset_t *ds);
 
+int dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
+    dsl_dataset_t *origin_head, boolean_t force);
+void dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
+    dsl_dataset_t *origin_head, dmu_tx_t *tx);
+int dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
+    dmu_tx_t *tx);
+void dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
+    dmu_tx_t *tx);
+
+void dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj,
+    dmu_tx_t *tx);
+void dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds);
+int dsl_dataset_get_snapname(dsl_dataset_t *ds);
+int dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name,
+    uint64_t *value);
+int dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx);
+void dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds,
+    zprop_source_t source, uint64_t value, dmu_tx_t *tx);
+int dsl_dataset_rollback(const char *fsname);
+
 #ifdef ZFS_DEBUG
 #define	dprintf_ds(ds, fmt, ...) do { \
 	if (zfs_flags & ZFS_DEBUG_DPRINTF) { \

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_deleg.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_deleg.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_deleg.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_DSL_DELEG_H
@@ -65,8 +65,7 @@
 int dsl_deleg_get(const char *ddname, nvlist_t **nvp);
 int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset);
 int dsl_deleg_access(const char *ddname, const char *perm, cred_t *cr);
-int dsl_deleg_access_impl(struct dsl_dataset *ds, boolean_t descendent,
-    const char *perm, cred_t *cr);
+int dsl_deleg_access_impl(struct dsl_dataset *ds, const char *perm, cred_t *cr);
 void dsl_deleg_set_create_perms(dsl_dir_t *dd, dmu_tx_t *tx, cred_t *cr);
 int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr);
 int dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr);

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,8 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 Pawel Jakub Dawidek <pawel at dawidek.net>.
- * All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_DSL_DIR_H
@@ -103,18 +102,15 @@
 	char dd_myname[MAXNAMELEN];
 };
 
-void dsl_dir_close(dsl_dir_t *dd, void *tag);
-int dsl_dir_open(const char *name, void *tag, dsl_dir_t **, const char **tail);
-int dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, dsl_dir_t **,
-    const char **tailp);
-int dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
+void dsl_dir_rele(dsl_dir_t *dd, void *tag);
+int dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
+    dsl_dir_t **, const char **tail);
+int dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
     const char *tail, void *tag, dsl_dir_t **);
 void dsl_dir_name(dsl_dir_t *dd, char *buf);
 int dsl_dir_namelen(dsl_dir_t *dd);
 uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds,
     const char *name, dmu_tx_t *tx);
-dsl_checkfunc_t dsl_dir_destroy_check;
-dsl_syncfunc_t dsl_dir_destroy_sync;
 void dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv);
 uint64_t dsl_dir_space_available(dsl_dir_t *dd,
     dsl_dir_t *ancestor, int64_t delta, int ondiskonly);
@@ -133,14 +129,15 @@
     uint64_t quota);
 int dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
     uint64_t reservation);
-int dsl_dir_rename(dsl_dir_t *dd, const char *newname, int flags);
+int dsl_dir_rename(const char *oldname, const char *newname);
 int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space);
-int dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx);
 boolean_t dsl_dir_is_clone(dsl_dir_t *dd);
 void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds,
     uint64_t reservation, cred_t *cr, dmu_tx_t *tx);
 void dsl_dir_snap_cmtime_update(dsl_dir_t *dd);
 timestruc_t dsl_dir_snap_cmtime(dsl_dir_t *dd);
+void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value,
+    dmu_tx_t *tx);
 
 /* internal reserved dir name */
 #define	MOS_DIR_NAME "$MOS"

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -36,6 +36,7 @@
 #include <sys/arc.h>
 #include <sys/bpobj.h>
 #include <sys/bptree.h>
+#include <sys/rrwlock.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -113,7 +114,7 @@
 	 * syncing context does not need to ever have it for read, since
 	 * nobody else could possibly have it for write.
 	 */
-	krwlock_t dp_config_rwlock;
+	rrwlock_t dp_config_rwlock;
 
 	zfs_all_blkstats_t *dp_blkstats;
 } dsl_pool_t;
@@ -139,15 +140,20 @@
 void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx);
 void dsl_pool_mos_diduse_space(dsl_pool_t *dp,
     int64_t used, int64_t comp, int64_t uncomp);
+void dsl_pool_config_enter(dsl_pool_t *dp, void *tag);
+void dsl_pool_config_exit(dsl_pool_t *dp, void *tag);
+boolean_t dsl_pool_config_held(dsl_pool_t *dp);
 
 taskq_t *dsl_pool_vnrele_taskq(dsl_pool_t *dp);
 
-extern int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj,
-    const char *tag, uint64_t *now, dmu_tx_t *tx);
-extern int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj,
+int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj,
+    const char *tag, uint64_t now, dmu_tx_t *tx);
+int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj,
     const char *tag, dmu_tx_t *tx);
-extern void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp);
+void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp);
 int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **);
+int dsl_pool_hold(const char *name, void *tag, dsl_pool_t **dp);
+void dsl_pool_rele(dsl_pool_t *dp, void *tag);
 
 #ifdef	__cplusplus
 }

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_DSL_PROP_H
@@ -53,25 +54,12 @@
 	zprop_source_t pa_source;
 } dsl_props_arg_t;
 
-typedef struct dsl_prop_set_arg {
-	const char *psa_name;
-	zprop_source_t psa_source;
-	int psa_intsz;
-	int psa_numints;
-	const void *psa_value;
-
-	/*
-	 * Used to handle the special requirements of the quota and reservation
-	 * properties.
-	 */
-	uint64_t psa_effective_value;
-} dsl_prop_setarg_t;
-
 int dsl_prop_register(struct dsl_dataset *ds, const char *propname,
     dsl_prop_changed_cb_t *callback, void *cbarg);
 int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname,
     dsl_prop_changed_cb_t *callback, void *cbarg);
-int dsl_prop_numcb(struct dsl_dataset *ds);
+void dsl_prop_notify_all(struct dsl_dir *dd);
+boolean_t dsl_prop_hascb(struct dsl_dataset *ds);
 
 int dsl_prop_get(const char *ddname, const char *propname,
     int intsz, int numints, void *buf, char *setpoint);
@@ -78,35 +66,35 @@
 int dsl_prop_get_integer(const char *ddname, const char *propname,
     uint64_t *valuep, char *setpoint);
 int dsl_prop_get_all(objset_t *os, nvlist_t **nvp);
-int dsl_prop_get_received(objset_t *os, nvlist_t **nvp);
+int dsl_prop_get_received(const char *dsname, nvlist_t **nvp);
 int dsl_prop_get_ds(struct dsl_dataset *ds, const char *propname,
     int intsz, int numints, void *buf, char *setpoint);
+int dsl_prop_get_int_ds(struct dsl_dataset *ds, const char *propname,
+    uint64_t *valuep);
 int dsl_prop_get_dd(struct dsl_dir *dd, const char *propname,
     int intsz, int numints, void *buf, char *setpoint,
     boolean_t snapshot);
 
-dsl_syncfunc_t dsl_props_set_sync;
-int dsl_prop_set(const char *ddname, const char *propname,
-    zprop_source_t source, int intsz, int numints, const void *buf);
+void dsl_props_set_sync_impl(struct dsl_dataset *ds, zprop_source_t source,
+    nvlist_t *props, dmu_tx_t *tx);
+void dsl_prop_set_sync_impl(struct dsl_dataset *ds, const char *propname,
+    zprop_source_t source, int intsz, int numints, const void *value,
+    dmu_tx_t *tx);
 int dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *nvl);
-void dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
-    dmu_tx_t *tx);
+int dsl_prop_set_int(const char *dsname, const char *propname,
+    zprop_source_t source, uint64_t value);
+int dsl_prop_set_string(const char *dsname, const char *propname,
+    zprop_source_t source, const char *value);
+int dsl_prop_inherit(const char *dsname, const char *propname,
+    zprop_source_t source);
 
-void dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname,
-    zprop_source_t source, uint64_t *value);
-int dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa);
-#ifdef	ZFS_DEBUG
-void dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa);
-#define	DSL_PROP_CHECK_PREDICTION(dd, psa)	\
-	dsl_prop_check_prediction((dd), (psa))
-#else
-#define	DSL_PROP_CHECK_PREDICTION(dd, psa)	/* nothing */
-#endif
+int dsl_prop_predict(dsl_dir_t *dd, const char *propname,
+    zprop_source_t source, uint64_t value, uint64_t *newvalp);
 
 /* flag first receive on or after SPA_VERSION_RECVD_PROPS */
-boolean_t dsl_prop_get_hasrecvd(objset_t *os);
-void dsl_prop_set_hasrecvd(objset_t *os);
-void dsl_prop_unset_hasrecvd(objset_t *os);
+boolean_t dsl_prop_get_hasrecvd(const char *dsname);
+int dsl_prop_set_hasrecvd(const char *dsname);
+void dsl_prop_unset_hasrecvd(const char *dsname);
 
 void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value);
 void dsl_prop_nvlist_add_string(nvlist_t *nv,

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_synctask.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_synctask.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_synctask.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_DSL_SYNCTASK_H
@@ -34,44 +35,27 @@
 
 struct dsl_pool;
 
-typedef int (dsl_checkfunc_t)(void *, void *, dmu_tx_t *);
-typedef void (dsl_syncfunc_t)(void *, void *, dmu_tx_t *);
+typedef int (dsl_checkfunc_t)(void *, dmu_tx_t *);
+typedef void (dsl_syncfunc_t)(void *, dmu_tx_t *);
 
 typedef struct dsl_sync_task {
-	list_node_t dst_node;
+	txg_node_t dst_node;
+	struct dsl_pool *dst_pool;
+	uint64_t dst_txg;
+	int dst_space;
 	dsl_checkfunc_t *dst_checkfunc;
 	dsl_syncfunc_t *dst_syncfunc;
-	void *dst_arg1;
-	void *dst_arg2;
-	int dst_err;
+	void *dst_arg;
+	int dst_error;
+	boolean_t dst_nowaiter;
 } dsl_sync_task_t;
 
-typedef struct dsl_sync_task_group {
-	txg_node_t dstg_node;
-	list_t dstg_tasks;
-	struct dsl_pool *dstg_pool;
-	uint64_t dstg_txg;
-	int dstg_err;
-	int dstg_space;
-	boolean_t dstg_nowaiter;
-} dsl_sync_task_group_t;
+void dsl_sync_task_sync(dsl_sync_task_t *dst, dmu_tx_t *tx);
+int dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc,
+    dsl_syncfunc_t *syncfunc, void *arg, int blocks_modified);
+void dsl_sync_task_nowait(struct dsl_pool *dp, dsl_syncfunc_t *syncfunc,
+    void *arg, int blocks_modified, dmu_tx_t *tx);
 
-dsl_sync_task_group_t *dsl_sync_task_group_create(struct dsl_pool *dp);
-void dsl_sync_task_create(dsl_sync_task_group_t *dstg,
-    dsl_checkfunc_t *, dsl_syncfunc_t *,
-    void *arg1, void *arg2, int blocks_modified);
-int dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg);
-void dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx);
-void dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg);
-void dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx);
-
-int dsl_sync_task_do(struct dsl_pool *dp,
-    dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
-    void *arg1, void *arg2, int blocks_modified);
-void dsl_sync_task_do_nowait(struct dsl_pool *dp,
-    dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
-    void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx);
-
 #ifdef	__cplusplus
 }
 #endif

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #ifndef _SYS_METASLAB_H
@@ -56,6 +56,7 @@
 extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg,
     boolean_t now);
 extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg);
+extern void metaslab_check_free(spa_t *spa, const blkptr_t *bp);
 
 extern metaslab_class_t *metaslab_class_create(spa_t *spa,
     space_map_ops_t *ops);

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/refcount.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/refcount.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/refcount.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -53,6 +53,7 @@
 
 typedef struct refcount {
 	kmutex_t rc_mtx;
+	boolean_t rc_tracked;
 	list_t rc_list;
 	list_t rc_removed;
 	uint64_t rc_count;
@@ -59,9 +60,10 @@
 	uint64_t rc_removed_count;
 } refcount_t;
 
-/* Note: refcount_t must be initialized with refcount_create() */
+/* Note: refcount_t must be initialized with refcount_create[_untracked]() */
 
 void refcount_create(refcount_t *rc);
+void refcount_create_untracked(refcount_t *rc);
 void refcount_destroy(refcount_t *rc);
 void refcount_destroy_many(refcount_t *rc, uint64_t number);
 int refcount_is_zero(refcount_t *rc);
@@ -82,6 +84,7 @@
 } refcount_t;
 
 #define	refcount_create(rc) ((rc)->rc_count = 0)
+#define	refcount_create_untracked(rc) ((rc)->rc_count = 0)
 #define	refcount_destroy(rc) ((rc)->rc_count = 0)
 #define	refcount_destroy_many(rc, number) ((rc)->rc_count = 0)
 #define	refcount_is_zero(rc) ((rc)->rc_count == 0)

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/rrwlock.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/rrwlock.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/rrwlock.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -22,12 +22,13 @@
  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
 
 #ifndef	_SYS_RR_RW_LOCK_H
 #define	_SYS_RR_RW_LOCK_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #ifdef	__cplusplus
 extern "C" {
 #endif
@@ -56,6 +57,7 @@
 	refcount_t	rr_anon_rcount;
 	refcount_t	rr_linked_rcount;
 	boolean_t	rr_writer_wanted;
+	boolean_t	rr_track_all;
 } rrwlock_t;
 
 /*
@@ -63,14 +65,19 @@
  * 'tag' must be the same in a rrw_enter() as in its
  * corresponding rrw_exit().
  */
-void rrw_init(rrwlock_t *rrl);
+void rrw_init(rrwlock_t *rrl, boolean_t track_all);
 void rrw_destroy(rrwlock_t *rrl);
 void rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag);
+void rrw_enter_read(rrwlock_t *rrl, void *tag);
+void rrw_enter_write(rrwlock_t *rrl);
 void rrw_exit(rrwlock_t *rrl, void *tag);
 boolean_t rrw_held(rrwlock_t *rrl, krw_t rw);
+void rrw_tsd_destroy(void *arg);
 
 #define	RRW_READ_HELD(x)	rrw_held(x, RW_READER)
 #define	RRW_WRITE_HELD(x)	rrw_held(x, RW_WRITER)
+#define	RRW_LOCK_HELD(x) \
+	(rrw_held(x, RW_WRITER) || rrw_held(x, RW_READER))
 
 #ifdef	__cplusplus
 }

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -52,6 +52,7 @@
 typedef struct ddt ddt_t;
 typedef struct ddt_entry ddt_entry_t;
 struct dsl_pool;
+struct dsl_dataset;
 
 /*
  * General-purpose 32-bit and 64-bit bitfield encodings.
@@ -418,7 +419,7 @@
 extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,
     size_t buflen);
 extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
-    const char *history_str, nvlist_t *zplprops);
+    nvlist_t *zplprops);
 #if defined(sun)
 extern int spa_import_rootpool(char *devpath, char *devid);
 #else
@@ -630,31 +631,20 @@
 extern uint64_t zfs_strtonum(const char *str, char **nptr);
 #define	strtonum(str, nptr)	zfs_strtonum((str), (nptr))
 
-/* history logging */
-typedef enum history_log_type {
-	LOG_CMD_POOL_CREATE,
-	LOG_CMD_NORMAL,
-	LOG_INTERNAL
-} history_log_type_t;
-
-typedef struct history_arg {
-	char *ha_history_str;
-	history_log_type_t ha_log_type;
-	history_internal_events_t ha_event;
-	char *ha_zone;
-	uid_t ha_uid;
-} history_arg_t;
-
 extern char *spa_his_ievent_table[];
 
 extern void spa_history_create_obj(spa_t *spa, dmu_tx_t *tx);
 extern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read,
     char *his_buf);
-extern int spa_history_log(spa_t *spa, const char *his_buf,
-    history_log_type_t what);
-extern void spa_history_log_internal(history_internal_events_t event,
-    spa_t *spa, dmu_tx_t *tx, const char *fmt, ...);
-extern void spa_history_log_version(spa_t *spa, history_internal_events_t evt);
+extern int spa_history_log(spa_t *spa, const char *his_buf);
+extern int spa_history_log_nvl(spa_t *spa, nvlist_t *nvl);
+extern void spa_history_log_version(spa_t *spa, const char *operation);
+extern void spa_history_log_internal(spa_t *spa, const char *operation,
+    dmu_tx_t *tx, const char *fmt, ...);
+extern void spa_history_log_internal_ds(struct dsl_dataset *ds, const char *op,
+    dmu_tx_t *tx, const char *fmt, ...);
+extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
+    dmu_tx_t *tx, const char *fmt, ...);
 
 /* error handling */
 struct zbookmark;

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -149,6 +149,8 @@
 extern void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size);
 extern boolean_t space_map_contains(space_map_t *sm,
     uint64_t start, uint64_t size);
+extern space_seg_t *space_map_find(space_map_t *sm, uint64_t start,
+    uint64_t size, avl_index_t *wherep);
 extern void space_map_swap(space_map_t **msrc, space_map_t **mdest);
 extern void space_map_vacate(space_map_t *sm,
     space_map_func_t *func, space_map_t *mdest);

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -45,9 +45,6 @@
 /* Number of txgs worth of frees we defer adding to in-core spacemaps */
 #define	TXG_DEFER_SIZE		2
 
-#define	TXG_WAIT		1ULL
-#define	TXG_NOWAIT		2ULL
-
 typedef struct tx_cpu tx_cpu_t;
 
 typedef struct txg_handle {
@@ -119,11 +116,11 @@
 extern void txg_list_create(txg_list_t *tl, size_t offset);
 extern void txg_list_destroy(txg_list_t *tl);
 extern boolean_t txg_list_empty(txg_list_t *tl, uint64_t txg);
-extern int txg_list_add(txg_list_t *tl, void *p, uint64_t txg);
-extern int txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg);
+extern boolean_t txg_list_add(txg_list_t *tl, void *p, uint64_t txg);
+extern boolean_t txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg);
 extern void *txg_list_remove(txg_list_t *tl, uint64_t txg);
 extern void *txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg);
-extern int txg_list_member(txg_list_t *tl, void *p, uint64_t txg);
+extern boolean_t txg_list_member(txg_list_t *tl, void *p, uint64_t txg);
 extern void *txg_list_head(txg_list_t *tl, uint64_t txg);
 extern void *txg_list_next(txg_list_t *tl, void *p, uint64_t txg);
 

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -104,8 +104,7 @@
 	avl_tree_t	vq_read_tree;
 	avl_tree_t	vq_write_tree;
 	avl_tree_t	vq_pending_tree;
-	uint64_t	vq_io_complete_ts;
-	uint64_t	vq_io_delta_ts;
+	hrtime_t	vq_io_complete_ts;
 	kmutex_t	vq_lock;
 };
 

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfeature.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfeature.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfeature.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -26,7 +26,6 @@
 #ifndef _SYS_ZFEATURE_H
 #define	_SYS_ZFEATURE_H
 
-#include <sys/dmu.h>
 #include <sys/nvpair.h>
 #include "zfeature_common.h"
 
@@ -34,14 +33,18 @@
 extern "C" {
 #endif
 
-extern boolean_t feature_is_supported(objset_t *os, uint64_t obj,
+struct spa;
+struct dmu_tx;
+struct objset;
+
+extern boolean_t feature_is_supported(struct objset *os, uint64_t obj,
     uint64_t desc_obj, nvlist_t *unsup_feat, nvlist_t *enabled_feat);
 
-struct spa;
-extern void spa_feature_create_zap_objects(struct spa *, dmu_tx_t *);
-extern void spa_feature_enable(struct spa *, zfeature_info_t *, dmu_tx_t *);
-extern void spa_feature_incr(struct spa *, zfeature_info_t *, dmu_tx_t *);
-extern void spa_feature_decr(struct spa *, zfeature_info_t *, dmu_tx_t *);
+extern void spa_feature_create_zap_objects(struct spa *, struct dmu_tx *);
+extern void spa_feature_enable(struct spa *, zfeature_info_t *,
+    struct dmu_tx *);
+extern void spa_feature_incr(struct spa *, zfeature_info_t *, struct dmu_tx *);
+extern void spa_feature_decr(struct spa *, zfeature_info_t *, struct dmu_tx *);
 extern boolean_t spa_feature_is_enabled(struct spa *, zfeature_info_t *);
 extern boolean_t spa_feature_is_active(struct spa *, zfeature_info_t *);
 

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -50,11 +50,13 @@
 
 extern int zfs_flags;
 
-#define	ZFS_DEBUG_DPRINTF	0x0001
-#define	ZFS_DEBUG_DBUF_VERIFY	0x0002
-#define	ZFS_DEBUG_DNODE_VERIFY	0x0004
-#define	ZFS_DEBUG_SNAPNAMES	0x0008
-#define	ZFS_DEBUG_MODIFY	0x0010
+#define	ZFS_DEBUG_DPRINTF	(1<<0)
+#define	ZFS_DEBUG_DBUF_VERIFY	(1<<1)
+#define	ZFS_DEBUG_DNODE_VERIFY	(1<<2)
+#define	ZFS_DEBUG_SNAPNAMES	(1<<3)
+#define	ZFS_DEBUG_MODIFY	(1<<4)
+#define	ZFS_DEBUG_SPA		(1<<5)
+#define	ZFS_DEBUG_ZIO_FREE	(1<<6)
 
 #ifdef ZFS_DEBUG
 extern void __dprintf(const char *file, const char *func,

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_ZFS_IOCTL_H
@@ -41,6 +42,15 @@
 #endif
 
 /*
+ * The structures in this file are passed between userland and the
+ * kernel.  Userland may be running a 32-bit process, while the kernel
+ * is 64-bit.  Therefore, these structures need to compile the same in
+ * 32-bit and 64-bit.  This means not using type "long", and adding
+ * explicit padding so that the 32-bit structure will not be packed more
+ * tightly than the 64-bit structure (which requires 64-bit alignment).
+ */
+
+/*
  * Property values for snapdir
  */
 #define	ZFS_SNAPDIR_HIDDEN		0
@@ -284,22 +294,28 @@
 } zfs_case_t;
 
 typedef struct zfs_cmd {
-	char		zc_name[MAXPATHLEN];
+	char		zc_name[MAXPATHLEN];	/* name of pool or dataset */
+	uint64_t	zc_nvlist_src;		/* really (char *) */
+	uint64_t	zc_nvlist_src_size;
+	uint64_t	zc_nvlist_dst;		/* really (char *) */
+	uint64_t	zc_nvlist_dst_size;
+	boolean_t	zc_nvlist_dst_filled;	/* put an nvlist in dst? */
+	int		zc_pad2;
+
+	/*
+	 * The following members are for legacy ioctls which haven't been
+	 * converted to the new method.
+	 */
+	uint64_t	zc_history;		/* really (char *) */
 	char		zc_value[MAXPATHLEN * 2];
 	char		zc_string[MAXNAMELEN];
-	char		zc_top_ds[MAXPATHLEN];
 	uint64_t	zc_guid;
 	uint64_t	zc_nvlist_conf;		/* really (char *) */
 	uint64_t	zc_nvlist_conf_size;
-	uint64_t	zc_nvlist_src;		/* really (char *) */
-	uint64_t	zc_nvlist_src_size;
-	uint64_t	zc_nvlist_dst;		/* really (char *) */
-	uint64_t	zc_nvlist_dst_size;
 	uint64_t	zc_cookie;
 	uint64_t	zc_objset_type;
 	uint64_t	zc_perm_action;
-	uint64_t 	zc_history;		/* really (char *) */
-	uint64_t 	zc_history_len;
+	uint64_t	zc_history_len;
 	uint64_t	zc_history_offset;
 	uint64_t	zc_obj;
 	uint64_t	zc_iflags;		/* internal to zfs(7fs) */
@@ -344,7 +360,8 @@
     const char *to, cred_t *cr);
 extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr);
 extern int zfs_busy(void);
-extern int zfs_unmount_snap(const char *, void *);
+extern void zfs_unmount_snap(const char *);
+extern void zfs_destroy_unmount_origin(const char *);
 
 /*
  * ZFS minor numbers can refer to either a control device instance or

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_FS_ZFS_ZNODE_H
@@ -259,7 +260,7 @@
  */
 #define	ZFS_ENTER(zfsvfs) \
 	{ \
-		rrw_enter(&(zfsvfs)->z_teardown_lock, RW_READER, FTAG); \
+		rrw_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
 		if ((zfsvfs)->z_unmounted) { \
 			ZFS_EXIT(zfsvfs); \
 			return (EIO); \

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -411,8 +411,8 @@
 extern void	zil_sync(zilog_t *zilog, dmu_tx_t *tx);
 extern void	zil_clean(zilog_t *zilog, uint64_t synced_txg);
 
-extern int	zil_suspend(zilog_t *zilog);
-extern void	zil_resume(zilog_t *zilog);
+extern int	zil_suspend(const char *osname, void **cookiep);
+extern void	zil_resume(void *cookie);
 
 extern void	zil_add_block(zilog_t *zilog, const blkptr_t *bp);
 extern int	zil_bp_tree_add(zilog_t *zilog, const blkptr_t *bp);

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -443,7 +443,7 @@
 
 	uint64_t	io_offset;
 	uint64_t	io_deadline;
-	uint64_t	io_timestamp;
+	hrtime_t	io_timestamp;
 	avl_node_t	io_offset_node;
 	avl_node_t	io_deadline_node;
 	avl_tree_t	*io_vdev_tree;

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -585,6 +585,8 @@
 {
 	tx_state_t *tx = &dp->dp_tx;
 
+	ASSERT(!dsl_pool_config_held(dp));
+
 	mutex_enter(&tx->tx_sync_lock);
 	ASSERT(tx->tx_threads == 2);
 	if (txg == 0)
@@ -608,6 +610,8 @@
 {
 	tx_state_t *tx = &dp->dp_tx;
 
+	ASSERT(!dsl_pool_config_held(dp));
+
 	mutex_enter(&tx->tx_sync_lock);
 	ASSERT(tx->tx_threads == 2);
 	if (txg == 0)
@@ -673,19 +677,19 @@
 }
 
 /*
- * Add an entry to the list.
- * Returns 0 if it's a new entry, 1 if it's already there.
+ * Add an entry to the list (unless it's already on the list).
+ * Returns B_TRUE if it was actually added.
  */
-int
+boolean_t
 txg_list_add(txg_list_t *tl, void *p, uint64_t txg)
 {
 	int t = txg & TXG_MASK;
 	txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
-	int already_on_list;
+	boolean_t add;
 
 	mutex_enter(&tl->tl_lock);
-	already_on_list = tn->tn_member[t];
-	if (!already_on_list) {
+	add = (tn->tn_member[t] == 0);
+	if (add) {
 		tn->tn_member[t] = 1;
 		tn->tn_next[t] = tl->tl_head[t];
 		tl->tl_head[t] = tn;
@@ -692,23 +696,24 @@
 	}
 	mutex_exit(&tl->tl_lock);
 
-	return (already_on_list);
+	return (add);
 }
 
 /*
- * Add an entry to the end of the list (walks list to find end).
- * Returns 0 if it's a new entry, 1 if it's already there.
+ * Add an entry to the end of the list, unless it's already on the list.
+ * (walks list to find end)
+ * Returns B_TRUE if it was actually added.
  */
-int
+boolean_t
 txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg)
 {
 	int t = txg & TXG_MASK;
 	txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
-	int already_on_list;
+	boolean_t add;
 
 	mutex_enter(&tl->tl_lock);
-	already_on_list = tn->tn_member[t];
-	if (!already_on_list) {
+	add = (tn->tn_member[t] == 0);
+	if (add) {
 		txg_node_t **tp;
 
 		for (tp = &tl->tl_head[t]; *tp != NULL; tp = &(*tp)->tn_next[t])
@@ -720,7 +725,7 @@
 	}
 	mutex_exit(&tl->tl_lock);
 
-	return (already_on_list);
+	return (add);
 }
 
 /*
@@ -771,13 +776,13 @@
 	return (NULL);
 }
 
-int
+boolean_t
 txg_list_member(txg_list_t *tl, void *p, uint64_t txg)
 {
 	int t = txg & TXG_MASK;
 	txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
 
-	return (tn->tn_member[t]);
+	return (tn->tn_member[t] != 0);
 }
 
 /*

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/uberblock.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/uberblock.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/uberblock.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -33,7 +34,7 @@
 		byteswap_uint64_array(ub, sizeof (uberblock_t));
 
 	if (ub->ub_magic != UBERBLOCK_MAGIC)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	return (0);
 }

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -22,7 +22,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  * Copyright 2013 Martin Matuska <mm at FreeBSD.org>. All rights reserved.
  */
 
@@ -357,10 +357,10 @@
 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
 
 	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	if ((ops = vdev_getops(type)) == NULL)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	/*
 	 * If this is a load, get the vdev guid from the nvlist.
@@ -371,19 +371,19 @@
 
 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, &label_id) ||
 		    label_id != id)
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 
 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0)
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 	} else if (alloctype == VDEV_ALLOC_SPARE) {
 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0)
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 	} else if (alloctype == VDEV_ALLOC_L2CACHE) {
 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0)
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 	} else if (alloctype == VDEV_ALLOC_ROOTPOOL) {
 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0)
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 	}
 
 	/*
@@ -390,7 +390,7 @@
 	 * The first allocated vdev must be of type 'root'.
 	 */
 	if (ops != &vdev_root_ops && spa->spa_root_vdev == NULL)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	/*
 	 * Determine whether we're a log vdev.
@@ -398,10 +398,10 @@
 	islog = 0;
 	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &islog);
 	if (islog && spa_version(spa) < SPA_VERSION_SLOGS)
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 
 	if (ops == &vdev_hole_ops && spa_version(spa) < SPA_VERSION_HOLES)
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 
 	/*
 	 * Set the nparity property for RAID-Z vdevs.
@@ -411,7 +411,7 @@
 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
 		    &nparity) == 0) {
 			if (nparity == 0 || nparity > VDEV_RAIDZ_MAXPARITY)
-				return (EINVAL);
+				return (SET_ERROR(EINVAL));
 			/*
 			 * Previous versions could only support 1 or 2 parity
 			 * device.
@@ -418,10 +418,10 @@
 			 */
 			if (nparity > 1 &&
 			    spa_version(spa) < SPA_VERSION_RAIDZ2)
-				return (ENOTSUP);
+				return (SET_ERROR(ENOTSUP));
 			if (nparity > 2 &&
 			    spa_version(spa) < SPA_VERSION_RAIDZ3)
-				return (ENOTSUP);
+				return (SET_ERROR(ENOTSUP));
 		} else {
 			/*
 			 * We require the parity to be specified for SPAs that
@@ -428,7 +428,7 @@
 			 * support multiple parity levels.
 			 */
 			if (spa_version(spa) >= SPA_VERSION_RAIDZ2)
-				return (EINVAL);
+				return (SET_ERROR(EINVAL));
 			/*
 			 * Otherwise, we default to 1 parity device for RAID-Z.
 			 */
@@ -946,7 +946,7 @@
 			ASSERT(zio->io_error != 0);
 			zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE,
 			    spa, vd, NULL, 0, 0);
-			zio->io_error = ENXIO;
+			zio->io_error = SET_ERROR(ENXIO);
 		}
 
 		mutex_enter(&vd->vdev_probe_lock);
@@ -956,7 +956,7 @@
 
 		while ((pio = zio_walk_parents(zio)) != NULL)
 			if (!vdev_accessible(vd, pio))
-				pio->io_error = ENXIO;
+				pio->io_error = SET_ERROR(ENXIO);
 
 		kmem_free(vps, sizeof (*vps));
 	}
@@ -1141,11 +1141,11 @@
 		    vd->vdev_label_aux == VDEV_AUX_EXTERNAL);
 		vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED,
 		    vd->vdev_label_aux);
-		return (ENXIO);
+		return (SET_ERROR(ENXIO));
 	} else if (vd->vdev_offline) {
 		ASSERT(vd->vdev_children == 0);
 		vdev_set_state(vd, B_TRUE, VDEV_STATE_OFFLINE, VDEV_AUX_NONE);
-		return (ENXIO);
+		return (SET_ERROR(ENXIO));
 	}
 
 	error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize, &ashift);
@@ -1180,7 +1180,7 @@
 		    vd->vdev_label_aux == VDEV_AUX_EXTERNAL);
 		vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED,
 		    vd->vdev_label_aux);
-		return (ENXIO);
+		return (SET_ERROR(ENXIO));
 	}
 
 	if (vd->vdev_degraded) {
@@ -1217,7 +1217,7 @@
 		if (osize < SPA_MINDEVSIZE) {
 			vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
 			    VDEV_AUX_TOO_SMALL);
-			return (EOVERFLOW);
+			return (SET_ERROR(EOVERFLOW));
 		}
 		psize = osize;
 		asize = osize - (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE);
@@ -1228,7 +1228,7 @@
 		    (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE)) {
 			vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
 			    VDEV_AUX_TOO_SMALL);
-			return (EOVERFLOW);
+			return (SET_ERROR(EOVERFLOW));
 		}
 		psize = 0;
 		asize = osize;
@@ -1243,7 +1243,7 @@
 	if (asize < vd->vdev_min_asize) {
 		vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
 		    VDEV_AUX_BAD_LABEL);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	if (vd->vdev_asize == 0) {
@@ -1325,7 +1325,7 @@
 
 	for (int c = 0; c < vd->vdev_children; c++)
 		if (vdev_validate(vd->vdev_child[c], strict) != 0)
-			return (EBADF);
+			return (SET_ERROR(EBADF));
 
 	/*
 	 * If the device has already failed, or was marked offline, don't do
@@ -1411,7 +1411,7 @@
 		if (!(spa->spa_import_flags & ZFS_IMPORT_VERBATIM) &&
 		    spa_load_state(spa) == SPA_LOAD_OPEN &&
 		    state != POOL_STATE_ACTIVE)
-			return (EBADF);
+			return (SET_ERROR(EBADF));
 
 		/*
 		 * If we were able to open and validate a vdev that was
@@ -3200,10 +3200,10 @@
 			 * the spa_deadman_synctime we panic the system.
 			 */
 			fio = avl_first(&vq->vq_pending_tree);
-			delta = ddi_get_lbolt64() - fio->io_timestamp;
-			if (delta > NSEC_TO_TICK(spa_deadman_synctime(spa))) {
-				zfs_dbgmsg("SLOW IO: zio timestamp %llu, "
-				    "delta %llu, last io %llu",
+			delta = gethrtime() - fio->io_timestamp;
+			if (delta > spa_deadman_synctime(spa)) {
+				zfs_dbgmsg("SLOW IO: zio timestamp %lluns, "
+				    "delta %lluns, last io %lluns",
 				    fio->io_timestamp, delta,
 				    vq->vq_io_complete_ts);
 				fm_panic("I/O to pool '%s' appears to be "

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -22,6 +22,9 @@
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
+/*
+ * Copyright (c) 2013 by Delphix. All rights reserved.
+ */
 
 #include <sys/zfs_context.h>
 #include <sys/spa.h>
@@ -271,16 +274,16 @@
 	ASSERT(zio->io_type == ZIO_TYPE_READ);
 
 	if (zio->io_flags & ZIO_FLAG_DONT_CACHE)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	if (zio->io_size > zfs_vdev_cache_max)
-		return (EOVERFLOW);
+		return (SET_ERROR(EOVERFLOW));
 
 	/*
 	 * If the I/O straddles two or more cache blocks, don't cache it.
 	 */
 	if (P2BOUNDARY(zio->io_offset, zio->io_size, VCBS))
-		return (EXDEV);
+		return (SET_ERROR(EXDEV));
 
 	ASSERT(cache_phase + zio->io_size <= VCBS);
 
@@ -292,7 +295,7 @@
 	if (ve != NULL) {
 		if (ve->ve_missed_update) {
 			mutex_exit(&vc->vc_lock);
-			return (ESTALE);
+			return (SET_ERROR(ESTALE));
 		}
 
 		if ((fio = ve->ve_fill_io) != NULL) {
@@ -315,7 +318,7 @@
 
 	if (ve == NULL) {
 		mutex_exit(&vc->vc_lock);
-		return (ENOMEM);
+		return (SET_ERROR(ENOMEM));
 	}
 
 	fio = zio_vdev_delegated_io(zio->io_vd, cache_offset,

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -144,6 +144,8 @@
 	int error;
 	dev_t dev;
 	int otyp;
+	boolean_t validate_devid = B_FALSE;
+	ddi_devid_t devid;
 
 	/*
 	 * We must have a pathname, and it must be absolute.
@@ -150,7 +152,7 @@
 	 */
 	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	/*
@@ -185,7 +187,7 @@
 		if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid,
 		    &dvd->vd_minor) != 0) {
 			vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 		}
 	}
 
@@ -192,7 +194,6 @@
 	error = EINVAL;		/* presume failure */
 
 	if (vd->vdev_path != NULL) {
-		ddi_devid_t devid;
 
 		if (vd->vdev_wholedisk == -1ULL) {
 			size_t len = strlen(vd->vdev_path) + 3;
@@ -221,7 +222,7 @@
 		if (error == 0 && vd->vdev_devid != NULL &&
 		    ldi_get_devid(dvd->vd_lh, &devid) == 0) {
 			if (ddi_devid_compare(devid, dvd->vd_devid) != 0) {
-				error = EINVAL;
+				error = SET_ERROR(EINVAL);
 				(void) ldi_close(dvd->vd_lh, spa_mode(spa),
 				    kcred);
 				dvd->vd_lh = NULL;
@@ -241,9 +242,10 @@
 	 * If we were unable to open by path, or the devid check fails, open by
 	 * devid instead.
 	 */
-	if (error != 0 && vd->vdev_devid != NULL)
+	if (error != 0 && vd->vdev_devid != NULL) {
 		error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor,
 		    spa_mode(spa), kcred, &dvd->vd_lh, zfs_li);
+	}
 
 	/*
 	 * If all else fails, then try opening by physical path (if available)
@@ -252,6 +254,9 @@
 	 * level vdev validation will prevent us from opening the wrong device.
 	 */
 	if (error) {
+		if (vd->vdev_devid != NULL)
+			validate_devid = B_TRUE;
+
 		if (vd->vdev_physpath != NULL &&
 		    (dev = ddi_pathname_to_dev_t(vd->vdev_physpath)) != NODEV)
 			error = ldi_open_by_dev(&dev, OTYP_BLK, spa_mode(spa),
@@ -273,6 +278,25 @@
 	}
 
 	/*
+	 * Now that the device has been successfully opened, update the devid
+	 * if necessary.
+	 */
+	if (validate_devid && spa_writeable(spa) &&
+	    ldi_get_devid(dvd->vd_lh, &devid) == 0) {
+		if (ddi_devid_compare(devid, dvd->vd_devid) != 0) {
+			char *vd_devid;
+
+			vd_devid = ddi_devid_str_encode(devid, dvd->vd_minor);
+			zfs_dbgmsg("vdev %s: update devid from %s, "
+			    "to %s", vd->vdev_path, vd->vdev_devid, vd_devid);
+			spa_strfree(vd->vdev_devid);
+			vd->vdev_devid = spa_strdup(vd_devid);
+			ddi_devid_str_free(vd_devid);
+		}
+		ddi_devid_free(devid);
+	}
+
+	/*
 	 * Once a device is opened, verify that the physical device path (if
 	 * available) is up to date.
 	 */
@@ -303,7 +327,7 @@
 	 */
 	if (ldi_get_size(dvd->vd_lh, psize) != 0) {
 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	/*
@@ -374,7 +398,7 @@
 	int error = 0;
 
 	if (vd_lh == NULL)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	ASSERT(flags & B_READ || flags & B_WRITE);
 
@@ -388,7 +412,7 @@
 	error = ldi_strategy(vd_lh, bp);
 	ASSERT(error == 0);
 	if ((error = biowait(bp)) == 0 && bp->b_resid != 0)
-		error = EIO;
+		error = SET_ERROR(EIO);
 	freerbuf(bp);
 
 	return (error);
@@ -408,7 +432,7 @@
 	zio->io_error = (geterror(bp) != 0 ? EIO : 0);
 
 	if (zio->io_error == 0 && bp->b_resid != 0)
-		zio->io_error = EIO;
+		zio->io_error = SET_ERROR(EIO);
 
 	kmem_free(vdb, sizeof (vdev_disk_buf_t));
 
@@ -449,7 +473,7 @@
 	if (zio->io_type == ZIO_TYPE_IOCTL) {
 		/* XXPOLICY */
 		if (!vdev_readable(vd)) {
-			zio->io_error = ENXIO;
+			zio->io_error = SET_ERROR(ENXIO);
 			return (ZIO_PIPELINE_CONTINUE);
 		}
 
@@ -461,7 +485,7 @@
 				break;
 
 			if (vd->vdev_nowritecache) {
-				zio->io_error = ENOTSUP;
+				zio->io_error = SET_ERROR(ENOTSUP);
 				break;
 			}
 
@@ -499,7 +523,7 @@
 			break;
 
 		default:
-			zio->io_error = ENOTSUP;
+			zio->io_error = SET_ERROR(ENOTSUP);
 		}
 
 		return (ZIO_PIPELINE_CONTINUE);
@@ -604,7 +628,7 @@
 
 	if (ldi_get_size(vd_lh, &s)) {
 		(void) ldi_close(vd_lh, FREAD, kcred);
-		return (EIO);
+		return (SET_ERROR(EIO));
 	}
 
 	size = P2ALIGN_TYPED(s, sizeof (vdev_label_t), uint64_t);
@@ -646,7 +670,7 @@
 	kmem_free(label, sizeof (vdev_label_t));
 	(void) ldi_close(vd_lh, FREAD, kcred);
 	if (*config == NULL)
-		error = EIDRM;
+		error = SET_ERROR(EIDRM);
 
 	return (error);
 }

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -61,7 +61,7 @@
 	 */
 	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	/*
@@ -101,13 +101,17 @@
 	 * Make sure it's a regular file.
 	 */
 	if (vp->v_type != VREG) {
+#ifdef __FreeBSD__
 		(void) VOP_CLOSE(vp, spa_mode(vd->vdev_spa), 1, 0, kcred, NULL);
+#endif
 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
+#ifdef __FreeBSD__
 		kmem_free(vd->vdev_tsd, sizeof (vdev_file_t));
 		vd->vdev_tsd = NULL;
-		return (ENODEV);
+#endif
+		return (SET_ERROR(ENODEV));
 	}
-#endif
+#endif	/* _KERNEL */
 
 skip_open:
 	/*
@@ -160,7 +164,7 @@
 	ssize_t resid;
 
 	if (!vdev_readable(vd)) {
-		zio->io_error = ENXIO;
+		zio->io_error = SET_ERROR(ENXIO);
 		return (ZIO_PIPELINE_CONTINUE);
 	}
 
@@ -174,7 +178,7 @@
 			    kcred, NULL);
 			break;
 		default:
-			zio->io_error = ENOTSUP;
+			zio->io_error = SET_ERROR(ENOTSUP);
 		}
 
 		return (ZIO_PIPELINE_CONTINUE);

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 /*
@@ -669,7 +669,7 @@
 	 * Dead vdevs cannot be initialized.
 	 */
 	if (vdev_is_dead(vd))
-		return (EIO);
+		return (SET_ERROR(EIO));
 
 	/*
 	 * Determine if the vdev is in use.
@@ -676,7 +676,7 @@
 	 */
 	if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_SPLIT &&
 	    vdev_inuse(vd, crtxg, reason, &spare_guid, &l2cache_guid))
-		return (EBUSY);
+		return (SET_ERROR(EBUSY));
 
 	/*
 	 * If this is a request to add or replace a spare or l2cache device
@@ -1094,7 +1094,7 @@
 	uint64_t *good_writes = zio->io_private;
 
 	if (*good_writes == 0)
-		zio->io_error = EIO;
+		zio->io_error = SET_ERROR(EIO);
 
 	kmem_free(good_writes, sizeof (uint64_t));
 }

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -24,7 +24,7 @@
  */
 
 /*
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -139,7 +139,7 @@
 
 	if (vd->vdev_children == 0) {
 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	vdev_open_children(vd);
@@ -234,7 +234,7 @@
 		if (mc->mc_tried || mc->mc_skipped)
 			continue;
 		if (!vdev_readable(mc->mc_vd)) {
-			mc->mc_error = ENXIO;
+			mc->mc_error = SET_ERROR(ENXIO);
 			mc->mc_tried = 1;	/* don't even try */
 			mc->mc_skipped = 1;
 			continue;
@@ -241,7 +241,7 @@
 		}
 		if (!vdev_dtl_contains(mc->mc_vd, DTL_MISSING, txg, 1))
 			return (c);
-		mc->mc_error = ESTALE;
+		mc->mc_error = SET_ERROR(ESTALE);
 		mc->mc_skipped = 1;
 		mc->mc_speculative = 1;
 	}
@@ -429,7 +429,7 @@
 				    !vdev_dtl_contains(mc->mc_vd, DTL_PARTIAL,
 				    zio->io_txg, 1))
 					continue;
-				mc->mc_error = ESTALE;
+				mc->mc_error = SET_ERROR(ESTALE);
 			}
 
 			zio_nowait(zio_vdev_child_io(zio, zio->io_bp,

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -24,7 +24,7 @@
  */
 
 /*
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 /*
@@ -69,7 +69,7 @@
 static int
 vdev_missing_io_start(zio_t *zio)
 {
-	zio->io_error = ENOTSUP;
+	zio->io_error = SET_ERROR(ENOTSUP);
 	return (ZIO_PIPELINE_CONTINUE);
 }
 

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -44,8 +44,11 @@
 int zfs_vdev_max_pending = 10;
 int zfs_vdev_min_pending = 4;
 
-/* deadline = pri + ddi_get_lbolt64() >> time_shift) */
-int zfs_vdev_time_shift = 6;
+/*
+ * The deadlines are grouped into buckets based on zfs_vdev_time_shift:
+ * deadline = pri + gethrtime() >> time_shift)
+ */
+int zfs_vdev_time_shift = 29; /* each bucket is 0.537 seconds */
 
 /* exponential I/O issue ramp-up rate */
 int zfs_vdev_ramp_rate = 2;
@@ -391,7 +394,7 @@
 
 	mutex_enter(&vq->vq_lock);
 
-	zio->io_timestamp = ddi_get_lbolt64();
+	zio->io_timestamp = gethrtime();
 	zio->io_deadline = (zio->io_timestamp >> zfs_vdev_time_shift) +
 	    zio->io_priority;
 
@@ -424,8 +427,7 @@
 
 	avl_remove(&vq->vq_pending_tree, zio);
 
-	vq->vq_io_complete_ts = ddi_get_lbolt64();
-	vq->vq_io_delta_ts = vq->vq_io_complete_ts - zio->io_timestamp;
+	vq->vq_io_complete_ts = gethrtime();
 
 	for (int i = 0; i < zfs_vdev_ramp_rate; i++) {
 		zio_t *nio = vdev_queue_io_to_issue(vq, zfs_vdev_max_pending);

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -1465,7 +1465,7 @@
 	if (nparity > VDEV_RAIDZ_MAXPARITY ||
 	    vd->vdev_children < nparity + 1) {
 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	vdev_open_children(vd);
@@ -1602,7 +1602,7 @@
 				rm->rm_missingdata++;
 			else
 				rm->rm_missingparity++;
-			rc->rc_error = ENXIO;
+			rc->rc_error = SET_ERROR(ENXIO);
 			rc->rc_tried = 1;	/* don't even try */
 			rc->rc_skipped = 1;
 			continue;
@@ -1612,7 +1612,7 @@
 				rm->rm_missingdata++;
 			else
 				rm->rm_missingparity++;
-			rc->rc_error = ESTALE;
+			rc->rc_error = SET_ERROR(ESTALE);
 			rc->rc_skipped = 1;
 			continue;
 		}
@@ -1700,7 +1700,7 @@
 			continue;
 		if (bcmp(orig[c], rc->rc_data, rc->rc_size) != 0) {
 			raidz_checksum_error(zio, rc, orig[c]);
-			rc->rc_error = ECKSUM;
+			rc->rc_error = SET_ERROR(ECKSUM);
 			ret++;
 		}
 		zio_buf_free(orig[c], rc->rc_size);
@@ -1824,7 +1824,7 @@
 					if (rc->rc_tried)
 						raidz_checksum_error(zio, rc,
 						    orig[i]);
-					rc->rc_error = ECKSUM;
+					rc->rc_error = SET_ERROR(ECKSUM);
 				}
 
 				ret = code;
@@ -2102,7 +2102,7 @@
 		 * Start checksum ereports for all children which haven't
 		 * failed, and the IO wasn't speculative.
 		 */
-		zio->io_error = ECKSUM;
+		zio->io_error = SET_ERROR(ECKSUM);
 
 		if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
 			for (c = 0; c < rm->rm_cols; c++) {

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -24,7 +24,7 @@
  */
 
 /*
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -62,7 +62,7 @@
 
 	if (vd->vdev_children == 0) {
 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	vdev_open_children(vd);

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 /*
@@ -325,7 +325,7 @@
 	 * this is already an aberrant condition.
 	 */
 	if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift >= zap_hashbits(zap) - 2)
-		return (ENOSPC);
+		return (SET_ERROR(ENOSPC));
 
 	if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) {
 		/*
@@ -714,7 +714,7 @@
 fzap_checkname(zap_name_t *zn)
 {
 	if (zn->zn_key_orig_numints * zn->zn_key_intlen > ZAP_MAXNAMELEN)
-		return (ENAMETOOLONG);
+		return (SET_ERROR(ENAMETOOLONG));
 	return (0);
 }
 
@@ -729,7 +729,7 @@
 	case 8:
 		break;
 	default:
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	if (integer_size * num_integers > ZAP_MAXVALUELEN)
@@ -805,7 +805,7 @@
 retry:
 	err = zap_leaf_lookup(l, zn, &zeh);
 	if (err == 0) {
-		err = EEXIST;
+		err = SET_ERROR(EEXIST);
 		goto out;
 	}
 	if (err != ENOENT)
@@ -996,7 +996,7 @@
 	    zap_cursor_retrieve(&zc, &za) == 0;
 	    (void) zap_cursor_advance(&zc)) {
 		if (za.za_integer_length != 8 || za.za_num_integers != 1)
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 		err = zap_add(os, intoobj, za.za_name,
 		    8, 1, &za.za_first_integer, tx);
 		if (err)
@@ -1018,7 +1018,7 @@
 	    zap_cursor_retrieve(&zc, &za) == 0;
 	    (void) zap_cursor_advance(&zc)) {
 		if (za.za_integer_length != 8 || za.za_num_integers != 1)
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 		err = zap_add(os, intoobj, za.za_name,
 		    8, 1, &value, tx);
 		if (err)
@@ -1042,7 +1042,7 @@
 		uint64_t delta = 0;
 
 		if (za.za_integer_length != 8 || za.za_num_integers != 1)
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 
 		err = zap_lookup(os, intoobj, za.za_name, 8, 1, &delta);
 		if (err != 0 && err != ENOENT)
@@ -1250,7 +1250,7 @@
 	zap_entry_handle_t zeh;
 
 	if (zn->zn_key_orig_numints * zn->zn_key_intlen > ZAP_MAXNAMELEN)
-		return (ENAMETOOLONG);
+		return (SET_ERROR(ENAMETOOLONG));
 
 	err = zap_deref_leaf(zc->zc_zap, zn->zn_hash, NULL, RW_READER, &l);
 	if (err != 0)

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 /*
@@ -434,7 +435,7 @@
 		goto again;
 	}
 
-	return (ENOENT);
+	return (SET_ERROR(ENOENT));
 }
 
 /* Return (h1,cd1 >= h2,cd2) */
@@ -492,7 +493,7 @@
 	ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
 
 	if (le->le_value_intlen > integer_size)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	zap_leaf_array_read(zeh->zeh_leaf, le->le_value_chunk,
 	    le->le_value_intlen, le->le_value_numints,
@@ -499,7 +500,7 @@
 	    integer_size, num_integers, buf);
 
 	if (zeh->zeh_num_integers > num_integers)
-		return (EOVERFLOW);
+		return (SET_ERROR(EOVERFLOW));
 	return (0);
 
 }
@@ -520,7 +521,7 @@
 		    le->le_name_numints, 1, buflen, buf);
 	}
 	if (le->le_name_numints > buflen)
-		return (EOVERFLOW);
+		return (SET_ERROR(EOVERFLOW));
 	return (0);
 }
 
@@ -536,7 +537,7 @@
 	    ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints * le->le_value_intlen);
 
 	if ((int)l->l_phys->l_hdr.lh_nfree < delta_chunks)
-		return (EAGAIN);
+		return (SET_ERROR(EAGAIN));
 
 	zap_leaf_array_free(l, &le->le_value_chunk);
 	le->le_value_chunk =
@@ -626,7 +627,7 @@
 	}
 
 	if (l->l_phys->l_hdr.lh_nfree < numchunks)
-		return (EAGAIN);
+		return (SET_ERROR(EAGAIN));
 
 	/* make the entry */
 	chunk = zap_leaf_chunk_alloc(l);

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/zio.h>
@@ -791,7 +791,7 @@
 	zn = zap_name_alloc(zap, name, mt);
 	if (zn == NULL) {
 		zap_unlockdir(zap);
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	}
 
 	if (!zap->zap_ismicro) {
@@ -800,12 +800,12 @@
 	} else {
 		mze = mze_find(zn);
 		if (mze == NULL) {
-			err = ENOENT;
+			err = SET_ERROR(ENOENT);
 		} else {
 			if (num_integers < 1) {
-				err = EOVERFLOW;
+				err = SET_ERROR(EOVERFLOW);
 			} else if (integer_size != 8) {
-				err = EINVAL;
+				err = SET_ERROR(EINVAL);
 			} else {
 				*(uint64_t *)buf =
 				    MZE_PHYS(zap, mze)->mze_value;
@@ -837,7 +837,7 @@
 	zn = zap_name_alloc_uint64(zap, key, key_numints);
 	if (zn == NULL) {
 		zap_unlockdir(zap);
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	}
 
 	fzap_prefetch(zn);
@@ -860,7 +860,7 @@
 	zn = zap_name_alloc_uint64(zap, key, key_numints);
 	if (zn == NULL) {
 		zap_unlockdir(zap);
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	}
 
 	err = fzap_lookup(zn, integer_size, num_integers, buf,
@@ -895,7 +895,7 @@
 	zn = zap_name_alloc(zap, name, MT_EXACT);
 	if (zn == NULL) {
 		zap_unlockdir(zap);
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	}
 	if (!zap->zap_ismicro) {
 		err = fzap_length(zn, integer_size, num_integers);
@@ -902,7 +902,7 @@
 	} else {
 		mze = mze_find(zn);
 		if (mze == NULL) {
-			err = ENOENT;
+			err = SET_ERROR(ENOENT);
 		} else {
 			if (integer_size)
 				*integer_size = 8;
@@ -929,7 +929,7 @@
 	zn = zap_name_alloc_uint64(zap, key, key_numints);
 	if (zn == NULL) {
 		zap_unlockdir(zap);
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	}
 	err = fzap_length(zn, integer_size, num_integers);
 	zap_name_free(zn);
@@ -998,7 +998,7 @@
 	zn = zap_name_alloc(zap, key, MT_EXACT);
 	if (zn == NULL) {
 		zap_unlockdir(zap);
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	}
 	if (!zap->zap_ismicro) {
 		err = fzap_add(zn, integer_size, num_integers, val, tx);
@@ -1012,7 +1012,7 @@
 	} else {
 		mze = mze_find(zn);
 		if (mze != NULL) {
-			err = EEXIST;
+			err = SET_ERROR(EEXIST);
 		} else {
 			mzap_addent(zn, *intval);
 		}
@@ -1039,7 +1039,7 @@
 	zn = zap_name_alloc_uint64(zap, key, key_numints);
 	if (zn == NULL) {
 		zap_unlockdir(zap);
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	}
 	err = fzap_add(zn, integer_size, num_integers, val, tx);
 	zap = zn->zn_zap;	/* fzap_add() may change zap */
@@ -1075,7 +1075,7 @@
 	zn = zap_name_alloc(zap, name, MT_EXACT);
 	if (zn == NULL) {
 		zap_unlockdir(zap);
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	}
 	if (!zap->zap_ismicro) {
 		err = fzap_update(zn, integer_size, num_integers, val, tx);
@@ -1120,7 +1120,7 @@
 	zn = zap_name_alloc_uint64(zap, key, key_numints);
 	if (zn == NULL) {
 		zap_unlockdir(zap);
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	}
 	err = fzap_update(zn, integer_size, num_integers, val, tx);
 	zap = zn->zn_zap;	/* fzap_update() may change zap */
@@ -1151,7 +1151,7 @@
 	zn = zap_name_alloc(zap, name, mt);
 	if (zn == NULL) {
 		zap_unlockdir(zap);
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	}
 	if (!zap->zap_ismicro) {
 		err = fzap_remove(zn, tx);
@@ -1158,7 +1158,7 @@
 	} else {
 		mze = mze_find(zn);
 		if (mze == NULL) {
-			err = ENOENT;
+			err = SET_ERROR(ENOENT);
 		} else {
 			zap->zap_m.zap_num_entries--;
 			bzero(&zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid],
@@ -1185,7 +1185,7 @@
 	zn = zap_name_alloc_uint64(zap, key, key_numints);
 	if (zn == NULL) {
 		zap_unlockdir(zap);
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	}
 	err = fzap_remove(zn, tx);
 	zap_name_free(zn);
@@ -1263,7 +1263,7 @@
 	mzap_ent_t *mze;
 
 	if (zc->zc_hash == -1ULL)
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 
 	if (zc->zc_zap == NULL) {
 		int hb;
@@ -1289,8 +1289,6 @@
 	if (!zc->zc_zap->zap_ismicro) {
 		err = fzap_cursor_retrieve(zc->zc_zap, zc, za);
 	} else {
-		err = ENOENT;
-
 		mze_tofind.mze_hash = zc->zc_hash;
 		mze_tofind.mze_cd = zc->zc_cd;
 
@@ -1313,6 +1311,7 @@
 			err = 0;
 		} else {
 			zc->zc_hash = -1ULL;
+			err = SET_ERROR(ENOENT);
 		}
 	}
 	rw_exit(&zc->zc_zap->zap_rwlock);
@@ -1346,7 +1345,7 @@
 	zn = zap_name_alloc(zc->zc_zap, name, mt);
 	if (zn == NULL) {
 		rw_exit(&zc->zc_zap->zap_rwlock);
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	}
 
 	if (!zc->zc_zap->zap_ismicro) {
@@ -1354,7 +1353,7 @@
 	} else {
 		mze = mze_find(zn);
 		if (mze == NULL) {
-			err = ENOENT;
+			err = SET_ERROR(ENOENT);
 			goto out;
 		}
 		zc->zc_hash = mze->mze_hash;

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -226,13 +226,13 @@
 	 * have been allocated yet.  Act as though all features are disabled.
 	 */
 	if (zapobj == 0)
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 
 	err = zap_lookup(os, zapobj, feature->fi_guid, sizeof (uint64_t), 1,
 	    &refcount);
 	if (err != 0) {
 		if (err == ENOENT)
-			return (ENOTSUP);
+			return (SET_ERROR(ENOTSUP));
 		else
 			return (err);
 	}
@@ -273,16 +273,16 @@
 		break;
 	case FEATURE_ACTION_INCR:
 		if (error == ENOENT)
-			return (ENOTSUP);
+			return (SET_ERROR(ENOTSUP));
 		if (refcount == UINT64_MAX)
-			return (EOVERFLOW);
+			return (SET_ERROR(EOVERFLOW));
 		refcount++;
 		break;
 	case FEATURE_ACTION_DECR:
 		if (error == ENOENT)
-			return (ENOTSUP);
+			return (SET_ERROR(ENOTSUP));
 		if (refcount == 0)
-			return (EOVERFLOW);
+			return (SET_ERROR(EOVERFLOW));
 		refcount--;
 		break;
 	default:

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -21,6 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/types.h>
@@ -681,7 +682,7 @@
 		 */
 		if (zfs_ace_valid(obj_type, aclp, aceptr->z_hdr.z_type,
 		    aceptr->z_hdr.z_flags) != B_TRUE)
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 
 		switch (acep->a_type) {
 		case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
@@ -788,7 +789,7 @@
 		 */
 		if (zfs_ace_valid(obj_type, aclp, aceptr->z_type,
 		    aceptr->z_flags) != B_TRUE)
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 	}
 	*size = (caddr_t)aceptr - (caddr_t)z_acl;
 	return (0);
@@ -1122,7 +1123,7 @@
 		zfs_acl_node_free(aclnode);
 		/* convert checksum errors into IO errors */
 		if (error == ECKSUM)
-			error = EIO;
+			error = SET_ERROR(EIO);
 		goto done;
 	}
 
@@ -1781,7 +1782,7 @@
 	    VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES);
 
 	if (mask == 0)
-		return (ENOSYS);
+		return (SET_ERROR(ENOSYS));
 
 	if (error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr))
 		return (error);
@@ -1875,7 +1876,7 @@
 	int error;
 
 	if (vsecp->vsa_aclcnt > MAX_ACL_ENTRIES || vsecp->vsa_aclcnt <= 0)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	aclp = zfs_acl_alloc(zfs_acl_version(zfsvfs->z_version));
 
@@ -1937,10 +1938,10 @@
 	uint64_t	acl_obj;
 
 	if (mask == 0)
-		return (ENOSYS);
+		return (SET_ERROR(ENOSYS));
 
 	if (zp->z_pflags & ZFS_IMMUTABLE)
-		return (EPERM);
+		return (SET_ERROR(EPERM));
 
 	if (error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr))
 		return (error);
@@ -2037,7 +2038,7 @@
 	    (zp->z_zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
 	    (!IS_DEVVP(ZTOV(zp)) ||
 	    (IS_DEVVP(ZTOV(zp)) && (v4_mode & WRITE_MASK_ATTRS)))) {
-		return (EROFS);
+		return (SET_ERROR(EROFS));
 	}
 
 	/*
@@ -2048,13 +2049,13 @@
 	    (zp->z_pflags & (ZFS_READONLY | ZFS_IMMUTABLE))) ||
 	    (ZTOV(zp)->v_type == VDIR &&
 	    (zp->z_pflags & ZFS_IMMUTABLE)))) {
-		return (EPERM);
+		return (SET_ERROR(EPERM));
 	}
 
 #ifdef sun
 	if ((v4_mode & (ACE_DELETE | ACE_DELETE_CHILD)) &&
 	    (zp->z_pflags & ZFS_NOUNLINK)) {
-		return (EPERM);
+		return (SET_ERROR(EPERM));
 	}
 #else
 	/*
@@ -2070,7 +2071,7 @@
 
 	if (((v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) &&
 	    (zp->z_pflags & ZFS_AV_QUARANTINED))) {
-		return (EACCES);
+		return (SET_ERROR(EACCES));
 	}
 
 	return (0);
@@ -2178,7 +2179,7 @@
 				break;
 			} else {
 				mutex_exit(&zp->z_acl_lock);
-				return (EIO);
+				return (SET_ERROR(EIO));
 			}
 		}
 
@@ -2212,7 +2213,7 @@
 	/* Put the found 'denies' back on the working mode */
 	if (deny_mask) {
 		*working_mode |= deny_mask;
-		return (EACCES);
+		return (SET_ERROR(EACCES));
 	} else if (*working_mode) {
 		return (-1);
 	}
@@ -2279,7 +2280,7 @@
     cred_t *cr)
 {
 	if (*working_mode != ACE_WRITE_DATA)
-		return (EACCES);
+		return (SET_ERROR(EACCES));
 
 	return (zfs_zaccess_common(zp, ACE_APPEND_DATA, working_mode,
 	    check_privs, B_FALSE, cr));
@@ -2295,7 +2296,7 @@
 	int error;
 
 	if (zdp->z_pflags & ZFS_AV_QUARANTINED)
-		return (EACCES);
+		return (SET_ERROR(EACCES));
 
 	is_attr = ((zdp->z_pflags & ZFS_XATTR) &&
 	    (ZTOV(zdp)->v_type == VDIR));
@@ -2501,7 +2502,7 @@
 			 * for are still present.  If so then return EACCES
 			 */
 			if (working_mode & ~(ZFS_CHECKED_MASKS)) {
-				error = EACCES;
+				error = SET_ERROR(EACCES);
 			}
 		}
 	} else if (error == 0) {
@@ -2615,7 +2616,7 @@
 	 */
 
 	if (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_NOUNLINK))
-		return (EPERM);
+		return (SET_ERROR(EPERM));
 
 	/*
 	 * First row
@@ -2682,7 +2683,7 @@
 	int error;
 
 	if (szp->z_pflags & ZFS_AV_QUARANTINED)
-		return (EACCES);
+		return (SET_ERROR(EACCES));
 
 	add_perm = (ZTOV(szp)->v_type == VDIR) ?
 	    ACE_ADD_SUBDIRECTORY : ACE_ADD_FILE;

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,8 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 Pawel Jakub Dawidek <pawel at dawidek.net>.
- * All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 /*
@@ -73,6 +72,7 @@
 #include <sys/gfs.h>
 #include <sys/stat.h>
 #include <sys/dmu.h>
+#include <sys/dsl_destroy.h>
 #include <sys/dsl_deleg.h>
 #include <sys/mount.h>
 #include <sys/sunddi.h>
@@ -303,7 +303,7 @@
 	int flags = ap->a_mode;
 
 	if (flags & FWRITE)
-		return (EACCES);
+		return (SET_ERROR(EACCES));
 
 	return (0);
 }
@@ -336,11 +336,11 @@
 #ifdef TODO
 	if (flags & V_ACE_MASK) {
 		if (accmode & ACE_ALL_WRITE_PERMS)
-			return (EACCES);
+			return (SET_ERROR(EACCES));
 	} else {
 #endif
 		if (accmode & VWRITE)
-			return (EACCES);
+			return (SET_ERROR(EACCES));
 #ifdef TODO
 	}
 #endif
@@ -397,7 +397,15 @@
 
 	ZFS_ENTER(zfsvfs);
 
+#ifdef illumos
+	if (fidp->fid_len < SHORT_FID_LEN) {
+		fidp->fid_len = SHORT_FID_LEN;
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(ENOSPC));
+	}
+#else
 	fidp->fid_len = SHORT_FID_LEN;
+#endif
 
 	zfid = (zfid_short_t *)fidp;
 
@@ -433,7 +441,7 @@
 
 	if (zfsvfs->z_shares_dir == 0) {
 		ZFS_EXIT(zfsvfs);
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	}
 
 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
@@ -523,7 +531,7 @@
 	 * No extended attributes allowed under .zfs
 	 */
 	if (flags & LOOKUP_XATTR)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	ZFS_ENTER(zfsvfs);
 
@@ -631,10 +639,10 @@
 	objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;
 
 	if (snapshot_namecheck(name, NULL, NULL) != 0)
-		return (EILSEQ);
+		return (SET_ERROR(EILSEQ));
 	dmu_objset_name(os, zname);
 	if (strlen(zname) + 1 + strlen(name) >= len)
-		return (ENAMETOOLONG);
+		return (SET_ERROR(ENAMETOOLONG));
 	(void) strcat(zname, "@");
 	(void) strcat(zname, name);
 	return (0);
@@ -743,7 +751,7 @@
 	zfsvfs_t *zfsvfs;
 	avl_index_t where;
 	char from[MAXNAMELEN], to[MAXNAMELEN];
-	char real[MAXNAMELEN];
+	char real[MAXNAMELEN], fsname[MAXNAMELEN];
 	int err;
 
 	zfsvfs = sdvp->v_vfsp->vfs_data;
@@ -762,12 +770,14 @@
 
 	ZFS_EXIT(zfsvfs);
 
+	dmu_objset_name(zfsvfs->z_os, fsname);
+
 	err = zfsctl_snapshot_zname(sdvp, snm, MAXNAMELEN, from);
-	if (!err)
+	if (err == 0)
 		err = zfsctl_snapshot_zname(tdvp, tnm, MAXNAMELEN, to);
-	if (!err)
+	if (err == 0)
 		err = zfs_secpolicy_rename_perms(from, to, cr);
-	if (err)
+	if (err != 0)
 		return (err);
 
 	/*
@@ -774,7 +784,7 @@
 	 * Cannot move snapshots out of the snapdir.
 	 */
 	if (sdvp != tdvp)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	if (strcmp(snm, tnm) == 0)
 		return (0);
@@ -784,10 +794,10 @@
 	search.se_name = (char *)snm;
 	if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL) {
 		mutex_exit(&sdp->sd_lock);
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 	}
 
-	err = dmu_objset_rename(from, to, 0);
+	err = dsl_dataset_rename_snapshot(fsname, snm, tnm, 0);
 	if (err == 0)
 		zfsctl_rename_snap(sdp, sep, tnm);
 
@@ -829,9 +839,9 @@
 	ZFS_EXIT(zfsvfs);
 
 	err = zfsctl_snapshot_zname(dvp, name, MAXNAMELEN, snapname);
-	if (!err)
+	if (err == 0)
 		err = zfs_secpolicy_destroy_perms(snapname, cr);
-	if (err)
+	if (err != 0)
 		return (err);
 
 	mutex_enter(&sdp->sd_lock);
@@ -841,15 +851,12 @@
 	if (sep) {
 		avl_remove(&sdp->sd_snaps, sep);
 		err = zfsctl_unmount_snap(sep, MS_FORCE, cr);
-		if (err) {
-			avl_index_t where;
-
-			if (avl_find(&sdp->sd_snaps, sep, &where) == NULL)
-				avl_insert(&sdp->sd_snaps, sep, where);
-		} else
-			err = dmu_objset_destroy(snapname, B_FALSE);
+		if (err != 0)
+			avl_add(&sdp->sd_snaps, sep);
+		else
+			err = dsl_destroy_snapshot(snapname, B_FALSE);
 	} else {
-		err = ENOENT;
+		err = SET_ERROR(ENOENT);
 	}
 
 	mutex_exit(&sdp->sd_lock);
@@ -873,7 +880,7 @@
 	static enum uio_seg seg = UIO_SYSSPACE;
 
 	if (snapshot_namecheck(dirname, NULL, NULL) != 0)
-		return (EILSEQ);
+		return (SET_ERROR(EILSEQ));
 
 	dmu_objset_name(zfsvfs->z_os, name);
 
@@ -880,13 +887,12 @@
 	*vpp = NULL;
 
 	err = zfs_secpolicy_snapshot_perms(name, cr);
-	if (err)
+	if (err != 0)
 		return (err);
 
 	if (err == 0) {
-		err = dmu_objset_snapshot(name, dirname, NULL, NULL,
-		    B_FALSE, B_FALSE, -1);
-		if (err)
+		err = dmu_objset_snapshot_one(name, dirname);
+		if (err != 0)
 			return (err);
 		err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp);
 	}
@@ -944,7 +950,7 @@
 	 * No extended attributes allowed under .zfs
 	 */
 	if (flags & LOOKUP_XATTR)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	ASSERT(ap->a_cnp->cn_namelen < sizeof(nm));
 	strlcpy(nm, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1);
 
@@ -959,7 +965,7 @@
 	 * add some flag to domount() to tell it not to do this lookup.
 	 */
 	if (MUTEX_HELD(&sdp->sd_lock))
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 
 	ZFS_ENTER(zfsvfs);
 
@@ -994,7 +1000,7 @@
 		*vpp = sep->se_root;
 		VN_HOLD(*vpp);
 		err = traverse(vpp, LK_EXCLUSIVE | LK_RETRY);
-		if (err) {
+		if (err != 0) {
 			VN_RELE(*vpp);
 			*vpp = NULL;
 		} else if (*vpp == sep->se_root) {
@@ -1021,7 +1027,7 @@
 	 * The requested snapshot is not currently mounted, look it up.
 	 */
 	err = zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname);
-	if (err) {
+	if (err != 0) {
 		mutex_exit(&sdp->sd_lock);
 		ZFS_EXIT(zfsvfs);
 		/*
@@ -1033,15 +1039,20 @@
 	}
 	if (dmu_objset_hold(snapname, FTAG, &snap) != 0) {
 		mutex_exit(&sdp->sd_lock);
+#ifdef illumos
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(ENOENT));
+#else	/* !illumos */
 		/* Translate errors and add SAVENAME when needed. */
 		if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE) {
 			err = EJUSTRETURN;
 			cnp->cn_flags |= SAVENAME;
 		} else {
-			err = ENOENT;
+			err = SET_ERROR(ENOENT);
 		}
 		ZFS_EXIT(zfsvfs);
 		return (err);
+#endif	/* !illumos */
 	}
 
 	sep = kmem_alloc(sizeof (zfs_snapentry_t), KM_SLEEP);
@@ -1074,8 +1085,20 @@
 	}
 	mutex_exit(&sdp->sd_lock);
 	ZFS_EXIT(zfsvfs);
+
+#ifdef illumos
+	/*
+	 * If we had an error, drop our hold on the vnode and
+	 * zfsctl_snapshot_inactive() will clean up.
+	 */
+	if (err != 0) {
+		VN_RELE(*vpp);
+		*vpp = NULL;
+	}
+#else
 	if (err != 0)
 		*vpp = NULL;
+#endif
 	return (err);
 }
 
@@ -1108,7 +1131,7 @@
 
 	if (zfsvfs->z_shares_dir == 0) {
 		ZFS_EXIT(zfsvfs);
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	}
 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0)
 		error = VOP_LOOKUP(ZTOV(dzp), vpp, cnp);
@@ -1133,8 +1156,10 @@
 	ZFS_ENTER(zfsvfs);
 
 	cookie = *offp;
+	dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
 	error = dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN, snapname, &id,
 	    &cookie, &case_conflict);
+	dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
 	if (error) {
 		ZFS_EXIT(zfsvfs);
 		if (error == ENOENT) {
@@ -1187,7 +1212,7 @@
 
 	if (zfsvfs->z_shares_dir == 0) {
 		ZFS_EXIT(zfsvfs);
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	}
 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
 		vn_lock(ZTOV(dzp), LK_SHARED | LK_RETRY);
@@ -1195,7 +1220,7 @@
 		VN_URELE(ZTOV(dzp));
 	} else {
 		*eofp = 1;
-		error = ENOENT;
+		error = SET_ERROR(ENOENT);
 	}
 
 	ZFS_EXIT(zfsvfs);
@@ -1264,7 +1289,7 @@
 	ZFS_ENTER(zfsvfs);
 	if (zfsvfs->z_shares_dir == 0) {
 		ZFS_EXIT(zfsvfs);
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	}
 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
 		vn_lock(ZTOV(dzp), LK_SHARED | LK_RETRY);
@@ -1656,7 +1681,7 @@
 		error = traverse(&vp, LK_SHARED | LK_RETRY);
 		if (error == 0) {
 			if (vp == sep->se_root)
-				error = EINVAL;
+				error = SET_ERROR(EINVAL);
 			else
 				*zfsvfsp = VTOZ(vp)->z_zfsvfs;
 		}
@@ -1666,7 +1691,7 @@
 		else
 			VN_RELE(vp);
 	} else {
-		error = EINVAL;
+		error = SET_ERROR(EINVAL);
 		mutex_exit(&sdp->sd_lock);
 	}
 

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/types.h>
@@ -154,7 +155,7 @@
 	if (name[0] == '.' &&
 	    (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')) ||
 	    zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0)
-		return (EEXIST);
+		return (SET_ERROR(EEXIST));
 
 	/*
 	 * Case sensitivity and normalization preferences are set when
@@ -225,7 +226,7 @@
 			mutex_exit(&dzp->z_lock);
 			if (!(flag & ZHAVELOCK))
 				rw_exit(&dzp->z_name_lock);
-			return (ENOENT);
+			return (SET_ERROR(ENOENT));
 		}
 		for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) {
 			if ((u8_strcmp(name, dl->dl_name, 0, cmpflags,
@@ -236,7 +237,7 @@
 			mutex_exit(&dzp->z_lock);
 			if (!(flag & ZHAVELOCK))
 				rw_exit(&dzp->z_name_lock);
-			return (ENOENT);
+			return (SET_ERROR(ENOENT));
 		}
 		if (dl == NULL)	{
 			size_t namesize;
@@ -289,12 +290,12 @@
 			vp = dnlc_lookup(ZTOV(dzp), name);
 		if (vp == DNLC_NO_VNODE) {
 			VN_RELE(vp);
-			error = ENOENT;
+			error = SET_ERROR(ENOENT);
 		} else if (vp) {
 			if (flag & ZNEW) {
 				zfs_dirent_unlock(dl);
 				VN_RELE(vp);
-				return (EEXIST);
+				return (SET_ERROR(EEXIST));
 			}
 			*dlpp = dl;
 			*zpp = VTOZ(vp);
@@ -312,7 +313,7 @@
 	} else {
 		if (flag & ZNEW) {
 			zfs_dirent_unlock(dl);
-			return (EEXIST);
+			return (SET_ERROR(EEXIST));
 		}
 		error = zfs_zget(zfsvfs, zoid, zpp);
 		if (error) {
@@ -719,7 +720,7 @@
 		if (zp->z_unlinked) {	/* no new links to unlinked zp */
 			ASSERT(!(flag & (ZNEW | ZEXISTS)));
 			mutex_exit(&zp->z_lock);
-			return (ENOENT);
+			return (SET_ERROR(ENOENT));
 		}
 		zp->z_links++;
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
@@ -820,11 +821,11 @@
 
 	if (!(flag & ZRENAMING)) {
 		if (vn_vfswlock(vp))		/* prevent new mounts on zp */
-			return (EBUSY);
+			return (SET_ERROR(EBUSY));
 
 		if (vn_ismntpt(vp)) {		/* don't remove mount point */
 			vn_vfsunlock(vp);
-			return (EBUSY);
+			return (SET_ERROR(EBUSY));
 		}
 
 		mutex_enter(&zp->z_lock);
@@ -832,7 +833,11 @@
 		if (zp_is_dir && !zfs_dirempty(zp)) {
 			mutex_exit(&zp->z_lock);
 			vn_vfsunlock(vp);
-			return (ENOTEMPTY);
+#ifdef illumos
+			return (SET_ERROR(EEXIST));
+#else
+			return (SET_ERROR(ENOTEMPTY));
+#endif
 		}
 
 		/*
@@ -943,7 +948,7 @@
 		return (error);
 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
 		zfs_acl_ids_free(&acl_ids);
-		return (EDQUOT);
+		return (SET_ERROR(EDQUOT));
 	}
 
 top:
@@ -1026,16 +1031,16 @@
 
 	if (!(flags & CREATE_XATTR_DIR)) {
 		zfs_dirent_unlock(dl);
-#ifdef __FreeBSD__
-		return (ENOATTR);
+#ifdef illumos
+		return (SET_ERROR(ENOENT));
 #else
-		return (ENOENT);
+		return (SET_ERROR(ENOATTR));
 #endif
 	}
 
 	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
 		zfs_dirent_unlock(dl);
-		return (EROFS);
+		return (SET_ERROR(EROFS));
 	}
 
 	/*

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -25,11 +25,112 @@
  * All rights reserved.
  * Copyright 2013 Martin Matuska <mm at FreeBSD.org>. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  */
 
+/*
+ * ZFS ioctls.
+ *
+ * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
+ * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
+ *
+ * There are two ways that we handle ioctls: the legacy way where almost
+ * all of the logic is in the ioctl callback, and the new way where most
+ * of the marshalling is handled in the common entry point, zfsdev_ioctl().
+ *
+ * Non-legacy ioctls should be registered by calling
+ * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
+ * from userland by lzc_ioctl().
+ *
+ * The registration arguments are as follows:
+ *
+ * const char *name
+ *   The name of the ioctl.  This is used for history logging.  If the
+ *   ioctl returns successfully (the callback returns 0), and allow_log
+ *   is true, then a history log entry will be recorded with the input &
+ *   output nvlists.  The log entry can be printed with "zpool history -i".
+ *
+ * zfs_ioc_t ioc
+ *   The ioctl request number, which userland will pass to ioctl(2).
+ *   The ioctl numbers can change from release to release, because
+ *   the caller (libzfs) must be matched to the kernel.
+ *
+ * zfs_secpolicy_func_t *secpolicy
+ *   This function will be called before the zfs_ioc_func_t, to
+ *   determine if this operation is permitted.  It should return EPERM
+ *   on failure, and 0 on success.  Checks include determining if the
+ *   dataset is visible in this zone, and if the user has either all
+ *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
+ *   to do this operation on this dataset with "zfs allow".
+ *
+ * zfs_ioc_namecheck_t namecheck
+ *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
+ *   name, a dataset name, or nothing.  If the name is not well-formed,
+ *   the ioctl will fail and the callback will not be called.
+ *   Therefore, the callback can assume that the name is well-formed
+ *   (e.g. is null-terminated, doesn't have more than one '@' character,
+ *   doesn't have invalid characters).
+ *
+ * zfs_ioc_poolcheck_t pool_check
+ *   This specifies requirements on the pool state.  If the pool does
+ *   not meet them (is suspended or is readonly), the ioctl will fail
+ *   and the callback will not be called.  If any checks are specified
+ *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
+ *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
+ *   POOL_CHECK_READONLY).
+ *
+ * boolean_t smush_outnvlist
+ *   If smush_outnvlist is true, then the output is presumed to be a
+ *   list of errors, and it will be "smushed" down to fit into the
+ *   caller's buffer, by removing some entries and replacing them with a
+ *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
+ *   nvlist_smush() for details.  If smush_outnvlist is false, and the
+ *   outnvlist does not fit into the userland-provided buffer, then the
+ *   ioctl will fail with ENOMEM.
+ *
+ * zfs_ioc_func_t *func
+ *   The callback function that will perform the operation.
+ *
+ *   The callback should return 0 on success, or an error number on
+ *   failure.  If the function fails, the userland ioctl will return -1,
+ *   and errno will be set to the callback's return value.  The callback
+ *   will be called with the following arguments:
+ *
+ *   const char *name
+ *     The name of the pool or dataset to operate on, from
+ *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
+ *     expected type (pool, dataset, or none).
+ *
+ *   nvlist_t *innvl
+ *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
+ *     NULL if no input nvlist was provided.  Changes to this nvlist are
+ *     ignored.  If the input nvlist could not be deserialized, the
+ *     ioctl will fail and the callback will not be called.
+ *
+ *   nvlist_t *outnvl
+ *     The output nvlist, initially empty.  The callback can fill it in,
+ *     and it will be returned to userland by serializing it into
+ *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
+ *     fails (e.g. because the caller didn't supply a large enough
+ *     buffer), then the overall ioctl will fail.  See the
+ *     'smush_nvlist' argument above for additional behaviors.
+ *
+ *     There are two typical uses of the output nvlist:
+ *       - To return state, e.g. property values.  In this case,
+ *         smush_outnvlist should be false.  If the buffer was not large
+ *         enough, the caller will reallocate a larger buffer and try
+ *         the ioctl again.
+ *
+ *       - To return multiple errors from an ioctl which makes on-disk
+ *         changes.  In this case, smush_outnvlist should be true.
+ *         Ioctls which make on-disk modifications should generally not
+ *         use the outnvl if they succeed, because the caller can not
+ *         distinguish between the operation failing, and
+ *         deserialization failing.
+ */
+
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -61,6 +162,7 @@
 #include <sys/dsl_deleg.h>
 #include <sys/dmu_objset.h>
 #include <sys/dmu_impl.h>
+#include <sys/dmu_tx.h>
 #include <sys/sunddi.h>
 #include <sys/policy.h>
 #include <sys/zone.h>
@@ -76,6 +178,9 @@
 #include <sys/zvol.h>
 #include <sys/dsl_scan.h>
 #include <sys/dmu_objset.h>
+#include <sys/dmu_send.h>
+#include <sys/dsl_destroy.h>
+#include <sys/dsl_userhold.h>
 #include <sys/zfeature.h>
 
 #include "zfs_namecheck.h"
@@ -97,9 +202,14 @@
 extern void zfs_init(void);
 extern void zfs_fini(void);
 
-typedef int zfs_ioc_func_t(zfs_cmd_t *);
-typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *);
+uint_t zfs_fsyncer_key;
+extern uint_t rrw_tsd_key;
+static uint_t zfs_allow_log_key;
 
+typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
+typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
+typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
+
 typedef enum {
 	NO_NAME,
 	POOL_NAME,
@@ -109,15 +219,18 @@
 typedef enum {
 	POOL_CHECK_NONE		= 1 << 0,
 	POOL_CHECK_SUSPENDED	= 1 << 1,
-	POOL_CHECK_READONLY	= 1 << 2
+	POOL_CHECK_READONLY	= 1 << 2,
 } zfs_ioc_poolcheck_t;
 
 typedef struct zfs_ioc_vec {
+	zfs_ioc_legacy_func_t	*zvec_legacy_func;
 	zfs_ioc_func_t		*zvec_func;
 	zfs_secpolicy_func_t	*zvec_secpolicy;
 	zfs_ioc_namecheck_t	zvec_namecheck;
-	boolean_t		zvec_his_log;
+	boolean_t		zvec_allow_log;
 	zfs_ioc_poolcheck_t	zvec_pool_check;
+	boolean_t		zvec_smush_outnvlist;
+	const char		*zvec_name;
 } zfs_ioc_vec_t;
 
 /* This array is indexed by zfs_userquota_prop_t */
@@ -135,15 +248,12 @@
     nvlist_t **errors);
 static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
     boolean_t *);
-int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **);
+int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
+static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
  
 static void zfsdev_close(void *data);
 
-static int zfs_prop_activate_feature(dsl_pool_t *dp, zfeature_info_t *feature);
-static int zfs_prop_activate_feature_check(void *arg1, void *arg2,
-    dmu_tx_t *tx);
-static void zfs_prop_activate_feature_sync(void *arg1, void *arg2,
-    dmu_tx_t *tx);
+static int zfs_prop_activate_feature(spa_t *spa, zfeature_info_t *feature);
 
 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
 void
@@ -281,7 +391,7 @@
 
 	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
 		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
-			(void) spa_history_log(spa, buf, LOG_CMD_NORMAL);
+			(void) spa_history_log(spa, buf);
 		spa_close(spa, FTAG);
 	}
 	history_str_free(buf);
@@ -293,7 +403,7 @@
  */
 /* ARGSUSED */
 static int
-zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	return (0);
 }
@@ -304,13 +414,13 @@
  */
 /* ARGSUSED */
 static int
-zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	if (INGLOBALZONE(curthread) ||
 	    zone_dataset_visible(zc->zc_name, NULL))
 		return (0);
 
-	return (ENOENT);
+	return (SET_ERROR(ENOENT));
 }
 
 static int
@@ -324,7 +434,7 @@
 	 */
 	if (!INGLOBALZONE(curthread) &&
 	    !zone_dataset_visible(dataset, &writable))
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 
 	if (INGLOBALZONE(curthread)) {
 		/*
@@ -332,17 +442,17 @@
 		 * global zone.
 		 */
 		if (secpolicy_zfs(cr) && zoned)
-			return (EPERM);
+			return (SET_ERROR(EPERM));
 	} else {
 		/*
 		 * If we are in a local zone, the 'zoned' property must be set.
 		 */
 		if (!zoned)
-			return (EPERM);
+			return (SET_ERROR(EPERM));
 
 		/* must be writable by this zone */
 		if (!writable)
-			return (EPERM);
+			return (SET_ERROR(EPERM));
 	}
 	return (0);
 }
@@ -353,7 +463,7 @@
 	uint64_t zoned;
 
 	if (dsl_prop_get_integer(dataset, "jailed", &zoned, NULL))
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 
 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
 }
@@ -363,62 +473,48 @@
 {
 	uint64_t zoned;
 
-	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
-	if (dsl_prop_get_ds(ds, "jailed", 8, 1, &zoned, NULL)) {
-		rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
-		return (ENOENT);
-	}
-	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
+	if (dsl_prop_get_int_ds(ds, "jailed", &zoned))
+		return (SET_ERROR(ENOENT));
 
 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
 }
 
-/*
- * If name ends in a '@', then require recursive permissions.
- */
-int
-zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
+static int
+zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
+    const char *perm, cred_t *cr)
 {
 	int error;
-	boolean_t descendent = B_FALSE;
-	dsl_dataset_t *ds;
-	char *at;
 
-	at = strchr(name, '@');
-	if (at != NULL && at[1] == '\0') {
-		*at = '\0';
-		descendent = B_TRUE;
-	}
-
-	error = dsl_dataset_hold(name, FTAG, &ds);
-	if (at != NULL)
-		*at = '@';
-	if (error != 0)
-		return (error);
-
 	error = zfs_dozonecheck_ds(name, ds, cr);
 	if (error == 0) {
 		error = secpolicy_zfs(cr);
-		if (error)
-			error = dsl_deleg_access_impl(ds, descendent, perm, cr);
+		if (error != 0)
+			error = dsl_deleg_access_impl(ds, perm, cr);
 	}
-
-	dsl_dataset_rele(ds, FTAG);
 	return (error);
 }
 
-int
-zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
-    const char *perm, cred_t *cr)
+static int
+zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
 {
 	int error;
+	dsl_dataset_t *ds;
+	dsl_pool_t *dp;
 
-	error = zfs_dozonecheck_ds(name, ds, cr);
-	if (error == 0) {
-		error = secpolicy_zfs(cr);
-		if (error)
-			error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr);
+	error = dsl_pool_hold(name, FTAG, &dp);
+	if (error != 0)
+		return (error);
+
+	error = dsl_dataset_hold(dp, name, FTAG, &ds);
+	if (error != 0) {
+		dsl_pool_rele(dp, FTAG);
+		return (error);
 	}
+
+	error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
+
+	dsl_dataset_rele(ds, FTAG);
+	dsl_pool_rele(dp, FTAG);
 	return (error);
 }
 
@@ -441,8 +537,8 @@
 	/* First get the existing dataset label. */
 	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
 	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
-	if (error)
-		return (EPERM);
+	if (error != 0)
+		return (SET_ERROR(EPERM));
 
 	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
 		new_default = TRUE;
@@ -449,7 +545,7 @@
 
 	/* The label must be translatable */
 	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	/*
 	 * In a non-global zone, disallow attempts to set a label that
@@ -458,7 +554,7 @@
 	 */
 	if (!INGLOBALZONE(curproc)) {
 		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
-			return (EPERM);
+			return (SET_ERROR(EPERM));
 		return (0);
 	}
 
@@ -469,10 +565,10 @@
 	 */
 	if (dsl_prop_get_integer(name,
 	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
-		return (EPERM);
+		return (SET_ERROR(EPERM));
 	if (!zoned) {
 		if (zfs_check_global_label(name, strval) != 0)
-			return (EPERM);
+			return (SET_ERROR(EPERM));
 	}
 
 	/*
@@ -491,8 +587,8 @@
 		 */
 		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
 		    setsl_tag, &os);
-		if (error)
-			return (EPERM);
+		if (error != 0)
+			return (SET_ERROR(EPERM));
 
 		dmu_objset_disown(os, setsl_tag);
 
@@ -502,7 +598,7 @@
 		}
 
 		if (hexstr_to_label(strval, &new_sl) != 0)
-			return (EPERM);
+			return (SET_ERROR(EPERM));
 
 		if (blstrictdom(&ds_sl, &new_sl))
 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
@@ -536,7 +632,7 @@
 		 * Disallow setting of 'zoned' from within a local zone.
 		 */
 		if (!INGLOBALZONE(curthread))
-			return (EPERM);
+			return (SET_ERROR(EPERM));
 		break;
 
 	case ZFS_PROP_QUOTA:
@@ -550,9 +646,9 @@
 			 */
 			if (dsl_prop_get_integer(dsname, "jailed", &zoned,
 			    setpoint))
-				return (EPERM);
+				return (SET_ERROR(EPERM));
 			if (!zoned || strlen(dsname) <= strlen(setpoint))
-				return (EPERM);
+				return (SET_ERROR(EPERM));
 		}
 		break;
 
@@ -559,7 +655,7 @@
 	case ZFS_PROP_MLSLABEL:
 #ifdef SECLABEL
 		if (!is_system_labeled())
-			return (EPERM);
+			return (SET_ERROR(EPERM));
 
 		if (nvpair_value_string(propval, &strval) == 0) {
 			int err;
@@ -577,13 +673,14 @@
 	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
 }
 
-int
-zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr)
+/* ARGSUSED */
+static int
+zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	int error;
 
 	error = zfs_dozonecheck(zc->zc_name, cr);
-	if (error)
+	if (error != 0)
 		return (error);
 
 	/*
@@ -593,17 +690,18 @@
 	return (0);
 }
 
-int
-zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr)
+/* ARGSUSED */
+static int
+zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	return (zfs_secpolicy_write_perms(zc->zc_name,
 	    ZFS_DELEG_PERM_ROLLBACK, cr));
 }
 
-int
-zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr)
+/* ARGSUSED */
+static int
+zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
-	spa_t *spa;
 	dsl_pool_t *dp;
 	dsl_dataset_t *ds;
 	char *cp;
@@ -615,18 +713,16 @@
 	 */
 	cp = strchr(zc->zc_name, '@');
 	if (cp == NULL)
-		return (EINVAL);
-	error = spa_open(zc->zc_name, &spa, FTAG);
-	if (error)
+		return (SET_ERROR(EINVAL));
+	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
+	if (error != 0)
 		return (error);
 
-	dp = spa_get_dsl(spa);
-	rw_enter(&dp->dp_config_rwlock, RW_READER);
 	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
-	rw_exit(&dp->dp_config_rwlock);
-	spa_close(spa, FTAG);
-	if (error)
+	if (error != 0) {
+		dsl_pool_rele(dp, FTAG);
 		return (error);
+	}
 
 	dsl_dataset_name(ds, zc->zc_name);
 
@@ -633,13 +729,23 @@
 	error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
 	    ZFS_DELEG_PERM_SEND, cr);
 	dsl_dataset_rele(ds, FTAG);
+	dsl_pool_rele(dp, FTAG);
 
 	return (error);
 }
 
+/* ARGSUSED */
 static int
-zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
+	return (zfs_secpolicy_write_perms(zc->zc_name,
+	    ZFS_DELEG_PERM_SEND, cr));
+}
+
+/* ARGSUSED */
+static int
+zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+{
 	vnode_t *vp;
 	int error;
 
@@ -653,7 +759,7 @@
 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
 	    zc->zc_name) != 0)) {
 		VN_RELE(vp);
-		return (EPERM);
+		return (SET_ERROR(EPERM));
 	}
 
 	VN_RELE(vp);
@@ -662,28 +768,28 @@
 }
 
 int
-zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	if (!INGLOBALZONE(curthread))
-		return (EPERM);
+		return (SET_ERROR(EPERM));
 
 	if (secpolicy_nfs(cr) == 0) {
 		return (0);
 	} else {
-		return (zfs_secpolicy_deleg_share(zc, cr));
+		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
 	}
 }
 
 int
-zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	if (!INGLOBALZONE(curthread))
-		return (EPERM);
+		return (SET_ERROR(EPERM));
 
 	if (secpolicy_smb(cr) == 0) {
 		return (0);
 	} else {
-		return (zfs_secpolicy_deleg_share(zc, cr));
+		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
 	}
 }
 
@@ -702,7 +808,7 @@
 	} else {
 		cp = strrchr(parent, '/');
 		if (cp == NULL)
-			return (ENOENT);
+			return (SET_ERROR(ENOENT));
 		cp[0] = '\0';
 	}
 
@@ -721,8 +827,9 @@
 	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
 }
 
+/* ARGSUSED */
 static int
-zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
 }
@@ -729,22 +836,51 @@
 
 /*
  * Destroying snapshots with delegated permissions requires
- * descendent mount and destroy permissions.
+ * descendant mount and destroy permissions.
  */
+/* ARGSUSED */
 static int
-zfs_secpolicy_destroy_recursive(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
-	int error;
-	char *dsname;
+	nvlist_t *snaps;
+	nvpair_t *pair, *nextpair;
+	int error = 0;
 
-	dsname = kmem_asprintf("%s@", zc->zc_name);
+	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
+		return (SET_ERROR(EINVAL));
+	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
+	    pair = nextpair) {
+		dsl_pool_t *dp;
+		dsl_dataset_t *ds;
 
-	error = zfs_secpolicy_destroy_perms(dsname, cr);
+		error = dsl_pool_hold(nvpair_name(pair), FTAG, &dp);
+		if (error != 0)
+			break;
+		nextpair = nvlist_next_nvpair(snaps, pair);
+		error = dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds);
+		if (error == 0)
+			dsl_dataset_rele(ds, FTAG);
+		dsl_pool_rele(dp, FTAG);
 
- 	if (error == ENOENT)
- 		error = zfs_secpolicy_destroy_perms(zc->zc_name, cr);
+		if (error == 0) {
+			error = zfs_secpolicy_destroy_perms(nvpair_name(pair),
+			    cr);
+		} else if (error == ENOENT) {
+			/*
+			 * Ignore any snapshots that don't exist (we consider
+			 * them "already destroyed").  Remove the name from the
+			 * nvl here in case the snapshot is created between
+			 * now and when we try to destroy it (in which case
+			 * we don't want to destroy it since we haven't
+			 * checked for permission).
+			 */
+			fnvlist_remove_nvpair(snaps, pair);
+			error = 0;
+		}
+		if (error != 0)
+			break;
+	}
 
-	strfree(dsname);
 	return (error);
 }
 
@@ -777,8 +913,9 @@
 	return (error);
 }
 
+/* ARGSUSED */
 static int
-zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	char *at = NULL;
 	int error;
@@ -802,49 +939,57 @@
 	return (error);
 }
 
+/* ARGSUSED */
 static int
-zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
-	char	parentname[MAXNAMELEN];
-	objset_t *clone;
+	dsl_pool_t *dp;
+	dsl_dataset_t *clone;
 	int error;
 
 	error = zfs_secpolicy_write_perms(zc->zc_name,
 	    ZFS_DELEG_PERM_PROMOTE, cr);
-	if (error)
+	if (error != 0)
 		return (error);
 
-	error = dmu_objset_hold(zc->zc_name, FTAG, &clone);
+	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
+	if (error != 0)
+		return (error);
 
+	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
+
 	if (error == 0) {
-		dsl_dataset_t *pclone = NULL;
+		char parentname[MAXNAMELEN];
+		dsl_dataset_t *origin = NULL;
 		dsl_dir_t *dd;
-		dd = clone->os_dsl_dataset->ds_dir;
+		dd = clone->ds_dir;
 
-		rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
 		error = dsl_dataset_hold_obj(dd->dd_pool,
-		    dd->dd_phys->dd_origin_obj, FTAG, &pclone);
-		rw_exit(&dd->dd_pool->dp_config_rwlock);
-		if (error) {
-			dmu_objset_rele(clone, FTAG);
+		    dd->dd_phys->dd_origin_obj, FTAG, &origin);
+		if (error != 0) {
+			dsl_dataset_rele(clone, FTAG);
+			dsl_pool_rele(dp, FTAG);
 			return (error);
 		}
 
-		error = zfs_secpolicy_write_perms(zc->zc_name,
+		error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
 		    ZFS_DELEG_PERM_MOUNT, cr);
 
-		dsl_dataset_name(pclone, parentname);
-		dmu_objset_rele(clone, FTAG);
-		dsl_dataset_rele(pclone, FTAG);
-		if (error == 0)
-			error = zfs_secpolicy_write_perms(parentname,
+		dsl_dataset_name(origin, parentname);
+		if (error == 0) {
+			error = zfs_secpolicy_write_perms_ds(parentname, origin,
 			    ZFS_DELEG_PERM_PROMOTE, cr);
+		}
+		dsl_dataset_rele(clone, FTAG);
+		dsl_dataset_rele(origin, FTAG);
 	}
+	dsl_pool_rele(dp, FTAG);
 	return (error);
 }
 
+/* ARGSUSED */
 static int
-zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	int error;
 
@@ -867,51 +1012,74 @@
 	    ZFS_DELEG_PERM_SNAPSHOT, cr));
 }
 
+/*
+ * Check for permission to create each snapshot in the nvlist.
+ */
+/* ARGSUSED */
 static int
-zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
+	nvlist_t *snaps;
+	int error;
+	nvpair_t *pair;
 
-	return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr));
+	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
+		return (SET_ERROR(EINVAL));
+	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
+	    pair = nvlist_next_nvpair(snaps, pair)) {
+		char *name = nvpair_name(pair);
+		char *atp = strchr(name, '@');
+
+		if (atp == NULL) {
+			error = SET_ERROR(EINVAL);
+			break;
+		}
+		*atp = '\0';
+		error = zfs_secpolicy_snapshot_perms(name, cr);
+		*atp = '@';
+		if (error != 0)
+			break;
+	}
+	return (error);
 }
 
+/* ARGSUSED */
 static int
-zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
+	/*
+	 * Even root must have a proper TSD so that we know what pool
+	 * to log to.
+	 */
+	if (tsd_get(zfs_allow_log_key) == NULL)
+		return (SET_ERROR(EPERM));
+	return (0);
+}
+
+static int
+zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+{
 	char	parentname[MAXNAMELEN];
 	int	error;
+	char	*origin;
 
 	if ((error = zfs_get_parent(zc->zc_name, parentname,
 	    sizeof (parentname))) != 0)
 		return (error);
 
-	if (zc->zc_value[0] != '\0') {
-		if ((error = zfs_secpolicy_write_perms(zc->zc_value,
-		    ZFS_DELEG_PERM_CLONE, cr)) != 0)
-			return (error);
-	}
+	if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
+	    (error = zfs_secpolicy_write_perms(origin,
+	    ZFS_DELEG_PERM_CLONE, cr)) != 0)
+		return (error);
 
 	if ((error = zfs_secpolicy_write_perms(parentname,
 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
 		return (error);
 
-	error = zfs_secpolicy_write_perms(parentname,
-	    ZFS_DELEG_PERM_MOUNT, cr);
-
-	return (error);
+	return (zfs_secpolicy_write_perms(parentname,
+	    ZFS_DELEG_PERM_MOUNT, cr));
 }
 
-static int
-zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr)
-{
-	int error;
-
-	error = secpolicy_fs_unmount(cr, NULL);
-	if (error) {
-		error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr);
-	}
-	return (error);
-}
-
 /*
  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
  * SYS_CONFIG privilege, which is not available in a local zone.
@@ -918,10 +1086,10 @@
  */
 /* ARGSUSED */
 static int
-zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	if (secpolicy_sys_config(cr, B_FALSE) != 0)
-		return (EPERM);
+		return (SET_ERROR(EPERM));
 
 	return (0);
 }
@@ -931,7 +1099,7 @@
  */
 /* ARGSUSED */
 static int
-zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	int error;
 
@@ -947,19 +1115,20 @@
  */
 /* ARGSUSED */
 static int
-zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	return (secpolicy_zinject(cr));
 }
 
+/* ARGSUSED */
 static int
-zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
 
 	if (prop == ZPROP_INVAL) {
 		if (!zfs_prop_user(zc->zc_value))
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 		return (zfs_secpolicy_write_perms(zc->zc_name,
 		    ZFS_DELEG_PERM_USERPROP, cr));
 	} else {
@@ -969,14 +1138,14 @@
 }
 
 static int
-zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
-	int err = zfs_secpolicy_read(zc, cr);
+	int err = zfs_secpolicy_read(zc, innvl, cr);
 	if (err)
 		return (err);
 
 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	if (zc->zc_value[0] == 0) {
 		/*
@@ -998,38 +1167,72 @@
 }
 
 static int
-zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
-	int err = zfs_secpolicy_read(zc, cr);
+	int err = zfs_secpolicy_read(zc, innvl, cr);
 	if (err)
 		return (err);
 
 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	return (zfs_secpolicy_write_perms(zc->zc_name,
 	    userquota_perms[zc->zc_objset_type], cr));
 }
 
+/* ARGSUSED */
 static int
-zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
 	    NULL, cr));
 }
 
+/* ARGSUSED */
 static int
-zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
-	return (zfs_secpolicy_write_perms(zc->zc_name,
-	    ZFS_DELEG_PERM_HOLD, cr));
+	nvpair_t *pair;
+	nvlist_t *holds;
+	int error;
+
+	error = nvlist_lookup_nvlist(innvl, "holds", &holds);
+	if (error != 0)
+		return (SET_ERROR(EINVAL));
+
+	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
+	    pair = nvlist_next_nvpair(holds, pair)) {
+		char fsname[MAXNAMELEN];
+		error = dmu_fsname(nvpair_name(pair), fsname);
+		if (error != 0)
+			return (error);
+		error = zfs_secpolicy_write_perms(fsname,
+		    ZFS_DELEG_PERM_HOLD, cr);
+		if (error != 0)
+			return (error);
+	}
+	return (0);
 }
 
+/* ARGSUSED */
 static int
-zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
-	return (zfs_secpolicy_write_perms(zc->zc_name,
-	    ZFS_DELEG_PERM_RELEASE, cr));
+	nvpair_t *pair;
+	int error;
+
+	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
+	    pair = nvlist_next_nvpair(innvl, pair)) {
+		char fsname[MAXNAMELEN];
+		error = dmu_fsname(nvpair_name(pair), fsname);
+		if (error != 0)
+			return (error);
+		error = zfs_secpolicy_write_perms(fsname,
+		    ZFS_DELEG_PERM_RELEASE, cr);
+		if (error != 0)
+			return (error);
+	}
+	return (0);
 }
 
 /*
@@ -1036,7 +1239,7 @@
  * Policy for allowing temporary snapshots to be taken or released
  */
 static int
-zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr)
+zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	/*
 	 * A temporary snapshot is the same as a snapshot,
@@ -1049,13 +1252,13 @@
 	    ZFS_DELEG_PERM_DIFF, cr)) == 0)
 		return (0);
 
-	error = zfs_secpolicy_snapshot(zc, cr);
-	if (!error)
-		error = zfs_secpolicy_hold(zc, cr);
-	if (!error)
-		error = zfs_secpolicy_release(zc, cr);
-	if (!error)
-		error = zfs_secpolicy_destroy(zc, cr);
+	error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
+	if (error == 0)
+		error = zfs_secpolicy_hold(zc, innvl, cr);
+	if (error == 0)
+		error = zfs_secpolicy_release(zc, innvl, cr);
+	if (error == 0)
+		error = zfs_secpolicy_destroy(zc, innvl, cr);
 	return (error);
 }
 
@@ -1073,7 +1276,7 @@
 	 * Read in and unpack the user-supplied nvlist.
 	 */
 	if (size == 0)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	packed = kmem_alloc(size, KM_SLEEP);
 
@@ -1094,36 +1297,40 @@
 	return (0);
 }
 
+/*
+ * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
+ * Entries will be removed from the end of the nvlist, and one int32 entry
+ * named "N_MORE_ERRORS" will be added indicating how many entries were
+ * removed.
+ */
 static int
-fit_error_list(zfs_cmd_t *zc, nvlist_t **errors)
+nvlist_smush(nvlist_t *errors, size_t max)
 {
 	size_t size;
 
-	VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
+	size = fnvlist_size(errors);
 
-	if (size > zc->zc_nvlist_dst_size) {
+	if (size > max) {
 		nvpair_t *more_errors;
 		int n = 0;
 
-		if (zc->zc_nvlist_dst_size < 1024)
-			return (ENOMEM);
+		if (max < 1024)
+			return (SET_ERROR(ENOMEM));
 
-		VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0);
-		more_errors = nvlist_prev_nvpair(*errors, NULL);
+		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
+		more_errors = nvlist_prev_nvpair(errors, NULL);
 
 		do {
-			nvpair_t *pair = nvlist_prev_nvpair(*errors,
+			nvpair_t *pair = nvlist_prev_nvpair(errors,
 			    more_errors);
-			VERIFY(nvlist_remove_nvpair(*errors, pair) == 0);
+			fnvlist_remove_nvpair(errors, pair);
 			n++;
-			VERIFY(nvlist_size(*errors, &size,
-			    NV_ENCODE_NATIVE) == 0);
-		} while (size > zc->zc_nvlist_dst_size);
+			size = fnvlist_size(errors);
+		} while (size > max);
 
-		VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0);
-		VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0);
-		ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
-		ASSERT(size <= zc->zc_nvlist_dst_size);
+		fnvlist_remove_nvpair(errors, more_errors);
+		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
+		ASSERT3U(fnvlist_size(errors), <=, max);
 	}
 
 	return (0);
@@ -1136,7 +1343,7 @@
 	int error = 0;
 	size_t size;
 
-	VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0);
+	size = fnvlist_size(nvl);
 
 	if (size > zc->zc_nvlist_dst_size) {
 		/*
@@ -1149,16 +1356,15 @@
 		 */
 		error = 0;
 	} else {
-		packed = kmem_alloc(size, KM_SLEEP);
-		VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
-		    KM_SLEEP) == 0);
+		packed = fnvlist_pack(nvl, &size);
 		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
 		    size, zc->zc_iflags) != 0)
-			error = EFAULT;
-		kmem_free(packed, size);
+			error = SET_ERROR(EFAULT);
+		fnvlist_pack_free(packed, size);
 	}
 
 	zc->zc_nvlist_dst_size = size;
+	zc->zc_nvlist_dst_filled = B_TRUE;
 	return (error);
 }
 
@@ -1169,11 +1375,11 @@
 	int error;
 
 	error = dmu_objset_hold(dsname, FTAG, &os);
-	if (error)
+	if (error != 0)
 		return (error);
 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
 		dmu_objset_rele(os, FTAG);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	mutex_enter(&os->os_user_ptr_lock);
@@ -1181,7 +1387,7 @@
 	if (*zfvp) {
 		VFS_HOLD((*zfvp)->z_vfs);
 	} else {
-		error = ESRCH;
+		error = SET_ERROR(ESRCH);
 	}
 	mutex_exit(&os->os_user_ptr_lock);
 	dmu_objset_rele(os, FTAG);
@@ -1211,7 +1417,7 @@
 			 * objset from the zfsvfs.
 			 */
 			rrw_exit(&(*zfvp)->z_teardown_lock, tag);
-			return (EBUSY);
+			return (SET_ERROR(EBUSY));
 		}
 	}
 	return (error);
@@ -1237,7 +1443,6 @@
 	nvlist_t *config, *props = NULL;
 	nvlist_t *rootprops = NULL;
 	nvlist_t *zplprops = NULL;
-	char *buf;
 
 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 	    zc->zc_iflags, &config))
@@ -1257,7 +1462,7 @@
 		(void) nvlist_lookup_uint64(props,
 		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
 		if (!SPA_VERSION_IS_SUPPORTED(version)) {
-			error = EINVAL;
+			error = SET_ERROR(EINVAL);
 			goto pool_props_bad;
 		}
 		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
@@ -1273,14 +1478,12 @@
 		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 		error = zfs_fill_zplprops_root(version, rootprops,
 		    zplprops, NULL);
-		if (error)
+		if (error != 0)
 			goto pool_props_bad;
 	}
 
-	buf = history_str_get(zc);
+	error = spa_create(zc->zc_name, config, props, zplprops);
 
-	error = spa_create(zc->zc_name, config, props, buf, zplprops);
-
 	/*
 	 * Set the remaining root properties
 	 */
@@ -1288,9 +1491,6 @@
 	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
 		(void) spa_destroy(zc->zc_name);
 
-	if (buf != NULL)
-		history_str_free(buf);
-
 pool_props_bad:
 	nvlist_free(rootprops);
 	nvlist_free(zplprops);
@@ -1331,7 +1531,7 @@
 
 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
 	    guid != zc->zc_guid)
-		error = EINVAL;
+		error = SET_ERROR(EINVAL);
 	else
 		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
 
@@ -1371,7 +1571,7 @@
 	int error;
 
 	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
-		return (EEXIST);
+		return (SET_ERROR(EEXIST));
 
 	error = put_nvlist(zc, configs);
 
@@ -1435,7 +1635,7 @@
 	nvlist_free(tryconfig);
 
 	if (config == NULL)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	error = put_nvlist(zc, config);
 	nvlist_free(config);
@@ -1493,7 +1693,7 @@
 	if (zc->zc_cookie < spa_version(spa) ||
 	    !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
 		spa_close(spa, FTAG);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	spa_upgrade(spa, zc->zc_cookie);
@@ -1511,7 +1711,7 @@
 	int error;
 
 	if ((size = zc->zc_history_len) == 0)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
 		return (error);
@@ -1518,7 +1718,7 @@
 
 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
 		spa_close(spa, FTAG);
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	}
 
 	hist_buf = kmem_alloc(size, KM_SLEEP);
@@ -1551,12 +1751,7 @@
 static int
 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
 {
-	int error;
-
-	if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value))
-		return (error);
-
-	return (0);
+	return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
 }
 
 /*
@@ -1578,7 +1773,7 @@
 		return (error);
 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
 		dmu_objset_rele(os, FTAG);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
 	    sizeof (zc->zc_value));
@@ -1607,7 +1802,7 @@
 		return (error);
 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
 		dmu_objset_rele(os, FTAG);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 	error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
 	    sizeof (zc->zc_value));
@@ -1649,7 +1844,7 @@
 	if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
 		nvlist_free(config);
 		spa_close(spa, FTAG);
-		return (EDOM);
+		return (SET_ERROR(EDOM));
 	}
 
 	if (error == 0) {
@@ -1715,7 +1910,7 @@
 		break;
 
 	default:
-		error = EINVAL;
+		error = SET_ERROR(EINVAL);
 	}
 	zc->zc_cookie = newstate;
 	spa_close(spa, FTAG);
@@ -1872,16 +2067,15 @@
 static int
 zfs_ioc_objset_stats(zfs_cmd_t *zc)
 {
-	objset_t *os = NULL;
+	objset_t *os;
 	int error;
 
-	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
-		return (error);
+	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
+	if (error == 0) {
+		error = zfs_ioc_objset_stats_impl(zc, os);
+		dmu_objset_rele(os, FTAG);
+	}
 
-	error = zfs_ioc_objset_stats_impl(zc, os);
-
-	dmu_objset_rele(os, FTAG);
-
 	if (error == ENOMEM)
 		error = 0;
 	return (error);
@@ -1903,30 +2097,23 @@
 static int
 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
 {
-	objset_t *os = NULL;
-	int error;
+	int error = 0;
 	nvlist_t *nv;
 
-	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
-		return (error);
-
 	/*
 	 * Without this check, we would return local property values if the
 	 * caller has not already received properties on or after
 	 * SPA_VERSION_RECVD_PROPS.
 	 */
-	if (!dsl_prop_get_hasrecvd(os)) {
-		dmu_objset_rele(os, FTAG);
-		return (ENOTSUP);
-	}
+	if (!dsl_prop_get_hasrecvd(zc->zc_name))
+		return (SET_ERROR(ENOTSUP));
 
 	if (zc->zc_nvlist_dst != 0 &&
-	    (error = dsl_prop_get_received(os, &nv)) == 0) {
+	    (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
 		error = put_nvlist(zc, nv);
 		nvlist_free(nv);
 	}
 
-	dmu_objset_rele(os, FTAG);
 	return (error);
 }
 
@@ -1985,7 +2172,7 @@
 			err = put_nvlist(zc, nv);
 		nvlist_free(nv);
 	} else {
-		err = ENOENT;
+		err = SET_ERROR(ENOENT);
 	}
 	dmu_objset_rele(os, FTAG);
 	return (err);
@@ -2032,7 +2219,7 @@
 top:
 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
 		if (error == ENOENT)
-			error = ESRCH;
+			error = SET_ERROR(ESRCH);
 		return (error);
 	}
 
@@ -2041,26 +2228,12 @@
 		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
 	p = zc->zc_name + strlen(zc->zc_name);
 
-	/*
-	 * Pre-fetch the datasets.  dmu_objset_prefetch() always returns 0
-	 * but is not declared void because its called by dmu_objset_find().
-	 */
-	if (zc->zc_cookie == 0) {
-		uint64_t cookie = 0;
-		int len = sizeof (zc->zc_name) - (p - zc->zc_name);
-
-		while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) {
-			if (!dataset_name_hidden(zc->zc_name))
-				(void) dmu_objset_prefetch(zc->zc_name, NULL);
-		}
-	}
-
 	do {
 		error = dmu_dir_list_next(os,
 		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
 		    NULL, &zc->zc_cookie);
 		if (error == ENOENT)
-			error = ESRCH;
+			error = SET_ERROR(ESRCH);
 	} while (error == 0 && dataset_name_hidden(zc->zc_name));
 	dmu_objset_rele(os, FTAG);
 
@@ -2098,14 +2271,10 @@
 	objset_t *os;
 	int error;
 
-top:
-	if (snapshot_list_prefetch && zc->zc_cookie == 0 && !zc->zc_simple)
-		(void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch,
-		    NULL, DS_FIND_SNAPSHOTS);
-
 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
-	if (error)
+	if (error != 0) {
 		return (error == ENOENT ? ESRCH : error);
+	}
 
 	/*
 	 * A dataset name of maximum length cannot have any snapshots,
@@ -2113,7 +2282,7 @@
 	 */
 	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
 		dmu_objset_rele(os, FTAG);
-		return (ESRCH);
+		return (SET_ERROR(ESRCH));
 	}
 
 	error = dmu_snapshot_list_next(os,
@@ -2125,24 +2294,8 @@
 		dsl_dataset_t *ds;
 		dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
 
-		/*
-		 * Since we probably don't have a hold on this snapshot,
-		 * it's possible that the objsetid could have been destroyed
-		 * and reused for a new objset. It's OK if this happens during
-		 * a zfs send operation, since the new createtxg will be
-		 * beyond the range we're interested in.
-		 */
-		rw_enter(&dp->dp_config_rwlock, RW_READER);
 		error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
-		rw_exit(&dp->dp_config_rwlock);
-		if (error) {
-			if (error == ENOENT) {
-				/* Racing with destroy, get the next one. */
-				*strchr(zc->zc_name, '@') = '\0';
-				dmu_objset_rele(os, FTAG);
-				goto top;
-			}
-		} else {
+		if (error == 0) {
 			objset_t *ossnap;
 
 			error = dmu_objset_from_ds(ds, &ossnap);
@@ -2151,12 +2304,12 @@
 			dsl_dataset_rele(ds, FTAG);
 		}
 	} else if (error == ENOENT) {
-		error = ESRCH;
+		error = SET_ERROR(ESRCH);
 	}
 
 	dmu_objset_rele(os, FTAG);
 	/* if we failed, undo the @ that we tacked on to zc_name */
-	if (error)
+	if (error != 0)
 		*strchr(zc->zc_name, '@') = '\0';
 	return (error);
 }
@@ -2180,7 +2333,7 @@
 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
 		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
 		    &pair) != 0)
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 	}
 
 	/*
@@ -2190,7 +2343,7 @@
 	if ((dash = strchr(propname, '-')) == NULL ||
 	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
 	    vallen != 3)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	domain = dash + 1;
 	type = valary[0];
@@ -2246,13 +2399,13 @@
 		err = dsl_dir_set_quota(dsname, source, intval);
 		break;
 	case ZFS_PROP_REFQUOTA:
-		err = dsl_dataset_set_quota(dsname, source, intval);
+		err = dsl_dataset_set_refquota(dsname, source, intval);
 		break;
 	case ZFS_PROP_RESERVATION:
 		err = dsl_dir_set_reservation(dsname, source, intval);
 		break;
 	case ZFS_PROP_REFRESERVATION:
-		err = dsl_dataset_set_reservation(dsname, source, intval);
+		err = dsl_dataset_set_refreservation(dsname, source, intval);
 		break;
 	case ZFS_PROP_VOLSIZE:
 		err = zvol_set_volsize(dsname, ddi_driver_major(zfs_dip),
@@ -2284,19 +2437,16 @@
 			zfeature_info_t *feature =
 			    &spa_feature_table[SPA_FEATURE_LZ4_COMPRESS];
 			spa_t *spa;
-			dsl_pool_t *dp;
 
 			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
 				return (err);
 
-			dp = spa->spa_dsl_pool;
-
 			/*
 			 * Setting the LZ4 compression algorithm activates
 			 * the feature.
 			 */
 			if (!spa_feature_is_active(spa, feature)) {
-				if ((err = zfs_prop_activate_feature(dp,
+				if ((err = zfs_prop_activate_feature(spa,
 				    feature)) != 0) {
 					spa_close(spa, FTAG);
 					return (err);
@@ -2322,18 +2472,17 @@
 
 /*
  * This function is best effort. If it fails to set any of the given properties,
- * it continues to set as many as it can and returns the first error
- * encountered. If the caller provides a non-NULL errlist, it also gives the
- * complete list of names of all the properties it failed to set along with the
- * corresponding error numbers. The caller is responsible for freeing the
- * returned errlist.
+ * it continues to set as many as it can and returns the last error
+ * encountered. If the caller provides a non-NULL errlist, it will be filled in
+ * with the list of names of all the properties that failed along with the
+ * corresponding error numbers.
  *
- * If every property is set successfully, zero is returned and the list pointed
- * at by errlist is NULL.
+ * If every property is set successfully, zero is returned and errlist is not
+ * modified.
  */
 int
 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
-    nvlist_t **errlist)
+    nvlist_t *errlist)
 {
 	nvpair_t *pair;
 	nvpair_t *propval;
@@ -2340,14 +2489,9 @@
 	int rv = 0;
 	uint64_t intval;
 	char *strval;
-	nvlist_t *genericnvl;
-	nvlist_t *errors;
-	nvlist_t *retrynvl;
+	nvlist_t *genericnvl = fnvlist_alloc();
+	nvlist_t *retrynvl = fnvlist_alloc();
 
-	VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-	VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-
 retry:
 	pair = NULL;
 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
@@ -2359,10 +2503,10 @@
 		propval = pair;
 		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
 			nvlist_t *attrs;
-			VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
+			attrs = fnvpair_value_nvlist(pair);
 			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
 			    &propval) != 0)
-				err = EINVAL;
+				err = SET_ERROR(EINVAL);
 		}
 
 		/* Validate value type */
@@ -2369,34 +2513,33 @@
 		if (err == 0 && prop == ZPROP_INVAL) {
 			if (zfs_prop_user(propname)) {
 				if (nvpair_type(propval) != DATA_TYPE_STRING)
-					err = EINVAL;
+					err = SET_ERROR(EINVAL);
 			} else if (zfs_prop_userquota(propname)) {
 				if (nvpair_type(propval) !=
 				    DATA_TYPE_UINT64_ARRAY)
-					err = EINVAL;
+					err = SET_ERROR(EINVAL);
 			} else {
-				err = EINVAL;
+				err = SET_ERROR(EINVAL);
 			}
 		} else if (err == 0) {
 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
 				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
-					err = EINVAL;
+					err = SET_ERROR(EINVAL);
 			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
 				const char *unused;
 
-				VERIFY(nvpair_value_uint64(propval,
-				    &intval) == 0);
+				intval = fnvpair_value_uint64(propval);
 
 				switch (zfs_prop_get_type(prop)) {
 				case PROP_TYPE_NUMBER:
 					break;
 				case PROP_TYPE_STRING:
-					err = EINVAL;
+					err = SET_ERROR(EINVAL);
 					break;
 				case PROP_TYPE_INDEX:
 					if (zfs_prop_index_to_string(prop,
 					    intval, &unused) != 0)
-						err = EINVAL;
+						err = SET_ERROR(EINVAL);
 					break;
 				default:
 					cmn_err(CE_PANIC,
@@ -2403,7 +2546,7 @@
 					    "unknown property type");
 				}
 			} else {
-				err = EINVAL;
+				err = SET_ERROR(EINVAL);
 			}
 		}
 
@@ -2429,8 +2572,11 @@
 			}
 		}
 
-		if (err != 0)
-			VERIFY(nvlist_add_int32(errors, propname, err) == 0);
+		if (err != 0) {
+			if (errlist != NULL)
+				fnvlist_add_int32(errlist, propname, err);
+			rv = err;
+		}
 	}
 
 	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
@@ -2452,26 +2598,27 @@
 			propval = pair;
 			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
 				nvlist_t *attrs;
-				VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
-				VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
-				    &propval) == 0);
+				attrs = fnvpair_value_nvlist(pair);
+				propval = fnvlist_lookup_nvpair(attrs,
+				    ZPROP_VALUE);
 			}
 
 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
-				VERIFY(nvpair_value_string(propval,
-				    &strval) == 0);
-				err = dsl_prop_set(dsname, propname, source, 1,
-				    strlen(strval) + 1, strval);
+				strval = fnvpair_value_string(propval);
+				err = dsl_prop_set_string(dsname, propname,
+				    source, strval);
 			} else {
-				VERIFY(nvpair_value_uint64(propval,
-				    &intval) == 0);
-				err = dsl_prop_set(dsname, propname, source, 8,
-				    1, &intval);
+				intval = fnvpair_value_uint64(propval);
+				err = dsl_prop_set_int(dsname, propname, source,
+				    intval);
 			}
 
 			if (err != 0) {
-				VERIFY(nvlist_add_int32(errors, propname,
-				    err) == 0);
+				if (errlist != NULL) {
+					fnvlist_add_int32(errlist, propname,
+					    err);
+				}
+				rv = err;
 			}
 		}
 	}
@@ -2478,18 +2625,6 @@
 	nvlist_free(genericnvl);
 	nvlist_free(retrynvl);
 
-	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
-		nvlist_free(errors);
-		errors = NULL;
-	} else {
-		VERIFY(nvpair_value_int32(pair, &rv) == 0);
-	}
-
-	if (errlist == NULL)
-		nvlist_free(errors);
-	else
-		*errlist = errors;
-
 	return (rv);
 }
 
@@ -2497,7 +2632,7 @@
  * Check that all the properties are valid user properties.
  */
 static int
-zfs_check_userprops(char *fsname, nvlist_t *nvl)
+zfs_check_userprops(const char *fsname, nvlist_t *nvl)
 {
 	nvpair_t *pair = NULL;
 	int error = 0;
@@ -2508,7 +2643,7 @@
 
 		if (!zfs_prop_user(propname) ||
 		    nvpair_type(pair) != DATA_TYPE_STRING)
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 
 		if (error = zfs_secpolicy_write_perms(fsname,
 		    ZFS_DELEG_PERM_USERPROP, CRED()))
@@ -2515,7 +2650,7 @@
 			return (error);
 
 		if (strlen(propname) >= ZAP_MAXNAMELEN)
-			return (ENAMETOOLONG);
+			return (SET_ERROR(ENAMETOOLONG));
 
 		VERIFY(nvpair_value_string(pair, &valstr) == 0);
 		if (strlen(valstr) >= ZAP_MAXVALUELEN)
@@ -2541,7 +2676,7 @@
 }
 
 static int
-clear_received_props(objset_t *os, const char *fs, nvlist_t *props,
+clear_received_props(const char *dsname, nvlist_t *props,
     nvlist_t *skipped)
 {
 	int err = 0;
@@ -2553,8 +2688,8 @@
 		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
 		 */
 		zprop_source_t flags = (ZPROP_SRC_NONE |
-		    (dsl_prop_get_hasrecvd(os) ? ZPROP_SRC_RECEIVED : 0));
-		err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL);
+		    (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
+		err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
 	}
 	nvlist_free(cleared_props);
 	return (err);
@@ -2577,7 +2712,7 @@
 	boolean_t received = zc->zc_cookie;
 	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
 	    ZPROP_SRC_LOCAL);
-	nvlist_t *errors = NULL;
+	nvlist_t *errors;
 	int error;
 
 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
@@ -2586,21 +2721,19 @@
 
 	if (received) {
 		nvlist_t *origprops;
-		objset_t *os;
 
-		if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) {
-			if (dsl_prop_get_received(os, &origprops) == 0) {
-				(void) clear_received_props(os,
-				    zc->zc_name, origprops, nvl);
-				nvlist_free(origprops);
-			}
+		if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
+			(void) clear_received_props(zc->zc_name,
+			    origprops, nvl);
+			nvlist_free(origprops);
+		}
 
-			dsl_prop_set_hasrecvd(os);
-			dmu_objset_rele(os, FTAG);
-		}
+		error = dsl_prop_set_hasrecvd(zc->zc_name);
 	}
 
-	error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors);
+	errors = fnvlist_alloc();
+	if (error == 0)
+		error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
 
 	if (zc->zc_nvlist_dst != 0 && errors != NULL) {
 		(void) put_nvlist(zc, errors);
@@ -2641,12 +2774,12 @@
 		 */
 		if (prop == ZPROP_INVAL) {
 			if (!zfs_prop_user(propname))
-				return (EINVAL);
+				return (SET_ERROR(EINVAL));
 
 			type = PROP_TYPE_STRING;
 		} else if (prop == ZFS_PROP_VOLSIZE ||
 		    prop == ZFS_PROP_VERSION) {
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 		} else {
 			type = zfs_prop_get_type(prop);
 		}
@@ -2663,7 +2796,7 @@
 			break;
 		default:
 			nvlist_free(dummy);
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 		}
 
 		pair = nvlist_next_nvpair(dummy, NULL);
@@ -2679,11 +2812,11 @@
 		 * they are not considered inheritable.
 		 */
 		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 	}
 
-	/* the property name has been validated by zfs_secpolicy_inherit() */
-	return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL));
+	/* property name has been validated by zfs_secpolicy_inherit_prop() */
+	return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
 }
 
 static int
@@ -2756,7 +2889,7 @@
 	if (error == 0 && zc->zc_nvlist_dst != 0)
 		error = put_nvlist(zc, nvp);
 	else
-		error = EFAULT;
+		error = SET_ERROR(EFAULT);
 
 	nvlist_free(nvp);
 	return (error);
@@ -2785,7 +2918,7 @@
 	 */
 	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
 		nvlist_free(fsaclnv);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	/*
@@ -2795,7 +2928,7 @@
 	 */
 
 	error = secpolicy_zfs(CRED());
-	if (error) {
+	if (error != 0) {
 		if (zc->zc_perm_action == B_FALSE) {
 			error = dsl_deleg_can_allow(zc->zc_name,
 			    fsaclnv, CRED());
@@ -2928,7 +3061,7 @@
 	    (zplver < ZPL_VERSION_NORMALIZATION &&
 	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
 	    sense != ZFS_PROP_UNDEFINED)))
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 
 	/*
 	 * Put the version in the zplprops
@@ -3021,26 +3154,30 @@
 }
 
 /*
- * inputs:
- * zc_objset_type	type of objset to create (fs vs zvol)
- * zc_name		name of new objset
- * zc_value		name of snapshot to clone from (may be empty)
- * zc_nvlist_src{_size}	nvlist of properties to apply
+ * innvl: {
+ *     "type" -> dmu_objset_type_t (int32)
+ *     (optional) "props" -> { prop -> value }
+ * }
  *
- * outputs: none
+ * outnvl: propname -> error code (int32)
  */
 static int
-zfs_ioc_create(zfs_cmd_t *zc)
+zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 {
-	objset_t *clone;
 	int error = 0;
-	zfs_creat_t zct;
+	zfs_creat_t zct = { 0 };
 	nvlist_t *nvprops = NULL;
 	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
-	dmu_objset_type_t type = zc->zc_objset_type;
+	int32_t type32;
+	dmu_objset_type_t type;
+	boolean_t is_insensitive = B_FALSE;
 
+	if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
+		return (SET_ERROR(EINVAL));
+	type = type32;
+	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
+
 	switch (type) {
-
 	case DMU_OST_ZFS:
 		cbfunc = zfs_create_cb;
 		break;
@@ -3053,253 +3190,333 @@
 		cbfunc = NULL;
 		break;
 	}
-	if (strchr(zc->zc_name, '@') ||
-	    strchr(zc->zc_name, '%'))
-		return (EINVAL);
+	if (strchr(fsname, '@') ||
+	    strchr(fsname, '%'))
+		return (SET_ERROR(EINVAL));
 
-	if (zc->zc_nvlist_src != 0 &&
-	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
-	    zc->zc_iflags, &nvprops)) != 0)
-		return (error);
-
-	zct.zct_zplprops = NULL;
 	zct.zct_props = nvprops;
 
-	if (zc->zc_value[0] != '\0') {
-		/*
-		 * We're creating a clone of an existing snapshot.
-		 */
-		zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
-		if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) {
-			nvlist_free(nvprops);
-			return (EINVAL);
-		}
+	if (cbfunc == NULL)
+		return (SET_ERROR(EINVAL));
 
-		error = dmu_objset_hold(zc->zc_value, FTAG, &clone);
-		if (error) {
-			nvlist_free(nvprops);
-			return (error);
-		}
+	if (type == DMU_OST_ZVOL) {
+		uint64_t volsize, volblocksize;
 
-		error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0);
-		dmu_objset_rele(clone, FTAG);
-		if (error) {
-			nvlist_free(nvprops);
-			return (error);
-		}
-	} else {
-		boolean_t is_insensitive = B_FALSE;
+		if (nvprops == NULL)
+			return (SET_ERROR(EINVAL));
+		if (nvlist_lookup_uint64(nvprops,
+		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
+			return (SET_ERROR(EINVAL));
 
-		if (cbfunc == NULL) {
-			nvlist_free(nvprops);
-			return (EINVAL);
-		}
+		if ((error = nvlist_lookup_uint64(nvprops,
+		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
+		    &volblocksize)) != 0 && error != ENOENT)
+			return (SET_ERROR(EINVAL));
 
-		if (type == DMU_OST_ZVOL) {
-			uint64_t volsize, volblocksize;
+		if (error != 0)
+			volblocksize = zfs_prop_default_numeric(
+			    ZFS_PROP_VOLBLOCKSIZE);
 
-			if (nvprops == NULL ||
-			    nvlist_lookup_uint64(nvprops,
-			    zfs_prop_to_name(ZFS_PROP_VOLSIZE),
-			    &volsize) != 0) {
-				nvlist_free(nvprops);
-				return (EINVAL);
-			}
+		if ((error = zvol_check_volblocksize(
+		    volblocksize)) != 0 ||
+		    (error = zvol_check_volsize(volsize,
+		    volblocksize)) != 0)
+			return (error);
+	} else if (type == DMU_OST_ZFS) {
+		int error;
 
-			if ((error = nvlist_lookup_uint64(nvprops,
-			    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
-			    &volblocksize)) != 0 && error != ENOENT) {
-				nvlist_free(nvprops);
-				return (EINVAL);
-			}
-
-			if (error != 0)
-				volblocksize = zfs_prop_default_numeric(
-				    ZFS_PROP_VOLBLOCKSIZE);
-
-			if ((error = zvol_check_volblocksize(
-			    volblocksize)) != 0 ||
-			    (error = zvol_check_volsize(volsize,
-			    volblocksize)) != 0) {
-				nvlist_free(nvprops);
-				return (error);
-			}
-		} else if (type == DMU_OST_ZFS) {
-			int error;
-
-			/*
-			 * We have to have normalization and
-			 * case-folding flags correct when we do the
-			 * file system creation, so go figure them out
-			 * now.
-			 */
-			VERIFY(nvlist_alloc(&zct.zct_zplprops,
-			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
-			error = zfs_fill_zplprops(zc->zc_name, nvprops,
-			    zct.zct_zplprops, &is_insensitive);
-			if (error != 0) {
-				nvlist_free(nvprops);
-				nvlist_free(zct.zct_zplprops);
-				return (error);
-			}
+		/*
+		 * We have to have normalization and
+		 * case-folding flags correct when we do the
+		 * file system creation, so go figure them out
+		 * now.
+		 */
+		VERIFY(nvlist_alloc(&zct.zct_zplprops,
+		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
+		error = zfs_fill_zplprops(fsname, nvprops,
+		    zct.zct_zplprops, &is_insensitive);
+		if (error != 0) {
+			nvlist_free(zct.zct_zplprops);
+			return (error);
 		}
-		error = dmu_objset_create(zc->zc_name, type,
-		    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
-		nvlist_free(zct.zct_zplprops);
 	}
 
+	error = dmu_objset_create(fsname, type,
+	    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
+	nvlist_free(zct.zct_zplprops);
+
 	/*
 	 * It would be nice to do this atomically.
 	 */
 	if (error == 0) {
-		error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL,
-		    nvprops, NULL);
+		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
+		    nvprops, outnvl);
 		if (error != 0)
-			(void) dmu_objset_destroy(zc->zc_name, B_FALSE);
+			(void) dsl_destroy_head(fsname);
 	}
-	nvlist_free(nvprops);
 #ifdef __FreeBSD__
 	if (error == 0 && type == DMU_OST_ZVOL)
-		zvol_create_minors(zc->zc_name);
+		zvol_create_minors(fsname);
 #endif
 	return (error);
 }
 
 /*
- * inputs:
- * zc_name	name of filesystem
- * zc_value	short name of snapshot
- * zc_cookie	recursive flag
- * zc_nvlist_src[_size] property list
+ * innvl: {
+ *     "origin" -> name of origin snapshot
+ *     (optional) "props" -> { prop -> value }
+ * }
  *
- * outputs:
- * zc_value	short snapname (i.e. part after the '@')
+ * outnvl: propname -> error code (int32)
  */
 static int
-zfs_ioc_snapshot(zfs_cmd_t *zc)
+zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 {
+	int error = 0;
 	nvlist_t *nvprops = NULL;
-	int error;
-	boolean_t recursive = zc->zc_cookie;
+	char *origin_name;
 
-	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
-		return (EINVAL);
+	if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
+		return (SET_ERROR(EINVAL));
+	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
 
-	if (zc->zc_nvlist_src != 0 &&
-	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
-	    zc->zc_iflags, &nvprops)) != 0)
+	if (strchr(fsname, '@') ||
+	    strchr(fsname, '%'))
+		return (SET_ERROR(EINVAL));
+
+	if (dataset_namecheck(origin_name, NULL, NULL) != 0)
+		return (SET_ERROR(EINVAL));
+	error = dmu_objset_clone(fsname, origin_name);
+	if (error != 0)
 		return (error);
 
-	error = zfs_check_userprops(zc->zc_name, nvprops);
-	if (error)
-		goto out;
+	/*
+	 * It would be nice to do this atomically.
+	 */
+	if (error == 0) {
+		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
+		    nvprops, outnvl);
+		if (error != 0)
+			(void) dsl_destroy_head(fsname);
+	}
+	return (error);
+}
 
-	if (!nvlist_empty(nvprops) &&
-	    zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) {
-		error = ENOTSUP;
-		goto out;
+/*
+ * innvl: {
+ *     "snaps" -> { snapshot1, snapshot2 }
+ *     (optional) "props" -> { prop -> value (string) }
+ * }
+ *
+ * outnvl: snapshot -> error code (int32)
+ */
+static int
+zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+	nvlist_t *snaps;
+	nvlist_t *props = NULL;
+	int error, poollen;
+	nvpair_t *pair;
+
+	(void) nvlist_lookup_nvlist(innvl, "props", &props);
+	if ((error = zfs_check_userprops(poolname, props)) != 0)
+		return (error);
+
+	if (!nvlist_empty(props) &&
+	    zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
+		return (SET_ERROR(ENOTSUP));
+
+	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
+		return (SET_ERROR(EINVAL));
+	poollen = strlen(poolname);
+	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
+	    pair = nvlist_next_nvpair(snaps, pair)) {
+		const char *name = nvpair_name(pair);
+		const char *cp = strchr(name, '@');
+
+		/*
+		 * The snap name must contain an @, and the part after it must
+		 * contain only valid characters.
+		 */
+		if (cp == NULL || snapshot_namecheck(cp + 1, NULL, NULL) != 0)
+			return (SET_ERROR(EINVAL));
+
+		/*
+		 * The snap must be in the specified pool.
+		 */
+		if (strncmp(name, poolname, poollen) != 0 ||
+		    (name[poollen] != '/' && name[poollen] != '@'))
+			return (SET_ERROR(EXDEV));
+
+		/* This must be the only snap of this fs. */
+		for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
+		    pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
+			if (strncmp(name, nvpair_name(pair2), cp - name + 1)
+			    == 0) {
+				return (SET_ERROR(EXDEV));
+			}
+		}
 	}
 
-	error = dmu_objset_snapshot(zc->zc_name, zc->zc_value, NULL,
-	    nvprops, recursive, B_FALSE, -1);
-
-out:
-	nvlist_free(nvprops);
+	error = dsl_dataset_snapshot(snaps, props, outnvl);
 	return (error);
 }
 
-int
-zfs_unmount_snap(const char *name, void *arg)
+/*
+ * innvl: "message" -> string
+ */
+/* ARGSUSED */
+static int
+zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
 {
-	vfs_t *vfsp = NULL;
+	char *message;
+	spa_t *spa;
+	int error;
+	char *poolname;
 
-	if (arg) {
-		char *snapname = arg;
-		char *fullname = kmem_asprintf("%s@%s", name, snapname);
-		vfsp = zfs_get_vfs(fullname);
-		strfree(fullname);
-	} else if (strchr(name, '@')) {
-		vfsp = zfs_get_vfs(name);
+	/*
+	 * The poolname in the ioctl is not set, we get it from the TSD,
+	 * which was set at the end of the last successful ioctl that allows
+	 * logging.  The secpolicy func already checked that it is set.
+	 * Only one log ioctl is allowed after each successful ioctl, so
+	 * we clear the TSD here.
+	 */
+	poolname = tsd_get(zfs_allow_log_key);
+	(void) tsd_set(zfs_allow_log_key, NULL);
+	error = spa_open(poolname, &spa, FTAG);
+	strfree(poolname);
+	if (error != 0)
+		return (error);
+
+	if (nvlist_lookup_string(innvl, "message", &message) != 0)  {
+		spa_close(spa, FTAG);
+		return (SET_ERROR(EINVAL));
 	}
 
-	if (vfsp) {
-		/*
-		 * Always force the unmount for snapshots.
-		 */
-		int flag = MS_FORCE;
-		int err;
+	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
+		spa_close(spa, FTAG);
+		return (SET_ERROR(ENOTSUP));
+	}
 
-		if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
-			VFS_RELE(vfsp);
-			return (err);
-		}
+	error = spa_history_log(spa, message);
+	spa_close(spa, FTAG);
+	return (error);
+}
+
+/*
+ * The dp_config_rwlock must not be held when calling this, because the
+ * unmount may need to write out data.
+ *
+ * This function is best-effort.  Callers must deal gracefully if it
+ * remains mounted (or is remounted after this call).
+ */
+void
+zfs_unmount_snap(const char *snapname)
+{
+	vfs_t *vfsp;
+	zfsvfs_t *zfsvfs;
+
+	if (strchr(snapname, '@') == NULL)
+		return;
+
+	vfsp = zfs_get_vfs(snapname);
+	if (vfsp == NULL)
+		return;
+
+	zfsvfs = vfsp->vfs_data;
+	ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));
+
+	if (vn_vfswlock(vfsp->vfs_vnodecovered) != 0) {
 		VFS_RELE(vfsp);
-		mtx_lock(&Giant);	/* dounmount() */
-		dounmount(vfsp, flag, curthread);
-		mtx_unlock(&Giant);	/* dounmount() */
+		return;
 	}
+	VFS_RELE(vfsp);
+
+	/*
+	 * Always force the unmount for snapshots.
+	 */
+
+#ifdef illumos
+	(void) dounmount(vfsp, MS_FORCE, kcred);
+#else
+	mtx_lock(&Giant);	/* dounmount() */
+	dounmount(vfsp, MS_FORCE, curthread);
+	mtx_unlock(&Giant);	/* dounmount() */
+#endif
+}
+
+/* ARGSUSED */
+static int
+zfs_unmount_snap_cb(const char *snapname, void *arg)
+{
+	zfs_unmount_snap(snapname);
 	return (0);
 }
 
 /*
- * inputs:
- * zc_name		name of filesystem, snaps must be under it
- * zc_nvlist_src[_size]	full names of snapshots to destroy
- * zc_defer_destroy	mark for deferred destroy
+ * When a clone is destroyed, its origin may also need to be destroyed,
+ * in which case it must be unmounted.  This routine will do that unmount
+ * if necessary.
+ */
+void
+zfs_destroy_unmount_origin(const char *fsname)
+{
+	int error;
+	objset_t *os;
+	dsl_dataset_t *ds;
+
+	error = dmu_objset_hold(fsname, FTAG, &os);
+	if (error != 0)
+		return;
+	ds = dmu_objset_ds(os);
+	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
+		char originname[MAXNAMELEN];
+		dsl_dataset_name(ds->ds_prev, originname);
+		dmu_objset_rele(os, FTAG);
+		zfs_unmount_snap(originname);
+	} else {
+		dmu_objset_rele(os, FTAG);
+	}
+}
+
+/*
+ * innvl: {
+ *     "snaps" -> { snapshot1, snapshot2 }
+ *     (optional boolean) "defer"
+ * }
  *
- * outputs:
- * zc_name		on failure, name of failed snapshot
+ * outnvl: snapshot -> error code (int32)
+ *
  */
 static int
-zfs_ioc_destroy_snaps_nvl(zfs_cmd_t *zc)
+zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 {
-	int err, len;
-	nvlist_t *nvl;
+	int poollen;
+	nvlist_t *snaps;
 	nvpair_t *pair;
+	boolean_t defer;
 
-	if ((err = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
-	    zc->zc_iflags, &nvl)) != 0) {
-#ifndef __FreeBSD__
-		return (err);
-#else
-		/*
-		 * We are probably called by older binaries,
-		 * allocate and populate nvlist with recursive snapshots
-		 */
-		if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
-			return (EINVAL);
-		VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-		err = dmu_get_recursive_snaps_nvl(zc->zc_name,
-		    zc->zc_value, nvl);
-		if (err) {
-			nvlist_free(nvl);
-			return (err);
-		}
-#endif /* __FreeBSD__ */
-	}
+	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
+		return (SET_ERROR(EINVAL));
+	defer = nvlist_exists(innvl, "defer");
 
-	len = strlen(zc->zc_name);
-	for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL;
-	    pair = nvlist_next_nvpair(nvl, pair)) {
+	poollen = strlen(poolname);
+	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
+	    pair = nvlist_next_nvpair(snaps, pair)) {
 		const char *name = nvpair_name(pair);
+
 		/*
-		 * The snap name must be underneath the zc_name.  This ensures
-		 * that our permission checks were legitimate.
+		 * The snap must be in the specified pool.
 		 */
-		if (strncmp(zc->zc_name, name, len) != 0 ||
-		    (name[len] != '@' && name[len] != '/')) {
-			nvlist_free(nvl);
-			return (EINVAL);
-		}
+		if (strncmp(name, poolname, poollen) != 0 ||
+		    (name[poollen] != '/' && name[poollen] != '@'))
+			return (SET_ERROR(EXDEV));
 
-		(void) zfs_unmount_snap(name, NULL);
+		zfs_unmount_snap(name);
 		(void) zvol_remove_minor(name);
 	}
 
-	err = dmu_snapshots_destroy_nvl(nvl, zc->zc_defer_destroy,
-	    zc->zc_name);
-	nvlist_free(nvl);
-	return (err);
+	return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
 }
 
 /*
@@ -3314,13 +3531,13 @@
 zfs_ioc_destroy(zfs_cmd_t *zc)
 {
 	int err;
-	if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) {
-		err = zfs_unmount_snap(zc->zc_name, NULL);
-		if (err)
-			return (err);
-	}
+	if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS)
+		zfs_unmount_snap(zc->zc_name);
 
-	err = dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy);
+	if (strchr(zc->zc_name, '@'))
+		err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
+	else
+		err = dsl_destroy_head(zc->zc_name);
 	if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
 		(void) zvol_remove_minor(zc->zc_name);
 	return (err);
@@ -3335,79 +3552,34 @@
 static int
 zfs_ioc_rollback(zfs_cmd_t *zc)
 {
-	dsl_dataset_t *ds, *clone;
+	zfsvfs_t *zfsvfs;
 	int error;
-	zfsvfs_t *zfsvfs;
-	char *clone_name;
 
-	error = dsl_dataset_hold(zc->zc_name, FTAG, &ds);
-	if (error)
-		return (error);
-
-	/* must not be a snapshot */
-	if (dsl_dataset_is_snapshot(ds)) {
-		dsl_dataset_rele(ds, FTAG);
-		return (EINVAL);
-	}
-
-	/* must have a most recent snapshot */
-	if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) {
-		dsl_dataset_rele(ds, FTAG);
-		return (EINVAL);
-	}
-
-	/*
-	 * Create clone of most recent snapshot.
-	 */
-	clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name);
-	error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT);
-	if (error)
-		goto out;
-
-	error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone);
-	if (error)
-		goto out;
-
-	/*
-	 * Do clone swap.
-	 */
 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
 		error = zfs_suspend_fs(zfsvfs);
 		if (error == 0) {
 			int resume_err;
 
-			if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
-				error = dsl_dataset_clone_swap(clone, ds,
-				    B_TRUE);
-				dsl_dataset_disown(ds, FTAG);
-				ds = NULL;
-			} else {
-				error = EBUSY;
-			}
+			error = dsl_dataset_rollback(zc->zc_name);
 			resume_err = zfs_resume_fs(zfsvfs, zc->zc_name);
 			error = error ? error : resume_err;
 		}
 		VFS_RELE(zfsvfs->z_vfs);
 	} else {
-		if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
-			error = dsl_dataset_clone_swap(clone, ds, B_TRUE);
-			dsl_dataset_disown(ds, FTAG);
-			ds = NULL;
-		} else {
-			error = EBUSY;
-		}
+		error = dsl_dataset_rollback(zc->zc_name);
 	}
+	return (error);
+}
 
-	/*
-	 * Destroy clone (which also closes it).
-	 */
-	(void) dsl_dataset_destroy(clone, FTAG, B_FALSE);
+static int
+recursive_unmount(const char *fsname, void *arg)
+{
+	const char *snapname = arg;
+	char fullname[MAXNAMELEN];
 
-out:
-	strfree(clone_name);
-	if (ds)
-		dsl_dataset_rele(ds, FTAG);
-	return (error);
+	(void) snprintf(fullname, sizeof (fullname), "%s@%s", fsname, snapname);
+	zfs_unmount_snap(fullname);
+	return (0);
 }
 
 /*
@@ -3421,31 +3593,43 @@
 static int
 zfs_ioc_rename(zfs_cmd_t *zc)
 {
-	int flags = 0;
+	boolean_t recursive = zc->zc_cookie & 1;
+#ifdef __FreeBSD__
+	boolean_t allow_mounted = zc->zc_cookie & 2;
+#endif
+	char *at;
 
-	if (zc->zc_cookie & 1)
-		flags |= ZFS_RENAME_RECURSIVE;
-	if (zc->zc_cookie & 2)
-		flags |= ZFS_RENAME_ALLOW_MOUNTED;
-
 	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
 	    strchr(zc->zc_value, '%'))
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
-	/*
-	 * Unmount snapshot unless we're doing a recursive rename,
-	 * in which case the dataset code figures out which snapshots
-	 * to unmount.
-	 */
-	if (!(flags & ZFS_RENAME_RECURSIVE) &&
-	    strchr(zc->zc_name, '@') != NULL &&
-	    zc->zc_objset_type == DMU_OST_ZFS) {
-		int err = zfs_unmount_snap(zc->zc_name, NULL);
-		if (err)
-			return (err);
+	at = strchr(zc->zc_name, '@');
+	if (at != NULL) {
+		/* snaps must be in same fs */
+		if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
+			return (SET_ERROR(EXDEV));
+		*at = '\0';
+#ifdef illumos
+		if (zc->zc_objset_type == DMU_OST_ZFS) {
+#else
+		if (zc->zc_objset_type == DMU_OST_ZFS && allow_mounted) {
+#endif
+			int error = dmu_objset_find(zc->zc_name,
+			    recursive_unmount, at + 1,
+			    recursive ? DS_FIND_CHILDREN : 0);
+			if (error != 0)
+				return (error);
+		}
+		return (dsl_dataset_rename_snapshot(zc->zc_name,
+		    at + 1, strchr(zc->zc_value, '@') + 1, recursive));
+	} else {
+#ifdef illumos
+		if (zc->zc_objset_type == DMU_OST_ZVOL)
+			(void) zvol_remove_minor(zc->zc_name);
+#endif
+		return (dsl_dir_rename(zc->zc_name, zc->zc_value));
 	}
-	return (dmu_objset_rename(zc->zc_name, zc->zc_value, flags));
 }
 
 static int
@@ -3480,7 +3664,7 @@
 				perm = ZFS_DELEG_PERM_GROUPQUOTA;
 			} else {
 				/* USERUSED and GROUPUSED are read-only */
-				return (EINVAL);
+				return (SET_ERROR(EINVAL));
 			}
 
 			if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
@@ -3488,11 +3672,11 @@
 			return (0);
 		}
 
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	if (issnap)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
 		/*
@@ -3521,13 +3705,13 @@
 			    intval <= ZIO_COMPRESS_GZIP_9 &&
 			    zfs_earlier_version(dsname,
 			    SPA_VERSION_GZIP_COMPRESSION)) {
-				return (ENOTSUP);
+				return (SET_ERROR(ENOTSUP));
 			}
 
 			if (intval == ZIO_COMPRESS_ZLE &&
 			    zfs_earlier_version(dsname,
 			    SPA_VERSION_ZLE_COMPRESSION))
-				return (ENOTSUP);
+				return (SET_ERROR(ENOTSUP));
 
 			if (intval == ZIO_COMPRESS_LZ4) {
 				zfeature_info_t *feature =
@@ -3540,7 +3724,7 @@
 
 				if (!spa_feature_is_enabled(spa, feature)) {
 					spa_close(spa, FTAG);
-					return (ENOTSUP);
+					return (SET_ERROR(ENOTSUP));
 				}
 				spa_close(spa, FTAG);
 			}
@@ -3554,7 +3738,7 @@
 			 */
 			if (zfs_is_bootfs(dsname) &&
 			    !BOOTFS_COMPRESS_VALID(intval)) {
-				return (ERANGE);
+				return (SET_ERROR(ERANGE));
 			}
 		}
 		break;
@@ -3561,17 +3745,17 @@
 
 	case ZFS_PROP_COPIES:
 		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
-			return (ENOTSUP);
+			return (SET_ERROR(ENOTSUP));
 		break;
 
 	case ZFS_PROP_DEDUP:
 		if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
-			return (ENOTSUP);
+			return (SET_ERROR(ENOTSUP));
 		break;
 
 	case ZFS_PROP_SHARESMB:
 		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
-			return (ENOTSUP);
+			return (SET_ERROR(ENOTSUP));
 		break;
 
 	case ZFS_PROP_ACLINHERIT:
@@ -3580,7 +3764,7 @@
 			if (intval == ZFS_ACL_PASSTHROUGH_X &&
 			    zfs_earlier_version(dsname,
 			    SPA_VERSION_PASSTHROUGH_X))
-				return (ENOTSUP);
+				return (SET_ERROR(ENOTSUP));
 		}
 		break;
 	}
@@ -3589,40 +3773,19 @@
 }
 
 /*
- * Activates a feature on a pool in response to a property setting. This
- * creates a new sync task which modifies the pool to reflect the feature
- * as being active.
- */
-static int
-zfs_prop_activate_feature(dsl_pool_t *dp, zfeature_info_t *feature)
-{
-	int err;
-
-	/* EBUSY here indicates that the feature is already active */
-	err = dsl_sync_task_do(dp, zfs_prop_activate_feature_check,
-	    zfs_prop_activate_feature_sync, dp->dp_spa, feature, 2);
-
-	if (err != 0 && err != EBUSY)
-		return (err);
-	else
-		return (0);
-}
-
-/*
  * Checks for a race condition to make sure we don't increment a feature flag
  * multiple times.
  */
-/*ARGSUSED*/
 static int
-zfs_prop_activate_feature_check(void *arg1, void *arg2, dmu_tx_t *tx)
+zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
 {
-	spa_t *spa = arg1;
-	zfeature_info_t *feature = arg2;
+	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+	zfeature_info_t *feature = arg;
 
 	if (!spa_feature_is_active(spa, feature))
 		return (0);
 	else
-		return (EBUSY);
+		return (SET_ERROR(EBUSY));
 }
 
 /*
@@ -3630,15 +3793,36 @@
  * zfs_prop_activate_feature.
  */
 static void
-zfs_prop_activate_feature_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
 {
-	spa_t *spa = arg1;
-	zfeature_info_t *feature = arg2;
+	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+	zfeature_info_t *feature = arg;
 
 	spa_feature_incr(spa, feature, tx);
 }
 
 /*
+ * Activates a feature on a pool in response to a property setting. This
+ * creates a new sync task which modifies the pool to reflect the feature
+ * as being active.
+ */
+static int
+zfs_prop_activate_feature(spa_t *spa, zfeature_info_t *feature)
+{
+	int err;
+
+	/* EBUSY here indicates that the feature is already active */
+	err = dsl_sync_task(spa_name(spa),
+	    zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
+	    feature, 2);
+
+	if (err != 0 && err != EBUSY)
+		return (err);
+	else
+		return (0);
+}
+
+/*
  * Removes properties from the given props list that fail permission checks
  * needed to clear them and to restore them in case of a receive error. For each
  * property, make sure we have both set and inherit permissions.
@@ -3673,7 +3857,7 @@
 
 		(void) strcpy(zc->zc_value, nvpair_name(pair));
 		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
-		    (err = zfs_secpolicy_inherit(zc, CRED())) != 0) {
+		    (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
 			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
 			VERIFY(nvlist_add_int32(errors,
 			    zc->zc_value, err) == 0);
@@ -3792,7 +3976,6 @@
 zfs_ioc_recv(zfs_cmd_t *zc)
 {
 	file_t *fp;
-	objset_t *os;
 	dmu_recv_cookie_t drc;
 	boolean_t force = (boolean_t)zc->zc_guid;
 	int fd;
@@ -3802,7 +3985,7 @@
 	offset_t off;
 	nvlist_t *props = NULL; /* sent properties */
 	nvlist_t *origprops = NULL; /* existing properties */
-	objset_t *origin = NULL;
+	char *origin = NULL;
 	char *tosnap;
 	char tofs[ZFS_MAXNAMELEN];
 	boolean_t first_recvd_props = B_FALSE;
@@ -3810,7 +3993,7 @@
 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
 	    strchr(zc->zc_value, '@') == NULL ||
 	    strchr(zc->zc_value, '%'))
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	(void) strcpy(tofs, zc->zc_value);
 	tosnap = strchr(tofs, '@');
@@ -3825,16 +4008,29 @@
 	fp = getf(fd);
 	if (fp == NULL) {
 		nvlist_free(props);
-		return (EBADF);
+		return (SET_ERROR(EBADF));
 	}
 
 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 
-	if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) {
-		if ((spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) &&
-		    !dsl_prop_get_hasrecvd(os)) {
+	if (zc->zc_string[0])
+		origin = zc->zc_string;
+
+	error = dmu_recv_begin(tofs, tosnap,
+	    &zc->zc_begin_record, force, origin, &drc);
+	if (error != 0)
+		goto out;
+
+	/*
+	 * Set properties before we receive the stream so that they are applied
+	 * to the new data. Note that we must call dmu_recv_stream() if
+	 * dmu_recv_begin() succeeds.
+	 */
+	if (props != NULL && !drc.drc_newfs) {
+		if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
+		    SPA_VERSION_RECVD_PROPS &&
+		    !dsl_prop_get_hasrecvd(tofs))
 			first_recvd_props = B_TRUE;
-		}
 
 		/*
 		 * If new received properties are supplied, they are to
@@ -3841,7 +4037,7 @@
 		 * completely replace the existing received properties, so stash
 		 * away the existing ones.
 		 */
-		if (dsl_prop_get_received(os, &origprops) == 0) {
+		if (dsl_prop_get_received(tofs, &origprops) == 0) {
 			nvlist_t *errlist = NULL;
 			/*
 			 * Don't bother writing a property if its value won't
@@ -3853,65 +4049,35 @@
 			 */
 			if (!first_recvd_props)
 				props_reduce(props, origprops);
-			if (zfs_check_clearable(tofs, origprops,
-			    &errlist) != 0)
+			if (zfs_check_clearable(tofs, origprops, &errlist) != 0)
 				(void) nvlist_merge(errors, errlist, 0);
 			nvlist_free(errlist);
+
+			if (clear_received_props(tofs, origprops,
+			    first_recvd_props ? NULL : props) != 0)
+				zc->zc_obj |= ZPROP_ERR_NOCLEAR;
+		} else {
+			zc->zc_obj |= ZPROP_ERR_NOCLEAR;
 		}
-
-		dmu_objset_rele(os, FTAG);
 	}
 
-	if (zc->zc_string[0]) {
-		error = dmu_objset_hold(zc->zc_string, FTAG, &origin);
-		if (error)
-			goto out;
-	}
+	if (props != NULL) {
+		props_error = dsl_prop_set_hasrecvd(tofs);
 
-	error = dmu_recv_begin(tofs, tosnap, zc->zc_top_ds,
-	    &zc->zc_begin_record, force, origin, &drc);
-	if (origin)
-		dmu_objset_rele(origin, FTAG);
-	if (error)
-		goto out;
-
-	/*
-	 * Set properties before we receive the stream so that they are applied
-	 * to the new data. Note that we must call dmu_recv_stream() if
-	 * dmu_recv_begin() succeeds.
-	 */
-	if (props) {
-		nvlist_t *errlist;
-
-		if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) {
-			if (drc.drc_newfs) {
-				if (spa_version(os->os_spa) >=
-				    SPA_VERSION_RECVD_PROPS)
-					first_recvd_props = B_TRUE;
-			} else if (origprops != NULL) {
-				if (clear_received_props(os, tofs, origprops,
-				    first_recvd_props ? NULL : props) != 0)
-					zc->zc_obj |= ZPROP_ERR_NOCLEAR;
-			} else {
-				zc->zc_obj |= ZPROP_ERR_NOCLEAR;
-			}
-			dsl_prop_set_hasrecvd(os);
-		} else if (!drc.drc_newfs) {
-			zc->zc_obj |= ZPROP_ERR_NOCLEAR;
+		if (props_error == 0) {
+			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
+			    props, errors);
 		}
-
-		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
-		    props, &errlist);
-		(void) nvlist_merge(errors, errlist, 0);
-		nvlist_free(errlist);
 	}
 
-	if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) {
+	if (zc->zc_nvlist_dst_size != 0 &&
+	    (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
+	    put_nvlist(zc, errors) != 0)) {
 		/*
 		 * Caller made zc->zc_nvlist_dst less than the minimum expected
 		 * size or supplied an invalid address.
 		 */
-		props_error = EINVAL;
+		props_error = SET_ERROR(EINVAL);
 	}
 
 	off = fp->f_offset;
@@ -3959,22 +4125,16 @@
 	/*
 	 * On error, restore the original props.
 	 */
-	if (error && props) {
-		if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
-			if (clear_received_props(os, tofs, props, NULL) != 0) {
-				/*
-				 * We failed to clear the received properties.
-				 * Since we may have left a $recvd value on the
-				 * system, we can't clear the $hasrecvd flag.
-				 */
-				zc->zc_obj |= ZPROP_ERR_NORESTORE;
-			} else if (first_recvd_props) {
-				dsl_prop_unset_hasrecvd(os);
-			}
-			dmu_objset_rele(os, FTAG);
-		} else if (!drc.drc_newfs) {
-			/* We failed to clear the received properties. */
+	if (error != 0 && props != NULL && !drc.drc_newfs) {
+		if (clear_received_props(tofs, props, NULL) != 0) {
+			/*
+			 * We failed to clear the received properties.
+			 * Since we may have left a $recvd value on the
+			 * system, we can't clear the $hasrecvd flag.
+			 */
 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
+		} else if (first_recvd_props) {
+			dsl_prop_unset_hasrecvd(tofs);
 		}
 
 		if (origprops == NULL && !drc.drc_newfs) {
@@ -4026,78 +4186,79 @@
 static int
 zfs_ioc_send(zfs_cmd_t *zc)
 {
-	objset_t *fromsnap = NULL;
-	objset_t *tosnap;
 	int error;
 	offset_t off;
-	dsl_dataset_t *ds;
-	dsl_dataset_t *dsfrom = NULL;
-	spa_t *spa;
-	dsl_pool_t *dp;
 	boolean_t estimate = (zc->zc_guid != 0);
 
-	error = spa_open(zc->zc_name, &spa, FTAG);
-	if (error)
-		return (error);
+	if (zc->zc_obj != 0) {
+		dsl_pool_t *dp;
+		dsl_dataset_t *tosnap;
 
-	dp = spa_get_dsl(spa);
-	rw_enter(&dp->dp_config_rwlock, RW_READER);
-	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
-	rw_exit(&dp->dp_config_rwlock);
-	if (error) {
-		spa_close(spa, FTAG);
-		return (error);
-	}
+		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
+		if (error != 0)
+			return (error);
 
-	error = dmu_objset_from_ds(ds, &tosnap);
-	if (error) {
-		dsl_dataset_rele(ds, FTAG);
-		spa_close(spa, FTAG);
-		return (error);
+		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
+		if (error != 0) {
+			dsl_pool_rele(dp, FTAG);
+			return (error);
+		}
+
+		if (dsl_dir_is_clone(tosnap->ds_dir))
+			zc->zc_fromobj = tosnap->ds_dir->dd_phys->dd_origin_obj;
+		dsl_dataset_rele(tosnap, FTAG);
+		dsl_pool_rele(dp, FTAG);
 	}
 
-	if (zc->zc_fromobj != 0) {
-		rw_enter(&dp->dp_config_rwlock, RW_READER);
-		error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom);
-		rw_exit(&dp->dp_config_rwlock);
-		spa_close(spa, FTAG);
-		if (error) {
-			dsl_dataset_rele(ds, FTAG);
+	if (estimate) {
+		dsl_pool_t *dp;
+		dsl_dataset_t *tosnap;
+		dsl_dataset_t *fromsnap = NULL;
+
+		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
+		if (error != 0)
 			return (error);
-		}
-		error = dmu_objset_from_ds(dsfrom, &fromsnap);
-		if (error) {
-			dsl_dataset_rele(dsfrom, FTAG);
-			dsl_dataset_rele(ds, FTAG);
+
+		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
+		if (error != 0) {
+			dsl_pool_rele(dp, FTAG);
 			return (error);
 		}
-	} else {
-		spa_close(spa, FTAG);
-	}
 
-	if (estimate) {
-		error = dmu_send_estimate(tosnap, fromsnap, zc->zc_obj,
+		if (zc->zc_fromobj != 0) {
+			error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
+			    FTAG, &fromsnap);
+			if (error != 0) {
+				dsl_dataset_rele(tosnap, FTAG);
+				dsl_pool_rele(dp, FTAG);
+				return (error);
+			}
+		}
+
+		error = dmu_send_estimate(tosnap, fromsnap,
 		    &zc->zc_objset_type);
+
+		if (fromsnap != NULL)
+			dsl_dataset_rele(fromsnap, FTAG);
+		dsl_dataset_rele(tosnap, FTAG);
+		dsl_pool_rele(dp, FTAG);
 	} else {
 		file_t *fp = getf(zc->zc_cookie);
-		if (fp == NULL) {
-			dsl_dataset_rele(ds, FTAG);
-			if (dsfrom)
-				dsl_dataset_rele(dsfrom, FTAG);
-			return (EBADF);
-		}
+		if (fp == NULL)
+			return (SET_ERROR(EBADF));
 
 		off = fp->f_offset;
-		error = dmu_send(tosnap, fromsnap, zc->zc_obj,
-		    zc->zc_cookie, fp, &off);
+		error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
+#ifdef illumos
+		    zc->zc_fromobj, zc->zc_cookie, fp->f_vnode, &off);
+#else
+		    zc->zc_fromobj, zc->zc_cookie, fp, &off);
+#endif
 
 		if (off >= 0 && off <= MAXOFFSET_T)
 			fp->f_offset = off;
 		releasef(zc->zc_cookie);
 	}
-	if (dsfrom)
-		dsl_dataset_rele(dsfrom, FTAG);
-	dsl_dataset_rele(ds, FTAG);
 	return (error);
 }
 
@@ -4112,13 +4273,21 @@
 static int
 zfs_ioc_send_progress(zfs_cmd_t *zc)
 {
+	dsl_pool_t *dp;
 	dsl_dataset_t *ds;
 	dmu_sendarg_t *dsp = NULL;
 	int error;
 
-	if ((error = dsl_dataset_hold(zc->zc_name, FTAG, &ds)) != 0)
+	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
+	if (error != 0)
 		return (error);
 
+	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
+	if (error != 0) {
+		dsl_pool_rele(dp, FTAG);
+		return (error);
+	}
+
 	mutex_enter(&ds->ds_sendstream_lock);
 
 	/*
@@ -4137,10 +4306,11 @@
 	if (dsp != NULL)
 		zc->zc_cookie = *(dsp->dsa_off);
 	else
-		error = ENOENT;
+		error = SET_ERROR(ENOENT);
 
 	mutex_exit(&ds->ds_sendstream_lock);
 	dsl_dataset_rele(ds, FTAG);
+	dsl_pool_rele(dp, FTAG);
 	return (error);
 }
 
@@ -4214,7 +4384,7 @@
 	spa = spa_lookup(zc->zc_name);
 	if (spa == NULL) {
 		mutex_exit(&spa_namespace_lock);
-		return (EIO);
+		return (SET_ERROR(EIO));
 	}
 	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
 		/* we need to let spa_open/spa_load clear the chains */
@@ -4230,7 +4400,7 @@
 		nvlist_t *config = NULL;
 
 		if (zc->zc_nvlist_src == 0)
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 
 		if ((error = get_nvlist(zc->zc_nvlist_src,
 		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
@@ -4247,7 +4417,7 @@
 		}
 	}
 
-	if (error)
+	if (error != 0)
 		return (error);
 
 	spa_vdev_state_enter(spa, SCL_NONE);
@@ -4259,7 +4429,7 @@
 		if (vd == NULL) {
 			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
 			spa_close(spa, FTAG);
-			return (ENODEV);
+			return (SET_ERROR(ENODEV));
 		}
 	}
 
@@ -4271,7 +4441,7 @@
 	 * Resume any suspended I/Os.
 	 */
 	if (zio_resume(spa) != 0)
-		error = EIO;
+		error = SET_ERROR(EIO);
 
 	spa_close(spa, FTAG);
 
@@ -4285,7 +4455,7 @@
 	int error;
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
-	if (error)
+	if (error != 0)
 		return (error);
 
 	spa_vdev_state_enter(spa, SCL_NONE);
@@ -4325,7 +4495,7 @@
 	if (cp)
 		*cp = '\0';
 	(void) dmu_objset_find(zc->zc_value,
-	    zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS);
+	    zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
 	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
 }
 
@@ -4348,10 +4518,10 @@
 	int error;
 
 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
-	if (error)
+	if (error != 0)
 		return (error);
 
 	error = zfs_userspace_one(zfsvfs,
@@ -4379,10 +4549,10 @@
 	int bufsize = zc->zc_nvlist_dst_size;
 
 	if (bufsize <= 0)
-		return (ENOMEM);
+		return (SET_ERROR(ENOMEM));
 
 	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
-	if (error)
+	if (error != 0)
 		return (error);
 
 	void *buf = kmem_alloc(bufsize, KM_SLEEP);
@@ -4432,7 +4602,7 @@
 	} else {
 		/* XXX kind of reading contents without owning */
 		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
-		if (error)
+		if (error != 0)
 			return (error);
 
 		error = dmu_objset_userspace_upgrade(os);
@@ -4474,12 +4644,12 @@
 	if (sharefs_mod == NULL && ((sharefs_mod =
 	    ddi_modopen("fs/sharefs",
 	    KRTLD_MODE_FIRST, &error)) == NULL)) {
-		return (ENOSYS);
+		return (SET_ERROR(ENOSYS));
 	}
 	if (zshare_fs == NULL && ((zshare_fs =
 	    (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
 	    ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
-		return (ENOSYS);
+		return (SET_ERROR(ENOSYS));
 	}
 	return (0);
 }
@@ -4500,7 +4670,7 @@
 			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
 				mutex_exit(&zfs_share_lock);
-				return (ENOSYS);
+				return (SET_ERROR(ENOSYS));
 			}
 			if (znfsexport_fs == NULL &&
 			    ((znfsexport_fs = (int (*)(void *))
@@ -4507,12 +4677,12 @@
 			    ddi_modsym(nfs_mod,
 			    "nfs_export", &error)) == NULL)) {
 				mutex_exit(&zfs_share_lock);
-				return (ENOSYS);
+				return (SET_ERROR(ENOSYS));
 			}
 			error = zfs_init_sharefs();
-			if (error) {
+			if (error != 0) {
 				mutex_exit(&zfs_share_lock);
-				return (ENOSYS);
+				return (SET_ERROR(ENOSYS));
 			}
 			zfs_nfsshare_inited = 1;
 			mutex_exit(&zfs_share_lock);
@@ -4526,18 +4696,18 @@
 			    ddi_modopen("drv/smbsrv",
 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
 				mutex_exit(&zfs_share_lock);
-				return (ENOSYS);
+				return (SET_ERROR(ENOSYS));
 			}
 			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
 			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
 			    "smb_server_share", &error)) == NULL)) {
 				mutex_exit(&zfs_share_lock);
-				return (ENOSYS);
+				return (SET_ERROR(ENOSYS));
 			}
 			error = zfs_init_sharefs();
-			if (error) {
+			if (error != 0) {
 				mutex_exit(&zfs_share_lock);
-				return (ENOSYS);
+				return (SET_ERROR(ENOSYS));
 			}
 			zfs_smbshare_inited = 1;
 			mutex_exit(&zfs_share_lock);
@@ -4544,7 +4714,7 @@
 		}
 		break;
 	default:
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	switch (zc->zc_share.z_sharetype) {
@@ -4603,7 +4773,7 @@
 	int error;
 
 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
-	if (error)
+	if (error != 0)
 		return (error);
 
 	error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
@@ -4620,31 +4790,32 @@
  * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
  *
  * outputs:
+ * zc_value		short name of new snapshot
  */
 static int
 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
 {
 	char *snap_name;
+	char *hold_name;
 	int error;
+	minor_t minor;
 
+	error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
+	if (error != 0)
+		return (error);
+
 	snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
 	    (u_longlong_t)ddi_get_lbolt64());
+	hold_name = kmem_asprintf("%%%s", zc->zc_value);
 
-	if (strlen(snap_name) >= MAXNAMELEN) {
-		strfree(snap_name);
-		return (E2BIG);
-	}
-
-	error = dmu_objset_snapshot(zc->zc_name, snap_name, snap_name,
-	    NULL, B_FALSE, B_TRUE, zc->zc_cleanup_fd);
-	if (error != 0) {
-		strfree(snap_name);
-		return (error);
-	}
-
-	(void) strcpy(zc->zc_value, snap_name);
+	error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
+	    hold_name);
+	if (error == 0)
+		(void) strcpy(zc->zc_value, snap_name);
 	strfree(snap_name);
-	return (0);
+	strfree(hold_name);
+	zfs_onexit_fd_rele(zc->zc_cleanup_fd);
+	return (error);
 }
 
 /*
@@ -4659,39 +4830,26 @@
 static int
 zfs_ioc_diff(zfs_cmd_t *zc)
 {
-	objset_t *fromsnap;
-	objset_t *tosnap;
 	file_t *fp;
 	offset_t off;
 	int error;
 
-	error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap);
-	if (error)
-		return (error);
-
-	error = dmu_objset_hold(zc->zc_value, FTAG, &fromsnap);
-	if (error) {
-		dmu_objset_rele(tosnap, FTAG);
-		return (error);
-	}
-
 	fp = getf(zc->zc_cookie);
-	if (fp == NULL) {
-		dmu_objset_rele(fromsnap, FTAG);
-		dmu_objset_rele(tosnap, FTAG);
-		return (EBADF);
-	}
+	if (fp == NULL)
+		return (SET_ERROR(EBADF));
 
 	off = fp->f_offset;
 
-	error = dmu_diff(tosnap, fromsnap, fp, &off);
+#ifdef illumos
+	error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
+#else
+	error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
+#endif
 
 	if (off >= 0 && off <= MAXOFFSET_T)
 		fp->f_offset = off;
 	releasef(zc->zc_cookie);
 
-	dmu_objset_rele(fromsnap, FTAG);
-	dmu_objset_rele(tosnap, FTAG);
 	return (error);
 }
 
@@ -4744,7 +4902,7 @@
 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
 	    zc->zc_name) != 0)) {
 		VN_RELE(vp);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	dzp = VTOZ(vp);
@@ -4763,13 +4921,13 @@
 		    ZFS_SHARES_DIR);
 		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
 		error = dmu_tx_assign(tx, TXG_WAIT);
-		if (error) {
+		if (error != 0) {
 			dmu_tx_abort(tx);
 		} else {
 			error = zfs_create_share_dir(zfsvfs, tx);
 			dmu_tx_commit(tx);
 		}
-		if (error) {
+		if (error != 0) {
 			mutex_exit(&zfsvfs->z_lock);
 			VN_RELE(vp);
 			ZFS_EXIT(zfsvfs);
@@ -4835,7 +4993,7 @@
 		break;
 
 	default:
-		error = EINVAL;
+		error = SET_ERROR(EINVAL);
 		break;
 	}
 
@@ -4851,124 +5009,82 @@
 }
 
 /*
- * inputs:
- * zc_name		name of filesystem
- * zc_value		short name of snap
- * zc_string		user-supplied tag for this hold
- * zc_cookie		recursive flag
- * zc_temphold		set if hold is temporary
- * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
- * zc_sendobj		if non-zero, the objid for zc_name at zc_value
- * zc_createtxg		if zc_sendobj is non-zero, snap must have zc_createtxg
+ * innvl: {
+ *     "holds" -> { snapname -> holdname (string), ... }
+ *     (optional) "cleanup_fd" -> fd (int32)
+ * }
  *
- * outputs:		none
+ * outnvl: {
+ *     snapname -> error value (int32)
+ *     ...
+ * }
  */
+/* ARGSUSED */
 static int
-zfs_ioc_hold(zfs_cmd_t *zc)
+zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
 {
-	boolean_t recursive = zc->zc_cookie;
-	spa_t *spa;
-	dsl_pool_t *dp;
-	dsl_dataset_t *ds;
+	nvlist_t *holds;
+	int cleanup_fd = -1;
 	int error;
 	minor_t minor = 0;
 
-	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
-		return (EINVAL);
+	error = nvlist_lookup_nvlist(args, "holds", &holds);
+	if (error != 0)
+		return (SET_ERROR(EINVAL));
 
-	if (zc->zc_sendobj == 0) {
-		return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value,
-		    zc->zc_string, recursive, zc->zc_temphold,
-		    zc->zc_cleanup_fd));
-	}
-
-	if (recursive)
-		return (EINVAL);
-
-	error = spa_open(zc->zc_name, &spa, FTAG);
-	if (error)
-		return (error);
-
-	dp = spa_get_dsl(spa);
-	rw_enter(&dp->dp_config_rwlock, RW_READER);
-	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
-	rw_exit(&dp->dp_config_rwlock);
-	spa_close(spa, FTAG);
-	if (error)
-		return (error);
-
-	/*
-	 * Until we have a hold on this snapshot, it's possible that
-	 * zc_sendobj could've been destroyed and reused as part
-	 * of a later txg.  Make sure we're looking at the right object.
-	 */
-	if (zc->zc_createtxg != ds->ds_phys->ds_creation_txg) {
-		dsl_dataset_rele(ds, FTAG);
-		return (ENOENT);
-	}
-
-	if (zc->zc_cleanup_fd != -1 && zc->zc_temphold) {
-		error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
-		if (error) {
-			dsl_dataset_rele(ds, FTAG);
+	if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
+		error = zfs_onexit_fd_hold(cleanup_fd, &minor);
+		if (error != 0)
 			return (error);
-		}
 	}
 
-	error = dsl_dataset_user_hold_for_send(ds, zc->zc_string,
-	    zc->zc_temphold);
-	if (minor != 0) {
-		if (error == 0) {
-			dsl_register_onexit_hold_cleanup(ds, zc->zc_string,
-			    minor);
-		}
-		zfs_onexit_fd_rele(zc->zc_cleanup_fd);
-	}
-	dsl_dataset_rele(ds, FTAG);
-
+	error = dsl_dataset_user_hold(holds, minor, errlist);
+	if (minor != 0)
+		zfs_onexit_fd_rele(cleanup_fd);
 	return (error);
 }
 
 /*
- * inputs:
- * zc_name	name of dataset from which we're releasing a user hold
- * zc_value	short name of snap
- * zc_string	user-supplied tag for this hold
- * zc_cookie	recursive flag
+ * innvl is not used.
  *
- * outputs:	none
+ * outnvl: {
+ *    holdname -> time added (uint64 seconds since epoch)
+ *    ...
+ * }
  */
+/* ARGSUSED */
 static int
-zfs_ioc_release(zfs_cmd_t *zc)
+zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
 {
-	boolean_t recursive = zc->zc_cookie;
-
-	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
-		return (EINVAL);
-
-	return (dsl_dataset_user_release(zc->zc_name, zc->zc_value,
-	    zc->zc_string, recursive));
+	return (dsl_dataset_get_holds(snapname, outnvl));
 }
 
 /*
- * inputs:
- * zc_name		name of filesystem
+ * innvl: {
+ *     snapname -> { holdname, ... }
+ *     ...
+ * }
  *
- * outputs:
- * zc_nvlist_src{_size}	nvlist of snapshot holds
+ * outnvl: {
+ *     snapname -> error value (int32)
+ *     ...
+ * }
  */
+/* ARGSUSED */
 static int
-zfs_ioc_get_holds(zfs_cmd_t *zc)
+zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
 {
-	nvlist_t *nvp;
-	int error;
+	nvpair_t *pair;
 
-	if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) {
-		error = put_nvlist(zc, nvp);
-		nvlist_free(nvp);
-	}
+	/*
+	 * The release may cause the snapshot to be destroyed; make sure it
+	 * is not mounted.
+	 */
+	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
+	    pair = nvlist_next_nvpair(holds, pair))
+		zfs_unmount_snap(nvpair_name(pair));
 
-	return (error);
+	return (dsl_dataset_user_release(holds, errlist));
 }
 
 /*
@@ -4985,14 +5101,21 @@
 zfs_ioc_space_written(zfs_cmd_t *zc)
 {
 	int error;
+	dsl_pool_t *dp;
 	dsl_dataset_t *new, *old;
 
-	error = dsl_dataset_hold(zc->zc_name, FTAG, &new);
+	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 	if (error != 0)
 		return (error);
-	error = dsl_dataset_hold(zc->zc_value, FTAG, &old);
+	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
 	if (error != 0) {
+		dsl_pool_rele(dp, FTAG);
+		return (error);
+	}
+	error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
+	if (error != 0) {
 		dsl_dataset_rele(new, FTAG);
+		dsl_pool_rele(dp, FTAG);
 		return (error);
 	}
 
@@ -5000,46 +5123,59 @@
 	    &zc->zc_objset_type, &zc->zc_perm_action);
 	dsl_dataset_rele(old, FTAG);
 	dsl_dataset_rele(new, FTAG);
+	dsl_pool_rele(dp, FTAG);
 	return (error);
 }
 
 /*
- * inputs:
- * zc_name		full name of last snapshot
- * zc_value		full name of first snapshot
+ * innvl: {
+ *     "firstsnap" -> snapshot name
+ * }
  *
- * outputs:
- * zc_cookie		space in bytes
- * zc_objset_type	compressed space in bytes
- * zc_perm_action	uncompressed space in bytes
+ * outnvl: {
+ *     "used" -> space in bytes
+ *     "compressed" -> compressed space in bytes
+ *     "uncompressed" -> uncompressed space in bytes
+ * }
  */
 static int
-zfs_ioc_space_snaps(zfs_cmd_t *zc)
+zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	int error;
+	dsl_pool_t *dp;
 	dsl_dataset_t *new, *old;
+	char *firstsnap;
+	uint64_t used, comp, uncomp;
 
-	error = dsl_dataset_hold(zc->zc_name, FTAG, &new);
+	if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
+		return (SET_ERROR(EINVAL));
+
+	error = dsl_pool_hold(lastsnap, FTAG, &dp);
 	if (error != 0)
 		return (error);
-	error = dsl_dataset_hold(zc->zc_value, FTAG, &old);
+
+	error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
 	if (error != 0) {
+		dsl_pool_rele(dp, FTAG);
+		return (error);
+	}
+	error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
+	if (error != 0) {
 		dsl_dataset_rele(new, FTAG);
+		dsl_pool_rele(dp, FTAG);
 		return (error);
 	}
 
-	error = dsl_dataset_space_wouldfree(old, new, &zc->zc_cookie,
-	    &zc->zc_objset_type, &zc->zc_perm_action);
+	error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
 	dsl_dataset_rele(old, FTAG);
 	dsl_dataset_rele(new, FTAG);
+	dsl_pool_rele(dp, FTAG);
+	fnvlist_add_uint64(outnvl, "used", used);
+	fnvlist_add_uint64(outnvl, "compressed", comp);
+	fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
 	return (error);
 }
 
-/*
- * pool create, destroy, and export don't log the history as part of
- * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export
- * do the logging of those commands.
- */
 static int
 zfs_ioc_jail(zfs_cmd_t *zc)
 {
@@ -5056,139 +5192,391 @@
 	    (int)zc->zc_jailid));
 }
 
-static zfs_ioc_vec_t zfs_ioc_vec[] = {
-	{ zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_pool_destroy,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_pool_configs,	zfs_secpolicy_none, NO_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE,
-	    POOL_CHECK_READONLY },
-	{ zfs_ioc_pool_upgrade,	zfs_secpolicy_config, POOL_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_vdev_set_state, zfs_secpolicy_config,	POOL_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_vdev_setpath,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_vdev_setfru,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_objset_stats,	zfs_secpolicy_read, DATASET_NAME, B_FALSE,
-	    POOL_CHECK_SUSPENDED },
-	{ zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
-	    POOL_CHECK_SUSPENDED },
-	{ zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
-	    POOL_CHECK_SUSPENDED },
-	{ zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY},
-	{ zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_rename, zfs_secpolicy_rename,	DATASET_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_inject_fault,	zfs_secpolicy_inject, NO_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_destroy_snaps_nvl, zfs_secpolicy_destroy_recursive,
-	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, POOL_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_obj_to_path, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
-	    POOL_CHECK_SUSPENDED },
-	{ zfs_ioc_pool_set_props, zfs_secpolicy_config,	POOL_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_userspace_one, zfs_secpolicy_userspace_one, DATASET_NAME,
-	    B_FALSE, POOL_CHECK_NONE },
-	{ zfs_ioc_userspace_many, zfs_secpolicy_userspace_many, DATASET_NAME,
-	    B_FALSE, POOL_CHECK_NONE },
-	{ zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
-	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
-	    POOL_CHECK_SUSPENDED },
-	{ zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_next_obj, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_diff, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME,
-	    B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
-	    POOL_CHECK_SUSPENDED },
-	{ zfs_ioc_jail, zfs_secpolicy_config, DATASET_NAME, B_TRUE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_unjail, zfs_secpolicy_config, DATASET_NAME, B_TRUE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_pool_reguid, zfs_secpolicy_config, POOL_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
-	{ zfs_ioc_space_written, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
-	    POOL_CHECK_SUSPENDED },
-	{ zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
-	    POOL_CHECK_SUSPENDED },
-	{ zfs_ioc_send_progress, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
-	    POOL_CHECK_NONE },
-	{ zfs_ioc_pool_reopen, zfs_secpolicy_config, POOL_NAME, B_TRUE,
-	    POOL_CHECK_SUSPENDED },
-};
+/*
+ * innvl: {
+ *     "fd" -> file descriptor to write stream to (int32)
+ *     (optional) "fromsnap" -> full snap name to send an incremental from
+ * }
+ *
+ * outnvl is unused
+ */
+/* ARGSUSED */
+static int
+zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+	int error;
+	offset_t off;
+	char *fromname = NULL;
+	int fd;
 
+	error = nvlist_lookup_int32(innvl, "fd", &fd);
+	if (error != 0)
+		return (SET_ERROR(EINVAL));
+
+	(void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
+
+	file_t *fp = getf(fd);
+	if (fp == NULL)
+		return (SET_ERROR(EBADF));
+
+	off = fp->f_offset;
+#ifdef illumos
+	error = dmu_send(snapname, fromname, fd, fp->f_vnode, &off);
+#else
+	error = dmu_send(snapname, fromname, fd, fp, &off);
+#endif
+
+#ifdef illumos
+	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
+		fp->f_offset = off;
+#else
+	fp->f_offset = off;
+#endif
+
+	releasef(fd);
+	return (error);
+}
+
+/*
+ * Determine approximately how large a zfs send stream will be -- the number
+ * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
+ *
+ * innvl: {
+ *     (optional) "fromsnap" -> full snap name to send an incremental from
+ * }
+ *
+ * outnvl: {
+ *     "space" -> bytes of space (uint64)
+ * }
+ */
+static int
+zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+	dsl_pool_t *dp;
+	dsl_dataset_t *fromsnap = NULL;
+	dsl_dataset_t *tosnap;
+	int error;
+	char *fromname;
+	uint64_t space;
+
+	error = dsl_pool_hold(snapname, FTAG, &dp);
+	if (error != 0)
+		return (error);
+
+	error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
+	if (error != 0) {
+		dsl_pool_rele(dp, FTAG);
+		return (error);
+	}
+
+	error = nvlist_lookup_string(innvl, "fromsnap", &fromname);
+	if (error == 0) {
+		error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
+		if (error != 0) {
+			dsl_dataset_rele(tosnap, FTAG);
+			dsl_pool_rele(dp, FTAG);
+			return (error);
+		}
+	}
+
+	error = dmu_send_estimate(tosnap, fromsnap, &space);
+	fnvlist_add_uint64(outnvl, "space", space);
+
+	if (fromsnap != NULL)
+		dsl_dataset_rele(fromsnap, FTAG);
+	dsl_dataset_rele(tosnap, FTAG);
+	dsl_pool_rele(dp, FTAG);
+	return (error);
+}
+
+
+static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
+
+static void
+zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
+    zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
+    boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
+{
+	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
+
+	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
+	ASSERT3U(ioc, <, ZFS_IOC_LAST);
+	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
+	ASSERT3P(vec->zvec_func, ==, NULL);
+
+	vec->zvec_legacy_func = func;
+	vec->zvec_secpolicy = secpolicy;
+	vec->zvec_namecheck = namecheck;
+	vec->zvec_allow_log = log_history;
+	vec->zvec_pool_check = pool_check;
+}
+
+/*
+ * See the block comment at the beginning of this file for details on
+ * each argument to this function.
+ */
+static void
+zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
+    zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
+    zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
+    boolean_t allow_log)
+{
+	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
+
+	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
+	ASSERT3U(ioc, <, ZFS_IOC_LAST);
+	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
+	ASSERT3P(vec->zvec_func, ==, NULL);
+
+	/* if we are logging, the name must be valid */
+	ASSERT(!allow_log || namecheck != NO_NAME);
+
+	vec->zvec_name = name;
+	vec->zvec_func = func;
+	vec->zvec_secpolicy = secpolicy;
+	vec->zvec_namecheck = namecheck;
+	vec->zvec_pool_check = pool_check;
+	vec->zvec_smush_outnvlist = smush_outnvlist;
+	vec->zvec_allow_log = allow_log;
+}
+
+static void
+zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
+    zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
+    zfs_ioc_poolcheck_t pool_check)
+{
+	zfs_ioctl_register_legacy(ioc, func, secpolicy,
+	    POOL_NAME, log_history, pool_check);
+}
+
+static void
+zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
+    zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
+{
+	zfs_ioctl_register_legacy(ioc, func, secpolicy,
+	    DATASET_NAME, B_FALSE, pool_check);
+}
+
+static void
+zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
+{
+	zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
+	    POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
+}
+
+static void
+zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
+    zfs_secpolicy_func_t *secpolicy)
+{
+	zfs_ioctl_register_legacy(ioc, func, secpolicy,
+	    NO_NAME, B_FALSE, POOL_CHECK_NONE);
+}
+
+static void
+zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
+    zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
+{
+	zfs_ioctl_register_legacy(ioc, func, secpolicy,
+	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
+}
+
+static void
+zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
+{
+	zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
+	    zfs_secpolicy_read);
+}
+
+static void
+zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
+	zfs_secpolicy_func_t *secpolicy)
+{
+	zfs_ioctl_register_legacy(ioc, func, secpolicy,
+	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
+}
+
+static void
+zfs_ioctl_init(void)
+{
+	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
+	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
+	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+
+	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
+	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
+	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);
+
+	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
+	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
+	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
+
+	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
+	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
+	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
+
+	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
+	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
+	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
+
+	zfs_ioctl_register("create", ZFS_IOC_CREATE,
+	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
+	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+
+	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
+	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
+	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+
+	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
+	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
+	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+
+	zfs_ioctl_register("hold", ZFS_IOC_HOLD,
+	    zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
+	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+	zfs_ioctl_register("release", ZFS_IOC_RELEASE,
+	    zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
+	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+
+	zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
+	    zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
+	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
+
+	/* IOCTLS that use the legacy function signature */
+
+	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
+	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);
+
+	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
+	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
+	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
+	    zfs_ioc_pool_scan);
+	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
+	    zfs_ioc_pool_upgrade);
+	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
+	    zfs_ioc_vdev_add);
+	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
+	    zfs_ioc_vdev_remove);
+	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
+	    zfs_ioc_vdev_set_state);
+	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
+	    zfs_ioc_vdev_attach);
+	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
+	    zfs_ioc_vdev_detach);
+	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
+	    zfs_ioc_vdev_setpath);
+	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
+	    zfs_ioc_vdev_setfru);
+	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
+	    zfs_ioc_pool_set_props);
+	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
+	    zfs_ioc_vdev_split);
+	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
+	    zfs_ioc_pool_reguid);
+
+	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
+	    zfs_ioc_pool_configs, zfs_secpolicy_none);
+	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
+	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
+	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
+	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
+	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
+	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
+	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
+	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);
+
+	/*
+	 * pool destroy, and export don't log the history as part of
+	 * zfsdev_ioctl, but rather zfs_ioc_pool_export
+	 * does the logging of those commands.
+	 */
+	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
+	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
+	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
+	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
+
+	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
+	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
+	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
+	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
+
+	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
+	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
+	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
+	    zfs_ioc_dsobj_to_dsname,
+	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
+	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
+	    zfs_ioc_pool_get_history,
+	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
+
+	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
+	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
+
+	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
+	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
+	zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
+	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
+
+	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
+	    zfs_ioc_space_written);
+	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
+	    zfs_ioc_objset_recvd_props);
+	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
+	    zfs_ioc_next_obj);
+	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
+	    zfs_ioc_get_fsacl);
+	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
+	    zfs_ioc_objset_stats);
+	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
+	    zfs_ioc_objset_zplprops);
+	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
+	    zfs_ioc_dataset_list_next);
+	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
+	    zfs_ioc_snapshot_list_next);
+	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
+	    zfs_ioc_send_progress);
+
+	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
+	    zfs_ioc_diff, zfs_secpolicy_diff);
+	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
+	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
+	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
+	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
+	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
+	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
+	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
+	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
+	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
+	    zfs_ioc_send, zfs_secpolicy_send);
+
+	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
+	    zfs_secpolicy_none);
+	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
+	    zfs_secpolicy_destroy);
+	zfs_ioctl_register_dataset_modify(ZFS_IOC_ROLLBACK, zfs_ioc_rollback,
+	    zfs_secpolicy_rollback);
+	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
+	    zfs_secpolicy_rename);
+	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
+	    zfs_secpolicy_recv);
+	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
+	    zfs_secpolicy_promote);
+	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
+	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
+	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
+	    zfs_secpolicy_set_fsacl);
+
+	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
+	    zfs_secpolicy_share, POOL_CHECK_NONE);
+	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
+	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
+	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
+	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
+	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
+	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
+	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
+	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
+}
+
 int
 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
     zfs_ioc_poolcheck_t check)
@@ -5204,9 +5592,9 @@
 	error = spa_open(name, &spa, FTAG);
 	if (error == 0) {
 		if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
-			error = EAGAIN;
+			error = SET_ERROR(EAGAIN);
 		else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
-			error = EROFS;
+			error = SET_ERROR(EROFS);
 		spa_close(spa, FTAG);
 	}
 	return (error);
@@ -5245,10 +5633,10 @@
 
 	minor = zfsdev_minor_alloc();
 	if (minor == 0)
-		return (ENXIO);
+		return (SET_ERROR(ENXIO));
 
 	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
-		return (EAGAIN);
+		return (SET_ERROR(EAGAIN));
 
 	devfs_set_cdevpriv((void *)(uintptr_t)minor, zfsdev_close);
 
@@ -5320,36 +5708,66 @@
 }
 
 static int
-zfsdev_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
+zfsdev_ioctl(struct cdev *dev, u_long zcmd, caddr_t arg, int flag,
     struct thread *td)
 {
 	zfs_cmd_t *zc;
-	uint_t vec;
-	int cflag, error, len;
+	uint_t vecnum;
+	int error, rc, len;
+#ifdef illumos
+	minor_t minor = getminor(dev);
+#else
+	zfs_iocparm_t *zc_iocparm;
+	int cflag, cmd, oldvecnum;
+	boolean_t newioc, compat;
+	cred_t *cr = td->td_ucred;
+#endif
+	const zfs_ioc_vec_t *vec;
+	char *saved_poolname = NULL;
+	nvlist_t *innvl = NULL;
 
 	cflag = ZFS_CMD_COMPAT_NONE;
-	len = IOCPARM_LEN(cmd);
+	compat = B_FALSE;
+	newioc = B_TRUE;
 
+	len = IOCPARM_LEN(zcmd);
+	cmd = zcmd & 0xff;
+
 	/*
 	 * Check if we are talking to supported older binaries
 	 * and translate zfs_cmd if necessary
 	 */
-	if (len < sizeof(zfs_cmd_t))
-		if (len == sizeof(zfs_cmd_v28_t)) {
+	if (len != sizeof(zfs_iocparm_t)) {
+		newioc = B_FALSE;
+		if (len == sizeof(zfs_cmd_t)) {
+			cflag = ZFS_CMD_COMPAT_LZC;
+			vecnum = cmd;
+		} else if (len == sizeof(zfs_cmd_deadman_t)) {
+			cflag = ZFS_CMD_COMPAT_DEADMAN;
+			compat = B_TRUE;
+			vecnum = cmd;
+		} else if (len == sizeof(zfs_cmd_v28_t)) {
 			cflag = ZFS_CMD_COMPAT_V28;
-			vec = ZFS_IOC(cmd);
+			compat = B_TRUE;
+			vecnum = cmd;
 		} else if (len == sizeof(zfs_cmd_v15_t)) {
 			cflag = ZFS_CMD_COMPAT_V15;
-			vec = zfs_ioctl_v15_to_v28[ZFS_IOC(cmd)];
+			compat = B_TRUE;
+			vecnum = zfs_ioctl_v15_to_v28[cmd];
 		} else
 			return (EINVAL);
-	else
-		vec = ZFS_IOC(cmd);
+	} else
+		vecnum = cmd;
 
-	if (cflag != ZFS_CMD_COMPAT_NONE) {
-		if (vec == ZFS_IOC_COMPAT_PASS)
+#ifdef illumos
+	vecnum = cmd - ZFS_IOC_FIRST;
+	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
+#endif
+
+	if (compat) {
+		if (vecnum == ZFS_IOC_COMPAT_PASS)
 			return (0);
-		else if (vec == ZFS_IOC_COMPAT_FAIL)
+		else if (vecnum == ZFS_IOC_COMPAT_FAIL)
 			return (ENOTSUP);
 	}
 
@@ -5358,65 +5776,203 @@
 	 * for the zfs_cmd_t request.  Bail out if not so we
 	 * will not access undefined memory region.
 	 */
-	if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
-		return (EINVAL);
+	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
+		return (SET_ERROR(EINVAL));
+	vec = &zfs_ioc_vec[vecnum];
 
-	if (cflag != ZFS_CMD_COMPAT_NONE) {
+#ifdef illumos
+	zc = kmem_zalloc(sizeof(zfs_cmd_t), KM_SLEEP);
+	bzero(zc, sizeof(zfs_cmd_t));
+
+	error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
+	if (error != 0) {
+		error = SET_ERROR(EFAULT);
+		goto out;
+	}
+#else	/* !illumos */
+	/*
+	 * We don't alloc/free zc only if talking to library ioctl version 2
+	 */
+	if (cflag != ZFS_CMD_COMPAT_LZC) {
 		zc = kmem_zalloc(sizeof(zfs_cmd_t), KM_SLEEP);
 		bzero(zc, sizeof(zfs_cmd_t));
-		zfs_cmd_compat_get(zc, addr, cflag);
-		zfs_ioctl_compat_pre(zc, &vec, cflag);
 	} else {
-		zc = (void *)addr;
+		zc = (void *)arg;
+		error = 0;
 	}
 
-	error = zfs_ioc_vec[vec].zvec_secpolicy(zc, td->td_ucred);
+	if (newioc) {
+		zc_iocparm = (void *)arg;
+		if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_t)) {
+			error = SET_ERROR(EFAULT);
+			goto out;
+		}
+		error = ddi_copyin((void *)(uintptr_t)zc_iocparm->zfs_cmd, zc,
+		    sizeof(zfs_cmd_t), flag);
+		if (error != 0) {
+			error = SET_ERROR(EFAULT);
+			goto out;
+		}
+	}
 
+	if (compat) {
+		zfs_cmd_compat_get(zc, arg, cflag);
+		oldvecnum = vecnum;
+		error = zfs_ioctl_compat_pre(zc, &vecnum, cflag);
+		if (error != 0)
+			goto out;
+		if (oldvecnum != vecnum)
+			vec = &zfs_ioc_vec[vecnum];
+	}
+#endif	/* !illumos */
+
+	zc->zc_iflags = flag & FKIOCTL;
+	if (zc->zc_nvlist_src_size != 0) {
+		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
+		    zc->zc_iflags, &innvl);
+		if (error != 0)
+			goto out;
+	}
+
+	/* rewrite innvl for backwards compatibility */
+	if (compat)
+		innvl = zfs_ioctl_compat_innvl(zc, innvl, vecnum, cflag);
+
 	/*
 	 * Ensure that all pool/dataset names are valid before we pass down to
 	 * the lower layers.
 	 */
-	if (error == 0) {
-		zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
-		zc->zc_iflags = flag & FKIOCTL;
-		switch (zfs_ioc_vec[vec].zvec_namecheck) {
-		case POOL_NAME:
-			if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
-				error = EINVAL;
-			else
-				error = pool_status_check(zc->zc_name,
-				    zfs_ioc_vec[vec].zvec_namecheck,
-				    zfs_ioc_vec[vec].zvec_pool_check);
-			break;
+	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
+	switch (vec->zvec_namecheck) {
+	case POOL_NAME:
+		if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
+			error = SET_ERROR(EINVAL);
+		else
+			error = pool_status_check(zc->zc_name,
+			    vec->zvec_namecheck, vec->zvec_pool_check);
+		break;
 
-		case DATASET_NAME:
-			if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
-				error = EINVAL;
-			else
-				error = pool_status_check(zc->zc_name,
-				    zfs_ioc_vec[vec].zvec_namecheck,
-				    zfs_ioc_vec[vec].zvec_pool_check);
-			break;
+	case DATASET_NAME:
+		if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
+			error = SET_ERROR(EINVAL);
+		else
+			error = pool_status_check(zc->zc_name,
+			    vec->zvec_namecheck, vec->zvec_pool_check);
+		break;
 
-		case NO_NAME:
-			break;
+	case NO_NAME:
+		break;
+	}
+
+	if (error == 0 && !(flag & FKIOCTL))
+		error = vec->zvec_secpolicy(zc, innvl, cr);
+
+	if (error != 0)
+		goto out;
+
+	/* legacy ioctls can modify zc_name */
+	len = strcspn(zc->zc_name, "/@") + 1;
+	saved_poolname = kmem_alloc(len, KM_SLEEP);
+	(void) strlcpy(saved_poolname, zc->zc_name, len);
+
+	if (vec->zvec_func != NULL) {
+		nvlist_t *outnvl;
+		int puterror = 0;
+		spa_t *spa;
+		nvlist_t *lognv = NULL;
+
+		ASSERT(vec->zvec_legacy_func == NULL);
+
+		/*
+		 * Add the innvl to the lognv before calling the func,
+		 * in case the func changes the innvl.
+		 */
+		if (vec->zvec_allow_log) {
+			lognv = fnvlist_alloc();
+			fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
+			    vec->zvec_name);
+			if (!nvlist_empty(innvl)) {
+				fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
+				    innvl);
+			}
 		}
+
+		outnvl = fnvlist_alloc();
+		error = vec->zvec_func(zc->zc_name, innvl, outnvl);
+
+		if (error == 0 && vec->zvec_allow_log &&
+		    spa_open(zc->zc_name, &spa, FTAG) == 0) {
+			if (!nvlist_empty(outnvl)) {
+				fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
+				    outnvl);
+			}
+			(void) spa_history_log_nvl(spa, lognv);
+			spa_close(spa, FTAG);
+		}
+		fnvlist_free(lognv);
+
+		/* rewrite outnvl for backwards compatibility */
+		if (cflag != ZFS_CMD_COMPAT_NONE && cflag != ZFS_CMD_COMPAT_LZC)
+			outnvl = zfs_ioctl_compat_outnvl(zc, outnvl, vecnum,
+			    cflag);
+
+		if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
+			int smusherror = 0;
+			if (vec->zvec_smush_outnvlist) {
+				smusherror = nvlist_smush(outnvl,
+				    zc->zc_nvlist_dst_size);
+			}
+			if (smusherror == 0)
+				puterror = put_nvlist(zc, outnvl);
+		}
+
+		if (puterror != 0)
+			error = puterror;
+
+		nvlist_free(outnvl);
+	} else {
+		error = vec->zvec_legacy_func(zc);
 	}
 
-	if (error == 0)
-		error = zfs_ioc_vec[vec].zvec_func(zc);
+out:
+	nvlist_free(innvl);
 
-	if (error == 0) {
-		if (zfs_ioc_vec[vec].zvec_his_log)
-			zfs_log_history(zc);
+	if (compat) {
+		zfs_ioctl_compat_post(zc, cmd, cflag);
+		zfs_cmd_compat_put(zc, arg, vecnum, cflag);
 	}
 
-	if (cflag != ZFS_CMD_COMPAT_NONE) {
-		zfs_ioctl_compat_post(zc, ZFS_IOC(cmd), cflag);
-		zfs_cmd_compat_put(zc, addr, cflag);
-		kmem_free(zc, sizeof(zfs_cmd_t));
+#ifdef illumos
+	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
+	if (error == 0 && rc != 0)
+		error = SET_ERROR(EFAULT);
+#else
+	if (newioc) {
+		rc = ddi_copyout(zc, (void *)(uintptr_t)zc_iocparm->zfs_cmd,
+		    sizeof (zfs_cmd_t), flag);
+		if (error == 0 && rc != 0)
+			error = SET_ERROR(EFAULT);
 	}
+#endif
+	if (error == 0 && vec->zvec_allow_log) {
+		char *s = tsd_get(zfs_allow_log_key);
+		if (s != NULL)
+			strfree(s);
+		(void) tsd_set(zfs_allow_log_key, saved_poolname);
+	} else {
+		if (saved_poolname != NULL)
+			strfree(saved_poolname);
+	}
 
+#ifdef illumos
+	kmem_free(zc, sizeof (zfs_cmd_t));
+#else
+	/*
+	 * We don't alloc/free zc only if talking to library ioctl version 2
+	 */
+	if (cflag != ZFS_CMD_COMPAT_LZC)
+		kmem_free(zc, sizeof (zfs_cmd_t));
+#endif
 	return (error);
 }
 
@@ -5541,6 +6097,13 @@
 };
 
 static void
+zfs_allow_log_destroy(void *arg)
+{
+	char *poolname = arg;
+	strfree(poolname);
+}
+
+static void
 zfsdev_init(void)
 {
 	zfsdev = make_dev(&zfs_cdevsw, 0x0, UID_ROOT, GID_OPERATOR, 0666,
@@ -5557,9 +6120,6 @@
 static struct root_hold_token *zfs_root_token;
 struct proc *zfsproc;
 
-uint_t zfs_fsyncer_key;
-extern uint_t rrw_tsd_key;
-
 #ifdef sun
 int
 _init(void)
@@ -5569,6 +6129,7 @@
 	spa_init(FREAD | FWRITE);
 	zfs_init();
 	zvol_init();
+	zfs_ioctl_init();
 
 	if ((error = mod_install(&modlinkage)) != 0) {
 		zvol_fini();
@@ -5578,7 +6139,8 @@
 	}
 
 	tsd_create(&zfs_fsyncer_key, NULL);
-	tsd_create(&rrw_tsd_key, NULL);
+	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
+	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
 
 	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
 	ASSERT(error == 0);
@@ -5593,7 +6155,7 @@
 	int error;
 
 	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
-		return (EBUSY);
+		return (SET_ERROR(EBUSY));
 
 	if ((error = mod_remove(&modlinkage)) != 0)
 		return (error);
@@ -5637,9 +6199,11 @@
 		spa_init(FREAD | FWRITE);
 		zfs_init();
 		zvol_init();
+		zfs_ioctl_init();
 
 		tsd_create(&zfs_fsyncer_key, NULL);
-		tsd_create(&rrw_tsd_key, NULL);
+		tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
+		tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
 
 		printf("ZFS storage pool version: features support (" SPA_VERSION_STRING ")\n");
 		root_mount_rel(zfs_root_token);
@@ -5660,6 +6224,7 @@
 
 		tsd_destroy(&zfs_fsyncer_key);
 		tsd_destroy(&rrw_tsd_key);
+		tsd_destroy(&zfs_allow_log_key);
 
 		mutex_destroy(&zfs_share_lock);
 		break;

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/types.h>
@@ -105,7 +106,7 @@
 {
 	*zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
 	if (*zo == NULL)
-		return (EBADF);
+		return (SET_ERROR(EBADF));
 
 	return (0);
 }
@@ -126,7 +127,7 @@
 
 	fp = getf(fd);
 	if (fp == NULL)
-		return (EBADF);
+		return (SET_ERROR(EBADF));
 
 	tmpfp = curthread->td_fpop;
 	curthread->td_fpop = fp;
@@ -216,7 +217,7 @@
 		kmem_free(ap, sizeof (zfs_onexit_action_node_t));
 	} else {
 		mutex_exit(&zo->zo_lock);
-		error = ENOENT;
+		error = SET_ERROR(ENOENT);
 	}
 
 	return (error);
@@ -245,7 +246,7 @@
 	if (ap != NULL)
 		*data = ap->za_data;
 	else
-		error = ENOENT;
+		error = SET_ERROR(ENOENT);
 	mutex_exit(&zo->zo_lock);
 
 	return (error);

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_replay.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_replay.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_replay.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/types.h>
@@ -73,7 +74,7 @@
 static int
 zfs_replay_error(zfsvfs_t *zfsvfs, lr_t *lr, boolean_t byteswap)
 {
-	return (ENOTSUP);
+	return (SET_ERROR(ENOTSUP));
 }
 
 static void
@@ -396,7 +397,7 @@
 #endif
 		break;
 	default:
-		error = ENOTSUP;
+		error = SET_ERROR(ENOTSUP);
 	}
 
 bail:
@@ -528,7 +529,7 @@
 		error = VOP_SYMLINK(ZTOV(dzp), &vp, &cn, &xva.xva_vattr, link /*,vflg*/);
 		break;
 	default:
-		error = ENOTSUP;
+		error = SET_ERROR(ENOTSUP);
 	}
 	VOP_UNLOCK(ZTOV(dzp), 0);
 
@@ -584,7 +585,7 @@
 		error = VOP_RMDIR(ZTOV(dzp), vp, &cn /*,vflg*/);
 		break;
 	default:
-		error = ENOTSUP;
+		error = SET_ERROR(ENOTSUP);
 	}
 	vput(vp);
 	VOP_UNLOCK(ZTOV(dzp), 0);

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -22,6 +22,7 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011 Pawel Jakub Dawidek <pawel at dawidek.net>.
  * All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
@@ -395,8 +396,10 @@
 	boolean_t do_setuid = B_FALSE;
 	boolean_t exec = B_FALSE;
 	boolean_t do_exec = B_FALSE;
+#ifdef illumos
 	boolean_t devices = B_FALSE;
 	boolean_t do_devices = B_FALSE;
+#endif
 	boolean_t xattr = B_FALSE;
 	boolean_t do_xattr = B_FALSE;
 	boolean_t atime = B_FALSE;
@@ -492,25 +495,33 @@
 	 * overboard...
 	 */
 	ds = dmu_objset_ds(os);
-	error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs);
+	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
+	error = dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
-	    "xattr", xattr_changed_cb, zfsvfs);
+	    zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
-	    "recordsize", blksz_changed_cb, zfsvfs);
+	    zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
-	    "readonly", readonly_changed_cb, zfsvfs);
+	    zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);
+#ifdef illumos
 	error = error ? error : dsl_prop_register(ds,
-	    "setuid", setuid_changed_cb, zfsvfs);
+	    zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zfsvfs);
+#endif
 	error = error ? error : dsl_prop_register(ds,
-	    "exec", exec_changed_cb, zfsvfs);
+	    zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
-	    "snapdir", snapdir_changed_cb, zfsvfs);
+	    zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
-	    "aclmode", acl_mode_changed_cb, zfsvfs);
+	    zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
-	    "aclinherit", acl_inherit_changed_cb, zfsvfs);
+	    zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
-	    "vscan", vscan_changed_cb, zfsvfs);
+	    zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
+	    zfsvfs);
+	error = error ? error : dsl_prop_register(ds,
+	    zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs);
+	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
 	if (error)
 		goto unregister;
 
@@ -538,19 +549,31 @@
 	 * registered, but this is OK; it will simply return ENOMSG,
 	 * which we will ignore.
 	 */
-	(void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs);
-	(void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs);
-	(void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs);
-	(void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs);
-	(void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs);
-	(void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs);
-	(void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs);
-	(void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs);
-	(void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
-	    zfsvfs);
-	(void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs);
+	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ATIME),
+	    atime_changed_cb, zfsvfs);
+	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_XATTR),
+	    xattr_changed_cb, zfsvfs);
+	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
+	    blksz_changed_cb, zfsvfs);
+	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_READONLY),
+	    readonly_changed_cb, zfsvfs);
+#ifdef illumos
+	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_DEVICES),
+	    devices_changed_cb, zfsvfs);
+#endif
+	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SETUID),
+	    setuid_changed_cb, zfsvfs);
+	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_EXEC),
+	    exec_changed_cb, zfsvfs);
+	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SNAPDIR),
+	    snapdir_changed_cb, zfsvfs);
+	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLMODE),
+	    acl_mode_changed_cb, zfsvfs);
+	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLINHERIT),
+	    acl_inherit_changed_cb, zfsvfs);
+	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_VSCAN),
+	    vscan_changed_cb, zfsvfs);
 	return (error);
-
 }
 
 static int
@@ -557,13 +580,11 @@
 zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
     uint64_t *userp, uint64_t *groupp)
 {
-	int error = 0;
-
 	/*
 	 * Is it a valid type of object to track?
 	 */
 	if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 
 	/*
 	 * If we have a NULL data pointer
@@ -572,7 +593,7 @@
 	 * use the same ids
 	 */
 	if (data == NULL)
-		return (EEXIST);
+		return (SET_ERROR(EEXIST));
 
 	if (bonustype == DMU_OT_ZNODE) {
 		znode_phys_t *znp = data;
@@ -615,7 +636,7 @@
 			*groupp = BSWAP_64(*groupp);
 		}
 	}
-	return (error);
+	return (0);
 }
 
 static void
@@ -662,7 +683,7 @@
 	uint64_t obj;
 
 	if (!dmu_objset_userspace_present(zfsvfs->z_os))
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 
 	obj = zfs_userquota_prop_to_obj(zfsvfs, type);
 	if (obj == 0) {
@@ -706,7 +727,7 @@
 	if (domain && domain[0]) {
 		domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok);
 		if (domainid == -1)
-			return (ENOENT);
+			return (SET_ERROR(ENOENT));
 	}
 	fuid = FUID_ENCODE(domainid, rid);
 	(void) sprintf(buf, "%llx", (longlong_t)fuid);
@@ -724,7 +745,7 @@
 	*valp = 0;
 
 	if (!dmu_objset_userspace_present(zfsvfs->z_os))
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 
 	obj = zfs_userquota_prop_to_obj(zfsvfs, type);
 	if (obj == 0)
@@ -751,10 +772,10 @@
 	boolean_t fuid_dirtied;
 
 	if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	if (zfsvfs->z_version < ZPL_VERSION_USERSPACE)
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 
 	objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj :
 	    &zfsvfs->z_groupquota_obj;
@@ -882,7 +903,7 @@
 		    "on a version %lld pool\n. Pool must be upgraded to mount "
 		    "this file system.", (u_longlong_t)zfsvfs->z_version,
 		    (u_longlong_t)spa_version(dmu_objset_spa(os)));
-		error = ENOTSUP;
+		error = SET_ERROR(ENOTSUP);
 		goto out;
 	}
 	if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
@@ -967,7 +988,7 @@
 	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
 	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
 	    offsetof(znode_t, z_link_node));
-	rrw_init(&zfsvfs->z_teardown_lock);
+	rrw_init(&zfsvfs->z_teardown_lock, B_FALSE);
 	rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
 	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
@@ -1131,6 +1152,18 @@
 		return (error);
 	zfsvfs->z_vfs = vfsp;
 
+#ifdef illumos
+	/* Initialize the generic filesystem structure. */
+	vfsp->vfs_bcount = 0;
+	vfsp->vfs_data = NULL;
+
+	if (zfs_create_unique_device(&mount_dev) == -1) {
+		error = SET_ERROR(ENODEV);
+		goto out;
+	}
+	ASSERT(vfs_devismounted(mount_dev) == 0);
+#endif
+
 	if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize,
 	    NULL))
 		goto out;
@@ -1262,7 +1295,7 @@
 
 	while (*str) {
 		if (*str < '0' || *str > '9')
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 
 		num = num*10 + *str++ - '0';
 	}
@@ -1284,7 +1317,7 @@
 	int error;
 
 	if (*bpath == 0 || *bpath == '/')
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	(void) strcpy(outpath, bpath);
 
@@ -1329,10 +1362,10 @@
 
 		if (dsl_prop_get_integer(dsname,
 		    zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL))
-			return (EACCES);
+			return (SET_ERROR(EACCES));
 		return (rdonly ? 0 : EACCES);
 	}
-	return (EACCES);
+	return (SET_ERROR(EACCES));
 }
 
 /*
@@ -1364,7 +1397,7 @@
 	error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
 	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
 	if (error)
-		return (EACCES);
+		return (SET_ERROR(EACCES));
 
 	/*
 	 * If labeling is NOT enabled, then disallow the mount of datasets
@@ -1374,7 +1407,7 @@
 	if (!is_system_labeled()) {
 		if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
 			return (0);
-		return (EACCES);
+		return (SET_ERROR(EACCES));
 	}
 
 	/*
@@ -1391,7 +1424,7 @@
 
 		if (dsl_prop_get_integer(osname,
 		    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
-			return (EACCES);
+			return (SET_ERROR(EACCES));
 		if (!zoned)
 			return (zfs_check_global_label(osname, ds_hexsl));
 		else
@@ -1415,8 +1448,9 @@
 		char *str = NULL;
 
 		if (l_to_str_internal(mnt_sl, &str) == 0 &&
-		    dsl_prop_set(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
-		    ZPROP_SRC_LOCAL, 1, strlen(str) + 1, str) == 0)
+		    dsl_prop_set_string(osname,
+		    zfs_prop_to_name(ZFS_PROP_MLSLABEL),
+		    ZPROP_SRC_LOCAL, str) == 0)
 			retv = 0;
 		if (str != NULL)
 			kmem_free(str, strlen(str) + 1);
@@ -1462,7 +1496,7 @@
 	 */
 	if (why == ROOT_INIT) {
 		if (zfsrootdone++)
-			return (EBUSY);
+			return (SET_ERROR(EBUSY));
 		/*
 		 * the process of doing a spa_load will require the
 		 * clock to be set before we could (for example) do
@@ -1474,7 +1508,7 @@
 		if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) {
 			cmn_err(CE_NOTE, "spa_get_bootfs: can not get "
 			    "bootfs name");
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 		}
 		zfs_devid = spa_get_bootprop("diskdevid");
 		error = spa_import_rootpool(rootfs.bo_name, zfs_devid);
@@ -1543,7 +1577,7 @@
 	 * if "why" is equal to anything else other than ROOT_INIT,
 	 * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it.
 	 */
-	return (ENOTSUP);
+	return (SET_ERROR(ENOTSUP));
 }
 #endif	/* OPENSOLARIS_MOUNTROOT */
 
@@ -1577,11 +1611,33 @@
 	int		error = 0;
 	int		canwrite;
 
+#ifdef illumos
+	if (mvp->v_type != VDIR)
+		return (SET_ERROR(ENOTDIR));
+
+	mutex_enter(&mvp->v_lock);
+	if ((uap->flags & MS_REMOUNT) == 0 &&
+	    (uap->flags & MS_OVERLAY) == 0 &&
+	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
+		mutex_exit(&mvp->v_lock);
+		return (SET_ERROR(EBUSY));
+	}
+	mutex_exit(&mvp->v_lock);
+
+	/*
+	 * ZFS does not support passing unparsed data in via MS_DATA.
+	 * Users should use the MS_OPTIONSTR interface; this means
+	 * that all option parsing is already done and the options struct
+	 * can be interrogated.
+	 */
+	if ((uap->flags & MS_DATA) && uap->datalen > 0)
+#else
 	if (!prison_allow(td->td_ucred, PR_ALLOW_MOUNT_ZFS))
-		return (EPERM);
+		return (SET_ERROR(EPERM));
 
 	if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL))
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
+#endif	/* ! illumos */
 
 	/*
 	 * If full-owner-access is enabled and delegated administration is
@@ -1636,7 +1692,7 @@
 	 */
 	if (!INGLOBALZONE(curthread) &&
 	    (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
-		error = EPERM;
+		error = SET_ERROR(EPERM);
 		goto out;
 	}
 
@@ -1827,7 +1883,7 @@
 	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
 		rw_exit(&zfsvfs->z_teardown_inactive_lock);
 		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
-		return (EIO);
+		return (SET_ERROR(EIO));
 	}
 
 	/*
@@ -1876,7 +1932,7 @@
 	if (dsl_dataset_is_dirty(dmu_objset_ds(zfsvfs->z_os)) &&
 	    !(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY))
 		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
-	(void) dmu_objset_evict_dbufs(zfsvfs->z_os);
+	dmu_objset_evict_dbufs(zfsvfs->z_os);
 
 	return (0);
 }
@@ -1960,11 +2016,11 @@
 		 */
 		if (zfsvfs->z_ctldir == NULL) {
 			if (vfsp->vfs_count > 1)
-				return (EBUSY);
+				return (SET_ERROR(EBUSY));
 		} else {
 			if (vfsp->vfs_count > 2 ||
 			    zfsvfs->z_ctldir->v_count > 1)
-				return (EBUSY);
+				return (SET_ERROR(EBUSY));
 		}
 	}
 
@@ -2093,7 +2149,7 @@
 
 		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
 		if (err)
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 		ZFS_ENTER(zfsvfs);
 	}
 
@@ -2107,7 +2163,7 @@
 			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
 	} else {
 		ZFS_EXIT(zfsvfs);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	/*
@@ -2152,7 +2208,7 @@
 		dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
 		VN_RELE(ZTOV(zp));
 		ZFS_EXIT(zfsvfs);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	*vpp = ZTOV(zp);
@@ -2363,14 +2419,14 @@
 	dmu_tx_t *tx;
 
 	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	if (newvers < zfsvfs->z_version)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	if (zfs_spa_version_map(newvers) >
 	    spa_version(dmu_objset_spa(zfsvfs->z_os)))
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 
 	tx = dmu_tx_create(os);
 	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
@@ -2409,9 +2465,8 @@
 		sa_register_update_callback(os, zfs_sa_upgrade);
 	}
 
-	spa_history_log_internal(LOG_DS_UPGRADE,
-	    dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu",
-	    zfsvfs->z_version, newvers, dmu_objset_id(os));
+	spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
+	    "from %llu to %llu", zfsvfs->z_version, newvers);
 
 	dmu_tx_commit(tx);
 

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 /* Portions Copyright 2007 Jeremy Teo */
@@ -177,7 +177,7 @@
 	if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
 	    ((flag & FAPPEND) == 0)) {
 		ZFS_EXIT(zfsvfs);
-		return (EPERM);
+		return (SET_ERROR(EPERM));
 	}
 
 	if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
@@ -185,7 +185,7 @@
 	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) {
 		if (fs_vscan(*vpp, cr, 0) != 0) {
 			ZFS_EXIT(zfsvfs);
-			return (EACCES);
+			return (SET_ERROR(EACCES));
 		}
 	}
 
@@ -242,7 +242,7 @@
 
 	file_sz = zp->z_size;
 	if (noff >= file_sz)  {
-		return (ENXIO);
+		return (SET_ERROR(ENXIO));
 	}
 
 	if (cmd == _FIO_SEEK_HOLE)
@@ -261,7 +261,7 @@
 			*off = file_sz;
 			return (0);
 		}
-		return (ENXIO);
+		return (SET_ERROR(ENXIO));
 	}
 
 	if (noff < *off)
@@ -296,7 +296,7 @@
 	case _FIO_SEEK_HOLE:
 #ifdef sun
 		if (ddi_copyin((void *)data, &off, sizeof (off), flag))
-			return (EFAULT);
+			return (SET_ERROR(EFAULT));
 #else
 		off = *(offset_t *)data;
 #endif
@@ -312,13 +312,13 @@
 			return (error);
 #ifdef sun
 		if (ddi_copyout(&off, (void *)data, sizeof (off), flag))
-			return (EFAULT);
+			return (SET_ERROR(EFAULT));
 #else
 		*(offset_t *)data = off;
 #endif
 		return (0);
 	}
-	return (ENOTTY);
+	return (SET_ERROR(ENOTTY));
 }
 
 static vm_page_t
@@ -653,7 +653,7 @@
 
 	if (zp->z_pflags & ZFS_AV_QUARANTINED) {
 		ZFS_EXIT(zfsvfs);
-		return (EACCES);
+		return (SET_ERROR(EACCES));
 	}
 
 	/*
@@ -661,7 +661,7 @@
 	 */
 	if (uio->uio_loffset < (offset_t)0) {
 		ZFS_EXIT(zfsvfs);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	/*
@@ -754,7 +754,7 @@
 		if (error) {
 			/* convert checksum errors into IO errors */
 			if (error == ECKSUM)
-				error = EIO;
+				error = SET_ERROR(EIO);
 			break;
 		}
 
@@ -842,7 +842,7 @@
 	    ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) &&
 	    (uio->uio_loffset < zp->z_size))) {
 		ZFS_EXIT(zfsvfs);
-		return (EPERM);
+		return (SET_ERROR(EPERM));
 	}
 
 	zilog = zfsvfs->z_log;
@@ -853,7 +853,7 @@
 	woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset;
 	if (woff < 0) {
 		ZFS_EXIT(zfsvfs);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	/*
@@ -916,7 +916,7 @@
 	if (woff >= limit) {
 		zfs_range_unlock(rl);
 		ZFS_EXIT(zfsvfs);
-		return (EFBIG);
+		return (SET_ERROR(EFBIG));
 	}
 
 	if ((woff + n) > limit || woff > (limit - n))
@@ -940,7 +940,7 @@
 		    zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
 			if (abuf != NULL)
 				dmu_return_arcbuf(abuf);
-			error = EDQUOT;
+			error = SET_ERROR(EDQUOT);
 			break;
 		}
 
@@ -1208,7 +1208,7 @@
 	 * Nothing to do if the file has been removed
 	 */
 	if (zfs_zget(zfsvfs, object, &zp) != 0)
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 	if (zp->z_unlinked) {
 		/*
 		 * Release the vnode asynchronously as we currently have the
@@ -1216,7 +1216,7 @@
 		 */
 		VN_RELE_ASYNC(ZTOV(zp),
 		    dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 	}
 
 	zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
@@ -1234,7 +1234,7 @@
 		zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER);
 		/* test for truncation needs to be done while range locked */
 		if (offset >= zp->z_size) {
-			error = ENOENT;
+			error = SET_ERROR(ENOENT);
 		} else {
 			error = dmu_read(os, object, offset, size, buf,
 			    DMU_READ_NO_PREFETCH);
@@ -1261,10 +1261,10 @@
 		}
 		/* test for truncation needs to be done while range locked */
 		if (lr->lr_offset >= zp->z_size)
-			error = ENOENT;
+			error = SET_ERROR(ENOENT);
 #ifdef DEBUG
 		if (zil_fault_io) {
-			error = EIO;
+			error = SET_ERROR(EIO);
 			zil_fault_io = 0;
 		}
 #endif
@@ -1345,7 +1345,7 @@
 		svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
 		VN_RELE(*vpp);
 		if (svp == NULL)
-			error = ENOSYS;
+			error = SET_ERROR(ENOSYS);
 		*vpp = svp;
 	}
 	return (error);
@@ -1389,9 +1389,9 @@
 	if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) {
 
 		if (dvp->v_type != VDIR) {
-			return (ENOTDIR);
+			return (SET_ERROR(ENOTDIR));
 		} else if (zdp->z_sa_hdl == NULL) {
-			return (EIO);
+			return (SET_ERROR(EIO));
 		}
 
 		if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) {
@@ -1413,7 +1413,7 @@
 				}
 				if (tvp == DNLC_NO_VNODE) {
 					VN_RELE(tvp);
-					return (ENOENT);
+					return (SET_ERROR(ENOENT));
 				} else {
 					*vpp = tvp;
 					return (specvp_check(vpp, cr));
@@ -1436,7 +1436,7 @@
 		 */
 		if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) {
 			ZFS_EXIT(zfsvfs);
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 		}
 #endif
 
@@ -1446,7 +1446,7 @@
 		 */
 		if (zdp->z_pflags & ZFS_XATTR) {
 			ZFS_EXIT(zfsvfs);
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 		}
 
 		if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) {
@@ -1470,7 +1470,7 @@
 
 	if (dvp->v_type != VDIR) {
 		ZFS_EXIT(zfsvfs);
-		return (ENOTDIR);
+		return (SET_ERROR(ENOTDIR));
 	}
 
 	/*
@@ -1485,7 +1485,7 @@
 	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		ZFS_EXIT(zfsvfs);
-		return (EILSEQ);
+		return (SET_ERROR(EILSEQ));
 	}
 
 	error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp);
@@ -1609,7 +1609,7 @@
 	if (zfsvfs->z_use_fuids == B_FALSE &&
 	    (vsecp || (vap->va_mask & AT_XVATTR) ||
 	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(dzp);
@@ -1619,7 +1619,7 @@
 	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		ZFS_EXIT(zfsvfs);
-		return (EILSEQ);
+		return (SET_ERROR(EILSEQ));
 	}
 
 	if (vap->va_mask & AT_XVATTR) {
@@ -1656,7 +1656,7 @@
 			if (have_acl)
 				zfs_acl_ids_free(&acl_ids);
 			if (strcmp(name, "..") == 0)
-				error = EISDIR;
+				error = SET_ERROR(EISDIR);
 			ZFS_EXIT(zfsvfs);
 			return (error);
 		}
@@ -1684,7 +1684,7 @@
 		    (vap->va_type != VREG)) {
 			if (have_acl)
 				zfs_acl_ids_free(&acl_ids);
-			error = EINVAL;
+			error = SET_ERROR(EINVAL);
 			goto out;
 		}
 
@@ -1695,7 +1695,7 @@
 
 		if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
 			zfs_acl_ids_free(&acl_ids);
-			error = EDQUOT;
+			error = SET_ERROR(EDQUOT);
 			goto out;
 		}
 
@@ -1754,7 +1754,7 @@
 		 * Can't truncate an existing file if in exclusive mode.
 		 */
 		if (excl == EXCL) {
-			error = EEXIST;
+			error = SET_ERROR(EEXIST);
 			goto out;
 		}
 		/*
@@ -1761,7 +1761,7 @@
 		 * Can't open a directory for writing.
 		 */
 		if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) {
-			error = EISDIR;
+			error = SET_ERROR(EISDIR);
 			goto out;
 		}
 		/*
@@ -1884,7 +1884,7 @@
 	 * Need to use rmdir for removing directories.
 	 */
 	if (vp->v_type == VDIR) {
-		error = EPERM;
+		error = SET_ERROR(EPERM);
 		goto out;
 	}
 
@@ -2099,7 +2099,7 @@
 	if (zfsvfs->z_use_fuids == B_FALSE &&
 	    (vsecp || (vap->va_mask & AT_XVATTR) ||
 	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(dzp);
@@ -2107,13 +2107,13 @@
 
 	if (dzp->z_pflags & ZFS_XATTR) {
 		ZFS_EXIT(zfsvfs);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	if (zfsvfs->z_utf8 && u8_validate(dirname,
 	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		ZFS_EXIT(zfsvfs);
-		return (EILSEQ);
+		return (SET_ERROR(EILSEQ));
 	}
 	if (flags & FIGNORECASE)
 		zf |= ZCILOOK;
@@ -2159,7 +2159,7 @@
 		zfs_acl_ids_free(&acl_ids);
 		zfs_dirent_unlock(dl);
 		ZFS_EXIT(zfsvfs);
-		return (EDQUOT);
+		return (SET_ERROR(EDQUOT));
 	}
 
 	/*
@@ -2285,12 +2285,12 @@
 	}
 
 	if (vp->v_type != VDIR) {
-		error = ENOTDIR;
+		error = SET_ERROR(ENOTDIR);
 		goto out;
 	}
 
 	if (vp == cwd) {
-		error = EINVAL;
+		error = SET_ERROR(EINVAL);
 		goto out;
 	}
 
@@ -2436,7 +2436,7 @@
 	 */
 	if (uio->uio_iov->iov_len <= 0) {
 		ZFS_EXIT(zfsvfs);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	/*
@@ -2550,7 +2550,7 @@
 				    "entry, obj = %lld, offset = %lld\n",
 				    (u_longlong_t)zp->z_id,
 				    (u_longlong_t)offset);
-				error = ENXIO;
+				error = SET_ERROR(ENXIO);
 				goto update;
 			}
 
@@ -2599,7 +2599,7 @@
 			 * Did we manage to fit anything in the buffer?
 			 */
 			if (!outcount) {
-				error = EINVAL;
+				error = SET_ERROR(EINVAL);
 				goto update;
 			}
 			break;
@@ -2985,7 +2985,7 @@
 		return (0);
 
 	if (mask & AT_NOSET)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
@@ -3002,17 +3002,17 @@
 	    ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
 	    (mask & AT_XVATTR))) {
 		ZFS_EXIT(zfsvfs);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	if (mask & AT_SIZE && vp->v_type == VDIR) {
 		ZFS_EXIT(zfsvfs);
-		return (EISDIR);
+		return (SET_ERROR(EISDIR));
 	}
 
 	if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
 		ZFS_EXIT(zfsvfs);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	/*
@@ -3030,12 +3030,12 @@
 	    ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
 	    ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
 		ZFS_EXIT(zfsvfs);
-		return (EPERM);
+		return (SET_ERROR(EPERM));
 	}
 
 	if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) {
 		ZFS_EXIT(zfsvfs);
-		return (EPERM);
+		return (SET_ERROR(EPERM));
 	}
 
 	/*
@@ -3048,7 +3048,7 @@
 		if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
 		    ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
 			ZFS_EXIT(zfsvfs);
-			return (EOVERFLOW);
+			return (SET_ERROR(EOVERFLOW));
 		}
 	}
 
@@ -3059,7 +3059,7 @@
 	/* Can this be moved to before the top label? */
 	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
 		ZFS_EXIT(zfsvfs);
-		return (EROFS);
+		return (SET_ERROR(EROFS));
 	}
 
 	/*
@@ -3217,7 +3217,7 @@
 		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
 			mutex_exit(&zp->z_lock);
 			ZFS_EXIT(zfsvfs);
-			return (EPERM);
+			return (SET_ERROR(EPERM));
 		}
 
 		if (need_policy == FALSE &&
@@ -3304,7 +3304,7 @@
 			    zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) {
 				if (attrzp)
 					VN_RELE(ZTOV(attrzp));
-				err = EDQUOT;
+				err = SET_ERROR(EDQUOT);
 				goto out2;
 			}
 		}
@@ -3316,7 +3316,7 @@
 			    zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) {
 				if (attrzp)
 					VN_RELE(ZTOV(attrzp));
-				err = EDQUOT;
+				err = SET_ERROR(EDQUOT);
 				goto out2;
 			}
 		}
@@ -3330,7 +3330,7 @@
 
 		if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED &&
 		    !(zp->z_pflags & ZFS_ACL_TRIVIAL)) {
-			err = EPERM;
+			err = SET_ERROR(EPERM);
 			goto out;
 		}
 
@@ -3658,7 +3658,7 @@
 		*zlpp = zl;
 
 		if (oidp == szp->z_id)		/* We're a descendant of szp */
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 
 		if (oidp == rootid)		/* We've hit the top */
 			return (0);
@@ -3726,7 +3726,7 @@
 
 	if (tdvp->v_vfsp != sdvp->v_vfsp || zfsctl_is_node(tdvp)) {
 		ZFS_EXIT(zfsvfs);
-		return (EXDEV);
+		return (SET_ERROR(EXDEV));
 	}
 
 	tdzp = VTOZ(tdvp);
@@ -3734,7 +3734,7 @@
 	if (zfsvfs->z_utf8 && u8_validate(tnm,
 	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		ZFS_EXIT(zfsvfs);
-		return (EILSEQ);
+		return (SET_ERROR(EILSEQ));
 	}
 
 	if (flags & FIGNORECASE)
@@ -3752,7 +3752,7 @@
 	 */
 	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
 		ZFS_EXIT(zfsvfs);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	/*
@@ -3857,7 +3857,7 @@
 		 * not the case for FreeBSD, so we check for "." explicitly.
 		 */
 		if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0)
-			serr = EINVAL;
+			serr = SET_ERROR(EINVAL);
 		ZFS_EXIT(zfsvfs);
 		return (serr);
 	}
@@ -3869,7 +3869,7 @@
 			rw_exit(&sdzp->z_name_lock);
 
 		if (strcmp(tnm, "..") == 0)
-			terr = EINVAL;
+			terr = SET_ERROR(EINVAL);
 		ZFS_EXIT(zfsvfs);
 		return (terr);
 	}
@@ -3902,12 +3902,12 @@
 		 */
 		if (ZTOV(szp)->v_type == VDIR) {
 			if (ZTOV(tzp)->v_type != VDIR) {
-				error = ENOTDIR;
+				error = SET_ERROR(ENOTDIR);
 				goto out;
 			}
 		} else {
 			if (ZTOV(tzp)->v_type == VDIR) {
-				error = EISDIR;
+				error = SET_ERROR(EISDIR);
 				goto out;
 			}
 		}
@@ -4089,7 +4089,7 @@
 	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		ZFS_EXIT(zfsvfs);
-		return (EILSEQ);
+		return (SET_ERROR(EILSEQ));
 	}
 	if (flags & FIGNORECASE)
 		zflg |= ZCILOOK;
@@ -4096,7 +4096,7 @@
 
 	if (len > MAXPATHLEN) {
 		ZFS_EXIT(zfsvfs);
-		return (ENAMETOOLONG);
+		return (SET_ERROR(ENAMETOOLONG));
 	}
 
 	if ((error = zfs_acl_ids_create(dzp, 0,
@@ -4126,7 +4126,7 @@
 		zfs_acl_ids_free(&acl_ids);
 		zfs_dirent_unlock(dl);
 		ZFS_EXIT(zfsvfs);
-		return (EDQUOT);
+		return (SET_ERROR(EDQUOT));
 	}
 	tx = dmu_tx_create(zfsvfs->z_os);
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
@@ -4288,12 +4288,12 @@
 	 */
 	if (svp->v_type == VDIR) {
 		ZFS_EXIT(zfsvfs);
-		return (EPERM);
+		return (SET_ERROR(EPERM));
 	}
 
 	if (svp->v_vfsp != tdvp->v_vfsp || zfsctl_is_node(svp)) {
 		ZFS_EXIT(zfsvfs);
-		return (EXDEV);
+		return (SET_ERROR(EXDEV));
 	}
 
 	szp = VTOZ(svp);
@@ -4308,13 +4308,13 @@
 	}
 	if (parent == zfsvfs->z_shares_dir) {
 		ZFS_EXIT(zfsvfs);
-		return (EPERM);
+		return (SET_ERROR(EPERM));
 	}
 
 	if (zfsvfs->z_utf8 && u8_validate(name,
 	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		ZFS_EXIT(zfsvfs);
-		return (EILSEQ);
+		return (SET_ERROR(EILSEQ));
 	}
 	if (flags & FIGNORECASE)
 		zf |= ZCILOOK;
@@ -4327,7 +4327,7 @@
 	 */
 	if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) {
 		ZFS_EXIT(zfsvfs);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 
@@ -4334,7 +4334,7 @@
 	owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
 	if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) {
 		ZFS_EXIT(zfsvfs);
-		return (EPERM);
+		return (SET_ERROR(EPERM));
 	}
 
 	if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
@@ -4475,7 +4475,7 @@
 
 	if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
 	    zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
-		err = EDQUOT;
+		err = SET_ERROR(EDQUOT);
 		goto out;
 	}
 top:
@@ -4727,7 +4727,7 @@
 	 */
 	if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) {
 		ZFS_EXIT(zfsvfs);
-		return (EAGAIN);
+		return (SET_ERROR(EAGAIN));
 	}
 	ZFS_EXIT(zfsvfs);
 	return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
@@ -4791,7 +4791,7 @@
 			pvn_read_done(pp, B_ERROR);
 			/* convert checksum errors into IO errors */
 			if (err == ECKSUM)
-				err = EIO;
+				err = SET_ERROR(EIO);
 			return (err);
 		}
 		cur_pp = cur_pp->p_next;
@@ -4941,28 +4941,28 @@
 	if ((prot & PROT_WRITE) && (zp->z_pflags &
 	    (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
 		ZFS_EXIT(zfsvfs);
-		return (EPERM);
+		return (SET_ERROR(EPERM));
 	}
 
 	if ((prot & (PROT_READ | PROT_EXEC)) &&
 	    (zp->z_pflags & ZFS_AV_QUARANTINED)) {
 		ZFS_EXIT(zfsvfs);
-		return (EACCES);
+		return (SET_ERROR(EACCES));
 	}
 
 	if (vp->v_flag & VNOMAP) {
 		ZFS_EXIT(zfsvfs);
-		return (ENOSYS);
+		return (SET_ERROR(ENOSYS));
 	}
 
 	if (off < 0 || len > MAXOFFSET_T - off) {
 		ZFS_EXIT(zfsvfs);
-		return (ENXIO);
+		return (SET_ERROR(ENXIO));
 	}
 
 	if (vp->v_type != VREG) {
 		ZFS_EXIT(zfsvfs);
-		return (ENODEV);
+		return (SET_ERROR(ENODEV));
 	}
 
 	/*
@@ -4970,7 +4970,7 @@
 	 */
 	if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) {
 		ZFS_EXIT(zfsvfs);
-		return (EAGAIN);
+		return (SET_ERROR(EAGAIN));
 	}
 
 	as_rangelock(as);
@@ -5085,7 +5085,7 @@
 
 	if (cmd != F_FREESP) {
 		ZFS_EXIT(zfsvfs);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	if (error = convoff(vp, bfp, 0, offset)) {
@@ -5095,7 +5095,7 @@
 
 	if (bfp->l_len < 0) {
 		ZFS_EXIT(zfsvfs);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	off = bfp->l_start;
@@ -5135,7 +5135,16 @@
 	gen = (uint32_t)gen64;
 
 	size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
+
+#ifdef illumos
+	if (fidp->fid_len < size) {
+		fidp->fid_len = size;
+		ZFS_EXIT(zfsvfs);
+		return (SET_ERROR(ENOSPC));
+	}
+#else
 	fidp->fid_len = size;
+#endif
 
 	zfid = (zfid_short_t *)fidp;
 
@@ -5320,7 +5329,7 @@
 	int preamble, postamble;
 
 	if (xuio->xu_type != UIOTYPE_ZEROCOPY)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
@@ -5333,7 +5342,7 @@
 		blksz = max_blksz;
 		if (size < blksz || zp->z_blksz != blksz) {
 			ZFS_EXIT(zfsvfs);
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 		}
 		/*
 		 * Caller requests buffers for write before knowing where the
@@ -5401,7 +5410,7 @@
 		/* avoid potential complexity of dealing with it */
 		if (blksz > max_blksz) {
 			ZFS_EXIT(zfsvfs);
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 		}
 
 		maxsize = zp->z_size - uio->uio_loffset;
@@ -5410,12 +5419,12 @@
 
 		if (size < blksz || vn_has_cached_data(vp)) {
 			ZFS_EXIT(zfsvfs);
-			return (EINVAL);
+			return (SET_ERROR(EINVAL));
 		}
 		break;
 	default:
 		ZFS_EXIT(zfsvfs);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	uio->uio_extflg = UIO_XUIO;
@@ -5462,13 +5471,13 @@
 static int
 zfs_inval()
 {
-	return (EINVAL);
+	return (SET_ERROR(EINVAL));
 }
 
 static int
 zfs_isdir()
 {
-	return (EISDIR);
+	return (SET_ERROR(EISDIR));
 }
 /*
  * Directory vnode operations template

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 /* Portions Copyright 2007 Jeremy Teo */
@@ -1173,8 +1173,10 @@
 	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
 		sa_buf_rele(db, NULL);
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
+#ifdef __FreeBSD__
 		getnewvnode_drop_reserve();
-		return (EINVAL);
+#endif
+		return (SET_ERROR(EINVAL));
 	}
 
 	hdl = dmu_buf_get_user(db);
@@ -1193,7 +1195,7 @@
 		mutex_enter(&zp->z_lock);
 		ASSERT3U(zp->z_id, ==, obj_num);
 		if (zp->z_unlinked) {
-			err = ENOENT;
+			err = SET_ERROR(ENOENT);
 		} else {
 			vp = ZTOV(zp);
 			*zpp = zp;
@@ -1245,7 +1247,7 @@
 	zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size,
 	    doi.doi_bonus_type, NULL);
 	if (zp == NULL) {
-		err = ENOENT;
+		err = SET_ERROR(ENOENT);
 	} else {
 		*zpp = zp;
 	}
@@ -1304,7 +1306,7 @@
 	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
 		sa_buf_rele(db, NULL);
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 
 	zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL);
@@ -1331,7 +1333,7 @@
 	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
 		zfs_znode_dmu_fini(zp);
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
-		return (EIO);
+		return (SET_ERROR(EIO));
 	}
 
 	zp->z_mode = mode;
@@ -1339,7 +1341,7 @@
 	if (gen != zp->z_gen) {
 		zfs_znode_dmu_fini(zp);
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
-		return (EIO);
+		return (SET_ERROR(EIO));
 	}
 
 	/*
@@ -2007,7 +2009,7 @@
 	    doi.doi_bonus_type == DMU_OT_ZNODE &&
 	    doi.doi_bonus_size < sizeof (znode_phys_t)) {
 		sa_buf_rele(*db, tag);
-		return (ENOTSUP);
+		return (SET_ERROR(ENOTSUP));
 	}
 
 	error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
@@ -2076,7 +2078,7 @@
 	 * Otherwise the parent must be a directory.
 	 */
 	if (!*is_xattrdir && !S_ISDIR(parent_mode))
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	*pobjp = parent;
 

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
@@ -154,7 +154,7 @@
 	avl_index_t where;
 
 	if (avl_find(t, dva, &where) != NULL)
-		return (EEXIST);
+		return (SET_ERROR(EEXIST));
 
 	zn = kmem_alloc(sizeof (zil_bp_node_t), KM_SLEEP);
 	zn->zn_dva = *dva;
@@ -225,7 +225,7 @@
 
 			if (bcmp(&cksum, &zilc->zc_next_blk.blk_cksum,
 			    sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk)) {
-				error = ECKSUM;
+				error = SET_ERROR(ECKSUM);
 			} else {
 				bcopy(lr, dst, len);
 				*end = (char *)dst + len;
@@ -239,7 +239,7 @@
 			if (bcmp(&cksum, &zilc->zc_next_blk.blk_cksum,
 			    sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk) ||
 			    (zilc->zc_nused > (size - sizeof (*zilc)))) {
-				error = ECKSUM;
+				error = SET_ERROR(ECKSUM);
 			} else {
 				bcopy(lr, dst, zilc->zc_nused);
 				*end = (char *)dst + zilc->zc_nused;
@@ -247,7 +247,7 @@
 			}
 		}
 
-		VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
+		VERIFY(arc_buf_remove_ref(abuf, &abuf));
 	}
 
 	return (error);
@@ -344,7 +344,7 @@
 			break;
 
 		error = zil_read_log_block(zilog, &blk, &next_blk, lrbuf, &end);
-		if (error)
+		if (error != 0)
 			break;
 
 		for (lrp = lrbuf; lrp < end; lrp += reclen) {
@@ -479,7 +479,7 @@
 	if (dsl_dataset_is_snapshot(ds))
 		panic("dirtying snapshot!");
 
-	if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg) == 0) {
+	if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg)) {
 		/* up the hold count until we can be written out */
 		dmu_buf_add_ref(ds->ds_dbuf, zilog);
 	}
@@ -638,8 +638,8 @@
 	objset_t *os;
 	int error;
 
-	error = dmu_objset_hold(osname, FTAG, &os);
-	if (error) {
+	error = dmu_objset_own(osname, DMU_OST_ANY, B_FALSE, FTAG, &os);
+	if (error != 0) {
 		cmn_err(CE_WARN, "can't open objset for %s", osname);
 		return (0);
 	}
@@ -652,7 +652,7 @@
 			zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log);
 		BP_ZERO(&zh->zh_log);
 		dsl_dataset_dirty(dmu_objset_ds(os), tx);
-		dmu_objset_rele(os, FTAG);
+		dmu_objset_disown(os, FTAG);
 		return (0);
 	}
 
@@ -677,7 +677,7 @@
 	}
 
 	ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1));
-	dmu_objset_rele(os, FTAG);
+	dmu_objset_disown(os, FTAG);
 	return (0);
 }
 
@@ -697,7 +697,7 @@
 	ASSERT(tx == NULL);
 
 	error = dmu_objset_hold(osname, FTAG, &os);
-	if (error) {
+	if (error != 0) {
 		cmn_err(CE_WARN, "can't open objset for %s", osname);
 		return (0);
 	}
@@ -985,7 +985,7 @@
 	/* pass the old blkptr in order to spread log blocks across devs */
 	error = zio_alloc_zil(spa, txg, bp, &lwb->lwb_blk, zil_blksz,
 	    USE_SLOG(zilog));
-	if (!error) {
+	if (error == 0) {
 		ASSERT3U(bp->blk_birth, ==, txg);
 		bp->blk_cksum = lwb->lwb_blk.blk_cksum;
 		bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++;
@@ -1096,7 +1096,7 @@
 				txg_wait_synced(zilog->zl_dmu_pool, txg);
 				return (lwb);
 			}
-			if (error) {
+			if (error != 0) {
 				ASSERT(error == ENOENT || error == EEXIST ||
 				    error == EALREADY);
 				return (lwb);
@@ -1720,6 +1720,9 @@
 {
 	zilog->zl_stop_sync = 1;
 
+	ASSERT0(zilog->zl_suspend);
+	ASSERT0(zilog->zl_suspending);
+
 	ASSERT(list_is_empty(&zilog->zl_lwb_list));
 	list_destroy(&zilog->zl_lwb_list);
 
@@ -1815,32 +1818,100 @@
 	mutex_exit(&zilog->zl_lock);
 }
 
+static char *suspend_tag = "zil suspending";
+
 /*
  * Suspend an intent log.  While in suspended mode, we still honor
  * synchronous semantics, but we rely on txg_wait_synced() to do it.
- * We suspend the log briefly when taking a snapshot so that the snapshot
- * contains all the data it's supposed to, and has an empty intent log.
+ * On old version pools, we suspend the log briefly when taking a
+ * snapshot so that it will have an empty intent log.
+ *
+ * Long holds are not really intended to be used the way we do here --
+ * held for such a short time.  A concurrent caller of dsl_dataset_long_held()
+ * could fail.  Therefore we take pains to only put a long hold if it is
+ * actually necessary.  Fortunately, it will only be necessary if the
+ * objset is currently mounted (or the ZVOL equivalent).  In that case it
+ * will already have a long hold, so we are not really making things any worse.
+ *
+ * Ideally, we would locate the existing long-holder (i.e. the zfsvfs_t or
+ * zvol_state_t), and use their mechanism to prevent their hold from being
+ * dropped (e.g. VFS_HOLD()).  However, that would be even more pain for
+ * very little gain.
+ *
+ * if cookiep == NULL, this does both the suspend & resume.
+ * Otherwise, it returns with the dataset "long held", and the cookie
+ * should be passed into zil_resume().
  */
 int
-zil_suspend(zilog_t *zilog)
+zil_suspend(const char *osname, void **cookiep)
 {
-	const zil_header_t *zh = zilog->zl_header;
+	objset_t *os;
+	zilog_t *zilog;
+	const zil_header_t *zh;
+	int error;
 
+	error = dmu_objset_hold(osname, suspend_tag, &os);
+	if (error != 0)
+		return (error);
+	zilog = dmu_objset_zil(os);
+
 	mutex_enter(&zilog->zl_lock);
+	zh = zilog->zl_header;
+
 	if (zh->zh_flags & ZIL_REPLAY_NEEDED) {		/* unplayed log */
 		mutex_exit(&zilog->zl_lock);
-		return (EBUSY);
+		dmu_objset_rele(os, suspend_tag);
+		return (SET_ERROR(EBUSY));
 	}
-	if (zilog->zl_suspend++ != 0) {
+
+	/*
+	 * Don't put a long hold in the cases where we can avoid it.  This
+	 * is when there is no cookie so we are doing a suspend & resume
+	 * (i.e. called from zil_vdev_offline()), and there's nothing to do
+	 * for the suspend because it's already suspended, or there's no ZIL.
+	 */
+	if (cookiep == NULL && !zilog->zl_suspending &&
+	    (zilog->zl_suspend > 0 || BP_IS_HOLE(&zh->zh_log))) {
+		mutex_exit(&zilog->zl_lock);
+		dmu_objset_rele(os, suspend_tag);
+		return (0);
+	}
+
+	dsl_dataset_long_hold(dmu_objset_ds(os), suspend_tag);
+	dsl_pool_rele(dmu_objset_pool(os), suspend_tag);
+
+	zilog->zl_suspend++;
+
+	if (zilog->zl_suspend > 1) {
 		/*
-		 * Someone else already began a suspend.
+		 * Someone else is already suspending it.
 		 * Just wait for them to finish.
 		 */
+
 		while (zilog->zl_suspending)
 			cv_wait(&zilog->zl_cv_suspend, &zilog->zl_lock);
 		mutex_exit(&zilog->zl_lock);
+
+		if (cookiep == NULL)
+			zil_resume(os);
+		else
+			*cookiep = os;
 		return (0);
 	}
+
+	/*
+	 * If there is no pointer to an on-disk block, this ZIL must not
+	 * be active (e.g. filesystem not mounted), so there's nothing
+	 * to clean up.
+	 */
+	if (BP_IS_HOLE(&zh->zh_log)) {
+		ASSERT(cookiep != NULL); /* fast path already handled */
+
+		*cookiep = os;
+		mutex_exit(&zilog->zl_lock);
+		return (0);
+	}
+
 	zilog->zl_suspending = B_TRUE;
 	mutex_exit(&zilog->zl_lock);
 
@@ -1853,16 +1924,25 @@
 	cv_broadcast(&zilog->zl_cv_suspend);
 	mutex_exit(&zilog->zl_lock);
 
+	if (cookiep == NULL)
+		zil_resume(os);
+	else
+		*cookiep = os;
 	return (0);
 }
 
 void
-zil_resume(zilog_t *zilog)
+zil_resume(void *cookie)
 {
+	objset_t *os = cookie;
+	zilog_t *zilog = dmu_objset_zil(os);
+
 	mutex_enter(&zilog->zl_lock);
 	ASSERT(zilog->zl_suspend != 0);
 	zilog->zl_suspend--;
 	mutex_exit(&zilog->zl_lock);
+	dsl_dataset_long_rele(dmu_objset_ds(os), suspend_tag);
+	dsl_dataset_rele(dmu_objset_ds(os), suspend_tag);
 }
 
 typedef struct zil_replay_arg {
@@ -1935,7 +2015,7 @@
 	if (txtype == TX_WRITE && reclen == sizeof (lr_write_t)) {
 		error = zil_read_log_data(zilog, (lr_write_t *)lr,
 		    zr->zr_lr + reclen);
-		if (error)
+		if (error != 0)
 			return (zil_replay_error(zilog, lr, error));
 	}
 
@@ -1956,7 +2036,7 @@
 	 * is updated if we are in replay mode.
 	 */
 	error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, zr->zr_byteswap);
-	if (error) {
+	if (error != 0) {
 		/*
 		 * The DMU's dnode layer doesn't see removes until the txg
 		 * commits, so a subsequent claim can spuriously fail with
@@ -1966,7 +2046,7 @@
 		 */
 		txg_wait_synced(spa_get_dsl(zilog->zl_spa), 0);
 		error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, B_FALSE);
-		if (error)
+		if (error != 0)
 			return (zil_replay_error(zilog, lr, error));
 	}
 	return (0);
@@ -2040,19 +2120,10 @@
 int
 zil_vdev_offline(const char *osname, void *arg)
 {
-	objset_t *os;
-	zilog_t *zilog;
 	int error;
 
-	error = dmu_objset_hold(osname, FTAG, &os);
-	if (error)
-		return (error);
-
-	zilog = dmu_objset_zil(os);
-	if (zil_suspend(zilog) != 0)
-		error = EEXIST;
-	else
-		zil_resume(zilog);
-	dmu_objset_rele(os, FTAG);
-	return (error);
+	error = zil_suspend(osname, NULL);
+	if (error != 0)
+		return (SET_ERROR(EEXIST));
+	return (0);
 }

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -402,7 +402,7 @@
 	if (zio->io_error == 0 &&
 	    zio_decompress_data(BP_GET_COMPRESS(zio->io_bp),
 	    zio->io_data, data, zio->io_size, size) != 0)
-		zio->io_error = EIO;
+		zio->io_error = SET_ERROR(EIO);
 }
 
 /*
@@ -754,6 +754,7 @@
 void
 zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp)
 {
+	metaslab_check_free(spa, bp);
 	bplist_append(&spa->spa_free_bplist[txg & TXG_MASK], bp);
 }
 
@@ -799,6 +800,8 @@
 	ASSERT(txg == spa_first_txg(spa) || txg == 0);
 	ASSERT(!BP_GET_DEDUP(bp) || !spa_writeable(spa));	/* zdb(1M) */
 
+	metaslab_check_free(spa, bp);
+
 	zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
 	    done, private, ZIO_TYPE_CLAIM, ZIO_PRIORITY_NOW, flags,
 	    NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_CLAIM_PIPELINE);
@@ -2073,8 +2076,8 @@
 				if (arc_buf_size(abuf) != zio->io_orig_size ||
 				    bcmp(abuf->b_data, zio->io_orig_data,
 				    zio->io_orig_size) != 0)
-					error = EEXIST;
-				VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
+					error = SET_ERROR(EEXIST);
+				VERIFY(arc_buf_remove_ref(abuf, &abuf));
 			}
 
 			ddt_enter(ddt);
@@ -2544,7 +2547,7 @@
 			return (ZIO_PIPELINE_STOP);
 
 		if (!vdev_accessible(vd, zio)) {
-			zio->io_error = ENXIO;
+			zio->io_error = SET_ERROR(ENXIO);
 			zio_interrupt(zio);
 			return (ZIO_PIPELINE_STOP);
 		}
@@ -2593,7 +2596,7 @@
 
 		if (zio->io_error) {
 			if (!vdev_accessible(vd, zio)) {
-				zio->io_error = ENXIO;
+				zio->io_error = SET_ERROR(ENXIO);
 			} else {
 				unexpected_error = B_TRUE;
 			}
@@ -2692,7 +2695,7 @@
 	 */
 	if (zio->io_error && vd != NULL && vd->vdev_ops->vdev_op_leaf &&
 	    !vdev_accessible(vd, zio))
-		zio->io_error = ENXIO;
+		zio->io_error = SET_ERROR(ENXIO);
 
 	/*
 	 * If we can't write to an interior vdev (mirror or RAID-Z),
@@ -2699,8 +2702,9 @@
 	 * set vdev_cant_write so that we stop trying to allocate from it.
 	 */
 	if (zio->io_error == ENXIO && zio->io_type == ZIO_TYPE_WRITE &&
-	    vd != NULL && !vd->vdev_ops->vdev_op_leaf)
+	    vd != NULL && !vd->vdev_ops->vdev_op_leaf) {
 		vd->vdev_cant_write = B_TRUE;
+	}
 
 	if (zio->io_error)
 		zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_checksum.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_checksum.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_checksum.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -201,7 +202,7 @@
 	zio_cksum_t actual_cksum, expected_cksum, verifier;
 
 	if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	if (ci->ci_eck) {
 		zio_eck_t *eck;
@@ -216,10 +217,10 @@
 			else if (eck->zec_magic == BSWAP_64(ZEC_MAGIC))
 				nused = BSWAP_64(zilc->zc_nused);
 			else
-				return (ECKSUM);
+				return (SET_ERROR(ECKSUM));
 
 			if (nused > size)
-				return (ECKSUM);
+				return (SET_ERROR(ECKSUM));
 
 			size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t);
 		} else {
@@ -261,7 +262,7 @@
 	info->zbc_has_cksum = 1;
 
 	if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum))
-		return (ECKSUM);
+		return (SET_ERROR(ECKSUM));
 
 	if (zio_injection_enabled && !zio->io_error &&
 	    (error = zio_handle_fault_injection(zio, ECKSUM)) != 0) {

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -134,7 +134,7 @@
 	zio_compress_info_t *ci = &zio_compress_table[c];
 
 	if ((uint_t)c >= ZIO_COMPRESS_FUNCTIONS || ci->ci_decompress == NULL)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	return (ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level));
 }

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 /*
@@ -276,7 +276,7 @@
 				break;
 			}
 			if (handler->zi_record.zi_error == ENXIO) {
-				ret = EIO;
+				ret = SET_ERROR(EIO);
 				break;
 			}
 		}
@@ -416,7 +416,7 @@
 		 * still allowing it to be unloaded.
 		 */
 		if ((spa = spa_inject_addref(name)) == NULL)
-			return (ENOENT);
+			return (SET_ERROR(ENOENT));
 
 		handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP);
 
@@ -468,7 +468,7 @@
 		(void) strncpy(name, spa_name(handler->zi_spa), buflen);
 		ret = 0;
 	} else {
-		ret = ENOENT;
+		ret = SET_ERROR(ENOENT);
 	}
 
 	rw_exit(&inject_lock);
@@ -495,7 +495,7 @@
 
 	if (handler == NULL) {
 		rw_exit(&inject_lock);
-		return (ENOENT);
+		return (SET_ERROR(ENOENT));
 	}
 
 	list_remove(&inject_handlers, handler);

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c	2016-10-02 00:51:03 UTC (rev 9136)
@@ -23,6 +23,7 @@
  *
  * Copyright (c) 2006-2010 Pawel Jakub Dawidek <pjd at FreeBSD.org>
  * All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
@@ -144,7 +145,7 @@
 int zvol_maxphys = DMU_MAX_ACCESS/2;
 
 extern int zfs_set_prop_nvlist(const char *, zprop_source_t,
-    nvlist_t *, nvlist_t **);
+    nvlist_t *, nvlist_t *);
 static int zvol_remove_zv(zvol_state_t *);
 static int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio);
 static int zvol_dumpify(zvol_state_t *zv);
@@ -194,14 +195,14 @@
 zvol_check_volsize(uint64_t volsize, uint64_t blocksize)
 {
 	if (volsize == 0)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	if (volsize % blocksize != 0)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 #ifdef _ILP32
 	if (volsize - 1 > SPEC_MAXOFFSET_T)
-		return (EOVERFLOW);
+		return (SET_ERROR(EOVERFLOW));
 #endif
 	return (0);
 }
@@ -212,7 +213,7 @@
 	if (volblocksize < SPA_MINBLOCKSIZE ||
 	    volblocksize > SPA_MAXBLOCKSIZE ||
 	    !ISP2(volblocksize))
-		return (EDOM);
+		return (SET_ERROR(EDOM));
 
 	return (0);
 }
@@ -288,7 +289,7 @@
 
 	/* Abort immediately if we have encountered gang blocks */
 	if (BP_IS_GANG(bp))
-		return (EFRAGS);
+		return (SET_ERROR(EFRAGS));
 
 	/*
 	 * See if the block is at the end of the previous extent.
@@ -427,7 +428,7 @@
 static int
 zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap)
 {
-	return (ENOTSUP);
+	return (SET_ERROR(ENOTSUP));
 }
 
 /*
@@ -491,7 +492,7 @@
 
 	if (zvol_minor_lookup(name) != NULL) {
 		mutex_exit(&spa_namespace_lock);
-		return (EEXIST);
+		return (SET_ERROR(EEXIST));
 	}
 
 	/* lie and say we're read-only */
@@ -506,13 +507,13 @@
 	if ((minor = zfsdev_minor_alloc()) == 0) {
 		dmu_objset_disown(os, FTAG);
 		mutex_exit(&spa_namespace_lock);
-		return (ENXIO);
+		return (SET_ERROR(ENXIO));
 	}
 
 	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) {
 		dmu_objset_disown(os, FTAG);
 		mutex_exit(&spa_namespace_lock);
-		return (EAGAIN);
+		return (SET_ERROR(EAGAIN));
 	}
 	(void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME,
 	    (char *)name);
@@ -524,7 +525,7 @@
 		ddi_soft_state_free(zfsdev_state, minor);
 		dmu_objset_disown(os, FTAG);
 		mutex_exit(&spa_namespace_lock);
-		return (EAGAIN);
+		return (SET_ERROR(EAGAIN));
 	}
 
 	(void) snprintf(blkbuf, sizeof (blkbuf), "%u", minor);
@@ -535,7 +536,7 @@
 		ddi_soft_state_free(zfsdev_state, minor);
 		dmu_objset_disown(os, FTAG);
 		mutex_exit(&spa_namespace_lock);
-		return (EAGAIN);
+		return (SET_ERROR(EAGAIN));
 	}
 
 	zs = ddi_get_soft_state(zfsdev_state, minor);
@@ -609,7 +610,7 @@
 
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 	if (zv->zv_total_opens != 0)
-		return (EBUSY);
+		return (SET_ERROR(EBUSY));
 
 	ZFS_LOG(1, "ZVOL %s destroyed.", zv->zv_name);
 
@@ -636,7 +637,7 @@
 	mutex_enter(&spa_namespace_lock);
 	if ((zv = zvol_minor_lookup(name)) == NULL) {
 		mutex_exit(&spa_namespace_lock);
-		return (ENXIO);
+		return (SET_ERROR(ENXIO));
 	}
 	g_topology_lock();
 	rc = zvol_remove_zv(zv);
@@ -700,7 +701,7 @@
 	if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) &&
 	    !(zv->zv_flags & ZVOL_RDONLY))
 		txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
-	(void) dmu_objset_evict_dbufs(zv->zv_objset);
+	dmu_objset_evict_dbufs(zv->zv_objset);
 
 	dmu_objset_disown(zv->zv_objset, zvol_tag);
 	zv->zv_objset = NULL;
@@ -719,7 +720,7 @@
 	/* Check the space usage before attempting to allocate the space */
 	dmu_objset_space(os, &refd, &avail, &usedobjs, &availobjs);
 	if (avail < zv->zv_volsize)
-		return (ENOSPC);
+		return (SET_ERROR(ENOSPC));
 
 	/* Free old extents if they exist */
 	zvol_free_extents(zv);
@@ -747,7 +748,7 @@
 }
 #endif	/* sun */
 
-int
+static int
 zvol_update_volsize(objset_t *os, uint64_t volsize)
 {
 	dmu_tx_t *tx;
@@ -919,7 +920,7 @@
 	if (zv == NULL) {
 		if (locked)
 			mutex_exit(&spa_namespace_lock);
-		return (ENXIO);
+		return (SET_ERROR(ENXIO));
 	}
 
 	if (zv->zv_total_opens == 0)
@@ -930,17 +931,17 @@
 		return (err);
 	}
 	if ((flag & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
-		err = EROFS;
+		err = SET_ERROR(EROFS);
 		goto out;
 	}
 	if (zv->zv_flags & ZVOL_EXCL) {
-		err = EBUSY;
+		err = SET_ERROR(EBUSY);
 		goto out;
 	}
 #ifdef FEXCL
 	if (flag & FEXCL) {
 		if (zv->zv_total_opens != 0) {
-			err = EBUSY;
+			err = SET_ERROR(EBUSY);
 			goto out;
 		}
 		zv->zv_flags |= ZVOL_EXCL;
@@ -978,7 +979,7 @@
 	if (zv == NULL) {
 		if (locked)
 			mutex_exit(&spa_namespace_lock);
-		return (ENXIO);
+		return (SET_ERROR(ENXIO));
 	}
 
 	if (zv->zv_flags & ZVOL_EXCL) {
@@ -1187,9 +1188,9 @@
 		return (numerrors < vd->vdev_children ? 0 : EIO);
 
 	if (doread && !vdev_readable(vd))
-		return (EIO);
+		return (SET_ERROR(EIO));
 	else if (!doread && !vdev_writeable(vd))
-		return (EIO);
+		return (SET_ERROR(EIO));
 
 	dvd = vd->vdev_tsd;
 	ASSERT3P(dvd, !=, NULL);
@@ -1198,7 +1199,7 @@
 	if (ddi_in_panic() || isdump) {
 		ASSERT(!doread);
 		if (doread)
-			return (EIO);
+			return (SET_ERROR(EIO));
 		return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset),
 		    lbtodb(size)));
 	} else {
@@ -1219,7 +1220,7 @@
 	/* Must be sector aligned, and not stradle a block boundary. */
 	if (P2PHASE(offset, DEV_BSIZE) || P2PHASE(size, DEV_BSIZE) ||
 	    P2BOUNDARY(offset, size, zv->zv_volblocksize)) {
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 	}
 	ASSERT(size <= zv->zv_volblocksize);
 
@@ -1230,6 +1231,9 @@
 		ze = list_next(&zv->zv_extents, ze);
 	}
 
+	if (ze == NULL)
+		return (SET_ERROR(EINVAL));
+
 	if (!ddi_in_panic())
 		spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
 
@@ -1310,7 +1314,7 @@
 		if (error) {
 			/* convert checksum errors into IO errors */
 			if (error == ECKSUM)
-				error = EIO;
+				error = SET_ERROR(EIO);
 			break;
 		}
 		off += size;
@@ -1358,8 +1362,11 @@
 
 	zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
 	if (zv == NULL)
-		return (ENXIO);
+		return (SET_ERROR(ENXIO));
 
+	if ((zv->zv_flags & ZVOL_DUMPIFIED) == 0)
+		return (SET_ERROR(EINVAL));
+
 	boff = ldbtob(blkno);
 	resid = ldbtob(nblocks);
 
@@ -1390,12 +1397,12 @@
 
 	zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
 	if (zv == NULL)
-		return (ENXIO);
+		return (SET_ERROR(ENXIO));
 
 	volsize = zv->zv_volsize;
 	if (uio->uio_resid > 0 &&
 	    (uio->uio_loffset < 0 || uio->uio_loffset >= volsize))
-		return (EIO);
+		return (SET_ERROR(EIO));
 
 	if (zv->zv_flags & ZVOL_DUMPIFIED) {
 		error = physio(zvol_strategy, NULL, dev, B_READ,
@@ -1416,7 +1423,7 @@
 		if (error) {
 			/* convert checksum errors into IO errors */
 			if (error == ECKSUM)
-				error = EIO;
+				error = SET_ERROR(EIO);
 			break;
 		}
 	}
@@ -1437,12 +1444,12 @@
 
 	zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
 	if (zv == NULL)
-		return (ENXIO);
+		return (SET_ERROR(ENXIO));
 
 	volsize = zv->zv_volsize;
 	if (uio->uio_resid > 0 &&
 	    (uio->uio_loffset < 0 || uio->uio_loffset >= volsize))
-		return (EIO);
+		return (SET_ERROR(EIO));
 
 	if (zv->zv_flags & ZVOL_DUMPIFIED) {
 		error = physio(zvol_strategy, NULL, dev, B_WRITE,
@@ -1494,7 +1501,7 @@
 	char *ptr;
 
 	if (ddi_copyin(arg, &efi, sizeof (dk_efi_t), flag))
-		return (EFAULT);
+		return (SET_ERROR(EFAULT));
 	ptr = (char *)(uintptr_t)efi.dki_data_64;
 	length = efi.dki_length;
 	/*
@@ -1505,7 +1512,7 @@
 	 * PMBR.
 	 */
 	if (efi.dki_lba < 1 || efi.dki_lba > 2 || length <= 0)
-		return (EINVAL);
+		return (SET_ERROR(EINVAL));
 
 	gpe.efi_gpe_StartingLBA = LE_64(34ULL);
 	gpe.efi_gpe_EndingLBA = LE_64((vs >> bs) - 1);
@@ -1530,13 +1537,13 @@
 		gpt.efi_gpt_HeaderCRC32 = LE_32(~crc);
 		if (ddi_copyout(&gpt, ptr, MIN(sizeof (gpt), length),
 		    flag))
-			return (EFAULT);
+			return (SET_ERROR(EFAULT));
 		ptr += sizeof (gpt);
 		length -= sizeof (gpt);
 	}
 	if (length > 0 && ddi_copyout(&gpe, ptr, MIN(sizeof (gpe),
 	    length), flag))
-		return (EFAULT);
+		return (SET_ERROR(EFAULT));
 	return (0);
 }
 
@@ -1556,9 +1563,9 @@
 
 	zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
 	if (zv == NULL)
-		return (ENXIO);
+		return (SET_ERROR(ENXIO));
 	if (zv->zv_flags & ZVOL_DUMPIFIED)
-		return (ENXIO);
+		return (SET_ERROR(ENXIO));
 
 	ASSERT(blksize && max_xfer_len && minor_hdl &&
 	    objset_hdl && zil_hdl && rl_hdl && bonus_hdl);
@@ -1632,7 +1639,7 @@
 
 	if (zv == NULL) {
 		mutex_exit(&spa_namespace_lock);
-		return (ENXIO);
+		return (SET_ERROR(ENXIO));
 	}
 	ASSERT(zv->zv_total_opens > 0);
 
@@ -1647,7 +1654,7 @@
 		dki.dki_maxtransfer = 1 << (SPA_MAXBLOCKSHIFT - zv->zv_min_bs);
 		mutex_exit(&spa_namespace_lock);
 		if (ddi_copyout(&dki, (void *)arg, sizeof (dki), flag))
-			error = EFAULT;
+			error = SET_ERROR(EFAULT);
 		return (error);
 
 	case DKIOCGMEDIAINFO:
@@ -1657,7 +1664,7 @@
 		dkm.dki_media_type = DK_UNKNOWN;
 		mutex_exit(&spa_namespace_lock);
 		if (ddi_copyout(&dkm, (void *)arg, sizeof (dkm), flag))
-			error = EFAULT;
+			error = SET_ERROR(EFAULT);
 		return (error);
 
 	case DKIOCGETEFI:
@@ -1685,7 +1692,7 @@
 			int wce = (zv->zv_flags & ZVOL_WCE) ? 1 : 0;
 			if (ddi_copyout(&wce, (void *)arg, sizeof (int),
 			    flag))
-				error = EFAULT;
+				error = SET_ERROR(EFAULT);
 			break;
 		}
 	case DKIOCSETWCE:
@@ -1693,7 +1700,7 @@
 			int wce;
 			if (ddi_copyin((void *)arg, &wce, sizeof (int),
 			    flag)) {
-				error = EFAULT;
+				error = SET_ERROR(EFAULT);
 				break;
 			}
 			if (wce) {
@@ -1713,7 +1720,7 @@
 		 * commands using these (like prtvtoc) expect ENOTSUP
 		 * since we're emulating an EFI label
 		 */
-		error = ENOTSUP;
+		error = SET_ERROR(ENOTSUP);
 		break;
 
 	case DKIOCDUMPINIT:
@@ -1732,8 +1739,67 @@
 		zfs_range_unlock(rl);
 		break;
 
+	case DKIOCFREE:
+	{
+		dkioc_free_t df;
+		dmu_tx_t *tx;
+
+		if (ddi_copyin((void *)arg, &df, sizeof (df), flag)) {
+			error = SET_ERROR(EFAULT);
+			break;
+		}
+
+		/*
+		 * Apply Postel's Law to length-checking.  If they overshoot,
+		 * just blank out until the end, if there's a need to blank
+		 * out anything.
+		 */
+		if (df.df_start >= zv->zv_volsize)
+			break;	/* No need to do anything... */
+		if (df.df_start + df.df_length > zv->zv_volsize)
+			df.df_length = DMU_OBJECT_END;
+
+		rl = zfs_range_lock(&zv->zv_znode, df.df_start, df.df_length,
+		    RL_WRITER);
+		tx = dmu_tx_create(zv->zv_objset);
+		error = dmu_tx_assign(tx, TXG_WAIT);
+		if (error != 0) {
+			dmu_tx_abort(tx);
+		} else {
+			zvol_log_truncate(zv, tx, df.df_start,
+			    df.df_length, B_TRUE);
+			dmu_tx_commit(tx);
+			error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
+			    df.df_start, df.df_length);
+		}
+
+		zfs_range_unlock(rl);
+
+		if (error == 0) {
+			/*
+			 * If the write-cache is disabled or 'sync' property
+			 * is set to 'always' then treat this as a synchronous
+			 * operation (i.e. commit to zil).
+			 */
+			if (!(zv->zv_flags & ZVOL_WCE) ||
+			    (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS))
+				zil_commit(zv->zv_zilog, ZVOL_OBJ);
+
+			/*
+			 * If the caller really wants synchronous writes, and
+			 * can't wait for them, don't return until the write
+			 * is done.
+			 */
+			if (df.df_flags & DF_WAIT_SYNC) {
+				txg_wait_synced(
+				    dmu_objset_pool(zv->zv_objset), 0);
+			}
+		}
+		break;
+	}
+
 	default:
-		error = ENOTTY;
+		error = SET_ERROR(ENOTTY);
 		break;
 
 	}
@@ -1880,11 +1946,11 @@
 	objset_t *os = zv->zv_objset;
 
 	if (zv->zv_flags & ZVOL_RDONLY)
-		return (EROFS);
+		return (SET_ERROR(EROFS));
 
 	if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE,
 	    8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) {
-		boolean_t resize = (dumpsize > 0) ? B_TRUE : B_FALSE;
+		boolean_t resize = (dumpsize > 0);
 
 		if ((error = zvol_dump_init(zv, resize)) != 0) {
 			(void) zvol_dump_fini(zv);
@@ -2183,8 +2249,10 @@
 	cookie = obj = 0;
 	sname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 
+#if 0
 	(void) dmu_objset_find(name, dmu_objset_prefetch, NULL,
 	    DS_FIND_SNAPSHOTS);
+#endif
 
 	for (;;) {
 		len = snprintf(sname, MAXPATHLEN, "%s@", name);
@@ -2194,8 +2262,10 @@
 			break;
 		}
 
+		dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
 		error = dmu_snapshot_list_next(os, MAXPATHLEN - len,
 		    sname + len, &obj, &cookie, NULL);
+		dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
 		if (error != 0) {
 			if (error == ENOENT)
 				error = 0;
@@ -2230,6 +2300,8 @@
 		return (error);
 	}
 	if (dmu_objset_type(os) == DMU_OST_ZVOL) {
+		dsl_dataset_long_hold(os->os_dsl_dataset, FTAG);
+		dsl_pool_rele(dmu_objset_pool(os), FTAG);
 		if ((error = zvol_create_minor(name)) == 0)
 			error = zvol_create_snapshots(os, name);
 		else {
@@ -2236,7 +2308,8 @@
 			printf("ZFS WARNING: Unable to create ZVOL %s (error=%d).\n",
 			    name, error);
 		}
-		dmu_objset_rele(os, FTAG);
+		dsl_dataset_long_rele(os->os_dsl_dataset, FTAG);
+		dsl_dataset_rele(os->os_dsl_dataset, FTAG);
 		return (error);
 	}
 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
@@ -2253,6 +2326,7 @@
 	p = osname + strlen(osname);
 	len = MAXPATHLEN - (p - osname);
 
+#if 0
 	/* Prefetch the datasets. */
 	cookie = 0;
 	while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) {
@@ -2259,6 +2333,7 @@
 		if (!dataset_name_hidden(osname))
 			(void) dmu_objset_prefetch(osname, NULL);
 	}
+#endif
 
 	cookie = 0;
 	while (dmu_dir_list_next(os, MAXPATHLEN - (p - osname), p, NULL,

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/sys/feature_tests.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/sys/feature_tests.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/sys/feature_tests.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -27,8 +27,6 @@
 #ifndef _SYS_FEATURE_TESTS_H
 #define	_SYS_FEATURE_TESTS_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/ccompile.h>
 
 #ifdef	__cplusplus
@@ -366,7 +364,7 @@
  * compiler is used. This allows for the use of single prototype
  * declarations regardless of compiler version.
  */
-#if (defined(__STDC__) && defined(_STDC_C99))
+#if (defined(__STDC__) && defined(_STDC_C99)) && !defined(__cplusplus)
 #define	_RESTRICT_KYWD	restrict
 #else
 #define	_RESTRICT_KYWD

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -55,6 +55,16 @@
 	ZFS_TYPE_POOL		= 0x8
 } zfs_type_t;
 
+typedef enum dmu_objset_type {
+	DMU_OST_NONE,
+	DMU_OST_META,
+	DMU_OST_ZFS,
+	DMU_OST_ZVOL,
+	DMU_OST_OTHER,			/* For testing only! */
+	DMU_OST_ANY,			/* Be careful! */
+	DMU_OST_NUMTYPES
+} dmu_objset_type_t;
+
 #define	ZFS_TYPE_DATASET	\
 	(ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME | ZFS_TYPE_SNAPSHOT)
 
@@ -750,76 +760,80 @@
 /*
  * /dev/zfs ioctl numbers.
  */
-typedef	unsigned long	zfs_ioc_t;
+typedef enum zfs_ioc {
+	ZFS_IOC_FIRST =	0,
+	ZFS_IOC_POOL_CREATE = ZFS_IOC_FIRST,
+	ZFS_IOC_POOL_DESTROY,
+	ZFS_IOC_POOL_IMPORT,
+	ZFS_IOC_POOL_EXPORT,
+	ZFS_IOC_POOL_CONFIGS,
+	ZFS_IOC_POOL_STATS,
+	ZFS_IOC_POOL_TRYIMPORT,
+	ZFS_IOC_POOL_SCAN,
+	ZFS_IOC_POOL_FREEZE,
+	ZFS_IOC_POOL_UPGRADE,
+	ZFS_IOC_POOL_GET_HISTORY,
+	ZFS_IOC_VDEV_ADD,
+	ZFS_IOC_VDEV_REMOVE,
+	ZFS_IOC_VDEV_SET_STATE,
+	ZFS_IOC_VDEV_ATTACH,
+	ZFS_IOC_VDEV_DETACH,
+	ZFS_IOC_VDEV_SETPATH,
+	ZFS_IOC_VDEV_SETFRU,
+	ZFS_IOC_OBJSET_STATS,
+	ZFS_IOC_OBJSET_ZPLPROPS,
+	ZFS_IOC_DATASET_LIST_NEXT,
+	ZFS_IOC_SNAPSHOT_LIST_NEXT,
+	ZFS_IOC_SET_PROP,
+	ZFS_IOC_CREATE,
+	ZFS_IOC_DESTROY,
+	ZFS_IOC_ROLLBACK,
+	ZFS_IOC_RENAME,
+	ZFS_IOC_RECV,
+	ZFS_IOC_SEND,
+	ZFS_IOC_INJECT_FAULT,
+	ZFS_IOC_CLEAR_FAULT,
+	ZFS_IOC_INJECT_LIST_NEXT,
+	ZFS_IOC_ERROR_LOG,
+	ZFS_IOC_CLEAR,
+	ZFS_IOC_PROMOTE,
+	ZFS_IOC_DESTROY_SNAPS,
+	ZFS_IOC_SNAPSHOT,
+	ZFS_IOC_DSOBJ_TO_DSNAME,
+	ZFS_IOC_OBJ_TO_PATH,
+	ZFS_IOC_POOL_SET_PROPS,
+	ZFS_IOC_POOL_GET_PROPS,
+	ZFS_IOC_SET_FSACL,
+	ZFS_IOC_GET_FSACL,
+	ZFS_IOC_SHARE,
+	ZFS_IOC_INHERIT_PROP,
+	ZFS_IOC_SMB_ACL,
+	ZFS_IOC_USERSPACE_ONE,
+	ZFS_IOC_USERSPACE_MANY,
+	ZFS_IOC_USERSPACE_UPGRADE,
+	ZFS_IOC_HOLD,
+	ZFS_IOC_RELEASE,
+	ZFS_IOC_GET_HOLDS,
+	ZFS_IOC_OBJSET_RECVD_PROPS,
+	ZFS_IOC_VDEV_SPLIT,
+	ZFS_IOC_NEXT_OBJ,
+	ZFS_IOC_DIFF,
+	ZFS_IOC_TMP_SNAPSHOT,
+	ZFS_IOC_OBJ_TO_STATS,
+	ZFS_IOC_JAIL,
+	ZFS_IOC_UNJAIL,
+	ZFS_IOC_POOL_REGUID,
+	ZFS_IOC_SPACE_WRITTEN,
+	ZFS_IOC_SPACE_SNAPS,
+	ZFS_IOC_SEND_PROGRESS,
+	ZFS_IOC_POOL_REOPEN,
+	ZFS_IOC_LOG_HISTORY,
+	ZFS_IOC_SEND_NEW,
+	ZFS_IOC_SEND_SPACE,
+	ZFS_IOC_CLONE,
+	ZFS_IOC_LAST
+} zfs_ioc_t;
 
-#define	ZFS_IOC(ioreq)	((ioreq) & 0xff)
-
-#define	ZFS_IOC_POOL_CREATE		_IOWR('Z', 0, struct zfs_cmd)
-#define	ZFS_IOC_POOL_DESTROY		_IOWR('Z', 1, struct zfs_cmd)
-#define	ZFS_IOC_POOL_IMPORT		_IOWR('Z', 2, struct zfs_cmd)
-#define	ZFS_IOC_POOL_EXPORT		_IOWR('Z', 3, struct zfs_cmd)
-#define	ZFS_IOC_POOL_CONFIGS		_IOWR('Z', 4, struct zfs_cmd)
-#define	ZFS_IOC_POOL_STATS		_IOWR('Z', 5, struct zfs_cmd)
-#define	ZFS_IOC_POOL_TRYIMPORT		_IOWR('Z', 6, struct zfs_cmd)
-#define	ZFS_IOC_POOL_SCAN		_IOWR('Z', 7, struct zfs_cmd)
-#define	ZFS_IOC_POOL_FREEZE		_IOWR('Z', 8, struct zfs_cmd)
-#define	ZFS_IOC_POOL_UPGRADE		_IOWR('Z', 9, struct zfs_cmd)
-#define	ZFS_IOC_POOL_GET_HISTORY	_IOWR('Z', 10, struct zfs_cmd)
-#define	ZFS_IOC_VDEV_ADD		_IOWR('Z', 11, struct zfs_cmd)
-#define	ZFS_IOC_VDEV_REMOVE		_IOWR('Z', 12, struct zfs_cmd)
-#define	ZFS_IOC_VDEV_SET_STATE		_IOWR('Z', 13, struct zfs_cmd)
-#define	ZFS_IOC_VDEV_ATTACH		_IOWR('Z', 14, struct zfs_cmd)
-#define	ZFS_IOC_VDEV_DETACH		_IOWR('Z', 15, struct zfs_cmd)
-#define	ZFS_IOC_VDEV_SETPATH		_IOWR('Z', 16, struct zfs_cmd)
-#define	ZFS_IOC_VDEV_SETFRU		_IOWR('Z', 17, struct zfs_cmd)
-#define	ZFS_IOC_OBJSET_STATS		_IOWR('Z', 18, struct zfs_cmd)
-#define	ZFS_IOC_OBJSET_ZPLPROPS		_IOWR('Z', 19, struct zfs_cmd)
-#define	ZFS_IOC_DATASET_LIST_NEXT	_IOWR('Z', 20, struct zfs_cmd)
-#define	ZFS_IOC_SNAPSHOT_LIST_NEXT	_IOWR('Z', 21, struct zfs_cmd)
-#define	ZFS_IOC_SET_PROP		_IOWR('Z', 22, struct zfs_cmd)
-#define	ZFS_IOC_CREATE			_IOWR('Z', 23, struct zfs_cmd)
-#define	ZFS_IOC_DESTROY			_IOWR('Z', 24, struct zfs_cmd)
-#define	ZFS_IOC_ROLLBACK		_IOWR('Z', 25, struct zfs_cmd)
-#define	ZFS_IOC_RENAME			_IOWR('Z', 26, struct zfs_cmd)
-#define	ZFS_IOC_RECV			_IOWR('Z', 27, struct zfs_cmd)
-#define	ZFS_IOC_SEND			_IOWR('Z', 28, struct zfs_cmd)
-#define	ZFS_IOC_INJECT_FAULT		_IOWR('Z', 29, struct zfs_cmd)
-#define	ZFS_IOC_CLEAR_FAULT		_IOWR('Z', 30, struct zfs_cmd)
-#define	ZFS_IOC_INJECT_LIST_NEXT	_IOWR('Z', 31, struct zfs_cmd)
-#define	ZFS_IOC_ERROR_LOG		_IOWR('Z', 32, struct zfs_cmd)
-#define	ZFS_IOC_CLEAR			_IOWR('Z', 33, struct zfs_cmd)
-#define	ZFS_IOC_PROMOTE			_IOWR('Z', 34, struct zfs_cmd)
-#define	ZFS_IOC_DESTROY_SNAPS_NVL	_IOWR('Z', 35, struct zfs_cmd)
-#define	ZFS_IOC_SNAPSHOT		_IOWR('Z', 36, struct zfs_cmd)
-#define	ZFS_IOC_DSOBJ_TO_DSNAME		_IOWR('Z', 37, struct zfs_cmd)
-#define	ZFS_IOC_OBJ_TO_PATH		_IOWR('Z', 38, struct zfs_cmd)
-#define	ZFS_IOC_POOL_SET_PROPS		_IOWR('Z', 39, struct zfs_cmd)
-#define	ZFS_IOC_POOL_GET_PROPS		_IOWR('Z', 40, struct zfs_cmd)
-#define	ZFS_IOC_SET_FSACL		_IOWR('Z', 41, struct zfs_cmd)
-#define	ZFS_IOC_GET_FSACL		_IOWR('Z', 42, struct zfs_cmd)
-#define	ZFS_IOC_SHARE			_IOWR('Z', 43, struct zfs_cmd)
-#define	ZFS_IOC_INHERIT_PROP		_IOWR('Z', 44, struct zfs_cmd)
-#define	ZFS_IOC_SMB_ACL			_IOWR('Z', 45, struct zfs_cmd)
-#define	ZFS_IOC_USERSPACE_ONE		_IOWR('Z', 46, struct zfs_cmd)
-#define	ZFS_IOC_USERSPACE_MANY		_IOWR('Z', 47, struct zfs_cmd)
-#define	ZFS_IOC_USERSPACE_UPGRADE	_IOWR('Z', 48, struct zfs_cmd)
-#define	ZFS_IOC_HOLD			_IOWR('Z', 49, struct zfs_cmd)
-#define	ZFS_IOC_RELEASE			_IOWR('Z', 50, struct zfs_cmd)
-#define	ZFS_IOC_GET_HOLDS		_IOWR('Z', 51, struct zfs_cmd)
-#define	ZFS_IOC_OBJSET_RECVD_PROPS	_IOWR('Z', 52, struct zfs_cmd)
-#define	ZFS_IOC_VDEV_SPLIT		_IOWR('Z', 53, struct zfs_cmd)
-#define	ZFS_IOC_NEXT_OBJ		_IOWR('Z', 54, struct zfs_cmd)
-#define	ZFS_IOC_DIFF			_IOWR('Z', 55, struct zfs_cmd)
-#define	ZFS_IOC_TMP_SNAPSHOT		_IOWR('Z', 56, struct zfs_cmd)
-#define	ZFS_IOC_OBJ_TO_STATS		_IOWR('Z', 57, struct zfs_cmd)
-#define	ZFS_IOC_JAIL			_IOWR('Z', 58, struct zfs_cmd)
-#define	ZFS_IOC_UNJAIL			_IOWR('Z', 59, struct zfs_cmd)
-#define	ZFS_IOC_POOL_REGUID		_IOWR('Z', 60, struct zfs_cmd)
-#define	ZFS_IOC_SPACE_WRITTEN		_IOWR('Z', 61, struct zfs_cmd)
-#define	ZFS_IOC_SPACE_SNAPS		_IOWR('Z', 62, struct zfs_cmd)
-#define	ZFS_IOC_SEND_PROGRESS		_IOWR('Z', 63, struct zfs_cmd)
-#define	ZFS_IOC_POOL_REOPEN		_IOWR('Z', 64, struct zfs_cmd)
-
 /*
  * Internal SPA load state.  Used by FMA diagnosis engine.
  */
@@ -854,6 +868,12 @@
 #define	ZPOOL_HIST_TXG		"history txg"
 #define	ZPOOL_HIST_INT_EVENT	"history internal event"
 #define	ZPOOL_HIST_INT_STR	"history internal str"
+#define	ZPOOL_HIST_INT_NAME	"internal_name"
+#define	ZPOOL_HIST_IOCTL	"ioctl"
+#define	ZPOOL_HIST_INPUT_NVL	"in_nvl"
+#define	ZPOOL_HIST_OUTPUT_NVL	"out_nvl"
+#define	ZPOOL_HIST_DSNAME	"dsname"
+#define	ZPOOL_HIST_DSID		"dsid"
 
 /*
  * Flags for ZFS_IOC_VDEV_SET_STATE
@@ -899,56 +919,6 @@
 #define	ZFS_EV_VDEV_PATH	"vdev_path"
 #define	ZFS_EV_VDEV_GUID	"vdev_guid"
 
-/*
- * Note: This is encoded on-disk, so new events must be added to the
- * end, and unused events can not be removed.  Be sure to edit
- * libzfs_pool.c: hist_event_table[].
- */
-typedef enum history_internal_events {
-	LOG_NO_EVENT = 0,
-	LOG_POOL_CREATE,
-	LOG_POOL_VDEV_ADD,
-	LOG_POOL_REMOVE,
-	LOG_POOL_DESTROY,
-	LOG_POOL_EXPORT,
-	LOG_POOL_IMPORT,
-	LOG_POOL_VDEV_ATTACH,
-	LOG_POOL_VDEV_REPLACE,
-	LOG_POOL_VDEV_DETACH,
-	LOG_POOL_VDEV_ONLINE,
-	LOG_POOL_VDEV_OFFLINE,
-	LOG_POOL_UPGRADE,
-	LOG_POOL_CLEAR,
-	LOG_POOL_SCAN,
-	LOG_POOL_PROPSET,
-	LOG_DS_CREATE,
-	LOG_DS_CLONE,
-	LOG_DS_DESTROY,
-	LOG_DS_DESTROY_BEGIN,
-	LOG_DS_INHERIT,
-	LOG_DS_PROPSET,
-	LOG_DS_QUOTA,
-	LOG_DS_PERM_UPDATE,
-	LOG_DS_PERM_REMOVE,
-	LOG_DS_PERM_WHO_REMOVE,
-	LOG_DS_PROMOTE,
-	LOG_DS_RECEIVE,
-	LOG_DS_RENAME,
-	LOG_DS_RESERVATION,
-	LOG_DS_REPLAY_INC_SYNC,
-	LOG_DS_REPLAY_FULL_SYNC,
-	LOG_DS_ROLLBACK,
-	LOG_DS_SNAPSHOT,
-	LOG_DS_UPGRADE,
-	LOG_DS_REFQUOTA,
-	LOG_DS_REFRESERV,
-	LOG_POOL_SCAN_DONE,
-	LOG_DS_USER_HOLD,
-	LOG_DS_USER_RELEASE,
-	LOG_POOL_SPLIT,
-	LOG_END
-} history_internal_events_t;
-
 #ifdef	__cplusplus
 }
 #endif

Modified: trunk/sys/cddl/contrib/opensolaris/uts/common/sys/nvpair.h
===================================================================
--- trunk/sys/cddl/contrib/opensolaris/uts/common/sys/nvpair.h	2016-10-02 00:44:07 UTC (rev 9135)
+++ trunk/sys/cddl/contrib/opensolaris/uts/common/sys/nvpair.h	2016-10-02 00:51:03 UTC (rev 9136)
@@ -283,6 +283,7 @@
 nvlist_t *fnvlist_unpack(char *, size_t);
 nvlist_t *fnvlist_dup(nvlist_t *);
 void fnvlist_merge(nvlist_t *, nvlist_t *);
+size_t fnvlist_num_pairs(nvlist_t *);
 
 void fnvlist_add_boolean(nvlist_t *, const char *);
 void fnvlist_add_boolean_value(nvlist_t *, const char *, boolean_t);